How to copy unique files to a new directory in Python

Solutions on MaxInterview for "how to copy unique files to new directory python", written by the best coders in the world

showing results for - "how to copy unique files to new directory python"
Emiliano
27 Sep 2019
# dupFinder.py
import hashlib
import os
import sys
4 
def findDup(parentFolder):
    """Group every file found under ``parentFolder`` by its content hash.

    The tree is traversed with ``os.walk``; a ``Scanning <dir>...`` line is
    printed for each directory visited, and each file is hashed with
    ``hashfile``.

    Args:
        parentFolder: Root directory to scan.

    Returns:
        A dict in the format ``{hash: [paths]}``. Any entry whose list holds
        more than one path is a set of files with identical content.
    """
    dups = {}
    for dirName, subdirs, fileList in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        for filename in fileList:
            # Full path to the file, then file it under its content hash.
            path = os.path.join(dirName, filename)
            dups.setdefault(hashfile(path), []).append(path)
    return dups
21 
22 
def joinDicts(dict1, dict2):
    """Join ``dict2`` into ``dict1`` in place.

    Both dicts are expected in the ``{hash: [paths]}`` format. For a key
    present in both, ``dict1`` receives a new list holding its own entries
    followed by those of ``dict2``. For a key only in ``dict2``, ``dict1``
    receives a copy of the list, so the two dicts never share a list object
    and later mutations through one cannot leak into the other.

    Args:
        dict1: Destination dict; modified in place.
        dict2: Source dict; left untouched.

    Returns:
        None.
    """
    for key, paths in dict2.items():
        if key in dict1:
            dict1[key] = dict1[key] + paths
        else:
            # Copy rather than alias dict2's list.
            dict1[key] = list(paths)
30 
31 
def hashfile(path, blocksize=65536):
    """Return the MD5 hex digest of the file at ``path``.

    The file is opened in binary mode and read in chunks of ``blocksize``
    bytes, so the whole file is never held in memory at once.

    Args:
        path: Path of the file to hash.
        blocksize: Number of bytes to read per chunk.

    Returns:
        The hexadecimal MD5 digest as a string.
    """
    hasher = hashlib.md5()
    # The context manager closes the file even if a read fails part-way.
    with open(path, 'rb') as afile:
        # read() returns b'' at end of file, which ends the iteration.
        for buf in iter(lambda: afile.read(blocksize), b''):
            hasher.update(buf)
    return hasher.hexdigest()
41 
42 
def printResults(dict1):
    """Print every group of duplicate files found in ``dict1``.

    Args:
        dict1: Dict in the ``{hash: [paths]}`` format. Only entries whose
            list holds more than one path are reported.

    Returns:
        None. The report, or a "no duplicates" message when no entry has
        more than one path, is written to standard output.
    """
    results = [paths for paths in dict1.values() if len(paths) > 1]
    if results:
        print('Duplicates Found:')
        print('The following files are identical. The name could differ, but the content is identical')
        print('___________________')
        for result in results:
            for subresult in result:
                print('\t\t%s' % subresult)
            # Separator after each group of identical files.
            print('___________________')

    else:
        print('No duplicate files found.')
56 
57 
# Script entry point: scan every folder given on the command line and report
# files whose content is identical, within or across those folders.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        dups = {}
        folders = sys.argv[1:]
        for i in folders:
            # Iterate the folders given
            if os.path.exists(i):
                # Find the duplicated files and append them to the dups
                joinDicts(dups, findDup(i))
            else:
                print('%s is not a valid path, please verify' % i)
                # Exit with a non-zero status so callers can detect the failure.
                sys.exit(1)
        printResults(dups)
    else:
        print('Usage: python dupFinder.py folder or python dupFinder.py folder1 folder2 folder3')
73