Let quickid use any of hashlib.algorithms_guaranteed.

This commit is contained in:
Ethan Dalool 2020-09-09 14:33:45 -07:00
parent 7052b2c432
commit 52afa2489a

View file

@ -2,7 +2,6 @@
This module is designed to provide a GOOD ENOUGH means of identifying duplicate This module is designed to provide a GOOD ENOUGH means of identifying duplicate
files very quickly, so that more in-depth checks can be done on likely matches. files very quickly, so that more in-depth checks can be done on likely matches.
''' '''
import hashlib import hashlib
import os import os
import sys import sys
@ -11,6 +10,11 @@ SEEK_END = 2
CHUNK_SIZE = 2**20 CHUNK_SIZE = 2**20
FORMAT = '{size}_{hashtype}_{chunk_size}_{hash}' FORMAT = '{size}_{hashtype}_{chunk_size}_{hash}'
HASH_CLASSES = {
name: getattr(hashlib, name)
for name in hashlib.algorithms_guaranteed
}
def equal_handle(handle1, handle2, *args, **kwargs): def equal_handle(handle1, handle2, *args, **kwargs):
size1 = handle1.seek(0, SEEK_END) size1 = handle1.seek(0, SEEK_END)
size2 = handle2.seek(0, SEEK_END) size2 = handle2.seek(0, SEEK_END)
@ -18,6 +22,7 @@ def equal_handle(handle1, handle2, *args, **kwargs):
handle2.seek(0) handle2.seek(0)
if size1 != size2: if size1 != size2:
return False return False
id1 = quickid_handle(handle1, *args, **kwargs) id1 = quickid_handle(handle1, *args, **kwargs)
id2 = quickid_handle(handle2, *args, **kwargs) id2 = quickid_handle(handle2, *args, **kwargs)
return id1 == id2 return id1 == id2
@ -30,12 +35,11 @@ def equal_file(filename1, filename2, *args, **kwargs):
with open(filename1, 'rb') as handle1, open(filename2, 'rb') as handle2: with open(filename1, 'rb') as handle1, open(filename2, 'rb') as handle2:
return equal_handle(handle1, handle2, *args, **kwargs) return equal_handle(handle1, handle2, *args, **kwargs)
def quickid_handle(handle, chunk_size=None): def quickid_handle(handle, hashtype='md5', chunk_size=None):
if chunk_size is None: if chunk_size is None:
chunk_size = CHUNK_SIZE chunk_size = CHUNK_SIZE
hashtype = 'md5' hasher = HASH_CLASSES[hashtype]()
hasher = hashlib.md5()
size = handle.seek(0, SEEK_END) size = handle.seek(0, SEEK_END)
handle.seek(0) handle.seek(0)