Rewrite hash_hardlink with argparse.

This commit is contained in:
voussoir 2021-01-14 02:35:43 -08:00
parent 1359ab040e
commit 70da07608f

View file

@ -1,10 +1,16 @@
import argparse
import hashlib import hashlib
import os import os
import send2trash import send2trash
import sys import sys
from voussoirkit import lazychain
from voussoirkit import pathclass from voussoirkit import pathclass
from voussoirkit import pipeable
from voussoirkit import spinal from voussoirkit import spinal
from voussoirkit import vlogging
log = vlogging.getLogger(__name__)
def hash_file(file): def hash_file(file):
hasher = hashlib.md5() hasher = hashlib.md5()
@ -16,25 +22,31 @@ def hash_file(file):
hasher.update(chunk) hasher.update(chunk)
return hasher.hexdigest() return hasher.hexdigest()
def main(argv): def hash_hardlink_argparse(args):
folders = [pathclass.Path(p) for p in argv] paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)]
drives = set(os.path.splitdrive(folder.absolute_path)[0] for folder in folders) drives = set(path.stat.st_dev for path in paths)
if len(drives) != 1: if len(drives) != 1:
raise ValueError('All paths must be on the same drive.') raise ValueError('All paths must be on the same drive.')
files = lazychain.LazyChain()
for path in paths:
if path.is_file:
files.append(path)
elif path.is_dir:
files.extend(spinal.walk_generator(path))
inodes = set() inodes = set()
hashes = {} hashes = {}
for folder in folders: for file in files:
for file in spinal.walk_generator(folder): if file.stat.st_ino in inodes:
if file.stat.st_ino in inodes: # This file is already a hardlink of another file we've seen.
# This file is already a hardlink of another file we've seen. continue
continue inodes.add(file.stat.st_ino)
inodes.add(file.stat.st_ino) h = hash_file(file)
h = hash_file(file) print(file.absolute_path, h)
print(file.absolute_path, h) hashes.setdefault(h, []).append(file)
hashes.setdefault(h, []).append(file)
hashes = {h: files for (h, files) in hashes.items() if len(files) > 1} hashes = {h: files for (h, files) in hashes.items() if len(files) > 1}
@ -45,5 +57,16 @@ def main(argv):
send2trash.send2trash(follower.absolute_path) send2trash.send2trash(follower.absolute_path)
os.link(leader.absolute_path, follower.absolute_path) os.link(leader.absolute_path, follower.absolute_path)
def main(argv):
    """
    Command-line entry point: build the argument parser, parse argv, and
    dispatch to the handler registered on the parser.

    argv: list of command-line argument strings, without the program name.

    Returns whatever the dispatched handler returns.
    """
    # NOTE(review): set_level_by_argv presumably configures the logger from
    # flags in argv and returns the arguments that remain -- confirm against
    # voussoirkit.vlogging.
    remaining = vlogging.set_level_by_argv(log, argv)

    cli = argparse.ArgumentParser(description=__doc__)
    # One or more paths are required.
    cli.add_argument('paths', nargs='+')
    cli.set_defaults(func=hash_hardlink_argparse)

    parsed = cli.parse_args(remaining)
    handler = parsed.func
    return handler(parsed)
if __name__ == '__main__': if __name__ == '__main__':
raise SystemExit(main(sys.argv[1:])) raise SystemExit(main(sys.argv[1:]))