Rewrite hash_hardlink with argparse.

This commit is contained in:
voussoir 2021-01-14 02:35:43 -08:00
parent 1359ab040e
commit 70da07608f

View file

@ -1,10 +1,16 @@
import argparse
import hashlib
import os
import send2trash
import sys
from voussoirkit import lazychain
from voussoirkit import pathclass
from voussoirkit import pipeable
from voussoirkit import spinal
from voussoirkit import vlogging
log = vlogging.getLogger(__name__)
def hash_file(file):
hasher = hashlib.md5()
@ -16,25 +22,31 @@ def hash_file(file):
hasher.update(chunk)
return hasher.hexdigest()
def main(argv):
folders = [pathclass.Path(p) for p in argv]
def hash_hardlink_argparse(args):
paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)]
drives = set(os.path.splitdrive(folder.absolute_path)[0] for folder in folders)
drives = set(path.stat.st_dev for path in paths)
if len(drives) != 1:
raise ValueError('All paths must be on the same drive.')
files = lazychain.LazyChain()
for path in paths:
if path.is_file:
files.append(path)
elif path.is_dir:
files.extend(spinal.walk_generator(path))
inodes = set()
hashes = {}
for folder in folders:
for file in spinal.walk_generator(folder):
if file.stat.st_ino in inodes:
# This file is already a hardlink of another file we've seen.
continue
inodes.add(file.stat.st_ino)
h = hash_file(file)
print(file.absolute_path, h)
hashes.setdefault(h, []).append(file)
for file in files:
if file.stat.st_ino in inodes:
# This file is already a hardlink of another file we've seen.
continue
inodes.add(file.stat.st_ino)
h = hash_file(file)
print(file.absolute_path, h)
hashes.setdefault(h, []).append(file)
hashes = {h: files for (h, files) in hashes.items() if len(files) > 1}
@ -45,5 +57,16 @@ def main(argv):
send2trash.send2trash(follower.absolute_path)
os.link(leader.absolute_path, follower.absolute_path)
def main(argv):
argv = vlogging.set_level_by_argv(log, argv)
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('paths', nargs='+')
parser.set_defaults(func=hash_hardlink_argparse)
args = parser.parse_args(argv)
return args.func(args)
if __name__ == '__main__':
raise SystemExit(main(sys.argv[1:]))