From 1d8d85cb8aab7e025ea24038d5f33d3c7241c4bb Mon Sep 17 00:00:00 2001 From: Ethan Dalool Date: Tue, 25 May 2021 01:37:50 -0700 Subject: [PATCH] Add argument --if-larger-than. It is trivial for files of a few bytes to be exact matches. --- hash_hardlink.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hash_hardlink.py b/hash_hardlink.py index df42c57..cea24f1 100644 --- a/hash_hardlink.py +++ b/hash_hardlink.py @@ -4,6 +4,7 @@ import os import send2trash import sys +from voussoirkit import bytestring from voussoirkit import lazychain from voussoirkit import pathclass from voussoirkit import pipeable @@ -25,7 +26,6 @@ def hash_file(file): @pipeable.ctrlc_return1 def hash_hardlink_argparse(args): paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)] - drives = set(path.stat.st_dev for path in paths) if len(drives) != 1: raise ValueError('All paths must be on the same drive.') @@ -40,10 +40,17 @@ def hash_hardlink_argparse(args): inodes = set() hashes = {} + if args.if_larger_than: + larger = bytestring.parsebytes(args.if_larger_than) + else: + larger = None + for file in files: if file.stat.st_ino in inodes: # This file is already a hardlink of another file we've seen. continue + if larger is not None and file.size < larger: + continue inodes.add(file.stat.st_ino) h = hash_file(file) print(file.absolute_path, h) @@ -64,6 +71,7 @@ def main(argv): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('paths', nargs='+') + parser.add_argument('--if_larger_than', '--if-larger-than', default=None) parser.set_defaults(func=hash_hardlink_argparse) args = parser.parse_args(argv)