Add argument --if-larger-than.

It is trivial for files of a few bytes to be exact matches, so this argument lets files smaller than a given size be skipped.
This commit is contained in:
voussoir 2021-05-25 01:37:50 -07:00
parent e696dfaf77
commit 1d8d85cb8a
No known key found for this signature in database
GPG key ID: 5F7554F8C26DACCB

View file

@ -4,6 +4,7 @@ import os
import send2trash
import sys
from voussoirkit import bytestring
from voussoirkit import lazychain
from voussoirkit import pathclass
from voussoirkit import pipeable
@ -25,7 +26,6 @@ def hash_file(file):
@pipeable.ctrlc_return1
def hash_hardlink_argparse(args):
paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)]
drives = set(path.stat.st_dev for path in paths)
if len(drives) != 1:
raise ValueError('All paths must be on the same drive.')
@ -40,10 +40,17 @@ def hash_hardlink_argparse(args):
inodes = set()
hashes = {}
if args.if_larger_than:
larger = bytestring.parsebytes(args.if_larger_than)
else:
larger = None
for file in files:
if file.stat.st_ino in inodes:
# This file is already a hardlink of another file we've seen.
continue
if larger is not None and file.size < larger:
continue
inodes.add(file.stat.st_ino)
h = hash_file(file)
print(file.absolute_path, h)
@ -64,6 +71,7 @@ def main(argv):
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('paths', nargs='+')
parser.add_argument('--if_larger_than', '--if-larger-than', default=None)
parser.set_defaults(func=hash_hardlink_argparse)
args = parser.parse_args(argv)