Add argument --if-larger-than.

Files of only a few bytes can trivially be exact matches of one another, so this option skips any file smaller than the given size.
This commit is contained in:
voussoir 2021-05-25 01:37:50 -07:00
parent e696dfaf77
commit 1d8d85cb8a
No known key found for this signature in database
GPG key ID: 5F7554F8C26DACCB

View file

@ -4,6 +4,7 @@ import os
import send2trash import send2trash
import sys import sys
from voussoirkit import bytestring
from voussoirkit import lazychain from voussoirkit import lazychain
from voussoirkit import pathclass from voussoirkit import pathclass
from voussoirkit import pipeable from voussoirkit import pipeable
@ -25,7 +26,6 @@ def hash_file(file):
@pipeable.ctrlc_return1 @pipeable.ctrlc_return1
def hash_hardlink_argparse(args): def hash_hardlink_argparse(args):
paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)] paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)]
drives = set(path.stat.st_dev for path in paths) drives = set(path.stat.st_dev for path in paths)
if len(drives) != 1: if len(drives) != 1:
raise ValueError('All paths must be on the same drive.') raise ValueError('All paths must be on the same drive.')
@ -40,10 +40,17 @@ def hash_hardlink_argparse(args):
inodes = set() inodes = set()
hashes = {} hashes = {}
if args.if_larger_than:
larger = bytestring.parsebytes(args.if_larger_than)
else:
larger = None
for file in files: for file in files:
if file.stat.st_ino in inodes: if file.stat.st_ino in inodes:
# This file is already a hardlink of another file we've seen. # This file is already a hardlink of another file we've seen.
continue continue
if larger is not None and file.size < larger:
continue
inodes.add(file.stat.st_ino) inodes.add(file.stat.st_ino)
h = hash_file(file) h = hash_file(file)
print(file.absolute_path, h) print(file.absolute_path, h)
@ -64,6 +71,7 @@ def main(argv):
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('paths', nargs='+') parser.add_argument('paths', nargs='+')
parser.add_argument('--if_larger_than', '--if-larger-than', default=None)
parser.set_defaults(func=hash_hardlink_argparse) parser.set_defaults(func=hash_hardlink_argparse)
args = parser.parse_args(argv) args = parser.parse_args(argv)