Improve if_larger_than and take glob patterns.

This commit is contained in:
voussoir 2022-01-19 19:52:23 -08:00
parent 48fd831dc7
commit 07e1d0d406
No known key found for this signature in database
GPG key ID: 5F7554F8C26DACCB

View file

@@ -8,7 +8,6 @@ from voussoirkit import bytestring
from voussoirkit import lazychain from voussoirkit import lazychain
from voussoirkit import pathclass from voussoirkit import pathclass
from voussoirkit import pipeable from voussoirkit import pipeable
from voussoirkit import spinal
from voussoirkit import vlogging from voussoirkit import vlogging
log = vlogging.getLogger(__name__, 'hash_hardlink') log = vlogging.getLogger(__name__, 'hash_hardlink')
@@ -25,7 +24,8 @@ def hash_file(file):
@pipeable.ctrlc_return1 @pipeable.ctrlc_return1
def hash_hardlink_argparse(args): def hash_hardlink_argparse(args):
paths = [pathclass.Path(p) for p in pipeable.input_many(args.paths, strip=True, skip_blank=True)] patterns = pipeable.input_many(args.patterns, strip=True, skip_blank=True)
paths = list(pathclass.glob_many(patterns))
drives = set(path.stat.st_dev for path in paths) drives = set(path.stat.st_dev for path in paths)
if len(drives) != 1: if len(drives) != 1:
raise ValueError('All paths must be on the same drive.') raise ValueError('All paths must be on the same drive.')
@@ -34,23 +34,18 @@ def hash_hardlink_argparse(args):
for path in paths: for path in paths:
if path.is_file: if path.is_file:
files.append(path) files.append(path)
elif path.is_dir: else:
files.extend(spinal.walk(path)) files.extend(path.walk_files())
files = (file for file in files if file.size >= args.if_larger_than)
inodes = set() inodes = set()
hashes = {} hashes = {}
if args.if_larger_than:
larger = bytestring.parsebytes(args.if_larger_than)
else:
larger = None
for file in files: for file in files:
if file.stat.st_ino in inodes: if file.stat.st_ino in inodes:
# This file is already a hardlink of another file we've seen. # This file is already a hardlink of another file we've seen.
continue continue
if larger is not None and file.size < larger:
continue
inodes.add(file.stat.st_ino) inodes.add(file.stat.st_ino)
h = hash_file(file) h = hash_file(file)
print(file.absolute_path, h) print(file.absolute_path, h)
@@ -71,8 +66,8 @@ def hash_hardlink_argparse(args):
def main(argv): def main(argv):
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('paths', nargs='+') parser.add_argument('patterns', nargs='+')
parser.add_argument('--if_larger_than', '--if-larger-than', default=None) parser.add_argument('--if_larger_than', '--if-larger-than', type=bytestring.parsebytes, default=-1)
parser.set_defaults(func=hash_hardlink_argparse) parser.set_defaults(func=hash_hardlink_argparse)
args = parser.parse_args(argv) args = parser.parse_args(argv)