Initial commandline utilities.

2019-08-06 23:49:47 -07:00 · 2019-08-06 23:49:47 -07:00 · d77dcacc21
commit d77dcacc21
parent 89420ff5d7
2 changed files with 257 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -27,6 +27,24 @@ book.save('modifiedbook.epub')

 epubfile provides simple editing of epub books. epubfile attempts to keep file modifications to a minimum. It does not add, remove, or rearrange files unless you ask it to, and does not inject additional metadata. As such, it works for both epub2 and epub3 assuming you stick to supported operations for your book version.

+# Command line utilities
+
+This library was born out of my own needs. So there are a couple of builtin utilities.
+
+```
+addfile:
+    Add files into the book.
+
+covercomesfirst:
+    Rename the cover image file so that it is the alphabetically-first image.
+
+merge:
+    Merge multiple books into one.
+
+normalize:
+    Rename files and directories in the book to match a common structure.
+```
+
 # Spec compliance

 epubfile does not rigorously enforce the epub spec and you can create noncompliant books with it. Basic errors are checked, and I am open to issues and comments regarding ways to improve spec-compliance without adding significant size or complexity to the library. I am prioritizing simplicity and ease of use over perfection.
--- a/epubfile.py
+++ b/epubfile.py
@ -883,3 +883,242 @@ class Epub:
        for id in self.get_texts():
            self.fix_interlinking_text(id, rename_map, old_relative_to=old_filepaths[id].parent)
        self.fix_interlinking_ncx(rename_map, old_relative_to=old_ncx_parent)
+
+
+# COMMAND LINE TOOLS
+################################################################################
+import argparse
+import html
+import random
+import string
+import sys
+
+from voussoirkit import betterhelp
+
+DOCSTRING = '''
+{addfile}
+
+{covercomesfirst}
+
+{merge}
+
+{normalize}
+'''.lstrip()
+
+SUB_DOCSTRINGS = {
+'addfile':
+'''
+addfile:
+    Add files into the book.
+
+    > epubfile.py addfile book.epub page1.html image.jpg
+'''.strip(),
+
+'covercomesfirst':
+'''
+covercomesfirst:
+    Rename the cover image file so that it is the alphabetically-first image.
+
+    > epubfile.py covercomesfirst book.epub
+
+    I use CBXShell to get thumbnails of epub files on Windows, and because it
+    is generalized for zip files and doesn't read epub metadata, alphabetized
+    mode works best for getting epub covers as icons.
+
+    In my testing, CBXShell considers the image's whole path and not just the
+    basename, so you may want to consider normalizing the directory structure
+    first, otherwise some /a/image.jpg will always be before /images/cover.jpg.
+'''.strip(),
+
+'merge':
+'''
+merge:
+    Merge multiple books into one.
+
+    > epubfile.py merge book1.epub book2.epub --output final.epub <flags>
+
+    flags:
+    --headerfile:
+        Add a file before each book with an <h1> containing its title.
+
+    -y | --autoyes:
+        Overwrite the output file without prompting.
+'''.strip(),
+
+'normalize':
+'''
+normalize:
+    Rename files and directories in the book to match a common structure.
+
+    Moves all book content from / into /OEBPS and sorts files into
+    subdirectories by type: Text, Images, Styles, etc.
+
+    > epubfile.py normalize book.epub
+'''.strip()
+}
+
+DOCSTRING = betterhelp.add_previews(DOCSTRING, SUB_DOCSTRINGS)
+
+def random_string(length, characters=string.ascii_lowercase):
+    return ''.join(random.choice(characters) for x in range(length))
+
+def addfile_argparse(args):
+    book = Epub.open(args.epub)
+
+    for file in args.files:
+        print(f'Adding file {file}.')
+        file = pathclass.Path(file)
+        try:
+            book.easy_add_file(file)
+        except (IDExists, FileExists) as exc:
+            rand_suffix = random_string(3, string.digits)
+            base = file.replace_extension('').basename
+            id = f'{base}_{rand_suffix}'
+            basename = f'{base}_{rand_suffix}{file.dot_extension}'
+            content = open(file.absolute_path, 'rb').read()
+            book.add_file(id, basename, content)
+
+    book.move_nav_to_end()
+    book.save(args.epub)
+
+def covercomesfirst_argparse(args):
+    book = Epub.open(args.epub)
+    basenames = {i: book.get_filepath(i).basename for i in book.get_images()}
+    if len(basenames) <= 1:
+        return
+
+    cover_image = book.get_cover_image()
+    if not cover_image:
+        return
+
+    cover_basename = book.get_filepath(cover_image).basename
+
+    cover_index = sorted(basenames.values()).index(cover_basename)
+    if cover_index == 0:
+        return
+
+    rename_map = basenames.copy()
+
+    if not cover_basename.startswith('!'):
+        cover_basename = '!' + cover_basename
+        rename_map[cover_image] = cover_basename
+    else:
+        rename_map.pop(cover_image)
+
+    for (id, basename) in rename_map.copy().items():
+        if id == cover_image:
+            continue
+        if basename > cover_basename:
+            rename_map.pop(id)
+            continue
+        if basename < cover_basename and basename.startswith('!'):
+            basename = basename.lstrip('!')
+            rename_map[id] = basename
+        if basename < cover_basename or basename.startswith('.'):
+            basename = '_' + basename
+            rename_map[id] = basename
+
+    book.rename_file(rename_map)
+
+    book.save(args.epub)
+
+def merge(input_filepaths, output_filename, do_headerfile=False):
+    book = Epub.new()
+
+    index_length = len(str(len(input_filepaths)))
+    rand_prefix = random_string(3, string.digits)
+
+    input_filepaths = [pathclass.Path(p) for p in input_filepaths]
+
+    for (index, input_filepath) in enumerate(input_filepaths):
+        print(f'Merging {input_filepath}.')
+        prefix = f'{rand_prefix}_{index:>0{index_length}}_{{}}'
+        input_book = Epub.open(input_filepath)
+        input_book.normalize_directory_structure()
+
+        input_ncx = input_book.get_ncx()
+        input_nav = input_book.get_nav()
+        manifest_ids = input_book.get_manifest_items(spine_order=True)
+        manifest_ids = [x for x in manifest_ids if x not in (input_ncx, input_nav)]
+
+        basename_map = {}
+        for id in manifest_ids:
+            old_basename = input_book.get_filepath(id).basename
+            new_basename = prefix.format(old_basename)
+            basename_map[id] = new_basename
+
+        # Don't worry, we're not going to save this!
+        input_book.rename_file(basename_map)
+
+        if do_headerfile:
+            content = ''
+            try:
+                title = input_book.get_titles()[0]
+            except IndexError:
+                title = input_filepath.replace_extension('').basename
+            finally:
+                content += f'<h1>{html.escape(title)}</h1>'
+
+            try:
+                author = input_book.get_authors()[0]
+                content += f'<p>{html.escape(author)}</p>'
+            except IndexError:
+                pass
+
+            headerfile_id = prefix.format('headerfile')
+            headerfile_basename = prefix.format('headerfile.html')
+            book.add_file(headerfile_id, headerfile_basename, content)
+
+        for id in manifest_ids:
+            new_id = f'{rand_prefix}_{index:>0{index_length}}_{id}'
+            new_basename = basename_map[id]
+            book.add_file(new_id, new_basename, input_book.read_file(id))
+
+    book.move_nav_to_end()
+    book.save(output_filename)
+
+def merge_argparse(args):
+    if os.path.exists(args.output):
+        ok = args.autoyes
+        if not ok:
+            ok = input(f'Overwrite {args.output}? y/n\n>').lower() in ('y', 'yes')
+        if not ok:
+            raise ValueError(f'{args.output} exists.')
+
+    return merge(input_filepaths=args.epubs, output_filename=args.output, do_headerfile=args.headerfile)
+
+def normalize_argparse(args):
+    book = Epub.open(args.epub)
+    book.normalize_directory_structure()
+    book.save(args.epub)
+
+@betterhelp.subparser_betterhelp(main_docstring=DOCSTRING, sub_docstrings=SUB_DOCSTRINGS)
+def main(argv):
+    parser = argparse.ArgumentParser(description=__doc__)
+    subparsers = parser.add_subparsers()
+
+    p_addfile = subparsers.add_parser('addfile')
+    p_addfile.add_argument('epub')
+    p_addfile.add_argument('files', nargs='+', default=[])
+    p_addfile.set_defaults(func=addfile_argparse)
+
+    p_covercomesfirst = subparsers.add_parser('covercomesfirst')
+    p_covercomesfirst.add_argument('epub')
+    p_covercomesfirst.set_defaults(func=covercomesfirst_argparse)
+
+    p_merge = subparsers.add_parser('merge')
+    p_merge.add_argument('epubs', nargs='+', default=[])
+    p_merge.add_argument('--output', dest='output', default=None, required=True)
+    p_merge.add_argument('--headerfile', dest='headerfile', action='store_true')
+    p_merge.add_argument('-y', '--autoyes', dest='autoyes', action='store_true')
+    p_merge.set_defaults(func=merge_argparse)
+
+    p_normalize = subparsers.add_parser('normalize')
+    p_normalize.add_argument('epub')
+    p_normalize.set_defaults(func=normalize_argparse)
+
+    args = parser.parse_args(argv)
+    args.func(args)
+
+if __name__ == '__main__':
+    raise SystemExit(main(sys.argv[1:]))