Initial commandline utilities.

2019-08-06 23:49:47 -07:00 · 2019-08-06 23:49:47 -07:00 · d77dcacc21
commit d77dcacc21
parent 89420ff5d7
2 changed files with 257 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -27,6 +27,24 @@ book.save('modifiedbook.epub')
 epubfile provides simple editing of epub books. epubfile attempts to keep file modifications to a minimum. It does not add, remove, or rearrange files unless you ask it to, and does not inject additional metadata. As such, it works for both epub2 and epub3 assuming you stick to supported operations for your book version.
 # Command line utilities
 This library was born out of my own needs, so it includes a few builtin command line utilities:
 ```
 addfile:
    Add files into the book.
 covercomesfirst:
    Rename the cover image file so that it is the alphabetically-first image.
 merge:
    Merge multiple books into one.
 normalize:
    Rename files and directories in the book to match a common structure.
 ```
 # Spec compliance
 epubfile does not rigorously enforce the epub spec and you can create noncompliant books with it. Basic errors are checked, and I am open to issues and comments regarding ways to improve spec-compliance without adding significant size or complexity to the library. I am prioritizing simplicity and ease of use over perfection.
--- a/epubfile.py
+++ b/epubfile.py
@ -883,3 +883,242 @@ class Epub:
        for id in self.get_texts():
            self.fix_interlinking_text(id, rename_map, old_relative_to=old_filepaths[id].parent)
        self.fix_interlinking_ncx(rename_map, old_relative_to=old_ncx_parent)
 # COMMAND LINE TOOLS
 ################################################################################
 import argparse
 import html
 import random
 import string
 import sys
 from voussoirkit import betterhelp
 # Template for the top-level help text. Each {name} placeholder matches a key
 # of SUB_DOCSTRINGS below; the template is completed by the call to
 # betterhelp.add_previews further down.
 DOCSTRING = '''
 {addfile}
 {covercomesfirst}
 {merge}
 {normalize}
 '''.lstrip()
 # Full help text for each subcommand, keyed by the subcommand name that main()
 # registers with argparse. Each value is stripped of surrounding whitespace.
 SUB_DOCSTRINGS = {
 'addfile':
 '''
 addfile:
    Add files into the book.
    > epubfile.py addfile book.epub page1.html image.jpg
 '''.strip(),
 'covercomesfirst':
 '''
 covercomesfirst:
    Rename the cover image file so that it is the alphabetically-first image.
    > epubfile.py covercomesfirst book.epub
    I use CBXShell to get thumbnails of epub files on Windows, and because it
    is generalized for zip files and doesn't read epub metadata, alphabetized
    mode works best for getting epub covers as icons.
    In my testing, CBXShell considers the image's whole path and not just the
    basename, so you may want to consider normalizing the directory structure
    first, otherwise some /a/image.jpg will always be before /images/cover.jpg.
 '''.strip(),
 'merge':
 '''
 merge:
    Merge multiple books into one.
    > epubfile.py merge book1.epub book2.epub --output final.epub <flags>
    flags:
    --headerfile:
        Add a file before each book with an <h1> containing its title.
    -y | --autoyes:
        Overwrite the output file without prompting.
 '''.strip(),
 'normalize':
 '''
 normalize:
    Rename files and directories in the book to match a common structure.
    Moves all book content from / into /OEBPS and sorts files into
    subdirectories by type: Text, Images, Styles, etc.
    > epubfile.py normalize book.epub
 '''.strip()
 }
 # Replace the template with the finished main help text.
 # NOTE(review): presumably add_previews fills each placeholder with a short
 # preview of the matching sub-docstring -- confirm against voussoirkit.
 DOCSTRING = betterhelp.add_previews(DOCSTRING, SUB_DOCSTRINGS)
 def random_string(length, characters=string.ascii_lowercase):
    '''
    Return a string of `length` characters, each one picked independently
    with random.choice from `characters` (lowercase ASCII letters by default).
    '''
    picks = []
    for _ in range(length):
        picks.append(random.choice(characters))
    return ''.join(picks)
 def addfile_argparse(args):
    '''
    Add each path in args.files to the book at args.epub, then save the book
    back to the same path.

    If easy_add_file raises IDExists or FileExists for a file, that file is
    added with add_file instead, using an id and basename that carry a random
    three-digit suffix.
    '''
    book = Epub.open(args.epub)
    for file in args.files:
        print(f'Adding file {file}.')
        file = pathclass.Path(file)
        try:
            book.easy_add_file(file)
        except (IDExists, FileExists):
            rand_suffix = random_string(3, string.digits)
            base = file.replace_extension('').basename
            new_id = f'{base}_{rand_suffix}'
            new_basename = f'{base}_{rand_suffix}{file.dot_extension}'
            # Read through a context manager so the handle is closed right
            # away instead of whenever the garbage collector gets to it.
            with open(file.absolute_path, 'rb') as handle:
                content = handle.read()
            book.add_file(new_id, new_basename, content)
    book.move_nav_to_end()
    book.save(args.epub)
 def covercomesfirst_argparse(args):
    '''
    Rename image files in the book at args.epub so that the cover image's
    basename sorts first among all image basenames, then save the book back
    to the same path.

    Returns early without saving if the book has one or fewer images, has no
    cover image, or the cover's basename already sorts first.
    '''
    book = Epub.open(args.epub)
    # Map of image id -> basename of that image's file.
    basenames = {i: book.get_filepath(i).basename for i in book.get_images()}
    if len(basenames) <= 1:
        return
    cover_image = book.get_cover_image()
    if not cover_image:
        return
    cover_basename = book.get_filepath(cover_image).basename
    # Position of the cover when the basenames are sorted as plain strings.
    cover_index = sorted(basenames.values()).index(cover_basename)
    if cover_index == 0:
        return
    # Start with every image in the map; entries that do not need a new name
    # are popped below, the rest get their new basename assigned.
    rename_map = basenames.copy()
    if not cover_basename.startswith('!'):
        # '!' compares lower than letters and digits, pulling the cover forward.
        cover_basename = '!' + cover_basename
        rename_map[cover_image] = cover_basename
    else:
        # The cover keeps its current name, so it needs no rename entry.
        rename_map.pop(cover_image)
    # Iterate over a copy because rename_map is modified inside the loop.
    for (id, basename) in rename_map.copy().items():
        if id == cover_image:
            continue
        if basename > cover_basename:
            # Already sorts after the cover; leave this file alone.
            rename_map.pop(id)
            continue
        if basename < cover_basename and basename.startswith('!'):
            # Drop leading '!' characters that put this file ahead of the cover.
            basename = basename.lstrip('!')
            rename_map[id] = basename
        if basename < cover_basename or basename.startswith('.'):
            # Still ahead of the cover (or begins with '.'): prefix with '_'.
            # NOTE(review): the reason for the '.' case is not evident from
            # this function -- confirm the intent.
            basename = '_' + basename
            rename_map[id] = basename
    book.rename_file(rename_map)
    book.save(args.epub)
 def merge(input_filepaths, output_filename, do_headerfile=False):
    '''
    Combine several epub files into one new book and save it.

    input_filepaths:
        Paths of the books to merge, in the order they should appear.
        Must support len().
    output_filename:
        Path the merged book is saved to.
    do_headerfile:
        If True, each book is preceded by a generated html file holding its
        first title in an <h1> and, when it has one, its first author in
        a <p>. The title falls back to the input file's basename without
        extension when the book lists no titles.
    '''
    book = Epub.new()
    # Width needed to zero-pad every book index to the same length.
    index_length = len(str(len(input_filepaths)))
    rand_prefix = random_string(3, string.digits)
    input_filepaths = [pathclass.Path(p) for p in input_filepaths]
    for (index, input_filepath) in enumerate(input_filepaths):
        print(f'Merging {input_filepath}.')
        # Template applied to every id and basename taken from this book:
        # the run's random digits, the padded book index, then the old name.
        prefix = f'{rand_prefix}_{index:>0{index_length}}_{{}}'
        input_book = Epub.open(input_filepath)
        input_book.normalize_directory_structure()
        input_ncx = input_book.get_ncx()
        input_nav = input_book.get_nav()
        # The input book's ncx and nav are not carried into the merged book.
        manifest_ids = input_book.get_manifest_items(spine_order=True)
        manifest_ids = [x for x in manifest_ids if x not in (input_ncx, input_nav)]
        basename_map = {}
        for manifest_id in manifest_ids:
            old_basename = input_book.get_filepath(manifest_id).basename
            basename_map[manifest_id] = prefix.format(old_basename)
        # Don't worry, we're not going to save this!
        input_book.rename_file(basename_map)
        if do_headerfile:
            try:
                title = input_book.get_titles()[0]
            except IndexError:
                title = input_filepath.replace_extension('').basename
            # Built after the try statement rather than in a `finally:` clause,
            # so an unexpected exception from get_titles() propagates as-is
            # instead of being masked by an unbound or stale `title`.
            content = f'<h1>{html.escape(title)}</h1>'
            try:
                author = input_book.get_authors()[0]
            except IndexError:
                pass
            else:
                content += f'<p>{html.escape(author)}</p>'
            headerfile_id = prefix.format('headerfile')
            headerfile_basename = prefix.format('headerfile.html')
            book.add_file(headerfile_id, headerfile_basename, content)
        for manifest_id in manifest_ids:
            new_id = prefix.format(manifest_id)
            new_basename = basename_map[manifest_id]
            book.add_file(new_id, new_basename, input_book.read_file(manifest_id))
    book.move_nav_to_end()
    book.save(output_filename)
 def merge_argparse(args):
    '''
    Command line entry for `merge`.

    When args.output already exists and args.autoyes is not set, ask for
    confirmation on stdin and raise ValueError unless the answer is
    'y' or 'yes' (case-insensitive). Otherwise hand off to merge().
    '''
    if os.path.exists(args.output) and not args.autoyes:
        answer = input(f'Overwrite {args.output}? y/n\n>')
        if answer.lower() not in ('y', 'yes'):
            raise ValueError(f'{args.output} exists.')
    return merge(
        input_filepaths=args.epubs,
        output_filename=args.output,
        do_headerfile=args.headerfile,
    )
 def normalize_argparse(args):
    '''
    Command line entry for `normalize`: open the book at args.epub, call
    normalize_directory_structure on it, and save it back to the same path.
    '''
    epub_path = args.epub
    opened = Epub.open(epub_path)
    opened.normalize_directory_structure()
    opened.save(epub_path)
@betterhelp.subparser_betterhelp(main_docstring=DOCSTRING, sub_docstrings=SUB_DOCSTRINGS)
def main(argv):
    '''
    Parse argv (the command line without the program name) and run the
    selected subcommand: addfile, covercomesfirst, merge, or normalize.

    Returns whatever the subcommand handler returns, so that a caller doing
    `raise SystemExit(main(...))` can pass that value on as the exit status.
    '''
    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers()

    p_addfile = subparsers.add_parser('addfile')
    p_addfile.add_argument('epub')
    p_addfile.add_argument('files', nargs='+', default=[])
    p_addfile.set_defaults(func=addfile_argparse)

    p_covercomesfirst = subparsers.add_parser('covercomesfirst')
    p_covercomesfirst.add_argument('epub')
    p_covercomesfirst.set_defaults(func=covercomesfirst_argparse)

    p_merge = subparsers.add_parser('merge')
    p_merge.add_argument('epubs', nargs='+', default=[])
    p_merge.add_argument('--output', dest='output', default=None, required=True)
    p_merge.add_argument('--headerfile', dest='headerfile', action='store_true')
    p_merge.add_argument('-y', '--autoyes', dest='autoyes', action='store_true')
    p_merge.set_defaults(func=merge_argparse)

    p_normalize = subparsers.add_parser('normalize')
    p_normalize.add_argument('epub')
    p_normalize.set_defaults(func=normalize_argparse)

    args = parser.parse_args(argv)
    # Hand the handler's result back instead of dropping it; previously main
    # always returned None regardless of what the subcommand returned.
    return args.func(args)
 if __name__ == '__main__':
    # Run the command line interface and exit with whatever main() returns.
    exit_status = main(sys.argv[1:])
    raise SystemExit(exit_status)