voussoirkit/voussoirkit/bytestring.py

'''
bytestring
==========

This module provides integer constants for power-of-two byte size units, and
functions for converting between ints and human-readable strings. E.g.:
bytestring.bytestring(5000000) -> '4.768 MiB'
bytestring.parsebytes('8.5gb') -> 9126805504

Commandline usage:

> bytestring number1 [number2 number3...]

number:
    An integer. Uses pipeable to support !c clipboard, !i stdin, which should
    be one number per line.

Examples:
> bytestring 12345 89989
> some_process | bytestring !i
'''
import argparse
import re
import sys

from voussoirkit import betterhelp
from voussoirkit import pipeable

# Size of each power-of-two unit, measured in bytes. Every unit is 1024 (2**10)
# times the one before it.
# NOTE(review): the IEC spellings are "mebibyte" and "exbibyte"; MIBIBYTE and
# EXIBYTE are public names, so they are kept as-is for compatibility.
BYTE = 1
KIBIBYTE = BYTE << 10
MIBIBYTE = KIBIBYTE << 10
GIBIBYTE = MIBIBYTE << 10
TEBIBYTE = GIBIBYTE << 10
PEBIBYTE = TEBIBYTE << 10
EXIBYTE = PEBIBYTE << 10
ZEBIBYTE = EXIBYTE << 10
YOBIBYTE = ZEBIBYTE << 10

# Canonical display suffix for each unit.
BYTE_STRING = 'b'
KIBIBYTE_STRING = 'KiB'
MIBIBYTE_STRING = 'MiB'
GIBIBYTE_STRING = 'GiB'
TEBIBYTE_STRING = 'TiB'
PEBIBYTE_STRING = 'PiB'
EXIBYTE_STRING = 'EiB'
ZEBIBYTE_STRING = 'ZiB'
YOBIBYTE_STRING = 'YiB'

# Size in bytes -> display suffix, smallest unit first.
UNIT_STRINGS = {
    BYTE: BYTE_STRING,
    KIBIBYTE: KIBIBYTE_STRING,
    MIBIBYTE: MIBIBYTE_STRING,
    GIBIBYTE: GIBIBYTE_STRING,
    TEBIBYTE: TEBIBYTE_STRING,
    PEBIBYTE: PEBIBYTE_STRING,
    EXIBYTE: EXIBYTE_STRING,
    ZEBIBYTE: ZEBIBYTE_STRING,
    YOBIBYTE: YOBIBYTE_STRING,
}
# Display suffix -> size in bytes.
REVERSED_UNIT_STRINGS = {label: size for (size, label) in UNIT_STRINGS.items()}
# Every unit size, largest first, so a linear scan finds the biggest unit that
# fits a given value.
UNIT_SIZES = sorted(UNIT_STRINGS, reverse=True)

class BytestringException(Exception):
    '''
    Base class for the exceptions defined by this module, so callers can catch
    all of them with a single except clause.
    '''

class ParseError(BytestringException, ValueError):
    '''
    Raised when a size string or unit string cannot be understood.

    Also a ValueError, so callers that catch ValueError will catch this too.
    '''

def bytestring(size, decimal_places=3, force_unit=None):
    '''
    Render a number of bytes as a human-readable string like "100 MiB".

    >>> bytestring(1000)
    '1000 b'
    >>> bytestring(1024)
    '1.000 KiB'
    >>> bytestring(123456)
    '120.562 KiB'
    >>> bytestring(800000000)
    '762.939 MiB'
    >>> bytestring(800000000, decimal_places=0)
    '763 MiB'

    decimal_places:
        How many digits to show after the decimal point. Ignored (treated as
        0) when the result is expressed in plain bytes.

    force_unit:
        If None, an appropriate size unit is chosen automatically.
        Otherwise, provide one of the size constants to force that divisor,
        or a unit string ("k", "kb", "kib" in any case), which is normalized
        and looked up.
    '''
    if force_unit is None:
        divisor = get_appropriate_divisor(size)
    elif isinstance(force_unit, str):
        divisor = REVERSED_UNIT_STRINGS[normalize_unit_string(force_unit)]
    else:
        divisor = force_unit

    unit = UNIT_STRINGS[divisor]

    # Show "100 b" rather than "100.000 b".
    if divisor == BYTE:
        decimal_places = 0

    return f'{size / divisor:.0{decimal_places}f} {unit}'

def get_appropriate_divisor(size):
    '''
    Return the largest unit size that does not exceed the magnitude of this
    byte size, i.e. the divisor suited to displaying it. The sign of the
    size is ignored, and anything smaller than one byte gets 1.

    >>> get_appropriate_divisor(1000)
    1
    >>> get_appropriate_divisor(1024)
    1024
    >>> get_appropriate_divisor(123456789)
    1048576
    '''
    magnitude = abs(size)
    # UNIT_SIZES is ordered largest first, so the first unit that fits is the
    # biggest one that fits.
    fitting_units = (unit for unit in UNIT_SIZES if magnitude >= unit)
    return next(fitting_units, 1)

def normalize_unit_string(string):
    '''
    Map a loosely written unit such as "k", "kb", or "kib" (any case, with
    optional surrounding whitespace) to its canonical form, e.g. "KiB".

    Raises ParseError if the string does not name a known unit.
    '''
    wanted = string.lower().strip()
    for canonical in UNIT_STRINGS.values():
        lowered = canonical.lower()
        # Accepted spellings: the full suffix ("kib"), its first letter ("k"),
        # and the suffix without the "i" ("kb").
        accepted = (lowered, lowered[0], lowered.replace('i', ''))
        if wanted in accepted:
            return canonical
    raise ParseError(f'Unrecognized unit string "{wanted}".')

def parsebytes(string):
    '''
    Given a string like "100 kib", return the appropriate integer value.
    Accepts "k", "kb", "kib" in any casing. Commas are ignored, so "1,024"
    is read as 1024. A number with no unit is taken as a count of bytes.

    >>> parsebytes('100')
    100
    >>> parsebytes('1,024')
    1024
    >>> parsebytes('8.5gb')
    9126805504

    Whole numbers are parsed as int rather than float, so values larger than
    2**53 are not silently rounded. Fractional results are truncated toward
    zero.

    Raises ParseError if the string contains no number, more than one number,
    a number that is not at the start, a malformed number, or an unrecognized
    unit.
    '''
    string = string.lower().strip()
    string = string.replace(',', '')

    matches = re.findall(r'[\d\.-]+', string)
    if len(matches) == 0:
        raise ParseError('No numbers found.')
    if len(matches) > 1:
        raise ParseError('Too many numbers found.')
    number_string = matches[0]

    if not string.startswith(number_string):
        raise ParseError('Number is not at start of string.')

    try:
        if re.fullmatch(r'-?\d+', number_string):
            # Stay in exact integer arithmetic; float only has 53 bits of
            # precision and would corrupt large byte counts.
            number = int(number_string)
        else:
            number = float(number_string)
    except ValueError as exc:
        raise ParseError(number_string) from exc

    # The number is the prefix and the only numeric run, so whatever follows
    # it is the unit.
    unit_string = string[len(number_string):]

    # if the string has no text besides the number, treat it as int of bytes.
    if unit_string == '':
        return int(number)

    unit_string = normalize_unit_string(unit_string)
    multiplier = REVERSED_UNIT_STRINGS[unit_string]

    return int(number * multiplier)

def bytestring_argparse(args):
    '''
    Commandline handler: output the bytestring of each entry in args.numbers.

    The entries are expanded through pipeable.input_many first. If any entry
    is not an integer, a message is sent to pipeable.stderr and 1 is returned
    without processing the remaining entries.
    '''
    for text in pipeable.input_many(args.numbers):
        try:
            value = int(text)
        except ValueError:
            pipeable.stderr(f'bytestring: Each number should be an integer, not {text}.')
            return 1
        pipeable.stdout(bytestring(value))

def main(argv):
    '''
    Build the commandline parser for this module and hand it, along with
    argv and the module docstring, to betterhelp.single_main. Returns
    whatever single_main returns.
    '''
    parser = argparse.ArgumentParser(description=__doc__)
    parser.set_defaults(func=bytestring_argparse)

    # One or more positional values, each expected to be an integer.
    parser.add_argument('numbers', nargs='+')

    return betterhelp.single_main(argv, parser, __doc__)

# When run as a script, pass the commandline arguments (without the program
# name) to main and use its return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))
Add a few docstrings, remove getpermission.py. 2021-01-14 00:37:28 +00:00			`'''`
Add argparse, betterhelp to bytestring. 2021-05-04 01:55:12 +00:00			`bytestring`
			`==========`

Add a few docstrings, remove getpermission.py. 2021-01-14 00:37:28 +00:00			`This module provides integer constants for power-of-two byte size units, and`
Add argparse, betterhelp to bytestring. 2021-05-04 01:55:12 +00:00			`functions for converting between ints and human-readable strings. E.g.:`
			`bytestring.bytestring(5000000) -> '4.768 MiB'`
			`bytestring.parsebytes('8.5gb') -> 9126805504`

			`Commandline usage:`

			`> bytestring number1 [number2 number3...]`

			`number:`
			`An integer. Uses pipeable to support !c clipboard, !i stdin, which should`
			`be one number per line.`

			`Examples:`
			`> bytestring 12345 89989`
			`> some_process \| bytestring !i`
Add a few docstrings, remove getpermission.py. 2021-01-14 00:37:28 +00:00			`'''`
Add argparse, betterhelp to bytestring. 2021-05-04 01:55:12 +00:00			`import argparse`
Initial migratory commit. 2018-12-18 06:10:00 +00:00			`import re`
			`import sys`

Add argparse, betterhelp to bytestring. 2021-05-04 01:55:12 +00:00			`from voussoirkit import betterhelp`
Add pipeable to voussoirkit. 2019-06-12 05:45:04 +00:00			`from voussoirkit import pipeable`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
			`BYTE = 1`
			`KIBIBYTE = 1024 * BYTE`
			`MIBIBYTE = 1024 * KIBIBYTE`
			`GIBIBYTE = 1024 * MIBIBYTE`
			`TEBIBYTE = 1024 * GIBIBYTE`
			`PEBIBYTE = 1024 * TEBIBYTE`
			`EXIBYTE = 1024 * PEBIBYTE`
			`ZEBIBYTE = 1024 * EXIBYTE`
			`YOBIBYTE = 1024 * ZEBIBYTE`

			`BYTE_STRING = 'b'`
			`KIBIBYTE_STRING = 'KiB'`
			`MIBIBYTE_STRING = 'MiB'`
			`GIBIBYTE_STRING = 'GiB'`
			`TEBIBYTE_STRING = 'TiB'`
			`PEBIBYTE_STRING = 'PiB'`
			`EXIBYTE_STRING = 'EiB'`
			`ZEBIBYTE_STRING = 'ZiB'`
			`YOBIBYTE_STRING = 'YiB'`

			`UNIT_STRINGS = {`
			`BYTE: BYTE_STRING,`
			`KIBIBYTE: KIBIBYTE_STRING,`
			`MIBIBYTE: MIBIBYTE_STRING,`
			`GIBIBYTE: GIBIBYTE_STRING,`
			`TEBIBYTE: TEBIBYTE_STRING,`
			`PEBIBYTE: PEBIBYTE_STRING,`
			`EXIBYTE: EXIBYTE_STRING,`
			`ZEBIBYTE: ZEBIBYTE_STRING,`
			`YOBIBYTE: YOBIBYTE_STRING,`
			`}`
			`REVERSED_UNIT_STRINGS = {value: key for (key, value) in UNIT_STRINGS.items()}`
			`UNIT_SIZES = sorted(UNIT_STRINGS.keys(), reverse=True)`

Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`class BytestringException(Exception):`
			`pass`

			`class ParseError(BytestringException, ValueError):`
			`pass`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
			`def bytestring(size, decimal_places=3, force_unit=None):`
			`'''`
Add some example calls to bytestring docstring. 2020-03-15 22:21:00 +00:00			`Convert a number into a string like "100 MiB".`

			`>>> bytestring(1000)`
			`'1000 b'`
			`>>> bytestring(1024)`
			`'1.000 KiB'`
			`>>> bytestring(123456)`
			`'120.562 KiB'`
			`>>> bytestring(800000000)`
			`'762.939 MiB'`
			`>>> bytestring(800000000, decimal_places=0)`
			`'763 MiB'`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
			`force_unit:`
			`If None, an appropriate size unit is chosen automatically.`
			`Otherwise, you can provide one of the size constants to force that divisor.`
			`'''`
			`if force_unit is None:`
			`divisor = get_appropriate_divisor(size)`
			`else:`
			`if isinstance(force_unit, str):`
			`force_unit = normalize_unit_string(force_unit)`
			`force_unit = REVERSED_UNIT_STRINGS[force_unit]`
			`divisor = force_unit`

			`size_unit_string = UNIT_STRINGS[divisor]`

Set decimal_places to 0 when result is in bytes. So that you get "100 b" instead of "100.000 b". 2020-03-12 21:00:38 +00:00			`if divisor == BYTE:`
			`decimal_places = 0`

Initial migratory commit. 2018-12-18 06:10:00 +00:00			`size_string = '{number:.0{decimal_places}f} {unit}'`
			`size_string = size_string.format(`
			`decimal_places=decimal_places,`
			`number=size/divisor,`
			`unit=size_unit_string,`
			`)`
			`return size_string`

			`def get_appropriate_divisor(size):`
			`'''`
			`Return the divisor that would be appropriate for displaying this byte size.`
A few docstring improvements. 2021-08-21 22:20:11 +00:00
			`>>> get_appropriate_divisor(1000)`
			`1`
			`>>> get_appropriate_divisor(1024)`
			`1024`
			`>>> get_appropriate_divisor(123456789)`
			`1048576`
Initial migratory commit. 2018-12-18 06:10:00 +00:00			`'''`
			`size = abs(size)`
			`for unit in UNIT_SIZES:`
			`if size >= unit:`
			`appropriate_unit = unit`
			`break`
			`else:`
			`appropriate_unit = 1`
			`return appropriate_unit`

			`def normalize_unit_string(string):`
			`'''`
			`Given a string "k" or "kb" or "kib" in any case, return "KiB", etc.`
			`'''`
Don't remove spaces from input string. This was allowing inputs like "5 5" to equal 55. 2021-01-18 09:08:35 +00:00			`string = string.lower().strip()`
Initial migratory commit. 2018-12-18 06:10:00 +00:00			`for (size, unit_string) in UNIT_STRINGS.items():`
			`unit_string_l = unit_string.lower()`
			`if string in (unit_string_l, unit_string_l[0], unit_string_l.replace('i', '')):`
			`return unit_string`
Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`raise ParseError(f'Unrecognized unit string "{string}".')`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
			`def parsebytes(string):`
			`'''`
			`Given a string like "100 kib", return the appropriate integer value.`
			`Accepts "k", "kb", "kib" in any casing.`
			`'''`
			`string = string.lower().strip()`
Don't remove spaces from input string. This was allowing inputs like "5 5" to equal 55. 2021-01-18 09:08:35 +00:00			`string = string.replace(',', '')`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
Simplify this regex, remove unnecessary capture group. 2020-03-12 21:02:25 +00:00			`matches = re.findall(r'[\d\.-]+', string)`
Initial migratory commit. 2018-12-18 06:10:00 +00:00			`if len(matches) == 0:`
Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`raise ParseError('No numbers found.')`
Initial migratory commit. 2018-12-18 06:10:00 +00:00			`if len(matches) > 1:`
Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`raise ParseError('Too many numbers found.')`
Rename variables byte_value -> number, string -> unit_string. 2020-03-12 21:03:56 +00:00			`number = matches[0]`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
Rename variables byte_value -> number, string -> unit_string. 2020-03-12 21:03:56 +00:00			`if not string.startswith(number):`
Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`raise ParseError('Number is not at start of string.')`

			`number_string = number`

			`try:`
			`number = float(number)`
			`except ValueError as exc:`
			`raise ParseError(number) from exc`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`# if the string has no text besides the number, treat it as int of bytes.`
			`unit_string = string.replace(number_string, '')`
Rename variables byte_value -> number, string -> unit_string. 2020-03-12 21:03:56 +00:00			`if unit_string == '':`
Add BytestringException, ParseError. Allows for more granular catching on the caller side. 2021-01-18 09:05:47 +00:00			`return int(number)`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
Rename variables byte_value -> number, string -> unit_string. 2020-03-12 21:03:56 +00:00			`unit_string = normalize_unit_string(unit_string)`
Initial migratory commit. 2018-12-18 06:10:00 +00:00			`multiplier = REVERSED_UNIT_STRINGS[unit_string]`

Rename variables byte_value -> number, string -> unit_string. 2020-03-12 21:03:56 +00:00			`return int(number * multiplier)`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
Add argparse, betterhelp to bytestring. 2021-05-04 01:55:12 +00:00			`def bytestring_argparse(args):`
			`numbers = pipeable.input_many(args.numbers)`
			`for number in numbers:`
			`try:`
			`number = int(number)`
			`except ValueError:`
			`pipeable.stderr(f'bytestring: Each number should be an integer, not {number}.')`
			`return 1`
			`pipeable.stdout(bytestring(number))`

Rename args -> argv. 2020-03-12 21:04:23 +00:00			`def main(argv):`
Add argparse, betterhelp to bytestring. 2021-05-04 01:55:12 +00:00			`parser = argparse.ArgumentParser(description=__doc__)`

			`parser.add_argument('numbers', nargs='+')`
			`parser.set_defaults(func=bytestring_argparse)`

			`return betterhelp.single_main(argv, parser, __doc__)`
Initial migratory commit. 2018-12-18 06:10:00 +00:00
			`if __name__ == '__main__':`
Use SystemExit instead of sys.exit, like my other programs. 2020-02-18 06:56:19 +00:00			`raise SystemExit(main(sys.argv[1:]))`