This commit is contained in:
unknown 2016-01-16 17:43:17 -08:00
parent da06cb0461
commit 4b7cfea08d
69 changed files with 2126 additions and 412 deletions

2
.gitignore vendored
View file

@ -1,5 +1,7 @@
AwfulCrateBox/
Classifieds/
Toddo/toddo.db
Meal/meal.db
# Windows image file caches
Thumbs.db

View file

@ -0,0 +1,102 @@
import shlex
HEADER_TEXT = 'number name age address misc. "blank column?"'
BODY_TEXT = '''
4 "John Smith" 26 "123 north street"
17 "Jenny Smith" 8 "123 north street"
55 "Veronica Dove" 20 "456 west street"
77 "Austin Texas" 33 "789 south avenue"
89 "Mister Super Long Name" 123 "planet earth" "he's really old"
120 "Bill" "" "" "Deceased"
999 "Nine Nine" 999 "999 ninth boulevard" "favorite number is 9"
'''
BODY_TEXT = BODY_TEXT.strip()
DELIMITER = ' | '


def _is_number(word):
    '''
    Return True when `word` can be parsed by float().
    '''
    try:
        float(word)
    except ValueError:
        return False
    return True


def format_table(header_text, body_text, delimiter=DELIMITER):
    '''
    Lay out shell-quoted rows as an aligned text table.

    `header_text` is one shlex-style line of column labels and `body_text`
    holds one shlex-style line per row. Every column is made as wide as its
    longest label or cell. A column whose non-empty cells all parse as
    numbers is right-justified; every other column, and the header row, is
    left-justified. Short rows are padded with blank cells.

    Cells are justified as the text that was typed, never re-rendered
    through float formatting, so long numbers keep all of their digits and
    always fit the width that was measured for them.

    Returns the header line, a newline, and the newline-joined rows.
    '''
    header = shlex.split(header_text)
    rows = [shlex.split(line) for line in body_text.splitlines()]

    # The table is as wide as its widest line, header included.
    column_count = max([len(header)] + [len(row) for row in rows])
    header += [''] * (column_count - len(header))

    widths = [len(label) for label in header]
    text_columns = set()
    for row in rows:
        for (index, word) in enumerate(row):
            widths[index] = max(widths[index], len(word))
            # It only takes one non-numeric word to make the whole column
            # string type. Blank cells do not count either way.
            if word != '' and not _is_number(word):
                text_columns.add(index)

    header_line = delimiter.join(
        label.ljust(width) for (label, width) in zip(header, widths)
    )

    body_lines = []
    for row in rows:
        row = row + [''] * (column_count - len(row))
        cells = []
        for (index, word) in enumerate(row):
            if index in text_columns:
                cells.append(word.ljust(widths[index]))
            else:
                cells.append(word.rjust(widths[index]))
        body_lines.append(delimiter.join(cells))

    return header_line + '\n' + '\n'.join(body_lines)


output_text = format_table(HEADER_TEXT, BODY_TEXT, DELIMITER)
print(output_text)
'''
number | name | age | address | misc. | blank column?
4 | John Smith | 26 | 123 north street | |
17 | Jenny Smith | 8 | 123 north street | |
55 | Veronica Dove | 20 | 456 west street | |
77 | Austin Texas | 33 | 789 south avenue | |
89 | Mister Super Long Name | 123 | planet earth | he's really old |
120 | Bill | | | Deceased |
999 | Nine Nine | 999 | 999 ninth boulevard | favorite number is 9 |
'''

4
MassStitching/README.md Normal file
View file

@ -0,0 +1,4 @@
Mass Stitch
===========
Given the name of a directory, stitch together all the images in that directory into one large image.

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 205 KiB

View file

@ -0,0 +1,58 @@
from PIL import Image
import hashlib
import os
import sys
def load_all_images(iterable):
    '''
    Open every path in `iterable` with PIL and return the images that loaded.

    Each returned image has its `filename` attribute set to the path it came
    from. A path that raises OSError is reported and skipped.
    '''
    loaded = []
    for path in iterable:
        print('Loading "%s"' % path)
        try:
            picture = Image.open(path)
            picture.filename = path
            print('Loaded "%s"' % path)
            loaded.append(picture)
        except OSError:
            print('Could not load "%s"' % path)
            continue
    return loaded
def listfiles(directory):
    '''
    Return the paths of the regular files directly inside `directory`.

    Every path is `directory` joined with the entry name; subdirectories are
    left out.
    '''
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    return [path for path in candidates if os.path.isfile(path)]
def stitch(images, outputfilename):
    '''
    Paste `images` into one grid image and save it as `outputfilename`.

    Every grid cell is as large as the widest and the tallest input image,
    and each image is centered inside its cell. The grid has
    round(sqrt(count)) columns and as many rows as are needed to hold every
    image. Cells are filled left to right, top to bottom.
    '''
    sizes = [picture.size for picture in images]
    cell_width = max(width for (width, height) in sizes)
    cell_height = max(height for (width, height) in sizes)
    print('Using cell size of %dx%dpx' % (cell_width, cell_height))

    count = len(images)
    columns = round(count ** 0.5)
    (rows, leftover) = divmod(count, columns)
    if leftover != 0:
        # A partially filled final row for nonperfect squares.
        rows += 1

    canvas_width = columns * cell_width
    canvas_height = rows * cell_height
    print('Creating image of size: %dx%d (%dx%dpx)' % (columns, rows, canvas_width, canvas_height))
    canvas = Image.new('RGBA', [canvas_width, canvas_height])

    print('Pasting components')
    for (position, picture) in enumerate(images):
        (grid_row, grid_column) = divmod(position, columns)
        # Half of the unused space on each axis centers the picture.
        offset_x = int((cell_width - picture.size[0]) / 2)
        offset_y = int((cell_height - picture.size[1]) / 2)
        left = (grid_column * cell_width) + offset_x
        top = (grid_row * cell_height) + offset_y
        print(position, picture.filename, grid_column, grid_row, left, top)
        canvas.paste(picture, (left, top))

    print('Saving "%s"' % outputfilename)
    canvas.save(outputfilename)
# Stitch every image found in the directory named on the command line.
directory = sys.argv[1]
images = listfiles(directory)
# The result is saved under this name, relative to the working directory.
directory_id = 'massstitch_%s.png' % directory
# `listfiles` returns paths joined onto `directory`, so the bare output name
# never equals one of them. Compare absolute paths instead so that a result
# left over from a previous run is not stitched into the new one.
output_path = os.path.abspath(directory_id)
images = [name for name in images if os.path.abspath(name) != output_path]
images = load_all_images(images)
stitch(images, directory_id)

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

6
Meal/README.md Normal file
View file

@ -0,0 +1,6 @@
Meal
=======
Just how much pizza do you eat, anyway?
Read HELP_TEXT inside the meal.py file.

330
Meal/meal.py Normal file
View file

@ -0,0 +1,330 @@
import datetime
import math
import os
import sqlite3
import sys
import time
# Length of the ids produced by uid(); get_meal_by_id treats an id of exactly
# this length as complete and anything else as a prefix.
UID_CHARACTERS = 16
# How many meals `meal recent` shows when no count is given.
RECENT_COUNT = 12
# strftime format of the human-readable timestamp stored with each meal.
STRFTIME = '%Y %m %d %H:%M'
# Column positions within rows of the `meals` table (id, created, human).
SQL_MEAL_ID = 0
SQL_MEAL_CREATED = 1
SQL_MEAL_HUMAN = 2
# Column position of `food` within rows of `meal_foods` (mealid, food).
SQL_REL_FOOD = 1
# Column positions within rows of `food_groups` (food, foodgroup).
SQL_GROUP_FOOD = 0
SQL_GROUP_GROUP = 1
# Schema statements, separated by ';'. MealDB.cur executes each one the first
# time the cursor is created.
DB_INIT = '''
CREATE TABLE IF NOT EXISTS meals(id TEXT, created INT, human TEXT);
CREATE TABLE IF NOT EXISTS meal_foods(mealid TEXT, food TEXT);
CREATE TABLE IF NOT EXISTS food_groups(food TEXT, foodgroup TEXT);
CREATE INDEX IF NOT EXISTS index_meal_id on meals(id);
CREATE INDEX IF NOT EXISTS index_meal_created on meals(created);
CREATE INDEX IF NOT EXISTS index_food_mealid on meal_foods(mealid);
CREATE INDEX IF NOT EXISTS index_food_food on meal_foods(food);
CREATE INDEX IF NOT EXISTS index_group_food on food_groups(food);
'''.strip()
# Printed by the command line when the command is missing or unrecognized.
HELP_TEXT = '''
> meal add pizza, soda : Add a new meal with the foods "pizza" and "soda".
> meal adjust ec2 +10 : Adjust the timestamp of the meal starting with "ec2" by +10 seconds.
> meal adjust ec2 +10*60 : Adjusting timestamps supports math operations.
> meal group water drinks : Add "water" to foodgroup "drinks". Used for organization & reports.
> meal group water : Display the name of the group "water" belongs to.
> meal recent : Display info and foods for recent meals. Default {recent_count}.
> meal recent 4 : Display the last 4 meals.
> meal recent all : Display ALL meals.
> meal remove ec2 : Remove the meal whose ID starts with "ec2".
> meal show ec2 : Display info and foods for the meal whose ID starts with "ec2".
> meal ungroup water : Remove "water" from its foodgroup.
'''.format(recent_count=RECENT_COUNT)
def listget(li, index, fallback=None):
    '''
    Return `li[index]`, or `fallback` when that index is out of range.
    '''
    try:
        item = li[index]
    except IndexError:
        item = fallback
    return item
def uid(length=None):
    '''
    Return a random lowercase hex string of `length` characters, built from
    os.urandom. `length` defaults to UID_CHARACTERS.
    '''
    if length is None:
        length = UID_CHARACTERS
    # Each random byte yields two hex characters, so round the count up and
    # trim the spare character off odd lengths.
    random_bytes = os.urandom(math.ceil(length / 2))
    hexed = ''.join('{:02x}'.format(byte) for byte in random_bytes)
    return hexed[:length] if len(hexed) > length else hexed
class MealDB():
    '''
    Wrapper around the sqlite3 database that stores meals, the foods in each
    meal, and the food group each food belongs to.

    The connection and cursor are created lazily on first use. Every query
    goes through `normalized_query`, which lowercases all string bindings.
    '''
    def __init__(self, dbname='C:/Git/else/Meal/meal.db'):
        self.dbname = dbname
        self._sql = None
        self._cur = None

    @property
    def sql(self):
        '''
        The sqlite3 connection, opened on first access.
        '''
        if self._sql is None:
            self._sql = sqlite3.connect(self.dbname)
        return self._sql

    @property
    def cur(self):
        '''
        The shared cursor. Creating it executes every DB_INIT statement.
        '''
        if self._cur is None:
            self._cur = self.sql.cursor()
            statements = DB_INIT.split(';')
            for statement in statements:
                self._cur.execute(statement)
        return self._cur

    def add_meal(self, foods=None):
        '''
        Record a new meal, timestamped now, containing `foods`.

        `foods` must be a list or tuple; duplicates are collapsed. Raises
        Exception when no foods are given or they are all blank.
        Returns the id of the new meal.
        '''
        if foods is None:
            raise Exception('Empty meal!')
        assert isinstance(foods, (list, tuple))
        foods = set(foods)
        if ''.join(foods).replace(' ', '') == '':
            raise Exception('Empty meal!')
        mealid = self.new_uid('meals')
        now = datetime.datetime.now()
        now_stamp = int(now.timestamp())
        now_string = now.strftime(STRFTIME)
        self.normalized_query('INSERT INTO meals VALUES(?, ?, ?)', [mealid, now_stamp, now_string])
        for food in foods:
            self.normalized_query('INSERT INTO meal_foods VALUES(?, ?)', [mealid, food])
        self.sql.commit()
        foods = ', '.join(foods)
        print('Added meal %s at %s with %s' % (mealid, now_string, foods))
        return mealid

    def adjust_timestamp(self, mealid, adjustment):
        '''
        Move a certain meal by `adjustment` seconds. This is useful when you need to
        report a meal that happened a while ago, rather than the current timestamp.
        '''
        meal = self.get_meal_by_id(mealid)
        mealid = meal[SQL_MEAL_ID]
        meal_time = meal[SQL_MEAL_CREATED]
        meal_time += adjustment
        time_string = datetime.datetime.fromtimestamp(meal_time).strftime(STRFTIME)
        self.normalized_query('UPDATE meals SET created=?, human=? WHERE id=?', [meal_time, time_string, mealid])
        self.sql.commit()
        print('Adjusted %s to %s' % (mealid, time_string))

    def normalized_query(self, query, bindings):
        '''
        Execute `query` on the shared cursor with every string in `bindings`
        lowercased. Non-string bindings are passed through unchanged.
        '''
        nbindings = []
        for binding in bindings:
            if isinstance(binding, str):
                nbindings.append(binding.lower())
                continue
            nbindings.append(binding)
        self.cur.execute(query, nbindings)

    def get_foods_for_meal(self, mealid):
        '''
        Return the list of food names recorded for the meal matching `mealid`.
        '''
        meal = self.get_meal_by_id(mealid)
        mealid = meal[SQL_MEAL_ID]
        self.normalized_query('SELECT food FROM meal_foods WHERE mealid == ?', [mealid])
        items = self.cur.fetchall()
        items = [item[0] for item in items]
        return items

    def get_meal_by_id(self, mealid):
        '''
        Return the `meals` row for `mealid`.

        An id of UID_CHARACTERS characters must match exactly; anything else
        is treated as a prefix. Raises Exception when the id matches no meal
        or more than one meal.
        '''
        if len(mealid) == UID_CHARACTERS:
            meal_q = mealid
            self.normalized_query('SELECT * FROM meals WHERE id == ?', [meal_q])
        else:
            meal_q = mealid + '%'
            self.normalized_query('SELECT * FROM meals WHERE id LIKE ?', [meal_q])
        items = self.cur.fetchall()
        if len(items) > 1:
            items = [str(item) for item in items]
            items = '\n'.join(items)
            raise Exception('Found multiple meals for id "%s"\n%s' % (meal_q, items))
        if len(items) == 0:
            raise Exception('Found no meal for id "%s"' % (meal_q))
        meal = items[0]
        return meal

    def group(self, food, groupname):
        '''
        Insert `food` into the foodgroup `groupname`. This is used for organization,
        normalization, and creating dietary reports.

        When `groupname` is None nothing is changed; instead, print which
        group `food` belongs to and, if `food` is itself the name of a group,
        what that group contains.

        Raises Exception when `food` already belongs to a group.
        '''
        self.normalized_query('SELECT * FROM food_groups WHERE food == ?', [food])
        belongs = self.cur.fetchone()
        if groupname is None:
            self.normalized_query('SELECT * FROM food_groups where foodgroup == ?', [food])
            contains = self.cur.fetchall()
            if belongs is not None:
                print('"%s" belongs to group "%s".' % (food, belongs[1]))
            else:
                print('"%s" is not in any group.' % (food))
            if contains:
                contains = [x[0] for x in contains]
                contains = [repr(x) for x in contains]
                contains = ', '.join(contains)
                print('The "%s" group contains: %s' % (food, contains))
            return
        if belongs is not None:
            # Report the existing membership from the row that was fetched.
            raise Exception('"%s" is already in group "%s"' % (belongs[0], belongs[1]))
        self.normalized_query('INSERT INTO food_groups VALUES(?, ?)', [food, groupname])
        self.sql.commit()
        print('Added "%s" to group "%s"' % (food, groupname))

    def new_uid(self, table):
        '''
        Create a new UID that is unique to the given table.
        '''
        result = None
        query = 'SELECT * FROM {table} WHERE id == ?'.format(table=table)
        while result is None:
            i = uid()
            # Just gotta be sure, man.
            self.normalized_query(query, [i])
            if self.cur.fetchone() is None:
                result = i
        return result

    def remove_meal(self, mealid):
        '''
        Delete the meal matching `mealid` together with its food rows.
        '''
        meal = self.get_meal_by_id(mealid)
        mealid = meal[SQL_MEAL_ID]
        self.normalized_query('DELETE FROM meals WHERE id == ?', [mealid])
        self.normalized_query('DELETE FROM meal_foods WHERE mealid == ?', [mealid])
        self.sql.commit()
        print('Removed meal %s' % (mealid))

    def show_meal(self, mealid):
        '''
        Display:
        id
        timestamp
        foods
        for the meal with the given ID.
        '''
        meal = self.get_meal_by_id(mealid)
        foods = self.get_foods_for_meal(mealid)
        print(meal[SQL_MEAL_ID])
        print(meal[SQL_MEAL_HUMAN])
        foods = ', '.join(foods)
        print(foods)

    def show_recent(self, count=RECENT_COUNT):
        '''
        Display:
        id : timestamp : foods
        for the `count` most recent meals. If count is "all" or "*", show ALL meals.
        '''
        if count in ('all', '*'):
            self.normalized_query('SELECT * FROM meals ORDER BY created DESC', [])
        else:
            self.normalized_query('SELECT * FROM meals ORDER BY created DESC LIMIT ?', [count])
        meals = self.cur.fetchall()
        output = []
        for meal in meals:
            mealid = meal[SQL_MEAL_ID]
            human = meal[SQL_MEAL_HUMAN]
            foods = self.get_foods_for_meal(mealid)
            foods = ', '.join(foods)
            output.append('%s : %s : %s' % (mealid, human, foods))
        output = '\n'.join(output)
        print(output)

    def ungroup(self, food):
        '''
        Remove `food` from whatever group it is in.

        Raises Exception when `food` is not part of a group.
        '''
        self.normalized_query('SELECT * FROM food_groups WHERE food == ?', [food])
        f = self.cur.fetchone()
        if f is None:
            raise Exception('"%s" is not part of a group' % (food))
        groupname = f[1]
        self.normalized_query('DELETE FROM food_groups WHERE food == ?', [food])
        self.sql.commit()
        print('Removed "%s" from group "%s"' % (food, groupname))
# Command line entry point. The first argument picks the command. Every
# command except `add` finishes with quit(); `add` falls through to the code
# after the if/elif chain. A missing or unknown command prints HELP_TEXT.
if __name__ == '__main__':
    mealdb = MealDB()
    args = sys.argv[1:]
    if len(args) == 0:
        command = ''
    else:
        command = args[0].lower()
    if command == 'add':
        # Keep only the food arguments; they are parsed below the chain.
        args = args[1:]
    elif command == 'adjust':
        mealid = args[1]
        adjustment = args[2]
        # NOTE(review): eval() runs arbitrary Python taken from the command
        # line. It is what lets "+10*60" work, but it should only ever see
        # trusted input.
        adjustment = eval(adjustment)
        mealdb.adjust_timestamp(mealid, adjustment)
        quit()
    elif command == 'group':
        food = args[1]
        # Without a group name, group() only reports the current membership.
        groupname = listget(args, 2, None)
        mealdb.group(food, groupname)
        quit()
    elif command == 'recent':
        count = listget(args, 1, RECENT_COUNT)
        mealdb.show_recent(count)
        quit()
    elif command == 'remove':
        # A single argument may hold several comma-separated ids.
        mealids = args[1]
        mealids = mealids.replace(' ', '')
        mealids = mealids.split(',')
        for mealid in mealids:
            mealdb.remove_meal(mealid)
        quit()
    elif command == 'show':
        mealid = args[1]
        mealdb.show_meal(mealid)
        quit()
    elif command == 'ungroup':
        food = args[1]
        mealdb.ungroup(food)
        quit()
    else:
        print(HELP_TEXT)
        quit()
    # Only `add` reaches this point. Rejoin the arguments so foods can be
    # split on commas regardless of how the shell split them on spaces.
    args = ' '.join(args)
    if ';' in args:
        # Text after a ';' is a timestamp adjustment expression in seconds.
        (args, adjustment) = args.split(';')
        adjustment = adjustment.strip()
        # NOTE(review): same eval() caveat as the `adjust` command above.
        adjustment = eval(adjustment)
    else:
        adjustment = 0
    args = args.strip()
    args = args.split(',')
    args = [food.strip() for food in args]
    meal = mealdb.add_meal(args)
    if adjustment != 0:
        mealdb.adjust_timestamp(meal, adjustment)
    quit()

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 186 B

View file

@ -0,0 +1,89 @@
from PIL import Image
import sys
def chunk_iterable(iterable, chunk_length, allow_incomplete=True):
    '''
    Split a sliceable sequence into a list of consecutive slices, each
    `chunk_length` items long.

    When the length is not a multiple of `chunk_length`, the shorter
    leftover slice is kept as the last element if `allow_incomplete` is True
    and dropped otherwise.
    '''
    (steps, remainder) = divmod(len(iterable), chunk_length)
    if remainder != 0 and allow_incomplete:
        steps += 1
    chunks = []
    for step in range(steps):
        start = chunk_length * step
        chunks.append(iterable[start : start + chunk_length])
    return chunks
def hex_to_rgb(x):
    '''
    Convert a hex color string such as '#000000' into a tuple of ints.

    Any '#' is removed, the rest is cut into two-character chunks, and each
    chunk is parsed as base 16. The chunks are printed before conversion.
    '''
    pairs = chunk_iterable(x.replace('#', ''), 2)
    print(pairs)
    return tuple(int(pair, 16) for pair in pairs)
def mesh_generator(image_width, image_height, square_size, mode):
    '''
    Yield the (x, y) position of every square in the mesh.

    `mode` is indexed as (x spacing, y spacing, x alternator, y alternator),
    all measured in squares. Positions step across the image by the spacing.
    On every other row, starting with the first, x is shifted by the
    x alternator. On every other generated position, starting with the first
    and counted across the whole image, y is shifted by the y alternator.
    `mode` is printed when iteration starts.
    '''
    print(mode)
    step_x = square_size * mode[0]
    step_y = square_size * mode[1]
    positions_seen = 0
    for (row_number, y) in enumerate(range(0, image_height, step_y), start=1):
        row_is_odd = (row_number % 2 == 1)
        for x in range(0, image_width, step_x):
            # This parity deliberately carries over from one row to the next.
            positions_seen += 1
            position_is_odd = (positions_seen % 2 == 1)
            shift_x = int(row_is_odd) * square_size * mode[2]
            shift_y = int(position_is_odd) * square_size * mode[3]
            yield (x + shift_x, y + shift_y)
def make_image(image_width, image_height, square_size, mode, bgcolor='#00000000', fgcolor='#000000'):
    '''
    Render a mesh of `fgcolor` squares over `bgcolor` and save it as a PNG.

    One repeat of the pattern is drawn first. It is then tiled 2x2 until it
    is at least as large as the requested image, and finally cropped to
    `image_width` x `image_height`. The file is named after the image size,
    the square size and the mode digits.
    '''
    tile_width = (2 * square_size * mode[0])
    tile_height = (2 * square_size * mode[1])
    print(tile_width, tile_height)

    background = hex_to_rgb(bgcolor)
    foreground = hex_to_rgb(fgcolor)
    pattern = Image.new('RGBA', (tile_width, tile_height), background)
    square = Image.new('RGBA', (square_size, square_size), foreground)
    for position in mesh_generator(tile_width, tile_height, square_size, mode):
        pattern.paste(square, position)

    # Each pass doubles the pattern on both axes.
    while not (pattern.size[0] >= image_width and pattern.size[1] >= image_height):
        previous = pattern
        (w, h) = previous.size
        print('expanding from', w, h)
        pattern = Image.new('RGBA', (w * 2, h * 2))
        for corner in ((0, 0), (w, 0), (0, h), (w, h)):
            pattern.paste(previous, corner)

    image = pattern.crop((0, 0, image_width, image_height))
    mode_label = ''.join(str(part) for part in mode)
    filename = 'mesh_%dx%d_%d_%s.png' % (image_width, image_height, square_size, mode_label)
    image.save(filename)
    print('Saved %s' % filename)
def listget(li, index, fallback=None):
    '''
    Index into `li`, returning `fallback` instead of raising IndexError.
    '''
    try:
        value = li[index]
    except IndexError:
        return fallback
    else:
        return value
# Usage: width height [square_size] [x_spacing] [y_spacing]
#        [x_alternator] [y_alternator] [bgcolor] [fgcolor]
if __name__ == '__main__':
    image_width = int(sys.argv[1])
    image_height = int(sys.argv[2])
    # Optional integer arguments, as (argv position, default) pairs.
    (square_size, x_spacing, y_spacing, x_alternator, y_alternator) = (
        int(listget(sys.argv, position, default))
        for (position, default) in ((3, 1), (4, 2), (5, 2), (6, 0), (7, 0))
    )
    bgcolor = listget(sys.argv, 8, '#00000000')
    fgcolor = listget(sys.argv, 9, '#000000')
    mode = (x_spacing, y_spacing, x_alternator, y_alternator)
    make_image(image_width, image_height, square_size, mode, bgcolor, fgcolor)

332
OpenDirDL/opendirdl.py Normal file
View file

@ -0,0 +1,332 @@
import bs4
import hashlib
import json
import os
import re
import requests
import string
import sys
import time
import traceback
import urllib.parse
# Characters that filepath_sanitize strips out of folder and file names.
FILENAME_BADCHARS = '/\\:*?"<>|'
# Chunk size, in bytes, requested from iter_content while downloading.
DOWNLOAD_CHUNK = 2048
# When doing a basic scan, we will not send HEAD requests to URLs that end in these strings,
# because they're probably files.
# This isn't meant to be a comprehensive filetype library, but it covers enough of the
# typical opendir to speed things up.
SKIPPABLE_FILETYPES = [
'.avi',
'.bmp',
'.epub',
'.db',
'.flac',
'.ico',
'.iso',
'.jpg',
'.m4a',
'.mkv',
'.mov',
'.mp3',
'.mp4',
'.pdf',
'.png',
'.srt',
'.txt',
'.webm',
'.zip',
]
# Walker.walk compares against the lowercased URL, so keep these lowercase.
SKIPPABLE_FILETYPES = [x.lower() for x in SKIPPABLE_FILETYPES]
class Downloader:
    '''
    Downloads every URL listed in a JSON url file.

    The keys of the file are the URLs, processed in case-insensitive sorted
    order. Files are written beneath `outputdir`; when that is None or
    empty, the network location of the first URL is used instead.
    '''
    def __init__(self, urlfile, outputdir=None, headers=None):
        url_info = file_to_dict(urlfile)
        self.urls = sorted(url_info, key=str.lower)
        self.outputdir = outputdir
        if self.outputdir is None or self.outputdir == "":
            # url_to_filepath returns (root, path, filename). Keep root.
            (root, _, _) = url_to_filepath(self.urls[0])
            self.outputdir = root

    def download(self, overwrite=False):
        '''
        Download each URL into a folder mirroring its path.

        Data is first written to a temporary file named from a hash of the
        URL and renamed once the download succeeds. Files that already exist
        are skipped unless `overwrite` is true, in which case they are
        removed first.
        '''
        overwrite = bool(overwrite)
        for url in self.urls:
            # Creating the path. The root derived from the URL is ignored in
            # favor of self.outputdir, which may be a custom directory.
            (_, subfolder, basename) = url_to_filepath(url)
            folder = os.path.join(self.outputdir, subfolder)
            if not os.path.exists(folder):
                os.makedirs(folder)
            localname = os.path.join(folder, basename)
            temporary_basename = hashit(url, 16) + '.oddltemporary'
            temporary_localname = os.path.join(folder, temporary_basename)

            # Managing overwrite.
            if os.path.isfile(localname):
                if overwrite is not True:
                    safeprint('Skipping "%s". Use `overwrite=True`' % localname)
                    continue
                os.remove(localname)

            safeprint('Downloading "%s" as "%s"' % (localname, temporary_basename))
            filehandle = open(temporary_localname, 'wb')
            try:
                download_file(url, filehandle, hookfunction=hook1)
                os.rename(temporary_localname, localname)
            except BaseException:
                filehandle.close()
                raise
class Walker:
    '''
    Recursively walks the index pages beneath `website` and records every
    non-index URL it finds.

    `results` maps each URL to a dict holding its 'Content-Length' and
    'Content-Type'. It starts from the contents of `outputfile` when that
    file already exists. `already_seen` holds the URLs handled in this run.
    '''
    def __init__(self, website, outputfile, fullscan=False):
        self.website = website
        self.fullscan = bool(fullscan)
        self.results = file_to_dict(outputfile) if os.path.exists(outputfile) else {}
        self.already_seen = set()

    def add_head_to_results(self, head):
        '''
        Record `head` in the results and mark its URL as seen.

        `head` is either a URL string, used by the basic scan which skips
        URLs that look like files and so knows no headers, or a HEAD
        response whose `url` and `headers` are read.
        '''
        if isinstance(head, str):
            url = head
            length = -1
            content_type = '?'
        else:
            url = head.url
            length = int(head.headers.get('Content-Length', -1))
            content_type = head.headers.get('Content-Type', '?')
        self.results[url] = {
            'Content-Length': length,
            'Content-Type': content_type,
        }
        self.already_seen.add(url)

    def extract_hrefs(self, response):
        '''
        Return the absolute link targets on the page in `response` that are
        worth following.
        '''
        soup = bs4.BeautifulSoup(response.text)
        hrefs = []
        for anchor in soup.findAll('a'):
            try:
                target = anchor['href']
            except KeyError:
                continue
            target = urllib.parse.urljoin(response.url, target)
            leaves_site = not target.startswith(self.website)
            is_sort_link = 'C=' in target and 'O=' in target
            is_desktop_ini = target.endswith('desktop.ini')
            # Skip other sites and parent directories, the alternative sort
            # modes of index pages, and desktop.ini files.
            if leaves_site or is_sort_link or is_desktop_ini:
                continue
            hrefs.append(target)
        return hrefs

    def walk(self, url=None):
        '''
        Visit `url`, defaulting to the website root, and recurse into it
        when it is an index page.

        Unless this is a full scan, URLs ending in a skippable extension are
        recorded without any request. Other URLs get a HEAD request; html
        pages whose URL ends in '/' are fetched and their links walked,
        while everything else is recorded. Returns a list, which this method
        never adds anything to except the results of its own recursion.
        '''
        if url is None:
            url = self.website
        else:
            url = urllib.parse.urljoin(self.website, url)
        results = []

        if self.fullscan is False and url.lower().endswith(tuple(SKIPPABLE_FILETYPES)):
            print('Skipping "%s" due to extension' % url)
            self.add_head_to_results(url)
            return results

        if not url.startswith(self.website):
            # Don't follow external links or parent directory.
            return results

        head = requests.head(url)
        head.raise_for_status()
        safeprint('HEAD: %s : %s' % (url, head))
        content_type = head.headers.get('Content-Type', '?')
        self.already_seen.add(head.url)

        is_index = content_type.startswith('text/html') and head.url.endswith('/')
        if not is_index:
            # Don't add index pages to the results.
            self.add_head_to_results(head)
            return results

        # This is an index page, let's get recursive.
        page = requests.get(url)
        safeprint(' GET: %s : %s' % (url, page))
        for href in self.extract_hrefs(page):
            if href in self.results or href in self.already_seen:
                continue
            results.extend(self.walk(href))
        return results
def dict_to_file(jdict, filename):
    '''
    Write `jdict` to `filename` as UTF-8 encoded, indented JSON with sorted keys.

    The dict is serialized before the file is opened, so a dict that cannot
    be serialized leaves an existing file untouched, and the file is always
    closed even if writing fails.
    '''
    text = json.dumps(jdict, indent=4, sort_keys=True)
    with open(filename, 'wb') as filehandle:
        filehandle.write(text.encode('utf-8'))
def download_file(url, filehandle, getsizeheaders=True, hookfunction=None, headers=None, auth=None):
    '''
    Stream `url` into the open binary `filehandle`, then close it.

    getsizeheaders: when true, a HEAD request is sent first to learn the
        expected size. When false, or when the server reports no
        Content-Length, the expected size is taken to be 1.
    hookfunction: if given, called after every chunk with
        (blocks so far, DOWNLOAD_CHUNK, expected size).
    headers, auth: passed through to both requests.

    `filehandle` is closed whether or not the download succeeds.
    Raises Exception when fewer bytes than expected ended up in the file.
    Returns True otherwise.
    '''
    if headers is None:
        headers = {}
    totalsize = 1
    if getsizeheaders:
        head = requests.head(url, headers=headers, auth=auth)
        # Not every server sends Content-Length; keep the placeholder then.
        totalsize = int(head.headers.get('content-length', totalsize))
    currentblock = 0
    downloading = requests.get(url, stream=True, headers=headers, auth=auth)
    try:
        for chunk in downloading.iter_content(chunk_size=DOWNLOAD_CHUNK):
            if chunk:
                currentblock += 1
                filehandle.write(chunk)
                if hookfunction is not None:
                    hookfunction(currentblock, DOWNLOAD_CHUNK, totalsize)
    finally:
        filehandle.close()
    size = os.path.getsize(filehandle.name)
    if size < totalsize:
        raise Exception('Did not receive expected total size. %d / %d' % (size, totalsize))
    return True
def file_to_dict(filename):
    '''
    Read `filename` as UTF-8 encoded JSON and return the parsed value.

    The file is closed even when reading or decoding fails.
    '''
    with open(filename, 'rb') as filehandle:
        raw = filehandle.read()
    return json.loads(raw.decode('utf-8'))
def filepath_sanitize(text, exclusions='', badchars=None):
    '''
    Return `text` with every forbidden filename character removed.

    exclusions: characters that are allowed to stay. Each character is
        considered on its own, so their order does not matter.
    badchars: the characters to remove. Defaults to FILENAME_BADCHARS.
    '''
    if badchars is None:
        badchars = FILENAME_BADCHARS
    allowed = set(exclusions)
    for char in badchars:
        if char not in allowed:
            text = text.replace(char, '')
    return text
def hashit(text, length=None):
    '''
    Return the SHA-512 hex digest of the UTF-8 encoding of `text`, cut down
    to its first `length` characters when `length` is given.
    '''
    digest = hashlib.sha512(text.encode('utf-8')).hexdigest()
    return digest if length is None else digest[:length]
def hook1(currentblock, chunksize, totalsize):
    '''
    Progress hook: print "<received> / <total> bytes" on one line,
    ending with a carriage return so the next call overwrites it.

    The received count is capped at `totalsize`, shown with thousands
    separators and padded to the width of the total. Once the two numbers
    are equal a newline is printed to finish the line.
    '''
    received = min(currentblock * chunksize, totalsize)
    total_text = '{:,}'.format(totalsize)
    received_text = '{:,}'.format(received).rjust(len(total_text), ' ')
    print('%s / %s bytes' % (received_text, total_text), end='\r')
    if received_text == total_text:
        print()
def safeprint(text, **kwargs):
    '''
    Print `text` using only ASCII characters.

    Characters that cannot be encoded as ASCII become '?', and every '?' in
    the result, including ones that were already in `text`, is then printed
    as '_'. Keyword arguments are passed on to print.
    '''
    ascii_bytes = str(text).encode('ascii', 'replace')
    print(ascii_bytes.decode().replace('?', '_'), **kwargs)
def url_to_filepath(text):
    '''
    Break a URL into (root, folder, filename) for saving it locally.

    root is the network location. folder is the directory part of the
    unquoted URL path, without leading slashes, sanitized, and using the
    operating system's path separator. filename is the sanitized final path
    component.
    '''
    parts = urllib.parse.urlsplit(urllib.parse.unquote(text))
    (folder, filename) = os.path.split(parts.path)
    folder = folder.lstrip('/')
    # Folders are allowed to have slashes
    folder = filepath_sanitize(folder, exclusions='/\\')
    for separator in ('\\', '/'):
        folder = folder.replace(separator, os.path.sep)
    # But Files are not.
    filename = filepath_sanitize(filename)
    return (parts.netloc, folder, filename)
## Commandline functions ####################################################\\
def digest(website, outputfile, fullscan, *trash):
    '''
    Walk `website` and save everything that was found to `outputfile`.

    A trailing '/' is added to `website` when it is missing. If the walk is
    interrupted by any exception, the results gathered so far are still
    saved and the traceback is printed.
    '''
    fullscan = bool(fullscan)
    if website[-1] != '/':
        website = website + '/'
    walker = Walker(website, outputfile, fullscan=fullscan)
    try:
        walker.walk()
        dict_to_file(walker.results, outputfile)
    except BaseException:
        dict_to_file(walker.results, outputfile)
        traceback.print_exc()
        print('SAVED PROGRESS SO FAR')
def download(urlfile, outputdir, overwrite, *trash):
    '''
    Download every URL listed in `urlfile` into `outputdir`.
    '''
    Downloader(urlfile, outputdir).download(overwrite)
def filter_pattern(urlfile, patterns, negative=False, *trash):
    '''
    Remove entries from the url file according to regex `patterns`, then
    write the file back.

    When `negative` is False, items are removed when they DO match a pattern,
    allowing you to delete trash files.
    When `negative` is True, items are removed when they do NOT match a pattern,
    allowing you to keep items of interest.

    `patterns` may be a single pattern string or a list of them. An item is
    removed as soon as one pattern calls for it.
    '''
    if isinstance(patterns, str):
        patterns = [patterns]
    jdict = file_to_dict(urlfile)
    keys = list(jdict.keys())
    for key in keys:
        for pattern in patterns:
            contains = re.search(pattern, key) is not None
            if contains ^ negative:
                safeprint('Removing "%s"' % key)
                del jdict[key]
                # The key is gone; letting another pattern trigger would try
                # to delete it a second time.
                break
    dict_to_file(jdict, urlfile)
def keep_pattern(urlfile, patterns, *trash):
    '''
    Run filter_pattern on `urlfile` with negative=True.
    '''
    filter_pattern(urlfile, patterns, negative=True)
def measure(urlfile, *trash):
    '''
    Print and return the sum of the positive Content-Length values in
    `urlfile`.
    '''
    jdict = file_to_dict(urlfile)
    sizes = (info['Content-Length'] for info in jdict.values())
    totalbytes = sum(size for size in sizes if size > 0)
    print('{:,}'.format(totalbytes))
    return totalbytes
def remove_pattern(urlfile, patterns, *trash):
    '''
    Run filter_pattern on `urlfile` with negative=False.
    '''
    filter_pattern(urlfile, patterns, negative=False)
def listget(l, index, default=None):
    '''
    Return `l[index]`, or `default` if the index is out of range.
    '''
    try:
        found = l[index]
    except IndexError:
        found = default
    return found
cmdfunctions = [digest, download, keep_pattern, measure, remove_pattern]
## End of commandline functions #############################################//

# Usage: opendirdl.py <command> [arg1] [arg2] [arg3]
# The command is the name of one of `cmdfunctions`. Missing arguments are
# passed to it as None.
if __name__ == '__main__':
    (command, arg1, arg2, arg3) = (
        listget(sys.argv, position, None) for position in range(1, 5)
    )
    if command is None:
        quit()

    matching = [function for function in cmdfunctions if function.__name__ == command]
    did_something = bool(matching)
    if did_something:
        matching[0](arg1, arg2, arg3)
    else:
        print('No matching function')

View file

@ -5,116 +5,181 @@ import sys
DEFAULT_LENGTH = 32
DEFAULT_SENTENCE = 5
HELP_MESSAGE = '''
---------------------------------------------------------------
|Generates a randomized password. |
| |
|> passwordy [length] ["p"] ["d"] |
| |
| length : How many characters. Default %03d. |
| p : If present, the password will contain punctuation |
| characters. Otherwise not. |
| d : If present, the password will contain digits. |
| Otherwise not. |
| |
| The password can always contain upper and lowercase |
| letters. |
---------------------------------------------------------------
'''[1:-1] % (DEFAULT_LENGTH)
===============================================================================
Generates a randomized password.
HELP_SENTENCE = '''
---------------------------------------------------------------
|Generates a randomized sentence |
| |
|> passwordy sent [length] [join] |
| |
| length : How many words to retrieve. Default %03d. |
| join : The character that will join the words together. |
| Default space. |
---------------------------------------------------------------
'''[1:-1] % (DEFAULT_SENTENCE)
> passwordy [length] [options]
def make_password(length=None, allowpunctuation=False, allowdigits=False, digits_only=False, binary=False):
'''
Returns a string of length `length` consisting of a random selection
of uppercase and lowercase letters, as well as punctuation and digits
if parameters permit
'''
if length is None:
length = DEFAULT_LENGTH
if digits_only is False and binary is False:
s = string.ascii_letters
if allowpunctuation is True:
s += string.punctuation
if allowdigits is True:
s += string.digits
elif digits_only:
s = string.digits
elif binary:
s = '01'
length: How many characters. Default %03d.
options:
h : consist entirely of hexadecimal characters.
b : consist entirely of binary characters.
dd : consist entirely of decimal characters.
default : consist entirely of upper+lower letters.
password = ''.join([random.choice(s) for x in range(length)])
return password
p : allow punctuation in conjunction with above.
d : allow digits in conjunction with above.
l : convert to lowercase.
u : convert to uppercase.
nd : no duplicates. Each character can only appear once.
Examples:
> passwordy 32 h l
98f17b6016cf08cc00f2aeecc8d8afeb
> passwordy 32 h u
2AA706866BF7A5C18328BF866136A261
> passwordy 32 u
JHEPTKCEFZRFXILMASHNPSTFFNWQHTTN
> passwordy 32 p
Q+:iSKX!Nt)ewUvlE*!+^D}hp+|<wpJ}
> passwordy 32 l p
m*'otz/"!qo?-^wwdu@fasf:|ldkosi`
===============================================================================
Generates a randomized sentence of words.
> passwordy sent [length] [join]
length : How many words. Default %03d.
join : The character that will join words together.
Default space.
Examples:
> passwordy sent
arrowroot sheared rustproof undo propionic acid
> passwordy sent 8
cipher competition solid angle rigmarole lachrymal social class critter consequently
> passwordy sent 8 _
Kahn_secondary_emission_unskilled_superior_court_straight_ticket_voltameter_hopper_crass
===============================================================================
'''.strip() % (DEFAULT_LENGTH, DEFAULT_SENTENCE)
def listget(li, index, fallback=None):
try:
return li[index]
except IndexError:
return fallback
def make_password(length=None, passtype='standard'):
'''
Returns a string of length `length` consisting of a random selection
of uppercase and lowercase letters, as well as punctuation and digits
if parameters permit
'''
if length is None:
length = DEFAULT_LENGTH
alphabet = ''
if 'standard' in passtype:
alphabet = string.ascii_letters
elif 'digit_only' in passtype:
alphabet = string.digits
elif 'hex' in passtype:
alphabet = '0123456789abcdef'
elif 'binary' in passtype:
alphabet = '01'
if '+digits' in passtype:
alphabet += string.digits
if '+punctuation' in passtype:
alphabet += string.punctuation
if '+lowercase' in passtype:
alphabet = alphabet.lower()
elif '+uppercase' in passtype:
alphabet = alphabet.upper()
alphabet = list(set(alphabet))
if '+noduplicates' in passtype:
if len(alphabet) < length:
message = 'Alphabet "%s" is not long enough to support no-dupe password of length %d'
message = message % (alphabet, length)
raise Exception(message)
password = ''
for x in range(length):
random.shuffle(alphabet)
password += alphabet.pop(0)
else:
password = ''.join([random.choice(alphabet) for x in range(length)])
return password
def make_sentence(length=None, joiner=' '):
'''
Returns a string containing `length` words, which come from
dictionary.common.
'''
import dictionary.common as common
if length is None:
length = DEFAULT_LENGTH
words = [random.choice(common.words) for x in range(length)]
words = [w.replace(' ', joiner) for w in words]
result = joiner.join(words)
return result
'''
Returns a string containing `length` words, which come from
dictionary.common.
'''
import dictionary.common as common
if length is None:
length = DEFAULT_LENGTH
words = [random.choice(common.words) for x in range(length)]
words = [w.replace(' ', joiner) for w in words]
result = joiner.join(words)
return result
if __name__ == '__main__':
args = sys.argv
argc = len(args) - 1
args = sys.argv[1:]
argc = len(args)
if argc == 0:
mode = 'password'
length = DEFAULT_LENGTH
mode = listget(args, 0, 'password')
if 'help' in mode:
print(HELP_MESSAGE)
quit()
elif args[1].isdigit():
mode = 'password'
length = int(args[1])
if 'sent' not in mode:
length = listget(args, 0, str(DEFAULT_LENGTH))
options = [a.lower() for a in args[1:]]
elif args[1] in 'DdPp':
mode = 'password'
length = DEFAULT_LENGTH
if '-' in length:
length = length.replace(' ', '')
length = [int(x) for x in length.split('-', 1)]
length = random.randint(*length)
elif 'sent' in args[1].lower() and argc == 1:
mode = 'sentence'
length = DEFAULT_SENTENCE
elif not length.isdigit() and options == []:
options = [length]
length = DEFAULT_LENGTH
elif argc == 1:
mode = None
print(HELP_MESSAGE)
print(HELP_SENTENCE)
length = int(length)
elif 'sent' in args[1].lower() and args[2].isdigit():
mode = 'sentence'
length = int(args[2])
passtype = 'standard'
if 'dd' in options:
passtype = 'digit_only'
if 'b' in options:
passtype = 'binary'
if 'h' in options:
passtype = 'hex'
elif 'sent' in args[1].lower():
mode = 'sentence'
length = DEFAULT_SENTENCE
if 'l' in options:
passtype += '+lowercase'
elif 'u' in options:
passtype += '+uppercase'
if 'p' in options:
passtype += '+punctuation'
if 'd' in options:
passtype += '+digits'
if 'nd' in options:
passtype += '+noduplicates'
if mode == 'password':
punc = 'p' in args
digi = 'd' in args
digi_only = 'dd' in args
binary = 'b' in args
print(make_password(length, punc, digi, digi_only, binary))
elif mode == 'sentence':
if argc == 3:
joiner = args[3]
else:
joiner = ' '
print(make_sentence(length, joiner))
print(make_password(length, passtype=passtype))
else:
length = listget(args, 1, str(DEFAULT_SENTENCE))
joiner = listget(args, 2, ' ')
else:
pass
if not length.isdigit():
joiner = length
length = DEFAULT_SENTENCE
length = int(length)
print(make_sentence(length, joiner))