commit 001a8d970f (parent 4b7cfea08d)
10 changed files with 716 additions and 199 deletions
DeLetterbox/README.md  (new file, +4)
@@ -0,0 +1,4 @@
+DeLetterbox
+===========
+
+I didn't test this very much, just needed something quick.
DeLetterbox/deletterbox.py  (new file, +42)
@@ -0,0 +1,42 @@
+from PIL import Image
+import os
+import sys
+
+CLOSE_ENOUGH_THRESHOLD = 10
+
+def close_enough(a, b):
+    for (a_channel, b_channel) in zip(a, b):
+        if abs(a_channel - b_channel) > CLOSE_ENOUGH_THRESHOLD:
+            return False
+    return True
+
+def deletterbox(filename):
+    image = Image.open(filename)
+    trim_top(image)
+    for x in range(4):
+        image = trim_top(image)
+        image = image.rotate(90)
+    (base, ext) = os.path.splitext(filename)
+    #filename = base + 'X' + ext
+    image.save(filename)
+
+def trim_top(image):
+    letterbox_color = image.getpixel((0, 0))
+    for y in range(image.size[1]):
+        solid = True
+        for x in range(image.size[0]):
+            pixel = image.getpixel((x, y))
+            if not close_enough(letterbox_color, pixel):
+                solid = False
+                break
+        if not solid:
+            break
+    bounds = (0, y, image.size[0], image.size[1])
+    #print(bounds)
+    image = image.crop(bounds)
+    return image
+
+filenames = sys.argv[1:]
+for filename in filenames:
+    deletterbox(filename)
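The script reads filenames straight from `sys.argv` and saves each trimmed image back over the original file (the line that would write to a new name is commented out), so a run would look something like this, assuming Pillow is installed:

    python deletterbox.py letterboxed1.jpg letterboxed2.jpg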
DeLetterbox/example1_after.jpg   (new binary file, 37 KiB; not shown)
DeLetterbox/example1_before.jpg  (new binary file, 73 KiB; not shown)
DeLetterbox/example2_after.jpg   (new binary file, 24 KiB; not shown)
DeLetterbox/example2_before.jpg  (new binary file, 28 KiB; not shown)
OpenDirDL/README.md  (new file, +6)
@@ -0,0 +1,6 @@
+Open Dir DL
+===========
+
+Requires `pip install beautifulsoup4`
+
+See inside opendirdl.py for usage instructions.
OpenDirDL/opendirdl.py
@@ -1,17 +1,85 @@
-import bs4
-import hashlib
-import json
+'''
+OpenDirDL
+downloads open directories
+
+Usage:
+
+DIGEST:
+    Recursively fetch directories and build a database of file URLs.
+
+    > opendirdl digest !clipboard <flags>
+    > opendirdl digest http://website.com/directory/ <flags>
+
+    flags:
+    -f | --fullscan : When included, perform HEAD requests on all files, to
+                      know the size of the entire directory.
+    -db "x.db" | --database "x.db" : Use a custom database filename. By default, databases
+                                     are named after the web domain.
+
+DOWNLOAD:
+    Download the files whose URLs are enabled in the database.
+
+    > opendirdl download website.com.db <flags>
+
+    flags:
+    -o "x" | --outputdir "x" : Save the files to a custom directory, "x". By default,
+                               files are saved to a folder named after the web domain.
+    -ow | --overwrite : When included, download and overwrite files even if they
+                        already exist in the output directory.
+    -bps 100 | --bytespersecond 100 : Ratelimit yourself to downloading at 100 BYTES per second.
+                                      The webmaster will appreciate this.
+
+KEEP_PATTERN:
+    Enable URLs which match a regex pattern. Matches are based on the percent-encoded strings!
+
+    > opendirdl keep_pattern website.com.db ".*"
+
+REMOVE_PATTERN:
+    Disable URLs which match a regex pattern. Matches are based on the percent-encoded strings!
+
+    > opendirdl remove_pattern website.com.db ".*"
+
+LIST_BASENAMES:
+    List enabled URLs in order of their base filename. This makes it easier to find titles of
+    interest in a directory that is very scattered or poorly organized.
+
+    > opendirdl list_basenames website.com.db <flags>
+
+    flags:
+    -o "x.txt" | --outputfile "x.txt" : Output the results to a file instead of stdout. This is
+                                        useful if the filenames contain special characters that
+                                        crash Python, or are so long that the console becomes
+                                        unreadable.
+
+MEASURE:
+    Sum up the filesizes of all enabled URLs.
+
+    > opendirdl measure website.com.db <flags>
+
+    flags:
+    -f | --fullscan : When included, perform HEAD requests on any URL whose size is not known.
+                      If this flag is not included, and some file's size is unknown, you will
+                      receive a note.
+'''
+
+# Module names preceded by two hashes indicate modules that are imported during
+# a function, because they are not used anywhere else and we don't need to waste
+# time importing them usually.
+
+import argparse
+## import bs4
+## import hashlib
 import os
-import re
+import ratelimiter
+## import re
 import requests
-import string
-import sys
-import time
-import traceback
+import sqlite3
+## import sys
+## tkinter
 import urllib.parse
 
 FILENAME_BADCHARS = '/\\:*?"<>|'
-DOWNLOAD_CHUNK = 2048
 
 
 # When doing a basic scan, we will not send HEAD requests to URLs that end in these strings,
 # because they're probably files.
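Taken together, the docstring above describes a digest-then-filter-then-download workflow. A hypothetical session against the example directory from the usage text might look like this (the URL, database name, and regex are placeholders; the database name defaults to the web domain):

    > opendirdl digest http://website.com/directory/
    > opendirdl remove_pattern website.com.db "thumbs?\.db"
    > opendirdl measure website.com.db -f
    > opendirdl download website.com.db -bps 512000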
@@ -23,197 +91,343 @@ SKIPPABLE_FILETYPES = [
     '.epub',
     '.db',
     '.flac',
+    '.gif',
+    '.gz',
     '.ico',
     '.iso',
+    '.jpeg',
     '.jpg',
+    '.m3u',
     '.m4a',
     '.mkv',
     '.mov',
     '.mp3',
     '.mp4',
+    '.nfo',
+    '.ogg',
     '.pdf',
     '.png',
     '.srt',
+    '.tar',
     '.txt',
     '.webm',
     '.zip',
 ]
+SKIPPABLE_FILETYPES = set(x.lower() for x in SKIPPABLE_FILETYPES)
 
-SKIPPABLE_FILETYPES = [x.lower() for x in SKIPPABLE_FILETYPES]
+BYTE = 1
+KIBIBYTE = 1024 * BYTE
+MIBIBYTE = 1024 * KIBIBYTE
+GIBIBYTE = 1024 * MIBIBYTE
+TEBIBYTE = 1024 * GIBIBYTE
+SIZE_UNITS = (TEBIBYTE, GIBIBYTE, MIBIBYTE, KIBIBYTE, BYTE)
+
+UNIT_STRINGS = {
+    BYTE: 'b',
+    KIBIBYTE: 'KiB',
+    MIBIBYTE: 'MiB',
+    GIBIBYTE: 'GiB',
+    TEBIBYTE: 'TiB',
+}
+
+DOWNLOAD_CHUNK = 2 * KIBIBYTE
+
+
+DB_INIT = '''
+CREATE TABLE IF NOT EXISTS urls(
+    url TEXT,
+    basename TEXT,
+    content_length INT,
+    content_type TEXT,
+    do_download INT
+    );
+CREATE INDEX IF NOT EXISTS urlindex on urls(url);
+CREATE INDEX IF NOT EXISTS baseindex on urls(basename);
+CREATE INDEX IF NOT EXISTS sizeindex on urls(content_length);
+'''.strip()
+SQL_URL = 0
+SQL_BASENAME = 1
+SQL_CONTENT_LENGTH = 2
+SQL_CONTENT_TYPE = 3
+SQL_DO_DOWNLOAD = 4
|
## DOWNLOADER ######################################################################################
|
||||||
|
## ##
|
||||||
class Downloader:
|
class Downloader:
|
||||||
def __init__(self, urlfile, outputdir=None, headers=None):
|
def __init__(self, databasename, outputdir=None, headers=None):
|
||||||
jdict = file_to_dict(urlfile)
|
self.databasename = databasename
|
||||||
self.urls = [item[0] for item in jdict.items()]
|
self.sql = sqlite3.connect(databasename)
|
||||||
self.urls.sort(key=str.lower)
|
self.cur = self.sql.cursor()
|
||||||
|
|
||||||
|
if outputdir is None or outputdir == "":
|
||||||
|
# This assumes that all URLs in the database are from the same domain.
|
||||||
|
# If they aren't, it's the user's fault.
|
||||||
|
self.cur.execute('SELECT url FROM urls LIMIT 1')
|
||||||
|
url = self.cur.fetchone()[0]
|
||||||
|
# returns (root, path, filename). Keep root.
|
||||||
|
outputdir = url_to_filepath(url)[0]
|
||||||
self.outputdir = outputdir
|
self.outputdir = outputdir
|
||||||
|
|
||||||
if self.outputdir is None or self.outputdir == "":
|
def download(self, overwrite=False, bytespersecond=None):
|
||||||
# returns (root, path, filename). Keep root.
|
|
||||||
self.outputdir = url_to_filepath(self.urls[0])[0]
|
|
||||||
|
|
||||||
def download(self, overwrite=False):
|
|
||||||
overwrite = bool(overwrite)
|
overwrite = bool(overwrite)
|
||||||
for url in self.urls:
|
|
||||||
|
self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
|
||||||
|
while True:
|
||||||
|
fetch = self.cur.fetchone()
|
||||||
|
if fetch is None:
|
||||||
|
break
|
||||||
|
url = fetch[SQL_URL]
|
||||||
|
|
||||||
''' Creating the Path '''
|
''' Creating the Path '''
|
||||||
(root, folder, filename) = url_to_filepath(url)
|
(root, folder, basename) = url_to_filepath(url)
|
||||||
# In case the user has set a custom download directory,
|
# Ignore this value of `root`, because we might have a custom outputdir.
|
||||||
# ignore the above value of `root`.
|
|
||||||
root = self.outputdir
|
root = self.outputdir
|
||||||
folder = os.path.join(root, folder)
|
folder = os.path.join(root, folder)
|
||||||
if not os.path.exists(folder):
|
if not os.path.exists(folder):
|
||||||
os.makedirs(folder)
|
os.makedirs(folder)
|
||||||
localname = os.path.join(folder, filename)
|
fullname = os.path.join(folder, basename)
|
||||||
temporary_basename = hashit(url, 16) + '.oddltemporary'
|
temporary_basename = hashit(url, 16) + '.oddltemporary'
|
||||||
temporary_localname = os.path.join(folder, temporary_basename)
|
temporary_fullname = os.path.join(folder, temporary_basename)
|
||||||
|
|
||||||
''' Managing overwrite '''
|
''' Managing overwrite '''
|
||||||
if os.path.isfile(localname):
|
if os.path.isfile(fullname):
|
||||||
if overwrite is True:
|
if overwrite is True:
|
||||||
os.remove(localname)
|
os.remove(fullname)
|
||||||
else:
|
else:
|
||||||
safeprint('Skipping "%s". Use `overwrite=True`' % localname)
|
safeprint('Skipping "%s". Use `--overwrite`' % fullname)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
safeprint('Downloading "%s" as "%s"' % (localname, temporary_basename))
|
safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
|
||||||
filehandle = open(temporary_localname, 'wb')
|
filehandle = open(temporary_fullname, 'wb')
|
||||||
try:
|
try:
|
||||||
download_file(url, filehandle, hookfunction=hook1)
|
download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
|
||||||
os.rename(temporary_localname, localname)
|
os.rename(temporary_fullname, fullname)
|
||||||
except:
|
except:
|
||||||
filehandle.close()
|
filehandle.close()
|
||||||
raise
|
raise
|
||||||
|
## ##
|
||||||
|
## DOWNLOADER ######################################################################################
|
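Outside of the command line, the class above can also be driven directly from Python. A minimal sketch, assuming `website.com.db` was produced by an earlier digest run (when `outputdir` is omitted it is derived from the first URL's domain):

    downloader = Downloader('website.com.db')
    downloader.download(overwrite=False, bytespersecond=100 * KIBIBYTE)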
+
+
+## GENERIC #########################################################################################
+##                                                                                                ##
+class Generic:
+    def __init__(self, **kwargs):
+        for kwarg in kwargs:
+            setattr(self, kwarg, kwargs[kwarg])
+##                                                                                                ##
+## GENERIC #########################################################################################
+
+
+## WALKER ##########################################################################################
+##                                                                                                ##
 class Walker:
-    def __init__(self, website, outputfile, fullscan=False):
-        self.website = website
+    def __init__(self, walkurl, databasename=None, fullscan=False):
+        if walkurl[-1] != '/':
+            walkurl += '/'
+        self.walkurl = walkurl
+        if databasename is None:
+            self.domain = url_to_filepath(walkurl)[0]
+            databasename = self.domain + '.db'
+        self.databasename = databasename
+
+        self.sql = sqlite3.connect(self.databasename)
+        self.cur = self.sql.cursor()
+        db_init(self.sql, self.cur)
+
         self.fullscan = bool(fullscan)
-        if os.path.exists(outputfile):
-            self.results = file_to_dict(outputfile)
-        else:
-            self.results = {}
-        self.already_seen = set()
+        self.queue = []
+        self.seen_directories = set()
 
-    def add_head_to_results(self, head):
-        if isinstance(head, str):
-            # For when we're doing a basic scan, which skips urls that
-            # look like a file.
-            self.results[head] = {
-                'Content-Length': -1,
-                'Content-Type': '?',
-            }
-            self.already_seen.add(head)
-        else:
-            # For when we're doing a full scan, which does a HEAD request
-            # for all urls.
-            self.results[head.url] = {
-                'Content-Length': int(head.headers.get('Content-Length', -1)),
-                'Content-Type': head.headers.get('Content-Type', '?'),
-            }
-            self.already_seen.add(head.url)
+    def smart_insert(self, url=None, head=None, commit=True):
+        '''
+        See `smart_insert`.
+        '''
+        smart_insert(self.sql, self.cur, url=url, head=head, commit=commit)
 
-    def extract_hrefs(self, response):
+    def extract_hrefs(self, response, tag='a', attribute='href'):
+        '''
+        Given a Response object, extract href urls.
+        External links, index sort links, and desktop.ini are discarded.
+        '''
+        import bs4
         soup = bs4.BeautifulSoup(response.text)
-        elements = soup.findAll('a')
-        hrefs = []
+        elements = soup.findAll(tag)
         for element in elements:
             try:
-                href = element['href']
+                href = element[attribute]
             except KeyError:
                 continue
             href = urllib.parse.urljoin(response.url, href)
-            if not href.startswith(self.website):
-                # Don't go to other sites or parent directories
+            if not href.startswith(self.walkurl):
+                # Don't go to other sites or parent directories.
                 continue
             if 'C=' in href and 'O=' in href:
-                # Alternative sort modes for index pages
+                # Alternative sort modes for index pages.
                 continue
             if href.endswith('desktop.ini'):
-                # I hate these things
+                # I hate these things.
                 continue
-            hrefs.append(href)
-        return hrefs
+            yield href
 
-    def walk(self, url=None):
+    def process_url(self, url=None):
+        '''
+        Given a URL, check whether it is an index page or an actual file.
+        If it is an index page, its links are extracted and queued.
+        If it is a file, its information is saved to the database.
+
+        We perform a
+        HEAD:
+            when `self.fullscan` is True.
+            when `self.fullscan` is False but the url is not a SKIPPABLE_FILETYPE.
+            when the url is an index page.
+        GET:
+            when the url is an index page.
+        '''
         if url is None:
-            url = self.website
+            url = self.walkurl
         else:
-            url = urllib.parse.urljoin(self.website, url)
+            url = urllib.parse.urljoin(self.walkurl, url)
 
-        results = []
-
-        if not url.startswith(self.website):
-            # Don't follow external links or parent directory.
-            return results
+        if not url.startswith(self.walkurl):
+            # Don't follow external links or parent directory.
+            print('Skipping "%s" due to external url.' % url)
+            return
 
         urll = url.lower()
-        if self.fullscan is False and any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES):
-            print('Skipping "%s" due to extension' % url)
-            self.add_head_to_results(url)
-            return results
+        if self.fullscan is False:
+            skippable = any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES)
+            if skippable:
+                safeprint('Skipping "%s" due to extension.' % url)
+                self.smart_insert(url=url)
+                return
+            self.cur.execute('SELECT * FROM urls WHERE url == ?', [url])
+            skippable = self.cur.fetchone() is not None
+            if skippable:
+                safeprint('Skipping "%s" since we already have it.' % url)
+                return
 
-        head = requests.head(url)
-        head.raise_for_status()
-
-        safeprint('HEAD: %s : %s' % (url, head))
+        try:
+            head = do_head(url)
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 403:
+                print('403 FORBIDDEN!')
+                return
+            if e.response.status_code == 404:
+                print('404 NOT FOUND!')
+                return
+            raise
         content_type = head.headers.get('Content-Type', '?')
-        self.already_seen.add(head.url)
 
         if content_type.startswith('text/html') and head.url.endswith('/'):
-            # This is an index page, let's get recursive.
-            page = requests.get(url)
-            safeprint(' GET: %s : %s' % (url, page))
-            hrefs = self.extract_hrefs(page)
-            for url in hrefs:
-                if url not in self.results and url not in self.already_seen:
-                    results += self.walk(url)
+            # This is an index page, so extract links and queue them.
+            response = do_get(url)
+            hrefs = self.extract_hrefs(response)
+            self.seen_directories.add(head.url)
+            added = 0
+            for href in hrefs:
+                if href in self.seen_directories:
+                    continue
+                else:
+                    self.queue.append(href)
+                    added += 1
+            print('Queued %d urls' % added)
         else:
-            # Don't add index pages to the results.
-            self.add_head_to_results(head)
+            # This is not an index page, so save it.
+            self.smart_insert(head=head)
 
-        return results
+    def walk(self, url=None):
+        self.queue.append(url)
+        while len(self.queue) > 0:
+            url = self.queue.pop(0)
+            self.process_url(url)
+##                                                                                                ##
+## WALKER ##########################################################################################
+
+
+## GENERAL FUNCTIONS ###############################################################################
+##                                                                                                ##
+def bytes_to_unit_string(bytes):
+    size_unit = 1
+    for unit in SIZE_UNITS:
+        if bytes >= unit:
+            size_unit = unit
+            break
+    size_unit_string = UNIT_STRINGS[size_unit]
+    size_string = '%.3f %s' % ((bytes / size_unit), size_unit_string)
+    return size_string
+
+def db_init(sql, cur):
+    lines = DB_INIT.split(';')
+    for line in lines:
+        cur.execute(line)
+    sql.commit()
+    return True
+
 def dict_to_file(jdict, filename):
-    filehandle = open(filename, 'wb')
-    text = json.dumps(jdict, indent=4, sort_keys=True)
+    text = dict_to_string(jdict)
     text = text.encode('utf-8')
+    filehandle = open(filename, 'wb')
     filehandle.write(text)
     filehandle.close()
 
-def download_file(url, filehandle, getsizeheaders=True, hookfunction=None, headers={}, auth=None):
-    if getsizeheaders:
-        totalsize = requests.head(url, headers=headers, auth=auth)
-        totalsize = int(totalsize.headers['content-length'])
+def do_get(url):
+    return do_request('GET', requests.get, url)
+
+def do_head(url):
+    return do_request('HEAD', requests.head, url)
+
+def do_request(message, method, url):
+    import sys
+    message = '{message:>4s}: {url} : '.format(message=message, url=url)
+    safeprint(message, end='')
+    sys.stdout.flush()
+    response = method(url)
+    safeprint(response)
+    response.raise_for_status()
+    return response
+
+def download_file(url, filehandle, hookfunction=None, headers={}, bytespersecond=None):
+    if bytespersecond is not None:
+        limiter = ratelimiter.Ratelimiter(allowance_per_period=bytespersecond, period=1)
     else:
-        totalsize = 1
+        limiter = None
+
     currentblock = 0
-    downloading = requests.get(url, stream=True, headers=headers, auth=auth)
+    downloading = requests.get(url, stream=True, headers=headers)
+    totalsize = int(downloading.headers.get('content-length', 1))
     for chunk in downloading.iter_content(chunk_size=DOWNLOAD_CHUNK):
-        if chunk:
-            currentblock += 1
-            filehandle.write(chunk)
-            if hookfunction is not None:
-                hookfunction(currentblock, DOWNLOAD_CHUNK, totalsize)
+        if not chunk:
+            break
+        currentblock += 1
+        filehandle.write(chunk)
+        if limiter is not None:
+            limiter.limit(len(chunk))
+        if hookfunction is not None:
+            hookfunction(currentblock, DOWNLOAD_CHUNK, totalsize)
 
     filehandle.close()
     size = os.path.getsize(filehandle.name)
     if size < totalsize:
         raise Exception('Did not receive expected total size. %d / %d' % (size, totalsize))
     return True
 
-def file_to_dict(filename):
-    filehandle = open(filename, 'rb')
-    jdict = json.loads(filehandle.read().decode('utf-8'))
-    filehandle.close()
-    return jdict
-
-def filepath_sanitize(text, exclusions=''):
-    bet = FILENAME_BADCHARS.replace(exclusions, '')
+def filepath_sanitize(text, allowed=''):
+    bet = FILENAME_BADCHARS.replace(allowed, '')
     for char in bet:
         text = text.replace(char, '')
     return text
 
+def get_clipboard():
+    import tkinter
+    t = tkinter.Tk()
+    clip = t.clipboard_get()
+    t.destroy()
+    return clip
+
 def hashit(text, length=None):
+    import hashlib
     h = hashlib.sha512(text.encode('utf-8')).hexdigest()
     if length is not None:
         h = h[:length]
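As a quick check of the helpers above, worked out by hand from the constants defined earlier in the file: 1536 bytes is at least one KIBIBYTE but less than a MIBIBYTE, and 100 bytes only reaches the BYTE unit, so:

    >>> bytes_to_unit_string(1536)   # 1536 / 1024 = 1.5
    '1.500 KiB'
    >>> bytes_to_unit_string(100)
    '100.000 b'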
@@ -230,12 +444,66 @@ def hook1(currentblock, chunksize, totalsize):
     if currentbytes == totalsize:
         print()
 
+def listget(l, index, default=None):
+    try:
+        return l[index]
+    except IndexError:
+        return default
+
+def longest_length(li):
+    longest = 0
+    for item in li:
+        longest = max(longest, len(item))
+    return longest
+
 def safeprint(text, **kwargs):
     text = str(text)
     text = text.encode('ascii', 'replace').decode()
     text = text.replace('?', '_')
     print(text, **kwargs)
 
+def smart_insert(sql, cur, url=None, head=None, commit=True):
+    '''
+    INSERT or UPDATE the appropriate entry.
+    '''
+    if bool(url) is bool(head):
+        raise ValueError('One and only one of `url` or `head` is necessary.')
+
+    if url is not None:
+        # When doing a basic scan, all we get is the URL.
+        content_length = None
+        content_type = None
+
+    elif head is not None:
+        # When doing a full scan, we get a Response object.
+        url = head.url
+        content_length = head.headers.get('Content-Length', None)
+        if content_length is not None:
+            content_length = int(content_length)
+        content_type = head.headers.get('Content-Type', None)
+
+    basename = url_to_filepath(url)[2]
+    basename = urllib.parse.unquote(basename)
+    do_download = True
+    cur.execute('SELECT * FROM urls WHERE url == ?', [url])
+    existing_entry = cur.fetchone()
+    is_new = existing_entry is None
+    data = (url, basename, content_length, content_type, do_download)
+    if is_new:
+        cur.execute('INSERT INTO urls VALUES(?, ?, ?, ?, ?)', data)
+    else:
+        command = '''
+            UPDATE urls SET
+            content_length = coalesce(?, content_length),
+            content_type = coalesce(?, content_type)
+            WHERE url == ?
+        '''
+        cur.execute(command, [content_length, content_type, url])
+    if commit:
+        sql.commit()
+    return data
+
 def url_to_filepath(text):
     text = urllib.parse.unquote(text)
     parts = urllib.parse.urlsplit(text)
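The `coalesce` calls in the UPDATE branch are what let a later full scan fill in sizes without wiping what a basic scan already stored: a NULL argument keeps the existing column value. A rough sketch of that flow, using a hypothetical in-memory database:

    sql = sqlite3.connect(':memory:')
    cur = sql.cursor()
    db_init(sql, cur)
    # basic scan: only the URL is known, content_length stays NULL
    smart_insert(sql, cur, url='http://website.com/directory/file.mp3')
    # a later HEAD response `head` for the same URL would then be recorded with
    # smart_insert(sql, cur, head=head), filling content_length and content_type.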
@@ -244,89 +512,230 @@ def url_to_filepath(text):
     while folder.startswith('/'):
         folder = folder[1:]
 
-    # Folders are allowed to have slashes
-    folder = filepath_sanitize(folder, exclusions='/\\')
+    # Folders are allowed to have slashes...
+    folder = filepath_sanitize(folder, allowed='/\\')
     folder = folder.replace('\\', os.path.sep)
     folder = folder.replace('/', os.path.sep)
-    # But Files are not.
+    # ...but Files are not.
     filename = filepath_sanitize(filename)
 
     return (root, folder, filename)
+##                                                                                                ##
+## GENERAL FUNCTIONS ###############################################################################
-## Commandline functions ####################################################\\
-def digest(website, outputfile, fullscan, *trash):
-    fullscan = bool(fullscan)
-    if website[-1] != '/':
-        website += '/'
-    walker = Walker(website, outputfile, fullscan=fullscan)
-    try:
-        walker.walk()
-        dict_to_file(walker.results, outputfile)
-    except:
-        dict_to_file(walker.results, outputfile)
-        traceback.print_exc()
-        print('SAVED PROGRESS SO FAR')
+## COMMANDLINE FUNCTIONS ###########################################################################
+##                                                                                                ##
+def digest(args):
+    fullscan = args.fullscan
+    if isinstance(fullscan, str):
+        fullscan = bool(eval(fullscan))
+    walkurl = args.walkurl
+    if walkurl == '!clipboard':
+        walkurl = get_clipboard()
+        safeprint('From clipboard: %s' % walkurl)
+    walker = Walker(
+        databasename=args.databasename,
+        fullscan=fullscan,
+        walkurl=walkurl,
+    )
+    walker.walk()
 
-def download(urlfile, outputdir, overwrite, *trash):
-    downloader = Downloader(urlfile, outputdir)
-    downloader.download(overwrite)
+def download(args):
+    bytespersecond = args.bytespersecond
+    if isinstance(bytespersecond, str):
+        bytespersecond = eval(bytespersecond)
+
+    downloader = Downloader(
+        databasename=args.databasename,
+        outputdir=args.outputdir,
+    )
+    downloader.download(
+        bytespersecond=bytespersecond,
+        overwrite=args.overwrite,
+    )
 
-def filter_pattern(urlfile, patterns, negative=False, *trash):
+def filter_pattern(databasename, regex, action='keep', *trash):
     '''
-    When `negative` is True, items are kept when they do NOT match the pattern,
-    allowing you to delete trash files.
+    When `action` is 'keep', then any URLs matching the regex will have their
+    `do_download` flag set to True.
 
-    When `negative` is False, items are keep when they DO match the pattern,
-    allowing you to keep items of interest.
+    When `action` is 'remove', then any URLs matching the regex will have their
+    `do_download` flag set to False.
+
+    Actions will not act on each other's behalf. A 'keep' will NEVER disable a url,
+    and 'remove' will NEVER enable one.
     '''
-    if isinstance(patterns, str):
-        patterns = [patterns]
-    jdict = file_to_dict(urlfile)
-    keys = list(jdict.keys())
-    for key in keys:
-        for pattern in patterns:
-            contains = re.search(pattern, key) is not None
-            if contains ^ negative:
-                safeprint('Removing "%s"' % key)
-                del jdict[key]
-    dict_to_file(jdict, urlfile)
+    import re
+    if isinstance(regex, str):
+        regex = [regex]
+
+    keep = action == 'keep'
+    remove = action == 'remove'
+
+    sql = sqlite3.connect(databasename)
+    cur = sql.cursor()
+    cur2 = sql.cursor()
+
+    cur2.execute('SELECT * FROM urls')
+    while True:
+        fetch = cur2.fetchone()
+        if fetch is None:
+            break
+        url = fetch[SQL_URL]
+        current_do_dl = fetch[SQL_DO_DOWNLOAD]
+        for pattern in regex:
+            contains = re.search(pattern, url) is not None
+
+            should_keep = (keep and contains)
+            if keep and contains and not current_do_dl:
+                safeprint('Keeping "%s"' % url)
+                cur.execute('UPDATE urls SET do_download = 1 WHERE url == ?', [url])
+            if remove and contains and current_do_dl:
+                safeprint('Removing "%s"' % url)
+                cur.execute('UPDATE urls SET do_download = 0 WHERE url == ?', [url])
+    sql.commit()
 
-def keep_pattern(urlfile, patterns, *trash):
-    filter_pattern(urlfile=urlfile, patterns=patterns, negative=True)
+def keep_pattern(args):
+    '''
+    See `filter_pattern`.
+    '''
+    filter_pattern(
+        action='keep',
+        databasename=args.databasename,
+        regex=args.regex,
+    )
 
-def measure(urlfile, *trash):
-    jdict = file_to_dict(urlfile)
-    totalbytes = 0
-    for (url, info) in jdict.items():
-        bytes = info['Content-Length']
-        if bytes > 0:
-            totalbytes += bytes
-    bytestring = '{:,}'.format(totalbytes)
-    print(bytestring)
-    return totalbytes
-
-def remove_pattern(urlfile, patterns, *trash):
-    filter_pattern(urlfile=urlfile, patterns=patterns, negative=False)
-
-def listget(l, index, default=None):
-    try:
-        return l[index]
-    except IndexError:
-        return default
-
-cmdfunctions = [digest, download, keep_pattern, measure, remove_pattern]
-## End of commandline functions #############################################//
-
-if __name__ == '__main__':
-    command = listget(sys.argv, 1, None)
-    arg1 = listget(sys.argv, 2, None)
-    arg2 = listget(sys.argv, 3, None)
-    arg3 = listget(sys.argv, 4, None)
-    if command is None:
-        quit()
-    did_something = False
-    for function in cmdfunctions:
-        if command == function.__name__:
-            function(arg1, arg2, arg3)
-            did_something = True
-            break
-    if not did_something:
-        print('No matching function')
+def list_basenames(args):
+    '''
+    Given a database, print the entries in order of the file basenames.
+    This makes it easier to find interesting titles without worrying about
+    what directory they're in.
+    '''
+    databasename = args.databasename
+    outputfile = args.outputfile
+
+    sql = sqlite3.connect(databasename)
+    cur = sql.cursor()
+    cur.execute('SELECT basename FROM urls WHERE do_download == 1 ORDER BY LENGTH(basename) DESC LIMIT 1')
+
+    longest = len(cur.fetchone()[0])
+    cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY basename')
+    form = '{bn:<%ds} : {url}' % longest
+    if outputfile:
+        outputfile = open(outputfile, 'w', encoding='utf-8')
+    while True:
+        fetch = cur.fetchone()
+        if fetch is None:
+            break
+        line = form.format(bn=fetch[SQL_BASENAME], url=fetch[SQL_URL])
+        if outputfile:
+            outputfile.write(line + '\n')
+        else:
+            print(line)
+    if outputfile:
+        outputfile.close()
+
+def measure(args):
+    '''
+    Given a database, print the sum of all Content-Lengths.
+    If `fullscan`, then URLs with no Content-Length will be
+    HEAD requested, and the result will be saved back into the database.
+    '''
+    databasename = args.databasename
+    fullscan = args.fullscan
+    if isinstance(fullscan, str):
+        fullscan = bool(fullscan)
+
+    totalsize = 0
+    sql = sqlite3.connect(databasename)
+    cur1 = sql.cursor()
+    cur2 = sql.cursor()
+    cur2.execute('SELECT * FROM urls WHERE do_download == 1')
+    filecount = 0
+    files_without_size = 0
+    try:
+        while True:
+            fetch = cur2.fetchone()
+            if fetch is None:
+                break
+            size = fetch[SQL_CONTENT_LENGTH]
+            if size is None:
+                if fullscan:
+                    url = fetch[SQL_URL]
+                    head = do_head(url)
+                    fetch = smart_insert(sql, cur1, head=head, commit=False)
+                    size = fetch[SQL_CONTENT_LENGTH]
+                    if size is None:
+                        safeprint('"%s" is not revealing Content-Length' % url)
+                        size = 0
+                else:
+                    files_without_size += 1
+                    size = 0
+            totalsize += size
+            filecount += 1
+    except:
+        sql.commit()
+        raise
+
+    sql.commit()
+    short_string = bytes_to_unit_string(totalsize)
+    totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount)
+    print(totalsize_string)
+    if files_without_size > 0:
+        print('Note: %d files do not have a stored Content-Length.' % files_without_size)
+        print('Run `measure` with `-f` or `--fullscan` to HEAD request those files.')
+    return totalsize
+
+def remove_pattern(args):
+    '''
+    See `filter_pattern`.
+    '''
+    filter_pattern(
+        action='remove',
+        databasename=args.databasename,
+        regex=args.regex,
+    )
+##                                                                                                ##
+## COMMANDLINE FUNCTIONS ###########################################################################
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    p_digest = subparsers.add_parser('digest')
+    p_digest.add_argument('walkurl')
+    p_digest.add_argument('-db', '--database', dest='databasename', default=None)
+    p_digest.add_argument('-f', '--fullscan', action='store_true')
+    p_digest.set_defaults(func=digest)
+
+    p_download = subparsers.add_parser('download')
+    p_download.add_argument('databasename')
+    p_download.add_argument('-o', '--outputdir', dest='outputdir', default=None)
+    p_download.add_argument('-ow', '--overwrite', dest='overwrite', default=False)
+    p_download.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
+    p_download.set_defaults(func=download)
+
+    p_keep_pattern = subparsers.add_parser('keep_pattern')
+    p_keep_pattern.add_argument('databasename')
+    p_keep_pattern.add_argument('regex')
+    p_keep_pattern.set_defaults(func=keep_pattern)
+
+    p_list_basenames = subparsers.add_parser('list_basenames')
+    p_list_basenames.add_argument('databasename')
+    p_list_basenames.add_argument('-o', '--outputfile', dest='outputfile', default=None)
+    p_list_basenames.set_defaults(func=list_basenames)
+
+    p_measure = subparsers.add_parser('measure')
+    p_measure.add_argument('databasename')
+    p_measure.add_argument('-f', '--fullscan', action='store_true')
+    p_measure.set_defaults(func=measure)
+
+    p_remove_pattern = subparsers.add_parser('remove_pattern')
+    p_remove_pattern.add_argument('databasename')
+    p_remove_pattern.add_argument('regex')
+    p_remove_pattern.set_defaults(func=remove_pattern)
+
+    args = parser.parse_args()
+    args.func(args)
OpenDirDL/ratelimiter.py  (new file, +56)
@@ -0,0 +1,56 @@
+import time
+
+
+class Ratelimiter:
+    def __init__(self, allowance_per_period, period, operation_cost=1, mode='sleep'):
+        '''
+        allowance_per_period:
+            The number of operations we can perform per `period` seconds.
+
+        period:
+            The number of seconds over which we can perform `allowance_per_period` operations.
+
+        operation_cost:
+            The default amount to remove from our balance after each operation.
+            Pass a `cost` parameter to `self.limit` to use a nondefault value.
+
+        mode:
+            'sleep': If we do not have the balance for an operation, sleep until we do.
+                     Return True every time.
+            'reject': If we do not have the balance for an operation, return False.
+        '''
+        if mode not in ('sleep', 'reject'):
+            raise ValueError('Invalid mode %s' % repr(mode))
+        self.allowance_per_period = allowance_per_period
+        self.period = period
+        self.operation_cost = operation_cost
+        self.mode = mode
+
+        self.last_operation = time.time()
+        self.balance = 0
+        self.gain_rate = allowance_per_period / period
+
+    def limit(self, cost=None):
+        if cost is None:
+            cost = self.operation_cost
+        timediff = time.time() - self.last_operation
+        self.balance += timediff * self.gain_rate
+        self.balance = min(self.balance, self.allowance_per_period)
+        successful = False
+
+        deficit = cost - self.balance
+        if deficit > 0 and self.mode == 'sleep':
+            time_needed = (deficit / self.gain_rate)
+            #print(self.balance, deficit, 'Need to sleep %f' % time_needed)
+            time.sleep(time_needed)
+            self.balance = cost
+
+        #print(self.balance)
+        if self.balance >= cost:
+            #print('pass')
+            self.balance -= cost
+            successful = True
+
+        self.last_operation = time.time()
+
+        return successful
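The class above is a token-bucket style limiter: the balance refills at `allowance_per_period / period` units per second, and each `limit(cost)` call spends `cost` units, sleeping first when the balance is short (in 'sleep' mode). A minimal usage sketch in the spirit of `download_file`, where one unit is one byte and `chunks` is a hypothetical iterable of byte strings:

    import ratelimiter

    limiter = ratelimiter.Ratelimiter(allowance_per_period=100 * 1024, period=1)  # ~100 KiB/s
    for chunk in chunks:
        handle_chunk(chunk)        # hypothetical consumer of the data
        limiter.limit(len(chunk))  # sleeps as needed to hold the average rate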
(steganography script; filename not shown)
@@ -99,15 +99,24 @@ class BitsToImage:
 class ImageToBits:
     def __init__(self, image, bitness):
         self.image = image
+        self.bitness = bitness
         self.width = image.size[0]
+        self.height = image.size[1]
         self.pixel_index = -1
+        self.bit_index = bitness
         self.active_byte = []
+        self.pixels = self.image.getdata()
+        #self.bits = ''
+        #for pixel in self.pixels:
+        #    for channel in pixel:
+        #        self.bits += binary(channel)[-bitness:]
+        #print(len(self.bits))
 
     def _read(self):
         if len(self.active_byte) == 0:
             self.pixel_index += 1
-            (x, y) = index_to_xy(self.pixel_index, self.width)
-            self.active_byte = list(self.image.getpixel((x, y)))
+            self.active_byte = self.pixels[self.pixel_index]
             self.active_byte = self.active_byte[:3]
             self.active_byte = [binary(channel) for channel in self.active_byte]
             self.active_byte = [channel[-bitness:] for channel in self.active_byte]

@@ -115,6 +124,7 @@ class ImageToBits:
             self.active_byte = list(self.active_byte)
 
         ret = self.active_byte.pop(0)
+        self.bit_index += 1
         return ret
 
     def read(self, bits=1):

@@ -196,24 +206,14 @@ def encode(imagefilename, secretfilename, bitness=1):
     secret_content_length = (secret_size) + (len(secret_extension)) + 1
     requiredpixels = math.ceil(((secret_content_length * 8) + 32) / (3 * bitness))
     if totalpixels < requiredpixels:
-        raise StegError('Image does not have enough pixels to store the Secret'
+        raise StegError('Image does not have enough pixels to store the Secret. '
                         'Must have at least %d pixels' % requiredpixels)
 
     print('%d pixels available, %d required' % (totalpixels, requiredpixels))
 
     # --> YOU ARE HERE <--
 
-    # Because bitness may be between 1 and 8, we need to create a writing buffer
-    # called `binary_write_buffer`, so that we're always writing the same amount
-    # of data per color channel.
-    # If we were to write the secret length / extension on the fly, we might end
-    # up using the wrong number of bits for the final channel of some pixel.
-    # Example: 10010101 broken into groups of 3 is [100, 101, 01]
-    # Note that the last group is not the same size as the desired bitness, and
-    # will cause decode errors.
-
     pixel = list(image.getpixel((0, 0)))
-    binary_write_buffer = ''
 
     # Write secret length
     secret_content_length_b = binary(secret_content_length).rjust(32, '0')