This commit is contained in:
Ethan Dalool 2016-08-02 18:44:32 -07:00
parent b93ec8f8f1
commit 74aee1822e
3 changed files with 79 additions and 89 deletions

View file

@ -7,6 +7,11 @@ Requires `pip install beautifulsoup4`
See inside opendirdl.py for usage instructions. See inside opendirdl.py for usage instructions.
- 2016 08 02
- Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
- Rewrote `build_file_tree` with a way simpler algorithm.
- Removed the ability to set a Node's parent during `__init__` because it wasn't fully fleshed out and doesn't need to be used anyway.
- 2016 08 01 - 2016 08 01
- Made the digest work even if you forget the http:// - Made the digest work even if you forget the http://

View file

@ -196,9 +196,9 @@ HTML_TREE_HEADER = '''
<meta charset="UTF-8"> <meta charset="UTF-8">
<script type="text/javascript"> <script type="text/javascript">
function collapse(id) function collapse(div)
{ {
div = document.getElementById(id); //div = document.getElementById(id);
if (div.style.display != "none") if (div.style.display != "none")
{ {
div.style.display = "none"; div.style.display = "none";
@ -330,7 +330,7 @@ class Walker:
def process_url(self, url=None): def process_url(self, url=None):
''' '''
Given a URL, check whether it is an index page or an actual file. Given a URL, check whether it is an index page or an actual file.
If it is an index page, it's links are extracted and queued. If it is an index page, its links are extracted and queued.
If it is a file, its information is saved to the database. If it is a file, its information is saved to the database.
We perform a We perform a
@ -372,10 +372,10 @@ class Walker:
head = do_head(url) head = do_head(url)
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
if e.response.status_code == 403: if e.response.status_code == 403:
print('403 FORBIDDEN!') write('403 FORBIDDEN!')
return return
if e.response.status_code == 404: if e.response.status_code == 404:
print('404 NOT FOUND!') write('404 NOT FOUND!')
return return
raise raise
content_type = head.headers.get('Content-Type', '?') content_type = head.headers.get('Content-Type', '?')
@ -395,7 +395,7 @@ class Walker:
else: else:
self.queue.append(href) self.queue.append(href)
added += 1 added += 1
print('Queued %d urls' % added) write('Queued %d urls' % added)
else: else:
# This is not an index page, so save it. # This is not an index page, so save it.
self.smart_insert(head=head, commit=False) self.smart_insert(head=head, commit=False)
@ -407,7 +407,7 @@ class Walker:
url = self.queue.popleft() url = self.queue.popleft()
self.process_url(url) self.process_url(url)
line = '{:,} Remaining'.format(len(self.queue)) line = '{:,} Remaining'.format(len(self.queue))
print(line) write(line)
except: except:
self.sql.commit() self.sql.commit()
raise raise
@ -420,8 +420,8 @@ class Walker:
## ## ## ##
class Generic: class Generic:
def __init__(self, **kwargs): def __init__(self, **kwargs):
for kwarg in kwargs: for (key, value) in kwargs.items():
setattr(self, kwarg, kwargs[kwarg]) setattr(self, key, value)
class TreeExistingChild(Exception): class TreeExistingChild(Exception):
@ -431,17 +431,22 @@ class TreeInvalidIdentifier(Exception):
pass pass
class TreeNode: class TreeNode:
def __init__(self, identifier, data, parent=None): def __init__(self, identifier, data=None):
assert isinstance(identifier, str) assert isinstance(identifier, str)
assert '\\' not in identifier assert '\\' not in identifier
self.identifier = identifier self.identifier = identifier
self.data = data self.data = data
self.parent = parent
self.children = {} self.children = {}
def __eq__(self, other):
return isinstance(other, Treenode) and self.abspath() == other.abspath()
def __getitem__(self, key): def __getitem__(self, key):
return self.children[key] return self.children[key]
def __hash__(self):
return hash(self.abspath())
def __repr__(self): def __repr__(self):
return 'TreeNode %s' % self.abspath() return 'TreeNode %s' % self.abspath()
@ -498,7 +503,7 @@ class TreeNode:
def walk(self, customsort=None): def walk(self, customsort=None):
yield self yield self
for child in self.listnodes(customsort=customsort): for child in self.list_children(customsort=customsort):
yield from child.walk(customsort=customsort) yield from child.walk(customsort=customsort)
## ## ## ##
## OTHER CLASSES ################################################################################### ## OTHER CLASSES ###################################################################################
@ -510,82 +515,61 @@ def build_file_tree(databasename):
sql = sqlite3.connect(databasename) sql = sqlite3.connect(databasename)
cur = sql.cursor() cur = sql.cursor()
cur.execute('SELECT * FROM urls WHERE do_download == 1') cur.execute('SELECT * FROM urls WHERE do_download == 1')
items = cur.fetchall() all_items = cur.fetchall()
sql.close() sql.close()
if len(items) == 0:
if len(all_items) == 0:
return return
items.sort(key=lambda x: x[SQL_URL]) path_form = '{root}\\{folder}\\{filename}'
all_items = [
path_parts = url_to_filepath(items[0][SQL_URL]) {
root_identifier = path_parts['root'] 'url': item[SQL_URL],
#print('Root', root_identifier) 'size': item[SQL_CONTENT_LENGTH],
root_data = {'name': root_identifier, 'item_type': 'directory'} 'path': path_form.format(**url_to_filepath(item[SQL_URL])).split('\\'),
root_identifier = root_identifier.replace(':', '')
tree = TreeNode(
identifier=root_identifier,
data=root_data
)
node_map = {}
for item in items:
path = url_to_filepath(item[SQL_URL])
scheme = path['scheme']
# I join and re-split because 'folder' may contain slashes of its own
# and I want to break all the pieces
path = '\\'.join([path['root'], path['folder'], path['filename']])
parts = path.split('\\')
#print(path)
for (index, part) in enumerate(parts):
this_path = '/'.join(parts[:index + 1])
parent_path = '/'.join(parts[:index])
#input()
data = {
'name': part,
'url': scheme + '://' + this_path,
} }
this_identifier = this_path.replace(':', '') for item in all_items
parent_identifier = parent_path.replace(':', '') ]
all_items.sort(key=lambda x: x['url'])
if (index + 1) == len(parts): root_data = {
data['item_type'] = 'file' 'item_type': 'directory',
if item[SQL_CONTENT_LENGTH]: 'name': databasename,
data['size'] = item[SQL_CONTENT_LENGTH] }
else: tree = TreeNode(databasename, data=root_data)
data['size'] = None tree.unsorted_children = all_items
else: node_queue = set()
data['item_type'] = 'directory' node_queue.add(tree)
# In this process, URLs are divided up into their nodes one directory layer at a time.
# The root receives all URLs, and creates nodes for each of the top-level
# directories. Those nodes receive all subdirectories, and repeat.
while len(node_queue) > 0:
node = node_queue.pop()
for to_sort in node.unsorted_children:
path = to_sort['path']
# Create a new node for the directory, path[0]
# path[1:] is assigned to that node to be divided next.
child_identifier = path.pop(0)
# Ensure this comment is in a node of its own child = node.children.get(child_identifier, None)
this_node = node_map.get(this_identifier, None) if not child:
if this_node: child = TreeNode(child_identifier, data={})
# This ID was detected as a parent of a previous iteration child.unsorted_children = []
# Now we're actually filling it in. node.add_child(child)
this_node.data = data
else:
this_node = TreeNode(this_identifier, data)
node_map[this_identifier] = this_node
# Attach this node to the parent. child.data['url'] = to_sort['url']
if parent_identifier == root_identifier: child.data['name'] = child_identifier
try: if len(path) > 0:
tree.add_child(this_node) child.data['item_type'] = 'directory'
except TreeExistingChild: child.unsorted_children.append(to_sort)
pass node_queue.add(child)
else: else:
parent_node = node_map.get(parent_identifier, None) child.data['item_type'] = 'file'
if not parent_node: child.data['size'] = to_sort['size']
parent_node = TreeNode(parent_identifier, data=None)
node_map[parent_identifier] = parent_node del node.unsorted_children
try:
parent_node.add_child(this_node)
except TreeExistingChild:
pass
this_node.parent = parent_node
#print(this_node.data)
return tree return tree
def db_init(sql, cur): def db_init(sql, cur):
@ -596,10 +580,10 @@ def db_init(sql, cur):
return True return True
def do_get(url, raise_for_status=True): def do_get(url, raise_for_status=True):
return do_request('GET', requests.get, url) return do_request('GET', requests.get, url, raise_for_status=raise_for_status)
def do_head(url, raise_for_status=True): def do_head(url, raise_for_status=True):
return do_request('HEAD', requests.head, url) return do_request('HEAD', requests.head, url, raise_for_status=raise_for_status)
def do_request(message, method, url, raise_for_status=True): def do_request(message, method, url, raise_for_status=True):
message = '{message:>4s}: {url} : '.format(message=message, url=url) message = '{message:>4s}: {url} : '.format(message=message, url=url)
@ -685,10 +669,9 @@ def recursive_print_node(node, depth=0, use_html=False, output_file=None):
if use_html: if use_html:
css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd' css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd'
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
div_id = hashit(node.identifier, 16) line = '<button onclick="collapse(this.nextSibling)">{name} ({size})</button>'
line = '<button onclick="collapse(\'{div_id}\')">{name} ({size})</button>' line += '<div class="{css}" style="display:none">'
line += '<div class="{css}" id="{div_id}" style="display:none">' line = line.format(name=node.data['name'], size=size, css=css_class)
line = line.format(div_id=div_id, name=node.data['name'], size=size, css=css_class)
else: else:
line = '<a href="{url}">{name} ({size})</a><br>' line = '<a href="{url}">{name} ({size})</a><br>'
line = line.format(url=node.data['url'], name=node.data['name'], size=size) line = line.format(url=node.data['url'], name=node.data['name'], size=size)
@ -1057,9 +1040,9 @@ def measure(databasename, fullscan=False, new_only=False):
sql.commit() sql.commit()
short_string = bytestring.bytestring(totalsize) short_string = bytestring.bytestring(totalsize)
totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount) totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount)
print(totalsize_string) write(totalsize_string)
if unmeasured_file_count > 0: if unmeasured_file_count > 0:
print(UNMEASURED_WARNING % unmeasured_file_count) write(UNMEASURED_WARNING % unmeasured_file_count)
return totalsize return totalsize
def measure_argparse(args): def measure_argparse(args):
@ -1118,7 +1101,7 @@ def tree_argparse(args):
def main(argv): def main(argv):
if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''): if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''):
print(DOCSTRING) write(DOCSTRING)
return return
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()

View file

@ -157,14 +157,14 @@ def create_line(degree_offset, **kwargs):
def create_line_frame(offset=None, line=None): def create_line_frame(offset=None, line=None):
if line is None: if line is None:
if offset is None: if offset is None:
offset = entry_add.get() offset = t.entry_add.get()
offset = offset.replace(' ', '') offset = offset.replace(' ', '')
offset = offset.split(',') offset = offset.split(',')
try: try:
offset = [int(o) for o in offset] offset = [int(o) for o in offset]
except ValueError: except ValueError:
return return
entry_add.delete(0, 'end') t.entry_add.delete(0, 'end')
lines = [] lines = []
for x in offset: for x in offset:
@ -253,10 +253,12 @@ def unregister_line(line):
variables['lines'].remove(line) variables['lines'].remove(line)
def main(): def main():
global t
t = tkinter.Tk() t = tkinter.Tk()
frame_add = tkinter.Frame(t) frame_add = tkinter.Frame(t)
entry_add = tkinter.Entry(frame_add) entry_add = tkinter.Entry(frame_add)
t.entry_add = entry_add
entry_add.grid(row=0, column=0) entry_add.grid(row=0, column=0)
tkinter.Button(frame_add, text='+', command=create_line_frame).grid(row=0, column=1) tkinter.Button(frame_add, text='+', command=create_line_frame).grid(row=0, column=1)
frame_add.grid(row=0, column=0) frame_add.grid(row=0, column=0)