else
This commit is contained in:
parent
b93ec8f8f1
commit
74aee1822e
3 changed files with 79 additions and 89 deletions
|
@ -7,6 +7,11 @@ Requires `pip install beautifulsoup4`
|
||||||
|
|
||||||
See inside opendirdl.py for usage instructions.
|
See inside opendirdl.py for usage instructions.
|
||||||
|
|
||||||
|
- 2016 08 02
|
||||||
|
- Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
|
||||||
|
- Rewrote `build_file_tree` with a much simpler algorithm.
|
||||||
|
- Removed the ability to set a Node's parent during `__init__` because it wasn't fully fleshed out and doesn't need to be used anyway.
|
||||||
|
|
||||||
- 2016 08 01
|
- 2016 08 01
|
||||||
- Made the digest work even if you forget the http://
|
- Made the digest work even if you forget the http://
|
||||||
|
|
||||||
|
|
|
@ -196,9 +196,9 @@ HTML_TREE_HEADER = '''
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
function collapse(id)
|
function collapse(div)
|
||||||
{
|
{
|
||||||
div = document.getElementById(id);
|
//div = document.getElementById(id);
|
||||||
if (div.style.display != "none")
|
if (div.style.display != "none")
|
||||||
{
|
{
|
||||||
div.style.display = "none";
|
div.style.display = "none";
|
||||||
|
@ -330,7 +330,7 @@ class Walker:
|
||||||
def process_url(self, url=None):
|
def process_url(self, url=None):
|
||||||
'''
|
'''
|
||||||
Given a URL, check whether it is an index page or an actual file.
|
Given a URL, check whether it is an index page or an actual file.
|
||||||
If it is an index page, it's links are extracted and queued.
|
If it is an index page, its links are extracted and queued.
|
||||||
If it is a file, its information is saved to the database.
|
If it is a file, its information is saved to the database.
|
||||||
|
|
||||||
We perform a
|
We perform a
|
||||||
|
@ -372,10 +372,10 @@ class Walker:
|
||||||
head = do_head(url)
|
head = do_head(url)
|
||||||
except requests.exceptions.HTTPError as e:
|
except requests.exceptions.HTTPError as e:
|
||||||
if e.response.status_code == 403:
|
if e.response.status_code == 403:
|
||||||
print('403 FORBIDDEN!')
|
write('403 FORBIDDEN!')
|
||||||
return
|
return
|
||||||
if e.response.status_code == 404:
|
if e.response.status_code == 404:
|
||||||
print('404 NOT FOUND!')
|
write('404 NOT FOUND!')
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
content_type = head.headers.get('Content-Type', '?')
|
content_type = head.headers.get('Content-Type', '?')
|
||||||
|
@ -395,7 +395,7 @@ class Walker:
|
||||||
else:
|
else:
|
||||||
self.queue.append(href)
|
self.queue.append(href)
|
||||||
added += 1
|
added += 1
|
||||||
print('Queued %d urls' % added)
|
write('Queued %d urls' % added)
|
||||||
else:
|
else:
|
||||||
# This is not an index page, so save it.
|
# This is not an index page, so save it.
|
||||||
self.smart_insert(head=head, commit=False)
|
self.smart_insert(head=head, commit=False)
|
||||||
|
@ -407,7 +407,7 @@ class Walker:
|
||||||
url = self.queue.popleft()
|
url = self.queue.popleft()
|
||||||
self.process_url(url)
|
self.process_url(url)
|
||||||
line = '{:,} Remaining'.format(len(self.queue))
|
line = '{:,} Remaining'.format(len(self.queue))
|
||||||
print(line)
|
write(line)
|
||||||
except:
|
except:
|
||||||
self.sql.commit()
|
self.sql.commit()
|
||||||
raise
|
raise
|
||||||
|
@ -420,8 +420,8 @@ class Walker:
|
||||||
## ##
|
## ##
|
||||||
class Generic:
|
class Generic:
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
for kwarg in kwargs:
|
for (key, value) in kwargs.items():
|
||||||
setattr(self, kwarg, kwargs[kwarg])
|
setattr(self, key, value)
|
||||||
|
|
||||||
|
|
||||||
class TreeExistingChild(Exception):
|
class TreeExistingChild(Exception):
|
||||||
|
@ -431,17 +431,22 @@ class TreeInvalidIdentifier(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class TreeNode:
|
class TreeNode:
|
||||||
def __init__(self, identifier, data, parent=None):
|
def __init__(self, identifier, data=None):
|
||||||
assert isinstance(identifier, str)
|
assert isinstance(identifier, str)
|
||||||
assert '\\' not in identifier
|
assert '\\' not in identifier
|
||||||
self.identifier = identifier
|
self.identifier = identifier
|
||||||
self.data = data
|
self.data = data
|
||||||
self.parent = parent
|
|
||||||
self.children = {}
|
self.children = {}
|
||||||
|
|
||||||
|
def __eq__(self, other):
    '''
    Two nodes are equal when both are TreeNodes and their absolute paths
    match. This mirrors __hash__, which hashes the same abspath() value,
    so equal nodes land in the same set / dict slot.
    '''
    # The class is named TreeNode; the previous spelling `Treenode` raised
    # NameError whenever two nodes were compared.
    return isinstance(other, TreeNode) and self.abspath() == other.abspath()
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
return self.children[key]
|
return self.children[key]
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.abspath())
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'TreeNode %s' % self.abspath()
|
return 'TreeNode %s' % self.abspath()
|
||||||
|
|
||||||
|
@ -498,7 +503,7 @@ class TreeNode:
|
||||||
|
|
||||||
def walk(self, customsort=None):
|
def walk(self, customsort=None):
|
||||||
yield self
|
yield self
|
||||||
for child in self.listnodes(customsort=customsort):
|
for child in self.list_children(customsort=customsort):
|
||||||
yield from child.walk(customsort=customsort)
|
yield from child.walk(customsort=customsort)
|
||||||
## ##
|
## ##
|
||||||
## OTHER CLASSES ###################################################################################
|
## OTHER CLASSES ###################################################################################
|
||||||
|
@ -510,82 +515,61 @@ def build_file_tree(databasename):
|
||||||
sql = sqlite3.connect(databasename)
|
sql = sqlite3.connect(databasename)
|
||||||
cur = sql.cursor()
|
cur = sql.cursor()
|
||||||
cur.execute('SELECT * FROM urls WHERE do_download == 1')
|
cur.execute('SELECT * FROM urls WHERE do_download == 1')
|
||||||
items = cur.fetchall()
|
all_items = cur.fetchall()
|
||||||
sql.close()
|
sql.close()
|
||||||
if len(items) == 0:
|
|
||||||
|
if len(all_items) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
items.sort(key=lambda x: x[SQL_URL])
|
path_form = '{root}\\{folder}\\{filename}'
|
||||||
|
all_items = [
|
||||||
path_parts = url_to_filepath(items[0][SQL_URL])
|
{
|
||||||
root_identifier = path_parts['root']
|
'url': item[SQL_URL],
|
||||||
#print('Root', root_identifier)
|
'size': item[SQL_CONTENT_LENGTH],
|
||||||
root_data = {'name': root_identifier, 'item_type': 'directory'}
|
'path': path_form.format(**url_to_filepath(item[SQL_URL])).split('\\'),
|
||||||
root_identifier = root_identifier.replace(':', '')
|
|
||||||
tree = TreeNode(
|
|
||||||
identifier=root_identifier,
|
|
||||||
data=root_data
|
|
||||||
)
|
|
||||||
node_map = {}
|
|
||||||
|
|
||||||
for item in items:
|
|
||||||
path = url_to_filepath(item[SQL_URL])
|
|
||||||
scheme = path['scheme']
|
|
||||||
|
|
||||||
# I join and re-split because 'folder' may contain slashes of its own
|
|
||||||
# and I want to break all the pieces
|
|
||||||
path = '\\'.join([path['root'], path['folder'], path['filename']])
|
|
||||||
parts = path.split('\\')
|
|
||||||
#print(path)
|
|
||||||
for (index, part) in enumerate(parts):
|
|
||||||
this_path = '/'.join(parts[:index + 1])
|
|
||||||
parent_path = '/'.join(parts[:index])
|
|
||||||
|
|
||||||
#input()
|
|
||||||
data = {
|
|
||||||
'name': part,
|
|
||||||
'url': scheme + '://' + this_path,
|
|
||||||
}
|
}
|
||||||
this_identifier = this_path.replace(':', '')
|
for item in all_items
|
||||||
parent_identifier = parent_path.replace(':', '')
|
]
|
||||||
|
all_items.sort(key=lambda x: x['url'])
|
||||||
|
|
||||||
if (index + 1) == len(parts):
|
root_data = {
|
||||||
data['item_type'] = 'file'
|
'item_type': 'directory',
|
||||||
if item[SQL_CONTENT_LENGTH]:
|
'name': databasename,
|
||||||
data['size'] = item[SQL_CONTENT_LENGTH]
|
}
|
||||||
else:
|
tree = TreeNode(databasename, data=root_data)
|
||||||
data['size'] = None
|
tree.unsorted_children = all_items
|
||||||
else:
|
node_queue = set()
|
||||||
data['item_type'] = 'directory'
|
node_queue.add(tree)
|
||||||
|
|
||||||
|
# In this process, URLs are divided up into their nodes one directory layer at a time.
|
||||||
|
# The root receives all URLs, and creates nodes for each of the top-level
|
||||||
|
# directories. Each of those nodes then receives the URLs that fall beneath it, and repeats the process.
|
||||||
|
while len(node_queue) > 0:
|
||||||
|
node = node_queue.pop()
|
||||||
|
for to_sort in node.unsorted_children:
|
||||||
|
path = to_sort['path']
|
||||||
|
# Create a new node for the directory, path[0]
|
||||||
|
# path[1:] is assigned to that node to be divided next.
|
||||||
|
child_identifier = path.pop(0)
|
||||||
|
|
||||||
# Ensure this comment is in a node of its own
|
child = node.children.get(child_identifier, None)
|
||||||
this_node = node_map.get(this_identifier, None)
|
if not child:
|
||||||
if this_node:
|
child = TreeNode(child_identifier, data={})
|
||||||
# This ID was detected as a parent of a previous iteration
|
child.unsorted_children = []
|
||||||
# Now we're actually filling it in.
|
node.add_child(child)
|
||||||
this_node.data = data
|
|
||||||
else:
|
|
||||||
this_node = TreeNode(this_identifier, data)
|
|
||||||
node_map[this_identifier] = this_node
|
|
||||||
|
|
||||||
# Attach this node to the parent.
|
child.data['url'] = to_sort['url']
|
||||||
if parent_identifier == root_identifier:
|
child.data['name'] = child_identifier
|
||||||
try:
|
if len(path) > 0:
|
||||||
tree.add_child(this_node)
|
child.data['item_type'] = 'directory'
|
||||||
except TreeExistingChild:
|
child.unsorted_children.append(to_sort)
|
||||||
pass
|
node_queue.add(child)
|
||||||
else:
|
else:
|
||||||
parent_node = node_map.get(parent_identifier, None)
|
child.data['item_type'] = 'file'
|
||||||
if not parent_node:
|
child.data['size'] = to_sort['size']
|
||||||
parent_node = TreeNode(parent_identifier, data=None)
|
|
||||||
node_map[parent_identifier] = parent_node
|
del node.unsorted_children
|
||||||
try:
|
|
||||||
parent_node.add_child(this_node)
|
|
||||||
except TreeExistingChild:
|
|
||||||
pass
|
|
||||||
this_node.parent = parent_node
|
|
||||||
#print(this_node.data)
|
|
||||||
return tree
|
return tree
|
||||||
|
|
||||||
def db_init(sql, cur):
|
def db_init(sql, cur):
|
||||||
|
@ -596,10 +580,10 @@ def db_init(sql, cur):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def do_get(url, raise_for_status=True):
|
def do_get(url, raise_for_status=True):
|
||||||
return do_request('GET', requests.get, url)
|
return do_request('GET', requests.get, url, raise_for_status=raise_for_status)
|
||||||
|
|
||||||
def do_head(url, raise_for_status=True):
|
def do_head(url, raise_for_status=True):
|
||||||
return do_request('HEAD', requests.head, url)
|
return do_request('HEAD', requests.head, url, raise_for_status=raise_for_status)
|
||||||
|
|
||||||
def do_request(message, method, url, raise_for_status=True):
|
def do_request(message, method, url, raise_for_status=True):
|
||||||
message = '{message:>4s}: {url} : '.format(message=message, url=url)
|
message = '{message:>4s}: {url} : '.format(message=message, url=url)
|
||||||
|
@ -685,10 +669,9 @@ def recursive_print_node(node, depth=0, use_html=False, output_file=None):
|
||||||
if use_html:
|
if use_html:
|
||||||
css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd'
|
css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd'
|
||||||
if node.data['item_type'] == 'directory':
|
if node.data['item_type'] == 'directory':
|
||||||
div_id = hashit(node.identifier, 16)
|
line = '<button onclick="collapse(this.nextSibling)">{name} ({size})</button>'
|
||||||
line = '<button onclick="collapse(\'{div_id}\')">{name} ({size})</button>'
|
line += '<div class="{css}" style="display:none">'
|
||||||
line += '<div class="{css}" id="{div_id}" style="display:none">'
|
line = line.format(name=node.data['name'], size=size, css=css_class)
|
||||||
line = line.format(div_id=div_id, name=node.data['name'], size=size, css=css_class)
|
|
||||||
else:
|
else:
|
||||||
line = '<a href="{url}">{name} ({size})</a><br>'
|
line = '<a href="{url}">{name} ({size})</a><br>'
|
||||||
line = line.format(url=node.data['url'], name=node.data['name'], size=size)
|
line = line.format(url=node.data['url'], name=node.data['name'], size=size)
|
||||||
|
@ -1057,9 +1040,9 @@ def measure(databasename, fullscan=False, new_only=False):
|
||||||
sql.commit()
|
sql.commit()
|
||||||
short_string = bytestring.bytestring(totalsize)
|
short_string = bytestring.bytestring(totalsize)
|
||||||
totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount)
|
totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount)
|
||||||
print(totalsize_string)
|
write(totalsize_string)
|
||||||
if unmeasured_file_count > 0:
|
if unmeasured_file_count > 0:
|
||||||
print(UNMEASURED_WARNING % unmeasured_file_count)
|
write(UNMEASURED_WARNING % unmeasured_file_count)
|
||||||
return totalsize
|
return totalsize
|
||||||
|
|
||||||
def measure_argparse(args):
|
def measure_argparse(args):
|
||||||
|
@ -1118,7 +1101,7 @@ def tree_argparse(args):
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''):
|
if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''):
|
||||||
print(DOCSTRING)
|
write(DOCSTRING)
|
||||||
return
|
return
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
subparsers = parser.add_subparsers()
|
subparsers = parser.add_subparsers()
|
||||||
|
|
|
@ -157,14 +157,14 @@ def create_line(degree_offset, **kwargs):
|
||||||
def create_line_frame(offset=None, line=None):
|
def create_line_frame(offset=None, line=None):
|
||||||
if line is None:
|
if line is None:
|
||||||
if offset is None:
|
if offset is None:
|
||||||
offset = entry_add.get()
|
offset = t.entry_add.get()
|
||||||
offset = offset.replace(' ', '')
|
offset = offset.replace(' ', '')
|
||||||
offset = offset.split(',')
|
offset = offset.split(',')
|
||||||
try:
|
try:
|
||||||
offset = [int(o) for o in offset]
|
offset = [int(o) for o in offset]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return
|
return
|
||||||
entry_add.delete(0, 'end')
|
t.entry_add.delete(0, 'end')
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
for x in offset:
|
for x in offset:
|
||||||
|
@ -253,10 +253,12 @@ def unregister_line(line):
|
||||||
variables['lines'].remove(line)
|
variables['lines'].remove(line)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
global t
|
||||||
t = tkinter.Tk()
|
t = tkinter.Tk()
|
||||||
|
|
||||||
frame_add = tkinter.Frame(t)
|
frame_add = tkinter.Frame(t)
|
||||||
entry_add = tkinter.Entry(frame_add)
|
entry_add = tkinter.Entry(frame_add)
|
||||||
|
t.entry_add = entry_add
|
||||||
entry_add.grid(row=0, column=0)
|
entry_add.grid(row=0, column=0)
|
||||||
tkinter.Button(frame_add, text='+', command=create_line_frame).grid(row=0, column=1)
|
tkinter.Button(frame_add, text='+', command=create_line_frame).grid(row=0, column=1)
|
||||||
frame_add.grid(row=0, column=0)
|
frame_add.grid(row=0, column=0)
|
||||||
|
|
Loading…
Reference in a new issue