diff --git a/README.md b/README.md index 4109cf1..05d0784 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,56 @@ Youtube Channel Downloader ========================== -You are responsible for your own `bot.py` file, containing a variable `YOUTUBE_KEY`. \ No newline at end of file +YouTubeChannelDownloader creates an SQLite3 database of Youtube channels and their videos, and serves it out of a web server. + +## YCDL solves three main problems: + +### Metadata archive + +The database acts as a permanent archive of video metadata including title, description, duration, view count, and more. Even if a video or channel is deleted from Youtube, you will still have this information. Perfect for never losing track of unlisted videos, too. + +The thumbnails, however, are not stored in the database, but you can use `utilities\download_thumbnails.py` to download them. + +Note: At this time, refreshing a channel in YCDL will update video titles, descriptions, and view counts with their current values. If you refresh a channel after they have changed their video's title or description you will lose the previous value. + +### Easily watch every video on the channel + +When I discover a channel, I like to watch through their videos over the course of weeks or months. Within Youtube's own interface, it becomes difficult to know which videos I've watched and which ones I haven't. Scrolling through all of a channel's videos is tough especially if there are many. + +In YCDL, videos start off as pending and you can mark them as ignore or download, so the pending page is always your "to-watch" list. + +On my Youtube subscription box, I would often press the "hide" button on videos only to find them come back a few days later, and hiding live broadcasts was never reliable. YCDL makes watching my subscriptions much easier. + +### Send video IDs to youtube-dl + +YCDL does not perform the downloading of videos itself. 
When you click on the download button, it will create an empty file called `xxxxxxxxxxx.ytqueue`, where `xxxxxxxxxxx` is the video ID, in the directory specified by the `ycdl.json` config file. You can then send this ID into youtube-dl in your preferred way. + +## Features + +- Web interface with video embeds +- "Sub-box" page where newest videos from all channels are listed in order +- Sort videos by date, duration, views, or in random order +- Background thread will refresh channels over time +- Automark channels so that their new videos are marked as ignored or downloaded + +## Your API key + +You are responsible for your own `bot.py` file, with a function `get_youtube_key`, called with no arguments, that returns a Youtube API key. + +## Screenshots + +![2020-04-04_15-27-15](https://user-images.githubusercontent.com/7299570/78462830-ca4f9900-768a-11ea-98c9-a4e622d3da62.png) + +![2020-04-04_15-29-25](https://user-images.githubusercontent.com/7299570/78462831-cb80c600-768a-11ea-9ff0-517c231e0469.png) + +![2020-04-04_15-36-05](https://user-images.githubusercontent.com/7299570/78462832-cb80c600-768a-11ea-9b86-529e1a22616c.png) + +![2020-04-04_15-36-10](https://user-images.githubusercontent.com/7299570/78462833-cc195c80-768a-11ea-9cac-208b8c79cad9.png) + +![2020-04-04_15-40-27](https://user-images.githubusercontent.com/7299570/78462834-cc195c80-768a-11ea-942b-e89a3dabe64d.png) + +## To do list + +- Keep permanent record of titles and descriptions. +- Progress indicator for channel refresh. +- Delete channel from web interface. 
diff --git a/frontends/ycdl_flask/backend/endpoints.py b/frontends/ycdl_flask/backend/endpoints.py index 59a7ea6..0844e1b 100644 --- a/frontends/ycdl_flask/backend/endpoints.py +++ b/frontends/ycdl_flask/backend/endpoints.py @@ -21,8 +21,6 @@ def favicon(): @site.route('/channels') def get_channels(): channels = common.ycdldb.get_channels() - for channel in channels: - channel['has_pending'] = common.ycdldb.channel_has_pending(channel['id']) return flask.render_template('channels.html', channels=channels) @site.route('/videos') @@ -55,7 +53,7 @@ def get_channel(channel_id=None, download_filter=None): search_terms = request.args.get('q', '').lower().strip().replace('+', ' ').split() if search_terms: - videos = [v for v in videos if all(term in v['title'].lower() for term in search_terms)] + videos = [v for v in videos if all(term in v.title.lower() for term in search_terms)] limit = request.args.get('limit', None) if limit is not None: @@ -66,10 +64,10 @@ def get_channel(channel_id=None, download_filter=None): pass for video in videos: - published = video['published'] + published = video.published published = datetime.datetime.utcfromtimestamp(published) published = published.strftime('%Y %m %d') - video['_published_str'] = published + video._published_str = published all_states = common.ycdldb.get_all_states() @@ -91,7 +89,8 @@ def post_mark_video_state(): try: video_ids = video_ids.split(',') for video_id in video_ids: - common.ycdldb.mark_video_state(video_id, state, commit=False) + video = common.ycdldb.get_video(video_id) + video.mark_state(state, commit=False) common.ycdldb.sql.commit() except ycdl.exceptions.NoSuchVideo: @@ -129,8 +128,8 @@ def post_refresh_channel(): force = request.form.get('force', False) force = ycdl.helpers.truthystring(force) - common.ycdldb.add_channel(channel_id, commit=False) - common.ycdldb.refresh_channel(channel_id, force=force) + channel = common.ycdldb.add_channel(channel_id, commit=False) + channel.refresh(force=force) return 
jsonify.make_json_response({}) @site.route('/start_download', methods=['POST']) diff --git a/frontends/ycdl_flask/static/common.css b/frontends/ycdl_flask/static/common.css index 75f0737..495c3e6 100644 --- a/frontends/ycdl_flask/static/common.css +++ b/frontends/ycdl_flask/static/common.css @@ -26,7 +26,7 @@ body } .hidden { - display: none; + display: none !important; } #content_body { diff --git a/frontends/ycdl_flask/static/common.js b/frontends/ycdl_flask/static/common.js index a4797c8..df98e9f 100644 --- a/frontends/ycdl_flask/static/common.js +++ b/frontends/ycdl_flask/static/common.js @@ -1,3 +1,6 @@ +var common = {}; + +common.post_example = function post_example(key, value, callback) { var url = "/postexample"; @@ -6,11 +9,13 @@ function post_example(key, value, callback) return post(url, data, callback); } +common.null_callback = function null_callback() { return; } +common.post = function post(url, data, callback) { var request = new XMLHttpRequest(); @@ -32,6 +37,7 @@ function post(url, data, callback) request.send(data); } +common.bind_box_to_button = function bind_box_to_button(box, button) { box.onkeydown=function() @@ -42,6 +48,7 @@ function bind_box_to_button(box, button) } }; } +common.entry_with_history_hook = function entry_with_history_hook(box, button) { //console.log(event.keyCode); @@ -81,3 +88,25 @@ function entry_with_history_hook(box, button) box.entry_history_pos = -1; } } + +common.init_atag_merge_params = +function init_atag_merge_params() +{ + var as = Array.from(document.getElementsByClassName("merge_params")); + page_params = new URLSearchParams(window.location.search); + as.forEach(function(a){ + var a_params = new URLSearchParams(a.search); + var new_params = new URLSearchParams(); + page_params.forEach(function(value, key) {new_params.set(key, value); }); + a_params.forEach(function(value, key) {new_params.set(key, value); }); + a.search = new_params.toString(); + a.classList.remove("merge_params"); + }); +} + 
+common.on_pageload = +function on_pageload() +{ + common.init_atag_merge_params(); +} +document.addEventListener("DOMContentLoaded", common.on_pageload); diff --git a/frontends/ycdl_flask/templates/channel.html b/frontends/ycdl_flask/templates/channel.html index 97b3284..4a7db6d 100644 --- a/frontends/ycdl_flask/templates/channel.html +++ b/frontends/ycdl_flask/templates/channel.html @@ -2,7 +2,7 @@ {% import "header.html" as header %} - {{channel['name']}} + {{channel.name}} @@ -22,8 +22,16 @@ .video_card { position: relative; + display: grid; + grid-template: + "thumbnail details toolbox" auto + "embed embed embed" auto + /auto 1fr auto; + grid-gap: 4px; + margin: 8px; - padding: 10px; + padding: 8px; + border-radius: 4px; border: 1px solid #000; } @@ -48,13 +56,51 @@ background-color: #aaffaa; } +.video_thumbnail +{ + grid-area: thumbnail; + justify-self: center; +} + +.video_details +{ + grid-area: details; + align-self: center; + /* + margin-right prevents the empty space of the tag from swallowing + click events meant for the video card. + */ + margin-right: auto; +} + +.embed_toolbox +{ + grid-area: embed; + /* + disabling pointer events on the toolbox prevents it from swallowing click + events meant for the video card. Then we must re-enable them for child + elements so the embed button is still clickable. + This one uses pointer-events instead of margin because margin makes the + whole embed too small. 
+ */ + pointer-events: none; +} +.embed_toolbox * +{ + pointer-events: auto; +} + .action_toolbox { - float: right; + grid-area: toolbox; + justify-self: right; display: inline-flex; flex-direction: row; position: relative; + margin-top: auto; + margin-bottom: auto; } + .video_action_dropdown { z-index: 1; @@ -87,6 +133,19 @@ https://stackoverflow.com/a/35153397 left: 0; top: 0; } + +@media screen and (max-width: 600px) +{ + .video_card + { + grid-template: + "thumbnail" + "details" + "toolbox" + "embed" + /1fr; + } +} @@ -95,30 +154,29 @@ https://stackoverflow.com/a/35153397 {{header.make_header()}}
{% if channel is not none %} - - - All (?q) + + + + All {% else %} - All (?q) + All {% endif %} {% for statename in all_states %} {% if channel is not none %} - {{statename}} (?q) + {{statename}} {% else %} - {{statename}} (?q) + {{statename}} {% endif %} {% endfor %}

Sort by - Date, - Duration, - Views, - Random + Date, + Duration, + Views, + Random
@@ -126,21 +184,24 @@ https://stackoverflow.com/a/35153397
{% for video in videos %} -
- - {{video['_published_str']}} - {{video['title']}} - ({{video['duration'] | seconds_to_hms}}) - ({{video['views']}}) + +
+ {{video._published_str}} - {{video.title}} + ({{video.duration | seconds_to_hms}}) + ({{video.views}}) {% if channel is none %} - ({{video.get('author_name', 'Chan')}}) + ({{video.author.name if video.author else video.author_id}}) {% endif %} +
+
-
+
-
+
{% endfor %}
@@ -231,18 +292,19 @@ function toggle_embed_video(video_id) var video_card = document.getElementById("video_card_" + video_id); var show_button = video_card.getElementsByClassName("show_embed_button")[0]; var hide_button = video_card.getElementsByClassName("hide_embed_button")[0]; + var embed_toolbox = video_card.getElementsByClassName("embed_toolbox")[0]; var embeds = video_card.getElementsByClassName("video_iframe_holder"); if (embeds.length == 0) { var html = `
` var embed = html_to_element(html); - video_card.appendChild(embed); + embed_toolbox.appendChild(embed); show_button.classList.add("hidden"); hide_button.classList.remove("hidden"); } else { - video_card.removeChild(embeds[0]); + embeds[0].parentElement.removeChild(embeds[0]); show_button.classList.remove("hidden"); hide_button.classList.add("hidden"); } @@ -412,7 +474,7 @@ function refresh_channel(channel_id, force, callback) data = new FormData(); data.append("channel_id", channel_id); data.append("force", force) - return post(url, data, callback); + return common.post(url, data, callback); } function mark_video_state(video_ids, state, callback) @@ -421,7 +483,7 @@ function mark_video_state(video_ids, state, callback) data = new FormData(); data.append("video_ids", video_ids); data.append("state", state); - return post(url, data, callback); + return common.post(url, data, callback); } function start_download(video_ids, callback) @@ -429,7 +491,7 @@ function start_download(video_ids, callback) var url = "/start_download"; data = new FormData(); data.append("video_ids", video_ids); - return post(url, data, callback); + return common.post(url, data, callback); } diff --git a/frontends/ycdl_flask/templates/channels.html b/frontends/ycdl_flask/templates/channels.html index c3ea46c..e1fd01e 100644 --- a/frontends/ycdl_flask/templates/channels.html +++ b/frontends/ycdl_flask/templates/channels.html @@ -55,12 +55,12 @@
{% for channel in channels %} - {% if channel['has_pending'] %} + {% if channel.has_pending() %}
{% else %}
{% endif %} - {{channel['name']}} (p) + {{channel.name}} (p)
{% endfor %}
@@ -70,7 +70,7 @@ diff --git a/frontends/ycdl_flask/ycdl_flask_launch.py b/frontends/ycdl_flask/ycdl_flask_launch.py index ffc4bad..7709796 100644 --- a/frontends/ycdl_flask/ycdl_flask_launch.py +++ b/frontends/ycdl_flask/ycdl_flask_launch.py @@ -1,5 +1,5 @@ import logging -logging.getLogger('googleapicliet.discovery_cache').setLevel(logging.ERROR) +logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) import gevent.monkey gevent.monkey.patch_all() diff --git a/utilities/download_thumbnails.py b/utilities/download_thumbnails.py index 60c7d4c..89ca303 100644 --- a/utilities/download_thumbnails.py +++ b/utilities/download_thumbnails.py @@ -5,22 +5,21 @@ import bot3 as bot import os import traceback import ycdl -import ycdl_repl from voussoirkit import downloady youtube_core = ycdl.ytapi.Youtube(bot.get_youtube_key()) -youtube = ycdl.YCDL(youtube_core) +ycdldb = ycdl.ycdldb.YCDLDB(youtube_core) DIRECTORY = '.\\youtube thumbnails' -videos = ycdl_repl.ydl.get_videos() +videos = ycdldb.get_videos() for video in videos: try: - thumbnail_path = os.path.join(DIRECTORY, video['id']) + '.jpg' + thumbnail_path = os.path.join(DIRECTORY, video.id) + '.jpg' if os.path.exists(thumbnail_path): continue - result = downloady.download_file(video['thumbnail'], thumbnail_path) + result = downloady.download_file(video.thumbnail, thumbnail_path) print(result) except Exception as e: traceback.print_exc() diff --git a/ycdl/constants.py b/ycdl/constants.py new file mode 100644 index 0000000..b48cd63 --- /dev/null +++ b/ycdl/constants.py @@ -0,0 +1,55 @@ +from voussoirkit import sqlhelpers + +DATABASE_VERSION = 4 +DB_VERSION_PRAGMA = f''' +PRAGMA user_version = {DATABASE_VERSION}; +''' + +DB_PRAGMAS = f''' +PRAGMA count_changes = OFF; +PRAGMA cache_size = 10000; +''' + +DB_INIT = f''' +BEGIN; +---------------------------------------------------------------------------------------------------- +{DB_PRAGMAS} +{DB_VERSION_PRAGMA} +CREATE TABLE IF NOT EXISTS channels( + id 
TEXT, + name TEXT, + directory TEXT COLLATE NOCASE, + automark TEXT +); +CREATE TABLE IF NOT EXISTS videos( + id TEXT, + published INT, + author_id TEXT, + title TEXT, + description TEXT, + duration INT, + views INT, + thumbnail TEXT, + download TEXT +); + +CREATE INDEX IF NOT EXISTS index_channel_id on channels(id); +CREATE INDEX IF NOT EXISTS index_video_author on videos(author_id); +CREATE INDEX IF NOT EXISTS index_video_author_download on videos(author_id, download); +CREATE INDEX IF NOT EXISTS index_video_id on videos(id); +CREATE INDEX IF NOT EXISTS index_video_published on videos(published); +CREATE INDEX IF NOT EXISTS index_video_download on videos(download); +---------------------------------------------------------------------------------------------------- +COMMIT; +''' + +SQL_COLUMNS = sqlhelpers.extract_table_column_map(DB_INIT) +SQL_INDEX = sqlhelpers.reverse_table_column_map(SQL_COLUMNS) + +DEFAULT_DATADIR = '.' +DEFAULT_DBNAME = 'ycdl.db' +DEFAULT_CONFIGNAME = 'ycdl.json' + +DEFAULT_CONFIGURATION = { + 'download_directory': '.', +} diff --git a/ycdl/exceptions.py b/ycdl/exceptions.py index 0c70199..527dd47 100644 --- a/ycdl/exceptions.py +++ b/ycdl/exceptions.py @@ -46,8 +46,23 @@ class YCDLException(Exception, metaclass=ErrorTypeAdder): class InvalidVideoState(YCDLException): error_message = '{} is not a valid state.' + +# NO SUCH +class NoSuchChannel(YCDLException): + error_message = 'Channel {} does not exist.' + class NoSuchVideo(YCDLException): error_message = 'Video {} does not exist.' + +# SQL ERRORS +class BadSQL(YCDLException): + pass + +class BadTable(BadSQL): + error_message = 'Table "{}" does not exist.' + + +# GENERAL ERRORS class DatabaseOutOfDate(YCDLException): error_message = 'Database is out-of-date. {current} should be {new}.' diff --git a/ycdl/objects.py b/ycdl/objects.py new file mode 100644 index 0000000..ae6f2b7 --- /dev/null +++ b/ycdl/objects.py @@ -0,0 +1,97 @@ +from . import constants +from . 
import exceptions + +def normalize_db_row(db_row, table): + if isinstance(db_row, (list, tuple)): + db_row = dict(zip(constants.SQL_COLUMNS[table], db_row)) + return db_row + +class Base: + def __init__(self, ycdldb): + super().__init__() + self.ycdldb = ycdldb + +class Channel(Base): + table = 'channels' + + def __init__(self, ycdldb, db_row): + super().__init__(ycdldb) + db_row = normalize_db_row(db_row, self.table) + + self.id = db_row['id'] + self.name = db_row['name'] + self.directory = db_row['directory'] + self.automark = db_row['automark'] + + def has_pending(self): + query = 'SELECT 1 FROM videos WHERE author_id == ? AND download == "pending" LIMIT 1' + bindings = [self.id] + return self.ycdldb.sql_select_one(query, bindings) is not None + + def refresh(self, force=False, commit=True): + seen_ids = set() + video_generator = self.ycdldb.youtube.get_user_videos(uid=self.id) + self.ycdldb.log.debug('Refreshing channel: %s', self.id) + for video in video_generator: + seen_ids.add(video.id) + status = self.ycdldb.insert_video(video, commit=False) + + video = status['video'] + if status['new'] and self.automark is not None: + video.mark_state(self.automark, commit=False) + if self.automark == 'downloaded': + self.ycdldb.download_video(video.id, commit=False) + + if not force and not status['new']: + break + + if force: + known_videos = self.ycdldb.get_videos(channel_id=self.id) + known_ids = {v.id for v in known_videos} + refresh_ids = list(known_ids.difference(seen_ids)) + for video in self.ycdldb.youtube.get_video(refresh_ids): + self.ycdldb.insert_video(video, commit=False) + + if commit: + self.ycdldb.commit() + +class Video(Base): + table = 'videos' + + def __init__(self, ycdldb, db_row): + super().__init__(ycdldb) + db_row = normalize_db_row(db_row, self.table) + + self.id = db_row['id'] + self.published = db_row['published'] + self.author_id = db_row['author_id'] + self.title = db_row['title'] + self.description = db_row['description'] + self.duration = 
db_row['duration'] + self.views = db_row['views'] + self.thumbnail = db_row['thumbnail'] + self.download = db_row['download'] + + @property + def author(self): + try: + return self.ycdldb.get_channel(self.author_id) + except exceptions.NoSuchChannel: + return None + + def mark_state(self, state, commit=True): + ''' + Mark the video as ignored, pending, or downloaded. + ''' + if state not in ['ignored', 'pending', 'downloaded']: + raise exceptions.InvalidVideoState(state) + + pairs = { + 'id': self.id, + 'download': state, + } + self.download = state + self.ycdldb.sql_update(table='videos', pairs=pairs, where_key='id') + + if commit: + self.ycdldb.commit() diff --git a/ycdl/ycdldb.py b/ycdl/ycdldb.py index ef6aa41..76264b5 100644 --- a/ycdl/ycdldb.py +++ b/ycdl/ycdldb.py @@ -1,148 +1,111 @@ +import json import logging import os import sqlite3 import traceback +from . import constants from . import exceptions -from . import helpers +from . import objects from . import ytapi +from voussoirkit import cacheclass +from voussoirkit import configlayers from voussoirkit import pathclass from voussoirkit import sqlhelpers def YOUTUBE_DL_COMMAND(video_id): - path = f'D:\\Incoming\\ytqueue\\{video_id}.ytqueue' + path = f'{video_id}.ytqueue' open(path, 'w') -logging.basicConfig(level=logging.DEBUG) -log = logging.getLogger(__name__) +logging.basicConfig() logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING) logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING) logging.getLogger('requests.packages.urllib3.util.retry').setLevel(logging.WARNING) -DATABASE_VERSION = 4 -DB_VERSION_PRAGMA = ''' -PRAGMA user_version = {user_version}; -''' -DB_PRAGMAS = ''' -PRAGMA count_changes = OFF; -PRAGMA cache_size = 10000; -''' -DB_INIT = f''' -BEGIN; ----------------------------------------------------------------------------------------------------- -{DB_PRAGMAS} -{DB_VERSION_PRAGMA} -CREATE TABLE IF NOT EXISTS channels( - id TEXT, - name 
TEXT, - directory TEXT COLLATE NOCASE, - automark TEXT -); -CREATE TABLE IF NOT EXISTS videos( - id TEXT, - published INT, - author_id TEXT, - title TEXT, - description TEXT, - duration INT, - views INT, - thumbnail TEXT, - download TEXT -); -CREATE INDEX IF NOT EXISTS index_channel_id on channels(id); -CREATE INDEX IF NOT EXISTS index_video_author on videos(author_id); -CREATE INDEX IF NOT EXISTS index_video_author_download on videos(author_id, download); -CREATE INDEX IF NOT EXISTS index_video_id on videos(id); -CREATE INDEX IF NOT EXISTS index_video_published on videos(published); -CREATE INDEX IF NOT EXISTS index_video_download on videos(download); ----------------------------------------------------------------------------------------------------- -COMMIT; -'''.format(user_version=DATABASE_VERSION) +class YCDLDBCacheManagerMixin: + _THING_CLASSES = { + 'channel': + { + 'class': objects.Channel, + 'exception': exceptions.NoSuchChannel, + }, + 'video': + { + 'class': objects.Video, + 'exception': exceptions.NoSuchVideo, + }, + } -SQL_CHANNEL_COLUMNS = [ - 'id', - 'name', - 'directory', - 'automark', -] + def __init__(self): + super().__init__() -SQL_VIDEO_COLUMNS = [ - 'id', - 'published', - 'author_id', - 'title', - 'description', - 'duration', - 'views', - 'thumbnail', - 'download', -] - -SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)} -SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)} - -DEFAULT_DBNAME = 'ycdl.db' - -def assert_is_abspath(path): - ''' - TO DO: Determine whether this is actually correct. 
- ''' - if os.path.abspath(path) != path: - raise ValueError('Not an abspath') - - -class YCDLDB: - def __init__( - self, - youtube, - database_filename=None, - youtube_dl_function=None, - skip_version_check=False, - ): - self.youtube = youtube - if database_filename is None: - database_filename = DEFAULT_DBNAME - - self.database_filepath = pathclass.Path(database_filename) - existing_database = self.database_filepath.exists - self.sql = sqlite3.connect(database_filename) - self.cur = self.sql.cursor() - - if existing_database: - if not skip_version_check: - self._check_version() - self._load_pragmas() - else: - self._first_time_setup() - - if youtube_dl_function: - self.youtube_dl_function = youtube_dl_function - else: - self.youtube_dl_function = YOUTUBE_DL_COMMAND - - def _check_version(self): + def get_cached_instance(self, thing_type, db_row): ''' - Compare database's user_version against DATABASE_VERSION, - raising exceptions.DatabaseOutOfDate if not correct. + Check if there is already an instance in the cache and return that. + Otherwise, a new instance is created, cached, and returned. + + Note that in order to call this method you have to already have a + db_row which means performing some select. If you only have the ID, + use get_thing_by_id, as there may already be a cached instance to save + you the select. 
''' - existing = self.sql.execute('PRAGMA user_version').fetchone()[0] - if existing != DATABASE_VERSION: - raise exceptions.DatabaseOutOfDate( - existing=existing, - new=DATABASE_VERSION, - filepath=self.database_filepath, - ) + thing_map = self._THING_CLASSES[thing_type] - def _first_time_setup(self): - self.sql.executescript(DB_INIT) - self.sql.commit() + thing_class = thing_map['class'] + thing_table = thing_class.table + thing_cache = self.caches[thing_type] - def _load_pragmas(self): - self.sql.executescript(DB_PRAGMAS) - self.sql.commit() + if isinstance(db_row, dict): + thing_id = db_row['id'] + else: + thing_index = constants.SQL_INDEX[thing_table] + thing_id = db_row[thing_index['id']] + + try: + thing = thing_cache[thing_id] + except KeyError: + thing = thing_class(self, db_row) + thing_cache[thing_id] = thing + return thing + + def get_thing_by_id(self, thing_type, thing_id): + ''' + This method will first check the cache to see if there is already an + instance with that ID, in which case we don't need to perform any SQL + select. If it is not in the cache, then a new instance is created, + cached, and returned. + ''' + thing_map = self._THING_CLASSES[thing_type] + + thing_class = thing_map['class'] + if isinstance(thing_id, thing_class): + # This could be used to check if your old reference to an object is + # still in the cache, or re-select it from the db to make sure it + # still exists and re-cache. + # Probably an uncommon need but... no harm I think. + thing_id = thing_id.id + + thing_cache = self.caches[thing_type] + try: + return thing_cache[thing_id] + except KeyError: + pass + + query = f'SELECT * FROM {thing_class.table} WHERE id == ?' 
+ bindings = [thing_id] + thing_row = self.sql_select_one(query, bindings) + if thing_row is None: + raise thing_map['exception'](thing_id) + thing = thing_class(self, thing_row) + thing_cache[thing_id] = thing + return thing + +class YCDLDBChannelMixin: + def __init__(self): + super().__init__() def add_channel( self, @@ -153,113 +116,170 @@ class YCDLDB: get_videos=False, name=None, ): - if self.get_channel(channel_id) is not None: - return + try: + return self.get_channel(channel_id) + except exceptions.NoSuchChannel: + pass if name is None: name = self.youtube.get_user_name(channel_id) if download_directory is not None: - assert_is_abspath(download_directory) + download_directory = pathclass.Path(download_directory).absolute_path data = { 'id': channel_id, 'name': name, 'directory': download_directory, + 'automark': None, } + self.sql_insert(table='channels', data=data) - (qmarks, bindings) = sqlhelpers.insert_filler(SQL_CHANNEL, data) - query = f'INSERT INTO channels VALUES({qmarks})' - self.cur.execute(query) + channel = self.get_cached_instance('channel', data) if get_videos: - self.refresh_channel(channel_id, commit=False) + channel.refresh(commit=False) if commit: - self.sql.commit() + self.commit() + return channel - return data + def get_channel(self, channel_id): + return self.get_thing_by_id('channel', channel_id) - def channel_has_pending(self, channel_id): - query = 'SELECT 1 FROM videos WHERE author_id == ? 
AND download == "pending" LIMIT 1' - self.cur.execute(query, [channel_id]) - return self.cur.fetchone() is not None + def get_channels(self): + query = 'SELECT * FROM channels' + rows = self.sql_select(query) + channels = [self.get_cached_instance('channel', row) for row in rows] + channels.sort(key=lambda c: c.name) + return channels + + def refresh_all_channels(self, force=False, skip_failures=False, commit=True): + exceptions = [] + for channel in self.get_channels(): + try: + channel.refresh(force=force, commit=commit) + except Exception as exc: + if skip_failures: + traceback.print_exc() + exceptions.append(exc) + else: + raise + if commit: + self.commit() + return exceptions + +class YCDLSQLMixin: + def __init__(self): + super().__init__() + self._cached_sql_tables = None + + def assert_table_exists(self, table): + if not self._cached_sql_tables: + self._cached_sql_tables = self.get_sql_tables() + if table not in self._cached_sql_tables: + raise exceptions.BadTable(table) + + def commit(self, message=None): + if message is not None: + self.log.debug('Committing - %s.', message) + + self.sql.commit() + + def get_sql_tables(self): + query = 'SELECT name FROM sqlite_master WHERE type = "table"' + cur = self.sql_execute(query) + tables = set(row[0] for row in cur.fetchall()) + return tables + + def rollback(self): + self.log.debug('Rolling back.') + self.sql_execute('ROLLBACK') + + def sql_delete(self, table, pairs): + self.assert_table_exists(table) + (qmarks, bindings) = sqlhelpers.delete_filler(pairs) + query = f'DELETE FROM {table} {qmarks}' + self.sql_execute(query, bindings) + + def sql_execute(self, query, bindings=[]): + if bindings is None: + bindings = [] + cur = self.sql.cursor() + #self.log.log(1, f'{query} {bindings}') + cur.execute(query, bindings) + return cur + + def sql_insert(self, table, data): + self.assert_table_exists(table) + column_names = constants.SQL_COLUMNS[table] + (qmarks, bindings) = sqlhelpers.insert_filler(column_names, data) + + 
query = f'INSERT INTO {table} VALUES({qmarks})' + self.sql_execute(query, bindings) + + def sql_select(self, query, bindings=None): + cur = self.sql_execute(query, bindings) + while True: + fetch = cur.fetchone() + if fetch is None: + break + yield fetch + + def sql_select_one(self, query, bindings=None): + cur = self.sql_execute(query, bindings) + return cur.fetchone() + + def sql_update(self, table, pairs, where_key): + self.assert_table_exists(table) + (qmarks, bindings) = sqlhelpers.update_filler(pairs, where_key=where_key) + query = f'UPDATE {table} {qmarks}' + self.sql_execute(query, bindings) + +class YCDLDBVideoMixin: + def __init__(self): + super().__init__() def download_video(self, video, commit=True, force=False): ''' Execute the `YOUTUBE_DL_COMMAND`, within the channel's associated directory if applicable. ''' - # This logic is a little hazier than I would like, but it's all in the - # interest of minimizing unnecessary API calls. if isinstance(video, ytapi.Video): video_id = video.id else: video_id = video - self.cur.execute('SELECT * FROM videos WHERE id == ?', [video_id]) - video_row = self.cur.fetchone() - if video_row is None: - # Since the video was not in the db, we may not know about the channel either. 
- if not isinstance(video, ytapi.Video): - print('get video') - video = self.youtube.get_video(video) - channel_id = video.author_id - self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id]) - if self.cur.fetchone() is None: - print('add channel') - self.add_channel(channel_id, get_videos=False, commit=False) - video_row = self.insert_video(video, commit=False)['row'] - else: - channel_id = video_row[SQL_VIDEO['author_id']] - if video_row[SQL_VIDEO['download']] != 'pending' and not force: + video = self.get_video(video_id) + if video.download != 'pending' and not force: print('That video does not need to be downloaded.') return - current_directory = os.getcwd() - download_directory = self.get_channel(channel_id)['directory'] - download_directory = download_directory or current_directory + try: + channel = self.get_channel(video.author_id) + download_directory = channel.directory + download_directory = download_directory or self.config['download_directory'] + except exceptions.NoSuchChannel: + download_directory = self.config['download_directory'] os.makedirs(download_directory, exist_ok=True) + + current_directory = os.getcwd() os.chdir(download_directory) - self.youtube_dl_function(video_id) - os.chdir(current_directory) - self.cur.execute('UPDATE videos SET download = "downloaded" WHERE id == ?', [video_id]) + pairs = { + 'id': video_id, + 'download': 'downloaded', + } + self.sql_update(table='videos', pairs=pairs, where_key='id') + if commit: - self.sql.commit() - - def get_all_states(self): - query = 'SELECT DISTINCT download FROM videos' - self.cur.execute(query) - states = self.cur.fetchall() - if states is None: - return [] - states = [row[0] for row in states] - return sorted(states) - - def get_channel(self, channel_id): - self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id]) - fetch = self.cur.fetchone() - if not fetch: - return None - fetch = {key: fetch[SQL_CHANNEL[key]] for key in SQL_CHANNEL} - return fetch - - def 
get_channels(self): - self.cur.execute('SELECT * FROM channels') - channels = self.cur.fetchall() - channels = [{key: channel[SQL_CHANNEL[key]] for key in SQL_CHANNEL} for channel in channels] - channels.sort(key=lambda x: x['name'].lower()) - return channels + self.commit() def get_video(self, video_id): - self.cur.execute('SELECT * FROM videos WHERE id == ?', [video_id]) - video = self.cur.fetchone() - video = {key: video[SQL_VIDEO[key]] for key in SQL_VIDEO} - return video + return self.get_thing_by_id('video', video_id) def get_videos(self, channel_id=None, *, download_filter=None, orderby=None): wheres = [] @@ -294,23 +314,8 @@ class YCDLDB: orderbys = ' ORDER BY ' + orderbys query = 'SELECT * FROM videos' + wheres + orderbys - self.cur.execute(query, bindings) - rows = self.cur.fetchall() - if not rows: - return [] - - videos = [] - channels = {} - for row in rows: - video = {key: row[SQL_VIDEO[key]] for key in SQL_VIDEO} - author_id = video['author_id'] - if author_id in channels: - video['author_name'] = channels[author_id] - author = self.get_channel(author_id) - if author: - channels[author_id] = author['name'] - video['author_name'] = author['name'] - videos.append(video) + rows = self.sql_select(query, bindings) + videos = [self.get_cached_instance('video', row) for row in rows] return videos def insert_playlist(self, playlist_id, commit=True): @@ -318,7 +323,7 @@ class YCDLDB: results = [self.insert_video(video, commit=False) for video in video_generator] if commit: - self.sql.commit() + self.commit() return results @@ -328,12 +333,13 @@ class YCDLDB: if add_channel: self.add_channel(video.author_id, get_videos=False, commit=False) - self.cur.execute('SELECT * FROM videos WHERE id == ?', [video.id]) - fetch = self.cur.fetchone() - existing = fetch is not None - - download_status = 'pending' if not existing else fetch[SQL_VIDEO['download']] + try: + existing = self.get_video(video.id) + download_status = existing.download + except 
exceptions.NoSuchVideo: + existing = None + download_status = 'pending' data = { 'id': video.id, @@ -348,72 +354,116 @@ class YCDLDB: } if existing: - (qmarks, bindings) = sqlhelpers.update_filler(data, where_key='id') - query = f'UPDATE videos {qmarks}' + self.sql_update(table='videos', pairs=data, where_key='id') else: - (qmarks, bindings) = sqlhelpers.insert_filler(SQL_VIDEO_COLUMNS, data) - query = f'INSERT INTO videos VALUES({qmarks})' + self.sql_insert(table='videos', data=data) - self.cur.execute(query, bindings) + video = self.get_cached_instance('video', data) if commit: - self.sql.commit() + self.commit() - return {'new': not existing, 'row': data} + return {'new': not existing, 'video': video} - def mark_video_state(self, video_id, state, commit=True): +class YCDLDB( + YCDLDBCacheManagerMixin, + YCDLDBChannelMixin, + YCDLDBVideoMixin, + YCDLSQLMixin, + ): + def __init__( + self, + youtube, + data_directory=None, + youtube_dl_function=None, + skip_version_check=False, + ): + super().__init__() + self.youtube = youtube + + # DATA DIR PREP + if data_directory is None: + data_directory = constants.DEFAULT_DATADIR + + self.data_directory = pathclass.Path(data_directory) + + # LOGGING + self.log = logging.getLogger(__name__) + self.log.setLevel(logging.DEBUG) + + # DATABASE + self.database_filepath = self.data_directory.with_child(constants.DEFAULT_DBNAME) + existing_database = self.database_filepath.exists + self.sql = sqlite3.connect(self.database_filepath.absolute_path) + + if existing_database: + if not skip_version_check: + self._check_version() + self._load_pragmas() + else: + self._first_time_setup() + + # DOWNLOAD COMMAND + if youtube_dl_function: + self.youtube_dl_function = youtube_dl_function + else: + self.youtube_dl_function = YOUTUBE_DL_COMMAND + + # CONFIG + self.config_filepath = self.data_directory.with_child(constants.DEFAULT_CONFIGNAME) + self.load_config() + + self.caches = { + 'channel': cacheclass.Cache(maxlen=20_000), + 'video': 
cacheclass.Cache(maxlen=50_000), + } + + def _check_version(self): ''' - Mark the video as ignored, pending, or downloaded. + Compare database's user_version against constants.DATABASE_VERSION, + raising exceptions.DatabaseOutOfDate if not correct. ''' - if state not in ['ignored', 'pending', 'downloaded', 'coldstorage']: - raise exceptions.InvalidVideoState(state) - self.cur.execute('SELECT * FROM videos WHERE id == ?', [video_id]) - if self.cur.fetchone() is None: - raise exceptions.NoSuchVideo(video_id) - self.cur.execute('UPDATE videos SET download = ? WHERE id == ?', [state, video_id]) - if commit: - self.sql.commit() + existing = self.sql.execute('PRAGMA user_version').fetchone()[0] + if existing != constants.DATABASE_VERSION: + raise exceptions.DatabaseOutOfDate( + existing=existing, + new=constants.DATABASE_VERSION, + filepath=self.database_filepath, + ) - def refresh_all_channels(self, force=False, skip_failures=False, commit=True): - exceptions = [] - for channel in self.get_channels(): - try: - self.refresh_channel(channel, force=force, commit=commit) - except Exception as exc: - if skip_failures: - traceback.print_exc() - exceptions.append(exc) - else: - raise - if commit: - self.sql.commit() - return exceptions + def _first_time_setup(self): + self.log.debug('Running first-time database setup.') + self.sql.executescript(constants.DB_INIT) + self.commit() - def refresh_channel(self, channel, force=False, commit=True): - if isinstance(channel, str): - channel = self.get_channel(channel) + def _load_pragmas(self): + self.log.debug('Reloading pragmas.') + self.sql.executescript(constants.DB_PRAGMAS) + self.commit() - seen_ids = set() - video_generator = self.youtube.get_user_videos(uid=channel['id']) - log.debug('Refreshing channel: %s', channel['id']) - for video in video_generator: - seen_ids.add(video.id) - status = self.insert_video(video, commit=False) + def get_all_states(self): + ''' + Get a list of all the different `download` states that are 
currently in + use in the database. + ''' + # Note: This function was added while I was considering the addition of + # arbitrarily many states for user-defined purposes, but I kind of went + # back on that so I'm not sure if it will be useful. + query = 'SELECT DISTINCT download FROM videos' + states = self.sql_select(query) + states = [row[0] for row in states] + return sorted(states) - if status['new'] and channel['automark'] is not None: - self.mark_video_state(video.id, channel['automark'], commit=False) - if channel['automark'] == 'downloaded': - self.download_video(video.id, commit=False) + def load_config(self): + (config, needs_rewrite) = configlayers.load_file( + filepath=self.config_filepath, + defaults=constants.DEFAULT_CONFIGURATION, + ) + self.config = config - if not force and not status['new']: - break + if needs_rewrite: + self.save_config() - if force: - known_videos = self.get_videos(channel_id=channel['id']) - known_ids = {v['id'] for v in known_videos} - refresh_ids = list(known_ids.difference(seen_ids)) - for video in self.youtube.get_video(refresh_ids): - self.insert_video(video, commit=False) - - if commit: - self.sql.commit() + def save_config(self): + with open(self.config_filepath.absolute_path, 'w', encoding='utf-8') as handle: + handle.write(json.dumps(self.config, indent=4, sort_keys=True)) diff --git a/ycdl/ytapi.py b/ycdl/ytapi.py index 4013a77..63c3dff 100644 --- a/ycdl/ytapi.py +++ b/ycdl/ytapi.py @@ -75,6 +75,8 @@ class Youtube: videos = self.get_video(video_ids) videos.sort(key=lambda x: x.published, reverse=True) + log.debug('Got %d more videos.', len(videos)) + for video in videos: yield video @@ -118,7 +120,10 @@ class Youtube: chunks = helpers.chunk_sequence(video_ids, 50) for chunk in chunks: chunk = ','.join(chunk) - data = self.youtube.videos().list(part='id,contentDetails,snippet,statistics', id=chunk).execute() + data = self.youtube.videos().list( + part='id,contentDetails,snippet,statistics', + id=chunk, + ).execute() 
items = data['items'] snippets.extend(items) videos = []