diff --git a/frontends/ycdl_flask/templates/channel.html b/frontends/ycdl_flask/templates/channel.html index da98e5e..5d2d813 100644 --- a/frontends/ycdl_flask/templates/channel.html +++ b/frontends/ycdl_flask/templates/channel.html @@ -103,6 +103,7 @@ {{video['_published_str']}} - {{video['title']}} ({{video['duration'] | seconds_to_hms}}) + ({{video['views']}}) {% if channel is none %} (Chan) {% endif %} diff --git a/utilities/database_upgrader.py b/utilities/database_upgrader.py index 5c97af9..6d052c2 100644 --- a/utilities/database_upgrader.py +++ b/utilities/database_upgrader.py @@ -5,10 +5,49 @@ import sys import ycdl +def upgrade_3_to_4(sql): + ''' + In this version, the views column was added. + ''' + cur = sql.cursor() + cur.executescript(''' + ALTER TABLE videos RENAME TO videos_old; + CREATE TABLE videos( + id TEXT, + published INT, + author_id TEXT, + title TEXT, + description TEXT, + duration INT, + views INT, + thumbnail TEXT, + download TEXT + ); + INSERT INTO videos SELECT + id, + published, + author_id, + title, + description, + duration, + NULL, + thumbnail, + download + FROM videos_old; + DROP TABLE videos_old; + ''') + +def upgrade_2_to_3(sql): + ''' + In this version, a column `automark` was added to the channels table, where + you can set channels to automatically mark videos as ignored or downloaded. + ''' + cur = sql.cursor() + cur.execute('ALTER TABLE channels ADD COLUMN automark TEXT') + def upgrade_1_to_2(sql): ''' - In this version, a column `tagged_at` was added to the Photos table, to keep - track of the last time the photo's tags were edited (added or removed). + In this version, the duration column was added. ''' cur = sql.cursor() cur.executescript(''' diff --git a/ycdl/ycdl.py b/ycdl/ycdl.py index b0dec1c..618fc35 100644 --- a/ycdl/ycdl.py +++ b/ycdl/ycdl.py @@ -18,27 +18,7 @@ logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING) logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING) logging.getLogger('requests.packages.urllib3.util.retry').setLevel(logging.WARNING) -SQL_CHANNEL_COLUMNS = [ - 'id', - 'name', - 'directory', -] - -SQL_VIDEO_COLUMNS = [ - 'id', - 'published', - 'author_id', - 'title', - 'description', - 'duration', - 'thumbnail', - 'download', -] - -SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)} -SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)} - -DATABASE_VERSION = 2 +DATABASE_VERSION = 4 DB_INIT = ''' PRAGMA count_changes = OFF; PRAGMA cache_size = 10000; @@ -46,7 +26,8 @@ PRAGMA user_version = {user_version}; CREATE TABLE IF NOT EXISTS channels( id TEXT, name TEXT, - directory TEXT COLLATE NOCASE + directory TEXT COLLATE NOCASE, + automark TEXT ); CREATE TABLE IF NOT EXISTS videos( id TEXT, @@ -55,6 +36,7 @@ CREATE TABLE IF NOT EXISTS videos( title TEXT, description TEXT, duration INT, + views INT, thumbnail TEXT, download TEXT ); @@ -67,12 +49,34 @@ CREATE INDEX IF NOT EXISTS index_video_published on videos(published); CREATE INDEX IF NOT EXISTS index_video_download on videos(download); '''.format(user_version=DATABASE_VERSION) +SQL_CHANNEL_COLUMNS = [ + 'id', + 'name', + 'directory', + 'automark', +] + +SQL_VIDEO_COLUMNS = [ + 'id', + 'published', + 'author_id', + 'title', + 'description', + 'duration', + 'views', + 'thumbnail', + 'download', +] + +SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)} +SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)} + DEFAULT_DBNAME = 'ycdl.db' -ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}' +ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}.' -def verify_is_abspath(path): +def assert_is_abspath(path): ''' TO DO: Determine whether this is actually correct. ''' @@ -86,7 +90,6 @@ class InvalidVideoState(Exception): class NoSuchVideo(Exception): pass - class YCDL: def __init__(self, youtube, database_filename=None, youtube_dl_function=None): self.youtube = youtube @@ -119,6 +122,7 @@ class YCDL: def add_channel( self, channel_id, + *, commit=True, download_directory=None, get_videos=False, @@ -134,27 +138,24 @@ class YCDL: data[SQL_CHANNEL['id']] = channel_id data[SQL_CHANNEL['name']] = name if download_directory is not None: - verify_is_abspath(download_directory) + assert_is_abspath(download_directory) data[SQL_CHANNEL['directory']] = download_directory - self.cur.execute('INSERT INTO channels VALUES(?, ?, ?)', data) + self.cur.execute('INSERT INTO channels VALUES(?, ?, ?, ?)', data) + if get_videos: self.refresh_channel(channel_id, commit=False) + if commit: self.sql.commit() + return data + def channel_has_pending(self, channel_id): - query = 'SELECT * FROM videos WHERE author_id == ? AND download == "pending"' + query = 'SELECT 1 FROM videos WHERE author_id == ? AND download == "pending" LIMIT 1' self.cur.execute(query, [channel_id]) return self.cur.fetchone() is not None - def channel_directory(self, channel_id): - self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id]) - fetch = self.cur.fetchone() - if fetch is None: - return None - return fetch[SQL_CHANNEL['directory']] - def download_video(self, video, commit=True, force=False): ''' Execute the `YOUTUBE_DL_COMMAND`, within the channel's associated @@ -187,7 +188,7 @@ class YCDL: return current_directory = os.getcwd() - download_directory = self.channel_directory(channel_id) + download_directory = self.get_channel(channel_id)['directory'] download_directory = download_directory or current_directory os.makedirs(download_directory, exist_ok=True) @@ -269,6 +270,7 @@ class YCDL: 'title': video.title, 'description': video.description, 'duration': video.duration, + 'views': video.views, 'thumbnail': video.thumbnail['url'], 'download': download_status, } @@ -302,16 +304,35 @@ class YCDL: def refresh_all_channels(self, force=False, commit=True): for channel in self.get_channels(): - self.refresh_channel(channel['id'], force=force, commit=commit) + self.refresh_channel(channel, force=force, commit=commit) if commit: self.sql.commit() - def refresh_channel(self, channel_id, force=False, commit=True): - video_generator = self.youtube.get_user_videos(uid=channel_id) - log.debug('Refreshing channel: %s', channel_id) + def refresh_channel(self, channel, force=False, commit=True): + if isinstance(channel, str): + channel = self.get_channel(channel) + + seen_ids = set() + video_generator = self.youtube.get_user_videos(uid=channel['id']) + log.debug('Refreshing channel: %s', channel['id']) for video in video_generator: + seen_ids.add(video.id) status = self.insert_video(video, commit=False) + + if status['new'] and channel['automark'] is not None: + self.mark_video_state(video.id, channel['automark'], commit=False) + if channel['automark'] == 'downloaded': + self.download_video(video.id, commit=False) + if not force and not status['new']: break + + if force: + known_videos = self.get_videos(channel_id=channel['id']) + known_ids = {v['id'] for v in known_videos} + refresh_ids = list(known_ids.difference(seen_ids)) + for video in self.youtube.get_video(refresh_ids): + self.insert_video(video, commit=False) + if commit: self.sql.commit() diff --git a/ycdl/ytapi.py b/ycdl/ytapi.py index e7f9dc1..a5ee461 100644 --- a/ycdl/ytapi.py +++ b/ycdl/ytapi.py @@ -21,6 +21,7 @@ class Video: snippet = data['snippet'] content_details = data['contentDetails'] + statistics = data['statistics'] self.title = snippet['title'] or '[untitled]' self.description = snippet['description'] @@ -33,6 +34,7 @@ class Video: self.published = published.timestamp() self.duration = isodate.parse_duration(content_details['duration']).seconds + self.views = statistics['viewCount'] thumbnails = snippet['thumbnails'] best_thumbnail = max(thumbnails, key=lambda x: thumbnails[x]['width'] * thumbnails[x]['height']) @@ -66,7 +68,6 @@ class Youtube: user = self.youtube.channels().list(part='contentDetails', id=uid).execute() upload_playlist = user['items'][0]['contentDetails']['relatedPlaylists']['uploads'] page_token = None - total = 0 while True: response = self.youtube.playlistItems().list( maxResults=50, @@ -78,12 +79,11 @@ class Youtube: video_ids = [item['contentDetails']['videoId'] for item in response['items']] videos = self.get_video(video_ids) videos.sort(key=lambda x: x.published, reverse=True) - yield from videos - count = len(videos) - total += count - print(f'Found {count} more, {total} total') - if page_token is None or count < 50: + for video in videos: + yield video + + if page_token is None: break def get_related_videos(self, video_id, count=50): @@ -108,17 +108,25 @@ class Youtube: else: singular = False - results = [] + snippets = [] chunks = helpers.chunk_sequence(video_ids, 50) for chunk in chunks: chunk = ','.join(chunk) - data = self.youtube.videos().list(part='id,contentDetails,snippet', id=chunk).execute() + data = self.youtube.videos().list(part='id,contentDetails,snippet,statistics', id=chunk).execute() items = data['items'] - results.extend(items) - results = [Video(snippet) for snippet in results] + snippets.extend(items) + videos = [] + broken = [] + for snippet in snippets: + try: + videos.append(Video(snippet)) + except KeyError: + broken.append(snippet) + if broken: + print('broken:', broken) if singular: - if len(results) == 1: - return results[0] - elif len(results) == 0: + if len(videos) == 1: + return videos[0] + elif len(videos) == 0: raise VideoNotFound(video_ids[0]) - return results + return videos