ycdl: RSS-assisted channel refresh (reviewed copy of the patch)

Adds exceptions.RSSAssistFailed, Channel.get_most_recent_video_id,
Channel._rss_assisted_videos and ytrss.get_user_videos_since, and routes both
Channel.refresh and the RSS-assisted refresh in YCDLDBChannelMixin through
them. print() calls in YCDLDBVideoMixin become log.debug calls.

Review amendments relative to the patch as received:
  * ytrss.get_user_videos: bind the caught exception ("as exc") so that
    "raise ... from exc" no longer hits an unbound name, and pass a message
    to RSSAssistFailed (its error_message template is '{}', presumably
    filled from the constructor arguments; get_user_videos_since already
    passes one).
  * YCDLDBVideoMixin, "does not need to be downloaded" hunk: log through
    self.log, like the other self.log.debug calls in the same mixin, instead
    of self.ycdldb.log.
  * Channel.get_most_recent_video_id: return None instead of indexing into a
    missing row. get_user_videos_since then raises RSSAssistFailed and both
    callers fall back to the traditional refresh.
    NOTE(review): assumes sql_select_one yields None (or an empty row) when
    nothing matches -- confirm.
  * Docstrings added to the new functions; "+" hunk ranges renumbered.

NOTE(review): the patch arrived with its line structure flattened. Leading
whitespace, blank context lines and the wrapping of docstring context were
rebuilt from the hunk counts and Python syntax -- verify against the tree
before applying. The "index" lines still carry the original blob ids.
NOTE(review): as far as the hunk shows, Channel.refresh tries the RSS path
first even when refresh_all_channels passes force=True -- confirm whether a
forced refresh should skip it.

diff --git a/ycdl/exceptions.py b/ycdl/exceptions.py
index 5665a42..7c18b7e 100644
--- a/ycdl/exceptions.py
+++ b/ycdl/exceptions.py
@@ -57,6 +57,11 @@ class NoSuchVideo(YCDLException):
 class InvalidVideoState(YCDLException):
     error_message = '{} is not a valid state.'
 
+# RSS ERRORS #######################################################################################
+
+class RSSAssistFailed(YCDLException):
+    error_message = '{}'
+
 # SQL ERRORS #######################################################################################
 
 class BadSQL(YCDLException):
diff --git a/ycdl/objects.py b/ycdl/objects.py
index de99fdf..2f1f4c0 100644
--- a/ycdl/objects.py
+++ b/ycdl/objects.py
@@ -1,5 +1,6 @@
 from . import constants
 from . import exceptions
+from . import ytrss
 
 def normalize_db_row(db_row, table):
     if isinstance(db_row, (list, tuple)):
@@ -25,6 +26,17 @@ class Channel(Base):
         self.queuefile_extension = db_row['queuefile_extension']
         self.automark = db_row['automark'] or "pending"
 
+    def _rss_assisted_videos(self):
+        '''
+        Return the videos that the RSS feed lists ahead of our most recent
+        known video. Raises RSSAssistFailed when the feed cannot be used, so
+        the caller can fall back to the uploads playlist.
+        '''
+        most_recent_video = self.get_most_recent_video_id()
+        new_ids = ytrss.get_user_videos_since(self.id, most_recent_video)
+        videos = self.ycdldb.youtube.get_videos(new_ids)
+        return videos
+
     def delete(self, commit=True):
         self.ycdldb.sql_delete(table='videos', pairs={'author_id': self.id})
         self.ycdldb.sql_delete(table='channels', pairs={'id': self.id})
@@ -32,6 +44,17 @@ class Channel(Base):
         if commit:
             self.ycdldb.commit()
 
+    def get_most_recent_video_id(self):
+        '''
+        Return the id of this channel's most recently published video in the
+        database, or None if the query returns no row.
+        '''
+        query = 'SELECT id FROM videos WHERE author_id == ? ORDER BY published DESC LIMIT 1'
+        bindings = [self.id]
+        row = self.ycdldb.sql_select_one(query, bindings)
+        # A channel with no stored videos has no row to index into.
+        return row[0] if row else None
+
     def has_pending(self):
         query = 'SELECT 1 FROM videos WHERE author_id == ? AND state == "pending" LIMIT 1'
         bindings = [self.id]
@@ -44,8 +67,12 @@ class Channel(Base):
             self.uploads_playlist = self.ycdldb.youtube.get_user_uploads_playlist_id(self.id)
             self.set_uploads_playlist_id(self.uploads_playlist)
 
+        try:
+            video_generator = self._rss_assisted_videos()
+        except exceptions.RSSAssistFailed:
+            video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)
+
         seen_ids = set()
-        video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)
         for video in video_generator:
             seen_ids.add(video.id)
             status = self.ycdldb.ingest_video(video, commit=False)
diff --git a/ycdl/ycdldb.py b/ycdl/ycdldb.py
index d501953..df26b58 100644
--- a/ycdl/ycdldb.py
+++ b/ycdl/ycdldb.py
@@ -170,8 +170,7 @@ class YCDLDBChannelMixin:
            paginate. So, for any channel with more than 14 new videos, we'll
            do a traditional refresh.
         '''
-        query = 'SELECT id FROM videos WHERE author_id == ? ORDER BY published DESC LIMIT 1'
-        exceptions = []
+        excs = []
 
         def traditional(channel):
             try:
@@ -179,56 +178,46 @@ class YCDLDBChannelMixin:
             except Exception as exc:
                 if skip_failures:
                     traceback.print_exc()
-                    exceptions.append(exc)
+                    excs.append(exc)
                 else:
                     raise
 
-        def gen():
+        def assisted():
             for channel in self.get_channels():
-                most_recent_id = self.sql_select_one(query, [channel.id])[0]
+                most_recent_video = channel.get_most_recent_video_id()
                 try:
-                    rss_ids = ytrss.get_user_videos(channel.id)
-                except Exception:
-                    # traceback.print_exc()
+                    new_ids = ytrss.get_user_videos_since(channel.id, most_recent_video)
+                except exceptions.RSSAssistFailed:
                     traditional(channel)
                     continue
-
-                try:
-                    index = rss_ids.index(most_recent_id)
-                except ValueError:
-                    self.log.debug('RSS didn\'t contain %s. Calling API refresh.', most_recent_id)
-                    traditional(channel)
-                    continue
-
-                new_ids = rss_ids[:index]
                 yield from new_ids
 
-        for video in self.youtube.get_videos(gen()):
+        for video in self.youtube.get_videos(assisted()):
             self.ingest_video(video, commit=False)
 
         if commit:
             self.commit()
 
-        return exceptions
+        return excs
 
     def refresh_all_channels(self, force=False, skip_failures=False, commit=True):
         if not force:
             return self._rss_assisted_refresh(skip_failures=skip_failures, commit=commit)
 
-        exceptions = []
+        excs = []
         for channel in self.get_channels():
             try:
                 channel.refresh(force=force, commit=commit)
             except Exception as exc:
                 if skip_failures:
                     traceback.print_exc()
-                    exceptions.append(exc)
+                    excs.append(exc)
                 else:
                     raise
         if commit:
             self.commit()
 
-        return exceptions
+        return excs
 
 class YCDLSQLMixin:
     def __init__(self):
@@ -313,7 +302,7 @@ class YCDLDBVideoMixin:
 
         video = self.get_video(video_id)
         if video.state != 'pending' and not force:
-            print(f'{video.id} does not need to be downloaded.')
+            self.log.debug('%s does not need to be downloaded.', video_id)
             return
 
         try:
@@ -372,9 +361,9 @@ class YCDLDBVideoMixin:
 
         query = 'SELECT * FROM videos' + wheres + orderbys
 
-        print(query, bindings)
+        self.log.debug(f'{query} {bindings}')
         explain = self.sql_execute('EXPLAIN QUERY PLAN ' + query, bindings)
-        print('\n'.join(str(x) for x in explain.fetchall()))
+        self.log.debug('\n'.join(str(x) for x in explain.fetchall()))
 
         rows = self.sql_select(query, bindings)
         for row in rows:
diff --git a/ycdl/ytrss.py b/ycdl/ytrss.py
index 61d830c..0efe6a3 100644
--- a/ycdl/ytrss.py
+++ b/ycdl/ytrss.py
@@ -2,9 +2,11 @@ import bs4
 import logging
 import requests
 
+from . import exceptions
+
 log = logging.getLogger(__name__)
 
-def get_user_videos(uid):
+def _get_user_videos(uid):
     log.debug(f'Fetching RSS for {uid}.')
     url = f'https://www.youtube.com/feeds/videos.xml?channel_id={uid}'
     response = requests.get(url)
@@ -13,3 +15,30 @@ def get_user_videos(uid):
     # find_all does not work on namespaced tags unless you add a limit paramter.
     video_ids = [v.text for v in soup.find_all('yt:videoid', limit=9999)]
     return video_ids
+
+def get_user_videos(uid):
+    '''
+    Return the video ids listed in the channel's RSS feed.
+    Any exception raised by _get_user_videos is re-raised as RSSAssistFailed
+    so that callers only need to handle one exception type.
+    '''
+    try:
+        return _get_user_videos(uid)
+    except Exception as exc:
+        raise exceptions.RSSAssistFailed(f'Failed to fetch RSS videos for {uid}.') from exc
+
+def get_user_videos_since(uid, most_recent_video):
+    '''
+    Return the video ids that appear in the RSS feed before
+    most_recent_video. Raises RSSAssistFailed if the feed does not contain
+    most_recent_video.
+    '''
+    video_ids = get_user_videos(uid)
+    try:
+        index = video_ids.index(most_recent_video)
+        # log.debug(f'RSS contained {most_recent_video}.')
+    except ValueError:
+        message = f'RSS didn\'t contain {most_recent_video}.'
+        # log.debug(message)
+        raise exceptions.RSSAssistFailed(message)
+    return video_ids[:index]