Add RSS-assisted refresh to single channel refresh method.

This commit is contained in:
voussoir 2020-10-02 11:13:02 -07:00
parent d1b42b71cb
commit 7910e12bc8
4 changed files with 57 additions and 27 deletions

View file

@ -57,6 +57,11 @@ class NoSuchVideo(YCDLException):
class InvalidVideoState(YCDLException): class InvalidVideoState(YCDLException):
error_message = '{} is not a valid state.' error_message = '{} is not a valid state.'
# RSS ERRORS #######################################################################################
class RSSAssistFailed(YCDLException):
    '''
    Raised when a channel's RSS feed cannot be used to determine its new
    videos, either because the feed could not be obtained or because it does
    not contain the most recent video already known for the channel.
    The refresh code catches this and falls back to a traditional refresh.
    '''
    # NOTE(review): presumably the base class formats this template with the
    # constructor arguments, so the caller's message is shown as-is -- confirm
    # against YCDLException.
    error_message = '{}'
# SQL ERRORS ####################################################################################### # SQL ERRORS #######################################################################################
class BadSQL(YCDLException): class BadSQL(YCDLException):

View file

@ -1,5 +1,6 @@
from . import constants from . import constants
from . import exceptions from . import exceptions
from . import ytrss
def normalize_db_row(db_row, table): def normalize_db_row(db_row, table):
if isinstance(db_row, (list, tuple)): if isinstance(db_row, (list, tuple)):
@ -25,6 +26,12 @@ class Channel(Base):
self.queuefile_extension = db_row['queuefile_extension'] self.queuefile_extension = db_row['queuefile_extension']
self.automark = db_row['automark'] or "pending" self.automark = db_row['automark'] or "pending"
def _rss_assisted_videos(self):
    '''
    Return the videos for this channel that the RSS feed lists ahead of the
    most recent video we already have stored.

    ytrss.get_user_videos_since raises exceptions.RSSAssistFailed when the
    feed cannot be used; that exception is allowed to propagate so the caller
    can fall back to a different refresh strategy.
    '''
    latest_known_id = self.get_most_recent_video_id()
    unseen_ids = ytrss.get_user_videos_since(self.id, latest_known_id)
    return self.ycdldb.youtube.get_videos(unseen_ids)
def delete(self, commit=True): def delete(self, commit=True):
self.ycdldb.sql_delete(table='videos', pairs={'author_id': self.id}) self.ycdldb.sql_delete(table='videos', pairs={'author_id': self.id})
self.ycdldb.sql_delete(table='channels', pairs={'id': self.id}) self.ycdldb.sql_delete(table='channels', pairs={'id': self.id})
@ -32,6 +39,12 @@ class Channel(Base):
if commit: if commit:
self.ycdldb.commit() self.ycdldb.commit()
def get_most_recent_video_id(self):
    '''
    Return the id of the most recently published video stored in the
    database for this channel, or None if the channel has no stored videos.
    '''
    query = 'SELECT id FROM videos WHERE author_id == ? ORDER BY published DESC LIMIT 1'
    bindings = [self.id]
    row = self.ycdldb.sql_select_one(query, bindings)
    # A channel with no videos yet has no matching row. Indexing the missing
    # row would raise TypeError, so report "no video" explicitly instead.
    # NOTE(review): assumes sql_select_one returns None when nothing matches,
    # like sqlite3's fetchone -- confirm.
    if row is None:
        return None
    return row[0]
def has_pending(self): def has_pending(self):
query = 'SELECT 1 FROM videos WHERE author_id == ? AND state == "pending" LIMIT 1' query = 'SELECT 1 FROM videos WHERE author_id == ? AND state == "pending" LIMIT 1'
bindings = [self.id] bindings = [self.id]
@ -44,8 +57,12 @@ class Channel(Base):
self.uploads_playlist = self.ycdldb.youtube.get_user_uploads_playlist_id(self.id) self.uploads_playlist = self.ycdldb.youtube.get_user_uploads_playlist_id(self.id)
self.set_uploads_playlist_id(self.uploads_playlist) self.set_uploads_playlist_id(self.uploads_playlist)
seen_ids = set() try:
video_generator = self._rss_assisted_videos()
except exceptions.RSSAssistFailed:
video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist) video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)
seen_ids = set()
for video in video_generator: for video in video_generator:
seen_ids.add(video.id) seen_ids.add(video.id)
status = self.ycdldb.ingest_video(video, commit=False) status = self.ycdldb.ingest_video(video, commit=False)

View file

@ -170,8 +170,7 @@ class YCDLDBChannelMixin:
paginate. So, for any channel with more than 14 new videos, we'll paginate. So, for any channel with more than 14 new videos, we'll
do a traditional refresh. do a traditional refresh.
''' '''
query = 'SELECT id FROM videos WHERE author_id == ? ORDER BY published DESC LIMIT 1' excs = []
exceptions = []
def traditional(channel): def traditional(channel):
try: try:
@ -179,56 +178,46 @@ class YCDLDBChannelMixin:
except Exception as exc: except Exception as exc:
if skip_failures: if skip_failures:
traceback.print_exc() traceback.print_exc()
exceptions.append(exc) excs.append(exc)
else: else:
raise raise
def gen(): def assisted():
for channel in self.get_channels(): for channel in self.get_channels():
most_recent_id = self.sql_select_one(query, [channel.id])[0] most_recent_video = channel.get_most_recent_video_id()
try: try:
rss_ids = ytrss.get_user_videos(channel.id) new_ids = ytrss.get_user_videos_since(channel.id, most_recent_video)
except Exception: except exceptions.RSSAssistFailed:
# traceback.print_exc()
traditional(channel) traditional(channel)
continue continue
try:
index = rss_ids.index(most_recent_id)
except ValueError:
self.log.debug('RSS didn\'t contain %s. Calling API refresh.', most_recent_id)
traditional(channel)
continue
new_ids = rss_ids[:index]
yield from new_ids yield from new_ids
for video in self.youtube.get_videos(gen()): for video in self.youtube.get_videos(assisted()):
self.ingest_video(video, commit=False) self.ingest_video(video, commit=False)
if commit: if commit:
self.commit() self.commit()
return exceptions return excs
def refresh_all_channels(self, force=False, skip_failures=False, commit=True): def refresh_all_channels(self, force=False, skip_failures=False, commit=True):
if not force: if not force:
return self._rss_assisted_refresh(skip_failures=skip_failures, commit=commit) return self._rss_assisted_refresh(skip_failures=skip_failures, commit=commit)
exceptions = [] excs = []
for channel in self.get_channels(): for channel in self.get_channels():
try: try:
channel.refresh(force=force, commit=commit) channel.refresh(force=force, commit=commit)
except Exception as exc: except Exception as exc:
if skip_failures: if skip_failures:
traceback.print_exc() traceback.print_exc()
exceptions.append(exc) excs.append(exc)
else: else:
raise raise
if commit: if commit:
self.commit() self.commit()
return exceptions return excs
class YCDLSQLMixin: class YCDLSQLMixin:
def __init__(self): def __init__(self):
@ -313,7 +302,7 @@ class YCDLDBVideoMixin:
video = self.get_video(video_id) video = self.get_video(video_id)
if video.state != 'pending' and not force: if video.state != 'pending' and not force:
print(f'{video.id} does not need to be downloaded.') self.ycdldb.log.debug('%s does not need to be downloaded.', video_id)
return return
try: try:
@ -372,9 +361,9 @@ class YCDLDBVideoMixin:
query = 'SELECT * FROM videos' + wheres + orderbys query = 'SELECT * FROM videos' + wheres + orderbys
print(query, bindings) self.log.debug(f'{query} {bindings}')
explain = self.sql_execute('EXPLAIN QUERY PLAN ' + query, bindings) explain = self.sql_execute('EXPLAIN QUERY PLAN ' + query, bindings)
print('\n'.join(str(x) for x in explain.fetchall())) self.log.debug('\n'.join(str(x) for x in explain.fetchall()))
rows = self.sql_select(query, bindings) rows = self.sql_select(query, bindings)
for row in rows: for row in rows:

View file

@ -2,9 +2,11 @@ import bs4
import logging import logging
import requests import requests
from . import exceptions
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def get_user_videos(uid): def _get_user_videos(uid):
log.debug(f'Fetching RSS for {uid}.') log.debug(f'Fetching RSS for {uid}.')
url = f'https://www.youtube.com/feeds/videos.xml?channel_id={uid}' url = f'https://www.youtube.com/feeds/videos.xml?channel_id={uid}'
response = requests.get(url) response = requests.get(url)
@ -13,3 +15,20 @@ def get_user_videos(uid):
# find_all does not work on namespaced tags unless you add a limit parameter. # find_all does not work on namespaced tags unless you add a limit parameter.
video_ids = [v.text for v in soup.find_all('yt:videoid', limit=9999)] video_ids = [v.text for v in soup.find_all('yt:videoid', limit=9999)]
return video_ids return video_ids
def get_user_videos(uid):
    '''
    Return the list of video ids from the RSS feed of the channel `uid`.

    Raises exceptions.RSSAssistFailed, chained to the underlying error, if
    the feed could not be fetched or parsed for any reason. This gives
    callers a single exception type to catch before falling back to another
    refresh method.
    '''
    try:
        return _get_user_videos(uid)
    except Exception as exc:
        # The exception must be bound with `as exc` for `from exc` to work;
        # otherwise the raise statement itself fails with NameError and the
        # caller's RSSAssistFailed handler never runs.
        raise exceptions.RSSAssistFailed(f'Failed to fetch RSS for {uid}.') from exc
def get_user_videos_since(uid, most_recent_video):
    '''
    Return the video ids that appear in the RSS feed of channel `uid` before
    the entry for `most_recent_video`.

    Raises exceptions.RSSAssistFailed if the feed does not contain
    `most_recent_video`, since then the feed alone cannot tell us where the
    new videos end. Errors from get_user_videos propagate unchanged.

    NOTE(review): treating everything before the known id as "new" assumes
    the feed is ordered newest first -- confirm.
    '''
    feed_ids = get_user_videos(uid)
    try:
        cutoff = feed_ids.index(most_recent_video)
    except ValueError:
        raise exceptions.RSSAssistFailed(f'RSS didn\'t contain {most_recent_video}.')
    else:
        return feed_ids[:cutoff]