ycdl/ycdl/objects.py

396 lines
13 KiB
Python

import datetime
import googleapiclient.errors
import typing
from voussoirkit import pathclass
from voussoirkit import stringtools
from voussoirkit import vlogging
from voussoirkit import worms
log = vlogging.getLogger(__name__)
from . import constants
from . import exceptions
from . import ytrss
class ObjectBase(worms.Object):
    '''
    Shared parent of the database-backed objects defined in this module.

    The database handle is forwarded to worms.Object and is also kept on the
    instance as `ycdldb`, which is the name the subclasses use.
    '''

    def __init__(self, ycdldb):
        super().__init__(ycdldb)
        # Subclasses reach the database through this attribute.
        self.ycdldb = ycdldb
class Channel(ObjectBase):
    '''
    One row of the `channels` table, along with the operations for refreshing
    the channel's videos and editing its stored settings.
    '''
    table = 'channels'
    no_such_exception = exceptions.NoSuchChannel

    def __init__(self, ycdldb, db_row):
        super().__init__(ycdldb)
        db_row = self.ycdldb.normalize_db_row(db_row, self.table)

        self.id = db_row['id']
        # A channel with no stored name is shown by its id instead.
        self.name = db_row['name'] or self.id
        self.uploads_playlist = db_row['uploads_playlist']
        # do_assert=False: a stored directory that is missing from disk should
        # not prevent the channel object from being constructed.
        self.download_directory = self.normalize_download_directory(
            db_row['download_directory'],
            do_assert=False,
        )
        self.queuefile_extension = self.normalize_queuefile_extension(db_row['queuefile_extension'])
        self.automark = db_row['automark'] or 'pending'
        self.autorefresh = stringtools.truthystring(db_row['autorefresh'])

    def __repr__(self):
        return f'Channel:{self.id}'

    def __str__(self):
        return f'Channel:{self.id}:{self.name}'

    @staticmethod
    def normalize_autorefresh(autorefresh):
        '''
        Return autorefresh as a boolean. str and int inputs are first passed
        through stringtools.truthystring.

        Raises TypeError if the value is not a bool after that conversion.
        '''
        if isinstance(autorefresh, (str, int)):
            autorefresh = stringtools.truthystring(autorefresh, none_set={})

        if not isinstance(autorefresh, bool):
            raise TypeError(f'autorefresh should be a boolean, not {autorefresh}.')

        return autorefresh

    @staticmethod
    def normalize_download_directory(
            download_directory,
            do_assert=True,
        ) -> typing.Optional[pathclass.Path]:
        '''
        Return download_directory as a pathclass.Path, or None if it is None
        or a blank string.

        do_assert:
            If True, assert_is_directory is called on the resulting path
            before it is returned.

        Raises TypeError if download_directory is neither str nor
        pathclass.Path.
        '''
        if download_directory is None:
            return None

        if not isinstance(download_directory, (str, pathclass.Path)):
            raise TypeError(f'download_directory should be {str} or {pathclass.Path}, not {type(download_directory)}.')

        if isinstance(download_directory, str):
            download_directory = download_directory.strip()
            if not download_directory:
                return None

        download_directory = pathclass.Path(download_directory)
        download_directory.correct_case()

        if do_assert:
            download_directory.assert_is_directory()

        return download_directory

    @staticmethod
    def normalize_name(name):
        '''
        Return name with surrounding whitespace stripped, or None if it is
        None or blank.

        Raises TypeError if name is not None and not a str.
        '''
        if name is None:
            return None

        if not isinstance(name, str):
            raise TypeError(f'name should be {str}, not {type(name)}.')

        name = name.strip()
        if not name:
            return None

        return name

    @staticmethod
    def normalize_queuefile_extension(queuefile_extension) -> typing.Optional[str]:
        '''
        Return queuefile_extension with surrounding whitespace stripped, or
        None if it is None or blank.

        Raises TypeError if queuefile_extension is not None and not a str.
        '''
        if queuefile_extension is None:
            return None

        if not isinstance(queuefile_extension, str):
            raise TypeError(f'queuefile_extension should be {str}, not {type(queuefile_extension)}.')

        queuefile_extension = queuefile_extension.strip()
        if not queuefile_extension:
            return None

        return queuefile_extension

    def _rss_assisted_videos(self):
        '''
        RSS-assisted refresh will use the channel's RSS feed to find videos
        that are newer than the most recent video we have in the database.
        Then, these new videos can be queried using the regular API since the
        RSS doesn't contain all the attributes we need. This saves us from
        wasting any metered API calls in the case that the RSS has nothing new.

        Raises exceptions.RSSAssistFailed for any of these reasons:
        - The channel has no stored videos, so we don't have a reference point
          for the RSS assist.
        - The RSS did not contain the latest stored video (it has become deleted
          or unlisted), so we don't have a reference point.
        - The RSS fetch request experiences any HTTP error.
        - ytrss fails for any other reason.
        '''
        try:
            most_recent_video = self.get_most_recent_video_id()
        except exceptions.NoVideos as exc:
            raise exceptions.RSSAssistFailed('Channel has no videos to reference.') from exc

        # This might raise RSSAssistFailed.
        new_ids = ytrss.get_user_videos_since(self.id, most_recent_video)
        if not new_ids:
            # Nothing new in the feed, so no API call is made at all.
            return []

        videos = self.ycdldb.youtube.get_videos(new_ids)
        return videos

    @worms.transaction
    def delete(self):
        '''
        Delete this channel's rows from the videos table, then the channel's
        own row from the channels table.
        '''
        log.info('Deleting %s.', self)
        self.ycdldb.delete(table='videos', pairs={'author_id': self.id})
        self.ycdldb.delete(table='channels', pairs={'id': self.id})

    def get_most_recent_video_id(self) -> str:
        '''
        Return the ID of this channel's most recent video by publication date.

        Used primarily for the RSS assisted refresh where we check for videos
        newer than the stored videos.

        Raises exceptions.NoVideos if the channel has no stored videos.
        '''
        query = 'SELECT id FROM videos WHERE author_id == ? ORDER BY published DESC LIMIT 1'
        bindings = [self.id]
        row = self.ycdldb.select_one(query, bindings)
        if row is None:
            raise exceptions.NoVideos(self)
        return row[0]

    def has_pending(self) -> bool:
        '''
        Return True if this channel has any videos in the pending state.

        Used primarily for generating channel listings.
        '''
        # The state is bound as a parameter instead of being written as
        # "pending" in the SQL: double quotes denote identifiers in SQL, and
        # SQLite only treats them as string literals as a legacy fallback.
        query = 'SELECT 1 FROM videos WHERE author_id == ? AND state == ? LIMIT 1'
        bindings = [self.id, 'pending']
        return self.ycdldb.select_one(query, bindings) is not None

    def jsonify(self):
        '''
        Return a dict with this channel's id, name, and automark.
        '''
        j = {
            'id': self.id,
            'name': self.name,
            'automark': self.automark,
        }
        return j

    @worms.transaction
    def refresh(self, *, force=False, rss_assisted=True):
        '''
        Fetch new videos on the channel.

        force:
            If True, all of the channel's videos will be re-downloaded.
            If False, we will first look for new videos, then refresh any
            individual videos that need special attention (unlisted, premieres,
            livestreams).

        rss_assisted:
            If True, we will use the RSS feed to look for new videos, so that
            we can save some API calls.
            If False, we will only use the tokened Youtube API.
            Has no effect when force=True.

        Raises exceptions.ChannelRefreshFailed if the API raises
        googleapiclient.errors.HttpError while the videos are being ingested.
        '''
        log.info('Refreshing %s.', self)

        if force or (not self.uploads_playlist):
            self.reset_uploads_playlist_id()

        if force or not rss_assisted:
            video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)
        else:
            try:
                video_generator = self._rss_assisted_videos()
            except exceptions.RSSAssistFailed as exc:
                # Fall back to the regular playlist listing.
                log.debug('Caught %s.', exc)
                video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)

        seen_ids = set()

        try:
            for video in video_generator:
                seen_ids.add(video.id)
                status = self.ycdldb.ingest_video(video)

                # Unless forced, stop at the first video we already had.
                if (not status['new']) and (not force):
                    break
        except googleapiclient.errors.HttpError as exc:
            raise exceptions.ChannelRefreshFailed(channel=self.id, exc=exc) from exc

        # Now we will refresh some other IDs that may not have been refreshed
        # by the previous loop.
        refresh_ids = set()

        # 1. Videos which have become unlisted, therefore not returned by the
        # get_playlist_videos call. Take the set of all known ids minus those
        # refreshed by the earlier loop, the difference will be unlisted,
        # private, or deleted videos. At this time we have no special handling
        # for deleted videos, but they simply won't come back from ytapi.
        if force:
            known_ids = {v.id for v in self.ycdldb.get_videos(channel_id=self.id)}
            refresh_ids.update(known_ids.difference(seen_ids))

        # 2. Premieres or live events which may now be over but were not
        # included in the requested batch of IDs because they are not the most
        # recent.
        query = 'SELECT id FROM videos WHERE author_id == ? AND live_broadcast IS NOT NULL'
        bindings = [self.id]
        premiere_ids = self.ycdldb.select_column(query, bindings)
        refresh_ids.update(premiere_ids)

        if refresh_ids:
            log.debug('Refreshing %d ids separately.', len(refresh_ids))

            # We call ingest_video instead of insert_video so that
            # premieres / livestreams which have finished can be automarked.
            for video in self.ycdldb.youtube.get_videos(refresh_ids):
                self.ycdldb.ingest_video(video)

    def reset_uploads_playlist_id(self):
        '''
        Reset the stored uploads_playlist id with current data from the API.

        Returns the new playlist id.
        '''
        playlist_id = self.ycdldb.youtube.get_user_uploads_playlist_id(self.id)
        # Let the setter validate and persist the value before the attribute
        # changes, so a failure does not leave this object holding an id that
        # was never written to the database.
        self.set_uploads_playlist_id(playlist_id)
        return self.uploads_playlist

    @worms.transaction
    def set_automark(self, state):
        '''
        Store the state that this channel's videos should be automarked with.
        The state is checked with ycdldb.assert_valid_state first.
        '''
        self.ycdldb.assert_valid_state(state)

        pairs = {
            'id': self.id,
            'automark': state,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.automark = state

    @worms.transaction
    def set_autorefresh(self, autorefresh):
        '''
        Store the autorefresh flag after passing it through
        normalize_autorefresh.
        '''
        autorefresh = self.normalize_autorefresh(autorefresh)

        pairs = {
            'id': self.id,
            'autorefresh': autorefresh,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.autorefresh = autorefresh

    @worms.transaction
    def set_download_directory(self, download_directory):
        '''
        Store the download directory after passing it through
        normalize_download_directory. The database receives the absolute
        path, or None when the directory is being cleared.
        '''
        download_directory = self.normalize_download_directory(download_directory)

        pairs = {
            'id': self.id,
            'download_directory': download_directory.absolute_path if download_directory else None,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.download_directory = download_directory

    @worms.transaction
    def set_name(self, name):
        '''
        Store the channel's name after passing it through normalize_name.

        When the name is cleared, the database receives None and self.name
        falls back to the channel id, the same as in __init__.
        '''
        name = self.normalize_name(name)

        pairs = {
            'id': self.id,
            'name': name,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        # Keep the in-memory object identical to what a fresh load of this
        # row would produce.
        self.name = name or self.id

    @worms.transaction
    def set_queuefile_extension(self, queuefile_extension):
        '''
        Store the queuefile extension after passing it through
        normalize_queuefile_extension.
        '''
        queuefile_extension = self.normalize_queuefile_extension(queuefile_extension)

        pairs = {
            'id': self.id,
            'queuefile_extension': queuefile_extension,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.queuefile_extension = queuefile_extension

    @worms.transaction
    def set_uploads_playlist_id(self, playlist_id):
        '''
        Store the id of the channel's uploads playlist.

        Raises TypeError if playlist_id is not a str.
        '''
        log.debug('Setting %s upload playlist to %s.', self, playlist_id)
        if not isinstance(playlist_id, str):
            raise TypeError(f'Playlist id must be a string, not {type(playlist_id)}.')

        pairs = {
            'id': self.id,
            'uploads_playlist': playlist_id,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.uploads_playlist = playlist_id
class Video(ObjectBase):
    '''
    One row of the `videos` table.
    '''
    table = 'videos'
    no_such_exception = exceptions.NoSuchVideo

    def __init__(self, ycdldb, db_row):
        super().__init__(ycdldb)
        db_row = self.ycdldb.normalize_db_row(db_row, self.table)

        self.id = db_row['id']
        self.published = db_row['published']
        self.author_id = db_row['author_id']
        self.title = db_row['title']
        self.description = db_row['description']
        self.duration = db_row['duration']
        self.views = db_row['views']
        self.thumbnail = db_row['thumbnail']
        self.live_broadcast = db_row['live_broadcast']
        self.state = db_row['state']

    def __repr__(self):
        return f'Video:{self.id}'

    @property
    def author(self):
        '''
        The Channel object for this video's author_id, or None if the
        database raises NoSuchChannel for it.
        '''
        try:
            return self.ycdldb.get_channel(self.author_id)
        except exceptions.NoSuchChannel:
            return None

    @worms.transaction
    def delete(self):
        '''
        Delete this video's row from the videos table.
        '''
        log.info('Deleting %s.', self)
        self.ycdldb.delete(table='videos', pairs={'id': self.id})

    def jsonify(self):
        '''
        Return a dict of this video's stored attributes.
        '''
        j = {
            'id': self.id,
            'published': self.published,
            'author_id': self.author_id,
            'title': self.title,
            'description': self.description,
            'duration': self.duration,
            'views': self.views,
            'thumbnail': self.thumbnail,
            'state': self.state,
        }
        return j

    @worms.transaction
    def mark_state(self, state):
        '''
        Mark the video as ignored, pending, or downloaded.

        Note: Marking as downloaded will not create the queue file, this only
        updates the database. See ycdldb.download_video.
        '''
        self.ycdldb.assert_valid_state(state)

        log.info('Marking %s as %s.', self, state)

        pairs = {
            'id': self.id,
            'state': state,
        }
        # Write to the database first so that a failed update does not leave
        # this object claiming a state that was never stored.
        self.ycdldb.update(table='videos', pairs=pairs, where_key='id')
        self.state = state

    @property
    def published_string(self):
        '''
        The publication timestamp formatted as a YYYY-MM-DD date in UTC.
        '''
        # utcfromtimestamp is deprecated since Python 3.12. The timezone-aware
        # call below yields the same calendar date.
        published = datetime.datetime.fromtimestamp(self.published, tz=datetime.timezone.utc)
        return published.strftime('%Y-%m-%d')