import datetime
import googleapiclient.errors
import typing

from voussoirkit import pathclass
from voussoirkit import stringtools
from voussoirkit import vlogging
from voussoirkit import worms

log = vlogging.getLogger(__name__)

from . import constants
from . import exceptions
from . import ytrss

class ObjectBase(worms.Object):
    def __init__(self, ycdldb):
        super().__init__(ycdldb)
        self.ycdldb = ycdldb

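# A Channel represents one row of the channels table. In normal use, instances
# come from the ycdldb layer (for example via ycdldb.get_channel, as used by
# Video.author below) rather than being constructed directly.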
class Channel(ObjectBase):
    table = 'channels'
    no_such_exception = exceptions.NoSuchChannel

    def __init__(self, ycdldb, db_row):
        super().__init__(ycdldb)
        db_row = self.ycdldb.normalize_db_row(db_row, self.table)

        self.id = db_row['id']
        self.name = db_row['name'] or self.id
        self.uploads_playlist = db_row['uploads_playlist']
        self.download_directory = self.normalize_download_directory(
            db_row['download_directory'],
            do_assert=False,
        )
        self.queuefile_extension = self.normalize_queuefile_extension(db_row['queuefile_extension'])
        self.automark = db_row['automark'] or 'pending'
        self.autorefresh = stringtools.truthystring(db_row['autorefresh'])

    def __repr__(self):
        return f'Channel:{self.id}'

    def __str__(self):
        return f'Channel:{self.id}:{self.name}'

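    # The normalize_* staticmethods below validate values before they are
    # stored: wrong types raise TypeError, string inputs are stripped, and an
    # empty or None value becomes None (autorefresh instead coerces strings
    # and ints to a bool via stringtools.truthystring).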
    @staticmethod
    def normalize_autorefresh(autorefresh):
        if isinstance(autorefresh, (str, int)):
            autorefresh = stringtools.truthystring(autorefresh, none_set={})

        if not isinstance(autorefresh, bool):
            raise TypeError(f'autorefresh should be a boolean, not {autorefresh}.')

        return autorefresh

    @staticmethod
    def normalize_download_directory(
            download_directory,
            do_assert=True,
        ) -> typing.Optional[pathclass.Path]:
        if download_directory is None:
            return None

        if not isinstance(download_directory, (str, pathclass.Path)):
            raise TypeError(f'download_directory should be {str} or {pathclass.Path}, not {type(download_directory)}.')

        if isinstance(download_directory, str):
            download_directory = download_directory.strip()
            if not download_directory:
                return None

        download_directory = pathclass.Path(download_directory)
        download_directory.correct_case()

        if do_assert:
            download_directory.assert_is_directory()

        return download_directory

    @staticmethod
    def normalize_name(name):
        if name is None:
            return None

        if not isinstance(name, str):
            raise TypeError(f'name should be {str}, not {type(name)}.')

        name = name.strip()
        if not name:
            return None

        return name

    @staticmethod
    def normalize_queuefile_extension(queuefile_extension) -> typing.Optional[str]:
        if queuefile_extension is None:
            return None

        if not isinstance(queuefile_extension, str):
            raise TypeError(f'queuefile_extension should be {str}, not {type(queuefile_extension)}.')

        queuefile_extension = queuefile_extension.strip()
        if not queuefile_extension:
            return None

        return queuefile_extension

    def _rss_assisted_videos(self):
        '''
        RSS-assisted refresh will use the channel's RSS feed to find videos
        that are newer than the most recent video we have in the database.
        Then, these new videos can be queried using the regular API since the
        RSS doesn't contain all the attributes we need. This saves us from
        wasting any metered API calls in the case that the RSS has nothing new.

        Raises exceptions.RSSAssistFailed for any of these reasons:
        - The channel has no stored videos, so we don't have a reference point
          for the RSS assist.
        - The RSS did not contain the latest stored video (it has become deleted
          or unlisted), so we don't have a reference point.
        - The RSS fetch request experiences any HTTP error.
        - ytrss fails for any other reason.
        '''
        try:
            most_recent_video = self.get_most_recent_video_id()
        except exceptions.NoVideos as exc:
            raise exceptions.RSSAssistFailed(f'Channel has no videos to reference.') from exc

        # This might raise RSSAssistFailed.
        new_ids = ytrss.get_user_videos_since(self.id, most_recent_video)

        if not new_ids:
            return []
        videos = self.ycdldb.youtube.get_videos(new_ids)
        return videos

    @worms.transaction
    def delete(self):
        log.info('Deleting %s.', self)

        self.ycdldb.delete(table='videos', pairs={'author_id': self.id})
        self.ycdldb.delete(table='channels', pairs={'id': self.id})

    def get_most_recent_video_id(self) -> str:
        '''
        Return the ID of this channel's most recent video by publication date.

        Used primarily for the RSS assisted refresh where we check for videos
        newer than the stored videos.
        '''
        query = 'SELECT id FROM videos WHERE author_id == ? ORDER BY published DESC LIMIT 1'
        bindings = [self.id]
        row = self.ycdldb.select_one(query, bindings)
        if row is None:
            raise exceptions.NoVideos(self)
        return row[0]

    def has_pending(self) -> bool:
        '''
        Return True if this channel has any videos in the pending state.

        Used primarily for generating channel listings.
        '''
        query = 'SELECT 1 FROM videos WHERE author_id == ? AND state == "pending" LIMIT 1'
        bindings = [self.id]
        return self.ycdldb.select_one(query, bindings) is not None

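    # jsonify returns a plain-dict summary of the channel, e.g.
    # {'id': ..., 'name': ..., 'automark': 'pending'}.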
    def jsonify(self):
        j = {
            'id': self.id,
            'name': self.name,
            'automark': self.automark,
        }
        return j

    @worms.transaction
    def refresh(self, *, force=False, rss_assisted=True):
        '''
        Fetch new videos on the channel.

        force:
            If True, all of the channel's videos will have their metadata
            re-fetched from the API.
            If False, we will first look for new videos, then refresh any
            individual videos that need special attention (unlisted, premieres,
            livestreams).

        rss_assisted:
            If True, we will use the RSS feed to look for new videos, so that
            we can save some API calls.
            If False, we will only use the tokened Youtube API.
            Has no effect when force=True.
        '''
        log.info('Refreshing %s.', self)

        if force or (not self.uploads_playlist):
            self.reset_uploads_playlist_id()

        if force or not rss_assisted:
            video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)
        else:
            try:
                video_generator = self._rss_assisted_videos()
            except exceptions.RSSAssistFailed as exc:
                log.debug('Caught %s.', exc)
                video_generator = self.ycdldb.youtube.get_playlist_videos(self.uploads_playlist)

        seen_ids = set()

        try:
            for video in video_generator:
                seen_ids.add(video.id)
                status = self.ycdldb.ingest_video(video)

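                # The break below relies on the video source yielding videos
                # newest-first, so the first already-known video means
                # everything older is already in the database.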
                if (not status['new']) and (not force):
                    break
        except googleapiclient.errors.HttpError as exc:
            raise exceptions.ChannelRefreshFailed(channel=self.id, exc=exc)

        # Now we will refresh some other IDs that may not have been refreshed
        # by the previous loop.
        refresh_ids = set()

        # 1. Videos which have become unlisted, therefore not returned by the
        # get_playlist_videos call. Take the set of all known ids minus those
        # refreshed by the earlier loop; the difference will be unlisted,
        # private, or deleted videos. At this time we have no special handling
        # for deleted videos, but they simply won't come back from ytapi.
        if force:
            known_ids = {v.id for v in self.ycdldb.get_videos(channel_id=self.id)}
            refresh_ids.update(known_ids.difference(seen_ids))

        # 2. Premieres or live events which may now be over but were not
        # included in the requested batch of IDs because they are not the most
        # recent.
        query = 'SELECT id FROM videos WHERE author_id == ? AND live_broadcast IS NOT NULL'
        bindings = [self.id]
        premiere_ids = self.ycdldb.select_column(query, bindings)
        refresh_ids.update(premiere_ids)

        if refresh_ids:
            log.debug('Refreshing %d ids separately.', len(refresh_ids))
            # We call ingest_video instead of insert_video so that
            # premieres / livestreams which have finished can be automarked.
            for video in self.ycdldb.youtube.get_videos(refresh_ids):
                self.ycdldb.ingest_video(video)

    def reset_uploads_playlist_id(self):
        '''
        Reset the stored uploads_playlist id with current data from the API.
        '''
        self.uploads_playlist = self.ycdldb.youtube.get_user_uploads_playlist_id(self.id)
        self.set_uploads_playlist_id(self.uploads_playlist)
        return self.uploads_playlist

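    # The set_* methods below all follow the same pattern: validate or
    # normalize the new value, write it to the channels table through
    # ycdldb.update, then update the attribute on this instance.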
    @worms.transaction
    def set_automark(self, state):
        self.ycdldb.assert_valid_state(state)

        pairs = {
            'id': self.id,
            'automark': state,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.automark = state

    @worms.transaction
    def set_autorefresh(self, autorefresh):
        autorefresh = self.normalize_autorefresh(autorefresh)

        pairs = {
            'id': self.id,
            'autorefresh': autorefresh,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.autorefresh = autorefresh

    @worms.transaction
    def set_download_directory(self, download_directory):
        download_directory = self.normalize_download_directory(download_directory)

        pairs = {
            'id': self.id,
            'download_directory': download_directory.absolute_path if download_directory else None,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.download_directory = download_directory

    @worms.transaction
    def set_name(self, name):
        name = self.normalize_name(name)

        pairs = {
            'id': self.id,
            'name': name,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.name = name

    @worms.transaction
    def set_queuefile_extension(self, queuefile_extension):
        queuefile_extension = self.normalize_queuefile_extension(queuefile_extension)

        pairs = {
            'id': self.id,
            'queuefile_extension': queuefile_extension,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.queuefile_extension = queuefile_extension

    @worms.transaction
    def set_uploads_playlist_id(self, playlist_id):
        log.debug('Setting %s upload playlist to %s.', self, playlist_id)
        if not isinstance(playlist_id, str):
            raise TypeError(f'Playlist id must be a string, not {type(playlist_id)}.')

        pairs = {
            'id': self.id,
            'uploads_playlist': playlist_id,
        }
        self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
        self.uploads_playlist = playlist_id

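# A rough usage sketch for Channel (the object names here are hypothetical;
# the lookup, setter, and refresh calls are the ones used or defined above):
#
#     channel = ycdldb.get_channel('UC...')
#     channel.set_automark('downloaded')
#     channel.refresh(rss_assisted=True)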
class Video(ObjectBase):
    table = 'videos'
    no_such_exception = exceptions.NoSuchVideo

    def __init__(self, ycdldb, db_row):
        super().__init__(ycdldb)
        db_row = self.ycdldb.normalize_db_row(db_row, self.table)

        self.id = db_row['id']
        self.published = db_row['published']
        self.author_id = db_row['author_id']
        self.title = db_row['title']
        self.description = db_row['description']
        self.duration = db_row['duration']
        self.views = db_row['views']
        self.thumbnail = db_row['thumbnail']
        self.live_broadcast = db_row['live_broadcast']
        self.state = db_row['state']

    def __repr__(self):
        return f'Video:{self.id}'

    @property
    def author(self):
        try:
            return self.ycdldb.get_channel(self.author_id)
        except exceptions.NoSuchChannel:
            return None

    @worms.transaction
    def delete(self):
        log.info('Deleting %s.', self)

        self.ycdldb.delete(table='videos', pairs={'id': self.id})

    def jsonify(self):
        j = {
            'id': self.id,
            'published': self.published,
            'author_id': self.author_id,
            'title': self.title,
            'description': self.description,
            'duration': self.duration,
            'views': self.views,
            'thumbnail': self.thumbnail,
            'state': self.state,
        }
        return j

    @worms.transaction
    def mark_state(self, state):
        '''
        Mark the video as ignored, pending, or downloaded.

        Note: Marking as downloaded will not create the queue file; this only
        updates the database. See ycdldb.download_video.
        '''
        self.ycdldb.assert_valid_state(state)

        log.info('Marking %s as %s.', self, state)

        pairs = {
            'id': self.id,
            'state': state,
        }
        self.state = state
        self.ycdldb.update(table='videos', pairs=pairs, where_key='id')

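    # published is stored as a unix timestamp; published_string renders it as
    # a UTC date in YYYY-MM-DD form for display.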
    @property
    def published_string(self):
        published = self.published
        published = datetime.datetime.utcfromtimestamp(published)
        published = published.strftime('%Y-%m-%d')
        return published
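# A rough usage sketch for Video (hypothetical loop; get_videos and mark_state
# are the calls used or defined in this file):
#
#     for video in ycdldb.get_videos(channel_id=channel.id):
#         if video.state == 'pending':
#             video.mark_state('downloaded')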