Add views column and some other database changes.

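Also includes various other changes which had been sitting uncommitted for too long: the channels `automark` column, keyword-only options for `add_channel`, removal of `channel_directory`, a forced channel refresh that re-fetches known videos missing from the uploads listing, and skipping of video records that fail to parse.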
2020-01-06 22:07:25 -08:00 · 2020-01-06 22:07:25 -08:00 · 78ce6a6f41
commit 78ce6a6f41
parent 420a14bb88
4 changed files with 125 additions and 56 deletions
--- a/frontends/ycdl_flask/templates/channel.html
+++ b/frontends/ycdl_flask/templates/channel.html
@ -103,6 +103,7 @@
            <img src="http://i3.ytimg.com/vi/{{video['id']}}/default.jpg" height="100px">
            <a class="video_title" href="https://www.youtube.com/watch?v={{video['id']}}">{{video['_published_str']}} - {{video['title']}}</a>
            <span>({{video['duration'] | seconds_to_hms}})</span>
+            <span>({{video['views']}})</span>
            {% if channel is none %}
            <a href="/channel/{{video['author_id']}}">(Chan)</a>
            {% endif %}
--- a/utilities/database_upgrader.py
+++ b/utilities/database_upgrader.py
@ -5,10 +5,49 @@ import sys

 import ycdl

+def upgrade_3_to_4(sql):
+    '''
+    In this version, the `views` column was added to the videos table.
+
+    The table is rebuilt rather than altered in place: the existing table is
+    renamed to videos_old, a new videos table is created with `views`
+    positioned between `duration` and `thumbnail`, every old row is copied
+    across with NULL for `views`, and then videos_old is dropped.
+
+    sql: an object providing cursor(), whose cursor provides executescript()
+    (presumably a sqlite3 connection -- confirm against the caller).
+
+    NOTE(review): nothing in this function recreates indexes on the videos
+    table or updates user_version; presumably the caller or the next database
+    initialization takes care of both -- confirm.
+    '''
+    cur = sql.cursor()
+    # The INSERT has no column list, so values are assigned by position. The
+    # SELECT therefore lists its columns in the same order as the CREATE TABLE,
+    # with NULL in the slot of the new `views` column.
+    cur.executescript('''
+        ALTER TABLE videos RENAME TO videos_old;
+        CREATE TABLE videos(
+            id TEXT,
+            published INT,
+            author_id TEXT,
+            title TEXT,
+            description TEXT,
+            duration INT,
+            views INT,
+            thumbnail TEXT,
+            download TEXT
+        );
+        INSERT INTO videos SELECT
+            id,
+            published,
+            author_id,
+            title,
+            description,
+            duration,
+            NULL,
+            thumbnail,
+            download
+        FROM videos_old;
+        DROP TABLE videos_old;
+    ''')
+
+def upgrade_2_to_3(sql):
+    '''
+    In this version, a column `automark` was added to the channels table, where
+    you can set channels to automatically mark videos as ignored or downloaded.
+
+    The column is declared TEXT and is added with a single ALTER TABLE
+    statement; no default value is specified for it.
+
+    sql: an object providing cursor(), whose cursor provides execute()
+    (presumably a sqlite3 connection -- confirm against the caller).
+    '''
+    cur = sql.cursor()
+    cur.execute('ALTER TABLE channels ADD COLUMN automark TEXT')
+
 def upgrade_1_to_2(sql):
    '''
-    In this version, a column `tagged_at` was added to the Photos table, to keep
-    track of the last time the photo's tags were edited (added or removed).
+    In this version, the duration column was added.
    '''
    cur = sql.cursor()
    cur.executescript('''
--- a/ycdl/ycdl.py
+++ b/ycdl/ycdl.py
@ -18,27 +18,7 @@ logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
 logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING)
 logging.getLogger('requests.packages.urllib3.util.retry').setLevel(logging.WARNING)

-SQL_CHANNEL_COLUMNS = [
-    'id',
-    'name',
-    'directory',
-]
-
-SQL_VIDEO_COLUMNS = [
-    'id',
-    'published',
-    'author_id',
-    'title',
-    'description',
-    'duration',
-    'thumbnail',
-    'download',
-]
-
-SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)}
-SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)}
-
-DATABASE_VERSION = 2
+DATABASE_VERSION = 4
 DB_INIT = '''
 PRAGMA count_changes = OFF;
 PRAGMA cache_size = 10000;
@ -46,7 +26,8 @@ PRAGMA user_version = {user_version};
 CREATE TABLE IF NOT EXISTS channels(
    id TEXT,
    name TEXT,
-    directory TEXT COLLATE NOCASE
+    directory TEXT COLLATE NOCASE,
+    automark TEXT
 );
 CREATE TABLE IF NOT EXISTS videos(
    id TEXT,
@ -55,6 +36,7 @@ CREATE TABLE IF NOT EXISTS videos(
    title TEXT,
    description TEXT,
    duration INT,
+    views INT,
    thumbnail TEXT,
    download TEXT
 );
@ -67,12 +49,34 @@ CREATE INDEX IF NOT EXISTS index_video_published on videos(published);
 CREATE INDEX IF NOT EXISTS index_video_download on videos(download);
 '''.format(user_version=DATABASE_VERSION)

+SQL_CHANNEL_COLUMNS = [
+    'id',
+    'name',
+    'directory',
+    'automark',
+]
+
+SQL_VIDEO_COLUMNS = [
+    'id',
+    'published',
+    'author_id',
+    'title',
+    'description',
+    'duration',
+    'views',
+    'thumbnail',
+    'download',
+]
+
+SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)}
+SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)}
+
 DEFAULT_DBNAME = 'ycdl.db'

-ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}'
+ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}.'


-def verify_is_abspath(path):
+def assert_is_abspath(path):
    '''
    TO DO: Determine whether this is actually correct.
    '''
@ -86,7 +90,6 @@ class InvalidVideoState(Exception):
 class NoSuchVideo(Exception):
    pass

-
 class YCDL:
    def __init__(self, youtube, database_filename=None, youtube_dl_function=None):
        self.youtube = youtube
@ -119,6 +122,7 @@ class YCDL:
    def add_channel(
            self,
            channel_id,
+            *,
            commit=True,
            download_directory=None,
            get_videos=False,
@ -134,27 +138,24 @@ class YCDL:
        data[SQL_CHANNEL['id']] = channel_id
        data[SQL_CHANNEL['name']] = name
        if download_directory is not None:
-            verify_is_abspath(download_directory)
+            assert_is_abspath(download_directory)
        data[SQL_CHANNEL['directory']] = download_directory

-        self.cur.execute('INSERT INTO channels VALUES(?, ?, ?)', data)
+        self.cur.execute('INSERT INTO channels VALUES(?, ?, ?, ?)', data)
+
        if get_videos:
            self.refresh_channel(channel_id, commit=False)
+
        if commit:
            self.sql.commit()

+        return data
+
    def channel_has_pending(self, channel_id):
-        query = 'SELECT * FROM videos WHERE author_id == ? AND download == "pending"'
+        query = 'SELECT 1 FROM videos WHERE author_id == ? AND download == "pending" LIMIT 1'
        self.cur.execute(query, [channel_id])
        return self.cur.fetchone() is not None

-    def channel_directory(self, channel_id):
-        self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id])
-        fetch = self.cur.fetchone()
-        if fetch is None:
-            return None
-        return fetch[SQL_CHANNEL['directory']]
-
    def download_video(self, video, commit=True, force=False):
        '''
        Execute the `YOUTUBE_DL_COMMAND`, within the channel's associated
@ -187,7 +188,7 @@ class YCDL:
            return

        current_directory = os.getcwd()
-        download_directory = self.channel_directory(channel_id)
+        download_directory = self.get_channel(channel_id)['directory']
        download_directory = download_directory or current_directory

        os.makedirs(download_directory, exist_ok=True)
@ -269,6 +270,7 @@ class YCDL:
            'title': video.title,
            'description': video.description,
            'duration': video.duration,
+            'views': video.views,
            'thumbnail': video.thumbnail['url'],
            'download': download_status,
        }
@ -302,16 +304,35 @@ class YCDL:

    def refresh_all_channels(self, force=False, commit=True):
        for channel in self.get_channels():
-            self.refresh_channel(channel['id'], force=force, commit=commit)
+            self.refresh_channel(channel, force=force, commit=commit)
        if commit:
            self.sql.commit()

-    def refresh_channel(self, channel_id, force=False, commit=True):
-        video_generator = self.youtube.get_user_videos(uid=channel_id)
-        log.debug('Refreshing channel: %s', channel_id)
+    def refresh_channel(self, channel, force=False, commit=True):
+        if isinstance(channel, str):
+            channel = self.get_channel(channel)
+
+        seen_ids = set()
+        video_generator = self.youtube.get_user_videos(uid=channel['id'])
+        log.debug('Refreshing channel: %s', channel['id'])
        for video in video_generator:
+            seen_ids.add(video.id)
            status = self.insert_video(video, commit=False)
+
+            if status['new'] and channel['automark'] is not None:
+                self.mark_video_state(video.id, channel['automark'], commit=False)
+                if channel['automark'] == 'downloaded':
+                    self.download_video(video.id, commit=False)
+
            if not force and not status['new']:
                break
+
+        if force:
+            known_videos = self.get_videos(channel_id=channel['id'])
+            known_ids = {v['id'] for v in known_videos}
+            refresh_ids = list(known_ids.difference(seen_ids))
+            for video in self.youtube.get_video(refresh_ids):
+                self.insert_video(video, commit=False)
+
        if commit:
            self.sql.commit()
--- a/ycdl/ytapi.py
+++ b/ycdl/ytapi.py
@ -21,6 +21,7 @@ class Video:

        snippet = data['snippet']
        content_details = data['contentDetails']
+        statistics = data['statistics']

        self.title = snippet['title'] or '[untitled]'
        self.description = snippet['description']
@ -33,6 +34,7 @@ class Video:
        self.published = published.timestamp()

        self.duration = isodate.parse_duration(content_details['duration']).seconds
+        self.views = statistics['viewCount']

        thumbnails = snippet['thumbnails']
        best_thumbnail = max(thumbnails, key=lambda x: thumbnails[x]['width'] * thumbnails[x]['height'])
@ -66,7 +68,6 @@ class Youtube:
            user = self.youtube.channels().list(part='contentDetails', id=uid).execute()
        upload_playlist = user['items'][0]['contentDetails']['relatedPlaylists']['uploads']
        page_token = None
-        total = 0
        while True:
            response = self.youtube.playlistItems().list(
                maxResults=50,
@ -78,12 +79,11 @@ class Youtube:
            video_ids = [item['contentDetails']['videoId'] for item in response['items']]
            videos = self.get_video(video_ids)
            videos.sort(key=lambda x: x.published, reverse=True)
-            yield from videos

-            count = len(videos)
-            total += count
-            print(f'Found {count} more, {total} total')
-            if page_token is None or count < 50:
+            for video in videos:
+                yield video
+
+            if page_token is None:
                break

    def get_related_videos(self, video_id, count=50):
@ -108,17 +108,25 @@ class Youtube:
        else:
            singular = False

-        results = []
+        snippets = []
        chunks = helpers.chunk_sequence(video_ids, 50)
        for chunk in chunks:
            chunk = ','.join(chunk)
-            data = self.youtube.videos().list(part='id,contentDetails,snippet', id=chunk).execute()
+            data = self.youtube.videos().list(part='id,contentDetails,snippet,statistics', id=chunk).execute()
            items = data['items']
-            results.extend(items)
-        results = [Video(snippet) for snippet in results]
+            snippets.extend(items)
+        videos = []
+        broken = []
+        for snippet in snippets:
+            try:
+                videos.append(Video(snippet))
+            except KeyError:
+                broken.append(snippet)
+        if broken:
+            print('broken:', broken)
        if singular:
-            if len(results) == 1:
-                return results[0]
-            elif len(results) == 0:
+            if len(videos) == 1:
+                return videos[0]
+            elif len(videos) == 0:
                raise VideoNotFound(video_ids[0])
-        return results
+        return videos