Add views column and some other database changes.

And various other changes which have been sitting here for too long.
This commit is contained in:
voussoir 2020-01-06 22:07:25 -08:00
parent 420a14bb88
commit 78ce6a6f41
4 changed files with 125 additions and 56 deletions

View file

@ -103,6 +103,7 @@
<img src="http://i3.ytimg.com/vi/{{video['id']}}/default.jpg" height="100px"> <img src="http://i3.ytimg.com/vi/{{video['id']}}/default.jpg" height="100px">
<a class="video_title" href="https://www.youtube.com/watch?v={{video['id']}}">{{video['_published_str']}} - {{video['title']}}</a> <a class="video_title" href="https://www.youtube.com/watch?v={{video['id']}}">{{video['_published_str']}} - {{video['title']}}</a>
<span>({{video['duration'] | seconds_to_hms}})</span> <span>({{video['duration'] | seconds_to_hms}})</span>
<span>({{video['views']}})</span>
{% if channel is none %} {% if channel is none %}
<a href="/channel/{{video['author_id']}}">(Chan)</a> <a href="/channel/{{video['author_id']}}">(Chan)</a>
{% endif %} {% endif %}

View file

@ -5,10 +5,49 @@ import sys
import ycdl import ycdl
def upgrade_3_to_4(sql):
    '''
    In this version, the views column was added.

    The videos table is rebuilt instead of using ADD COLUMN so that `views`
    lands between `duration` and `thumbnail`. Rows that already exist get
    NULL for views.

    sql: an open sqlite3 connection to the database being upgraded.

    The whole rebuild runs inside one explicit transaction. If any statement
    fails, the transaction is rolled back, the original videos table is left
    untouched, and the exception is re-raised.
    '''
    cur = sql.cursor()
    try:
        # executescript does not open a transaction by itself, so without the
        # explicit BEGIN / COMMIT a failure partway through would leave the
        # database with a videos_old table and an empty videos table.
        #
        # NOTE(review): SQLite drops a table's indexes together with the
        # table, so indexes that belonged to the old videos table are gone
        # after this runs. Confirm that the database init script's
        # CREATE INDEX IF NOT EXISTS statements are re-run after upgrading.
        cur.executescript('''
        BEGIN;
        ALTER TABLE videos RENAME TO videos_old;
        CREATE TABLE videos(
            id TEXT,
            published INT,
            author_id TEXT,
            title TEXT,
            description TEXT,
            duration INT,
            views INT,
            thumbnail TEXT,
            download TEXT
        );
        INSERT INTO videos SELECT
            id,
            published,
            author_id,
            title,
            description,
            duration,
            NULL,
            thumbnail,
            download
        FROM videos_old;
        DROP TABLE videos_old;
        COMMIT;
        ''')
    except Exception:
        # Undo the partial rebuild before letting the caller see the error.
        sql.rollback()
        raise
def upgrade_2_to_3(sql):
    '''
    Version 3 gives the channels table an `automark` column, where you can
    set channels to automatically mark videos as ignored or downloaded.

    sql: an open sqlite3 connection to the database being upgraded.
    '''
    add_automark = 'ALTER TABLE channels ADD COLUMN automark TEXT'
    cursor = sql.cursor()
    cursor.execute(add_automark)
def upgrade_1_to_2(sql): def upgrade_1_to_2(sql):
''' '''
In this version, a column `tagged_at` was added to the Photos table, to keep In this version, the duration column was added.
track of the last time the photo's tags were edited (added or removed).
''' '''
cur = sql.cursor() cur = sql.cursor()
cur.executescript(''' cur.executescript('''

View file

@ -18,27 +18,7 @@ logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING) logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING)
logging.getLogger('requests.packages.urllib3.util.retry').setLevel(logging.WARNING) logging.getLogger('requests.packages.urllib3.util.retry').setLevel(logging.WARNING)
SQL_CHANNEL_COLUMNS = [ DATABASE_VERSION = 4
'id',
'name',
'directory',
]
SQL_VIDEO_COLUMNS = [
'id',
'published',
'author_id',
'title',
'description',
'duration',
'thumbnail',
'download',
]
SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)}
SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)}
DATABASE_VERSION = 2
DB_INIT = ''' DB_INIT = '''
PRAGMA count_changes = OFF; PRAGMA count_changes = OFF;
PRAGMA cache_size = 10000; PRAGMA cache_size = 10000;
@ -46,7 +26,8 @@ PRAGMA user_version = {user_version};
CREATE TABLE IF NOT EXISTS channels( CREATE TABLE IF NOT EXISTS channels(
id TEXT, id TEXT,
name TEXT, name TEXT,
directory TEXT COLLATE NOCASE directory TEXT COLLATE NOCASE,
automark TEXT
); );
CREATE TABLE IF NOT EXISTS videos( CREATE TABLE IF NOT EXISTS videos(
id TEXT, id TEXT,
@ -55,6 +36,7 @@ CREATE TABLE IF NOT EXISTS videos(
title TEXT, title TEXT,
description TEXT, description TEXT,
duration INT, duration INT,
views INT,
thumbnail TEXT, thumbnail TEXT,
download TEXT download TEXT
); );
@ -67,12 +49,34 @@ CREATE INDEX IF NOT EXISTS index_video_published on videos(published);
CREATE INDEX IF NOT EXISTS index_video_download on videos(download); CREATE INDEX IF NOT EXISTS index_video_download on videos(download);
'''.format(user_version=DATABASE_VERSION) '''.format(user_version=DATABASE_VERSION)
# Column names of the channels table, in the order the table declares them.
SQL_CHANNEL_COLUMNS = ['id', 'name', 'directory', 'automark']

# Column names of the videos table, in the order the table declares them.
SQL_VIDEO_COLUMNS = [
    'id', 'published', 'author_id',
    'title', 'description', 'duration',
    'views', 'thumbnail', 'download',
]

# Column name -> position within a row, so that row tuples can be indexed by
# name, e.g. row[SQL_VIDEO['title']].
SQL_CHANNEL = dict(zip(SQL_CHANNEL_COLUMNS, range(len(SQL_CHANNEL_COLUMNS))))
SQL_VIDEO = dict(zip(SQL_VIDEO_COLUMNS, range(len(SQL_VIDEO_COLUMNS))))
DEFAULT_DBNAME = 'ycdl.db' DEFAULT_DBNAME = 'ycdl.db'
ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}' ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}.'
def verify_is_abspath(path): def assert_is_abspath(path):
''' '''
TO DO: Determine whether this is actually correct. TO DO: Determine whether this is actually correct.
''' '''
@ -86,7 +90,6 @@ class InvalidVideoState(Exception):
class NoSuchVideo(Exception): class NoSuchVideo(Exception):
pass pass
class YCDL: class YCDL:
def __init__(self, youtube, database_filename=None, youtube_dl_function=None): def __init__(self, youtube, database_filename=None, youtube_dl_function=None):
self.youtube = youtube self.youtube = youtube
@ -119,6 +122,7 @@ class YCDL:
def add_channel( def add_channel(
self, self,
channel_id, channel_id,
*,
commit=True, commit=True,
download_directory=None, download_directory=None,
get_videos=False, get_videos=False,
@ -134,27 +138,24 @@ class YCDL:
data[SQL_CHANNEL['id']] = channel_id data[SQL_CHANNEL['id']] = channel_id
data[SQL_CHANNEL['name']] = name data[SQL_CHANNEL['name']] = name
if download_directory is not None: if download_directory is not None:
verify_is_abspath(download_directory) assert_is_abspath(download_directory)
data[SQL_CHANNEL['directory']] = download_directory data[SQL_CHANNEL['directory']] = download_directory
self.cur.execute('INSERT INTO channels VALUES(?, ?, ?)', data) self.cur.execute('INSERT INTO channels VALUES(?, ?, ?, ?)', data)
if get_videos: if get_videos:
self.refresh_channel(channel_id, commit=False) self.refresh_channel(channel_id, commit=False)
if commit: if commit:
self.sql.commit() self.sql.commit()
return data
def channel_has_pending(self, channel_id):
    '''
    Return True if at least one video whose author_id is `channel_id` has
    the download state 'pending', otherwise False.
    '''
    # The state is bound as a parameter instead of being written as
    # "pending". SQLite reads a double-quoted token as an identifier first
    # and only falls back to a string literal when no such column exists, so
    # the quoted form silently depends on the schema and on build options.
    query = 'SELECT 1 FROM videos WHERE author_id == ? AND download == ? LIMIT 1'
    self.cur.execute(query, [channel_id, 'pending'])
    return self.cur.fetchone() is not None
def channel_directory(self, channel_id):
self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id])
fetch = self.cur.fetchone()
if fetch is None:
return None
return fetch[SQL_CHANNEL['directory']]
def download_video(self, video, commit=True, force=False): def download_video(self, video, commit=True, force=False):
''' '''
Execute the `YOUTUBE_DL_COMMAND`, within the channel's associated Execute the `YOUTUBE_DL_COMMAND`, within the channel's associated
@ -187,7 +188,7 @@ class YCDL:
return return
current_directory = os.getcwd() current_directory = os.getcwd()
download_directory = self.channel_directory(channel_id) download_directory = self.get_channel(channel_id)['directory']
download_directory = download_directory or current_directory download_directory = download_directory or current_directory
os.makedirs(download_directory, exist_ok=True) os.makedirs(download_directory, exist_ok=True)
@ -269,6 +270,7 @@ class YCDL:
'title': video.title, 'title': video.title,
'description': video.description, 'description': video.description,
'duration': video.duration, 'duration': video.duration,
'views': video.views,
'thumbnail': video.thumbnail['url'], 'thumbnail': video.thumbnail['url'],
'download': download_status, 'download': download_status,
} }
@ -302,16 +304,35 @@ class YCDL:
def refresh_all_channels(self, force=False, commit=True):
    '''
    Call refresh_channel on every channel yielded by get_channels, passing
    `force` and `commit` through unchanged, then commit once more at the end
    when `commit` is true.
    '''
    every_channel = self.get_channels()
    for known_channel in every_channel:
        self.refresh_channel(known_channel, force=force, commit=commit)

    if not commit:
        return
    self.sql.commit()
def refresh_channel(self, channel, force=False, commit=True):
    '''
    Fetch the channel's uploads from youtube and insert them into the
    database.

    channel: either a channel id string, which is looked up with
        get_channel, or a channel record supporting ['id'] and ['automark'].
    force: when false, stop at the first video that insert_video reports as
        not new. When true, walk the whole listing, and afterwards re-fetch
        every video the database has for this channel that did not appear in
        the listing.
    commit: when true, commit the database at the end.
    '''
    if isinstance(channel, str):
        channel = self.get_channel(channel)

    channel_id = channel['id']
    listed_ids = set()
    uploads = self.youtube.get_user_videos(uid=channel_id)
    log.debug('Refreshing channel: %s', channel_id)

    for upload in uploads:
        listed_ids.add(upload.id)
        outcome = self.insert_video(upload, commit=False)

        if outcome['new']:
            # Brand-new videos pick up the channel's automark state, if any.
            # NOTE(review): the state is set before download_video is
            # called; confirm download_video still downloads a video that
            # is already marked 'downloaded'.
            automark = channel['automark']
            if automark is not None:
                self.mark_video_state(upload.id, automark, commit=False)
                if automark == 'downloaded':
                    self.download_video(upload.id, commit=False)
        elif not force:
            # First already-known video: stop unless a full refresh was
            # asked for.
            break

    if force:
        # Videos stored for this channel that were absent from the listing
        # are refreshed individually by id.
        stored_videos = self.get_videos(channel_id=channel_id)
        unlisted_ids = list({stored['id'] for stored in stored_videos} - listed_ids)
        for unlisted in self.youtube.get_video(unlisted_ids):
            self.insert_video(unlisted, commit=False)

    if commit:
        self.sql.commit()

View file

@ -21,6 +21,7 @@ class Video:
snippet = data['snippet'] snippet = data['snippet']
content_details = data['contentDetails'] content_details = data['contentDetails']
statistics = data['statistics']
self.title = snippet['title'] or '[untitled]' self.title = snippet['title'] or '[untitled]'
self.description = snippet['description'] self.description = snippet['description']
@ -33,6 +34,7 @@ class Video:
self.published = published.timestamp() self.published = published.timestamp()
self.duration = isodate.parse_duration(content_details['duration']).seconds self.duration = isodate.parse_duration(content_details['duration']).seconds
self.views = statistics['viewCount']
thumbnails = snippet['thumbnails'] thumbnails = snippet['thumbnails']
best_thumbnail = max(thumbnails, key=lambda x: thumbnails[x]['width'] * thumbnails[x]['height']) best_thumbnail = max(thumbnails, key=lambda x: thumbnails[x]['width'] * thumbnails[x]['height'])
@ -66,7 +68,6 @@ class Youtube:
user = self.youtube.channels().list(part='contentDetails', id=uid).execute() user = self.youtube.channels().list(part='contentDetails', id=uid).execute()
upload_playlist = user['items'][0]['contentDetails']['relatedPlaylists']['uploads'] upload_playlist = user['items'][0]['contentDetails']['relatedPlaylists']['uploads']
page_token = None page_token = None
total = 0
while True: while True:
response = self.youtube.playlistItems().list( response = self.youtube.playlistItems().list(
maxResults=50, maxResults=50,
@ -78,12 +79,11 @@ class Youtube:
video_ids = [item['contentDetails']['videoId'] for item in response['items']] video_ids = [item['contentDetails']['videoId'] for item in response['items']]
videos = self.get_video(video_ids) videos = self.get_video(video_ids)
videos.sort(key=lambda x: x.published, reverse=True) videos.sort(key=lambda x: x.published, reverse=True)
yield from videos
count = len(videos) for video in videos:
total += count yield video
print(f'Found {count} more, {total} total')
if page_token is None or count < 50: if page_token is None:
break break
def get_related_videos(self, video_id, count=50): def get_related_videos(self, video_id, count=50):
@ -108,17 +108,25 @@ class Youtube:
else: else:
singular = False singular = False
results = [] snippets = []
chunks = helpers.chunk_sequence(video_ids, 50) chunks = helpers.chunk_sequence(video_ids, 50)
for chunk in chunks: for chunk in chunks:
chunk = ','.join(chunk) chunk = ','.join(chunk)
data = self.youtube.videos().list(part='id,contentDetails,snippet', id=chunk).execute() data = self.youtube.videos().list(part='id,contentDetails,snippet,statistics', id=chunk).execute()
items = data['items'] items = data['items']
results.extend(items) snippets.extend(items)
results = [Video(snippet) for snippet in results] videos = []
broken = []
for snippet in snippets:
try:
videos.append(Video(snippet))
except KeyError:
broken.append(snippet)
if broken:
print('broken:', broken)
if singular: if singular:
if len(results) == 1: if len(videos) == 1:
return results[0] return videos[0]
elif len(results) == 0: elif len(videos) == 0:
raise VideoNotFound(video_ids[0]) raise VideoNotFound(video_ids[0])
return results return videos