Add views column and some other database changes.

And various other changes which have been sitting here for too long.
This commit is contained in:
voussoir 2020-01-06 22:07:25 -08:00
parent 420a14bb88
commit 78ce6a6f41
4 changed files with 125 additions and 56 deletions

View file

@ -103,6 +103,7 @@
<img src="http://i3.ytimg.com/vi/{{video['id']}}/default.jpg" height="100px">
<a class="video_title" href="https://www.youtube.com/watch?v={{video['id']}}">{{video['_published_str']}} - {{video['title']}}</a>
<span>({{video['duration'] | seconds_to_hms}})</span>
<span>({{video['views']}})</span>
{% if channel is none %}
<a href="/channel/{{video['author_id']}}">(Chan)</a>
{% endif %}

View file

@ -5,10 +5,49 @@ import sys
import ycdl
def upgrade_3_to_4(sql):
    '''
    In this version, the views column was added.

    The videos table is rebuilt instead of using ADD COLUMN so that `views`
    sits between `duration` and `thumbnail`; ADD COLUMN can only append to the
    end of the table. Existing rows are copied over with views set to NULL.
    '''
    cur = sql.cursor()

    # Indexes follow a table through a rename and are deleted when that table
    # is dropped, so the rebuilt `videos` table would end up with none of them.
    # Grab their definitions now, while they still refer to `videos`, and
    # replay them after the rebuild. Automatically created indexes have no
    # sql text and are skipped.
    cur.execute('''
        SELECT sql FROM sqlite_master
        WHERE type == 'index' AND tbl_name == 'videos' AND sql IS NOT NULL
    ''')
    index_definitions = [row[0] for row in cur.fetchall()]

    cur.executescript('''
        ALTER TABLE videos RENAME TO videos_old;
        CREATE TABLE videos(
            id TEXT,
            published INT,
            author_id TEXT,
            title TEXT,
            description TEXT,
            duration INT,
            views INT,
            thumbnail TEXT,
            download TEXT
        );
        INSERT INTO videos SELECT
            id,
            published,
            author_id,
            title,
            description,
            duration,
            NULL,
            thumbnail,
            download
        FROM videos_old;
        DROP TABLE videos_old;
    ''')

    for definition in index_definitions:
        cur.execute(definition)
def upgrade_2_to_3(sql):
    '''
    Version 3 introduced the `automark` text column on the channels table. It
    lets a channel be set to automatically mark its videos as ignored or
    downloaded.
    '''
    statement = 'ALTER TABLE channels ADD COLUMN automark TEXT'
    cursor = sql.cursor()
    cursor.execute(statement)
def upgrade_1_to_2(sql):
'''
In this version, a column `tagged_at` was added to the Photos table, to keep
track of the last time the photo's tags were edited (added or removed).
In this version, the duration column was added.
'''
cur = sql.cursor()
cur.executescript('''

View file

@ -18,27 +18,7 @@ logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING)
logging.getLogger('requests.packages.urllib3.util.retry').setLevel(logging.WARNING)
SQL_CHANNEL_COLUMNS = [
'id',
'name',
'directory',
]
SQL_VIDEO_COLUMNS = [
'id',
'published',
'author_id',
'title',
'description',
'duration',
'thumbnail',
'download',
]
SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)}
SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)}
DATABASE_VERSION = 2
DATABASE_VERSION = 4
DB_INIT = '''
PRAGMA count_changes = OFF;
PRAGMA cache_size = 10000;
@ -46,7 +26,8 @@ PRAGMA user_version = {user_version};
CREATE TABLE IF NOT EXISTS channels(
id TEXT,
name TEXT,
directory TEXT COLLATE NOCASE
directory TEXT COLLATE NOCASE,
automark TEXT
);
CREATE TABLE IF NOT EXISTS videos(
id TEXT,
@ -55,6 +36,7 @@ CREATE TABLE IF NOT EXISTS videos(
title TEXT,
description TEXT,
duration INT,
views INT,
thumbnail TEXT,
download TEXT
);
@ -67,12 +49,34 @@ CREATE INDEX IF NOT EXISTS index_video_published on videos(published);
CREATE INDEX IF NOT EXISTS index_video_download on videos(download);
'''.format(user_version=DATABASE_VERSION)
# Column names of the channels and videos tables, in table order. The
# position maps built from them are used to index into whole-row tuples,
# e.g. row[SQL_CHANNEL['directory']].
SQL_CHANNEL_COLUMNS = ['id', 'name', 'directory', 'automark']

SQL_VIDEO_COLUMNS = [
    'id', 'published', 'author_id',
    'title', 'description',
    'duration', 'views',
    'thumbnail', 'download',
]

# Column name -> zero-based position within a row.
SQL_CHANNEL = dict(zip(SQL_CHANNEL_COLUMNS, range(len(SQL_CHANNEL_COLUMNS))))
SQL_VIDEO = dict(zip(SQL_VIDEO_COLUMNS, range(len(SQL_VIDEO_COLUMNS))))
DEFAULT_DBNAME = 'ycdl.db'
ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}'
ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}.'
def verify_is_abspath(path):
def assert_is_abspath(path):
'''
TO DO: Determine whether this is actually correct.
'''
@ -86,7 +90,6 @@ class InvalidVideoState(Exception):
# Exception type with no behavior of its own beyond Exception.
# NOTE(review): the name suggests it signals a lookup for a video that is not
# known; the raise sites are outside this view -- confirm.
class NoSuchVideo(Exception):
    pass
class YCDL:
def __init__(self, youtube, database_filename=None, youtube_dl_function=None):
self.youtube = youtube
@ -119,6 +122,7 @@ class YCDL:
def add_channel(
self,
channel_id,
*,
commit=True,
download_directory=None,
get_videos=False,
@ -134,27 +138,24 @@ class YCDL:
data[SQL_CHANNEL['id']] = channel_id
data[SQL_CHANNEL['name']] = name
if download_directory is not None:
verify_is_abspath(download_directory)
assert_is_abspath(download_directory)
data[SQL_CHANNEL['directory']] = download_directory
self.cur.execute('INSERT INTO channels VALUES(?, ?, ?)', data)
self.cur.execute('INSERT INTO channels VALUES(?, ?, ?, ?)', data)
if get_videos:
self.refresh_channel(channel_id, commit=False)
if commit:
self.sql.commit()
return data
def channel_has_pending(self, channel_id):
query = 'SELECT * FROM videos WHERE author_id == ? AND download == "pending"'
query = 'SELECT 1 FROM videos WHERE author_id == ? AND download == "pending" LIMIT 1'
self.cur.execute(query, [channel_id])
return self.cur.fetchone() is not None
def channel_directory(self, channel_id):
    '''
    Look up the channels row with this id and return its `directory` column.
    Returns None when no such row exists.
    '''
    self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id])
    row = self.cur.fetchone()
    return None if row is None else row[SQL_CHANNEL['directory']]
def download_video(self, video, commit=True, force=False):
'''
Execute the `YOUTUBE_DL_COMMAND`, within the channel's associated
@ -187,7 +188,7 @@ class YCDL:
return
current_directory = os.getcwd()
download_directory = self.channel_directory(channel_id)
download_directory = self.get_channel(channel_id)['directory']
download_directory = download_directory or current_directory
os.makedirs(download_directory, exist_ok=True)
@ -269,6 +270,7 @@ class YCDL:
'title': video.title,
'description': video.description,
'duration': video.duration,
'views': video.views,
'thumbnail': video.thumbnail['url'],
'download': download_status,
}
@ -302,16 +304,35 @@ class YCDL:
def refresh_all_channels(self, force=False, commit=True):
for channel in self.get_channels():
self.refresh_channel(channel['id'], force=force, commit=commit)
self.refresh_channel(channel, force=force, commit=commit)
if commit:
self.sql.commit()
def refresh_channel(self, channel, force=False, commit=True):
    '''
    Fetch the channel's videos from Youtube and insert them into the database.

    channel:
        Either a channel row, or a channel id string which is resolved to a
        row with get_channel.
    force:
        If False, stop at the first video that insert_video reports as not
        new. If True, walk the whole listing, then re-fetch every video the
        database already has for this channel that the listing did not
        include.
    commit:
        If True, commit once at the end. All intermediate calls are made with
        commit=False.
    '''
    if isinstance(channel, str):
        channel = self.get_channel(channel)

    seen_ids = set()
    video_generator = self.youtube.get_user_videos(uid=channel['id'])
    log.debug('Refreshing channel: %s', channel['id'])
    for video in video_generator:
        seen_ids.add(video.id)
        status = self.insert_video(video, commit=False)

        # Automark applies to newly inserted videos only.
        if status['new'] and channel['automark'] is not None:
            self.mark_video_state(video.id, channel['automark'], commit=False)
            if channel['automark'] == 'downloaded':
                self.download_video(video.id, commit=False)

        if not force and not status['new']:
            break

    if force:
        # Known videos that did not show up in the listing above still get
        # their stored data refreshed, fetched directly by id.
        known_videos = self.get_videos(channel_id=channel['id'])
        known_ids = {v['id'] for v in known_videos}
        refresh_ids = list(known_ids.difference(seen_ids))
        for video in self.youtube.get_video(refresh_ids):
            self.insert_video(video, commit=False)

    if commit:
        self.sql.commit()

View file

@ -21,6 +21,7 @@ class Video:
snippet = data['snippet']
content_details = data['contentDetails']
statistics = data['statistics']
self.title = snippet['title'] or '[untitled]'
self.description = snippet['description']
@ -33,6 +34,7 @@ class Video:
self.published = published.timestamp()
self.duration = isodate.parse_duration(content_details['duration']).seconds
self.views = statistics['viewCount']
thumbnails = snippet['thumbnails']
best_thumbnail = max(thumbnails, key=lambda x: thumbnails[x]['width'] * thumbnails[x]['height'])
@ -66,7 +68,6 @@ class Youtube:
user = self.youtube.channels().list(part='contentDetails', id=uid).execute()
upload_playlist = user['items'][0]['contentDetails']['relatedPlaylists']['uploads']
page_token = None
total = 0
while True:
response = self.youtube.playlistItems().list(
maxResults=50,
@ -78,12 +79,11 @@ class Youtube:
video_ids = [item['contentDetails']['videoId'] for item in response['items']]
videos = self.get_video(video_ids)
videos.sort(key=lambda x: x.published, reverse=True)
yield from videos
count = len(videos)
total += count
print(f'Found {count} more, {total} total')
if page_token is None or count < 50:
for video in videos:
yield video
if page_token is None:
break
def get_related_videos(self, video_id, count=50):
@ -108,17 +108,25 @@ class Youtube:
else:
singular = False
results = []
snippets = []
chunks = helpers.chunk_sequence(video_ids, 50)
for chunk in chunks:
chunk = ','.join(chunk)
data = self.youtube.videos().list(part='id,contentDetails,snippet', id=chunk).execute()
data = self.youtube.videos().list(part='id,contentDetails,snippet,statistics', id=chunk).execute()
items = data['items']
results.extend(items)
results = [Video(snippet) for snippet in results]
snippets.extend(items)
videos = []
broken = []
for snippet in snippets:
try:
videos.append(Video(snippet))
except KeyError:
broken.append(snippet)
if broken:
print('broken:', broken)
if singular:
if len(results) == 1:
return results[0]
elif len(results) == 0:
if len(videos) == 1:
return videos[0]
elif len(videos) == 0:
raise VideoNotFound(video_ids[0])
return results
return videos