Ignore youtube shorts on a per-channel basis.

This commit is contained in:
voussoir 2023-08-09 23:24:47 -07:00
parent 94ee832a27
commit 4de461dbf1
11 changed files with 222 additions and 28 deletions

View file

@ -31,10 +31,11 @@ The reason for this is that youtube-dl is extremely configurable. Every user mig
- Web interface with video embeds - Web interface with video embeds
- Commandline interface for scripted use - Commandline interface for scripted use
- "Sub-box" page where newest videos from all channels are listed in order - "New videos" feed page where newest videos from all channels are listed in order (wow, no algorithm!)
- Sort videos by date, duration, views, or random - Sort videos by date, duration, views, or random
- Background thread will refresh channels over time - Background thread will refresh channels over time
- Automark channels as ignore or download - Mark channels as auto-download or auto-ignore
- Automatically remove shorts from feed
- Free yourself from Youtube's awful recommendation system - Free yourself from Youtube's awful recommendation system
## Your API key ## Your API key

View file

@ -6,6 +6,7 @@ import flask; from flask import request
import functools import functools
import threading import threading
import time import time
import traceback
from voussoirkit import flasktools from voussoirkit import flasktools
from voussoirkit import pathclass from voussoirkit import pathclass
@ -121,7 +122,45 @@ def refresher_thread(rate):
refresh_job.start() refresh_job.start()
last_refresh = time.time() last_refresh = time.time()
def ignore_shorts_thread(rate):
    '''
    Loop forever, checking short pending videos against Youtube and marking
    the ones that turn out to be shorts as ignored.

    Each pass selects up to 10 of the newest videos that are pending, shorter
    than 62 seconds, have not yet been checked (is_shorts IS NULL), and belong
    to a channel with ignore_shorts = 1. Every checked video gets its
    is_shorts column filled in; videos that are shorts also get their state
    set to 'ignored'.

    rate:
        Number of seconds to sleep between passes. When the database has had
        no new commit since the previous pass, the loop sleeps for 5 * rate
        instead of querying again.

    This function never returns, so it should be run in its own thread.
    '''
    last_commit_id = None
    while True:
        if ycdldb.last_commit_id == last_commit_id:
            # log.debug('Sleeping again due to no new commits.')
            time.sleep(5 * rate)
            continue

        last_commit_id = ycdldb.last_commit_id
        log.info('Starting shorts job.')
        videos = ycdldb.get_videos_by_sql('''
SELECT * FROM videos
LEFT JOIN channels ON channels.id = videos.author_id
WHERE is_shorts IS NULL AND duration < 62 AND state = "pending" AND channels.ignore_shorts = 1
ORDER BY published DESC
LIMIT 10
''')
        videos = list(videos)
        if not videos:
            time.sleep(rate)
            continue

        with ycdldb.transaction:
            for video in videos:
                try:
                    is_shorts = ycdl.ytapi.video_is_shorts(video.id)
                except Exception:
                    # This is a best-effort background job, so a failed lookup
                    # is logged rather than raised. The video must be skipped:
                    # is_shorts is either unbound (first video) or still holds
                    # the previous video's answer, and neither may be written
                    # to this row. The row keeps is_shorts NULL, so it remains
                    # eligible for a later pass.
                    log.warning(traceback.format_exc())
                    continue

                pairs = {'id': video.id, 'is_shorts': int(is_shorts)}
                if is_shorts:
                    pairs['state'] = 'ignored'
                ycdldb.update(table=ycdl.objects.Video, pairs=pairs, where_key='id')

        time.sleep(rate)
def start_refresher_thread(rate): def start_refresher_thread(rate):
log.info('Starting refresher thread, once per %d seconds.', rate) log.info('Starting refresher thread, once per %d seconds.', rate)
refresher = threading.Thread(target=refresher_thread, args=[rate], daemon=True) refresher = threading.Thread(target=refresher_thread, args=[rate], daemon=True)
refresher.start() refresher.start()
shorts_killer = threading.Thread(target=ignore_shorts_thread, args=[60], daemon=True)
shorts_killer.start()

View file

@ -211,6 +211,22 @@ def post_set_download_directory(channel_id):
response = {'id': channel.id, 'download_directory': abspath} response = {'id': channel.id, 'download_directory': abspath}
return flasktools.json_response(response) return flasktools.json_response(response)
@flasktools.required_fields(['ignore_shorts'], forbid_whitespace=True)
@site.route('/channel/<channel_id>/set_ignore_shorts', methods=['POST'])
def post_set_ignore_shorts(channel_id):
    '''
    Update the channel's ignore_shorts setting from the `ignore_shorts` form
    field and respond with the channel's id and resulting setting as JSON.

    Aborts with 400 if parsing or applying the value raises ValueError or
    TypeError.
    '''
    raw_value = request.form['ignore_shorts']
    channel = common.ycdldb.get_channel(channel_id)
    try:
        parsed_value = stringtools.truthystring(raw_value)
        with common.ycdldb.transaction:
            channel.set_ignore_shorts(parsed_value)
    except (ValueError, TypeError):
        flask.abort(400)

    return flasktools.json_response({'id': channel.id, 'ignore_shorts': channel.ignore_shorts})
@flasktools.required_fields(['name'], forbid_whitespace=False) @flasktools.required_fields(['name'], forbid_whitespace=False)
@site.route('/channel/<channel_id>/set_name', methods=['POST']) @site.route('/channel/<channel_id>/set_name', methods=['POST'])
def post_set_name(channel_id): def post_set_name(channel_id):

View file

@ -73,6 +73,16 @@ function set_download_directory(channel_id, download_directory, callback)
}); });
} }
api.channels.set_ignore_shorts =
function set_ignore_shorts(channel_id, ignore_shorts, callback)
{
    // POST the desired ignore_shorts value to the channel's endpoint and
    // hand the response to `callback`.
    const url = `/channel/${channel_id}/set_ignore_shorts`;
    const data = {"ignore_shorts": ignore_shorts};
    return http.post({url: url, data: data, callback: callback});
}
api.channels.set_name = api.channels.set_name =
function set_name(channel_id, name, callback) function set_name(channel_id, name, callback)
{ {

View file

@ -88,13 +88,13 @@ function hms_render_colons(hours, minutes, seconds)
} }
common.seconds_to_hms = common.seconds_to_hms =
function seconds_to_hms(seconds, args) function seconds_to_hms({
seconds,
renderer=common.hms_render_colons,
force_minutes=false,
force_hours=false,
})
{ {
args = args || {};
const renderer = args["renderer"] || common.hms_render_colons;
const force_minutes = args["force_minutes"] || false;
const force_hours = args["force_hours"] || false;
if (seconds > 0 && seconds < 1) if (seconds > 0 && seconds < 1)
{ {
seconds = 1; seconds = 1;

View file

@ -266,6 +266,12 @@ https://stackoverflow.com/a/35153397
<span id="set_automark_spinner" class="hidden">Working...</span> <span id="set_automark_spinner" class="hidden">Working...</span>
</div> </div>
<div>
{% set checked = 'checked' if channel.ignore_shorts else '' %}
<label><input type="checkbox" id="set_ignore_shorts_checkbox" {{checked}} onchange="return set_ignore_shorts_form(event);"/> Automatically ignore shorts (short vertical videos).</label>
<span id="set_ignore_shorts_spinner" class="hidden">Working...</span>
</div>
<div> <div>
<input type="text" id="set_queuefile_extension_input" placeholder="Queuefile extension" value="{{channel.queuefile_extension or ''}}"/> <input type="text" id="set_queuefile_extension_input" placeholder="Queuefile extension" value="{{channel.queuefile_extension or ''}}"/>
<button id="set_queuefile_extension_button" class="button_with_spinner" onclick="return set_queuefile_extension_form(event);">Set extension</button> <button id="set_queuefile_extension_button" class="button_with_spinner" onclick="return set_queuefile_extension_form(event);">Set extension</button>
@ -661,6 +667,22 @@ function set_download_directory_callback(response)
} }
} }
function set_ignore_shorts_form(event)
{
    // onchange handler for the ignore-shorts checkbox: show the spinner and
    // send the checkbox's new state to the server.
    set_ignore_shorts_spinner.show();
    const ignore_shorts = event.target.checked;
    api.channels.set_ignore_shorts(CHANNEL_ID, ignore_shorts, set_ignore_shorts_callback);
}
function set_ignore_shorts_callback(response)
{
    // Response handler for set_ignore_shorts_form: hide the spinner, and
    // show the whole response in an alert unless the status is 200.
    set_ignore_shorts_spinner.hide();
    if (response.meta.status == 200)
    {
        return;
    }
    alert(JSON.stringify(response));
}
function set_name_form(event) function set_name_form(event)
{ {
const name = set_name_input.value.trim(); const name = set_name_input.value.trim();
@ -751,6 +773,9 @@ if (CHANNEL_ID)
var set_autorefresh_spinner = document.getElementById("set_autorefresh_spinner"); var set_autorefresh_spinner = document.getElementById("set_autorefresh_spinner");
set_autorefresh_spinner = new spinners.Spinner(set_autorefresh_spinner); set_autorefresh_spinner = new spinners.Spinner(set_autorefresh_spinner);
var set_ignore_shorts_spinner = document.getElementById("set_ignore_shorts_spinner");
set_ignore_shorts_spinner = new spinners.Spinner(set_ignore_shorts_spinner);
} }
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////

View file

@ -347,6 +347,72 @@ def upgrade_10_to_11(ycdldb):
m.go() m.go()
def upgrade_11_to_12(ycdldb):
    '''
    In this version, the `ignore_shorts` column was added to the channels table
    and `is_shorts` was added to the videos table.

    Both tables get a new CREATE statement with the extra column and a
    transfer statement that copies every row out of the corresponding `_old`
    table. Existing channels receive ignore_shorts = 1, and existing videos
    receive is_shorts = NULL.
    '''
    m = Migrator(ycdldb)

    # channels: same columns as before plus ignore_shorts INT NOT NULL.
    m.tables['channels']['create'] = '''
CREATE TABLE IF NOT EXISTS channels(
id TEXT,
name TEXT,
uploads_playlist TEXT,
download_directory TEXT COLLATE NOCASE,
queuefile_extension TEXT COLLATE NOCASE,
automark TEXT,
autorefresh INT,
last_refresh INT,
ignore_shorts INT NOT NULL
);
'''
    # The trailing literal 1 fills the new NOT NULL ignore_shorts column for
    # every channel copied from channels_old.
    m.tables['channels']['transfer'] = '''
INSERT INTO channels SELECT
id,
name,
uploads_playlist,
download_directory,
queuefile_extension,
automark,
autorefresh,
last_refresh,
1
FROM channels_old;
'''

    # videos: same columns as before plus a nullable is_shorts INT.
    m.tables['videos']['create'] = '''
CREATE TABLE IF NOT EXISTS videos(
id TEXT,
published INT,
author_id TEXT,
title TEXT,
description TEXT,
duration INT,
views INT,
thumbnail TEXT,
live_broadcast TEXT,
state TEXT,
is_shorts INT
);
'''
    # The trailing NULL leaves is_shorts unset for every video copied from
    # videos_old.
    m.tables['videos']['transfer'] = '''
INSERT INTO videos SELECT
id,
published,
author_id,
title,
description,
duration,
views,
thumbnail,
live_broadcast,
state,
NULL
FROM videos_old;
'''

    m.go()
def upgrade_all(data_directory): def upgrade_all(data_directory):
''' '''
Given the directory containing a ycdl database, apply all of the Given the directory containing a ycdl database, apply all of the

View file

@ -1,6 +1,6 @@
from voussoirkit import sqlhelpers from voussoirkit import sqlhelpers
DATABASE_VERSION = 11 DATABASE_VERSION = 12
DB_INIT = f''' DB_INIT = f'''
CREATE TABLE IF NOT EXISTS channels( CREATE TABLE IF NOT EXISTS channels(
@ -11,7 +11,8 @@ CREATE TABLE IF NOT EXISTS channels(
queuefile_extension TEXT COLLATE NOCASE, queuefile_extension TEXT COLLATE NOCASE,
automark TEXT, automark TEXT,
autorefresh INT, autorefresh INT,
last_refresh INT last_refresh INT,
ignore_shorts INT NOT NULL
); );
CREATE INDEX IF NOT EXISTS index_channel_id on channels(id); CREATE INDEX IF NOT EXISTS index_channel_id on channels(id);
---------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
@ -25,7 +26,8 @@ CREATE TABLE IF NOT EXISTS videos(
views INT, views INT,
thumbnail TEXT, thumbnail TEXT,
live_broadcast TEXT, live_broadcast TEXT,
state TEXT state TEXT,
is_shorts INT
); );
CREATE INDEX IF NOT EXISTS index_video_author_published on videos(author_id, published); CREATE INDEX IF NOT EXISTS index_video_author_published on videos(author_id, published);
CREATE INDEX IF NOT EXISTS index_video_author_state_published on videos(author_id, state, published); CREATE INDEX IF NOT EXISTS index_video_author_state_published on videos(author_id, state, published);

View file

@ -36,6 +36,7 @@ class Channel(ObjectBase):
self.queuefile_extension = self.normalize_queuefile_extension(db_row['queuefile_extension']) self.queuefile_extension = self.normalize_queuefile_extension(db_row['queuefile_extension'])
self.automark = db_row['automark'] or 'pending' self.automark = db_row['automark'] or 'pending'
self.autorefresh = stringtools.truthystring(db_row['autorefresh']) self.autorefresh = stringtools.truthystring(db_row['autorefresh'])
self.ignore_shorts = bool(db_row['ignore_shorts'])
def __repr__(self): def __repr__(self):
return f'Channel:{self.id}' return f'Channel:{self.id}'
@ -139,7 +140,7 @@ class Channel(ObjectBase):
log.info('Deleting %s.', self) log.info('Deleting %s.', self)
self.ycdldb.delete(table='videos', pairs={'author_id': self.id}) self.ycdldb.delete(table='videos', pairs={'author_id': self.id})
self.ycdldb.delete(table='channels', pairs={'id': self.id}) self.ycdldb.delete(table=Channel, pairs={'id': self.id})
self.deleted = True self.deleted = True
def get_most_recent_video_id(self) -> str: def get_most_recent_video_id(self) -> str:
@ -249,7 +250,7 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'last_refresh': timetools.now().timestamp(), 'last_refresh': timetools.now().timestamp(),
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
def reset_uploads_playlist_id(self): def reset_uploads_playlist_id(self):
''' '''
@ -267,7 +268,7 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'automark': state, 'automark': state,
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.automark = state self.automark = state
@worms.atomic @worms.atomic
@ -278,7 +279,7 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'autorefresh': autorefresh, 'autorefresh': autorefresh,
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.autorefresh = autorefresh self.autorefresh = autorefresh
@worms.atomic @worms.atomic
@ -289,9 +290,21 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'download_directory': download_directory.absolute_path if download_directory else None, 'download_directory': download_directory.absolute_path if download_directory else None,
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.download_directory = download_directory self.download_directory = download_directory
@worms.atomic
def set_ignore_shorts(self, ignore_shorts: bool):
    '''
    Store this channel's ignore_shorts setting in the database (as 0 or 1)
    and on the object.

    Raises TypeError if ignore_shorts is not a bool.
    '''
    if not isinstance(ignore_shorts, bool):
        raise TypeError(ignore_shorts)

    self.ycdldb.update(
        table=Channel,
        pairs={'id': self.id, 'ignore_shorts': int(ignore_shorts)},
        where_key='id',
    )
    self.ignore_shorts = ignore_shorts
@worms.atomic @worms.atomic
def set_name(self, name): def set_name(self, name):
name = self.normalize_name(name) name = self.normalize_name(name)
@ -300,7 +313,7 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'name': name, 'name': name,
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.name = name self.name = name
@worms.atomic @worms.atomic
@ -311,7 +324,7 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'queuefile_extension': queuefile_extension, 'queuefile_extension': queuefile_extension,
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.queuefile_extension = queuefile_extension self.queuefile_extension = queuefile_extension
@worms.atomic @worms.atomic
@ -324,7 +337,7 @@ class Channel(ObjectBase):
'id': self.id, 'id': self.id,
'uploads_playlist': playlist_id, 'uploads_playlist': playlist_id,
} }
self.ycdldb.update(table='channels', pairs=pairs, where_key='id') self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.uploads_playlist = playlist_id self.uploads_playlist = playlist_id
class Video(ObjectBase): class Video(ObjectBase):

View file

@ -30,6 +30,7 @@ class YCDLDBChannelMixin:
*, *,
automark='pending', automark='pending',
download_directory=None, download_directory=None,
ignore_shorts=True,
queuefile_extension=None, queuefile_extension=None,
get_videos=False, get_videos=False,
name=None, name=None,
@ -67,6 +68,7 @@ class YCDLDBChannelMixin:
'queuefile_extension': queuefile_extension, 'queuefile_extension': queuefile_extension,
'automark': automark, 'automark': automark,
'autorefresh': True, 'autorefresh': True,
'ignore_shorts': int(bool(ignore_shorts)),
} }
self.insert(table='channels', pairs=data) self.insert(table='channels', pairs=data)
@ -393,14 +395,15 @@ class YCDLDBVideoMixin:
'thumbnail': video.thumbnail['url'], 'thumbnail': video.thumbnail['url'],
'live_broadcast': video.live_broadcast, 'live_broadcast': video.live_broadcast,
'state': download_status, 'state': download_status,
'is_shorts': None,
} }
if existing: if existing:
log.loud('Updating Video %s.', video) log.loud('Updating Video %s.', video)
self.update(table='videos', pairs=data, where_key='id') self.update(objects.Video, pairs=data, where_key='id')
else: else:
log.loud('Inserting Video %s.', video) log.loud('Inserting Video %s.', video)
self.insert(table='videos', pairs=data) self.insert(objects.Video, pairs=data)
# Override the cached copy with the new copy so that the cache contains # Override the cached copy with the new copy so that the cache contains
# updated information (view counts etc.). # updated information (view counts etc.).

View file

@ -1,10 +1,16 @@
import googleapiclient.discovery import googleapiclient.discovery
import isodate import isodate
import requests
import typing import typing
from voussoirkit import gentools from voussoirkit import gentools
from voussoirkit import httperrors
from voussoirkit import vlogging from voussoirkit import vlogging
log = vlogging.getLogger(__name__)
session = requests.Session()
def int_none(x): def int_none(x):
if x is None: if x is None:
return None return None
@ -60,7 +66,6 @@ class Youtube:
serviceName='youtube', serviceName='youtube',
version='v3', version='v3',
) )
self.log = vlogging.getLogger(__name__)
def _playlist_paginator(self, playlist_id): def _playlist_paginator(self, playlist_id):
page_token = None page_token = None
@ -131,21 +136,35 @@ class Youtube:
chunks = gentools.chunk_generator(video_ids, 50) chunks = gentools.chunk_generator(video_ids, 50)
total_snippets = 0 total_snippets = 0
for chunk in chunks: for chunk in chunks:
self.log.debug('Requesting batch of %d video ids.', len(chunk)) log.debug('Requesting batch of %d video ids.', len(chunk))
self.log.loud(chunk) log.loud(chunk)
chunk = ','.join(chunk) chunk = ','.join(chunk)
data = self.youtube.videos().list( data = self.youtube.videos().list(
part='id,contentDetails,snippet,statistics', part='id,contentDetails,snippet,statistics',
id=chunk, id=chunk,
).execute() ).execute()
snippets = data['items'] snippets = data['items']
self.log.debug('Got batch of %d snippets.', len(snippets)) log.debug('Got batch of %d snippets.', len(snippets))
total_snippets += len(snippets) total_snippets += len(snippets)
self.log.loud(snippets) log.loud(snippets)
for snippet in snippets: for snippet in snippets:
log.loud('%s', snippet)
try: try:
video = Video(snippet) video = Video(snippet)
yield video yield video
except KeyError as exc: except KeyError as exc:
self.log.warning(f'KEYERROR: {exc} not in {snippet}') log.warning(f'KEYERROR: {exc} not in {snippet}')
self.log.debug('Finished getting a total of %d snippets.', total_snippets) log.debug('Finished getting a total of %d snippets.', total_snippets)
def video_is_shorts(video_id) -> bool:
    '''
    Return True if the video is a Youtube short, False otherwise.

    Sends a HEAD request to https://www.youtube.com/shorts/{video_id} and
    decides from the status code: 200 means the video is a short, 303 means
    it is not.

    Raises ValueError if the status code is neither 200 nor 303. The call to
    httperrors.raise_for_status may raise first for error responses.
    '''
    url = f'https://www.youtube.com/shorts/{video_id}'
    log.loud('Checking if %s is shorts.', video_id)
    # The redirect must not be followed, otherwise the 303 that signals
    # "not a short" would be replaced by the redirect target's status.
    response = session.head(url, allow_redirects=False)
    httperrors.raise_for_status(response)
    if response.status_code == 200:
        return True
    if response.status_code == 303:
        return False
    # Exceptions do not %-format their arguments, so build the message here.
    raise ValueError(f'Unexpected status code {response.status_code}')