Ignore youtube shorts on a per-channel basis.

This commit is contained in:
voussoir 2023-08-09 23:24:47 -07:00
parent 94ee832a27
commit 4de461dbf1
11 changed files with 222 additions and 28 deletions

View file

@ -31,10 +31,11 @@ The reason for this is that youtube-dl is extremely configurable. Every user mig
- Web interface with video embeds
- Commandline interface for scripted use
- "Sub-box" page where newest videos from all channels are listed in order
- "New videos" feed page where newest videos from all channels are listed in order (wow, no algorithm!)
- Sort videos by date, duration, views, or random
- Background thread will refresh channels over time
- Automark channels as ignore or download
- Mark channels as auto-download or auto-ignore
- Automatically remove shorts from feed
- Free yourself from Youtube's awful recommendation system
## Your API key

View file

@ -6,6 +6,7 @@ import flask; from flask import request
import functools
import threading
import time
import traceback
from voussoirkit import flasktools
from voussoirkit import pathclass
@ -121,7 +122,45 @@ def refresher_thread(rate):
refresh_job.start()
last_refresh = time.time()
def ignore_shorts_thread(rate):
    '''
    Loop forever, checking pending videos against youtube's shorts URL and
    marking the ones that are shorts as ignored.

    Each pass selects up to 10 of the newest pending videos whose `is_shorts`
    is still NULL, whose duration is under 62 seconds, and whose channel has
    `ignore_shorts = 1`. The result of each check is stored in the video's
    `is_shorts` column so the video is not checked again.

    rate:
        Number of seconds to sleep between passes. When the database has had
        no new commits since the previous pass, the sleep is 5 * rate instead.

    This function never returns, so it is meant to be run in its own thread.
    '''
    last_commit_id = None
    while True:
        if ycdldb.last_commit_id == last_commit_id:
            # log.debug('Sleeping again due to no new commits.')
            time.sleep(5 * rate)
            continue

        last_commit_id = ycdldb.last_commit_id
        log.info('Starting shorts job.')
        # 'pending' uses single quotes because double quotes denote
        # identifiers in SQL; SQLite only accepts them as string literals
        # through a legacy fallback that can be disabled.
        videos = ycdldb.get_videos_by_sql('''
            SELECT * FROM videos
            LEFT JOIN channels ON channels.id = videos.author_id
            WHERE is_shorts IS NULL AND duration < 62 AND state = 'pending' AND channels.ignore_shorts = 1
            ORDER BY published DESC
            LIMIT 10
        ''')
        videos = list(videos)
        if not videos:
            time.sleep(rate)
            continue

        with ycdldb.transaction:
            for video in videos:
                try:
                    is_shorts = ycdl.ytapi.video_is_shorts(video.id)
                except Exception:
                    log.warning(traceback.format_exc())
                    # Without a fresh answer for this video there is nothing
                    # trustworthy to write. Skipping leaves is_shorts as NULL
                    # so the video is picked up again by a later pass, and
                    # avoids reusing the previous video's result (or hitting
                    # a NameError when the very first check fails).
                    continue

                pairs = {'id': video.id, 'is_shorts': int(is_shorts)}
                if is_shorts:
                    pairs['state'] = 'ignored'
                ycdldb.update(table=ycdl.objects.Video, pairs=pairs, where_key='id')

        time.sleep(rate)
def start_refresher_thread(rate):
    '''
    Launch the background daemon threads: the channel refresher, which is
    given `rate`, and the shorts checker, which is started with a fixed
    argument of 60.
    '''
    log.info('Starting refresher thread, once per %d seconds.', rate)
    threading.Thread(target=refresher_thread, args=[rate], daemon=True).start()
    threading.Thread(target=ignore_shorts_thread, args=[60], daemon=True).start()

View file

@ -211,6 +211,22 @@ def post_set_download_directory(channel_id):
response = {'id': channel.id, 'download_directory': abspath}
return flasktools.json_response(response)
# The route decorator must be outermost. Flask registers whatever function
# route() receives, so any decorator placed above it wraps a function that
# Flask never calls, and the required_fields check would be skipped.
@site.route('/channel/<channel_id>/set_ignore_shorts', methods=['POST'])
@flasktools.required_fields(['ignore_shorts'], forbid_whitespace=True)
def post_set_ignore_shorts(channel_id):
    '''
    Set whether shorts should be automatically ignored for this channel.

    Form fields:
        ignore_shorts: a truthy / falsey string, parsed by
        stringtools.truthystring.

    Responds with JSON containing the channel's id and its resulting
    ignore_shorts value. Aborts with 400 if parsing or the setter raises
    ValueError or TypeError.
    '''
    ignore_shorts = request.form['ignore_shorts']
    channel = common.ycdldb.get_channel(channel_id)

    try:
        ignore_shorts = stringtools.truthystring(ignore_shorts)
        with common.ycdldb.transaction:
            channel.set_ignore_shorts(ignore_shorts)
    except (ValueError, TypeError):
        flask.abort(400)

    response = {'id': channel.id, 'ignore_shorts': channel.ignore_shorts}
    return flasktools.json_response(response)
@flasktools.required_fields(['name'], forbid_whitespace=False)
@site.route('/channel/<channel_id>/set_name', methods=['POST'])
def post_set_name(channel_id):

View file

@ -73,6 +73,16 @@ function set_download_directory(channel_id, download_directory, callback)
});
}
// POST the desired ignore_shorts value for the given channel and hand the
// response to `callback`. Returns whatever http.post returns.
api.channels.set_ignore_shorts =
function set_ignore_shorts(channel_id, ignore_shorts, callback)
{
    const url = `/channel/${channel_id}/set_ignore_shorts`;
    const data = {"ignore_shorts": ignore_shorts};
    return http.post({url: url, data: data, callback: callback});
}
api.channels.set_name =
function set_name(channel_id, name, callback)
{

View file

@ -88,13 +88,13 @@ function hms_render_colons(hours, minutes, seconds)
}
common.seconds_to_hms =
function seconds_to_hms(seconds, args)
function seconds_to_hms({
seconds,
renderer=common.hms_render_colons,
force_minutes=false,
force_hours=false,
})
{
args = args || {};
const renderer = args["renderer"] || common.hms_render_colons;
const force_minutes = args["force_minutes"] || false;
const force_hours = args["force_hours"] || false;
if (seconds > 0 && seconds < 1)
{
seconds = 1;

View file

@ -266,6 +266,12 @@ https://stackoverflow.com/a/35153397
<span id="set_automark_spinner" class="hidden">Working...</span>
</div>
<div>
{% set checked = 'checked' if channel.ignore_shorts else '' %}
<label><input type="checkbox" id="set_ignore_shorts_checkbox" {{checked}} onchange="return set_ignore_shorts_form(event);"/> Automatically ignore shorts (short vertical videos).</label>
<span id="set_ignore_shorts_spinner" class="hidden">Working...</span>
</div>
<div>
<input type="text" id="set_queuefile_extension_input" placeholder="Queuefile extension" value="{{channel.queuefile_extension or ''}}"/>
<button id="set_queuefile_extension_button" class="button_with_spinner" onclick="return set_queuefile_extension_form(event);">Set extension</button>
@ -661,6 +667,22 @@ function set_download_directory_callback(response)
}
}
// Change handler for the "ignore shorts" checkbox: show the spinner and send
// the checkbox's new state to the server.
function set_ignore_shorts_form(event)
{
    set_ignore_shorts_spinner.show();
    const checked = event.target.checked;
    api.channels.set_ignore_shorts(CHANNEL_ID, checked, set_ignore_shorts_callback);
}
// Response handler for set_ignore_shorts: hide the spinner, and show the raw
// response in an alert if the status is anything other than 200.
function set_ignore_shorts_callback(response)
{
    set_ignore_shorts_spinner.hide();
    const succeeded = (response.meta.status == 200);
    if (succeeded)
    {
        return;
    }
    alert(JSON.stringify(response));
}
function set_name_form(event)
{
const name = set_name_input.value.trim();
@ -751,6 +773,9 @@ if (CHANNEL_ID)
var set_autorefresh_spinner = document.getElementById("set_autorefresh_spinner");
set_autorefresh_spinner = new spinners.Spinner(set_autorefresh_spinner);
var set_ignore_shorts_spinner = document.getElementById("set_ignore_shorts_spinner");
set_ignore_shorts_spinner = new spinners.Spinner(set_ignore_shorts_spinner);
}
////////////////////////////////////////////////////////////////////////////////////////////////////

View file

@ -347,6 +347,72 @@ def upgrade_10_to_11(ycdldb):
m.go()
def upgrade_11_to_12(ycdldb):
    '''
    In this version, the `ignore_shorts` column was added to the channels table
    and `is_shorts` was added to the videos table.

    Existing channels are transferred with ignore_shorts = 1, and existing
    videos are transferred with is_shorts = NULL.
    '''
    migrator = Migrator(ycdldb)

    # channels: same columns as before plus ignore_shorts at the end.
    channels = migrator.tables['channels']
    channels['create'] = '''
    CREATE TABLE IF NOT EXISTS channels(
        id TEXT,
        name TEXT,
        uploads_playlist TEXT,
        download_directory TEXT COLLATE NOCASE,
        queuefile_extension TEXT COLLATE NOCASE,
        automark TEXT,
        autorefresh INT,
        last_refresh INT,
        ignore_shorts INT NOT NULL
    );
    '''
    channels['transfer'] = '''
    INSERT INTO channels SELECT
        id,
        name,
        uploads_playlist,
        download_directory,
        queuefile_extension,
        automark,
        autorefresh,
        last_refresh,
        1
    FROM channels_old;
    '''

    # videos: same columns as before plus is_shorts at the end.
    videos = migrator.tables['videos']
    videos['create'] = '''
    CREATE TABLE IF NOT EXISTS videos(
        id TEXT,
        published INT,
        author_id TEXT,
        title TEXT,
        description TEXT,
        duration INT,
        views INT,
        thumbnail TEXT,
        live_broadcast TEXT,
        state TEXT,
        is_shorts INT
    );
    '''
    videos['transfer'] = '''
    INSERT INTO videos SELECT
        id,
        published,
        author_id,
        title,
        description,
        duration,
        views,
        thumbnail,
        live_broadcast,
        state,
        NULL
    FROM videos_old;
    '''

    migrator.go()
def upgrade_all(data_directory):
'''
Given the directory containing a ycdl database, apply all of the

View file

@ -1,6 +1,6 @@
from voussoirkit import sqlhelpers
DATABASE_VERSION = 11
DATABASE_VERSION = 12
DB_INIT = f'''
CREATE TABLE IF NOT EXISTS channels(
@ -11,7 +11,8 @@ CREATE TABLE IF NOT EXISTS channels(
queuefile_extension TEXT COLLATE NOCASE,
automark TEXT,
autorefresh INT,
last_refresh INT
last_refresh INT,
ignore_shorts INT NOT NULL
);
CREATE INDEX IF NOT EXISTS index_channel_id on channels(id);
----------------------------------------------------------------------------------------------------
@ -25,7 +26,8 @@ CREATE TABLE IF NOT EXISTS videos(
views INT,
thumbnail TEXT,
live_broadcast TEXT,
state TEXT
state TEXT,
is_shorts INT
);
CREATE INDEX IF NOT EXISTS index_video_author_published on videos(author_id, published);
CREATE INDEX IF NOT EXISTS index_video_author_state_published on videos(author_id, state, published);

View file

@ -36,6 +36,7 @@ class Channel(ObjectBase):
self.queuefile_extension = self.normalize_queuefile_extension(db_row['queuefile_extension'])
self.automark = db_row['automark'] or 'pending'
self.autorefresh = stringtools.truthystring(db_row['autorefresh'])
self.ignore_shorts = bool(db_row['ignore_shorts'])
def __repr__(self):
return f'Channel:{self.id}'
@ -139,7 +140,7 @@ class Channel(ObjectBase):
log.info('Deleting %s.', self)
self.ycdldb.delete(table='videos', pairs={'author_id': self.id})
self.ycdldb.delete(table='channels', pairs={'id': self.id})
self.ycdldb.delete(table=Channel, pairs={'id': self.id})
self.deleted = True
def get_most_recent_video_id(self) -> str:
@ -249,7 +250,7 @@ class Channel(ObjectBase):
'id': self.id,
'last_refresh': timetools.now().timestamp(),
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
def reset_uploads_playlist_id(self):
'''
@ -267,7 +268,7 @@ class Channel(ObjectBase):
'id': self.id,
'automark': state,
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.automark = state
@worms.atomic
@ -278,7 +279,7 @@ class Channel(ObjectBase):
'id': self.id,
'autorefresh': autorefresh,
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.autorefresh = autorefresh
@worms.atomic
@ -289,9 +290,21 @@ class Channel(ObjectBase):
'id': self.id,
'download_directory': download_directory.absolute_path if download_directory else None,
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.download_directory = download_directory
@worms.atomic
def set_ignore_shorts(self, ignore_shorts: bool):
    '''
    Store this channel's ignore_shorts setting in the database and on the
    object.

    Raises TypeError if ignore_shorts is not a bool.
    '''
    if not isinstance(ignore_shorts, bool):
        raise TypeError(ignore_shorts)

    # The column is an integer, so the bool is stored as 0 / 1.
    self.ycdldb.update(
        table=Channel,
        pairs={'id': self.id, 'ignore_shorts': int(ignore_shorts)},
        where_key='id',
    )
    self.ignore_shorts = ignore_shorts
@worms.atomic
def set_name(self, name):
name = self.normalize_name(name)
@ -300,7 +313,7 @@ class Channel(ObjectBase):
'id': self.id,
'name': name,
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.name = name
@worms.atomic
@ -311,7 +324,7 @@ class Channel(ObjectBase):
'id': self.id,
'queuefile_extension': queuefile_extension,
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.queuefile_extension = queuefile_extension
@worms.atomic
@ -324,7 +337,7 @@ class Channel(ObjectBase):
'id': self.id,
'uploads_playlist': playlist_id,
}
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
self.uploads_playlist = playlist_id
class Video(ObjectBase):

View file

@ -30,6 +30,7 @@ class YCDLDBChannelMixin:
*,
automark='pending',
download_directory=None,
ignore_shorts=True,
queuefile_extension=None,
get_videos=False,
name=None,
@ -67,6 +68,7 @@ class YCDLDBChannelMixin:
'queuefile_extension': queuefile_extension,
'automark': automark,
'autorefresh': True,
'ignore_shorts': int(bool(ignore_shorts)),
}
self.insert(table='channels', pairs=data)
@ -393,14 +395,15 @@ class YCDLDBVideoMixin:
'thumbnail': video.thumbnail['url'],
'live_broadcast': video.live_broadcast,
'state': download_status,
'is_shorts': None,
}
if existing:
log.loud('Updating Video %s.', video)
self.update(table='videos', pairs=data, where_key='id')
self.update(objects.Video, pairs=data, where_key='id')
else:
log.loud('Inserting Video %s.', video)
self.insert(table='videos', pairs=data)
self.insert(objects.Video, pairs=data)
# Override the cached copy with the new copy so that the cache contains
# updated information (view counts etc.).

View file

@ -1,10 +1,16 @@
import googleapiclient.discovery
import isodate
import requests
import typing
from voussoirkit import gentools
from voussoirkit import httperrors
from voussoirkit import vlogging
log = vlogging.getLogger(__name__)
session = requests.Session()
def int_none(x):
if x is None:
return None
@ -60,7 +66,6 @@ class Youtube:
serviceName='youtube',
version='v3',
)
self.log = vlogging.getLogger(__name__)
def _playlist_paginator(self, playlist_id):
page_token = None
@ -131,21 +136,35 @@ class Youtube:
chunks = gentools.chunk_generator(video_ids, 50)
total_snippets = 0
for chunk in chunks:
self.log.debug('Requesting batch of %d video ids.', len(chunk))
self.log.loud(chunk)
log.debug('Requesting batch of %d video ids.', len(chunk))
log.loud(chunk)
chunk = ','.join(chunk)
data = self.youtube.videos().list(
part='id,contentDetails,snippet,statistics',
id=chunk,
).execute()
snippets = data['items']
self.log.debug('Got batch of %d snippets.', len(snippets))
log.debug('Got batch of %d snippets.', len(snippets))
total_snippets += len(snippets)
self.log.loud(snippets)
log.loud(snippets)
for snippet in snippets:
log.loud('%s', snippet)
try:
video = Video(snippet)
yield video
except KeyError as exc:
self.log.warning(f'KEYERROR: {exc} not in {snippet}')
self.log.debug('Finished getting a total of %d snippets.', total_snippets)
log.warning(f'KEYERROR: {exc} not in {snippet}')
log.debug('Finished getting a total of %d snippets.', total_snippets)
def video_is_shorts(video_id) -> bool:
    '''
    Return True if the video is a youtube short, False if it is not.

    This makes a HEAD request to https://www.youtube.com/shorts/{video_id}.
    A 200 response is treated as "is a short" and a 303 response is treated
    as "not a short".

    Raises ValueError for any other status code that gets past
    httperrors.raise_for_status, which may itself raise for error statuses.
    '''
    url = f'https://www.youtube.com/shorts/{video_id}'
    log.loud('Checking if %s is shorts.', video_id)
    # The 303 check below only works if the redirect is not followed. That is
    # already the default for HEAD in requests, but it is spelled out here
    # because the logic depends on it.
    response = session.head(url, allow_redirects=False)
    httperrors.raise_for_status(response)
    if response.status_code == 200:
        return True
    if response.status_code == 303:
        return False
    # Exceptions do not %-format their arguments the way logging calls do, so
    # the message is built explicitly.
    raise ValueError(f'Unexpected status code {response.status_code}')