Ignore youtube shorts on a per-channel basis.
This commit is contained in:
parent
94ee832a27
commit
4de461dbf1
11 changed files with 222 additions and 28 deletions
|
@ -31,10 +31,11 @@ The reason for this is that youtube-dl is extremely configurable. Every user mig
|
|||
|
||||
- Web interface with video embeds
|
||||
- Commandline interface for scripted use
|
||||
- "Sub-box" page where newest videos from all channels are listed in order
|
||||
- "New videos" feed page where newest videos from all channels are listed in order (wow, no algorithm!)
|
||||
- Sort videos by date, duration, views, or random
|
||||
- Background thread will refresh channels over time
|
||||
- Automark channels as ignore or download
|
||||
- Mark channels as auto-download or auto-ignore
|
||||
- Automatically remove shorts from feed
|
||||
- Free yourself from YouTube's awful recommendation system
|
||||
|
||||
## Your API key
|
||||
|
|
|
@ -6,6 +6,7 @@ import flask; from flask import request
|
|||
import functools
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from voussoirkit import flasktools
|
||||
from voussoirkit import pathclass
|
||||
|
@ -121,7 +122,45 @@ def refresher_thread(rate):
|
|||
refresh_job.start()
|
||||
last_refresh = time.time()
|
||||
|
||||
def ignore_shorts_thread(rate):
    '''
    Background loop that finds out which short, pending videos are youtube
    shorts, and ignores the ones that are.

    Each pass selects up to 10 of the newest videos that are pending, under
    62 seconds long, not yet classified (is_shorts IS NULL), and belong to a
    channel with ignore_shorts enabled. Each one is checked with
    ycdl.ytapi.video_is_shorts, the answer is stored in is_shorts, and
    confirmed shorts have their state set to 'ignored'.

    rate:
        Number of seconds to sleep between passes. While the database has
        had no new commit since the previous pass, the loop sleeps for
        5 * rate instead of querying again.

    This function never returns. It is meant to be the target of a daemon
    thread.
    '''
    last_commit_id = None
    while True:
        # Nothing has been committed since the last pass, so the set of
        # candidate videos cannot have changed. Back off for longer.
        if ycdldb.last_commit_id == last_commit_id:
            time.sleep(5 * rate)
            continue

        last_commit_id = ycdldb.last_commit_id

        log.info('Starting shorts job.')
        # 'pending' is single-quoted because SQL string literals use single
        # quotes. SQLite only accepts double-quoted strings as a legacy quirk.
        videos = ycdldb.get_videos_by_sql('''
        SELECT * FROM videos
        LEFT JOIN channels ON channels.id = videos.author_id
        WHERE is_shorts IS NULL AND duration < 62 AND state = 'pending' AND channels.ignore_shorts = 1
        ORDER BY published DESC
        LIMIT 10
        ''')
        videos = list(videos)
        if not videos:
            time.sleep(rate)
            continue

        # Do the network checks first so that the transaction below is not
        # held open while waiting on youtube.
        updates = []
        for video in videos:
            try:
                is_shorts = ycdl.ytapi.video_is_shorts(video.id)
            except Exception:
                # The check failed, so there is no answer to store. Skip this
                # video and leave is_shorts NULL so a later pass retries it.
                # Without this, is_shorts would be unbound or left over from
                # the previous video.
                log.warning(traceback.format_exc())
                continue

            pairs = {'id': video.id, 'is_shorts': int(is_shorts)}
            if is_shorts:
                pairs['state'] = 'ignored'
            updates.append(pairs)

        if updates:
            with ycdldb.transaction:
                for pairs in updates:
                    ycdldb.update(table=ycdl.objects.Video, pairs=pairs, where_key='id')

        time.sleep(rate)
|
||||
|
||||
def start_refresher_thread(rate):
    '''
    Start the background daemon threads.

    One thread runs refresher_thread with the given rate. The other runs
    ignore_shorts_thread with a fixed rate of 60.
    '''
    log.info('Starting refresher thread, once per %d seconds.', rate)
    workers = [
        threading.Thread(target=refresher_thread, args=[rate], daemon=True),
        threading.Thread(target=ignore_shorts_thread, args=[60], daemon=True),
    ]
    for worker in workers:
        worker.start()
|
||||
|
|
|
@ -211,6 +211,22 @@ def post_set_download_directory(channel_id):
|
|||
response = {'id': channel.id, 'download_directory': abspath}
|
||||
return flasktools.json_response(response)
|
||||
|
||||
# NOTE(review): required_fields is applied outside of site.route. If site.route
# is plain flask routing, the registered view is the undecorated function and
# the field check never runs -- confirm against the other endpoints.
@flasktools.required_fields(['ignore_shorts'], forbid_whitespace=True)
@site.route('/channel/<channel_id>/set_ignore_shorts', methods=['POST'])
def post_set_ignore_shorts(channel_id):
    '''
    Set the channel's ignore_shorts setting from the `ignore_shorts` form
    field and respond with the channel's id and resulting setting as JSON.

    Aborts with 400 if parsing or saving the value raises ValueError or
    TypeError.
    '''
    raw_value = request.form['ignore_shorts']
    channel = common.ycdldb.get_channel(channel_id)

    try:
        enabled = stringtools.truthystring(raw_value)
        with common.ycdldb.transaction:
            channel.set_ignore_shorts(enabled)
    except (ValueError, TypeError):
        flask.abort(400)

    return flasktools.json_response({
        'id': channel.id,
        'ignore_shorts': channel.ignore_shorts,
    })
|
||||
|
||||
@flasktools.required_fields(['name'], forbid_whitespace=False)
|
||||
@site.route('/channel/<channel_id>/set_name', methods=['POST'])
|
||||
def post_set_name(channel_id):
|
||||
|
|
|
@ -73,6 +73,16 @@ function set_download_directory(channel_id, download_directory, callback)
|
|||
});
|
||||
}
|
||||
|
||||
api.channels.set_ignore_shorts =
function set_ignore_shorts(channel_id, ignore_shorts, callback)
{
    // POST the new ignore_shorts value for this channel. The callback is
    // passed through to http.post, and http.post's return value is returned.
    const url = `/channel/${channel_id}/set_ignore_shorts`;
    const data = {"ignore_shorts": ignore_shorts};
    return http.post({url: url, data: data, callback: callback});
}
|
||||
|
||||
api.channels.set_name =
|
||||
function set_name(channel_id, name, callback)
|
||||
{
|
||||
|
|
|
@ -88,13 +88,13 @@ function hms_render_colons(hours, minutes, seconds)
|
|||
}
|
||||
|
||||
common.seconds_to_hms =
|
||||
function seconds_to_hms(seconds, args)
|
||||
function seconds_to_hms({
|
||||
seconds,
|
||||
renderer=common.hms_render_colons,
|
||||
force_minutes=false,
|
||||
force_hours=false,
|
||||
})
|
||||
{
|
||||
args = args || {};
|
||||
const renderer = args["renderer"] || common.hms_render_colons;
|
||||
const force_minutes = args["force_minutes"] || false;
|
||||
const force_hours = args["force_hours"] || false;
|
||||
|
||||
if (seconds > 0 && seconds < 1)
|
||||
{
|
||||
seconds = 1;
|
||||
|
|
|
@ -266,6 +266,12 @@ https://stackoverflow.com/a/35153397
|
|||
<span id="set_automark_spinner" class="hidden">Working...</span>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
{% set checked = 'checked' if channel.ignore_shorts else '' %}
|
||||
<label><input type="checkbox" id="set_ignore_shorts_checkbox" {{checked}} onchange="return set_ignore_shorts_form(event);"/> Automatically ignore shorts (short vertical videos).</label>
|
||||
<span id="set_ignore_shorts_spinner" class="hidden">Working...</span>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<input type="text" id="set_queuefile_extension_input" placeholder="Queuefile extension" value="{{channel.queuefile_extension or ''}}"/>
|
||||
<button id="set_queuefile_extension_button" class="button_with_spinner" onclick="return set_queuefile_extension_form(event);">Set extension</button>
|
||||
|
@ -661,6 +667,22 @@ function set_download_directory_callback(response)
|
|||
}
|
||||
}
|
||||
|
||||
function set_ignore_shorts_form(event)
{
    // onchange handler for the ignore-shorts checkbox: show the spinner and
    // send the checkbox's new state to the server.
    const ignore_shorts = event.target.checked;
    set_ignore_shorts_spinner.show();
    api.channels.set_ignore_shorts(CHANNEL_ID, ignore_shorts, set_ignore_shorts_callback);
}
|
||||
|
||||
function set_ignore_shorts_callback(response)
{
    // The request has finished, so the spinner can go away. Any response
    // other than a 200 is shown to the user verbatim.
    set_ignore_shorts_spinner.hide();
    const succeeded = (response.meta.status == 200);
    if (! succeeded)
    {
        alert(JSON.stringify(response));
    }
}
|
||||
|
||||
function set_name_form(event)
|
||||
{
|
||||
const name = set_name_input.value.trim();
|
||||
|
@ -751,6 +773,9 @@ if (CHANNEL_ID)
|
|||
|
||||
var set_autorefresh_spinner = document.getElementById("set_autorefresh_spinner");
|
||||
set_autorefresh_spinner = new spinners.Spinner(set_autorefresh_spinner);
|
||||
|
||||
var set_ignore_shorts_spinner = document.getElementById("set_ignore_shorts_spinner");
|
||||
set_ignore_shorts_spinner = new spinners.Spinner(set_ignore_shorts_spinner);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -347,6 +347,72 @@ def upgrade_10_to_11(ycdldb):
|
|||
|
||||
m.go()
|
||||
|
||||
def upgrade_11_to_12(ycdldb):
    '''
    In this version, the `ignore_shorts` column was added to the channels table
    and `is_shorts` was added to the videos table.

    Existing channels are transferred with ignore_shorts = 1, and existing
    videos are transferred with is_shorts = NULL.
    '''
    m = Migrator(ycdldb)

    # New channels schema: the previous columns plus ignore_shorts.
    m.tables['channels']['create'] = '''
    CREATE TABLE IF NOT EXISTS channels(
        id TEXT,
        name TEXT,
        uploads_playlist TEXT,
        download_directory TEXT COLLATE NOCASE,
        queuefile_extension TEXT COLLATE NOCASE,
        automark TEXT,
        autorefresh INT,
        last_refresh INT,
        ignore_shorts INT NOT NULL
    );
    '''
    # Copies every row from channels_old (presumably the pre-migration table
    # as renamed by Migrator -- confirm). The literal 1 fills the new NOT NULL
    # ignore_shorts column, so every existing channel starts with it enabled.
    m.tables['channels']['transfer'] = '''
    INSERT INTO channels SELECT
        id,
        name,
        uploads_playlist,
        download_directory,
        queuefile_extension,
        automark,
        autorefresh,
        last_refresh,
        1
    FROM channels_old;
    '''
    # New videos schema: the previous columns plus the nullable is_shorts.
    m.tables['videos']['create'] = '''
    CREATE TABLE IF NOT EXISTS videos(
        id TEXT,
        published INT,
        author_id TEXT,
        title TEXT,
        description TEXT,
        duration INT,
        views INT,
        thumbnail TEXT,
        live_broadcast TEXT,
        state TEXT,
        is_shorts INT
    );
    '''
    # Copies every row from videos_old (presumably the pre-migration table as
    # renamed by Migrator -- confirm). is_shorts is filled with NULL for every
    # existing video.
    m.tables['videos']['transfer'] = '''
    INSERT INTO videos SELECT
        id,
        published,
        author_id,
        title,
        description,
        duration,
        views,
        thumbnail,
        live_broadcast,
        state,
        NULL
    FROM videos_old;
    '''

    # Presumably executes the create and transfer statements configured above.
    m.go()
|
||||
|
||||
def upgrade_all(data_directory):
|
||||
'''
|
||||
Given the directory containing a ycdl database, apply all of the
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from voussoirkit import sqlhelpers
|
||||
|
||||
DATABASE_VERSION = 11
|
||||
DATABASE_VERSION = 12
|
||||
|
||||
DB_INIT = f'''
|
||||
CREATE TABLE IF NOT EXISTS channels(
|
||||
|
@ -11,7 +11,8 @@ CREATE TABLE IF NOT EXISTS channels(
|
|||
queuefile_extension TEXT COLLATE NOCASE,
|
||||
automark TEXT,
|
||||
autorefresh INT,
|
||||
last_refresh INT
|
||||
last_refresh INT,
|
||||
ignore_shorts INT NOT NULL
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS index_channel_id on channels(id);
|
||||
----------------------------------------------------------------------------------------------------
|
||||
|
@ -25,7 +26,8 @@ CREATE TABLE IF NOT EXISTS videos(
|
|||
views INT,
|
||||
thumbnail TEXT,
|
||||
live_broadcast TEXT,
|
||||
state TEXT
|
||||
state TEXT,
|
||||
is_shorts INT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS index_video_author_published on videos(author_id, published);
|
||||
CREATE INDEX IF NOT EXISTS index_video_author_state_published on videos(author_id, state, published);
|
||||
|
|
|
@ -36,6 +36,7 @@ class Channel(ObjectBase):
|
|||
self.queuefile_extension = self.normalize_queuefile_extension(db_row['queuefile_extension'])
|
||||
self.automark = db_row['automark'] or 'pending'
|
||||
self.autorefresh = stringtools.truthystring(db_row['autorefresh'])
|
||||
self.ignore_shorts = bool(db_row['ignore_shorts'])
|
||||
|
||||
def __repr__(self):
|
||||
return f'Channel:{self.id}'
|
||||
|
@ -139,7 +140,7 @@ class Channel(ObjectBase):
|
|||
log.info('Deleting %s.', self)
|
||||
|
||||
self.ycdldb.delete(table='videos', pairs={'author_id': self.id})
|
||||
self.ycdldb.delete(table='channels', pairs={'id': self.id})
|
||||
self.ycdldb.delete(table=Channel, pairs={'id': self.id})
|
||||
self.deleted = True
|
||||
|
||||
def get_most_recent_video_id(self) -> str:
|
||||
|
@ -249,7 +250,7 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'last_refresh': timetools.now().timestamp(),
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
|
||||
def reset_uploads_playlist_id(self):
|
||||
'''
|
||||
|
@ -267,7 +268,7 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'automark': state,
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
self.automark = state
|
||||
|
||||
@worms.atomic
|
||||
|
@ -278,7 +279,7 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'autorefresh': autorefresh,
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
self.autorefresh = autorefresh
|
||||
|
||||
@worms.atomic
|
||||
|
@ -289,9 +290,21 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'download_directory': download_directory.absolute_path if download_directory else None,
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
self.download_directory = download_directory
|
||||
|
||||
@worms.atomic
def set_ignore_shorts(self, ignore_shorts: bool):
    '''
    Store a new ignore_shorts setting for this channel, in the database row
    (as 0 or 1) and on this object.

    Raises TypeError if ignore_shorts is not exactly a bool.
    '''
    if not isinstance(ignore_shorts, bool):
        raise TypeError(ignore_shorts)

    self.ycdldb.update(
        table=Channel,
        pairs={'id': self.id, 'ignore_shorts': int(ignore_shorts)},
        where_key='id',
    )
    self.ignore_shorts = ignore_shorts
|
||||
|
||||
@worms.atomic
|
||||
def set_name(self, name):
|
||||
name = self.normalize_name(name)
|
||||
|
@ -300,7 +313,7 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'name': name,
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
self.name = name
|
||||
|
||||
@worms.atomic
|
||||
|
@ -311,7 +324,7 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'queuefile_extension': queuefile_extension,
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
self.queuefile_extension = queuefile_extension
|
||||
|
||||
@worms.atomic
|
||||
|
@ -324,7 +337,7 @@ class Channel(ObjectBase):
|
|||
'id': self.id,
|
||||
'uploads_playlist': playlist_id,
|
||||
}
|
||||
self.ycdldb.update(table='channels', pairs=pairs, where_key='id')
|
||||
self.ycdldb.update(table=Channel, pairs=pairs, where_key='id')
|
||||
self.uploads_playlist = playlist_id
|
||||
|
||||
class Video(ObjectBase):
|
||||
|
|
|
@ -30,6 +30,7 @@ class YCDLDBChannelMixin:
|
|||
*,
|
||||
automark='pending',
|
||||
download_directory=None,
|
||||
ignore_shorts=True,
|
||||
queuefile_extension=None,
|
||||
get_videos=False,
|
||||
name=None,
|
||||
|
@ -67,6 +68,7 @@ class YCDLDBChannelMixin:
|
|||
'queuefile_extension': queuefile_extension,
|
||||
'automark': automark,
|
||||
'autorefresh': True,
|
||||
'ignore_shorts': int(bool(ignore_shorts)),
|
||||
}
|
||||
self.insert(table='channels', pairs=data)
|
||||
|
||||
|
@ -393,14 +395,15 @@ class YCDLDBVideoMixin:
|
|||
'thumbnail': video.thumbnail['url'],
|
||||
'live_broadcast': video.live_broadcast,
|
||||
'state': download_status,
|
||||
'is_shorts': None,
|
||||
}
|
||||
|
||||
if existing:
|
||||
log.loud('Updating Video %s.', video)
|
||||
self.update(table='videos', pairs=data, where_key='id')
|
||||
self.update(objects.Video, pairs=data, where_key='id')
|
||||
else:
|
||||
log.loud('Inserting Video %s.', video)
|
||||
self.insert(table='videos', pairs=data)
|
||||
self.insert(objects.Video, pairs=data)
|
||||
|
||||
# Override the cached copy with the new copy so that the cache contains
|
||||
# updated information (view counts etc.).
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
import googleapiclient.discovery
|
||||
import isodate
|
||||
import requests
|
||||
import typing
|
||||
|
||||
from voussoirkit import gentools
|
||||
from voussoirkit import httperrors
|
||||
from voussoirkit import vlogging
|
||||
|
||||
log = vlogging.getLogger(__name__)
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
def int_none(x):
|
||||
if x is None:
|
||||
return None
|
||||
|
@ -60,7 +66,6 @@ class Youtube:
|
|||
serviceName='youtube',
|
||||
version='v3',
|
||||
)
|
||||
self.log = vlogging.getLogger(__name__)
|
||||
|
||||
def _playlist_paginator(self, playlist_id):
|
||||
page_token = None
|
||||
|
@ -131,21 +136,35 @@ class Youtube:
|
|||
chunks = gentools.chunk_generator(video_ids, 50)
|
||||
total_snippets = 0
|
||||
for chunk in chunks:
|
||||
self.log.debug('Requesting batch of %d video ids.', len(chunk))
|
||||
self.log.loud(chunk)
|
||||
log.debug('Requesting batch of %d video ids.', len(chunk))
|
||||
log.loud(chunk)
|
||||
chunk = ','.join(chunk)
|
||||
data = self.youtube.videos().list(
|
||||
part='id,contentDetails,snippet,statistics',
|
||||
id=chunk,
|
||||
).execute()
|
||||
snippets = data['items']
|
||||
self.log.debug('Got batch of %d snippets.', len(snippets))
|
||||
log.debug('Got batch of %d snippets.', len(snippets))
|
||||
total_snippets += len(snippets)
|
||||
self.log.loud(snippets)
|
||||
log.loud(snippets)
|
||||
for snippet in snippets:
|
||||
log.loud('%s', snippet)
|
||||
try:
|
||||
video = Video(snippet)
|
||||
yield video
|
||||
except KeyError as exc:
|
||||
self.log.warning(f'KEYERROR: {exc} not in {snippet}')
|
||||
self.log.debug('Finished getting a total of %d snippets.', total_snippets)
|
||||
log.warning(f'KEYERROR: {exc} not in {snippet}')
|
||||
log.debug('Finished getting a total of %d snippets.', total_snippets)
|
||||
|
||||
def video_is_shorts(video_id, *, timeout=30) -> bool:
    '''
    Return True if youtube treats this video as a short, False if not.

    This sends a HEAD request to https://www.youtube.com/shorts/<video_id>
    without following redirects. A 200 response means the shorts URL is
    served as-is, so the video is a short. A 303 response means it is not.

    timeout:
        Seconds to wait on the request before requests raises a timeout
        error, so that a stalled connection cannot block the caller forever.

    Raises ValueError if the status code is neither 200 nor 303. Statuses
    rejected by httperrors.raise_for_status raise whatever that function
    raises.
    '''
    url = f'https://www.youtube.com/shorts/{video_id}'
    log.loud('Checking if %s is shorts.', video_id)
    # Redirects must not be followed: the 303 itself is the answer, and
    # following it would replace it with the status of the redirect target.
    response = session.head(url, allow_redirects=False, timeout=timeout)
    httperrors.raise_for_status(response)

    if response.status_code == 200:
        return True
    if response.status_code == 303:
        return False

    # Exceptions do not apply %-formatting to their arguments, so the message
    # has to be built before it is passed in.
    raise ValueError(f'Unexpected status code {response.status_code}')
|
||||
|
|
Loading…
Reference in a new issue