cmd/subtitle_shift_pointsync.py

198 lines
5.8 KiB
Python
Raw Normal View History

2019-06-12 05:41:31 +00:00
import argparse
import bisect
import sys
# Positive infinity: compares greater than every finite timestamp, so it can
# serve as an upper bound that no subtitle time will ever reach.
inf = float('inf')
class Subtitles:
    '''
    A collection of subtitle lines kept in sorted order.
    '''
    def __init__(self, lines):
        '''
        lines: an iterable of mutually comparable subtitle entries. They are
        copied into a new sorted list; the caller's iterable is not modified.
        '''
        self.lines = sorted(lines)

    @classmethod
    def from_text(cls, text):
        '''
        Parse the full text of an SRT file into a Subtitles object.

        Cues are separated by one or more blank lines. Each cue is handed to
        SubtitleLine.from_text. Empty or whitespace-only text produces an
        empty collection instead of an error.
        '''
        # Files read in binary mode or with newline='' may still carry CRLF
        # line endings, which would hide the blank lines between cues.
        text = text.replace('\r\n', '\n').strip()
        if not text:
            # ''.split('\n\n') would yield [''], which is not a valid cue.
            return cls([])

        # Collapse runs of blank lines so the split below never produces
        # blocks that are empty or that start with a newline.
        while '\n\n\n' in text:
            text = text.replace('\n\n\n', '\n\n')

        blocks = text.split('\n\n')
        return cls([SubtitleLine.from_text(block) for block in blocks])

    def __getitem__(self, index):
        return self.lines[index]

    def __len__(self):
        return len(self.lines)

    def __repr__(self):
        return f'Subtitles with {len(self.lines)} lines.'

    def as_srt(self):
        '''
        Return the collection as SRT text, with cues renumbered from 1 in
        sorted order.
        '''
        # self.lines may have been appended to since construction, so sort
        # again rather than trusting the stored order.
        ordered = sorted(self.lines)
        blocks = [
            f'{number}\n{line.as_srt()}'
            for (number, line) in enumerate(ordered, start=1)
        ]
        return '\n\n'.join(blocks)
class SubtitleLine:
    '''
    A single subtitle cue: start and end times in seconds, plus its text.
    '''
    def __init__(self, start, end, text):
        self.start = start
        self.end = end
        self.text = text

    @classmethod
    def from_text(cls, text):
        '''
        Parse one SRT cue block of the form

            index
            hh:mm:ss,mmm --> hh:mm:ss,mmm
            text (zero or more lines)

        The index line is discarded; cues are renumbered on output. A cue
        with no text lines is accepted and gets an empty text.

        Raises ValueError if the block has no timestamp line or the timestamp
        line does not contain exactly one '-->' separator.
        '''
        pieces = text.split('\n', 2)
        if len(pieces) < 2:
            raise ValueError(f'Subtitle block has no timestamp line: {text!r}')

        # Only index + timestamps present means the cue has empty text.
        body = pieces[2] if len(pieces) == 3 else ''

        # SRT uses a comma as the decimal separator.
        timestamps = pieces[1].replace(',', '.')
        endpoints = timestamps.split('-->')
        if len(endpoints) != 2:
            raise ValueError(f'Expected "start --> end" but got {pieces[1]!r}')

        (start, end) = endpoints
        return cls(hms_to_seconds(start), hms_to_seconds(end), body)

    def __lt__(self, other):
        # Order by start time, breaking ties by end time.
        return (self.start, self.end) < (other.start, other.end)

    def __repr__(self):
        return repr(self.as_srt())

    def as_srt(self):
        '''
        Return the timestamp line and text of this cue in SRT form, without
        the leading index line.
        '''
        start = seconds_to_hms(self.start, force_hours=True, force_milliseconds=True)
        end = seconds_to_hms(self.end, force_hours=True, force_milliseconds=True)
        # Convert the decimal point back to SRT's comma.
        srt = f'{start} --> {end}'.replace('.', ',')
        srt += '\n' + self.text
        return srt
class Point:
    '''
    A pair of x and y values. Points compare by x first, then by y.
    '''
    def __init__(self, x, y=None):
        (self.x, self.y) = (x, y)

    def __lt__(self, other):
        mine = (self.x, self.y)
        theirs = (other.x, other.y)
        return mine < theirs

    def __repr__(self):
        return 'Point({}, {})'.format(self.x, self.y)
def hms_to_seconds(hms):
    '''
    Convert an hh:mm:ss, mm:ss, or ss string to a float number of seconds.

    The seconds field may carry a fractional part written with either '.' or
    ',' as the decimal separator. Surrounding whitespace is ignored. A
    leading '-' negates the whole value, so '-1:30' is -90.0.

    Raises ValueError if there are more than three ':' separated fields or
    if any field is not a valid number.
    '''
    text = hms.strip()
    negative = text.startswith('-')
    if negative:
        text = text[1:]

    fields = text.split(':')
    if len(fields) > 3:
        # Anything beyond hh:mm:ss has no defined meaning; refusing it is
        # safer than silently treating it as zero.
        raise ValueError(f'Too many ":" separated fields in timestamp {hms!r}.')

    (*whole_fields, seconds_field) = fields

    # Fold hours and minutes into a running count of minutes.
    whole_minutes = 0
    for field in whole_fields:
        whole_minutes = (whole_minutes * 60) + int(field)

    seconds = (whole_minutes * 60) + float(seconds_field.replace(',', '.'))
    return -seconds if negative else seconds
def seconds_to_hms(seconds, force_hours=False, force_milliseconds=False):
    '''
    Convert a number of seconds to an [hh:][mm:]ss[.mmm] string.

    The value is rounded to the nearest millisecond before being split into
    fields, so a rounded-up fraction carries into seconds, minutes and hours.

    force_hours:
        Always include the hours and minutes fields, even when they are zero.
        Without it, hours appear only when nonzero and minutes appear only
        when hours or minutes are nonzero.

    force_milliseconds:
        Always include the three-digit milliseconds suffix. Without it, the
        suffix appears only when the rounded milliseconds are nonzero.

    Negative values are rendered as the positive value prefixed with '-'.
    '''
    # Work in whole milliseconds so that the carry decision and the printed
    # digits come from the same rounding step. Comparing the raw fraction to a
    # threshold would, for example, push 0.999 up to a full second.
    total_milliseconds = round(seconds * 1000)
    sign = '-' if total_milliseconds < 0 else ''
    (whole_seconds, milliseconds) = divmod(abs(total_milliseconds), 1000)
    (minutes, secs) = divmod(whole_seconds, 60)
    (hours, minutes) = divmod(minutes, 60)

    parts = []
    if hours or force_hours:
        parts.append(hours)
    if hours or minutes or force_hours:
        parts.append(minutes)
    parts.append(secs)

    hms = sign + ':'.join(f'{part:02d}' for part in parts)
    if milliseconds or force_milliseconds:
        hms += f'.{milliseconds:03d}'
    return hms
def linear(slope, intercept):
    '''
    Build the line function for slope m and intercept b, that is, a function
    which maps x to mx + b.

    Every call of the returned function also prints the input and output as
    timestamps along with the slope that was applied.
    '''
    def evaluate(x):
        result = (slope * x) + intercept
        before = seconds_to_hms(x)
        after = seconds_to_hms(result)
        print(before, '-->', after, 'slope=', slope)
        return result

    return evaluate
def slope_intercept(p1, p2):
    '''
    Return (slope, intercept) for the straight line that passes through the
    two given Points. The points must have different x values.
    '''
    rise = p2.y - p1.y
    run = p2.x - p1.x
    slope = rise / run
    # Solve y = slope * x + intercept for the intercept, using p1.
    return (slope, p1.y - (slope * p1.x))
def _parse_landmarks(input_landmarks):
    '''
    Turn 'old=new' timestamp strings into a sorted list of at least two
    Points (x=old, y=new) whose first element has x == 0.
    '''
    landmarks = []
    used_olds = set()
    for landmark in input_landmarks:
        halves = landmark.split('=')
        if len(halves) != 2:
            raise ValueError(f'Landmark {landmark!r} is not in old=new form.')
        (old, new) = (hms_to_seconds(halves[0]), hms_to_seconds(halves[1]))
        if old < 0 or new < 0:
            raise ValueError('No negative numbers!')
        if old in used_olds:
            raise ValueError(f'Cant use the same old value {seconds_to_hms(old, force_hours=True)} twice.')
        used_olds.add(old)
        landmarks.append(Point(old, new))

    # Check for emptiness before looking at landmarks[0] below.
    if not landmarks:
        raise ValueError('Not enough landmarks')

    landmarks.sort()
    # Anchor the mapping at the origin unless the caller placed a landmark
    # at time zero themselves.
    if landmarks[0].x != 0:
        landmarks.insert(0, Point(0, 0))

    if len(landmarks) < 2:
        raise ValueError('Not enough landmarks')
    return landmarks


def _build_segments(landmarks):
    '''
    Return (starts, functions): for each pair of neighboring landmarks, the x
    value where that segment begins and the line function through the pair.
    '''
    starts = []
    functions = []
    for (land1, land2) in zip(landmarks, landmarks[1:]):
        (slope, intercept) = slope_intercept(land1, land2)
        if slope < 0:
            raise ValueError(f'Negative slope between {land1} and {land2}.')
        starts.append(land1.x)
        functions.append(linear(slope, intercept))
    return (starts, functions)


def pointsync(input_filename, output_filename, input_landmarks):
    '''
    Retime an SRT file using a piecewise-linear mapping defined by landmarks.

    input_filename:
        Path of the SRT file to read, as UTF-8.

    output_filename:
        Path of the SRT file to write, as UTF-8.

    input_landmarks:
        Strings of the form 'old=new', where both sides are timestamps
        accepted by hms_to_seconds. Each one states that time `old` in the
        input should become time `new` in the output. A landmark 0=0 is
        added automatically unless a landmark at old time 0 is given.

    Times between two landmarks are interpolated along the straight line
    joining them. Times after the last landmark continue along the last
    segment.

    Raises ValueError if a landmark is malformed or negative, if an old time
    is repeated, if fewer than two landmarks result, or if any segment would
    run backwards (negative slope).
    '''
    landmarks = _parse_landmarks(input_landmarks)
    (starts, functions) = _build_segments(landmarks)

    def remap(timestamp):
        # Look up each timestamp on its own. One forward-moving pointer
        # shared by start and end times picks the wrong segment when a gap
        # between lines spans several landmarks, or when a line's end has
        # crossed a landmark that the next line's start has not.
        # A timestamp equal to a landmark belongs to the segment that starts
        # there; anything before the first start uses the first segment.
        index = max(bisect.bisect_right(starts, timestamp) - 1, 0)
        return functions[index](timestamp)

    with open(input_filename, encoding='utf-8') as handle:
        old_srt = Subtitles.from_text(handle.read())

    new_lines = [
        SubtitleLine(remap(old_line.start), remap(old_line.end), old_line.text)
        for old_line in old_srt
    ]
    new_srt = Subtitles(new_lines)

    # The context manager guarantees the output is flushed and closed.
    with open(output_filename, 'w', encoding='utf-8') as handle:
        handle.write(new_srt.as_srt())
def pointsync_argparse(args):
    '''
    Run pointsync using the attributes of a parsed argument namespace:
    input_filename, output_filename and landmarks.

    Raises ValueError if the output filename does not contain '.srt'.
    '''
    output_filename = args.output_filename
    # NOTE(review): presumably this guards against a landmark being taken as
    # the output filename when that argument is forgotten -- confirm.
    if '.srt' not in output_filename:
        raise ValueError(f'Output filename {output_filename!r} does not contain ".srt".')
    return pointsync(args.input_filename, output_filename, args.landmarks)
def main(argv):
    '''
    Parse the command-line arguments in argv (program name excluded) and run
    the handler registered on the parser, returning whatever it returns.
    '''
    parser = argparse.ArgumentParser(description=__doc__)
    for positional in ('input_filename', 'output_filename'):
        parser.add_argument(positional)
    parser.add_argument('landmarks', nargs='*', default=None)
    parser.set_defaults(func=pointsync_argparse)

    parsed = parser.parse_args(argv)
    handler = parsed.func
    return handler(parsed)
2019-06-12 05:41:31 +00:00
if __name__ == '__main__':
    # Use main's return value as the process exit status.
    exit_status = main(sys.argv[1:])
    raise SystemExit(exit_status)