"""Parse WebVTT text into cue dicts, stack cue text, and serialise back."""

import re
from datetime import timedelta

VTT_TIMECODE_PATTERN = r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
VTT_LINE_NUMBER_PATTERN = r"^\d+$"


def from_vtt(vtt_string):
    """Parse WebVTT text into a list of cue dicts.

    Args:
        vtt_string: Full text of a WebVTT document. LF, CRLF and CR line
            endings are all accepted.

    Returns:
        A list of ``{'start': str, 'end': str, 'content': str}`` dicts in
        document order. ``start``/``end`` are the timecode strings exactly as
        written; ``content`` is the cue payload with lines joined by ``"\\n"``.
        Blocks without a recognisable timecode line are skipped.
    """
    # Normalise line endings first; otherwise CRLF input never contains the
    # "\n\n" separator and the whole document collapses into a single block.
    text = vtt_string.replace("\r\n", "\n").replace("\r", "\n").strip()
    parts = re.split(r"\n\n+", text)
    if parts[0].startswith("WEBVTT"):
        parts.pop(0)

    subtitles = []
    for part in parts:
        lines = part.split("\n")
        # Drop an optional numeric cue identifier, but only when a line
        # follows it: a block holding just a number is not a cue, and
        # indexing past it used to raise IndexError.
        if len(lines) > 1 and re.match(VTT_LINE_NUMBER_PATTERN, lines[0]):
            lines = lines[1:]

        match = re.match(VTT_TIMECODE_PATTERN, lines[0])
        if not match:
            continue

        # re.match anchors only at the start, so anything following the end
        # timecode on the same line is ignored.
        start, end = match.groups()
        subtitles.append({
            "start": start,
            "end": end,
            "content": "\n".join(lines[1:]),
        })

    return subtitles


def to_vtt(subtitles):
    """Serialise cue dicts to WebVTT text.

    Args:
        subtitles: Iterable of dicts with ``'start'``, ``'end'`` and
            ``'content'`` keys.

    Returns:
        The document as a string beginning with ``WEBVTT``, with leading and
        trailing whitespace stripped.
    """
    cue_blocks = (
        f"{subtitle['start']} --> {subtitle['end']}\n{subtitle['content']}\n\n"
        for subtitle in subtitles
    )
    return ("WEBVTT\n\n" + "".join(cue_blocks)).strip()


def stack_subtitle(parsed_vtt, ends_section=None):
    """Make each cue show the text of all earlier cues in its section.

    Cues are grouped into consecutive sections. Inside a section, every cue's
    content becomes the newline-joined content of the section's cues up to and
    including itself.

    Args:
        parsed_vtt: List of cue dicts as returned by ``from_vtt``.
        ends_section: Optional callable taking a cue's stripped content and
            returning True when that cue is the last one of its section. When
            omitted, all cues form a single section.

    Returns:
        A new list of cue dicts in the original order. The input dicts are
        not modified.
    """
    sections = []
    current = []
    for cue in parsed_vtt:
        current.append(cue)
        if ends_section is not None and ends_section(cue["content"].strip()):
            sections.append(current)
            current = []
    # Cues after the last section boundary still belong to a section.
    if current:
        sections.append(current)

    stacked = []
    for section in sections:
        shown = []
        for cue in section:
            shown.append(cue["content"])
            # Copy the cue so the caller's parsed data stays untouched.
            stacked.append({**cue, "content": "\n".join(shown)})
    return stacked


def main():
    """Read ``example.vtt``, stack its cues and print the result as WebVTT."""
    # utf-8-sig decodes UTF-8 and drops a leading byte-order mark if present.
    with open("example.vtt", "r", encoding="utf-8-sig") as f:
        vtt_content = f.read()

    parsed_vtt = from_vtt(vtt_content)
    print(to_vtt(stack_subtitle(parsed_vtt)))


if __name__ == "__main__":
    main()