"""Convert a VTT-style subtitle file into a JSON backup file."""

import json
import re

# A timing line: "<start> --> <end>". Each timestamp is seconds with exactly
# three fractional digits, optionally preceded by "mm:" or "hh:mm:" fields
# (e.g. "12.345", "01:02.500", "00:01:02.500").
_TIMESTAMP = r'(?:\d+:){0,2}\d+\.\d{3}'
_TIME_PATTERN = re.compile(r'(' + _TIMESTAMP + r') --> (' + _TIMESTAMP + r')')


def _timestamp_to_seconds(timestamp):
    """Return *timestamp* ("[[hh:]mm:]ss.mmm") as a float number of seconds."""
    seconds = 0.0
    # Fold left to right so each extra leading field is worth 60x the next.
    for field in timestamp.split(':'):
        seconds = seconds * 60 + float(field)
    return seconds


def _finish_cue(cue):
    """Return *cue* with its collected text lines joined by newlines."""
    cue['content'] = "\n".join(cue['content'])
    return cue


def parse_vtt(vtt_filename):
    """Parse a VTT-style subtitle file into a list of cue dictionaries.

    The first line of the file (the header) is always skipped. A cue starts
    at a line beginning with "<start> --> <end>" and collects every following
    non-blank line as text until a blank line, the next timing line, or the
    end of the file.

    Args:
        vtt_filename: Path of the UTF-8 encoded subtitle file to read.

    Returns:
        A list of dicts, in file order, each with:
            'start':   cue start time in seconds (float),
            'end':     cue end time in seconds (float),
            'content': the cue's text lines, each stripped of surrounding
                       whitespace and joined with "\\n" (empty string when
                       the cue has no text).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    subtitles = []
    current_subtitle = None  # cue being collected, or None between cues

    with open(vtt_filename, 'r', encoding='utf-8') as file:
        next(file, None)  # skip the header line; tolerate an empty file
        for line in file:
            match = _TIME_PATTERN.match(line)
            text = line.strip()
            if match:
                # A timing line with no blank separator still ends the
                # previous cue instead of silently discarding it.
                if current_subtitle is not None:
                    subtitles.append(_finish_cue(current_subtitle))
                current_subtitle = {
                    'start': _timestamp_to_seconds(match.group(1)),
                    'end': _timestamp_to_seconds(match.group(2)),
                    'content': [],
                }
            elif not text:
                # A blank line terminates the cue in progress, if any.
                if current_subtitle is not None:
                    subtitles.append(_finish_cue(current_subtitle))
                    current_subtitle = None
            elif current_subtitle is not None:
                current_subtitle['content'].append(text)
            # Otherwise: text outside any cue (cue identifiers, notes,
            # header metadata) is ignored rather than raising KeyError.

    # The file may end without a trailing blank line.
    if current_subtitle is not None:
        subtitles.append(_finish_cue(current_subtitle))

    return subtitles


def subtitles_to_backup(subtitles):
    """Wrap *subtitles* in the backup dictionary structure.

    Args:
        subtitles: List of cue dicts as returned by `parse_vtt`.

    Returns:
        A dict holding the subtitles under "subtitles", an empty
        "script_lines" list, "line_index" set to the number of subtitles,
        an empty "current_subtitle" dict, and "play" set to 0.
    """
    backup_data = {
        "subtitles": subtitles,
        "script_lines": [],
        "line_index": len(subtitles),
        "current_subtitle": {},
        "play": 0
    }
    return backup_data


def main(vtt_filename, output_filename):
    """Parse *vtt_filename* and write the backup JSON to *output_filename*.

    Args:
        vtt_filename: Path of the subtitle file to read.
        output_filename: Path of the JSON file to create or overwrite.
    """
    subtitles = parse_vtt(vtt_filename)
    backup_data = subtitles_to_backup(subtitles)

    with open(output_filename, 'w', encoding='utf-8') as json_file:
        json.dump(backup_data, json_file, indent=2)


if __name__ == "__main__":
    # Only convert the default files when executed as a script, so importing
    # this module has no file-system side effects.
    vtt_filename = 'audio.vtt'
    output_filename = 'backup2.json'
    main(vtt_filename, output_filename)