56 lines
1.7 KiB
Python
56 lines
1.7 KiB
Python
import json
|
|
import re
|
|
|
|
# Matches a cue timing line.  Each timestamp is either plain seconds
# ("12.345") or the WebVTT clock form ("mm:ss.ttt" / "hh:mm:ss.ttt").
# Anything after the second timestamp (e.g. cue settings) is ignored.
_CUE_TIMING = re.compile(
    r'((?:\d+:){0,2}\d+\.\d{3}) --> ((?:\d+:){0,2}\d+\.\d{3})'
)


def _timestamp_to_seconds(timestamp):
    """Convert "ss.ttt", "mm:ss.ttt" or "hh:mm:ss.ttt" into float seconds."""
    seconds = 0.0
    for field in timestamp.split(':'):
        # Each field to the left is worth 60x the one to its right.
        seconds = seconds * 60 + float(field)
    return seconds


def _store_cue(subtitles, cue, text_lines):
    """Attach the newline-joined text to *cue* and append it to *subtitles*."""
    cue['content'] = "\n".join(text_lines)
    subtitles.append(cue)


def parse_vtt(vtt_filename):
    """Parse a VTT subtitle file into a list of cue dictionaries.

    The first line of the file (the header line) is always skipped.  A cue
    starts at a timing line ("<start> --> <end>") and collects every
    following non-blank line, stripped of surrounding whitespace, until a
    blank line, the next timing line, or the end of the file.

    Lines that appear outside a cue (header metadata, cue identifiers,
    notes) are ignored, and a cue without any text is kept with empty
    content instead of raising.

    Args:
        vtt_filename: Path of the UTF-8 encoded subtitle file.

    Returns:
        A list of dicts, in file order, each with the keys 'start' and
        'end' (floats, in seconds) and 'content' (the cue text with its
        lines joined by a newline and no trailing newline).
    """
    subtitles = []
    current_subtitle = None  # cue being built; None while between cues
    text_lines = []          # stripped text lines of the cue being built

    with open(vtt_filename, 'r', encoding='utf-8') as file:
        next(file, None)  # skip the header line; harmless on an empty file

        for line in file:
            match = _CUE_TIMING.match(line)
            if match:
                # A timing line with no preceding blank line still ends the
                # previous cue, so that cue is stored rather than lost.
                if current_subtitle is not None:
                    _store_cue(subtitles, current_subtitle, text_lines)
                current_subtitle = {
                    'start': _timestamp_to_seconds(match.group(1)),
                    'end': _timestamp_to_seconds(match.group(2)),
                    'content': "",
                }
                text_lines = []
            elif not line.strip():
                # A blank line terminates the cue in progress, if any.
                if current_subtitle is not None:
                    _store_cue(subtitles, current_subtitle, text_lines)
                    current_subtitle = None
            elif current_subtitle is not None:
                text_lines.append(line.strip())
            # Otherwise: text outside any cue, nothing to attach it to.

    # The file may end without a trailing blank line.
    if current_subtitle is not None:
        _store_cue(subtitles, current_subtitle, text_lines)

    return subtitles
|
|
|
|
def subtitles_to_backup(subtitles):
    """Wrap a list of subtitles in the backup dictionary layout.

    Args:
        subtitles: The subtitle list; it is stored as-is (not copied).

    Returns:
        A new dict whose "subtitles" entry is the given list, whose
        "line_index" entry is the length of that list, and whose
        "script_lines", "current_subtitle" and "play" entries are an empty
        list, an empty dict and 0 respectively.
    """
    subtitle_count = len(subtitles)

    # Filled in key by key so the insertion order matches the output order.
    snapshot = {}
    snapshot["subtitles"] = subtitles
    snapshot["script_lines"] = []
    snapshot["line_index"] = subtitle_count
    snapshot["current_subtitle"] = {}
    snapshot["play"] = 0
    return snapshot
|
|
|
|
def main(vtt_filename, output_filename):
    """Convert a VTT subtitle file into a JSON backup file.

    The file is parsed with parse_vtt, the result is wrapped by
    subtitles_to_backup, and the resulting dictionary is written to
    *output_filename* as UTF-8 JSON indented by two spaces.

    Args:
        vtt_filename: Path of the subtitle file to read.
        output_filename: Path of the JSON file to create or overwrite.
    """
    payload = subtitles_to_backup(parse_vtt(vtt_filename))

    with open(output_filename, mode='w', encoding='utf-8') as sink:
        json.dump(payload, sink, indent=2)
|
|
|
|
# Script entry: convert the hard-coded input file into the backup file.
# NOTE(review): these statements also run when the module is imported;
# consider an `if __name__ == "__main__":` guard if that is not intended.
vtt_filename, output_filename = 'audio.vtt', 'backup2.json'
main(vtt_filename, output_filename)
|