Fix

2024-02-17 08:40:45 +09:00 · 2024-02-17 08:40:45 +09:00 · 826aca5c9f
parent 368c26d491
commit 826aca5c9f
6 changed files with 1306 additions and 1056 deletions
--- a/backup.py
+++ b/backup.py
@ -1,11 +1,12 @@
 import json
 import re

+
 def parse_vtt(vtt_filename):
-    with open(vtt_filename, 'r', encoding='utf-8') as file:
+    with open(vtt_filename, "r", encoding="utf-8") as file:
        lines = file.readlines()

-    time_pattern = re.compile(r'(\d+\.\d{3}) --> (\d+\.\d{3})')
+    time_pattern = re.compile(r"(\d+\.\d{3}) --> (\d+\.\d{3})")

    subtitles = []
    current_subtitle = {}
@ -13,43 +14,47 @@ def parse_vtt(vtt_filename):
    for line in lines[1:]:
        match = time_pattern.match(line)
        if match:
-            current_subtitle['start'] = float(match.group(1))
-            current_subtitle['end'] = float(match.group(2))
-            current_subtitle['content'] = ""
-        elif line.strip() == '':
+            current_subtitle["start"] = float(match.group(1))
+            current_subtitle["end"] = float(match.group(2))
+            current_subtitle["content"] = ""
+        elif line.strip() == "":
            if current_subtitle:
-                if current_subtitle['content'][-1] == "\n":
-                    current_subtitle['content'] = current_subtitle['content'][:-1]
+                if current_subtitle["content"][-1] == "\n":
+                    current_subtitle["content"] = current_subtitle["content"][:-1]
                subtitles.append(current_subtitle)
                current_subtitle = {}
        else:
-            current_subtitle['content'] += line.strip() + "\n"  # Space to separate lines
+            current_subtitle["content"] += (
+                line.strip() + "\n"
+            )  # Space to separate lines

    if current_subtitle:
-        if current_subtitle['content'][-1] == "\n":
-            current_subtitle['content'] = current_subtitle['content'][:-1]
+        if current_subtitle["content"][-1] == "\n":
+            current_subtitle["content"] = current_subtitle["content"][:-1]
        subtitles.append(current_subtitle)

    return subtitles

-def subtitles_to_backup(subtitles):

+def subtitles_to_backup(subtitles):
    backup_data = {
-        "subtitles": subtitles, 
+        "subtitles": subtitles,
        "script_lines": [],
        "line_index": len(subtitles),
-        "current_subtitle": {}, 
-        "play": 0
+        "current_subtitle": {},
+        "play": 0,
    }
    return backup_data

+
 def main(vtt_filename, output_filename):
    subtitles = parse_vtt(vtt_filename)
    backup_data = subtitles_to_backup(subtitles)

-    with open(output_filename, 'w', encoding='utf-8') as json_file:
+    with open(output_filename, "w", encoding="utf-8") as json_file:
        json.dump(backup_data, json_file, indent=2)

-vtt_filename = 'audio.vtt'
-output_filename = 'backup2.json'
+
+vtt_filename = "audio.vtt"
+output_filename = "backup2.json"
 main(vtt_filename, output_filename)
--- a/snusub.py
+++ b/snusub.py
@ -6,410 +6,479 @@ from datetime import timedelta

 ###

+
 def from_vtt(vtt_string):
-  VTT_TIMECODE_PATTERN = r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
-  VTT_LINE_NUMBER_PATTERN = r"^\d+$"
-  parts = re.split(r'\n\n+', vtt_string.strip())
-  if parts[0].startswith('WEBVTT'):
-    parts.pop(0)
+    VTT_TIMECODE_PATTERN = (
+        r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
+    )
+    VTT_LINE_NUMBER_PATTERN = r"^\d+$"
+    parts = re.split(r"\n\n+", vtt_string.strip())
+    if parts[0].startswith("WEBVTT"):
+        parts.pop(0)

-  subtitles = []
-  for part in parts:
-    lines = part.split('\n')
-    match = re.match(VTT_TIMECODE_PATTERN, lines[0])
-    if not match:
-      if re.match(VTT_LINE_NUMBER_PATTERN, lines[0]):
-        lines.pop(0)
-      match = re.match(VTT_TIMECODE_PATTERN, lines[0])
-    if not match:
-      continue
+    subtitles = []
+    for part in parts:
+        lines = part.split("\n")
+        match = re.match(VTT_TIMECODE_PATTERN, lines[0])
+        if not match:
+            if re.match(VTT_LINE_NUMBER_PATTERN, lines[0]):
+                lines.pop(0)
+            match = re.match(VTT_TIMECODE_PATTERN, lines[0])
+        if not match:
+            continue

-    start, end = match.groups()
-    content = '\n'.join(lines[1:]) + "\n"
-    # if start == end:
-    #   continue
-      
-    subtitles.append({
-      'start': start,
-      'end': end,
-      'content': (content.replace("-\n", "\n").replace("</u>-\n", "</u>\n").replace("-", " ").replace("%", " ").replace("<u> "," <u>").replace(" </u>","</u> ").replace("<u> </u>","").replace("<u></u>","").replace(" \n", "\n"))[:-1]
-    })
+        start, end = match.groups()
+        content = "\n".join(lines[1:]) + "\n"
+        # if start == end:
+        #   continue
+
+        subtitles.append(
+            {
+                "start": start,
+                "end": end,
+                "content": (
+                    content.replace("-\n", "\n")
+                    .replace("</u>-\n", "</u>\n")
+                    .replace("-", " ")
+                    .replace("%", " ")
+                    .replace("<u> ", " <u>")
+                    .replace(" </u>", "</u> ")
+                    .replace("<u> </u>", "")
+                    .replace("<u></u>", "")
+                    .replace(" \n", "\n")
+                )[:-1],
+            }
+        )
+
+    return subtitles

-  return subtitles

 def to_vtt(subtitles):
    vtt_content = "WEBVTT\n\n\n"
    for idx, subtitle in enumerate(subtitles):
-        content = subtitle['content']
+        content = subtitle["content"]
        if not subtitle.get("split", False):
-          start = subtitle['start']
-          end = subtitle['end']
-          if not start or not end or start == end:
-            raise Exception(f"VTT timestamp parse error from #{idx}.")
-          vtt_content += f"{start} --> {end}\n{content}\n\n\n"
+            start = subtitle["start"]
+            end = subtitle["end"]
+            if not start or not end or start == end:
+                raise Exception(f"VTT timestamp parse error from #{idx}.")
+            vtt_content += f"{start} --> {end}\n{content}\n\n\n"
        else:
-          vtt_content += f"NOTE {content}\n\n\n"
+            vtt_content += f"NOTE {content}\n\n\n"

    return vtt_content.strip()

-def to_stacked_vtt(subtitles, continous = True):
-  vtt_content = "WEBVTT\n\n\n"
-  buffer = ""
-  for n, subtitle in enumerate(subtitles):
-    if subtitle.get("split", False):
-      buffer = ""
-      continue

-    if len(buffer) != 0:
-      if str(subtitle['content'].strip())[-1] == ".":
-        buffer += "\n"
-      else:
-        buffer += " "
+def to_stacked_vtt(subtitles, continous=True):
+    vtt_content = "WEBVTT\n\n\n"
+    buffer = ""
+    for n, subtitle in enumerate(subtitles):
+        if subtitle.get("split", False):
+            buffer = ""
+            continue

-    buffer += subtitle['content'].strip()
+        if len(buffer) != 0:
+            if str(subtitle["content"].strip())[-1] == ".":
+                buffer += "\n"
+            else:
+                buffer += " "

-    if n < len(subtitles) - 1:
-      end_time = subtitles[n+1]['start'] if continous and not subtitles[n+1].get("split", False) else subtitle['end']
-    else:
-      end_time = subtitle['end']
-    
-    if not subtitle['start'] or not end_time:
-      raise Exception(f"VTT timestamp parse error from #{idx}.")
-    if subtitle['start'] == end_time:
-      raise Exception(f"Error, subtitle timestamp overlaps.\n{subtitle['start']} --> {end_time} {subtitle['content'].strip()}")
-    vtt_content += f"{subtitle['start']} --> {end_time}\n"
-    vtt_content += buffer
-    vtt_content += "\n\n\n"
+        buffer += subtitle["content"].strip()

-    print(f"{subtitle['start']} --> {end_time}\n{buffer}\n\n")
+        if n < len(subtitles) - 1:
+            end_time = (
+                subtitles[n + 1]["start"]
+                if continous and not subtitles[n + 1].get("split", False)
+                else subtitle["end"]
+            )
+        else:
+            end_time = subtitle["end"]
+
+        if not subtitle["start"] or not end_time:
+            raise Exception(f"VTT timestamp parse error from #{idx}.")
+        if subtitle["start"] == end_time:
+            raise Exception(
+                f"Error, subtitle timestamp overlaps.\n{subtitle['start']} --> {end_time} {subtitle['content'].strip()}"
+            )
+        vtt_content += f"{subtitle['start']} --> {end_time}\n"
+        vtt_content += buffer
+        vtt_content += "\n\n\n"
+
+        print(f"{subtitle['start']} --> {end_time}\n{buffer}\n\n")
+
+    return vtt_content

-  return vtt_content

 ###

+
 def script_from_word_vtt(wordvtt):
-  subtitles = from_vtt(wordvtt)
-  print(f"Generating script file from VTT...")
-  sentences = []
-  ADD_NEXT_SENTENCE = 0
-  for n, subtitle in enumerate(subtitles):
-    sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
-    if ((sentences[-1] if sentences else None) != sentence) or ADD_NEXT_SENTENCE:
-      sentences.append(sentence)
-      ADD_NEXT_SENTENCE = 0
-    if subtitle["content"][-4:] == "</u>":
-      ADD_NEXT_SENTENCE = 1
-      if n + 2 < len(subtitles):
-        if subtitles[n+2]["content"].replace("<u>", "").replace("</u>", "") != sentence:
-          ADD_NEXT_SENTENCE = 0
-  return sentences
+    subtitles = from_vtt(wordvtt)
+    print(f"Generating script file from VTT...")
+    sentences = []
+    ADD_NEXT_SENTENCE = 0
+    for n, subtitle in enumerate(subtitles):
+        sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
+        if ((sentences[-1] if sentences else None) != sentence) or ADD_NEXT_SENTENCE:
+            sentences.append(sentence)
+            ADD_NEXT_SENTENCE = 0
+        if subtitle["content"][-4:] == "</u>":
+            ADD_NEXT_SENTENCE = 1
+            if n + 2 < len(subtitles):
+                if (
+                    subtitles[n + 2]["content"].replace("<u>", "").replace("</u>", "")
+                    != sentence
+                ):
+                    ADD_NEXT_SENTENCE = 0
+    return sentences
+

 def create_word_scenes(raw_vtt, raw_script):
-  subtitles = from_vtt(raw_vtt)
-  scripts   = [i for i in raw_script.split("\n") if i]
-  print(f"Found {len(subtitles)} subtitles, {len(scripts)} scenes.\nTimestamping each words...")
+    subtitles = from_vtt(raw_vtt)
+    scripts = [i for i in raw_script.split("\n") if i]
+    print(
+        f"Found {len(subtitles)} subtitles, {len(scripts)} scenes.\nTimestamping each words..."
+    )

-  scenes = []
-  for n, script in enumerate(scripts):
-    if len(script.split(" ")) == 1:
-      continue
-    scenes.append({"scene": script, "timestamp": []})
+    scenes = []
+    for n, script in enumerate(scripts):
+        if len(script.split(" ")) == 1:
+            continue
+        scenes.append({"scene": script, "timestamp": []})

-  scenes_cur = 0
-  for n, subtitle in enumerate(subtitles):
-    sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
-    if len(sentence.split(" ")) == 1:
-      continue
+    scenes_cur = 0
+    for n, subtitle in enumerate(subtitles):
+        sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
+        if len(sentence.split(" ")) == 1:
+            continue

-    if sentence != scenes[scenes_cur].get("scene"):
-      if sentence == scenes[scenes_cur+1].get("scene"):
-        scenes_cur += 1
-      else:
-        raise Exception(f"Error, Failed to match sentence with scene.\n\"{scenes[scenes_cur].get("scene")}\" or \"[{scenes_cur+1}] {scenes[scenes_cur+1].get("scene")}\" != \"{sentence}\"")
+        if sentence != scenes[scenes_cur].get("scene"):
+            if sentence == scenes[scenes_cur + 1].get("scene"):
+                scenes_cur += 1
+            else:
+                raise Exception(
+                    f"Error, Failed to match sentence with scene.\n\"{scenes[scenes_cur].get("scene")}\" or \"[{scenes_cur+1}] {scenes[scenes_cur+1].get("scene")}\" != \"{sentence}\""
+                )

-    current_scene = scenes[scenes_cur]
-    if current_scene["timestamp"]:
-      word_idx = current_scene["timestamp"][-1]["index"] + 1
-    else:
-      word_idx = 0
+        current_scene = scenes[scenes_cur]
+        if current_scene["timestamp"]:
+            word_idx = current_scene["timestamp"][-1]["index"] + 1
+        else:
+            word_idx = 0

-    if ("<u>" not in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
-      # Ignore trailing dummy subtitle after last word indexed.
-      pass
+        if ("<u>" not in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
+            # Ignore trailing dummy subtitle after last word indexed.
+            pass

-    if ("<u>" in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
-      # If there is trailing non-dummy timestamped subtitle, Reset word_idx and step to next scene. (Repeating sentence doesnt increment cur.)
-      print(f"Error, Index wrong. {scenes_cur}, word: {word_idx}, total words: {len(sentence.split(" "))}\n{subtitle}")
-      word_idx = 0
-      scenes_cur += 1
-      current_scene = scenes[scenes_cur]
-      if current_scene["timestamp"]:
-        word_idx = current_scene["timestamp"][-1]["index"] + 1
-      else:
-        word_idx = 0
-      print(f"Changed to {word_idx}, {scenes_cur}")
+        if ("<u>" in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
+            # If there is trailing non-dummy timestamped subtitle, Reset word_idx and step to next scene. (Repeating sentence doesnt increment cur.)
+            print(
+                f"Error, Index wrong. {scenes_cur}, word: {word_idx}, total words: {len(sentence.split(" "))}\n{subtitle}"
+            )
+            word_idx = 0
+            scenes_cur += 1
+            current_scene = scenes[scenes_cur]
+            if current_scene["timestamp"]:
+                word_idx = current_scene["timestamp"][-1]["index"] + 1
+            else:
+                word_idx = 0
+            print(f"Changed to {word_idx}, {scenes_cur}")

-    # Start matching words.
-    if "<u>" in subtitle["content"]:
-      word = subtitle["content"].split("<u>")[1].split("</u>")[0]
+        # Start matching words.
+        if "<u>" in subtitle["content"]:
+            word = subtitle["content"].split("<u>")[1].split("</u>")[0]

-      if word not in sentence.split(" "):
-        raise Exception(f"Error, Mismatch\n=> \"{word}\" not in \"{sentence}\"")
-        return
+            if word not in sentence.split(" "):
+                raise Exception(f'Error, Mismatch\n=> "{word}" not in "{sentence}"')
+                return

-      try:
-        assert sentence.split(" ")[word_idx] == word
-      except:
-        raise Exception(f"Error, Mismatch\n=> \"{word}\" != [{word_idx}] of \"{sentence}\"")
+            try:
+                assert sentence.split(" ")[word_idx] == word
+            except:
+                raise Exception(
+                    f'Error, Mismatch\n=> "{word}" != [{word_idx}] of "{sentence}"'
+                )

-      word_time = {"start": subtitle["start"], "end": subtitle["end"], "index": word_idx, "word": word}
-      current_scene["timestamp"].append(word_time)
+            word_time = {
+                "start": subtitle["start"],
+                "end": subtitle["end"],
+                "index": word_idx,
+                "word": word,
+            }
+            current_scene["timestamp"].append(word_time)

-  for scene in scenes:
-    if len(scene["scene"].split(" ")) != len(scene["timestamp"]):
-      raise Exception("Error, Scene length and timestamp length doesnt match.")
-    if "" in scene["scene"].split(" "):
-      print(repr(scene["scene"]))
+    for scene in scenes:
+        if len(scene["scene"].split(" ")) != len(scene["timestamp"]):
+            raise Exception("Error, Scene length and timestamp length doesnt match.")
+        if "" in scene["scene"].split(" "):
+            print(repr(scene["scene"]))

-  full_script, full_scenes = [], []
-  for scene in scenes:
-    full_script += scene["scene"].split(" ")[:-1]
-    full_script.append(scene["scene"].split(" ")[-1]+"##")
-    full_scenes += scene["timestamp"]
+    full_script, full_scenes = [], []
+    for scene in scenes:
+        full_script += scene["scene"].split(" ")[:-1]
+        full_script.append(scene["scene"].split(" ")[-1] + "##")
+        full_scenes += scene["timestamp"]

-  for i, j in zip(full_script, full_scenes):
-    if i.replace("##", "") != j["word"]:
-      raise Exception("Error, Mismatch")
-      return
+    for i, j in zip(full_script, full_scenes):
+        if i.replace("##", "") != j["word"]:
+            raise Exception("Error, Mismatch")
+            return
+
+    assert len(full_scenes) == len(full_script)
+
+    return full_script, full_scenes

-  assert len(full_scenes) == len(full_script)

-  return full_script, full_scenes
-  
 def scene_from_new_script(raw_script, full_script, full_scenes):
-  mod_script = raw_script.replace("\n", " \n ").split(" ")
-  mod_script = [i for i in mod_script if i]
-  n = 0
-  while True:
-    if mod_script[n] == "\n":
-      mod_script[n-1] += "\n"
-      del(mod_script[n])
-      n -= 1
-    n += 1
-    if n == len(mod_script):
-      break
-  
-  print(f"Original: {len(full_script)}, Modded: {len(mod_script)}")
-  allowed_list = [".", "\n", "\n\n", ",", "?", "##"]
+    mod_script = raw_script.replace("\n", " \n ").split(" ")
+    mod_script = [i for i in mod_script if i]
+    n = 0
+    while True:
+        if mod_script[n] == "\n":
+            mod_script[n - 1] += "\n"
+            del mod_script[n]
+            n -= 1
+        n += 1
+        if n == len(mod_script):
+            break
+
+    print(f"Original: {len(full_script)}, Modded: {len(mod_script)}")
+    allowed_list = [".", "\n", "\n\n", ",", "?", "##"]
+
+    def normalized(x):
+        for i in allowed_list:
+            x = x.replace(i, "")
+        return x.upper()
+
+    same = lambda a, b: normalized(a) == normalized(b)
+    new_script, new_timestamp, orig_index, n = [], [], 0, 0
+    fail = 0
+    while n < len(mod_script):
+        print(f"{repr(mod_script[n]):>20} ? {repr(full_script[orig_index])}")
+        word = mod_script[n]
+        if same(word, full_script[orig_index].replace("##", "")):
+            cur = full_scenes[orig_index]
+            new_script.append(word.replace("##", ""))
+            new_timestamp.append({"start": cur["start"], "end": cur["end"]})
+            fail = 0
+        else:
+            if fail > 10:
+                raise Exception("Error: Failed to match words,")
+                return
+            fail += 1
+            n -= 1
+        n, orig_index = n + 1, orig_index + 1
+    assert len(new_script) == len(new_timestamp)
+    return new_script, new_timestamp

-  def normalized(x):
-    for i in allowed_list:
-      x = x.replace(i, "")
-    return x.upper()
-  
-  same = lambda a, b: normalized(a) == normalized(b)
-  new_script, new_timestamp, orig_index, n = [], [], 0, 0
-  fail = 0
-  while n < len(mod_script):
-    print(f"{repr(mod_script[n]):>20} ? {repr(full_script[orig_index])}")
-    word = mod_script[n]
-    if same(word, full_script[orig_index].replace("##", "")):
-      cur = full_scenes[orig_index]
-      new_script.append(word.replace("##", ""))
-      new_timestamp.append({"start": cur["start"], "end": cur["end"]})
-      fail = 0
-    else:
-      if fail > 10:
-        raise Exception("Error: Failed to match words,")
-        return
-      fail += 1
-      n -= 1
-    n, orig_index = n+1, orig_index+1
-  assert len(new_script) == len(new_timestamp)
-  return new_script, new_timestamp

 def build_new_subtitle(new_script, new_timestamp):
-  buffer, new_scenes, start, end = [], [], None, None
-  current_scene = []
- 
-  for i, j in zip(new_script, new_timestamp):
-    buffer.append(i.replace("\n", ""))
-    if not start:
-      start = j["start"]
+    buffer, new_scenes, start, end = [], [], None, None
+    current_scene = []

-    if "\n" in i:
-      current_scene.append({"content": " ".join(buffer).replace("##", ""), "start": start, "end": j["end"]})
-      buffer, start = [], None
- 
-    if "\n\n" in i:
-      print(f"Section break at line #{len(current_scene):<3}| \"{current_scene[-1]["content"]}\"")
-      new_scenes.append(current_scene)
-      current_scene = []
+    for i, j in zip(new_script, new_timestamp):
+        buffer.append(i.replace("\n", ""))
+        if not start:
+            start = j["start"]

-  if start:
-      buffer.append(i.replace("\n", ""))
-      current_scene.append({"content": " ".join(buffer), "start": start, "end": j["end"]})
+        if "\n" in i:
+            current_scene.append(
+                {
+                    "content": " ".join(buffer).replace("##", ""),
+                    "start": start,
+                    "end": j["end"],
+                }
+            )
+            buffer, start = [], None

-  if current_scene != (new_scenes[-1] if new_scenes else None):
-    new_scenes.append(current_scene)
+        if "\n\n" in i:
+            print(
+                f"Section break at line #{len(current_scene):<3}| \"{current_scene[-1]["content"]}\""
+            )
+            new_scenes.append(current_scene)
+            current_scene = []

-  newsub = []
-  for n, i in enumerate(new_scenes):
-    newsub += i
-    if n < len(new_scenes) - 1:
-      newsub.append({"content": "Break", "start": None, "end": None, "split": True})
+    if start:
+        buffer.append(i.replace("\n", ""))
+        current_scene.append(
+            {"content": " ".join(buffer), "start": start, "end": j["end"]}
+        )
+
+    if current_scene != (new_scenes[-1] if new_scenes else None):
+        new_scenes.append(current_scene)
+
+    newsub = []
+    for n, i in enumerate(new_scenes):
+        newsub += i
+        if n < len(new_scenes) - 1:
+            newsub.append(
+                {"content": "Break", "start": None, "end": None, "split": True}
+            )
+
+    return newsub

-  return newsub

 ###

+
 def autobreak(lines, times):
-  from datetime import timedelta
+    from datetime import timedelta

-  def parsetime(time_str):
-    minutes, seconds = time_str.split(':')
-    seconds, milliseconds = seconds.split('.')
-    td = timedelta(minutes=int(minutes), seconds=int(seconds), milliseconds=int(milliseconds))
-    return td
+    def parsetime(time_str):
+        minutes, seconds = time_str.split(":")
+        seconds, milliseconds = seconds.split(".")
+        td = timedelta(
+            minutes=int(minutes), seconds=int(seconds), milliseconds=int(milliseconds)
+        )
+        return td

-  script = []
-  long_breaks = []
-  tmark = parsetime("0:0.0")
-  for i, j in zip(lines, times):
-    tdiff = parsetime(j["start"]) - tmark
-    tmark = parsetime(j["end"])
-    if tdiff > parsetime("0:0.0"):
-      long_breaks.append(tdiff)
+    script = []
+    long_breaks = []
+    tmark = parsetime("0:0.0")
+    for i, j in zip(lines, times):
+        tdiff = parsetime(j["start"]) - tmark
+        tmark = parsetime(j["end"])
+        if tdiff > parsetime("0:0.0"):
+            long_breaks.append(tdiff)

-  mean_break = parsetime("0:0.0")
-  for i in long_breaks:
-    mean_break += i/len(long_breaks)
-  print(mean_break)
+    mean_break = parsetime("0:0.0")
+    for i in long_breaks:
+        mean_break += i / len(long_breaks)
+    print(mean_break)

-  script = ""
-  tmark = parsetime("0:0.0")
-  tmp = " "
+    script = ""
+    tmark = parsetime("0:0.0")
+    tmp = " "

-  continous_line = 0
-  for i, j in zip(lines, times):
-    tdiff = parsetime(j["start"]) - tmark
-    tmark = parsetime(j["end"])
-    if tdiff > mean_break and tmp[-1] != ".":
-      script += "\n"
+    continous_line = 0
+    for i, j in zip(lines, times):
+        tdiff = parsetime(j["start"]) - tmark
+        tmark = parsetime(j["end"])
+        if tdiff > mean_break and tmp[-1] != ".":
+            script += "\n"

-    if (tdiff >= mean_break and tmp[-1] == "."):
-        script += "\n"
-        continous_line = 0
-    else:
-      continous_line += 1
+        if tdiff >= mean_break and tmp[-1] == ".":
+            script += "\n"
+            continous_line = 0
+        else:
+            continous_line += 1

-    script += i.replace("##", "")
+        script += i.replace("##", "")
+
+        if i[-1] == ".":
+            script += "\n"
+        elif "##" in i:
+            script += "\n"
+        else:
+            script += " "
+        tmp = i
+
+    return script

-    if i[-1] == ".":
-      script += "\n"
-    elif "##" in i:
-        script += "\n"
-    else:
-      script += " " 
-    tmp = i
-  
-  return script

 ############################################

-def saveFile(filename, data, override = False):
-  if os.path.exists(filename) and not override:
-    raise Exception(f"File {filename} already exists.")
-    return
-  with open(filename, "w") as f:
-    f.write(data)
+
+def saveFile(filename, data, override=False):
+    if os.path.exists(filename) and not override:
+        raise Exception(f"File {filename} already exists.")
+        return
+    with open(filename, "w") as f:
+        f.write(data)
+

 def openFile(filename):
-  if not os.path.exists(filename):
-    raise Exception(f"File {filename} doesnt exists.")
-    return
-  with open(filename, "r") as f:
-    data = f.read()
-  if not data:
-    raise Exception("Data empty.")
-    return
-  return data
+    if not os.path.exists(filename):
+        raise Exception(f"File {filename} doesnt exists.")
+        return
+    with open(filename, "r") as f:
+        data = f.read()
+    if not data:
+        raise Exception("Data empty.")
+        return
+    return data
+

 ############################################

-if __name__=="__main__":
-  PROG = sys.argv[0].split("/")[-1]
-  if len(sys.argv) not in (3, 4):
-    print( \
-f"""Usage: {PROG} [COMMAND] [FILES]...
+if __name__ == "__main__":
+    PROG = sys.argv[0].split("/")[-1]
+    if len(sys.argv) not in (3, 4):
+        print(
+            f"""Usage: {PROG} [COMMAND] [FILES]...

 Commands:
 - script   <VTT file>                    Generates script file from vtt file.
 - apply    <VTT file> <script file>      Applies new scripted file to create JSON file.
 - create   <JSON file>                   Creates new vtt from given JSON.
- """)                              
-    sys.exit()
+ """
+        )
+        sys.exit()

-  COMMAND = sys.argv[1]
-  if COMMAND not in ["script", "apply", "create"]:
-    print("Error. Command not found.")
-    sys.exit()
+    COMMAND = sys.argv[1]
+    if COMMAND not in ["script", "apply", "create"]:
+        print("Error. Command not found.")
+        sys.exit()

-  print(f"-> {sys.argv}")
-  if COMMAND == "script":
-    FILE = sys.argv[2]
-    if (not os.path.exists(FILE)):
-      print(f"Input file doesnt exists.")
-      sys.exit(-1)
+    print(f"-> {sys.argv}")
+    if COMMAND == "script":
+        FILE = sys.argv[2]
+        if not os.path.exists(FILE):
+            print(f"Input file doesnt exists.")
+            sys.exit(-1)

-    modfile = ".".join(scriptfile.split(".")[:-1]) + ".script"
-    x = create_word_scenes(openFile(FILE), "\n".join(script_from_word_vtt(openFile(FILE))))
-    if not x:
-      sys.exit(-1)
+        modfile = ".".join(scriptfile.split(".")[:-1]) + ".script"
+        x = create_word_scenes(
+            openFile(FILE), "\n".join(script_from_word_vtt(openFile(FILE)))
+        )
+        if not x:
+            sys.exit(-1)

-    full_script, full_scenes = x
-    genscript = autobreak(full_script, full_scenes)
-    saveFile(modfile, genscript)
-    print(f"Saved script file {modfile}.")
-  
-  elif COMMAND == "apply":
-    if len(sys.argv) != 4:
-      print(f"Not sufficient input.")
-      sys.exit()
+        full_script, full_scenes = x
+        genscript = autobreak(full_script, full_scenes)
+        saveFile(modfile, genscript)
+        print(f"Saved script file {modfile}.")

-    FILE1, FILE2 = sys.argv[2], sys.argv[3]
-    if (not os.path.exists(FILE1)) or (not os.path.exists(FILE2)):
-      print(f"Input file doesnt exists.")
-      sys.exit(-1)
+    elif COMMAND == "apply":
+        if len(sys.argv) != 4:
+            print(f"Not sufficient input.")
+            sys.exit()

-    x = create_word_scenes(openFile(FILE1), "\n".join(script_from_word_vtt(openFile(FILE1))))
-    if not x:
-      sys.exit(-1)
-    full_script, full_scenes = x
+        FILE1, FILE2 = sys.argv[2], sys.argv[3]
+        if (not os.path.exists(FILE1)) or (not os.path.exists(FILE2)):
+            print(f"Input file doesnt exists.")
+            sys.exit(-1)

-    x = scene_from_new_script(openFile(FILE2), full_script, full_scenes)
-    if not x:
-      sys.exit(-1)
-    a, b = x
+        x = create_word_scenes(
+            openFile(FILE1), "\n".join(script_from_word_vtt(openFile(FILE1)))
+        )
+        if not x:
+            sys.exit(-1)
+        full_script, full_scenes = x

-    final_sub = build_new_subtitle(a, b)
-    jsonfile = ".".join(FILE1.split(".")[:-1]) + ".json"
-    saveFile(jsonfile, json.dumps(final_sub, indent=2), True)
-    print(f"Saved JSON file {jsonfile}.")
-    sys.exit(0)
-  
-  elif COMMAND == "create":
-    FILE = sys.argv[2]
-    if (not os.path.exists(FILE)):
-      print(f"Input file doesnt exists.")
-      sys.exit(-1)
+        x = scene_from_new_script(openFile(FILE2), full_script, full_scenes)
+        if not x:
+            sys.exit(-1)
+        a, b = x

-    final_vtt = json.loads(openFile(FILE))
-    orgf = ".".join(FILE.split(".")[:-1])
-    print(f"Saved VTT file as {orgf}.final.vtt.")
+        final_sub = build_new_subtitle(a, b)
+        jsonfile = ".".join(FILE1.split(".")[:-1]) + ".json"
+        saveFile(jsonfile, json.dumps(final_sub, indent=2), True)
+        print(f"Saved JSON file {jsonfile}.")
+        sys.exit(0)

-    if os.path.exists(orgf + ".vtt"):
-      saveFile(orgf + ".stacked.vtt", to_stacked_vtt(final_vtt), True)
-    else:
-      saveFile(orgf + ".vtt", to_stacked_vtt(final_vtt), True)
-    sys.exit(0)
+    elif COMMAND == "create":
+        FILE = sys.argv[2]
+        if not os.path.exists(FILE):
+            print(f"Input file doesnt exists.")
+            sys.exit(-1)
+
+        final_vtt = json.loads(openFile(FILE))
+        orgf = ".".join(FILE.split(".")[:-1])
+        print(f"Saved VTT file as {orgf}.final.vtt.")
+
+        if os.path.exists(orgf + ".vtt"):
+            saveFile(orgf + ".stacked.vtt", to_stacked_vtt(final_vtt), True)
+        else:
+            saveFile(orgf + ".vtt", to_stacked_vtt(final_vtt), True)
+        sys.exit(0)
--- a/stackvtt.py
+++ b/stackvtt.py
@ -1,18 +1,21 @@
 import re
 from datetime import timedelta

-VTT_TIMECODE_PATTERN = r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
+VTT_TIMECODE_PATTERN = (
+    r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
+)
 VTT_LINE_NUMBER_PATTERN = r"^\d+$"

-def from_vtt(vtt_string):
-    parts = re.split(r'\n\n+', vtt_string.strip())

-    if parts[0].startswith('WEBVTT'):
+def from_vtt(vtt_string):
+    parts = re.split(r"\n\n+", vtt_string.strip())
+
+    if parts[0].startswith("WEBVTT"):
        parts.pop(0)

    subtitles = []
    for part in parts:
-        lines = part.split('\n')
+        lines = part.split("\n")
        match = re.match(VTT_TIMECODE_PATTERN, lines[0])
        if not match:
            if re.match(VTT_LINE_NUMBER_PATTERN, lines[0]):
@ -22,30 +25,28 @@ def from_vtt(vtt_string):
            continue

        start, end = match.groups()
-        content = '\n'.join(lines[1:])
+        content = "\n".join(lines[1:])

-        subtitles.append({
-            'start': start,
-            'end': end,
-            'content': content
-        })
+        subtitles.append({"start": start, "end": end, "content": content})

    return subtitles

+
 def to_vtt(subtitles):
    vtt_content = "WEBVTT\n\n"
    for idx, subtitle in enumerate(subtitles):
-        start = subtitle['start']
-        end = subtitle['end']
-        content = subtitle['content']
+        start = subtitle["start"]
+        end = subtitle["end"]
+        content = subtitle["content"]
        vtt_content += f"{start} --> {end}\n{content}\n\n"
    return vtt_content.strip()

+
 def stack_subtitle():
    buffer = []
    linebuf = []
    for line in parsed_vtt:
-        print(line["content"].strip()) 
+        print(line["content"].strip())
        content = line["content"].strip()
        if True:
            linebuf.append(line)
@ -62,12 +63,13 @@ def stack_subtitle():
            # if scene["content"][-1] == ".":
            strbuf += "\n"
            # else:
-                # strbuf += " "
+            # strbuf += " "
            scene["content"] = strbuf
            sub.append(scene)

+
 with open("example.vtt", "r") as f:
    vtt_content = f.read()

 parsed_vtt = from_vtt(vtt_content)
-print(to_vtt(stack_subtitle(parsed_vtt)))
+print(to_vtt(stack_subtitle(parsed_vtt)))
--- a/subedit.py
+++ b/subedit.py
@ -1,79 +1,88 @@
 import json
 import os, sys

-def readFile(file):
-  if not os.path.exists(file):
-    raise Exception(f"File {file} doesn't exists.")
-  with open(file, "r") as f:
-    data = f.read()
-  return data

-def writeFile(file, data, overwrite = False):
-  if (not overwrite) and os.path.exists(file):
-    raise Exception(f"File {file} already exists.")
-  if not len(data):
-    raise Exception(f"Tried to write empty data.")
-  with open(file, "w") as f:
-    ret = f.write(data)
-  return ret
+def readFile(file):
+    if not os.path.exists(file):
+        raise Exception(f"File {file} doesn't exists.")
+    with open(file, "r") as f:
+        data = f.read()
+    return data
+
+
+def writeFile(file, data, overwrite=False):
+    if (not overwrite) and os.path.exists(file):
+        raise Exception(f"File {file} already exists.")
+    if not len(data):
+        raise Exception(f"Tried to write empty data.")
+    with open(file, "w") as f:
+        ret = f.write(data)
+    return ret
+

 file = sys.argv[1]

 if ".json" in file:
-  subtitles = json.loads(readFile(file))
-  output = ""
-  index = 0
-  for subtitle in subtitles:
-    if subtitle.get("split", False):
-      output += "\n"
-    else:
-      index += 1
-      start = subtitle["start"]
-      end = subtitle["end"]
-      content = subtitle["content"]
-      "| {start:>10} --> {end:>10} |"
-      output += f"{index:03} | {content.strip()}\n"
+    subtitles = json.loads(readFile(file))
+    output = ""
+    index = 0
+    for subtitle in subtitles:
+        if subtitle.get("split", False):
+            output += "\n"
+        else:
+            index += 1
+            start = subtitle["start"]
+            end = subtitle["end"]
+            content = subtitle["content"]
+            "| {start:>10} --> {end:>10} |"
+            output += f"{index:03} | {content.strip()}\n"

-  output += "############ TIMESTAMPS ############\n\n"
+    output += "############ TIMESTAMPS ############\n\n"

-  index = 0
-  for subtitle in subtitles:
-    if not subtitle.get("split", False):
-      index += 1
-      start = subtitle["start"]
-      end = subtitle["end"]
-      output += f"{index:03} | {start} --> {end} \n"
+    index = 0
+    for subtitle in subtitles:
+        if not subtitle.get("split", False):
+            index += 1
+            start = subtitle["start"]
+            end = subtitle["end"]
+            output += f"{index:03} | {start} --> {end} \n"

-  writeFile(os.path.splitext(file)[0]+".edit", output)
+    writeFile(os.path.splitext(file)[0] + ".edit", output)

 elif ".edit" in file:
-  subtitles = json.loads(readFile(os.path.splitext(file)[0]+".json"))
-  lines = readFile(file)
+    subtitles = json.loads(readFile(os.path.splitext(file)[0] + ".json"))
+    lines = readFile(file)

-  idx, sub = 0, {}
-  for subtitle in subtitles:
-    if not subtitle.get("split", False):
-      sub[idx] = subtitle
-      idx += 1
+    idx, sub = 0, {}
+    for subtitle in subtitles:
+        if not subtitle.get("split", False):
+            sub[idx] = subtitle
+            idx += 1

-  new_brk, new_sub = [], {}
-  for line in lines.split("\n"):
-    if "\n############ TIMESTAMPS ############" == line:
-      break
-    if line:
-      idx, content = line.split(" | ")
-      idx = int(idx) - 1
-      if sub[idx]["content"] != content:
-        print(f"{idx} {sub[idx]["content"]} -> {content}")
-      new_sub[idx] = {"content": content, "start": sub[idx]["start"], "end": sub[idx]["end"]}
-    else:
-      new_brk.append(idx)
+    new_brk, new_sub = [], {}
+    for line in lines.split("\n"):
+        if "\n############ TIMESTAMPS ############" == line:
+            break
+        if line:
+            idx, content = line.split(" | ")
+            idx = int(idx) - 1
+            if sub[idx]["content"] != content:
+                print(f"{idx} {sub[idx]["content"]} -> {content}")
+            new_sub[idx] = {
+                "content": content,
+                "start": sub[idx]["start"],
+                "end": sub[idx]["end"],
+            }
+        else:
+            new_brk.append(idx)

-  output = []
-  for n in sorted(new_sub):
-    subtitle = new_sub[n]
-    output.append(subtitle)
-    if n in new_brk:
-      output.append({"content": "Break", "start": None, "end": None, "split": True})
+    output = []
+    for n in sorted(new_sub):
+        subtitle = new_sub[n]
+        output.append(subtitle)
+        if n in new_brk:
+            output.append(
+                {"content": "Break", "start": None, "end": None, "split": True}
+            )

-  writeFile(os.path.splitext(file)[0]+".json.1", json.dumps(output, indent=2))
+    writeFile(os.path.splitext(file)[0] + ".json.1", json.dumps(output, indent=2))
--- a/vttmaker.py
+++ b/vttmaker.py
--- a/wordvtt.py
+++ b/wordvtt.py
@ -4,50 +4,64 @@ import re, json
 import os
 from datetime import timedelta

+
 def from_vtt(vtt_string):
-  VTT_TIMECODE_PATTERN = r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
-  VTT_LINE_NUMBER_PATTERN = r"^\d+$"
-  parts = re.split(r'\n\n+', vtt_string.strip())
-  if parts[0].startswith('WEBVTT'):
-    parts.pop(0)
+    VTT_TIMECODE_PATTERN = (
+        r"((?:\d{2}:)?\d{2}:\d{2}\.\d{3}) --> ((?:\d{2}:)?\d{2}:\d{2}\.\d{3})"
+    )
+    VTT_LINE_NUMBER_PATTERN = r"^\d+$"
+    parts = re.split(r"\n\n+", vtt_string.strip())
+    if parts[0].startswith("WEBVTT"):
+        parts.pop(0)

-  subtitles = []
-  for part in parts:
-    lines = part.split('\n')
-    match = re.match(VTT_TIMECODE_PATTERN, lines[0])
-    if not match:
-      if re.match(VTT_LINE_NUMBER_PATTERN, lines[0]):
-        lines.pop(0)
-      match = re.match(VTT_TIMECODE_PATTERN, lines[0])
-    if not match:
-      continue
+    subtitles = []
+    for part in parts:
+        lines = part.split("\n")
+        match = re.match(VTT_TIMECODE_PATTERN, lines[0])
+        if not match:
+            if re.match(VTT_LINE_NUMBER_PATTERN, lines[0]):
+                lines.pop(0)
+            match = re.match(VTT_TIMECODE_PATTERN, lines[0])
+        if not match:
+            continue

-    start, end = match.groups()
-    content = '\n'.join(lines[1:]) + "\n"
-    subtitles.append({
-      'start': start,
-      'end': end,
-      'content': (content.replace("-\n", "\n").replace("</u>-\n", "</u>\n").replace("-", " ").replace("%", " ").replace("<u> "," <u>").replace(" </u>","</u> ").replace("<u> </u>","").replace("<u></u>","").replace(" \n", "\n"))[:-1]
-    })
-  # def sanitizevttwordlevel(subtitles):
-  #   errorwords = []
-  #   newords = {}
-  #   for subtitle in subtitles:
-  #     for word in subtitle["content"].split(" "):
-  #       if ("<u>" in word):
-  #         newword = None
-  #         if (len(word.split("<u>")) > 1):
-  #           newword = word.replace("<u>", " <u>")
-  #         if (len(word.split("</u>")) > 1):
-  #           newword = word.replace("</u>", "</u> ")
-  #         if newword:
-  #           original = word.split("<u>")[1].split("</u>")[0]
-  #           if original in errorwords:
-  #             for i in errorwords[original]:
+        start, end = match.groups()
+        content = "\n".join(lines[1:]) + "\n"
+        subtitles.append(
+            {
+                "start": start,
+                "end": end,
+                "content": (
+                    content.replace("-\n", "\n")
+                    .replace("</u>-\n", "</u>\n")
+                    .replace("-", " ")
+                    .replace("%", " ")
+                    .replace("<u> ", " <u>")
+                    .replace(" </u>", "</u> ")
+                    .replace("<u> </u>", "")
+                    .replace("<u></u>", "")
+                    .replace(" \n", "\n")
+                )[:-1],
+            }
+        )
+    # def sanitizevttwordlevel(subtitles):
+    #   errorwords = []
+    #   newords = {}
+    #   for subtitle in subtitles:
+    #     for word in subtitle["content"].split(" "):
+    #       if ("<u>" in word):
+    #         newword = None
+    #         if (len(word.split("<u>")) > 1):
+    #           newword = word.replace("<u>", " <u>")
+    #         if (len(word.split("</u>")) > 1):
+    #           newword = word.replace("</u>", "</u> ")
+    #         if newword:
+    #           original = word.split("<u>")[1].split("</u>")[0]
+    #           if original in errorwords:
+    #             for i in errorwords[original]:

-
-  #           else:
-  #             errorwords[orig].append(word)
+    #           else:
+    #             errorwords[orig].append(word)

    #   error = False
    #   if "<u>" in subtitle["content"]:
@ -63,347 +77,386 @@ def from_vtt(vtt_string):
    #     for word in subtitle["content"].split(" "):
    #       if word.replace("<u>")

-  #   for subtitle in subtitles:
-  #     for words in subtitle["content"].split(" "):
-  #       if word in errorwords:
-  #         subtitle["content"]
+    #   for subtitle in subtitles:
+    #     for words in subtitle["content"].split(" "):
+    #       if word in errorwords:
+    #         subtitle["content"]
+
+    # sanitizevttwordlevel(subtitles)
+    return subtitles

-  # sanitizevttwordlevel(subtitles)
-  return subtitles

 def to_vtt(subtitles):
    vtt_content = "WEBVTT\n\n\n"
    for idx, subtitle in enumerate(subtitles):
-        content = subtitle['content']
+        content = subtitle["content"]
        if not subtitle.get("split", False):
-          start = subtitle['start']
-          end = subtitle['end']
-          vtt_content += f"{start} --> {end}\n{content}\n\n\n"
+            start = subtitle["start"]
+            end = subtitle["end"]
+            vtt_content += f"{start} --> {end}\n{content}\n\n\n"
        else:
-          vtt_content += f"NOTE {content}\n\n\n"
+            vtt_content += f"NOTE {content}\n\n\n"

    return vtt_content.strip()

+
 def to_stacked_vtt(subtitles):
-  vtt_content = "WEBVTT\n\n\n"
-  buffer = ""
-  for subtitle in subtitles:
-    if subtitle.get("split", False):
-      buffer = ""
-      continue
-    if len(buffer) != 0:
-      if str(subtitle['content'].strip())[-1] == ".":
-        buffer += "\n"
-      else:
-        buffer += " "
-    buffer += subtitle['content'].strip()
-    vtt_content += f"{subtitle['start']} --> {subtitle['end']}\n"
-    vtt_content += buffer
-    vtt_content += "\n\n\n"
-  return vtt_content
+    vtt_content = "WEBVTT\n\n\n"
+    buffer = ""
+    for subtitle in subtitles:
+        if subtitle.get("split", False):
+            buffer = ""
+            continue
+        if len(buffer) != 0:
+            if str(subtitle["content"].strip())[-1] == ".":
+                buffer += "\n"
+            else:
+                buffer += " "
+        buffer += subtitle["content"].strip()
+        vtt_content += f"{subtitle['start']} --> {subtitle['end']}\n"
+        vtt_content += buffer
+        vtt_content += "\n\n\n"
+    return vtt_content
+

 def script_from_word_vtt(wordvtt):
-  subtitles = from_vtt(wordvtt)
-  print(f"VTT {len(subtitles)} lines. Generating script file from VTT.")
-  sentences = []
-  EXCEPTION_FLAG, ADD_NEXT_SENTENCE = "", 0
-  for n, subtitle in enumerate(subtitles):
-    sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
-    if ((sentences[-1] if sentences else None) != sentence) or ADD_NEXT_SENTENCE:
-      sentences.append(sentence)
-    ADD_NEXT_SENTENCE = 0
-    if subtitle["content"][-4:] == "</u>":
-      # print(f"{len(sentences)} END {subtitle["content"]}")
-      ADD_NEXT_SENTENCE = 1
-      if n + 2 < len(subtitles):
-        if subtitles[n+2]["content"].replace("<u>", "").replace("</u>", "") != sentence:
-          ADD_NEXT_SENTENCE = 0
-  return sentences
+    subtitles = from_vtt(wordvtt)
+    print(f"VTT {len(subtitles)} lines. Generating script file from VTT.")
+    sentences = []
+    EXCEPTION_FLAG, ADD_NEXT_SENTENCE = "", 0
+    for n, subtitle in enumerate(subtitles):
+        sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
+        if ((sentences[-1] if sentences else None) != sentence) or ADD_NEXT_SENTENCE:
+            sentences.append(sentence)
+        ADD_NEXT_SENTENCE = 0
+        if subtitle["content"][-4:] == "</u>":
+            # print(f"{len(sentences)} END {subtitle["content"]}")
+            ADD_NEXT_SENTENCE = 1
+            if n + 2 < len(subtitles):
+                if (
+                    subtitles[n + 2]["content"].replace("<u>", "").replace("</u>", "")
+                    != sentence
+                ):
+                    ADD_NEXT_SENTENCE = 0
+    return sentences
+

 def create_word_scenes(wordvtt, scriptraw):
-  subtitles = from_vtt(wordvtt)
-  scripts   = [i for i in scriptraw.split("\n") if i]
-  print(f"VTT {len(subtitles)} lines, Script {len(scripts)} lines")
-  scenes = []
-  for n, script in enumerate(scripts):
-    if len(script.split(" ")) == 1:
-      continue
-    scenes.append({"scene": script, "timestamp": []})
+    subtitles = from_vtt(wordvtt)
+    scripts = [i for i in scriptraw.split("\n") if i]
+    print(f"VTT {len(subtitles)} lines, Script {len(scripts)} lines")
+    scenes = []
+    for n, script in enumerate(scripts):
+        if len(script.split(" ")) == 1:
+            continue
+        scenes.append({"scene": script, "timestamp": []})

-  scenes_cur = 0
-  for n, subtitle in enumerate(subtitles):
-    sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
-    if len(sentence.split(" ")) == 1:
-      continue
+    scenes_cur = 0
+    for n, subtitle in enumerate(subtitles):
+        sentence = subtitle["content"].replace("<u>", "").replace("</u>", "")
+        if len(sentence.split(" ")) == 1:
+            continue

-    if sentence != scenes[scenes_cur].get("scene"):
-      if sentence == scenes[scenes_cur+1].get("scene"):
-        scenes_cur += 1
-      else:
-        print(f"Error, Mismatch in scenes\n=>\"[{scenes_cur}] {scenes[scenes_cur].get("scene")}\" or \"[{scenes_cur+1}] {scenes[scenes_cur+1].get("scene")}\" != \"{sentence}\"")
-        return
+        if sentence != scenes[scenes_cur].get("scene"):
+            if sentence == scenes[scenes_cur + 1].get("scene"):
+                scenes_cur += 1
+            else:
+                print(
+                    f"Error, Mismatch in scenes\n=>\"[{scenes_cur}] {scenes[scenes_cur].get("scene")}\" or \"[{scenes_cur+1}] {scenes[scenes_cur+1].get("scene")}\" != \"{sentence}\""
+                )
+                return

-    current_scene = scenes[scenes_cur]
-    if current_scene["timestamp"]:
-      word_idx = current_scene["timestamp"][-1]["index"] + 1
-    else:
-      word_idx = 0
+        current_scene = scenes[scenes_cur]
+        if current_scene["timestamp"]:
+            word_idx = current_scene["timestamp"][-1]["index"] + 1
+        else:
+            word_idx = 0

-    # print(scenes_cur, subtitle, word_idx, sentence)
-    if ("<u>" not in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
-      pass
-    if ("<u>" in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
-      print(f"Error, index wrong. {scenes_cur}, word: {word_idx}, total words: {len(sentence.split(" "))}\n{subtitle}")
-      word_idx = 0
-      scenes_cur += 1
-      current_scene = scenes[scenes_cur]
-      if current_scene["timestamp"]:
-        word_idx = current_scene["timestamp"][-1]["index"] + 1
-      else:
-        word_idx = 0
-      print(f"Changed to {word_idx}, {scenes_cur}")
+        # print(scenes_cur, subtitle, word_idx, sentence)
+        if ("<u>" not in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
+            pass
+        if ("<u>" in subtitle["content"]) and word_idx >= len(sentence.split(" ")):
+            print(
+                f"Error, index wrong. {scenes_cur}, word: {word_idx}, total words: {len(sentence.split(" "))}\n{subtitle}"
+            )
+            word_idx = 0
+            scenes_cur += 1
+            current_scene = scenes[scenes_cur]
+            if current_scene["timestamp"]:
+                word_idx = current_scene["timestamp"][-1]["index"] + 1
+            else:
+                word_idx = 0
+            print(f"Changed to {word_idx}, {scenes_cur}")

-    if "<u>" in subtitle["content"]:
-      # print(subtitle["content"])
-      word = subtitle["content"].split("<u>")[1].split("</u>")[0]
+        if "<u>" in subtitle["content"]:
+            # print(subtitle["content"])
+            word = subtitle["content"].split("<u>")[1].split("</u>")[0]

-      if word not in sentence.split(" "):
-        print(f"Error, Mismatch\n=> \"{word}\" not in \"{sentence}\"")
-        return
+            if word not in sentence.split(" "):
+                print(f'Error, Mismatch\n=> "{word}" not in "{sentence}"')
+                return

-      try:
-        assert sentence.split(" ")[word_idx] == word
-      except:
-        print(f"Error, Mismatch\n=> \"{word}\" != [{word_idx}] of \"{sentence}\"")
-        return
+            try:
+                assert sentence.split(" ")[word_idx] == word
+            except:
+                print(f'Error, Mismatch\n=> "{word}" != [{word_idx}] of "{sentence}"')
+                return

-      word_time = {"start": subtitle["start"], "end": subtitle["end"], "index": word_idx, "word": word}
-      current_scene["timestamp"].append(word_time)
+            word_time = {
+                "start": subtitle["start"],
+                "end": subtitle["end"],
+                "index": word_idx,
+                "word": word,
+            }
+            current_scene["timestamp"].append(word_time)

-  # print(json.dumps(scenes, indent=2))
+    # print(json.dumps(scenes, indent=2))

-  for scene in scenes:
-    if len(scene["scene"].split(" ")) != len(scene["timestamp"]):
-      print("Error, Mismatch length")
-      return
-    if "" in scene["scene"].split(" "):
-      print(repr(scene["scene"]))
+    for scene in scenes:
+        if len(scene["scene"].split(" ")) != len(scene["timestamp"]):
+            print("Error, Mismatch length")
+            return
+        if "" in scene["scene"].split(" "):
+            print(repr(scene["scene"]))

-  full_script, full_scenes = [], []
-  for scene in scenes:
-    full_script += scene["scene"].split(" ")[:-1]
-    full_script.append(scene["scene"].split(" ")[-1]+"##")
-    full_scenes += scene["timestamp"]
+    full_script, full_scenes = [], []
+    for scene in scenes:
+        full_script += scene["scene"].split(" ")[:-1]
+        full_script.append(scene["scene"].split(" ")[-1] + "##")
+        full_scenes += scene["timestamp"]

-  for i, j in zip(full_script, full_scenes):
-    if i.replace("##", "") != j["word"]:
-      print("Error, Mismatch")
-      return
+    for i, j in zip(full_script, full_scenes):
+        if i.replace("##", "") != j["word"]:
+            print("Error, Mismatch")
+            return
+
+    assert len(full_scenes) == len(full_script)
+
+    return full_script, full_scenes


-  assert len(full_scenes) == len(full_script)
-
-  return full_script, full_scenes
-  
 # Detect long break or change in context, inserts section break into script.
 def autobreak(lines, times):
-  from datetime import timedelta
+    from datetime import timedelta

-  def parsetime(time_str):
-    minutes, seconds = time_str.split(':')
-    seconds, milliseconds = seconds.split('.')
-    td = timedelta(minutes=int(minutes), seconds=int(seconds), milliseconds=int(milliseconds))
-    return td
+    def parsetime(time_str):
+        minutes, seconds = time_str.split(":")
+        seconds, milliseconds = seconds.split(".")
+        td = timedelta(
+            minutes=int(minutes), seconds=int(seconds), milliseconds=int(milliseconds)
+        )
+        return td

-  script = []
-  long_breaks = []
-  tmark = parsetime("0:0.0")
-  for i, j in zip(lines, times):
-    tdiff = parsetime(j["start"]) - tmark
-    tmark = parsetime(j["end"])
-    if tdiff > parsetime("0:0.0"):
-      long_breaks.append(tdiff)
-      # print()
-    # print(i, end=" ")
-  # print()
+    script = []
+    long_breaks = []
+    tmark = parsetime("0:0.0")
+    for i, j in zip(lines, times):
+        tdiff = parsetime(j["start"]) - tmark
+        tmark = parsetime(j["end"])
+        if tdiff > parsetime("0:0.0"):
+            long_breaks.append(tdiff)
+            # print()
+        # print(i, end=" ")
+    # print()

-  mean_break = parsetime("0:0.0")
-  for i in long_breaks:
-    mean_break += i/len(long_breaks)
-  print(mean_break)
+    mean_break = parsetime("0:0.0")
+    for i in long_breaks:
+        mean_break += i / len(long_breaks)
+    print(mean_break)

-  script = ""
-  tmark = parsetime("0:0.0")
-  tmp = " "
+    script = ""
+    tmark = parsetime("0:0.0")
+    tmp = " "

-  continous_line = 0
-  for i, j in zip(lines, times):
-    tdiff = parsetime(j["start"]) - tmark
-    tmark = parsetime(j["end"])
-    if tdiff > mean_break and tmp[-1] != ".":
-      script += "\n"
+    continous_line = 0
+    for i, j in zip(lines, times):
+        tdiff = parsetime(j["start"]) - tmark
+        tmark = parsetime(j["end"])
+        if tdiff > mean_break and tmp[-1] != ".":
+            script += "\n"

-    if (tdiff >= mean_break and tmp[-1] == "."):
-        script += "\n"
-        continous_line = 0
-    else:
-      continous_line += 1
+        if tdiff >= mean_break and tmp[-1] == ".":
+            script += "\n"
+            continous_line = 0
+        else:
+            continous_line += 1

-    script += i.replace("##", "")
+        script += i.replace("##", "")
+
+        if i[-1] == ".":
+            script += "\n"
+        elif "##" in i:
+            script += "\n"
+        else:
+            script += " "
+        tmp = i
+
+    return script

-    if i[-1] == ".":
-      script += "\n"
-    elif "##" in i:
-        script += "\n"
-    else:
-      script += " " 
-    tmp = i
-  
-  return script

 def scene_from_new_script(raw_script, full_script, full_scenes):
-  mod_script = raw_script.replace("\n", " \n ").split(" ")
-  mod_script = [i for i in mod_script if i]
-  n = 0
-  while True:
-    if mod_script[n] == "\n":
-      mod_script[n-1] += "\n"
-      del(mod_script[n])
-      n -= 1
-    n += 1
-    if n == len(mod_script):
-      break
-  # print(mod_script)
-  print(f"Original: {len(full_script)}, Modded: {len(mod_script)}")
-  allowed_list = [".", "\n", "\n\n", ",", "?", "##"]
+    mod_script = raw_script.replace("\n", " \n ").split(" ")
+    mod_script = [i for i in mod_script if i]
+    n = 0
+    while True:
+        if mod_script[n] == "\n":
+            mod_script[n - 1] += "\n"
+            del mod_script[n]
+            n -= 1
+        n += 1
+        if n == len(mod_script):
+            break
+    # print(mod_script)
+    print(f"Original: {len(full_script)}, Modded: {len(mod_script)}")
+    allowed_list = [".", "\n", "\n\n", ",", "?", "##"]
+
+    def normalized(x):
+        for i in allowed_list:
+            x = x.replace(i, "")
+        return x.upper()
+
+    same = lambda a, b: normalized(a) == normalized(b)
+    new_script, new_timestamp, orig_index, n = [], [], 0, 0
+    fail = 0
+    while n < len(mod_script):
+        print(f"{repr(mod_script[n]):>20} ? {repr(full_script[orig_index])}")
+        word = mod_script[n]
+        if same(word, full_script[orig_index].replace("##", "")):
+            cur = full_scenes[orig_index]
+            new_script.append(word.replace("##", ""))
+            new_timestamp.append({"start": cur["start"], "end": cur["end"]})
+            fail = 0
+        else:
+            if fail > 10:
+                print("Error: Failed to match words,")
+                return
+            # print("Back")
+            fail += 1
+            n -= 1
+        n, orig_index = n + 1, orig_index + 1
+    assert len(new_script) == len(new_timestamp)
+    return new_script, new_timestamp

-  def normalized(x):
-    for i in allowed_list:
-      x = x.replace(i, "")
-    return x.upper()
-  
-  same = lambda a, b: normalized(a) == normalized(b)
-  new_script, new_timestamp, orig_index, n = [], [], 0, 0
-  fail = 0
-  while n < len(mod_script):
-    print(f"{repr(mod_script[n]):>20} ? {repr(full_script[orig_index])}")
-    word = mod_script[n]
-    if same(word, full_script[orig_index].replace("##", "")):
-      cur = full_scenes[orig_index]
-      new_script.append(word.replace("##", ""))
-      new_timestamp.append({"start": cur["start"], "end": cur["end"]})
-      fail = 0
-    else:
-      if fail > 10:
-        print("Error: Failed to match words,")
-        return
-      # print("Back")
-      fail += 1
-      n -= 1
-    n, orig_index = n+1, orig_index+1
-  assert len(new_script) == len(new_timestamp)
-  return new_script, new_timestamp

 def build_new_subtitle(new_script, new_timestamp):
-  buffer, new_scenes, start, end = [], [], None, None
-  current_scene = []
-  # print(" ".join(new_script).split("\n"))
+    buffer, new_scenes, start, end = [], [], None, None
+    current_scene = []
+    # print(" ".join(new_script).split("\n"))

-  for i, j in zip(new_script, new_timestamp):
-    if "\n" in i:
-      buffer.append(i.replace("\n", ""))
-      current_scene.append({"content": " ".join(buffer).replace("##", ""), "start": start, "end": j["end"]})
-      buffer, start = [], None
-      if "\n\n" in i:
-        print(f"Section break at line #{len(current_scene):<3}| \"{current_scene[-1]["content"]}\"")
+    for i, j in zip(new_script, new_timestamp):
+        if "\n" in i:
+            buffer.append(i.replace("\n", ""))
+            current_scene.append(
+                {
+                    "content": " ".join(buffer).replace("##", ""),
+                    "start": start,
+                    "end": j["end"],
+                }
+            )
+            buffer, start = [], None
+            if "\n\n" in i:
+                print(
+                    f"Section break at line #{len(current_scene):<3}| \"{current_scene[-1]["content"]}\""
+                )
+                new_scenes.append(current_scene)
+                current_scene = []
+        else:
+            buffer.append(i)
+            if not start:
+                start = j["start"]
+
+    if start:
+        buffer.append(i.replace("\n", ""))
+        current_scene.append(
+            {"content": " ".join(buffer), "start": start, "end": j["end"]}
+        )
+
+    if current_scene != (new_scenes[-1] if new_scenes else None):
        new_scenes.append(current_scene)
-        current_scene = []
-    else:
-      buffer.append(i)
-      if not start:
-        start = j["start"]

-  if start:
-      buffer.append(i.replace("\n", ""))
-      current_scene.append({"content": " ".join(buffer), "start": start, "end": j["end"]})
+    # print("\n\n".join(["\n".join([j["content"] for j in i]) for i in new_scenes]))
+    newsub = []
+    for n, i in enumerate(new_scenes):
+        newsub += i
+        if n < len(new_scenes) - 1:
+            newsub.append(
+                {"content": "Break", "start": None, "end": None, "split": True}
+            )

-  if current_scene != (new_scenes[-1] if new_scenes else None):
-    new_scenes.append(current_scene)
+    return newsub

-  # print("\n\n".join(["\n".join([j["content"] for j in i]) for i in new_scenes]))
-  newsub = []
-  for n, i in enumerate(new_scenes):
-    newsub += i
-    if n < len(new_scenes) - 1:
-      newsub.append({"content": "Break", "start": None, "end": None, "split": True})

-  return newsub
+def saveFile(filename, data, override=False):
+    if os.path.exists(filename) and not override:
+        print(f"File {filename} already exists.")
+        return -1
+    with open(filename, "w") as f:
+        f.write(data)

-def saveFile(filename, data, override = False):
-  if os.path.exists(filename) and not override:
-    print(f"File {filename} already exists.")
-    return -1
-  with open(filename, "w") as f:
-    f.write(data)

 def openFile(filename):
-  with open(filename, "r") as f:
-    data = f.read()
-  if not data:
-    return -1
-  return data
+    with open(filename, "r") as f:
+        data = f.read()
+    if not data:
+        return -1
+    return data
+

 def main(vttfile, scriptfile):
-  modfile = ".".join(scriptfile.split(".")[:-1]) + ".script"
-  x = create_word_scenes(openFile(vttfile), openFile(scriptfile))
-  if not x:
-    sys.exit(-1)
-  full_script, full_scenes = x
-
-  if not os.path.exists(modfile):
-    genscript = autobreak(full_script,full_scenes)
-    saveFile(modfile, genscript)
-    print(f"Saved modification file as {modfile}. Modify it and return back.")
-  else:
-    x = scene_from_new_script(openFile(modfile), full_script, full_scenes)
+    modfile = ".".join(scriptfile.split(".")[:-1]) + ".script"
+    x = create_word_scenes(openFile(vttfile), openFile(scriptfile))
    if not x:
-      sys.exit(-1)
-    a, b = x
+        sys.exit(-1)
+    full_script, full_scenes = x

-    final_vtt = build_new_subtitle(a, b)
-    jsonfile = ".".join(vttfile.split(".")[:-1]) + ".json"
-    saveFile(jsonfile, json.dumps(final_vtt, indent=2), True)
-    print(f"Saved JSON file as {jsonfile}. Fix it, and convert it to VTT.")
+    if not os.path.exists(modfile):
+        genscript = autobreak(full_script, full_scenes)
+        saveFile(modfile, genscript)
+        print(f"Saved modification file as {modfile}. Modify it and return back.")
+    else:
+        x = scene_from_new_script(openFile(modfile), full_script, full_scenes)
+        if not x:
+            sys.exit(-1)
+        a, b = x

-if __name__=="__main__":
-  import sys
-  if len(sys.argv) not in (2, 3):
-    print(f"Usage: {sys.argv[0].split("/")[-1]} [vtt file] (txt file)\n"                                  \
-     f"       {sys.argv[0].split("/")[-1]} [JSON file]\n"                                                 \
-      "** Only output from openai-whisper with '--word-timestamp true' is accepted.)\n"                   \
-      "** You have to run this for first time, and then fix .script file, and then re-run this script.\n" \
-      "** Adding newline/period/commas are onlt permitted. Fix else in JSON file.")
-    sys.exit()
+        final_vtt = build_new_subtitle(a, b)
+        jsonfile = ".".join(vttfile.split(".")[:-1]) + ".json"
+        saveFile(jsonfile, json.dumps(final_vtt, indent=2), True)
+        print(f"Saved JSON file as {jsonfile}. Fix it, and convert it to VTT.")

-  vtt = sys.argv[1]
-  print(f"\n[{vtt}]")
-  if len(sys.argv) == 3:
-    script = sys.argv[2]
-    if (not os.path.exists(vtt)) or (not os.path.exists(script)):
-      print(f"Input file doesnt exists.")
-      sys.exit(-1)
-    main(vtt, script)
-  else:
-    if ".json" in vtt:
-      final_vtt = json.loads(openFile(vtt))
-      orgf = ".".join(vtt.split(".")[:-1])
-      print(f"Saved VTT file as {orgf}.final.vtt.")
-      saveFile(orgf + ".final.vtt", to_vtt(final_vtt), True)
-      saveFile(orgf + ".stacked.vtt", to_stacked_vtt(final_vtt), True)
-      sys.exit(0)
-    if (not os.path.exists(vtt)):
-      print(f"Input file doesnt exists.")
-      sys.exit(-1)
-    script = ".".join(vtt.split(".")[:-1]) + ".txt"
-    saveFile(script, "\n".join(script_from_word_vtt(openFile(vtt))))
-    main(vtt, script)
+
+if __name__ == "__main__":
+    # Command-line entry point. Three invocation forms are accepted:
+    #   <prog> VTT TXT   -> run main() on the given subtitle/script pair
+    #   <prog> VTT       -> write a .txt script derived from the VTT next to it,
+    #                       then run main() on that pair
+    #   <prog> JSON      -> convert an edited JSON file into .final.vtt and
+    #                       .stacked.vtt files
+    import sys
+
+    if len(sys.argv) not in (2, 3):
+        # Resolve the program name outside the f-string: reusing double quotes
+        # inside a replacement field is a SyntaxError before Python 3.12.
+        prog = os.path.basename(sys.argv[0])
+        print(
+            f"Usage: {prog} [vtt file] (txt file)\n"
+            f"       {prog} [JSON file]\n"
+            "** Only output from openai-whisper with '--word-timestamp true' is accepted.)\n"
+            "** You have to run this for first time, and then fix .script file, and then re-run this script.\n"
+            "** Adding newline/period/commas are onlt permitted. Fix else in JSON file."
+        )
+        sys.exit()
+
+    vtt = sys.argv[1]
+    print(f"\n[{vtt}]")
+    if len(sys.argv) == 3:
+        script = sys.argv[2]
+        if not (os.path.exists(vtt) and os.path.exists(script)):
+            print("Input file doesnt exists.")
+            sys.exit(-1)
+        main(vtt, script)
+    else:
+        # Validate the single input up front so the JSON path reports a missing
+        # file the same way as the VTT path instead of failing inside openFile().
+        if not os.path.exists(vtt):
+            print("Input file doesnt exists.")
+            sys.exit(-1)
+
+        # Split on the real extension: a substring test such as `".json" in vtt`
+        # misroutes names like "clip.json.vtt", and splitting the whole path on
+        # "." mangles inputs such as "./clip" or "some.dir/clip".
+        base, ext = os.path.splitext(vtt)
+        if ext == ".json":
+            final_vtt = json.loads(openFile(vtt))
+            saveFile(base + ".final.vtt", to_vtt(final_vtt), True)
+            saveFile(base + ".stacked.vtt", to_stacked_vtt(final_vtt), True)
+            # Report success only once both files have actually been written.
+            print(f"Saved VTT file as {base}.final.vtt.")
+            sys.exit(0)
+
+        script = base + ".txt"
+        saveFile(script, "\n".join(script_from_word_vtt(openFile(vtt))))
+        main(vtt, script)