Пример #1
0
 def test_windows1252(self):
     """Parse windows-1252 text and verify item count, EOL and decode errors.

     Reads the fixture at ``self.windows_path`` as windows-1252, parses it
     with ``from_string`` and checks that 1332 items are produced and that
     the requested ``'\\r\\n'`` line ending is kept on the resulting file.
     Finally checks that opening the UTF-8 fixture as ASCII raises
     ``UnicodeDecodeError``.
     """
     # Read inside a ``with`` block so the fixture handle is closed right
     # away instead of being left for the garbage collector.
     # NOTE(review): assumes ``copen`` returns a context manager (as
     # ``codecs.open`` does) -- confirm against the module imports.
     with copen(self.windows_path, encoding='windows-1252') as vtt_handle:
         vtt_string = vtt_handle.read()
     vtt_file = from_string(vtt_string, encoding='windows-1252', eol='\r\n')
     self.assertEqual(len(vtt_file), 1332)
     self.assertEqual(vtt_file.eol, '\r\n')
     # The UTF-8 fixture is expected to fail when decoded as ASCII.
     self.assertRaises(UnicodeDecodeError, vttopen,
                       self.utf8_path, encoding='ascii')
Пример #2
0
 def test_windows1252(self):
     """Check windows-1252 parsing: item count, preserved EOL, decode failure.

     The fixture at ``self.windows_path`` is read as windows-1252 and handed
     to ``from_string``; the parsed file must hold 1332 items and report the
     ``'\\r\\n'`` line ending that was passed in. Opening the UTF-8 fixture
     with the ASCII codec must raise ``UnicodeDecodeError``.
     """
     # Use a context manager so the fixture file is closed deterministically
     # rather than leaking the handle.
     # NOTE(review): assumes ``copen`` returns a context manager (as
     # ``codecs.open`` does) -- confirm against the module imports.
     with copen(self.windows_path, encoding='windows-1252') as vtt_handle:
         vtt_string = vtt_handle.read()
     vtt_file = from_string(vtt_string, encoding='windows-1252', eol='\r\n')
     self.assertEqual(len(vtt_file), 1332)
     self.assertEqual(vtt_file.eol, '\r\n')
     # Decoding the UTF-8 fixture as ASCII is expected to fail.
     self.assertRaises(UnicodeDecodeError,
                       vttopen,
                       self.utf8_path,
                       encoding='ascii')
Пример #3
0
 def test_compare_from_string_and_from_path(self):
     """Check that loading by path and by string yields equal items.

     The UTF-8 fixture is parsed once through ``pyvtt.open`` and once through
     ``pyvtt.from_string``; the string forms of the paired items must match.
     """
     # Close the fixture handle as soon as its content has been read.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_handle:
         unicode_content = utf8_handle.read()
     # NOTE: zip() stops at the shorter input, so a difference in item count
     # between the two parses would not be reported by this loop.
     iterator = zip(pyvtt.open(self.utf8_path),
                    pyvtt.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
Пример #4
0
 def test_utf8(self):
     """Parse the UTF-8 fixture and check that the windows fixture fails to decode.

     The UTF-8 fixture must produce 1332 items through ``pyvtt.from_string``.
     Reading the windows fixture through the builtin ``open`` must raise
     ``UnicodeDecodeError``.
     """
     # Both fixture handles are closed by their ``with`` blocks instead of
     # being leaked.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_handle:
         unicode_content = utf8_handle.read()
     self.assertEqual(len(pyvtt.from_string(unicode_content)), 1332)
     # NOTE(review): no encoding is passed, so this relies on the platform
     # default text encoding being unable to decode the windows fixture.
     with open(self.windows_path) as windows_handle:
         self.assertRaises(UnicodeDecodeError, windows_handle.read)
Пример #5
0
 def test_utf8(self):
     """Verify UTF-8 parsing and that the windows fixture is not decodable.

     Expects ``pyvtt.from_string`` to return 1332 items for the UTF-8
     fixture, and expects ``UnicodeDecodeError`` when the windows fixture is
     read through the builtin ``open``.
     """
     # Read through context managers so neither fixture handle is leaked.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_handle:
         unicode_content = utf8_handle.read()
     self.assertEqual(len(pyvtt.from_string(unicode_content)), 1332)
     # NOTE(review): the file is opened with the platform default encoding;
     # the assertion only holds where that encoding rejects the fixture.
     with open(self.windows_path) as windows_handle:
         self.assertRaises(UnicodeDecodeError, windows_handle.read)
Пример #6
0
 def test_compare_from_string_and_from_path(self):
     """Compare items parsed from a file path with items parsed from text.

     ``pyvtt.open`` and ``pyvtt.from_string`` are both fed the UTF-8 fixture
     and the string form of each pair of items is asserted to be equal.
     """
     # The ``with`` block closes the fixture handle once it has been read.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_handle:
         unicode_content = utf8_handle.read()
     # NOTE: zip() truncates to the shorter input, so this loop alone does
     # not detect a mismatch in the number of parsed items.
     iterator = zip(pyvtt.open(self.utf8_path),
                    pyvtt.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
    def vtt2bcc(path, threshold=0.1, word=True):
        """Convert VTT subtitles into a BCC caption dictionary.

        Args:
            path: Path of a subtitle file. When no file exists at that path
                the value itself is parsed as subtitle text. Falsy values are
                treated as an empty string.
            threshold: Duration limit in seconds used in word mode: a word
                fragment that ends within ``threshold`` of the current start
                is merged into the previous caption, and a short untagged cue
                is made to start where the previous caption ended.
            word: When true, split each cue at its inline
                ``<timestamp><c>word</c>`` tags; when false, emit one caption
                per cue using the last line of its tag-free text.

        Returns:
            A dict of styling fields plus a ``body`` list of captions, each
            with ``from``, ``to``, ``location`` and ``content`` keys, or
            ``{}`` when the parsed subtitles are empty.
        """
        path = path or ""
        if os.path.exists(path):
            subs = pyvtt.open(path)
        else:
            subs = pyvtt.from_string(path)

        caption_list = []
        if not word:
            # NOTE(review): ``ordinal`` is presumably in milliseconds, hence
            # the division by 1000 to obtain seconds -- confirm with pyvtt.
            caption_list = [{
                "from": sub.start.ordinal / 1000,
                "to": sub.end.ordinal / 1000,
                "location": 2,
                "content": sub.text_without_tags.split("\n")[-1],
            } for sub in subs]
        else:
            # NOTE: split the BCC captions following the word-level timing
            # tags of the VTT cues.
            # Compiled once; the pattern does not change between cues.
            word_tag = re.compile(r"<(.*?)><c>(.*?)</c>")
            for sub in subs:
                text = sub.text

                start = sub.start.ordinal / 1000
                end = sub.end.ordinal / 1000
                try:
                    # str.index raises ValueError when the cue has no tag.
                    idx = text.index("<")
                    pre_text = text[:idx]
                    for t_str, match in word_tag.findall(text):
                        pre_text += match
                        # strptime raises ValueError for a tag that is not a
                        # timestamp in this format.
                        t = datetime.strptime(t_str, r"%H:%M:%S.%f")
                        # ``microsecond`` is always a count of millionths of
                        # a second; dividing by a power of ten derived from
                        # its digit count would turn ".050" into 0.5 s.
                        sec = (t.hour * 3600 + t.minute * 60 + t.second +
                               t.microsecond / 1e6)
                        final_text = pre_text.split("\n")[-1]

                        if caption_list and (sec - start <= threshold
                                             or caption_list[-1]["content"]
                                             == final_text):
                            # Too short, or same text: extend the previous
                            # caption instead of adding a new one.
                            caption_list[-1].update({
                                "to": sec,
                                "content": final_text,
                            })
                        else:
                            caption_list.append({
                                "from": start,
                                "to": sec,
                                "location": 2,
                                "content": final_text,
                            })
                        start = sec
                except ValueError:
                    # Cue without tags, or with a tag whose timestamp could
                    # not be parsed: fall back to the last line of the raw
                    # cue text.
                    final_text = text.split("\n")[-1]
                    if caption_list and caption_list[-1][
                            "content"] == final_text:
                        caption_list[-1].update({
                            "to": end,
                            "content": final_text,
                        })
                    else:
                        if caption_list and end - start < threshold:
                            # Very short cue: start it where the previous
                            # caption ended.
                            start = caption_list[-1]["to"]
                        caption_list.append({
                            "from": start,
                            "to": end,
                            "location": 2,
                            "content": final_text,
                        })

        # NOTE: avoid running past the end of the video by cutting the last
        # caption to 0.1 s. Guarded because the list is empty when there are
        # no subtitles or when no cue produced a caption.
        if caption_list:
            last = caption_list[-1]
            last["to"] = last["from"] + 0.1
        bcc = {
            "font_size": 0.4,
            "font_color": "#FFFFFF",
            "background_alpha": 0.5,
            "background_color": "#9C27B0",
            "Stroke": "none",
            "body": caption_list,
        }

        return bcc if subs else {}