Пример #1
0
class TestShifting(unittest.TestCase):
    """Exercise ``SubRipItem.shift`` on a cue whose end is 20 seconds after its start."""

    def setUp(self):
        # Build the cue, shift it by one minute, then push only its end
        # a further 20 seconds.
        cue = SubRipItem(1, text="Hello world !")
        cue.shift(minutes=1)
        cue.end.shift(seconds=20)
        self.item = cue

    def test_shift_up(self):
        """A positive shift moves both bounds and keeps the duration."""
        cue = self.item
        cue.shift(1, 2, 3, 4)
        self.assertEqual(cue.start, (1, 3, 3, 4))
        self.assertEqual(cue.end, (1, 3, 23, 4))
        self.assertEqual(cue.duration, (0, 0, 20, 0))
        self.assertEqual(cue.characters_per_second, 0.65)

    def test_shift_down(self):
        """A forward shift followed by a negative one lands on the expected bounds."""
        cue = self.item
        cue.shift(5)
        cue.shift(-1, -2, -3, -4)
        self.assertEqual(cue.start, (3, 58, 56, 996))
        self.assertEqual(cue.end, (3, 59, 16, 996))
        self.assertEqual(cue.duration, (0, 0, 20, 0))
        self.assertEqual(cue.characters_per_second, 0.65)

    def test_shift_by_ratio(self):
        """Shifting with ``ratio=2`` is expected to double start, end and duration."""
        cue = self.item
        cue.shift(ratio=2)
        self.assertEqual(cue.start, {'minutes': 2})
        self.assertEqual(cue.end, {'minutes': 2, 'seconds': 40})
        self.assertEqual(cue.duration, (0, 0, 40, 0))
        self.assertEqual(cue.characters_per_second, 0.325)
Пример #2
0
class TestShifting(unittest.TestCase):
    """Tests for ``SubRipItem.shift`` with positional, negative and ratio arguments."""

    def setUp(self):
        # Fixture: one cue shifted by a minute, with its end moved 20 s further.
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def test_shift_up(self):
        """Shift forward with four positional components."""
        self.item.shift(1, 2, 3, 4)
        expected_start = (1, 3, 3, 4)
        expected_end = (1, 3, 23, 4)
        self.assertEqual(self.item.start, expected_start)
        self.assertEqual(self.item.end, expected_end)
        self.assertEqual(self.item.duration, (0, 0, 20, 0))
        self.assertEqual(self.item.characters_per_second, 0.65)

    def test_shift_down(self):
        """Shift forward, then back with negative components."""
        self.item.shift(5)
        self.item.shift(-1, -2, -3, -4)
        expected_start = (3, 58, 56, 996)
        expected_end = (3, 59, 16, 996)
        self.assertEqual(self.item.start, expected_start)
        self.assertEqual(self.item.end, expected_end)
        self.assertEqual(self.item.duration, (0, 0, 20, 0))
        self.assertEqual(self.item.characters_per_second, 0.65)

    def test_shift_by_ratio(self):
        """Shift with ``ratio=2`` and check the scaled bounds."""
        self.item.shift(ratio=2)
        expected_start = {'minutes': 2}
        expected_end = {'minutes': 2, 'seconds': 40}
        self.assertEqual(self.item.start, expected_start)
        self.assertEqual(self.item.end, expected_end)
        self.assertEqual(self.item.duration, (0, 0, 40, 0))
        self.assertEqual(self.item.characters_per_second, 0.325)
Пример #3
0
class TestTagRemoval(unittest.TestCase):
    """Check that ``text_without_tags`` strips markup from the cue text."""

    def setUp(self):
        # Same fixture as the other suites: a cue shifted by one minute
        # whose end is pushed 20 seconds further.
        cue = SubRipItem(1, text="Hello world !")
        cue.shift(minutes=1)
        cue.end.shift(seconds=20)
        self.item = cue

    def test_italics_tag(self):
        """``<i>`` tags are removed."""
        self.item.text = "<i>Hello world !</i>"
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, 'Hello world !')

    def test_bold_tag(self):
        """``<b>`` tags are removed."""
        self.item.text = "<b>Hello world !</b>"
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, 'Hello world !')

    def test_underline_tag(self):
        """``<u>`` tags are removed."""
        self.item.text = "<u>Hello world !</u>"
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, 'Hello world !')

    def test_color_tag(self):
        """``<font color=...>`` tags are removed."""
        self.item.text = '<font color="#ff0000">Hello world !</font>'
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, 'Hello world !')

    def test_all_tags(self):
        """Nested and mixed tags over two lines are all removed."""
        tagged = ('<b>Bold</b>, <i>italic</i>, <u>underlined</u>\n'
                  '<font color="#ff0000">red text</font>'
                  ', <b>one,<i> two,<u> three</u></i></b>.')
        plain = ('Bold, italic, underlined'
                 '\nred text, one, two, three.')
        self.item.text = tagged
        self.assertEqual(self.item.text_without_tags, plain)
Пример #4
0
class TestTagRemoval(unittest.TestCase):
    """Tests for the ``text_without_tags`` property of ``SubRipItem``."""

    # Text every single-tag test expects once the markup is gone.
    PLAIN = 'Hello world !'

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def test_italics_tag(self):
        """Italic markup is stripped."""
        self.item.text = "<i>Hello world !</i>"
        self.assertEqual(self.item.text_without_tags, self.PLAIN)

    def test_bold_tag(self):
        """Bold markup is stripped."""
        self.item.text = "<b>Hello world !</b>"
        self.assertEqual(self.item.text_without_tags, self.PLAIN)

    def test_underline_tag(self):
        """Underline markup is stripped."""
        self.item.text = "<u>Hello world !</u>"
        self.assertEqual(self.item.text_without_tags, self.PLAIN)

    def test_color_tag(self):
        """Font colour markup is stripped."""
        self.item.text = '<font color="#ff0000">Hello world !</font>'
        self.assertEqual(self.item.text_without_tags, self.PLAIN)

    def test_all_tags(self):
        """A two-line text mixing every tag kind is reduced to plain text."""
        self.item.text = ''.join([
            '<b>Bold</b>, <i>italic</i>, <u>underlined</u>\n',
            '<font color="#ff0000">red text</font>',
            ', <b>one,<i> two,<u> three</u></i></b>.',
        ])
        expected = 'Bold, italic, underlined\nred text, one, two, three.'
        self.assertEqual(self.item.text_without_tags, expected)
Пример #5
0
def json_to_srt(deepspeech_json, max_word_time=10, min_sub_time=1.5, max_sub_time=3):
    """Group the words of a DeepSpeech-style transcript into SRT cues.

    :param deepspeech_json: mapping with a ``"words"`` list; each word is a
        dict with ``"word"``, ``"start_time"`` and ``"duration"`` keys.
        An ``"end_time"`` key is added to every word dict in place.
    :param max_word_time: words whose duration is not below this value are
        skipped entirely.
    :param min_sub_time: minimum length given to a cue, measured from its
        first word's start time.
    :param max_sub_time: a word is added to the current cue only while its
        end time stays within this span of the cue's start.
    :returns: a ``SubRipFile`` whose cues are numbered from 1.
    """
    index = 0
    subtitle = ""
    start_time = 0
    end_time = 0
    subtitles = SubRipFile()

    for word in deepspeech_json["words"]:
        word["end_time"] = word["start_time"] + word["duration"]
        if not word["duration"] < max_word_time:
            continue

        if subtitle and start_time + max_sub_time >= word["end_time"]:
            # The word still fits into the cue being built.
            subtitle += " " + word["word"]
        else:
            if subtitle:
                # Flush the finished cue. Python has no ``++`` operator
                # (``++index`` is just ``+(+index)``), so the counter has to
                # be advanced explicitly. Times are passed in milliseconds.
                index += 1
                subtitles.append(
                    SubRipItem(index=index, start=int(start_time * 1000),
                               end=int(end_time * 1000), text=subtitle))
            # Start a new cue with the current word.
            start_time = word["start_time"]
            subtitle = word["word"]
        end_time = max(word["end_time"], start_time + min_sub_time)

    if subtitle:
        index += 1
        subtitles.append(
            SubRipItem(index=index, start=int(start_time * 1000),
                       end=int(end_time * 1000), text=subtitle))
    return subtitles
Пример #6
0
class TestSerialAndParsing(unittest.TestCase):
    """Round-trip tests between ``SubRipItem`` objects and their text form.

    ``assertEqual`` is used throughout: the ``assertEquals`` alias is
    deprecated and no longer exists in recent Python versions.
    """

    def setUp(self):
        # Reference cue that most of the parsed strings below must equal.
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)
        self.string = u'1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
        self.bad_string = u'foobar'
        # Timestamp lines followed by positioning data.
        self.coordinates = (u'1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
                                'Y1:050 Y2:100\nHello world !\n')
        self.vtt = (u'1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
                                'L:12%\nHello world !\n')
        # Dots instead of commas before the milliseconds.
        self.dots = u'1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
        self.string_index = u'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'
        self.no_index = u'00:01:00,000 --> 00:01:20,000\nHello world !\n'

    def test_serialization(self):
        """The unicode form of the reference cue matches the plain SRT text."""
        self.assertEqual(unicode(self.item), self.string)

    def test_from_string(self):
        """Valid text parses to the reference cue; garbage raises ``InvalidItem``."""
        self.assertEqual(SubRipItem.from_string(self.string), self.item)
        self.assertRaises(InvalidItem, SubRipItem.from_string,
            self.bad_string)

    def test_coordinates(self):
        """Coordinates after the timestamps end up in ``position``."""
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(item, self.item)
        self.assertEqual(item.position, 'X1:000 X2:000 Y1:050 Y2:100')

    def test_vtt_positioning(self):
        """VTT-style settings after the timestamps end up in ``position``."""
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(vtt.position, 'D:vertical A:start L:12%')
        self.assertEqual(vtt.index, 1)
        self.assertEqual(vtt.text, 'Hello world !')

    def test_idempotence(self):
        """Parsing then serializing reproduces the input, position included."""
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(unicode(vtt), self.vtt)
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(unicode(item), self.coordinates)

    def test_dots(self):
        """Dot-separated milliseconds parse to the same cue."""
        self.assertEqual(SubRipItem.from_string(self.dots), self.item)

    # Bug reported in https://github.com/byroot/pysrt/issues/16
    def test_paring_error(self):
        """A malformed ``->`` arrow must raise ``InvalidItem``."""
        self.assertRaises(InvalidItem, SubRipItem.from_string, u'1\n'
            '00:01:00,000 -> 00:01:20,000 X1:000 X2:000 '
            'Y1:050 Y2:100\nHello world !\n')

    def test_string_index(self):
        """A non-numeric index is kept as a string."""
        item = SubRipItem.from_string(self.string_index)
        self.assertEqual(item.index, 'foo')
        self.assertEqual(item.text, 'Hello !')

    def test_no_index(self):
        """A cue without an index line parses with ``index`` set to ``None``."""
        item = SubRipItem.from_string(self.no_index)
        self.assertEqual(item.index, None)
        self.assertEqual(item.text, 'Hello world !')
Пример #7
0
class TestOperators(unittest.TestCase):
    """Comparison behaviour of ``SubRipItem``."""

    def setUp(self):
        # Cue shifted by one minute, end pushed 20 seconds further.
        cue = SubRipItem(1, text="Hello world !")
        cue.shift(minutes=1)
        cue.end.shift(seconds=20)
        self.item = cue

    def test_cmp(self):
        """An item compares equal to itself."""
        cue = self.item
        self.assertEqual(cue, cue)
Пример #8
0
class TestDuration(unittest.TestCase):
    """Check the ``duration`` property of ``SubRipItem``."""

    def setUp(self):
        # Cue shifted by one minute, end pushed 20 seconds further.
        cue = SubRipItem(1, text="Hello world !")
        cue.shift(minutes=1)
        cue.end.shift(seconds=20)
        self.item = cue

    def test_duration(self):
        """The fixture's duration is expected to equal ``(0, 0, 20, 0)``."""
        expected = (0, 0, 20, 0)
        self.assertEqual(self.item.duration, expected)
Пример #9
0
class TestOperators(unittest.TestCase):
    """Comparison behaviour of ``SubRipItem``."""

    def setUp(self):
        # Cue shifted by one minute, end pushed 20 seconds further.
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)

    def test_cmp(self):
        """An item compares equal to itself."""
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(self.item, self.item)
Пример #10
0
class TestDuration(unittest.TestCase):
    """Tests for ``SubRipItem.duration``."""

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        # Only the end moves here, which is what gives the cue its length.
        fixture.end.shift(seconds=20)
        self.item = fixture

    def test_duration(self):
        """Duration compares equal to the tuple ``(0, 0, 20, 0)``."""
        measured = self.item.duration
        self.assertEqual(measured, (0, 0, 20, 0))
Пример #11
0
 def setUp(self):
     """Build the reference cue and the raw strings the parsing tests use."""
     # Raw SRT fixtures: plain, invalid, with coordinates, with VTT
     # settings, and with dots before the milliseconds.
     self.string = u'1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
     self.bad_string = u'foobar'
     self.coordinates = (u'1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
                         'Y1:050 Y2:100\nHello world !\n')
     self.vtt = (u'1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
                 'L:12%\nHello world !\n')
     self.dots = u'1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'

     # Reference cue: shifted by one minute, end pushed 20 seconds further.
     reference = SubRipItem(1, text="Hello world !")
     reference.shift(minutes=1)
     reference.end.shift(seconds=20)
     self.item = reference
Пример #12
0
class TestCPS(unittest.TestCase):
    """Tests for the ``characters_per_second`` property."""

    def setUp(self):
        # Cue shifted by one minute, end pushed 20 seconds further.
        cue = SubRipItem(1, text="Hello world !")
        cue.shift(minutes=1)
        cue.end.shift(seconds=20)
        self.item = cue

    def test_characters_per_second(self):
        """The fixture text yields 0.65."""
        rate = self.item.characters_per_second
        self.assertEqual(rate, 0.65)

    def test_text_change(self):
        """Replacing the text with a two-line text yields 1.6."""
        self.item.text = "Hello world !\nHello world again !"
        rate = self.item.characters_per_second
        self.assertEqual(rate, 1.6)
Пример #13
0
class TestSerialAndParsing(unittest.TestCase):
    """Round-trip tests between ``SubRipItem`` objects and their text form.

    ``assertEqual`` is used throughout: the ``assertEquals`` alias is
    deprecated and no longer exists in recent Python versions.
    """

    def setUp(self):
        # Reference cue that most of the parsed strings below must equal.
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)
        self.string = u'1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
        self.bad_string = u'foobar'
        # Timestamp lines followed by positioning data.
        self.coordinates = (u'1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
                                'Y1:050 Y2:100\nHello world !\n')
        self.vtt = (u'1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
                                'L:12%\nHello world !\n')
        # Dots instead of commas before the milliseconds.
        self.dots = u'1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
        self.bad_index = u'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'

    def test_serialization(self):
        """The unicode form of the reference cue matches the plain SRT text."""
        self.assertEqual(unicode(self.item), self.string)

    def test_from_string(self):
        """Valid text parses to the reference cue; garbage raises ``InvalidItem``."""
        self.assertEqual(SubRipItem.from_string(self.string), self.item)
        self.assertRaises(InvalidItem, SubRipItem.from_string,
            self.bad_string)

    def test_coordinates(self):
        """Coordinates after the timestamps end up in ``position``."""
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(item, self.item)
        self.assertEqual(item.position, 'X1:000 X2:000 Y1:050 Y2:100')

    def test_vtt_positioning(self):
        """VTT-style settings after the timestamps end up in ``position``."""
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(vtt.position, 'D:vertical A:start L:12%')
        self.assertEqual(vtt.index, 1)
        self.assertEqual(vtt.text, 'Hello world !')

    def test_idempotence(self):
        """Parsing then serializing reproduces the input, position included."""
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(unicode(vtt), self.vtt)
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(unicode(item), self.coordinates)

    def test_dots(self):
        """Dot-separated milliseconds parse to the same cue."""
        self.assertEqual(SubRipItem.from_string(self.dots), self.item)

    # Bug reported in https://github.com/byroot/pysrt/issues/16
    def test_paring_error(self):
        """A malformed ``->`` arrow must raise ``InvalidItem``."""
        self.assertRaises(InvalidItem, SubRipItem.from_string, u'1\n'
            '00:01:00,000 -> 00:01:20,000 X1:000 X2:000 '
            'Y1:050 Y2:100\nHello world !\n')

    def test_invalid_index(self):
        """A non-numeric index line must raise ``InvalidItem``."""
        self.assertRaises(InvalidItem, SubRipItem.from_string, self.bad_index)
Пример #14
0
 def setUp(self):
     """Build the reference cue and the raw strings the parsing tests use."""
     # Raw SRT fixtures covering the input variants under test.
     self.string = '1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
     self.bad_string = 'foobar'
     self.coordinates = (
         '1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
         'Y1:050 Y2:100\nHello world !\n'
     )
     self.vtt = (
         '1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
         'L:12%\nHello world !\n'
     )
     self.string_index = 'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'
     self.dots = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
     self.no_index = '00:01:00,000 --> 00:01:20,000\nHello world !\n'
     self.junk_after_timestamp = (
         '1\n00:01:00,000 --> 00:01:20,000?\n'
         'Hello world !\n'
     )

     # Reference cue: shifted by one minute, end pushed 20 seconds further.
     reference = SubRipItem(1, text="Hello world !")
     reference.shift(minutes=1)
     reference.end.shift(seconds=20)
     self.item = reference
Пример #15
0
def processSub(sub_L1, sub_L2, levels, outs, removed_lines, show_L2):
    """Emit one output cue per level for a pair of L1/L2 subtitles.

    For each entry of ``levels`` the L2 text is checked with
    ``isTextNotAboveLevel``. When it passes, ``removed_lines[level]`` is
    incremented and the cue carries the L2 template (or nothing when
    ``show_L2 == "no"``). Otherwise the cue carries both lines joined when
    ``show_L2 == "yes"``, or the L1 template. Non-empty cues are appended to
    ``outs[level]`` with the index and timing of ``sub_L2``.
    """
    text_L1 = sub_L1.text.lstrip()
    text_L2 = sub_L2.text.lstrip()

    # The L2 text does not change inside the loop, so test it once.
    has_L2_text = (text_L2 is not None) and (len(text_L2) > 0)

    if has_L2_text:
        cefr_level, flesh_kincade_grade, n_words = analyzeSubLevel(text_L2)
    else:
        cefr_level, flesh_kincade_grade, n_words = "", "", 0

    for level in levels:
        within_level = has_L2_text and isTextNotAboveLevel(
            level, cefr_level, flesh_kincade_grade, n_words, len(text_L2))

        if within_level:
            removed_lines[level] = removed_lines[level] + 1
            if show_L2 == "no":
                text = ""
            else:
                text = this.L2_sub_template.format(text_L2)
        elif show_L2 == "yes":
            text = joinLines(text_L2, text_L1)
        else:
            text = this.L1_sub_template.format(text_L1)

        if len(text) > 0:
            outs[level].append(
                SubRipItem(sub_L2.index, sub_L2.start, sub_L2.end, text))
Пример #16
0
    def setUp(self):
        original = """77777777 333 1
55555 55555
4444 4444
22 22 22"""
        self.item = SubRipItem(1, text=original)
        self.item.break_lines(5)
Пример #17
0
    def process(self, subs: SubRipFile, items: List[PgsSubtitleItem],
                post_process, confidence: int, max_width: int):
        """Run OCR over ``items`` and append the accepted ones to ``subs``.

        The items are combined into one ``FullImage`` that is passed to
        ``tess.image_to_data``. Each item is then checked with
        ``self.accept``: items for which it returns ``None`` are collected
        and returned; the others have their ``text`` (optionally passed
        through ``post_process``) appended to ``subs`` as a ``SubRipItem``
        when that text is non-empty.

        :returns: the list of items that were not accepted.
        """
        combined = FullImage.from_items(items, self.gap, max_width)

        tess_kwargs = {'output_type': tess.Output.DICT, 'config': '--psm 11'}
        if self.pgs.language:
            tess_kwargs['lang'] = self.pgs.language.alpha3

        if self.omp_thread_limit:
            # Exported through the process environment before the OCR call.
            os.environ['OMP_THREAD_LIMIT'] = str(self.omp_thread_limit)

        data = TsvData(tess.image_to_data(combined.data, **tess_kwargs))

        rejected = []
        for candidate in items:
            if self.accept(data, candidate, confidence) is None:
                rejected.append(candidate)
                continue

            # The appended text is read from the item, not from the value
            # returned by accept().
            text = candidate.text
            if post_process:
                text = post_process(text)
            if text:
                subs.append(
                    SubRipItem(0, candidate.start, candidate.end, text))

        return rejected
Пример #18
0
def generate_srt_from_sjson(sjson_subs, speed):
    """Generate transcripts with speed = 1.0 from sjson to SubRip (*.srt).

    :param sjson_subs: "sjson" subs, a mapping with parallel ``'start'``,
        ``'end'`` and ``'text'`` sequences.
    :param speed: speed of `sjson_subs`.
    :returns: "srt" subs as text; an empty string when the three sequences
        differ in length.
    """
    lengths_match = len(sjson_subs['start']) == len(sjson_subs['end']) == len(
        sjson_subs['text'])
    if not lengths_match:
        return ''

    normalized = generate_subs(speed, 1, sjson_subs)

    pieces = []
    for position in range(len(normalized['start'])):
        entry = SubRipItem(
            index=position,
            start=SubRipTime(milliseconds=normalized['start'][position]),
            end=SubRipTime(milliseconds=normalized['end'][position]),
            text=normalized['text'][position])
        # Each serialized cue is followed by one extra newline.
        pieces.append(unicode(entry))
        pieces.append('\n')
    return ''.join(pieces)
Пример #19
0
def dmxread_callback(frame, frame_no):
    """Record a DMX frame as a subtitle cue when it differs from the last one.

    When ``frame`` is not ``None`` and differs from ``prevFrame``, a cue with
    text ``"DMX1" + str(frame)`` is created, shifted by ``prevTime`` seconds
    with its end pushed by the time elapsed since ``prevTime``, and appended
    to both ``subs`` and ``srtFile``. ``prevFrame`` is updated on every call.

    :param frame: the received frame, or ``None``.
    :param frame_no: unused.
    """
    global prevFrame, prevTime, subs, srtFile
    frameArray = array(frame)
    # Identity test: ``frame != None`` would compare element-wise if the
    # frame were ever an array-like object.
    if not array_equal(prevFrame, frameArray) and frame is not None:
        item = SubRipItem(1, text="DMX1" + str(frame))
        item.shift(seconds=prevTime)
        item.end.shift(seconds=perf_counter() - prevTime)
        if VERBOSE:
            print(item)
        subs.append(item)
        srtFile.append(item)
        prevTime = perf_counter()
    prevFrame = array(frame)
Пример #20
0
    def generate_srt_from_sjson(sjson_subs):
        """
        Generate transcripts from sjson to SubRip (*.srt)

        Arguments:
            sjson_subs (dict): `sjson` subs, with parallel `start`, `end`
                and `text` sequences.

        Returns:
            Subtitles in SRT format, or an empty string when the three
            sequences differ in length.
        """
        lengths_match = (
            len(sjson_subs['start']) == len(sjson_subs['end']) == len(sjson_subs['text'])
        )
        if not lengths_match:
            return ''

        pieces = []
        for position in range(len(sjson_subs['start'])):
            entry = SubRipItem(
                index=position,
                start=SubRipTime(milliseconds=sjson_subs['start'][position]),
                end=SubRipTime(milliseconds=sjson_subs['end'][position]),
                text=sjson_subs['text'][position]
            )
            # Each serialized cue is followed by one extra newline.
            pieces.append(str(entry))
            pieces.append('\n')
        return ''.join(pieces)
Пример #21
0
def play_record_dmx(unused_addr, args, value):
    """Store one DMX channel value and, at most once per interval, record the frame.

    ``dmx_array[int(args[0])]`` is set to ``int(value * 255)``. When more
    than ``DMX_INTERVAL`` seconds have passed since ``previous_dmx_time``,
    the array (trailing zeros trimmed, cast to ``uint8``) is compared with
    ``prev_frame``; a changed frame is recorded as a ``SubRipItem`` in
    ``subs`` (and ``srtFile`` when set) and, if ``PLAY_DMX`` is set, written
    to ``dmx``.

    :param unused_addr: unused.
    :param args: sequence whose first element is the channel index.
    :param value: channel level, scaled by 255 before storing.
    """
    global INTERVAL, TRANSITION_TIME, previous_time, dmxCounter, VERBOSE
    global prev_frame, prev_time, subs, srtFile, previous_dmx_time, DMX_INTERVAL, sub_incr
    global dmx

    dmx_array[int(args[0])] = int(value * 255)

    current_dmx_time = time.time()
    elapsed_dmx_time = current_dmx_time - previous_dmx_time

    if DEBUG:
        print("DMX time", current_dmx_time, previous_dmx_time,
              elapsed_dmx_time)

    if not elapsed_dmx_time > DMX_INTERVAL:
        return

    frameArray = trim_zeros(dmx_array, 'b').astype('uint8')
    print(array_equal(prev_frame, frameArray), tuple(prev_frame),
          tuple(frameArray))
    if not array_equal(prev_frame, frameArray):
        # NOTE(review): this guard is kept from the original; ``any()``
        # presumably never returns None, so it looks always true - confirm
        # what was intended.
        if frameArray.any() is not None:
            # The first channel is dropped from both the cue text and the
            # frame sent to the device.
            channels = tuple(frameArray)[1:]
            item = SubRipItem(sub_incr,
                              text="DMX1" + str(channels).replace(" ", ""))
            item.shift(seconds=prev_time)
            item.end.shift(seconds=perf_counter() - prev_time)
            if VERBOSE:
                print(item)
            subs.append(item)
            if srtFile is not None:
                srtFile.append(item)
            sub_incr += 1
            prev_time = perf_counter()
            if PLAY_DMX:
                if DEBUG:
                    print("DMX tuple", channels)
                dmx.write_frame(channels)
            # The frames are already known to differ here, so the original
            # second array_equal check was redundant.
            prev_frame = frameArray

    previous_dmx_time = time.time()
    print(previous_dmx_time)
Пример #22
0
    def refragment_with_min_duration(
            subs: List[SubRipItem],
            minimum_segment_duration: float) -> List[SubRipItem]:
        """Re-fragment a list of subtitle cues into new cues that each span a minimum duration

        Consecutive cues are accumulated until their summed duration reaches
        the minimum; the group is then emitted as one cue running from the
        first cue's start to the last cue's end, with the texts joined by
        newlines and the first cue's position. A trailing group is emitted
        even if it is shorter than the minimum.

        Arguments:
            subs {list} -- A list of SubRip cues.
            minimum_segment_duration {float} -- The minimum duration in seconds for each output subtitle cue.
        Returns:
            list -- A list of new SubRip cues after fragmentation.
        """

        def cue_seconds(cue):
            # A falsy result from the helper is counted as zero seconds.
            return MediaHelper.get_duration_in_seconds(
                str(cue.start), str(cue.end)) or 0.0

        def merge(index, cues, text):
            return SubRipItem(index, cues[0].start, cues[-1].end, text,
                              cues[0].position)

        new_subs = []
        pending = []
        pending_seconds = 0.0
        pending_text = ""
        next_index = 0
        for sub in subs:
            # ``not pending`` guards the first cue: with a minimum of zero or
            # less the original fell through to the merge branch with an
            # empty group and raised IndexError.
            if not pending or minimum_segment_duration > pending_seconds:
                pending.append(sub)
                pending_seconds += cue_seconds(sub)
                pending_text += "{}\n".format(sub.text)
                continue

            new_subs.append(merge(next_index, pending, pending_text))
            next_index += 1
            pending = [sub]
            pending_seconds = cue_seconds(sub)
            pending_text = "{}\n".format(sub.text)

        if pending:
            new_subs.append(merge(next_index, pending, pending_text))
        return new_subs
Пример #23
0
class TestBreakingLines(unittest.TestCase):
    """Check ``SubRipItem.break_lines`` on a four-line text."""

    def setUp(self):
        source_text = """77777777 333 1
55555 55555
4444 4444
22 22 22"""
        cue = SubRipItem(1, text=source_text)
        cue.break_lines(5)
        self.item = cue

    def test_break(self):
        """After ``break_lines(5)`` the text matches the expected wrapping."""
        expected = """77777777
333 1
55555
55555
4444
4444
22 22
22"""
        self.assertEqual(expected, self.item.text)
Пример #24
0
class TestCPS(unittest.TestCase):
    """Tests for the ``characters_per_second`` property of ``SubRipItem``."""

    def setUp(self):
        # Cue shifted by one minute, end pushed 20 seconds further.
        cue = SubRipItem(1, text="Hello world !")
        cue.shift(minutes=1)
        cue.end.shift(seconds=20)
        self.item = cue

    def test_characters_per_second(self):
        """The fixture text yields 0.65."""
        rate = self.item.characters_per_second
        self.assertEqual(rate, 0.65)

    def test_text_change(self):
        """Replacing the text with a two-line text yields 1.6."""
        self.item.text = "Hello world !\nHello world again !"
        rate = self.item.characters_per_second
        self.assertEqual(rate, 1.6)

    def test_zero_duration(self):
        """Moving the start 20 seconds later yields 0.0."""
        self.item.start.shift(seconds=20)
        rate = self.item.characters_per_second
        self.assertEqual(rate, 0.0)

    def test_tags(self):
        """A two-line text full of markup yields 2.45."""
        self.item.text = ('<b>bold</b>, <i>italic</i>, <u>underlined</u>\n'
                          '<font color="#ff0000">red text</font>'
                          ', <b>one,<i> two,<u> three</u></i></b>')
        rate = self.item.characters_per_second
        self.assertEqual(rate, 2.45)
Пример #25
0
    def generate_srt(self, text: str):
        """
        Generates .srt file with the given text and timestamps.

        The text is split on single spaces and handed to ``prepare_text``;
        ``prepare_timestamps`` supplies one (start, end) pair per resulting
        sentence, each a colon-separated string whose first four fields are
        read as hours, minutes, seconds and milliseconds.

        :param text: String with all retrieved text.
        """
        self.create_subs_path()

        subs = open_srt(self.srt_path)
        texts = self.prepare_text(text.split(" "))
        timestamps = self.prepare_timestamps(texts)

        for position, (sentence, (begin, finish)) in enumerate(
                zip(texts, timestamps)):
            # Parse both timestamps before building anything.
            begin_fields = list(map(int, begin.split(':')))
            finish_fields = list(map(int, finish.split(':')))

            entry = SubRipItem(index=position)
            entry.text = sentence
            entry.start = SubRipTime(hours=begin_fields[0],
                                     minutes=begin_fields[1],
                                     seconds=begin_fields[2],
                                     milliseconds=begin_fields[3])
            entry.end = SubRipTime(hours=finish_fields[0],
                                   minutes=finish_fields[1],
                                   seconds=finish_fields[2],
                                   milliseconds=finish_fields[3])
            subs.append(entry)

        # Saving result subtitles into file
        subs.save(self.srt_path, encoding='utf-8')

        logging.info(f"Generated subtitles are saved in {self.srt_path}")
Пример #26
0
class TestCPS(unittest.TestCase):
    """Tests for the ``characters_per_second`` property of ``SubRipItem``."""

    def setUp(self):
        # Cue shifted by one minute, end pushed 20 seconds further.
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)

    def test_characters_per_second(self):
        """The fixture text yields 0.65."""
        self.assertEqual(self.item.characters_per_second, 0.65)

    def test_text_change(self):
        """Replacing the text with a two-line text yields 1.6."""
        self.item.text = "Hello world !\nHello world again !"
        self.assertEqual(self.item.characters_per_second, 1.6)

    def test_zero_duration(self):
        """Moving the start 20 seconds later yields 0.0."""
        self.item.start.shift(seconds=20)
        self.assertEqual(self.item.characters_per_second, 0.0)

    def test_tags(self):
        """A two-line text full of markup yields 2.45."""
        # Indented with spaces like the rest of the class; the original body
        # used tabs here.
        self.item.text = '<b>bold</b>, <i>italic</i>, <u>underlined</u>\n' + \
            '<font color="#ff0000">red text</font>' + \
            ', <b>one,<i> two,<u> three</u></i></b>'
        self.assertEqual(self.item.characters_per_second, 2.45)
Пример #27
0
 def setUp(self):
     """Build the reference cue and the raw strings the parsing tests use."""
     # Reference cue: shifted by one minute, end pushed 20 seconds further.
     reference = SubRipItem(1, text="Hello world !")
     reference.shift(minutes=1)
     reference.end.shift(seconds=20)
     self.item = reference

     # Raw SRT fixtures covering the input variants under test.
     self.string = '1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
     self.bad_string = 'foobar'
     self.coordinates = (
         '1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
         'Y1:050 Y2:100\nHello world !\n'
     )
     self.vtt = (
         '1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
         'L:12%\nHello world !\n'
     )
     self.string_index = 'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'
     self.dots = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
     self.no_index = '00:01:00,000 --> 00:01:20,000\nHello world !\n'
     self.junk_after_timestamp = (
         '1\n00:01:00,000 --> 00:01:20,000?\n'
         'Hello world !\n'
     )
Пример #28
0
def get_captions(client_name, clip_id):
    """Fetch a clip's captions, save them as an .srt file and upload it.

    The JSON document at ``http://<client_name>/JSON.php?clip_id=<clip_id>``
    is downloaded and its first element is read as the list of caption
    entries. Entries of type ``"text"`` are collected; those for which
    ``get_cap_end`` returns an end time also become SRT cues. The file is
    written under ``data/granicus/srt/<client_name>/`` in the current
    working directory and passed to ``push_to_s3``.

    :param client_name: host name to query; also used in the output path.
    :param clip_id: clip identifier; also used as the file name.
    :returns: ``(captions, s3_url)`` where ``captions`` is a list of
        ``{'time', 'text'}`` dicts, or ``([], '')`` when the response status
        is not ``'200'`` or the payload cannot be parsed.
    """
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % (client_name, clip_id)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Fetching URL: %s" % g_url)
    response, j = h.request(g_url)
    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()

    if response.get('status') != '200':
        return ([], '')

    captions = []
    try:
        j = json.loads(j, strict=False)[0]
    except ValueError:
        # Retry after quoting bare lowercase keys and dropping backslashes.
        ts = re.sub(r'([{,]\s+)([a-z]+)(: ")',
                    lambda s: '%s"%s"%s' % s.groups(), j).replace("\\", "")
        try:
            j = json.loads(ts, strict=False)[0]
        except UnicodeDecodeError:
            ts = unicode(ts, errors='ignore')
            j = json.loads(ts, strict=False)[0]
    except Exception:
        # The original set ``j = False`` here and then crashed iterating it;
        # treat an unusable payload like a failed fetch instead.
        return ([], '')

    # ``sub_count`` is the position in the full list, counting non-text
    # entries too, because get_cap_end() receives that same list.
    for sub_count, item in enumerate(j):
        if item["type"] != "text":
            continue
        cap = item["text"]
        offset = round(float(item["time"]), 3)
        captions.append({'time': offset, 'text': cap})
        end = get_cap_end(j, sub_count)
        if end:
            subs.append(SubRipItem(index=sub_count,
                                   start=SubRipTime(seconds=offset),
                                   end=SubRipTime(seconds=end),
                                   text=cap))

    try:
        subs.save(path=filename, encoding="utf-8")
    except IOError:
        # Create the directory in-process rather than running ``mkdir -p``
        # through a shell with an interpolated name, then retry once.
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        subs.save(path=filename, encoding="utf-8")

    s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
    return (captions, s3_url)
Пример #29
0
    def save(self, path):
        """Write the elements to ``path`` as SRT or as a tab-separated table.

        Paths ending in ``'srt'`` are written through pysrt, one cue per
        element running from ``onset`` to ``onset + duration``. Any other
        path gets an ``onset``/``text``/``duration`` header line followed by
        one tab-separated line per element.
        """
        if not path.endswith('srt'):
            with open(path, 'w') as handle:
                handle.write('onset\ttext\tduration\n')
                for element in self._elements:
                    row = '{}\t{}\t{}\n'.format(element.onset, element.text,
                                                element.duration)
                    handle.write(row)
            return

        verify_dependencies(['pysrt'])
        from pysrt import SubRipFile, SubRipItem
        from datetime import time

        srt_file = SubRipFile()
        for element in self._elements:
            begins = time(*self._to_tup(element.onset))
            finishes = time(*self._to_tup(element.onset + element.duration))
            srt_file.append(SubRipItem(0, begins, finishes, element.text))
        srt_file.save(path)
Пример #30
0
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """
    Merge two srt subtitles that are in different languages.

    The two subtitle files do not share a timeline, so the merged output
    starts a new cue whenever either file changes cue. As a result the two
    languages do not always change at the same moment; this cannot be
    avoided.

    See https://github.com/byroot/pysrt/issues/17

    https://github.com/byroot/pysrt/issues/15

    :param sub_a: first subtitle file, e.g. obtained with
        ``SubRipFile.open(sub_a_path, encoding=encoding)``
    :param sub_b: second subtitle file
    :param delta: only intervals longer than this are emitted
    :param encoding: not used by this function
    :return: a ``SubRipFile`` with the merged cues, re-indexed
    """
    out = SubRipFile()

    # Every start and end of both files, in time order, delimits a
    # candidate interval.
    intervals = [item.start.ordinal for item in sub_a]
    intervals.extend([item.end.ordinal for item in sub_a])
    intervals.extend([item.start.ordinal for item in sub_b])
    intervals.extend([item.end.ordinal for item in sub_b])
    intervals.sort()

    j = k = 0
    # Pairing neighbours with zip avoids ``xrange``, which exists only on
    # Python 2.
    for begin_ordinal, end_ordinal in zip(intervals, intervals[1:]):
        start = SubRipTime.from_ordinal(begin_ordinal)
        end = SubRipTime.from_ordinal(end_ordinal)

        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                out.append(SubRipItem(0, start, end, text))

    out.clean_indexes()
    return out
Пример #31
0
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle files into one file of combined cues.

    All start and end times of both inputs are sorted into one list of
    boundaries. For each pair of neighbouring boundaries further apart than
    ``delta``, the text found in each input is joined and, when non-empty,
    emitted as a cue. The result is re-indexed with ``clean_indexes``.
    """
    merged = SubRipFile()

    boundaries = []
    for source in (sub_a, sub_b):
        boundaries.extend([cue.start.ordinal for cue in source])
        boundaries.extend([cue.end.ordinal for cue in source])
    boundaries.sort()

    # Search cursors handed to and returned by find_subtitle().
    cursor_a = cursor_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)

        if not (end - start) > delta:
            continue

        text_a, cursor_a = find_subtitle(sub_a, start, end, cursor_a)
        text_b, cursor_b = find_subtitle(sub_b, start, end, cursor_b)

        combined = join_lines(text_a, text_b)
        if len(combined) > 0:
            merged.append(SubRipItem(0, start, end, combined))

    merged.clean_indexes()
    return merged
Пример #32
0
 def test_string_index(self):
     """A non-numeric index is kept as a string."""
     # assertEqual replaces the deprecated assertEquals alias.
     item = SubRipItem.from_string(self.string_index)
     self.assertEqual(item.index, 'foo')
     self.assertEqual(item.text, 'Hello !')
Пример #33
0
 def test_vtt_positioning(self):
     """VTT-style settings after the timestamps end up in ``position``."""
     # assertEqual replaces the deprecated assertEquals alias.
     vtt = SubRipItem.from_string(self.vtt)
     self.assertEqual(vtt.position, 'D:vertical A:start L:12%')
     self.assertEqual(vtt.index, 1)
     self.assertEqual(vtt.text, 'Hello world !')
Пример #34
0
 def test_coordinates(self):
     """Coordinates after the timestamps end up in ``position``."""
     # assertEqual replaces the deprecated assertEquals alias.
     item = SubRipItem.from_string(self.coordinates)
     self.assertEqual(item, self.item)
     self.assertEqual(item.position, 'X1:000 X2:000 Y1:050 Y2:100')
Пример #35
0
 def test_from_string(self):
     """Valid text parses to the reference cue; garbage raises ``InvalidItem``."""
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(SubRipItem.from_string(self.string), self.item)
     self.assertRaises(InvalidItem, SubRipItem.from_string,
         self.bad_string)
Пример #36
0
 def setUp(self):
     """Create the cue shared by the tests."""
     # Shift the cue by one minute, then push only its end 20 seconds further.
     cue = SubRipItem(1, text="Hello world !")
     cue.shift(minutes=1)
     cue.end.shift(seconds=20)
     self.item = cue
Пример #37
0
 def setUp(self):
     """Give each test a fresh ``SubRipItem`` built with no arguments."""
     blank_cue = SubRipItem()
     self.item = blank_cue
Пример #38
0
 def test_junk_after_timestamp(self):
     """Text with junk after the end timestamp still parses to the reference cue."""
     # assertEqual replaces the deprecated assertEquals alias.
     item = SubRipItem.from_string(self.junk_after_timestamp)
     self.assertEqual(item, self.item)
Пример #39
0
 def test_dots(self):
     """The ``dots`` fixture parses to the reference cue."""
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(SubRipItem.from_string(self.dots), self.item)
Пример #40
0
 def test_idempotence(self):
     """Parsing then serializing reproduces the input text."""
     # assertEqual replaces the deprecated assertEquals alias.
     vtt = SubRipItem.from_string(self.vtt)
     self.assertEqual(unicode(vtt), self.vtt)
     item = SubRipItem.from_string(self.coordinates)
     self.assertEqual(unicode(item), self.coordinates)
Пример #41
0
 def test_no_index(self):
     """The ``no_index`` fixture parses with ``index`` set to ``None``."""
     # assertEqual replaces the deprecated assertEquals alias.
     item = SubRipItem.from_string(self.no_index)
     self.assertEqual(item.index, None)
     self.assertEqual(item.text, 'Hello world !')
Пример #42
0
 def test_junk_after_timestamp(self):
     """The ``junk_after_timestamp`` fixture parses to the reference cue."""
     # assertEqual replaces the deprecated assertEquals alias.
     parsed = SubRipItem.from_string(self.junk_after_timestamp)
     self.assertEqual(parsed, self.item)
Пример #43
0
 def test_no_index(self):
     """Parsing the ``no_index`` fixture gives ``index`` ``None`` and the expected text."""
     # assertEqual replaces the deprecated assertEquals alias.
     parsed = SubRipItem.from_string(self.no_index)
     self.assertEqual(parsed.index, None)
     self.assertEqual(parsed.text, 'Hello world !')