Exemplo n.º 1
0
    def _parse(self, lines):
        self._compute_blocks(lines)

        for block in self.blocks:
            if self._is_cue_block(block):
                caption = self._parse_cue_block(block)
                self.captions.append(caption)
            elif self._is_comment_block(block):
                continue
            elif self._is_style_block(block):
                if self.captions:
                    raise MalformedFileError(
                        'Style block defined after the first cue in line {}.'.
                        format(block.line_number))
                style = Style()
                style.lines = block.lines[1:]
                self.styles.append(style)
            else:
                if len(block.lines) == 1:
                    raise MalformedCaptionError(
                        'Standalone cue identifier in line {}.'.format(
                            block.line_number))
                else:
                    raise MalformedCaptionError(
                        'Missing timing cue in line {}.'.format(
                            block.line_number + 1))
Exemplo n.º 2
0
    def _parse(self, lines):
        """Build captions from ``lines`` and append them to ``self.captions``.

        A timeframe line opens a new caption, the non-empty lines that follow
        are added to it as text, and an empty line closes it. A caption that
        is still open when the input ends is kept only if it has text.

        Args:
            lines: iterable of lines; a falsy (empty) line ends the caption
                currently being built.

        Raises:
            MalformedCaptionError: a timeframe line cannot be parsed, text
                appears before any timeframe line, or a caption is closed
                by an empty line without having any text.
        """
        c = None

        for index, line in enumerate(lines):
            # Allow child classes to skip lines based on the content.
            if self._should_skip_line(line, index, c):
                continue

            if self._is_timeframe_line(line):
                try:
                    start, end = self._parse_timeframe_line(line)
                except MalformedCaptionError as e:
                    # Re-raise with the 1-based line number appended.
                    raise MalformedCaptionError('{} in line {}'.format(e, index + 1))
                c = Caption(start, end)
            elif line:
                if c is None:
                    raise MalformedCaptionError('Caption missing timeframe in line {}.'.format(index + 1))
                c.add_line(line)
            else:
                # Empty line: nothing to close if no caption is open.
                if c is None:
                    continue
                if not c.lines:
                    raise MalformedCaptionError('Caption missing text in line {}.'.format(index + 1))

                self.captions.append(c)
                c = None

        # Flush a trailing caption that was not followed by an empty line.
        if c is not None and c.lines:
            self.captions.append(c)
Exemplo n.º 3
0
    def _parse_timeframe_line(self, line):
        """Parse timeframe line and return start and end timestamps."""
        tf = self._validate_timeframe_line(line)
        if not tf:
            raise MalformedCaptionError('Invalid time format')

        return tf.group(1), tf.group(2)
Exemplo n.º 4
0
    def _parse_timestamp(self, timestamp):
        """Convert a timestamp string via ``_to_seconds``.

        The string is matched against ``TIMESTAMP_PATTERN``; each captured
        group is converted to ``int``, with empty or unmatched groups
        counted as 0, and the values are passed on to ``_to_seconds``.

        Raises:
            MalformedCaptionError: the timestamp does not match the pattern.
        """
        match = re.match(TIMESTAMP_PATTERN, timestamp)
        if match:
            parts = [int(group) if group else 0 for group in match.groups()]
            return self._to_seconds(*parts)

        raise MalformedCaptionError('Invalid timestamp: {}'.format(timestamp))
Exemplo n.º 5
0
 def _parse_timestamp(self, timestamp):
     """Convert a timestamp string via ``_to_seconds``.

     Groups 1-4 of the ``TIMESTAMP_PATTERN`` match are read as hours,
     minutes, seconds and milliseconds, converted to ``int`` and passed
     on to ``_to_seconds``.

     Raises:
         MalformedCaptionError: the timestamp does not match the pattern.
     """
     match = re.match(TIMESTAMP_PATTERN, timestamp)
     if match:
         hours, minutes, seconds, milliseconds = (
             int(match.group(position)) for position in range(1, 5))
         return self._to_seconds(hours, minutes, seconds, milliseconds)

     raise MalformedCaptionError(
         'Invalid timestamp: {}'.format(timestamp))
Exemplo n.º 6
0
    def _should_skip_line(self, line, index, caption):
        is_header_title = index == 0 and line == 'WEBVTT'
        is_header_data = len(self.captions) == 0 and caption is None

        if self.expect_timeframe:
            raise MalformedCaptionError(
                'Caption missing timeframe in line {}.'.format(index + 1))

        is_cue_identifier = caption is None and not is_header_title and not is_header_data
        if is_cue_identifier:
            self.expect_timeframe = True

        return is_header_title or is_header_data or is_cue_identifier
Exemplo n.º 7
0
    def _parse_cue_block(self, block):
        """Build a ``Caption`` from the lines of a cue block.

        The first cue timings line supplies the caption's start and end.
        A non-timings first line becomes the caption identifier, and every
        other non-timings line is added as caption text.

        Raises:
            MalformedCaptionError: the timings line cannot be parsed, or a
                second timings line is found in the same block.
        """
        caption = Caption()
        timings = None

        for offset, text in enumerate(block.lines):
            if not self._is_cue_timings_line(text):
                if offset == 0:
                    caption.identifier = text
                else:
                    caption.add_line(text)
                continue

            # Only one timings line is allowed per block.
            if timings is not None:
                raise MalformedCaptionError(
                    '--> found in line {}'.format(block.line_number + offset))

            try:
                timings = self._parse_timeframe_line(text)
            except MalformedCaptionError as e:
                # Re-raise with the line number within the file appended.
                raise MalformedCaptionError(
                    '{} in line {}'.format(e, block.line_number + offset))

        caption.start = timings[0]
        caption.end = timings[1]
        return caption