Пример #1
0
    def write_into(self, output_file, eol=None, include_indexes=False):
        """
        write_into(output_file [, eol [, include_indexes]])

        Write the "WEBVTT" header followed by every item of this file
        into `output_file`.

        `output_file` -> any object exposing a `write()` method, typically
        a file object.
        `eol` -> line terminator to use for the output; falls back to
        `self.eol` when empty or None.
        `include_indexes` -> when True, each item is preceded by a line
        holding its index.
        """
        self._check_valid_len()
        newline = eol or self.eol
        output_file.write("WEBVTT{0}{0}".format(newline))

        # An item that already ends with a blank line needs no extra
        # terminator after it.
        blank_line = 2 * newline
        for cue in self:
            rendered = str(cue)
            # Items render with '\n'; translate to the requested eol.
            if newline != '\n':
                rendered = rendered.replace('\n', newline)
            if include_indexes:
                output_file.write(str(cue.index) + newline)
            output_file.write(rendered)
            # The separating eol belongs to the file, not to the item text,
            # but callers may hand us items that already carry one.
            if rendered.endswith(blank_line):
                continue
            output_file.write(newline)
Пример #2
0
    def __init__(self, index=0, start=None, end=None, text='', position=''):
        """
        Store the cue fields on the instance.

        index -> kept as an int when it can be cast, otherwise as given.
        start, end -> passed through WebVTTTime.coerce; falsy values are
        coerced from 0.
        text, position -> stored as strings.
        """
        # Casting the index is best effort: an uncastable value is kept.
        try:
            index = int(index)
        except (TypeError, ValueError):
            pass
        self.index = index

        start_value = start or 0
        self.start = WebVTTTime.coerce(start_value)
        end_value = end or 0
        self.end = WebVTTTime.coerce(end_value)
        self.position = str(position)
        self.text = str(text)
Пример #3
0
    def __init__(self, index=0, start=None, end=None, text='', position=''):
        """
        Initialise the item from its index, timing, text and position.

        The index is converted to int when possible and stored unchanged
        otherwise. `start` and `end` go through WebVTTTime.coerce (a falsy
        value is replaced by 0 first); `text` and `position` are stored as
        strings.
        """
        try:
            numeric_index = int(index)
        except (TypeError, ValueError):
            # A non-numeric index is allowed; keep it exactly as received.
            numeric_index = index
        self.index = numeric_index

        self.start = WebVTTTime.coerce(start if start else 0)
        self.end = WebVTTTime.coerce(end if end else 0)
        self.position = str(position)
        self.text = str(text)
Пример #4
0
    def test_eol_preservation(self):

        # Tests input eol is kept after saving: for each line terminator a
        # one-cue file is written, loaded and saved back with the detected
        # terminator, then the terminator of the saved file is compared.

        self.temp_eol_path = os.path.join(self.static_path,
                                          'temp_eol_preserv.vtt')
        end_of_lines = ['\n', '\r', '\r\n']
        enc = 'utf-8'

        for eols in end_of_lines:
            content = ('00:01:00,000 --> 00:02:00,000' + eols +
                       'TestEOLPreservation' + eols)
            try:
                # The file is opened in binary mode so the terminator is
                # written untouched; binary files need bytes, hence the
                # explicit encode.
                with open(self.temp_eol_path, 'wb') as input_eol:
                    input_eol.write(content.encode(enc))

                # Text mode already reads with universal newlines, which is
                # what populates `newlines`; the 'U' flag is not needed and
                # is rejected by recent Python versions.
                with open(self.temp_eol_path, 'r',
                          encoding=enc) as input_file:
                    input_file.read()
                    input_newlines = input_file.newlines
                self.assertEqual(eols, input_newlines)

                vtt_file = pyvtt.open(self.temp_eol_path, encoding=enc)
                vtt_file.save(self.temp_eol_path, eol=input_newlines)

                with open(self.temp_eol_path, 'r',
                          encoding=enc) as output_file:
                    output_file.read()
                    output_newlines = output_file.newlines
                self.assertEqual(output_newlines, input_newlines)
            finally:
                # Remove the temporary file even when an assertion fails.
                if os.path.exists(self.temp_eol_path):
                    os.remove(self.temp_eol_path)
Пример #5
0
class WebVTTItem(ComparableMixin):
    """
    WebVTTItem(index, start, end, text, position)

    index -> cast to int when possible, otherwise stored as given.
    start, end -> WebVTTTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw vtt "display coordinates" string
    """
    # Serialized layout: "<start> --> <end><position>\n<text>\n".
    ITEM_PATTERN = str('%s --> %s%s\n%s\n')
    TIMESTAMP_SEPARATOR = '-->'

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        """
        Store the cue fields; falsy `start`/`end` are coerced from 0.
        """
        try:
            self.index = int(index)
        except (TypeError,
                ValueError):  # try to cast as int, but it's not mandatory
            self.index = index

        self.start = WebVTTTime.coerce(start or 0)
        self.end = WebVTTTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)

    @property
    def duration(self):
        """Difference between the end and the start of the item."""
        return self.end - self.start

    @property
    def text_without_tags(self):
        """Text with every `<...>` span removed."""
        return self._text_tag_cleaner('<', '>')

    @property
    def text_without_brackets(self):
        """Text with every `[...]` span removed."""
        # Raw strings: the brackets must reach `re` escaped, and '\[' in a
        # plain literal is an invalid escape sequence.
        return self._text_tag_cleaner(r'\[', r'\]')

    @property
    def text_without_keys(self):
        """Text with every `{...}` span removed."""
        return self._text_tag_cleaner('{', '}')

    def _text_tag_cleaner(self, before_delimiter, after_delimiter):
        """
        Return the text with every delimited span removed.

        Both delimiters are inserted verbatim into a regular expression, so
        characters that are special to `re` must already be escaped.
        """
        # Lazy match from the opening delimiter up to the first closing
        # one. DOTALL lets a span run across line breaks. The body is not
        # restricted to "anything but '>'", so bracket and brace spans that
        # contain '>' are removed as well.
        pattern = re.compile(
            r"{0}.*?{1}".format(before_delimiter, after_delimiter),
            re.DOTALL)
        return pattern.sub('', self.text)

    @property
    def text_without_trailing_spaces(self):
        """Text with leading and trailing whitespace stripped."""
        return self.text.strip()

    @property
    def characters_per_second(self):
        """
        Number of characters (tags and line breaks excluded) divided by the
        duration in seconds; 0.0 when the duration is zero.
        """
        characters_count = len(self.text_without_tags.replace('\n', ''))
        try:
            return characters_count / (self.duration.ordinal / 1000.0)
        except ZeroDivisionError:
            return 0.0

    def text_with_replacements(self, replacements_map=None):
        """
        Apply substring replacements to the text and return the result.

        replacements_map -> mapping of substring to replacement; None or an
        empty mapping leaves the text untouched.

        The replacements are stored back into `self.text`.
        """
        if replacements_map:
            # `items()` exists on both Python 2 and Python 3 dictionaries.
            for replaced, replacement in replacements_map.items():
                self.text = self.text.replace(replaced, replacement)
        return self.text

    def __str__(self):
        """Serialize the item following ITEM_PATTERN."""
        # The position is only emitted (after one space) when not blank.
        position = ' %s' % self.position if self.position.strip() else ''
        return self.ITEM_PATTERN % (self.start, self.end, position, self.text)

    if is_py2:
        # On Python 2 the text serialization is exposed as __unicode__ and
        # __str__ refuses to run.
        __unicode__ = __str__

        def __str__(self):
            raise NotImplementedError('Use unicode() instead!')

    def _cmpkey(self):
        """Key made of (start, end); presumably used by ComparableMixin."""
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Add given values to start and end attributes.
        All arguments are optional and have a default value of 0.
        """
        self.start.shift(*args, **kwargs)
        self.end.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        """Build an item from a string, split into lines for from_lines."""
        return cls.from_lines(source.splitlines(True))

    @classmethod
    def from_lines(cls, lines):
        """
        Build an item from a list of lines.

        The first line is taken as the index when it does not contain the
        timestamp separator; the next line holds the timestamps and the
        remaining lines form the text.

        Raises InvalidItem when fewer than two lines are given.
        """
        if len(lines) < 2:
            raise InvalidItem()
        # All cases are considered: '\n', '\r\n', '\r'
        lines = [line.rstrip("\n\r") for line in lines]
        # Only the first line also loses its other trailing whitespace.
        lines[0] = lines[0].rstrip()
        index = None
        if cls.TIMESTAMP_SEPARATOR not in lines[0]:
            index = lines.pop(0)
        start, end, position = cls.split_timestamps(lines[0])
        body = '\n'.join(lines[1:])
        return cls(index, start, end, body, position)

    @classmethod
    def split_timestamps(cls, line):
        """
        Split a timestamp line into stripped start, end and position.

        Everything after the first space following the end timestamp is the
        position; it is '' when absent. The three values are yielded by a
        generator.

        Raises InvalidItem unless the separator occurs exactly once.
        """
        timestamps = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(timestamps) != 2:
            raise InvalidItem()
        start, end_and_position = timestamps
        end_and_position = end_and_position.lstrip().split(' ', 1)
        end = end_and_position[0]
        position = end_and_position[1] if len(end_and_position) > 1 else ''
        return (s.strip() for s in (start, end, position))
Пример #6
0
 def test_idempotence(self):
     # A cue parsed from text must serialize back to that same text.
     plain_cue = WebVTTItem.from_string(self.vtt)
     self.assertEqual(str(plain_cue), self.vtt)
     # The serialized form is compared against the fixture minus its first
     # two characters (presumably the index line, which is not written
     # back -- confirm against the fixture).
     expected_with_position = self.coordinates[2:]
     positioned_cue = WebVTTItem.from_string(self.coordinates)
     self.assertEqual(str(positioned_cue), expected_with_position)
Пример #7
0
 def test_serialization(self):
     # The item must serialize to the fixture string minus its first two
     # characters.
     serialized = str(self.item)
     expected = self.string[2:]
     self.assertEqual(serialized, expected)
Пример #8
0
 def test_idempotence(self):
     # Round trip without position: parse, serialize, compare to source.
     round_tripped = str(WebVTTItem.from_string(self.vtt))
     self.assertEqual(round_tripped, self.vtt)
     # Round trip with position: the first two characters of the fixture
     # are not expected in the serialized output.
     round_tripped = str(WebVTTItem.from_string(self.coordinates))
     self.assertEqual(round_tripped, self.coordinates[2:])
Пример #9
0
 def test_serialization(self):
     # Serializing the fixture item must give the fixture string with its
     # first two characters dropped.
     item_as_text = str(self.item)
     self.assertEqual(item_as_text, self.string[2:])
Пример #10
0
 def test_compare_from_string_and_from_path(self):
     # Items loaded from a path and items parsed from the same content as
     # a string must serialize identically.
     # The context manager closes the handle once the content is read.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_file:
         unicode_content = utf8_file.read()
     iterator = zip(pyvtt.open(self.utf8_path),
                    pyvtt.from_string(unicode_content))
     # NOTE: zip stops at the shorter sequence, so a difference in item
     # count is not detected here.
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
Пример #11
0
    from collections import UserList
except ImportError:
    from UserList import UserList
from copy import copy
from itertools import chain
from os import linesep
from sys import stderr

from pyvtt.vttexc import Error, InvalidFile
from pyvtt.vttitem import WebVTTItem
from pyvtt.compat import str

# Byte order marks paired with the name of the codec they announce.
BOMS = (
    (BOM_UTF32_LE, 'utf_32_le'),
    (BOM_UTF32_BE, 'utf_32_be'),
    (BOM_UTF16_LE, 'utf_16_le'),
    (BOM_UTF16_BE, 'utf_16_be'),
    (BOM_UTF8, 'utf_8'),
)
# Codec name -> its byte order mark decoded with that codec.
CODECS_BOMS = {codec: str(bom, codec) for bom, codec in BOMS}
# Length in bytes of the longest byte order mark.
BIGGER_BOM = max(len(pair[0]) for pair in BOMS)


class WebVTTFile(UserList, object):
    """
    WebVTT file descriptor.

    Provide a pure Python mapping on all metadata.

    WebVTTFile(items, eol, path, encoding)

    items -> list of WebVTTItem. Default to [].
    eol -> str: end of line character. Default to linesep used in opened file
        if any else to os.linesep.
    path -> str: path where file will be saved. To open an existant file see
Пример #12
0
 def __str__(self):
     # Format the time with TIME_PATTERN; a negative time is rendered as
     # the zero time instead.
     is_negative = self.ordinal < 0
     if not is_negative:
         return self.TIME_PATTERN % tuple(self)
     zero_time = WebVTTTime.from_ordinal(0)
     return str(zero_time)
Пример #13
0
 def test_compare_from_string_and_from_path(self):
     # Loading from a path and parsing the same content from a string must
     # give items with identical serializations.
     # Read inside a context manager so the file handle is closed.
     with codecs.open(self.utf8_path, encoding='utf_8') as source_file:
         unicode_content = source_file.read()
     iterator = zip(pyvtt.open(self.utf8_path),
                    pyvtt.from_string(unicode_content))
     # NOTE: zip truncates to the shorter input; item counts are not
     # compared by this test.
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
Пример #14
0
class WebVTTItem(ComparableMixin):
    """
    WebVTTItem(index, start, end, text, position)

    index -> cast to int when possible, otherwise stored as given.
    start, end -> WebVTTTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw vtt "display coordinates" string
    """
    # Serialized layout: "<start> --> <end><position>\n<text>\n".
    ITEM_PATTERN = str('%s --> %s%s\n%s\n')
    TIMESTAMP_SEPARATOR = '-->'

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        """
        Store the cue fields; falsy `start`/`end` are coerced from 0.
        """
        try:
            self.index = int(index)
        except (TypeError,
                ValueError):  # try to cast as int, but it's not mandatory
            self.index = index

        self.start = WebVTTTime.coerce(start or 0)
        self.end = WebVTTTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)

    @property
    def duration(self):
        """Difference between the end and the start of the item."""
        return self.end - self.start

    @property
    def text_without_tags(self):
        """Text with every `<...>` span removed."""
        return re.compile(r'<[^>]*?>').sub('', self.text)

    @property
    def text_without_keys(self):
        """Text with every `{...}` span removed."""
        # The span body excludes the closing brace (not '>'), so a span
        # that contains '>' is removed too.
        return re.compile(r'{[^}]*?}').sub('', self.text)

    @property
    def text_without_strange_chars(self):
        """
        Text with a fixed list of backslash-prefixed markers removed.

        The cleaned text is also stored back into `self.text`.
        """
        # NOTE(review): reading this property rewrites self.text, unlike
        # the other text_without_* properties. Kept as is because callers
        # may rely on it -- confirm before making it side-effect free.
        cleaned = self.text
        for marker in (
                "\\i1", "\\i0", "\\b1", "\\b0", "\\b<weight>", "\\u1", "\\u0",
                "\\s1", "\\s0", "\\bord<size>", "\\xbord<size>",
                "\\ybord<size>", "\\shad<depth>", "\\xshad<depth>",
                "\\yshad<depth>"
        ):
            cleaned = cleaned.replace(marker, '')
        self.text = cleaned
        return cleaned

    @property
    def text_without_trailing_spaces(self):
        """Text with leading and trailing whitespace stripped."""
        return self.text.strip()

    @property
    def characters_per_second(self):
        """
        Number of characters (tags and line breaks excluded) divided by the
        duration in seconds; 0.0 when the duration is zero.
        """
        characters_count = len(self.text_without_tags.replace('\n', ''))
        try:
            return characters_count / (self.duration.ordinal / 1000.0)
        except ZeroDivisionError:
            return 0.0

    def __str__(self):
        """Serialize the item following ITEM_PATTERN."""
        # The position is only emitted (after one space) when not blank.
        position = ' %s' % self.position if self.position.strip() else ''
        return self.ITEM_PATTERN % (self.start, self.end, position, self.text)

    if is_py2:
        # On Python 2 the text serialization is exposed as __unicode__ and
        # __str__ refuses to run.
        __unicode__ = __str__

        def __str__(self):
            raise NotImplementedError('Use unicode() instead!')

    def _cmpkey(self):
        """Key made of (start, end); presumably used by ComparableMixin."""
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Add given values to start and end attributes.
        All arguments are optional and have a default value of 0.
        """
        self.start.shift(*args, **kwargs)
        self.end.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        """Build an item from a string, split into lines for from_lines."""
        return cls.from_lines(source.splitlines(True))

    @classmethod
    def from_lines(cls, lines):
        """
        Build an item from a list of lines.

        Trailing whitespace is stripped from every line. The first line is
        taken as the index when it does not contain the timestamp
        separator; the next line holds the timestamps and the remaining
        lines form the text.

        Raises InvalidItem when fewer than two lines are given.
        """
        if len(lines) < 2:
            raise InvalidItem()
        lines = [line.rstrip() for line in lines]
        index = None
        if cls.TIMESTAMP_SEPARATOR not in lines[0]:
            index = lines.pop(0)
        start, end, position = cls.split_timestamps(lines[0])
        body = '\n'.join(lines[1:])
        return cls(index, start, end, body, position)

    @classmethod
    def split_timestamps(cls, line):
        """
        Split a timestamp line into stripped start, end and position.

        Everything after the first space following the end timestamp is the
        position; it is '' when absent. The three values are yielded by a
        generator.

        Raises InvalidItem unless the separator occurs exactly once.
        """
        timestamps = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(timestamps) != 2:
            raise InvalidItem()
        start, end_and_position = timestamps
        end_and_position = end_and_position.lstrip().split(' ', 1)
        end = end_and_position[0]
        position = end_and_position[1] if len(end_and_position) > 1 else ''
        return (s.strip() for s in (start, end, position))