Пример #1
0
def parse_op_collab_react(op_array, editor):
    """
    Parse an op in OT text format into a list of elementary operations.

    The returned operations are built only from the position and the inserted/deleted
    text; they carry no author or timestamp.
    Format reference: https://github.com/ottypes/text

    :param op_array: components of the op; each one is a dict with a path ``'p'`` and
        optionally ``'sd'`` (string deleted) and/or ``'si'`` (string inserted)
    :type op_array: [{'p': list[int], str: str}]
    :param editor: ``'collab-react-components'`` (path is ``[position]``); any other value
        is treated as FROG (path is ``['text', position]``)
    :return: List of elem_ops contained in op. A component holding both ``'sd'`` and ``'si'``
        yields a "del" followed by an "add" at the same position.
    :rtype: list[ElementaryOperation]
    :raises AssertionError: if the path of a component does not have the shape expected
        for the given editor
    """
    # list[ElementaryOperation]
    elem_ops = []
    for op in op_array:
        path = op['p']
        # Explicit raises instead of ``assert`` so the validation survives ``python -O``.
        if editor == 'collab-react-components':
            if len(path) != 1:
                raise AssertionError(
                    "len(op['p']) != 1 for a collab-react-components op.")
            position = path[0]  # Collab
        else:
            if len(path) != 2:
                raise AssertionError(
                    "len(op['p']) != 2. Might be because you "
                    "are treating ill-formatted logs from the DB.")
            if path[0] != 'text':
                raise AssertionError(
                    "op['p'][0] != 'text' for a FROG op.")
            position = path[1]  # FROG

        if 'sd' in op:
            # Deleting some letters: only the length of the removed string is kept
            elem_ops.append(
                ElementaryOperation(operation_type="del",
                                    abs_position=position,
                                    length_to_delete=len(op['sd']),
                                    changeset=op))
        if 'si' in op:
            # Inserting some letters
            elem_ops.append(
                ElementaryOperation(operation_type="add",
                                    abs_position=position,
                                    text_to_add=op['si'],
                                    changeset=op))

    return elem_ops
Пример #2
0
 def generateElemOps(self):
     '''
     Collect the elementary operations of every operation of this window and
     widen the window's time bounds so that they cover all of them.

     For each entry of self.operations, self.endTime is raised to its
     timestamp_end when that is later, self.start_time is lowered to its
     timestamp_start when that is earlier, and its elem_ops are appended to
     self.elemOps. Finally self.elemOps is replaced by the result of
     ElementaryOperation.sort_elem_ops.

     :return: None
     '''
     for operation in self.operations:
         # max/min keep the current bound unless the operation strictly exceeds it
         self.endTime = max(self.endTime, operation.timestamp_end)
         self.start_time = min(self.start_time, operation.timestamp_start)
         self.elemOps.extend(operation.elem_ops)
     self.elemOps = ElementaryOperation.sort_elem_ops(self.elemOps)
Пример #3
0
    def get_elem_ops(self, sorted_):
        """
        Flatten the ElementaryOperation lists of all the Operation objects into a single list.

        Useful for building a representation of the text. Note that each ElementaryOperation
        knows to which Operation it belongs.

        :param sorted_: when truthy, the flattened list is passed through
            ElementaryOperation.sort_elem_ops before being returned; otherwise the elements
            are returned in the order of self.operations
        :return: list of ElementaryOperation
        :rtype: list[ElementaryOperation]
        """
        flattened = [elem_op
                     for operation in self.operations
                     for elem_op in operation.elem_ops]
        if not sorted_:
            return flattened
        return ElementaryOperation.sort_elem_ops(flattened)
Пример #4
0
def parse_changeset_etherpad(changeset):
    """
    Parse a changeset of type etherpad into a list of elementary operations. The author, timestamp... are not
    filled in here. Format reference: http://policypad.readthedocs.io/en/latest/changesets.html

    The string is scanned from one symbol to the next; every number is read in base 36.
    '=' operations advance the running position, '+' operations emit an "add" whose text is sliced from the
    data bank (everything after the first '$'), '-' operations emit a "del", and '*' operations are skipped.
    Scanning stops when '$' is reached.

    :param changeset: string to parse
    :type changeset: str

    :return: List of elem_ops contained in the changeset
    :rtype: list[ElementaryOperation]

    """

    def find_next_symbol_idx(string, start):
        """
        Return the index of the first symbol character at or after start, or len(string) if there is none.
        """
        symbols = ['|', '$', '+', '-', '=', '*']
        for i in range(start, len(string)):
            if string[i] in symbols:
                return i
        # We reached the end of the text
        return len(string)

    # Number of newlines kept so far; only the '|L=N' branch increases it.
    line_number = 0
    # Accumulated by the '|L=N' branch but not read anywhere in the code visible here.
    line_abs_position = 0
    # Absolute character offset at which the next operation applies.
    position = 0
    # Sum of the inline '=N' keeps; it is never reset in the code visible here.
    position_inline = 0
    elementary_operations = []
    # Number of data bank characters already consumed by previous '+' operations.
    used_databank = 0
    # Jump to the first symbol; anything before it is not interpreted.
    idx = find_next_symbol_idx(changeset, 0)
    while idx < len(changeset):
        if changeset[idx] == '|':
            # Step onto the L digits; they are taken care of by elif changeset[idx].isalnum()
            idx += 1
        elif changeset[idx] == '$':
            # Start of the data bank: its text has already been consumed by the '+' operations.
            # NOTE(review): this is the only return statement visible in this function.
            return elementary_operations
        elif changeset[idx].isalnum():
            # Format is |L+N,|L-N or |L=N
            # NOTE(review): if no symbol follows the digits, symbol_idx == len(changeset) and the
            # indexing below raises IndexError.
            symbol_idx = find_next_symbol_idx(changeset, idx)
            if changeset[symbol_idx] == '=':
                # |L=N
                # Keep N characters from the source text, containing L newlines.
                # The last character kept MUST be a newline, and the final newline
                # of the document is allowed.
                # L
                line_number += int(changeset[idx:symbol_idx], 36)
                next_symbol_position = find_next_symbol_idx(changeset, symbol_idx + 1)
                # N
                line_abs_position += int(changeset[symbol_idx + 1:next_symbol_position], 36)
                position += int(changeset[symbol_idx + 1:next_symbol_position], 36)
                idx = next_symbol_position
            elif changeset[symbol_idx] == '+':
                # |L+N
                # Insert N characters from the source text, containing L newlines.
                # The last character inserted MUST be a newline, but not the (new)
                # document's final newline.
                # NOTE(review): L is not added to line_number in this branch — confirm this is intended.

                next_symbol_position = find_next_symbol_idx(changeset, symbol_idx + 1)
                # We care about N (size of the addition) since we will take N chars from the databank.
                # noinspection PyPep8Naming
                N = int(changeset[symbol_idx + 1:next_symbol_position], 36)
                # The data bank is everything after the first '$'
                data_bank = changeset[changeset.find('$') + 1:]
                text_to_add = data_bank[used_databank:used_databank + N]
                elementary_operations.append(ElementaryOperation("add",
                                                                 position,
                                                                 text_to_add=text_to_add,
                                                                 line_number=line_number,
                                                                 position_inline=position_inline,
                                                                 changeset=changeset))
                used_databank += N
                # The inserted text shifts the position of whatever follows
                position += N
                idx = next_symbol_position
            else:
                assert (changeset[symbol_idx] == '-')

                # |L-N
                # Delete N characters from the source text, containing L newlines.
                # The last character deleted MUST be a newline, but not the (old)
                # document's final newline.
                # NOTE(review): L is not added to line_number here either, and position is left unchanged.
                next_symbol_position = find_next_symbol_idx(changeset, symbol_idx + 1)
                # N
                chars_to_delete = int(changeset[symbol_idx + 1:next_symbol_position], 36)
                idx = next_symbol_position
                elementary_operations.append(ElementaryOperation("del",
                                                                 position,
                                                                 length_to_delete=chars_to_delete,
                                                                 line_number=line_number,
                                                                 position_inline=position_inline,
                                                                 changeset=changeset))
        elif changeset[idx] == '=':
            # Keep N characters from the source text, none of them newlines
            # (position inline)
            next_symbol_position = find_next_symbol_idx(changeset, idx + 1)
            # We add the inline offset
            position += int(changeset[idx + 1:next_symbol_position], 36)
            position_inline += int(changeset[idx + 1:next_symbol_position], 36)
            idx = next_symbol_position
        elif changeset[idx] == '+':
            # Insert N characters, read from the data bank
            next_symbol_position = find_next_symbol_idx(changeset, idx + 1)
            # We care about N (size of the addition). We only take N chars from the databank (not counting the
            # already used ones)
            N = int(changeset[idx + 1:next_symbol_position], 36)
            data_bank = changeset[changeset.find('$') + 1:]
            text_to_add = data_bank[used_databank:used_databank + N]
            elementary_operations.append(ElementaryOperation("add",
                                                             position,
                                                             text_to_add=text_to_add,
                                                             line_number=line_number,
                                                             position_inline=position_inline,
                                                             changeset=changeset))
            used_databank += N
            position += N
            idx = next_symbol_position

        elif changeset[idx] == '-':
            # Remove the next N characters; position is left unchanged afterwards
            next_symbol_position = find_next_symbol_idx(changeset, idx + 1)
            chars_to_delete = int(changeset[idx + 1:next_symbol_position], 36)
            idx = next_symbol_position
            elementary_operations.append(ElementaryOperation("del",
                                                             position,
                                                             length_to_delete=chars_to_delete,
                                                             line_number=line_number,
                                                             position_inline=position_inline,
                                                             changeset=changeset))
        else:
            # TODO: Format with '*'
            # For now the '*' and the number following it are skipped without being interpreted.
            assert changeset[idx] == '*'
            idx = find_next_symbol_idx(changeset, idx + 1)