def parse_op_collab_react(op_array, editor):
    """
    Parse an op in OT type into a list of elementary operations. The author, timestamp...
    are not filled in here.
    https://github.com/ottypes/text

    :param op_array: op components to parse; each one carries a path under 'p' and
        optionally a deleted string under 'sd' and/or an inserted string under 'si'
    :type op_array: list[dict]
    :param editor: 'collab-react-components' (path is ``[position]``); any other value is
        handled as FROG (path is ``['text', position]``)
    :type editor: str
    :return: List of elem_ops contained in op. A component holding both 'sd' and 'si'
        yields the deletion first, then the insertion.
    :rtype: list[ElementaryOperation]
    :raises AssertionError: if the path of a component does not have the shape expected
        for the given editor
    """
    elem_ops = []  # list[ElementaryOperation]
    for op in op_array:
        path = op['p']
        # Explicit raises instead of `assert`, so the checks survive `python -O`.
        if editor == 'collab-react-components':
            if len(path) != 1:
                raise AssertionError(
                    "len(op['p']) != 1 for a collab-react-components op.")
            position = path[0]  # Collab
        else:
            if len(path) != 2:
                raise AssertionError(
                    "len(op['p']) != 2. Might be because you "
                    "are treating ill-formatted logs from the DB.")
            if path[0] != 'text':
                raise AssertionError("op['p'][0] != 'text'")
            position = path[1]  # FROG

        if 'sd' in op:
            # Deleting some letters: only the length of the removed string is kept
            elem_ops.append(
                ElementaryOperation(operation_type="del",
                                    abs_position=position,
                                    length_to_delete=len(op['sd']),
                                    changeset=op))
        if 'si' in op:
            # Inserting some letters
            elem_ops.append(
                ElementaryOperation(operation_type="add",
                                    abs_position=position,
                                    text_to_add=op['si'],
                                    changeset=op))
    return elem_ops
def generateElemOps(self):
    """
    Collect the elementary operations of every operation of the window and
    widen the window's time bounds so that they cover all the operations.

    For each operation, ``self.endTime`` is raised to its ``timestamp_end`` when that
    one is later, ``self.start_time`` is lowered to its ``timestamp_start`` when that
    one is earlier, and its ``elem_ops`` are appended to ``self.elemOps``. Finally
    ``self.elemOps`` is replaced by the result of ``ElementaryOperation.sort_elem_ops``.

    Note that the elementary operations are appended to whatever ``self.elemOps``
    already contains.

    :return: None
    """
    for operation in self.operations:
        finished_at = operation.timestamp_end
        if self.endTime < finished_at:
            self.endTime = finished_at

        started_at = operation.timestamp_start
        if self.start_time > started_at:
            self.start_time = started_at

        self.elemOps.extend(operation.elem_ops)

    self.elemOps = ElementaryOperation.sort_elem_ops(self.elemOps)
def get_elem_ops(self, sorted_):
    """
    Gather the ElementaryOperation of every Operation into one flat list.
    Good for building a representation of the text.
    Note that each ElementaryOperation knows to which Operation it belongs.

    :param sorted_: when truthy, the flat list is passed through
        ``ElementaryOperation.sort_elem_ops`` (presumably ordering by timestamp) before
        being returned; otherwise the operations are returned in the order they are
        stored, Operation after Operation
    :return: list of ElementaryOperation
    :rtype: list[ElementaryOperation]
    """
    # Flatten: one entry per elementary op, keeping the storage order
    flattened = [elem_op
                 for operation in self.operations
                 for elem_op in operation.elem_ops]
    if not sorted_:
        return flattened
    return ElementaryOperation.sort_elem_ops(flattened)
def parse_changeset_etherpad(changeset):
    """
    Parse a changeset of type etherpad into a list of elementary operations. The author,
    timestamp... are not filled in here.
    http://policypad.readthedocs.io/en/latest/changesets.html

    Everything before the first operation symbol (the header) is skipped. The operations
    are then read one after the other; L and N are base-36 numbers:

    * ``|L=N`` keeps N characters containing L newlines, ``=N`` keeps N characters inline
    * ``|L+N`` and ``+N`` insert the next N unused characters of the data bank
      (the text located after the ``$``)
    * ``|L-N`` and ``-N`` delete N characters
    * ``*...`` (attributes) is skipped
    * ``$`` ends the operations

    :param changeset: string to parse
    :type changeset: str
    :return: List of elem_ops contained in the changeset. The list is also returned
        (possibly empty) when the changeset ends without a ``$``.
    :rtype: list[ElementaryOperation]
    :raises AssertionError: if an unexpected character is found where an operation is expected
    :raises ValueError: if a count is not a valid base-36 number
    :raises IndexError: if a ``|L`` line count is the last thing of the changeset
    """
    symbols = frozenset('|$+-=*')

    def find_next_symbol_idx(string, start):
        """
        Return the index of the first symbol at or after start,
        or len(string) when there is none left.
        """
        for i in range(start, len(string)):
            if string[i] in symbols:
                return i
        # We reached the end of the text
        return len(string)

    # The data bank never changes while parsing, so it is sliced only once.
    # Without any '$', find() gives -1 and the whole changeset is used as data bank.
    data_bank = changeset[changeset.find('$') + 1:]
    used_databank = 0

    line_number = 0
    position = 0
    position_inline = 0
    elementary_operations = []

    # Skip the header and land on the first symbol
    idx = find_next_symbol_idx(changeset, 0)
    while idx < len(changeset):
        current = changeset[idx]

        if current == '|':
            # The line count that follows is handled by the isalnum() case below
            idx += 1
            continue
        if current == '$':
            # End of the operations; the data bank was consumed by the '+' operations
            return elementary_operations

        if current.isalnum():
            # Format is |L+N, |L-N or |L=N and idx is on the first character of L
            multiline = True
            operator_idx = find_next_symbol_idx(changeset, idx)
            operator = changeset[operator_idx]
            # Explicit raise instead of `assert`, so the check survives `python -O`
            if operator not in ('=', '+', '-'):
                raise AssertionError(
                    "Unexpected symbol %r after a line count in the changeset" % operator)
            if operator == '=':
                # Only kept lines move the current line number forward
                line_number += int(changeset[idx:operator_idx], 36)
        elif current in ('=', '+', '-'):
            # Inline format: =N, +N or -N
            multiline = False
            operator_idx = idx
            operator = current
        else:
            # TODO: Format with '*'
            if current != '*':
                raise AssertionError(
                    "Unexpected character %r in the changeset" % current)
            idx = find_next_symbol_idx(changeset, idx + 1)
            continue

        # N: the number of characters concerned by the operation
        next_symbol_position = find_next_symbol_idx(changeset, operator_idx + 1)
        count = int(changeset[operator_idx + 1:next_symbol_position], 36)

        if operator == '=':
            position += count
            if not multiline:
                # Only inline keeps move the offset inside the line
                position_inline += count
        elif operator == '+':
            # We only take N chars from the databank (not counting the already used ones)
            text_to_add = data_bank[used_databank:used_databank + count]
            elementary_operations.append(
                ElementaryOperation("add",
                                    position,
                                    text_to_add=text_to_add,
                                    line_number=line_number,
                                    position_inline=position_inline,
                                    changeset=changeset))
            used_databank += count
            position += count
        else:
            # Deleting does not move the current position
            elementary_operations.append(
                ElementaryOperation("del",
                                    position,
                                    length_to_delete=count,
                                    line_number=line_number,
                                    position_inline=position_inline,
                                    changeset=changeset))
        idx = next_symbol_position

    # No '$' was met: still hand back what was parsed instead of None
    return elementary_operations