Exemplo n.º 1
0
def format_patch(source: CodeInfo,
                 target: CodeInfo,
                 numlines: int = 3,
                 title: str = "") -> Iterator[str]:
    """Yield a unified-style patch between ``source`` and ``target``.

    Behaves like :obj:`difflib.unified_diff`, except that the hunk ranges are
    produced by ``line_range`` from each snippet's ``starting_line``.

    The ``---`` / ``+++`` header is emitted once, right before the first
    hunk, so nothing at all is yielded when the matcher reports no hunks.
    ``numlines`` is the amount of context handed to
    ``get_grouped_opcodes``; ``title`` is appended to every hunk header.
    """
    old_lines = source.lines
    new_lines = target.lines
    header_pending = True

    hunks = SequenceMatcher(None, old_lines, new_lines).get_grouped_opcodes(numlines)
    for hunk in hunks:
        if header_pending:
            header_pending = False
            yield f"--- {relativise_path(source.file)}\n"
            yield f"+++ {relativise_path(target.file)}\n"

        head, tail = hunk[0], hunk[-1]
        old_span = line_range(head[1], tail[2], source.starting_line)
        new_span = line_range(head[3], tail[4], target.starting_line)
        # strip() drops the trailing blank left behind by an empty title.
        yield f"@@ -{old_span} +{new_span} @@ {title}".strip() + "\n"

        for tag, i1, i2, j1, j2 in hunk:
            if tag == "equal":
                yield from (" " + text for text in old_lines[i1:i2])
            else:
                # A "replace" emits its removed lines first, then the added ones.
                if tag in ("replace", "delete"):
                    yield from ("-" + text for text in old_lines[i1:i2])
                if tag in ("replace", "insert"):
                    yield from ("+" + text for text in new_lines[j1:j2])
def diff_token_streams(stream_a, stream_b, safe_tokens=DEFAULT_SAFE_TOKENS):
    """Return report lines describing how two token streams differ.

    Both streams are materialised into lists and compared with a
    ``SequenceMatcher`` (autojunk disabled).  Each token is indexed with
    ``[0]`` to obtain the value looked up in ``safe_tokens``; differences
    that only involve such tokens are left out of the report.

    Report line prefixes:
      ``** `` -- a replaced token pair, rendered by ``token_diff``
      ``-- `` -- a token present only in ``stream_a``
      ``++ `` -- a token present only in ``stream_b``

    Raises ``ValueError`` for an opcode tag other than the four known ones.
    A ``replace`` opcode whose two ranges differ in length trips the
    assertion below.
    """
    tokens_a = list(stream_a)
    tokens_b = list(stream_b)
    matcher = SequenceMatcher(None, tokens_a, tokens_b, False)

    report = []
    for group in matcher.get_grouped_opcodes(0):
        for op, i1, i2, j1, j2 in group:
            if op == 'equal':
                continue

            if op == 'replace':
                assert i2 - i1 == j2 - j1, 'replace OP range lengths should match'
                for old, new in zip(tokens_a[i1:i2], tokens_b[j1:j2]):
                    if old[0] in safe_tokens and new[0] in safe_tokens:
                        continue
                    report.append('** ' + token_diff(old, new))
            elif op == 'delete':
                report.extend(
                    '-- [{}]'.format(':'.join(token))
                    for token in tokens_a[i1:i2]
                    if token[0] not in safe_tokens
                )
            elif op == 'insert':
                report.extend(
                    '++ [{}]'.format(':'.join(token))
                    for token in tokens_b[j1:j2]
                    if token[0] not in safe_tokens
                )
            else:
                raise ValueError('Unkown diff op: {} {} {} {} {}'.format(
                    op, i1, i2, j1, j2))
    return report
Exemplo n.º 3
0
    def rediff(self, map_func=string.rstrip):
        """Build a new diff whose lines are compared after applying map_func.

        Every original and modified line is passed through ``map_func``
        before matching; the default, string.rstrip, makes the comparison
        ignore trailing whitespace.  Hunks carry three lines of context.

        """
        original = map(map_func, self.getOriginalLines())
        modified = map(map_func, self.getModifiedLines())
        matcher = SequenceMatcher(a=original, b=modified)

        hunks = []
        for group in matcher.get_grouped_opcodes(3):
            first = group[0]
            entries = []
            for tag, i1, i2, j1, j2 in group:
                if tag == 'equal':
                    entries.extend((' ', text) for text in original[i1:i2])
                    continue
                if tag in ('replace', 'delete'):
                    entries.extend(('-', text) for text in original[i1:i2])
                if tag in ('replace', 'insert'):
                    entries.extend(('+', text) for text in modified[j1:j2])
            # Hunk offsets are 1-based; opcode indices are 0-based.
            hunks.append(Hunk(first[1] + 1, first[3] + 1, entries))

        result = FileDiff(self.file, self.versions, hunks)
        # Override the accessors on the new diff so they hand back the
        # mapped content computed above.
        joined_original = '\n'.join(original)
        result.getOriginal = lambda: joined_original
        result.getModifiedLines = lambda: modified
        return result
Exemplo n.º 4
0
def trans_inplace(
        src: Sequence[_H], dest: Sequence[_H],
        unifying: int) -> Iterable[Tuple[Tuple[int, int], Tuple[int, int]]]:
    """
    Yield ``((i1, i2), (j1, j2))`` slice pairs meant to be applied in order:

    for (i1, i2), (j1, j2) in trans_inplace(src, dest, unifying):
      src[i1:i2] = dest[j1:j2]

    The ``src`` indices are shifted by the net length change of the pairs
    already yielded, so they stay valid while ``src`` is being edited.
    ``unifying`` is passed to ``get_grouped_opcodes`` as its context size.
    """

    shift = 0
    matcher = SequenceMatcher(isjunk=None, a=src, b=dest, autojunk=False)

    for group in matcher.get_grouped_opcodes(n=unifying):
        for tag, i1, i2, j1, j2 in group:
            opcode = cast(_OP_CODE, tag)
            target_span = (i1 + shift, i2 + shift)
            replacement_span = (j1, j2)
            removed = i2 - i1
            added = j2 - j1

            if opcode == "equal":
                continue

            if opcode == "delete":
                yield target_span, replacement_span
                shift -= removed

            elif opcode == "insert":
                yield target_span, replacement_span
                shift += added

            elif opcode == "replace":
                yield target_span, replacement_span
                shift += added - removed

            else:
                never(opcode)
Exemplo n.º 5
0
    def sequence_changed(self):
        """Re-read the sequence file and print which entries changed.

        Reads 'z:/running.txt' into ``self.newfile``, matches it against
        ``self.oldfile`` and, for every replaced run with the same number of
        old and new lines, prints the first changed key of each line pair.
        ``self.oldfile`` is then replaced by the freshly read lines.

        This is Python 2 code (print statements, ``file`` builtin).
        """
        print "new sequence?"
        # NOTE(review): the path is hard-coded.
        with file('z:/running.txt') as infile:
            self.newfile = infile.readlines()

        differ = SequenceMatcher(None, self.oldfile, self.newfile
                                 )
        # One line of context per group; only 'replace' opcodes are examined,
        # so pure insertions and deletions are ignored.
        for group in differ.get_grouped_opcodes(1):
            for tag, i1, i2, j1, j2 in group: 
                if tag == 'replace':

                    nold = i2-i1
                    nnew = j2-j1


                    # Lines are paired positionally, which is only possible
                    # when both sides of the replace have the same length.
                    if (nold == nnew):
                        for k in range(nold):
                            # presumably linematch() parses a line into a
                            # mapping that has a 'name' key -- verify against
                            # sequenceparser
                            old = sequenceparser.linematch(self.oldfile[i1+k])
                            new = sequenceparser.linematch(self.newfile[j1+k])
                            changedkey = dictcompare(old, new)
                            if changedkey:
                                # Trailing comma: stay on the same output line.
                                print new['name'], 
                                # Only the first entry of changedkey is reported.
                                print "changed:", changedkey[0], new[changedkey[0]]

                    else:
                        print "too many lines changed at once", nold, nnew

        # The new content becomes the baseline for the next call.
        self.oldfile = self.newfile

        print "---"
Exemplo n.º 6
0
class Diff(object):
    """Line-level diff between two texts, rendered through handler methods.

    Both inputs are split with ``splitlines()`` and compared with a
    ``SequenceMatcher``.  ``do_diff`` turns the opcodes into sections; each
    section is a list of entries produced by the ``handle_*`` methods.
    ``handle_replace`` is not implemented here and has to be supplied by a
    subclass.

    Optional keyword arguments:
      surrounding  -- context lines kept around each change (default 2);
                      any value below 0 selects one section covering
                      everything.
      blank_lines, case_changes
                   -- stored on the instance; not used by this class itself.
      whitespace   -- when truthy, ``IS_CHARACTER_JUNK`` is handed to the
                      matcher as its junk predicate.
    """

    def __init__(self, old, new, **kwargs):
        self.old = old.splitlines()
        self.new = new.splitlines()
        self.surrounding = kwargs.get("surrounding", 2)
        self.blank_lines = kwargs.get("blank_lines", False)
        self.case_changes = kwargs.get("case_changes", False)
        self.whitespace = kwargs.get("whitespace", False)

        junk = IS_CHARACTER_JUNK if self.whitespace else None
        self.diff = SequenceMatcher(junk, self.old, self.new)

    def do_diff(self):
        """Return the sections of the diff (grouped, or one full section)."""
        if self.surrounding > -1:
            return self.__partial_diff()
        else:
            return self.__full_diff()

    def __partial_diff(self):
        # One section per opcode group, with ``surrounding`` context lines.
        return map(self.parse_section, self.diff.get_grouped_opcodes(self.surrounding))

    def __full_diff(self):
        # A single section built from every opcode.
        return [self.parse_section(self.diff.get_opcodes())]

    def parse_section(self, section):
        """Expand the opcodes of ``section`` into a flat list of entries."""
        lines = []
        for change_type, start_old, end_old, start_new, end_new in section:
            if change_type == "replace":
                lines.extend(self.handle_replace(xrange(start_old, end_old), xrange(start_new, end_new)))
            elif change_type == "insert":
                lines.extend(self.handle_insert(xrange(start_new, end_new)))
            elif change_type == "delete":
                lines.extend(self.handle_delete(xrange(start_old, end_old)))
            elif change_type == "equal":
                lines.extend(self.handle_equal(xrange(start_old, end_old), xrange(start_new, end_new)))
        return lines

    @staticmethod
    def _edge_marker(index, last):
        """Return "start", " end", "start end" or "" for a position in a run.

        Uses ``==`` on purpose: comparing ints with ``is`` only happens to
        work for small cached values and silently drops the " end" marker on
        long runs.
        """
        return ("start" if index == 0 else "") + (" end" if index == last else "")

    def handle_replace(self, deleted_lines, inserted_lines):
        """Render a replaced run; must be implemented by subclasses."""
        raise NotImplementedError()

    def handle_insert(self, inserted_lines):
        """Yield ``(("insert", marker), "", new_lineno, text)`` per added line.

        ``inserted_lines`` holds 0-based indices into ``self.new``; the
        reported line number is 1-based.
        """
        last = len(inserted_lines) - 1
        for index, line in enumerate(inserted_lines):
            change_type = ("insert", self._edge_marker(index, last))
            yield change_type, "", line + 1, self.new[line]

    def handle_delete(self, deleted_lines):
        """Yield ``(("delete", marker), old_lineno, "", text)`` per removed line.

        ``deleted_lines`` holds 0-based indices into ``self.old``; the
        reported line number is 1-based.
        """
        last = len(deleted_lines) - 1
        for index, line in enumerate(deleted_lines):
            change_type = ("delete", self._edge_marker(index, last))
            yield change_type, line + 1, "", self.old[line]

    def handle_equal(self, old_lines, new_lines):
        """Yield ``(("equal", marker), old_lineno, new_lineno, text)`` per line.

        The first line of the run is marked "start"; every other line is
        marked "end".
        """
        for index, line in enumerate(old_lines):
            change_type = ("equal", "start" if index == 0 else "end")
            new_line = new_lines[index]
            yield change_type, line + 1, new_line + 1, self.old[line]
Exemplo n.º 7
0
    def _get_added_lines(self, old_content, new_content):
        buf = []
        old_lines = old_content.splitlines()
        new_lines = new_content.splitlines()
        matcher = SequenceMatcher(None, old_lines, new_lines)
        for group in matcher.get_grouped_opcodes(0):
            for tag, i1, i2, j1, j2 in group:
                if tag in ('insert', 'replace'):
                    buf.append('\n'.join(new_lines[j1:j2]))

        return '\n'.join(buf)
Exemplo n.º 8
0
    def _get_added_lines(self, old_content, new_content):
        # Gets just the added lines in the new content, as a string.
        buf = []
        old_lines = old_content.splitlines()
        new_lines = new_content.splitlines()
        matcher = SequenceMatcher(None, old_lines, new_lines)
        for group in matcher.get_grouped_opcodes(0):
            for tag, i1, i2, j1, j2 in group:
                if tag in ("insert", "replace"):
                    buf.append("\n".join(new_lines[j1:j2]))

        return "\n".join(buf)
Exemplo n.º 9
0
def diff_seq(a, b, context=3, depth=0, fromfile='a', tofile='b'):
    """Build a DataDiff describing how sequence ``a`` differs from ``b``.

    The items of both sequences are converted with ``hashable`` and matched
    with a SequenceMatcher; every opcode group (with ``context`` items of
    context) is then reported to the DataDiff.  Replaced items are diffed
    pairwise with ``diff`` at ``depth + 1``; pairs for which that raises
    DiffTypeError are reported as plain deletes/inserts instead.

    Raises NotSequence when ``a`` has neither ``__iter__`` nor
    ``__getitem__``.  Returns the populated DataDiff.
    """
    if not hasattr(a, '__iter__') and not hasattr(a, '__getitem__'):
        raise NotSequence("Not a sequence %s" % type(a))
    hashable_a = [hashable(_) for _ in a]
    hashable_b = [hashable(_) for _ in b]
    sm = SequenceMatcher(a=hashable_a, b=hashable_b)
    if type(a) == tuple:
        ddiff = DataDiff(tuple, '(', ')', fromfile=fromfile, tofile=tofile)
    # NOTE(review): this branch inspects type(b) while its siblings inspect
    # type(a) -- confirm whether type(a) was intended.
    elif type(b) == list:
        ddiff = DataDiff(list, '[', ']', fromfile=fromfile, tofile=tofile)
    else:
        ddiff = DataDiff(type(a), fromfile=fromfile, tofile=tofile)
    for chunk in sm.get_grouped_opcodes(context):
        # Chunk bounds on both sides, each lowered by one and clamped at 0.
        ddiff.context(max(chunk[0][1] - 1, 0), max(chunk[-1][2] - 1, 0),
                      max(chunk[0][3] - 1, 0), max(chunk[-1][4] - 1, 0))
        for change, i1, i2, j1, j2 in chunk:
            if change == 'replace':
                # Pairs that cannot be diffed recursively are buffered here
                # and flushed together.
                consecutive_deletes = []
                consecutive_inserts = []
                for a2, b2 in zip(a[i1:i2], b[j1:j2]):
                    # The try body also covers the ddiff calls, so a
                    # DiffTypeError raised by any of them is handled the
                    # same way as one raised by diff().
                    try:
                        nested_diff = diff(a2, b2, context, depth + 1)
                        # Flush what was buffered before this pair so the
                        # output keeps the original item order.
                        ddiff.delete_multi(consecutive_deletes)
                        ddiff.insert_multi(consecutive_inserts)
                        consecutive_deletes = []
                        consecutive_inserts = []
                        ddiff.nested(nested_diff)
                    except DiffTypeError:
                        consecutive_deletes.append(a2)
                        consecutive_inserts.append(b2)

                # differing lengths get truncated by zip()
                # here we handle the truncated items
                ddiff.delete_multi(consecutive_deletes)
                if i2 - i1 > j2 - j1:
                    common_length = j2 - j1  # covered by zip
                    ddiff.delete_multi(a[i1 + common_length:i2])
                ddiff.insert_multi(consecutive_inserts)
                if i2 - i1 < j2 - j1:
                    common_length = i2 - i1  # covered by zip
                    ddiff.insert_multi(b[j1 + common_length:j2])
            else:
                # 'insert' reports items of b; every other tag ('delete',
                # 'equal') reports items of a.
                if change == 'insert':
                    items = b[j1:j2]
                else:
                    items = a[i1:i2]
                ddiff.multi(change, items)
        # i2 is the value left over from the last opcode of this chunk: the
        # chunk stops before the end of a.
        if i2 < len(a):
            ddiff.context_end_container()
    return ddiff
def diff_seq(a, b, context=3, depth=0, fromfile='a', tofile='b'):
    """Return a DataDiff for the differences between sequences ``a`` and ``b``.

    Items are passed through ``hashable`` and aligned with a
    SequenceMatcher.  Each opcode group (``context`` items of context) is
    written to the DataDiff; replaced items are compared pairwise via
    ``diff`` at ``depth+1`` and fall back to plain delete/insert entries
    when that raises DiffTypeError.

    Raises NotSequence when ``a`` has neither ``__iter__`` nor
    ``__getitem__``.
    """
    if not hasattr(a, '__iter__') and not hasattr(a, '__getitem__'):
        raise NotSequence("Not a sequence %s" % type(a))
    hashable_a = [hashable(_) for _ in a]
    hashable_b = [hashable(_) for _ in b]
    sm = SequenceMatcher(a = hashable_a, b = hashable_b)
    if type(a) == tuple:
        ddiff = DataDiff(tuple, '(', ')', fromfile=fromfile, tofile=tofile)
    # NOTE(review): checks type(b) although the other branches check
    # type(a) -- confirm this is intentional.
    elif type(b) == list:
        ddiff = DataDiff(list, '[', ']', fromfile=fromfile, tofile=tofile)
    else:
        ddiff = DataDiff(type(a), fromfile=fromfile, tofile=tofile)
    for chunk in sm.get_grouped_opcodes(context):
        # Start/end of the chunk on both sides, minus one, never below 0.
        ddiff.context(max(chunk[0][1]-1,0), max(chunk[-1][2]-1, 0),
                     max(chunk[0][3]-1,0), max(chunk[-1][4]-1, 0))
        for change, i1, i2, j1, j2 in chunk:
            if change == 'replace':
                # Buffers for pairs that could not be diffed recursively.
                consecutive_deletes = []
                consecutive_inserts = []
                for a2, b2 in zip(a[i1:i2], b[j1:j2]):
                    # A DiffTypeError from any call in this body (not only
                    # diff()) sends the pair to the buffers.
                    try:
                        nested_diff = diff(a2, b2, context, depth+1)
                        # Emit buffered items first to preserve ordering.
                        ddiff.delete_multi(consecutive_deletes)
                        ddiff.insert_multi(consecutive_inserts)
                        consecutive_deletes = []
                        consecutive_inserts = []
                        ddiff.nested(nested_diff)
                    except DiffTypeError:
                        consecutive_deletes.append(a2)
                        consecutive_inserts.append(b2)

                # differing lengths get truncated by zip()
                # here we handle the truncated items
                ddiff.delete_multi(consecutive_deletes)
                if i2-i1 > j2-j1:
                    common_length = j2-j1 # covered by zip
                    ddiff.delete_multi(a[i1+common_length:i2])
                ddiff.insert_multi(consecutive_inserts)
                if i2-i1 < j2-j1:
                    common_length = i2-i1 # covered by zip
                    ddiff.insert_multi(b[j1+common_length:j2])
            else:
                # Inserted items come from b; deleted and equal items from a.
                if change == 'insert':
                    items = b[j1:j2]
                else:
                    items = a[i1:i2]
                ddiff.multi(change, items)
        # i2 still holds the end index of the chunk's last opcode.
        if i2 < len(a):
            ddiff.context_end_container()
    return ddiff
Exemplo n.º 11
0
def binary_diff(bytes):
    """Return the diff entries for the two buffers ``bytes[0]`` and ``bytes[1]``.

    The leading part shared by both buffers is skipped before matching, and
    the number of skipped positions is handed to ``binary_diff_str`` as the
    offset, together with the trimmed buffers and each opcode.
    """
    # SequenceMatcher is very, *very* slow, so locate the first differing
    # position up front and only hand it what comes after.
    offset = 0
    for offset, (left, right) in enumerate(zip(bytes[0], bytes[1])):
        if left != right:
            break

    trimmed = [buf[offset:] for buf in bytes]
    matcher = SequenceMatcher(None, trimmed[0], trimmed[1])

    entries = []
    for group in matcher.get_grouped_opcodes():
        for opcode in group:
            entries.extend(binary_diff_str(trimmed, opcode, offset))
    return entries
Exemplo n.º 12
0
def binary_diff(bytes):
	"""Diff the buffers ``bytes[0]`` and ``bytes[1]``; return a flat list.

	The result concatenates whatever ``binary_diff_str`` returns for every
	opcode of every opcode group.
	"""
	# Matching is very, *very* slow in SequenceMatcher, so the buffers are
	# cut at the first position where they differ before being handed over.
	shorter = min(len(bytes[0]), len(bytes[1]))
	offset = 0
	for offset in range(shorter):
		if bytes[0][offset] != bytes[1][offset]:
			break

	tails = [chunk[offset:] for chunk in bytes]
	result = []
	for opcodes in SequenceMatcher(None, tails[0], tails[1]).get_grouped_opcodes():
		for opcode in opcodes:
			result.extend(binary_diff_str(tails, opcode, offset))
	return result
Exemplo n.º 13
0
def context_diff(sequence,
                 a,
                 b,
                 fromfile='',
                 tofile='',
                 fromfiledate='',
                 tofiledate='',
                 n=3,
                 lineterm='\n'):
    r"""
Modified context_diff taken from standard Python difflib.

This version takes a precomputed opcode ``sequence`` (5-tuples
``(tag, i1, i2, j1, j2)`` as returned by
``SequenceMatcher.get_opcodes()``) instead of computing the opcodes from
``a`` and ``b`` itself.  ``a`` and ``b`` are only used to look up the
line text for each opcode range.

``sequence`` may be any iterable; it is copied and never modified.
The remaining arguments have the same meaning as in
``difflib.context_diff``.  Yields the lines of the context diff.
"""
    from difflib import _format_range_context, SequenceMatcher

    prefix = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
    started = False
    # An empty matcher is used purely for its grouping logic.
    seq = SequenceMatcher(None, [], [])
    # get_grouped_opcodes() trims the first and last opcode of its cached
    # list in place, so it must work on a private copy rather than on the
    # caller's object.
    seq.opcodes = list(sequence)
    for group in seq.get_grouped_opcodes(n):
        if not started:
            started = True
            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
            todate = '\t{}'.format(tofiledate) if tofiledate else ''
            yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
            yield '--- {}{}{}'.format(tofile, todate, lineterm)

        first, last = group[0], group[-1]
        yield '***************' + lineterm

        file1_range = _format_range_context(first[1], last[2])
        yield '*** {} ****{}'.format(file1_range, lineterm)

        # The "from" side is only listed when the group changes it.
        if any(tag in ('replace', 'delete') for tag, _, _, _, _ in group):
            for tag, i1, i2, _, _ in group:
                if tag != 'insert':
                    for line in a[i1:i2]:
                        yield prefix[tag] + line

        file2_range = _format_range_context(first[3], last[4])
        yield '--- {} ----{}'.format(file2_range, lineterm)

        # The "to" side is only listed when the group changes it.
        if any(tag in ('replace', 'insert') for tag, _, _, _, _ in group):
            for tag, _, _, j1, j2 in group:
                if tag != 'delete':
                    for line in b[j1:j2]:
                        yield prefix[tag] + line
Exemplo n.º 14
0
def unified_diff(fromlines, tolines, context=None):
    """
    Generator for unified diffs. Slightly modified version from Trac 0.11.

    Yields hunk headers (``@@ -start,len +start,len @@``) followed by the
    hunk lines prefixed with ``' '``, ``'-'`` or ``'+'``.  No file header
    and no line terminators are added.

    ``context`` is the number of unchanged lines kept around each change.
    ``None`` (the default) means unlimited context: all lines are reported
    in a single hunk, and nothing is yielded when the inputs are equal.
    """
    matcher = SequenceMatcher(None, fromlines, tolines)
    if context is None:
        # get_grouped_opcodes() does arithmetic on its argument and raises
        # TypeError for None, so the "whole file" case is built by hand.
        opcodes = matcher.get_opcodes()
        has_changes = any(opcode[0] != 'equal' for opcode in opcodes)
        groups = [opcodes] if has_changes else []
    else:
        groups = matcher.get_grouped_opcodes(context)
    for group in groups:
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        if i1 == 0 and i2 == 0:
            # Nothing on the "from" side (pure addition): the header must
            # read "-0,0".
            i1, i2 = -1, -1 # add support
        yield '@@ -%d,%d +%d,%d @@' % (i1 + 1, i2 - i1, j1 + 1, j2 - j1)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in fromlines[i1:i2]:
                    yield ' ' + line
            else:
                if tag in ('replace', 'delete'):
                    for line in fromlines[i1:i2]:
                        yield '-' + line
                if tag in ('replace', 'insert'):
                    for line in tolines[j1:j2]:
                        yield '+' + line
Exemplo n.º 15
0
def unified_diff(fromlines, tolines, context=None):
    """
    Generator for unified diffs. Slightly modified version from Trac 0.11.

    Yields one ``@@ -start,len +start,len @@`` header per hunk, followed by
    the hunk lines prefixed with ``' '``, ``'-'`` or ``'+'``.  Neither a
    file header nor line terminators are added.

    ``context`` is the number of unchanged lines kept around each change.
    With ``None`` (the default) the context is unlimited: every line is
    reported in one hunk, and equal inputs yield nothing.
    """
    matcher = SequenceMatcher(None, fromlines, tolines)
    if context is None:
        # get_grouped_opcodes(None) raises TypeError (it computes with its
        # argument), so the unlimited-context hunk is assembled directly.
        opcodes = matcher.get_opcodes()
        has_changes = any(opcode[0] != 'equal' for opcode in opcodes)
        groups = [opcodes] if has_changes else []
    else:
        groups = matcher.get_grouped_opcodes(context)
    for group in groups:
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        if i1 == 0 and i2 == 0:
            # Empty "from" range (pure addition): render it as "-0,0".
            i1, i2 = -1, -1  # add support
        yield '@@ -{0:d},{1:d} +{2:d},{3:d} @@'.format(i1 + 1, i2 - i1, j1 + 1,
                                                       j2 - j1)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in fromlines[i1:i2]:
                    yield ' ' + line
            else:
                if tag in ('replace', 'delete'):
                    for line in fromlines[i1:i2]:
                        yield '-' + line
                if tag in ('replace', 'insert'):
                    for line in tolines[j1:j2]:
                        yield '+' + line
Exemplo n.º 16
0
class TextHandler(TypeHandler):
    """Handler for text values (multiline strings)."""

    handled_type = str

    def __init__(self, context=3):
        """
        Set up the handler.

        :param context: Number of context lines kept around each change.

        """
        super().__init__()
        self.context = context
        self.lcs = SequenceMatcher(isjunk=None, autojunk=False)

    def diff(self, differ, a, b):
        r"""
        Compute a line-based diff of two texts (multiline strings).

        The result resembles a unified diff expressed as a nested diff
        structure: every hunk starts with an 'I' tagged subdiff holding the
        hunk header, followed by 'U' (unchanged), 'R' (removed) and
        'A' (added) lines. When both texts consist of a single line the
        parent class diff is used instead.

        :param differ: nested_diff.Differ object.
        :param a: First string to diff.
        :param b: Second string to diff.

        >>> from nested_diff import Differ
        >>>
        >>> a = 'one'
        >>> b = 'one\ntwo'
        >>>
        >>> Differ(handlers=[TextHandler()]).diff(a, b)
        {'D': [{'I': [0, 1, 0, 2]}, {'U': 'one'}, {'A': 'two'}], 'E': ''}
        >>>
        """
        old_lines = a.split('\n')
        new_lines = b.split('\n')

        if len(old_lines) == 1 and len(new_lines) == 1:
            return super().diff(differ, a, b)

        matcher = self.lcs
        matcher.set_seq1(old_lines)
        matcher.set_seq2(new_lines)

        entries = []
        for group in matcher.get_grouped_opcodes(self.context):
            head, tail = group[0], group[-1]
            # Hunk header: [old start, old end, new start, new end].
            entries.append({'I': [head[1], tail[2], head[3], tail[4]]})

            for op, i1, i2, j1, j2 in group:
                if op == 'equal':
                    entries.extend([{'U': text} for text in old_lines[i1:i2]])
                else:
                    # 'replace' contributes to both lists, removals first.
                    if op != 'insert':
                        entries.extend(
                            [{'R': text} for text in old_lines[i1:i2]])
                    if op != 'delete':
                        entries.extend(
                            [{'A': text} for text in new_lines[j1:j2]])

        if not entries:
            return {}

        return {'D': entries, 'E': a.__class__()}

    def patch(self, patcher, target, diff):
        """
        Apply a text diff to ``target`` and return the patched string.

        No heuristics are applied (unlike GNU patch): the position comes
        straight from each hunk header, adjusted by the lines added and
        removed so far, and the hunk's changes are applied there.

        Raises ValueError when a removed or unchanged line does not match
        the target, or when a subdiff has none of the supported tags.

        :param patcher: nested_diff.Patcher object.
        :param target: string to patch.
        :param diff: Nested diff.

        """
        lines = target.split('\n')
        shift = 0

        for entry in diff['D']:
            if 'I' in entry:  # a new hunk begins here
                pos = entry['I'][0] + shift
                continue

            if 'A' in entry:
                lines.insert(pos, entry['A'])
                shift += 1
                pos += 1
            elif 'R' in entry:
                removed = lines.pop(pos)
                if removed != entry['R']:
                    raise ValueError('Removing line does not match')
                shift -= 1
            elif 'U' in entry:
                if lines[pos] != entry['U']:
                    raise ValueError('Unchanged line does not match')
                pos += 1
            else:
                raise ValueError('Unsupported operation')

        return '\n'.join(lines)
Exemplo n.º 17
0
def diff_seq(a, b, context=3, depth=0, fromfile='a', tofile='b', compare_with_func=False):
    """Build a DataDiff for the differences between sequences ``a`` and ``b``.

    Items are passed through ``hashable`` and aligned with a
    SequenceMatcher.  Every opcode group is first rendered into a clone of
    the result DataDiff; the clone's diffs are only merged into the result
    when the group turned out to contain a real difference (always the case
    when ``compare_with_func`` is False).

    Raises NotSequence when ``a`` has neither ``__iter__`` nor
    ``__getitem__``.  Returns the DataDiff.
    """
    if not hasattr(a, '__iter__') and not hasattr(a, '__getitem__'):
        raise NotSequence("Not a sequence %s" % type(a))
    hashable_a = [hashable(_) for _ in a]
    hashable_b = [hashable(_) for _ in b]
    sm = SequenceMatcher(a=hashable_a, b=hashable_b)
    if type(a) == tuple:
        ddiff = DataDiff(tuple, '(', ')', fromfile=fromfile, tofile=tofile)
    # NOTE(review): checks type(b) although the other branches check
    # type(a) -- confirm this is intentional.
    elif type(b) == list:
        ddiff = DataDiff(list, '[', ']', fromfile=fromfile, tofile=tofile)
    else:
        ddiff = DataDiff(type(a), fromfile=fromfile, tofile=tofile)
    for chunk in sm.get_grouped_opcodes(context):
        # Work on a scratch copy so the whole chunk can be dropped later.
        chunk_ddiff = ddiff.clone()
        chunk_ddiff.context(max(chunk[0][1] - 1, 0), max(chunk[-1][2] - 1, 0),
                      max(chunk[0][3] - 1, 0), max(chunk[-1][4] - 1, 0))
        # Starts out True unless compare_with_func is exactly False's
        # opposite, i.e. a comparison function/flag was supplied.
        realy_diffs = compare_with_func is False
        for change, i1, i2, j1, j2 in chunk:
            if change == 'replace':
                # Buffers for pairs that could not be diffed recursively.
                consecutive_deletes = []
                consecutive_inserts = []
                for a2, b2 in zip(a[i1:i2], b[j1:j2]):
                    # NOTE(review): compares_with_func is a name defined
                    # outside this function (it is not the compare_with_func
                    # parameter); presumably it reports the pair as equal
                    # under the custom comparison -- verify.
                    if compares_with_func(a2, b2):
                        continue
                    # A DiffTypeError from any call in this body (not only
                    # diff()) sends the pair to the buffers.
                    try:
                        nested_diff = diff(a2, b2, context, depth+1, compare_with_func=compare_with_func)
                        # Special case: an empty nested diff is treated as
                        # "the elements are equal" and the pair is skipped.
                        if not nested_diff:
                            continue
                        # Emit buffered items first to preserve ordering.
                        chunk_ddiff.delete_multi(consecutive_deletes)
                        chunk_ddiff.insert_multi(consecutive_inserts)
                        consecutive_deletes = []
                        consecutive_inserts = []
                        chunk_ddiff.nested(nested_diff)
                    except DiffTypeError:
                        consecutive_deletes.append(a2)
                        consecutive_inserts.append(b2)

                    # Reached only for pairs that were not skipped above.
                    realy_diffs = True

                # differing lengths get truncated by zip()
                # here we handle the truncated items
                chunk_ddiff.delete_multi(consecutive_deletes)
                if i2-i1 > j2-j1:
                    common_length = j2-j1 # covered by zip
                    chunk_ddiff.delete_multi(a[i1+common_length:i2])
                chunk_ddiff.insert_multi(consecutive_inserts)
                if i2-i1 < j2-j1:
                    common_length = i2-i1 # covered by zip
                    chunk_ddiff.insert_multi(b[j1+common_length:j2])
            else:
                # Inserted items come from b; deleted and equal items from a.
                if change == 'insert':
                    items = b[j1:j2]
                else:
                    items = a[i1:i2]
                if change != 'equal':
                    realy_diffs = True
                chunk_ddiff.multi(change, items)

        # i2 still holds the end index of the chunk's last opcode.
        if i2 < len(a):
            chunk_ddiff.context_end_container()
        # Keep the chunk only when it contains an actual difference.
        if realy_diffs:
            ddiff.diffs.extend(chunk_ddiff.diffs)
    return ddiff
Exemplo n.º 18
0
class Differ(object):
    """
    Compute recursive diff for two passed objects.

    Resulting diff is a dict and may contain following keys:
    `A` stands for 'added', its value - added item.
    `D` means 'different' and contains subdiff.
    `E` diffed entity (optional), value - empty instance of entity's class.
    `I` index for sequence item, used only when prior item was omitted.
    `N` is a new value for changed item.
    `O` is a changed item's old value.
    `R` key used for removed item.
    `U` represent unchanged item.

    Diff metadata alternates with actual data; simple types specified as is,
    dicts, lists and tuples contain subdiffs for their items with native for
    such types addressing: indexes for lists and tuples and keys for
    dictionaries. Each status type, except `D`, `E` and `I`, may be omitted
    during diff computation. `E` tag is used with `D` when entity unable to
    contain diff by itself (set, frozenset); `D` contain a list of subdiffs
    in this case.

    Example:

    a:  {"one": [5,7]}
    b:  {"one": [5], "two": 2}
    opts: U=False  # omit unchanged items

    diff:
    {"D": {"one": {"D": [{"I": 1, "R": 7}]}, "two": {"A": 2}}}
    | |   |  |    | |   || |   |   |   |       |    | |   |
    | |   |  |    | |   || |   |   |   |       |    | |   +- with value 2
    | |   |  |    | |   || |   |   |   |       |    | +- key 'two' was added
    | |   |  |    | |   || |   |   |   |       |    +- subdiff for it
    | |   |  |    | |   || |   |   |   |       +- another key from top-level
    | |   |  |    | |   || |   |   |   +- what it was (item's value: 7)
    | |   |  |    | |   || |   |   +- what happened to item (removed)
    | |   |  |    | |   || |   +- list item's actual index
    | |   |  |    | |   || +- prior item was omitted
    | |   |  |    | |   |+- subdiff for list item
    | |   |  |    | |   +- it's value - list
    | |   |  |    | +- it is deeply changed
    | |   |  |    +- subdiff for key 'one'
    | |   |  +- it has key 'one'
    | |   +- top-level thing is a dict
    | +- changes somewhere deeply inside
    +- diff is always a dict

    Dicts, lists, sets and tuples traversed recursively, all other types
    compared by values.

    """
    def __init__(
            self,
            A=True,
            N=True,
            O=True,
            R=True,
            U=True,  # noqa: E741
            trimR=False,
            diff_method=None,
            multiline_diff_context=-1):
        """
        Construct Differ.

        Optional arguments:
        `A`, `N`, `O`, `R`, `U` are toggles for according diff ops and all
        enabled (`True`) by default.

        `trimR` when True will drop (replace by `None`) removed data from diff;
        default is `False`.

        `diff_method` method with such name (if object have one) from first
        diffed object will be called for diff. Second diffed object and all
        Differ opts will be passed as arguments, diff expected for output.
        Disabled (`None`) by default.

        `multiline_diff_context` defines amount of context lines for multiline
        string diffs, multiline diffs disabled when value is negative.

        """
        self.__diff_method = diff_method
        self.lcs = SequenceMatcher(isjunk=None, autojunk=False)

        self.op_a = A
        self.op_n = N
        self.op_o = O
        self.op_r = R
        self.op_u = U
        self.op_trim_r = trimR

        self.__differs = {
            dict: self.diff_dict,
            frozenset: self.diff_set,
            list: self.diff_list,
            set: self.diff_set,
            tuple: self.diff_tuple,
        }

        if multiline_diff_context >= 0:
            self.__differs[str] = self.diff_multiline
            self.multiline_diff_context = multiline_diff_context

    def diff(self, a, b):
        """
        Compute diff for two arbitrary objects.

        This method is a dispatcher and calls registered diff method for each
        diffed values pair according to their type. `diff__default` called for
        unequal and not registered types. Args and kwargs passed to called
        method as is.

        :param a: First object to diff.
        :param b: Second object to diff.

        """
        if self.__diff_method is not None:
            try:
                method = a.__getattribute__(self.__diff_method)
            except AttributeError:
                pass
            else:
                return method(
                    b,
                    A=self.op_a,
                    N=self.op_n,
                    O=self.op_o,  # noqa: E741
                    R=self.op_r,
                    U=self.op_u,
                    trimR=self.op_trim_r,
                    diff_method=self.__diff_method,
                )

        if a.__class__ is b.__class__:
            # it's faster to compare pickled dumps and dig differences
            # afterwards than recursively diff each pair of objects
            if a is b or dumps(a, -1) == dumps(b, -1):
                return {'U': a} if self.op_u else {}

            return self.get_differ(a.__class__)(a, b)

        return self.diff__default(a, b)

    def diff__default(self, a, b):
        """
        Return default diff.

        """
        dif = {}

        if self.op_n:
            dif['N'] = b
        if self.op_o:
            dif['O'] = a

        return dif

    def diff_dict(self, a, b):
        """
        Compute diff for two dicts.

        Keys found only in `a` are reported as removed ('R', with the value
        replaced by None when `self.op_trim_r` is set), keys found only in
        `b` as added ('A'). Values of keys present in both are compared with
        `self.diff` and recorded only when that subdiff is non-empty.

        :param a: First dict to diff.
        :param b: Second dict to diff.

        >>> a = {'one': 1, 'two': 2, 'three': 3}
        >>> b = {'one': 1, 'two': 42}
        >>>
        >>> Differ(O=False, U=False).diff_dict(a, b)
        {'D': {'two': {'N': 42}, 'three': {'R': 3}}}
        >>>

        """
        changes = {}

        for key in set(a).union(b):
            try:
                before = a[key]
            except KeyError:  # key exists in `b` only: added
                if self.op_a:
                    changes[key] = {'A': b[key]}
                continue

            try:
                after = b[key]
            except KeyError:  # key exists in `a` only: removed
                if self.op_r:
                    changes[key] = {'R': None if self.op_trim_r else before}
                continue

            nested = self.diff(before, after)
            if nested:
                changes[key] = nested

        return {'D': changes} if changes else changes

    def diff_list(self, a, b):
        """
        Compute diff for two lists.

        Items are matched by their pickled form using `self.lcs`. Between
        two matching blocks, items are paired positionally and compared with
        `self.diff`; leftovers of `a` are reported as removed ('R') and
        leftovers of `b` as added ('A'). Whenever an entry is omitted from
        the result, the next emitted entry gets an explicit 'I' key holding
        the current position in `a`.

        :param a: First list to diff.
        :param b: Second list to diff.

        >>> a = [0,1,2,3]
        >>> b = [  1,2,4,5]
        >>>
        >>> Differ(O=False, U=False).diff_list(a, b)
        {'D': [{'R': 0}, {'N': 4, 'I': 3}, {'A': 5}]}
        >>>

        """
        matcher = self.lcs
        matcher.set_seq1(tuple(dumps(item, -1) for item in a))
        matcher.set_seq2(tuple(dumps(item, -1) for item in b))

        entries = []
        pos_a = pos_b = 0
        index_pending = False  # True once an entry has been left out

        for stop_a, stop_b, _ in matcher.get_matching_blocks():
            # Once one side reaches its stop the paired branch can no longer
            # fire, so this walks pairs first, then the leftovers of `a`,
            # then the leftovers of `b`.
            while pos_a < stop_a or pos_b < stop_b:
                if pos_a < stop_a and pos_b < stop_b:
                    entry = self.diff(a[pos_a], b[pos_b])
                    step_a, step_b = 1, 1
                elif pos_a < stop_a:  # removed
                    if self.op_r:
                        entry = {'R': None if self.op_trim_r else a[pos_a]}
                    else:
                        entry = None
                    step_a, step_b = 1, 0
                else:  # added
                    entry = {'A': b[pos_b]} if self.op_a else None
                    step_a, step_b = 0, 1

                if entry:
                    entries.append(entry)
                    if index_pending:
                        entry['I'] = pos_a
                        index_pending = False
                else:
                    index_pending = True

                pos_a += step_a
                pos_b += step_b

        return {'D': entries} if entries else {}

    def diff_multiline(self, a, b):
        r"""
        Compute diff for multiline strings.

        Both strings are split on '\n' and compared line by line using
        `self.lcs`. Every group of opcodes becomes one hunk: an 'I' entry
        holding the hunk header [a_start, a_end, b_start, b_end], followed
        by 'U' (unchanged), 'R' (removed) and 'A' (added) line entries.
        When both strings consist of a single line the result of
        `self.diff__default` is returned instead.

        :param a: First string to diff.
        :param b: Second string to diff.

        >>> a = 'A\nB\nC'
        >>> b = 'A\nC'
        >>>
        >>> Differ(multiline_diff_context=3).diff_multiline(a, b)
        {'D': [{'I': [0, 3, 0, 2]}, {'U': 'A'}, {'R': 'B'}, {'U': 'C'}],
         'E': ''}

        """
        old_lines = a.split('\n', -1)
        new_lines = b.split('\n', -1)

        if len(old_lines) == 1 and len(new_lines) == 1:
            return self.diff__default(a, b)

        matcher = self.lcs
        matcher.set_seq1(old_lines)
        matcher.set_seq2(new_lines)

        hunks = []

        for group in matcher.get_grouped_opcodes(self.multiline_diff_context):
            head, tail = group[0], group[-1]
            hunks.append({'I': [head[1], tail[2], head[3], tail[4]]})

            for op, i1, i2, j1, j2 in group:
                if op == 'equal':
                    hunks.extend({'U': line} for line in old_lines[i1:i2])
                    continue

                # 'replace' contributes to both: removals first, then adds
                if op in {'replace', 'delete'}:
                    hunks.extend({'R': line} for line in old_lines[i1:i2])

                if op in {'replace', 'insert'}:
                    hunks.extend({'A': line} for line in new_lines[j1:j2])

        if not hunks:
            return {}

        return {'D': hunks, 'E': a.__class__()}

    def diff_set(self, a, b):
        """
        Compute diff for two [frozen]sets.

        Each member of the union is reported as unchanged ('U'), removed
        ('R') or added ('A'), provided the matching option (`self.op_u`,
        `self.op_r`, `self.op_a`) is enabled. A non-empty result also
        carries an empty instance of `a`'s class under 'E'.

        :param a: First set to diff.
        :param b: Second set to diff.

        >>> a = {1, 2}
        >>> b = {2, 3}
        >>>
        >>> Differ(U=False).diff_set(a, b)
        {'D': [{'R': 1}, {'A': 3}], 'E': set()}
        >>>

        """
        entries = []

        for item in a.union(b):
            if item not in a:  # added
                tag, enabled = 'A', self.op_a
            elif item in b:  # present on both sides
                tag, enabled = 'U', self.op_u
            else:  # removed
                # trimR opt is ignored here: value required for removal
                tag, enabled = 'R', self.op_r

            if enabled:
                entries.append({tag: item})

        if not entries:
            return {}

        return {'D': entries, 'E': a.__class__()}

    def diff_tuple(self, a, b):
        """
        Compute diff for two tuples.

        The work is delegated to `self.diff_list`; when its result carries a
        'D' entry, that sequence is converted to a tuple in place.

        :param a: First tuple to diff.
        :param b: Second tuple to diff.

        >>> a = (  1,2,4,5)
        >>> b = (0,1,2,3)
        >>>
        >>> Differ(O=False, U=False).diff_tuple(a, b)
        {'D': ({'A': 0}, {'N': 3, 'I': 2}, {'R': 5})}
        >>>

        """
        result = self.diff_list(a, b)

        try:
            entries = result['D']
        except KeyError:
            return result

        result['D'] = tuple(entries)

        return result

    def get_differ(self, type_):
        """
        Look up the diff method registered for a type.

        Falls back to `self.diff__default` when nothing is registered for
        `type_`.

        :param type_: diffed object type.

        """
        return self.__differs.get(type_, self.diff__default)

    def set_differ(self, type_, method):
        """
        Register a diff method for a data type.

        Any method previously registered for `type_` is replaced.

        :param type_: diffed object type.
        :param method: diff method.

        """
        registry = self.__differs
        registry[type_] = method
Exemplo n.º 19
0
            break
    word = ''.join(beginning[::-1]) + ''.join(end)

    return word


#TODO: identify the differences
print('\n'.join(diff))
print()
i = 0
for i in range(len(correct)):
    #where i loops through the sentences
    #compares the same sentence
    s = SequenceMatcher(None, transcript[i], correct[i])
    #since opcodes returns a list you need to loop through inside
    for o in s.get_grouped_opcodes():
        for code in o:
            if code[0] == 'replace':
                if transcript[i][code[1]] in th_changes:
                    if (th_changes[transcript[i][code[1]:code[2]]] ==
                            correct[i][code[3]:code[4]]):
                        word = (track_word(i, code))
                        print("unvoiced th articulation error! in word {}".
                              format(word))
                        mistake_reporting.th_articulation()
                        print()
                if transcript[i][code[1]] in dh_changes:
                    if (dh_changes[transcript[i][code[1]:code[2]]] ==
                            correct[i][code[3]:code[4]]):
                        word = (track_word(i, code))
                        print(
Exemplo n.º 20
0
    def gen(self, diff):
        """
        Yield unified-diff style text lines for a single `diff` object.

        A '---'/'+++' file header is emitted whenever `diff.file` differs
        from the file of the previously generated diff (`self.current`).
        After that comes one '@@' hunk per opcode group of `diff.source`
        versus `diff.target`, with lines prefixed by ' ', '-' or '+'.
        Unless `self.no_adjust` is set, every hunk is padded with extra
        context lines read from `diff.file`, so that up to `self.context`
        lines surround the change.

        :param diff: object providing `file`, `line`, `pos`, `source` and
            `target` (presumably `line` is the 1-based line of `diff.file`
            at which `source` starts -- TODO confirm).

        """
        from difflib import SequenceMatcher

        # NOTE(review): assert statements are stripped under `python -O`, so
        # read_lines() would not be called at all then; if it is what loads
        # the file content (presumably), this should be a plain call --
        # confirm.
        if not self.no_adjust:
            assert diff.file.read_lines()

        if self.verbose:
            yield "# pdiff: file=%r line=%d pos=%d\n" % (diff.file, diff.line,
                                                         diff.pos)

        # new target file: emit the file header and restart hunk numbering
        if self.current != diff.file:
            self.current = diff.file
            self.hunkno = 0
            yield "--- %s\n" % diff.file
            yield "+++ %s\n" % diff.file

        s = SequenceMatcher(None, diff.source, diff.target)
        for g in s.get_grouped_opcodes(self.context):
            # extent of the whole group in source (i1:i2) and target (j1:j2)
            i1 = g[0][1]
            i2 = g[-1][2]
            j1 = g[0][3]
            j2 = g[-1][4]

            assert i2 >= i1
            assert j2 >= j1

            # k1: number of leading context lines to borrow from diff.file;
            # a leading 'equal' opcode already supplies some of the context
            if self.no_adjust:
                k1 = 0
            elif g[0][0] != 'equal':
                k1 = self.context
            else:
                k1 = self.context - \
                    (g[0][2] - g[0][1])
                assert k1 >= 0

            # k2: number of trailing context lines, computed the same way
            # from the last opcode of the group
            if self.no_adjust:
                k2 = 0
            elif g[-1][0] != 'equal':
                k2 = self.context
            else:
                k2 = self.context - \
                    (g[-1][2] - g[-1][1])
                assert k2 >= 0

            if not self.no_adjust:
                # clamp so the leading context does not start before the
                # first line of the file
                if k1 >= i1 + diff.line:
                    k1 = i1 + diff.line - 1

                # clamp so the trailing context does not run past the end
                # of the file
                n = len(diff.file)
                assert n >= i2 + diff.line - 1
                if k2 >= n - i2 - diff.line + 1:
                    k2 = n - i2 - diff.line + 1

            self.hunkno += 1
            # hunk header: start line and length for both sides, widened by
            # the borrowed context, followed by the running hunk number
            yield "@@ -%d,%d +%d,%d @@ #%d\n" % (
                i1 + diff.line - k1, i2 - i1 + k1 + k2, j1 + diff.line - k1,
                j2 - j1 + k1 + k2, self.hunkno)

            # leading context taken from the file itself
            for i in range(i1 + diff.line - 1 - k1, i1 + diff.line - 1):
                yield ' ' + diff.file[i]

            # NOTE: this loop rebinds i1, i2, j1 and j2; once it finishes,
            # i2 holds g[-1][2] again, which the trailing-context loop below
            # relies on.
            for t, i1, i2, j1, j2 in g:
                if t == 'equal':
                    for l in diff.source[i1:i2]:
                        yield ' ' + l
                    continue
                if t == 'replace' or t == 'delete':
                    for l in diff.source[i1:i2]:
                        yield '-' + l
                if t == 'replace' or t == 'insert':
                    for l in diff.target[j1:j2]:
                        yield '+' + l

            # trailing context taken from the file itself
            for i in range(i2 + diff.line - 1, i2 + diff.line - 1 + k2):
                yield ' ' + diff.file[i]