Exemplo n.º 1
0
def _tree_to_bracket_string(root):
    """Serialize ``root`` with ``treeToString`` and wrap it in one outer brace pair.

    ``treeToString`` appends string fragments to the list it is given; the
    fragments are concatenated and enclosed in ``{`` ... ``}`` so the result
    can be handed to ``aptedTree.from_text``.
    """
    fragments = []
    treeToString(root, fragments)
    return '{' + ''.join(fragments) + '}'


def compute_edit_distance(src_file, para_file):
    """Compute the tree edit distance for every sentence pair of two CoNLL-U files.

    Both files are loaded with ``load_conllu``; the results are indexed by
    the same keys, and each entry must provide ``'head'`` and ``'upos'``
    fields. For every key, both sentences are turned into trees with
    ``head_to_tree``, serialized to bracket notation, and compared with APTED.

    :param src_file: CoNLL-U file holding the source sentences.
    :param para_file: CoNLL-U file holding the paraphrased sentences.
    :return: list of edit distances, one per key of the source data, in the
        iteration order of the source data.
    :raises AssertionError: if the two files do not contain the same number
        of entries.
    :raises KeyError: if a key of the source data is missing from the
        paraphrase data.
    """
    src_data = load_conllu(src_file)
    para_data = load_conllu(para_file)
    assert len(src_data) == len(para_data), (
        "sentence count mismatch: %d source vs %d paraphrase"
        % (len(src_data), len(para_data))
    )

    edit_distances = []
    for key in tqdm(src_data, total=len(src_data)):
        src_sent = src_data[key]
        para_sent = para_data[key]
        # head_to_tree returns a pair; only the root node is needed here.
        src_root, _ = head_to_tree(src_sent['head'], src_sent['upos'])
        para_root, _ = head_to_tree(para_sent['head'], para_sent['upos'])

        src_text = _tree_to_bracket_string(src_root)
        para_text = _tree_to_bracket_string(para_root)

        apted = APTED(aptedTree.from_text(src_text),
                      aptedTree.from_text(para_text))
        edit_distances.append(apted.compute_edit_distance())

    return edit_distances
Exemplo n.º 2
0
def apted_code_distance(code_a, code_b):
    """Return the APTED tree edit distance between two pieces of code.

    Each argument is converted to an APTED-compatible tree with
    ``gen_apted_tree`` before the comparison.

    :param code_a: first input, passed to ``gen_apted_tree``.
    :param code_b: second input, passed to ``gen_apted_tree``.
    :return: the value of ``APTED.compute_edit_distance()`` for the two trees.
    """
    first_tree, second_tree = gen_apted_tree(code_a), gen_apted_tree(code_b)

    # Imported lazily, and only after both trees have been built.
    from apted import APTED

    return APTED(first_tree, second_tree).compute_edit_distance()
Exemplo n.º 3
0
def count_distance(a_ast, b_ast):
    """
       Return the tree edit distance between two AST trees.

       Both inputs are first converted with ``build_tree``; the converted
       trees are then compared by APTED using a ``CustomConfig`` instance.
    """
    left_tree = build_tree(a_ast)
    right_tree = build_tree(b_ast)
    return APTED(left_tree, right_tree, CustomConfig()).compute_edit_distance()
def apted(tree1, tree2):
    """Return the APTED edit distance between two trees.

    Each tree is rendered with ``apted_tree_format``; the rendered text is
    stripped of surrounding whitespace, its first and last characters (the
    outer brackets) are dropped, and it is stripped again before being
    parsed with ``helpers.Tree.from_text``.
    """
    # Render both trees first, then parse both, in that order.
    bracket_texts = [
        apted_tree_format(tree).strip()[1:-1].strip()
        for tree in (tree1, tree2)
    ]
    parsed_first, parsed_second = [
        helpers.Tree.from_text(text) for text in bracket_texts
    ]

    return APTED(parsed_first, parsed_second).compute_edit_distance()
Exemplo n.º 5
0
def get_tree_edit_distance(tree1, tree2):
    """Return the APTED edit distance between ``tree1`` and ``tree2``.

    Nodes are expected to expose ``value`` (compared on rename) and
    ``children`` (an iterable of child nodes).
    """

    class TreeEditDistanceConfig(Config):
        """APTED configuration reading ``value`` and ``children`` from nodes."""

        def __init__(self):
            # Intentionally empty: no base-class initialisation is performed.
            pass

        def rename(self, node1, node2):
            # Renaming costs 1 when the values differ and nothing otherwise.
            if node1.value != node2.value:
                return 1
            return 0

        def children(self, node):
            # Hand APTED a fresh list so the node's own container is not shared.
            return list(node.children)

    return APTED(tree1, tree2, TreeEditDistanceConfig()).compute_edit_distance()
Exemplo n.º 6
0
def diff(tree_before: Node, tree_after: Node) -> (int, dict):
    """
    Compare two QEP trees and report how they differ.

    :param tree_before: The 'before tree'.
    :param tree_after: The 'after tree'.
    :return: a pair ``(distance, delta)``:
        distance: The edit distance computed by APTED with ``APTEDConfig``.
            According to the original documentation, only a difference in
            algorithm is captured.
        delta: A dict built from the APTED edit mapping, with 3 keys:
            - deleted: nodes of tree_before mapped to no node in tree_after
            - inserted: nodes of tree_after mapped to no node in tree_before
            - stayed: nodes mapped to each other, as two parallel lists:

                - before: the nodes in tree_before
                - after : the nodes in tree_after

                Note that matching before/after nodes may still differ in
                attributes other than algorithm and operation.
    """
    matcher = APTED(tree_before, tree_after, APTEDConfig())
    distance = matcher.compute_edit_distance()

    removed = []
    added = []
    kept_before = []
    kept_after = []
    for pair in matcher.compute_edit_mapping():
        before_node = pair[0]
        after_node = pair[1]
        # The three checks are independent on purpose: a pair with both
        # sides None is reported as both deleted and inserted.
        if after_node is None:
            removed.append(before_node)
        if before_node is None:
            added.append(after_node)
        if before_node is not None and after_node is not None:
            kept_before.append(before_node)
            kept_after.append(after_node)

    delta = {
        "deleted": removed,
        "inserted": added,
        "stayed": {
            "before": kept_before,
            "after": kept_after
        }
    }
    return distance, delta
Exemplo n.º 7
0
def treeDistance(tree1, tree2):
    """Return the APTED edit distance between two trees.

    Both arguments are converted with ``treeToTree`` before being compared.
    """
    converted_first = treeToTree(tree1)
    converted_second = treeToTree(tree2)
    return APTED(converted_first, converted_second).compute_edit_distance()
Exemplo n.º 8
0
def calculate_edit_distance(tree1, tree2):
    """Return the APTED edit distance of two trees under ``TreeEditDistanceConfig``."""
    return APTED(
        tree1,
        tree2,
        TreeEditDistanceConfig(),
    ).compute_edit_distance()
Exemplo n.º 9
0
     c_loss = torch.Tensor([c_loss])
     c_loss.requires_grad_()
     c_loss = c_loss.cuda()                        
     #------------loss_s--------------------#
     s_len = len(samples_tree)
     _loss = []
     _losses = 0
     for i in range(s_len): # 32
         _uid = real_DT[i].split('_')[0]
         tt1_i = train_DT_id.index(_uid)
         tt1 = train_DT[tt1_i]
         tree1 = Tree.from_text(tt1)
         tt2 = samples_tree[i]
         tree2 = Tree.from_text(tt2)
         _apted = APTED(tree1, tree2, Config())
         ted = _apted.compute_edit_distance()
         _loss.append(ted)
         _losses += ted
     
     t_loss = torch.mean(torch.Tensor([_loss]))
     t_loss.requires_grad_()
     t_loss = t_loss.cuda()
     #-------------------------------------------# 
 
 # construct the input to the generator: prepend a column of zeros to the samples and drop the last column
 zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
 if samples.is_cuda:
     zeros = zeros.cuda()
 
 inputs = Variable(torch.cat([zeros, samples.data], dim = 1)[:, :-1].contiguous())                    
 targets = Variable(samples.data).contiguous().view((-1,))