def nodes_links(node_ids, tree):
    """Build a parent-id -> child-ids mapping for the nodes of ``tree``.

    ``node_ids`` is indexed by a node's ``uuid`` and each value is a sequence
    whose second item is the numeric id used here. ``tree`` is traversed with
    ``bfs`` in ``"all"`` mode.

    Returns a ``defaultdict(list)``. A node only gets an entry if
    ``children`` yields at least one child for it; child ids are appended in
    the order ``children`` yields them.
    """
    adjacency = defaultdict(list)

    def record_children(node, depth, acc):
        for kid in children(node):
            # Look the parent up per child so childless nodes never create
            # an (empty) entry in the mapping.
            bucket = acc[node_ids[node.uuid][1]]
            bucket.append(node_ids[kid.uuid][1])
        return acc

    bfs(tree, callback=record_children, mode="all", out=adjacency)
    return adjacency
def nodes_ids(tree):
    """Assign a sequential integer id to every node visited in ``tree``.

    ``tree`` is traversed with ``bfs`` in ``"all"`` mode. Ids start at 0 and
    grow by one per callback invocation, so they follow the visiting order
    of ``bfs``.

    Returns a dict mapping ``node.uuid`` to the tuple ``(node, id)``. If the
    same ``uuid`` is seen more than once, the later visit overwrites the
    earlier entry (and still consumes an id).
    """
    next_id = 0
    ids = {}

    def assign_id(node, depth, acc):
        # The counter is kept in the enclosing call instead of a module-level
        # global, so concurrent or nested calls cannot interfere with each
        # other and no unrelated global named ``c`` gets overwritten.
        nonlocal next_id
        acc[node.uuid] = (node, next_id)
        next_id += 1
        return acc

    bfs(tree, callback=assign_id, mode="all", out=ids)
    return ids
示例#3
0
def avg_branch(ast_tree):
    """Return the average branching factor of ``ast_tree`` as an int.

    Only nodes for which ``children`` yields at least one child take part in
    the average; the mean is truncated with ``int()``.

    Returns 0 when no visited node has children (for example a tree that
    consists of a single node). Without this guard the mean of an empty list
    is NaN and converting it to ``int`` raises ``ValueError``.
    """
    def collect_branching(node, depth, counts):
        fanout = len(list(children(node)))
        if fanout > 0:
            counts.append(fanout)

    counts = bfs(ast_tree, callback=collect_branching, mode="all", out=[])
    if not counts:
        return 0
    return int(np.mean(counts))
示例#4
0
    def tf_node_leaves(self, ast_tree):
        """Count occurrences of each node index among the leaves of ``ast_tree``.

        The tree is traversed with ``bfs`` in ``"leaves"`` mode. Every visited
        node is translated to a key with ``self.astnodes.index`` and the
        counter stored under that key is incremented.

        Returns the value ``bfs`` hands back for the ``defaultdict(int)``
        accumulator passed as ``out``.
        """
        def bump(node, depth, counts):
            key = self.astnodes.index(node)
            counts[key] += 1

        return bfs(ast_tree, callback=bump, mode="leaves", out=defaultdict(int))
示例#5
0
def max_depth(ast_tree):
    """Return the largest depth reported by ``bfs`` while walking ``ast_tree``.

    The accumulator is a one-element list holding the running maximum of the
    depth argument the callback receives in ``"all"`` mode.
    """
    def keep_deepest(node, depth, best):
        if not best:
            # First visited node seeds the running maximum.
            best.append(depth)
            return
        if depth > best[0]:
            best[0] = depth

    deepest = bfs(ast_tree, callback=keep_deepest, mode="all", out=[])
    return deepest[0]
示例#6
0
    def ngrams_node_fast(self, ast_tree, ngram=2):
        """Collect the node index of every leaf visited in ``ast_tree``.

        The tree is traversed with ``bfs`` in ``"leaves"`` mode and
        ``self.astnodes.index(node)`` is appended for each visited node.
        ``ngram`` is accepted but not used by this method.

        Returns the value ``bfs`` hands back for the list accumulator.
        """
        def collect(node, depth, acc):
            acc.append(self.astnodes.index(node))

        collected = []
        return bfs(ast_tree, callback=collect, mode="leaves", out=collected)
示例#7
0
def max_branch(ast_tree):
    """Return the largest number of children found on any node of ``ast_tree``.

    The accumulator is a one-element list holding the running maximum of
    ``len(list(children(node)))`` over the nodes ``bfs`` visits in ``"all"``
    mode.
    """
    def keep_widest(node, depth, best):
        fanout = len(list(children(node)))
        if not best:
            # First visited node seeds the running maximum.
            best.append(fanout)
            return
        if fanout > best[0]:
            best[0] = fanout

    widest = bfs(ast_tree, callback=keep_widest, mode="all", out=[])
    return widest[0]
示例#8
0
    def tf_ngrams_node(self, ast_tree, ngram=2):
        """Count parent-plus-siblings n-grams over the nodes of ``ast_tree``.

        For every node with children, each window of ``ngram - 1`` consecutive
        children is prefixed with the node itself. All members are translated
        with ``self.astnodes.index``, the resulting tuple of indices is
        translated again with ``self.astnodes.index``, and the counter under
        that final key is incremented.

        Returns the value ``bfs`` hands back for the ``defaultdict(int)``
        accumulator.
        """
        counts = defaultdict(int)

        def count_windows(node, depth, acc, ngram=ngram):
            kids = list(children(node))
            if not kids:
                return
            # Shifted copies of the child list; zipping them yields the
            # sliding windows of ``ngram - 1`` adjacent children.
            shifted = [kids[offset:] for offset in range(ngram - 1)]
            for window in zip(*shifted):
                parent_idx = self.astnodes.index(node)
                child_idxs = tuple(self.astnodes.index(kid) for kid in window)
                key = (parent_idx,) + child_idxs
                acc[self.astnodes.index(key)] += 1

        return bfs(ast_tree, callback=count_windows, mode="all", out=counts)
示例#9
0
    def avg_node_leaves(self, ast_tree):
        """Return the average depth per node index among the leaves of ``ast_tree``.

        While ``bfs`` walks the tree in ``"leaves"`` mode, a two-item list
        ``[depth_sum, visit_count]`` is maintained per key, where the key is
        ``self.astnodes.index(node)``.

        Returns a dict mapping each key to ``depth_sum / visit_count``.
        """
        def accumulate(node, depth, stats):
            entry = stats[self.astnodes.index(node)]
            if not entry:
                # Fresh key: start with zero depth sum and zero visits.
                entry.extend((0, 0))
            entry[0] += depth
            entry[1] += 1

        totals = bfs(ast_tree, callback=accumulate, mode="leaves", out=defaultdict(list))

        averages = {}
        for key, (depth_sum, visits) in totals.items():
            averages[key] = depth_sum / visits
        return averages
示例#10
0
    def tf_ngrams_node_fast(self, ast_tree, ngram=2):
        """Count vertical (ancestor-to-descendant) node chains of length ``ngram``.

        The callback is handed to ``bfs`` in ``"all"`` mode with a
        ``defaultdict(int)`` accumulator. From each node it is called on, it
        recursively follows ``children`` and builds chains of up to ``ngram``
        nodes. A completed chain is translated member by member with
        ``self.astnodes.index``; the tuple of indices is translated once more
        with ``self.astnodes.index`` and used as the counter key.

        Returns whatever ``bfs`` returns for the accumulator.
        """
        out = defaultdict(int)

        def ngrams_nodes(x, d, o, ngram=ngram, predecessor=tuple()):
            # ``predecessor`` is the chain of nodes leading to ``x``. It is
            # empty when the call comes with only (x, d, o) -- presumably how
            # bfs invokes the callback; verify against bfs.

            if len(predecessor) < ngram:
                # Chain still short: append this node and descend.
                predecessor = predecessor + (x,)  # (type(x).__name__,)
                for child in children(x):
                    grams = ngrams_nodes(child, d, o, ngram=ngram, predecessor=predecessor)
                    # The recursive call returns the chain as seen by the
                    # child: extended by the child if there was room,
                    # otherwise unchanged.
                    # NOTE(review): a full-length chain is returned both by the
                    # call that completes it and, unchanged, by every call on a
                    # child of its last node, so each chain appears to be
                    # counted once plus once per child of its last node --
                    # confirm this is intended.
                    if len(grams) == ngram:
                        grams_idx = tuple(self.astnodes.index(gram) for gram in grams)
                        o[self.astnodes.index(grams_idx)] += 1
            # Returned to the recursive caller above; what bfs does with the
            # value is not visible from here.
            return predecessor

        return bfs(ast_tree, callback=ngrams_nodes, mode="all", out=out)
def convert_src_files(basefolder, out_dir=None):
    """Parse the source files under ``basefolder`` and dump each tree to disk.

    File names come from ``get_ast_src_files(basefolder)`` and every file is
    parsed with ``ast_parse_file`` (with a ``tqdm`` progress bar). Each tree
    is passed through ``unify_children`` and then written to
    ``<out_dir>/<source file stem>.tree`` with two kinds of lines:

    * ``>ID<TAB>TypeName`` -- one per entry of ``nodes_ids(tree)``
    * ``<ID=CHILD,CHILD,...`` -- one per entry of ``nodes_links(...)``

    Args:
        basefolder: folder handed to ``get_ast_src_files``.
        out_dir: directory receiving the ``.tree`` files. Defaults to
            ``../dataset/python_trees`` relative to the current working
            directory.

    Returns:
        None. The only result is the files written.
    """
    if out_dir is None:
        out_dir = os.path.join("..", "dataset", "python_trees")

    # Labels and problem tags are returned alongside the names but are not
    # needed to write the tree files.
    X_names, _labels, _problems = get_ast_src_files(basefolder)
    trees = [ast_parse_file(name) for name in tqdm(X_names)]

    for name, tree in zip(X_names, trees):
        tree = unify_children(tree)
        node_ids = nodes_ids(tree)
        node_links = nodes_links(node_ids, tree)

        lines = [
            ">{0}\t{1}\n".format(node_id, type(node).__name__)
            for node, node_id in node_ids.values()
        ]
        lines.extend(
            "<{0}={1}\n".format(str(parent), ",".join([str(kid) for kid in kids]))
            for parent, kids in node_links.items()
        )

        # Open the target only once the content is fully computed, so a
        # failure while building it does not leave an empty file behind.
        stem = os.path.splitext(os.path.basename(name))[0]
        with open(os.path.join(out_dir, stem + ".tree"), "w") as file:
            file.writelines(lines)
示例#12
0
    def tf_skip_grams_node_fast(self, ast_tree, ngram=2,v_skip=0):
        """Count vertical node chains of ``ngram`` items, taking every ``v_skip + 1``-th node.

        The callback is handed to ``bfs`` in ``"all"`` mode with a
        ``defaultdict(int)`` accumulator. From each node it is called on, it
        recursively follows ``children`` and builds chains of up to
        ``ngram + v_skip`` nodes. Each returned chain is thinned with the
        slice ``[::v_skip + 1]``; when exactly ``ngram`` nodes remain they are
        translated with ``self.astnodes.index``, the tuple of indices is
        translated again, and the counter under that key is incremented.

        With the default ``v_skip=0`` the slice keeps every node.

        Returns whatever ``bfs`` returns for the accumulator.
        """
        out = defaultdict(int)

        def ngrams_nodes(x, d, o, ngram=ngram,v_skip=v_skip,predecessor=tuple()):
            # ``predecessor`` is the chain of nodes leading to ``x``. It is
            # empty when the call comes with only (x, d, o) -- presumably how
            # bfs invokes the callback; verify against bfs.

            if len(predecessor) < ngram+v_skip:
                # Chain still short: append this node and descend.
                predecessor = predecessor + (x,)  # (type(x).__name__,)
                for child in children(x):
                    grams = ngrams_nodes(child, d, o, ngram=ngram,v_skip=v_skip,predecessor=predecessor)
                    # Keep the first node and then every (v_skip + 1)-th one.
                    # NOTE(review): the chain is capped at ngram + v_skip
                    # nodes, while a stride of v_skip + 1 needs
                    # (ngram - 1) * (v_skip + 1) + 1 nodes to yield ngram
                    # items; for ngram > 2 with v_skip > 0 the length test
                    # below looks unreachable -- confirm.
                    grams = grams[::v_skip+1]
                    # NOTE(review): a full-length chain is returned both by the
                    # call that completes it and, unchanged, by every call on a
                    # child of its last node, so a chain appears to be counted
                    # more than once -- confirm this is intended.
                    if len(grams) == ngram:
                        grams_idx = tuple(self.astnodes.index(gram) for gram in grams)
                        # grams_idx = tuple(type(gram).__name__ for gram in grams)
                        o[self.astnodes.index(grams_idx)] += 1
                        # o[grams_idx] += 1
            # Returned to the recursive caller above; what bfs does with the
            # value is not visible from here.
            return predecessor

        return bfs(ast_tree, callback=ngrams_nodes, mode="all", out=out)
示例#13
0
def avg_depth(ast_tree):
    """Return the mean depth of the nodes of ``ast_tree``, truncated to int.

    The depth argument of every callback invocation made by ``bfs`` in
    ``"all"`` mode is collected and averaged with ``np.mean``.
    """
    def record_depth(node, depth, depths):
        depths.append(depth)

    depths = bfs(ast_tree, callback=record_depth, mode="all", out=[])
    mean_depth = np.mean(depths)
    return int(mean_depth)