def is_correctly_generated(graph: DiGraph, min_args, max_args) -> bool:
    """
    Tell whether ``graph`` passes every sanity check for a generated graph.

    The graph is accepted only when all of the following hold:

    * it has at least one node and at least one edge;
    * its node count lies in the inclusive range ``[min_args, max_args]``;
    * it forms a single connected component once edge direction is ignored.

    Returns ``True`` when every check passes, ``False`` otherwise.
    """
    # Reject graphs without nodes or without edges.
    if graph.number_of_nodes() == 0:
        return False
    if graph.number_of_edges() == 0:
        return False

    # Reject graphs whose node count falls outside the requested bounds.
    within_bounds = min_args <= len(graph.nodes) <= max_args
    if not within_bounds:
        return False

    # Reject graphs that split into isolated subgraphs; connectivity is
    # evaluated on the undirected view of the graph.
    component_count = nx.number_connected_components(graph.to_undirected())
    return component_count <= 1
Exemplo n.º 2
0
    def get_dependency(cls,
                       workflow: Workflow,
                       processor_id: str,
                       graph: DiGraph,
                       spark: SparkSession) -> Union[Dependency, None]:
        """Run processor ``processor_id`` after recursively running its predecessors.

        The processor configuration is fetched from ``workflow``. Every
        predecessor of ``processor_id`` in ``graph`` is resolved first through
        a recursive call, and the collected results are handed to the
        processor as its ``dependencies``.

        Predecessors are only looked up when ``graph`` has at least one edge;
        otherwise the processor runs with an empty dependency list.

        Returns whatever the selected processor's ``run`` returns.
        """
        processor_config = workflow.get_processor(processor_id)

        # An edge-less graph is never queried for predecessors.
        has_edges = graph.number_of_edges() > 0
        upstream_ids = list(graph.predecessors(processor_id)) if has_edges else []

        # Resolve upstream processors depth-first, in predecessor order.
        dependencies = [
            cls.get_dependency(workflow, upstream_id, graph, spark)
            for upstream_id in upstream_ids
        ]

        processor_context = ProcessorContext(
            spark_session=spark,
            property_groups=processor_config.property_groups,
            dependencies=dependencies,
        )
        processor = SparkProcessor.get_spark_processor(processor_config.type)
        return processor.run(processor_context)
Exemplo n.º 3
0
def tree0(weight_value, startwindow, term):
    """Build a directed graph of clique-derived "terms" across successive windows.

    Windows ``startwindow`` .. ``data.shape[1] - 1`` are visited in order and
    the cliques of ``windowGraph[window]`` are enumerated with ``find_cliques``.

    * In the start window, every clique with more than ``size_clique`` members
      becomes a node attached directly to the root node (id 0).
    * In every later window, each such clique is intersected with every clique
      or intersection remembered from the previous window.  An intersection
      with at least ``size_clique`` members either creates a new node (child
      of the remembered node it was derived from) or, if the same intersection
      already has a node with predecessors, replaces that node with a fresh one
      whose ``windowsize`` list is extended by the current window.

    Besides its arguments the function reads the module-level names ``data``,
    ``windowGraph``, ``size_clique``, ``find_cliques`` and ``DiGraph``.

    NOTE(review): this is Python 2 code (``print`` statement, ``dict.has_key``).

    Args:
        weight_value: stored as the ``weight`` attribute of every non-root node.
        startwindow: index of the first window to process.
        term: id given to the first node created; incremented by one for each
            node created afterwards.

    Returns:
        A tuple ``(cliqueGraph, dic_term, dic_term_num, term)``: the graph, the
        mapping frozenset(members) -> list of windows, the mapping
        frozenset(members) -> node id, and the next unused node id.
    """

    print 'start window:', startwindow
    # windowGraph = {}
    cliqueGraph = DiGraph()
    dic_term = {}  # frozenset(members) -> list of windows recorded for that term
    dic_last_time = {}  # terms produced for the previous window, compared against the current one
    dic_temp = {}  # terms produced for the current window; becomes dic_last_time afterwards
    dic_term_num = {}  # frozenset(members) -> node id in cliqueGraph
    dic_intersect_level = {}  # intersections recorded for the current window (only the keys are used)
    # term = 183

    root = 0
    # NOTE(review): the root stores its weight under the key 'weight_value',
    # whereas every other node uses 'weight' -- confirm this is intended.
    cliqueGraph.add_node(root, annotation='root', windowsize='root', weight_value='root')
    w = data.shape[1]  # exclusive upper bound of the window indices
    i = 0  # counts replaced nodes; only referenced by commented-out debug output
    q = 0  # flag: 1 means the current intersection must be skipped

    for window in range(startwindow, w):
        dic_intersect_level.clear()
        #print window ## mine
        if window == startwindow:


            for clique in find_cliques(windowGraph[window]):
                if len(clique) >size_clique:
                    cliqueGraph.add_node(term, annotation=list(clique), windowsize=[window],
                                         weight=weight_value)  # generate a term
                    cliqueGraph.add_edge(root, term)
                    dic_term[frozenset(clique)] = [window]  # dic_term records the window list of the clique
                    dic_term_num[frozenset(clique)] = term  # dic_term_num records the term id of the clique
                    dic_last_time[frozenset(clique)] = [window]  # dic_last_time keeps what was generated at the previous step, for comparison at the next step
                    term = term + 1
                    print 'for start window '
                else:
                    continue
                    # print len(dic_last_time), len(dic_term), cliqueGraph.number_of_nodes()

        else:

            for clique in find_cliques(windowGraph[window]):
                if len(clique) > size_clique:
                    #print window, 'clique:', clique ## mine

                    for key, value in dic_last_time.items():  # key is a clique/intersection, value is its window list
                        intersect = sorted(set(key).intersection(set(clique)))
                        q = 0
                        # if len(intersect) >=  size_clique:
                        # NOTE(review): cliques are filtered with '>' but intersections with '>=' -- confirm.
                        if len(intersect) >= size_clique:
                            #print 'intersect', intersect
                            # Within the same window, check the new intersection against the
                            # intersections already recorded for this window so that no
                            # duplicate parent-child relationships are created.
                            # NOTE(review): entries are popped from dic_intersect_level while
                            # looping over its .items(); this relies on .items() returning a
                            # list copy, as it does in Python 2.
                            for ik, iv in dic_intersect_level.items():
                                if set(intersect) == (set(ik)):  # exactly the same intersection was generated before
                                    # link the two terms only if their ids differ
                                    if dic_term_num[frozenset(key)] != dic_term_num[frozenset(ik)]:
                                        cliqueGraph.add_edge(dic_term_num[frozenset(key)], dic_term_num[frozenset(ik)])
                                    q = 1
                                    break
                                elif set(intersect).issuperset(set(ik)):  # the new intersection is a superset of a recorded one
                                    cliqueGraph.remove_node(dic_term_num[frozenset(ik)])
                                    dic_term.pop(frozenset(ik))  # delete the node's information from all four dictionaries
                                    dic_term_num.pop(frozenset(ik))
                                    dic_intersect_level.pop(frozenset(ik))
                                    # NOTE(review): pop() without a default raises KeyError if ik
                                    # is not in dic_temp -- confirm that cannot happen.
                                    dic_temp.pop(frozenset(ik))
                                elif set(intersect).issubset(set(ik)):  # the new intersection is a subset of a recorded one
                                    q = 1
                                    break
                            if q == 1:
                                continue
                            dic_intersect_level[frozenset(intersect)] = 1

                            if dic_term.has_key(frozenset(intersect)):
                                # the intersection has appeared before
                                # NOTE(review): len(parent) below needs predecessors() to return
                                # a sized container -- presumably networkx 1.x; TODO confirm.
                                parent = cliqueGraph.predecessors(dic_term_num[frozenset(intersect)])
                                children = cliqueGraph.successors(dic_term_num[frozenset(intersect)])
                                #print 'parent',len(parent)
                                if len(parent) > 0:
                                    # the existing term has parents: replace it with a new node
                                    # and redirect its edges to that node
                                    cliqueGraph.add_node(term, annotation=list(intersect),
                                                         windowsize=value + [window],
                                                         weight=weight_value)
                                    for p in parent:
                                        cliqueGraph.add_edge(p, term)  # reconnect each parent to the new node

                                    for c in children:
                                        cliqueGraph.add_edge(term, c)  # reconnect the new node to each child
                                    cliqueGraph.remove_node(dic_term_num[frozenset(intersect)])  # remove the redundant node from the graph

                                    # print 'deleted intersect nodes:',dic_term_num[frozenset(intersect)]
                                    i = i + 1
                                    dic_term.pop(frozenset(intersect))  # delete the old entries from the dictionaries
                                    dic_term_num.pop(frozenset(intersect))

                                    dic_term[frozenset(intersect)] = value + [window]  # insert the new node into the dictionaries
                                    dic_term_num[frozenset(intersect)] = term
                                    dic_temp[frozenset(intersect)] = value + [window]  # record in dic_temp for the next window
                                    term = term + 1
                                    continue
                                else:
                                    # the existing term has no parents: leave it untouched
                                    continue
                            else:
                                # the intersection has not appeared before: generate a new term
                                # print 'new term intersect never appear:', term
                                cliqueGraph.add_node(term, annotation=list(intersect), windowsize=value + [window],
                                                     weight=weight_value)  # generate a term

                                cliqueGraph.add_edge(dic_term_num[frozenset(key)], term)  # only the term the intersection was derived from becomes its parent
                                dic_term[frozenset(intersect)] = value + [window]  # insert the new node into the dictionaries
                                dic_term_num[frozenset(intersect)] = term
                                dic_temp[frozenset(intersect)] = value + [window]  # record in dic_temp for the next window
                                term = term + 1
                        else:
                            continue
                else:
                    continue
            # What was generated for this window becomes the reference set for the next one.
            dic_last_time.clear()
            for key, value in dic_temp.items():
                dic_last_time[key] = value
            dic_temp.clear()
    print 'window', startwindow, 'size is', cliqueGraph.number_of_nodes(), cliqueGraph.number_of_edges()  # node count, then edge count
    # print 'deleted nodes:', i
    # fw = open('0904edges_remove.txt', 'w')
    # fw2 = open('0904terms_remove.txt', 'w')
    # fw.write('parent' + '\t' + 'child' + '\n')
    # for edge in cliqueGraph.edges():
    #     fw.write(str(edge[0]) + '\t' + str(edge[1]) + '\n')
    # fw.close()
    # fw2.write('term_id' + '\t' + 'anno_genes' + '\t' + 'window' + '\t' + 'gene_size' + '\t' + 'window_size' + '\n')
    # for key, value in dic_term.items():
    #     fw2.write(str(dic_term_num[key]) + '\t' + str(key) + '\t' + str(value) + '\t' + str(len(key)) + '\t' + str(len(value)) + '\n')
    # fw2.close()
    # for nodes in cliqueGraph.nodes():
    #     if cliqueGraph.degree(nodes) == 0:
    #         print nodes

    return cliqueGraph, dic_term, dic_term_num, term