def is_correctly_generated(graph: DiGraph, min_args, max_args) -> bool:
    """Check whether a generated graph is usable.

    A graph is accepted only when all of the following hold:

    * it has at least one node and at least one edge;
    * its node count lies in the inclusive range ``[min_args, max_args]``;
    * it forms a single connected component once edge direction is ignored
      (no isolated subgraphs).

    Args:
        graph: The graph to validate.
        min_args: Smallest accepted number of nodes (inclusive).
        max_args: Largest accepted number of nodes (inclusive).

    Returns:
        ``True`` when every check passes, ``False`` otherwise.
    """
    node_count = graph.number_of_nodes()

    # Reject empty graphs.
    if node_count == 0 or graph.number_of_edges() == 0:
        return False

    # Reject graphs whose size is outside the requested bounds.
    if not min_args <= node_count <= max_args:
        return False

    # Reject graphs with isolated subgraphs. Weak connectivity of a directed
    # graph is the same as connectivity of its undirected view, and checking
    # it directly avoids deep-copying the graph via ``to_undirected()``.
    # The null-graph case these helpers reject was already handled above.
    if graph.is_directed():
        return nx.is_weakly_connected(graph)
    return nx.is_connected(graph)
def get_dependency(cls, workflow: Workflow, processor_id: str, graph: DiGraph, spark: SparkSession) -> Union[Dependency, None]:
    """Run one processor after recursively running everything upstream of it.

    The processor configuration is looked up on ``workflow``, each predecessor
    of ``processor_id`` in ``graph`` is resolved through a recursive call, and
    the collected results are handed to the processor as its dependencies.

    Args:
        workflow: Source of processor configurations (``get_processor``).
        processor_id: Identifier of the processor to run; also its node in
            ``graph``.
        graph: Directed graph whose edges point from a dependency to the
            processor that consumes it.
        spark: Spark session passed through to every processor context.

    Returns:
        Whatever the selected processor's ``run`` returns.
    """
    config = workflow.get_processor(processor_id)

    # The graph is only queried when it has edges; with no edges the
    # processor simply has no upstream dependencies.
    if graph.number_of_edges() > 0:
        upstream_ids = list(graph.predecessors(processor_id))
    else:
        upstream_ids = []

    upstream_results = [
        cls.get_dependency(workflow, upstream_id, graph, spark)
        for upstream_id in upstream_ids
    ]

    context = ProcessorContext(
        spark_session=spark,
        property_groups=config.property_groups,
        dependencies=upstream_results,
    )
    return SparkProcessor.get_spark_processor(config.type).run(context)
def tree0(weight_value, startwindow, term):
    """Build a directed term hierarchy from the cliques of successive windows.

    A root node ``0`` is created first. For ``startwindow`` every clique with
    more than ``size_clique`` members becomes a term node attached to the
    root. For each later window, every sufficiently large clique is
    intersected with each term produced by the previous window; an
    intersection with at least ``size_clique`` members either becomes a new
    term (child of the term it was derived from) or, if the same member set
    already exists as a term with parents, replaces that term with a fresh
    node carrying the extended window list.

    Relies on module-level names not defined in this function: ``data``
    (its ``shape[1]`` bounds the window range), ``windowGraph`` (indexed by
    window), ``size_clique`` and ``find_cliques``.

    Args:
        weight_value: Stored as the ``weight`` attribute of every term node.
        startwindow: First window index to process.
        term: Node id to assign to the first created term; incremented by one
            for every node created.

    Returns:
        A 4-tuple ``(cliqueGraph, dic_term, dic_term_num, term)``: the graph,
        a mapping from member ``frozenset`` to its list of windows, a mapping
        from member ``frozenset`` to node id, and the next unused node id.
    """
    # A single pre-formatted argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('start window: %s' % (startwindow,))

    cliqueGraph = DiGraph()
    dic_term = {}             # frozenset(members) -> list of windows
    dic_term_num = {}         # frozenset(members) -> node id in cliqueGraph
    dic_last_time = {}        # terms produced by the previous window
    dic_temp = {}             # terms produced by the window being processed
    dic_intersect_level = {}  # intersections already handled in this window

    root = 0
    # NOTE(review): the root stores its weight under 'weight_value' while term
    # nodes use 'weight'; kept as-is because consumers may rely on it.
    cliqueGraph.add_node(root, annotation='root', windowsize='root', weight_value='root')
    w = data.shape[1]

    for window in range(startwindow, w):
        dic_intersect_level.clear()

        if window == startwindow:
            # Seed level: one term per large-enough clique, hung off the root.
            for clique in find_cliques(windowGraph[window]):
                if len(clique) <= size_clique:
                    continue
                members = frozenset(clique)
                cliqueGraph.add_node(term, annotation=list(clique), windowsize=[window], weight=weight_value)
                cliqueGraph.add_edge(root, term)
                dic_term[members] = [window]
                dic_term_num[members] = term
                # Remembered so the next window can intersect against it.
                dic_last_time[members] = [window]
                term = term + 1
                print('for start window ')
            continue

        for clique in find_cliques(windowGraph[window]):
            if len(clique) <= size_clique:
                continue
            clique_members = set(clique)

            # key: member frozenset of a previous-level term; value: its windows.
            for key, value in dic_last_time.items():
                intersect = sorted(set(key).intersection(clique_members))
                if len(intersect) < size_clique:
                    continue
                shared = frozenset(intersect)

                # Compare the new intersection with those already produced in
                # this window so no duplicate parent/child terms are created.
                # Iterate over a snapshot: the superset branch removes entries.
                redundant = False
                for ik in list(dic_intersect_level):
                    if shared == ik:
                        # Identical intersection: just link the two terms if
                        # they are distinct nodes.
                        if dic_term_num[key] != dic_term_num[ik]:
                            cliqueGraph.add_edge(dic_term_num[key], dic_term_num[ik])
                        redundant = True
                        break
                    elif shared.issuperset(ik):
                        # Strict superset: the smaller term is dropped from
                        # the graph and from every bookkeeping dictionary.
                        cliqueGraph.remove_node(dic_term_num[ik])
                        dic_term.pop(ik)
                        dic_term_num.pop(ik)
                        dic_intersect_level.pop(ik)
                        # An intersection that matched an existing parentless
                        # term is never recorded in dic_temp, so tolerate a
                        # missing entry instead of raising KeyError.
                        dic_temp.pop(ik, None)
                    elif shared.issubset(ik):
                        # Strict subset of an existing intersection: skip it.
                        redundant = True
                        break
                if redundant:
                    continue

                dic_intersect_level[shared] = 1

                if shared in dic_term:
                    # This member set already exists as a term.
                    old_term = dic_term_num[shared]
                    # Materialise both neighbour lists before mutating the
                    # graph; newer networkx versions return iterators here.
                    parent = list(cliqueGraph.predecessors(old_term))
                    children = list(cliqueGraph.successors(old_term))
                    if parent:
                        # Redirect: replace the old node with a new one that
                        # inherits its edges and has the extended window list.
                        cliqueGraph.add_node(term, annotation=list(intersect), windowsize=value + [window], weight=weight_value)
                        for p in parent:
                            cliqueGraph.add_edge(p, term)
                        for c in children:
                            cliqueGraph.add_edge(term, c)
                        cliqueGraph.remove_node(old_term)
                        dic_term[shared] = value + [window]
                        dic_term_num[shared] = term
                        dic_temp[shared] = value + [window]
                        term = term + 1
                    # A term without parents is left untouched.
                else:
                    # First occurrence of this intersection: create a term
                    # whose only parent is the term it was derived from.
                    cliqueGraph.add_node(term, annotation=list(intersect), windowsize=value + [window], weight=weight_value)
                    cliqueGraph.add_edge(dic_term_num[key], term)
                    dic_term[shared] = value + [window]
                    dic_term_num[shared] = term
                    dic_temp[shared] = value + [window]
                    term = term + 1

        # The terms created in this window become the comparison set for the
        # next one.
        dic_last_time.clear()
        dic_last_time.update(dic_temp)
        dic_temp.clear()

    print('window %s size is %s %s' % (
        startwindow, cliqueGraph.number_of_nodes(), cliqueGraph.number_of_edges()))
    return cliqueGraph, dic_term, dic_term_num, term