def name_estimation(graph, group, layer, graphreference, vectorizer, nameestimator, subgraphs):
    """Assign cluster names to the nodes of `graph` and contract it into a minor graph.

    Each subgraph is vectorized and classified by `nameestimator`; every node of a
    subgraph gets the predicted cluster id written into its `group` attribute
    ('-' for outliers, i.e. cluster id -1).  Nodes marked '-' are excluded from
    contraction and later inherit their label from `graphreference`.

    Parameters
    ----------
    graph: networkx graph to annotate and contract
    group: node-attribute name that drives the contraction
    layer: used to build contracted-node labels ("L<layer>C<cluster>")
    graphreference: graph providing original labels for un-contracted ('-') nodes
    vectorizer: eden vectorizer turning subgraphs into feature vectors
    nameestimator: clusterer exposing .predict(data) -> cluster ids
    subgraphs: list of subgraphs of `graph` (may be empty or None)

    Returns
    -------
    the contracted (minor) graph
    """
    if subgraphs:
        # strip eden annotations in place; an explicit loop (instead of map())
        # makes the side effect obvious and is py2/py3 safe
        for sg in subgraphs:
            remove_eden_annotation(sg)
        try:
            data = vectorizer.transform(subgraphs)
        except Exception:
            # draw the offending subgraphs for debugging, then re-raise:
            # falling through (as the old bare except did) would only hit an
            # undefined `data` in the predict() call below, hiding the real error.
            draw.graphlearn(subgraphs, contract=False)
            raise
        clusterids = nameestimator.predict(data)

        # write the predicted cluster id into every node of each subgraph;
        # -1 (outlier) is encoded as '-' which means "do not contract"
        for sg, clid in zip(subgraphs, clusterids):
            for n in sg.nodes():
                graph.node[n][group] = '-' if clid == -1 else str(clid)

    # doing the contraction...
    graph = contraction([graph], contraction_attribute=group, modifiers=[],
                        nesting=False, dont_contract_attribute_symbol='-').next()

    # write labels: un-contracted nodes inherit their reference label,
    # contracted ones are named after layer and cluster id
    def f(n, d):
        d['label'] = graphreference.node[max(d['contracted'])]['label'] \
            if d['label'] == '-' else "L%sC%s" % (layer, d['label'])
    node_operation(graph, f)
    return graph
def _revert_edge_to_vertex_transform(self,graph): # making it to a normal graph before we revert graph=nx.Graph(graph) try: graph=self.vectorizer._revert_edge_to_vertex_transform(graph) return graph except: print 'rnasampler: revert edge to vertex transform failed' draw.graphlearn(graph,contract=False, size=20)
def fit(self, graphs,graphs_neg=[], fit_transform=False): ''' TODO: be sure to set the self.cluster_ids :) Parameters ---------- graphs Returns ------- ''' # PREPARE graphs = list(graphs) graphs_neg = list(graphs_neg) if graphs[0].graph.get('expanded', False): raise Exception('give me an unexpanded graph') self.prepfit() # info if self.debug: print 'minortransform_fit' draw.graphlearn(graphs[:5], contract=False, size=5, vertex_label='label') # annotate graphs and GET SUBGRAPHS graphs,graphs_neg = self.annotator.fit_transform(graphs,graphs_neg) #draw.graphlearn([graphs[0], graphs_neg[-1]], vertex_label='importance') # info if self.debug: print 'minortransform_scores' draw.graphlearn(graphs[:5], contract=False, size=5, vertex_label='importance') # vertex_color='importance', colormap='inferno') subgraphs = list(self.abstractor.get_subgraphs(graphs+graphs_neg)) #if self.num_classes==2: # nusgs = list(self.abstractor.get_subgraphs(graphs_neg)) # #draw.graphlearn([nusgs[0],subgraphs[-1]],vertex_label='importance') # subgraphs += nusgs # FILTER UNIQUES AND TRAIN THE CLUSTERER self.cluster_classifier.fit(subgraphs) self.abstractor.nameestimator = self.cluster_classifier # annotating is super slow. so in case of fit_transform i can save that step if fit_transform: return self.transform(graphs) if self.num_classes ==1 else (self.transform(graphs),self.transform(graphs_neg))
def re_transform_single(self, graph):
    '''Recompute the minor of a single graph and wrap both into a ScoreGraphWrapper.

    Parameters
    ----------
    graph: a base networkx graph

    Returns
    -------
    ScoreGraphWrapper holding the freshly abstracted minor and the input graph
    '''
    draw.graphlearn(graph)
    minor = self.abstract(graph, debug=False)
    draw.graphlearn([graph, minor])
    wrapper = ScoreGraphWrapper(minor, graph, self.vectorizer, self.base_thickness_list)
    return wrapper
def transform(self, graphs): ''' Parameters ---------- inputs: [graph] Returns ------- [(edge_expanded_graph, minor),...] ''' graphs = self.annotator.transform(graphs) result = self.abstractor.transform(graphs) if self.debug: print 'minortransform transform. the new layer ' draw.graphlearn(result[:5], contract=False, size=6, vertex_label='contracted') return result
def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
    """Fit grammar and estimator on `input`, then try to grow new grammar cores.

    After the regular grammar/estimator fit, every sufficiently large interface
    bucket of the grammar is re-sampled with a temporary estimator; resampled
    graphs that score better than the original CIP graph are added back into
    the grammar as new productions.

    Parameters
    ----------
    input: raw input accepted by self.preprocessor
    grammar_n_jobs: parallelism for grammar fitting / sampling
    grammar_batch_size: batch size for grammar fitting
    train_min_size: threshold an interface bucket must exceed to be resampled
        (NOTE(review): compared directly against a dict below — see comment there)

    Returns
    -------
    None (self.lsgg and self.estimatorobject are fitted in place)
    """
    self.preprocessor.set_param(self.vectorizer)
    graphmanagers = self.preprocessor.fit_transform(input, self.vectorizer)
    # NOTE(review): `nu` is not defined in this scope — this raises NameError
    # unless a module-level `nu` exists; presumably a fixed value or a
    # parameter was intended. confirm.
    self.estimatorobject.fit(graphmanagers,
                             vectorizer=self.vectorizer,
                             nu=nu,
                             grammar_n_jobs=grammar_n_jobs,
                             random_state=self.random_state)
    self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)

    # temporary estimator used only to score/drive the core resampling below
    tempest = EstiWrap(nu=.5, grammar_n_jobs=grammar_n_jobs)
    tempest.fit(graphmanagers, vectorizer=self.vectorizer, random_state=self.random_state)
    '''
    HOW TO TRAIN NEW CORES?
    make a sampler with: estimator as estimator, interface-groups as input, dat filter for cip choosing
    '''

    def entitycheck(g, nodes):
        # reject any node that carries an 'interface' attribute, so the
        # sampler only ever modifies core nodes
        if type(nodes) is not list:
            nodes = [nodes]
        for e in nodes:
            if 'interface' in g.node[e]:
                return False
        return True

    prod = self.lsgg.productions
    for i, interface_hash in enumerate(prod.keys()):
        # NOTE(review): this compares a dict against train_min_size (int/None);
        # under py2 that is an arbitrary cross-type comparison — presumably
        # len(prod[interface_hash]) < train_min_size was intended. confirm.
        if prod[interface_hash] < train_min_size:
            continue
        print "################################# new ihash"
        # for all the interface buckets
        cips = prod[interface_hash].values()
        sampler = GraphLearnSampler(estimator=tempest, node_entity_check=entitycheck)
        graphs_wrapped = [GraphWrap(cip.graph, self.vectorizer) for cip in cips]
        graphs = [cip.graph for cip in cips]
        sampler.lsgg.fit(graphs_wrapped)
        sampler.preprocessor.fit(0, self.vectorizer)
        sampler.postprocessor.fit(sampler.preprocessor)
        r = sampler.sample(graphs,
                           max_size_diff=0,
                           select_cip_max_tries=100,
                           quick_skip_orig_cip=False,
                           improving_linear_start=.2,
                           improving_threshold=.6)
        # get graphs and sample them
        r = list(r)
        for j, raw_graph in enumerate(r):
            # for each resulting graph
            raw_graph.graph.pop('graph', None)
            score = tempest.score(raw_graph)
            if score > tempest.score(cips[j].graph):
                # check if the score is good enough, then add to grammar
                # NOTE(review): the score doubles as dict key and core_hash;
                # colliding scores would overwrite productions — confirm intended.
                self.lsgg.productions[interface_hash][score] = CoreInterfacePair(
                    interface_hash=cips[j].interface_hash,
                    core_hash=score,
                    graph=raw_graph,
                    radius=cips[j].radius,
                    thickness=cips[j].thickness,
                    core_nodes_count=len(raw_graph),
                    count=1,
                    distance_dict=cips[j].distance_dict)
                print 'new graph:', score
                draw.graphlearn(raw_graph)
def abstract(self, graph, score_attribute='importance', group='class', debug=False):
    '''Build the minor (abstract) graph of `graph` by clustering node scores.

    Nodes are annotated with `score_attribute` by the raw-graph estimator,
    bucketed by self.kmeans into a cluster id stored under `group`, and the
    graph is contracted on that attribute.  Edge-nodes of the expanded input
    graph are then assigned to the contracted node(s) they belong to.

    Parameters
    ----------
    graph: networkx graph (unexpanded)
    score_attribute: node attribute holding the score used for clustering
    group: node attribute that receives the cluster id and drives contraction
    debug: draw intermediate graphs when True

    Returns
    -------
    the edge-expanded minor graph; each node's 'contracted' set covers the
    base-graph nodes (including edge-nodes) it represents
    '''
    graph = self.vectorizer._edge_to_vertex_transform(graph)
    graph2 = self.vectorizer._revert_edge_to_vertex_transform(graph)
    if debug:
        print 'abstr here1'
        draw.graphlearn(graph2)
    # score every node with the estimator, then discretize the score into a
    # kmeans cluster id that serves as the contraction attribute
    graph2 = self.vectorizer.annotate([graph2], estimator=self.rawgraph_estimator.estimator).next()
    for n, d in graph2.nodes(data=True):
        #d[group]=str(math.floor(d[score_attribute]))
        d[group] = str(self.kmeans.predict(d[score_attribute])[0])
    if debug:
        print 'abstr here'
        draw.graphlearn(graph2, vertex_label='class')
    graph2 = contraction([graph2], contraction_attribute=group, modifiers=[], nesting=False).next()
    graph2 = self.vectorizer._edge_to_vertex_transform(graph2)

    # find out to which abstract node the edges belong
    # finding out where the edge-nodes belong, because the contractor cant possibly do this
    # maps: base-graph node -> the contracted (abstract) node that swallowed it
    getabstr = {contra: node for node, d in graph2.nodes(data=True) for contra in d.get('contracted', [])}
    for n, d in graph.nodes(data=True):
        if 'edge' in d:
            # if we have found an edge node...
            # lets see whos left and right of it:
            n1, n2 = graph.neighbors(n)
            # case1: ok those belong to the same gang so we most likely also belong there.
            if getabstr[n1] == getabstr[n2]:
                graph2.node[getabstr[n1]]['contracted'].add(n)
            # case2: neighbors belong to different gangs...
            else:
                # assign the edge-node to every abstract node adjacent to
                # both endpoints' abstract nodes
                blub = set(graph2.neighbors(getabstr[n1])) & set(graph2.neighbors(getabstr[n2]))
                for blob in blub:
                    if 'contracted' in graph2.node[blob]:
                        graph2.node[blob]['contracted'].add(n)
                    else:
                        # initialize the contracted set on demand
                        graph2.node[blob]['contracted'] = set([n])
    return graph2
def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
    """Fit grammar and estimator on `input`, then try to grow new grammar cores.

    NOTE(review): near-duplicate of the other fit() in this file; this variant
    uses sampler.transform(...) / tempest.predict(...) where the other uses
    sampler.sample(...) / tempest.score(...) — consider consolidating.

    Parameters
    ----------
    input: raw input accepted by self.preprocessor
    grammar_n_jobs: parallelism for grammar fitting / sampling
    grammar_batch_size: batch size for grammar fitting
    train_min_size: threshold an interface bucket must exceed to be resampled
        (NOTE(review): compared directly against a dict below — see comment there)

    Returns
    -------
    None (self.lsgg and self.estimatorobject are fitted in place)
    """
    self.preprocessor.set_param(self.vectorizer)
    graphmanagers = self.preprocessor.fit_transform(input, self.vectorizer)
    # NOTE(review): `nu` is not defined in this scope — this raises NameError
    # unless a module-level `nu` exists. confirm intended value.
    self.estimatorobject.fit(graphmanagers,
                             vectorizer=self.vectorizer,
                             nu=nu,
                             grammar_n_jobs=grammar_n_jobs,
                             random_state=self.random_state)
    self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)

    # temporary estimator used only to score/drive the core resampling below
    tempest = EstiWrap(nu=.5, grammar_n_jobs=grammar_n_jobs)
    tempest.fit(graphmanagers, vectorizer=self.vectorizer, random_state=self.random_state)
    '''
    HOW TO TRAIN NEW CORES?
    make a sampler with: estimator as estimator, interface-groups as input, dat filter for cip choosing
    '''

    def entitycheck(g, nodes):
        # reject any node that carries an 'interface' attribute, so the
        # sampler only ever modifies core nodes
        if type(nodes) is not list:
            nodes = [nodes]
        for e in nodes:
            if 'interface' in g.node[e]:
                return False
        return True

    prod = self.lsgg.productions
    for i, interface_hash in enumerate(prod.keys()):
        # NOTE(review): this compares a dict against train_min_size (int/None);
        # under py2 that is an arbitrary cross-type comparison — presumably
        # len(prod[interface_hash]) < train_min_size was intended. confirm.
        if prod[interface_hash] < train_min_size:
            continue
        print "################################# new ihash"
        # for all the interface buckets
        cips = prod[interface_hash].values()
        sampler = GraphLearnSampler(estimator=tempest, node_entity_check=entitycheck)
        graphs_wrapped = [GraphWrap(cip.graph, self.vectorizer) for cip in cips]
        graphs = [cip.graph for cip in cips]
        sampler.lsgg.fit(graphs_wrapped)
        sampler.preprocessor.fit(0, self.vectorizer)
        sampler.postprocessor.fit(sampler.preprocessor)
        r = sampler.transform(graphs,
                              size_constrained_core_choice=0,
                              select_cip_max_tries=100,
                              quick_skip_orig_cip=False,
                              improving_linear_start=.2,
                              improving_threshold=.6)
        # get graphs and sample them
        r = list(r)
        for j, raw_graph in enumerate(r):
            # for each resulting graph
            raw_graph.graph.pop('graph', None)
            score = tempest.predict(raw_graph)
            if score > tempest.predict(cips[j].graph):
                # check if the score is good enough, then add to grammar
                # NOTE(review): the score doubles as dict key and core_hash;
                # colliding scores would overwrite productions — confirm intended.
                self.lsgg.productions[interface_hash][score] = CoreInterfacePair(
                    interface_hash=cips[j].interface_hash,
                    core_hash=score,
                    graph=raw_graph,
                    radius=cips[j].radius,
                    thickness=cips[j].thickness,
                    core_nodes_count=len(raw_graph),
                    count=1,
                    distance_dict=cips[j].distance_dict)
                print 'new graph:', score
                draw.graphlearn(raw_graph)