def name_estimation(graph, group, layer, graphreference, vectorizer, nameestimator, subgraphs):
    '''
    Assign a cluster name to each subgraph, contract the graph and relabel it.

    Parameters
    ----------
    graph: graph whose nodes are annotated under ``group`` and then contracted
    group: string
        node attribute that receives the cluster name; it is also handed to
        ``contraction`` as the contraction attribute
    layer: value formatted into the "L<layer>C<cluster>" labels
    graphreference: graph that supplies the label for nodes whose label is '-'
    vectorizer: object whose ``transform(subgraphs)`` produces the input
        for ``nameestimator.predict``
    nameestimator: object whose ``predict`` returns one cluster id per subgraph
    subgraphs: list of graphs whose node ids are also node ids of ``graph``;
        when empty, no cluster names are assigned

    Returns
    -------
    the first graph produced by ``contraction``, with its node labels rewritten
    '''
    if subgraphs:
        # Explicit loop: map() is lazy on Python 3 and would skip the cleanup.
        for subgraph in subgraphs:
            remove_eden_annotation(subgraph)
        try:
            data = vectorizer.transform(subgraphs)
        except Exception:
            # Draw the offending subgraphs for debugging, then propagate the
            # real error; falling through would only fail later on the
            # unbound name `data`.
            draw.graphlearn(subgraphs, contract=False)
            raise
        clusterids = nameestimator.predict(data)

        for sg, clid in zip(subgraphs, clusterids):
            # A cluster id of -1 is mapped to '-', the symbol that is passed
            # to contraction() as dont_contract_attribute_symbol.
            cluster_name = '-' if clid == -1 else str(clid)
            for n in sg.nodes():
                graph.node[n][group] = cluster_name

    # doing the contraction...
    # next(...) instead of .next() so this works on Python 2.6+ and Python 3.
    graph = next(contraction([graph],
                             contraction_attribute=group,
                             modifiers=[],
                             nesting=False,
                             dont_contract_attribute_symbol='-'))

    # write labels
    def f(n, d):
        # Nodes labelled '-' take the reference graph's label of the largest
        # id in d['contracted']; all others get a layer/cluster label.
        # NOTE(review): assumes contraction() fills 'label' and 'contracted'
        # on every node -- confirm against its implementation.
        if d['label'] == '-':
            d['label'] = graphreference.node[max(d['contracted'])]['label']
        else:
            d['label'] = "L%sC%s" % (layer, d['label'])

    node_operation(graph, f)
    return graph
def _transform_single(self, graph):
    '''
    Build the contracted version of a single graph.

    Components are extracted with ThresholdedConnectedComponents, using
    self.score_attribute, self.score_threshold, self.min_size and
    self.max_size. Nodes inside a component are marked '~' under
    self.group_attribute, all other nodes '-'. name_estimation then
    produces the contracted graph.

    Parameters
    ----------
    graph: graph to transform; the group attribute is written through
        node_operation on this graph (a copy is taken first)

    Returns
    -------
    the graph returned by name_estimation, with its nodes renumbered to
    consecutive ids above the largest id of the input graph, and with
    graph.graph['original'] (the copy of the input) and
    graph.graph['layer'] set
    '''
    original = graph.copy()
    highest_id = max(graph.nodes())

    extractor = ThresholdedConnectedComponents(attribute=self.score_attribute,
                                               more_than=False,
                                               shrink_graphs=True)
    components = extractor._extract_ccomponents(graph,
                                                threshold=self.score_threshold,
                                                min_size=self.min_size,
                                                max_size=self.max_size)

    # Collect every node id that belongs to one of the extracted components.
    member_nodes = set()
    for component in components:
        member_nodes.update(component.nodes())

    def mark_membership(n, d):
        if n in member_nodes:
            d[self.group_attribute] = '~'
        else:
            d[self.group_attribute] = '-'

    node_operation(graph, mark_membership)

    # name_estimation may overwrite the group attribute of component nodes
    # with estimated names before it performs the contraction.
    graph = name_estimation(graph,
                            self.group_attribute,
                            self.layer,
                            original,
                            self.vectorizer,
                            self.nameestimator,
                            components)

    # Renumber the resulting nodes so the new ids start right above the
    # largest id of the input graph.
    first_new_id = highest_id + 1
    renaming = {}
    for offset, old_id in enumerate(graph.nodes()):
        renaming[old_id] = first_new_id + offset
    graph = nx.relabel_nodes(graph, renaming, copy=False)

    graph.graph['original'] = original
    graph.graph['layer'] = self.layer
    return graph
def fit(self, graphs_pos, graphs_neg=None):
    '''
    Train self.estimator on the given graphs; do nothing if already trained.

    Parameters
    ----------
    graphs_pos: list of graphs
        positive examples
    graphs_neg: list of graphs, optional
        negative examples; when given and non-empty an SGDClassifier is
        trained on classes 1 (positive) and -1 (negative), otherwise an
        ExperimentalOneClassEstimator is trained on graphs_pos alone

    Returns
    -------
    self
    '''
    if self.trained:
        return self
    if graphs_neg is None:
        graphs_neg = []

    all_graphs = graphs_pos + graphs_neg

    # Strip earlier annotations. An explicit loop is used because map() is
    # lazy on Python 3 and would never perform these side effects.
    for graph in all_graphs:
        utils.remove_eden_annotation(graph)
        utils.node_operation(graph, lambda n, d: d.pop('importance', None))
        graph.graph.pop('mass_annotate_mp_was_here', None)

    if graphs_neg:
        features = self.vectorizer.transform(all_graphs)
        classes = [1] * len(graphs_pos) + [-1] * len(graphs_neg)
        estimator = SGDClassifier()
        estimator.fit(features, classes)
    else:
        features = self.vectorizer.transform(graphs_pos)
        estimator = ExperimentalOneClassEstimator()
        estimator.fit(features)

    # Only publish the estimator and mark the object as trained once fitting
    # succeeded; otherwise a failed fit would block every later retry.
    self.estimator = estimator
    self.trained = True
    return self