Exemplo n.º 1
0
def name_estimation(graph, group, layer, graphreference, vectorizer, nameestimator, subgraphs):
    '''
    Annotate the nodes of graph with cluster names for one abstraction layer,
    then contract nodes that share a cluster id.

    Parameters
    ----------
    graph: the (expanded) graph whose nodes get a cluster id written into d[group]
    group: node-attribute name that receives the cluster id
    layer: layer index, used only to build the "L<layer>C<cluster>" labels
    graphreference: unaltered original graph; supplies labels for unclustered nodes
    vectorizer: eden-style vectorizer with .transform()
    nameestimator: clusterer with .predict(); predicts -1 for "no cluster"
    subgraphs: candidate subgraphs to be clustered (may be empty/None)

    Returns
    -------
    the contracted graph with fresh 'label' attributes
    '''
    if subgraphs:
        # strip eden annotations in place; the original used map() purely for
        # its side effect, which a plain loop states more clearly
        for subgraph in subgraphs:
            remove_eden_annotation(subgraph)
        try:
            data = vectorizer.transform(subgraphs)
        except Exception:
            # draw the offending subgraphs for debugging, then re-raise:
            # swallowing the error here would leave `data` unbound and crash
            # below with a confusing NameError on the predict() call.
            draw.graphlearn(subgraphs, contract=False)
            raise
        clusterids = nameestimator.predict(data)

        for sg, clid in zip(subgraphs, clusterids):
            for n in sg.nodes():
                # -1 means "noise"/no cluster; mark with '-' so the
                # contraction step below leaves those nodes alone
                graph.node[n][group] = '-' if clid == -1 else str(clid)

    # contract nodes that share the same cluster id; '-' nodes stay uncontracted
    graph = contraction([graph], contraction_attribute=group, modifiers=[],
                        nesting=False, dont_contract_attribute_symbol='-').next()

    # write labels: unclustered nodes inherit a label from the reference graph,
    # clustered ones get a synthetic "L<layer>C<cluster>" name
    def f(n, d):
        d['label'] = graphreference.node[max(d['contracted'])]['label'] \
            if d['label'] == '-' else "L%sC%s" % (layer, d['label'])

    node_operation(graph, f)
    return graph
 def _revert_edge_to_vertex_transform(self,graph):
     # making it to a normal graph before we revert
     graph=nx.Graph(graph)
     try:
         graph=self.vectorizer._revert_edge_to_vertex_transform(graph)
         return graph
     except:
         print 'rnasampler: revert edge to vertex transform failed'
         draw.graphlearn(graph,contract=False, size=20)
Exemplo n.º 3
0
    def fit(self, graphs,graphs_neg=[], fit_transform=False):
        '''
        TODO: be sure to set the self.cluster_ids :)

        Parameters
        ----------
        graphs

        Returns
        -------

        '''
        #  PREPARE
        graphs = list(graphs)
        graphs_neg = list(graphs_neg)
        if graphs[0].graph.get('expanded', False):
            raise Exception('give me an unexpanded graph')
        self.prepfit()

        # info
        if self.debug:
            print 'minortransform_fit'
            draw.graphlearn(graphs[:5], contract=False, size=5, vertex_label='label')


        # annotate graphs and GET SUBGRAPHS
        graphs,graphs_neg = self.annotator.fit_transform(graphs,graphs_neg)

        #draw.graphlearn([graphs[0], graphs_neg[-1]], vertex_label='importance')
        # info
        if self.debug:
            print 'minortransform_scores'
            draw.graphlearn(graphs[:5], contract=False, size=5, vertex_label='importance')
            # vertex_color='importance', colormap='inferno')


        subgraphs = list(self.abstractor.get_subgraphs(graphs+graphs_neg))
        #if self.num_classes==2:
        #    nusgs = list(self.abstractor.get_subgraphs(graphs_neg))
        #    #draw.graphlearn([nusgs[0],subgraphs[-1]],vertex_label='importance')
        #    subgraphs += nusgs

        # FILTER UNIQUES AND TRAIN THE CLUSTERER
        self.cluster_classifier.fit(subgraphs)
        self.abstractor.nameestimator = self.cluster_classifier

        # annotating is super slow. so in case of fit_transform i can save that step
        if fit_transform:
            return  self.transform(graphs) if self.num_classes ==1 else (self.transform(graphs),self.transform(graphs_neg))
Exemplo n.º 4
0
    def re_transform_single(self, graph):
        '''
        Recompute the abstraction for a single graph and wrap the result.

        Parameters
        ----------
        graph: graph to postprocess

        Returns
        -------
        a postprocessed graphwrapper
        '''
        # NOTE: these draw calls run unconditionally -- presumably debug
        # leftovers; kept to preserve behavior
        draw.graphlearn(graph)
        minor = self.abstract(graph, debug=False)
        draw.graphlearn([graph, minor])
        return ScoreGraphWrapper(minor,
                                 graph,
                                 self.vectorizer,
                                 self.base_thickness_list)
Exemplo n.º 5
0
    def transform(self, graphs):
        '''
        Parameters
        ----------
        inputs: [graph]

        Returns
        -------
            [(edge_expanded_graph, minor),...]
        '''

        graphs = self.annotator.transform(graphs)
        result = self.abstractor.transform(graphs)
        if self.debug:
            print 'minortransform  transform. the new layer  '
            draw.graphlearn(result[:5], contract=False, size=6, vertex_label='contracted')
        return result
Exemplo n.º 6
0
    def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
        """
          use input to fit the grammar and fit the estimator

          After the initial grammar/estimator fit, this retrains new "cores":
          for every sufficiently large interface bucket of the grammar it
          samples variations of the stored CIP graphs and keeps those that
          score better than the originals.
        """
        self.preprocessor.set_param(self.vectorizer)

        graphmanagers = self.preprocessor.fit_transform(input,self.vectorizer)

        # NOTE(review): `nu` is not defined anywhere in this scope -- as written
        # this raises NameError. Presumably it should be a parameter or
        # self.nu; TODO confirm against the rest of the class.
        self.estimatorobject.fit(graphmanagers,
                                 vectorizer=self.vectorizer,
                                 nu=nu,
                                 grammar_n_jobs=grammar_n_jobs,
                                 random_state=self.random_state)

        self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)


        # secondary estimator used only to score the resampled cores below
        tempest= EstiWrap(nu=.5,  grammar_n_jobs=grammar_n_jobs)
        tempest.fit(graphmanagers,
                    vectorizer=self.vectorizer,

                    random_state=self.random_state)




        '''
        HOW TO TRAIN NEW CORES?
        make a sampler
        with: estimator as estimator, interface-groups as input, dat filter for cip choosing
        '''


        # reject candidate nodes that carry an 'interface' mark, so sampling
        # never alters interface nodes
        def entitycheck(g,nodes):
            if type(nodes) is not list:
                nodes=[nodes]
            for e in nodes:
                if 'interface' in g.node[e]:
                    return False
            return True

        prod=self.lsgg.productions

        for i, interface_hash in enumerate(prod.keys()):

            # NOTE(review): this compares a dict to a number (Python 2
            # mixed-type comparison) -- probably intended
            # len(prod[interface_hash]) < train_min_size. TODO confirm.
            if prod[interface_hash] < train_min_size:
                continue
            print "################################# new ihash"
            # for all the interface buckets
            cips=prod[interface_hash].values()
            sampler=GraphLearnSampler(estimator=tempest,node_entity_check=entitycheck)
            graphs_wrapped=[ GraphWrap(cip.graph, self.vectorizer) for cip in cips ]
            graphs=[ cip.graph for cip in cips ]

            # train a private sampler on just this bucket's core graphs
            sampler.lsgg.fit(graphs_wrapped)
            sampler.preprocessor.fit(0,self.vectorizer)
            sampler.postprocessor.fit(sampler.preprocessor)
            r=sampler.sample(graphs, max_size_diff=0, select_cip_max_tries=100, quick_skip_orig_cip=False,
                             improving_linear_start=.2, improving_threshold=.6)

            # get graphs and sample them
            r= list(r)

            for j, raw_graph in enumerate(r):
                # for each resulting graph
                raw_graph.graph.pop('graph',None)
                score= tempest.score(raw_graph)
                if score > tempest.score(cips[j].graph):
                    # check if the score is good enough, then add to grammar
                    # NOTE(review): the score doubles as both the production key
                    # and the core_hash here -- verify that is intentional
                    self.lsgg.productions[interface_hash][score]=CoreInterfacePair(
                         interface_hash=cips[j].interface_hash,
                         core_hash=score,
                         graph=raw_graph,
                         radius=cips[j].radius,
                         thickness=cips[j].thickness,
                         core_nodes_count=len(raw_graph),
                         count=1,
                         distance_dict=cips[j].distance_dict)
                    print 'new graph:',score
                    draw.graphlearn(raw_graph)
Exemplo n.º 7
0
    def abstract(self,
                 graph,
                 score_attribute='importance',
                 group='class',
                 debug=False):
        '''
        Build the abstract (minor) graph: annotate nodes, cluster them with
        kmeans on the score attribute, contract same-cluster nodes, and
        re-attach the expanded edge-nodes to the right abstract nodes.

        Parameters
        ----------
        graph: input graph (will be edge-to-vertex expanded internally)
        score_attribute: node attribute fed to the kmeans predictor
        group: node attribute that receives the predicted cluster id
        debug: when True, print and draw intermediate graphs

        Returns
        -------
        the edge-expanded abstract graph
        '''

        # work on the expanded graph, but annotate/contract the reverted one
        graph = self.vectorizer._edge_to_vertex_transform(graph)
        graph2 = self.vectorizer._revert_edge_to_vertex_transform(graph)

        if debug:
            print 'abstr here1'
            draw.graphlearn(graph2)

        graph2 = self.vectorizer.annotate(
            [graph2], estimator=self.rawgraph_estimator.estimator).next()

        # assign each node a cluster id; nodes sharing an id get contracted
        for n, d in graph2.nodes(data=True):
            #d[group]=str(math.floor(d[score_attribute]))
            d[group] = str(self.kmeans.predict(d[score_attribute])[0])

        if debug:
            print 'abstr here'
            draw.graphlearn(graph2, vertex_label='class')

        graph2 = contraction([graph2],
                             contraction_attribute=group,
                             modifiers=[],
                             nesting=False).next()
        graph2 = self.vectorizer._edge_to_vertex_transform(graph2)

        # find out to which abstract node the edges belong
        # finding out where the edge-nodes belong, because the contractor cant possibly do this
        # reverse lookup: original node id -> abstract node that contracted it
        getabstr = {
            contra: node
            for node, d in graph2.nodes(data=True)
            for contra in d.get('contracted', [])
        }

        for n, d in graph.nodes(data=True):
            if 'edge' in d:
                # if we have found an edge node...
                # lets see whos left and right of it:
                n1, n2 = graph.neighbors(n)
                # case1: ok those belong to the same gang so we most likely also belong there.
                if getabstr[n1] == getabstr[n2]:
                    graph2.node[getabstr[n1]]['contracted'].add(n)

                # case2: neighbors belong to different gangs...
                # attach the edge-node to every abstract edge-node that sits
                # between the two gangs (common neighbors of both)
                else:
                    blub = set(graph2.neighbors(getabstr[n1])) & set(
                        graph2.neighbors(getabstr[n2]))
                    for blob in blub:
                        if 'contracted' in graph2.node[blob]:
                            graph2.node[blob]['contracted'].add(n)
                        else:
                            graph2.node[blob]['contracted'] = set([n])
        return graph2
Exemplo n.º 8
0
    def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
        """
          use input to fit the grammar and fit the estimator

          After the base fit, new grammar cores are generated per interface
          bucket by resampling the stored CIP graphs and keeping the ones
          that out-score the originals.
        """
        self.preprocessor.set_param(self.vectorizer)

        graphmanagers = self.preprocessor.fit_transform(input,self.vectorizer)

        # NOTE(review): `nu` is undefined in this scope -- this line raises
        # NameError as written; likely meant a parameter or self.nu. TODO confirm.
        self.estimatorobject.fit(graphmanagers,
                                 vectorizer=self.vectorizer,
                                 nu=nu,
                                 grammar_n_jobs=grammar_n_jobs,
                                 random_state=self.random_state)

        self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)


        # helper estimator used solely to judge the resampled cores
        tempest= EstiWrap(nu=.5,  grammar_n_jobs=grammar_n_jobs)
        tempest.fit(graphmanagers,
                    vectorizer=self.vectorizer,

                    random_state=self.random_state)




        '''
        HOW TO TRAIN NEW CORES?
        make a sampler
        with: estimator as estimator, interface-groups as input, dat filter for cip choosing
        '''


        # filter: refuse nodes marked as 'interface' so sampling only
        # touches core nodes
        def entitycheck(g,nodes):
            if type(nodes) is not list:
                nodes=[nodes]
            for e in nodes:
                if 'interface' in g.node[e]:
                    return False
            return True

        prod=self.lsgg.productions

        for i, interface_hash in enumerate(prod.keys()):

            # NOTE(review): dict-vs-number comparison (Python 2 mixed-type
            # ordering); almost certainly intended
            # len(prod[interface_hash]) < train_min_size. TODO confirm.
            if prod[interface_hash] < train_min_size:
                continue
            print "################################# new ihash"
            # for all the interface buckets
            cips=prod[interface_hash].values()
            sampler=GraphLearnSampler(estimator=tempest,node_entity_check=entitycheck)
            graphs_wrapped=[ GraphWrap(cip.graph, self.vectorizer) for cip in cips ]
            graphs=[ cip.graph for cip in cips ]

            # fit a bucket-local sampler on these cores only
            sampler.lsgg.fit(graphs_wrapped)
            sampler.preprocessor.fit(0,self.vectorizer)
            sampler.postprocessor.fit(sampler.preprocessor)
            r=sampler.transform(graphs, size_constrained_core_choice=0, select_cip_max_tries=100, quick_skip_orig_cip=False,
                                improving_linear_start=.2, improving_threshold=.6)

            # get graphs and sample them
            r= list(r)

            for j, raw_graph in enumerate(r):
                # for each resulting graph
                raw_graph.graph.pop('graph',None)
                score= tempest.predict(raw_graph)
                if score > tempest.predict(cips[j].graph):
                    # check if the score is good enough, then add to grammar
                    # NOTE(review): score is reused as both the production key
                    # and core_hash -- verify that is intentional
                    self.lsgg.productions[interface_hash][score]=CoreInterfacePair(
                         interface_hash=cips[j].interface_hash,
                         core_hash=score,
                         graph=raw_graph,
                         radius=cips[j].radius,
                         thickness=cips[j].thickness,
                         core_nodes_count=len(raw_graph),
                         count=1,
                         distance_dict=cips[j].distance_dict)
                    print 'new graph:',score
                    draw.graphlearn(raw_graph)