def sample(graphs):
    """Fit a fresh ``GraphLearnSampler`` on *graphs* and sample from the same graphs.

    The input iterable is duplicated with ``itertools.tee``: one copy is
    handed to ``fit`` and the other is used as the input of
    ``sampler.sample``.  The sampler output is passed through ``unpack``
    and returned.
    """
    fit_graphs, seed_graphs = itertools.tee(graphs)

    sampler = GraphLearnSampler()
    sampler.fit(fit_graphs)

    # Keyword arguments forwarded verbatim to the sampler.
    sampling_options = dict(
        same_radius=False,
        max_size_diff=False,
        sampling_interval=9999,
        select_cip_max_tries=100,
        batch_size=30,
        n_steps=100,
        n_jobs=-1,
        improving_threshold=0.9,
    )
    sampled = sampler.sample(seed_graphs, **sampling_options)
    return unpack(sampled)
def sample(graphs):
    """Fit a fresh ``GraphLearnSampler`` on *graphs* and transform the same graphs.

    The input iterable is duplicated with ``itertools.tee``: one copy is
    handed to ``fit`` and the other is used as the input of
    ``sampler.transform``.  The sampler output is passed through ``unpack``
    and returned.
    """
    fit_graphs, seed_graphs = itertools.tee(graphs)

    sampler = GraphLearnSampler()
    sampler.fit(fit_graphs)

    # Keyword arguments forwarded verbatim to the sampler.
    transform_options = dict(
        same_radius=False,
        size_constrained_core_choice=False,
        sampling_interval=9999,
        select_cip_max_tries=100,
        batch_size=30,
        n_steps=100,
        n_jobs=-1,
        improving_threshold=0.9,
    )
    transformed = sampler.transform(seed_graphs, **transform_options)
    return unpack(transformed)
def _sample(self, graph):
    """Sample *graph* with ``GraphLearnSampler._sample`` and refit when due.

    After delegating to ``GraphLearnSampler._sample``, the features are
    rebuilt (``create_features``) and ``cip_fit`` is called, but only when
    both of these hold:

    * ``num_scores`` exceeds ``MIN_SCORES_2_PREDICT``;
    * the number of scores gathered since the last fit
      (``num_scores - num_scores_fitted``) exceeds ``MIN_BATCH_SIZE``.

    Returns whatever ``GraphLearnSampler._sample`` returned.
    """
    result = GraphLearnSampler._sample(self, graph)

    enough_scores = self.num_scores > self.MIN_SCORES_2_PREDICT
    if enough_scores:
        unfitted_scores = self.num_scores - self.num_scores_fitted
        if unfitted_scores > self.MIN_BATCH_SIZE:
            self.create_features()
            self.cip_fit()
            print("FITTED ", self.num_scores_fitted)

    return result
def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
    """Fit the grammar and the estimator on *input*, then try to add new cores.

    Steps, in order:

    1. Preprocess *input* into graph managers.
    2. Fit ``self.estimatorobject`` and ``self.lsgg`` on them.
    3. Fit a temporary ``EstiWrap`` estimator (``nu=.5``) on the same data.
    4. For every interface bucket in ``self.lsgg.productions``, sample from
       the graphs stored in that bucket with a ``GraphLearnSampler`` driven
       by the temporary estimator.  Whenever a sampled graph scores higher
       than the graph it was sampled from, store it in the bucket as a new
       ``CoreInterfacePair`` keyed by its score.

    Args:
        input: data handed to ``self.preprocessor.fit_transform``.
        grammar_n_jobs: forwarded to the estimator fit, the grammar fit and
            the temporary ``EstiWrap``.
        grammar_batch_size: forwarded to ``self.lsgg.fit``.
        train_min_size: buckets holding fewer entries than this are skipped.
            ``None`` (the default) disables the filter.
    """
    self.preprocessor.set_param(self.vectorizer)
    graphmanagers = self.preprocessor.fit_transform(input, self.vectorizer)

    # NOTE(review): ``nu`` is not defined inside this method; presumably it
    # is a module-level name -- confirm, otherwise this raises NameError.
    self.estimatorobject.fit(graphmanagers,
                             vectorizer=self.vectorizer,
                             nu=nu,
                             grammar_n_jobs=grammar_n_jobs,
                             random_state=self.random_state)
    self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)

    tempest = EstiWrap(nu=.5, grammar_n_jobs=grammar_n_jobs)
    tempest.fit(graphmanagers,
                vectorizer=self.vectorizer,
                random_state=self.random_state)

    # HOW TO TRAIN NEW CORES?
    # make a sampler with: estimator as estimator, interface-groups as input,
    # dat filter for cip choosing

    def entitycheck(g, nodes):
        """Return False as soon as one of *nodes* carries an 'interface' attribute."""
        if not isinstance(nodes, list):
            nodes = [nodes]
        for e in nodes:
            if 'interface' in g.node[e]:
                return False
        return True

    prod = self.lsgg.productions
    # Snapshot the keys: the buckets are modified while we walk over them.
    for interface_hash in list(prod.keys()):
        bucket = prod[interface_hash]

        # Compare the bucket *size* with the threshold.  The previous code
        # compared the bucket object itself with the threshold, which never
        # filtered anything.
        if train_min_size is not None and len(bucket) < train_min_size:
            continue
        print("################################# new ihash")

        # for all the interface buckets
        # Materialise the values: ``cips`` is indexed below while the bucket
        # receives new entries.
        cips = list(bucket.values())
        sampler = GraphLearnSampler(estimator=tempest, node_entity_check=entitycheck)
        graphs_wrapped = [GraphWrap(cip.graph, self.vectorizer) for cip in cips]
        graphs = [cip.graph for cip in cips]

        sampler.lsgg.fit(graphs_wrapped)
        sampler.preprocessor.fit(0, self.vectorizer)
        sampler.postprocessor.fit(sampler.preprocessor)

        # get graphs and sample them
        r = sampler.sample(graphs,
                           max_size_diff=0,
                           select_cip_max_tries=100,
                           quick_skip_orig_cip=False,
                           improving_linear_start=.2,
                           improving_threshold=.6)
        r = list(r)

        # NOTE(review): pairing r[j] with cips[j] assumes the sampler yields
        # exactly one result per input graph, in input order -- confirm.
        for j, raw_graph in enumerate(r):
            # for each resulting graph
            raw_graph.graph.pop('graph', None)
            score = tempest.score(raw_graph)
            if score > tempest.score(cips[j].graph):
                # check if the score is good enough, then add to grammar
                self.lsgg.productions[interface_hash][score] = CoreInterfacePair(
                    interface_hash=cips[j].interface_hash,
                    core_hash=score,
                    graph=raw_graph,
                    radius=cips[j].radius,
                    thickness=cips[j].thickness,
                    core_nodes_count=len(raw_graph),
                    count=1,
                    distance_dict=cips[j].distance_dict)
                print('new graph: %s' % (score,))
                draw.graphlearn(raw_graph)
# ok we create an iterator over the graphs we want to work with...
# NOTE(review): ``sample`` is used here as the selection argument of
# ``picker``; presumably a collection of indices -- confirm.
graphs_pos = picker(gspan_to_eden(path + 'bursi.pos.gspan'), sample)

# save results here:
originals = []
improved = []

# we want to use an increasing part of the test set..
percentages = [.1, .2, .4, .6, .8, 1]

sampler = GraphLearnSampler()
for perc in percentages:
    # we work with count many graphs
    count = int(lenpo * perc)

    # make copy of graphiterator, so the original stays usable in the
    # next iteration
    graphs_pos, graphs_pos_ = itertools.tee(graphs_pos)

    # select count random elements
    # ``random.shuffle`` works in place and needs a mutable sequence, so the
    # index range is materialised as a list first.
    x = list(range(count))
    random.shuffle(x)
    graphs_pos_ = picker(graphs_pos_, x)

    # triplicate the count long iterator
    graphs_pos_, graphs_pos__, graphs_pos___ = itertools.tee(graphs_pos_, 3)
def __init__(self, *args, **kwargs):
    """Run both initialisers on this instance and set the learning flags.

    All positional and keyword arguments are forwarded unchanged to
    ``GraphLearnSampler.__init__``; ``CipPredictor.__init__`` is called
    with no extra arguments.
    """
    GraphLearnSampler.__init__(self, *args, **kwargs)
    CipPredictor.__init__(self)

    # Flags are assigned after both initialisers, so these values win over
    # anything the initialisers may have set.
    self.intelligent_cip_selection = False
    self.online_learning = True
def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
    """Fit the grammar and the estimator on *input*, then try to add new cores.

    Steps, in order:

    1. Preprocess *input* into graph managers.
    2. Fit ``self.estimatorobject`` and ``self.lsgg`` on them.
    3. Fit a temporary ``EstiWrap`` estimator (``nu=.5``) on the same data.
    4. For every interface bucket in ``self.lsgg.productions``, run a
       ``GraphLearnSampler`` driven by the temporary estimator over the
       graphs stored in that bucket.  Whenever a resulting graph gets a
       higher prediction than the graph it was derived from, store it in the
       bucket as a new ``CoreInterfacePair`` keyed by its score.

    Args:
        input: data handed to ``self.preprocessor.fit_transform``.
        grammar_n_jobs: forwarded to the estimator fit, the grammar fit and
            the temporary ``EstiWrap``.
        grammar_batch_size: forwarded to ``self.lsgg.fit``.
        train_min_size: buckets holding fewer entries than this are skipped.
            ``None`` (the default) disables the filter.
    """
    self.preprocessor.set_param(self.vectorizer)
    graphmanagers = self.preprocessor.fit_transform(input, self.vectorizer)

    # NOTE(review): ``nu`` is not defined inside this method; presumably it
    # is a module-level name -- confirm, otherwise this raises NameError.
    self.estimatorobject.fit(graphmanagers,
                             vectorizer=self.vectorizer,
                             nu=nu,
                             grammar_n_jobs=grammar_n_jobs,
                             random_state=self.random_state)
    self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)

    tempest = EstiWrap(nu=.5, grammar_n_jobs=grammar_n_jobs)
    tempest.fit(graphmanagers,
                vectorizer=self.vectorizer,
                random_state=self.random_state)

    # HOW TO TRAIN NEW CORES?
    # make a sampler with: estimator as estimator, interface-groups as input,
    # dat filter for cip choosing

    def entitycheck(g, nodes):
        """Return False as soon as one of *nodes* carries an 'interface' attribute."""
        if not isinstance(nodes, list):
            nodes = [nodes]
        for e in nodes:
            if 'interface' in g.node[e]:
                return False
        return True

    prod = self.lsgg.productions
    # Snapshot the keys: the buckets are modified while we walk over them.
    for interface_hash in list(prod.keys()):
        bucket = prod[interface_hash]

        # Compare the bucket *size* with the threshold.  The previous code
        # compared the bucket object itself with the threshold, which never
        # filtered anything.
        if train_min_size is not None and len(bucket) < train_min_size:
            continue
        print("################################# new ihash")

        # for all the interface buckets
        # Materialise the values: ``cips`` is indexed below while the bucket
        # receives new entries.
        cips = list(bucket.values())
        sampler = GraphLearnSampler(estimator=tempest, node_entity_check=entitycheck)
        graphs_wrapped = [GraphWrap(cip.graph, self.vectorizer) for cip in cips]
        graphs = [cip.graph for cip in cips]

        sampler.lsgg.fit(graphs_wrapped)
        sampler.preprocessor.fit(0, self.vectorizer)
        sampler.postprocessor.fit(sampler.preprocessor)

        # get graphs and sample them
        r = sampler.transform(graphs,
                              size_constrained_core_choice=0,
                              select_cip_max_tries=100,
                              quick_skip_orig_cip=False,
                              improving_linear_start=.2,
                              improving_threshold=.6)
        r = list(r)

        # NOTE(review): pairing r[j] with cips[j] assumes the sampler yields
        # exactly one result per input graph, in input order -- confirm.
        for j, raw_graph in enumerate(r):
            # for each resulting graph
            raw_graph.graph.pop('graph', None)
            score = tempest.predict(raw_graph)
            if score > tempest.predict(cips[j].graph):
                # check if the score is good enough, then add to grammar
                self.lsgg.productions[interface_hash][score] = CoreInterfacePair(
                    interface_hash=cips[j].interface_hash,
                    core_hash=score,
                    graph=raw_graph,
                    radius=cips[j].radius,
                    thickness=cips[j].thickness,
                    core_nodes_count=len(raw_graph),
                    count=1,
                    distance_dict=cips[j].distance_dict)
                print('new graph: %s' % (score,))
                draw.graphlearn(raw_graph)