def candidate_generator(self, seed_graphs):
    """Generate candidates.

    Parameters
    ----------
    seed_graphs : networkx graphs
        The iterator over the seed graphs, i.e. the graphs that
        are used as a starting point for the proposal.
    """
    t0 = time.time()
    # Annotate each vertex with an 'importance' score taken from the
    # fitted predictor.
    annotated = transform(
        seed_graphs,
        program=AnnotateImportance(
            program=self.fit_wrapped_predictor.program))
    annotated = list(annotated)
    logger.debug('Working on %d graphs' % len(annotated))
    # Flag primer positions with the 'exclude' attribute so they are
    # never chosen for substitution.
    excluded = transform(
        annotated,
        program=MarkWithIntervals(quadruples=self.exclusion_quadruples))
    # Pick the n_substitutions vertices with the largest (reverse=True)
    # 'importance' among non-excluded ones and tag them 'selected'=True.
    marked = transform(
        excluded,
        program=MarkKTop(attribute='importance',
                         exclude_attribute='exclude',
                         ktop=self.n_substitutions,
                         reverse=True,
                         mark_attribute='selected'))
    # Enumerate every combination of symbols on the selected vertices.
    combos = transform(
        marked,
        program=ReplaceWithAllCombinations(attribute='selected',
                                           label_list=self.label_list))
    # Refold the sequences so structure reflects the substitutions.
    refolded = transform(combos, program=self.seq_to_structure_prog)
    # Materialize and report the candidate graphs.
    candidate_graphs = list(refolded)
    delta_time = datetime.timedelta(seconds=(time.time() - t0))
    logger.info('Candidate generation took: %s' % (str(delta_time)))
    logger.info('Number of candidates: %d' % (len(candidate_graphs)))
    return candidate_graphs
def candidate_generator(self, seed_graphs):
    """Generate candidates.

    Parameters
    ----------
    seed_graphs : networkx graphs
        The iterator over the seed graphs, i.e. the graphs that
        are used as a starting point for the proposal.
    """
    begin = time.time()
    # Score every vertex with the fitted predictor's importance.
    gs = list(transform(
        seed_graphs,
        program=AnnotateImportance(
            program=self.fit_wrapped_predictor.program)))
    logger.debug('Working on %d graphs' % len(gs))
    # Exclude primer regions from substitution by tagging their
    # vertices with the 'exclude' attribute.
    gs = transform(gs, program=MarkWithIntervals(
        quadruples=self.exclusion_quadruples))
    # Tag the ktop highest-'importance' non-excluded vertices
    # with 'selected'=True.
    gs = transform(gs, program=MarkKTop(attribute='importance',
                                        exclude_attribute='exclude',
                                        ktop=self.n_substitutions,
                                        reverse=True,
                                        mark_attribute='selected'))
    # Produce one graph per symbol combination over the selected
    # vertices.
    gs = transform(gs, program=ReplaceWithAllCombinations(
        attribute='selected', label_list=self.label_list))
    # Refold to account for structural changes after substitution.
    gs = transform(gs, program=self.seq_to_structure_prog)
    candidate_graphs = list(gs)
    delta_time = datetime.timedelta(seconds=(time.time() - begin))
    logger.info('Candidate generation took: %s' % (str(delta_time)))
    logger.info('Number of candidates: %d' % (len(candidate_graphs)))
    return candidate_graphs
def transform(self, orig_graphs=None):
    """transform."""
    try:
        result = self._transform(orig_graphs)
        # After contraction each node pools several original nodes:
        # collapse their 'label' attributes into a categorical
        # histogram, and sum their 'weight' attributes.
        modifiers = [
            contraction_modifier(attribute_in='label',
                                 attribute_out='label',
                                 reduction='categorical'),
            contraction_modifier(attribute_in='weight',
                                 attribute_out='weight',
                                 reduction='sum'),
        ]
        priors = dict(
            nesting=self.nesting,
            weight_scaling_factor=1,
            original_edges_to_nesting=self.original_edges_to_nesting)
        # Contract on the 'max_clique_hash' attribute using the
        # modifiers above.
        result = transform(
            result,
            program=Contract(modifiers=modifiers,
                             contraction_attribute='max_clique_hash'),
            parameters_priors=priors)
        return result
    except Exception as e:
        # Best-effort: a failed iteration is logged at debug level and
        # the method falls through, implicitly returning None.
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def efficient_selection(self, candidate_graphs, known_graphs=None):
    """Propose a small number of alternative structures.

    Parameters
    ----------
    candidate_graphs : networkx graphs
        The iterator over the seed graphs, i.e. the graphs that
        are used as a starting point for the proposal.

    known_graphs : networkx graphs
        The iterator over the already known graphs. These are used
        to bias the exploration towards less similar proposals.
    """
    start = time.time()
    # Annotate candidates with per-vertex importance from the fitted
    # predictor.
    candidate_graphs = transform(
        candidate_graphs,
        program=AnnotateImportance(
            program=self.fit_wrapped_predictor.program))
    candidate_graphs = list(candidate_graphs)
    # transform graphs according to importance
    # this allows similarity notion to be task dependent
    known_graphs = transform(
        known_graphs,
        program=AnnotateImportance(
            program=self.fit_wrapped_predictor.program))
    known_graphs = list(known_graphs)
    # store the nearest neighbors in knn_manager
    # compute the k nearest neighbors distances of each proposal graph
    knn_manager = KNNManager(n_neighbors=self.n_neighbors,
                             complexity=3)
    knn_manager.setup(known_graphs=known_graphs,
                      candidate_graphs=candidate_graphs)
    delta_time = datetime.timedelta(seconds=(time.time() - start))
    logger.info('Knn computation took: %s' % (str(delta_time)))
    # compute predictions
    predicted_graphs = predict(candidate_graphs,
                               program=self.fit_wrapped_predictor)
    predicted_graphs = list(predicted_graphs)
    # One score per candidate, as a column vector for _acquisition.
    scores = np.array([graph.graph['score']
                       for graph in predicted_graphs]).reshape(-1, 1)
    # iterations
    tradeoff = self.exploration_vs_exploitation_tradeoff
    selection_ids = []
    for i in range(self.n_proposals):
        # Average k-NN distance serves as the per-candidate
        # uncertainty estimate.
        uncertainties = knn_manager.average_distances()
        # run the acquisition function (n_proposals times)
        # and return best_id
        maximal_id = self._acquisition(
            scores,
            uncertainties,
            exploration_vs_exploitation_tradeoff=tradeoff)
        # update distances with new selection
        # NOTE(review): presumably add_element updates the stored
        # distances so an already-picked candidate loses uncertainty
        # and is not re-selected — confirm in KNNManager.
        knn_manager.add_element(maximal_id)
        # store id
        selection_ids.append(maximal_id)
        graph = candidate_graphs[maximal_id]
        logger.debug('>%s' %
                     graph.graph['header'])
        logger.debug(graph.graph['sequence'])
    return selection_ids