def setup(self, known_graphs=None, candidate_graphs=None):
    """Fit a neighbor model on the known graphs and encode the candidates.

    Two attributes are populated:

    * ``self.distances_to_known_graphs`` -- one entry per candidate graph,
      taken from the ``'distances'`` key of the ``graph`` attribute of
      each graph returned by ``predict``.
    * ``self.candidate_graphs_data_matrix`` -- the result of calling
      ``vectorize`` on the candidate graphs.

    Parameters
    ----------
    known_graphs : graphs
        Graphs on which the wrapped ``NearestNeighbors`` program is
        fitted via ``model``.

    candidate_graphs : graphs
        Graphs handed first to ``predict`` and then to ``vectorize``.
        NOTE(review): since it is passed to two consumers it presumably
        has to be re-iterable (e.g. a list) -- confirm against callers.
    """
    # Priors for the model fit; the key spellings are part of the contract
    # with ``model`` and are therefore kept exactly as they are.
    priors = dict(
        n_neighbors=self.n_neighbors,
        vectorizer__complexity=self.complexity,
        vectorize__n_jobs=-1,
        vectorize__fit_flag=False,
        vectorize__n_blocks=5,
        vectorize__block_size=100)
    knn_program = KNNWrapper(program=NearestNeighbors())
    fitted_knn = model(known_graphs,
                       program=knn_program,
                       parameters_priors=priors)

    # Run the fitted predictor over the candidates and collect the
    # 'distances' annotation attached to every returned graph.
    annotated_candidates = list(
        predict(candidate_graphs, program=fitted_knn))
    self.distances_to_known_graphs = [
        annotated.graph['distances'] for annotated in annotated_candidates]

    # Encode the candidate graphs.
    encoder = Vectorizer(complexity=self.complexity)
    self.candidate_graphs_data_matrix = vectorize(
        candidate_graphs,
        vectorizer=encoder,
        block_size=400,
        n_jobs=-1)
def setup(self, known_graphs=None, candidate_graphs=None):
    """Fit a neighbor model on the known graphs and encode the candidates.

    Two attributes are populated:

    * ``self.distances_to_known_graphs`` -- one entry per candidate graph,
      taken from the ``'distances'`` key of the ``graph`` attribute of
      each graph returned by ``predict``.
    * ``self.candidate_graphs_data_matrix`` -- the output of
      ``Vectorizer.transform`` applied to the candidate graphs.

    Parameters
    ----------
    known_graphs : graphs
        Graphs on which the wrapped ``NearestNeighbors`` program is
        fitted via ``model``.

    candidate_graphs : graphs
        Graphs handed first to ``predict`` and then to the vectorizer.
        NOTE(review): since it is passed to two consumers it presumably
        has to be re-iterable (e.g. a list) -- confirm against callers.
    """
    # Priors handed to the model fit.
    priors = dict(
        n_neighbors=self.n_neighbors,
        vectorizer__complexity=self.complexity,
        vectorizer__discrete=True)
    knn_program = KNNWrapper(program=NearestNeighbors())
    fitted_knn = model(known_graphs,
                       program=knn_program,
                       parameters_priors=priors)

    # Run the fitted predictor over the candidates and collect the
    # 'distances' annotation attached to every returned graph.
    annotated_candidates = list(
        predict(candidate_graphs, program=fitted_knn))
    self.distances_to_known_graphs = [
        annotated.graph['distances'] for annotated in annotated_candidates]

    # Encode the candidate graphs.
    encoder = Vectorizer(complexity=self.complexity)
    self.candidate_graphs_data_matrix = encoder.transform(candidate_graphs)
def setup(self, known_graphs=None, candidate_graphs=None):
    """Prepare neighbor distances and encodings for the candidate graphs.

    After the call, ``self.distances_to_known_graphs`` holds the
    ``'distances'`` value read from the ``graph`` attribute of every graph
    yielded by ``predict`` (one per candidate), and
    ``self.candidate_graphs_data_matrix`` holds what
    ``Vectorizer.transform`` returns for the candidates.

    Parameters
    ----------
    known_graphs : graphs
        Reference graphs; the ``KNNWrapper`` around ``NearestNeighbors``
        is fitted on them through ``model``.

    candidate_graphs : graphs
        Graphs to compare against the reference set and to encode.
        NOTE(review): used by both ``predict`` and ``transform``, so a
        one-shot iterator would presumably be exhausted -- confirm.
    """
    fit_settings = {
        'n_neighbors': self.n_neighbors,
        'vectorizer__complexity': self.complexity,
        'vectorizer__discrete': True,
    }
    wrapped_neighbors = KNNWrapper(program=NearestNeighbors())
    known_predictor = model(known_graphs,
                            program=wrapped_neighbors,
                            parameters_priors=fit_settings)

    # Materialize the predictions before reading the per-graph distances.
    scored_candidates = list(
        predict(candidate_graphs, program=known_predictor))
    self.distances_to_known_graphs = [
        scored.graph['distances'] for scored in scored_candidates]

    # Candidate encodings use a vectorizer with the same complexity.
    candidate_vectorizer = Vectorizer(complexity=self.complexity)
    self.candidate_graphs_data_matrix = \
        candidate_vectorizer.transform(candidate_graphs)
def efficient_selection(self, candidate_graphs, known_graphs=None):
    """Select a small number of candidate graphs to propose.

    Both graph collections are first annotated with ``AnnotateImportance``
    (built from ``self.fit_wrapped_predictor.program``) and materialized
    as lists. A ``KNNManager`` is then set up on them, the candidates are
    scored with ``self.fit_wrapped_predictor``, and ``self.n_proposals``
    selection rounds are run. Each round feeds the scores and the
    manager's current average distances to ``self._acquisition`` and
    registers the chosen id with the manager.

    Parameters
    ----------
    candidate_graphs : networkx graphs
        The iterator over the seed graphs, i.e. the graphs that are used
        as a starting point for the proposal.

    known_graphs : networkx graphs
        The iterator over the already known graphs. These are used to
        bias the exploration towards less similar proposals.

    Returns
    -------
    selection_ids : list
        The ids returned by ``self._acquisition``, in selection order.
        Each id is used as an index into the materialized candidate list.
    """
    started_at = time.time()

    def _annotate(graphs):
        # Annotate by importance so that the similarity notion can be
        # task dependent; a fresh annotator is built for every call.
        annotator = AnnotateImportance(
            program=self.fit_wrapped_predictor.program)
        return list(transform(graphs, program=annotator))

    candidate_graphs = _annotate(candidate_graphs)
    known_graphs = _annotate(known_graphs)

    # The manager stores the k nearest neighbor distances of every
    # candidate with respect to the known graphs.
    knn_manager = KNNManager(n_neighbors=self.n_neighbors, complexity=3)
    knn_manager.setup(known_graphs=known_graphs,
                      candidate_graphs=candidate_graphs)
    elapsed = datetime.timedelta(seconds=(time.time() - started_at))
    logger.info('Knn computation took: %s' % str(elapsed))

    # Score every candidate and arrange the scores as a single column.
    scored_graphs = list(
        predict(candidate_graphs, program=self.fit_wrapped_predictor))
    raw_scores = [scored.graph['score'] for scored in scored_graphs]
    scores = np.array(raw_scores).reshape(-1, 1)

    tradeoff = self.exploration_vs_exploitation_tradeoff
    selection_ids = []
    for _ in range(self.n_proposals):
        # Uncertainties are re-read every round because add_element
        # updates the manager after each selection.
        uncertainties = knn_manager.average_distances()
        chosen_id = self._acquisition(
            scores,
            uncertainties,
            exploration_vs_exploitation_tradeoff=tradeoff)
        knn_manager.add_element(chosen_id)
        selection_ids.append(chosen_id)

        chosen_graph = candidate_graphs[chosen_id]
        logger.debug('>%s' % chosen_graph.graph['header'])
        logger.debug(chosen_graph.graph['sequence'])
    return selection_ids