def sparql_search_endpoint():
    """Run the SPARQL query supplied in the request and return the JSON result.

    Reads the ``query`` request argument, passes it to ``search.search``
    together with the URI-rank and cluster data obtained from ``utils``,
    and wraps whatever the search returns with ``jsonify``.

    Returns:
        The response object produced by ``jsonify``.
    """
    query_text = request.args.get('query')

    # Fetch the ranking and clustering inputs in the same order as the
    # search call consumes them.
    uri2rank = utils.get_uri2rank()
    clusters = utils.get_clusters()

    search_results = search.search(query_text, uri2rank, clusters)
    response = jsonify(search_results)

    print('Successfully searched')
    return response
def get_mob_spawner_clusters(self):
    """Group the positions of all mob spawners into clusters.

    Every block entity whose ``_type`` is ``'MobSpawner'`` contributes its
    position. The positions are clustered by ``utils.get_clusters`` (called
    with ``16`` as its second argument -- presumably a distance threshold;
    confirm against ``utils``). Each resulting cluster gets a ``mob_types``
    attribute listing the ``'EntityId'`` of the block entity at each of its
    points.

    Returns:
        list: the clusters, each annotated with ``mob_types``.
    """
    spawner_positions = []
    for position, entity in self.block_entities.iteritems():
        if entity._type == 'MobSpawner':
            spawner_positions.append(position)

    def entity_id_at(position):
        # A position without a block entity yields None instead of raising.
        return self.block_entities.get(position, {}).get('EntityId')

    annotated = []
    for cluster in utils.get_clusters(spawner_positions, 16):
        cluster.mob_types = [entity_id_at(point) for point in cluster.points]
        annotated.append(cluster)
    return annotated
def main(args):
    """Compute a mask for the image at ``args.ImgPath`` and save it beside it.

    Builds a ``GraphEmbedding`` for the image, computes its graph from the
    cluster centers, runs ``FordFulkerson`` between the last two nodes of the
    embeddings matrix, converts the resulting cut edges into a mask, resizes
    the mask to the graph's original size and writes it as a grayscale image
    named ``<name>-mask<ext>`` in the input image's directory.

    Args:
        args: object providing ``ImgPath`` and ``resize_factor``.

    Note:
        ``sigma`` is not defined in this function; it is resolved from the
        enclosing scope.
    """
    graph = GraphEmbedding(
        path_img=args.ImgPath,
        sigma=sigma,
        resize=args.resize_factor,
    )
    centers = get_clusters(graph)
    graph.compute_graph(centers)

    # The source and sink are the final two rows of the embeddings matrix.
    embeddings = graph.embeddings_matrix
    node_count = len(embeddings)
    source, sink = node_count - 2, node_count - 1

    cut_edges = FordFulkerson(embeddings, source, sink)
    mask = compute_mask(cut_edges, graph.height, graph.width, source, sink)

    # original_size is reversed before being handed to cv2.resize as the
    # target size.
    target_size = graph.original_size[::-1]
    resized_mask = cv2.resize(mask, target_size, 0, 0)

    # Output path: same directory and extension, "-mask" appended to the stem.
    directory, base_name = os.path.split(args.ImgPath)
    stem, extension = os.path.splitext(base_name)
    output_path = os.path.join(directory, stem + "-mask" + extension)

    plt.imsave(output_path, resized_mask, cmap="gray")
    print("Saved image in ", output_path)
def full_train(n_epochs=1, batch_size=200, save_prefix=None):
    """Run the complete training process.

    Loads the data, estimates clusters from the training labels, builds the
    model and fits it, validating on the validation split.

    Args:
        n_epochs: number of epochs passed to ``model.fit``.
        batch_size: batch size passed to ``model.fit``.
        save_prefix: when not ``None``, per-epoch weights are checkpointed to
            ``cache/<prefix>-<epoch>-<val_loss>.hdf5`` and the training
            history is pickled to ``cache/<prefix>-history.pickle``.

    Returns:
        The history object returned by ``model.fit``.
    """
    should_persist = save_prefix is not None

    print("Loading data...")
    data = load_data()

    print("Estimating clusters...")
    clusters = get_clusters(data.train_labels)

    callbacks = []
    if should_persist:
        # Checkpoint the weights (weights only) after each epoch, tracking
        # the validation loss.
        checkpoint_path = "cache/%s-{epoch:03d}-{val_loss:.4f}.hdf5" % save_prefix
        checkpoint = ModelCheckpoint(
            checkpoint_path,
            monitor='val_loss',
            mode='min',
            save_weights_only=True,
            verbose=1,
        )
        callbacks.append(checkpoint)

    print("Creating model...")
    start_new_session()
    model = create_model(data.metadata, clusters)

    print("Start training...")
    train_features = process_features(data.train)
    validation_set = (process_features(data.validation), data.validation_labels)
    # NOTE(review): `nb_epoch` looks like the Keras 1 keyword -- confirm
    # against the Keras version this project pins.
    history = model.fit(
        train_features,
        data.train_labels,
        nb_epoch=n_epochs,
        batch_size=batch_size,
        validation_data=validation_set,
        callbacks=callbacks,
    )

    if should_persist:
        # Keep only the plain history dict on disk.
        history_path = 'cache/%s-history.pickle' % save_prefix
        with open(history_path, 'wb') as handle:
            pickle.dump(history.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return history
plt.xticks(fontsize=9) plt.axes().xaxis.set_major_locator(MultipleLocator(10)) plt.legend(['train', 'validation', 'smoothened validation'], loc='upper right') plt.show() ''' ########################################################################################################### ## selected the weights learned at epoch #70 for my final model, which can now be loaded again with Keras: start_new_session() # load data and generate clusters np.random.seed(42) os.chdir('C:/ENEA_CAS_WORK/Taxi_destination_predictions') data = load_data() clusters = get_clusters(data.train_labels) # load the model of the run #1 os.chdir('C:\\ENEA_CAS_WORK\\Taxi_destination_predictions\\cache') model = create_model(data.metadata, clusters) model.load_weights('mymodel-001-2.2026.hdf5') WWW = model.weights print(WWW[1].shape) # Out[139]: TensorShape([7, 10]) ....7 feature of each of the 10 lat, lon (first and last coordinates) processed = process_features(data.validation) print(len(processed)) print(processed[6].shape) # Out[155]: (16444, 20) # lat, lon
def evaluate(self, test_docs):
    """Score the model on ``test_docs`` and write predictions and scores to disk.

    Each document is run through ``self._train_doc`` in evaluation mode, its
    predictions are turned into clusters with ``get_clusters``, and the result
    is compared with the document's gold clusters using three metrics:

    * MUC   -- counts the minimum number of links between mentions to be
      inserted or deleted when mapping a system response to a gold standard
      key set.
    * B3    -- computes precision and recall for all mentions in the document,
      which are then combined to produce the final precision and recall
      numbers for the entire output.
    * CEAF  -- applies a similarity metric (mention based or entity based) to
      each pair of entities (i.e. sets of mentions) to measure the goodness of
      each possible alignment; the best mapping is used for calculating CEAF
      precision, recall and F-measure.

    The scores are logged and written to ``self.path_pred_scores``; the
    per-document predicted clusters are written to ``self.path_pred_clusters``.

    Args:
        test_docs: iterable of documents exposing ``doc_id`` and ``clusters``.

    Returns:
        dict: ``{"muc", "b3", "ceafe", "avg"}`` mapped to the respective scores.
    """
    logging.info("Evaluating...")

    # doc_id -> predicted cluster assignments, for every test document
    predictions_by_doc = {}

    muc_score = metrics.Score()
    b3_score = metrics.Score()
    ceaf_score = metrics.Score()

    for doc in tqdm(test_docs):
        doc_preds, _ = self._train_doc(doc, eval_mode=True)
        mention_to_cluster = get_clusters(doc_preds)
        predictions_by_doc[doc.doc_id] = mention_to_cluster

        # The metric functions take {int -> set(str)}: keys are cluster ids,
        # values are the mentions in that cluster, e.g.
        #   {1: {'rc_1', 'rc_2', ...}, 2: {'rc_5', 'rc_8', ...}, ...}
        gold = {idx: set(mentions) for idx, mentions in enumerate(doc.clusters)}

        # Invert mention -> cluster into cluster -> {mentions}.
        predicted = {}
        for mention, cluster_id in mention_to_cluster.items():
            predicted.setdefault(cluster_id, set()).add(mention)

        muc_score.add(metrics.muc(gold, predicted))
        b3_score.add(metrics.b_cubed(gold, predicted))
        ceaf_score.add(metrics.ceaf_e(gold, predicted))

    avg_score = metrics.conll_12(muc_score, b3_score, ceaf_score)

    separator = "----------------------------------------------"
    report_lines = (
        separator,
        "**Test scores**",
        f"**MUC: {muc_score}**",
        f"**BCubed: {b3_score}**",
        f"**CEAFe: {ceaf_score}**",
        f"**CoNLL-12: {avg_score}**",
        separator,
    )
    for report_line in report_lines:
        logging.info(report_line)

    # Save test predictions and scores to file for further debugging
    with open(self.path_pred_scores, "w", encoding="utf-8") as scores_file:
        scores_file.writelines([
            f"Database: {self.dataset_name}\n\n",
            "Test scores:\n",
            f"MUC: {muc_score}\n",
            f"BCubed: {b3_score}\n",
            f"CEAFe: {ceaf_score}\n",
            f"CoNLL-12: {metrics.conll_12(muc_score, b3_score, ceaf_score)}\n",
        ])

    with open(self.path_pred_clusters, "w", encoding="utf-8") as clusters_file:
        clusters_file.write("Predictions:\n")
        for doc_id, doc_clusters in predictions_by_doc.items():
            clusters_file.write(f"Document '{doc_id}':\n")
            clusters_file.write(str(doc_clusters))
            clusters_file.write("\n")

    return {
        "muc": muc_score,
        "b3": b3_score,
        "ceafe": ceaf_score,
        "avg": avg_score,
    }
# Remaining command-line arguments: the ELM list file and the file suffix
# handed on to utils.count_host_elmSeqs.
use_elms_file = sys.argv[6]
suffix = sys.argv[7]

# Record the first field of every line; each line must split into exactly
# two tab-separated fields or the unpacking raises.
use_elms = {}
with open(use_elms_file) as f:
    for line in f:
        elm, stuff = line.strip().split('\t')
        use_elms[elm] = True

# A distance file of 'NA' switches clustering off.
do_clustering = distance_file != 'NA'
if not do_clustering:
    mapping = {}
else:
    dis_file = os.path.join(results_dir, distance_file)
    mapping = utils.get_clusters(dis_file, dis_cutoff_init, dis_cutoff_meta)

counts = utils.count_host_elmSeqs(
    global_settings.TEST_GENOMES,
    do_clustering,
    mapping,
    results_dir,
    use_elms,
    suffix,
)

# Per-host count tables; only the disabled intersection variant below
# consumes this list.
ls = [counts[host] for host in counts]

# Union of every ELM sequence seen in any host.
all_elmSeqs = {}
#all_elmSeqs = utils_graph.intersectLists(ls)
for host in counts:
    for elmSeq in counts[host]:
        all_elmSeqs[elmSeq] = True
import os
import sys
from collections import defaultdict

import utils
import global_settings
import utils_graph

# Command-line arguments.
cluster_distance_file = sys.argv[1]  # NA for skip
elm_count_dir = sys.argv[2]  # results/roundup_all/

# A distance file of 'NA' switches clustering off.
do_clustering = cluster_distance_file != 'NA'

# this comes from my scratch experiments
#human_distance_file = '../../scratch/human_flu_distances'
#chicken_distance_file = '../../scratch/chicken_flu_distances'
#both_distance_file = 'working/runs/Jun24/closest_dis'

if not do_clustering:
    mapping = {}
else:
    f = os.path.join(elm_count_dir, cluster_distance_file)
    # The same cutoff (2.5) is passed for both cutoff arguments.
    mapping = utils.get_clusters(f, 2.5, 2.5)

hosts = global_settings.TEST_GENOMES
#all_elmSeqs = {}

# Only the human flu set is processed.
flus = ('human',)
flu_counts = {}
seen_seqs = {}
seen_seqs_ls = []
for flu in flus:
    flu_elm_file = os.path.join('results/', flu + '.H5N1.elms')
    # count_flu_sampled is expected to populate seen_seqs[flu]; the lookup
    # on the next line relies on it.
    utils.count_flu_sampled(flu, flu_elm_file, flu_counts, seen_seqs, mapping, do_clustering)
    seen_seqs_ls.append(seen_seqs[flu])