def make_eventDisplays(self):
    '''Draw the event displays, reusing a pickled cache when one exists.

    The cache file name is built by appending ``event_displays.pkl`` directly
    to ``self.path`` (no path separator is inserted). If that file exists its
    content is loaded with ``helper.load_object``; otherwise the events are
    produced by ``self.scan_events()`` and stored with ``helper.save_object``
    before drawing. Either way the result is passed to ``self.draw_events``.
    '''
    cache_file = '%sevent_displays.pkl' % self.path

    if not os.path.exists(cache_file):
        # No cache yet: do the scan and persist it for the next call.
        histo = self.scan_events()
        helper.save_object(histo, cache_file)
    else:
        print('[status] loading %s..' % cache_file)
        histo = helper.load_object(cache_file)

    self.draw_events(histo)
def read_runs(self, path):
    '''Read the run selections from a config file and attach stored statistics.

    Every section of the config file is treated as one run: a ``result``
    instance is created for it and each option of the section is assigned to
    the attribute of the same name. Afterwards noise and pulse-height values
    are loaded from pickle files below ``self.input_path`` + section name and
    stored on the same instance.

    path -- location of the selections config file

    Returns a dict mapping each section name to its ``result`` instance.

    The process is terminated with exit status 1 when ``path`` is not a file
    or when a section contains an option that ``result`` has no attribute for.
    '''
    print('[status] reading selections from %s..' % path)
    if not os.path.isfile(path):
        print('[ERROR] %s does not exist!' % path)
        sys.exit(1)

    config = ConfigParser.ConfigParser()
    config.optionxform = str  # keep option names case sensitive
    config.read(path)

    # One entry per statistics file, in the order they are loaded:
    # (file below the run directory, target attribute, value key, error key).
    # The '<attribute>_err' attribute receives the error value.
    stat_sources = (
        ('noise/Noise_NonHitChannels_Dia_stat.pkl',
         'noise', 'sigma', 'sigma_err'),
        ('transparent/PulseHeight_nStrips_2in10_stat.pkl',
         'pulse_height', 'mean', 'mean_err'),
        ('clustering/PulseHeight_ClusterSize_1-2_Dia_stat.pkl',
         'pulse_height_clustering', 'mean', 'mean_err'),
    )

    runs = {}
    for run_number in config.sections():
        run = result(run=run_number)

        for option in config.options(run_number):
            if not hasattr(run, option):
                print('[ERROR] run instance has no attribute %s! Check your selections config file!' % option)
                sys.exit(1)
            # NOTE(review): eval() executes whatever expression the config
            # file contains; only use config files from a trusted source.
            setattr(run, option, eval(config.get(run_number, option)))
        runs[run_number] = run

        for relative_file, attribute, value_key, error_key in stat_sources:
            stats = helper.load_object(
                '%s%s/%s' % (self.input_path, run_number, relative_file))
            setattr(run, attribute, stats[value_key])
            setattr(run, attribute + '_err', stats[error_key])

    return runs
def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes, also when unpickling raises.
    """
    # NOTE(review): pickle can run arbitrary code while loading; these files
    # are assumed to be trusted local artifacts.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Lookup tables read from the current working directory at import time.
# NOTE(review): the names suggest information-content and ancestor tables per
# GO ontology (BP / CC / MF / all three) -- confirm against the producer of
# these files.
ic4goBP = _load_pickle('HumanICBP.pickle')
ic4goCC = _load_pickle('HumanICCC.pickle')
ic4goMF = _load_pickle('HumanICMF.pickle')
ic4goAll3 = _load_pickle('HumanIC3ontology.pickle')

ancestor4goBP = _load_pickle('GOBPANCESTOR.pickle')
ancestor4goCC = _load_pickle('GOCCANCESTOR.pickle')
ancestor4goMF = _load_pickle('GOMFANCESTOR.pickle')
ancestor4goAll3 = _load_pickle('GOANCESTORS_full3ont.pickle')

if __name__ == "__main__":
    # Output file name: <scoreOutput><nameExpression><start>.<end>.txt
    filefullpath = (args.scoreOutput + args.nameExpression
                    + str(args.pairStartIndex) + "."
                    + str(args.pairEndIndex) + ".txt")

    # Vocabulary and the word embeddings restricted to that vocabulary.
    dictionary = helper.load_object(args.save_path + 'gene_dictionary.p')
    embeddings_index = helper.load_word_embeddings(
        args.word_vectors_directory, args.word_vectors_file,
        dictionary.word2idx)

    print("loading model")
    model = SentenceClassifier(dictionary, embeddings_index, args,
                               select_method='max')
    if args.cuda:
        model = model.cuda()
    # Restore the weights saved under the 'state_dict' key of the checkpoint.
    helper.load_model_states_from_checkpoint(
        model, args.save_path + 'model_best.pth.tar', 'state_dict', args.cuda)
    print('vocabulary size = ', len(dictionary))
threshold_examples:mid_train + threshold_examples] dev_corpus.data = dev_corpus.data[mid_dev - threshold_examples:mid_dev + threshold_examples] test_corpus.data = test_corpus.data[mid_test - threshold_examples:mid_test + threshold_examples] print('train set size = ', len(train_corpus.data)) print('development set size = ', len(dev_corpus.data)) print('test set size = ', len(test_corpus.data)) # save the dictionary object to use during testing if os.path.exists(args.output_base_path + args.task + '/' + 'dictionary.p'): print('loading dictionary') dictionary = helper.load_object(args.output_base_path + args.task + '/' + 'dictionary.p') else: dictionary = data.Dictionary() dictionary.build_dict(train_corpus.data, args.max_words) helper.save_object( dictionary, args.output_base_path + args.task + '/' + 'dictionary.p') print('vocabulary size = ', len(dictionary)) # ############################################################################### # train = train.Train(model, optimizer, selector, optimizer_selector, dictionary, args, best_acc) # train.train_epochs(train_corpus, dev_corpus, test_corpus, args.start_epoch, args.epochs) numpy.random.shuffle( train_corpus.data) #helper.batchify(train_corpus.data, args.batch_size)
print('MRR - ', mrr) print('NDCG@1 - ', ndcg_1) print('NDCG@3 - ', ndcg_3) print('NDCG@5 - ', ndcg_5) print('NDCG@10 - ', ndcg_10) print("targets size = ", len(targets)) print("predicts size = ", len(predicts)) multi_bleu.print_multi_bleu(predicts, targets) if __name__ == "__main__": #load dictionary dictionary = helper.load_object(args.data_path + args.dataset + '/dictionary.p') print('vocabulary size = ', len(dictionary)) if not os.path.exists(args.data_path + args.dataset + '/test_dataset.p'): #build test dataset test_dataset = dataload.Dataset(args.max_query_len, args.max_doc_len, args.hist_session_num_limit, args.click_num_limit) test_dataset.parse(args.corpus_path + args.dataset + '/test.txt', dictionary, args.max_example) print('test set size = ', len(test_dataset)) #save the test_dataset object helper.save_object(test_dataset, args.data_path + args.dataset + '/test_dataset.p') else: #load test dataset
f.write('pairID,gold_label' + '\n') for item in output: f.write(str(item[0]) + ',' + target_names[item[1]] + '\n') else: return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds), average='weighted'), s if __name__ == "__main__": dict_path = model_path = args.output_base_path + args.task+'/' dict_path += 'dictionary.p' model_path += args.model_file_name #'model_best.pth.tar' dictionary = helper.load_object(dict_path) embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file, dictionary.word2idx) model = BCN(dictionary, embeddings_index, args) if args.cuda: torch.cuda.set_device(args.gpu) model = model.cuda() print('loading model') helper.load_model(model, model_path, 'state_dict', args.cuda) print('vocabulary size = ', len(dictionary)) task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task] for task in task_names: test_corpus = data.Corpus(args.tokenize) if 'IMDB' in args.task:
n_correct += (preds.view(test_labels.size()).data == test_labels.data).sum() n_total += len(batches[batch_no]) if outfile: target_names = ['entailment', 'neutral', 'contradiction'] with open(outfile, 'w') as f: f.write('pairID,gold_label' + '\n') for item in output: f.write(str(item[0]) + ',' + target_names[item[1]] + '\n') else: return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds), average='weighted') if __name__ == "__main__": dictionary = helper.load_object(args.save_path + args.task + '_dictionary.pkl') embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file, dictionary.word2idx) model = SentenceClassifier(dictionary, embeddings_index, args) if args.cuda: model = model.cuda() helper.load_model_states_from_checkpoint(model, args.save_path + 'model_best.pth.tar', 'state_dict', args.cuda) print('vocabulary size = ', len(dictionary)) task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task] for task in task_names: if task == 'multinli' and args.test != 'train': for partition in ['_matched', '_mismatched']: test_corpus = data.Corpus(dictionary) test_corpus.parse(task, args.data, args.test + partition + '.txt', args.tokenize, is_test_corpus=True)
model.candid_next_q_len: candid_next_q_len, model.idx: idx } candid_query_score_ = sess.run(model.candid_query_score, feed_dict=feed_dict) mrr += MRR(candid_query_score_, label) mrr = mrr / num_batches print('Query Suggestion MRR - ', mrr) if __name__ == "__main__": #load dictionary dictionary = helper.load_object(args.data_path + args.dataset + '/dictionary.p') print('vocabulary size = ', len(dictionary)) if not os.path.exists(args.data_path + args.dataset + '/anchor_candidates.p'): #build anchor candidates anchor_queries = {} #{'anchor_query':set(target_next_queries),...} with open(args.corpus_path + args.dataset + '/test.txt', 'r') as f: for line in f: user = json.loads(line.strip()) #一个用户的数据 for session in user: #一个用户的每一个session anchor_query = session['query'][-2] #一个session的倒数第二个query target_next_query = session['query'][ -1] #一个session的倒数第一个query if anchor_query not in anchor_queries: anchor_queries[anchor_query] = set()