	def make_eventDisplays(self) :
		'''draws event displays, loading cached histograms if available, otherwise scanning events and caching the result'''
		pkl_path = '%sevent_displays.pkl' % self.path
		if os.path.exists(pkl_path) :
			print '[status] loading %s..' % pkl_path
			histo = helper.load_object(pkl_path)
		else :
			histo = self.scan_events()
			helper.save_object(histo, pkl_path)
		self.draw_events(histo)
	def read_runs(self, path) :
		'''reads selections from config file'''

		print '[status] reading selections from %s..' % path
		if not os.path.isfile(path) :
			print '[ERROR] %s does not exist!' % path
			sys.exit(1)
		run_file = ConfigParser.ConfigParser()
		run_file.optionxform = str # case sensitive options
		run_file.read(path)

		runs = {}
		for run_number in run_file.sections() :
			run = result(run = run_number)
			for res in run_file.options(run_number) :
				if not hasattr(run, res) :
					print '[ERROR] run instance has no attribute %s! Check your selections config file!' % res
					sys.exit(1)
				setattr(run, res, eval(run_file.get(run_number, res)))
			runs[run_number] = run

			# get noise
			noise_file = '%s%s/noise/Noise_NonHitChannels_Dia_stat.pkl' % (self.input_path, run_number)
			noise = helper.load_object(noise_file)
			run.noise     = noise['sigma']
			run.noise_err = noise['sigma_err']

			# get pulse height
			ph_file = '%s%s/transparent/PulseHeight_nStrips_2in10_stat.pkl' % (self.input_path, run_number)
			pulse_height = helper.load_object(ph_file)
			run.pulse_height     = pulse_height['mean']
			run.pulse_height_err = pulse_height['mean_err']
#			run.pulse_height_err = run.noise

			# get clustered pulse height
			ph_file_clustering = '%s%s/clustering/PulseHeight_ClusterSize_1-2_Dia_stat.pkl' % (self.input_path, run_number)
			pulse_height_clustering = helper.load_object(ph_file_clustering)
			run.pulse_height_clustering     = pulse_height_clustering['mean']
			run.pulse_height_clustering_err = pulse_height_clustering['mean_err']
#			run.pulse_height_clustering_err = run.noise

		return runs
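
# The snippets on this page lean heavily on helper.load_object / helper.save_object.
# The repository's helper module is not shown here; the following is only a minimal
# sketch of what those two functions are assumed to do (plain pickle round-tripping).
import pickle

def save_object(obj, path):
    # serialize any picklable object to disk
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(path):
    # restore an object previously written by save_object
    with open(path, 'rb') as f:
        return pickle.load(f)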
# precomputed information-content (IC) tables and GO ancestor maps for the three ontologies (BP, CC, MF) and for all three combined
ic4goBP = pickle.load(open('HumanICBP.pickle', "rb"))
ic4goCC = pickle.load(open('HumanICCC.pickle', "rb"))
ic4goMF = pickle.load(open('HumanICMF.pickle', "rb"))
ic4goAll3 = pickle.load(open('HumanIC3ontology.pickle', "rb"))
ancestor4goBP = pickle.load(open('GOBPANCESTOR.pickle', "rb"))
ancestor4goCC = pickle.load(open('GOCCANCESTOR.pickle', "rb"))
ancestor4goMF = pickle.load(open('GOMFANCESTOR.pickle', "rb"))
ancestor4goAll3 = pickle.load(open('GOANCESTORS_full3ont.pickle', "rb"))
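
# The IC tables map GO terms to information-content values and the *ANCESTOR tables
# map each GO term to its ancestors. One common way to combine them (an assumption
# here, not shown in the original code) is Resnik similarity: the IC of the most
# informative common ancestor of two terms.
def resnik_similarity(term_a, term_b, ic, ancestors):
    # treat each term as an ancestor of itself so identical terms score IC(term)
    anc_a = set(ancestors.get(term_a, [])) | {term_a}
    anc_b = set(ancestors.get(term_b, [])) | {term_b}
    scores = [ic[t] for t in (anc_a & anc_b) if t in ic]
    return max(scores) if scores else 0.0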

if __name__ == "__main__":

    filefullpath = args.scoreOutput + args.nameExpression + str(
        args.pairStartIndex) + "." + str(args.pairEndIndex) + ".txt"

    print("loading dictionary/embedding")
    dictionary = helper.load_object(args.save_path + 'gene_dictionary.p')
    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print("loading model")
    # print (args)
    model = SentenceClassifier(dictionary,
                               embeddings_index,
                               args,
                               select_method='max')
    if args.cuda:
        model = model.cuda()
    helper.load_model_states_from_checkpoint(
        model, args.save_path + 'model_best.pth.tar', 'state_dict', args.cuda)
    print('vocabulary size = ', len(dictionary))
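
# helper.load_model_states_from_checkpoint is not shown on this page; a plausible
# minimal implementation (an assumption, not the project's code) restores the weights
# stored under the given key of a torch.save()'d checkpoint dict.
import torch

def load_model_states_from_checkpoint(model, filename, tag, from_gpu=True):
    # map_location='cpu' lets a GPU-trained checkpoint load on a CPU-only machine
    checkpoint = torch.load(filename) if from_gpu else torch.load(filename, map_location='cpu')
    model.load_state_dict(checkpoint[tag])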
    train_corpus.data = train_corpus.data[mid_train -
                                          threshold_examples:mid_train +
                                          threshold_examples]
    dev_corpus.data = dev_corpus.data[mid_dev - threshold_examples:mid_dev +
                                      threshold_examples]
    test_corpus.data = test_corpus.data[mid_test -
                                        threshold_examples:mid_test +
                                        threshold_examples]

print('train set size = ', len(train_corpus.data))
print('development set size = ', len(dev_corpus.data))
print('test set size = ', len(test_corpus.data))

# save the dictionary object to use during testing
if os.path.exists(args.output_base_path + args.task + '/' + 'dictionary.p'):
    print('loading dictionary')
    dictionary = helper.load_object(args.output_base_path + args.task + '/' +
                                    'dictionary.p')
else:
    dictionary = data.Dictionary()
    dictionary.build_dict(train_corpus.data, args.max_words)
    helper.save_object(
        dictionary, args.output_base_path + args.task + '/' + 'dictionary.p')

print('vocabulary size = ', len(dictionary))
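
# data.Dictionary is referenced but not defined on this page. A hypothetical minimal
# version with the interface used above (word2idx, build_dict, len) might look like
# this; the real class and the corpus token layout may differ.
from collections import Counter

class Dictionary:
    def __init__(self):
        self.word2idx, self.idx2word = {}, []

    def add_word(self, word):
        if word not in self.word2idx:
            self.word2idx[word] = len(self.idx2word)
            self.idx2word.append(word)

    def build_dict(self, tokenized_examples, max_words):
        # tokenized_examples: iterable of token lists (the corpus layout is assumed)
        counter = Counter(tok for toks in tokenized_examples for tok in toks)
        for word, _ in counter.most_common(max_words):
            self.add_word(word)

    def __len__(self):
        return len(self.idx2word)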

# ###############################################################################

# train = train.Train(model, optimizer, selector, optimizer_selector, dictionary, args, best_acc)
# train.train_epochs(train_corpus, dev_corpus, test_corpus, args.start_epoch, args.epochs)

numpy.random.shuffle(
    train_corpus.data)  #helper.batchify(train_corpus.data, args.batch_size)
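
# helper.batchify is referenced only in the comment above; a minimal stand-in
# (an illustrative assumption, not the project's implementation) simply chunks
# the shuffled example list into fixed-size batches.
def batchify(data, batch_size):
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]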
Example #5
    print('MRR - ', mrr)
    print('NDCG@1 - ', ndcg_1)
    print('NDCG@3 - ', ndcg_3)
    print('NDCG@5 - ', ndcg_5)
    print('NDCG@10 - ', ndcg_10)

    print("targets size = ", len(targets))
    print("predicts size = ", len(predicts))

    multi_bleu.print_multi_bleu(predicts, targets)
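
# For reference, minimal versions of the ranking metrics printed above, assuming
# binary relevance labels ordered by predicted score (illustrative only).
import math

def mean_reciprocal_rank(ranked_relevance):
    # ranked_relevance: one 0/1 list per query, best-scored candidate first
    return sum(1.0 / (rels.index(1) + 1) if 1 in rels else 0.0
               for rels in ranked_relevance) / len(ranked_relevance)

def ndcg_at_k(rels, k):
    dcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(rels[:k]))
    idcg = sum(1.0 / math.log2(i + 2) for i in range(min(k, sum(rels))))
    return dcg / idcg if idcg > 0 else 0.0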


if __name__ == "__main__":

    #load dictionary
    dictionary = helper.load_object(args.data_path + args.dataset +
                                    '/dictionary.p')
    print('vocabulary size = ', len(dictionary))

    if not os.path.exists(args.data_path + args.dataset + '/test_dataset.p'):
        #build test dataset
        test_dataset = dataload.Dataset(args.max_query_len, args.max_doc_len,
                                        args.hist_session_num_limit,
                                        args.click_num_limit)
        test_dataset.parse(args.corpus_path + args.dataset + '/test.txt',
                           dictionary, args.max_example)
        print('test set size = ', len(test_dataset))
        #save the test_dataset object
        helper.save_object(test_dataset,
                           args.data_path + args.dataset + '/test_dataset.p')
    else:
        #load test dataset
        test_dataset = helper.load_object(args.data_path + args.dataset + '/test_dataset.p')
            f.write('pairID,gold_label' + '\n')
            for item in output:
                f.write(str(item[0]) + ',' + target_names[item[1]] + '\n')
    else:
        return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds),
                                                           average='weighted'), s


if __name__ == "__main__":

    dict_path = model_path = args.output_base_path + args.task+'/'
    dict_path += 'dictionary.p'
    model_path += args.model_file_name #'model_best.pth.tar'

    
    dictionary = helper.load_object(dict_path)
    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file,
                                                   dictionary.word2idx)
    model = BCN(dictionary, embeddings_index, args)
    if args.cuda:
        torch.cuda.set_device(args.gpu)
        model = model.cuda()
    print('loading model')
    helper.load_model(model, model_path, 'state_dict', args.cuda)

    print('vocabulary size = ', len(dictionary))

    task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
    for task in task_names:
        test_corpus = data.Corpus(args.tokenize)
        if 'IMDB' in args.task:
Example #7
            n_correct += (preds.view(test_labels.size()).data == test_labels.data).sum()
            n_total += len(batches[batch_no])

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for item in output:
                f.write(str(item[0]) + ',' + target_names[item[1]] + '\n')
    else:
        return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds),
                                                           average='weighted')


if __name__ == "__main__":
    dictionary = helper.load_object(args.save_path + args.task + '_dictionary.pkl')
    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file,
                                                   dictionary.word2idx)
    model = SentenceClassifier(dictionary, embeddings_index, args)
    if args.cuda:
        model = model.cuda()
    helper.load_model_states_from_checkpoint(model, args.save_path + 'model_best.pth.tar', 'state_dict', args.cuda)
    print('vocabulary size = ', len(dictionary))

    task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
    for task in task_names:
        if task == 'multinli' and args.test != 'train':
            for partition in ['_matched', '_mismatched']:
                test_corpus = data.Corpus(dictionary)
                test_corpus.parse(task, args.data, args.test + partition + '.txt', args.tokenize,
                                  is_test_corpus=True)
Example #8
            model.candid_next_q_len: candid_next_q_len,
            model.idx: idx
        }

        candid_query_score_ = sess.run(model.candid_query_score,
                                       feed_dict=feed_dict)
        mrr += MRR(candid_query_score_, label)

    mrr = mrr / num_batches
    print('Query Suggestion MRR - ', mrr)


if __name__ == "__main__":

    #load dictionary
    dictionary = helper.load_object(args.data_path + args.dataset +
                                    '/dictionary.p')
    print('vocabulary size = ', len(dictionary))

    if not os.path.exists(args.data_path + args.dataset +
                          '/anchor_candidates.p'):
        #build anchor candidates
        anchor_queries = {}  #{'anchor_query':set(target_next_queries),...}
        with open(args.corpus_path + args.dataset + '/test.txt', 'r') as f:
            for line in f:
                user = json.loads(line.strip())  # data for one user
                for session in user:  # iterate over this user's sessions
                    anchor_query = session['query'][-2]  # second-to-last query of the session
                    target_next_query = session['query'][
                        -1]  # last query of the session
                    if anchor_query not in anchor_queries:
                        anchor_queries[anchor_query] = set()