def do_science(prefixa, relsa, prefixb, relsb, ignore=False):
    # Merge the two sets of first-relevant ranks, then report MRR for each
    # along with a Wilcoxon signed-rank test.
    x, y = merge_first_rels(relsa, relsb, ignore=ignore)

    print(prefixa + ' mrr:', utils.calculate_mrr(x))
    print(prefixb + ' mrr:', utils.calculate_mrr(y))
    print('wilcoxon signedrank:', scipy.stats.wilcoxon(x, y))
def do_science(prefix, changeset_first_rels, release_first_rels, ignore=False):
    # Merge the changeset and release first-relevant ranks, then report MRR
    # for each along with rank-sum, Mann-Whitney, and Wilcoxon signed-rank tests.
    x, y = merge_first_rels(changeset_first_rels, release_first_rels, ignore=ignore)

    print(prefix + ' changeset mrr:', utils.calculate_mrr(x))
    print(prefix + ' release mrr:', utils.calculate_mrr(y))
    print(prefix + ' ranksums:', scipy.stats.ranksums(x, y))
    print(prefix + ' mann-whitney:', scipy.stats.mannwhitneyu(x, y))
    print(prefix + ' wilcoxon signedrank:', scipy.stats.wilcoxon(x, y))
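# The snippets above call utils.calculate_mrr without defining it. A minimal
# sketch of what it presumably computes (mean reciprocal rank over a list of
# first-relevant ranks) follows; the name and signature here are assumptions
# for illustration, not the project's actual implementation.
def calculate_mrr_sketch(first_relevant_ranks):
    """Return the mean of 1/rank over an iterable of first-relevant ranks."""
    ranks = list(first_relevant_ranks)
    if not ranks:
        return 0.0
    return sum(1.0 / rank for rank in ranks) / len(ranks)

# Example: ranks [1, 2, 4] -> (1 + 0.5 + 0.25) / 3 ≈ 0.583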
def run(self):
    self.make_whitelisted_queries()
    if not self.search_only:
        self.build_queries()
    for i in range(1, 5):
        self.query_pickle_name = 'azzopardi/whiteliste-queries-model-' + str(i) + '.p'
        self.result_pickle_name = 'azzopardi/result-model-' + str(i) + '.p'
        self.search_queries()
        calculate_mrr(self.result_pickle_name, self.white_list)
def do_science(a_first_rels, b_first_rels, ignore=False):
    # Build a dictionary with each of the results for stats.
    x, y = common.merge_first_rels(a_first_rels, b_first_rels, ignore=ignore)
    print(len(x), len(y))

    return {
        'a_mrr': utils.calculate_mrr(x),
        'b_mrr': utils.calculate_mrr(y),
        'wilcoxon': scipy.stats.wilcoxon(x, y),
    }
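# do_science also relies on merge_first_rels / common.merge_first_rels, which
# is not shown. A hedged sketch under the assumption that it aligns two
# {query_id: first_relevant_rank} mappings on their shared keys and returns
# paired lists (as required by the Wilcoxon signed-rank test); the 'ignore'
# behaviour guessed here (dropping queries with no relevant result on either
# side) is an assumption, not the project's actual logic.
def merge_first_rels_sketch(a_rels, b_rels, ignore=False):
    xs, ys = [], []
    for qid in sorted(set(a_rels) & set(b_rels)):
        a, b = a_rels[qid], b_rels[qid]
        if ignore and (not a or not b):
            continue  # assumed: skip queries missing a relevant result
        xs.append(a)
        ys.append(b)
    return xs, ys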
def inner(*args, **kwargs):
    # Route keyword arguments into the appropriate config dict based on their prefix.
    for arg, value in kwargs.items():
        if arg.startswith('changeset_'):
            new_arg = arg[len('changeset_'):]
            project.changeset_config[new_arg] = value
        elif arg.startswith('model_base_'):
            new_arg = arg[len('model_base_'):]
            assert 'num_topics' in kwargs
            if value == 'auto':
                project.model_config[new_arg] = value
            else:
                # Base hyperparameter values are scaled by the number of topics.
                project.model_config[new_arg] = float(value) / float(kwargs['num_topics'])
        else:
            project.model_config[arg] = value

    # Nothing to do if every changeset option is disabled.
    if not any(project.changeset_config.values()):
        return 0.0

    p = project._replace(
        model_config_string='-'.join(
            [unicode(v) for k, v in sorted(project.model_config.items())]),
        changeset_config_string='-'.join(
            [unicode(v) for k, v in sorted(project.changeset_config.items())]))

    results = run_experiments(p)
    # Score this configuration by the MRR of the first-relevant ranks.
    return utils.calculate_mrr(num for num, _, _ in results[source])
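# A small runnable illustration of the prefix routing done by inner() above.
# The dict names and keyword arguments are made up for the example and are not
# the project's real configuration keys.
def _split_kwargs(kwargs):
    changeset_config, model_config = {}, {}
    for arg, value in kwargs.items():
        if arg.startswith('changeset_'):
            changeset_config[arg[len('changeset_'):]] = value
        elif arg.startswith('model_base_'):
            # base hyperparameter values are rescaled by the topic count, as in inner()
            model_config[arg[len('model_base_'):]] = float(value) / float(kwargs['num_topics'])
        else:
            model_config[arg] = value
    return changeset_config, model_config

# _split_kwargs({'changeset_include_additions': True, 'model_base_alpha': 2, 'num_topics': 500})
# -> ({'include_additions': True}, {'alpha': 0.004, 'num_topics': 500})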
def run(self):
    # self.build_stop_words()
    self.build_queries()
    self.search_queries()
    _, mrr, _, _ = calculate_mrr(self.result_pickle_name, self.white_list)
    self.mrr = float(mrr)
def run(self):
    # try:
    #     self.classification.load_model(self.model_name)
    # except:
    self.train_model()
    self.build_queries()
    self.search_queries()
    _, mrr, _, _ = calculate_mrr(self.result_pickle_name, self.white_list)
    self.mrr = float(mrr)
def cli(verbose, name, version, *args, **kwargs):
    random_seed_value = kwargs["random_seed_value"]
    numpy.random.seed(random_seed_value)

    coloredlogs.install()
    if verbose > 1:
        coloredlogs.set_level(level=logging.DEBUG)
    elif verbose == 1:
        coloredlogs.set_level(level=logging.INFO)
    elif verbose == 0:
        coloredlogs.set_level(level=logging.ERROR)

    model_config, model_config_string = get_default_model_config(kwargs)
    changeset_config, changeset_config_string = get_default_changeset_config()

    kwargs.update({
        'changeset_config': changeset_config,
        'changeset_config_string': changeset_config_string
    })
    kwargs.update({
        'model_config': model_config,
        'model_config_string': model_config_string
    })

    # load project info
    projects = common.load_projects(kwargs)

    if name:
        name = name.lower()
        projects = [x for x in projects if x.name == name]

    if version:
        version = version.lower()
        projects = [x for x in projects if x.version == version]

    mrr = dict()
    firstrels = dict()
    for project in projects:
        if project.goldset:
            goldsets.build_goldset(project)
        elif project.optimize_model:
            optimize_model(project)
        elif project.optimize_corpus:
            optimize_corpus(project)
        else:
            pn = project.printable_name
            firstrels[pn] = run_experiments(project)

            if pn not in mrr:
                mrr[pn] = dict()

            for source in project.source:
                mrr[pn][source] = utils.calculate_mrr(
                    num for num, _, _ in firstrels[pn][source])

    pprint(mrr)
def main(args):
    corpus = WebisCorpus(args)

    if args.experiment == "stats":
        length_stats_print('queries-silver.p')
        # length_stats_print('id-terms-in-common-no-stopwords-and-common-words-automatic-doc-lucene-dict.p')
        # corpus.print_handwritten_stats()
        return

    if args.experiment == "rm3":
        HandwrittenExp('handwritten-mini-YesRm3' + str(5) + '-' + str(5) + '-' + str(7 / 10),
                       corpus, rm3=True, mini_index=True,
                       rm3Terms=5, rm3Docs=5, rm3OrigWeight=(7 / 10))
        # for q in range(5, 15, 5):
        #     for d in range(5, 15, 5):
        #         for w in range(5, 10, 2):
        #             HandwrittenExp('handwritten-mini-YesRm3-' + str(q) + '-' + str(d) + '-' + str(w / 10),
        #                            corpus, rm3=True, mini_index=True,
        #                            rm3Terms=q, rm3Docs=d, rm3OrigWeight=(w / 10))
        # HandwrittenExp('handwritten-mini-NoRm3', corpus, rm3=False, mini_index=True)
        # SilverExp(corpus, name="silver-mini-YesRm3", rm3=True, mini_index=True)
        # SilverExp(corpus, name="silver-mini-YesRm3-" + str(5) + '-' + str(5) + '-' + str(7 / 10),
        #           rm3=True, mini_index=True, rm3Terms=5, rm3Docs=5, rm3OrigWeight=(7 / 10))
        # for q in range(5, 15, 5):
        #     for d in range(5, 15, 5):
        #         for w in range(5, 10, 2):
        #             SilverExp(corpus, name="silver-mini-YesRm3-" + str(q) + '-' + str(d) + '-' + str(w / 10),
        #                       rm3=True, mini_index=True, rm3Terms=q, rm3Docs=d, rm3OrigWeight=(w / 10))
        # SilverExp(corpus, name="silver-mini-NoRm3", rm3=False, mini_index=True)
        return

    if args.experiment == "cnn":
        CNNExp(corpus, train_samples=100, use_ner=True)
        return

    if args.experiment == "no-stop-words":
        MostCommonTermsExp("without-common-dict", corpus)
        return

    if args.experiment == 'classifier':
        train_samples = 300
        ClassifierExp(corpus, train_samples=train_samples, use_ner=True, useContext=False)
        ClassifierExp(corpus, train_samples=train_samples, use_ner=True, useContext=True)
        ClassifierExp(corpus, train_samples=train_samples, use_ner=False, useContext=True)
        ClassifierExp(corpus, train_samples=train_samples, use_noun=False, useContext=True)
        ClassifierExp(corpus, train_samples=train_samples, use_verb=False, useContext=True)
        ClassifierExp(corpus, train_samples=train_samples, use_adj=False, useContext=True)
        # ClassifierExp(corpus, train_samples=train_samples, use_ner=True)
        return

    if args.experiment == 'classifier-no-context':
        train_samples = 50
        gain = 50
        best = 0
        while True:
            exp = ClassifierExp(corpus, train_samples=train_samples, use_ner=True, useContext=False)
            score = exp.mrr
            if score > best:
                best = score
                train_samples += gain
            else:
                print('best score was: {} with {} train samples'.format(
                    best, (train_samples - gain)))
                return

    if args.experiment == 'classifier-context':
        expSmallTrain = ClassifierExp(corpus, train_samples=400, noQueryTerms=15,
                                      use_ner=True, useContext=True)
        print('best score was: {} with {} train samples'.format(
            expSmallTrain.mrr, (170)))
        # expTrainALl = ClassifierExp(corpus, train_samples=2000, noQueryTerms=15, use_ner=True, useContext=True)
        # print('best score was: {} with {} train samples'.format(expTrainALl.mrr, (2000)))
        # train_samples = 150
        # best = 0
        # gain = 10
        # while True:
        #     score = exp.mrr
        #     if score > best:
        #         best = score
        #         train_samples += gain
        #     else:
        #         return
        return

    if args.experiment == 'automatic':
        AutomaticExp('automatic', corpus)
        return

    if args.experiment == "tf-idf":
        for k in range(5, 30, 5):
            TfIdfExp('tf-idf-' + str(k), corpus, k)
        return

    if args.experiment == "tf":
        for k in range(5, 30, 5):
            TfExp('tf-' + str(k), corpus, k)
        return

    if args.experiment == "idf":
        for k in range(5, 30, 5):
            IdfExp('idf-' + str(k), corpus, k)
        return

    if args.experiment == "random":
        for k in range(5, 30, 5):
            RandomExp('random-' + str(k), corpus, k)
        return

    if args.experiment == "handwritten":
        HandwrittenExp('handwritten', corpus)
        return

    if args.experiment == 'ner-only':
        NamedEntityExp('named-entity-only', corpus)
        return

    if args.experiment == 'azzopardi':
        AzzopardiExp(corpus, search_only=True)
        return

    if args.experiment == 'silver':
        SilverExp(corpus)
        return

    if args.produce_sheet:
        result_pickle_list = [
            'result-clf-model-neruse-handwritten-as-goldTrueTrue-nounTrue-verbTrue-adjTrue-476.p'
            # 'result-clf-model-nerTrue-nounFalse-verbTrue-adjTrue-1000.p.p',
            # 'result-clf-model-nerFalse-nounTrue-verbTrue-adjTrue-1000.p.p',
            # 'result-clf-model-nerTrue-nounTrue-verbFalse-adjTrue-1000.p.p',
            # 'result-clf-model-nerTrue-nounTrue-verbTrue-adjFalse-1000.p.p',
            # 'result-silver.p',
            # 'result-handwritten.p',
            # 'result-automatic.p',
            # 'result-named-entity-only.p',
            # 'result-tf-idf-25.p',
            # 'result-tf-idf-20.p',
            # 'result-tf-idf-15.p',
            # 'result-tf-idf-10.p',
            # 'result-tf-idf-5.p',
            # 'result-tf-25.p',
            # 'result-tf-20.p',
            # 'result-tf-15.p',
            # 'result-tf-10.p',
            # 'result-tf-5.p',
            # 'result-idf-25.p',
            # 'result-idf-20.p',
            # 'result-idf-15.p',
            # 'result-idf-10.p',
            # 'result-idf-5.p',
            # 'result-random-25.p',
            # 'result-random-20.p',
            # 'result-random-15.p',
            # 'result-random-10.p',
            # 'result-random-5.p',
            # 'azzopardi/result-model-1.p',
            # 'azzopardi/result-model-2.p',
            # 'azzopardi/result-model-3.p',
            # 'azzopardi/result-model-4.p'
        ]
        with open('output_experiments.txt', 'w') as f:
            for result_pickle in result_pickle_list:
                f.write(','.join(
                    calculate_mrr(result_pickle, get_white_listed_ids())) + '\n')
        return

    if corpus.get_user_modifications():
        corpus.write_corpus_to_file()
    print(corpus.get_completed_percentage())
if __name__ == '__main__':
    """
    How to run:
    The first four (commented) lines below generate the files used to train and
    predict with SVM Rank. The last two lines compute the MRR and produce the
    answers for this stage.
    First run the four generation lines with the last two commented out, then
    train from the command line with svm_rank_windows:
        ./svm_rank_learn -c 10.0 ../data/svm_train.txt model_10.dat
    Predict on the dev set with the trained model:
        ./svm_rank_classify ../data/svm_dev.txt model_10.dat ../data/dev_predictions
    Predict on the test set with the trained model:
        ./svm_rank_classify ../data/svm_test.txt model_10.dat ../data/test_predictions
    Once the test-set predictions exist, comment out the first four lines and run
    the last two. This yields the MRR results and the selected candidate answer
    sentences, saved in test_answer_result.json.

    Recorded results (feature subsets and their scores, kept verbatim):
    [0, 1, 2, 3, 5]            prefect_correct:470, MRR:0.591683226004102
    [0, 1, 2, 4, 5]            prefect_correct:426, MRR:0.5643933064143644
    [0, 1, 2, 3, 4, 5]         prefect_correct:466, MRR:0.5895169264670681
    [0, 1, 2, 3, 4, 5, 6, 7]   prefect_correct:606, MRR:0.6986251509668727
    """
    # build_feature()
    # generate_svm_rank_data()
    # build_feature(False)
    # generate_svm_rank_data(False)
    calculate_mrr()
    get_test_ans()
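# calculate_mrr() above reads the SVM Rank prediction files, but its internals
# are not shown here. A hedged sketch of the usual computation (rank each
# question's candidate answer sentences by predicted score, count questions
# whose gold answer ranks first, and average 1/rank); the function name and
# data layout are assumptions for illustration only.
def mrr_from_predictions(questions):
    """questions: list of (scores, gold_index) pairs, one per question."""
    perfect, reciprocal_sum = 0, 0.0
    for scores, gold_index in questions:
        # 1-based rank of the gold candidate when sorted by descending score
        order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        rank = order.index(gold_index) + 1
        if rank == 1:
            perfect += 1
        reciprocal_sum += 1.0 / rank
    return perfect, reciprocal_sum / len(questions)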