import parse_semeval import cPickle import random class Paraphrase: def __init__(self,name,freq=0.0): self.name=name self.freq=freq def __eq__(self,other): return self.name==other.name print "unpickling: priors" priors=cPickle.load(open('/home/paul/thesis/data/pickles/trainingPriors')) print "unpickling prob table" probs=cPickle.load(open('/home/paul/thesis/data/pickles/trainingProbs')) f=open("/home/paul/thesis/data/SemEval2Task9/SemEval2_task9_testing_keys/FINAL_GOLD.txt") all_pairs=parse_semeval.parse_file(f) total=0.0 for pair in all_pairs: paras=[] for p in pair.paraphrases: if p.freq>2:paras.append(p) number=len(paras) subs=random.sample(paras,3) results=[] print pair.n1+" "+pair.n2 for p in probs.keys(): x=Paraphrase(p.strip()) x.score=0.0 results.append(x) for p in results: for s in subs:
# NOTE(review): this physical line is a whitespace-collapsed excerpt written
# for Python 2 (`print` statements).  It begins inside a function whose `def`
# line is not visible -- presumably `get_results`, the only function the
# `__main__` block below calls; confirm against the full file.  The code is
# left untouched because the nesting of the leading statements cannot be
# recovered from this excerpt.
#
# Function tail, as far as it can be read here:
#   * `basescore` / `score` count how many of the first `m` entries of
#     `base` / `results` are found in `gold_paras`.
#   * `score/float(m)` and `basescore/float(m)` are added to the running
#     sums `total` and `basetotal`.
#   * `acc` and `baseacc` are those sums divided by `len(testing)`; both are
#     printed, followed by `errcount` and `nonerrcount`.
#   * `results` is then rebound to `[acc, baseacc]` and returned.
#
# `__main__` block: parses combined.txt with
# `parse_semeval.parse_file(data_file, n)` (n = 5) and calls `get_results`
# with the slice `all_pairs[200:500]` as both the first and the second
# argument and 5 as the third.
# NOTE(review): the same slice is passed twice -- verify that using the same
# pairs for both arguments is intended.
# NOTE(review): `data_file` is never closed.
score=0.0 basescore=0.0 for b in base[0:m]: if b in gold_paras:basescore+=1.0 for r in results[0:m]: if r in gold_paras:score+=1.0 total+=(score/float(m)) basetotal+=(basescore/float(m)) acc=total/len(testing) print "predictions:" print total/len(testing) print baseacc=basetotal/len(testing) print "baseline:" print basetotal/len(testing) print errcount print nonerrcount results=[acc,baseacc] return results if __name__=="__main__": n=5 data_file=open("/home/paul/mayThesis/semEvalTask9/combined.txt") all_pairs=parse_semeval.parse_file(data_file, n) get_results(all_pairs[200:500],all_pairs[200:500], 5)
# NOTE(review): this physical line is a whitespace-collapsed excerpt.  It
# begins inside a function whose `def` line is not visible -- presumably
# `make_prob_table`, which the `__main__` block below calls and whose result
# is also named `probs`; confirm against the full file.  The code is left
# untouched because the nesting of the leading statements cannot be recovered
# from this excerpt.
# NOTE(review): in this collapsed form the inline `# print probs[a][b]`
# comment swallows everything after it on the line; the layout must be
# restored before the file can run.
#
# Function tail, as far as it can be read here:
#   * `probs[a][b]` is `cooc[a][b]` divided by `priors[b] * priors[a] ** 0`.
#     The `** 0` factor is always 1, so the divisor is effectively
#     `priors[b]` -- TODO confirm the zero exponent is deliberate.
#   * An `else` branch stores 0.0 instead; the table `probs` is returned.
#
# `__main__` block (n = 2):
#   * Opens out.txt for writing, combined.txt as training data and
#     testing.txt as test data, and parses both inputs with
#     `parse_semeval.parse_file(file, n)`.
#   * Builds `priors` with `make_priors` and `probs` with `make_prob_table`
#     from the training pairs.
#   * For every test pair, each paraphrase's `score` accumulates
#     `probs[para.name][c.name]` over the candidates `c` that do not compare
#     equal to it; the paraphrases are then sorted by descending score and
#     written as "<rank> <n1> <n2> <name> <score>", one per line, with the
#     rank starting at 1.
# NOTE(review): testing.txt is opened twice in a row; the first handle is
# dropped without being closed.
# NOTE(review): none of the files is closed explicitly, including `out_file`.
# NOTE(review): `para.score += ...` assumes the parser initialises `score` on
# every paraphrase -- verify in parse_semeval.
# NOTE(review): `copy` is used but no import is visible in this excerpt.
probs[a][b] = (cooc[a][b]) / ((priors[b]) * (priors[a] ** 0)) # print probs[a][b] else: probs[a][b] = 0.0 return probs if __name__ == "__main__": n = 2 out_file = open( "/home/paul/mayThesis/semEvalTask9/SemEval2_task9_all_data_final/SemEval2_task9_scorer/out.txt", "w" ) train_file = open("/home/paul/mayThesis/semEvalTask9/combined.txt") test_file = open("/home/paul/mayThesis/semEvalTask9/testing.txt") test_file = open("/home/paul/mayThesis/semEvalTask9/testing.txt") test_pairs = parse_semeval.parse_file(test_file, n) all_pairs = parse_semeval.parse_file(train_file, n) priors = make_priors(all_pairs) probs = make_prob_table(all_pairs, priors) for pair in test_pairs: candidates = copy.copy(pair.paraphrases) for para in pair.paraphrases: for c in candidates: if para == c: continue para.score += probs[para.name][c.name] pair.paraphrases = sorted(pair.paraphrases, key=lambda x: x.score, reverse=True) i = 0 for p in pair.paraphrases: i += 1 out_file.write(str(i) + " " + pair.n1 + " " + pair.n2 + " " + p.name + " " + str(p.score) + "\n")