def test_overlap(self):
    """Check the threshold chosen when the two score lists overlap.

    The negative and positive lists interleave (a negative at 21, a
    positive at -1), so no threshold separates them perfectly. The test
    requires the returned threshold t to satisfy 2 <= t < 6.
    """
    neg_scores = [-8, -5, -3, 2, 21]
    pos_scores = [-1, 6, 12, 15, 20]

    threshold = Separator.separate(pos_scores, neg_scores)

    # Upper bound is exclusive, lower bound is inclusive.
    self.assertLess(threshold, 6)
    self.assertGreaterEqual(threshold, 2)
def test_overlap(self):
    """Overlapping inputs: the separating threshold must lie in [2, 6)."""
    lower_bound, upper_bound = 2, 6
    below = [-8, -5, -3, 2, 21]   # passed as the negatives argument
    above = [-1, 6, 12, 15, 20]   # passed as the positives argument

    chosen = Separator.separate(above, below)

    # threshold needs to be 2 <= t < 6
    self.assertLess(chosen, upper_bound)
    self.assertGreaterEqual(chosen, lower_bound)
def trainFreqThresh1(self, split):
    """Learn a single frequency-based threshold from the training pairs.

    Every row of ``self.pairmatrix`` selected by ``self.cv_idx != split``
    is unpacked as ``(word1, word2, result)``. For each row a score is
    computed from the ``freq`` attributes of the two ``self.entrydict``
    entries:

    * ``freq(word2) / freq(word1)`` when ``EntailClassifier.do_ratio`` is
      truthy,
    * ``freq(word2) - freq(word1)`` otherwise.

    Scores of rows with ``int(result) == 1`` are collected as positives and
    all others as negatives; both lists are handed to
    ``Separator.separate``.

    No check is made that either word is present in ``self.entrydict``
    (an earlier membership check is disabled), and a zero ``freq`` for
    ``word1`` is not guarded against in ratio mode.

    Returns:
        A one-element list holding the threshold from
        ``Separator.separate(positives, negatives, trials=1000000)``.
    """
    print("Training split " + str(split))
    pos_scores = []
    neg_scores = []

    # presumably a NumPy boolean mask keeping rows outside `split` -- confirm
    training_rows = self.pairmatrix[self.cv_idx != split]
    for seen, (word1, word2, result) in enumerate(training_rows, 1):
        use_ratio = EntailClassifier.do_ratio
        freq2 = float(self.entrydict[word2].freq)
        freq1 = float(self.entrydict[word1].freq)
        if use_ratio:
            score = freq2 / freq1
        else:
            score = freq2 - freq1

        bucket = pos_scores if int(result) == 1 else neg_scores
        bucket.append(score)

        # Progress report every 1000 processed pairs.
        if self.verbose and seen % 1000 == 0:
            print("Trained on " + str(seen))

    print(str(len(pos_scores)) + " " + str(len(neg_scores)))
    return [Separator.separate(pos_scores, neg_scores, trials=1000000)]
def trainFreqThresh1(self, split):
    """Fit one threshold on word-frequency scores for a cross-validation split.

    Rows of ``self.pairmatrix`` where ``self.cv_idx != split`` are used.
    Each row yields ``word1``, ``word2`` and ``result``. The score is the
    ratio ``freq(word2) / freq(word1)`` if ``EntailClassifier.do_ratio`` is
    set, else the difference ``freq(word2) - freq(word1)``, with both
    frequencies read from ``self.entrydict`` and converted to ``float``.

    Rows whose ``int(result)`` equals 1 contribute to the positive list,
    every other row to the negative list.

    Note: missing dictionary entries are not handled here (the lookup
    guards that once existed are disabled).

    Returns:
        ``[threshold]`` where ``threshold`` is the value returned by
        ``Separator.separate`` with ``trials=1000000``.
    """
    n_trials = 1000000
    print("Training split " + str(split))

    accepted, rejected = [], []
    processed = 0
    for row in self.pairmatrix[self.cv_idx != split]:
        word1, word2, result = row

        ratio_mode = EntailClassifier.do_ratio
        second = float(self.entrydict[word2].freq)
        first = float(self.entrydict[word1].freq)
        diff = second / first if ratio_mode else second - first

        if int(result) != 1:
            rejected.append(diff)
        else:
            accepted.append(diff)

        processed += 1
        if self.verbose and processed % 1000 == 0:
            print("Trained on " + str(processed))

    print(str(len(accepted)) + " " + str(len(rejected)))
    threshold = Separator.separate(accepted, rejected, trials=n_trials)
    return [threshold]
def trainCRThresh(self, split, method):
    """Learn a single threshold on a precision/recall based score.

    Rows of ``self.pairmatrix`` selected by ``self.cv_idx != split`` are
    unpacked as ``(word1, word2, result)``. ``method`` picks how the two
    ``self.entrydict`` entries are compared:

    * ``"CR_thresh"``     -- ``precision()`` in both directions,
    * ``"clarke_thresh"`` -- ``min_precision()`` in both directions,
    * ``"invCL"``         -- ``invCL()`` of word1 against word2, with
      recall fixed to 1.

    Any other ``method`` prints a message and terminates via ``exit(1)``;
    this only happens once a training row is actually processed.

    The score is 0 when recall is 0; otherwise ``precision / recall`` if
    ``EntailClassifier.do_ratio`` is truthy, else ``precision - recall``.
    Rows with ``int(result) == 1`` feed the positive list, the rest the
    negative list.

    Returns:
        A one-element list holding the threshold from
        ``Separator.separate(..., trials=1000000, integer=False)``.
    """
    print("Training split " + str(split))
    pos_scores = []
    neg_scores = []

    training_rows = self.pairmatrix[self.cv_idx != split]
    for seen, (word1, word2, result) in enumerate(training_rows, 1):
        if method == "invCL":
            precision = float(
                self.entrydict[word1].invCL(self.entrydict[word2]))
            # invCL uses 1-recall anyway, so threshold on precision alone.
            recall = 1
        elif method == "clarke_thresh":
            precision = float(
                self.entrydict[word1].min_precision(self.entrydict[word2]))
            recall = float(
                self.entrydict[word2].min_precision(self.entrydict[word1]))
        elif method == "CR_thresh":
            precision = float(
                self.entrydict[word1].precision(self.entrydict[word2]))
            recall = float(
                self.entrydict[word2].precision(self.entrydict[word1]))
        else:
            print("Unknown CR method " + method)
            exit(1)

        # Zero recall yields a score of 0 in both ratio and difference mode.
        if recall == 0:
            score = 0
        elif EntailClassifier.do_ratio:
            score = precision / recall
        else:
            score = precision - recall

        bucket = pos_scores if int(result) == 1 else neg_scores
        bucket.append(score)

        # Progress report every 1000 processed pairs.
        if self.verbose and seen % 1000 == 0:
            print("Trained on " + str(seen))

    print(str(len(pos_scores)) + " " + str(len(neg_scores)))
    return [Separator.separate(pos_scores, neg_scores,
                               trials=1000000, integer=False)]
def trainCRThresh(self, split, method):
    """Fit one threshold on precision/recall scores for a cross-validation split.

    For every ``(word1, word2, result)`` row of ``self.pairmatrix`` where
    ``self.cv_idx != split``, a precision and a recall value are derived
    from the ``self.entrydict`` entries of the two words:

    * ``method == "CR_thresh"``: ``entry1.precision(entry2)`` and
      ``entry2.precision(entry1)``;
    * ``method == "clarke_thresh"``: the same with ``min_precision``;
    * ``method == "invCL"``: ``entry1.invCL(entry2)`` with recall set to 1.

    An unrecognised ``method`` prints ``"Unknown CR method ..."`` and calls
    ``exit(1)`` when the first row is reached.

    The per-row score is 0 if recall equals 0, else the ratio
    ``precision / recall`` when ``EntailClassifier.do_ratio`` is set, else
    the difference ``precision - recall``. Scores are split into positives
    (``int(result) == 1``) and negatives (everything else).

    Returns:
        ``[threshold]`` with ``threshold`` taken from
        ``Separator.separate`` called with ``trials=1000000`` and
        ``integer=False``.
    """
    n_trials = 1000000
    print("Training split " + str(split))

    accepted, rejected = [], []
    processed = 0
    for row in self.pairmatrix[self.cv_idx != split]:
        word1, word2, result = row

        # Guard clause: bail out on anything but the three known methods.
        if method not in ("CR_thresh", "clarke_thresh", "invCL"):
            print("Unknown CR method " + method)
            exit(1)

        if method == "CR_thresh":
            precision = float(
                self.entrydict[word1].precision(self.entrydict[word2]))
            recall = float(
                self.entrydict[word2].precision(self.entrydict[word1]))
        elif method == "clarke_thresh":
            precision = float(
                self.entrydict[word1].min_precision(self.entrydict[word2]))
            recall = float(
                self.entrydict[word2].min_precision(self.entrydict[word1]))
        else:
            # "invCL": it uses 1-recall anyway, so only precision matters.
            precision = float(
                self.entrydict[word1].invCL(self.entrydict[word2]))
            recall = 1

        if recall != 0:
            if EntailClassifier.do_ratio:
                ratio = precision / recall
            else:
                ratio = precision - recall
        else:
            ratio = 0

        if int(result) != 1:
            rejected.append(ratio)
        else:
            accepted.append(ratio)

        processed += 1
        if self.verbose and processed % 1000 == 0:
            print("Trained on " + str(processed))

    print(str(len(accepted)) + " " + str(len(rejected)))
    threshold = Separator.separate(accepted, rejected,
                                   trials=n_trials, integer=False)
    return [threshold]
def test_separated_lists(self):
    """Check the threshold for two lists that do not overlap.

    Every negative value (largest: 3) is below every positive value
    (smallest: 5); the test expects the separator to return exactly 4.
    """
    neg_scores = [-4, 2, 3]
    pos_scores = [5, 10, 12]

    threshold = Separator.separate(pos_scores, neg_scores)

    self.assertEqual(threshold, 4)
def test_separated_lists(self):
    """Non-overlapping inputs: the separating threshold must equal 4."""
    expected = 4
    below = [-4, 2, 3]    # passed as the negatives argument
    above = [5, 10, 12]   # passed as the positives argument

    chosen = Separator.separate(above, below)

    self.assertEqual(chosen, expected)