Exemplo n.º 1
0
 def test_overlap(self):
     """Check the threshold chosen when the two samples overlap.

     Some negatives (2, 21) lie above some positives (-1), so no value
     separates the lists perfectly.  Per the original author's note the
     returned threshold must satisfy ``2 <= t < 6``.
     """
     lower_bound, upper_bound = 2, 6
     overlapping_negatives = [-8, -5, -3, 2, 21]
     overlapping_positives = [-1, 6, 12, 15, 20]

     threshold = Separator.separate(overlapping_positives,
                                    overlapping_negatives)

     # The upper bound is asserted first, then the lower bound.
     self.assertLess(threshold, upper_bound)
     self.assertGreaterEqual(threshold, lower_bound)
Exemplo n.º 2
0
 def test_overlap(self):
     """Overlapping samples: the threshold must land in ``[2, 6)``.

     The negatives contain values (2 and 21) that exceed the smallest
     positive (-1), so the two lists cannot be split cleanly.
     """
     expected_min = 2   # inclusive
     expected_max = 6   # exclusive
     found = Separator.separate(
         [-1, 6, 12, 15, 20],    # positives
         [-8, -5, -3, 2, 21],    # negatives
     )
     self.assertLess(found, expected_max)
     self.assertGreaterEqual(found, expected_min)
Exemplo n.º 3
0
    def trainFreqThresh1(self, split):
        """Fit one threshold on the frequency gap of labelled word pairs.

        Rows of ``self.pairmatrix`` selected by ``self.cv_idx != split`` are
        used (presumably the cross-validation folds other than ``split`` --
        confirm against the caller).  For each ``(word1, word2, result)``
        row the score is ``freq(word2) / freq(word1)`` when
        ``EntailClassifier.do_ratio`` is truthy, otherwise
        ``freq(word2) - freq(word1)``.  Scores whose ``int(result)`` equals 1
        are collected as positives, every other score as a negative.

        Progress is printed every 1000 rows when ``self.verbose`` is truthy.

        Returns:
            A one-element list holding whatever
            ``Separator.separate(positives, negatives, trials=1000000)``
            returns.
        """
        # Single-argument ``print(...)`` calls are used so the output is the
        # same as the Python 2 print statements they replace.
        print("Training split " + str(split))
        positives, negatives = [], []
        training_rows = self.pairmatrix[self.cv_idx != split]
        for seen, (word1, word2, result) in enumerate(training_rows, 1):
            ratio_mode = EntailClassifier.do_ratio
            # NOTE(review): there is no guard for words missing from
            # self.entrydict; the lookups below are unchecked.
            # word2 is looked up before word1, as in the original code.
            freq2 = float(self.entrydict[word2].freq)
            freq1 = float(self.entrydict[word1].freq)
            score = freq2 / freq1 if ratio_mode else freq2 - freq1
            (positives if int(result) == 1 else negatives).append(score)
            if self.verbose and seen % 1000 == 0:
                print("Trained on " + str(seen))
        print("%d %d" % (len(positives), len(negatives)))

        return [Separator.separate(positives, negatives, trials=1000000)]
Exemplo n.º 4
0
    def trainFreqThresh1(self,split):
        """Learn a single frequency-based threshold from labelled word pairs.

        Iterates over ``self.pairmatrix[self.cv_idx != split]`` (presumably
        every cross-validation fold except ``split`` -- verify with the
        caller).  Each row unpacks to ``(word1, word2, result)``.  The
        per-row value is the ratio ``freq(word2) / freq(word1)`` if
        ``EntailClassifier.do_ratio`` is truthy, else the difference
        ``freq(word2) - freq(word1)``.  Rows with ``int(result) == 1`` feed
        the positive list, all other rows the negative list.

        When ``self.verbose`` is truthy a progress line is printed after
        every 1000th row.

        Returns:
            ``[threshold]`` where ``threshold`` is the result of
            ``Separator.separate(positives, negatives, trials=1000000)``.
        """
        # print(...) with one argument behaves exactly like the Python 2
        # print statement it replaces.
        print("Training split " + str(split))
        # Keyed by "is this a positive example?".
        by_label = {True: [], False: []}
        processed = 0
        for word1, word2, result in self.pairmatrix[self.cv_idx != split]:
            # NOTE(review): lookups in self.entrydict are unchecked, so a
            # word without frequency information is not handled here.
            if EntailClassifier.do_ratio:
                numerator = float(self.entrydict[word2].freq)
                value = numerator / float(self.entrydict[word1].freq)
            else:
                minuend = float(self.entrydict[word2].freq)
                value = minuend - float(self.entrydict[word1].freq)
            by_label[int(result) == 1].append(value)
            processed += 1
            if self.verbose and processed % 1000 == 0:
                print("Trained on " + str(processed))
        positives = by_label[True]
        negatives = by_label[False]
        print(" ".join([str(len(positives)), str(len(negatives))]))

        threshold = Separator.separate(positives, negatives, trials=1000000)
        return [threshold]
Exemplo n.º 5
0
    def trainCRThresh(self, split, method):
        """Fit one threshold on a precision/recall-style score of word pairs.

        Rows of ``self.pairmatrix`` selected by ``self.cv_idx != split`` are
        used (presumably the cross-validation folds other than ``split`` --
        confirm against the caller).  Each row unpacks to
        ``(word1, word2, result)``; the two entries are looked up in
        ``self.entrydict`` and scored according to ``method``:

        * ``"CR_thresh"``     -- ``precision`` in both directions.
        * ``"clarke_thresh"`` -- ``min_precision`` in both directions.
        * ``"invCL"``         -- ``invCL`` of word1 against word2, with the
          recall fixed to 1 so only the precision value is thresholded.

        The per-row score is 0 when recall is 0, otherwise
        ``precision / recall`` if ``EntailClassifier.do_ratio`` is truthy,
        else ``precision - recall``.  Scores with ``int(result) == 1`` are
        positives, all others negatives.

        Args:
            split: value compared against ``self.cv_idx``.
            method: one of the three method names listed above.

        Returns:
            A one-element list holding the value returned by
            ``Separator.separate(positives, negatives, trials=1000000,
            integer=False)``.

        Raises:
            SystemExit: with status 1, after printing a message, when
                ``method`` is not a supported name.
        """
        # Local import: ``sys.exit`` replaces the ``exit`` helper that the
        # ``site`` module injects for interactive use and that is absent
        # when the interpreter is started with ``-S``.
        import sys

        # Single-argument ``print(...)`` produces the same output as the
        # Python 2 print statement.
        print("Training split " + str(split))

        # ``method`` never changes inside the loop, so validate it once and
        # fail fast -- also when the selected split contains no rows.
        if method not in ("CR_thresh", "clarke_thresh", "invCL"):
            print("Unknown CR method " + method)
            sys.exit(1)

        positives = []
        negatives = []
        training_rows = self.pairmatrix[self.cv_idx != split]
        for done, (word1, word2, result) in enumerate(training_rows, 1):
            entry1 = self.entrydict[word1]
            entry2 = self.entrydict[word2]
            if method == "CR_thresh":
                precision = float(entry1.precision(entry2))
                recall = float(entry2.precision(entry1))
            elif method == "clarke_thresh":
                precision = float(entry1.min_precision(entry2))
                recall = float(entry2.min_precision(entry1))
            else:  # "invCL" -- guaranteed by the validation above
                precision = float(entry1.invCL(entry2))
                # invCL uses 1-recall anyway, so threshold on precision only.
                recall = 1

            # NOTE(review): the zero-recall shortcut also applies in
            # difference mode, where no division takes place -- confirm
            # that this is intended.
            if recall == 0:
                ratio = 0
            elif EntailClassifier.do_ratio:
                ratio = precision / recall
            else:
                ratio = precision - recall

            if int(result) == 1:
                positives.append(ratio)
            else:
                negatives.append(ratio)
            if self.verbose and done % 1000 == 0:
                print("Trained on " + str(done))
        print("%d %d" % (len(positives), len(negatives)))

        threshold = Separator.separate(positives,
                                       negatives,
                                       trials=1000000,
                                       integer=False)
        return [threshold]
Exemplo n.º 6
0
    def trainCRThresh(self,split,method):
        """Learn a single threshold from directional precision scores.

        Trains on ``self.pairmatrix[self.cv_idx != split]`` (presumably all
        cross-validation folds except ``split`` -- verify with the caller).
        Every row unpacks to ``(word1, word2, result)``.  Depending on
        ``method`` the entries from ``self.entrydict`` are compared with:

        * ``"CR_thresh"``     -- ``precision`` (word1 vs word2 and reverse),
        * ``"clarke_thresh"`` -- ``min_precision`` (both directions),
        * ``"invCL"``         -- ``invCL`` (word1 vs word2); recall is pinned
          to 1 so the threshold depends on the precision value alone.

        A row scores 0 if its recall is 0; otherwise it scores
        ``precision / recall`` when ``EntailClassifier.do_ratio`` is truthy
        and ``precision - recall`` when it is not.  Rows whose
        ``int(result)`` is 1 go to the positive list, the rest to the
        negative list.

        Args:
            split: value compared against ``self.cv_idx``.
            method: ``"CR_thresh"``, ``"clarke_thresh"`` or ``"invCL"``.

        Returns:
            ``[threshold]`` with ``threshold`` being the result of
            ``Separator.separate(positives, negatives, trials=1000000,
            integer=False)``.

        Raises:
            SystemExit: status 1, preceded by a printed message, if
                ``method`` is not one of the supported names.
        """
        # Imported locally so the method does not depend on ``exit`` from
        # the ``site`` module, which is meant for the interactive shell and
        # is unavailable under ``python -S``.
        import sys

        supported_methods = ("CR_thresh", "clarke_thresh", "invCL")

        # One-argument print(...) matches the Python 2 print statement
        # output.
        print("Training split " + str(split))

        # The method name is loop-invariant: check it once, before any row
        # is processed, so an unknown name is rejected even for an empty
        # split.
        if method not in supported_methods:
            print("Unknown CR method " + method)
            sys.exit(1)

        positives = []
        negatives = []
        done = 0
        for word1, word2, result in self.pairmatrix[self.cv_idx != split]:
            first = self.entrydict[word1]
            second = self.entrydict[word2]
            if method == "CR_thresh":
                precision = float(first.precision(second))
                recall = float(second.precision(first))
            elif method == "clarke_thresh":
                precision = float(first.min_precision(second))
                recall = float(second.min_precision(first))
            else:  # only "invCL" can reach this branch
                precision = float(first.invCL(second))
                # invCL uses 1-recall anyway, so threshold on precision only.
                recall = 1

            # NOTE(review): a zero recall forces the score to 0 in
            # difference mode too, although nothing is divided there --
            # confirm this is deliberate.
            if recall == 0:
                ratio = 0
            elif EntailClassifier.do_ratio:
                ratio = precision / recall
            else:
                ratio = precision - recall

            target = positives if int(result) == 1 else negatives
            target.append(ratio)
            done += 1
            if self.verbose and done % 1000 == 0:
                print("Trained on " + str(done))
        print("%d %d" % (len(positives), len(negatives)))

        threshold = Separator.separate(positives, negatives,
                                       trials=1000000, integer=False)
        return [threshold]
Exemplo n.º 7
0
    def test_separated_lists(self):
        """Disjoint samples: the expected threshold is exactly 4.

        Every negative (largest is 3) lies below every positive (smallest
        is 5), so the two lists do not overlap.
        """
        expected_threshold = 4
        below = [-4, 2, 3]      # negatives
        above = [5, 10, 12]     # positives

        self.assertEqual(Separator.separate(above, below),
                         expected_threshold)
Exemplo n.º 8
0
    def test_separated_lists(self):
        """Non-overlapping lists must yield a threshold of 4.

        The largest negative is 3 and the smallest positive is 5, so the
        samples are cleanly separable.
        """
        clean_negatives = [-4, 2, 3]
        clean_positives = [5, 10, 12]

        threshold = Separator.separate(clean_positives, clean_negatives)

        self.assertEqual(threshold, 4)