def computeF1(self, modelName, sentences):
        """Score the predictions of the model `modelName` on `sentences`.

        The gold labels are read from each sentence under the label key
        registered for `modelName`, predictions come from
        `self.predictLabels`, and both are handed to
        `BIOF1Validation.compute_f1` twice: once with 'O' and once with 'B'
        as the fourth argument. Whichever call yields the higher third value
        wins (the 'O' result is kept on ties).

        Returns:
            The three values unpacked from `BIOF1Validation.compute_f1`
            (named pre, rec, f1 here; presumably precision, recall and F1).

        Raises:
            KeyError: if `modelName` is missing from one of the lookups.
            ValueError: if the label key contains no underscore.
        """
        labelKey = self.labelKeys[modelName]
        network = self.models[modelName]
        labelLookup = self.idx2Labels[modelName]

        goldLabels = [sentence[labelKey] for sentence in sentences]
        predictedLabels = self.predictLabels(network, sentences)

        # The part of the label key after its first underscore is passed on
        # as the encoding scheme.
        schemeStart = labelKey.index('_') + 1
        encodingScheme = labelKey[schemeStart:]

        pre, rec, f1 = BIOF1Validation.compute_f1(
            predictedLabels, goldLabels, labelLookup, 'O', encodingScheme)
        altPre, altRec, altF1 = BIOF1Validation.compute_f1(
            predictedLabels, goldLabels, labelLookup, 'B', encodingScheme)

        # `not a > b` (rather than `a <= b`) keeps the 'O' result whenever the
        # comparison is false, including for NaN scores.
        if not altF1 > f1:
            return pre, rec, f1

        message = "Setting wrong tags to B- improves from %.4f to %.4f" % (
            f1, altF1)
        logging.debug(message)
        return altPre, altRec, altF1
    def computeF1(self, modelName, sentences, tosave=False):
        """Score the model `modelName` on `sentences` and run conllEVAL.

        Besides computing the scores via `BIOF1Validation.compute_f1` (tried
        with 'O' and with 'B' as fourth argument, keeping the higher third
        value), this method prints samples of the labels to stdout and calls
        `conllEVAL.evaluate` on the flattened, string-mapped label sequences.

        Args:
            modelName: key into `self.labelKeys`, `self.models` and
                `self.idx2Labels`.
            sentences: sequence of mappings holding the gold labels under the
                model's label key.
            tosave: when truthy, keep the gold labels on `self.correctLabels`
                and the predictions on `self.newpredLabels`, and print the
                first 50 (id, label) pairs of the predictions.

        Returns:
            The three values unpacked from `BIOF1Validation.compute_f1`
            (named pre, rec, f1 here; presumably precision, recall and F1).
        """
        labelKey = self.labelKeys[modelName]
        network = self.models[modelName]
        idx2Label = self.idx2Labels[modelName]

        goldIds = [sentence[labelKey] for sentence in sentences]
        predictedIds = self.predictLabels(network, sentences)

        if tosave:
            self.correctLabels = goldIds
            self.newpredLabels = predictedIds
            idLabelPairs = [
                (labelId, idx2Label[labelId])
                for row in predictedIds
                for labelId in row
            ]
            print(idLabelPairs[:50])

        # The part of the label key after its first underscore is passed on
        # as the encoding scheme.
        schemeStart = labelKey.index('_') + 1
        encodingScheme = labelKey[schemeStart:]

        pre, rec, f1 = BIOF1Validation.compute_f1(
            predictedIds, goldIds, idx2Label, 'O', encodingScheme)
        altPre, altRec, altF1 = BIOF1Validation.compute_f1(
            predictedIds, goldIds, idx2Label, 'B', encodingScheme)

        if altF1 > f1:
            message = "Setting wrong tags to B- improves from %.4f to %.4f" % (
                f1, altF1)
            logging.debug(message)
            pre, rec, f1 = altPre, altRec, altF1

        # Show the first few per-sentence id sequences, then flatten both
        # sides into single lists of label strings for conllEVAL.
        print(goldIds[:5], '\n', predictedIds[:5])
        goldTags = [idx2Label[labelId] for row in goldIds for labelId in row]
        predictedTags = [
            idx2Label[labelId] for row in predictedIds for labelId in row
        ]
        print(goldTags[:50], '\n', predictedTags[:50])
        conllEVAL.evaluate(goldTags, predictedTags)

        return pre, rec, f1
    def computeF1(self, sentences, name=''):
        """Score the predictions for `sentences`, ignoring padding positions.

        Positions whose entry in a sentence's 'tokens' equals 0 are treated as
        padding and dropped from both the gold labels (read under
        `self.labelKey`) and the predictions before scoring.

        Args:
            sentences: sequence of mappings with a 'tokens' entry and the gold
                labels under `self.labelKey`.
            name: suffix used in the output name when `self.writeOutput` is
                truthy.

        Returns:
            The three values unpacked from `BIOF1Validation.compute_f1`
            (named pre, rec, f1 here; presumably precision, recall and F1).
        """
        paddedPredLabels = self.predictLabels(sentences)

        correctLabels = []
        predLabels = []
        for sentIdx, sentence in enumerate(sentences):
            # Positions holding a real (non-padding) token.
            realPositions = [
                pos for pos, token in enumerate(sentence['tokens'])
                if token != 0
            ]
            correctLabels.append(
                [sentence[self.labelKey][pos] for pos in realPositions])
            predLabels.append(
                [paddedPredLabels[sentIdx][pos] for pos in realPositions])

        # NOTE(review): the scheme is fixed to the string '0' (digit zero)
        # instead of being derived from the label key; the 'B' correction
        # pass is not run here either. Confirm both are intended.
        encodingScheme = '0'
        print("encoding Scheme ", encodingScheme)
        pre, rec, f1 = BIOF1Validation.compute_f1(
            predLabels, correctLabels, self.idx2Label, 'O', encodingScheme)

        if self.writeOutput:
            outputTag = '%.4f_%s' % (f1, name)
            self.writeOutputToFile(sentences, predLabels, outputTag)

        return pre, rec, f1
示例#4
0
    def computeF1(self, modelName, sentences, mode="", epoch=0):
        """Score the model `modelName` on `sentences` with the CoNLL scorer.

        Gold labels are read from each sentence under the label key registered
        for `modelName` and cut to the length of that sentence's
        'raw_tokens'. Predictions come from `self.predictLabels`.

        Args:
            modelName: key into `self.labelKeys`, `self.models` and
                `self.idx2Labels`.
            sentences: sequence of mappings with a 'raw_tokens' entry and the
                gold labels under the model's label key.
            mode: "dev" stores the predictions on
                `self.current_dev_prediction`, "test" stores them on
                `self.current_test_prediction`; any other value stores
                nothing.
            epoch: accepted for interface compatibility; not used by this
                method.

        Returns:
            The three values unpacked from
            `BIOF1Validation.compute_f1_conll` (named pre, rec, f1 here;
            presumably precision, recall and F1).

        Raises:
            KeyError: if `modelName` is missing from one of the lookups.
        """
        labelKey = self.labelKeys[modelName]
        print("Label key : {}".format(labelKey))
        model = self.models[modelName]
        idx2Label = self.idx2Labels[modelName]

        correctLabels = [
            sentence[labelKey][:len(sentence['raw_tokens'])]
            for sentence in sentences
        ]
        predLabels = self.predictLabels(model, sentences)

        # The two modes are mutually exclusive, so at most one attribute is
        # updated per call.
        if mode == "dev":
            self.current_dev_prediction = predLabels
        elif mode == "test":
            self.current_test_prediction = predLabels

        # The encoding scheme is intentionally not derived from the label key
        # here: compute_f1_conll does not take it, and deriving it would fail
        # for label keys that contain no underscore.
        # Note the argument order: gold labels first, predictions second.
        pre, rec, f1 = BIOF1Validation.compute_f1_conll(
            correctLabels, predLabels, idx2Label)

        return pre, rec, f1
示例#5
0
    def computeF1(self, task, predLabels, correctLabels):
        """Score `predLabels` against `correctLabels` for `task`.

        `BIOF1Validation.compute_f1` is called twice, once with 'O' and once
        with 'B' as the fourth argument, and the result with the higher third
        value is returned (the 'O' result is kept on ties).

        Args:
            task: key into `self.idx2Labels`; the part after its first
                underscore is passed on as the encoding scheme.
            predLabels: predicted labels, forwarded unchanged.
            correctLabels: gold labels, forwarded unchanged.

        Returns:
            The three values unpacked from `BIOF1Validation.compute_f1`
            (named pre, rec, f1 here; presumably precision, recall and F1).

        Raises:
            KeyError: if `task` is not in `self.idx2Labels`.
            ValueError: if `task` contains no underscore.
        """
        labelLookup = self.idx2Labels[task]
        schemeStart = task.index('_') + 1
        encodingScheme = task[schemeStart:]

        pre, rec, f1 = BIOF1Validation.compute_f1(
            predLabels, correctLabels, labelLookup, 'O', encodingScheme)
        altPre, altRec, altF1 = BIOF1Validation.compute_f1(
            predLabels, correctLabels, labelLookup, 'B', encodingScheme)

        # `not a > b` (rather than `a <= b`) keeps the 'O' result whenever the
        # comparison is false, including for NaN scores.
        if not altF1 > f1:
            return pre, rec, f1

        message = "Setting wrong tags to B- improves from %.4f to %.4f" % (
            f1, altF1)
        logging.debug(message)
        return altPre, altRec, altF1