def test():
    """Evaluate the saved CRF model on the held-out test file and print a BIO report."""
    tagger = pycrfsuite.Tagger()
    tagger.open('crfsuite.model')
    test_file = os.path.join(os.path.dirname(__file__), '../data/test.txt')
    test_sents, test_tags = NnPreprocessor.read_tagged_file(test_file)
    feature_seqs = [sent2features(sent) for sent in test_sents]
    gold_labels = test_tags
    predicted = [tagger.tag(seq) for seq in feature_seqs]
    print(bio_classification_report(gold_labels, predicted))
def main(training_file, testing_file, model_file, ft):
    """Train a CRF with the feature template `ft`, evaluate it, and print a report.

    ft == "honnibal" short-circuits to a rule-based evaluation with no
    CRF training (Honnibal et al. special case).
    """
    start = time.time()
    # Get training and testing data. Entries where RESTR/NON-RESTR is not
    # defined (written as "_") are ignored by get_input.
    training_set = get_input(training_file)
    testing_set = get_input(testing_file)
    # Special training case for Honnibal et al.
    if ft == "honnibal":
        y_test = [MyClassifiers.get_labels(s) for s in testing_set]
        y_pred = [MyClassifiers.get_features(s, ft) for s in testing_set]
        # NOTE(review): argument order here (pred, test) differs from the
        # (test, pred) call at the end of this function — confirm which
        # order printReport actually expects.
        printReport(y_pred, y_test, ft)
        return
    # Feature/label sequences for the training set (comprehension replaces
    # the original append loop and its unused `count` counter).
    X_train = [MyClassifiers.get_features(s, ft) for s in training_set]
    y_train = [MyClassifiers.get_labels(s) for s in training_set]
    # Feature/label sequences for the testing set
    X_test = [MyClassifiers.get_features(s, ft) for s in testing_set]
    y_test = [MyClassifiers.get_labels(s) for s in testing_set]
    # Create and configure the CRF trainer
    trainer = pycrfsuite.Trainer(verbose=False)
    trainer.set_params(getTrainerFeatures(ft))
    for xseq, yseq in zip(X_train, y_train):
        trainer.append(xseq, yseq)
    # NOTE(review): the model_file parameter is unused; the model is saved
    # under the module-level model_file_loc instead — confirm intended.
    trainer.train(model_file_loc + ft + "Model.crfsuite")
    # Load the trained model for the prediction task
    trained_model = pycrfsuite.Tagger()
    trained_model.open(model_file_loc + ft + "Model.crfsuite")
    # Predict tags for the test set
    y_pred = [trained_model.tag(xseq) for xseq in X_test]
    # Print precision, recall and F1
    end = time.time()
    print('Runtime:', end - start)
    printReport(y_test, y_pred, ft)
def nerc(input_dir, model_file, output_file):
    """Tag every XML file in input_dir with the CRF at model_file and write
    the recognized entities to output_file."""
    input_files = os.listdir(input_dir)
    crf_tagger = pycrfsuite.Tagger()
    crf_tagger.open(model_file)
    # Fix: use a context manager — the original open() handle was never
    # closed, so output could be lost on an early error.
    with open(output_file, 'w') as out:
        for file in input_files:
            tree = parseXML(input_dir + '/' + file)
            for sentence in tree:
                (id, text, _) = get_sentence_info(sentence)
                tokens = tokenize(text)
                features = extract_features(tokens)
                classes = crf_tagger.tag(features)
                output_entities(id, tokens, classes, out)
def test_crf(file, tagbot=None):
    """Classify the sentences of an HTML file with a pre-trained CRF.

    Returns (text, X_test, Y_pred): the raw sentences, their feature
    sequences, and the predicted label sequences. When tagbot is None the
    model is loaded from 'crf_alltrain.model'.
    """
    text, indices = parse_html(file)
    length = len(indices)
    test_sentences = []
    for line in text:
        tokens = nltk.tokenize.word_tokenize(line)
        # Gold labels are unknown at test time; use '-' placeholders so the
        # sentences fit the (token, label) interface expected downstream.
        labels = [(token, '-') for token in tokens]
        test_sentences.append(labels)
    test_postag = crf_train.add_postag(test_sentences)
    # Each sentence's features include its relative position in the document.
    X_test = [
        crf_train.sentenceToFeatures(test_postag[i], indices[i] * 1.0 / length)
        for i in range(len(test_postag))
    ]
    tagger = tagbot
    if tagger is None:  # fix: identity comparison with None (was `== None`)
        tagger = pycrfsuite.Tagger()
        tagger.open('crf_alltrain.model')
    Y_pred = [tagger.tag(xseq) for xseq in X_test]
    # (removed a large commented-out evaluation section: gold labels are not
    # available here, so it could never have run)
    return text, X_test, Y_pred
def from_disk(self, model_path, tokenizer_list, *args, **kwargs):
    """Load the CRF model and pickled feature functions from model_path,
    then wire both into every tokenizer in tokenizer_list."""
    self.model_file = self.get_model_file(model_path)
    self.crf_tagger = pycrfsuite.Tagger()
    self.crf_tagger.open(self.model_file)
    char2feature_path = self.get_char2feature_file(model_path)
    with open(char2feature_path, 'rb') as pickled:
        self.feature_func_list = pickle.load(pickled)
    for tok in tokenizer_list:
        tok.assign_from_loader(crf_tagger=self.crf_tagger,
                               feature_func_list=self.feature_func_list)
def predict():
    """Tag the module-level testX sequences and return predictions with
    per-token marginal confidences.

    Returns (predictedY, testY, confidences, confidences_beam, model_info):
    confidences holds the marginal of the chosen tag at each position;
    confidences_beam holds the marginal of every tag in train.int2tags.
    """
    tagger = crf.Tagger()
    tagger.open(trained_model)
    predictedY = []
    confidences = []
    confidences_beam = []
    for xseq in testX:
        yseq = tagger.tag(xseq)
        predictedY.append(yseq)
        # Idiom: enumerate instead of range(len(...)) indexing.
        confidences.append([tagger.marginal(tag, i) for i, tag in enumerate(yseq)])
        confidences_beam.append([[tagger.marginal(tag, i) for tag in train.int2tags]
                                 for i in range(len(yseq))])
    return predictedY, testY, confidences, confidences_beam, tagger.info()
def prediction():
    """Load the MSR gold test set and print predicted vs. correct labels
    for one example sentence (index 100)."""
    gold_path = 'msr_test_gold.utf8'
    test_set = LoadData(gold_path)
    tagger = pycrfsuite.Tagger()
    tagger.open('msr.crfsuite')
    sample = test_set[100]
    print(sample)
    print(sent2tokens(sample))
    print(' '.join(sent2tokens(sample)), end='\n\n')
    print("Predicted:", ' '.join(tagger.tag(sent2features(sample))))
    print("Correct: ", ' '.join(sent2labels(sample)))
def __init__(self):
    """Load the pre-trained segmentation CRF and build its feature transformer."""
    self.model = pycrfsuite.Tagger()
    model_path = join(dirname(__file__), "model_9.bin")
    self.model.open(model_path)
    # Feature template: context-window lower/digit/title/dictionary checks,
    # word n-grams, and the two previous BI tags.
    template = [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower", "T[2].lower",
        "T[-1].isdigit", "T[0].isdigit", "T[1].isdigit",
        "T[-1].istitle", "T[0].istitle", "T[1].istitle",
        "T[0,1].istitle", "T[0,2].istitle",
        "T[-2].is_in_dict", "T[-1].is_in_dict", "T[0].is_in_dict",
        "T[1].is_in_dict", "T[2].is_in_dict",
        "T[-2,-1].is_in_dict", "T[-1,0].is_in_dict", "T[0,1].is_in_dict",
        "T[1,2].is_in_dict", "T[-2,0].is_in_dict", "T[-1,1].is_in_dict",
        "T[0,2].is_in_dict",
        # word unigram and bigram and trigram
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
        "T[-2,0]", "T[-1,1]", "T[0,2]",
        # BI tag
        "T[-2][1]", "T[-1][1]",
    ]
    self.transformer = CustomTransformer(template)
def get_confidence(self, sent):
    """Return [confidence] for the CRF's best tagging of `sent` (a
    whitespace-separated string).

    Confidence is the geometric mean of the sequence probability over its
    length; a fixed 0.2 is returned when no trained model exists yet.
    """
    tokens = sent.split()
    # sent2features expects (token, label) pairs; use a dummy label.
    pairs = [(tok, '') for tok in tokens]
    features = sent2features(pairs)
    # Check for the model before constructing a Tagger (the original built
    # the Tagger first and threw it away on this early return).
    if not os.path.isfile(self.model_file):
        return [0.2]
    tagger = pycrfsuite.Tagger()
    tagger.open(self.model_file)
    tagger.set(features)
    Y_pred = tagger.tag()
    # Robustness: an empty sentence yields an empty tagging; avoid the
    # ZeroDivisionError in the geometric mean below.
    if not Y_pred:
        return [0.2]
    p_y_pred = tagger.probability(Y_pred)
    confidence = pow(p_y_pred, 1. / len(Y_pred))
    return [confidence]
def __init__(self, model_path, model_name, save_path=None, start_iter=0):
    """Set up CRF trainer/tagger state and create the model (and optional
    save) directories.

    :param model_path: directory where trained models are written
    :param model_name: base name of the model
    :param save_path: optional extra output directory
    :param start_iter: iteration counter to resume self-training from
    """
    self.model_path = model_path
    self.model_name = model_name
    self.trainer = pycrfsuite.Trainer(verbose=False)
    self.tagger = pycrfsuite.Tagger()
    self.iter = start_iter
    # Fix: the original commented this assignment out but still read
    # self.save_path below, which raises AttributeError; restore it.
    self.save_path = save_path
    if not os.path.exists(self.model_path):
        os.makedirs(self.model_path)
    if self.save_path is not None:
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
    if st.DICTIONARY is True or st.SELF_ITER_N > 1:
        # Accumulators across self-training iterations (added for dictionary).
        self.X_total = []
        self.y_total = []
def predict(filepath='', x_test=None):
    """POS-tag each text in x_test with Twitter and label it with the CRF
    model at filepath; return the predicted label sequences.

    :param filepath: path to the trained crfsuite model
    :param x_test: iterable of raw text strings (defaults to empty)
    """
    # Fix: mutable default argument ([]); None behaves identically.
    if x_test is None:
        x_test = []
    twitter = Twitter()
    predicted_labels = []
    # Validate the model path before opening.
    is_filepath_existed(filepath)
    tagger = pycrfsuite.Tagger()
    tagger.open(filepath)
    for text in x_test:
        pos_pairs = twitter.pos(text, norm=True, stem=True)
        # Keep only (token, tag) regardless of extra tuple fields.
        sent = [(p[0], p[1]) for p in pos_pairs]
        predicted_labels.append(tagger.tag(sent2features(sent)))
    return predicted_labels
def test_sequenceLabeler_predict(self):
    """Smoke-test entity extraction: tag a sample sentence with the trained
    model and print the extracted entities."""
    print("test_sequenceLabeler_predict")
    global id
    global model_file
    sentence = "I want to book a cab from Beijing"
    tokens = word_tokenize(sentence)
    tagged = posTagger(sentence)
    tagger = pycrfsuite.Tagger()
    tagger.open(model_file)
    labels = tagger.tag(sequenceLabeler.sentToFeatures(tagged))
    entities = sequenceLabeler.extractEntities(zip(tokens, labels))
    print("extractedEntities:")
    print(entities)
def train(self, docs: Iterable[Doc], algorithm: str, params: dict, path: str) -> None:
    """Train a CRF on the sentences of docs, save it to path, and open it
    as self.tagger.

    Each sentence contributes one (features, encoded-labels) pair; the
    sentence's index within its document is passed to the feature extractor.
    """
    trainer = pycrfsuite.Trainer(algorithm, verbose=False)
    trainer.set_params(params)
    encoder = self.encoder()
    for doc in docs:
        for sent_idx, sent in enumerate(doc.sents):
            tokens = list(sent)
            features = self.feature_extractor.extract(
                [str(token) for token in tokens], sent_idx)
            trainer.append(features, encoder.encode(tokens))
    trainer.train(path)
    self.tagger = pycrfsuite.Tagger()
    self.tagger.open(path)
def train(self, docs: Iterable[Doc], algorithm: str, params: dict, path: str) -> None:
    """Train a CRF over the sentences of docs, save the model to path, and
    load it as self.tagger."""
    trainer = pycrfsuite.Trainer(algorithm, verbose=False)
    trainer.set_params(params)
    for doc in docs:
        for sent in doc.sents:
            sent_tokens = list(sent)
            sent_features = self.feature_extractor.extract(
                [tok.text for tok in sent_tokens])
            trainer.append(sent_features, self._encoder.encode(sent_tokens))
    trainer.train(path)
    self.tagger = pycrfsuite.Tagger()
    self.tagger.open(path)
def main():
    """Train a baseline CRF on all CSVs under sys.argv[1], tag the CSVs
    under sys.argv[2], and write the tags to sys.argv[3]."""
    inputdir = sys.argv[1]
    testdir = sys.argv[2]
    outputfile = sys.argv[3]
    x_list = []
    y_list = []
    for root, dirs, files in os.walk(inputdir):
        for filename in files:
            if filename.endswith(".csv"):
                filepath = os.path.abspath(os.path.join(root, filename))
                utterances = inputtool.get_utterances_from_filename(filepath)
                # extend replaces the original element-by-element append loops
                x_list.extend(sent2features(utterances))
                y_list.extend(sent2labels(utterances))
    trainer = pycrfsuite.Trainer(verbose=False)
    # NOTE(review): the whole corpus is appended as ONE sequence —
    # presumably intentional for this baseline; otherwise append one
    # sequence per dialogue. Confirm.
    trainer.append(x_list, y_list)
    trainer.set_params({
        'c1': 1,
        'c2': 1e-3,
        'max_iterations': 85,
        'feature.possible_states': True,
        'feature.possible_transitions': True
    })
    trainer.train('baseline.crfsuite')
    tagger = pycrfsuite.Tagger()
    tagger.open('baseline.crfsuite')
    # Fix: open in 'w' (the original opened in 'a' then truncated) and use a
    # context manager so the file is closed even on error.
    with open(outputfile, "w") as f:
        for root, dirs, files in os.walk(testdir):
            for filename in files:
                if filename.endswith(".csv"):
                    filepath = os.path.abspath(os.path.join(root, filename))
                    utterances = inputtool.get_utterances_from_filename(filepath)
                    outputlist = tagger.tag(sent2features(utterances))
                    f.write('Filename="')
                    f.write(filename)
                    f.write('"')
                    f.write('\n')
                    for y in outputlist:
                        f.write(y)
                        f.write('\n')
                    f.write('\n')
def get_summary(file):
    """Combine CRF sentence-type predictions with the k-mix-model ranking.

    Sentences are considered in descending k-mix score; each label may be
    picked at most twice. Selection stops once the summary exceeds
    SUMMARY_PERCENT of the document's word count. Returns the summary as a
    {sentence: label} dict.
    """
    with open(file, 'r') as f:
        text = f.readlines()
    # Total word count of the document (sentences in serialized order).
    doc_length = sum(len(line.split(' ')) for line in text)
    tagger = pycrfsuite.Tagger()
    tagger.open('crf_alltrain.model')
    text, X_test, Y_pred = crf_test.test_crf(file, tagger)
    # kmm maps each sentence id to its k-mix score.
    kmm = k_mix_model_test.KMM(file)
    kmix_sorted = sorted(kmm.items(), key=operator.itemgetter(1), reverse=True)
    visited = {}
    summary = {}
    for sentence_id, _score in kmix_sorted:
        label = Y_pred[sentence_id - 1][0]
        if label not in visited:
            summary[text[sentence_id - 1]] = label
            visited[label] = 1
        elif visited[label] == 2:
            continue
        else:
            visited[label] = 2
            summary[text[sentence_id - 1]] = label
        length = sum(len(key.split(' ')) for key in summary.keys())
        if length > SUMMARY_PERCENT * 0.01 * doc_length:
            break
    # Assemble category-ordered text (currently unused; the dict is returned).
    summary_txt = ''
    order = ['F', 'I', 'A', 'LR', 'SS', 'SP', 'SO', 'R']
    for category in order:
        summary_txt += ''.join(
            [key for key in summary if summary[key] == category]) + '\n'
    return summary
def main(training_file, testing_file, model_file):
    """Train a CRF on training_file, save it to model_file, evaluate it on
    testing_file, and print a BIO classification report."""
    start = time.time()
    # Load both data sets.
    training_set = get_input(training_file)
    testing_set = get_input(testing_file)
    # Feature/label sequences for both splits.
    X_train = [get_features(s) for s in training_set]
    y_train = [get_labels(s) for s in training_set]
    X_test = [get_features(s) for s in testing_set]
    y_test = [get_labels(s) for s in testing_set]
    # Configure and train the CRF.
    trainer = pycrfsuite.Trainer(verbose=False)
    for xseq, yseq in zip(X_train, y_train):
        trainer.append(xseq, yseq)
    trainer.set_params({
        'c1': 0.5,               # coefficient for L1 penalty
        'c2': 1e-3,              # coefficient for L2 penalty
        'max_iterations': 1000,  # stop earlier
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    })
    trainer.train(model_file)
    print("Log of last iteration={}".format(trainer.logparser.iterations[-1]))
    # Load the trained model and tag the test set.
    trained_model = pycrfsuite.Tagger()
    trained_model.open(model_file)
    y_pred = [trained_model.tag(xseq) for xseq in X_test]
    # Report precision, recall and F1.
    print(bio_classification_report(y_test, y_pred))
    end = time.time()
    print('CRF model has been generated.')
    print('runtime:', end - start)
def predict(test_features, test_labels):
    """Tag each conversation in test_features, write the predicted labels to
    super_output.txt, and print overall accuracy against test_labels."""
    # Fix: pycrfsuite.Tagger (unlike Trainer) takes no `verbose` kwarg — the
    # original Tagger(verbose=False) call would raise TypeError.
    predictor = pycrfsuite.Tagger()
    predictor.open("super_advanced_dialog_act_tagger.crfsuite")
    correct_predictions = 0
    total_predictions = 0
    # Fix: context manager so the output file is closed even on error.
    with open("super_output.txt", "w+") as output_file:
        for conversation in range(len(test_features)):
            for label_index, predicted_label in enumerate(
                    predictor.tag(test_features[conversation])):
                if predicted_label == test_labels[conversation][label_index]:
                    correct_predictions += 1
                total_predictions += 1
                output_file.write(predicted_label + "\n")
            # Blank line between conversations.
            output_file.write("\n")
    print("Accuracy is ", (correct_predictions / total_predictions))
def __init__(self, debug=False):
    """Load the RDR tree and the NER CRF model; exit the process if the CRF
    model file is missing."""
    rdr_path = get_data_file("named.rdr", folder="vietnamese")
    self.__root = SCRDRTree()
    self.__root.constructSCRDRtreeFromRDRfile(rdr_path)
    crf_path = get_data_file("ner.crf.bin", folder="models")
    if not path.isfile(crf_path):
        logging.error("Model %s not found " % crf_path)
        print("Model %s not found " % crf_path)
        exit()
    self.__crf: pycrfsuite.Tagger = pycrfsuite.Tagger()
    self.__crf.open(crf_path)
    self.__nlp = None
    self.__debug = debug
    self.__adapter: DocFeatures = DocFeatures()
    logging.info("Labels in model(Semi_Supervised_Doc_Ner) : %s"
                 % str(self.__crf.labels()))
def predict(op_file):
    """Tag every entry of the module-level map_features dict, write the
    predicted tags to op_file, and return (correct, total) counts.

    map_features values are assumed to hold the feature sequence at index 0
    and the gold tags at index 1.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open('train.crfsuite')
    total = 0
    correct = 0
    # Fix: context manager guarantees the output file is closed.
    with open(op_file, 'w') as out:
        for name, entry in map_features.items():
            feats, gold_tags = entry[0], entry[1]
            tags = tagger.tag(feats)
            out.write("Filename=\"" + name + "\"\n")
            # Idiom: iterate predictions and gold labels in lockstep instead
            # of indexing with range(len(...)).
            for predicted, gold in zip(tags, gold_tags):
                total += 1
                if gold == predicted:
                    correct += 1
                out.write(predicted + "\n")
            out.write("\n")
    return correct, total
def Test(test_file):
    """Evaluate the PKU CRF model on the first 3000 pickled test sentences
    and print the evaluation result."""
    with open(test_file, 'rb') as rp:
        sentences = pickle.load(rp)
    sentences = sentences[:3000]
    X_test = [sent2features(s) for s in sentences]
    y_test = [sent2labels(s) for s in sentences]
    tagger = pycrfsuite.Tagger()
    tagger.open('PKU.crfsuite')
    y_pred = [tagger.tag(features) for features in X_test]
    res = Evaluation(y_test, y_pred)
    print(res)
def load_models(lang, dir=None):
    """Load the trie, CRF tagger, and lemmatiser for `lang` into the module
    globals trie/tagger/lemmatiser.

    :param lang: language code used as the model filename prefix
    :param dir: directory containing the model files
    """
    global trie
    global tagger
    global lemmatiser
    # Idiom fix: identity comparison with None (was `dir != None`).
    if dir is not None:
        reldir = dir
    # NOTE(review): when dir is None, reldir is unassigned and the lines
    # below raise UnboundLocalError — this matches the original behavior;
    # confirm callers always pass dir, or add a default directory.
    # Fix: context managers close the pickle file handles (the originals
    # were never closed).
    with open(os.path.join(reldir, lang + '.marisa'), 'rb') as f:
        trie = pickle.load(f)
    tagger = pycrfsuite.Tagger()
    tagger.open(os.path.join(reldir, lang + '.msd.model'))
    with open(os.path.join(reldir, lang + '.lexicon.guesser'), 'rb') as f:
        guesser_model = pickle.load(f)
    with open(os.path.join(reldir, lang + '.lexicon'), 'rb') as f:
        lexicon = pickle.load(f)
    lemmatiser = {'model': guesser_model, 'lexicon': lexicon}
def classifier(features):
    """
    Creates a list with predicted labels for each feature, using a
    previously instanciated learner.

    :param features: A list of feature vectors
    :returns: A list of predicted classes for each feature
    """
    tagger = pycrfsuite.Tagger()
    tagger.open(MODEL)
    # TODO: Placeholder, feature vector might need to be prepared
    return [tagger.tag(feature) for feature in features]
def pred(data):
    """Tag each document in `data` with the trained CRF model.

    :param data: list of documents, each a sequence of token records
    :return: list of predicted label sequences (list[list[str]])
    """
    # (removed unused inner helpers get_labels/get_token — dead code that
    # was never called)
    X_test = [[train_ashford.create_feature(d, i) for i in range(len(d))]
              for d in data]
    tagger = pycrfsuite.Tagger()
    tagger.open(modelM)
    y_pred = [tagger.tag(xseq) for xseq in X_test]
    return y_pred
def load(cls, model_dir, model_name):
    # type: (Text, Text) -> CRFEntityExtractor
    """Restore a CRFEntityExtractor (tagger + config) from model_dir, or
    build an empty extractor when no model location is given."""
    import pycrfsuite

    # Guard clause: no model to load.
    if not (model_dir and model_name):
        return CRFEntityExtractor()
    ent_tagger = pycrfsuite.Tagger()
    ent_tagger.open(os.path.join(model_dir, 'ner', model_name))
    config_path = os.path.join(model_dir, 'ner', 'crf_config.json')
    config = json.load(io.open(config_path, 'r'))
    return CRFEntityExtractor(ent_tagger=ent_tagger,
                              crf_features=config['crf_features'],
                              BILOU_flag=config['BILOU_flag'])
def perform_k_fold(k, x_train, y_train):
    """Run k-fold cross-validation of the CRF and return the per-fold
    accuracy scores.

    :param k: number of folds
    :param x_train: list of feature sequences
    :param y_train: list of label sequences
    :return: list of k accuracy scores
    """
    kf = KFold(n_splits=k, shuffle=True)
    scores = []
    # Fix: the original called next(kf.split(x_train)) INSIDE the loop,
    # creating a fresh shuffled generator each iteration and always taking
    # its first fold — the data was never partitioned into k folds.
    # Iterating one split() generator yields each fold exactly once.
    for train_indices, test_indices in kf.split(x_train):
        x_training_data = [x_train[i] for i in train_indices]
        y_training_data = [y_train[i] for i in train_indices]
        x_testing_data = [x_train[i] for i in test_indices]
        y_testing_data = [y_train[i] for i in test_indices]
        trainer = pycrfsuite.Trainer(verbose=False)
        for xseq, yseq in zip(x_training_data, y_training_data):
            trainer.append(xseq, yseq)
        trainer.set_params({
            'c1': 1.0,  # coefficient for L1 penalty
            'c2': 1e-3,  # coefficient for L2 penalty
            'max_iterations': 50,  # stop earlier
            # include transitions that are possible, but not observed
            'feature.possible_transitions': True
        })
        print("start training")
        trainer.train('advanced_model')
        print("finish training ")
        print(len(trainer.logparser.iterations),
              trainer.logparser.iterations[-1])
        tagger = pycrfsuite.Tagger()
        tagger.open('advanced_model')
        y_pred = [tagger.tag(xseq) for xseq in x_testing_data]
        flat_list_true = [item for sublist in y_testing_data for item in sublist]
        flat_list_pred = [item for sublist in y_pred for item in sublist]
        acc_score = accuracy_score(flat_list_true, flat_list_pred,
                                   normalize=True, sample_weight=None)
        scores.append(acc_score)
    # Fix: scores were computed but never returned.
    return scores
def extract(text):
    """Tag a free-text game request with the CRF and collect entity lists.

    Returns (sentence, entitylist): the tagged token data and a dict mapping
    genre/age/price/rating/characters to the extracted words.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open('model/recommend_game.crfsuite')
    # Treat " and" as a sentence separator, then split on periods.
    parts = text.replace(' and', '.').split('.')
    sentence = input_prep(parts)
    features = [sent2features(s) for s in sentence]
    tagList = [tagger.tag(feat) for feat in features]
    print(tagList)
    # Attach each non-O predicted tag to its token in place.
    for s_idx, tag_seq in enumerate(tagList):
        for w_idx, tag in enumerate(tag_seq):
            if tag != 'O':
                token = sentence[s_idx][w_idx]
                sentence[s_idx][w_idx] = (token[0], token[2], tag)
    ratingList = []
    genreList = []
    priceList = []
    ageList = []
    characterList = []
    # Bucket tagged tokens into entity lists.
    for sent in sentence:
        for word in sent:
            kind = word[2]
            if 'genre' in kind:
                genreList.append(word[0])
            elif 'age' in kind:
                if word[0].isdigit():
                    ageList.append(word[0])
            elif 'price' in kind:
                if 'free' in word[0]:
                    priceList.append('0')
                elif word[0].replace('$', '').isdigit():
                    priceList.append(word[0].replace('$', ''))
            elif 'rating' in kind:
                ratingList.append(word[0])
            elif 'character' in kind:
                characterList.append(word[0])
    entitylist = {
        'genre': genreList,
        'age': ageList,
        'price': priceList,
        'rating': ratingList,
        'characters': characterList
    }
    return sentence, entitylist
def do_test(reader):
    """Print predicted vs. gold labels for every test sentence and return
    (x_test, y_test) feature/label sequences."""
    test_sents = reader.iob_sents('test')
    x_test = [sent2features(s) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]
    tagger = pycrfsuite.Tagger()
    tagger.open('model.crfsuite')
    for sent in test_sents:
        sys.stdout.write('\t')
        print('\t'.join(sent2tokens(sent)))
        print("Predicted:\t", '\t'.join(tagger.tag(sent2features(sent))))
        print("Correct:\t", '\t'.join(sent2labels(sent)))
    return (x_test, y_test)
def tag_evaluate(test_sents, crfsuite_model, templates, k):
    """Tag test_sents with the CRF model at crfsuite_model, using the given
    attribute templates, and return the predicted label sequences.

    (A commented-out block here once wrote fold k's results to disk and
    scored them with conlleval.pl.)
    """
    x_test = [ccks.sent2attributes(s, templates) for s in test_sents]
    tagger = pycrfsuite.Tagger()
    tagger.open(crfsuite_model)
    y_pred = [tagger.tag(attrs) for attrs in x_test]
    return y_pred
def predict(self, model_name, sentence):
    """
    Predict NER labels for given model and query

    :param model_name:
    :param sentence:
    :return:
    """
    from app.nlu.tasks import pos_tagger

    tokens = word_tokenize(sentence)
    tagged = pos_tagger(sentence)
    tagger = pycrfsuite.Tagger()
    tagger.open("{}/{}.model".format(app.config["MODELS_DIR"], model_name))
    labels = tagger.tag(self.sent_to_features(tagged))
    return self.crf2json(zip(tokens, labels))