def eval(self, sentence_result, y_data, progress=False):
    slot_result, domain_result = list(zip(*y_data))
    y_pred, y_pred_target = self.predict(sentence_result, progress=progress)
    y_test = slot_result
    y_target = np.array([[x] for x in domain_result])
    y_pred_target = np.array([[x] for x in y_pred_target])
    # print(y_target.shape)
    # print(y_pred_target.shape)
    return OrderedDict((
        ('accuracy', metrics.flat_accuracy_score(y_test, y_pred)),
        ('precision', metrics.flat_precision_score(y_test, y_pred, average='weighted')),
        ('recall', metrics.flat_recall_score(y_test, y_pred, average='weighted')),
        ('f1', metrics.flat_f1_score(y_test, y_pred, average='weighted')),
        ('softmax_accuracy', metrics.flat_accuracy_score(y_target, y_pred_target)),
        ('softmax_precision', metrics.flat_precision_score(y_target, y_pred_target, average='weighted')),
        ('softmax_recall', metrics.flat_recall_score(y_target, y_pred_target, average='weighted')),
        ('softmax_f1', metrics.flat_f1_score(y_target, y_pred_target, average='weighted')),
    ))

def test_training(storage, capsys):
    annotations = list(
        a for a in storage.iter_annotations(simplify_form_types=True,
                                            simplify_field_types=True)
        if a.fields_annotated
    )[:300]
    crf = train(
        annotations=annotations,
        use_precise_form_types=False,
        optimize_hyperparameters_iters=10,
        full_form_type_names=False,
        full_field_type_names=False,
    )
    out, err = capsys.readouterr()
    assert "Training on 300 forms" in out
    assert "realistic form types" in out
    assert "Best hyperparameters" in out

    assert 0.0 < crf.c1 < 1.5
    assert 0.0 < crf.c2 < 0.9
    assert (crf.c1, crf.c2) != _REALISTIC_C1_C2
    assert (crf.c1, crf.c2) != _PRECISE_C1_C2

    form_types = np.asarray([a.type for a in annotations])
    X, y = get_Xy(annotations, form_types, full_type_names=False)
    y_pred = crf.predict(X)
    score = flat_accuracy_score(y, y_pred)
    assert 0.9 < score < 1.0  # overfitting FTW!

    field_schema = storage.get_field_schema()
    short_names = set(field_schema.types_inv.keys())
    assert set(crf.classes_).issubset(short_names)

def predict(self, data, y=None, tag=None):
    if (y is not None) and (tag is not None):
        # Tag each word in data with its corresponding tag
        t = tagger(X=data)
        tagged_data_ = t.fit(X=t.tag(), y=y, tag=tag)
        # Generate the features required for the conditional random field
        f = Features(X=tagged_data_, num_words=self.num_features)
        x_test, y_test = f.get
        # Load the trained model from finalized_model.sav
        loaded_model = pickle.load(open('finalized_model.sav', 'rb'))
        # Predict on the test data
        result = loaded_model.predict(x_test)
        # Print the classification report and accuracy
        print('\n\n Classification Report: \n',
              flat_classification_report(y_test, result))
        print('Accuracy:', flat_accuracy_score(y_test, result))
    elif (y is None) and (tag is None):
        # data is tagged with a list of tuples (token, POS tag, lemmatized word, other tag)
        t = tagger(X=data)
        tagged_data_ = t.tag()
        # Generate the features required for the conditional random field
        f = Features(X=tagged_data_, num_words=self.num_features)
        x_test, _ = f.get
        # Load the trained model from finalized_model.sav
        loaded_model = pickle.load(open('finalized_model.sav', 'rb'))
        # Predict on the test data
        result = loaded_model.predict(x_test)
        # # tokenizing test data
        # final = pd.DataFrame()
        # final['description'] = [re.findall('[A-Za-z0-9]+', i) for i in data]
        # final['result'] = result
        # def func(df, tag):
        #     mainlist = []
        #     for i in range(len(df)):
        #         sublist = []
        #         desc = df['result'].iloc[i]
        #         for j in range(len(desc)):
        #             if tag == desc[j]:
        #                 sublist.append(df['description'].iloc[i][j])
        #         if len(sublist) != 0:
        #             mainlist.append(' '.join(sublist))
        #         else:
        #             mainlist.append("not assigned")
        #     return mainlist
        # products = func(final, 'P')
        # issues = func(final, 'I')
        # finalresult = pd.DataFrame()
        # finalresult['Products'] = products
        # finalresult['Issues'] = issues
    return result

def running_metrics(p):
    intent_predictions, slot_predictions = p.predictions
    intent_labels, slot_labels = p.label_ids
    slot_predictions = np.argmax(slot_predictions, axis=2)
    intent_predictions = np.argmax(intent_predictions, axis=1)
    slot_predictions_clean = [
        [p for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(slot_predictions, slot_labels)
    ]
    slot_labels_clean = [
        [l for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(slot_predictions, slot_labels)
    ]
    intent_f1 = f1_score(intent_labels, intent_predictions, average="macro")
    intent_accuracy = accuracy_score(intent_labels, intent_predictions)
    flat_acc = seq_metrics.flat_accuracy_score(slot_labels_clean, slot_predictions_clean)
    flat_f1 = seq_metrics.flat_f1_score(slot_labels_clean, slot_predictions_clean, average="macro")
    slt_f1_weighted = seq_metrics.flat_f1_score(slot_labels_clean, slot_predictions_clean, average="weighted")
    return {
        "flat slot accuracy": flat_acc,
        "flat slot f1": flat_f1,
        "weighted slot f1": slt_f1_weighted,
        "intent f1": intent_f1,
        "intent accuracy": intent_accuracy,
    }

def train(train_file, test_file, min_freq, model_file):
    '''Train a CRF tagger on CoNLL-formatted data.'''
    # Read in initial training data
    conll_data_train = read_conll_data(train_file)
    train_sents = [[line[0] for line in doc] for doc in conll_data_train]
    train_labels = [[line[2] for line in doc] for doc in conll_data_train]

    # Featurize and create instances from the list of sentences
    feat_sent_train = build_dataset(train_sents)
    print("Training on {0} inst".format(len(feat_sent_train)))

    # Create and train the CRF model
    # For different parameter options, see:
    # https://sklearn-crfsuite.readthedocs.io/en/latest/_modules/sklearn_crfsuite/estimator.html
    model = CRF(min_freq=min_freq)
    model.fit(feat_sent_train, train_labels)

    # Test the model on the held-out test set if wanted
    if args.test_file:
        conll_data_test = read_conll_data(test_file)
        test_sents = [[line[0] for line in doc] for doc in conll_data_test]
        test_labels = [[line[2] for line in doc] for doc in conll_data_test]
        feat_sent_test = build_dataset(test_sents)
        # Predict and print accuracy
        pred = model.predict(feat_sent_test)
        acc = metrics.flat_accuracy_score(test_labels, pred)
        print("Accuracy: {0}%".format(float(round(acc, 3)) * 100))

    # Save model to disk if wanted
    if args.model:
        print("Saving model to {0}".format(model_file))
        joblib.dump(model, model_file)

def train1(self, data, y, tag):
    # tagged_data = a.fit(a.tag(), y, tag)
    # Build the features in the form the conditional random field accepts
    feaobj = Features(data, self.num_features)
    x_train, y_train = feaobj.get
    print("labelled data")

    # Define the conditional random field model
    crf = CRF(algorithm='lbfgs',
              c1=0.1,
              c2=0.1,
              max_iterations=100,
              all_possible_transitions=False)
    print(crf)
    crf.fit(x_train, y_train)

    # Save the trained model
    filename = 'finalized_model.sav'
    pickle.dump(crf, open(filename, 'wb'))

    # Predict on the training data
    pred = crf.predict(x_train)

    # Print the classification report and accuracy
    print('\n \n Prediction On Trained Data:\n \n',
          flat_classification_report(y_train, pred))
    print('Accuracy:', flat_accuracy_score(y_train, pred))

def cross_validate(self):
    kfold = KFold(n_splits=3)
    for train_ids, test_ids in kfold.split(self.sentences):
        X_train = [self.sent2features(self.sentences[i][0]) for i in train_ids]
        y_train = [self.sent2labels(self.sentences[i][0], self.sentences[i][1]) for i in train_ids]
        crf = sklearn_crfsuite.CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.2,
            max_iterations=100,
            all_possible_transitions=True
        )
        crf.fit(X_train, y_train)
        labels = list(crf.classes_)
        X_test = [self.sent2features(self.sentences[i][0]) for i in test_ids]
        y_test = [self.sent2labels(self.sentences[i][0], self.sentences[i][1]) for i in test_ids]
        y_pred = crf.predict(X_test)
        for idx, id in enumerate(test_ids):
            print(self.sentences[id][0])
            print(self.sentences[id][1])
            print(y_pred[idx])
            print(y_test[idx])
        # print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
        print(metrics.flat_accuracy_score(y_test, y_pred))

def test_training(storage, capsys):
    annotations = (a for a in storage.iter_annotations(
        simplify_form_types=True,
        simplify_field_types=True,
    ) if a.fields_annotated)
    annotations = list(itertools.islice(annotations, 0, 300))
    crf = train(annotations=annotations,
                use_precise_form_types=False,
                optimize_hyperparameters_iters=2,
                optimize_hyperparameters_folds=2,
                optimize_hyperparameters_jobs=-1,
                full_form_type_names=False,
                full_field_type_names=False)
    out, err = capsys.readouterr()
    assert 'Training on 300 forms' in out
    assert 'realistic form types' in out
    assert 'Best hyperparameters' in out

    assert 0.0 < crf.c1 < 2.5
    assert 0.0 < crf.c2 < 0.9
    assert (crf.c1, crf.c2) != _REALISTIC_C1_C2
    assert (crf.c1, crf.c2) != _PRECISE_C1_C2

    form_types = np.asarray([a.type for a in annotations])
    X, y = get_Xy(annotations, form_types, full_type_names=False)
    y_pred = crf.predict(X)
    score = flat_accuracy_score(y, y_pred)
    assert 0.9 < score < 1.0  # overfitting FTW!

    field_schema = storage.get_field_schema()
    short_names = set(field_schema.types_inv.keys())
    assert set(crf.classes_).issubset(short_names)

def crf_tag():
    brown_tagged_sents = brown.tagged_sents(categories='news')
    # print(brown_tagged_sents[0])
    train_len = int(len(brown_tagged_sents) * 0.9)
    training_sentences = brown_tagged_sents[:train_len]
    test_sentences = brown_tagged_sents[train_len:]
    X_train, y_train = transform_to_dataset(training_sentences)
    X_test, y_test = transform_to_dataset(test_sentences)
    # print(len(X_train))
    # print(len(X_test))
    print(X_train[0])
    print(y_train[0])

    model = CRF()
    model.fit(X_train, y_train)

    raw_sent = ['I', 'am', 'a', 'student']
    sent_feat = [
        feature_extract(raw_sent, index) for index in range(len(raw_sent))
    ]
    print(list(zip(raw_sent, model.predict([sent_feat])[0])))

    y_pred = model.predict(X_test)
    print(metrics.flat_accuracy_score(y_test, y_pred))

def train(self, model_name, tagged_sentences):
    # Split the dataset for training and testing
    cutoff = int(.75 * len(tagged_sentences))
    training_sentences = tagged_sentences[:cutoff]
    test_sentences = tagged_sentences[cutoff:]
    X_train, y_train = transform_to_dataset(training_sentences)
    X_test, y_test = transform_to_dataset(test_sentences)
    print(len(X_train))
    print(len(X_test))

    print("Training Started........")
    print("it will take time according to your dataset size..")
    model = CRF()
    model.fit(X_train, y_train)
    print("Training Finished!")

    print("Evaluating with Test Data...")
    y_pred = model.predict(X_test)
    print("Accuracy is: ")
    print(metrics.flat_accuracy_score(y_test, y_pred))

    pickle.dump(model, open(model_name, 'wb'))
    print("Model Saved!")

def evaluate(self, test=None):
    '''Evaluate the tagger on the test data and return the accuracy (in percent).'''
    t0 = t = time()
    self.logger.info('started evaluation')

    if test:
        self.test = Data(test, sent_cls=self.sent_cls)
        t0, t = t, time()
        self.logger.info('{:.2f}'.format(t - t0) + 's extracted test features')
        self.logger.info('processed ' + str(self.test.num_sents) + ' sentences')

    if not self.test:
        self.logger.error('cannot evaluate without the test data')
        return

    y_true = self.test.labels
    y_pred = self.tagger.predict(self.test.features)
    t0, t = t, time()
    self.logger.info('{:.2f}'.format(t - t0) + 's generated predictions')

    accuracy = 100 * flat_accuracy_score(y_true, y_pred)
    self.logger.info('Accuracy : ' + '{:.2f}'.format(accuracy))
    t0, t = t, time()
    self.logger.info('{:.2f}'.format(t - t0) + 's evaluated test data')
    return accuracy

def evaluate_rnn(y, preds):
    """Because the RNN sequences get clipped as necessary based on the
    `max_length` parameter, they have to be realigned to get a classification
    report. This method does that, building in the assumption that any
    clipped tokens are assigned an incorrect label.

    Parameters
    ----------
    y : list of list of labels
    preds : list of list of labels

    Both of these lists need to have the same length, but the sequences
    they contain can vary in length.
    """
    labels = sorted({c for ex in y for c in ex})
    new_preds = []
    for gold, pred in zip(y, preds):
        delta = len(gold) - len(pred)
        if delta > 0:
            # Make a *wrong* guess for these clipped tokens:
            pred += [random.choice(list(set(labels) - {label}))
                     for label in gold[-delta:]]
        new_preds.append(pred)
    labels = sorted({cls for ex in y for cls in ex} - {'OTHER'})
    data = {}
    data['classification_report'] = flat_classification_report(y, new_preds)
    data['f1_macro'] = flat_f1_score(y, new_preds, average='macro')
    data['f1_micro'] = flat_f1_score(y, new_preds, average='micro')
    data['f1'] = flat_f1_score(y, new_preds, average=None)
    data['precision_score'] = flat_precision_score(y, new_preds, average=None)
    data['recall_score'] = flat_recall_score(y, new_preds, average=None)
    data['accuracy'] = flat_accuracy_score(y, new_preds)
    data['sequence_accuracy_score'] = sequence_accuracy_score(y, new_preds)
    return data

def score(self, X, y):
    """
    Return accuracy score computed for sequence items.

    For other metrics check :mod:`sklearn_crfsuite.metrics`.
    """
    y_pred = self.predict(X)
    return flat_accuracy_score(y, y_pred)

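# Hedged, self-contained usage sketch for the score() method above (the toy
# feature dicts and labels below are invented for illustration, not taken from
# any original project): score(X, y) is simply the flat per-token accuracy of
# predict(X) against the gold label sequences.
import sklearn_crfsuite
from sklearn_crfsuite.metrics import flat_accuracy_score

X_toy = [[{'w': 'red'}, {'w': 'apple'}], [{'w': 'big'}, {'w': 'dog'}]]
y_toy = [['ADJ', 'NOUN'], ['ADJ', 'NOUN']]

crf_toy = sklearn_crfsuite.CRF(algorithm='lbfgs', max_iterations=20)
crf_toy.fit(X_toy, y_toy)
assert crf_toy.score(X_toy, y_toy) == flat_accuracy_score(y_toy, crf_toy.predict(X_toy))
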
def calculate_overall_accuracy_and_f1_score_per_pos(self, print_results=False):
    hmm_tagger = HMMTagger()
    for i in self.train_sizes:
        hmm_tagger.train_tagger(self.train_data[:i])
        train_tags, train_pred = hmm_tagger.predict(self.train_data[:i])
        dev_tags, dev_pred = hmm_tagger.predict(self.dev_data)

        labels = []
        for sentence_tags in train_tags:
            for tag in sentence_tags:
                labels.append(tag)
        labels = list(set(labels))

        accuracy_score_train = metrics.flat_accuracy_score(train_tags, train_pred)
        self.train_overall_accuracy.append(accuracy_score_train)
        accuracy_score_dev = metrics.flat_accuracy_score(dev_tags, dev_pred)
        self.dev_overall_accuracy.append(accuracy_score_dev)

        f1_score_train = metrics.flat_classification_report(train_tags,
                                                            train_pred,
                                                            labels=labels)
        self.calculate_f1_stats(f1_score_train)
        # self.classification_report_csv(f1_score_train)
        f1_score_dev = metrics.flat_classification_report(dev_tags,
                                                          dev_pred,
                                                          labels=labels)
        self.calculate_f1_stats(f1_score_dev, False)
        # self.classification_report_csv(f1_score_dev, False)

        if print_results:
            print('The overall accuracy on Train data for train size = ' +
                  str(i) + ' is = ' + str(accuracy_score_train))
            print('The overall accuracy on DEV data for train size = ' +
                  str(i) + ' is = ' + str(accuracy_score_dev))
            print('Report')
            print('The overall accuracy on Train data for train size = ' +
                  str(i) + ' is = ' + f1_score_train)
            print('The overall accuracy on DEV data for train size = ' +
                  str(i) + ' is = ' + f1_score_dev)
            print('--------------------------------------------------------------------------------------')

def evaluate(self, x, y):
    y_pred = self.model.predict(x)
    # Per-token (flat) accuracy
    print(metrics.flat_accuracy_score(y, y_pred))
    # Whole-sequence accuracy: a sequence counts only if every label matches
    count = 0
    for i in range(len(y_pred)):
        if np.array_equal(y_pred[i], y[i]):
            count += 1
    print("Acc:", count / len(y))

def calculate_overall_accuracy_and_f1_score_per_pos(self, crf_hyperparameters, print_results=False):
    crf_pos_model = CrfPosTagger()
    for i in self.train_sizes:
        my_model = crf_pos_model.trainCRF(self.data_features[:i],
                                          self.data_target[:i],
                                          crf_hyperparameters)
        train_pred = my_model.predict(self.data_features[:i])
        dev_pred = my_model.predict(self.dev_features)
        labels = list(my_model.classes_)

        accuracy_score_train = metrics.flat_accuracy_score(self.data_target[:i], train_pred)
        self.train_overall_accuracy.append(accuracy_score_train)
        accuracy_score_dev = metrics.flat_accuracy_score(self.dev_target, dev_pred)
        self.dev_overall_accuracy.append(accuracy_score_dev)

        f1_score_train = metrics.flat_classification_report(self.data_target[:i],
                                                            train_pred,
                                                            labels=labels)
        self.calculate_f1_stats(f1_score_train)
        # self.classification_report_csv(f1_score_train)
        f1_score_dev = metrics.flat_classification_report(self.dev_target,
                                                          dev_pred,
                                                          labels=labels)
        self.calculate_f1_stats(f1_score_dev, False)
        # self.classification_report_csv(f1_score_dev, False)

        if print_results:
            print('The overall accuracy on Train data for train size = ' +
                  str(i) + ' is = ' + str(accuracy_score_train))
            print('The overall accuracy on DEV data for train size = ' +
                  str(i) + ' is = ' + str(accuracy_score_dev))
            print('Report')
            print('The overall accuracy on Train data for train size = ' +
                  str(i) + ' is = ' + f1_score_train)
            print('The overall accuracy on DEV data for train size = ' +
                  str(i) + ' is = ' + f1_score_dev)
            print('--------------------------------------------------------------------------------------')

def testing(crf, X_test, time_seq=[], y_test=[], save=0):
    if y_test:
        print("Results:")
        labels = list(crf.classes_)
        y_pred = crf.predict(X_test)
        sorted_labels = [str(x) for x in
                         sorted(labels, key=lambda name: (name[1:], name[0]))]
        print(metrics.flat_classification_report(y_test, y_pred, digits=3,
                                                 labels=sorted_labels))
        # plot_results(y_pred, X_test, time_seq, save)
        return metrics.flat_accuracy_score(y_test, y_pred)  # *** , labels=sorted_labels)
    else:
        y_pred = crf.predict(X_test)
        plot_results(y_pred, X_test, time_seq, save)
        return y_pred

def evaluate(dataset_name, data_iter, model, full_report=False):
    model.eval()
    total_corrects, avg_loss = 0, 0
    for batch in data_iter:
        text, target = batch.Phrase, batch.Sentiment
        output = model(text)
        loss = F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct = pred.eq(target.view_as(pred)).sum().item()
        avg_loss += loss
        total_corrects += correct
    size = len(data_iter.dataset)
    avg_loss /= size
    accuracy = 100.0 * total_corrects / size
    print(' Evaluation on {} - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
        dataset_name, avg_loss, accuracy, total_corrects, size))

    targetList = []
    for tar in target:
        list1 = []
        list1.append(tar)
        targetList.append(list1)
    pred = pred.tolist()
    predList = []
    for pre in pred:
        list1 = []
        list1.append(pre)
        predList.append(list1)

    if full_report:
        print(sklearn_crfsuite.metrics.flat_classification_report(
            targetList, predList, labels=[0, 1, 2, 3, 4]))
        print("accuracy_score", flat_accuracy_score(targetList, predList))
        print("precision_score", flat_precision_score(targetList, predList, average='weighted'))
        print("recall_score", flat_recall_score(targetList, predList, average='weighted'))
        print("f1_score", flat_f1_score(targetList, predList, average='weighted'))
    return accuracy

def eval(self, sentence_result, slot_result):
    """Evaluate the predictions against the gold slot labels."""
    y_pred = self.predict(sentence_result)
    y_test = slot_result
    return {
        'precision': metrics.flat_precision_score(y_test, y_pred, average='weighted'),
        'recall': metrics.flat_recall_score(y_test, y_pred, average='weighted'),
        'f1': metrics.flat_f1_score(y_test, y_pred, average='weighted'),
        'accuracy': metrics.flat_accuracy_score(y_test, y_pred),
    }

def tag(self, test=None, save=None):
    '''Tag the test data; if save is not given, write the tagged sentences to stdout.'''
    t0 = t = time()
    self.logger.info('started tagging')

    if test:
        self.test = Data(test, sent_cls=self.sent_cls)
        t0, t = t, time()
        self.logger.info('{:.2f}'.format(t - t0) + 's extracted test features')
        self.logger.info('processed ' + str(self.test.num_sents) + ' sentences')

    if not self.test:
        self.logger.error('cannot tag without the test data')
        return

    y_pred = self.tagger.predict(self.test.features)
    t0, t = t, time()
    self.logger.info('{:.2f}'.format(t - t0) + 's generated predictions')

    # print accuracy, if the given data contains labels
    accuracy = 0.0
    y_true = self.test.labels
    if [tag for s_true in y_true for tag in s_true if tag != '_']:
        accuracy = 100 * flat_accuracy_score(y_true, y_pred)
        self.logger.info('Accuracy : ' + '{:.2f}'.format(accuracy))
    t0, t = t, time()
    self.logger.info('{:.2f}'.format(t - t0) + 's tagged test data')

    # set the CCG categories of the sentences
    self.test.update_tags(y_pred)

    if not save:
        for sent in self.test.sentences:
            print(sent)
    else:
        with open(save, 'w', encoding='utf-8') as f:
            for sent in self.test.sentences:
                f.write(str(sent) + '\n')
        t0, t = t, time()
        self.logger.info('{:.2f}'.format(t - t0) + 's saved as ' + save)
    return accuracy

def test_model(
    model: sklearn_crfsuite.CRF,
    test_path: typing.Union[str, Path],
    out_file: typing.Optional[typing.TextIO] = None,
):
    """Print an accuracy report for a model to a file"""
    try:
        import conllu
    except ImportError as e:
        _LOGGER.fatal("conllu package is required for testing")
        _LOGGER.fatal("pip install 'conllu>=4.4'")
        raise e

    _LOGGER.debug("Loading test file (%s)", test_path)
    with open(test_path, "r") as test_file:
        test_sents = conllu.parse(test_file.read())

    _LOGGER.debug("Getting features for %s test sentence(s)", len(test_sents))
    x_test = [sent2features(s) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]

    labels = list(model.classes_)
    y_pred = model.predict(x_test)

    print(
        "F1 score on the test set = {}".format(
            metrics.flat_f1_score(y_test, y_pred, average="weighted", labels=labels)),
        file=out_file,
    )
    print(
        "Accuracy on the test set = {}".format(
            metrics.flat_accuracy_score(y_test, y_pred)),
        file=out_file,
    )

    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    print(
        "Test set classification report: {}".format(
            metrics.flat_classification_report(y_test, y_pred,
                                               labels=sorted_labels, digits=3)),
        file=out_file,
    )

def train_crf_pos(corpus, corpus_name):
    # Required corpus structure:
    # [[(w1,t1), (w2,t2), ... (wn,tn)], [(w1,t1), (w2,t2), ... (wm,tm)], ...]

    # feat_all = {}  # common features (baseline set)
    # feat_en = {}   # extra features for English
    # features = {**feat_all, **feat_en}

    train_frac = 0.9  # fraction of data for the training set
    split_idx = int(train_frac * len(corpus))

    # Extract the features and separate labels from features
    X = [get_crf_features([pair[0] for pair in sent]) for sent in corpus]
    y = [[pair[1] for pair in sent] for sent in corpus]

    # Create the training and the test sets
    X_train = X[:split_idx]
    y_train = y[:split_idx]
    X_test = X[split_idx:]
    y_test = y[split_idx:]

    # Create the CRF model
    model = CRF(
        algorithm='lbfgs',  # gradient descent using the L-BFGS method
        c1=0.1,             # coeff. for L1 regularization
        c2=0.1,             # coeff. for L2 regularization
        max_iterations=100,
    )

    # Train the model
    model.fit(X_train, y_train)

    # Save the model
    with open(os.path.join('data', 'models', corpus_name + '_crf.pkl'), 'wb') as f:
        pickle.dump(model, f, 4)

    # Evaluate the model
    y_pred = model.predict(X_test)
    print("Test accuracy: %.4f" % metrics.flat_accuracy_score(y_test, y_pred))
    return model

def evaluate_rnn(y, preds):
    """
    Evaluate the RNN performance using various metrics.

    Parameters
    ----------
    y : list of list of labels
    preds : list of list of labels

    Both of these lists need to have the same length, but the sequences
    they contain can vary in length.

    Returns
    -------
    data : dict
    """
    labels = sorted({c for ex in y for c in ex})
    new_preds = []
    for gold, pred in zip(y, preds):
        delta = len(gold) - len(pred)
        if delta > 0:
            # Make a *wrong* guess for these clipped tokens:
            pred += [
                random.choice(list(set(labels) - {label}))
                for label in gold[-delta:]
            ]
        new_preds.append(pred)
    labels = sorted({cls for ex in y for cls in ex} - {"OTHER"})
    data = {}
    data["classification_report"] = flat_classification_report(y, new_preds, digits=3)
    data["f1_macro"] = flat_f1_score(y, new_preds, average="macro")
    data["f1_micro"] = flat_f1_score(y, new_preds, average="micro")
    data["f1"] = flat_f1_score(y, new_preds, average=None)
    data["precision_score"] = flat_precision_score(y, new_preds, average=None)
    data["recall_score"] = flat_recall_score(y, new_preds, average=None)
    data["accuracy"] = flat_accuracy_score(y, new_preds)
    data["sequence_accuracy_score"] = sequence_accuracy_score(y, new_preds)
    return data

def print_classification_report(annotations, n_splits=10, model=None):
    """ Evaluate model, print classification report """
    if model is None:
        # FIXME: we're overfitting on hyperparameters - they should be chosen
        # using inner cross-validation, not set to fixed values beforehand.
        model = get_model(use_precise_form_types=True)

    annotations = [a for a in annotations if a.fields_annotated]
    form_types = formtype_model.get_realistic_form_labels(
        annotations=annotations,
        n_splits=n_splits,
        full_type_names=False
    )

    X, y = get_Xy(
        annotations=annotations,
        form_types=form_types,
        full_type_names=True,
    )
    group_kfold = GroupKFold(n_splits=n_splits)
    groups = [get_domain(ann.url) for ann in annotations]
    y_pred = cross_val_predict(model, X, y, cv=group_kfold, groups=groups, n_jobs=-1)

    all_labels = list(annotations[0].field_schema.types.keys())
    labels = sorted(set(flatten(y_pred)), key=lambda k: all_labels.index(k))
    print(flat_classification_report(y, y_pred, digits=2,
                                     labels=labels, target_names=labels))
    print("{:0.1f}% fields are classified correctly.".format(
        flat_accuracy_score(y, y_pred) * 100))
    print("All fields are classified correctly in {:0.1f}% forms.".format(
        sequence_accuracy_score(y, y_pred) * 100))

def print_classification_report(annotations, n_folds=10, model=None):
    """ Evaluate model, print classification report """
    if model is None:
        # FIXME: we're overfitting on hyperparameters - they should be chosen
        # using inner cross-validation, not set to fixed values beforehand.
        model = get_model(use_precise_form_types=True)

    annotations = [a for a in annotations if a.fields_annotated]
    form_types = formtype_model.get_realistic_form_labels(
        annotations=annotations,
        n_folds=n_folds,
        full_type_names=False
    )

    X, y = get_Xy(annotations=annotations, form_types=form_types, full_type_names=True)
    cv = get_annotation_folds(annotations, n_folds=n_folds)
    y_pred = cross_val_predict(model, X, y, cv=cv, n_jobs=-1)

    all_labels = list(annotations[0].field_schema.types.keys())
    labels = sorted(set(flatten(y_pred)), key=lambda k: all_labels.index(k))
    print(flat_classification_report(y, y_pred, digits=2,
                                     labels=labels, target_names=labels))
    print("{:0.1f}% fields are classified correctly.".format(
        flat_accuracy_score(y, y_pred) * 100))
    print("All fields are classified correctly in {:0.1f}% forms.".format(
        sequence_accuracy_score(y, y_pred) * 100))

def get_crf_metrics(y_pred, y_true, labels):
    token_acc_score = round(metrics.flat_accuracy_score(y_true, y_pred), 2)
    token_recall_score = round(
        metrics.flat_recall_score(y_true, y_pred, average='weighted', labels=labels), 2)
    token_f1_score = round(
        metrics.flat_f1_score(y_true, y_pred, average='weighted', labels=labels), 2)
    token_precision_score = round(
        metrics.flat_precision_score(y_true, y_pred, average='weighted', labels=labels), 2)

    report = metrics.flat_classification_report(y_true, y_pred, labels=labels,
                                                output_dict=True)
    report_df = pd.DataFrame(report).T
    report_df = report_df.round(2)

    cm_dict = metrics.performance_measure(y_true, y_pred)
    cm = np.array([[cm_dict['TN'], cm_dict['FP']],
                   [cm_dict['FN'], cm_dict['TP']]])
    support = cm_dict['FN'] + cm_dict['TP']

    res_d = {
        'accuracy': token_acc_score,
        'recall': token_recall_score,
        'f1_score': token_f1_score,
        'precision': token_precision_score,
        'support': support,
        'cm': cm,
        'report': report_df
    }
    return res_d

def crfs(tagged_sentences):
    def features(sentence, index):
        """ sentence: [w1, w2, ...], index: the index of the word """
        return {
            'word': sentence[index],
            'is_first': index == 0,
            'is_last': index == len(sentence) - 1,
            'is_capitalized': sentence[index][0].upper() == sentence[index][0],
            'is_all_caps': sentence[index].upper() == sentence[index],
            'is_all_lower': sentence[index].lower() == sentence[index],
            'prefix-1': sentence[index][0],
            'prefix-2': sentence[index][:2],
            'prefix-3': sentence[index][:3],
            'suffix-1': sentence[index][-1],
            'suffix-2': sentence[index][-2:],
            'suffix-3': sentence[index][-3:],
            'prev_word': '' if index == 0 else sentence[index - 1],
            'next_word': '' if index == len(sentence) - 1 else sentence[index + 1],
            'has_hyphen': '-' in sentence[index],
            'is_numeric': sentence[index].isdigit(),
            'capitals_inside': sentence[index][1:].lower() != sentence[index][1:]
        }

    # Split the dataset for training and testing
    cutoff = int(.75 * len(tagged_sentences))
    training_sentences = tagged_sentences[:cutoff]
    test_sentences = tagged_sentences[cutoff:]

    def transform_to_dataset(tagged_sentences):
        X, y = [], []
        for tagged in tagged_sentences:
            X.append([features(untag(tagged), index) for index in range(len(tagged))])
            y.append([tag for _, tag in tagged])
        return X, y

    X_train, y_train = transform_to_dataset(training_sentences)
    X_test, y_test = transform_to_dataset(test_sentences)
    print(len(X_train))
    print(len(X_test))
    print(X_train[0])
    print(y_train[0])

    model = CRF()
    model.fit(X_train, y_train)

    sentence = ['I', 'am', 'Bob', '!']

    def pos_tag(sentence):
        sentence_features = [features(sentence, index) for index in range(len(sentence))]
        return list(zip(sentence, model.predict([sentence_features])[0]))

    print(pos_tag(sentence))  # [('I', 'PRP'), ('am', 'VBP'), ('Bob', 'NNP'), ('!', '.')]

    y_pred = model.predict(X_test)
    print("CRFs Accuracy", metrics.flat_accuracy_score(y_test, y_pred))
    return 0

def test_flat_accuracy():
    score = metrics.flat_accuracy_score(y1, y2)
    assert score == 3 / 5

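# Illustration with hypothetical data (not the y1/y2 fixtures of the original
# test module): flat_accuracy_score flattens the sequences and scores per
# token, so 3 matching tokens out of 5 gives exactly 3 / 5.
from sklearn_crfsuite import metrics

y1_example = [['A', 'B'], ['C', 'D', 'E']]  # reference: 5 tokens in total
y2_example = [['A', 'X'], ['C', 'D', 'Y']]  # predictions: 3 of 5 tokens match
assert metrics.flat_accuracy_score(y1_example, y2_example) == 3 / 5
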
    'feature.possible_transitions': True
})

# Provide a file name as a parameter to the train function, such that
# the model will be saved to the file when training is finished
trainer.train('crf.model')

tagger = pycrfsuite.Tagger()
tagger.open('crf.model')
y_pred = [tagger.tag(xseq) for xseq in X_test]
y_pred1 = [tagger.tag(xseq) for xseq in X_test1]
# print(flat_accuracy_score(Y_test, y_pred))

'''
# Let's take a look at a random sample in the testing set
for i in range(len(X_test)):
    for x, y in zip(y_pred[i], [x[1].split("=")[1] for x in X_test[i]]):
        print("%s (%s)" % (y, x))
'''

# Create a mapping of labels to indices
labels = {"NonDrug": 1, "Drug": 0}

# Convert the sequences of tags into a 1-dimensional array
predictions = np.array([labels[tag] for row in y_pred for tag in row])
truths = np.array([labels[tag] for row in Y_test for tag in row])

print(flat_accuracy_score(Y_test, y_pred))

# Print out the classification report
print(classification_report(truths, predictions,
                            target_names=["NonDrug", "Drug"]))

print("=======================")
print("Load trained model ...")
model = pickle.load(open("./models/" + MODEL_NAME, "rb"))
print("Done!!!")

predict = model.predict(X_test)
print("=======================")
print("Testing ....")
print(len(y_test), len(predict))

avg_count = 0
print(predict[0])
for i in range(len(y_test)):
    acc = evaluate(predict[i], y_test[i])
    # print(acc)
    avg_count += acc
    # print(score)
print("Avg acc:", avg_count / float(len(y_test)))

print(model.classes_)
print("Accuracy\t:", metrics.flat_accuracy_score(y_test, predict))
print("Precision\t:", metrics.flat_precision_score(y_test, predict, average=None))
print("Recall\t:", metrics.flat_recall_score(y_test, predict, average=None))
print("F1\t:", metrics.flat_f1_score(y_test, predict, average=None))
print("Done!!!")

                 all_possible_transitions=True)
if VERBOSE == "full":
    print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) + "] Learning...")
# print(len(features_train), len(target_train), set(target_train))
model = model.fit([features_train], [target_train])
if VERBOSE == "full":
    print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) + "] Testing")
labels = list(model.classes_)
y_pred = model.predict([features_test])
v = crfsMetrics.flat_accuracy_score(y_pred, [target_test])

if first:
    scrs = []
    first = False
else:
    scrs = joblib.load("/home/lsablayr/stageM1/debates/step2_M1/learning/scrs")
scrs.append([c1, c2, MAX_ITER, v])
joblib.dump(scrs, "scrs", pickle.HIGHEST_PROTOCOL, compress=True)
del scrs

if v > max_scr:
    max_scr = v
    iter_max = MAX_ITER
    c1_max = c1
    c2_max = c2
    joblib.dump(model, "modelCRF.save")

def eval(self, pred_tags, gold_tags):
    if self.model is None:
        raise ValueError("No trained model")
    print(self.model.classes_)
    print("Acc =", metrics.flat_accuracy_score(pred_tags, gold_tags))

def train(c1, c2, MAX_ITER):
    global targets
    global features
    global MAX_ITER_MAX
    global MAX_ITER_MIN
    global VERBOSE
    global TEST_PERCENT
    global first
    global iter_max
    global c1_max
    global c2_max
    global max_scr

    print("(" + str(c1) + "," + str(c2) + "," + str(MAX_ITER) + ")")
    if VERBOSE == "min":
        print('\033[1A' + "[Info][Model=Crf][MAX_ITER=" + str(MAX_ITER) +
              "] Learning test :",
              round(float(MAX_ITER - MAX_ITER_MIN) /
                    float(MAX_ITER_MAX - MAX_ITER_MIN) * 100.0, 2), "%")
    if VERBOSE == "full":
        print("[Info][Model=Crf][MAX_ITER=" + str(MAX_ITER) +
              "]================= NB ITER :", MAX_ITER,
              "======================================")

    # Split the dataset
    # features_train, features_test, target_train, target_test = modelSelect.train_test_split(features, targets_trans, test_size=TEST_PERCENT)
    sss = modelSelect.StratifiedShuffleSplit(n_splits=2, test_size=TEST_PERCENT)
    features_train, features_test, target_train, target_test = [], [], [], []
    for train_i, test_i in sss.split(f, targets):
        for i in train_i:
            features_train.append(features[i])
            target_train.append(targets[i])
        for i in test_i:
            features_test.append(features[i])
            target_test.append(targets[i])

    model = crfs.CRF(algorithm='lbfgs',
                     c1=c1,
                     c2=c2,
                     max_iterations=MAX_ITER,
                     all_possible_transitions=True)
    if VERBOSE == "full":
        print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) + "] Learning...")
    # print(len(features_train), len(target_train), set(target_train))
    model = model.fit([features_train], [target_train])
    if VERBOSE == "full":
        print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) + "] Testing")
    labels = list(model.classes_)
    y_pred = model.predict([features_test])
    v = crfsMetrics.flat_accuracy_score(y_pred, [target_test])

    if first:
        scrs = []
        first = False
    else:
        scrs = joblib.load("/home/lsablayr/stageM1/debates/step2_M1/learning/scrs.gz")
    scrs.append([c1, c2, MAX_ITER, v])
    joblib.dump(scrs,
                "/home/lsablayr/stageM1/debates/step2_M1/learning/scrs.gz",
                ('gzip', 3), pickle.HIGHEST_PROTOCOL)
    del scrs

    if v > max_scr:
        max_scr = v
        iter_max = MAX_ITER
        c1_max = c1
        c2_max = c2
        joblib.dump(model, "modelCRF.save")
        print("New best accuracy for crf model with c1 =", c1, "c2 =", c2,
              "MAX_ITER =", MAX_ITER, "score :", v)
    if VERBOSE == "full":
        print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) +
              "] Mean test accuracy:", v)