Example #1
import pandas as pd
import json
from evaluation_script import normalize_answer, f1_score, exact_match_score, rougel_score
import numpy as np
from IPython import embed

# NarrativeQA question/answer pairs and the model's test predictions.
nqfile = "../narrativeqa/qaps.csv"
inputfile = "out/NarrativeQA9062233-first-only/test_predictions.json"
df = pd.read_csv(nqfile)
test_df = df[df['set'] == 'test']

test_prd = json.load(open(inputfile, 'r'))

# Evaluate F1, exact match, and ROUGE-L at each paragraph cutoff.
n_paragraphs = [10, 15, 20]
f1s, ems, rougels = [[] for _ in n_paragraphs], [[] for _ in n_paragraphs], [[] for _ in n_paragraphs]
# Assumes the JSON preserves insertion order matching the test split's row order.
for j, predictions in enumerate(test_prd.values()):
    # Columns 3 and 4 of qaps.csv are the two reference answers (answer1, answer2).
    groundtruth = [test_df.iloc[j, 3], test_df.iloc[j, 4]]
    # The last element of each prediction list is the gold-answer list; drop it.
    predictions = predictions[:-1]
    if len(groundtruth) == 0:
        for i in range(len(n_paragraphs)):
            f1s[i].append(0)
            ems[i].append(0)
            rougels[i].append(0)
        continue
    for i, prediction in enumerate(predictions):
        f1s[i].append(max([f1_score(prediction, gt)[0] for gt in groundtruth]))
        ems[i].append(max([exact_match_score(prediction, gt) for gt in groundtruth]))
        rougels[i].append(max([rougel_score(prediction, gt) for gt in groundtruth]))
for n, f1s_, ems_, rougels_ in zip(n_paragraphs, f1s, ems, rougels):
    print("n=%d\tF1 %.2f\tEM %.2f\tR-L %.2f" % (n, np.mean(f1s_) * 100, np.mean(ems_) * 100, np.mean(rougels_) * 100))
Example #2
def write_predictions(logger,
                      all_examples,
                      all_features,
                      all_results,
                      n_best_size,
                      do_lower_case,
                      output_prediction_file,
                      output_nbest_file,
                      verbose_logging,
                      write_prediction=True,
                      n_paragraphs=None):
    """Write final predictions to the json file."""

    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction", [
            "paragraph_index", "feature_index", "start_index", "end_index",
            "logit", "no_answer_logit"
        ])

    all_predictions = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()

    if verbose_logging:
        all_examples = tqdm(enumerate(all_examples))
    else:
        all_examples = enumerate(all_examples)

    for (example_index, example) in all_examples:
        features = example_index_to_features[example_index]
        if len(features) == 0 and n_paragraphs is None:
            # No features for this example: fall back to an "empty" placeholder.
            pred = _NbestPrediction(text="empty",
                                    logit=-1000,
                                    no_answer_logit=1000)
            all_predictions[example.qas_id] = ("empty", example.all_answers)
            all_nbest_json[example.qas_id] = [pred]
            continue

        prelim_predictions = []
        yn_predictions = []

        if n_paragraphs is None:
            # Keep only the feature (paragraph) with the lowest no-answer logit.
            results = sorted(
                enumerate(features),
                key=lambda f: unique_id_to_result[f[1].unique_id].switch[3])[:1]
        else:
            results = enumerate(features)
        for (feature_index, feature) in results:
            result = unique_id_to_result[feature.unique_id]
            scores = []
            start_logits = result.start_logits[:len(feature.tokens)]
            end_logits = result.end_logits[:len(feature.tokens)]
            # Enumerate candidate answer spans of at most 10 tokens.
            for (i, s) in enumerate(start_logits):
                for (j, e) in enumerate(end_logits[i:i + 10]):
                    scores.append(((i, i + j), s + e))

            scores = sorted(scores, key=lambda x: x[1], reverse=True)

            cnt = 0
            for (start_index, end_index), score in scores:
                if start_index >= len(feature.tokens):
                    continue
                if end_index >= len(feature.tokens):
                    continue
                if start_index not in feature.token_to_orig_map:
                    continue
                if end_index not in feature.token_to_orig_map:
                    continue
                if not feature.token_is_max_context.get(start_index, False):
                    continue
                if end_index < start_index:
                    continue
                prelim_predictions.append(
                    _PrelimPrediction(
                        paragraph_index=feature.paragraph_index,
                        feature_index=feature_index,
                        start_index=start_index,
                        end_index=end_index,
                        logit=-result.switch[3],  # rank candidates by the negated no-answer logit
                        no_answer_logit=result.switch[3]))
                if n_paragraphs is None:
                    # Stop once n_best_size candidates are collected; a single
                    # candidate is enough when predictions are not written out.
                    if write_prediction and len(
                            prelim_predictions) >= n_best_size:
                        break
                    elif not write_prediction:
                        break
                cnt += 1

        prelim_predictions = sorted(prelim_predictions,
                                    key=lambda x: x.logit,
                                    reverse=True)
        no_answer_logit = result.switch[3]

        def get_nbest_json(prelim_predictions):

            seen_predictions = {}
            nbest = []
            for pred in prelim_predictions:
                if len(nbest) >= n_best_size:
                    break

                # Sentinel indices (-1, -2) mark yes/no answers.
                if pred.start_index == pred.end_index == -1:
                    final_text = "yes"
                elif pred.start_index == pred.end_index == -2:
                    final_text = "no"
                else:
                    feature = features[pred.feature_index]

                    tok_tokens = feature.tokens[pred.start_index:(
                        pred.end_index + 1)]
                    orig_doc_start = feature.token_to_orig_map[
                        pred.start_index]
                    orig_doc_end = feature.token_to_orig_map[pred.end_index]
                    orig_tokens = feature.doc_tokens[orig_doc_start:(
                        orig_doc_end + 1)]
                    tok_text = " ".join(tok_tokens)

                    # De-tokenize WordPieces that have been split off.
                    tok_text = tok_text.replace(" ##", "")
                    tok_text = tok_text.replace("##", "")

                    # Clean whitespace
                    tok_text = tok_text.strip()
                    tok_text = " ".join(tok_text.split())
                    orig_text = " ".join(orig_tokens)

                    final_text = get_final_text(tok_text, orig_text, do_lower_case, \
                                                logger, verbose_logging)

                if final_text in seen_predictions:
                    continue

                nbest.append(
                    _NbestPrediction(text=final_text,
                                     logit=pred.logit,
                                     no_answer_logit=no_answer_logit))

            # In very rare edge cases we could have no valid predictions. So we
            # just create a nonce prediction in this case to avoid failure.
            if not nbest:
                nbest.append(
                    _NbestPrediction(text="empty",
                                     logit=0.0,
                                     no_answer_logit=no_answer_logit))

            assert len(nbest) >= 1

            total_scores = []
            for entry in nbest:
                total_scores.append(entry.logit)

            probs = _compute_softmax(total_scores)
            nbest_json = []
            for (i, entry) in enumerate(nbest):
                output = collections.OrderedDict()
                output['text'] = entry.text
                output['probability'] = probs[i]
                output['logit'] = entry.logit
                output['no_answer_logit'] = entry.no_answer_logit
                nbest_json.append(output)

            assert len(nbest_json) >= 1
            return nbest_json

        if n_paragraphs is None:
            nbest_json = get_nbest_json(prelim_predictions)
            all_predictions[example.qas_id] = (nbest_json[0]["text"],
                                               example.all_answers)
            all_nbest_json[example.qas_id] = nbest_json
        else:
            all_predictions[example.qas_id] = []
            all_nbest_json[example.qas_id] = []
            for n in n_paragraphs:
                nbest_json = get_nbest_json([pred for pred in prelim_predictions if \
                                             pred.paragraph_index<n])
                all_predictions[example.qas_id].append(nbest_json[0]["text"])
            all_predictions[example.qas_id].append(example.all_answers)

    if write_prediction:
        logger.info("Writing predictions to: %s" % (output_prediction_file))
        logger.info("Writing nbest to: %s" % (output_nbest_file))

        with open(output_prediction_file, "w") as writer:
            writer.write(json.dumps(all_predictions, indent=4) + "\n")

        with open(output_nbest_file, "w") as writer:
            writer.write(json.dumps(all_nbest_json, indent=4) + "\n")

    if n_paragraphs is None:
        f1s, ems, rougels = [], [], []
        for prediction, groundtruth in all_predictions.values():
            if len(groundtruth) == 0:
                f1s.append(0)
                ems.append(0)
                rougels.append(0)
                continue
            f1s.append(max([f1_score(prediction, gt)[0]
                            for gt in groundtruth]))
            ems.append(
                max([exact_match_score(prediction, gt) for gt in groundtruth]))
            rougels.append(
                max([rougel_score(prediction, gt) for gt in groundtruth]))
        final_f1, final_em, final_rougel = np.mean(f1s), np.mean(ems), np.mean(
            rougels)
    else:
        f1s, ems, rougels = ([[] for _ in n_paragraphs],
                             [[] for _ in n_paragraphs],
                             [[] for _ in n_paragraphs])
        for predictions in all_predictions.values():
            # The gold-answer list was appended as the last element.
            groundtruth = predictions[-1]
            predictions = predictions[:-1]
            if len(groundtruth) == 0:
                for i in range(len(n_paragraphs)):
                    f1s[i].append(0)
                    ems[i].append(0)
                    rougels[i].append(0)
                continue
            for i, prediction in enumerate(predictions):
                f1s[i].append(
                    max([f1_score(prediction, gt)[0] for gt in groundtruth]))
                ems[i].append(
                    max([
                        exact_match_score(prediction, gt) for gt in groundtruth
                    ]))
                # Compute ROUGE-L once, and log mismatches for inspection.
                rougel = max([rougel_score(prediction, gt) for gt in groundtruth])
                rougels[i].append(rougel)
                if rougel != 1:
                    print(f'prediction: {prediction}')
                    print(f'groundtruth: {groundtruth}')
                    print(f'{rougel}\n')
        for n, f1s_, ems_, rougels_ in zip(n_paragraphs, f1s, ems, rougels):
            logger.info("n=%d\tF1 %.2f\tEM %.2f\tR-L %.2f" %
                        (n, np.mean(f1s_) * 100, np.mean(ems_) * 100,
                         np.mean(rougels_) * 100))
        final_f1, final_em, final_rougel = np.mean(f1s[-1]), np.mean(
            ems[-1]), np.mean(rougels[-1])
    return final_em, final_f1, final_rougel
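The function relies on several module-level names that are not part of this snippet (collections, json, np, tqdm, get_final_text, _NbestPrediction, _compute_softmax, and the metric functions f1_score, exact_match_score, rougel_score). A minimal sketch of the two simplest ones, inferred from how they are used above; the field names and the softmax behaviour are assumptions, not the original definitions:

import collections
import math

# Assumed definition: fields inferred from the keyword arguments used above.
_NbestPrediction = collections.namedtuple(
    "NbestPrediction", ["text", "logit", "no_answer_logit"])


def _compute_softmax(scores):
    """Numerically stable softmax over a list of logits (assumed behaviour)."""
    if not scores:
        return []
    max_score = max(scores)
    exps = [math.exp(s - max_score) for s in scores]
    total = sum(exps)
    return [e / total for e in exps]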