Example No. 1
def write_summary(tar_real, predictions, step, write=config.write_summary_op):
  ref_sents=[]
  hyp_sents=[]
  for tar, ref_hyp in zip(tar_real, predictions):
      sum_ref = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(tar) if i not in [0, 101, 102]])
      sum_hyp = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(ref_hyp) if i not in [0, 101, 102]])
      sum_ref = convert_wordpiece_to_words(sum_ref)
      sum_hyp = convert_wordpiece_to_words(sum_hyp)
      ref_sents.append(sum_ref)
      hyp_sents.append(sum_hyp)
  try:
    rouges = rouge_all.get_scores(ref_sents, hyp_sents)
    avg_rouge_f1 = np.mean([np.mean([rouge_scores['rouge-1']["f"], 
                                    rouge_scores['rouge-2']["f"], 
                                    rouge_scores['rouge-l']["f"]]) for rouge_scores in rouges])
    _, _, bert_f1 = b_score(ref_sents, hyp_sents, lang='en', model_type=config.pretrained_bert_model)
    avg_bert_f1 = np.mean(bert_f1.numpy())
  except Exception:
    # scoring can fail on empty or malformed sentences; fall back to zero
    avg_rouge_f1 = 0
    avg_bert_f1 = 0
  
  if write and step % config.write_per_step == 0:
    with tf.io.gfile.GFile(file_path.summary_write_path+str(step.numpy()), 'w') as f:
      for ref, hyp in zip(ref_sents, hyp_sents):
        f.write(ref+'\t'+hyp+'\n')
  return (avg_rouge_f1, avg_bert_f1)
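The scoring block above averages the ROUGE-1, ROUGE-2 and ROUGE-L F1 values per reference/hypothesis pair and then across the batch. Below is a minimal, self-contained sketch of that step, assuming the rouge and numpy packages (rouge_all in these snippets appears to be a Rouge() instance); the sentences are illustrative only.

import numpy as np
from rouge import Rouge

def average_rouge_f1(hypotheses, references):
    # one score dict per (hypothesis, reference) pair
    scores = Rouge().get_scores(hypotheses, references)
    # mean of the ROUGE-1/2/L F1 values per pair, then mean over the batch
    return float(np.mean([
        np.mean([s['rouge-1']['f'], s['rouge-2']['f'], s['rouge-l']['f']])
        for s in scores
    ]))

print(average_rouge_f1(['a cat was sitting on the mat'],
                       ['the cat sat on the mat']))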
def run_inference(dataset, print_output=False):

    for draft_type, refine_type in draft_and_refine_decoder_combinations:
        ref_sents = []
        hyp_sents = []
        for (doc_id, (input_ids, _, _, target_ids, _,
                      _)) in enumerate(dataset, 1):
            start_time = time.time()
            if draft_type != 'beam_search':
                _, _, refined_summary, _ = predict_using_sampling(input_ids,
                                                                  draft_type,
                                                                  refine_type,
                                                                  k=10)
            else:
                _, refined_summary, _ = predict_using_beam_search(
                    input_ids, refine_decoder_sampling_type=refine_type)
            sum_ref = tokenizer.convert_ids_to_tokens(
                [i for i in tf.squeeze(target_ids) if i not in [0, 101, 102]])
            sum_hyp = tokenizer.convert_ids_to_tokens([
                i for i in tf.squeeze(refined_summary)
                if i not in [0, 101, 102]
            ])
            sum_ref = convert_wordpiece_to_words(sum_ref)
            sum_hyp = convert_wordpiece_to_words(sum_hyp)
            if print_output:
                print('Original summary: {}'.format(sum_ref))
                print('Predicted summary: {}'.format(sum_hyp))
            ref_sents.append(sum_ref)
            hyp_sents.append(sum_hyp)
        print(
            f'Calculating scores for {len(ref_sents)} golden summaries and {len(hyp_sents)} predicted summaries'
        )
        try:
            rouges = rouge_all.get_scores(ref_sents, hyp_sents)
            avg_rouge_f1 = np.mean([
                np.mean([
                    rouge_scores['rouge-1']["f"], rouge_scores['rouge-2']["f"],
                    rouge_scores['rouge-l']["f"]
                ]) for rouge_scores in rouges
            ])
            _, _, bert_f1 = b_score(ref_sents,
                                    hyp_sents,
                                    lang='en',
                                    model_type=config.pretrained_bert_model)
            avg_bert_f1 = np.mean(bert_f1.numpy())
        except Exception:
            avg_rouge_f1 = 0
            avg_bert_f1 = 0
        print(
            infer_template.format(draft_type, refine_type, avg_rouge_f1,
                                  avg_bert_f1))
        print(f'time to process document {doc_id} : {time.time()-start_time}')
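convert_wordpiece_to_words is a project helper that is not shown on this page. The sketch below is a hedged approximation of the id-filtering and WordPiece-merging steps, assuming a Hugging Face BertTokenizer (ids 0, 101 and 102 are [PAD], [CLS] and [SEP] in the standard BERT vocabulary).

from transformers import BertTokenizer

SPECIAL_IDS = {0, 101, 102}  # [PAD], [CLS], [SEP] in the standard BERT vocab

def ids_to_text(token_ids, tokenizer):
    tokens = tokenizer.convert_ids_to_tokens(
        [i for i in token_ids if i not in SPECIAL_IDS])
    words = []
    for tok in tokens:
        # merge WordPiece continuations such as '##ing' into the previous word
        if tok.startswith('##') and words:
            words[-1] += tok[2:]
        else:
            words.append(tok)
    return ' '.join(words)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
print(ids_to_text(tokenizer.encode('summarization with transformers'), tokenizer))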
Example No. 3
def run_inference(dataset, beam_sizes_to_try=h_parms.beam_sizes):
    for beam_size in beam_sizes_to_try:
        ref_sents = []
        hyp_sents = []
        for (doc_id, (input_ids, _, _, target_ids, _,
                      _)) in enumerate(dataset, 1):
            start_time = time.time()
            # translated_output_temp[0] (batch, beam_size, summ_length+1)
            translated_output_temp, enc_output = draft_decoded_summary(
                model, input_ids, target_ids[:, :-1], beam_size)
            draft_predictions = translated_output_temp[0][:, 0, :]
            _, _, dec_padding_mask = create_masks(input_ids,
                                                  target_ids[:, :-1])
            refined_summary, attention_dists = refined_summary_greedy(
                model,
                input_ids,
                enc_output,
                draft_predictions,
                dec_padding_mask,
                training=False)
            sum_ref = tokenizer.convert_ids_to_tokens(
                [i for i in tf.squeeze(target_ids) if i not in [0, 101, 102]])
            sum_hyp = tokenizer.convert_ids_to_tokens([
                i for i in tf.squeeze(refined_summary)
                if i not in [0, 101, 102]
            ])
            sum_ref = convert_wordpiece_to_words(sum_ref)
            sum_hyp = convert_wordpiece_to_words(sum_hyp)
            print('Original summary: {}'.format(sum_ref))
            print('Predicted summary: {}'.format(sum_hyp))
            if sum_ref and sum_hyp:
                ref_sents.append(sum_ref)
                hyp_sents.append(sum_hyp)
        try:
            rouges = rouge_all.get_scores(ref_sents, hyp_sents)
            avg_rouge_f1 = np.mean([
                np.mean([
                    rouge_scores['rouge-1']["f"], rouge_scores['rouge-2']["f"],
                    rouge_scores['rouge-l']["f"]
                ]) for rouge_scores in rouges
            ])
            _, _, bert_f1 = b_score(ref_sents,
                                    hyp_sents,
                                    lang='en',
                                    model_type=config.pretrained_bert_model)
            avg_bert_f1 = np.mean(bert_f1.numpy())
        except Exception:
            avg_rouge_f1 = 0
            avg_bert_f1 = 0
        print(infer_template.format(beam_size, avg_rouge_f1, avg_bert_f1))
        print(f'time to process document {doc_id} : {time.time()-start_time}')
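b_score in these snippets appears to be bert_score.score imported under an alias. A minimal usage sketch, assuming the bert-score package is installed; the precision and recall tensors are discarded, exactly as above.

from bert_score import score as b_score

hyp_sents = ['a cat was sitting on the mat']
ref_sents = ['the cat sat on the mat']
_, _, bert_f1 = b_score(hyp_sents, ref_sents, lang='en')
print(float(bert_f1.mean()))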
Example No. 4
def write_summary(tar_real, predictions, step, write=config.write_summary_op):
  r_avg_final = []
  total_summary = []
  for i, sub_tar_real in enumerate(tar_real):
    predicted_id = tf.cast(tf.argmax(predictions[i], axis=-1), tf.int32)
    #decoded_logits = argmax(predictions[i])
    #predicted_id = tf.cast(decoded_logits, tf.int32)
    sum_ref = tokenizer.convert_ids_to_tokens([i for i in sub_tar_real.numpy() if i not in [0, 101, 102]])
    sum_hyp = tokenizer.convert_ids_to_tokens([i for i in predicted_id.numpy() if i not in [0, 101, 102]])
    sum_ref = convert_wordpiece_to_words(sum_ref)
    sum_hyp = convert_wordpiece_to_words(sum_hyp)
    # don't consider empty values for ROUGE and BERT score calculation
    if sum_hyp and sum_ref:
      total_summary.append((sum_ref, sum_hyp))
  ref_sents = [ref for ref, _ in total_summary]
  hyp_sents = [hyp for _, hyp in total_summary]
  # returns :- dict of dicts
  if ref_sents and hyp_sents:
      try:
        rouges = rouge_all.get_scores(ref_sents, hyp_sents)
        avg_rouge_f1 = np.mean([np.mean([rouge_scores['rouge-1']["f"], rouge_scores['rouge-2']["f"], rouge_scores['rouge-l']["f"]]) for rouge_scores in rouges])
        _, _, bert_f1 = b_score(ref_sents, hyp_sents, lang='en', model_type=config.pretrained_bert_model)
        rouge_score =  avg_rouge_f1.astype('float64')
        bert_f1_score =  np.mean(bert_f1.tolist(), dtype=np.float64)
      except ValueError:
        log.warning('Problem in calculating the ROUGE scores')
        rouge_score = 0
        bert_f1_score = 0
  else:
      log.warning('The sentences predicted by the model are empty so setting the scores to 0')
      rouge_score = 0
      bert_f1_score = 0
  
  if write and step % config.write_per_step == 0:
    with tf.io.gfile.GFile(file_path.summary_write_path+str(step.numpy()), 'w') as f:
      for ref, hyp in total_summary:
        f.write(ref+'\t'+hyp+'\n')
  return (rouge_score, bert_f1_score)
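Unlike the earlier write_summary variant, this one receives per-token logits and decodes them greedily with tf.argmax before detokenising. A small, self-contained sketch of that step; the shapes are illustrative only.

import tensorflow as tf

logits = tf.random.normal((7, 30522))           # (summary_length, vocab_size)
predicted_ids = tf.cast(tf.argmax(logits, axis=-1), tf.int32)
print(predicted_ids.shape)                      # (7,)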
Example No. 5
 train_loss, train_accuracy = get_loss_and_accuracy()
 validation_loss, validation_accuracy = get_loss_and_accuracy()
 accumulators = []
 # if a checkpoint exists, restore the latest checkpoint.
 ck_pt_mgr, latest_ckpt = check_ckpt(file_path.checkpoint_path)
 total_steps = int(h_parms.epochs * (h_parms.accumulation_steps))
 #train_dataset = train_dataset.repeat(total_steps)
 count=0
 ds_train_size = 287113
 length = 1
 start = randint(ds_train_size - length, size=1)[0]
 train_buffer_size = 287113
 for (step, (input_ids, input_mask, input_segment_ids, target_ids_, target_mask, target_segment_ids)) in enumerate(train_dataset):
     #if step >= config.start_from_batch:
     try:
         sum_hyp = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(input_ids) if i not in [CLS_ID, SEP_ID, 0]])
         ip_ids = tokenizer.encode(' '.join(sum_hyp))
         if len(ip_ids) >= 512 or len(target_ids_)>512 :
             # start = randint(ds_train_size - length, size=1)[0]
             # examples, metadata = tfds.load('cnn_dailymail', with_info=True, as_supervised=True,
             #                                data_dir='/content/drive/My Drive/Text_summarization/cnn_dataset',
             #                                builder_kwargs={"version": "3.0.0"},
             #                                split=tfds.core.ReadInstruction('train', from_=start, to=start + length,
             #                                                                unit='abs'))
             # train_examples = examples
             # train_dataset = map_batch_shuffle(train_examples, train_buffer_size, split='train', shuffle=True,
             #                                   batch_size=1, filter_off=False)
             # repeat = True
             print(len(ip_ids))
             continue
         # if len(input_ids) >= 512 or len(input_mask)>=512 or len(input_segment_ids)>=512 or len(ip_ids)>=512:
                                                             refine_decoder_type=refine_dec_type, 
                                                             k=k, 
                                                             p=p,
                                                             temperature=temperature
                                                             )
   else:
     _, _, refined_summary, _ = predict_using_sampling(
                                                       input_ids,
                                                       draft_decoder_type=draft_dec_type, 
                                                       refine_decoder_type=refine_dec_type, 
                                                       k=k,
                                                       p=p,
                                                       temperature=temperature
                                                       )
   for tar, ref_hyp in zip(target_ids, refined_summary):
     sum_ref = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(tar) if i not in [0, 101, 102]])
     sum_hyp = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(ref_hyp) if i not in [0, 101, 102]])
     sum_ref = convert_wordpiece_to_words(sum_ref)
     sum_hyp = convert_wordpiece_to_words(sum_hyp)
     #print('Original summary: {}'.format(sum_ref))
     #print('Predicted summary: {}'.format(sum_hyp))
     ref_sents.append(sum_ref)
     hyp_sents.append(sum_hyp)
 try:
   rouges = rouge_all.get_scores(ref_sents, hyp_sents)
   avg_rouge_f1 = np.mean([np.mean([rouge_scores['rouge-1']["f"], 
                                   rouge_scores['rouge-2']["f"], 
                                   rouge_scores['rouge-l']["f"]]) for rouge_scores in rouges])
   _, _, bert_f1 = b_score(ref_sents, hyp_sents, lang='en', model_type=config.pretrained_bert_model)
   avg_bert_f1 = np.mean(bert_f1.numpy())
 except Exception:
   avg_rouge_f1 = 0
   avg_bert_f1 = 0
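The loop above re-encodes each de-tokenised input and skips any example longer than the 512-position window that BERT-style encoders accept. A self-contained sketch of that length filter, assuming a Hugging Face BertTokenizer; the helper name is hypothetical.

from transformers import BertTokenizer

MAX_LEN = 512
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def fits_bert_window(text, max_len=MAX_LEN):
    # encode() adds [CLS] and [SEP]; anything at or over the limit is skipped
    return len(tokenizer.encode(text)) < max_len

print(fits_bert_window('a short document'))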
Example No. 7
def run_eval(
    ckpt_path='/content/drive/My Drive/Text_summarization/BERT_text_summarisation/cnn_checkpoints/ckpt-69'
):
    restore_chkpt(ckpt_path)
    if config.use_tfds:
        examples, metadata = tfds.load(
            config.tfds_name,
            with_info=True,
            as_supervised=True,
            data_dir='/content/drive/My Drive/Text_summarization/cnn_dataset',
            builder_kwargs={"version": "2.0.0"})
        test_examples = examples['test']
        test_buffer_size = metadata.splits['test'].num_examples
        test_dataset = map_batch_shuffle(test_examples,
                                         test_buffer_size,
                                         split='test',
                                         batch_size=h_parms.batch_size)
        log.info('Test TF_dataset created')
        test_dataset = test_dataset.take(1)
    else:
        test_dataset = infer_data_from_df()
    ref_sents = []
    hyp_sents = []
    for (doc_id, (input_ids, _, _, target_ids, _,
                  _)) in tqdm(enumerate(test_dataset, 1)):
        start_time = time.time()
        draft, refined_summary, att = predict_using_beam_search(
            input_ids, beam_size=3, refine_decoder_sampling_type='greedy')
        for tar, ref_hyp in zip(target_ids, refined_summary):
            sum_ref = tokenizer.convert_ids_to_tokens(
                [i for i in tf.squeeze(tar) if i not in [0, 101, 102]])
            sum_hyp = tokenizer.convert_ids_to_tokens(
                [i for i in tf.squeeze(ref_hyp) if i not in [0, 101, 102]])
            sum_ref = convert_wordpiece_to_words(sum_ref)
            sum_hyp = convert_wordpiece_to_words(sum_hyp)
            #print('Original summary: {}'.format(sum_ref))
            #print('Predicted summary: {}'.format(sum_hyp))
            ref_sents.append(sum_ref)
            hyp_sents.append(sum_hyp)
    try:
        rouges = rouge_all.get_scores(ref_sents, hyp_sents)
        avg_rouge_f1 = np.mean([
            np.mean([
                rouge_scores['rouge-1']["f"], rouge_scores['rouge-2']["f"],
                rouge_scores['rouge-l']["f"]
            ]) for rouge_scores in rouges
        ])
        _, _, bert_f1 = b_score(ref_sents,
                                hyp_sents,
                                lang='en',
                                model_type=config.pretrained_bert_model)
        avg_bert_f1 = np.mean(bert_f1.numpy())
    except Exception:
        avg_rouge_f1 = 0
        avg_bert_f1 = 0
    print(
        infer_template.format('beam_search', 'greedy', avg_rouge_f1,
                              avg_bert_f1, 3))
    print(f'time to process document {doc_id} : {time.time()-start_time}')
    print(
        f'Calculating scores for {len(ref_sents)} golden summaries and {len(hyp_sents)} predicted summaries'
    )
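A minimal sketch of the TFDS loading step used in run_eval, assuming tensorflow_datasets is installed; the data_dir and builder_kwargs arguments in the snippet are specific to the original project setup and are omitted here.

import tensorflow_datasets as tfds

examples, metadata = tfds.load('cnn_dailymail', with_info=True,
                               as_supervised=True)
print('test examples:', metadata.splits['test'].num_examples)
test_dataset = examples['test'].take(1)
for article, highlights in tfds.as_numpy(test_dataset):
    print(highlights[:100])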