def download(self):
    utils.create_dirs(self.dirs)
    dataset = fetch_dataset(*osp.split(self.root))
    utils.create_masks(data=dataset.data)
    data = dataset.data
    # fall back to all-ones edge weights when the dataset has no edge features
    edge_attr = (torch.ones(data.edge_index.shape[1])
                 if data.edge_attr is None else data.edge_attr)
    data.edge_attr = edge_attr
    torch.save((dataset.data, dataset.slices), self.processed_paths[1])

def evaluate(inp_sentence):
    # convert text to token ids; vocab_size and vocab_size + 1 serve as the
    # start and end tokens
    input_id_sentence = [pt_tokenizer.vocab_size] \
        + pt_tokenizer.encode(inp_sentence) + [pt_tokenizer.vocab_size + 1]
    # encoder_input.shape: (1, input_sentence_length)
    encoder_input = tf.expand_dims(input_id_sentence, 0)
    # decoder_input.shape: (1, 1)
    decoder_input = tf.expand_dims([en_tokenizer.vocab_size], 0)

    for i in range(max_length):
        encoder_padding_mask, decoder_mask, encoder_decoder_padding_mask \
            = create_masks(encoder_input, decoder_input)
        # predictions.shape: (batch_size, output_target_len, target_vocab_size)
        predictions, attention_weights = transformer(
            encoder_input, decoder_input, False,
            encoder_padding_mask, decoder_mask, encoder_decoder_padding_mask)
        # keep only the last position; the middle dimension disappears:
        # predictions.shape: (batch_size, target_vocab_size)
        predictions = predictions[:, -1, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
        # stop as soon as the end token is generated
        if tf.equal(predicted_id, en_tokenizer.vocab_size + 1):
            return tf.squeeze(decoder_input, axis=0), attention_weights
        # note the brackets around predicted_id: it must stay 2-D for the concat
        decoder_input = tf.concat([decoder_input, [predicted_id]], axis=-1)

    # squeeze away the batch dimension
    return tf.squeeze(decoder_input, axis=0), attention_weights

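# The snippets above and below call a create_masks helper that is not shown.
# The following is a minimal sketch of what they appear to assume, following
# the standard TensorFlow transformer tutorial; treating 0 as the pad id is an
# assumption, not something the snippets confirm.
import tensorflow as tf

def create_padding_mask(seq):
    # 1.0 where the token id is padding, broadcastable to attention logits:
    # shape (batch_size, 1, 1, seq_len)
    return tf.cast(tf.math.equal(seq, 0), tf.float32)[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    # 1.0 strictly above the diagonal: position i may not attend to j > i
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

def create_masks(inp, tar):
    enc_padding_mask = create_padding_mask(inp)  # encoder self-attention
    dec_padding_mask = create_padding_mask(inp)  # encoder-decoder attention
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    # combine the causal and padding masks for decoder self-attention
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
    return enc_padding_mask, combined_mask, dec_padding_mask
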
def train_step(inp, tar):
    """
    The target (tar) is divided into tar_inp and tar_real.
    tar_inp is passed to the decoder as input.
    tar_real is the same input shifted by 1: at each position in tar_inp,
    tar_real contains the next token that should be predicted.
    """
    # sentence = "SOS A lion in the jungle is sleeping EOS"
    # tar_inp  = "SOS A lion in the jungle is sleeping"
    # tar_real = "A lion in the jungle is sleeping EOS"
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]

    # create_masks builds the look-ahead mask internally
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(tar_real, predictions)

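# train_step relies on a loss_function that is not shown here. A plausible
# implementation, again following the TensorFlow transformer tutorial, is a
# padding-masked sparse categorical cross-entropy (pad id 0 is an assumption):
import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    # ignore positions where the target is padding
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    # average only over non-padding positions
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)
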
def evaluate(self, inp_sentence):
    # normalize and tokenize the input sentence
    inp_sentence = self.encode_zh(inp_sentence)
    encoder_input = tf.expand_dims(inp_sentence, 0)

    # as the target is English, the first token fed to the transformer is the
    # English start token
    decoder_input = [self.tokenizer_en.vocab_size]
    output = tf.expand_dims(decoder_input, 0)

    for i in range(self.MAX_SEQ_LENGTH):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions, attention_weights = self.model(
            encoder_input, output, False,
            enc_padding_mask, combined_mask, dec_padding_mask)

        # select the last token from the seq_len dimension
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # return the result if predicted_id is the end token
        if tf.equal(predicted_id, self.tokenizer_en.vocab_size + 1):
            return tf.squeeze(output, axis=0), attention_weights

        # concatenate predicted_id to the output, which is fed back to the
        # decoder as its input
        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), attention_weights

def predict(features, params, model):
    # 2 == start_decoding: seed every row of the batch with the start token
    output = tf.tile([[2]], [params["batch_size"], 1])

    for i in range(params["max_dec_len"]):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            features["enc_input"], output)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions, attention_weights = model(
            features["enc_input"],
            features["extended_enc_input"],
            features["max_oov_len"],
            output,
            training=params["training"],
            enc_padding_mask=enc_padding_mask,
            look_ahead_mask=combined_mask,
            dec_padding_mask=dec_padding_mask,
            stats=features["stats"])

        # select the last token from the seq_len dimension
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # concatenate predicted_id to the output, which is fed back to the
        # decoder as its input
        output = tf.concat([output, predicted_id], axis=-1)

    return output, attention_weights

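# Unlike the single-sentence evaluate functions, predict decodes a whole batch,
# so it cannot stop early at an end token; every row runs for max_dec_len steps.
# A hypothetical post-processing helper (not part of the original code) to cut
# each decoded row at its first end-of-sequence id:
def trim_at_eos(output, eos_id):
    trimmed = []
    for row in output.numpy().tolist():
        # keep tokens up to, but not including, the first eos, if any
        trimmed.append(row[:row.index(eos_id)] if eos_id in row else row)
    return trimmed
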
def evaluate(input_document, summary_tokenizer, document_tokenizer):
    input_document = document_tokenizer.texts_to_sequences([input_document])
    input_document = tf.keras.preprocessing.sequence.pad_sequences(
        input_document, maxlen=encoder_maxlen, padding='post', truncating='post')
    encoder_input = tf.expand_dims(input_document[0], 0)

    decoder_input = [summary_tokenizer.word_index["<start>"]]
    output = tf.expand_dims(decoder_input, 0)

    for i in range(decoder_maxlen):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)
        predictions, attention_weights = transformer(
            encoder_input, output, False,
            enc_padding_mask, combined_mask, dec_padding_mask)

        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        if predicted_id == summary_tokenizer.word_index["<end>"]:
            return tf.squeeze(output, axis=0), attention_weights

        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), attention_weights

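# A hedged usage sketch: turn the returned ids back into a summary string via
# the Keras tokenizer's index_word mapping. The input_document variable and the
# <start>-stripping are assumptions about how the caller uses this function.
summarized, _ = evaluate(input_document, summary_tokenizer, document_tokenizer)
summary = ' '.join(
    summary_tokenizer.index_word[idx]
    for idx in summarized.numpy()
    if idx != summary_tokenizer.word_index['<start>'])
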
def download_pyg_data(config):
    """
    Downloads a dataset from the PyTorch Geometric library.

    :param config: A dict containing info on the dataset to be downloaded
    :return: A tuple containing (root directory, dataset name, data directory)
    """
    leaf_dir = config["kwargs"]["root"].split("/")[-1].strip()
    data_dir = osp.join(config["kwargs"]["root"],
                        "" if config["name"] == leaf_dir else config["name"])
    dst_path = osp.join(data_dir, "raw", "data.pt")
    if not osp.exists(dst_path):
        DatasetClass = config["class"]
        dataset = DatasetClass(**config["kwargs"])
        utils.create_masks(data=dataset.data)
        torch.save((dataset.data, dataset.slices), dst_path)
    return config["kwargs"]["root"], config["name"], data_dir

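# A hypothetical config for download_pyg_data; the keys mirror exactly what the
# function reads. Planetoid/Cora is an illustrative choice, not necessarily the
# dataset the original project used.
from torch_geometric.datasets import Planetoid

config = {
    "name": "Cora",
    "class": Planetoid,
    "kwargs": {"root": "data/Cora", "name": "Cora"},
}
root, name, data_dir = download_pyg_data(config)
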
def train_step(inp, tar_inp, tar_real):
    enc_padding_mask, combined_mask, dec_padding_mask = utils.create_masks(
        inp, tar_inp)

    # shape(inp) = (batch_size, pad_size)
    # shape(predictions) = (batch_size, pad_size, tar_vocab_size)
    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        loss = metrics.loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
    train_loss(loss)

def train_step(inp, tar):
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]

    encoder_padding_mask, decoder_mask, encoder_decoder_padding_mask = create_masks(
        inp, tar_inp)

    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, True, encoder_padding_mask,
                                     decoder_mask, encoder_decoder_padding_mask)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(tar_real, predictions)

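# A sketch of the outer loop these train_step variants are typically driven by;
# EPOCHS, train_dataset, and the two Keras metric objects are assumptions about
# the surrounding training script, not shown in the snippets.
for epoch in range(EPOCHS):
    train_loss.reset_states()
    train_accuracy.reset_states()
    for batch, (inp, tar) in enumerate(train_dataset):
        train_step(inp, tar)
    print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
        epoch + 1, train_loss.result(), train_accuracy.result()))
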
def train_step(features, labels, params, model, optimizer, loss_object,
               train_loss_metric):
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        features["enc_input"], labels["dec_input"])

    with tf.GradientTape() as tape:
        output, attn_weights = model(
            features["enc_input"],
            features["extended_enc_input"],
            features["max_oov_len"],
            labels["dec_input"],
            training=params["training"],
            enc_padding_mask=enc_padding_mask,
            look_ahead_mask=combined_mask,
            dec_padding_mask=dec_padding_mask)
        loss = loss_function(loss_object, labels["dec_target"], output)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss_metric(loss)

def encode(self, X, W, training=False):
    X_imputed = self.imputation_layer(X, W)
    enc_padding_mask, look_ahead_mask, dec_padding_mask = create_masks(X_imputed)
    enc_output = self.encoder(X_imputed, training=training, mask=enc_padding_mask)
    # use (self.D + 1) because of the SOS added at the beginning
    hidden = tf.reshape(enc_output, [-1, (self.D + 1) * self.d_model])
    if len(self.n_hidden) > 1:
        for layer in self.ae_encode_layers:
            hidden = layer(hidden)
    mu_tilde = self.mu_layer(hidden)
    log_sigma2_tilde = self.log_sigma2_layer(hidden)
    z = self.z_layer((mu_tilde, log_sigma2_tilde))
    return z, mu_tilde, log_sigma2_tilde

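# encode does not show how z_layer is defined. In a VAE-style encoder like this
# one, a common choice (an assumption here, not necessarily this repo's code)
# is the reparameterization trick, which keeps sampling differentiable:
import tensorflow as tf

class SamplingLayer(tf.keras.layers.Layer):
    def call(self, inputs):
        mu, log_sigma2 = inputs
        # z = mu + sigma * eps, with eps ~ N(0, I)
        eps = tf.random.normal(tf.shape(mu))
        return mu + tf.exp(0.5 * log_sigma2) * eps
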
def evaluate(transformer, inp_sequence, max_length=160):
    """
    Given an input sequence for the encoder, predict the target with the decoder.

    Parameters:
        inp_sequence (string): input sequence for the encoder

    Returns:
        predicted token ids: shape = (seq_len,), plus the attention weights
    """
    start_token = token_encode_dic["^"]
    end_token = token_encode_dic["$"]

    # inp_sequence is the product, shape = (len(inp_sequence) + 2,)
    inp_sequence = tokenizer.lookup(tf.strings.bytes_split("^" + inp_sequence + "$"))
    encoder_input = tf.expand_dims(inp_sequence, 0)  # (1, len(inp_sequence) + 2)

    # the target is the reactant; its first char is "^"
    decoder_input = [start_token]
    output = tf.expand_dims(decoder_input, 0)

    for i in range(max_length):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions, attention_weights = transformer(
            encoder_input, output, False,
            enc_padding_mask, combined_mask, dec_padding_mask)

        # take only the last element of the seq_len dimension,
        # because the sequence is shifted
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # if the end token is predicted, return the result
        if predicted_id == end_token:
            return tf.squeeze(output, axis=0), attention_weights

        # append predicted_id to the output each step; eventually this yields
        # the whole target sequence
        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), attention_weights

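# A hedged usage sketch: decode the returned ids back to a reactant string.
# token_decode_dic (id -> char) is hypothetical; only token_encode_dic appears
# above, and the aspirin SMILES is just an illustrative input.
output_ids, _ = evaluate(transformer, "CC(=O)OC1=CC=CC=C1C(=O)O")
reactant = ''.join(token_decode_dic[int(i)] for i in output_ids.numpy()[1:])  # skip "^"
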
def decode(self, z, X, training=False):
    hidden = z
    for layer in self.ae_decode_layers:
        hidden = layer(hidden)
    hidden = self.connect_layer(hidden)
    # use (self.D + 1) because of the SOS added at the beginning
    hidden = tf.reshape(hidden, [-1, (self.D + 1), self.d_model])
    enc_padding_mask, look_ahead_mask, dec_padding_mask = create_masks(hidden)
    dec_output, attention_weights = self.decoder(
        X[:, :-1, :], hidden,
        training=training,
        look_ahead_mask=look_ahead_mask,
        padding_mask=dec_padding_mask)
    x_raw = self.final_layer(dec_output)
    x = self.output_activation(x_raw, name="x_output")
    return x, x_raw, attention_weights

def train_step(inp, tar):
    tar = tar.get('output_ids')
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        inp['input_ids'], tar_inp)

    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    opt.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(tar_real, predictions)

def _translate_sentence(self, src_seq, max_seq_len):
    # Only accepts batch size equal to 1 in this function.
    # TODO: expand to batch operation.
    src_pad_idx, trg_eos_idx = self.src_pad_idx, self.trg_eos_idx
    beam_size, alpha = self.beam_size, self.alpha

    with torch.no_grad():
        src_mask, _ = create_masks(src_seq, None, src_seq.device, src_pad_idx)
        enc_output, gen_seq, scores = self._get_init_state(src_seq, src_mask)

        ans_idx = 0  # default
        for step in range(2, max_seq_len):  # decode up to max length
            dec_output = self._model_decode(gen_seq[:, :step], enc_output,
                                            src_mask)
            gen_seq, scores = self._get_the_best_score_and_idx(
                gen_seq, dec_output, scores, step)

            # Check if all paths are finished
            # -- locate the eos in the generated sequences
            eos_locs = gen_seq == trg_eos_idx
            # -- replace the eos with its position for the length penalty use
            seq_lens, _ = self.len_map.masked_fill(~eos_locs, max_seq_len).min(1)
            # -- check if all beams contain eos
            if (eos_locs.sum(1) > 0).sum(0).item() == beam_size:
                # TODO: Try different termination conditions.
                _, ans_idx = scores.div(
                    seq_lens.to(enc_output.device) ** alpha).max(0)
                ans_idx = ans_idx.item()
                break

    return gen_seq[ans_idx][:seq_lens[ans_idx]]

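# Note on the termination branch above: dividing each finished beam's score by
# seq_len ** alpha is a GNMT-style length penalty, i.e. the selected hypothesis
# maximizes log P(y | x) / |y|^alpha, so longer sequences are not unfairly
# penalized by accumulating negative log-probabilities. This reading of the
# code is an inference; the repository itself does not spell it out.
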
def main(args):
    # create a writer
    writer = SummaryWriter('loss_plot_' + args.mode, comment='test')

    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    val_length = len(os.listdir(args.image_dir_val))

    # Build data loaders
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)
    data_loader_val = get_loader(args.image_dir_val, args.caption_path_val,
                                 vocab, transform, args.batch_size,
                                 shuffle=True, num_workers=args.num_workers)

    # Build the model.
    # if the no-attention model is chosen:
    if args.model_type == 'no_attention':
        encoder = Encoder(args.embed_size).to(device)
        decoder = Decoder(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers).to(device)
        criterion = nn.CrossEntropyLoss()
    # if the attention model is chosen:
    elif args.model_type == 'attention':
        encoder = EncoderAtt(encoded_image_size=9).to(device)
        decoder = DecoderAtt(vocab, args.encoder_dim, args.hidden_size,
                             args.attention_dim, args.embed_size,
                             args.dropout_ratio, args.alpha_c).to(device)
    # if the transformer model is chosen:
    elif args.model_type == 'transformer':
        model = Transformer(len(vocab), args.embed_size,
                            args.transformer_layers, 8,
                            args.dropout_ratio).to(device)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, model.encoder.parameters()),
            lr=args.learning_rate_enc)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, model.decoder.parameters()),
            lr=args.learning_rate_dec)
        criterion = nn.CrossEntropyLoss(ignore_index=vocab.word2idx['<pad>'])
    else:
        print('Select model_type attention or no_attention')

    # if the model is not a transformer, an additional step is needed for the
    # encoder: freeze the lower layers of the resnet if args.fine_tune == True
    if args.model_type != 'transformer':
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=args.learning_rate_dec)
        encoder.fine_tune(args.fine_tune)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=args.learning_rate_enc)

    # initialize lists to store results:
    loss_train = []
    loss_val = []
    loss_val_epoch = []
    loss_train_epoch = []
    bleu_res_list = []
    cider_res_list = []
    rouge_res_list = []
    results = {}

    # calculate total steps for train and validation
    total_step = len(data_loader)
    total_step_val = len(data_loader_val)

    # for each epoch
    for epoch in tqdm(range(args.num_epochs)):
        loss_val_iter = []
        loss_train_iter = []

        # set model to train mode
        if args.model_type != 'transformer':
            encoder.train()
            decoder.train()
        else:
            model.train()

        # for each entry in data_loader
        for i, (images, captions, lengths) in tqdm(enumerate(data_loader)):
            # load images and captions to device
            images = images.to(device)
            captions = captions.to(device)

            # Forward, backward and optimize.
            # The forward and backward pass differs by model type:
            if args.model_type == 'no_attention':
                # get features from the encoder
                features = encoder(images)
                # pack padded targets to a flat tensor
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]
                # get output from the decoder
                outputs = decoder(features, captions, lengths)
                # calculate loss
                loss = criterion(outputs, targets)
                # optimizer and backward step
                decoder_optimizer.zero_grad()
                encoder_optimizer.zero_grad()
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()

            elif args.model_type == 'attention':
                # get features from the encoder
                features = encoder(images)
                # targets start from the 2nd word in captions (the model is not
                # sequential, so targets are predicted in parallel; there is no
                # need to predict the first word in captions)
                targets = captions[:, 1:]
                # decode length = length - 1 for each caption
                decode_lengths = [length - 1 for length in lengths]
                # flatten targets
                targets = targets.reshape(targets.shape[0] * targets.shape[1])
                sampled_caption = []
                # get scores and alphas from the decoder
                scores, alphas = decoder(features, captions, decode_lengths)
                scores = scores.view(-1, scores.shape[-1])
                # predicted = prediction with the maximum score
                _, predicted = torch.max(scores, dim=1)
                # calculate loss
                loss = decoder.loss(scores, targets, alphas)
                # optimizer and backward step
                decoder_optimizer.zero_grad()
                encoder_optimizer.zero_grad()
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()

            elif args.model_type == 'transformer':
                # the input is the captions without the last word
                trg_input = captions[:, :-1]
                # create mask
                trg_mask = create_masks(trg_input)
                # get scores from the model
                scores = model(images, trg_input, trg_mask)
                scores = scores.view(-1, scores.shape[-1])
                # targets start from the 2nd word in captions
                targets = captions[:, 1:]
                # predicted = prediction with the maximum score
                _, predicted = torch.max(scores, dim=1)
                # calculate loss
                loss = criterion(
                    scores, targets.reshape(targets.shape[0] * targets.shape[1]))
                # forward and backward pass
                decoder_optimizer.zero_grad()
                encoder_optimizer.zero_grad()
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()

            else:
                print('Select model_type attention or no_attention')

            # append results to the loss lists and writer
            loss_train_iter.append(loss.item())
            loss_train.append(loss.item())
            writer.add_scalar('Loss/train/iterations', loss.item(), i + 1)

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, '
                      'Perplexity: {:5.4f}'.format(epoch, args.num_epochs, i,
                                                   total_step, loss.item(),
                                                   np.exp(loss.item())))
        # append the mean of the last 10 batches as the approximate epoch loss
        loss_train_epoch.append(np.mean(loss_train_iter[-10:]))
        writer.add_scalar('Loss/train/epoch', np.mean(loss_train_iter[-10:]),
                          epoch + 1)

        # save the model
        if args.model_type != 'transformer':
            torch.save(decoder.state_dict(),
                       os.path.join(args.model_path,
                                    'decoder_' + args.mode +
                                    '_{}.ckpt'.format(epoch + 1)))
            torch.save(encoder.state_dict(),
                       os.path.join(args.model_path,
                                    'encoder_' + args.mode +
                                    '_{}.ckpt'.format(epoch + 1)))
        else:
            torch.save(model.state_dict(),
                       os.path.join(args.model_path,
                                    'model_' + args.mode +
                                    '_{}.ckpt'.format(epoch + 1)))
        np.save(os.path.join(args.predict_json,
                             'loss_train_temp_' + args.mode + '.npy'),
                loss_train)

        # validate the model:
        # set model to eval mode:
        if args.model_type != 'transformer':
            encoder.eval()
            decoder.eval()
        else:
            model.eval()

        total_step = len(data_loader_val)

        # set no_grad mode:
        with torch.no_grad():
            # for each entry in data_loader_val
            for i, (images, captions, lengths) in tqdm(enumerate(data_loader_val)):
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]
                images = images.to(device)
                captions = captions.to(device)

                # the forward pass differs by model type:
                if args.model_type == 'no_attention':
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)
                    loss = criterion(outputs, targets)
                elif args.model_type == 'attention':
                    features = encoder(images)
                    sampled_caption = []
                    targets = captions[:, 1:]
                    decode_lengths = [length - 1 for length in lengths]
                    targets = targets.reshape(targets.shape[0] * targets.shape[1])
                    scores, alphas = decoder(features, captions, decode_lengths)
                    _, predicted = torch.max(scores, dim=1)
                    scores = scores.view(-1, scores.shape[-1])
                    loss = decoder.loss(scores, targets, alphas)
                elif args.model_type == 'transformer':
                    trg_input = captions[:, :-1]
                    trg_mask = create_masks(trg_input)
                    scores = model(images, trg_input, trg_mask)
                    scores = scores.view(-1, scores.shape[-1])
                    targets = captions[:, 1:]
                    _, predicted = torch.max(scores, dim=1)
                    loss = criterion(
                        scores,
                        targets.reshape(targets.shape[0] * targets.shape[1]))

                # display results
                if i % args.log_step == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Validation Loss: {:.4f}, '
                          'Validation Perplexity: {:5.4f}'.format(
                              epoch, args.num_epochs, i, total_step_val,
                              loss.item(), np.exp(loss.item())))

                # append results to the loss lists and writer
                loss_val.append(loss.item())
                loss_val_iter.append(loss.item())
                writer.add_scalar('Loss/validation/iterations', loss.item(), i + 1)

        np.save(os.path.join(args.predict_json, 'loss_val_' + args.mode + '.npy'),
                loss_val)

        # results: epoch validation loss
        loss_val_epoch.append(np.mean(loss_val_iter))
        writer.add_scalar('Loss/validation/epoch', np.mean(loss_val_iter),
                          epoch + 1)

        # predict captions:
        filenames = os.listdir(args.image_dir_val)
        predicted = {}
        for file in tqdm(filenames):
            if file == '.DS_Store':
                continue
            # Prepare an image
            image = load_image(os.path.join(args.image_dir_val, file), transform)
            image_tensor = image.to(device)

            # Generate a caption starting with the <start> word;
            # the procedure differs by model type
            if args.model_type == 'attention':
                features = encoder(image_tensor)
                sampled_ids, _ = decoder.sample(features)
                sampled_ids = sampled_ids[0].cpu().numpy()
                # start sampled_caption with <start>
                sampled_caption = ['<start>']
            elif args.model_type == 'no_attention':
                features = encoder(image_tensor)
                sampled_ids = decoder.sample(features)
                sampled_ids = sampled_ids[0].cpu().numpy()
                sampled_caption = ['<start>']
            elif args.model_type == 'transformer':
                e_outputs = model.encoder(image_tensor)
                max_seq_length = 20
                sampled_ids = torch.zeros(max_seq_length, dtype=torch.long)
                sampled_ids[0] = torch.LongTensor(
                    [[vocab.word2idx['<start>']]]).to(device)
                # greedy decoding, one position at a time
                for i in range(1, max_seq_length):
                    trg_mask = np.triu(np.ones((1, i, i)), k=1).astype('uint8')
                    trg_mask = Variable(
                        torch.from_numpy(trg_mask) == 0).to(device)
                    out = model.decoder(sampled_ids[:i].unsqueeze(0), e_outputs,
                                        trg_mask)
                    out = model.out(out)
                    out = F.softmax(out, dim=-1)
                    val, ix = out[:, -1].data.topk(1)
                    sampled_ids[i] = ix[0][0]
                sampled_ids = sampled_ids.cpu().numpy()
                sampled_caption = []

            # Convert word_ids to words
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                # break at the <end> of the sentence
                if word == '<end>':
                    break
            sentence = ' '.join(sampled_caption)
            predicted[file] = sentence

        # save predictions to a json file:
        json.dump(predicted,
                  open(os.path.join(args.predict_json,
                                    'predicted_' + args.mode + '_' +
                                    str(epoch) + '.json'), 'w'))

        # evaluate the predicted captions against the references
        with open(args.caption_path_val, 'r') as file:
            captions = json.load(file)

        res = {}
        for r in predicted:
            res[r] = [predicted[r].strip('<start> ').strip(' <end>')]

        images = captions['images']
        caps = captions['annotations']
        gts = {}
        for image in images:
            image_id = image['id']
            file_name = image['file_name']
            list_cap = []
            for cap in caps:
                if cap['image_id'] == image_id:
                    list_cap.append(cap['caption'])
            gts[file_name] = list_cap

        # calculate BLEU, CIDEr and ROUGE metrics from the reference and
        # predicted captions
        bleu_res = bleu(gts, res)
        cider_res = cider(gts, res)
        rouge_res = rouge(gts, res)

        # append results to the result lists
        bleu_res_list.append(bleu_res)
        cider_res_list.append(cider_res)
        rouge_res_list.append(rouge_res)

        # write results to the writer
        writer.add_scalar('BLEU1/validation/epoch', bleu_res[0], epoch + 1)
        writer.add_scalar('BLEU2/validation/epoch', bleu_res[1], epoch + 1)
        writer.add_scalar('BLEU3/validation/epoch', bleu_res[2], epoch + 1)
        writer.add_scalar('BLEU4/validation/epoch', bleu_res[3], epoch + 1)
        writer.add_scalar('CIDEr/validation/epoch', cider_res, epoch + 1)
        writer.add_scalar('ROUGE/validation/epoch', rouge_res, epoch + 1)

        results['bleu'] = bleu_res_list
        results['cider'] = cider_res_list
        results['rouge'] = rouge_res_list

        json.dump(results,
                  open(os.path.join(args.predict_json,
                                    'results_' + args.mode + '.json'), 'w'))
        np.save(os.path.join(args.predict_json,
                             'loss_train_' + args.mode + '.npy'), loss_train)
        np.save(os.path.join(args.predict_json,
                             'loss_val_' + args.mode + '.npy'), loss_val)

def evaluate(sentence,
             tokenizer,
             model,
             max_length=MAX_SEQ_LENGTH,
             pad_on_left=False,
             pad_token=0,
             pad_token_segment_id=0,
             mask_padding_with_zero=True):
    sentence = preprocess_sentence(sentence)
    inputs = tokenizer.encode_plus(
        sentence,
        add_special_tokens=True,
        max_length=max_length,
    )
    input_ids, input_token_type_ids = inputs["input_ids"], inputs["token_type_ids"]

    # attention mask: 1 for real tokens, 0 for padding (or the inverse when
    # mask_padding_with_zero is False)
    input_attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

    # pad up to max_length on the chosen side
    input_padding_length = max_length - len(input_ids)
    if pad_on_left:
        input_ids = ([pad_token] * input_padding_length) + input_ids
        input_attention_mask = ([0 if mask_padding_with_zero else 1] *
                                input_padding_length) + input_attention_mask
        input_token_type_ids = ([pad_token_segment_id] *
                                input_padding_length) + input_token_type_ids
    else:
        input_ids = input_ids + ([pad_token] * input_padding_length)
        input_attention_mask = input_attention_mask + (
            [0 if mask_padding_with_zero else 1] * input_padding_length)
        input_token_type_ids = input_token_type_ids + (
            [pad_token_segment_id] * input_padding_length)

    assert len(input_ids) == max_length, \
        "Error with input length {} vs {}".format(len(input_ids), max_length)
    assert len(input_attention_mask) == max_length, \
        "Error with input length {} vs {}".format(len(input_attention_mask),
                                                  max_length)
    assert len(input_token_type_ids) == max_length, \
        "Error with input length {} vs {}".format(len(input_token_type_ids),
                                                  max_length)

    input_ids = tf.expand_dims(input_ids, 0)
    input_attention_mask = tf.expand_dims(input_attention_mask, 0)
    input_token_type_ids = tf.expand_dims(input_token_type_ids, 0)

    bert_input = {
        'input_ids': input_ids,
        'attention_mask': input_attention_mask,
        'token_type_ids': input_token_type_ids
    }

    decoder_input = [START_TOKEN]
    output = tf.expand_dims(decoder_input, 0)

    for i in range(max_length):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            bert_input['input_ids'], output)

        predictions, attention_weights = model(bert_input, output, False,
                                               enc_padding_mask, combined_mask,
                                               dec_padding_mask)

        # select the last token from the seq_len dimension
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # concatenate predicted_id to the output, which is fed back to the
        # decoder as its input
        output = tf.concat([output, predicted_id], axis=-1)

        # return the result (including the stop token) once the stop token
        # has been generated
        if tf.equal(predicted_id, STOP_TOKEN):
            return tf.squeeze(output, axis=0), attention_weights

    return tf.squeeze(output, axis=0), attention_weights

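# A hedged usage sketch for the BERT-encoder variant above. Note that, unlike
# the earlier evaluate functions, it appends the stop token before checking it,
# so the returned sequence includes STOP_TOKEN and both special ids must be
# stripped during detokenization. The example sentence is illustrative only.
output_ids, attn = evaluate("How are you?", tokenizer, model)
token_ids = [int(t) for t in output_ids.numpy()
             if int(t) not in (START_TOKEN, STOP_TOKEN)]
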