def __init__(self, model_container, generator, num_samples_stats,
             num_display_sentences, output_dir=None):
    """Store the evaluation inputs and create the output directory if needed.

    :param model_container: object stored as-is on ``self.model_container``
    :param generator: object stored as-is on ``self.generator``
    :param num_samples_stats: stored as-is on ``self.num_samples_stats``
    :param num_display_sentences: stored as-is on
        ``self.num_display_sentences``
    :param output_dir: directory to create when it does not exist yet;
        ``None`` skips directory creation
    """
    # Data sources.
    self.generator = generator
    self.model_container = model_container

    # Sample budgets.
    self.num_display_sentences = num_display_sentences
    self.num_samples_stats = num_samples_stats

    self.output_dir = output_dir

    # NOTE(review): `beam_width` is not a parameter of this method; it is
    # presumably a module-level name -- confirm it is defined in this file.
    self.decoder = Decoder(beam_width=beam_width)

    print("output directory is ", output_dir)

    if output_dir is None:
        return
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
def __init__(self, env, other_players, n_seats, stacks=2500, encoding='norm',
             encoder=None, decoder=None, visualize=False, debug=False):
    """Wire up the environment, the opponents and the encoder/decoder pair.

    :param env: environment object, stored on ``self.env``
    :param other_players: sequence of opponents; must contain exactly
        ``n_seats - 1`` entries
    :param n_seats: total number of seats
    :param stacks: forwarded to ``self._add_players`` together with
        ``n_seats``
    :param encoding: passed as ``ranking_encoding`` when a default
        ``Encoder`` has to be built
    :param encoder: encoder to use; a default ``Encoder`` is built when
        ``None``
    :param decoder: decoder to use; a default ``Decoder()`` is built when
        ``None``
    :param visualize: stored on ``self._visualize``
    :param debug: stored on ``self._debug``
    """
    assert len(other_players) == n_seats - 1

    self.env = env
    self.other_players = other_players
    self.n_seats = n_seats
    self._debug = debug
    self._visualize = visualize

    # Only build the default helpers when the caller did not supply one.
    if encoder is None:
        encoder = Encoder(n_seats, ranking_encoding=encoding)
    self._encoder = encoder

    if decoder is None:
        decoder = Decoder()
    self._decoder = decoder

    self._add_players(n_seats, stacks)
from decoders import gridrnn_Decoder as Decoder elif args.dec_type == 'hidden': from decoders import Hidden_Decoder as Decoder else: from decoders import gridrnn_Decoder as Decoder if args.d_type == 'dcgan': from discriminators import DCGAN_discriminator as Discriminator #elif args.d_type == 'hidden': else: from discriminators import Hidden_discriminator as Discriminator #else: #from discriminators import DCGAN_discriminator as Discriminator generator = Generator(args) decoder = Decoder(args) discriminator = Discriminator(args) if cuda: generator.cuda() discriminator.cuda() decoder.cuda() BCELoss.cuda() MSELoss.cuda() else: print('models', generator, discriminator, decoder) # Initialize weights generator.apply(weights_init_normal) discriminator.apply(weights_init_normal) decoder.apply(weights_init_normal)
shuffle=False, collate_fn=dataset.collate_fn) # iterations per epoch setattr(args, 'iter_per_epoch', math.ceil(dataset.num_data_points[args.split] / args.batch_size)) print("{} iter per epoch.".format(args.iter_per_epoch)) # ---------------------------------------------------------------------------- # setup the model # ---------------------------------------------------------------------------- encoder = Encoder(model_args) encoder.load_state_dict(components['encoder']) decoder = Decoder(model_args, encoder) decoder.load_state_dict(components['decoder']) print("Loaded model from {}".format(args.load_path)) if args.gpuid >= 0: encoder = encoder.cuda() decoder = decoder.cuda() # ---------------------------------------------------------------------------- # evaluation # ---------------------------------------------------------------------------- print("Evaluation start time: {}".format( datetime.datetime.strftime(datetime.datetime.utcnow(), '%d-%b-%Y-%H:%M:%S'))) encoder.eval()
def train(run_name, speaker, start_epoch, stop_epoch, img_c, img_w, img_h,
          frames_n, absolute_max_string_len, minibatch_size):
    """Build the data generator, model and callbacks, then run training.

    Datasets, results and logs live under ``CURRENT_PATH/<speaker>/``.
    When ``start_epoch`` is positive, the weights written for epoch
    ``start_epoch - 1`` of the same run are loaded before training resumes.

    :param run_name: name of the run; used for the results/log sub-paths
    :param speaker: sub-directory of ``CURRENT_PATH`` holding this speaker
    :param start_epoch: epoch to start (or resume) from
    :param stop_epoch: value passed as ``epochs`` to ``fit_generator``
    :param img_c: forwarded to the generator and the model
    :param img_w: forwarded to the generator and the model
    :param img_h: forwarded to the generator and the model
    :param frames_n: forwarded to the generator and the model
    :param absolute_max_string_len: forwarded to the generator and the model
    :param minibatch_size: generator batch size; also the number of
        sentences handed to the ``Visualize`` callback
    """
    dataset_dir = os.path.join(CURRENT_PATH, speaker, 'datasets')
    output_dir = os.path.join(CURRENT_PATH, speaker, 'results')
    log_dir = os.path.join(CURRENT_PATH, speaker, 'logs')
    run_output_dir = os.path.join(output_dir, run_name)

    # --- data ------------------------------------------------------------
    curriculum = Curriculum(curriculum_rules)
    lip_gen = BasicGenerator(
        dataset_path=dataset_dir,
        minibatch_size=minibatch_size,
        img_c=img_c,
        img_w=img_w,
        img_h=img_h,
        frames_n=frames_n,
        absolute_max_string_len=absolute_max_string_len,
        curriculum=curriculum,
        start_epoch=start_epoch,
    ).build()

    # --- model -----------------------------------------------------------
    lipnet = LipNet(
        img_c=img_c,
        img_w=img_w,
        img_h=img_h,
        frames_n=frames_n,
        absolute_max_string_len=absolute_max_string_len,
        output_size=lip_gen.get_output_size(),
    )
    lipnet.summary()

    adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # The loss is computed elsewhere, so the compiled "loss" simply passes
    # the prediction tensor through.
    lipnet.model.compile(
        loss={'ctc': lambda y_true, y_pred: y_pred},
        optimizer=adam,
    )

    # Resume from the previous epoch's checkpoint when asked to.
    if start_epoch > 0:
        previous_weights = os.path.join(
            run_output_dir, 'weights%02d.h5' % (start_epoch - 1))
        lipnet.model.load_weights(previous_weights)

    # --- decoding --------------------------------------------------------
    spell = Spell(path=PREDICT_DICTIONARY)
    decoder = Decoder(
        greedy=PREDICT_GREEDY,
        beam_width=PREDICT_BEAM_WIDTH,
        postprocessors=[labels_to_text, spell.sentence],
    )

    # --- callbacks -------------------------------------------------------
    statistics = Statistics(
        lipnet, lip_gen.next_val(), decoder, 256, output_dir=run_output_dir)
    visualize = Visualize(
        run_output_dir,
        lipnet,
        lip_gen.next_val(),
        decoder,
        num_display_sentences=minibatch_size,
    )
    tensorboard = TensorBoard(log_dir=os.path.join(log_dir, run_name))
    csv_log_path = os.path.join(
        log_dir, "{}-{}.csv".format('training', run_name))
    csv_logger = CSVLogger(csv_log_path, separator=',', append=True)
    checkpoint = ModelCheckpoint(
        os.path.join(run_output_dir, "weights{epoch:02d}.h5"),
        monitor='val_loss',
        save_weights_only=True,
        mode='auto',
        period=1,
    )
    callbacks = [
        checkpoint, statistics, visualize, lip_gen, tensorboard, csv_logger
    ]

    # --- training --------------------------------------------------------
    lipnet.model.fit_generator(
        generator=lip_gen.next_train(),
        steps_per_epoch=lip_gen.default_training_steps,
        epochs=stop_epoch,
        validation_data=lip_gen.next_val(),
        validation_steps=2,
        callbacks=callbacks,
        initial_epoch=start_epoch,
        verbose=1,
        max_q_size=5,
        workers=2,
        pickle_safe=True,
    )
'num_data_points', 'vocab_size', 'max_ques_count', 'max_ques_len', 'max_ans_len' }: setattr(model_args, key, getattr(dataset, key)) # iterations per epoch setattr(args, 'iter_per_epoch', math.ceil(dataset.num_data_points['train'] / args.batch_size)) print("{} iter per epoch.".format(args.iter_per_epoch)) # ---------------------------------------------------------------------------- # setup the model # ---------------------------------------------------------------------------- encoder = Encoder(model_args) decoder = Decoder(model_args, encoder) criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=args.lr) scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=args.lr_decay_rate) if args.load_path != '': encoder.load_state_dict(components['encoder']) decoder.load_state_dict(components['decoder']) optimizer.load_state_dict(components['optimizer']) # cuda enabled optimizer, see: for state in optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor):
def beam_search(
    decoder: Decoder,
    size: int,
    bos_index: int,
    eos_index: int,
    pad_index: int,
    encoder_output: Tensor,
    encoder_hidden: Tensor,
    src_mask: Tensor,
    max_output_length: int,
    alpha: float,
    embed: Embeddings,
    n_best: int = 1,
) -> (np.array, np.array):
    """
    Beam search with size k.
    Inspired by OpenNMT-py, adapted for Transformer.

    In each decoding step, find the k most likely partial hypotheses.

    :param decoder: decoder called once per step; recurrent decoders are
        additionally asked for their initial hidden state
    :param size: size of the beam
    :param bos_index: index every hypothesis is initialised with
    :param eos_index: index that marks a hypothesis as finished
    :param pad_index: value used to pad the stacked output
    :param encoder_output: encoder states, tiled `size` times along dim 0
    :param encoder_hidden: encoder hidden state, used to initialise the
        hidden state of recurrent decoders
    :param src_mask: source mask, tiled `size` times along dim 0
    :param max_output_length: maximum number of decoding steps; all
        hypotheses are forced to finish at the last step
    :param alpha: `alpha` factor for length penalty (the penalty is only
        applied when `alpha > -1`)
    :param embed: target embedding applied to the decoder input
    :param n_best: return this many hypotheses, <= beam (currently only 1)
    :return:
        - stacked_output: output hypotheses (2d array of indices),
        - None: attention scores are not collected by this implementation
    """
    assert size > 0, "Beam size must be >0."
    assert n_best <= size, "Can only return {} best hypotheses.".format(size)

    # init
    transformer = isinstance(decoder, TransformerDecoder)
    batch_size = src_mask.size(0)
    att_vectors = None  # not used for Transformer

    # Recurrent models only: initialize RNN hidden state
    # pylint: disable=protected-access
    if not transformer:
        hidden = decoder._init_hidden(encoder_hidden)
    else:
        hidden = None

    # tile encoder states and decoder initial states beam_size times
    if hidden is not None:
        hidden = tile(hidden, size, dim=1)  # layers x batch*k x dec_hidden_size

    encoder_output = tile(encoder_output.contiguous(), size,
                          dim=0)  # batch*k x src_len x enc_hidden_size
    src_mask = tile(src_mask, size, dim=0)  # batch*k x 1 x src_len

    # Transformer only: create target mask
    if transformer:
        trg_mask = src_mask.new_ones([1, 1, 1])  # transformer only
    else:
        trg_mask = None

    # numbering elements in the batch
    batch_offset = torch.arange(batch_size,
                                dtype=torch.long,
                                device=encoder_output.device)

    # numbering elements in the extended batch, i.e. beam size copies of each
    # batch element
    beam_offset = torch.arange(0,
                               batch_size * size,
                               step=size,
                               dtype=torch.long,
                               device=encoder_output.device)

    # keeps track of the top beam size hypotheses to expand for each element
    # in the batch to be further decoded (that are still "alive")
    alive_seq = torch.full(
        [batch_size * size, 1],
        bos_index,
        dtype=torch.long,
        device=encoder_output.device,
    )

    # Give full probability to the first beam on the first step.
    topk_log_probs = torch.zeros(batch_size,
                                 size,
                                 device=encoder_output.device)
    topk_log_probs[:, 1:] = float("-inf")

    # Structure that holds finished hypotheses.
    hypotheses = [[] for _ in range(batch_size)]

    results = {
        "predictions": [[] for _ in range(batch_size)],
        "scores": [[] for _ in range(batch_size)],
        "gold_score": [0] * batch_size,
    }

    for step in range(max_output_length):

        # This decides which part of the predicted sentence we feed to the
        # decoder to make the next prediction.
        # For Transformer, we feed the complete predicted sentence so far.
        # For Recurrent models, only feed the previous target word prediction
        if transformer:  # Transformer
            decoder_input = alive_seq  # complete prediction so far
        else:  # Recurrent
            decoder_input = alive_seq[:, -1].view(-1, 1)  # only the last word

        # expand current hypotheses
        # decode one single step
        # logits: logits for final softmax
        # pylint: disable=unused-variable
        trg_embed = embed(decoder_input)
        logits, hidden, att_scores, att_vectors = decoder(
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden,
            src_mask=src_mask,
            trg_embed=trg_embed,
            hidden=hidden,
            prev_att_vector=att_vectors,
            unroll_steps=1,
            trg_mask=trg_mask,  # subsequent mask for Transformer only
        )

        # For the Transformer we made predictions for all time steps up to
        # this point, so we only want to know about the last time step.
        if transformer:
            logits = logits[:, -1]  # keep only the last time step
            hidden = None  # we don't need to keep it for transformer

        # batch*k x trg_vocab
        log_probs = F.log_softmax(logits, dim=-1).squeeze(1)

        # multiply probs by the beam probability (=add logprobs)
        log_probs += topk_log_probs.view(-1).unsqueeze(1)
        curr_scores = log_probs.clone()

        # compute length penalty
        if alpha > -1:
            length_penalty = ((5.0 + (step + 1)) / 6.0)**alpha
            curr_scores /= length_penalty

        # flatten log_probs into a list of possibilities
        curr_scores = curr_scores.reshape(-1, size * decoder.output_size)

        # pick currently best top k hypotheses (flattened order)
        topk_scores, topk_ids = curr_scores.topk(size, dim=-1)

        if alpha > -1:
            # recover original log probs
            topk_log_probs = topk_scores * length_penalty
        else:
            topk_log_probs = topk_scores.clone()

        # reconstruct beam origin and true word ids from flattened order.
        # Floor division is required here: `Tensor.div` performs true
        # division on current PyTorch and would return a float tensor,
        # which cannot be used as an index for `index_select` below.
        topk_beam_index = topk_ids // decoder.output_size
        topk_ids = topk_ids.fmod(decoder.output_size)

        # map beam_index to batch_index in the flat representation
        batch_index = (topk_beam_index +
                       beam_offset[:topk_beam_index.size(0)].unsqueeze(1))
        select_indices = batch_index.view(-1)

        # append latest prediction
        alive_seq = torch.cat(
            [alive_seq.index_select(0, select_indices),
             topk_ids.view(-1, 1)], -1)  # batch_size*k x hyp_len

        is_finished = topk_ids.eq(eos_index)
        if step + 1 == max_output_length:
            # out of steps: force every remaining hypothesis to finish
            is_finished.fill_(True)
        # end condition is whether the top beam is finished
        end_condition = is_finished[:, 0].eq(True)

        # save finished hypotheses
        if is_finished.any():
            predictions = alive_seq.view(-1, size, alive_seq.size(-1))
            for i in range(is_finished.size(0)):
                b = batch_offset[i]
                if end_condition[i]:
                    is_finished[i].fill_(True)
                finished_hyp = is_finished[i].nonzero().view(-1)
                # store finished hypotheses for this batch
                for j in finished_hyp:
                    # Check if the prediction has more than one EOS.
                    # If it has more than one EOS, it means that the
                    # prediction should have already been added to the
                    # hypotheses, so you don't have to add them again.
                    if (predictions[i, j, 1:]
                            == eos_index).nonzero().numel() < 2:
                        hypotheses[b].append((
                            topk_scores[i, j],
                            predictions[i, j, 1:],  # ignore start_token
                        ))
                # if the batch reached the end, save the n_best hypotheses
                if end_condition[i]:
                    best_hyp = sorted(hypotheses[b],
                                      key=lambda x: x[0],
                                      reverse=True)
                    for n, (score, pred) in enumerate(best_hyp):
                        if n >= n_best:
                            break
                        results["scores"][b].append(score)
                        results["predictions"][b].append(pred)
            non_finished = end_condition.eq(False).nonzero().view(-1)
            # if all sentences are translated, no need to go further
            # pylint: disable=len-as-condition
            if len(non_finished) == 0:
                break
            # remove finished batches for the next step
            topk_log_probs = topk_log_probs.index_select(0, non_finished)
            batch_index = batch_index.index_select(0, non_finished)
            batch_offset = batch_offset.index_select(0, non_finished)
            alive_seq = predictions.index_select(0, non_finished).view(
                -1, alive_seq.size(-1))

        # reorder indices, outputs and masks
        select_indices = batch_index.view(-1)
        encoder_output = encoder_output.index_select(0, select_indices)
        src_mask = src_mask.index_select(0, select_indices)

        if hidden is not None and not transformer:
            if isinstance(hidden, tuple):
                # for LSTMs, states are tuples of tensors
                h, c = hidden
                h = h.index_select(1, select_indices)
                c = c.index_select(1, select_indices)
                hidden = (h, c)
            else:
                # for GRUs, states are single tensors
                hidden = hidden.index_select(1, select_indices)

        if att_vectors is not None:
            att_vectors = att_vectors.index_select(0, select_indices)

    def pad_and_stack_hyps(hyps, pad_value):
        """Stack 1-d hypotheses into one 2-d array, right-padded."""
        filled = np.full((len(hyps), max(h.shape[0] for h in hyps)),
                         pad_value,
                         dtype=int)
        for j, h in enumerate(hyps):
            filled[j, :h.shape[0]] = h
        return filled

    # from results to stacked outputs
    assert n_best == 1
    # only works for n_best=1 for now
    final_outputs = pad_and_stack_hyps(
        [r[0].cpu().numpy() for r in results["predictions"]],
        pad_value=pad_index)

    return final_outputs, None
# Copy the dataset-derived sizes onto the model arguments.
for key in ('num_data_points', 'vocab_size', 'max_ques_count',
            'max_ques_len', 'max_ans_len'):
    setattr(model_args, key, getattr(dataset, key))

# iterations per epoch
num_train_points = dataset.num_data_points['train']
setattr(args, 'iter_per_epoch',
        math.floor(num_train_points / args.batch_size))
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
criterion = nn.CrossEntropyLoss()

# A single optimizer updates the parameters of both modules.
trainable_params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(trainable_params,
                       lr=args.lr,
                       weight_decay=args.weight_decay)

encoder.word_embed.init_embedding('data/glove/glove6b_init_300d_1.0.npy')

start_epoch = 0
if args.load_path != '':
    # Each component falls back to the whole checkpoint object when its
    # key is missing from it.
    components = torch.load(args.load_path)
    encoder.load_state_dict(components.get('encoder', components))
    decoder.load_state_dict(components.get('decoder', components))
    optimizer.load_state_dict(components.get('optimizer', components))
    start_epoch = components['epoch']
    print("Loaded model from {}".format(args.load_path))
print("Decoder: {}".format(args.decoder))
dataset = VisDialDataset(args, [args.split])
dataloader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    shuffle=False,
    collate_fn=dataset.collate_fn,
)

# iterations per epoch
num_split_points = dataset.num_data_points[args.split]
setattr(args, 'iter_per_epoch',
        math.floor(num_split_points / args.batch_size))
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model

# The decoder is built from the bare encoder, so both modules must exist
# before either one is wrapped.
encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
encoder, decoder = (
    nn.DataParallel(net).cuda() for net in (encoder, decoder)
)

# Each component falls back to the whole checkpoint object when its key is
# missing from it.
encoder.load_state_dict(components.get('encoder', components))
decoder.load_state_dict(components.get('decoder', components))
print("Loaded model from {}".format(args.load_path))

# NOTE(review): both modules were already moved to CUDA unconditionally
# above, so this guard does not keep a CPU-only run off the GPU.
if args.gpuid >= 0:
    encoder, decoder = encoder.cuda(), decoder.cuda()

# ----------------------------------------------------------------------------
# evaluation
# ----------------------------------------------------------------------------
class Metrics(keras.callbacks.Callback):
    """Keras callback reporting CER, WER and BLEU on decoded predictions.

    After every epoch a number of samples is drawn from ``generator``,
    predicted with ``model_container`` and decoded; the resulting error
    rates are printed and, when ``output_dir`` is set, appended to
    ``stats.csv``.  A per-epoch ``eNN.csv`` with truth/decoded pairs is
    written to the same directory.

    Throughout the class a sample is a ``(decoded, truth)`` pair.
    """

    def __init__(self, model_container, generator, num_samples_stats,
                 num_display_sentences, output_dir=None):
        """Store the evaluation inputs and create ``output_dir`` if needed.

        :param model_container: object whose ``predict`` is called on the
            ``'the_input'`` entry of a batch
        :param generator: iterator; element ``[0]`` of each item must
            provide the keys ``'the_input'``, ``'input_length'`` and
            ``'source_str'``
        :param num_samples_stats: number of samples evaluated per epoch
        :param num_display_sentences: number of truth/decoded pairs written
            to the per-epoch CSV file
        :param output_dir: directory for the CSV files; ``None`` disables
            all file output
        """
        super(Metrics, self).__init__()
        self.model_container = model_container
        self.output_dir = output_dir
        self.generator = generator
        self.num_samples_stats = num_samples_stats
        self.num_display_sentences = num_display_sentences
        # NOTE(review): `beam_width` is not a parameter of this method; it
        # is presumably a module-level name -- confirm it is defined.
        self.decoder = Decoder(beam_width=beam_width)
        print("output directory is ", output_dir)
        if output_dir is not None and not os.path.exists(output_dir):
            os.makedirs(output_dir)

    def get_statistics(self, num):
        """Evaluate ``num`` samples drawn from the generator.

        :param num: number of samples to evaluate
        :return: dict with ``'samples'`` and, under ``'cer'``, ``'wer'``
            and ``'bleu'``, a ``(mean, normalised mean)`` tuple each
        """
        num_left = num
        data = []

        while num_left > 0:
            output_batch = next(self.generator)[0]
            # Never take more samples from a batch than are still needed.
            num_proc = min(output_batch['the_input'].shape[0], num_left)
            y_pred = self.model_container.predict(
                output_batch['the_input'][0:num_proc])
            input_length = output_batch['input_length'][0:num_proc]
            decoded_res = self.decoder.decode(y_pred, input_length)

            for j in range(num_proc):
                data.append((decoded_res[j], output_batch['source_str'][j]))

            num_left -= num_proc

        mean_cer, mean_cer_norm = self.get_mean_character_error_rate(data)
        mean_wer, mean_wer_norm = self.get_mean_word_error_rate(data)
        mean_bleu, mean_bleu_norm = self.get_mean_bleu_score(data)

        return {
            'samples': num,
            'cer': (mean_cer, mean_cer_norm),
            'wer': (mean_wer, mean_wer_norm),
            'bleu': (mean_bleu, mean_bleu_norm)
        }

    def get_mean_tuples(self, data, individual_length, func):
        """Average ``func(first, second)`` over all pairs in ``data``.

        :param data: sequence of 2-element pairs
        :param individual_length: divisor used for the normalised mean
        :param func: callable applied to the two elements of each pair;
            its result is converted with ``float``
        :return: ``(mean, mean of value / individual_length)``
        """
        total = 0.0
        total_norm = 0.0
        length = len(data)
        for first, second in data:
            val = float(func(first, second))
            total += val
            total_norm += val / individual_length
        return (total / length, total_norm / length)

    def get_mean_character_error_rate(self, data):
        """Mean edit distance, normalised by the mean truth length."""
        mean_individual_length = np.mean([len(pair[1]) for pair in data])
        return self.get_mean_tuples(data, mean_individual_length,
                                    editdistance.eval)

    def get_mean_word_error_rate(self, data):
        """Mean word error, normalised by the mean truth word count."""
        mean_individual_length = np.mean(
            [len(pair[1].split()) for pair in data])
        return self.get_mean_tuples(data, mean_individual_length,
                                    wer_sentence)

    def get_mean_bleu_score(self, data):
        """Mean sentence-level BLEU of the decoded text against the truth.

        ``data`` holds ``(decoded, truth)`` pairs, so the truth is wrapped
        as the single reference and the decoded text is the hypothesis.
        """
        wrapped_data = [([truth], decoded) for decoded, truth in data]
        return self.get_mean_tuples(wrapped_data, 1.0,
                                    bleu_score.sentence_bleu)

    def on_train_begin(self, logs=None):
        """Start a fresh ``stats.csv`` containing only the header row."""
        # No output directory means no file output at all.
        if self.output_dir is None:
            return
        with open(os.path.join(self.output_dir, 'stats.csv'),
                  'w') as csvfile:
            csvw = csv.writer(csvfile)
            csvw.writerow([
                "Epoch", "Samples", "Mean CER", "Mean CER (Norm)", "Mean WER",
                "Mean WER (Norm)", "Mean BLEU", "Mean BLEU (Norm)"
            ])

    def on_epoch_end(self, epoch, logs=None):
        """Print this epoch's statistics and write them to the CSV files."""
        stats = self.get_statistics(self.num_samples_stats)

        print((
            '\n\n[Epoch %d] Out of %d samples: [CER: %.3f - %.3f] [WER: %.3f - %.3f] [BLEU: %.3f - %.3f]\n'
            % (epoch + 1, stats['samples'], stats['cer'][0], stats['cer'][1],
               stats['wer'][0], stats['wer'][1], stats['bleu'][0],
               stats['bleu'][1])))

        if self.output_dir is None:
            return

        with open(os.path.join(self.output_dir, 'stats.csv'),
                  'a') as csvfile:
            csvw = csv.writer(csvfile)
            csvw.writerow([
                epoch, stats['samples'],
                "{0:.5f}".format(stats['cer'][0]),
                "{0:.5f}".format(stats['cer'][1]),
                "{0:.5f}".format(stats['wer'][0]),
                "{0:.5f}".format(stats['wer'][1]),
                "{0:.5f}".format(stats['bleu'][0]),
                "{0:.5f}".format(stats['bleu'][1])
            ])

        # Decode one more batch and store truth/decoded pairs for a quick
        # visual check of this epoch's output.
        output_batch = next(self.generator)[0]
        shown = self.num_display_sentences
        y_pred = self.model_container.predict(
            output_batch['the_input'][0:shown])
        input_length = output_batch['input_length'][0:shown]
        res = self.decoder.decode(y_pred, input_length)

        with open(os.path.join(self.output_dir, 'e%02d.csv' % (epoch)),
                  'w') as csvfile:
            csvw = csv.writer(csvfile)
            csvw.writerow(["Truth", "Decoded"])
            # Pairing by zip keeps a batch shorter than
            # `num_display_sentences` from raising an IndexError.
            truths = output_batch['source_str'][0:shown]
            for truth, decoded in zip(truths, res):
                csvw.writerow([truth, decoded])