import json
import os
import time

import dynet as dy
import numpy as np

import datasets
from datasets import action2string, lemma2string

# NOTE (assumption): BEGIN_WORD, STEP, END_WORD, MAX_ACTION_SEQ_LEN, and the
# Encoder stack are defined elsewhere in this package; action2string and
# lemma2string are taken to live in `datasets`, since the original calls them
# both bare and as datasets.action2string.


def internal_eval(batches, transducer, vocab, previous_predicted_actions,
                  check_condition=True, name='train'):
    then = time.time()
    print('evaluating on {} data...'.format(name))
    number_correct = 0.
    total_loss = 0.
    predictions = []
    pred_acts = []
    i = 0  # counter of samples
    for j, batch in enumerate(batches):
        dy.renew_cg()
        batch_loss = []
        for sample in batch:
            feats = sample.pos, sample.feats
            loss, prediction, predicted_actions = transducer.transduce(
                sample.lemma, feats, external_cg=True)

            predictions.append(prediction)
            pred_acts.append(predicted_actions)
            batch_loss.extend(loss)

            # evaluation
            correct_prediction = False
            if (prediction in vocab.word and
                    vocab.word.w2i[prediction] == sample.word):
                correct_prediction = True
                number_correct += 1

            if check_condition:
                # display this sample if its predicted actions differ from
                # the previous epoch's or the prediction is an error
                if (predicted_actions != previous_predicted_actions[i]
                        or not correct_prediction):
                    print('BEFORE:    ', datasets.action2string(
                        previous_predicted_actions[i], vocab))
                    print('THIS TIME: ', datasets.action2string(
                        predicted_actions, vocab))
                    print('TRUE:      ', sample.act_repr)
                    print('PRED:      ', prediction)
                    print('WORD:      ', sample.word_str)
                    # 'V' marks a correct prediction, 'X' an error
                    print('V' if correct_prediction else 'X')

            # increment counter of samples
            i += 1

        batch_loss = -dy.average(batch_loss)
        total_loss += batch_loss.scalar_value()
        # report progress
        if j > 0 and j % 100 == 0:
            print('\t\t...{} batches'.format(j))

    accuracy = number_correct / i
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    return accuracy, total_loss, predictions, pred_acts
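# A minimal usage sketch (assumption, not part of the original module):
# internal_eval is typically called once per epoch on held-out data, feeding
# the previous epoch's pred_acts back in, which is what drives the
# BEFORE / THIS TIME diff printing above. All names below (dev_batches,
# num_epochs, the empty initial action cache) are hypothetical stand-ins.
def example_eval_loop(dev_batches, transducer, vocab, num_epochs=10):
    num_samples = sum(len(batch) for batch in dev_batches)
    prev_acts = [[] for _ in range(num_samples)]  # empty history for epoch 0
    for epoch in range(num_epochs):
        # ... one pass of parameter updates would go here ...
        accuracy, loss, _, prev_acts = internal_eval(
            dev_batches, transducer, vocab, prev_acts,
            check_condition=True, name='dev')
        print('epoch {}: dev accuracy {:.4f}, dev loss {:.4f}'.format(
            epoch, accuracy, loss))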
def compute_channel(name, batches, transducer, vocab, paths, encoding='utf8'):
    then = time.time()
    print('evaluating on {} data...'.format(name))
    output = dict()
    for j, (act_word, batch) in enumerate(batches.items()):
        dy.renew_cg()
        log_prob = []
        pred_acts = []
        candidates = []
        for sample in batch:
            # @TODO one could imagine drawing multiple samples here
            # (with sampling=True)
            feats = sample.pos, sample.feats
            loss, _, predicted_actions = transducer.transduce(
                sample.lemma, feats,
                oracle_actions={'loss': "nll",
                                'rollout_mixin_beta': 1.,
                                'global_rollout': False,
                                'target_word': sample.actions,
                                'optimal': True,
                                'bias_inserts': False},
                sampling=False,
                channel=True,
                external_cg=True)
            pred_acts.append(action2string(predicted_actions, vocab))
            # sum log probabilities of the actions
            log_prob.append(dy.esum(loss).value())
            candidates.append(sample.lemma_str)
        results = {'candidates': candidates,
                   'log_prob': log_prob,
                   'acts': pred_acts}
        # all samples in this batch share the same target word
        output[sample.word_str] = results
        if j > 0 and j % 100 == 0:
            print('\t\t...{} batches'.format(j))
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    path = os.path.join(paths['results_file_path'], name + '_channel.json')
    print('Writing results to file "{path}".'.format(path=path))
    # keep everything as str and apply `encoding` when writing the file:
    # bytes keys and values would break json.dump under Python 3
    with open(path, 'w', encoding=encoding) as w:
        json.dump(output, w, indent=4, ensure_ascii=False)
    return output
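# A sketch (assumption, not in the original code) of how the *_channel.json
# file written above can be consumed: the summed action log probabilities are
# renormalized over the candidate lemmas, giving a channel distribution
# p(lemma | word). Only the JSON layout ('candidates', 'log_prob', 'acts')
# comes from compute_channel; the function name and everything else are
# illustrative.
def load_channel_distribution(path, encoding='utf8'):
    with open(path, encoding=encoding) as f:
        output = json.load(f)
    channel = {}
    for word, results in output.items():
        log_probs = np.array(results['log_prob'])
        # stable softmax over the candidates for this word
        probs = np.exp(log_probs - log_probs.max())
        probs /= probs.sum()
        channel[word] = dict(zip(results['candidates'], probs))
    return channel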
def action2string(self, acts):
    return datasets.action2string(acts, self.vocab)
def internal_eval_beam(batches, transducer, vocab, beam_width,
                       previous_predicted_actions, check_condition=True,
                       name='train'):
    assert callable(getattr(transducer, 'beam_search_decode', None)), \
        'transducer does not implement beam search.'
    then = time.time()
    print('evaluating on {} data with beam search (beam width {})...'.format(
        name, beam_width))
    number_correct = 0.
    total_loss = 0.
    predictions = []
    pred_acts = []
    i = 0  # counter of samples
    for j, batch in enumerate(batches):
        dy.renew_cg()
        batch_loss = []
        for sample in batch:
            feats = sample.pos, sample.feats
            hypotheses = transducer.beam_search_decode(
                sample.lemma, feats, external_cg=True, beam_width=beam_width)
            # take top hypothesis
            loss, loss_expr, prediction, predicted_actions = hypotheses[0]
            predictions.append(prediction)
            pred_acts.append(predicted_actions)
            batch_loss.append(loss)
            # sanity check, disabled because the two scores often disagree
            # slightly:
            # assert round(loss, 3) == round(loss_expr.scalar_value(), 3), \
            #     (loss, loss_expr.scalar_value())

            # evaluation
            correct_prediction = False
            if (prediction in vocab.word and
                    vocab.word.w2i[prediction] == sample.word):
                correct_prediction = True
                number_correct += 1
                if check_condition:
                    # compare to greedy prediction:
                    _, greedy_prediction, _ = transducer.transduce(
                        sample.lemma, feats, external_cg=True)
                    if greedy_prediction != prediction:
                        print('Beam! Target: ', sample.word_str)
                        print('Greedy prediction: ', greedy_prediction)
                        print(u'Complete hypotheses:')
                        for log_p, _, pred_word, pred_actions in hypotheses:
                            print(u'Actions {}, word {}, -log p {:.3f}'.format(
                                datasets.action2string(pred_actions, vocab),
                                pred_word, -log_p))

            if check_condition:
                # display this sample if its predicted actions differ from
                # the previous epoch's or the prediction is an error
                if (predicted_actions != previous_predicted_actions[i]
                        or not correct_prediction):
                    print('BEFORE:    ', datasets.action2string(
                        previous_predicted_actions[i], vocab))
                    print('THIS TIME: ', datasets.action2string(
                        predicted_actions, vocab))
                    print('TRUE:      ', sample.act_repr)
                    print('PRED:      ', prediction)
                    print('WORD:      ', sample.word_str)
                    # 'V' marks a correct prediction, 'X' an error
                    print('V' if correct_prediction else 'X')

            # increment counter of samples
            i += 1

        batch_loss = -np.mean(batch_loss)
        total_loss += batch_loss
        # report progress
        if j > 0 and j % 100 == 0:
            print('\t\t...{} batches'.format(j))

    accuracy = number_correct / i
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    return accuracy, total_loss, predictions, pred_acts
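# A short sketch (hypothetical names throughout): the greedy and beam
# evaluators share a signature except for beam_width, so comparing decoders
# on the same held-out data is a two-call affair.
def example_compare_decoders(dev_batches, transducer, vocab, prev_acts):
    greedy_acc, _, _, _ = internal_eval(
        dev_batches, transducer, vocab, prev_acts, check_condition=False)
    beam_acc, _, _, _ = internal_eval_beam(
        dev_batches, transducer, vocab, 4, prev_acts, check_condition=False)
    print('greedy accuracy {:.4f} vs beam accuracy {:.4f}'.format(
        greedy_acc, beam_acc))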
def transduce(self, lemma, feats, oracle_actions=None, external_cg=True,
              sampling=False, unk_avg=True, debug_mode=False):

    def _valid_actions(encoder):
        valid_actions = list(self.INSERTS)
        if len(encoder) > 1:
            valid_actions += [STEP]
        else:
            valid_actions += [END_WORD]
        return valid_actions

    if not external_cg:
        dy.renew_cg()

    if oracle_actions:
        # reverse to enable simple popping
        oracle_actions = oracle_actions[::-1]
        oracle_actions.pop()  # Deterministic insertion of BEGIN_WORD

    # vectorize lemma
    lemma_enc = self._build_lemma(lemma, unk_avg,
                                  is_training=bool(oracle_actions))

    # vectorize features
    features = self._build_features(*feats)

    # add encoder and decoder to computation graph
    encoder = Encoder(self.fbuffRNN, self.bbuffRNN)
    decoder = self.wordRNN.initial_state()

    # add classifier to computation graph
    if self.MLP_DIM:
        # decoder output to hidden
        W_s2h = dy.parameter(self.pW_s2h)
        b_s2h = dy.parameter(self.pb_s2h)
    # hidden to action
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    # encoder is a stack which pops lemma characters
    # and their representations from the top
    encoder.transduce(lemma_enc, lemma)

    action_history = [BEGIN_WORD]
    word = []
    losses = []

    if debug_mode:
        print()
        if oracle_actions:
            print(action2string(oracle_actions, self.vocab))
        print(lemma2string(lemma, self.vocab))

    while len(action_history) <= MAX_ACTION_SEQ_LEN:
        # what is at the top of the encoder?
        encoder_embedding, char_enc = encoder.embedding(extra=True)

        if debug_mode:
            print('Action history: ', action_history,
                  action2string(action_history, self.vocab))
            print('Encoder length: ', len(encoder))
            print('Current char: ', char_enc,
                  lemma2string([char_enc], self.vocab))
            print('Word so far: ', u''.join(word))

        # decoder
        decoder_input = dy.concatenate([encoder_embedding,
                                        features,
                                        self.ACT_LOOKUP[action_history[-1]]])
        decoder = decoder.add_input(decoder_input)
        decoder_output = decoder.output()

        # classifier
        if self.MLP_DIM:
            h = self.NONLIN(W_s2h * decoder_output + b_s2h)
        else:
            h = decoder_output

        valid_actions = _valid_actions(encoder)
        log_probs = dy.log_softmax(W_act * h + b_act, valid_actions)

        if oracle_actions is None:
            if sampling:
                dist = np.exp(log_probs.npvalue())
                # sample according to softmax
                rand = np.random.rand()
                for action, p in enumerate(dist):
                    rand -= p
                    if rand <= 0:
                        break
            else:
                action = np.argmax(log_probs.npvalue())
        else:
            action = oracle_actions.pop()

        losses.append(dy.pick(log_probs, action))
        action_history.append(action)

        if action == STEP:
            # step action: consume the top lemma character
            encoder.pop()
        elif action == END_WORD:
            # finish transduction
            break
        else:
            # insert action: append the inserted character to the output word
            # (look up char_ first so the assert message can name it)
            char_ = self.vocab.act.i2w[action]
            assert action in self.INSERTS, (
                char_, action2string([char_], self.vocab), self.INSERTS)
            word.append(char_)

    word = u''.join(word)
    return losses, word, action_history
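# The Encoder used above is consumed through a small stack-like contract.
# The following is a reconstruction of that contract from the calls made in
# transduce and beam_search_decode, not the original implementation:
#
#   encoder.transduce(lemma_enc, lemma)        # push all encoded characters
#   emb, char = encoder.embedding(extra=True)  # peek at the top character
#   encoder.pop()                              # STEP consumes the top character
#   len(encoder)                               # characters left; at 1, only
#                                              # END_WORD remains valid
#   encoder.copy()                             # snapshot (used in beam search)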
def beam_search_decode(self, lemma, feats, external_cg=True, unk_avg=True,
                       beam_width=4):
    # Returns the complete hypotheses sorted by log probability; each carries
    # the log probability of its action sequence both as a float and as a
    # dynet expression.

    def _valid_actions(encoder):
        valid_actions = list(self.INSERTS)
        if len(encoder) > 1:
            valid_actions += [STEP]
        else:
            valid_actions += [END_WORD]
        return valid_actions

    if not external_cg:
        dy.renew_cg()

    # vectorize lemma
    lemma_enc = self._build_lemma(lemma, unk_avg, is_training=False)

    # vectorize features
    features = self._build_features(*feats)

    # add encoder and decoder to computation graph
    encoder = Encoder(self.fbuffRNN, self.bbuffRNN)
    decoder = self.wordRNN.initial_state()

    # encoder is a stack which pops lemma characters and their
    # representations from the top.
    encoder.transduce(lemma_enc, lemma)

    # add classifier to computation graph
    if self.MLP_DIM:
        # decoder output to hidden
        W_s2h = dy.parameter(self.pW_s2h)
        b_s2h = dy.parameter(self.pb_s2h)
    # hidden to action
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    # a list of tuples:
    #   (decoder state, encoder state, list of previous actions,
    #    log prob of previous actions, log prob of previous actions
    #    as dynet object, word generated so far)
    beam = [(decoder, encoder, [BEGIN_WORD], 0., 0., [])]

    beam_length = 0
    complete_hypotheses = []

    while beam_length <= MAX_ACTION_SEQ_LEN:

        if not beam or beam_width == 0:
            break

        # compute the probability of each valid action and expand each
        # hypothesis with its `beam_width` most probable actions
        expansion = []
        # print('Beam length: ', beam_length)
        for decoder, encoder, prev_actions, log_p, log_p_expr, word in beam:
            # print('Expansion: ', action2string(prev_actions, self.vocab),
            #       log_p, ''.join(word))
            encoder_embedding, char_enc = encoder.embedding(extra=True)

            # decoder
            decoder_input = dy.concatenate([
                encoder_embedding, features,
                self.ACT_LOOKUP[prev_actions[-1]]
            ])
            decoder = decoder.add_input(decoder_input)
            decoder_output = decoder.output()

            # generate
            if self.MLP_DIM:
                h = self.NONLIN(W_s2h * decoder_output + b_s2h)
            else:
                h = decoder_output

            logits = W_act * h + b_act
            valid_actions = _valid_actions(encoder)
            log_probs_expr = dy.log_softmax(logits, valid_actions)
            log_probs = log_probs_expr.npvalue()
            top_actions = np.argsort(log_probs)[-beam_width:]
            # print('top_actions: ', top_actions,
            #       action2string(top_actions, self.vocab))
            # print('log_probs: ', log_probs)

            expansion.extend(
                (decoder, encoder.copy(), list(prev_actions), a,
                 log_p + log_probs[a], log_p_expr + log_probs_expr[a],
                 list(word), char_enc)
                for a in top_actions)

        # print('Overall, {} expansions'.format(len(expansion)))
        beam = []
        expansion.sort(key=lambda e: e[4])  # sort by cumulative log prob
        for e in expansion[-beam_width:]:
            (decoder, encoder, prev_actions, action,
             log_p, log_p_expr, word, char_enc) = e
            prev_actions.append(action)

            # execute the action to update the transducer state
            if action == END_WORD:
                # 1. Finish transduction:
                #    * beam width should be decremented
                #    * the expansion is taken off the beam and stored in
                #      the set of complete hypotheses
                beam_width -= 1
                complete_hypotheses.append(
                    (log_p, log_p_expr, u''.join(word), prev_actions))
            else:
                if action == STEP:
                    encoder.pop()
                else:
                    # one of the INSERT actions:
                    # append the inserted character to the output word
                    char_ = self.vocab.act.i2w[action]
                    assert action in self.INSERTS, (
                        char_, action2string([char_], self.vocab),
                        self.INSERTS)
                    word.append(char_)
                beam.append((decoder, encoder, prev_actions,
                             log_p, log_p_expr, word))

        beam_length += 1

    if not complete_hypotheses:
        # no hypothesis finished within MAX_ACTION_SEQ_LEN actions;
        # fall back to the partial hypotheses left on the beam
        complete_hypotheses = [
            (log_p, log_p_expr, u''.join(word), prev_actions)
            for _, _, prev_actions, log_p, log_p_expr, word in beam]

    complete_hypotheses.sort(key=lambda h: h[0], reverse=True)
    # print(u'Complete hypotheses:')
    # for log_p, _, word, actions in complete_hypotheses:
    #     print(u'Actions {}, word {}, log p {:.3f}'.format(
    #         action2string(actions, self.vocab), word, log_p))
    return complete_hypotheses
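# A minimal usage sketch (assumption): beam_search_decode returns hypotheses
# sorted by log probability, so 1-best decoding just takes hypotheses[0].
# The tuple layout (log_p, log_p_expr, word, actions) comes from the code
# above; `sample` is a hypothetical dataset item shaped as in
# internal_eval_beam.
def example_beam_decode(transducer, sample, beam_width=4):
    dy.renew_cg()
    feats = sample.pos, sample.feats
    hypotheses = transducer.beam_search_decode(
        sample.lemma, feats, external_cg=True, beam_width=beam_width)
    log_p, _, word, actions = hypotheses[0]  # 1-best hypothesis
    return word, log_p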