def predict_and_score(self, eval_instances, random=False, verbosity=0):
    predictions = []
    scores = []
    batches = iterators.iter_batches(eval_instances, self.options.listener_eval_batch_size)
    num_batches = (len(eval_instances) - 1) // self.options.listener_eval_batch_size + 1

    if self.options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)

        xs, (y,) = self._data_to_arrays(batch, test=True)

        probs = self.model.predict(xs)
        if random:
            indices = sample(probs)
            predictions.extend(indices)
        else:
            predictions.extend(probs.argmax(axis=1))
        scores_arr = np.log(probs[np.arange(len(batch)), y])
        scores.extend(scores_arr.tolist())
    progress.end_task()

    if self.options.verbosity >= 9:
        print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
        for inst, prediction in zip(eval_instances, predictions):
            print('%s -> %s' % (repr(inst.input), repr(prediction)))

    return predictions, scores

def train(self, training_instances, validation_instances=None, metrics=None):
    id_tag = (self.id + ': ') if self.id else ''
    if self.options.verbosity >= 2:
        print(id_tag + 'Training priors')
    self.train_priors(training_instances, listener_data=self.options.listener)
    self.dataset = training_instances

    xs, ys = self._data_to_arrays(training_instances, init_vectorizer=True)
    self._build_model()

    if self.options.verbosity >= 2:
        print(id_tag + 'Training conditional model')
    summary_path = config.get_file_path('losses.tfevents')
    if summary_path:
        writer = summary.SummaryWriter(summary_path)
    else:
        writer = None

    progress.start_task('Iteration', self.options.train_iters)
    for iteration in range(self.options.train_iters):
        progress.progress(iteration)
        self.model.fit(xs, ys, batch_size=self.options.batch_size,
                       num_epochs=self.options.train_epochs, summary_writer=writer,
                       step=iteration * self.options.train_epochs)
        validation_results = self.validate(validation_instances, metrics, iteration=iteration)
        if writer is not None:
            step = (iteration + 1) * self.options.train_epochs
            self.on_iter_end(step, writer)
            for key, value in validation_results.iteritems():
                tag = 'val/' + key.split('.', 1)[1].replace('.', '/')
                writer.log_scalar(step, tag, value)
    if writer is not None:
        writer.flush()
    progress.end_task()

def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
    progress.start_task('Example', len(training_instances))
    for i, inst in enumerate(training_instances):
        progress.progress(i)
        self.seen.update([inst.output])
    progress.end_task()
    self.num_examples += len(training_instances)

def train(self, training_instances, validation_instances=None, metrics=None):
    if not hasattr(self, 'model'):
        self.model = self.build_model(self.init_vectorizer(training_instances))

    minibatches = iterators.gen_batches(training_instances, self.options.batch_size)
    progress.start_task('Epoch', self.options.train_epochs)
    for epoch in range(self.options.train_epochs):
        progress.progress(epoch)
        progress.start_task('Minibatch', len(minibatches))
        for b, batch in enumerate(minibatches):
            progress.progress(b)
            self.train_batch(batch)
        progress.end_task()
        self.validate_and_log(validation_instances, metrics,
                              self.model.summary_writer, epoch=epoch)
    progress.end_task()

def predict_and_score(self, eval_instances, random=False, split='default', verbosity=4):
    predictions = []
    scores = []

    minibatches = iterators.gen_batches(eval_instances, self.options.batch_size)
    tokenize, detokenize = tokenizers.TOKENIZERS[self.options.tokenizer]

    if verbosity > 2:
        progress.start_task('Eval minibatch', len(minibatches))
    for b, batch in enumerate(minibatches):
        if verbosity > 2:
            progress.progress(b)
        outputs_batch, scores_batch = self.model.eval(
            [self.instance_to_tuple(inst) for inst in batch], split=split)
        preds_batch = outputs_batch['sample' if random else 'beam']
        detokenized = self.collate_preds(preds_batch, detokenize)
        predictions.extend(detokenized)
        scores.extend(self.collate_scores(scores_batch))
    if verbosity > 2:
        progress.end_task()

    return predictions, scores

def train(self, training_instances, validation_instances, metrics):
    self.init_vectorizers(training_instances)
    self.build_graph()
    self.init_params()

    batches = iterators.gen_batches(training_instances, batch_size=self.options.batch_size)
    if self.options.verbosity >= 1:
        progress.start_task('Epoch', self.options.train_epochs)
    for epoch in range(self.options.train_epochs):
        if self.options.verbosity >= 1:
            progress.progress(epoch)
        if self.options.verbosity >= 1:
            progress.start_task('Batch', len(batches))
        for i, batch in enumerate(batches):
            if self.options.verbosity >= 1:
                progress.progress(i)
            batch = list(batch)
            feed_dict = self.vectorize_inputs(batch)
            feed_dict.update(self.vectorize_labels(batch))
            self.run_train(feed_dict)
        if self.options.verbosity >= 1:
            progress.end_task()
    if self.options.verbosity >= 1:
        progress.end_task()

def predict_and_score(self, eval_instances, random=False, verbosity=0):
    predictions = []
    scores = []

    batches = iterators.gen_batches(eval_instances, batch_size=self.options.eval_batch_size)
    # Create (or truncate) the distributions dump file so it starts out empty.
    with gzip.open(config.get_file_path('dists.b64.gz'), 'w'):
        pass

    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Eval batch', len(batches))
    for i, batch in enumerate(batches):
        if self.options.verbosity + verbosity >= 1:
            progress.progress(i)
        batch = list(batch)
        feed_dict = self.vectorize_inputs(batch)
        feed_dict.update(self.vectorize_labels(batch))
        output = self.run_predict(feed_dict)
        predictions_batch = self.output_to_preds(output, batch, sample=random)
        predictions.extend(predictions_batch)
        labels = self.vectorize_labels(batch)
        scores_batch = self.output_to_scores(output, labels)
        scores.extend(scores_batch)
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()

    return predictions, scores

def score(self, eval_instances, verbosity=0):
    result = []
    batches = iterators.iter_batches(eval_instances, self.options.speaker_eval_batch_size)
    num_batches = (len(eval_instances) - 1) // self.options.speaker_eval_batch_size + 1

    if self.options.verbosity + verbosity >= 2:
        print('Scoring')
    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Score batch', num_batches)
    for batch_num, batch in enumerate(batches):
        if self.options.verbosity + verbosity >= 1:
            progress.progress(batch_num)
        batch = list(batch)

        xs, (n,) = self._data_to_arrays(batch, test=False)
        if self.use_color_mask:
            mask = xs[3]
        else:
            mask = xs[2]

        probs = self.model.predict(xs)
        token_probs = probs[np.arange(probs.shape[0])[:, np.newaxis],
                            np.arange(probs.shape[1]), n]
        scores_arr = np.sum(np.log(token_probs) * mask, axis=1)
        scores = scores_arr.tolist()
        result.extend(scores)
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()

    return result

def predict_and_score(self, eval_instances):
    num_instances = len(eval_instances)

    # Make features for the eval dataset.
    print('making features for eval dataset...')
    self.X_eval = self.make_features(eval_instances)

    # Find log probabilities using the model trained above.
    print('finding probabilities...')
    log_probs = self.model.predict_log_proba(self.X_eval)[:, 1]
    reshaped = np.reshape(log_probs, (num_instances, 3))
    final_probs = reshaped - logsumexp(reshaped, axis=1, keepdims=True)

    preds = []
    scores = []
    print('making predictions...')
    progress.start_task('Example', len(eval_instances))
    for i, inst in enumerate(eval_instances):
        progress.progress(i)
        pred = np.argmax(final_probs[i])
        score = final_probs[i][inst.output]
        preds.append(pred)
        scores.append(score)
    progress.end_task()

    return preds, scores

def predict(self, eval_instances, random=False, verbosity=0):
    result = []
    batches = iterators.iter_batches(eval_instances, self.options.speaker_eval_batch_size)
    num_batches = (len(eval_instances) - 1) // self.options.speaker_eval_batch_size + 1
    eos_index = self.seq_vec.vectorize(['</s>'])[0]

    if self.options.verbosity + verbosity >= 2:
        print('Predicting')
    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Predict batch', num_batches)
    for batch_num, batch in enumerate(batches):
        if self.options.verbosity + verbosity >= 1:
            progress.progress(batch_num)
        batch = list(batch)

        (c, _p, mask), (_y,) = self._data_to_arrays(batch, test=True)
        assert mask.all()  # We shouldn't be masking anything in prediction

        beam_size = 1 if random else self.options.speaker_beam_size
        done = np.zeros((len(batch), beam_size), dtype=np.bool)
        beam = np.zeros((len(batch), beam_size, self.seq_vec.max_len), dtype=np.int32)
        beam[:, :, 0] = self.seq_vec.vectorize(['<s>'])[0]
        beam_scores = np.log(np.zeros((len(batch), beam_size)))
        beam_scores[:, 0] = 0.0

        c = np.repeat(c, beam_size, axis=0)
        mask = np.repeat(mask, beam_size, axis=0)

        for length in range(1, self.seq_vec.max_len):
            if done.all():
                break
            p = beam.reshape((beam.shape[0] * beam.shape[1], beam.shape[2]))[:, :-1]
            probs = self.model.predict([c, p, mask])
            if random:
                indices = sample(probs[:, length - 1, :])
                beam[:, 0, length] = indices
                done = np.logical_or(done, indices == eos_index)
            else:
                assert probs.shape[1] == p.shape[1], (probs.shape[1], p.shape[1])
                assert probs.shape[2] == len(self.seq_vec.tokens), \
                    (probs.shape[2], len(self.seq_vec.tokens))
                scores = np.log(probs)[:, length - 1, :].reshape(
                    (beam.shape[0], beam.shape[1], probs.shape[2]))
                beam_search_step(scores, length, beam, beam_scores, done, eos_index)
        outputs = self.seq_vec.unvectorize_all(beam[:, 0, :])
        result.extend([' '.join(strip_invalid_tokens(o)) for o in outputs])
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()

    return result

def predict_and_score(self, eval_instances):
    predict = [''] * len(eval_instances)
    score = []

    progress.start_task('Example', len(eval_instances))
    for i, inst in enumerate(eval_instances):
        progress.progress(i)
        score.append(self._get_log_prob(inst.output))
    progress.end_task()

    return predict, score

def train_one_batch(self, insts, env, t):
    env.configure([inst.input for inst in insts], verbosity=self.options.verbosity)
    observation = env._get_obs()
    info = None
    self.init_belief(env, observation)

    if self.options.verbosity >= 1:
        progress.start_task('Step', self.options.max_steps)
    for step in range(self.options.max_steps):
        if self.options.verbosity >= 1:
            progress.progress(step)
        if self.options.render:
            env.render()
        actions = self.action(env, observation, info, testing=False)
        prev_obs = observation
        observation, reward, done, info = env.step(actions)
        self.update_belief(env, prev_obs, actions, observation, reward, done, info)
        if all(done):
            break

    '''
    from tensorflow.python.client import timeline
    trace = timeline.Timeline(step_stats=self.run_metadata.step_stats)
    with config.open('timeline.ctf.json', 'w') as trace_file:
        trace_file.write(trace.generate_chrome_trace_format())
    '''

    rewards = np.array(self.rewards)  # max_steps x batch_size
    done = np.array(self.done, dtype=np.int32)  # max_steps x batch_size
    actions = np.array(self.actions).reshape(rewards.shape)
    # Force actions on steps where reward is zero (already done) to nop.
    actions[1:, :] *= (1 - done)[:-1, :]
    for game in range(rewards.shape[1]):
        action_hist = np.bincount(actions[:, game], minlength=len(cards_env.ACTIONS)).tolist()
        if self.options.verbosity >= 7:
            print('Total reward: {} {}'.format(rewards[:, game].sum(), action_hist))
    total_rewards = np.repeat(rewards.sum(axis=0), rewards.shape[0])
    assert total_rewards.shape == (rewards.shape[0] * rewards.shape[1],), \
        (total_rewards.shape, rewards.shape)
    credit = np.ones(done.shape)
    credit[1:, :] *= 1.0 - done[:-1, :]
    credit = credit.ravel()  # (credit / credit.sum(axis=0)).ravel()
    assert credit.shape == total_rewards.shape, (credit.shape, total_rewards.shape)
    if self.options.verbosity >= 1:
        progress.end_task()

    feed_dict = self.batch_inputs(self.inputs[:-cards_env.MAX_BATCH_SIZE])
    for label, value in zip(self.label_vars, [np.array(self.actions), total_rewards, credit]):
        feed_dict[label] = value
    self.run_train(feed_dict)

def train(self, training_instances, validation_instances=None, metrics=None,
          keep_params=False):
    id_tag = (self.id + ': ') if self.id else ''
    if self.options.verbosity >= 2:
        print(id_tag + 'Training priors')
    self.train_priors(training_instances, listener_data=self.options.listener)
    self.dataset = training_instances

    xs, ys = self._data_to_arrays(training_instances,
                                  init_vectorizer=not hasattr(self, 'model'))
    if not hasattr(self, 'model') or not keep_params:
        if self.options.verbosity >= 2:
            print(id_tag + 'Building model')
        if keep_params:
            warnings.warn("keep_params was passed, but the model hasn't been built; "
                          "initializing all parameters.")
        self._build_model()
    else:
        if not hasattr(self.options, 'reset_optimizer_vars') or \
                self.options.reset_optimizer_vars:
            if self.options.verbosity >= 2:
                print(id_tag + 'Resetting optimizer')
            self.model.reset_optimizer()

    if self.options.verbosity >= 2:
        print(id_tag + 'Training conditional model')
    if hasattr(self, 'writer'):
        writer = self.writer
    else:
        summary_path = config.get_file_path('losses.tfevents')
        if summary_path:
            writer = summary.SummaryWriter(summary_path)
        else:
            writer = None
        self.writer = writer
    if not hasattr(self, 'step_base'):
        self.step_base = 0

    progress.start_task('Iteration', self.options.train_iters)
    for iteration in range(self.options.train_iters):
        progress.progress(iteration)
        self.model.fit(xs, ys, batch_size=self.options.batch_size,
                       num_epochs=self.options.train_epochs, summary_writer=writer,
                       step=self.step_base + iteration * self.options.train_epochs)
        validation_results = self.validate(validation_instances, metrics, iteration=iteration)
        if writer is not None:
            step = self.step_base + (iteration + 1) * self.options.train_epochs
            self.on_iter_end(step, writer)
            for key, value in validation_results.iteritems():
                tag = 'val/' + key.split('.', 1)[1].replace('.', '/')
                writer.log_scalar(step, tag, value)
    self.step_base += self.options.train_iters * self.options.train_epochs
    if writer is not None:
        writer.flush()
    progress.end_task()

def fit(self, Xs, ys, batch_size, num_epochs, summary_writer=None, step=0):
    if not isinstance(Xs, Sequence):
        raise ValueError('Xs should be a sequence, instead got %s' % (Xs,))
    if not isinstance(ys, Sequence):
        raise ValueError('ys should be a sequence, instead got %s' % (ys,))
    history = OrderedDict((tag, []) for tag in self.monitored_tags)
    id_tag = (self.id + '/') if self.id else ''
    params = self.params()

    progress.start_task('Epoch', num_epochs)
    epoch_start = time.time()
    for epoch in range(num_epochs):
        progress.progress(epoch)
        history_epoch = OrderedDict((tag, []) for tag in self.monitored_tags)
        num_minibatches_approx = len(ys[0]) // batch_size + 1
        progress.start_task('Minibatch', num_minibatches_approx)
        for i, batch in enumerate(self.minibatches(Xs, ys, batch_size, shuffle=True)):
            progress.progress(i)
            if self.options.verbosity >= 8:
                print('types: %s' % ([type(v) for t in batch for v in t],))
                print('shapes: %s' % ([v.shape for t in batch for v in t],))
            inputs, targets, synth = batch
            monitored = self.train_fn(*inputs + targets + synth)
            for tag, value in zip(self.monitored_tags, monitored):
                if self.options.verbosity >= 10:
                    print('%s: %s' % (tag, value))
                history_epoch[tag].append(value)
        progress.end_task()

        for tag, values in history_epoch.items():
            values_array = np.array([np.asarray(v) for v in values])
            history[tag].append(values_array)
            mean_values = np.mean(values_array, axis=0)
            if len(mean_values.shape) == 0:
                summary_writer.log_scalar(step + epoch, tag, mean_values)
            else:
                summary_writer.log_histogram(step + epoch, tag, mean_values)
        if self.options.monitor_params:
            for param in params:
                val = param.get_value()
                tag = 'param/' + param.name
                if len(val.shape) == 0:
                    summary_writer.log_scalar(step + epoch, tag, val)
                else:
                    summary_writer.log_histogram(step + epoch, tag, val)
        epoch_end = time.time()
        examples_per_sec = len(ys[0]) / (epoch_end - epoch_start)
        summary_writer.log_scalar(step + epoch, id_tag + 'examples_per_sec', examples_per_sec)
        epoch_start = epoch_end
    progress.end_task()

    return history

def predict_and_score(self, eval_instances, random='ignored', verbosity=0):
    self.get_options()
    eval_instances = list(eval_instances)
    predictions = []
    scores = []

    env = gym.make(cards_env.register())
    batches = iterators.gen_batches(eval_instances, batch_size=cards_env.MAX_BATCH_SIZE)

    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Eval batch', len(batches))
    for i, batch in enumerate(batches):
        batch = list(batch)
        if self.options.verbosity + verbosity >= 1:
            progress.progress(i)
        total_reward = np.zeros((len(batch),))
        done = np.zeros((len(batch),), dtype=np.bool)
        env.configure([inst.input for inst in batch], verbosity=verbosity)
        observation = env._get_obs()
        info = None
        self.init_belief(env, observation)
        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Step', self.options.max_steps)
        for step in range(self.options.max_steps):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(step)
            if self.options.render:
                env.render()
            action = self.action(env, observation, info)
            prev_obs = [np.copy(a) for a in observation]
            observation, reward, done_step, info = env.step(action)
            self.update_belief(env, prev_obs, action, observation, reward, done, info)
            done = np.bitwise_or(done, done_step[:len(batch)])
            total_reward += np.array(reward[:len(batch)])
            if done.all():
                break
        if self.options.verbosity + verbosity >= 1:
            progress.end_task()
        predictions.extend([''] * len(batch))
        scores.extend(total_reward.tolist())
    env.close()
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()

    return predictions, scores

def predict_and_score(self, eval_instances):
    most_common = self.seen.most_common(1)[0][0]
    predict = [most_common] * len(eval_instances)
    score = []

    progress.start_task('Example', len(eval_instances))
    for i, inst in enumerate(eval_instances):
        progress.progress(i)
        score.append(np.log(self._get_smoothed_prob(inst.output)))
    progress.end_task()

    return predict, score

def init_vectorizer(self, training_instances):
    vec = vectorizers.Seq2SeqVectorizer()
    vec.add((['<s>', '</s>'], ['<s>', '</s>']))

    progress.start_task('Vectorizer instance', len(training_instances))
    for i, inst in enumerate(training_instances):
        progress.progress(i)
        vec.add(self.instance_to_tuple(inst))
    progress.end_task()

    return vec

def predict_and_score(self, eval_instances, random=False, verbosity=0):
    from fields import build_instance
    options = self.get_options()
    predictions = []
    scores = []
    base_is_listener = self.override_listener()

    assert options.listener, 'Eval data should be listener data for DirectRefGameLearner'

    true_batch_size = options.listener_eval_batch_size // options.num_distractors
    batches = iterators.iter_batches(eval_instances, true_batch_size)
    num_batches = (len(eval_instances) - 1) // true_batch_size + 1

    if options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)
        assert batch[0].alt_outputs, 'No context given for direct listener testing'
        context = len(batch[0].alt_outputs)
        if self.options.direct_base_uses_context:
            output_grid = [build_instance(inst.input, target, inst.alt_outputs, base_is_listener)
                           for inst in batch for target in range(len(inst.alt_outputs))]
        else:
            output_grid = [build_instance(inst.input, color, None, base_is_listener)
                           for inst in batch for color in inst.alt_outputs]
        assert len(output_grid) == context * len(batch), \
            'Context must be the same number of colors for all examples'
        true_indices = np.array([inst.output for inst in batch])
        grid_scores = self.base.score(output_grid, verbosity=verbosity)
        log_probs = np.array(grid_scores).reshape((len(batch), context))
        # Renormalize over only the context colors
        log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
        # Cap confidences to reasonable values
        if options.direct_min_score is not None and options.direct_min_score <= 0.0:
            log_probs = np.maximum(options.direct_min_score, log_probs)
            # Normalize again (so we always return log probabilities)
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
        assert log_probs.shape == (len(batch), context)
        pred_indices = np.argmax(log_probs, axis=1)
        predictions.extend(pred_indices.tolist())
        # Extract the score of the true color
        scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
    progress.end_task()

    return predictions, scores

def init_vectorizer(self, training_instances):
    vec = self.vectorizer_class()
    vec.add((['<input>', '</input>'],
             ['<dialogue>', '</dialogue>', '<eos>', 'YOU:', 'THEM:'],
             ['<output>', '</output>']))

    progress.start_task('Vectorizer instance', len(training_instances))
    for i, inst in enumerate(training_instances):
        progress.progress(i)
        vec.add(self.instance_to_tuple(inst))
    progress.end_task()

    return vec

def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
    self.names = sorted(set(inst.output for inst in training_instances)) + ['<unk>']
    self.name_to_index = defaultdict(lambda: -1, {n: i for i, n in enumerate(self.names)})
    self.hists = []
    progress.start_task('Histogram', len(self.GRANULARITY))
    for i, g in enumerate(self.GRANULARITY):
        progress.progress(i)
        self.hists.append(Histogram(training_instances, self.names,
                                    granularity=g, use_progress=True))
    progress.end_task()
    self.num_params = sum(h.num_params for h in self.hists)

def predict_and_score(self, eval_instances):
    predictions = []
    scores = []
    progress.start_task('Example', len(eval_instances))
    for i, inst in enumerate(eval_instances):
        progress.progress(i)
        hist_probs = self.hist_probs(inst.input)
        name = self.names[hist_probs.argmax()]
        prob = hist_probs[self.name_to_index[inst.output]]
        predictions.append(name)
        scores.append(np.log(prob))
    progress.end_task()
    return predictions, scores

def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
    tokenize = TOKENIZERS[self.tokenizer]
    tokenized = [tokenize(inst.output) + ['</s>'] for inst in training_instances]
    self.seq_vec.add_all(tokenized)
    unk_replaced = self.seq_vec.unk_replace_all(tokenized)
    progress.start_task('Example', len(training_instances))
    for i, utt in enumerate(unk_replaced):
        progress.progress(i)
        self.token_counts.update(utt)
        self.num_tokens += len(utt)
    progress.end_task()

def add_data(self, training_instances):
    if self.use_progress:
        progress.start_task('Example', len(training_instances))
    for i, inst in enumerate(training_instances):
        if self.use_progress:
            progress.progress(i)
        bucket = self.get_bucket(inst.input)
        self.buckets[bucket][inst.output] += 1
        self.bucket_counts[bucket] += 1
    if self.use_progress:
        progress.end_task()

def predict_and_score(self, eval_instances, random=False, verbosity=0):
    predictions = []
    scores = []
    progress.start_task('Instance', len(eval_instances))
    for inst_num, inst in enumerate(eval_instances):
        progress.progress(inst_num)
        pred, score = self.predict_one_inst(inst)
        predictions.append(pred)
        scores.append(score)
    progress.end_task()
    return predictions, scores

def predict_and_score(self, eval_instances, random='ignored', verbosity='ignored'):
    options = config.options()
    predictions = []
    scores = []
    pool = multiprocessing.Pool(options.lux_threads)
    batch_size = options.lux_batch_size
    progress.start_task('Example', len(eval_instances))
    for start in range(0, len(eval_instances), batch_size):
        progress.progress(start)
        batch_output = pool.map(lux_predict_and_score,
                                eval_instances[start:start + batch_size])
        batch_preds, batch_scores = zip(*batch_output)
        predictions.extend(batch_preds)
        scores.extend(batch_scores)
    progress.end_task()
    return predictions, scores

def output_grids(model, input_filename):
    with gzip.open(input_filename, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]
    dirname, filename = os.path.split(input_filename)
    data_filename = os.path.join(dirname, 'data.eval.jsons')
    with open(data_filename, 'r') as infile:
        insts = [json.loads(line.strip()) for line in infile]
    output_filename = os.path.join(dirname, 's0_' + filename)
    with gzip.open(output_filename, 'w') as outfile:
        progress.start_task('Example', len(insts))
        for i, (inst, grid) in enumerate(zip(insts, grids)):
            progress.progress(i)
            insts, shape = build_insts(inst, grid)
            scores = model.score(insts, verbosity=-4)
            substitute_grid(scores, grid, shape)
            json.dump(grid, outfile)
            outfile.write('\n')
        progress.end_task()

def predict_and_score(self, eval_instances, random='ignored', verbosity=4):
    eval_instances = list(eval_instances)
    predictions = []
    scores = []
    if verbosity >= 1:
        progress.start_task('Eval instance', len(eval_instances))
    for i, inst in enumerate(eval_instances):
        if verbosity >= 1:
            progress.progress(i)
        pred = ''  # TODO: make prediction
        score = -float('inf')  # TODO: score gold output
        predictions.append(pred)
        scores.append(score)
    if verbosity >= 1:
        progress.end_task()
    return predictions, scores

def predict_and_score(self, eval_instances, random="ignored", verbosity=4): eval_instances = list(eval_instances) predictions = [] scores = [] if verbosity >= 1: progress.start_task("Eval instance", len(eval_instances)) for i, inst in enumerate(eval_instances): if verbosity >= 1: progress.progress(i) pred = "" # TODO: make prediction score = -float("inf") # TODO: score gold output predictions.append(pred) scores.append(score) if verbosity >= 1: progress.end_task() return predictions, scores
def predict_and_score(self, eval_instances, random=False, split='default', verbosity=4):
    predictions = []
    scores = []
    if verbosity > 2:
        progress.start_task('Eval instances', len(eval_instances))
    for i, inst in enumerate(eval_instances):
        if verbosity > 2:
            progress.progress(i)
        game = get_game(inst.input)
        num_turns = count_dialogue_turns(inst.input)
        if (game, num_turns) in self.lookup:
            pred = self.lookup[(game, num_turns)]
        elif game in self.lookup:
            pred = self.lookup[game]
        else:
            pred = 'NEVER BEEN HERE BEFORE'
        predictions.append(pred)
        scores.append(0.0)
    if verbosity > 2:
        progress.end_task()
    return predictions, scores

def train(self, training_instances, validation_instances='ignored', metrics='ignored'):
    self.build_graph()
    env = gym.make(cards_env.register())
    self.init_params()

    if self.options.verbosity >= 1:
        progress.start_task('Epoch', self.options.pg_train_epochs)
    for epoch in range(self.options.pg_train_epochs):
        if self.options.verbosity >= 1:
            progress.progress(epoch)
        batches = iterators.iter_batches(training_instances, self.options.pg_batch_size)
        num_batches = (len(training_instances) - 1) // self.options.pg_batch_size + 1
        if self.options.verbosity >= 1:
            progress.start_task('Batch', num_batches)
        try:
            for batch_num, batch in enumerate(batches):
                if self.options.verbosity >= 1:
                    progress.progress(batch_num)
                step = epoch * num_batches + batch_num
                self.train_one_batch(list(batch), env, t=step)
                if step % 10 == 0:
                    check_prefix = config.get_file_path('checkpoint')
                    self.saver.save(self.session, check_prefix, global_step=step)
        except KeyboardInterrupt:
            self.summary_writer.flush()
            raise
        if self.options.verbosity >= 1:
            progress.end_task()
    if self.options.verbosity >= 1:
        progress.end_task()

def predict_and_score(self, eval_instances, random=False, verbosity=0):
    predictions = []
    scores = []

    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Instance', len(eval_instances))
    all_cards = [r + s for r in cards_env.RANKS for s in cards_env.SUITS]
    cards_to_loc = {k: (1, 1) for k in all_cards}
    for i, inst in enumerate(eval_instances):
        if self.options.verbosity + verbosity >= 1:
            progress.progress(i)
        walls = inst.input['walls']
        num_possible_locs = np.ones(walls.shape).sum() - walls.sum()
        predictions.append(world.build_world(walls, dict(cards_to_loc)).__dict__)
        score = -len(all_cards) * np.log(num_possible_locs + 3.0) - np.log(num_possible_locs)
        scores.append(score)
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()

    return predictions, scores

def predict_and_score(self, eval_instances, random=False, verbosity=0):
    options = config.options()
    predictions = []
    scores = []

    all_utts = self.base.seq_vec.tokens
    sym_vec = vectorizers.SymbolVectorizer()
    sym_vec.add_all(all_utts)
    prior_scores = self.prior_scores(all_utts)

    base_is_listener = (type(self.base) in listener.LISTENERS.values())

    true_batch_size = options.listener_eval_batch_size // len(all_utts)
    batches = iterators.iter_batches(eval_instances, true_batch_size)
    num_batches = (len(eval_instances) - 1) // true_batch_size + 1

    if options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)
        context = len(batch[0].alt_inputs) if batch[0].alt_inputs is not None else 0
        if context:
            output_grid = [(instance.Instance(utt, color)
                            if base_is_listener else instance.Instance(color, utt))
                           for inst in batch for color in inst.alt_inputs
                           for utt in sym_vec.tokens]
            assert len(output_grid) == context * len(batch) * len(all_utts), \
                'Context must be the same number of colors for all examples'
            true_indices = np.array([inst.input for inst in batch])
        else:
            output_grid = [(instance.Instance(utt, inst.input)
                            if base_is_listener else instance.Instance(inst.input, utt))
                           for inst in batch for utt in sym_vec.tokens]
            true_indices = sym_vec.vectorize_all([inst.input for inst in batch])
            if len(true_indices.shape) == 2:
                # Sequence vectorizer; we're only using single tokens for now.
                true_indices = true_indices[:, 0]
        # Use a separate name for the base model's grid scores so the accumulated
        # `scores` list isn't clobbered on each batch.
        grid_scores = self.base.score(output_grid, verbosity=verbosity)
        if context:
            log_probs = np.array(grid_scores).reshape((len(batch), context, len(all_utts)))
            orig_log_probs = log_probs[np.arange(len(batch)), true_indices, :]
            # Renormalize over only the context colors, and extract the score of
            # the true color.
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis, :]
            log_probs = log_probs[np.arange(len(batch)), true_indices, :]
        else:
            log_probs = np.array(grid_scores).reshape((len(batch), len(all_utts)))
            orig_log_probs = log_probs
        assert log_probs.shape == (len(batch), len(all_utts))
        # Add in the prior scores, if used (S1 \propto L0 * P)
        if prior_scores is not None:
            log_probs = log_probs + 0.5 * prior_scores
        if options.exhaustive_base_weight:
            w = options.exhaustive_base_weight
            log_probs = w * orig_log_probs + (1.0 - w) * log_probs
        # Normalize across utterances. Note that the listener returns probability
        # densities over colors.
        log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]

        if random:
            pred_indices = sample(np.exp(log_probs))
        else:
            pred_indices = np.argmax(log_probs, axis=1)
        predictions.extend(sym_vec.unvectorize_all(pred_indices))
        scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
    progress.end_task()

    return predictions, scores

def train(self, training_instances, validation_instances=None, metrics=None):
    progress.start_task('Instance', len(training_instances))
    for i, inst in enumerate(training_instances):
        progress.progress(i)
        self.train_inst(inst)
    progress.end_task()

def predict_and_score(self, eval_instances, random=False, verbosity=0):
    options = self.get_options()
    predictions = []
    scores = []

    if options.verbosity + verbosity >= 2:
        print('Building alternative utterance list')
    sym_vec = vectorizers.SymbolVectorizer()
    sym_vec.add_all([inst.input for inst in self.get_dataset(self.base)])

    assert eval_instances[0].alt_outputs, \
        'Context required for L(S(L)): %s' % eval_instances[0].__dict__
    context_len = len(eval_instances[0].alt_outputs)
    if options.exhaustive_num_samples > 0:
        num_alt_utts = options.exhaustive_num_samples * context_len + 1
        num_sample_sets = options.exhaustive_num_sample_sets
    else:
        num_alt_utts = len(sym_vec.tokens) + 1
        num_sample_sets = 1
    true_batch_size = max(options.listener_eval_batch_size //
                          (num_alt_utts * num_sample_sets * context_len), 1)
    batches = iterators.iter_batches(eval_instances, true_batch_size)
    num_batches = (len(eval_instances) - 1) // true_batch_size + 1

    if options.exhaustive_output_speaker_samples:
        self.truncate_utterances_files('s1_samples.%s.jsons', num_sample_sets)
    if options.exhaustive_output_speaker_predictions:
        self.truncate_utterances_files('s1_predictions.%s.jsons', num_sample_sets)
    if options.exhaustive_output_all_grids:
        self.truncate_utterances_files('grids.%s.jsons.gz', 1)

    if options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)
        output_grid = self.build_grid(batch, sym_vec.tokens)
        assert len(output_grid) == len(batch) * num_sample_sets * context_len * num_alt_utts, \
            'Context must be the same number of colors for all examples %s' % \
            ((len(output_grid), len(batch), num_sample_sets, context_len, num_alt_utts),)
        true_indices = np.array([inst.output for inst in batch])
        grid_scores = self.base.score(output_grid, verbosity=verbosity)
        l0_log_probs = np.array(grid_scores).reshape((len(batch), num_sample_sets,
                                                      context_len, num_alt_utts))
        # Renormalize over only the context colors, and extract the score of
        # the true color according to the base model.
        l0_log_probs -= logsumexp(l0_log_probs, axis=2)[:, :, np.newaxis, :]
        assert l0_log_probs.shape == (len(batch), num_sample_sets,
                                      context_len, num_alt_utts), l0_log_probs.shape
        orig_log_probs = l0_log_probs[np.arange(len(batch)), 0, :, 0]
        assert orig_log_probs.shape == (len(batch), context_len), orig_log_probs.shape
        # Apply temperature parameter before speaker.
        utilities = options.exhaustive_inv_temperature * l0_log_probs
        # Normalize across utterances. Note that the listener returns probability
        # densities over colors.
        s1_log_probs = utilities - logsumexp(utilities, axis=3)[:, :, :, np.newaxis]
        assert s1_log_probs.shape == (len(batch), num_sample_sets,
                                      context_len, num_alt_utts), s1_log_probs.shape
        if options.exhaustive_output_speaker_samples or \
                options.exhaustive_output_speaker_predictions:
            speaker_dist = s1_log_probs[np.arange(len(batch)), :, true_indices, 1:]
            if options.exhaustive_output_speaker_samples:
                speaker_sample_indices = sample(np.exp(speaker_dist))
                self.write_speaker_utterances('s1_samples.%s.jsons', output_grid,
                                              speaker_sample_indices, l0_log_probs.shape)
            if options.exhaustive_output_speaker_predictions:
                speaker_pred_indices = np.argmax(speaker_dist, axis=2)
                self.write_speaker_utterances('s1_predictions.%s.jsons', output_grid,
                                              speaker_pred_indices, l0_log_probs.shape)
        # Normalize again across context colors.
        l2_log_probs = s1_log_probs - logsumexp(s1_log_probs, axis=2)[:, :, np.newaxis, :]
        assert l2_log_probs.shape == (len(batch), num_sample_sets,
                                      context_len, num_alt_utts), l2_log_probs.shape
        # Extract the score of each color for the input utterance according to the L2 model.
        log_probs = l2_log_probs[np.arange(len(batch)), :, :, 0]
        assert log_probs.shape == (len(batch), num_sample_sets, context_len), log_probs.shape
        # Blend L0 and L2 (if enabled) to produce the final score.
        if options.exhaustive_base_weight:
            w = options.exhaustive_base_weight
            # Bump zero probabilities up to epsilon ~= 3e-23, because previously we would
            # only have -inf log probs, but now if w < 0 we could get NaNs.
            log_probs = (w * np.maximum(orig_log_probs[:, np.newaxis, :], -52.0) +
                         (1.0 - w) * np.maximum(log_probs, -52.0))
            # Normalize across context one more time to prevent cheating when blending.
            log_probs -= logsumexp(log_probs, axis=2)[:, :, np.newaxis]
        # Average (in probability space) over sample sets.
        log_probs = logsumexp(log_probs, axis=1) - np.log(log_probs.shape[1])
        if options.exhaustive_output_all_grids:
            self.write_grids(output_grid, l0_log_probs, s1_log_probs, l2_log_probs, log_probs)

        if random:
            pred_indices = sample(np.exp(log_probs))
        else:
            pred_indices = np.argmax(log_probs, axis=1)
        predictions.extend(pred_indices)
        # Extract the score of the true color according to the combined model.
        scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
    progress.end_task()

    return predictions, scores
