class UnigramPrior(object):
    '''
    Unigram prior over whitespace-tokenized utterances: every token is drawn
    independently in proportion to its training count.

    >>> p = UnigramPrior()
    >>> p.train([instance.Instance('blue')])
    >>> p.sample(3)  # doctest: +ELLIPSIS
    [Instance('...', None), Instance('...', None), Instance('...', None)]
    '''
    def __init__(self):
        self.vec = SequenceVectorizer()
        self.vec.add_all([['</s>'], ['<MASK>']])
        self.counts = theano.shared(np.zeros((self.vec.num_types,), dtype=np.int32))
        self.total = theano.shared(np.array(0, dtype=np.int32))
        # Symbolic log relative frequency of each token type.  Types with a
        # zero count (always including <MASK>, see `train`) map to -inf.
        self.log_probs = T.log(T.cast(self.counts, 'float32') /
                               T.cast(self.total, 'float32'))
        self.mask_index = self.vec.vectorize(['<MASK>'])[0]

    def train(self, training_instances, listener_data=True):
        '''
        Count token occurrences in the training utterances and store the
        counts (and their total) in the shared variables.

        The utterance is read from `inst.input` if `listener_data` is true,
        otherwise from `inst.output`, and is tokenized with `str.split()`.
        Each sequence is padded with `pad` before counting, so one `</s>`
        per utterance is counted; `<MASK>` padding is excluded.
        '''
        get_utt = (lambda inst: inst.input) if listener_data else (lambda inst: inst.output)
        tokenized = [get_utt(inst).split() for inst in training_instances]
        self.vec.add_all(tokenized)
        x = self.vec.vectorize_all(self.pad(tokenized, self.vec.max_len))
        vocab_size = self.vec.num_types
        counts = np.bincount(x.flatten(), minlength=vocab_size).astype(np.int32)
        # Padding is not part of the distribution being modelled.
        counts[self.mask_index] = 0
        self.counts.set_value(counts)
        self.total.set_value(np.sum(counts))

    def apply(self, input_vars):
        '''
        Build the symbolic log-probability of the token indices in
        `input_vars` (a one-element sequence holding a Theano integer
        tensor).

        For a 1-d input the per-token log-probabilities are returned; for
        higher-rank input they are summed over axis 1.  Positions holding
        `<MASK>` contribute exactly 0.
        '''
        (x,) = input_vars

        token_log_probs = self.log_probs[x]
        if self.mask_index is not None:
            # Select rather than multiply: the <MASK> entry of `log_probs`
            # is -inf, and -inf * 0 would give nan.
            token_log_probs = T.switch(T.neq(x, self.mask_index),
                                       token_log_probs,
                                       T.zeros_like(token_log_probs))
        if token_log_probs.ndim == 1:
            return token_log_probs
        else:
            return token_log_probs.sum(axis=1)

    def sample(self, num_samples=1):
        '''
        Draw `num_samples` utterances of `self.vec.max_len` independently
        sampled tokens each, and return them as a list of
        `instance.Instance` objects whose input is the space-joined tokens
        after `strip_invalid_tokens`.
        '''
        # The distribution is the same for every position; compute it once.
        probs = self.counts.get_value() * 1.0 / self.total.get_value()
        indices = np.array([[sample(probs)
                             for _t in range(self.vec.max_len)]
                            for _s in range(num_samples)],
                           dtype=np.int32)
        return [instance.Instance(' '.join(strip_invalid_tokens(s)))
                for s in self.vec.unvectorize_all(indices)]

    def pad(self, sequences, length):
        '''
        Adds </s> tokens followed by zero or more <MASK> tokens to bring the
        total length of all sequences to `length + 1` (the addition of one is
        because all sequences receive a </s>, but `length` should be the max
        length of the original sequences).

        >>> UnigramPrior().pad([['blue'], ['very', 'blue']], 2)
        [['blue', '</s>', '<MASK>'], ['very', 'blue', '</s>']]
        '''
        return [seq + ['</s>'] + ['<MASK>'] * (length - len(seq))
                for seq in sequences]
class SpeakerLearner(NeuralLearner):
    '''
    A speaker with a feedforward neural net color input passed into an RNN
    to generate a description.
    '''
    def __init__(self, id=None, context_len=1):
        '''
        :param id: identifier forwarded to the base class; also used as a
            name prefix for Theano variables and layers.
        :param int context_len: number of colors per instance (1 means no
            context).
        '''
        super(SpeakerLearner, self).__init__(id=id)
        self.seq_vec = SequenceVectorizer(
            unk_threshold=self.options.speaker_unk_threshold)
        color_repr = COLOR_REPRS[self.options.speaker_color_repr]
        self.color_vec = color_repr(self.options.speaker_color_resolution,
                                    hsv=self.options.speaker_hsv)
        self.context_len = context_len

    @property
    def use_color_mask(self):
        '''Whether the model takes an extra color-mask input (not here).'''
        return False

    def predict(self, eval_instances, random=False, verbosity=0):
        '''
        Generate one description string per instance in `eval_instances`.

        If `random` is true, tokens are sampled from the model's
        distribution one step at a time (beam size 1); otherwise beam search
        with `options.speaker_beam_size` is used and the first beam entry is
        returned.  `verbosity` is added to `options.verbosity` to decide how
        much progress output to print.
        '''
        result = []
        batches = iterators.iter_batches(eval_instances,
                                         self.options.speaker_eval_batch_size)
        num_batches = (len(eval_instances) - 1) // self.options.speaker_eval_batch_size + 1

        eos_index = self.seq_vec.vectorize(['</s>'])[0]

        if self.options.verbosity + verbosity >= 2:
            print('Predicting')
        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Predict batch', num_batches)
        for batch_num, batch in enumerate(batches):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(batch_num)
            batch = list(batch)
            if self.use_color_mask:
                (c, cm, _p, mask), (_y, ) = self._data_to_arrays(batch, test=True)
            else:
                (c, _p, mask), (_y, ) = self._data_to_arrays(batch, test=True)
            assert mask.all()  # We shouldn't be masking anything in prediction

            beam_size = 1 if random else self.options.speaker_beam_size
            # Builtin `bool` instead of the `np.bool` alias, which newer
            # NumPy releases no longer provide.
            done = np.zeros((len(batch), beam_size), dtype=bool)
            beam = np.zeros((len(batch), beam_size, self.seq_vec.max_len),
                            dtype=np.int32)
            beam[:, :, 0] = self.seq_vec.vectorize(['<s>'])[0]
            # log(0) = -inf everywhere, then score 0 for the first beam
            # entry so that it is the only live hypothesis initially.
            beam_scores = np.log(np.zeros((len(batch), beam_size)))
            beam_scores[:, 0] = 0.0

            # One copy of the inputs per beam entry.
            c = np.repeat(c, beam_size, axis=0)
            mask = np.repeat(mask, beam_size, axis=0)
            if self.use_color_mask:
                cm = np.repeat(cm, beam_size, axis=0)

            for length in range(1, self.seq_vec.max_len):
                if done.all():
                    break
                # Flatten (batch, beam) and drop the last position to form
                # the "previous token" input.
                p = beam.reshape(
                    (beam.shape[0] * beam.shape[1], beam.shape[2]))[:, :-1]
                inputs = [c, cm, p, mask] if self.use_color_mask else [c, p, mask]
                probs = self.model.predict(inputs)
                if random:
                    indices = sample(probs[:, length - 1, :])
                    beam[:, 0, length] = indices
                    # NOTE(review): if `indices` is 1-d this broadcasts
                    # `done` from (batch, 1) to (batch, batch); `done.all()`
                    # should still be true exactly when every sequence has
                    # produced </s> -- confirm.
                    done = np.logical_or(done, indices == eos_index)
                else:
                    assert probs.shape[1] == p.shape[1], (probs.shape[1], p.shape[1])
                    assert probs.shape[2] == len(self.seq_vec.tokens), \
                        (probs.shape[2], len(self.seq_vec.tokens))
                    scores = np.log(probs)[:, length - 1, :].reshape(
                        (beam.shape[0], beam.shape[1], probs.shape[2]))
                    beam_search_step(scores, length, beam, beam_scores, done, eos_index)
            outputs = self.seq_vec.unvectorize_all(beam[:, 0, :])
            result.extend([' '.join(strip_invalid_tokens(o)) for o in outputs])
        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return result

    def score(self, eval_instances, verbosity=0):
        '''
        Return a list with one score per instance: the sum over unmasked
        positions of the log of the model's probability for the target
        token at that position.
        '''
        result = []
        batches = iterators.iter_batches(eval_instances,
                                         self.options.speaker_eval_batch_size)
        num_batches = (len(eval_instances) - 1) // self.options.speaker_eval_batch_size + 1

        if self.options.verbosity + verbosity >= 2:
            print('Scoring')
        if self.options.verbosity + verbosity >= 1:
            progress.start_task('Score batch', num_batches)
        for batch_num, batch in enumerate(batches):
            if self.options.verbosity + verbosity >= 1:
                progress.progress(batch_num)
            batch = list(batch)
            xs, (n, ) = self._data_to_arrays(batch, test=False)
            # The mask is the last input; its position depends on whether a
            # color mask precedes it.
            if self.use_color_mask:
                mask = xs[3]
            else:
                mask = xs[2]

            probs = self.model.predict(xs)
            # Pick out probs[i, t, n[i, t]] for every row i and step t.
            token_probs = probs[np.arange(probs.shape[0])[:, np.newaxis],
                                np.arange(probs.shape[1]), n]
            scores_arr = np.sum(np.log(token_probs) * mask, axis=1)
            scores = scores_arr.tolist()
            result.extend(scores)
        if self.options.verbosity + verbosity >= 1:
            progress.end_task()

        return result

    def _data_to_arrays(self, training_instances,
                        init_vectorizer=False, test=False, inverted=False):
        '''
        Convert instances to the arrays consumed by the model.

        :param training_instances: instances whose color is in `input` and
            description in `output` (swapped if `inverted` is true).
        :param bool init_vectorizer: if true, first add the tokenized
            descriptions to `self.seq_vec` and dump the unk-replaced
            training sequences via `config.dump`.
        :param bool test: if true, ignore the descriptions and use a dummy
            `<s> </s> </s> ...` sequence instead.
        :param bool inverted: swap the roles of `input` and `output`.
        :return: `([c, P, mask], [N])` where `P` and `N` are the vectorized
            previous-token and next-token sequences (each of width
            `seq_vec.max_len - 1`), `mask` is 1 where the next token is not
            `<MASK>`, and `c` is the vectorized colors repeated along a new
            axis 1 of length `seq_vec.max_len - 1`.
        '''
        context_len = self.context_len if hasattr(self, 'context_len') else 1
        use_context = context_len > 1

        def get_multi(val):
            # Unwrap a one-element tuple; pass anything else through.
            if isinstance(val, tuple):
                assert len(val) == 1
                return val[0]
            else:
                return val

        get_i, get_o = (lambda inst: inst.input), (lambda inst: inst.output)
        get_color, get_desc_simple = (get_o, get_i) if inverted else (get_i, get_o)
        get_desc = lambda inst: get_multi(get_desc_simple(inst))
        get_i_ind, get_o_ind = (
            (lambda inst: inst.alt_inputs[get_multi(inst.input)]),
            (lambda inst: inst.alt_outputs[get_multi(inst.output)]))
        get_color_indexed = get_o_ind if inverted else get_i_ind
        get_alt_i, get_alt_o = (lambda inst: inst.alt_inputs), (
            lambda inst: inst.alt_outputs)
        get_alt_colors = get_alt_o if inverted else get_alt_i

        if hasattr(self.options, 'speaker_tokenizer'):
            tokenize = TOKENIZERS[self.options.speaker_tokenizer]
        else:
            tokenize = TOKENIZERS['whitespace']

        if init_vectorizer:
            tokenized = [['<s>'] + tokenize(get_desc(inst)) + ['</s>']
                         for inst in training_instances]
            self.seq_vec.add_all(tokenized)
            unk_replaced = self.seq_vec.unk_replace_all(tokenized)
            config.dump(unk_replaced, 'unk_replaced.train.jsons', lines=True)

        colors = []
        previous = []
        next_tokens = []
        if self.options.verbosity >= 9:
            print('%s _data_to_arrays:' % self.id)
        for inst in training_instances:
            desc, color = get_desc(inst), get_color(inst)
            if isinstance(color, numbers.Number):
                # A numeric color is an index into the alternatives.
                color = get_color_indexed(inst)
            if test:
                full = ['<s>'] + ['</s>'] * (self.seq_vec.max_len - 1)
            else:
                desc = tokenize(desc)
                full = (['<s>'] + desc + ['</s>'] +
                        ['<MASK>'] * (self.seq_vec.max_len - 1 - len(desc)))
            prev = full[:-1]
            following = full[1:]
            if self.options.verbosity >= 9:
                print('%s, %s -> %s' % (repr(color), repr(prev), repr(following)))
            colors.append(color)
            if use_context:
                new_context = get_alt_colors(inst)
                index = get_color(inst)
                if isinstance(index, tuple):
                    assert len(index) == 1
                    index = index[0]
                assert len(new_context) == context_len, \
                    'Inconsistent context lengths: %s' % ((context_len, len(new_context)),)
                # Target color first (appended above), then the other
                # colors of the context.
                colors.extend(
                    [alt for j, alt in enumerate(new_context) if j != index])
            previous.append(prev)
            next_tokens.append(following)

        P = np.zeros((len(previous), self.seq_vec.max_len - 1), dtype=np.int32)
        mask = np.zeros((len(previous), self.seq_vec.max_len - 1), dtype=np.int32)
        N = np.zeros((len(next_tokens), self.seq_vec.max_len - 1), dtype=np.int32)
        c = self.color_vec.vectorize_all(colors, hsv=True)
        # Floor division: array dimensions must be integers, and `/` yields
        # a float under true division.
        num_rows = len(colors) // context_len
        if len(c.shape) == 1:
            c = c.reshape((num_rows, context_len))
        else:
            c = c.reshape((num_rows, context_len * c.shape[1]) + c.shape[2:])
        for i, (prev, following) in enumerate(zip(previous, next_tokens)):
            # Truncate descriptions longer than the vectorizer's max length.
            if len(prev) > P.shape[1]:
                prev = prev[:P.shape[1]]
            if len(following) > N.shape[1]:
                following = following[:N.shape[1]]
            P[i, :len(prev)] = self.seq_vec.vectorize(prev)
            N[i, :len(following)] = self.seq_vec.vectorize(following)
            for t, token in enumerate(following):
                mask[i, t] = (token != '<MASK>')
        # Repeat the color representation at every time step.
        c = np.tile(c[:, np.newaxis, ...],
                    [1, self.seq_vec.max_len - 1] + [1] * (c.ndim - 1))

        if self.options.verbosity >= 9:
            print('c: %s' % (repr(c), ))
            print('P: %s' % (repr(P), ))
            print('mask: %s' % (repr(mask), ))
            print('N: %s' % (repr(N), ))
        return [c, P, mask], [N]

    def _build_model(self, model_class=SimpleLasagneModel):
        '''
        Create the Theano input/target variables, build the network with
        `_get_l_out`, and wrap it in `model_class` as `self.model`.
        '''
        id_tag = (self.id + '/') if self.id else ''

        input_vars = self.color_vec.get_input_vars(
            self.id, recurrent=not self.use_color_mask)
        if self.use_color_mask:
            input_vars.append(T.imatrix(id_tag + 'color_mask'))
        input_vars.extend(
            [T.imatrix(id_tag + 'previous'), T.imatrix(id_tag + 'mask')])
        target_var = T.imatrix(id_tag + 'targets')

        self.l_out, self.input_layers = self._get_l_out(input_vars)
        self.model = model_class(
            input_vars, [target_var], self.l_out, id=self.id,
            loss=self.masked_loss(input_vars),
            optimizer=OPTIMIZERS[self.options.speaker_optimizer],
            learning_rate=self.options.speaker_learning_rate)

    def train_priors(self, training_instances, listener_data=False):
        '''
        Instantiate and train the two priors (`prior_emp` and
        `prior_smooth`) of the class selected by `options.speaker_prior`.
        '''
        prior_class = PRIORS[self.options.speaker_prior]
        self.prior_emp = prior_class(recurrent=True)
        self.prior_smooth = prior_class(recurrent=True)

        self.prior_emp.train(training_instances, listener_data=listener_data)
        self.prior_smooth.train(training_instances, listener_data=listener_data)

    def _get_l_out(self, input_vars):
        '''
        Build the network: color representation -> optional NIN hidden
        layers, concatenated with the previous-token embedding -> stacked
        recurrent layers -> optional dense hidden layers -> softmax over the
        vocabulary at every time step.

        :param input_vars: the color input variable(s) followed by the
            previous-token and mask variables (the last two entries).
        :return: `(l_out, input_layers)` where `input_layers` lists the
            input layers in the same order as `input_vars`.
        '''
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        prev_output_var, mask_var = input_vars[-2:]
        color_input_vars = input_vars[:-2]

        context_len = self.context_len if hasattr(self, 'context_len') else 1
        l_color_repr, color_inputs = self.color_vec.get_input_layer(
            color_input_vars,
            recurrent_length=self.seq_vec.max_len - 1,
            cell_size=self.options.speaker_cell_size,
            context_len=context_len,
            id=self.id)
        # Swap the last two axes for the NIN layers, then swap them back.
        l_hidden_color = dimshuffle(l_color_repr, (0, 2, 1))
        for i in range(1, self.options.speaker_hidden_color_layers + 1):
            l_hidden_color = NINLayer(
                l_hidden_color, num_units=self.options.speaker_cell_size,
                nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
                name=id_tag + 'hidden_color%d' % i)
        l_hidden_color = dimshuffle(l_hidden_color, (0, 2, 1))

        l_prev_out = InputLayer(shape=(None, self.seq_vec.max_len - 1),
                                input_var=prev_output_var,
                                name=id_tag + 'prev_input')
        l_prev_embed = EmbeddingLayer(
            l_prev_out, input_size=len(self.seq_vec.tokens),
            output_size=self.options.speaker_cell_size,
            name=id_tag + 'prev_embed')
        l_in = ConcatLayer([l_hidden_color, l_prev_embed], axis=2,
                           name=id_tag + 'color_prev')
        l_mask_in = InputLayer(shape=(None, self.seq_vec.max_len - 1),
                               input_var=mask_var,
                               name=id_tag + 'mask_input')
        l_rec_drop = l_in

        cell = CELLS[self.options.speaker_cell]
        cell_kwargs = {
            'mask_input': (None if self.options.speaker_no_mask else l_mask_in),
            'grad_clipping': self.options.speaker_grad_clipping,
            'num_units': self.options.speaker_cell_size,
        }
        if self.options.speaker_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(self.options.speaker_forget_bias))
        if self.options.speaker_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                self.options.speaker_nonlinearity]

        # All but the last recurrent layer are optionally followed by
        # dropout; the last one is built after the loop.
        for i in range(1, self.options.speaker_recurrent_layers):
            l_rec = cell(l_rec_drop, name=id_tag + 'rec%d' % i, **cell_kwargs)
            if self.options.speaker_dropout > 0.0:
                l_rec_drop = DropoutLayer(l_rec, p=self.options.speaker_dropout,
                                          name=id_tag + 'rec%d_drop' % i)
            else:
                l_rec_drop = l_rec
        l_rec = cell(l_rec_drop,
                     name=id_tag + 'rec%d' % self.options.speaker_recurrent_layers,
                     **cell_kwargs)
        # Merge batch and time axes so the dense layers apply per step.
        l_shape = ReshapeLayer(l_rec, (-1, self.options.speaker_cell_size),
                               name=id_tag + 'reshape')
        l_hidden_out = l_shape
        for i in range(1, self.options.speaker_hidden_out_layers + 1):
            l_hidden_out = DenseLayer(
                l_hidden_out, num_units=self.options.speaker_cell_size,
                nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
                name=id_tag + 'hidden_out%d' % i)
        l_softmax = DenseLayer(l_hidden_out, num_units=len(self.seq_vec.tokens),
                               nonlinearity=softmax, name=id_tag + 'softmax')
        l_out = ReshapeLayer(
            l_softmax,
            (-1, self.seq_vec.max_len - 1, len(self.seq_vec.tokens)),
            name=id_tag + 'out')

        return l_out, color_inputs + [l_prev_out, l_mask_in]

    def loss_out(self, input_vars=None, target_var=None):
        '''
        Return the symbolic masked loss of the network output against
        `target_var`, defaulting both arguments to the model's own
        variables.
        '''
        if input_vars is None:
            input_vars = self.model.input_vars
        if target_var is None:
            target_var = self.model.target_var
        pred = get_output(self.l_out, dict(zip(self.input_layers, input_vars)))
        loss = self.masked_loss(input_vars)
        return loss(pred, target_var)

    def masked_loss(self, input_vars):
        '''Return the loss function, masked by the last input variable.'''
        return masked_seq_crossentropy(input_vars[-1])

    def sample_prior_smooth(self, num_samples):
        '''Draw `num_samples` samples from `self.prior_smooth`.'''
        return self.prior_smooth.sample(num_samples)