def listified_fn(*input_list):
    input_dict = OrderedDict()
    input_it = iter(input_list)
    input_dict.update(equizip(sequences.keys(),
                              it.islice(input_it, len(sequences))))
    for name, info in outputs_info.items():
        if info is None:
            continue  # no inputs
        elif isinstance(info, (dict, OrderedDict)):
            ntaps = len(info.get("taps", [-1]))
        else:
            # assume some kind of tensor variable or numpy array
            ntaps = 1
        taps = [next(input_it) for _ in range(ntaps)]
        input_dict[name] = taps if ntaps > 1 else taps[0]
    input_dict.update(equizip(non_sequences.keys(),
                              it.islice(input_it, len(non_sequences))))
    # input_list should be exactly empty here
    try:
        next(input_it)
    except StopIteration:
        pass
    else:
        assert False
    output_dict = fn(**input_dict)
    output_list = [output_dict[output_name].copy(name=output_name)
                   for output_name in outputs_info.keys()]
    return output_list

def compute_steps(self, previous_steps):
    """Build a Theano expression for steps for all parameters.

    Override this method if you want to process the steps
    with respect to all parameters as a whole, not parameter-wise.

    Parameters
    ----------
    previous_steps : OrderedDict
        An :class:`~OrderedDict` of
        (:class:`~tensor.TensorSharedVariable`,
        :class:`~tensor.TensorVariable`) pairs. The keys are the
        parameters being trained, the values are the expressions for
        quantities related to gradients of the cost with respect to
        the parameters, either the gradients themselves or steps in
        related directions.

    Returns
    -------
    steps : OrderedDict
        A dictionary of the proposed steps in the same form as
        `previous_steps`.
    updates : list
        A list of tuples representing updates to be performed.

    """
    parameter_wise = [self.compute_step(parameter,
                                        previous_steps[parameter])
                      for parameter in previous_steps]
    steps, updates = equizip(*parameter_wise)
    steps = OrderedDict((parameter, step) for parameter, step
                        in equizip(previous_steps.keys(), steps))
    updates = list(itertools.chain(*updates))
    return steps, updates

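# A hedged sketch of what the docstring above means by processing the steps
# "as a whole": a hypothetical rule (not part of the original code) that
# rescales every step by one factor computed from all parameters together.
# It assumes blocks' StepRule base class and l2_norm helper.
from collections import OrderedDict

from blocks.algorithms import StepRule
from blocks.theano_expressions import l2_norm


class GlobalNormScaling(StepRule):
    def __init__(self, target_norm=1.0):
        self.target_norm = target_norm

    def compute_steps(self, previous_steps):
        # one scale factor derived from the joint norm of all steps
        norm = l2_norm(list(previous_steps.values()))
        scale = self.target_norm / (norm + 1e-8)
        steps = OrderedDict((parameter, step * scale)
                            for parameter, step in previous_steps.items())
        return steps, []  # no extra updates
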
def _push_allocation_config(self):
    if not len(self.dims) - 1 == len(self.linear_transformations):
        raise ValueError
    for input_dim, output_dim, layer in \
            equizip(self.dims[:-1], self.dims[1:],
                    self.linear_transformations):
        layer.input_dim = input_dim
        layer.output_dim = output_dim
        layer.use_bias = self.use_bias
    for dim, bn in equizip(self.dims[1:], self.batch_norms):
        bn.input_dim = dim

def compute_steps(self, previous_steps):
    subparams = [subparam for (subparam, _, _, _)
                 in self.subtensor_params.values()]
    keys = [param for param in previous_steps if param not in subparams]
    parameter_wise = [self.compute_step(param, previous_steps[param])
                      for param in keys]

    # We use a special compute_step for lookup tables
    for param, (subparam, canonized_indices, _, _) in \
            self.subtensor_params.iteritems():
        keys.append(subparam)
        parameter_wise.append(
            self.compute_step_subparam(param, canonized_indices,
                                       previous_steps[subparam]))

    steps, updates = equizip(*parameter_wise)
    steps = OrderedDict((param, step) for param, step
                        in equizip(keys, steps))
    updates = list(itertools.chain(*updates))
    return steps, updates

def get_data(self, request=None):
    data = next(self.child_epoch_iterator)
    data = OrderedDict(equizip(self.sources, data))
    feature_batch = data[self.source]

    windowed_features = []
    for features in feature_batch:
        features_padded = features.copy()
        features_shifted = [features]
        # shift forward
        for i in xrange(self.window_size / 2):
            feats = numpy.roll(features_padded, i + 1, axis=0)
            feats[:i + 1, :] = 0
            features_shifted.append(feats)
        features_padded = features.copy()
        # shift backward
        for i in xrange(self.window_size / 2):
            feats = numpy.roll(features_padded, -i - 1, axis=0)
            feats[-i - 1:, :] = 0
            features_shifted.append(feats)
        windowed_features.append(numpy.concatenate(
            features_shifted, axis=1))
    data[self.source] = windowed_features
    return data.values()

def compute_next_states(self, contexts, topical_embeddings,
                        topical_contexts, content_embeddings,
                        states, outputs):
    """Computes next states.

    Parameters
    ----------
    contexts : dict
        A {name: :class:`numpy.ndarray`} dictionary of contexts.
    states : dict
        A {name: :class:`numpy.ndarray`} dictionary of states.
    outputs : :class:`numpy.ndarray`
        A :class:`numpy.ndarray` of this step outputs.

    Returns
    -------
    A {name: numpy.array} dictionary of next states.

    """
    input_states = [states[name] for name in self.input_state_names]
    next_values = self.next_state_computer(*(
        list(contexts.values()) + topical_embeddings.values() +
        topical_contexts.values() + content_embeddings.values() +
        input_states + [outputs]))
    return OrderedDict(equizip(self.state_names, next_values))

def predict(f_gen, X=None, Y=None, filenames=None, savepath=None,
            n_attempts=None):
    if X is None or Y is None or filenames is None:
        X, Y, filenames = cocoXYFilenames(dataType='val2014')
    ep = DataETL.getTokenizedStream(
        X=X, Y=Y, sources=('X', 'Y'), batch_size=1).get_epoch_iterator()
    if savepath:
        generated_captions = {}

    for filename in filenames:
        try:
            # No good way to make sure the filename is matching
            im_vects, txt_enc = ep.next()
            txt = " ".join(vect.inverse_transform(txt_enc))
            print "\nTrying for: ", txt
            if n_attempts:
                batch_size = n_attempts
            else:
                message = ("Number of attempts to generate correct text? ")
                batch_size = int(input(message))

            # make this call compatible with GRUs and LSTMs
            # GRU - generated is tuple of 3 elements
            #       (states, outputs, costs)
            # LSTM - generated is tuple of 4 elements
            #        (states, cells, outputs, costs)
            generated = f_gen(
                np.repeat(im_vects, batch_size, 0))
            outputs = generated[-2]
            costs = generated[-1]

            outputs = list(outputs.T)
            costs = list(costs.T)
            for i in range(len(outputs)):
                outputs[i] = list(outputs[i])
                try:
                    # 0 is my PAD character, via foxhound Tokenizer
                    true_length = outputs[i].index(0)
                except ValueError:
                    # full sequence length
                    true_length = len(outputs[i])
                outputs[i] = outputs[i][:true_length]
                costs[i] = costs[i][:true_length].mean()

            messages = []
            for sample, cost in equizip(outputs, costs):
                # vect.inverse_transform needs a shape (seq, 1) array
                sample = np.array(sample).reshape(-1, 1)
                message = "({0:0.3f}) ".format(cost)
                message += " ".join(vect.inverse_transform(sample))
                messages.append((cost, message))
            messages.sort(key=operator.itemgetter(0), reverse=True)
            # convert to decimal to be picklable
            messages = [(decimal.Decimal(float(cost)), message)
                        for cost, message in messages]
            for _, message in messages:
                print(message)
            if savepath:
                generated_captions[filename] = messages
        except:
            if savepath:
                dict2json(generated_captions, savepath, cls=DecimalEncoder)
            return

def _compile_initial_state_and_context_computer(self):
    initial_states = VariableFilter(
        applications=[self.generator.initial_states],
        roles=[OUTPUT])(self.cg)
    # print("initial_states")
    # print initial_states
    initial_states2 = VariableFilter(
        bricks=[Encoder],
        roles=[OUTPUT])(self.cg)
    outputs = OrderedDict([(v.tag.name, v) for v in initial_states])
    outputs[initial_states2[0].tag.name] = initial_states2[0]
    beam_size = unpack(VariableFilter(
        applications=[self.generator.initial_states],
        name='batch_size')(self.cg))
    print self.inputs
    # print("outputs")
    # print outputs
    for name, context in equizip(self.context_names, self.contexts):
        outputs[name] = context
    outputs['beam_size'] = beam_size
    self.initial_state_and_context_computer = function(
        self.inputs, outputs, on_unused_input='ignore')

def __init__(self, step_rule=None, gradients=None, known_grads=None,
             consider_constant=None, on_unused_sources='raise',
             theano_func_kwargs=None, **kwargs):
    if gradients:
        kwargs.setdefault("parameters", gradients.keys())
    super(GradientDescent, self).__init__(**kwargs)

    self.gradients = gradients
    if not self.gradients:
        logger.info("Taking the cost gradient")
        self.gradients = dict(
            equizip(self.parameters, tensor.grad(
                self.cost, self.parameters,
                known_grads=known_grads,
                consider_constant=consider_constant)))
        logger.info("The cost gradient computation graph is built")
    else:
        if known_grads:
            raise ValueError("known_grads has no effect when gradients "
                             "are passed in")
        if consider_constant is not None:
            raise ValueError("consider_constant has no effect when "
                             "gradients are passed in")

    self.step_rule = step_rule if step_rule else Scale()

    self.total_gradient_norm = l2_norm(
        self.gradients.values()).copy(name="total_gradient_norm")
    self.steps, self.step_rule_updates = (
        self.step_rule.compute_steps(self.gradients))
    self.total_step_norm = l2_norm(
        self.steps.values()).copy(name="total_step_norm")
    self.on_unused_sources = on_unused_sources
    self.theano_func_kwargs = (theano_func_kwargs if theano_func_kwargs
                               is not None else dict())

def _allocate(self):
    input_dim = ((self.input_dim,)
                 if not isinstance(self.input_dim, collections.Sequence)
                 else self.input_dim)
    broadcastable = (tuple(False for _ in input_dim)
                     if self.broadcastable is None else self.broadcastable)
    if len(input_dim) != len(broadcastable):
        raise ValueError("input_dim and broadcastable must be same length")
    var_dim = tuple(1 if broadcast else dim for dim, broadcast in
                    equizip(input_dim, broadcastable))

    # "beta", from the Ioffe & Szegedy manuscript.
    if self.learn_shift:
        self.shift = shared_floatx_nans(var_dim, name='batch_norm_shift',
                                        broadcastable=broadcastable)
        add_role(self.shift, BATCH_NORM_SHIFT_PARAMETER)
        self.parameters.append(self.shift)
    else:
        self.shift = tensor.constant(0, dtype=theano.config.floatX)

    if self.learn_scale and not self.mean_only:
        # "gamma", from the Ioffe & Szegedy manuscript.
        self.scale = shared_floatx_nans(var_dim, name='batch_norm_scale',
                                        broadcastable=broadcastable)
        add_role(self.scale, BATCH_NORM_SCALE_PARAMETER)
        self.parameters.append(self.scale)
    else:
        self.scale = tensor.constant(1., dtype=theano.config.floatX)

    self._allocate_population_statistics(var_dim, broadcastable)

def predict(f_gen):
    ep = getTestStream(batch_size=1).get_epoch_iterator()
    while True:
        im_vects, txt_enc = ep.next()
        mnist_txt = "".join(code2char[code] for code in txt_enc[0])
        print "\nTrying for: ", mnist_txt
        message = ("Number of attempts to generate correct text? ")
        batch_size = int(input(message))
        states, outputs, costs = f_gen(
            np.repeat(im_vects, batch_size, 0))
        outputs = list(outputs.T)
        costs = list(costs.T)
        for i in range(len(outputs)):
            outputs[i] = list(outputs[i])
            try:
                # 0 was my stop character for MNIST alphabetic
                true_length = outputs[i].index(0)
            except ValueError:
                # full sequence length
                true_length = len(outputs[i])
            outputs[i] = outputs[i][:true_length]
            costs[i] = costs[i][:true_length].sum()
        messages = []
        for sample, cost in equizip(outputs, costs):
            message = "({0:0.3f}) ".format(cost)
            message += "".join(code2char[code] for code in sample)
            messages.append((cost, message))
        messages.sort(key=operator.itemgetter(0), reverse=True)
        for _, message in messages:
            print(message)

def _compile_initial_state_computer(self):
    # TODO: should be now extractable from the computation graph
    initial_states = self.generator.initial_states(
        1, as_dict=True,
        **dict(equizip(self.context_names, self.contexts)))
    self.initial_state_computer = function(
        self.contexts, initial_states, on_unused_input='ignore')

def result_to_lists(result):
    outputs, masks, costs, weights = [array.T for array in result]
    outputs = [list(output[:mask.sum()])
               for output, mask in equizip(outputs, masks)]
    costs = list(costs.T.sum(axis=0))
    weights = numpy.argsort(-weights, axis=0)
    return outputs, costs, weights

def get_aggregated_values(self):
    """Readout the aggregated values."""
    if not self._initialized:
        raise Exception("To readout you must first initialize, then "
                        "process batches!")
    ret_vals = self._readout_fun()
    return OrderedDict(equizip(self.variable_names, ret_vals))

def parse(string):
    """Constructs a path from its string representation.

    .. todo::

        More error checking.

    Parameters
    ----------
    string : str
        String representation of the path.

    """
    elements = Path.separator_re.split(string)[1:]
    separators = elements[::2]
    parts = elements[1::2]
    if not len(elements) == 2 * len(separators) == 2 * len(parts):
        raise ValueError

    nodes = []
    for separator, part in equizip(separators, parts):
        if separator == Path.separator:
            nodes.append(Path.BrickName(part))
        elif separator == Path.parameter_separator:
            nodes.append(Path.ParameterName(part))
        else:
            # This can not happen if separator_re is a correct regexp
            raise ValueError("Wrong separator {}".format(separator))

    return Path(nodes)

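# A hedged example of the parse above, assuming blocks' '/' brick separator
# and '.' parameter separator (the path itself is made up):
#     Path.parse('/mlp/linear_0.W')
#     -> Path([BrickName('mlp'), BrickName('linear_0'), ParameterName('W')])
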
def __init__(self, step_rule=None, gradients=None, known_grads=None,
             **kwargs):
    if gradients:
        kwargs.setdefault("params", gradients.keys())
    super(GradientDescent, self).__init__(**kwargs)

    self.gradients = gradients
    if not self.gradients:
        logger.info("Taking the cost gradient")
        self.gradients = dict(
            equizip(self.params, tensor.grad(self.cost, self.params,
                                             known_grads=known_grads)))
        logger.info("The cost gradient computation graph is built")
    else:
        if known_grads:
            raise ValueError("known_grads has no effect when gradients "
                             "are passed in")
    self.step_rule = step_rule if step_rule else Scale()

    self.total_gradient_norm = named_copy(l2_norm(self.gradients.values()),
                                          "total_gradient_norm")
    self.steps, self.step_rule_updates = (
        self.step_rule.compute_steps(self.gradients))
    self.total_step_norm = named_copy(l2_norm(self.steps.values()),
                                      "total_step_norm")

def merge_parallel(src_filename, trg_filename, merged_filename):
    with open(src_filename, 'r') as left:
        with open(trg_filename, 'r') as right:
            with open(merged_filename, 'w') as final:
                for lline, rline in equizip(left, right):
                    if (lline != '\n') and (rline != '\n'):
                        final.write(lline[:-1] + ' ||| ' + rline)

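# Hypothetical invocation (the file names are made up): writes one
# "<source> ||| <target>" line per sentence pair and relies on equizip to
# raise if the two files do not have the same number of lines.
#     merge_parallel('train.src', 'train.trg', 'train.src-trg')
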
def train_rnnrbm(train, rnnrbm, epochs=1000, test=None, bokeh=True,
                 load_path=None):
    cdk = theano.shared(10)
    lr = theano.shared(float32(0.004))

    cost, v_sample = rnnrbm.cost(examples=x, mask=x_mask, k=cdk)

    error_rate = MismulitclassificationRate().apply(x, v_sample[-1], x_mask)
    error_rate.name = "error on note as a whole"
    mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
    mistake_rate.name = "single error within note"
    cost.name = 'rbm_cost'

    model = Model(cost)
    cg = ComputationGraph([cost])
    step_rule = CompositeRule(
        [RemoveNotFinite(), StepClipping(30.0), Adam(learning_rate=lr),
         StepClipping(6.0), RemoveNotFinite()])  # Scale(0.01)
    gradients = dict(equizip(cg.parameters,
                             T.grad(cost, cg.parameters,
                                    consider_constant=[v_sample])))
    algorithm = GradientDescent(step_rule=step_rule, gradients=gradients,
                                cost=cost, params=cg.parameters)
    algorithm.add_updates(cg.updates)
    extensions = [
        SharedVariableModifier(parameter=cdk,
                               function=lambda n, v: rnnrbm_cdk[n]
                               if rnnrbm_cdk.get(n) else v),
        SharedVariableModifier(parameter=lr,
                               function=lambda n, v: float32(0.78 * v)
                               if n % (200 * 5) == 0 else v),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(
            [cost, error_rate, mistake_rate, ],
            # hidden_states, debug_val, param_nans,
            # aggregation.mean(algorithm.total_gradient_norm)],  # + params,
            prefix="train",
            after_epoch=False, every_n_batches=40),
        Timing(),
        Printing(),
        ProgressBar()]
    if test is not None:
        extensions.append(DataStreamMonitoring(
            [cost, error_rate, mistake_rate],
            data_stream=test,
            updates=cg.updates,
            prefix="test", after_epoch=False, every_n_batches=40))
    if bokeh:
        extensions.append(Plot(
            'Training RNN-RBM',
            channels=[
                ['train_error on note as a whole',
                 'train_single error within note',
                 'test_error on note as a whole',
                 'test_single error within note'],
                ['train_final_cost'],
                # ['train_total_gradient_norm'],
            ]))

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train,
                         model=model,
                         extensions=extensions)
    return main_loop

def _compile_initial_state_computer(self):
    initial_states = [
        self.generator.initial_state(
            name, self.beam_size,
            **dict(equizip(self.context_names, self.contexts)))
        for name in self.state_names]
    self.initial_state_computer = function(
        self.contexts, initial_states, on_unused_input='ignore')

def apply(self, *args, **kwargs):
    """Applies forward and backward networks and concatenates outputs."""
    forward = self.children[0].apply(as_list=True, *args, **kwargs)
    backward = [x[::-1] for x in
                self.children[1].apply(reverse=True, as_list=True,
                                       *args, **kwargs)]
    return [tensor.concatenate([f, b], axis=2)
            for f, b in equizip(forward, backward)]

def _cache(self):
    data = next(self.child_epoch_iterator)
    indexes = range(len(data[0]))
    self.rng.shuffle(indexes)
    data = [[dt[i] for i in indexes] for dt in data]
    self.cache = OrderedDict([(name, self.cache[name] + dt)
                              for name, dt
                              in equizip(self.data_stream.sources, data)])
    self.num_frames.extend([x.shape[0] for x in data[0]])

def _push_allocation_config(self):
    super(BatchNormalizedMLP, self)._push_allocation_config()
    # Do the extra allocation pushing for the BatchNormalization
    # bricks. They need as their input dimension the output dimension
    # of each linear transformation. Exclude the first dimension,
    # which is the input dimension.
    for act, dim in equizip(self.activations, self.dims[1:]):
        assert isinstance(act.children[0], BatchNormalization)
        act.children[0].input_dim = dim

def result_to_lists(result):
    outputs, masks, costs, weights, representation, states = \
        [array.T for array in result]
    outputs = [list(output[:mask.sum()])
               for output, mask in equizip(outputs, masks)]
    representation = representation.T
    # print representation.shape
    costs = list(costs.T.sum(axis=0))
    return outputs, costs, weights, representation, states

def _push_allocation_config(self):
    if not len(self.dims) - 1 == len(self.linear_transformations):
        raise ValueError
    for input_dim, output_dim, layer in \
            equizip(self.dims[:-1], self.dims[1:],
                    self.linear_transformations):
        layer.input_dim = input_dim
        layer.output_dim = output_dim
        if getattr(self, 'use_bias', None) is not None:
            layer.use_bias = self.use_bias

def _compile_initial_state_and_context_computer(self):
    initial_states = VariableFilter(
        applications=[self.generator.initial_states],
        roles=[OUTPUT])(self.cg)
    outputs = OrderedDict([(v.tag.name, v) for v in initial_states])
    beam_size = unpack(VariableFilter(
        applications=[self.generator.initial_states],
        name='batch_size')(self.cg))
    for name, context in equizip(self.context_names, self.contexts):
        outputs[name] = context
    for name, embedding in equizip(self.topical_names,
                                   self.topical_embeddings):
        outputs[name] = embedding
    for name, context in equizip(self.topical_context_names,
                                 self.topical_contexts):
        outputs[name] = context
    for name, embedding in equizip(self.content_names,
                                   self.content_embeddings):
        outputs[name] = embedding
    outputs['beam_size'] = beam_size
    self.initial_state_and_context_computer = function(
        self.inputs, outputs, on_unused_input='ignore')

def get_data(self, request=None):
    data = next(self.child_epoch_iterator)
    data = OrderedDict(equizip(self.sources, data))
    dt = data[self.source]
    indexes = ((slice(None, None, self.step),) +
               (slice(None),) * (len(dt.shape) - 1))
    subsampled = dt[indexes]
    data[self.source] = subsampled
    return data.values()

def _compute_jacobians(self):
    if self.case_costs is None or self.case_costs.ndim == 0:
        raise ValueError("can't infer jacobians; no case_costs specified")
    elif self.intpic_parameters is None or len(self.parameters) == 0:
        raise ValueError("can't infer jacobians; no parameters specified")
    logging.info("Taking the intpic jacobians")
    jacobians = gradient.jacobian(self.case_costs, self.intpic_parameters)
    jacobian_map = OrderedDict(equizip(self.intpic_parameters, jacobians))
    logging.info("The intpic jacobian computation graph is built")
    return jacobian_map

def test_save_load_parameter_values():
    param_values = [("/a/b", numpy.zeros(3)), ("/a/c", numpy.ones(4))]
    filename = tempfile.mkdtemp() + 'params.npz'
    save_parameter_values(dict(param_values), filename)
    loaded_values = sorted(list(load_parameter_values(filename).items()),
                           key=lambda tuple_: tuple_[0])
    assert len(loaded_values) == len(param_values)
    for old, new in equizip(param_values, loaded_values):
        assert old[0] == new[0]
        assert numpy.all(old[1] == new[1])

def get_snapshot(self, data):
    """Evaluate all role-carrying Theano variables on given data.

    Parameters
    ----------
    data : dict of (data source, data) pairs
        Data for input variables. The sources should match with the
        names of the input variables.

    Returns
    -------
    Dictionary of (variable, variable value on given data) pairs.

    """
    role_variables = [var for var in self.variables
                      if hasattr(var.tag, "roles") and
                      not is_shared_variable(var)]
    value_holders = [shared_like(var) for var in role_variables]
    function = self.get_theano_function(equizip(value_holders,
                                                role_variables))
    function(*(data[input_.name] for input_ in self.inputs))
    return OrderedDict(
        [(var, value_holder.get_value(borrow=True))
         for var, value_holder in equizip(role_variables, value_holders)])

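# A hedged usage sketch (the variable names are made up), assuming this is a
# method of a blocks ComputationGraph whose only input is named 'features':
#     cg = ComputationGraph(cost)
#     snapshot = cg.get_snapshot({'features': features_batch})
#     # snapshot maps each role-carrying variable to its value on the batch
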
def scan(fn, sequences=None, outputs_info=None, non_sequences=None,
         **scan_kwargs):
    # we don't care about the order, as long as it's consistent
    sequences = OrderedDict(sequences or [])
    outputs_info = OrderedDict(outputs_info or [])
    non_sequences = OrderedDict(non_sequences or [])

    # make sure names are unique
    assert not (set(sequences) & set(outputs_info) & set(non_sequences))

    def listified_fn(*input_list):
        input_dict = OrderedDict()
        input_it = iter(input_list)
        input_dict.update(equizip(sequences.keys(),
                                  it.islice(input_it, len(sequences))))
        for name, info in outputs_info.items():
            if info is None:
                continue  # no inputs
            elif isinstance(info, (dict, OrderedDict)):
                ntaps = len(info.get("taps", [-1]))
            else:
                # assume some kind of tensor variable or numpy array
                ntaps = 1
            taps = [next(input_it) for _ in range(ntaps)]
            input_dict[name] = taps if ntaps > 1 else taps[0]
        input_dict.update(equizip(non_sequences.keys(),
                                  it.islice(input_it, len(non_sequences))))
        # input_list should be exactly empty here
        try:
            next(input_it)
        except StopIteration:
            pass
        else:
            assert False
        output_dict = fn(**input_dict)
        output_list = [output_dict[output_name].copy(name=output_name)
                       for output_name in outputs_info.keys()]
        return output_list

    outputs, updates = theano.scan(
        listified_fn,
        sequences=sequences.values(),
        outputs_info=outputs_info.values(),
        non_sequences=non_sequences.values(),
        **scan_kwargs)
    outputs = OrderedDict(equizip(outputs_info.keys(), outputs))
    return outputs, updates

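# A minimal usage sketch for the dict-based scan wrapper above, assuming a
# working Theano install and Python 2 (the wrapper passes dict .values()
# straight to theano.scan). The names `step`, `total` and `running_max` are
# made up for illustration; two recurrent outputs are used because
# theano.scan only returns a list when there is more than one output.
import theano
import theano.tensor as tensor

x = tensor.vector('x')
init = tensor.constant(0., dtype=theano.config.floatX)


def step(x, total, running_max):
    # the keys of the returned dict must match the outputs_info keys
    return {'total': total + x,
            'running_max': tensor.maximum(running_max, x)}


outputs, updates = scan(step,
                        sequences=[('x', x)],
                        outputs_info=[('total', init),
                                      ('running_max', init)])
f = theano.function([x], [outputs['total'][-1], outputs['running_max'][-1]],
                    updates=updates)
# f([1., 2., 3.]) -> [6., 3.]  (for non-negative inputs)
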
def main(mode, save_path, num_batches, data_path=None):
    reverser = WordReverser(100, len(char2code), name="reverser")

    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code, level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = dataset.get_example_stream()
        data_stream = Filter(data_stream, _filter_long)
        data_stream = Mapping(data_stream, reverse_words,
                              add_sources=("targets",))
        data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(10))
        data_stream = Padding(data_stream)
        data_stream = Mapping(data_stream, _transpose)

        # Initialization settings
        reverser.weights_init = IsotropicGaussian(0.1)
        reverser.biases_init = Constant(0.0)
        reverser.push_initialization_config()
        reverser.encoder.weights_init = Orthogonal()
        reverser.generator.transition.weights_init = Orthogonal()

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = reverser.cost(
            chars, chars_mask, targets, targets_mask).sum()
        batch_size = chars.shape[1].copy(name="batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        parameters = model.get_parameter_dict()
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in parameters.items()],
                        width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Define the training algorithm.
        cg = ComputationGraph(cost)
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

        # Fetch variables useful for debugging
        generator = reverser.generator
        (energies,) = VariableFilter(
            applications=[generator.readout.readout],
            name_regex="output")(cg.variables)
        (activations,) = VariableFilter(
            applications=[generator.transition.apply],
            name=generator.transition.apply.states[0])(cg.variables)
        max_length = chars.shape[0].copy(name="max_length")
        cost_per_character = aggregation.mean(
            batch_cost, batch_size * max_length).copy(
                name="character_log_likelihood")
        min_energy = energies.min().copy(name="min_energy")
        max_energy = energies.max().copy(name="max_energy")
        mean_activation = abs(activations).mean().copy(
            name="mean_activation")
        observables = [
            cost, min_energy, max_energy, mean_activation,
            batch_size, max_length, cost_per_character,
            algorithm.total_step_norm, algorithm.total_gradient_norm]
        for name, parameter in parameters.items():
            observables.append(parameter.norm(2).copy(name + "_norm"))
            observables.append(
                algorithm.gradients[parameter].norm(2).copy(
                    name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(
            observables, prefix="average", every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches)
                # This shows a way to handle NaN emerging during
                # training: simply finish it.
                .add_condition(["after_batch"], _is_nan),
                # Saving the model and the log separately is convenient,
                # because loading the whole pickle takes quite some time.
                Checkpoint(save_path, every_n_batches=500,
                           save_separately=["model", "log"]),
                Printing(every_n_batches=1)])

        main_loop.run()
    elif mode == "sample" or mode == "beam_search":
        chars = tensor.lmatrix("input")
        generated = reverser.generate(chars)
        model = Model(generated)
        logger.info("Loading the model..")
        model.set_parameter_values(load_parameter_values(save_path))

        def generate(input_):
            """Generate output sequences for an input sequence.

            Encapsulates most of the difference between sampling and beam
            search.

            Returns
            -------
            outputs : list of lists
                Trimmed output sequences.
            costs : list
                The negative log-likelihood of generating the respective
                sequences.

            """
            if mode == "beam_search":
                samples, = VariableFilter(
                    applications=[reverser.generator.generate],
                    name="outputs")(ComputationGraph(generated[1]))
                # NOTE: this will recompile beam search functions
                # every time user presses Enter. Do not create
                # a new `BeamSearch` object every time if
                # speed is important for you.
                beam_search = BeamSearch(samples)
                outputs, costs = beam_search.search(
                    {chars: input_}, char2code['</S>'],
                    3 * input_.shape[0])
            else:
                _1, outputs, _2, _3, costs = (
                    model.get_theano_function()(input_))
                outputs = list(outputs.T)
                costs = list(costs.T)
                for i in range(len(outputs)):
                    outputs[i] = list(outputs[i])
                    try:
                        true_length = outputs[i].index(char2code['</S>']) + 1
                    except ValueError:
                        true_length = len(outputs[i])
                    outputs[i] = outputs[i][:true_length]
                    costs[i] = costs[i][:true_length].sum()
            return outputs, costs

        while True:
            try:
                line = input("Enter a sentence\n")
                message = ("Enter the number of samples\n" if mode == "sample"
                           else "Enter the beam size\n")
                batch_size = int(input(message))
            except EOFError:
                break
            except Exception:
                traceback.print_exc()
                continue

            encoded_input = [char2code.get(char, char2code["<UNK>"])
                             for char in line.lower().strip()]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input,))[0]
            print("Target: ", target)

            samples, costs = generate(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size, axis=1))
            messages = []
            for sample, cost in equizip(samples, costs):
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=operator.itemgetter(0), reverse=True)
            for _, message in messages:
                print(message)

def test_equizip():
    yield verify_same, equizip, zip, None, [3, 4], [9, 2], [9, 9]
    yield verify_same, equizip, zip, None, [3, 4, 8, 4, 2]
    assert_raises(IterableLengthMismatch, list, equizip([5, 4, 3], [2, 1]))
    assert_raises(IterableLengthMismatch, list, equizip([5, 4, 3], []))

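# For reference, the behaviour the tests above exercise: equizip iterates
# like zip but raises IterableLengthMismatch instead of silently truncating
# when the iterables have unequal lengths, e.g.
#     list(equizip([1, 2], ['a', 'b']))   # [(1, 'a'), (2, 'b')]
#     list(equizip([1, 2, 3], ['a']))     # raises IterableLengthMismatch
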
def _compute_jacobians(components, parameters):
    logging.info("Taking the component jacobians")
    jacobians = gradient.jacobian(components, parameters)
    jacobian_map = OrderedDict(equizip(parameters, jacobians))
    logging.info("The component jacobian computation graph is built")
    return jacobian_map

def _push_allocation_config(self):
    for input_dim, output_dim, child in \
            equizip(self.input_dims, self.output_dims, self.children):
        child.input_dim = input_dim
        child.output_dim = output_dim

def result_to_lists(result):
    outputs, masks, costs = [array.T for array in result]
    outputs = [list(output[:int(mask.sum())])
               for output, mask in equizip(outputs, masks)]
    costs = list(costs.T.sum(axis=0))
    return outputs, costs

def apply(self, *args, **kwargs):
    args = args + tuple(kwargs[name]
                        for name in self.input_names[len(args):])
    return [child.apply(arg)
            for arg, child in equizip(args, self.children)]

def __call__(self, parser, args, values, option_string=None):
    setattr(args, self.dest, equizip(values[::2], values[1::2]))

beam_search = BeamSearch(samples);

for line in f_in.readlines(): #{
    inp = _tokenise(line);
    form = '|'.join(line.strip().split('|||')[1:]);
    encoded_input = _encode(inp);
    # print(inp, '→', encoded_input, sys.stderr);
    target = morph_lookup((encoded_input,))[0]
    # print('Target:', '→', target, sys.stderr);
    input_arr = numpy.repeat(numpy.array(encoded_input)[:, None],
                             BEAM, axis=1);
    samples, costs = generate(m, input_arr, beam_search);
    total = total + 1.0;
    messages = []
    for sample, cost in equizip(samples, costs): #{
        # message = "({})".format(cost)
        message = "".join(Globals.code2char[code] for code in sample)
        if sample == target: #{
            message += " CORRECT!"
        #}
        messages.append([float(cost), message])
        # messages.sort(key=operator.itemgetter(0), reverse=True)
    #}
    messages.sort()
    for message in messages[0:n_best]: #{
        if 'CORRECT' in message[1]: #{
            correct = correct + 1.0;
        #}
        print('%.2f\t%.6f\t%s\t%s' % (correct / total * 100.0, message[0],
                                      form, message[1]), file=sys.stderr)
    #}

mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
mistake_rate.name = "single error within note"

model = Model(cost)
cg = ComputationGraph([cost])
step_rule = CompositeRule([
    RemoveNotFinite(), StepClipping(20.0), Adam(learning_rate=.001),
    StepClipping(3.0), RemoveNotFinite()])  # Scale(0.01)
gradients = dict(
    equizip(cg.parameters,
            T.grad(cost, cg.parameters, consider_constant=[v_sample])))
algorithm = GradientDescent(step_rule=step_rule, gradients=gradients,
                            cost=cost, params=cg.parameters)
# algorithm = GradientDescent(step_rule=step_rule, cost=cost,
#                             params=cg.parameters)

# l2/l1 regularization
# reg = 0.000005
# params = VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)
# param_nans = 0
# for i, p in enumerate(params):
#     # cost += reg * abs(p).sum()
#     cost += reg * (p ** 2).sum()
#     param_nans += T.isnan(p).sum()