def listified_fn(*input_list):
        input_dict = OrderedDict()
        input_it = iter(input_list)
        input_dict.update(equizip(sequences.keys(),
                                  it.islice(input_it, len(sequences))))
        for name, info in outputs_info.items():
            if info is None:
                continue # no inputs
            elif isinstance(info, (dict, OrderedDict)):
                ntaps = len(info.get("taps", [-1]))
            else:
                # assume some kind of tensor variable or numpy array
                ntaps = 1
            taps = [next(input_it) for _ in range(ntaps)]
            input_dict[name] = taps if ntaps > 1 else taps[0]
        input_dict.update(equizip(non_sequences.keys(),
                                  it.islice(input_it, len(non_sequences))))

        # the input iterator should be exhausted at this point
        try:
            next(input_it)
        except StopIteration:
            pass
        else:
            assert False

        output_dict = fn(**input_dict)
        output_list = [output_dict[output_name].copy(name=output_name)
                       for output_name in outputs_info.keys()]
        return output_list
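
For reference, equizip behaves like the built-in zip except that it raises an error when its iterables turn out to have different lengths (the test in Example #32 below checks exactly this). A minimal sketch of the idea, with a plain ValueError standing in for the library's own IterableLengthMismatch:

def equizip_sketch(*iterables):
    """Like zip, but fail loudly if the iterables differ in length."""
    sentinel = object()
    iterators = [iter(iterable) for iterable in iterables]
    while True:
        values = [next(iterator, sentinel) for iterator in iterators]
        if all(value is sentinel for value in values):
            return
        if any(value is sentinel for value in values):
            raise ValueError("iterables have different lengths")
        yield tuple(values)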
Example #2
    def compute_steps(self, previous_steps):
        """Build a Theano expression for steps for all parameters.

        Override this method if you want to process the steps
        with respect to all parameters as a whole, not parameter-wise.

        Parameters
        ----------
        previous_steps : OrderedDict
            An :class:`~OrderedDict` of
            (:class:`~tensor.TensorSharedVariable`,
            :class:`~tensor.TensorVariable`) pairs. The keys are the
            parameters being trained, the values are the expressions for
            quantities related to gradients of the cost with respect to
            the parameters, either the gradients themselves or steps in
            related directions.

        Returns
        -------
        steps : OrderedDict
            A dictionary of the proposed steps in the same form as
            `previous_steps`.
        updates : list
            A list of tuples representing updates to be performed.

        """
        parameter_wise = [self.compute_step(parameter,
                                            previous_steps[parameter])
                          for parameter in previous_steps]
        steps, updates = equizip(*parameter_wise)
        steps = OrderedDict((parameter, step) for parameter, step
                            in equizip(previous_steps.keys(), steps))
        updates = list(itertools.chain(*updates))
        return steps, updates
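
The equizip(*parameter_wise) call above transposes a list of (step, updates) pairs into one tuple of steps and one tuple of update lists. A tiny standalone illustration with made-up placeholder values (plain zip is used here; equizip behaves the same when the lengths match):

parameter_wise = [("step_a", ["update_a"]), ("step_b", ["update_b"])]
steps, updates = zip(*parameter_wise)
# steps == ("step_a", "step_b")
# updates == (["update_a"], ["update_b"])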
Example #3
 def _push_allocation_config(self):
     if not len(self.dims) - 1 == len(self.linear_transformations):
         raise ValueError
     for input_dim, output_dim, layer in \
             equizip(self.dims[:-1], self.dims[1:],
                     self.linear_transformations):
         layer.input_dim = input_dim
         layer.output_dim = output_dim
         layer.use_bias = self.use_bias
     for dim, bn in equizip(self.dims[1:], self.batch_norms):
         bn.input_dim = dim
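
The zips above pair consecutive entries of self.dims as (input_dim, output_dim) for each linear layer, and self.dims[1:] with the batch normalization bricks. A small illustration with assumed layer sizes:

dims = [784, 500, 10]                   # assumed example sizes
list(zip(dims[:-1], dims[1:]))          # [(784, 500), (500, 10)]
list(zip(dims[1:], ["bn_0", "bn_1"]))   # [(500, 'bn_0'), (10, 'bn_1')]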
Example #4
    def compute_steps(self, previous_steps):
        subparams = [subparam for (subparam, _, _, _) in self.subtensor_params.values()]
        keys = [param for param in previous_steps if param not in subparams]
        parameter_wise = [self.compute_step(param, previous_steps[param]) for param in keys]
        
        # We use a special compute_step for lookup tables
        for param, (subparam, canonized_indices, _, _) in self.subtensor_params.items():
            keys.append(subparam)
            parameter_wise.append(self.compute_step_subparam(param, canonized_indices, previous_steps[subparam]))

        steps, updates = equizip(*parameter_wise)
        steps = OrderedDict((param, step) for param, step 
                            in equizip(keys, steps))
        updates = list(itertools.chain(*updates))
        return steps, updates
    def get_data(self, request=None):
        data = next(self.child_epoch_iterator)
        data = OrderedDict(equizip(self.sources, data))
        feature_batch = data[self.source]

        windowed_features = []
        for features in feature_batch:
            features_padded = features.copy()

            features_shifted = [features]
            # shift forward
            for i in xrange(self.window_size / 2):
                feats = numpy.roll(features_padded, i + 1, axis=0)
                feats[:i + 1, :] = 0
                features_shifted.append(feats)
            features_padded = features.copy()

            # shift backward
            for i in xrange(self.window_size / 2):
                feats = numpy.roll(features_padded, -i - 1, axis=0)
                feats[-i - 1:, :] = 0
                features_shifted.append(feats)
            windowed_features.append(numpy.concatenate(
                features_shifted, axis=1))
        data[self.source] = windowed_features
        return data.values()
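
The roll-and-zero pattern above stacks time-shifted copies of the feature matrix side by side, so every frame also carries its neighbours' features. A toy NumPy illustration with one forward and one backward shift (i.e. window_size assumed to be 2):

import numpy

features = numpy.arange(6, dtype=float).reshape(3, 2)   # (time, feature_dim)

forward = numpy.roll(features, 1, axis=0)
forward[:1, :] = 0            # zero the frames that wrapped around

backward = numpy.roll(features, -1, axis=0)
backward[-1:, :] = 0

windowed = numpy.concatenate([features, forward, backward], axis=1)
# windowed.shape == (3, 6)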
Example #6
    def compute_next_states(self, contexts, topical_embeddings, topical_contexts,
                            content_embeddings, states, outputs):
        """Computes next states.

        Parameters
        ----------
        contexts : dict
            A {name: :class:`numpy.ndarray`} dictionary of contexts.
        states : dict
            A {name: :class:`numpy.ndarray`} dictionary of states.
        outputs : :class:`numpy.ndarray`
            A :class:`numpy.ndarray` of this step's outputs.

        Returns
        -------
        A {name: numpy.array} dictionary of next states.

        """
        input_states = [states[name] for name in self.input_state_names]
        next_values = self.next_state_computer(
            *(list(contexts.values()) + list(topical_embeddings.values()) +
              list(topical_contexts.values()) + list(content_embeddings.values()) +
              input_states + [outputs]))
        return OrderedDict(equizip(self.state_names, next_values))
Example #7
    def predict(f_gen, X=None, Y=None, filenames=None, savepath=None, n_attempts=None):
        if X is None or Y is None or filenames is None:
            X, Y, filenames = cocoXYFilenames(dataType='val2014')
        ep = DataETL.getTokenizedStream(
            X=X, Y=Y, sources=('X', 'Y'), batch_size=1).get_epoch_iterator()
        if savepath:
            generated_captions = {}

        for filename in filenames:
            try:
                # No good way to make sure the filename is matching
                im_vects, txt_enc = ep.next()
                txt = " ".join(vect.inverse_transform(txt_enc))
                print "\nTrying for: ", txt
                if n_attempts:
                    batch_size = n_attempts
                else:
                    message=("Number of attempts to generate correct text? ")
                    batch_size = int(input(message)) 

                # make this call compatible with GRUs and LSTMs
                # GRU - generated is tuple of 3 elements (states, outputs, costs) 
                # LSTM - generated is tuple of 4 elements (states, cells, outputs, costs) 
                generated = f_gen(
                        np.repeat(im_vects, batch_size, 0)
                        )
                outputs = generated[-2]
                costs = generated[-1]

                outputs = list(outputs.T)
                costs = list(costs.T)
                for i in range(len(outputs)):
                    outputs[i] = list(outputs[i])
                    try:
                        # 0 is my PAD character, via foxhound Tokenizer
                        true_length = outputs[i].index(0)
                    except ValueError:
                        # full sequence length
                        true_length = len(outputs[i])
                    outputs[i] = outputs[i][:true_length]
                    costs[i] = costs[i][:true_length].mean()
                messages = []
                for sample, cost in equizip(outputs, costs):
                    # vect.inverse_transform needs a shape (seq, 1) array
                    sample = np.array(sample).reshape(-1, 1)
                    message = "({0:0.3f}) ".format(cost)
                    message += " ".join(vect.inverse_transform(sample))
                    messages.append((cost, message))
                messages.sort(key=operator.itemgetter(0), reverse=True)

                # convert to decimal to be picklable
                messages = [(decimal.Decimal(float(cost)), message) for cost, message in messages] 
                for _, message in messages:
                    print(message)
                if savepath:
                    generated_captions[filename] = messages
            except:
                if savepath:
                    dict2json(generated_captions, savepath, cls=DecimalEncoder)
                return
Example #8
    def _compile_initial_state_and_context_computer(self):
        initial_states = VariableFilter(
                            applications=[self.generator.initial_states],
                            roles=[OUTPUT])(self.cg)

        #print("initial_states")
        #print initial_states

        initial_states2 = VariableFilter(
                            bricks=[Encoder],
                            roles=[OUTPUT])(self.cg)
        
        outputs = OrderedDict([(v.tag.name, v) for v in initial_states])

        outputs[initial_states2[0].tag.name] = initial_states2[0]

        beam_size = unpack(VariableFilter(
                            applications=[self.generator.initial_states],
                            name='batch_size')(self.cg))
        print self.inputs
        #print("outputs")
        #print outputs

        for name, context in equizip(self.context_names, self.contexts):
            outputs[name] = context
        outputs['beam_size'] = beam_size
        self.initial_state_and_context_computer = function(
            self.inputs, outputs, on_unused_input='ignore')
Example #9
    def __init__(self, step_rule=None, gradients=None, known_grads=None,
                 consider_constant=None, on_unused_sources='raise',
                 theano_func_kwargs=None, **kwargs):
        if gradients:
            kwargs.setdefault("parameters", gradients.keys())
        super(GradientDescent, self).__init__(**kwargs)

        self.gradients = gradients
        if not self.gradients:
            logger.info("Taking the cost gradient")
            self.gradients = dict(
                equizip(self.parameters, tensor.grad(
                    self.cost, self.parameters,
                    known_grads=known_grads,
                    consider_constant=consider_constant)))
            logger.info("The cost gradient computation graph is built")
        else:
            if known_grads:
                raise ValueError("known_grads has no effect when gradients "
                                 "are passed in")
            if consider_constant is not None:
                raise ValueError("consider_constant has no effect when "
                                 "gradients are passed in")
        self.step_rule = step_rule if step_rule else Scale()

        self.total_gradient_norm = l2_norm(
            self.gradients.values()).copy(name="total_gradient_norm")
        self.steps, self.step_rule_updates = (
            self.step_rule.compute_steps(self.gradients))
        self.total_step_norm = l2_norm(
            self.steps.values()).copy(name="total_step_norm")
        self.on_unused_sources = on_unused_sources
        self.theano_func_kwargs = (theano_func_kwargs if theano_func_kwargs
                                   is not None else dict())
Example #10
File: bn.py  Project: Excalibur269/blocks
    def _allocate(self):
        input_dim = ((self.input_dim,)
                     if not isinstance(self.input_dim, collections.Sequence)
                     else self.input_dim)
        broadcastable = (tuple(False for _ in input_dim)
                         if self.broadcastable is None else self.broadcastable)
        if len(input_dim) != len(broadcastable):
            raise ValueError("input_dim and broadcastable must be same length")
        var_dim = tuple(1 if broadcast else dim for dim, broadcast in
                        equizip(input_dim, broadcastable))

        # "beta", from the Ioffe & Szegedy manuscript.
        if self.learn_shift:
            self.shift = shared_floatx_nans(var_dim, name='batch_norm_shift',
                                            broadcastable=broadcastable)
            add_role(self.shift, BATCH_NORM_SHIFT_PARAMETER)
            self.parameters.append(self.shift)
        else:
            self.shift = tensor.constant(0, dtype=theano.config.floatX)

        if self.learn_scale and not self.mean_only:
            # "gamma", from the Ioffe & Szegedy manuscript.
            self.scale = shared_floatx_nans(var_dim, name='batch_norm_scale',
                                            broadcastable=broadcastable)

            add_role(self.scale, BATCH_NORM_SCALE_PARAMETER)
            self.parameters.append(self.scale)
        else:
            self.scale = tensor.constant(1., dtype=theano.config.floatX)

        self._allocate_population_statistics(var_dim, broadcastable)
Example #11
File: mnist.py  Project: youralien/MLFun
 def predict(f_gen):
     ep = getTestStream(batch_size=1).get_epoch_iterator()
     while True:
         im_vects, txt_enc = ep.next()
         mnist_txt = "".join(code2char[code] for code in txt_enc[0])
         print "\nTrying for: ", mnist_txt
         message=("Number of attempts to generate correct text? ")
         batch_size = int(input(message))
         states, outputs, costs = f_gen(
                 np.repeat(im_vects, batch_size, 0)
                 )
         outputs = list(outputs.T)
         costs = list(costs.T)
         for i in range(len(outputs)):
             outputs[i] = list(outputs[i])
             try:
                 # 0 was my stop character for MNIST alphabetic
                 true_length = outputs[i].index(0)
             except ValueError:
                 # full sequence length
                 true_length = len(outputs[i])
             outputs[i] = outputs[i][:true_length]
             costs[i] = costs[i][:true_length].sum()
         messages = []
         for sample, cost in equizip(outputs, costs):
             message = "({0:0.3f}) ".format(cost)
             message += "".join(code2char[code] for code in sample)
             messages.append((cost, message))
         messages.sort(key=operator.itemgetter(0), reverse=True)
         for _, message in messages:
             print(message)
Example #12
 def _compile_initial_state_computer(self):
     # TODO: should be now extractable from the computation graph
     initial_states = self.generator.initial_states(
             1, as_dict=True,
             **dict(equizip(self.context_names, self.contexts)))
     self.initial_state_computer = function(
         self.contexts, initial_states, on_unused_input='ignore')
Example #13
 def result_to_lists(result):
     outputs, masks, costs, weights = [array.T for array in result]
     outputs = [list(output[:mask.sum()])
                for output, mask in equizip(outputs, masks)]
     costs = list(costs.T.sum(axis=0))
     weights = numpy.argsort(-weights, axis=0)
     return outputs, costs, weights 
Example #14
 def get_aggregated_values(self):
     """Readout the aggregated values."""
     if not self._initialized:
         raise Exception("To readout you must first initialize, then "
                         "process batches!")
     ret_vals = self._readout_fun()
     return OrderedDict(equizip(self.variable_names, ret_vals))
Example #15
File: select_.py  Project: EloiZ/DeepCube
    def parse(string):
        """Constructs a path from its string representation.

        .. todo::

            More error checking.

        Parameters
        ----------
        string : str
            String representation of the path.

        """
        elements = Path.separator_re.split(string)[1:]
        separators = elements[::2]
        parts = elements[1::2]
        if not len(elements) == 2 * len(separators) == 2 * len(parts):
            raise ValueError

        nodes = []
        for separator, part in equizip(separators, parts):
            if separator == Path.separator:
                nodes.append(Path.BrickName(part))
            elif separator == Path.parameter_separator:
                nodes.append(Path.ParameterName(part))
            else:
                # This cannot happen if separator_re is a correct regexp
                raise ValueError("Wrong separator {}".format(separator))

        return Path(nodes)
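
The slicing into separators and parts relies on re.split with a capturing group, which keeps the separators in the result. A sketch with an assumed separator pattern (the real Path.separator_re may be defined differently):

import re

separator_re = re.compile(r'(/|\.)')    # assumed: '/' before brick names, '.' before parameter names
elements = separator_re.split('/mlp/linear_0.W')[1:]
separators = elements[::2]              # ['/', '/', '.']
parts = elements[1::2]                  # ['mlp', 'linear_0', 'W']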
Example #16
File: __init__.py  Project: Fdenpc/blocks
    def __init__(self, step_rule=None, gradients=None, known_grads=None,
                 **kwargs):
        if gradients:
            kwargs.setdefault("params", gradients.keys())
        super(GradientDescent, self).__init__(**kwargs)

        self.gradients = gradients
        if not self.gradients:
            logger.info("Taking the cost gradient")
            self.gradients = dict(
                equizip(self.params, tensor.grad(self.cost, self.params,
                                                 known_grads=known_grads)))
            logger.info("The cost gradient computation graph is built")
        else:
            if known_grads:
                raise ValueError("known_grads has no effect when gradients "
                                 "are passed in")
        self.step_rule = step_rule if step_rule else Scale()

        self.total_gradient_norm = named_copy(l2_norm(self.gradients.values()),
                                              "total_gradient_norm")
        self.steps, self.step_rule_updates = (
            self.step_rule.compute_steps(self.gradients))
        self.total_step_norm = named_copy(l2_norm(self.steps.values()),
                                          "total_step_norm")
Example #17
def merge_parallel(src_filename, trg_filename, merged_filename):
    with open(src_filename, 'r') as left:
        with open(trg_filename, 'r') as right:
            with open(merged_filename, 'w') as final:
                for lline, rline in equizip(left, right):
                    if (lline != '\n') and (rline != '\n'):
                        final.write(lline[:-1] + ' ||| ' + rline)
Example #18
def train_rnnrbm(train, rnnrbm, epochs=1000, test=None, bokeh=True,
                 load_path=None):
    cdk = theano.shared(10)
    lr = theano.shared(float32(0.004))

    cost, v_sample = rnnrbm.cost(examples=x, mask=x_mask, k=cdk)

    error_rate = MismulitclassificationRate().apply(x, v_sample[-1], x_mask)
    error_rate.name = "error on note as a whole"
    mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
    mistake_rate.name = "single error within note"
    cost.name = 'rbm_cost'

    model = Model(cost)
    cg = ComputationGraph([cost])
    step_rule = CompositeRule(
        [RemoveNotFinite(), StepClipping(30.0), Adam(learning_rate=lr), StepClipping(6.0),
         RemoveNotFinite()])  # Scale(0.01)
    gradients = dict(equizip(cg.parameters, T.grad(cost, cg.parameters, consider_constant=[v_sample])))
    algorithm = GradientDescent(step_rule=step_rule, gradients=gradients, cost=cost,
                                params=cg.parameters)
    algorithm.add_updates(cg.updates)
    extensions = [
        SharedVariableModifier(parameter=cdk,
                               function=lambda n, v: rnnrbm_cdk[n] if rnnrbm_cdk.get(n) else v),
        SharedVariableModifier(parameter=lr,
                               function=lambda n, v: float32(0.78 * v) if n % (200 * 5) == 0 else v),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(
            [cost, error_rate, mistake_rate, ],  # hidden_states, debug_val, param_nans,
            # aggregation.mean(algorithm.total_gradient_norm)],  #+ params,
            prefix="train",
            after_epoch=False, every_n_batches=40),
        Timing(),
        Printing(),
        ProgressBar()]
    if test is not None:
        extensions.append(DataStreamMonitoring(
            [cost, error_rate, mistake_rate],
            data_stream=test,
            updates=cg.updates,
            prefix="test", after_epoch=False, every_n_batches=40))
    if bokeh:
        extensions.append(Plot(
            'Training RNN-RBM',
            channels=[
                ['train_error on note as a whole', 'train_single error within note',
                 'test_error on note as a whole',
                 'test_single error within note'],
                ['train_final_cost'],
                # ['train_total_gradient_norm'],
            ]))

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train,
                         model=model,
                         extensions=extensions
                         )
    return main_loop
Example #19
 def _compile_initial_state_computer(self):
     initial_states = [
         self.generator.initial_state(
             name, self.beam_size,
             **dict(equizip(self.context_names, self.contexts)))
         for name in self.state_names]
     self.initial_state_computer = function(
         self.contexts, initial_states, on_unused_input='ignore')
Example #20
 def apply(self, *args, **kwargs):
     """Applies forward and backward networks and concatenates outputs."""
     forward = self.children[0].apply(as_list=True, *args, **kwargs)
     backward = [x[::-1] for x in
                 self.children[1].apply(reverse=True, as_list=True,
                                        *args, **kwargs)]
     return [tensor.concatenate([f, b], axis=2)
             for f, b in equizip(forward, backward)]
 def _cache(self):
     data = next(self.child_epoch_iterator)
     indexes = range(len(data[0]))
     self.rng.shuffle(indexes)
     data = [[dt[i] for i in indexes] for dt in data]
     self.cache = OrderedDict([(name, self.cache[name] + dt) for name, dt
                               in equizip(self.data_stream.sources, data)])
     self.num_frames.extend([x.shape[0] for x in data[0]])
Example #22
File: bn.py  Project: abdulqayyum/blocks
 def _push_allocation_config(self):
     super(BatchNormalizedMLP, self)._push_allocation_config()
     # Do the extra allocation pushing for the BatchNormalization
     # bricks. They need as their input dimension the output dimension
     # of each linear transformation.  Exclude the first dimension,
     # which is the input dimension.
     for act, dim in equizip(self.activations, self.dims[1:]):
         assert isinstance(act.children[0], BatchNormalization)
         act.children[0].input_dim = dim
Example #23
    def result_to_lists(result):
        outputs, masks, costs, weights, representation, states = \
            [array.T for array in result]
        outputs = [list(output[:mask.sum()])
                   for output, mask in equizip(outputs, masks)]

        representation = representation.T
        #print representation.shape
        costs = list(costs.T.sum(axis=0))

        return outputs, costs, weights, representation, states
Example #24
 def _push_allocation_config(self):
     if not len(self.dims) - 1 == len(self.linear_transformations):
         raise ValueError
     for input_dim, output_dim, layer in \
             equizip(self.dims[:-1], self.dims[1:],
                     self.linear_transformations):
         layer.input_dim = input_dim
         layer.output_dim = output_dim
         if getattr(self, 'use_bias', None) is not None:
             layer.use_bias = self.use_bias
Example #25
 def _compile_initial_state_and_context_computer(self):
     initial_states = VariableFilter(
                         applications=[self.generator.initial_states],
                         roles=[OUTPUT])(self.cg)
     outputs = OrderedDict([(v.tag.name, v) for v in initial_states])
     beam_size = unpack(VariableFilter(
                         applications=[self.generator.initial_states],
                         name='batch_size')(self.cg))
     for name, context in equizip(self.context_names, self.contexts):
         outputs[name] = context
     for name, embedding in equizip(self.topical_names, self.topical_embeddings):
         outputs[name] = embedding
     for name, context in equizip(self.topical_context_names, self.topical_contexts):
         outputs[name] = context
     for name, embedding in equizip(self.content_names, self.content_embeddings):
         outputs[name] = embedding
     outputs['beam_size'] = beam_size
     self.initial_state_and_context_computer = function(
         self.inputs, outputs, on_unused_input='ignore')
    def get_data(self, request=None):
        data = next(self.child_epoch_iterator)
        data = OrderedDict(equizip(self.sources, data))
        dt = data[self.source]

        indexes = ((slice(None, None, self.step),) +
                   (slice(None),) * (len(dt.shape) - 1))
        subsampled = dt[indexes]
        data[self.source] = subsampled
        return data.values()
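
The indexes tuple above subsamples along the first (time) axis only and leaves every other axis untouched. A small NumPy illustration with an assumed step of 2:

import numpy

dt = numpy.arange(12).reshape(6, 2)     # (frames, feature_dim)
step = 2
indexes = (slice(None, None, step),) + (slice(None),) * (dt.ndim - 1)
subsampled = dt[indexes]                # keeps rows 0, 2 and 4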
Example #27
 def _compute_jacobians(self):
     if self.case_costs is None or self.case_costs.ndim == 0:
         raise ValueError("can't infer jacobians; no case_costs specified")
     elif self.intpic_parameters is None or len(self.parameters) == 0:
         raise ValueError("can't infer jacobians; no parameters specified")
     logging.info("Taking the intpic jacobians")
     jacobians = gradient.jacobian(self.case_costs, self.intpic_parameters)
     jacobian_map = OrderedDict(equizip(self.intpic_parameters, jacobians))
     logging.info("The intpic jacobian computation graph is built")
     return jacobian_map
Example #28
def test_save_load_parameter_values():
    param_values = [("/a/b", numpy.zeros(3)), ("/a/c", numpy.ones(4))]
    filename = tempfile.mkdtemp() + 'params.npz'
    save_parameter_values(dict(param_values), filename)
    loaded_values = sorted(list(load_parameter_values(filename).items()),
                           key=lambda tuple_: tuple_[0])
    assert len(loaded_values) == len(param_values)
    for old, new in equizip(param_values, loaded_values):
        assert old[0] == new[0]
        assert numpy.all(old[1] == new[1])
Example #29
File: graph.py  Project: vikkamath/blocks
    def get_snapshot(self, data):
        """Evaluate all role-carrying Theano variables on given data.

        Parameters
        ----------
        data : dict of (data source, data) pairs
            Data for input variables. The sources should match with the
            names of the input variables.

        Returns
        -------
        Dictionary of (variable, variable value on given data) pairs.

        """
        role_variables = [var for var in self.variables
                          if hasattr(var.tag, "roles") and
                          not is_shared_variable(var)]
        value_holders = [shared_like(var) for var in role_variables]
        function = self.get_theano_function(
            equizip(value_holders, role_variables))
        function(*(data[input_.name] for input_ in self.inputs))
        return OrderedDict(
            [(var, value_holder.get_value(borrow=True))
             for var, value_holder in equizip(role_variables, value_holders)])
def scan(fn,
         sequences=None,
         outputs_info=None,
         non_sequences=None,
         **scan_kwargs):
    # we don't care about the order, as long as it's consistent
    sequences = OrderedDict(sequences or [])
    outputs_info = OrderedDict(outputs_info or [])
    non_sequences = OrderedDict(non_sequences or [])

    # make sure names are unique
    assert (len(set(sequences) | set(outputs_info) | set(non_sequences)) ==
            len(sequences) + len(outputs_info) + len(non_sequences))

    def listified_fn(*input_list):
        input_dict = OrderedDict()
        input_it = iter(input_list)
        input_dict.update(equizip(sequences.keys(),
                                  it.islice(input_it, len(sequences))))
        for name, info in outputs_info.items():
            if info is None:
                continue # no inputs
            elif isinstance(info, (dict, OrderedDict)):
                ntaps = len(info.get("taps", [-1]))
            else:
                # assume some kind of tensor variable or numpy array
                ntaps = 1
            taps = [next(input_it) for _ in range(ntaps)]
            input_dict[name] = taps if ntaps > 1 else taps[0]
        input_dict.update(equizip(non_sequences.keys(),
                                  it.islice(input_it, len(non_sequences))))

        # the input iterator should be exhausted at this point
        try:
            next(input_it)
        except StopIteration:
            pass
        else:
            assert False

        output_dict = fn(**input_dict)
        output_list = [output_dict[output_name].copy(name=output_name)
                       for output_name in outputs_info.keys()]
        return output_list

    outputs, updates = theano.scan(
        listified_fn,
        sequences=sequences.values(),
        outputs_info=outputs_info.values(),
        non_sequences=non_sequences.values(),
        **scan_kwargs)
    outputs = OrderedDict(equizip(outputs_info.keys(), outputs))
    return outputs, updates
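
A hypothetical usage sketch of this dict-based wrapper; the variable names and the toy step function below are illustrative only and not taken from the source project:

from collections import OrderedDict
import theano.tensor as tensor

x = tensor.matrix('x')      # a (time, features) sequence
h0 = tensor.vector('h0')    # initial value for the recurrent output 'h'

def step(x, h):
    # receives one time slice of each sequence and the previous output by name,
    # and returns the new outputs keyed by the same names as outputs_info
    return OrderedDict([('h', x + h)])

outputs, updates = scan(step,
                        sequences=[('x', x)],
                        outputs_info=[('h', h0)],
                        non_sequences=[])
h_over_time = outputs['h']  # symbolic (time, features) result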
Example #31
def main(mode, save_path, num_batches, data_path=None):
    reverser = WordReverser(100, len(char2code), name="reverser")

    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code,
                               level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = dataset.get_example_stream()
        data_stream = Filter(data_stream, _filter_long)
        data_stream = Mapping(data_stream,
                              reverse_words,
                              add_sources=("targets", ))
        data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(10))
        data_stream = Padding(data_stream)
        data_stream = Mapping(data_stream, _transpose)

        # Initialization settings
        reverser.weights_init = IsotropicGaussian(0.1)
        reverser.biases_init = Constant(0.0)
        reverser.push_initialization_config()
        reverser.encoder.weights_init = Orthogonal()
        reverser.generator.transition.weights_init = Orthogonal()

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = reverser.cost(chars, chars_mask, targets,
                                   targets_mask).sum()
        batch_size = chars.shape[1].copy(name="batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        parameters = model.get_parameter_dict()
        logger.info("Parameters:\n" +
                    pprint.pformat([(key, value.get_value().shape)
                                    for key, value in parameters.items()],
                                   width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Define the training algorithm.
        cg = ComputationGraph(cost)
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule(
                                        [StepClipping(10.0),
                                         Scale(0.01)]))

        # Fetch variables useful for debugging
        generator = reverser.generator
        (energies, ) = VariableFilter(applications=[generator.readout.readout],
                                      name_regex="output")(cg.variables)
        (activations, ) = VariableFilter(
            applications=[generator.transition.apply],
            name=generator.transition.apply.states[0])(cg.variables)
        max_length = chars.shape[0].copy(name="max_length")
        cost_per_character = aggregation.mean(
            batch_cost,
            batch_size * max_length).copy(name="character_log_likelihood")
        min_energy = energies.min().copy(name="min_energy")
        max_energy = energies.max().copy(name="max_energy")
        mean_activation = abs(activations).mean().copy(name="mean_activation")
        observables = [
            cost, min_energy, max_energy, mean_activation, batch_size,
            max_length, cost_per_character, algorithm.total_step_norm,
            algorithm.total_gradient_norm
        ]
        for name, parameter in parameters.items():
            observables.append(parameter.norm(2).copy(name + "_norm"))
            observables.append(
                algorithm.gradients[parameter].norm(2).copy(name +
                                                            "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(observables,
                                                    prefix="average",
                                                    every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches)
                # This shows a way to handle NaN emerging during
                # training: simply finish it.
                .add_condition(["after_batch"], _is_nan),
                # Saving the model and the log separately is convenient,
                # because loading the whole pickle takes quite some time.
                Checkpoint(save_path,
                           every_n_batches=500,
                           save_separately=["model", "log"]),
                Printing(every_n_batches=1)
            ])
        main_loop.run()
    elif mode == "sample" or mode == "beam_search":
        chars = tensor.lmatrix("input")
        generated = reverser.generate(chars)
        model = Model(generated)
        logger.info("Loading the model..")
        model.set_parameter_values(load_parameter_values(save_path))

        def generate(input_):
            """Generate output sequences for an input sequence.

            Encapsulates most of the difference between sampling and beam
            search.

            Returns
            -------
            outputs : list of lists
                Trimmed output sequences.
            costs : list
                The negative log-likelihood of generating the respective
                sequences.

            """
            if mode == "beam_search":
                samples, = VariableFilter(
                    applications=[reverser.generator.generate],
                    name="outputs")(ComputationGraph(generated[1]))
                # NOTE: this will recompile beam search functions
                # every time user presses Enter. Do not create
                # a new `BeamSearch` object every time if
                # speed is important for you.
                beam_search = BeamSearch(samples)
                outputs, costs = beam_search.search({chars: input_},
                                                    char2code['</S>'],
                                                    3 * input_.shape[0])
            else:
                _1, outputs, _2, _3, costs = (
                    model.get_theano_function()(input_))
                outputs = list(outputs.T)
                costs = list(costs.T)
                for i in range(len(outputs)):
                    outputs[i] = list(outputs[i])
                    try:
                        true_length = outputs[i].index(char2code['</S>']) + 1
                    except ValueError:
                        true_length = len(outputs[i])
                    outputs[i] = outputs[i][:true_length]
                    costs[i] = costs[i][:true_length].sum()
            return outputs, costs

        while True:
            try:
                line = input("Enter a sentence\n")
                message = ("Enter the number of samples\n"
                           if mode == "sample" else "Enter the beam size\n")
                batch_size = int(input(message))
            except EOFError:
                break
            except Exception:
                traceback.print_exc()
                continue

            encoded_input = [
                char2code.get(char, char2code["<UNK>"])
                for char in line.lower().strip()
            ]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input, ))[0]
            print("Target: ", target)

            samples, costs = generate(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size,
                             axis=1))
            messages = []
            for sample, cost in equizip(samples, costs):
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=operator.itemgetter(0), reverse=True)
            for _, message in messages:
                print(message)
Example #32
def test_equizip():
    yield verify_same, equizip, zip, None, [3, 4], [9, 2], [9, 9]
    yield verify_same, equizip, zip, None, [3, 4, 8, 4, 2]
    assert_raises(IterableLengthMismatch, list, equizip([5, 4, 3], [2, 1]))
    assert_raises(IterableLengthMismatch, list, equizip([5, 4, 3], []))
Example #33
def _compute_jacobians(components, parameters):
    logging.info("Taking the component jacobians")
    jacobians = gradient.jacobian(components, parameters)
    jacobian_map = OrderedDict(equizip(parameters, jacobians))
    logging.info("The component jacobian computation graph is built")
    return jacobian_map
Example #34
 def _push_allocation_config(self):
     for input_dim, output_dim, child in \
             equizip(self.input_dims, self.output_dims, self.children):
         child.input_dim = input_dim
         child.output_dim = output_dim
Example #35
 def result_to_lists(result):
     outputs, masks, costs = [array.T for array in result]
     outputs = [list(output[:int(mask.sum())])
                for output, mask in equizip(outputs, masks)]
     costs = list(costs.T.sum(axis=0))
     return outputs, costs
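
The mask.sum() trimming above cuts every padded output sequence back to its true length. A toy illustration with made-up values:

import numpy

output = numpy.array([7, 3, 5, 0, 0])
mask = numpy.array([1., 1., 1., 0., 0.])
trimmed = list(output[:int(mask.sum())])   # [7, 3, 5]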
Example #36
 def apply(self, *args, **kwargs):
     args = args + tuple(kwargs[name]
                         for name in self.input_names[len(args):])
     return [
         child.apply(arg) for arg, child in equizip(args, self.children)
     ]
Example #37
 def __call__(self, parser, args, values, option_string=None):
     setattr(args, self.dest, equizip(values[::2], values[1::2]))
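
This argparse action pairs up alternating command-line values into (key, value) tuples; equizip additionally guarantees that an odd number of values raises an error. A small illustration with made-up arguments:

values = ['learning_rate', '0.1', 'momentum', '0.9']
pairs = list(zip(values[::2], values[1::2]))
# pairs == [('learning_rate', '0.1'), ('momentum', '0.9')]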
Example #38
beam_search = BeamSearch(samples);

for line in f_in.readlines(): #{
	inp = _tokenise(line);
	form = '|'.join(line.strip().split('|||')[1:]);
	encoded_input = _encode(inp);
#	print(inp,'→',encoded_input, sys.stderr);	
	target = morph_lookup((encoded_input,))[0]	
#	print('Target:','→',target, sys.stderr);	

	input_arr = numpy.repeat(numpy.array(encoded_input)[:, None],BEAM, axis=1);
	samples, costs = generate(m, input_arr, beam_search);
	total = total + 1.0;

	messages = []
	for sample, cost in equizip(samples, costs): #{
#		message = "({})".format(cost)
		message = "".join(Globals.code2char[code] for code in sample)
		if sample == target: #{
			message += " CORRECT!"
		#}
		messages.append([float(cost), message])
		#messages.sort(key=operator.itemgetter(0), reverse=True)
	#}
	messages.sort()
	for message in messages[0:n_best]: #{
		if 'CORRECT' in message[1]: #{
			correct = correct + 1.0;
		#}
		print('%.2f\t%.6f\t%s\t%s' % (correct/total*100.0, message[0], form, message[1]), file=sys.stderr)
	#}
Example #39
mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
mistake_rate.name = "single error within note"

model = Model(cost)
cg = ComputationGraph([cost])

step_rule = CompositeRule([
    RemoveNotFinite(),
    StepClipping(20.0),
    Adam(learning_rate=.001),
    StepClipping(3.0),
    RemoveNotFinite()
])  # Scale(0.01)

gradients = dict(
    equizip(cg.parameters,
            T.grad(cost, cg.parameters, consider_constant=[v_sample])))
algorithm = GradientDescent(step_rule=step_rule,
                            gradients=gradients,
                            cost=cost,
                            params=cg.parameters)
#
# algorithm = GradientDescent(step_rule=step_rule, cost=cost, params=cg.parameters)

## l2/l1 regularization
# reg = 0.000005
# params = VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)
# param_nans = 0
# for i, p in enumerate(params):
# # cost += reg * abs(p).sum()
# cost += reg * (p ** 2).sum()
# param_nans += T.isnan(p).sum()