Example #1
    def __init__(self, cost, generation_length, dataset,
                 initial_text_length, softmax_sampling,
                 updates, ploting_path=None,
                 interactive_mode=False, **kwargs):
        self.generation_length = generation_length
        self.init_length = initial_text_length
        self.dataset = dataset
        self.output_size = get_output_size(dataset)
        self.ploting_path = ploting_path
        self.softmax_sampling = softmax_sampling
        self.interactive_mode = interactive_mode
        self.has_indices = has_indices(dataset)
        super(TextGenerationExtension, self).__init__(**kwargs)

        # Get presoft and its computation graph
        filter_presoft = VariableFilter(theano_name="presoft")
        presoft = filter_presoft(ComputationGraph(cost).variables)
        cg = ComputationGraph(presoft)

        # Handle the theano shared variables that allow carrying the hidden
        # state
        givens, f_updates = carry_hidden_state(updates, 1,
                                               reset=not(self.has_indices))

        # Compile the theano function
        self.generate = theano.function(inputs=cg.inputs, outputs=presoft,
                                        givens=givens, updates=f_updates)
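
The constructor above relies on the project's carry_hidden_state helper to keep the recurrent state alive between calls to the compiled function. A minimal, self-contained sketch of the underlying givens/updates pattern (an illustration only, not the helper itself, which additionally supports the reset flag seen above) could look like this:

import numpy as np
import theano
import theano.tensor as tensor

floatX = theano.config.floatX
state_dim = 4

# Symbolic graph: a simple tanh recurrence over a (time x state_dim) input.
x = tensor.matrix('x')
h0 = tensor.vector('h0')
W = theano.shared(np.asarray(0.1 * np.random.randn(state_dim, state_dim),
                             dtype=floatX))


def step(x_t, h_tm1):
    return tensor.tanh(x_t + h_tm1.dot(W))


h, _ = theano.scan(step, sequences=x, outputs_info=h0)

# A shared variable stores the last hidden state; givens substitutes it for
# the symbolic initial state and updates writes the new final state back, so
# consecutive calls to f continue where the previous call stopped.
last_h = theano.shared(np.zeros(state_dim, dtype=floatX))
f = theano.function(inputs=[x], outputs=h,
                    givens={h0: last_h},
                    updates=[(last_h, h[-1])])
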
Example #2
def fine_tuning(cost, args):
    param_values = load_parameter_values(args.load_path)
    output_size = get_output_size(args.dataset)

    param_values[
        "/output_layer.W"] = np.concatenate((
            param_values["/output_layer.W"],
            0.1 * np.random.randn(args.state_dim,
                                  output_size).astype(np.float32)))

    model = Model(cost)
    model.set_parameter_values(param_values)

    return cost
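
fine_tuning grows the output layer's weight matrix along its input dimension so that the extra hidden states of the enlarged model can feed the existing softmax. A quick shape check of that concatenation with made-up dimensions (np.concatenate defaults to axis 0, i.e. it stacks the new input rows under the old ones):

import numpy as np

state_dim, output_size = 3, 5                                      # hypothetical sizes
old_w = np.zeros((2 * state_dim, output_size), dtype=np.float32)   # trained weights
new_rows = 0.1 * np.random.randn(state_dim, output_size).astype(np.float32)

# Rows grow from 2 * state_dim to 3 * state_dim; output_size is unchanged.
w = np.concatenate((old_w, new_rows))
print(w.shape)  # (9, 5)
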
def get_presoft(h, args):
    output_size = get_output_size(args.dataset)
    # If args.skip_connections: dim = args.layers * args.state_dim
    # else: dim = args.state_dim
    use_all_states = args.skip_connections or args.skip_output or (
        args.rnn_type in ["clockwork", "soft"])
    output_layer = Linear(
        input_dim=use_all_states * args.layers * args.state_dim +
        (1 - use_all_states) * args.state_dim,
        output_dim=output_size,
        name="output_layer")

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()
    presoft = output_layer.apply(h)
    if not has_indices(args.dataset):
        presoft = Tanh().apply(presoft)
    presoft.name = 'presoft'
    return presoft
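
Because use_all_states is a boolean used in arithmetic, the input_dim expression collapses to either args.layers * args.state_dim (all hidden states concatenated) or args.state_dim (top layer only). A tiny stand-alone check with arbitrary values:

layers, state_dim = 3, 8

for use_all_states in (True, False):
    input_dim = (use_all_states * layers * state_dim +
                 (1 - use_all_states) * state_dim)
    print(use_all_states, input_dim)  # True -> 24, False -> 8
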
Example #5
    def __init__(self,
                 cost,
                 generation_length,
                 dataset,
                 initial_text_length,
                 softmax_sampling,
                 updates,
                 ploting_path=None,
                 interactive_mode=False,
                 **kwargs):
        self.generation_length = generation_length
        self.init_length = initial_text_length
        self.dataset = dataset
        self.output_size = get_output_size(dataset)
        self.ploting_path = ploting_path
        self.softmax_sampling = softmax_sampling
        self.interactive_mode = interactive_mode
        self.has_indices = has_indices(dataset)
        super(TextGenerationExtension, self).__init__(**kwargs)

        # Get presoft and its computation graph
        filter_presoft = VariableFilter(theano_name="presoft")
        presoft = filter_presoft(ComputationGraph(cost).variables)
        cg = ComputationGraph(presoft)

        # Handle the theano shared variables that allow carrying the hidden
        # state
        givens, f_updates = carry_hidden_state(updates,
                                               1,
                                               reset=not (self.has_indices))

        # Compile the theano function
        self.generate = theano.function(inputs=cg.inputs,
                                        outputs=presoft,
                                        givens=givens,
                                        updates=f_updates)
def visualize_generate(cost, hidden_states, updates,
                       train_stream, valid_stream,
                       args):

    use_indices = has_indices(args.dataset)
    output_size = get_output_size(args.dataset)

    # Get presoft and its computation graph
    filter_presoft = VariableFilter(theano_name="presoft")
    presoft = filter_presoft(ComputationGraph(cost).variables)[0]
    cg = ComputationGraph(presoft)

    # Handle the theano shared variables that allow carrying the hidden
    # state
    givens, f_updates = carry_hidden_state(updates, 1, reset=not(use_indices))

    if args.hide_all_except is not None:
        # Nothing to do here: hiding all layers except one is handled in
        # run_visualizations by zeroing the corresponding output weights
        # before this function is called.
        pass

    # Compile the theano function
    compiled = theano.function(inputs=cg.inputs, outputs=presoft,
                               givens=givens, updates=f_updates)

    epoch_iterator = train_stream.get_epoch_iterator()
    for num in range(10):
        all_ = next(epoch_iterator)
        all_sequence = all_[0][:, 0:1]
        targets = all_[1][:, 0:1]

        # In the case of characters and text
        if use_indices:
            init_ = all_sequence[:args.initial_text_length]

            # Time X Features
            probability_array = np.zeros((0, output_size))
            generated_text = init_

            for i in range(args.generated_text_lenght):
                presoft = compiled(generated_text)
                # Get the last value of presoft
                last_presoft = presoft[-1:, 0, :]

                # Compute the probability distribution
                probabilities = softmax(last_presoft)
                # Store it in the list
                probability_array = np.vstack([probability_array,
                                               probabilities])

                # Sample a character out of the probability distribution
                argmax = (args.softmax_sampling == 'argmax')
                last_output_sample = sample(probabilities, argmax)[:, None, :]

                # Concatenate the new value to the text
                generated_text = np.vstack(
                    [generated_text, last_output_sample])

                ploting_path = None
                if args.save_path is not None:
                    ploting_path = os.path.join(
                        args.save_path, 'prob_plot.png')

                # Convert with real characters
                whole_sentence = conv_into_char(
                    generated_text[:, 0], args.dataset)
                initial_sentence = whole_sentence[:init_.shape[0]]
                selected_sentence = whole_sentence[init_.shape[0]:]

                logger.info(''.join(initial_sentence) + '...')
                logger.info(''.join(whole_sentence))

                if ploting_path is not None:
                    probability_plot(probability_array, selected_sentence,
                                     args.dataset, ploting_path)

        # In the case of sine wave dataset for example
        else:
            presoft = compiled(all_sequence)

            time_plot = presoft.shape[0] - 1

            plt.plot(np.arange(time_plot),
                     targets[:time_plot, 0, 0],
                     label="target")
            plt.plot(np.arange(time_plot), presoft[:time_plot, 0, 0],
                     label="predicted")
            plt.legend()
            plt.grid(True)
            plt.show()
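
The generation loop above calls softmax and sample helpers that are not shown in the snippet; judging from the indexing, the repository's sample returns an array of at least two dimensions. A shape-agnostic sketch of what such helpers might look like (an assumption, not the actual implementation; the sampled indices would still need reshaping before being stacked onto generated_text):

import numpy as np


def softmax(x):
    # Numerically stable softmax over the last axis.
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)


def sample(probabilities, argmax=False):
    # probabilities: (batch, output_size). Return one index per row, either
    # greedily (argmax) or by drawing from the categorical distribution.
    if argmax:
        return probabilities.argmax(axis=-1)
    return np.array([np.random.choice(len(p), p=p) for p in probabilities])
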
def get_prernn(args):

    # time x batch
    x_mask = tensor.fmatrix('mask')

    # Compute the state dim
    if args.rnn_type == 'lstm':
        state_dim = 4 * args.state_dim
    else:
        state_dim = args.state_dim

    # Prepare the arguments for the fork
    output_names = []
    output_dims = []
    for d in range(args.layers):
        if d > 0:
            suffix = RECURRENTSTACK_SEPARATOR + str(d)
        else:
            suffix = ''
        if d == 0 or args.skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    # Prepare the brick to be forked (LookupTable or Linear)
    # Check if the dataset provides indices (in the case of a
    # fixed vocabulary, x is 2D tensor) or if it gives raw values
    # (x is 3D tensor)
    if has_indices(args.dataset):
        features = args.mini_batch_size
        x = tensor.lmatrix('features')
        vocab_size = get_output_size(args.dataset)
        lookup = LookupTable(length=vocab_size, dim=state_dim)
        lookup.weights_init = initialization.IsotropicGaussian(0.1)
        lookup.biases_init = initialization.Constant(0)
        forked = FeedforwardSequence([lookup.apply])
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x, dtype=floatX)

    else:
        x = tensor.tensor3('features', dtype=floatX)
        if args.used_inputs is not None:
            x = tensor.set_subtensor(x[args.used_inputs:, :, :],
                                     tensor.zeros_like(x[args.used_inputs:,
                                                         :, :],
                                                       dtype=floatX))
        features = get_output_size(args.dataset)
        forked = Linear(input_dim=features, output_dim=state_dim)
        forked.weights_init = initialization.IsotropicGaussian(0.1)
        forked.biases_init = initialization.Constant(0)

        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x[:, :, 0], dtype=floatX)

    # Define the fork
    fork = Fork(output_names=output_names, input_dim=features,
                output_dims=output_dims,
                prototype=forked)
    fork.initialize()

    # Apply the fork
    prernn = fork.apply(x)

    # Give a name to the input of each layer
    if args.skip_connections:
        for t in range(len(prernn)):
            prernn[t].name = "pre_rnn_" + str(t)
    else:
        prernn.name = "pre_rnn"

    return prernn, x_mask
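
The fork above produces one named input per recurrent layer when skip connections are enabled, and only one ("inputs") otherwise; deeper layers get a suffix built from Blocks' RECURRENTSTACK_SEPARATOR. A small stand-alone illustration of how output_names and output_dims are built ('#' is used here as a placeholder separator and may differ from the library's constant):

RECURRENTSTACK_SEPARATOR = '#'  # placeholder; the real value comes from Blocks
layers, state_dim, skip_connections = 3, 8, True

output_names, output_dims = [], []
for d in range(layers):
    suffix = RECURRENTSTACK_SEPARATOR + str(d) if d > 0 else ''
    if d == 0 or skip_connections:
        output_names.append("inputs" + suffix)
        output_dims.append(state_dim)

print(output_names)  # ['inputs', 'inputs#1', 'inputs#2']
print(output_dims)   # [8, 8, 8]
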
Example #10
def run_visualizations(cost,
                       updates,
                       train_stream,
                       valid_stream,
                       args,
                       hidden_states=None,
                       gate_values=None):

    # Load the parameters from a dumped model
    assert args.load_path is not None

    param_values = load_parameter_values(args.load_path)
    if args.hide_all_except is not None:
        i = args.hide_all_except
        sdim = args.state_dim

        output_size = get_output_size(args.dataset)

        # Zero the output weights of every layer except layer i, so that only
        # that layer's hidden state reaches the softmax during visualization.
        hidden = np.zeros((args.layers * sdim, output_size), dtype=np.float32)

        output_w = param_values["/output_layer.W"]

        hidden[i * sdim:(i + 1) * sdim, :] = output_w[i * sdim:(i + 1) *
                                                      sdim, :]

        param_values["/output_layer.W"] = hidden

    model = Model(cost)
    model.set_parameter_values(param_values)

    # Run a visualization
    if args.visualize == "generate":
        visualize_generate(cost, hidden_states, updates, train_stream,
                           valid_stream, args)

    elif args.visualize == "gates" and (gate_values is not None):
        if args.rnn_type == "lstm":
            visualize_gates_lstm(gate_values, hidden_states, updates,
                                 train_stream, valid_stream, args)
        elif args.rnn_type == "soft":
            visualize_gates_soft(gate_values, hidden_states, updates,
                                 train_stream, valid_stream, args)
        else:
            assert False

    elif args.visualize == "states":
        visualize_states(hidden_states, updates, train_stream, valid_stream,
                         args)

    elif args.visualize == "gradients":
        visualize_gradients(hidden_states, updates, train_stream, valid_stream,
                            args)

    elif args.visualize == "jacobian":
        visualize_jacobian(hidden_states, updates, train_stream, valid_stream,
                           args)

    elif args.visualize == "presoft":
        visualize_presoft(cost, hidden_states, updates, train_stream,
                          valid_stream, args)

    elif args.visualize == "matrices":
        visualize_matrices(args)

    elif args.visualize == "trained_singular_values":
        visualize_singular_values(args)

    elif args.visualize == "gradients_flow_pie":
        visualize_gradients_flow_pie(hidden_states, updates, args)

    else:
        assert False