Example #1
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(
                DataStream.default_stream(
                    mnist_test,
                    iteration_scheme=SequentialScheme(
                        mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
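A note on the channel names above: monitoring extensions write each variable into the training log under "<prefix>_<variable name>", so cost.name = 'final_cost' monitored with prefix="test" becomes 'test_final_cost', and that exact string is what Plot must receive. A minimal sketch of the wiring, assuming blocks-extras is installed and a Bokeh plotting server is reachable at its default address:

from blocks.extras.extensions.plot import Plot

# Each inner list becomes one figure; each name must match a key that a
# monitoring extension writes into the log.
plot = Plot('MNIST example',
            channels=[['test_final_cost'],             # "test" prefix + cost.name
                      ['train_total_gradient_norm']],  # "train" prefix + variable name
            after_epoch=True)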
Example #2
def test_plot():
    class Writer(SimpleExtension):
        def do(self, *args, **kwargs):
            self.main_loop.log.current_row['channel'] = (
                self.main_loop.status['iterations_done']**2)

    main_loop = MockMainLoop(extensions=[
        Writer(after_batch=True),
        Plot('test', [['channel']]).set_conditions(after_batch=True),
        FinishAfter(after_n_batches=11)
    ])
    main_loop.run()
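Two conveniences make this test work: anything written into main_loop.log.current_row becomes a plottable channel, and set_conditions returns the extension itself, so it can be configured inline while building the extensions list. A sketch of the same idea for a real quantity, assuming a blocks.algorithms.Scale step rule; the LogLearningRate extension is hypothetical:

from blocks.extensions import SimpleExtension

class LogLearningRate(SimpleExtension):
    """Write the current learning rate into the log so Plot can chart it."""

    def __init__(self, scale_rule, **kwargs):
        self.scale_rule = scale_rule  # a blocks.algorithms.Scale instance
        super(LogLearningRate, self).__init__(**kwargs)

    def do(self, which_callback, *args):
        # Scale keeps its learning rate in a Theano shared variable.
        self.main_loop.log.current_row['learning_rate'] = (
            self.scale_rule.learning_rate.get_value())

# Usage: pair LogLearningRate(step_rule, after_batch=True) with
# Plot('lr', [['learning_rate']]).set_conditions(after_batch=True).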
Example #3
test_stream = Flatten(
    DataStream.default_stream(
        dataset=test,
        iteration_scheme=ShuffledScheme(
            test.num_examples,
            batch_size=test.num_examples)))

test_monitor = DataStreamMonitoring(
    variables=[cost],
    data_stream=test_stream,
    prefix='test'
)

plotting = Plot('AdniNet_{}'.format(side),
                channels=[
                    ['entropy', 'validation_entropy'],
                    ['error', 'validation_error'],
                ],
                after_batch=False)

# The main loop will train the network and output reports, etc

stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M')
main = MainLoop(
    data_stream=training_stream,
    model=autoencoder,
    algorithm=algo,
    extensions=[
        FinishAfter(after_n_epochs=max_iter),
        FinishIfNoImprovementAfter(notification_name='validation_error', epochs=3),
        Printing(),
        validation_monitor,
Example #4
algorithm = GradientDescent(cost=cost,
                            parameters=ComputationGraph(cost).parameters,
                            step_rule=CompositeRule([RemoveNotFinite(), AdaDelta()]))
    # CompositeRule([StepClipping(10.0), Scale(0.02)]))
monitor_cost = TrainingDataMonitoring([cost, error_rate],
                                      prefix="train",
                                      after_epoch=True)

monitor_valid = DataStreamMonitoring([cost, error_rate, edit_distance, errors_per_char],
                                     data_stream=valid_stream,
                                     prefix="valid",
                                     after_epoch=True)

plot = Plot(document='CTC_toy_dataset_%d_%d'%(h_dim, rec_dim),
            channels=[['train_CTC', 'valid_CTC'], 
                      ['train_error_rate', 'valid_error_rate'],
                      ['valid_edit_distance'],
                      ['valid_errors_per_char']],
            after_epoch=True)

model = Model(cost)
main_loop = MainLoop(data_stream=stream, algorithm=algorithm,
                     extensions=[monitor_cost, monitor_valid, plot,
                                 FinishAfter(after_n_epochs=n_epochs),
                                 Printing()],
                     model=model)

print('Starting training ...') # ---------------------------------------------------
main_loop.run()


# vim: set sts=4 ts=4 sw=4 tw=0 et:
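The step rule here deserves a comment: rules inside a CompositeRule are applied in order, so RemoveNotFinite replaces non-finite gradient entries before AdaDelta turns them into updates, which keeps a single bad batch from destroying the parameters. A minimal sketch, assuming blocks.algorithms:

from blocks.algorithms import AdaDelta, CompositeRule, RemoveNotFinite

# RemoveNotFinite runs first and scrubs NaN/inf values; AdaDelta then
# computes the actual parameter updates from the cleaned gradients.
step_rule = CompositeRule([RemoveNotFinite(), AdaDelta()])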
Example #5
def main(name, epochs, batch_size, learning_rate,
         dim, mix_dim, old_model_name, max_length, bokeh, GRU, dropout,
         depth, max_grad, step_method, epsilon, sample, skip, uniform, top):

    #----------------------------------------------------------------------
    datasource = name

    def shnum(x):
        """ Convert a positive float into a short tag-usable string
             E.g.: 0 -> 0, 0.005 -> 53, 100 -> 1-2
        """
        return '0' if x <= 0 else '%s%d' % (("%e"%x)[0], -np.floor(np.log10(x)))

    jobname = "%s-%dX%dm%dd%dr%sb%de%s" % (datasource, depth, dim, mix_dim,
                                           int(dropout*10),
                                           shnum(learning_rate), batch_size,
                                           shnum(epsilon))
    if max_length != 600:
        jobname += '-L%d'%max_length

    if GRU:
        jobname += 'g'
    if max_grad != 5.:
        jobname += 'G%g'%max_grad
    if step_method != 'adam':
        jobname += step_method
    if skip:
        jobname += 'D'
        assert depth > 1
    if top:
        jobname += 'T'
        assert depth > 1
    if uniform > 0.:
        jobname += 'u%d'%int(uniform*100)

    if debug:
        jobname += ".debug"

    if sample:
        print("Sampling")
    else:
        print("\nRunning experiment %s" % jobname)
    if old_model_name:
        print("starting from model %s"%old_model_name)

    #----------------------------------------------------------------------
    transitions = [GatedRecurrent(dim=dim) if GRU else LSTM(dim=dim)
                   for _ in range(depth)]
    if depth > 1:
        transition = RecurrentStack(transitions, name="transition",
                                    skip_connections=skip or top)
        if skip:
            source_names = [RecurrentStack.suffix('states', d)
                            for d in range(depth)]
        else:
            source_names = [RecurrentStack.suffix('states', depth - 1)]
    else:
        transition = transitions[0]
        transition.name = "transition"
        source_names = ['states']

    emitter = SketchEmitter(mix_dim=mix_dim,
                            epsilon=epsilon,
                            name="emitter")
    readout = Readout(
        readout_dim=emitter.get_dim('inputs'),
        source_names=source_names,
        emitter=emitter,
        name="readout")
    generator = SequenceGenerator(readout=readout, transition=transition)

    # Initialization settings
    if uniform > 0.:
        generator.weights_init = Uniform(width=uniform*2.)
    else:
        generator.weights_init = OrthogonalGlorot()
    generator.biases_init = Constant(0)

    # Build the cost computation graph [steps, batch_size, 3]
    x = T.tensor3('features', dtype=floatX)
    if debug:
        x.tag.test_value = np.ones((max_length,batch_size,3)).astype(floatX)
    x = x[:max_length,:,:]  # has to be after setting test_value
    cost = generator.cost(x)
    cost.name = "sequence_log_likelihood"

    # Give an idea of what's going on
    model = Model(cost)
    params = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, value.get_value().shape) for key, value
                     in params.items()],
                    width=120))
    model_size = 0
    for v in params.values():
        s = v.get_value().shape
        model_size += s[0] * (s[1] if len(s) > 1 else 1)
    logger.info("Total number of parameters %d"%model_size)

    #------------------------------------------------------------
    extensions = []
    if old_model_name:
        if old_model_name == 'continue':
            old_model_name = jobname
        with open(old_model_name + '_model', "rb") as f:
            old_model = pickle.load(f)
        model.set_parameter_values(old_model.get_parameter_values())
        del old_model
    else:
        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

    if sample:
        assert old_model_name and old_model_name != 'continue'
        Sample(generator, steps=max_length, path=old_model_name).do(None)
        exit(0)

    #------------------------------------------------------------
    # Define the training algorithm.
    cg = ComputationGraph(cost)
    if dropout > 0.:
        from blocks.roles import INPUT, OUTPUT
        dropout_target = VariableFilter(roles=[OUTPUT],
                                        bricks=transitions,
                                        name_regex='states')(cg.variables)
        print('# dropout %d' % len(dropout_target))
        cg = apply_dropout(cg, dropout_target, dropout)
        opt_cost = cg.outputs[0]
    else:
        opt_cost = cost

    if step_method == 'adam':
        step_rule = Adam(learning_rate)
    elif step_method == 'rmsprop':
        step_rule = RMSProp(learning_rate, decay_rate=0.95)
    elif step_method == 'adagrad':
        step_rule = AdaGrad(learning_rate)
    elif step_method == 'adadelta':
        step_rule = AdaDelta()
    elif step_method == 'scale':
        step_rule = Scale(learning_rate)
    else:
        raise Exception('Unknown step method %s' % step_method)

    step_rule = CompositeRule([StepClipping(max_grad), step_rule])

    algorithm = GradientDescent(
        cost=opt_cost, parameters=cg.parameters,
        step_rule=step_rule)

    #------------------------------------------------------------
    observables = [cost]

    # Fetch variables useful for debugging
    (energies,) = VariableFilter(
        applications=[generator.readout.readout],
        name_regex="output")(cg.variables)
    min_energy = named_copy(energies.min(), "min_energy")
    max_energy = named_copy(energies.max(), "max_energy")
    observables += [min_energy, max_energy]

    # (activations,) = VariableFilter(
    #     applications=[generator.transition.apply],
    #     name=generator.transition.apply.states[0])(cg.variables)
    # mean_activation = named_copy(abs(activations).mean(),
    #                              "mean_activation")
    # observables.append(mean_activation)

    observables += [algorithm.total_step_norm, algorithm.total_gradient_norm]
    for name, param in params.items():
        observables.append(named_copy(
            param.norm(2), name + "_norm"))
        observables.append(named_copy(
            algorithm.gradients[param].norm(2), name + "_grad_norm"))

    #------------------------------------------------------------
    datasource_fname = os.path.join(fuel.config.data_path, datasource,
                                    datasource+'.hdf5')

    train_ds = H5PYDataset(datasource_fname, #max_length=max_length,
                             which_sets=['train'], sources=('features',),
                             load_in_memory=True)
    train_stream = DataStream(train_ds,
                              iteration_scheme=ShuffledScheme(
                                  train_ds.num_examples, batch_size))

    test_ds = H5PYDataset(datasource_fname, #max_length=max_length,
                            which_sets=['test'], sources=('features',),
                            load_in_memory=True)
    test_stream = DataStream(test_ds,
                             iteration_scheme=SequentialScheme(
                                 test_ds.num_examples, batch_size))

    train_stream = Mapping(train_stream, _transpose)
    test_stream = Mapping(test_stream, _transpose)

    def stream_stats(ds, label):
        itr = ds.get_epoch_iterator(as_dict=True)
        batch_count = 0
        examples_count = 0
        for batch in itr:
            batch_count += 1
            examples_count += batch['features'].shape[1]
        print('%s #batch %d #examples %d' %
              (label, batch_count, examples_count))

    stream_stats(train_stream, 'train')
    stream_stats(test_stream, 'test')

    extensions += [Timing(every_n_batches=10),
                   TrainingDataMonitoring(
                       observables, prefix="train",
                       every_n_batches=10),
                   DataStreamMonitoring(
                       [cost],  # without dropout
                       test_stream,
                       prefix="test",
                       on_resumption=True,
                       after_epoch=False,  # by default this is True
                       every_n_batches=100),
                   # all monitored data is ready so print it...
                   # (next steps may take more time and we want to see the
                   # results as soon as possible so print as soon as you can)
                   Printing(every_n_batches=10),
                   # perform multiple dumps at different intervals
                   # so if one of them breaks (has nan) we can hopefully
                   # find a model from few batches ago in the other
                   Checkpoint(jobname,
                              before_training=False, after_epoch=True,
                              save_separately=['log', 'model']),
                   Sample(generator, steps=max_length,
                          path=jobname+'.test',
                          every_n_batches=100),
                   ProgressBar(),
                   FinishAfter(after_n_epochs=epochs)
                    # This shows a way to handle NaN emerging during
                    # training: simply finish it.
                    .add_condition(["after_batch"], _is_nan),
                   ]

    if bokeh:
        from blocks.extras.extensions.plot import Plot
        extensions.append(Plot(
            'sketch',
            channels=[['cost']], every_n_batches=10))

    # Construct the main loop and start training!
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions
        )

    main_loop.run()
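This example references two helpers that the excerpt does not define, _transpose and _is_nan. Plausible minimal versions are sketched below; the originals may differ, and the log key used by _is_nan is an assumption based on the prefix="train" monitoring above:

import math

import numpy as np

def _transpose(data):
    # Fuel delivers batches as (batch, steps, 3); the generator expects
    # (steps, batch, 3), so swap the first two axes of every source.
    return tuple(np.swapaxes(array, 0, 1) for array in data)

def _is_nan(log):
    # Finish training as soon as the monitored gradient norm turns NaN.
    return math.isnan(log.current_row.get('train_total_gradient_norm', 0.))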
Example #6
        DataStreamMonitoring(valid_monitored,
                             valid_stream,
                             prefix='valid',
                             every_n_batches=1000),
        Printing(every_n_batches=1000),
        FinishAfter(every_n_batches=10000000),
        SaveLoadParams(
            dump_path,
            cg,
            before_training=True,  # before training -> load params
            every_n_batches=1000,  # every N batches -> save params
            after_epoch=True,  # after epoch -> save params
            after_training=True,  # after training -> save params
        ),
        RunOnTest(model_name, model, stream, every_n_batches=1000),
    ]

    if use_plot:
        extensions.append(
            Plot(model_name,
                 channels=plot_vars,
                 every_n_batches=500,
                 server_url='http://eos6:5006/'))

    main_loop = MainLoop(model=cg,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
    main_loop.profile.report()
Example #7
def main(config,
         tr_stream,
         dev_stream,
         use_bokeh=False,
         slim_iteration_state=False,
         switch_controller=None,
         reset_epoch=False):
    """This method largely corresponds to the ``main`` method in the
    original Blocks implementation in blocks-examples and most of the
    code is copied from there. Following modifications have been made:
    
    - Support fixing word embedding during training
    - Dropout fix https://github.com/mila-udem/blocks-examples/issues/46
    - If necessary, add the exp3s extension
    
    Args:
        config (dict): NMT config
        tr_stream (DataStream): Training data stream
        dev_stream (DataStream): Validation data stream
        use_bokeh (bool): Whether to use bokeh for plotting
        slim_iteration_state (bool): Whether to store the full iteration
                                     state or only the epoch iterator
                                     without data stream state
        switch_controller (SourceSwitchController): Controlling strategy
                                                    if monolingual data
                                                    is used as well
        reset_epoch (bool): Set epoch_started in main loop status to
                            false. Sometimes required if you change
                            training parameters such as 
                            mono_data_integration
    """

    nmt_model = NMTModel(config)
    nmt_model.set_up()

    # Set extensions
    logging.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'],
                      slim_iteration_state,
                      every_n_batches=config['save_freq'])
    ]

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logging.info("Building bleu validator")
        extensions.append(
            BleuValidator(nmt_model.sampling_input,
                          samples=nmt_model.samples,
                          config=config,
                          model=nmt_model.search_model,
                          data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          store_full_main_loop=config['store_full_main_loop'],
                          every_n_batches=config['bleu_val_freq']))

    if switch_controller:
        switch_controller.beam_search = BeamSearch(samples=nmt_model.samples)
        switch_controller.src_sentence = nmt_model.sampling_input
        extensions.append(switch_controller)

    # Reload model if necessary
    if config['reload']:
        extensions.append(
            LoadNMT(config['saveto'], slim_iteration_state, reset_epoch))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Decoding cost',
                 channels=[['decoder_cost_cost']],
                 after_batch=True))

    # Add an extension for correct handling of SIGTERM and SIGINT
    extensions.append(AlwaysEpochInterrupt(every_n_batches=1))

    # Set up training algorithm
    logging.info("Initializing training algorithm")
    # https://github.com/mila-udem/blocks-examples/issues/46
    train_params = nmt_model.cg.parameters
    # fs439: fix embeddings?
    if config['fix_embeddings']:
        train_params = []
        embedding_params = [
            'softmax1', 'softmax0', 'maxout_bias', 'embeddings', 'lookuptable',
            'transform_feedback'
        ]
        for p in nmt_model.cg.parameters:
            add_param = True
            for ann in p.tag.annotations:
                if ann.name in embedding_params:
                    logging.info("Do not train %s due to annotation %s" %
                                 (p, ann))
                    add_param = False
                    break
            if add_param:
                train_params.append(p)
    # Change cost=cost to cg.outputs[0] ?
    algorithm = GradientDescent(cost=nmt_model.cg.outputs[0]
                                if config['dropout'] < 1.0 else nmt_model.cost,
                                parameters=train_params,
                                step_rule=CompositeRule([
                                    StepClipping(config['step_clipping']),
                                    eval(config['step_rule'])()
                                ]))

    # Initialize main loop
    logging.info("Initializing main loop")
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Reset epoch
    if reset_epoch:
        main_loop.status['epoch_started'] = False

    # Train!
    main_loop.run()
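Both NMT examples guard Plot behind a BOKEH_AVAILABLE flag that the excerpts never set. blocks-examples defines it with an import guard along these lines, since importing Plot raises when Bokeh is not installed:

try:
    from blocks.extras.extensions.plot import Plot
    BOKEH_AVAILABLE = True
except ImportError:
    BOKEH_AVAILABLE = False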
Example #8
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['dec_nhids'], config['enc_nhids'] * 2,
                      config['topical_embedding_dim'])
    topical_transformer = topicalq_transformer(config['topical_vocab_size'],
                                               config['topical_embedding_dim'],
                                               config['enc_nhids'],
                                               config['topical_word_num'],
                                               config['batch_size'])

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        sampling_input = tensor.lmatrix('input')
        source_topical_word = tensor.lmatrix('source_topical')
        source_topical_mask = tensor.matrix('source_topical_mask')

        # Get training and development set streams
        tr_stream = get_tr_stream_with_topicalq(**config)
        dev_stream = get_dev_stream_with_topicalq(**config)
        topic_embedding = topical_transformer.apply(source_topical_word)
        # Get cost of the model
        representation = encoder.apply(source_sentence, source_sentence_mask)
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representation[0, :,
                                           (representation.shape[2] // 2):]

        cost = decoder.cost(representation, source_sentence_mask,
                            tw_representation, source_topical_mask,
                            target_sentence, target_sentence_mask,
                            topic_embedding, content_embedding)

        logger.info('Creating computational graph')
        cg = ComputationGraph(cost)

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()
        topical_transformer.weights_init = IsotropicGaussian(
            config['weight_scale'])
        topical_transformer.biases_init = Constant(0)
        topical_transformer.push_allocation_config()
        # unclear whether this explicit initialization is necessary
        topical_transformer.look_up.weights_init = Orthogonal()
        topical_transformer.transformer.weights_init = Orthogonal()
        topical_transformer.initialize()
        word_topical_embedding = cPickle.load(
            open(config['topical_embeddings'], 'rb'))
        np_word_topical_embedding = numpy.array(word_topical_embedding,
                                                dtype='float32')
        topical_transformer.look_up.W.set_value(np_word_topical_embedding)
        topical_transformer.look_up.W.tag.role = []

        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout in ghog
            logger.info('Applying dropout')
            dropout_inputs = [
                x for x in cg.intermediary_variables
                if x.name == 'maxout_apply_output'
            ]
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(cg, enc_params + dec_params,
                             config['weight_noise_ff'])

        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(
            Selector(encoder).get_parameters(),
            Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(value.get_value().shape, name))
        logger.info("Total number of parameters: {}".format(
            len(enc_dec_param_dict)))

        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([cost], after_batch=True),
            Printing(after_batch=True),
            CheckpointNMT(config['saveto'],
                          every_n_batches=config['save_freq'])
        ]
        '''
        # Set up beam search and sampling computation graphs if necessary
        if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
            logger.info("Building sampling model")
            sampling_representation = encoder.apply(
                sampling_input, tensor.ones(sampling_input.shape))
            generated = decoder.generate(
                sampling_input, sampling_representation)
            search_model = Model(generated)
            _, samples = VariableFilter(
                bricks=[decoder.sequence_generator], name="outputs")(
                    ComputationGraph(generated[1]))

        # Add sampling
        if config['hook_samples'] >= 1:
            logger.info("Building sampler")
            extensions.append(
                Sampler(model=search_model, data_stream=tr_stream,
                        hook_samples=config['hook_samples'],
                        every_n_batches=config['sampling_freq'],
                        src_vocab_size=config['src_vocab_size']))

        # Add early stopping based on bleu
        if config['bleu_script'] is not None:
            logger.info("Building bleu validator")
            extensions.append(
                BleuValidator(sampling_input, samples=samples, config=config,
                              model=search_model, data_stream=dev_stream,
                              normalize=config['normalized_bleu'],
                              every_n_batches=config['bleu_val_freq']))
        '''

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En',
                     channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    on_unused_sources='warn',
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(model=training_model,
                             algorithm=algorithm,
                             data_stream=tr_stream,
                             extensions=extensions)

        # Train!
        main_loop.run()

    elif mode == 'translate':

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_topical_word = tensor.lmatrix('source_topical')

        # Get test set stream
        test_stream = get_dev_stream_with_topicalq(
            config['test_set'], config['src_vocab'], config['src_vocab_size'],
            config['topical_test_set'], config['topical_vocab'],
            config['topical_vocab_size'], config['unk_id'])
        ftrans = open(config['test_set'] + '.trans.out', 'w')

        # Helper utilities
        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1

        # Get beam search
        logger.info("Building sampling model")
        topic_embedding = topical_transformer.apply(source_topical_word)
        representation = encoder.apply(source_sentence,
                                       tensor.ones(source_sentence.shape))
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representation[0, :,
                                           (representation.shape[2] // 2):]
        generated = decoder.generate(source_sentence,
                                     representation,
                                     tw_representation,
                                     topical_embedding=topic_embedding,
                                     content_embedding=content_embedding)

        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        # Get target vocabulary
        trg_vocab = _ensure_special_tokens(pickle.load(
            open(config['trg_vocab'], 'rb')),
                                           bos_idx=0,
                                           eos_idx=trg_eos_idx,
                                           unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}

        logger.info("Started translation: ")
        total_cost = 0.0

        for i, line in enumerate(test_stream.get_epoch_iterator()):

            seq = sutils._oov_to_unk(line[0], config['src_vocab_size'],
                                     unk_idx)
            seq2 = line[1]
            input_ = numpy.tile(seq, (config['beam_size'], 1))
            input_topical = numpy.tile(seq2, (config['beam_size'], 1))

            # draw sample, checking to ensure we don't get an empty string back
            trans, costs = \
                beam_search.search(
                    input_values={source_sentence: input_,source_topical_word:input_topical},
                    max_length=10*len(seq), eol_symbol=src_eos_idx,
                    ignore_first_eol=True)
            '''
            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths
            '''
            #best = numpy.argsort(costs)[0]
            best = numpy.argsort(costs)[0:config['beam_size']]
            for b in best:
                try:
                    total_cost += costs[b]
                    trans_out = trans[b]

                    # convert idx to words
                    trans_out = sutils._idx_to_word(trans_out, trg_ivocab)

                except ValueError:
                    logger.info(
                        "Can NOT find a translation for line: {}".format(i +
                                                                         1))
                    trans_out = '<UNK>'

                print(trans_out, file=ftrans)

            if i != 0 and i % 100 == 0:
                logger.info("Translated {} lines of test set...".format(i))

        logger.info("Total cost of the test: {}".format(total_cost))
        ftrans.close()
    elif mode == 'rerank':
        # Create Theano variables
        ftrans = open(config['val_set'] + '.scores.out', 'w')
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')

        config['src_data'] = config['val_set']
        config['trg_data'] = config['val_set_grndtruth']
        config['batch_size'] = 1
        config['sort_k_batches'] = 1
        test_stream = get_tr_stream_unsorted(**config)
        logger.info("Building sampling model")
        representations = encoder.apply(source_sentence, source_sentence_mask)
        costs = decoder.cost(representations, source_sentence_mask,
                             target_sentence, target_sentence_mask)
        logger.info("Loading the model..")
        model = Model(costs)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        costs_computer = function([
            source_sentence, source_sentence_mask, target_sentence,
            target_sentence_mask
        ], costs)
        iterator = test_stream.get_epoch_iterator()

        scores = []
        for i, (src, src_mask, trg, trg_mask) in enumerate(iterator):
            costs = costs_computer(*[src, src_mask, trg, trg_mask])
            cost = costs.sum()
            print(i, cost)
            scores.append(cost)
            ftrans.write(str(cost) + "\n")
        ftrans.close()
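A note on eval(config['step_rule'])(): it instantiates whatever class name the config string holds, which is concise but executes arbitrary configuration text. A safer sketch resolves the name from an explicit whitelist instead (assuming the usual Blocks step rules):

from blocks.algorithms import Adam, AdaDelta, AdaGrad, RMSProp, Scale

STEP_RULES = {'Adam': Adam, 'AdaDelta': AdaDelta, 'AdaGrad': AdaGrad,
              'RMSProp': RMSProp, 'Scale': Scale}

def make_step_rule(name):
    # Fails with a clear KeyError on unknown names instead of eval'ing them.
    return STEP_RULES[name]()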
Example #9
def main(config, tr_stream, dev_stream, use_bokeh=False):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [x for x in cg.intermediary_variables
                          if x.name == 'maxout_apply_output']
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params+dec_params, config['weight_noise_ff'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                               Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}"
                .format(len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'],
                      every_n_batches=config['save_freq'])
    ]

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model, data_stream=tr_stream,
                    hook_samples=config['hook_samples'],
                    every_n_batches=config['sampling_freq'],
                    src_vocab_size=config['src_vocab_size']))

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input, samples=samples, config=config,
                          model=search_model, data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Cs-En', channels=[['decoder_cost_cost']],
                 after_batch=True))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(config['step_clipping']),
                                 eval(config['step_rule'])()])
    )

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=algorithm,
        data_stream=tr_stream,
        extensions=extensions
    )

    # Train!
    main_loop.run()
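One caveat: this example applies dropout to the graph but still optimizes the original cost, which was built before apply_dropout; the dropout-regularized objective is cg.outputs[0]. Example #7 references the fix (blocks-examples issue #46), which amounts to constructing the algorithm like this:

# Train on the dropout-transformed graph's output when dropout is active.
algorithm = GradientDescent(
    cost=cg.outputs[0] if config['dropout'] < 1.0 else cost,
    parameters=cg.parameters,
    step_rule=CompositeRule([StepClipping(config['step_clipping']),
                             eval(config['step_rule'])()]))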
Example #10
def construct_monitors(algorithm,
                       task,
                       n_patches,
                       x,
                       x_shape,
                       graph,
                       name,
                       ram,
                       model,
                       cost,
                       n_spatial_dims,
                       plot_url,
                       patchmonitor_interval=100,
                       **kwargs):
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))

    channels.append(util.named(savings.mean(), "savings.mean"))

    for variable_name in "location scale".split():
        variable = locals()[variable_name]
        channels.append(variable.mean(axis=0), "%s.mean" % variable_name)
        channels.append(variable.var(axis=0), "%s.variance" % variable_name)

    channels.append(algorithm.total_gradient_norm, "total_gradient_norm")

    step_norms = util.Channels()
    step_norms.extend(
        util.named(l2_norm([algorithm.steps[param]]), "%s.step_norm" % name)
        for name, param in model.get_parameter_dict().items())
    step_channels = step_norms.get_channels()

    #for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
    #    quantity = activation.mean()
    #    quantity.name = "%s.mean" % util.get_path(activation)
    #    channels.append(quantity)

    data_independent_channels = util.Channels()
    for parameter in graph.parameters:
        if parameter.name in "gamma beta".split():
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            data_independent_channels.append(quantity)

    extensions = []

    extensions.append(
        TrainingDataMonitoring(step_channels, prefix="train",
                               after_epoch=True))

    extensions.append(
        DataStreamMonitoring(data_independent_channels.get_channels(),
                             data_stream=None,
                             after_epoch=True))
    extensions.extend(
        DataStreamMonitoring((channels.get_channels() + [cost]),
                             data_stream=task.get_stream(which, monitor=True),
                             prefix=which,
                             after_epoch=True)
        for which in "train valid test".split())

    patchmonitor = None
    patchmonitor_klass = None
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring

    if patchmonitor_klass:
        patch = T.stack(*[
            ram.crop(x, x_shape, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)
        ])
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
        patch_extractor = theano.function([x, x_shape],
                                          [location, scale, patch])

        for which in "train valid".split():
            patchmonitor = patchmonitor_klass(
                save_to="%s_patches_%s" % (name, which),
                data_stream=task.get_stream(which,
                                            shuffle=False,
                                            num_examples=5),
                every_n_batches=patchmonitor_interval,
                extractor=patch_extractor,
                map_to_input_space=attention.static_map_to_input_space)
            patchmonitor.save_patches("patchmonitor_test.png")
            extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
        extensions.append(
            Plot(name,
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
Example #11
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    #attention --->
    patch_shape = (16, 16)
    image_shape = (784, 100)
    import numpy
    import theano.tensor as T
    n_spatial_dims = 2
    cropper = SoftRectangularCropper(n_spatial_dims=n_spatial_dims,
                                     patch_shape=patch_shape,
                                     image_shape=image_shape,
                                     kernel=Gaussian())

    batch_size = 10
    scales = 1.3**numpy.arange(-7, 6)
    n_patches = len(scales)
    locations = (numpy.ones(
        (n_patches, batch_size, 2)) * image_shape / 2).astype(numpy.float32)
    scales = numpy.tile(scales[:, numpy.newaxis, numpy.newaxis],
                        (1, batch_size, 2)).astype(numpy.float32)
    Tpatches = T.stack(*[
        cropper.apply(x, T.constant(location), T.constant(scale))[0]
        for location, scale in zip(locations, scales)
    ])
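    # NOTE: `batch` is not defined in this excerpt; it is presumably a
    # minibatch dict drawn from the MNIST stream. The ipdb trace below is
    # debugging residue left in by the original author.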
    patches = theano.function([x], Tpatches)(batch['features'])

    import ipdb as pdb
    pdb.set_trace()
    probs = mlp.apply(tensor.flatten(patches, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(
                DataStream.default_stream(
                    mnist_test,
                    iteration_scheme=SequentialScheme(
                        mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
Example #12
def construct_monitors(algorithm, task, model, graphs, outputs, plot_url,
                       hyperparameters, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring
    from patchmonitor import PatchMonitoring, VideoPatchMonitoring

    extensions = []

    if True:
        extensions.append(
            TrainingDataMonitoring([
                algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
                for name, param in model.get_parameter_dict().items()
            ],
                                   prefix="train",
                                   after_epoch=True))

    if True:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        extensions.append(
            DataStreamMonitoring(data_independent_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in "cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        if which_set == "train":
            if True:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var),
                                               []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

            channels.append(
                algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
        extensions.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=task.get_stream(which_set,
                                                             monitor=True)))

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
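        # NOTE: `name` is not defined in this excerpt; in the original code
        # it presumably comes from `hyperparameters` or an enclosing scope.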
        extensions.append(
            Plot(name,
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
Example #13
def main(job_id, params):

    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
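    # NOTE: the next line reads the 'valid_batch' option; 'test_batch' may
    # have been intended.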
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))
    hidden_units = int(config.get('hyperparams', 'hidden_units', 16))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    fine_tune = config.getboolean('hyperparams', 'fine_tune')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    rn_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/rnet/2015-06-25-18:13'
    ln_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/lnet/2015-06-29-11:45'

    right_dim = 10519
    left_dim = 11427

    train = H5PYDataset(data_file, which_set='train')
    valid = H5PYDataset(data_file, which_set='valid')
    test = H5PYDataset(data_file, which_set='test')

    l_x = tensor.matrix('l_features')
    r_x = tensor.matrix('r_features')
    y = tensor.lmatrix('targets')

    lnet = load(ln_file).model.get_top_bricks()[0]
    rnet = load(rn_file).model.get_top_bricks()[0]

    # Pre-trained layers:

    # Inputs -> hidden_1 -> hidden 2
    for side, net in zip(['l', 'r'], [lnet, rnet]):
        for child in net.children:
            child.name = side + '_' + child.name

    ll1 = lnet.children[0]
    lr1 = lnet.children[1]
    ll2 = lnet.children[2]
    lr2 = lnet.children[3]

    rl1 = rnet.children[0]
    rr1 = rnet.children[1]
    rl2 = rnet.children[2]
    rr2 = rnet.children[3]

    l_h = lr2.apply(ll2.apply(lr1.apply(ll1.apply(l_x))))
    r_h = rr2.apply(rl2.apply(rr1.apply(rl1.apply(r_x))))

    input_dim = ll2.output_dim + rl2.output_dim

    # hidden_2 -> hidden_3 -> hidden_4 -> Logistic output
    output_mlp = MLP(activations=[
        Rectifier(name='h3'),
        Rectifier(name='h4'),
        Softmax(name='output'),
    ],
                     dims=[
                         input_dim,
                         hidden_units,
                         hidden_units,
                         2,
                     ],
                     weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
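                     # NOTE: b_sd and b_mu are read from the config above but
                     # never used; the line below may have been meant to use
                     # them instead of the weight parameters.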
                     biases_init=IsotropicGaussian(std=W_sd, mean=W_mu))

    output_mlp.initialize()

    # Concatenate the outputs of the two hidden subnets into a single
    # variable for input into the next layer.
    merge = tensor.concatenate([l_h, r_h], axis=1)

    y_hat = output_mlp.apply(merge)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], 0.2)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # If no fine-tuning of l-r models is wanted, find the params for only
    # the joint layers:
    if fine_tune:
        params_to_update = dropout_graph.parameters
    else:
        params_to_update = VariableFilter(
            [PARAMETER], bricks=output_mlp.children)(cost_graph)

    # Learning Algorithm:
    algo = GradientDescent(step_rule=solver_type,
                           params=params_to_update,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot(
        'AdniNet_LeftRight',
        channels=[
            ['dropout_entropy'],
            ['error', 'validation_error'],
        ],
    )

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}'.format(stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             FinishIfNoImprovementAfter(
                                 notification_name='validation_error',
                                 epochs=1),
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()
    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
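    # Penalize the validation error plus the train/validation gap so that
    # Spearmint favors settings that generalize rather than overfit.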
    spearmint_loss = ve + abs(te - ve)
    print('Spearmint Loss: {}'.format(spearmint_loss))
    return spearmint_loss
Example #14
def construct_monitors(algorithm, task, model, graphs, outputs, updates,
                       monitor_options, n_spatial_dims, plot_url,
                       hyperparameters, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        extensions.append(
            TrainingDataMonitoring([
                algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
                for name, param in model.get_parameter_dict().items()
            ],
                                   prefix="train",
                                   after_epoch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        extensions.append(
            DataStreamMonitoring(data_independent_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in "cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var),
                                               []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

            channels.append(
                algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # Fragile: recover the batch statistic by digging into the
                    # exponential-moving-average update expression, which is
                    # assumed to have the form
                    #   decay * population + (1 - decay) * batch_stat
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(
                    T.stack(errors).mean().copy(
                        name="population_statistic_mse"))

        extensions.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=task.get_stream(which_set,
                                                             monitor=True)))

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
        # `name` is not defined in this function's signature; assume it is
        # passed through **kwargs (falling back to a generic label).
        extensions.append(
            Plot(kwargs.get("name", "plot"),
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
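
For orientation, here is a minimal sketch of how the extensions returned by construct_monitors would typically be wired into a Blocks main loop. The objects passed in (algorithm, task, model, graphs, outputs, updates, and so on) are assumed to be built elsewhere, exactly as in the function's own signature; only the wiring below is illustrative.

from blocks.main_loop import MainLoop
from blocks.extensions import Printing

# All names below are assumed to come from the surrounding experiment setup.
extensions = construct_monitors(
    algorithm, task, model, graphs, outputs, updates,
    monitor_options, n_spatial_dims, plot_url, hyperparameters)
main_loop = MainLoop(algorithm=algorithm,
                     data_stream=task.get_stream("train"),
                     model=model,
                     extensions=extensions + [Printing()])
main_loop.run()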
Example #15
def main(job_id, params):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))
    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'test_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(
        config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    input_dim = {'l': 11427, 'r': 10519, 'b': 10519 + 11427}

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file,
                            which_set='train',
                            sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file,
                            which_set='valid',
                            sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file,
                           which_set='test',
                           sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')

    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[input_dim[side], hidden_units, hidden_units, 2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]],
                                  input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(step_rule=solver_type,
                           params=dropout_graph.parameters,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot('AdniNet_{}'.format(side),
                    channels=[
                        ['dropout_entropy', 'validation_entropy'],
                        ['error', 'validation_error'],
                    ],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}net/{}'.format(side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started to overfit
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.1,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             early_stopper,
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print('Spearmint Loss: {}'.format(spearmint_loss))
    return spearmint_loss
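
As a reference for how this objective gets driven, here is a hedged sketch of a direct invocation. Spearmint calls main(job_id, params) with each hyperparameter wrapped in a length-one array, which matches the params['...'][0] indexing above; the values shown are illustrative assumptions only.

if __name__ == '__main__':
    # Illustrative values; in practice Spearmint supplies these from its search.
    example_params = {
        'base_lr': [0.01],
        'dropout_ratio': [0.3],
        'hidden_units': [64],
        'weight_decay': [0.0005],
    }
    print(main(0, example_params))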
Example #16
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])
    topical_transformer = topicalq_transformer(
        config['source_topic_vocab_size'], config['topical_embedding_dim'],
        config['enc_nhids'], config['topical_word_num'], config['batch_size'])
    decoder = Decoder(vocab_size=config['trg_vocab_size'],
                      topicWord_size=config['trg_topic_vocab_size'],
                      embedding_dim=config['dec_embed'],
                      topical_dim=config['topical_embedding_dim'],
                      state_dim=config['dec_nhids'],
                      representation_dim=config['enc_nhids'] * 2,
                      match_function=config['match_function'],
                      use_doubly_stochastic=config['use_doubly_stochastic'],
                      lambda_ds=config['lambda_ds'],
                      use_local_attention=config['use_local_attention'],
                      window_size=config['window_size'],
                      use_step_decay_cost=config['use_step_decay_cost'],
                      use_concentration_cost=config['use_concentration_cost'],
                      lambda_ct=config['lambda_ct'],
                      use_stablilizer=config['use_stablilizer'],
                      lambda_st=config['lambda_st'])
    # here attended dim (representation_dim) of decoder is 2 * enc_nhids
    # because the context given by the encoder is a bidirectional context

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        target_topic_sentence = tensor.lmatrix('target_topic')
        target_topic_binary_sentence = tensor.lmatrix('target_binary_topic')
        #target_topic_sentence_mask=tensor.lmatrix('target_topic_mask');
        sampling_input = tensor.lmatrix('input')
        source_topical_word = tensor.lmatrix('source_topical')
        source_topical_mask = tensor.matrix('source_topical_mask')

        topic_embedding = topical_transformer.apply(source_topical_word)

        # Get training and development set streams
        tr_stream = get_tr_stream_with_topic_target(**config)
        #dev_stream = get_dev_tr_stream_with_topic_target(**config)

        # Get cost of the model
        representations = encoder.apply(source_sentence, source_sentence_mask)
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representations[0, :,
                                            (representations.shape[2] / 2):]
        cost = decoder.cost(representations, source_sentence_mask,
                            tw_representation, source_topical_mask,
                            target_sentence, target_sentence_mask,
                            target_topic_sentence,
                            target_topic_binary_sentence, topic_embedding,
                            content_embedding)

        logger.info('Creating computational graph')
        perplexity = tensor.exp(cost)
        perplexity.name = 'perplexity'

        cg = ComputationGraph(cost)
        costs_computer = function([
            target_sentence, target_sentence_mask, source_sentence,
            source_sentence_mask, source_topical_word, target_topic_sentence,
            target_topic_binary_sentence
        ], (perplexity),
                                  on_unused_input='ignore')

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()

        topical_transformer.weights_init = IsotropicGaussian(
            config['weight_scale'])
        topical_transformer.biases_init = Constant(0)
        topical_transformer.push_allocation_config()
        # It is unclear whether initialize() already covers these sub-bricks,
        # so their initialization schemes are set explicitly:
        topical_transformer.look_up.weights_init = Orthogonal()
        topical_transformer.transformer.weights_init = Orthogonal()
        topical_transformer.initialize()
        word_topical_embedding = cPickle.load(
            open(config['topical_embeddings'], 'rb'))
        np_word_topical_embedding = numpy.array(word_topical_embedding,
                                                dtype='float32')
        topical_transformer.look_up.W.set_value(np_word_topical_embedding)
        topical_transformer.look_up.W.tag.role = []

        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout in ghog
            logger.info('Applying dropout')
            dropout_inputs = [
                x for x in cg.intermediary_variables
                if x.name == 'maxout_apply_output'
            ]
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(cg, enc_params + dec_params,
                             config['weight_noise_ff'])

        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(
            Selector(encoder).get_parameters(),
            Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(name, value.get_value().shape))
        logger.info("Total number of parameter tensors: {}".format(
            len(enc_dec_param_dict)))

        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([perplexity], after_batch=True),
            CheckpointNMT(config['saveto'],
                          config['model_name'],
                          every_n_batches=config['save_freq'])
        ]

        # # Set up beam search and sampling computation graphs if necessary
        # if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        #     logger.info("Building sampling model")
        #     sampling_representation = encoder.apply(
        #         sampling_input, tensor.ones(sampling_input.shape))
        #     generated = decoder.generate(
        #         sampling_input, sampling_representation)
        #     search_model = Model(generated)
        #     _, samples = VariableFilter(
        #         bricks=[decoder.sequence_generator], name="outputs")(
        #             ComputationGraph(generated[1]))
        #
        # # Add sampling
        # if config['hook_samples'] >= 1:
        #     logger.info("Building sampler")
        #     extensions.append(
        #         Sampler(model=search_model, data_stream=tr_stream,
        #                 model_name=config['model_name'],
        #                 hook_samples=config['hook_samples'],
        #                 every_n_batches=config['sampling_freq'],
        #                 src_vocab_size=config['src_vocab_size']))
        #
        # # Add early stopping based on bleu
        # if False:
        #     logger.info("Building bleu validator")
        #     extensions.append(
        #         BleuValidator(sampling_input, samples=samples, config=config,
        #                       model=search_model, data_stream=dev_stream,
        #                       normalize=config['normalized_bleu'],
        #                       every_n_batches=config['bleu_val_freq'],
        #                       n_best=3,
        #                       track_n_models=6))
        #
        # logger.info("Building perplexity validator")
        # extensions.append(
        #         pplValidation( config=config,
        #                 model=costs_computer, data_stream=dev_stream,
        #                 model_name=config['model_name'],
        #                 every_n_batches=config['sampling_freq']))

        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En',
                     channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        initial_learning_rate = config['initial_learning_rate']
        log_path = os.path.join(config['saveto'], 'log')
        if config['reload'] and os.path.exists(log_path):
            with open(log_path, 'rb') as source:
                log = cPickle.load(source)
                last = max(log.keys()) - 1
                if 'learning_rate' in log[last]:
                    initial_learning_rate = log[last]['learning_rate']

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        Scale(initial_learning_rate),
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]),
                                    on_unused_sources='ignore')

        _learning_rate = algorithm.step_rule.components[0].learning_rate
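        # components[0] is the Scale rule placed first in the CompositeRule
        # above, so _learning_rate is its shared learning-rate variable.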
        if config['learning_rate_decay']:
            extensions.append(
                LearningRateHalver(record_name='validation_cost',
                                   comparator=lambda x, y: x > y,
                                   learning_rate=_learning_rate,
                                   patience_default=3))
        else:
            extensions.append(OldModelRemover(saveto=config['saveto']))

        if config['learning_rate_grow']:
            extensions.append(
                LearningRateDoubler(record_name='validation_cost',
                                    comparator=lambda x, y: x < y,
                                    learning_rate=_learning_rate,
                                    patience_default=3))

        extensions.append(
            SimplePrinting(config['model_name'], after_batch=True))

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(model=training_model,
                             algorithm=algorithm,
                             data_stream=tr_stream,
                             extensions=extensions)

        # Train!
        main_loop.run()

    elif mode == 'translate':

        logger.info('Creating theano variables')
        sampling_input = tensor.lmatrix('source')
        source_topical_word = tensor.lmatrix('source_topical')
        tw_vocab_overlap = tensor.lmatrix('tw_vocab_overlap')
        tw_vocab_overlap_matrix = cPickle.load(
            open(config['tw_vocab_overlap'], 'rb'))
        tw_vocab_overlap_matrix = numpy.array(tw_vocab_overlap_matrix,
                                              dtype='int32')
        #tw_vocab_overlap=shared(tw_vocab_overlap_matrix);

        topic_embedding = topical_transformer.apply(source_topical_word)

        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1
        trg_vocab = _ensure_special_tokens(cPickle.load(
            open(config['trg_vocab'], 'rb')),
                                           bos_idx=0,
                                           eos_idx=trg_eos_idx,
                                           unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}

        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        topic_embedding = topical_transformer.apply(source_topical_word)
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = sampling_representation[0, :, (
            sampling_representation.shape[2] / 2):]
        generated = decoder.generate(sampling_input,
                                     sampling_representation,
                                     tw_representation,
                                     topical_embedding=topic_embedding,
                                     content_embedding=content_embedding)

        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        #loader = LoadNMT(config['saveto'])
        loader = LoadNMT(config['validation_load'])
        loader.set_model_parameters(model, loader.load_parameters_default())

        logger.info("Started translation: ")
        test_stream = get_dev_stream_with_topicalq(**config)
        ts = test_stream.get_epoch_iterator()
        rts = open(config['val_set_source']).readlines()
        ftrans_original = open(config['val_output_orig'], 'w')
        saved_weights = []
        total_cost = 0.0

        pbar = ProgressBar(max_value=len(rts)).start()
        for i, (line, line_raw) in enumerate(zip(ts, rts)):
            trans_in = line_raw.split()
            seq = sutils._oov_to_unk(line[0], config['src_vocab_size'],
                                     unk_idx)
            seq1 = line[1]
            input_topical = numpy.tile(seq1, (config['beam_size'], 1))
            input_ = numpy.tile(seq, (config['beam_size'], 1))

            # draw sample, checking to ensure we don't get an empty string back
            trans, costs, attendeds, weights = \
                beam_search.search(
                    input_values={sampling_input: input_,
                                  source_topical_word: input_topical,
                                  tw_vocab_overlap: tw_vocab_overlap_matrix},
                    tw_vocab_overlap=tw_vocab_overlap_matrix,
                    max_length=3 * len(seq), eol_symbol=trg_eos_idx,
                    ignore_first_eol=True)

            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths

            best = numpy.argsort(costs)[0]
            try:
                total_cost += costs[best]
                trans_out = trans[best]
                weight = weights[best][:, :len(trans_in)]
                trans_out = sutils._idx_to_word(trans_out, trg_ivocab)
            except ValueError:
                logger.info(
                    "Can NOT find a translation for line: {}".format(i + 1))
                trans_out = '<UNK>'
                # Keep saved_weights aligned with the output lines even when
                # translation fails; without this placeholder the append
                # below would raise a NameError on the first failure.
                weight = numpy.zeros((0, len(trans_in)), dtype='float32')

            saved_weights.append(weight)
            print(' '.join(trans_out), file=ftrans_original)
            pbar.update(i + 1)

        pbar.finish()
        logger.info("Total cost of the test: {}".format(total_cost))
        cPickle.dump(saved_weights, open(config['attention_weights'], 'wb'))
        ftrans_original.close()
        # ap = afterprocesser(config)
        # ap.main()

    elif mode == 'score':
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        target_topic_sentence = tensor.lmatrix('target_topic')
        target_topic_binary_sentence = tensor.lmatrix('target_binary_topic')
        source_topical_word = tensor.lmatrix('source_topical')

        topic_embedding = topical_transformer.apply(source_topical_word)
        # Get cost of the model
        representations = encoder.apply(source_sentence, source_sentence_mask)
        costs = decoder.cost(representations, source_sentence_mask,
                             target_sentence, target_sentence_mask,
                             target_topic_sentence,
                             target_topic_binary_sentence, topic_embedding)

        config['batch_size'] = 1
        config['sort_k_batches'] = 1
        # Get test set stream
        test_stream = get_tr_stream_with_topic_target(**config)

        logger.info("Building sampling model")

        logger.info("Loading the model..")
        model = Model(costs)
        loader = LoadNMT(config['validation_load'])
        loader.set_model_parameters(model, loader.load_parameters_default())

        costs_computer = function([
            target_sentence, target_sentence_mask, source_sentence,
            source_sentence_mask, source_topical_word, target_topic_sentence,
            target_topic_binary_sentence
        ], (costs),
                                  on_unused_input='ignore')

        iterator = test_stream.get_epoch_iterator()

        scores = []
        att_weights = []
        for i, (src, src_mask, trg, trg_mask, te, te_mask, tt, tt_mask, tb,
                tb_mask) in enumerate(iterator):
            costs = costs_computer(*[trg, trg_mask, src, src_mask, te, tt, tb])
            cost = costs.sum()
            print(i, cost)
            scores.append(cost)

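        # Report the mean per-sentence cost; 10007 is presumably the
        # hard-coded number of sentences in the scored set.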
        print(sum(scores) / 10007)
Example #17
monitor_valid = DataStreamMonitoring([cost, error_rate],
                                     data_stream=valid_stream,
                                     prefix="valid",
                                     after_epoch=True)

plot = Plot(
    document=
    'dreem_conv F%d,%d,%d ConvEEG%s%s%s Conv%s%s%s Out%s,dropout%s Noise%s %s'
    % (
        eeg_gaussian_filter_width,
        eeg_gaussian_filter_sigma,
        eeg_gaussian_filter_step,
        repr([x['filter_size'] for x in conv_eeg]),
        repr([x['num_filters'] for x in conv_eeg]),
        repr([x['pool_size'] for x in conv_eeg]),
        repr([x['filter_size'] for x in conv_all]),
        repr([x['num_filters'] for x in conv_all]),
        repr([x['pool_size'] for x in conv_all]),
        repr(out_hidden),
        repr(out_dropout),
        repr(weight_noise),
        step_rule.__class__.__name__,
    ),
    channels=[['train_cost', 'valid_cost'],
              ['train_error_rate', 'valid_error_rate']],
    every_n_batches=monitor_freq,
    after_epoch=True)

model = Model(cost)
main_loop = MainLoop(
    data_stream=stream,
    algorithm=algorithm,
Example #18
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
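    # Build one-hot targets: a (batch_size, 10) matrix with a 1 at each
    # example's true label and 0 elsewhere.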
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()], 1)
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff**2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the ground truth should be greater than or equal to its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less than or equal to its actual score
        cost += tensor.maximum(0, score_diff[indices,
                                             scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]), y.flatten()])**2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]),
                             scores.argmax(axis=1)])**2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (-scores[indices,
                        (scores + epsilon * target_scores).argmax(axis=1)] +
                scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)] -
                scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost_name)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate, momentum=momentum)
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=rule)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        # CallbackExtension(
        #    lambda: rule.learning_rate.set_value(rule.learning_rate.get_value() * 0.9),
        #    after_epoch=True),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm), rule.learning_rate
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_cost', 'test_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {
        'cost': cost_name,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'train_cost': df.train_cost.iloc[-1],
        'test_cost': df.test_cost.iloc[-1],
        'best_test_cost': df.test_cost.min(),
        'train_error': df.train_error_rate.iloc[-1],
        'test_error': df.test_error_rate.iloc[-1],
        'best_test_error': df.test_error_rate.min()
    }
    res = {
        k: float(v) if isinstance(v, numpy.ndarray) else v
        for k, v in res.items()
    }
    json.dump(res, sys.stdout)
    sys.stdout.flush()
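
A hedged example of invoking this experiment directly; the checkpoint filename and hyperparameter values are illustrative assumptions, not part of the original script.

if __name__ == '__main__':
    # All argument values here are assumptions for illustration.
    main('mnist_lr.tar', cost_name='lr',
         learning_rate=0.0001, momentum=0.9, num_epochs=5)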
Example #19
                prefix='train', every_n_batches=config.print_freq),
            DataStreamMonitoring(
                [v for l in m.monitor_vars for v in l],
                valid_stream,
                prefix='valid',
                every_n_batches=config.valid_freq),
            Printing(every_n_batches=config.print_freq, after_epoch=False),
    ]
    extensions.append(FinishAfter(after_n_epochs=200))  # could also use after_n_batches
    if plot_avail:
        plot_channels = (
            [['valid_' + v.name for v in p] for p in m.monitor_vars]
            + [['train_' + v.name for v in p] for p in m.monitor_vars])
        extensions.append(
            Plot(document='CFM_%s_%s_%s_%s_%s_%s_%s_%s_%s_%s_%s_%s'
                 % (config.name, config.couches, config.hidden_dim,
                    config.activation_function_name, config.batch_size,
                    config.w_noise_std, config.i_dropout, config.algo,
                    config.learning_rate_value, config.momentum_value,
                    config.decay_rate_value, config.StepClipping_value),
                 channels=plot_channels,
                 every_n_batches=config.print_freq,
                 after_epoch=False)
        )

    if config.save_freq is not None and dump_path is not None:
        extensions.append(
            SaveLoadParams(path=dump_path + 'CFM_%s_%s_%s_%s_%s_%s_%s_%s_%s_%s_%s_%s.pkl'
                           % (config.name, config.couches, config.hidden_dim,
                              config.activation_function_name, config.batch_size,
                              config.w_noise_std, config.i_dropout, config.algo,
                              config.learning_rate_value, config.momentum_value,
                              config.decay_rate_value, config.StepClipping_value),
                           model=model,
                           before_training=True,
                           after_training=True,
Example #20
def construct_monitors(algorithm,
                       task,
                       n_patches,
                       x,
                       x_uncentered,
                       hs,
                       graph,
                       plot_url,
                       name,
                       ram,
                       model,
                       cost,
                       n_spatial_dims,
                       patchmonitor_interval=100,
                       **kwargs):
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))

    #for i in xrange(n_patches):
    #    channels.append(hs[:, i].mean(), "h%i.mean" % i)

    channels.append(util.named(savings.mean(), "savings.mean"))

    for variable_name in "location scale".split():
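        # Look the variable up by name (location / scale) in the local scope.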
        variable = locals()[variable_name]
        channels.append(
            variable.var(axis=0).mean(), "%s.batch_variance" % variable_name)
        channels.append(
            variable.var(axis=1).mean(), "%s.time_variance" % variable_name)

    #step_norms = util.Channels()
    #step_norms.extend(util.named(l2_norm([algorithm.steps[param]]),
    #                             "%s.step_norm" % name)
    #                  for name, param in model.get_parameter_dict().items())
    #step_channels = step_norms.get_channels()

    #for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
    #    quantity = activation.mean()
    #    quantity.name = "%s.mean" % util.get_path(activation)
    #    channels.append(quantity)

    for parameter in graph.parameters:
        if parameter.name in "gamma beta".split():
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            channels.append(quantity)

    extensions = []

    #extensions.append(TrainingDataMonitoring(
    #    step_channels,
    #    prefix="train", after_epoch=True))

    extensions.extend(
        DataStreamMonitoring((channels.get_channels() + [cost]),
                             data_stream=task.get_stream(which),
                             prefix=which,
                             after_epoch=True)
        for which in "train valid test".split())

    patchmonitor = None
    patchmonitor_klass = None
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring

    if patchmonitor_klass:
        # get patches from original (uncentered) images
        patch = T.stack(*[
            ram.attention.crop(x_uncentered, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)
        ])
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
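        # patch now has the batch axis first: (batch, n_patches, ...)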

        patchmonitor = patchmonitor_klass(
            task.get_stream("valid", SequentialScheme(5, 5)),
            every_n_batches=patchmonitor_interval,
            extractor=theano.function([x_uncentered],
                                      [location, scale, patch]),
            map_to_input_space=masonry.static_map_to_input_space)
        patchmonitor.save_patches("test.png")
        extensions.append(patchmonitor)

    plot_channels = []
    plot_channels.extend(task.plot_channels())
    plot_channels.append(["train_cost"])
    #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

    extensions.append(
        Plot(name,
             channels=plot_channels,
             after_epoch=True,
             server_url=plot_url))

    return extensions
Example #21
                            parameters=ComputationGraph(cost).parameters,
                            step_rule=step_rule)

monitor_cost = TrainingDataMonitoring([cost_reg, error_rate_reg],
                                      prefix="train",
                                      every_n_batches=monitor_freq,
                                      after_epoch=False)

monitor_valid = DataStreamMonitoring([cost, error_rate],
                                     data_stream=valid_stream,
                                     prefix="valid",
                                     after_epoch=True)

plot = Plot(document='dreem_conv',
            channels=[['train_cost', 'valid_cost'],
                      ['train_error_rate', 'valid_error_rate']],
            every_n_batches=monitor_freq,
            after_epoch=True)

model = Model(cost)
main_loop = MainLoop(data_stream=stream, algorithm=algorithm,
                     extensions=[
                                 ProgressBar(),

                                 monitor_cost, monitor_valid,

                                 plot,
                                 Printing(every_n_batches=monitor_freq, after_epoch=True),

                                 SaveLoadParams('conv_params.pkl', Model(cost), before_training=True, after_epoch=True),
Example #22
def initialize_all(config, save_path, bokeh_name, params, bokeh_server, bokeh,
                   test_tag, use_load_ext, load_log, fast_start):
    root_path, extension = os.path.splitext(save_path)

    data = Data(**config['data'])
    train_conf = config['training']
    recognizer = create_model(config, data, test_tag)

    # Separate attention_params to be handled differently
    # when regularization is applied
    attention = recognizer.generator.transition.attention
    attention_params = Selector(attention).get_parameters().values()

    logger.info(
        "Initialization schemes for all bricks.\n"
        "Works well only in my branch with __repr__ added to all them,\n"
        "there is an issue #463 in Blocks to do that properly.")

    def show_init_scheme(cur):
        result = dict()
        for attr in dir(cur):
            if attr.endswith('_init'):
                result[attr] = getattr(cur, attr)
        for child in cur.children:
            result[child.name] = show_init_scheme(child)
        return result

    logger.info(pprint.pformat(show_init_scheme(recognizer)))

    prediction, prediction_mask = add_exploration(recognizer, data, train_conf)

    #
    # Observables:
    #
    primary_observables = []  # monitored each batch
    secondary_observables = []  # monitored every 10 batches
    validation_observables = []  # monitored on the validation set

    cg = recognizer.get_cost_graph(batch=True,
                                   prediction=prediction,
                                   prediction_mask=prediction_mask)
    labels, = VariableFilter(applications=[recognizer.cost], name='labels')(cg)
    labels_mask, = VariableFilter(applications=[recognizer.cost],
                                  name='labels_mask')(cg)

    gain_matrix = VariableFilter(
        theano_name=RewardRegressionEmitter.GAIN_MATRIX)(cg)
    if len(gain_matrix):
        gain_matrix, = gain_matrix
        primary_observables.append(named_copy(gain_matrix.min(), 'min_gain'))
        primary_observables.append(named_copy(gain_matrix.max(), 'max_gain'))

    batch_cost = cg.outputs[0].sum()
    batch_size = named_copy(recognizer.recordings.shape[1], "batch_size")
    # Assumes constant batch size. `aggregation.mean` is not used because
    # of Blocks #514.
    cost = batch_cost / batch_size
    cost.name = "sequence_total_cost"
    logger.info("Cost graph is built")

    # Fetch variables useful for debugging.
    # It is important not to use any aggregation schemes here,
    # as it's currently impossible to spread the effect of
    # regularization on their variables, see Blocks #514.
    cost_cg = ComputationGraph(cost)
    r = recognizer
    energies, = VariableFilter(applications=[r.generator.readout.readout],
                               name="output_0")(cost_cg)
    bottom_output = VariableFilter(applications=[r.bottom.apply],
                                   name="output")(cost_cg)[-1]
    attended, = VariableFilter(applications=[r.generator.transition.apply],
                               name="attended")(cost_cg)
    attended_mask, = VariableFilter(applications=[
        r.generator.transition.apply
    ],
                                    name="attended_mask")(cost_cg)
    weights, = VariableFilter(applications=[r.generator.evaluate],
                              name="weights")(cost_cg)
    max_recording_length = named_copy(r.recordings.shape[0],
                                      "max_recording_length")
    # To exclude subsampling related bugs
    max_attended_mask_length = named_copy(attended_mask.shape[0],
                                          "max_attended_mask_length")
    max_attended_length = named_copy(attended.shape[0], "max_attended_length")
    max_num_phonemes = named_copy(labels.shape[0], "max_num_phonemes")
    min_energy = named_copy(energies.min(), "min_energy")
    max_energy = named_copy(energies.max(), "max_energy")
    mean_attended = named_copy(abs(attended).mean(), "mean_attended")
    mean_bottom_output = named_copy(
        abs(bottom_output).mean(), "mean_bottom_output")
    weights_penalty = named_copy(monotonicity_penalty(weights, labels_mask),
                                 "weights_penalty")
    weights_entropy = named_copy(entropy(weights, labels_mask),
                                 "weights_entropy")
    mask_density = named_copy(labels_mask.mean(), "mask_density")
    cg = ComputationGraph([
        cost, weights_penalty, weights_entropy, min_energy, max_energy,
        mean_attended, mean_bottom_output, batch_size, max_num_phonemes,
        mask_density
    ])
    # Regularization. It is applied explicitly to all variables
    # of interest, it could not be applied to the cost only as it
    # would not have effect on auxiliary variables, see Blocks #514.
    reg_config = config['regularization']
    regularized_cg = cg
    if reg_config.get('dropout'):
        logger.info('apply dropout')
        regularized_cg = apply_dropout(cg, [bottom_output], 0.5)
    if reg_config.get('noise'):
        logger.info('apply noise')
        noise_subjects = [
            p for p in cg.parameters if p not in attention_params
        ]
        regularized_cg = apply_noise(cg, noise_subjects, reg_config['noise'])

    train_cost = regularized_cg.outputs[0]
    if reg_config.get("penalty_coof", .0) > 0:
        # big warning!!!
        # here we assume that:
        # regularized_weights_penalty = regularized_cg.outputs[1]
        train_cost = (train_cost + reg_config.get("penalty_coof", .0) *
                      regularized_cg.outputs[1] / batch_size)
    if reg_config.get("decay", .0) > 0:
        train_cost = (
            train_cost + reg_config.get("decay", .0) *
            l2_norm(VariableFilter(roles=[WEIGHT])(cg.parameters))**2)

    train_cost = named_copy(train_cost, 'train_cost')

    gradients = None
    if reg_config.get('adaptive_noise'):
        logger.info('apply adaptive noise')
        if ((reg_config.get("penalty_coof", .0) > 0)
                or (reg_config.get("decay", .0) > 0)):
            logger.error('using adaptive noise with alignment weight penalty '
                         'or weight decay is probably stupid')
        train_cost, regularized_cg, gradients, noise_brick = apply_adaptive_noise(
            cg,
            cg.outputs[0],
            variables=cg.parameters,
            num_examples=data.get_dataset('train').num_examples,
            parameters=SpeechModel(
                regularized_cg.outputs[0]).get_parameter_dict().values(),
            **reg_config.get('adaptive_noise'))
        train_cost.name = 'train_cost'
        adapt_noise_cg = ComputationGraph(train_cost)
        model_prior_mean = named_copy(
            VariableFilter(applications=[noise_brick.apply],
                           name='model_prior_mean')(adapt_noise_cg)[0],
            'model_prior_mean')
        model_cost = named_copy(
            VariableFilter(applications=[noise_brick.apply],
                           name='model_cost')(adapt_noise_cg)[0], 'model_cost')
        model_prior_variance = named_copy(
            VariableFilter(applications=[noise_brick.apply],
                           name='model_prior_variance')(adapt_noise_cg)[0],
            'model_prior_variance')
        regularized_cg = ComputationGraph(
            [train_cost, model_cost] + regularized_cg.outputs +
            [model_prior_mean, model_prior_variance])
        primary_observables += [
            regularized_cg.outputs[1],  # model cost
            regularized_cg.outputs[2],  # task cost
            regularized_cg.outputs[-2],  # model prior mean
            regularized_cg.outputs[-1]
        ]  # model prior variance

    # `Model` is an odd class whose exact role took long discussions (with
    # Bart) to settle. It is already useful, though: for example, it can
    # extract all the parameters from the computation graph and give them
    # hierarchical names. This helps to notice when, because of some bug,
    # a parameter is missing from the computation graph.
    model = SpeechModel(train_cost)
    if params:
        logger.info("Load parameters from " + params)
        # please note: we cannot use recognizer.load_params
        # as it builds a new computation graph that does not have
        # shared variables added by adaptive weight noise
        param_values = load_parameter_values(params)
        model.set_parameter_values(param_values)

    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat([(key, parameters[key].get_value().shape)
                                for key in sorted(parameters.keys())],
                               width=120))

    # Define the training algorithm.
    clipping = StepClipping(train_conf['gradient_threshold'])
    clipping.threshold.name = "gradient_norm_threshold"
    rule_names = train_conf.get('rules', ['momentum'])
    core_rules = []
    if 'momentum' in rule_names:
        logger.info("Using scaling and momentum for training")
        core_rules.append(Momentum(train_conf['scale'],
                                   train_conf['momentum']))
    if 'adadelta' in rule_names:
        logger.info("Using AdaDelta for training")
        core_rules.append(
            AdaDelta(train_conf['decay_rate'], train_conf['epsilon']))
    max_norm_rules = []
    if reg_config.get('max_norm', 0) > 0:
        logger.info("Apply MaxNorm")
        maxnorm_subjects = VariableFilter(roles=[WEIGHT])(cg.parameters)
        if reg_config.get('max_norm_exclude_lookup', False):
            maxnorm_subjects = [
                v for v in maxnorm_subjects
                if not isinstance(get_brick(v), LookupTable)
            ]
        logger.info("Parameters covered by MaxNorm:\n" + pprint.pformat(
            [name for name, p in parameters.items() if p in maxnorm_subjects]))
        logger.info("Parameters NOT covered by MaxNorm:\n" + pprint.pformat([
            name for name, p in parameters.items() if not p in maxnorm_subjects
        ]))
        max_norm_rules = [
            Restrict(VariableClipping(reg_config['max_norm'], axis=0),
                     maxnorm_subjects)
        ]
    burn_in = []
    if train_conf.get('burn_in_steps', 0):
        burn_in.append(BurnIn(num_steps=train_conf['burn_in_steps']))
    algorithm = GradientDescent(
        cost=train_cost,
        parameters=parameters.values(),
        gradients=gradients,
        step_rule=CompositeRule(
            [clipping] + core_rules + max_norm_rules +
            # Parameters are not changed at all
            # when nans are encountered.
            [RemoveNotFinite(0.0)] + burn_in),
        on_unused_sources='warn')

    logger.debug("Scan Ops in the gradients")
    gradient_cg = ComputationGraph(algorithm.gradients.values())
    for op in ComputationGraph(gradient_cg).scans:
        logger.debug(op)

    # More variables for debugging: some of them can be added only
    # after the `algorithm` object is created.
    secondary_observables += list(regularized_cg.outputs)
    if 'train_cost' not in [v.name for v in secondary_observables]:
        secondary_observables += [train_cost]
    secondary_observables += [
        algorithm.total_step_norm, algorithm.total_gradient_norm,
        clipping.threshold
    ]
    for name, param in parameters.items():
        num_elements = numpy.prod(param.get_value().shape)
        # normalize by sqrt(num_elements) so that norms are comparable
        # across parameters of different sizes (RMS of the entries)
        norm = param.norm(2) / num_elements**0.5
        grad_norm = algorithm.gradients[param].norm(2) / num_elements**0.5
        step_norm = algorithm.steps[param].norm(2) / num_elements**0.5
        stats = tensor.stack(norm, grad_norm, step_norm, step_norm / grad_norm)
        stats.name = name + '_stats'
        secondary_observables.append(stats)

    primary_observables += [
        train_cost, algorithm.total_gradient_norm, algorithm.total_step_norm,
        clipping.threshold, max_recording_length, max_attended_length,
        max_attended_mask_length
    ]

    validation_observables += [
        rename(aggregation.mean(batch_cost, batch_size), cost.name),
        rename(aggregation.sum_(batch_size), 'num_utterances'),
        weights_entropy, weights_penalty
    ]

    def attach_aggregation_schemes(variables):
        # Aggregation specification has to be factored out as a separate
        # function as it has to be applied at the very last stage
        # separately to training and validation observables.
        result = []
        for var in variables:
            if var.name == 'weights_penalty':
                result.append(
                    rename(aggregation.mean(var, batch_size),
                           'weights_penalty_per_recording'))
            elif var.name == 'weights_entropy':
                result.append(
                    rename(aggregation.mean(var, labels_mask.sum()),
                           'weights_entropy_per_label'))
            else:
                result.append(var)
        return result
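
    # For reference (not part of the original flow): aggregation.mean(num, den)
    # produces a record whose aggregated value over an epoch is
    # sum(num over batches) / sum(den over batches), which is why the schemes
    # above yield entropy per label rather than per batch. A hypothetical
    # channel built the same way would look like:
    #
    #     rename(aggregation.mean(batch_cost, batch_size), 'cost_per_utterance')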

    mon_conf = config['monitoring']

    # Build main loop.
    logger.info("Initialize extensions")
    extensions = []
    if use_load_ext and params:
        extensions.append(
            Load(params, load_iteration_state=True, load_log=True))
    if load_log and params:
        extensions.append(LoadLog(params))
    extensions += [
        Timing(after_batch=True),
        CGStatistics(),
        #CodeVersion(['lvsr']),
    ]
    extensions.append(
        TrainingDataMonitoring(primary_observables, after_batch=True))
    average_monitoring = TrainingDataMonitoring(
        attach_aggregation_schemes(secondary_observables),
        prefix="average",
        every_n_batches=10)
    extensions.append(average_monitoring)
    validation = DataStreamMonitoring(
        attach_aggregation_schemes(validation_observables),
        data.get_stream("valid", shuffle=False),
        prefix="valid").set_conditions(
            before_first_epoch=not fast_start,
            every_n_epochs=mon_conf['validate_every_epochs'],
            every_n_batches=mon_conf['validate_every_batches'],
            after_training=False)
    extensions.append(validation)
    per = PhonemeErrorRate(recognizer, data, **config['monitoring']['search'])
    per_monitoring = DataStreamMonitoring(
        [per],
        data.get_stream("valid", batches=False, shuffle=False),
        prefix="valid").set_conditions(
            before_first_epoch=not fast_start,
            every_n_epochs=mon_conf['search_every_epochs'],
            every_n_batches=mon_conf['search_every_batches'],
            after_training=False)
    extensions.append(per_monitoring)
    track_the_best_per = TrackTheBest(
        per_monitoring.record_name(per)).set_conditions(
            before_first_epoch=True, after_epoch=True)
    track_the_best_cost = TrackTheBest(
        validation.record_name(cost)).set_conditions(before_first_epoch=True,
                                                     after_epoch=True)
    extensions += [track_the_best_cost, track_the_best_per]
    extensions.append(
        AdaptiveClipping(algorithm.total_gradient_norm.name,
                         clipping,
                         train_conf['gradient_threshold'],
                         decay_rate=0.998,
                         burnin_period=500))
    extensions += [
        SwitchOffLengthFilter(
            data.length_filter,
            after_n_batches=train_conf.get('stop_filtering')),
        FinishAfter(after_n_batches=train_conf['num_batches'],
                    after_n_epochs=train_conf['num_epochs']).add_condition(
                        ["after_batch"], _gradient_norm_is_none),
    ]
    channels = [
        # Plot 1: training and validation costs
        [
            average_monitoring.record_name(train_cost),
            validation.record_name(cost)
        ],
        # Plot 2: gradient norm,
        [
            average_monitoring.record_name(algorithm.total_gradient_norm),
            average_monitoring.record_name(clipping.threshold)
        ],
        # Plot 3: phoneme error rate
        [per_monitoring.record_name(per)],
        # Plot 4: training and validation mean weight entropy
        [
            average_monitoring._record_name('weights_entropy_per_label'),
            validation._record_name('weights_entropy_per_label')
        ],
        # Plot 5: training and validation monotonicity penalty
        [
            average_monitoring._record_name('weights_penalty_per_recording'),
            validation._record_name('weights_penalty_per_recording')
        ]
    ]
    if bokeh:
        extensions += [
            Plot(bokeh_name if bokeh_name else os.path.basename(save_path),
                 channels,
                 every_n_batches=10,
                 server_url=bokeh_server),
        ]
    extensions += [
        Checkpoint(save_path,
                   before_first_epoch=not fast_start,
                   after_epoch=True,
                   every_n_batches=train_conf.get('save_every_n_batches'),
                   save_separately=["model", "log"],
                   use_cpickle=True).add_condition(
                       ['after_epoch'],
                       OnLogRecord(track_the_best_per.notification_name),
                       (root_path + "_best" + extension, )).add_condition(
                           ['after_epoch'],
                           OnLogRecord(track_the_best_cost.notification_name),
                           (root_path + "_best_ll" + extension, )),
        ProgressBar()
    ]
    extensions.append(EmbedIPython(use_main_loop_run_caller_env=True))
    if config['net']['criterion']['name'].startswith('mse'):
        extensions.append(
            LogInputsGains(labels, cg, recognizer.generator.readout.emitter,
                           data))

    if train_conf.get('patience'):
        patience_conf = train_conf['patience']
        if not patience_conf.get('notification_names'):
            # setdefault will not work for empty list
            patience_conf['notification_names'] = [
                track_the_best_per.notification_name,
                track_the_best_cost.notification_name
            ]
        extensions.append(Patience(**patience_conf))

    extensions.append(
        Printing(every_n_batches=1, attribute_filter=PrintingFilterList()))

    return model, algorithm, data, extensions
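
A minimal sketch, not from the original source, of how the tuple returned
above would typically be wired into a Blocks main loop (`initialize_all`
stands in for the function above; the "train" stream name is an assumption):

def run_training(config, save_path):
    from blocks.main_loop import MainLoop
    model, algorithm, data, extensions = initialize_all(config, save_path)
    main_loop = MainLoop(
        model=model,
        algorithm=algorithm,
        data_stream=data.get_stream("train"),  # assumed Data API, as used above
        extensions=extensions)
    main_loop.run()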
Example #23
def construct_monitors(algorithm, task, model, graphs, outputs, updates,
                       monitor_options, n_spatial_dims, plot_url,
                       hyperparameters, patchmonitor_interval, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()
        ])
        step_channels.append(
            algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(
            algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        from extensions import Compressor
        for step_rule in algorithm.step_rule.components:
            if isinstance(step_rule, Compressor):
                step_channels.append(
                    step_rule.norm.copy(name="compressor.norm"))
                step_channels.append(
                    step_rule.newnorm.copy(name="compressor.newnorm"))
                step_channels.append(
                    step_rule.median.copy(name="compressor.median"))
                step_channels.append(
                    step_rule.ratio.copy(name="compressor.ratio"))

        step_channels.extend(
            outputs["train"][key] for key in
            "cost emitter_cost excursion_cost cross_entropy error_rate".split())

        step_channels.extend(
            util.uniqueify_names_last_resort(
                util.dedup((
                    var.mean().copy(name="bn_stat:%s" % util.get_path(var))
                    for var in graph.deep_ancestors([outputs["train"]["cost"]])
                    if hasattr(var.tag, "batch_normalization_brick")),
                           equal=util.equal_computations)))

        logger.warning("constructing training data monitor")
        extensions.append(
            TrainingDataMonitoring(step_channels,
                                   prefix="iteration",
                                   after_batch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(
                hyperparameters[key].copy(name="parameter:%s" % key))
        extensions.append(
            DataStreamMonitoring(data_independent_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key]
                        for key in "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        channels.append(
            outputs[which_set]["savings"].mean().copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "true_scale raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(
                        getattr(outputs[which_set][key],
                                stat)(axis=1).copy(name="%s.%s" % (key, stat)))
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var),
                                               []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # recover the corresponding batch statistic from the
                    # exponential-moving-average update expression; note this
                    # is fragile, as it relies on the exact graph structure
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(
                    T.stack(errors).mean().copy(
                        name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        extensions.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=task.get_stream(which_set,
                                                             monitor=True)))

    if "patches" in monitor_options:
        from patchmonitor import PatchMonitoring, VideoPatchMonitoring

        patchmonitor = None
        patchmonitor_klass = None
        if n_spatial_dims == 2:
            patchmonitor_klass = PatchMonitoring
        elif n_spatial_dims == 3:
            patchmonitor_klass = VideoPatchMonitoring

        if patchmonitor_klass:
            for which in "train valid".split():
                patch = outputs[which]["patch"]
                patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
                patch_extractor = theano.function(
                    [outputs[which][key] for key in "x x_shape".split()], [
                        outputs[which][key]
                        for key in "raw_location raw_scale".split()
                    ] + [patch])

                patchmonitor = patchmonitor_klass(
                    save_to="%s_patches_%s" % (hyperparameters["name"], which),
                    data_stream=task.get_stream(which,
                                                shuffle=False,
                                                num_examples=10),
                    every_n_batches=patchmonitor_interval,
                    extractor=patch_extractor,
                    map_to_input_space=attention.static_map_to_input_space)
                patchmonitor.save_patches("patchmonitor_test.png")
                extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
        extensions.append(
            Plot(hyperparameters["name"],  # the experiment name, as used above
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
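
A note on the Plot channels used throughout these examples: `channels` is a
list of lists, where each inner list becomes one figure and each record name
in it one curve. A minimal sketch (the record names are assumed to exist in
the training log):

from blocks.extras.extensions.plot import Plot

plot = Plot('demo',
            channels=[['train_cost', 'valid_cost'],  # figure 1: two curves
                      ['total_gradient_norm']],      # figure 2: one curve
            after_epoch=True)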
Example #24
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'],
        name='word_encoder')
    decoder = Decoder(vocab_size=config['trg_vocab_size'],
                      embedding_dim=config['dec_embed'],
                      state_dim=config['dec_nhids'],
                      representation_dim=config['enc_nhids'] * 2,
                      match_function=config['match_function'],
                      use_doubly_stochastic=config['use_doubly_stochastic'],
                      lambda_ds=config['lambda_ds'],
                      use_local_attention=config['use_local_attention'],
                      window_size=config['window_size'],
                      use_step_decay_cost=config['use_step_decay_cost'],
                      use_concentration_cost=config['use_concentration_cost'],
                      lambda_ct=config['lambda_ct'],
                      use_stablilizer=config['use_stablilizer'],
                      lambda_st=config['lambda_st'])
    # here the attended dim (representation_dim) of the decoder is
    # 2 * enc_nhids because the encoder context is bidirectional

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        context_sentences = []
        context_sentence_masks = []
        for i in range(config['ctx_num']):
            context_sentences.append(tensor.lmatrix('context_' + str(i)))
            context_sentence_masks.append(
                tensor.matrix('context_' + str(i) + '_mask'))
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        sampling_input = tensor.lmatrix('input')
        dev_source = tensor.lmatrix('dev_source')
        dev_target=tensor.lmatrix('dev_target')

        # Get training and development set streams
        tr_stream = get_tr_stream_withContext(**config)
        dev_stream = get_dev_stream_with_grdTruth(**config)

        # Get cost of the model
        sentence_representations_list = encoder.apply(
            source_sentence, source_sentence_mask)
        sentence_representations_list = (
            sentence_representations_list.dimshuffle(['x', 0, 1, 2]))
        sentence_masks_list = source_sentence_mask.T.dimshuffle(['x', 0, 1])
        for i in range(config['ctx_num']):
            tmp_rep = encoder.apply(context_sentences[i],
                                    context_sentence_masks[i])
            tmp_rep = tmp_rep.dimshuffle(['x', 0, 1, 2])
            sentence_representations_list = tensor.concatenate(
                [sentence_representations_list, tmp_rep], axis=0)
            sentence_masks_list = tensor.concatenate(
                [sentence_masks_list,
                 context_sentence_masks[i].T.dimshuffle(['x', 0, 1])],
                axis=0)


        cost = decoder.cost(sentence_representations_list,
                            sentence_masks_list,
                            target_sentence,
                            target_sentence_mask)

        logger.info('Creating computational graph')
        perplexity = tensor.exp(cost)
        perplexity.name = 'perplexity'
        costs_computer = function(
            context_sentences + context_sentence_masks +
            [target_sentence, target_sentence_mask,
             source_sentence, source_sentence_mask],
            perplexity)
        cg = ComputationGraph(cost)

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()

        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout (as in GroundHog)
            logger.info('Applying dropout')
            dropout_inputs = [x for x in cg.intermediary_variables
                              if x.name == 'maxout_apply_output']
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(
                cg, enc_params+dec_params, config['weight_noise_ff'])


        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                                   Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(value.get_value().shape, name))
        logger.info("Total number of parameters: {}"
                    .format(len(enc_dec_param_dict)))


        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([perplexity], after_batch=True),
            CheckpointNMT(config['saveto'],
                          config['model_name'],
                          every_n_batches=config['save_freq'])
        ]

        # Set up beam search and sampling computation graphs if necessary
        if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
            logger.info("Building sampling model")
            sampling_representation = encoder.apply(
                sampling_input, tensor.ones(sampling_input.shape))
            generated = decoder.generate(
                sampling_input, sampling_representation)
            search_model = Model(generated)
            _, samples = VariableFilter(
                bricks=[decoder.sequence_generator], name="outputs")(
                    ComputationGraph(generated[1]))

        # Add sampling
        if config['hook_samples'] >= 1:
            logger.info("Building sampler")
            extensions.append(
                Sampler(model=search_model, data_stream=tr_stream,
                        model_name=config['model_name'],
                        hook_samples=config['hook_samples'],
                        every_n_batches=config['sampling_freq'],
                        src_vocab_size=config['src_vocab_size']))

        # Add early stopping based on bleu
        if False:
            logger.info("Building bleu validator")
            extensions.append(
                BleuValidator(sampling_input, samples=samples, config=config,
                              model=search_model, data_stream=dev_stream,
                              normalize=config['normalized_bleu'],
                              every_n_batches=config['bleu_val_freq'],
                              n_best=3,
                              track_n_models=6))

        logger.info("Building perplexity validator")
        extensions.append(
                pplValidation(dev_source,dev_target, config=config,
                        model=costs_computer, data_stream=dev_stream,
                        model_name=config['model_name'],
                        every_n_batches=config['sampling_freq']))


        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En', channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        initial_learning_rate = config['initial_learning_rate']
        log_path = os.path.join(config['saveto'], 'log')
        if config['reload'] and os.path.exists(log_path):
            with open(log_path, 'rb') as source:
                log = cPickle.load(source)
                last = max(log.keys()) - 1
                if 'learning_rate' in log[last]:
                    initial_learning_rate = log[last]['learning_rate']

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([Scale(initial_learning_rate),
                                     StepClipping(config['step_clipping']),
                                     eval(config['step_rule'])()]))

        _learning_rate = algorithm.step_rule.components[0].learning_rate
        if config['learning_rate_decay']:
            extensions.append(
                LearningRateHalver(record_name='validation_cost',
                                   comparator=lambda x, y: x > y,
                                   learning_rate=_learning_rate,
                                   patience_default=3))
        else:
            extensions.append(OldModelRemover(saveto=config['saveto']))

        if config['learning_rate_grow']:
            extensions.append(
                LearningRateDoubler(record_name='validation_cost',
                                    comparator=lambda x, y: x < y,
                                    learning_rate=_learning_rate,
                                    patience_default=3))

        extensions.append(
            SimplePrinting(config['model_name'], after_batch=True))

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(
            model=training_model,
            algorithm=algorithm,
            data_stream=tr_stream,
            extensions=extensions
        )

        # Train!
        main_loop.run()

    elif mode == 'ppl':
        # Create Theano variables
        logger.info('Creating theano variables')
        context_sentences = []
        context_sentence_masks = []
        for i in range(config['ctx_num']):
            context_sentences.append(tensor.lmatrix('context_' + str(i)))
            context_sentence_masks.append(
                tensor.matrix('context_' + str(i) + '_mask'))
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')

        # Get training and development set streams
        #tr_stream = get_tr_stream_withContext(**config)
        dev_stream = get_dev_stream_withContext_grdTruth(**config)

        # Get cost of the model
        sentence_representations_list = encoder.apply(
            source_sentence, source_sentence_mask)
        sentence_representations_list = (
            sentence_representations_list.dimshuffle(['x', 0, 1, 2]))
        sentence_masks_list = source_sentence_mask.T.dimshuffle(['x', 0, 1])
        for i in range(config['ctx_num']):
            tmp_rep = encoder.apply(context_sentences[i],
                                    context_sentence_masks[i])
            tmp_rep = tmp_rep.dimshuffle(['x', 0, 1, 2])
            sentence_representations_list = tensor.concatenate(
                [sentence_representations_list, tmp_rep], axis=0)
            sentence_masks_list = tensor.concatenate(
                [sentence_masks_list,
                 context_sentence_masks[i].T.dimshuffle(['x', 0, 1])],
                axis=0)


        cost = decoder.cost(sentence_representations_list,
                            sentence_masks_list,
                            target_sentence,
                            target_sentence_mask)

        logger.info('Creating computational graph')
        costs_computer = function(
            context_sentences + context_sentence_masks +
            [target_sentence, target_sentence_mask,
             source_sentence, source_sentence_mask],
            cost)


        logger.info("Loading the model..")
        model = Model(cost)
        #loader = LoadNMT(config['saveto'])
        loader = LoadNMT(config['validation_load']);
        loader.set_model_parameters(model, loader.load_parameters_default())
        logger.info("Started Validation: ")

        ts = dev_stream.get_epoch_iterator()
        total_cost = 0.0
        total_tokens = 0.0
        #pbar = ProgressBar(max_value=len(ts)).start()#modified
        pbar = ProgressBar(max_value=10000).start()
        for i, (ctx_0, ctx_0_mask, ctx_1, ctx_1_mask, ctx_2, ctx_2_mask,
                src, src_mask, trg, trg_mask) in enumerate(ts):
            costs = costs_computer(*[ctx_0, ctx_1, ctx_2,
                                     ctx_0_mask, ctx_1_mask, ctx_2_mask,
                                     trg, trg_mask, src, src_mask])
            cost = costs.sum()
            total_cost += cost
            total_tokens += trg_mask.sum()
            pbar.update(i + 1)
        total_cost /= total_tokens
        pbar.finish()
        #dev_stream.reset()

        # run afterprocess
        # self.ap.main()
        total_cost = 2**total_cost
        print("Average validation cost: " + str(total_cost))
    elif mode == 'translate':

        logger.info('Creating theano variables')
        context_sentences = []
        context_sentence_masks = []
        for i in range(config['ctx_num']):
            context_sentences.append(tensor.lmatrix('context_' + str(i)))
            context_sentence_masks.append(
                tensor.matrix('context_' + str(i) + '_mask'))
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')

        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1
        trg_vocab = _ensure_special_tokens(
            cPickle.load(open(config['trg_vocab'], 'rb')), bos_idx=0,
            eos_idx=trg_eos_idx, unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}
        config['batch_size'] = 1

        sentence_representations_list = encoder.apply(
            source_sentence, source_sentence_mask)
        sentence_representations_list = (
            sentence_representations_list.dimshuffle(['x', 0, 1, 2]))
        sentence_masks_list = source_sentence_mask.T.dimshuffle(['x', 0, 1])
        for i in range(config['ctx_num']):
            tmp_rep = encoder.apply(context_sentences[i],
                                    context_sentence_masks[i])
            tmp_rep = tmp_rep.dimshuffle(['x', 0, 1, 2])
            sentence_representations_list = tensor.concatenate(
                [sentence_representations_list, tmp_rep], axis=0)
            sentence_masks_list = tensor.concatenate(
                [sentence_masks_list,
                 context_sentence_masks[i].T.dimshuffle(['x', 0, 1])],
                axis=0)
        generated = decoder.generate(sentence_representations_list,sentence_masks_list)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        #loader = LoadNMT(config['saveto'])
        loader = LoadNMT(config['validation_load']);
        loader.set_model_parameters(model, loader.load_parameters_default())

        logger.info("Started translation: ")
        test_stream = get_dev_stream_withContext(**config)
        ts = test_stream.get_epoch_iterator()
        rts = open(config['val_set_source']).readlines()
        ftrans_original = open(config['val_output_orig'], 'w')
        saved_weights = []
        total_cost = 0.0

        pbar = ProgressBar(max_value=len(rts)).start()
        for i, (line, line_raw) in enumerate(zip(ts, rts)):
            trans_in = line_raw[3].split()
            seqs = []
            input_ = []
            input_mask = []
            for j in range(config['ctx_num'] + 1):
                seqs.append(sutils._oov_to_unk(
                    line[2 * j][0], config['src_vocab_size'], unk_idx))
                input_mask.append(numpy.tile(line[2 * j + 1][0],
                                             (config['beam_size'], 1)))
                input_.append(numpy.tile(seqs[j], (config['beam_size'], 1)))
            #v=costs_computer(input_[0]);
            # draw sample, checking to ensure we don't get an empty string back
            trans, costs, attendeds, weights = \
                beam_search.search(
                    input_values={source_sentence: input_[3],source_sentence_mask:input_mask[3],
                                  context_sentences[0]: input_[0],context_sentence_masks[0]:input_mask[0],
                                  context_sentences[1]: input_[1],context_sentence_masks[1]:input_mask[1],
                                  context_sentences[2]: input_[2],context_sentence_masks[2]:input_mask[2]},
                    max_length=3*len(seqs[2]), eol_symbol=trg_eos_idx,
                    ignore_first_eol=True)

            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths

            b = numpy.argsort(costs)[0]
            #best=numpy.argsort(costs)[0:config['beam_size']];
            #for b in best:
            try:
                total_cost += costs[b]
                trans_out = trans[b]
                totalLen = 4 * len(line[0][0])
                #weight = weights[b][:, :totalLen]
                weight = weights
                trans_out = sutils._idx_to_word(trans_out, trg_ivocab)
            except ValueError:
                logger.info(
                    "Can NOT find a translation for line: {}".format(i+1))
                trans_out = '<UNK>'
            saved_weights.append(weight)
            print(' '.join(trans_out), file=ftrans_original)
            pbar.update(i + 1)

        pbar.finish()
        logger.info("Total cost of the test: {}".format(total_cost))
        cPickle.dump(saved_weights, open(config['attention_weights'], 'wb'))
        ftrans_original.close()
        ap = afterprocesser(config)
        ap.main()
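
The 'translate' branch above selects the hypothesis with the lowest
(optionally length-normalized) cost; `numpy.argsort(costs)[0]` is simply the
argmin. An equivalent minimal sketch, assuming `trans` and `costs` come from
`beam_search.search` as above:

import numpy

lengths = numpy.array([len(s) for s in trans])
best = numpy.argmin(costs / lengths)  # length-normalized best hypothesis
trans_out = trans[best]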
Example #25
        ]
    if valid_stream is not None and config.valid_freq != -1:
        extensions += [
            DataStreamMonitoring([v for l in m.monitor_vars_valid for v in l],
                                 valid_stream,
                                 prefix='valid'),
        ]
    if plot_avail:
        plot_channels = [
            ['train_' + v.name for v in lt] + ['valid_' + v.name for v in lv]
            for lt, lv in zip(m.monitor_vars, m.monitor_vars_valid)
        ]
        extensions += [
            Plot(
                document='deepmind_qa_' + model_name,
                channels=plot_channels,
                # server_url='http://localhost:5006/', # If you need, change this
                every_n_batches=config.print_freq)
        ]
    extensions += [
        Printing(after_epoch=True),
        # EvaluateModel(path="", model=test_model, data_stream=valid_stream, vocab_size = ds.vocab_size, vocab = ds.vocab, eval_mode='batch', quiet=True, after_epoch=True),
        ProgressBar()
    ]

    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    # Run the model!
    main_loop.run()
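
The plot channel names built above ('train_' + v.name, 'valid_' + v.name)
rely on the monitoring prefix convention; a minimal sketch, assuming `cost`
is a monitored variable named 'cost' and `valid_stream` a validation stream:

from blocks.extensions.monitoring import DataStreamMonitoring

monitor = DataStreamMonitoring([cost], valid_stream, prefix='valid')
assert monitor.record_name(cost) == 'valid_cost'  # prefix + '_' + name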
Example #26
def train_model(m,
                train_stream,
                valid_stream,
                load_location=None,
                save_location=None):

    # Define the model
    model = Model(m.cost_reg)

    ae_excl_vars = set()
    if hasattr(m, 'ae_costs'):
        for i, cost in enumerate(m.ae_costs):
            print "Trianing stacked AE layer", i + 1
            # train autoencoder component separately
            cost.name = 'ae_cost%d' % i

            cg = ComputationGraph(cost)
            params = set(cg.parameters) - ae_excl_vars
            ae_excl_vars = ae_excl_vars | params

            algorithm = GradientDescent(cost=cost,
                                        step_rule=config.step_rule,
                                        params=list(params))
            main_loop = MainLoop(data_stream=NoData(train_stream),
                                 algorithm=algorithm,
                                 extensions=[
                                     TrainingDataMonitoring([cost],
                                                            prefix='train',
                                                            every_n_epochs=1),
                                     Printing(every_n_epochs=1),
                                     FinishAfter(after_n_epochs=1000),
                                 ])
            main_loop.run()

    cg = ComputationGraph(m.cost_reg)
    params = list(set(cg.parameters) - ae_excl_vars)
    algorithm = GradientDescent(cost=m.cost_reg,
                                step_rule=config.step_rule,
                                params=params)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            TrainingDataMonitoring([m.cost_reg, m.ber_reg, m.cost, m.ber],
                                   prefix='train',
                                   every_n_epochs=1 * config.pt_freq),
            DataStreamMonitoring([m.cost, m.ber],
                                 valid_stream,
                                 prefix='valid',
                                 after_epoch=False,
                                 every_n_epochs=5 * config.pt_freq),
            Printing(every_n_epochs=1 * config.pt_freq, after_epoch=False),
            Plot(document='tr_' + model_name + '_' + config.param_desc,
                 channels=[['train_cost', 'train_cost_reg', 'valid_cost'],
                           ['train_ber', 'train_ber_reg', 'valid_ber']],
                 server_url='http://eos21:4201',
                 every_n_epochs=1 * config.pt_freq,
                 after_epoch=False),
            FinishAfter(after_n_epochs=10000)
        ])
    main_loop.run()
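
The stacked-AE loop above pretrains each layer on its own cost and then
excludes those parameters from later stages. A minimal sketch of that
exclusion logic, assuming `layer_costs` is a hypothetical list of per-layer
costs:

from blocks.graph import ComputationGraph

trained = set()
for layer_cost in layer_costs:  # hypothetical per-layer costs
    params = set(ComputationGraph(layer_cost).parameters) - trained
    trained |= params  # exclude these from later training stages
    # ... train only `params` on layer_cost, as in the loop above ...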