import theano
import theano.tensor as T

from blocks.graph import ComputationGraph

# Symbolic inputs and masks for a small shape-checking test.
# Dtypes here are assumptions; BaselineModel and `stream` are defined
# elsewhere in this repo (see the sketch after this snippet for a toy
# stand-in for `stream`).
x = T.tensor3('features', dtype='int32')
x_mask = T.tensor3('features_mask', dtype='float32')
y = T.matrix('targets', dtype='int32')
y_mask = T.matrix('targets_mask', dtype='float32')

# Deliberately small, mutually distinct dimensions so shape errors surface
dims = (10, 13, 5, 4)

config_dict = dict()
config_dict['batch_size'] = dims[2]
config_dict['num_subwords'] = dims[1]
config_dict['num_words'] = dims[0]
config_dict['subword_embedding_size'] = dims[3]
config_dict['input_vocab_size'] = 42
config_dict['output_vocab_size'] = 42
config_dict['subword_RNN_hidden_state_size'] = 6  # 2 more than subword_embedding_size, for testing
config_dict['LM_RNN_hidden_state_size'] = 8  # 2 more than subword_RNN_hidden_state_size
config_dict['table_width'] = 0.08
config_dict['max_out_dim'] = 11  # 3 more than LM_RNN_hidden_state_size
config_dict['max_out_K'] = 3

baseline_model = BaselineModel(config_dict)
baseline_model.initialize()

y_hat_CE = baseline_model.cost(subword_id_input_=x,
                               subword_id_input_mask_=x_mask,
                               subword_id_target_=y,
                               subword_id_target_mask_=y_mask)

cg = ComputationGraph(y_hat_CE)
# cost = cg.outputs[0]

f_cross_entropy = theano.function([x, x_mask, y, y_mask], [y_hat_CE])

print('Graph inputs')
print(cg.inputs)

# Evaluate the cross-entropy on a few batches, then stop.
# The source names are assumed to match the Theano variable names above.
num_times = 5
for data in stream.get_epoch_iterator(as_dict=True):
    print(f_cross_entropy(data['features'], data['features_mask'],
                          data['targets'], data['targets_mask']))
    num_times -= 1
    if num_times <= 0:
        break
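# The test snippet above assumes a Fuel data stream named `stream` whose
# sources line up with the compiled function's inputs. A minimal toy
# stand-in is sketched here, under the assumptions that the sources are
# named 'features', 'features_mask', 'targets', and 'targets_mask' and that
# batches are indexed along axis 0; the real stream is built elsewhere in
# this repo, and a definition like this one would go before the loop above.
import numpy

from fuel.datasets import IndexableDataset
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

num_examples = 20
num_words, num_subwords, batch_size, _ = dims
toy_sources = {
    'features': numpy.random.randint(
        0, config_dict['input_vocab_size'],
        size=(num_examples, num_words, num_subwords)).astype('int32'),
    'features_mask': numpy.ones(
        (num_examples, num_words, num_subwords), dtype='float32'),
    'targets': numpy.random.randint(
        0, config_dict['output_vocab_size'],
        size=(num_examples, num_words)).astype('int32'),
    'targets_mask': numpy.ones((num_examples, num_words), dtype='float32'),
}
stream = DataStream(IndexableDataset(indexables=toy_sources),
                    iteration_scheme=SequentialScheme(num_examples, batch_size))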
import logging

from blocks.algorithms import CompositeRule, GradientDescent, StepClipping
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.graph import apply_dropout
from blocks.main_loop import MainLoop


def run_training(config, tr_stream, dev_stream=None, use_bokeh=True):
    # Monitoring extensions
    try:
        from blocks_extras.extensions.plot import Plot
        BOKEH_AVAILABLE = True
    except ImportError:
        BOKEH_AVAILABLE = False
    print('Bokeh availability: ' + str(BOKEH_AVAILABLE))

    logger = logging.getLogger(__name__)

    # Create Theano variables
    logger.info('Creating theano variables')
    x = T.tensor3('features', dtype=config.params['data_dtype'])
    x_mask = T.tensor3('features_mask', dtype=config.params['mask_dtype'])
    y = T.matrix('targets', dtype=config.params['data_dtype'])
    y_mask = T.matrix('targets_mask', dtype=config.params['mask_dtype'])

    # Construct the model
    logger.info('Building baseline model')
    baseline_model = BaselineModel(config.params)
    baseline_model.initialize()

    cost = baseline_model.cost(subword_id_input_=x,
                               subword_id_input_mask_=x_mask,
                               subword_id_target_=y,
                               subword_id_target_mask_=y_mask)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Apply dropout for regularization
    if config.params['dropout'] < 1.0:
        # As in GroundHog, dropout is applied to the output of maxout
        logger.info('Applying dropout')
        dropout_inputs = [v for v in cg.intermediary_variables
                          if v.name == 'maxout_apply_output']
        # Debugging output: inspect the graph before the transformation
        print(cg.intermediary_variables)
        print(cg.variables)
        print(cg.inputs)
        print(cg.parameters)
        print(dropout_inputs)
        cg = apply_dropout(cg, dropout_inputs, config.params['dropout'])

    logger.info('Initializing extensions')
    extensions = [
        FinishAfter(after_n_batches=config.params['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True)
        # CheckpointNMT(config.params['saveto'],
        #               every_n_batches=config.params['save_freq'])
    ]

    # Plot the cost in Bokeh if requested and available
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Baseline model',
                 channels=[['baselinemodel_cost_cost']],
                 after_batch=True))

    # Set up the training algorithm
    logger.info('Initializing training algorithm')
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(config.params['step_clipping']),
                                 eval(config.params['step_rule'])()]))

    # Initialize the main loop
    logger.info('Initializing main loop')
    main_loop = MainLoop(
        model=baseline_model,
        algorithm=algorithm,
        data_stream=tr_stream,
        extensions=extensions)

    # Train
    main_loop.run()
    print('DONE TRAINING')
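# A minimal sketch of invoking run_training, assuming a small configuration
# holder with a `params` dict. `Configuration`, the parameter values, and the
# reuse of the toy `stream` above are all illustrative assumptions; the repo's
# real configuration object and training stream are built elsewhere.
from blocks.algorithms import AdaDelta  # so eval(config.params['step_rule']) can resolve it


class Configuration(object):
    """Hypothetical stand-in for the repo's configuration object."""
    def __init__(self, params):
        self.params = params


train_params = dict(config_dict)
train_params.update({
    'data_dtype': 'int32',    # assumed dtype of subword ids
    'mask_dtype': 'float32',  # assumed dtype of masks
    'dropout': 0.5,           # < 1.0 triggers the dropout branch
    'finish_after': 100,      # stop after 100 batches
    'step_clipping': 1.0,
    'step_rule': 'AdaDelta',  # evaluated by name inside run_training
})

run_training(Configuration(train_params), tr_stream=stream, use_bokeh=False)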