def main(config, tr_stream):
    """Build the char-level NMT encoder-decoder and dump source embeddings.

    Constructs the encoder/decoder graph from ``config``, restores saved
    parameters via :class:`LoadNMT` (run through the main loop's
    ``before_training`` hook only -- no training happens), then feeds the
    encoder's decimator output and the source vocabulary to ``embedding``.

    Parameters
    ----------
    config : dict
        Model/configuration dictionary (vocab sizes, hidden sizes, paths).
    tr_stream : object
        Training stream providing ``trg_bos`` and ``space_idx['target']``.
    """
    # Create Theano variables
    logger.info('Creating theano variables')
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.btensor3('source_sample_matrix')
    source_char_aux = tensor.bmatrix('source_char_aux')
    source_word_mask = tensor.bmatrix('source_word_mask')
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.bmatrix('target_char_aux')
    target_char_mask = tensor.bmatrix('target_char_mask')
    target_sample_matrix = tensor.btensor3('target_sample_matrix')
    target_word_mask = tensor.bmatrix('target_word_mask')
    target_resample_matrix = tensor.btensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.bmatrix('target_prev_char_aux')

    target_bos_idx = tr_stream.trg_bos
    target_space_idx = tr_stream.space_idx['target']

    # FIX: close the vocabulary file instead of leaking the handle
    # (original: pickle.load(open(config['src_vocab'], 'rb'))).
    with open(config['src_vocab'], 'rb') as vocab_file:
        src_vocab = pickle.load(vocab_file)

    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'],
        config['src_dgru_nhids'], config['enc_nhids'],
        config['src_dgru_depth'], config['bidir_encoder_depth'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'],
        config['trg_dgru_nhids'], config['trg_igru_nhids'],
        config['dec_nhids'], config['enc_nhids'] * 2,
        config['transition_depth'], config['trg_igru_depth'],
        config['trg_dgru_depth'], target_space_idx, target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix,
                                   source_char_aux, source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq,
                        target_sample_matrix, target_resample_matrix,
                        target_char_aux, target_char_mask, target_word_mask,
                        target_prev_char_seq, target_prev_char_aux)

    # Set up model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions: reload previously saved model parameters.
    logger.info("Initializing extensions")
    extensions = [LoadNMT(config['saveto'])]

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )

    # Run only the 'before_training' extensions so LoadNMT restores the
    # saved parameters without actually training.
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Transposed inputs: the decimator consumes (time, batch) layout.
    char_embedding = encoder.decimator.apply(source_char_seq.T,
                                             source_sample_matrix,
                                             source_char_aux.T)
    embedding(Model(char_embedding), src_vocab)
def test_save_and_load(self):
    """Verify that parameters saved by the main loop are restored by Load."""
    saved = self.W.get_value()
    # Corrupt the parameter so a successful load is observable.
    self.W.set_value(saved * 2)
    loader_loop = MainLoop(model=self.model,
                           data_stream=self.data_stream,
                           algorithm=self.algorithm,
                           extensions=[Load('myweirdmodel.tar')])
    loader_loop.extensions[0].main_loop = loader_loop
    loader_loop._run_extensions('before_training')
    assert_allclose(self.W.get_value(), saved)
def test_save_and_load(self):
    """Check that the main loop's saved parameters round-trip through Load."""
    original_weights = self.W.get_value()
    # Perturb W; the Load extension must overwrite this with the saved value.
    self.W.set_value(original_weights * 2)
    restored_loop = MainLoop(
        model=self.model,
        data_stream=self.data_stream,
        algorithm=self.algorithm,
        extensions=[Load("myweirdmodel.tar")],
    )
    restored_loop.extensions[0].main_loop = restored_loop
    restored_loop._run_extensions("before_training")
    assert_allclose(self.W.get_value(), original_weights)
def test_load():
    """Checkpoint a tiny model, then check that Load restores parameters,
    log and iteration state, and tolerates a missing checkpoint file."""
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()
    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.picklebarrel')])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.picklebarrel',
                         load_iteration_state=True, load_log=True)])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.picklebarrel',
                         load_iteration_state=True, load_log=True)])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # FIX: remove the checkpoint file afterwards, mirroring the cleanup
    # done in test_checkpointing; previously this file was left behind.
    if os.path.exists('myweirdmodel.picklebarrel'):
        os.remove('myweirdmodel.picklebarrel')
def test_checkpointing():
    """Train briefly under a Checkpoint extension, then verify Load restores
    the saved parameters and copes with a nonexistent checkpoint."""
    # Tiny constant-initialized MLP whose single weight matrix we checkpoint.
    mlp = MLP(activations=[None], dims=[10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])])
    main_loop.run()

    # Corrupt W, reload the checkpoint, and check W is restored.
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Loading from a file that was never written must not raise.
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Clean up the checkpoint file.
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar')
def test_load_log_and_iteration_state(self):
    """The log and iteration state can be reloaded alongside the model."""
    skip_if_configuration_set("log_backend", "sqlite",
                              'Bug with log.status["resumed_from"]')
    reloaded = MainLoop(model=self.model,
                        data_stream=self.data_stream,
                        algorithm=self.algorithm,
                        extensions=[Load("myweirdmodel.tar", True, True)])
    reloaded.extensions[0].main_loop = reloaded
    reloaded._run_extensions("before_training")

    # The restored log status must match the original, key for key.
    for lhs, rhs in zip(sorted(reloaded.log.status.keys()),
                        sorted(self.main_loop.log.status.keys())):
        assert lhs == rhs
        assert reloaded.log.status[lhs] == self.main_loop.log.status[rhs]

    # The restored epoch iterator must yield the same next batch.
    assert_allclose(next(reloaded.iteration_state[1])["data"],
                    next(self.main_loop.iteration_state[1])["data"])
def test_load_log_and_iteration_state(self):
    """Loading with load_log/load_iteration_state restores both pieces."""
    skip_if_configuration_set('log_backend', 'sqlite',
                              'Bug with log.status["resumed_from"]')
    fresh_loop = MainLoop(model=self.model,
                          data_stream=self.data_stream,
                          algorithm=self.algorithm,
                          extensions=[Load('myweirdmodel.tar', True, True)])
    fresh_loop.extensions[0].main_loop = fresh_loop
    fresh_loop._run_extensions('before_training')

    # Compare log status dictionaries entry by entry.
    restored_keys = sorted(fresh_loop.log.status.keys())
    original_keys = sorted(self.main_loop.log.status.keys())
    for restored_key, original_key in zip(restored_keys, original_keys):
        assert restored_key == original_key
        assert (fresh_loop.log.status[restored_key]
                == self.main_loop.log.status[original_key])

    # Both iterators should produce identical next batches.
    restored_batch = next(fresh_loop.iteration_state[1])['data']
    original_batch = next(self.main_loop.iteration_state[1])['data']
    assert_allclose(restored_batch, original_batch)
def main(config, test_stream):
    """Assemble the char-level NMT encoder-decoder and run its
    'before_training' extensions (optional parameter reload and BLEU test).

    Parameters
    ----------
    config : dict
        Model/configuration dictionary (vocab sizes, hidden sizes, paths).
    test_stream : object
        Test stream providing ``trg_bos`` and ``space_idx['target']``.
    """
    logger.info('Creating theano variables')
    # Source-side symbolic inputs.
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.tensor3('source_sample_matrix')
    source_char_aux = tensor.matrix('source_char_aux')
    source_word_mask = tensor.matrix('source_word_mask')
    # Target-side symbolic inputs.
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.matrix('target_char_aux')
    target_char_mask = tensor.matrix('target_char_mask')
    target_sample_matrix = tensor.tensor3('target_sample_matrix')
    target_word_mask = tensor.matrix('target_word_mask')
    target_resample_matrix = tensor.tensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.matrix('target_prev_char_aux')

    target_bos_idx = test_stream.trg_bos
    target_space_idx = test_stream.space_idx['target']

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'],
                                   config['src_dgru_nhids'],
                                   config['enc_nhids'],
                                   config['src_dgru_depth'],
                                   config['bidir_encoder_depth'])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['trg_dgru_nhids'], config['trg_igru_nhids'],
                      config['dec_nhids'], config['enc_nhids'] * 2,
                      config['transition_depth'], config['trg_igru_depth'],
                      config['trg_dgru_depth'], target_space_idx,
                      target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix,
                                   source_char_aux, source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq,
                        target_sample_matrix, target_resample_matrix,
                        target_char_aux, target_char_mask, target_word_mask,
                        target_prev_char_seq, target_prev_char_aux)

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    logger.info("Initializing extensions")
    extensions = []
    if config['reload']:
        # Restore previously saved parameters before evaluating.
        extensions.append(LoadNMT(config['saveto']))

    # Set up beam search and sampling computation graphs if necessary.
    if config['bleu_script'] is not None:
        logger.info("Building sampling model")
        generated = decoder.generate(representation, source_word_mask)
        search_model = Model(generated)
        # generated[config['transition_depth']] is next_outputs.
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
            ComputationGraph(generated[config['transition_depth']]))
        logger.info("Building bleu tester")
        extensions.append(
            BleuTester(source_char_seq, source_sample_matrix,
                       source_char_aux, source_word_mask,
                       samples=samples, config=config,
                       model=search_model, data_stream=test_stream,
                       normalize=config['normalized_bleu']))

    # Initialize main loop and run only its 'before_training' extensions.
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model, algorithm=None,
                         data_stream=None, extensions=extensions)
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
# Build the SQuAD ranker validation stream.
# ds, valid_stream = data.setup_squad_datastream(valid_path, vocab_path, config)
ds, valid_stream = data.setup_squad_ranker_datastream(
    os.path.join(os.getcwd(), 'squad_short/squadnewdev.txt'),
    os.path.join(os.getcwd(), 'squad/vocab.txt'),
    config,
    221697)
snapshot_path = os.path.join("model_params", model_name + ".pkl")

# Build model
m = config.Model(config, ds.vocab_size)

# Blocks wrappers around the prediction and cost graphs.
# test_model = Model(m.generations)
test_model = Model(m.predictions)
model = Model(m.sgd_cost)
algorithm = None
extensions = [RankerEvaluator(path=snapshot_path,
                              model=test_model,
                              data_stream=valid_stream,
                              vocab_size=ds.vocab_size,
                              vocab=ds.vocab,
                              eval_mode=eval_mode,
                              before_training=True)]

main_loop = MainLoop(model=model,
                     data_stream=valid_stream,
                     algorithm=algorithm,
                     extensions=extensions)
for extension in main_loop.extensions:
    extension.main_loop = main_loop
# Only the 'before_training' evaluation runs; training is deliberately skipped.
main_loop._run_extensions('before_training')
# main_loop.run()
# main_loop.profile.report()
def main(config):
    """Build and initialize a word-level NMT encoder-decoder, optionally
    reload saved parameters, construct a sampling graph with a global
    Sampler, and run only the main loop's 'before_training' extensions."""
    print('working on it ...')
    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    # Initialize model parameters: Gaussian weights, zero biases,
    # orthogonal init for the recurrent transitions.
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Extensions
    extensions = []

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Set up beam search and sampling computation graphs if necessary
    if config['bleu_script'] is not None:
        logger.info("Building sampling model")
        # Sampling input has no padding, so its mask is all ones.
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
            ComputationGraph(generated[1]))  # generated[1] is next_outputs

        # Add sampling
        logger.info("Building sampler")
        global samplers_ob
        # NOTE(review): `input_sentence_mask` is not defined in this function
        # or anywhere visible in this file -- this line would raise NameError
        # when the bleu_script branch runs; confirm the intended data stream.
        samplers_ob = Sampler(model=search_model,
                              data_stream=input_sentence_mask,
                              hook_samples=config['hook_samples'],
                              every_n_batches=config['sampling_freq'],
                              src_vocab_size=config['src_vocab_size'])

    # Initialize main loop; algorithm/data_stream are None because only the
    # 'before_training' extensions are executed (no actual training).
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')