def load_models(model_path=save_path,
                in_size=len(input_columns),
                out_size=len(output_columns) - 1 if cost_mode == 'RL-MDN' else len(output_columns),
                hidden_size=hidden_size,
                num_recurrent_layers=num_recurrent_layers,
                model=layer_models[0]):
    """Load a trained recurrent model from disk and compile a prediction function.

    Exits the process when ``model_path`` does not exist.

    Returns:
        predict_func: compiled theano function mapping a features tensor3 to
            the per-layer hidden states / cells followed by the prediction.
        initials: INITIAL_STATE parameters of every recurrent brick, in the
            same order as the hidden states returned by ``predict_func``.
        encoder, code_size: result of ``load_encoder()``.

    NOTE(review): the ``model`` argument is accepted for interface
    compatibility but is not used in the body; ``in_size`` is recomputed
    below from the encoder's code size.
    """
    initials = []
    if not os.path.isfile(model_path):
        print('Could not find model file.')
        # Exit non-zero on failure (the original exited 0, signalling success).
        sys.exit(1)
    print('Loading model from {0}...'.format(model_path))
    x = tensor.tensor3('features', dtype=theano.config.floatX)
    y = tensor.tensor3('targets', dtype='floatX')
    train_flag = [theano.shared(0)]
    # Load the encoder exactly once (the original loaded it twice) and widen
    # the input with the encoder's latent code size.
    encoder, code_size = load_encoder()
    in_size = code_size + len(input_columns)  # overrides the in_size argument
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)
    # Dummy MainLoop whose only job is to let the Load extension deserialize
    # the saved parameters into the model.
    main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                         extensions=[saveload.Load(model_path)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print('Model loaded. Building prediction function...')
    hiddens = []
    # Recurrent bricks are named "<layer model><layer index>"; collect their
    # state/cell variables and initial-state parameters layer by layer.
    for i in range(num_recurrent_layers):
        brick = [b for b in bin_model.get_top_bricks()
                 if b.name == layer_models[i] + str(i)][0]
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initials.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
    predict_func = theano.function([x], hiddens + [y_hat])
    return predict_func, initials, encoder, code_size
# Optimizer: RMSProp with gradient-norm clipping, combined into one step rule.
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
              StepClipping(step_clipping)]
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules))

# Extensions: monitor the cost on the dev stream per epoch and the cost plus
# gradient/step norms on the training stream per batch.
gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
step_norm = aggregation.mean(algorithm.total_step_norm)
monitored_vars = [cost, gradient_norm, step_norm]
dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                   before_first_epoch=True,
                                   data_stream=dev_stream, prefix="dev")
train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                       after_batch=True,
                                       before_first_epoch=True, prefix='tra')
extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True),
              FinishAfter(after_n_epochs=nepochs), saveload.Load(load_path),
              saveload.Checkpoint(last_path),
              ] + track_best('dev_cost', save_path)

# Multiplicative per-epoch learning-rate decay; 0 and 1 both mean "no decay".
if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

# print() form works in both Python 2 and 3 (the original used the
# Python-2-only print statement; the Python-3-style twin of this chunk in
# this file already uses print()).
print('number of parameters in the model: ' +
      str(tensor.sum([p.size for p in cg.parameters]).eval()))

# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()
# Training driver: builds a GradientDescent algorithm (RMSProp + gradient
# clipping composed into one step rule), wires up Blocks monitoring
# extensions (dev cost per epoch; train cost and gradient/step norms per
# batch), checkpointing/reloading via saveload, optional multiplicative
# per-epoch learning-rate decay (skipped when the decay factor is 0 or 1),
# prints the total parameter count, then runs the MainLoop.
# NOTE(review): this chunk appears to be a Python-3-style duplicate of the
# Python-2 variant elsewhere in this file (print() vs print statement) —
# consider consolidating the two.
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate), StepClipping(step_clipping)] algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=CompositeRule(step_rules)) # Extensions gradient_norm = aggregation.mean(algorithm.total_gradient_norm) step_norm = aggregation.mean(algorithm.total_step_norm) monitored_vars = [cost, gradient_norm, step_norm] dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True, before_first_epoch=True, data_stream=dev_stream, prefix="dev") train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True, before_first_epoch=True, prefix='tra') extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True), FinishAfter(after_n_epochs=nepochs), saveload.Load(load_path), saveload.Checkpoint(last_path), ] + track_best('dev_cost', save_path) if learning_rate_decay not in (0, 1): extensions.append(SharedVariableModifier(step_rules[0].learning_rate, lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr), after_epoch=True, after_batch=False)) print('number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval())) # Finally build the main loop and train the model main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, model=Model(cost), extensions=extensions) main_loop.run()
# Sampling setup: pick a random vocabulary character as primetext when none
# was supplied, filter the primetext down to in-vocabulary characters (raising
# if nothing survives), encode it as a uint8 column vector, rebuild the
# network graph with nn_fprop, restore the saved parameters by running a
# dummy MainLoop's Load extension, and start collecting per-layer hidden
# state/cell variables (bricks are presumably named "<model><layer index>" —
# matches the lookup below).
# NOTE(review): this chunk is truncated — it ends mid-call
# (`activations.extend(`); the continuation is not visible here, so the code
# is left byte-identical.
args.primetext = ix_to_char[numpy.random.randint(vocab_size)] primetext = ''.join( [ch for ch in args.primetext if ch in char_to_ix.keys()]) if len(primetext) == 0: raise Exception('primetext characters are not in the vocabulary') x_curr = numpy.expand_dims(numpy.array( [char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1) print 'Loading model from {0}...'.format(args.model) x = tensor.matrix('features', dtype='uint8') y = tensor.matrix('targets', dtype='uint8') y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model) main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost), extensions=[saveload.Load(args.model)]) for extension in main_loop.extensions: extension.main_loop = main_loop main_loop._run_extensions('before_training') bin_model = main_loop.model activations = [] initial_states = [] for i in range(num_layers): brick = [ b for b in bin_model.get_top_bricks() if b.name == model + str(i) ][0] activations.extend( VariableFilter(theano_name=brick.name + '_apply_states')( bin_model.variables)) activations.extend(
# Sampling setup (parameter-file variant): derive the primetext (random
# in-vocabulary character when none given; raise when no primetext character
# is in the vocabulary), encode it as a uint8 column vector, rebuild the
# graph with nn_fprop, then restore saved parameters directly via
# load_parameters + set_parameter_values instead of a saveload.Load
# extension. Afterwards gather, per layer, the recurrent bricks' state/cell
# variables and INITIAL_STATE parameters, keep only the last timestep of
# each activation, and create symbolic vectors that will stand in for the
# initial states when stepping the network one element at a time.
# NOTE(review): `open(args.model)` is presumably a pickled/npz parameter
# file — binary mode ('rb') may be required on some platforms; confirm
# against load_parameters' expectations.
# Define primetext ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file) if not args.primetext or len(args.primetext) == 0: args.primetext = ix_to_char[numpy.random.randint(vocab_size)] primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix.keys()]) if len(primetext) == 0: raise Exception('primetext characters are not in the vocabulary') x_curr = numpy.expand_dims( numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1) print('Loading model from {0}...'.format(args.model)) x = tensor.matrix('features', dtype='uint8') y = tensor.matrix('targets', dtype='uint8') y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model) main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost)) with open(args.model) as f: main_loop.model.set_parameter_values(load_parameters(f)) bin_model = main_loop.model activations = [] initial_states = [] for i in range(num_layers): brick = [b for b in bin_model.get_top_bricks() if b.name==model+str(i)][0] activations.extend(VariableFilter(theano_name=brick.name+'_apply_states')(bin_model.variables)) activations.extend(VariableFilter(theano_name=brick.name+'_apply_cells')(cells)) initial_states.extend(VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters)) #take activations of last element activations = [act[-1].flatten() for act in activations] states_as_params = [tensor.vector(dtype=initial.dtype) for initial in initial_states] #Get prob. distribution of the last element in the last seq of the batch
# Prediction setup for an MDN-output network: build batch-major inputs by
# swapping the first two axes of x and y, size the network so input and
# output widths both equal num_features, rebuild the graph with nn_fprop in
# non-training mode, restore saved parameters by running a dummy MainLoop's
# Load extension, and start collecting per-layer hidden variables (bricks
# here are named "<layer model><layer index>-" — note the trailing dash,
# unlike the other chunks in this file).
# NOTE(review): this chunk is truncated — it ends mid-call
# (`hiddens.extend(`); the continuation is not visible here, so the code is
# left byte-identical.
y = tensor.tensor3('targets', dtype='floatX') x = x.swapaxes(0, 1) y = y.swapaxes(0, 1) in_size = num_features out_size = num_features y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size[network_mode], num_layers, layer_models[network_mode][0], 'MDN', training=False) main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost), extensions=[saveload.Load(save_path[network_mode])]) for extension in main_loop.extensions: extension.main_loop = main_loop main_loop._run_extensions('before_training') bin_model = main_loop.model print 'Model loaded. Building prediction function...' hiddens = [] initials = [] for i in range(num_layers): brick = [ b for b in bin_model.get_top_bricks() if b.name == layer_models[network_mode][i] + str(i) + '-' ][0] hiddens.extend(
def load_models(
        models=hierarchy_models,
        in_size=len(hierarchy_input_columns[level_number_in_hierarchy]),
        out_size=len(hierarchy_output_columns[level_number_in_hierarchy]),
        hidden_size=hidden_size,
        num_layers=num_layers,
        model=layer_models[0]):
    """Load every saved model of the hierarchy and build prediction functions.

    Returns:
        predict_funcs: one list per hierarchy level; each inner list holds one
            compiled theano function per task (a single function when not
            task-specialized), mapping features -> hidden states + y_hat[task].
        initials: one list per hierarchy level holding the INITIAL_STATE
            parameters of every recurrent brick, in the same order as the
            hidden states returned by the matching prediction function.
    """
    def _collect(bin_model, cells, layer_index, task_suffix, hiddens, layer_initials):
        # Locate the brick named "<layer model><layer index>-<task suffix>"
        # and append its state/cell variables and initial-state parameters.
        brick = [b for b in bin_model.get_top_bricks()
                 if b.name == layer_models[layer_index] + str(layer_index) + '-' + task_suffix][0]
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        layer_initials.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

    predict_funcs = []
    initials = []
    for hierarchy_index, saved_model in enumerate(models):
        print('Loading model from {0}...'.format(saved_model))
        x = tensor.tensor3('features', dtype=theano.config.floatX)
        y = tensor.tensor3('targets', dtype=theano.config.floatX)
        y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                      num_layers, model, training=False)
        # Dummy MainLoop whose only job is to let the Load extension
        # deserialize the saved parameters into the model.
        main_loop = MainLoop(algorithm=None, data_stream=None,
                             model=Model(cost),
                             extensions=[saveload.Load(saved_model)])
        for extension in main_loop.extensions:
            extension.main_loop = main_loop
        main_loop._run_extensions('before_training')
        bin_model = main_loop.model
        print('Model loaded. Building prediction function...')
        hiddens = []
        initials.append([])
        # Shared (non task-specialized) lower layers carry the suffix "-1".
        for i in range(num_layers - specialized_layer_num):
            _collect(bin_model, cells, i, str(-1), hiddens, initials[hierarchy_index])
        # Task-specialized top layers: one copy per game task when enabled.
        specialized_count = len(game_tasks) if task_specialized else 0
        for task in range(specialized_count):
            for i in range(num_layers - specialized_layer_num, num_layers):
                _collect(bin_model, cells, i, str(task), hiddens, initials[hierarchy_index])
        # One prediction function per task output (a single one otherwise).
        output_count = len(game_tasks) if task_specialized else 1
        predict_funcs.append([])
        for task in range(output_count):
            predict_funcs[hierarchy_index].append(
                theano.function([x], hiddens + [y_hat[task]]))
    return predict_funcs, initials
# Define primetext
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
# Fall back to a random vocabulary character when no primetext was supplied.
# (`not args.primetext` already covers the empty string, so the original's
# extra `len(...) == 0` test was redundant.)
if not args.primetext:
    args.primetext = ix_to_char[numpy.random.randint(vocab_size)]
# Keep only the characters the model's vocabulary knows about.
primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix])
if len(primetext) == 0:
    raise Exception('primetext characters are not in the vocabulary')
x_curr = numpy.expand_dims(
    numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)

# print() form works in both Python 2 and 3 (the original used the
# Python-2-only print statement; the load_parameters variant of this chunk
# in this file already uses print()).
print('Loading model from {0}...'.format(args.model))
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)
# Dummy MainLoop whose only job is to let the Load extension deserialize the
# saved parameters into the model.
main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                     extensions=[saveload.Load(args.model)])
for extension in main_loop.extensions:
    extension.main_loop = main_loop
main_loop._run_extensions('before_training')
bin_model = main_loop.model

# Collect, per layer, the recurrent bricks' state/cell variables and
# INITIAL_STATE parameters (bricks are named "<model><layer index>").
activations = []
initial_states = []
for i in range(num_layers):
    brick = [b for b in bin_model.get_top_bricks() if b.name == model + str(i)][0]
    activations.extend(
        VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
    activations.extend(
        VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
    initial_states.extend(
        VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
# take activations of last element
activations = [act[-1].flatten() for act in activations]
# Symbolic stand-ins for the initial states, used to step the network
# one element at a time.
states_as_params = [tensor.vector(dtype=initial.dtype) for initial in initial_states]