def run_network_keras(config, output_file_name=None):
    """Train a Keras network described by ``config`` and write its results to disk.

    Steps performed:

    1. Load the train/test split with ``get_training_data``.
    2. Build the model with ``build_network``.
    3. Call ``model.fit`` repeatedly, ``epochs_per_fit`` epochs at a time, until the
       validation loss drops below 0.001 or ``max_epochs`` is reached. The weights
       with the lowest validation loss seen so far are kept.
    4. Write the network graph, both weight sets, the convergence/loss graphs and a
       text report under the output directory.

    NOTE(review): this file contains later definitions that are also named
    ``run_network_keras``; if they live in the same module, the last one replaces
    this one at import time.

    :param config: mapping with the sections 'LearningParameters', 'NetworkStructure',
        'TrainingParameters', 'FileConfig', 'DatabaseConfig' and 'PreprocessingConfig'.
    :param output_file_name: base name for every output file. When ``None`` the value
        of ``config['FileConfig']['output_file_name']`` is used.
    :raises FuckingNaN: if the validation loss becomes NaN during training.
    """
    # Echo the whole configuration so the settings of this run appear in its output.
    # '%s %s' mirrors the space-separated form of the print statement and parses on
    # both Python 2 and Python 3.
    for line in config:
        print('%s %s' % (line, config[line]))
    print('')

    # Unpack configs required
    learning_params = config['LearningParameters']
    net_structure = config['NetworkStructure']
    fitting_params = config['TrainingParameters']
    file_config = config['FileConfig']

    out_directory = file_config['output_directory']
    if output_file_name is None:
        # Use the config file's name only if we're not provided one here.
        output_file_name = file_config['output_file_name']

    train_data = get_training_data(config['DatabaseConfig'],
                                   config['PreprocessingConfig'],
                                   config['FileConfig'])

    # Training data comes packed in a dictionary to avoid returning a whole pile of values.
    train_in = train_data['train_in']
    train_out = train_data['train_out']
    test_in = train_data['test_in']
    test_out = train_data['test_out']
    galaxy_ids_test = train_data['galaxy_ids_test']
    out_normaliser = train_data['out_normaliser']

    # Some general statistics of the dataset
    mean_in = train_data['mean_in']
    mean_out = train_data['mean_out']
    stddev_in = train_data['stddev_in']
    stddev_out = train_data['stddev_out']
    min_out = train_data['min_out']
    max_out = train_data['max_out']

    LOG.info('Dataset shape train')
    LOG.info('{0}'.format(np.shape(train_in)))
    LOG.info('{0}'.format(np.shape(train_out)))
    LOG.info('Dataset shape test')
    LOG.info('{0}'.format(np.shape(test_in)))
    LOG.info('{0}'.format(np.shape(test_out)))

    LOG.info('Compiling neural network model')

    if net_structure['optimiser'] == 'sgd':
        optimiser = SGD(lr=learning_params['learning_rate'],
                        momentum=learning_params['momentum'],
                        decay=learning_params['decay'],
                        nesterov=True)
    else:
        # Any other value is handed to build_network unchanged.
        optimiser = net_structure['optimiser']
    print(optimiser)

    input_dim = len(train_in[0])
    try:
        output_dim = len(train_out[0])
    except TypeError:
        # A target without a length (a plain scalar) means one output per sample.
        output_dim = 1

    model = build_network(net_structure, input_dim, output_dim, optimiser)
    LOG.info("Compiled.")

    # Train the model each generation and show predictions against the validation dataset
    history_log = History_Log()
    trained = False
    total_epoch = 0
    epoch_history = []
    differences = []

    # Start from +infinity so the first recorded validation loss always counts as an
    # improvement, however large it is.
    lowest_val_loss = float('inf')
    lowest_val_loss_weights = None  # Best weight configuration with lowest validation loss
    lowest_val_loss_epoch = 0

    while not trained:
        LOG.info('epoch {0} / {1}'.format(total_epoch, fitting_params['max_epochs']))

        model.fit(train_in, train_out,
                  batch_size=fitting_params['batch_size'],
                  nb_epoch=fitting_params['epochs_per_fit'],
                  validation_split=fitting_params['validation_split'],
                  show_accuracy=True,
                  verbose=False,
                  callbacks=[history_log])

        LOG.info('{0}'.format(history_log.epoch_data))
        total_epoch += fitting_params['epochs_per_fit']

        val_loss = history_log.epoch_data['val_loss']
        if np.isnan(val_loss):
            raise FuckingNaN("Nan'd")

        # If the val loss is lower, save the weights
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            lowest_val_loss_weights = weights_to_list(model)
            lowest_val_loss_epoch = total_epoch

        # Predict the first three test samples and keep the predictions so we can
        # track how the network's output for them changes over time.
        prediction = model.predict(np.array(test_in[:3]))
        differences.append(prediction)

        # NOTE(review): this stores whatever object History_Log exposes; if the callback
        # reuses one dict between fits, every entry here is the same object - confirm.
        epoch_history.append(history_log.epoch_data)

        if val_loss < 0.001 or total_epoch >= fitting_params['max_epochs']:
            trained = True

    if lowest_val_loss_weights is None:
        # No validation loss ever compared below +infinity, so there is no "best"
        # snapshot. Fall back to the final weights instead of passing None on.
        lowest_val_loss_weights = weights_to_list(model)

    differences = np.array(differences)  # Need special np indexing on this later

    if not out_directory:
        out_directory = os.getcwd()

    if not os.path.exists(out_directory):
        os.makedirs(out_directory)

    graph_directory = '{0}/graph'.format(out_directory)
    if not os.path.exists(graph_directory):
        os.mkdir(graph_directory)

    print(graph_directory)

    # Common prefix for every file written to the graph directory.
    graph_out = '{0}/graph/{1}'.format(out_directory, output_file_name)

    # Save network weights, graph of network and loss over epoch graph.
    to_graph(model).write_svg("{0}_Graph.svg".format(graph_out))
    model.save_weights('{0}_weights.h5'.format(graph_out), overwrite=True)
    save_mean_convergence_graph('{0}_convergence'.format(graph_out), differences,
                                mean_out, min_out, max_out, fitting_params['epochs_per_fit'])
    save_graph(epoch_history, '{0}_loss'.format(graph_out), fitting_params['epochs_per_fit'])

    # Run the configured number of tests ('num_tests') on the network's final weights.
    test_results, test_means, test_std = do_tests(model, test_in, fitting_params['num_tests'])

    # Evaluate the network on its final weights. The positional arguments are
    # presumably batch_size, show_accuracy and verbose - TODO confirm against the
    # Keras version in use.
    evaluation = {'At end of training:': model.evaluate(test_in, test_out, 1000, True, True)}

    # Repeat the tests with the lowest validation loss weights.
    model = weight_from_list(model, lowest_val_loss_weights)
    model.save_weights('{0}_best_weights.h5'.format(graph_out), overwrite=True)
    val_test_results, val_test_means, val_test_std = do_tests(model, test_in,
                                                              fitting_params['num_tests'])

    # Evaluate the network on the lowest validation loss weights.
    evaluation['Best validation loss:'] = model.evaluate(test_in, test_out, 1000, True, True)

    single_output = config['PreprocessingConfig']['single_output']

    with open('{0}/{1}.txt'.format(out_directory, output_file_name), 'w') as f:
        write_summary_data(f, config, evaluation, epoch_history,
                           mean_in, stddev_in, mean_out, stddev_out,
                           (lowest_val_loss_epoch, lowest_val_loss))

        f.write('\n\n\n\n----------TEST DATA FOR FINAL MODEL----------\n\n\n\n')
        write_test_data(f, test_results, test_in, test_out, test_means, test_std,
                        galaxy_ids_test, out_normaliser, single_output)

        f.write('\n\n\n\n----------TEST DATA FOR BEST VALIDATION LOSS MODEL----------\n\n\n\n')
        write_test_data(f, val_test_results, test_in, test_out, val_test_means, val_test_std,
                        galaxy_ids_test, out_normaliser, single_output)
def run_network_keras(hidden_connections, hidden_layers, loss, single_output=None, single_input=None,
                      normalise_input=None, normalise_output=None, input_filter_types=None,
                      use_graph=False, unknown_input_handler=None):
    """Train a one-hidden-layer autoencoder on the training inputs, print samples, then exit.

    The model is fitted with the training inputs as both input and target, so it
    learns to reproduce its input through ``hidden_connections`` hidden units.
    Afterwards up to 30 test inputs are printed next to their reconstruction and
    the interpreter is terminated with ``exit()``.

    NOTE(review): this body reads ``config`` and ``tmp_file``, neither of which is
    defined in this function; presumably they are module-level globals - confirm.
    NOTE(review): this file also contains other definitions named
    ``run_network_keras``; if they share a module, the last definition wins.

    :param hidden_connections: number of units in the hidden layer.
    :param hidden_layers: accepted for interface compatibility; not used here.
    :param loss: accepted for interface compatibility; the model is always compiled
        with ``'mse'``.
    :param single_output: forwarded to ``get_training_data``.
    :param single_input: forwarded to ``get_training_data``.
    :param normalise_input: forwarded to ``get_training_data``.
    :param normalise_output: forwarded to ``get_training_data``.
    :param input_filter_types: stored in ``config['input_filter_types']`` before loading.
    :param use_graph: accepted for interface compatibility; not used here.
    :param unknown_input_handler: forwarded to ``get_training_data``.
    """
    config['input_filter_types'] = input_filter_types

    train_data = get_training_data(config, tmp_file, single_output, single_input,
                                   normalise_input, normalise_output, unknown_input_handler,
                                   percentile_bin=config['percentile_bin'],
                                   erase_above=config['erase_above'])

    # Only these four arrays are used below; the other entries of train_data are not
    # needed for the autoencoder experiment.
    train_in = train_data['train_in']
    train_out = train_data['train_out']
    test_in = train_data['test_in']
    test_out = train_data['test_out']

    print(np.shape(train_in))
    print(np.shape(train_out))
    print(np.shape(test_in))
    print(np.shape(test_out))

    LOG.info('Compiling neural network model')

    input_dim = len(train_in[0])

    # input -> hidden_connections units -> input: the output layer is as wide as the input.
    model = Sequential()
    model.add(Dense(output_dim=hidden_connections, input_dim=input_dim))
    model.add(PReLU())
    model.add(Dense(output_dim=input_dim, input_dim=hidden_connections))
    model.compile(loss='mse', optimizer=RMSprop(lr=0.001), class_mode='binary')

    # The training inputs are also the targets. The positional arguments 5000 and
    # 10000 are presumably batch_size and nb_epoch - TODO confirm against the Keras
    # version in use.
    model.fit(train_in, train_in, 5000, 10000, validation_split=0.3, verbose=True,
              show_accuracy=True)

    # Show at most 30 reconstructions; bounding by the test set size avoids an
    # IndexError when fewer than 30 test rows exist.
    for i in range(min(30, len(test_in))):
        ans = model.predict(np.array([test_in[i]]))
        print('Test %s' % (test_in[i],))
        print('Ans %s' % (ans[0],))
        print('')
        print('')

    # This is an experiment entry point: stop the interpreter once the samples are printed.
    exit()
def run_network_keras(config, output_file_name=None):
    """Train a Keras network described by ``config`` and write its results to disk.

    Steps performed:

    1. Load the train/test split with ``get_training_data``.
    2. Build the model with ``build_network``.
    3. Call ``model.fit`` repeatedly, ``epochs_per_fit`` epochs at a time, until the
       validation loss drops below 0.001 or ``max_epochs`` is reached. The weights
       with the lowest validation loss seen so far are kept.
    4. Write the network graph, both weight sets, the convergence/loss graphs and a
       text report under the output directory.

    NOTE(review): earlier definitions in this file are also named
    ``run_network_keras``; if they live in the same module, this later definition
    replaces them at import time.

    :param config: mapping with the sections 'LearningParameters', 'NetworkStructure',
        'TrainingParameters', 'FileConfig', 'DatabaseConfig' and 'PreprocessingConfig'.
    :param output_file_name: base name for every output file. When ``None`` the value
        of ``config['FileConfig']['output_file_name']`` is used.
    :raises FuckingNaN: if the validation loss becomes NaN during training.
    """
    # Echo the whole configuration so the settings of this run appear in its output.
    # '%s %s' mirrors the space-separated form of the print statement and parses on
    # both Python 2 and Python 3.
    for line in config:
        print('%s %s' % (line, config[line]))
    print('')

    # Unpack configs required
    learning_params = config['LearningParameters']
    net_structure = config['NetworkStructure']
    fitting_params = config['TrainingParameters']
    file_config = config['FileConfig']

    out_directory = file_config['output_directory']
    if output_file_name is None:
        # Use the config file's name only if we're not provided one here.
        output_file_name = file_config['output_file_name']

    train_data = get_training_data(config['DatabaseConfig'],
                                   config['PreprocessingConfig'],
                                   config['FileConfig'])

    # Training data comes packed in a dictionary to avoid returning a whole pile of values.
    train_in = train_data['train_in']
    train_out = train_data['train_out']
    test_in = train_data['test_in']
    test_out = train_data['test_out']
    galaxy_ids_test = train_data['galaxy_ids_test']
    out_normaliser = train_data['out_normaliser']

    # Some general statistics of the dataset
    mean_in = train_data['mean_in']
    mean_out = train_data['mean_out']
    stddev_in = train_data['stddev_in']
    stddev_out = train_data['stddev_out']
    min_out = train_data['min_out']
    max_out = train_data['max_out']

    LOG.info('Dataset shape train')
    LOG.info('{0}'.format(np.shape(train_in)))
    LOG.info('{0}'.format(np.shape(train_out)))
    LOG.info('Dataset shape test')
    LOG.info('{0}'.format(np.shape(test_in)))
    LOG.info('{0}'.format(np.shape(test_out)))

    LOG.info('Compiling neural network model')

    if net_structure['optimiser'] == 'sgd':
        optimiser = SGD(lr=learning_params['learning_rate'],
                        momentum=learning_params['momentum'],
                        decay=learning_params['decay'],
                        nesterov=True)
    else:
        # Any other value is handed to build_network unchanged.
        optimiser = net_structure['optimiser']
    print(optimiser)

    input_dim = len(train_in[0])
    try:
        output_dim = len(train_out[0])
    except TypeError:
        # A target without a length (a plain scalar) means one output per sample.
        output_dim = 1

    model = build_network(net_structure, input_dim, output_dim, optimiser)
    LOG.info("Compiled.")

    # Train the model each generation and show predictions against the validation dataset
    history_log = History_Log()
    trained = False
    total_epoch = 0
    epoch_history = []
    differences = []

    # Start from +infinity so the first recorded validation loss always counts as an
    # improvement, however large it is.
    lowest_val_loss = float('inf')
    lowest_val_loss_weights = None  # Best weight configuration with lowest validation loss
    lowest_val_loss_epoch = 0

    while not trained:
        LOG.info('epoch {0} / {1}'.format(total_epoch, fitting_params['max_epochs']))

        model.fit(train_in, train_out,
                  batch_size=fitting_params['batch_size'],
                  nb_epoch=fitting_params['epochs_per_fit'],
                  validation_split=fitting_params['validation_split'],
                  show_accuracy=True,
                  verbose=False,
                  callbacks=[history_log])

        LOG.info('{0}'.format(history_log.epoch_data))
        total_epoch += fitting_params['epochs_per_fit']

        val_loss = history_log.epoch_data['val_loss']
        if np.isnan(val_loss):
            raise FuckingNaN("Nan'd")

        # If the val loss is lower, save the weights
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            lowest_val_loss_weights = weights_to_list(model)
            lowest_val_loss_epoch = total_epoch

        # Predict the first three test samples and keep the predictions so we can
        # track how the network's output for them changes over time.
        prediction = model.predict(np.array(test_in[:3]))
        differences.append(prediction)

        # NOTE(review): this stores whatever object History_Log exposes; if the callback
        # reuses one dict between fits, every entry here is the same object - confirm.
        epoch_history.append(history_log.epoch_data)

        if val_loss < 0.001 or total_epoch >= fitting_params['max_epochs']:
            trained = True

    if lowest_val_loss_weights is None:
        # No validation loss ever compared below +infinity, so there is no "best"
        # snapshot. Fall back to the final weights instead of passing None on.
        lowest_val_loss_weights = weights_to_list(model)

    differences = np.array(differences)  # Need special np indexing on this later

    if not out_directory:
        out_directory = os.getcwd()

    if not os.path.exists(out_directory):
        os.makedirs(out_directory)

    graph_directory = '{0}/graph'.format(out_directory)
    if not os.path.exists(graph_directory):
        os.mkdir(graph_directory)

    print(graph_directory)

    # Common prefix for every file written to the graph directory.
    graph_out = '{0}/graph/{1}'.format(out_directory, output_file_name)

    # Save network weights, graph of network and loss over epoch graph.
    to_graph(model).write_svg("{0}_Graph.svg".format(graph_out))
    model.save_weights('{0}_weights.h5'.format(graph_out), overwrite=True)
    save_mean_convergence_graph('{0}_convergence'.format(graph_out), differences,
                                mean_out, min_out, max_out, fitting_params['epochs_per_fit'])
    save_graph(epoch_history, '{0}_loss'.format(graph_out), fitting_params['epochs_per_fit'])

    # Run the configured number of tests ('num_tests') on the network's final weights.
    test_results, test_means, test_std = do_tests(model, test_in, fitting_params['num_tests'])

    # Evaluate the network on its final weights. The positional arguments are
    # presumably batch_size, show_accuracy and verbose - TODO confirm against the
    # Keras version in use.
    evaluation = {'At end of training:': model.evaluate(test_in, test_out, 1000, True, True)}

    # Repeat the tests with the lowest validation loss weights.
    model = weight_from_list(model, lowest_val_loss_weights)
    model.save_weights('{0}_best_weights.h5'.format(graph_out), overwrite=True)
    val_test_results, val_test_means, val_test_std = do_tests(model, test_in,
                                                              fitting_params['num_tests'])

    # Evaluate the network on the lowest validation loss weights.
    evaluation['Best validation loss:'] = model.evaluate(test_in, test_out, 1000, True, True)

    single_output = config['PreprocessingConfig']['single_output']

    with open('{0}/{1}.txt'.format(out_directory, output_file_name), 'w') as f:
        write_summary_data(f, config, evaluation, epoch_history,
                           mean_in, stddev_in, mean_out, stddev_out,
                           (lowest_val_loss_epoch, lowest_val_loss))

        f.write('\n\n\n\n----------TEST DATA FOR FINAL MODEL----------\n\n\n\n')
        write_test_data(f, test_results, test_in, test_out, test_means, test_std,
                        galaxy_ids_test, out_normaliser, single_output)

        f.write('\n\n\n\n----------TEST DATA FOR BEST VALIDATION LOSS MODEL----------\n\n\n\n')
        write_test_data(f, val_test_results, test_in, test_out, val_test_means, val_test_std,
                        galaxy_ids_test, out_normaliser, single_output)