def save_best_history(model, trainer):
    global monitor_save_best
    global test_monitor
    global prev_nll
    # Let the MonitorBasedSaveBest extension track the best model on the
    # validation set, then record the current test NLL and misclassification.
    monitor_save_best.on_monitor(model, trainer.monitoring_dataset['valid'], trainer)
    nll = monitor.read_channel(model, 'test_y_nll') + 0
    test_monitor.append((nll, monitor.read_channel(model, 'test_y_misclass')))
    # Persist the model and the accumulated test history whenever the test NLL
    # drops below the reference value.
    if nll < prev_nll:
        with open('best.pkl', 'wb') as f:
            pk.dump(model, f, protocol=pk.HIGHEST_PROTOCOL)
        with open('monitor.pkl', 'wb') as f:
            pk.dump(test_monitor, f, protocol=pk.HIGHEST_PROTOCOL)
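# NOTE: read_channel / monitor.read_channel are imported from elsewhere in the
# repository and are not shown here. The sketch below is an assumption about
# what such a helper does, based on how pylearn2 exposes monitoring channels
# (model.monitor.channels[name].val_record); it is illustrative only.
def _read_channel_sketch(model, channel_name):
    """Return the most recently recorded value of a pylearn2 monitoring channel."""
    return model.monitor.channels[channel_name].val_record[-1]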
def main(job_id, requested_params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # Add parameters that are not currently being tuned but could potentially be tuned.
    params = additional_args
    params.update(requested_params)

    if params.get('rate', None) is not None:
        params['log_init_learning_rate'][0] = numpy.array([params['rate']])

    train_params = {
        'train_start': params['start'],
        'train_stop': params['stop'],
        'valid_start': 20000,
        'valid_stop': 24000,
        'test_stop': 4000,
        'batch_size': 100,
        'max_epochs': 20,
        'max_batches': 10,
        'sgd_seed': sgd_seed_str,
        'mlp_seed': mlp_seed_str,
        'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
        'max_col_norm_y': params['max_norm_y'][0],
        'irange_y': math.pow(10, params['l_ir_y'][0]),
        'init_momentum': 0.5,
        'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
    }

    # Fill the YAML template for the softmax model and build the Train object.
    with open('slp_fooddata.yaml', 'r') as f:
        trainer = f.read()
    yaml_string = trainer % train_params
    train_obj = yaml_parse.load(yaml_string)

    # Optionally warm-start from a pre-trained model.
    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print 'loading pre-trained model'
        pretrained_model = serial.load(pretrained_model_path)
        print 'loading done'
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    # In 'converge' mode, train for up to 100 epochs and keep the best model
    # according to validation misclassification.
    if 'converge' in params:
        train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = 100
        train_obj.extensions.append(
            MonitorBasedSaveBest('valid_y_misclass', params['save']))

    train_obj.setup()
    train_obj.model.monitor.on_channel_conflict = 'ignore'
    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    train_obj.main_loop(do_setup=False)

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass)
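# The slp_fooddata.yaml template itself is not shown here. The train_params
# keys above are substituted into it with Python %-formatting, so the template
# is assumed to contain named placeholders roughly along these lines
# (illustrative excerpt in the usual pylearn2 YAML style, not the repository's
# actual file):
#
#   !obj:pylearn2.train.Train {
#       dataset: !obj:... { start: %(train_start)i, stop: %(train_stop)i },
#       model: !obj:pylearn2.models.mlp.MLP {
#           batch_size: %(batch_size)i,
#           layers: [ !obj:pylearn2.models.mlp.Softmax {
#               layer_name: 'y',
#               irange: %(irange_y)f,
#               max_col_norm: %(max_col_norm_y)f } ],
#       },
#       algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
#           learning_rate: %(init_learning_rate)f,
#       },
#   }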
def main(job_id, requested_params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # Add parameters that are not currently being tuned but could potentially be tuned.
    params = additional_args
    params.update(requested_params)

    # Select the per-layer channel counts from the requested kernel configuration.
    if params['kernel_config'][0] == 'a':
        output_channels_h2 = int(2.89 * 100)
        output_channels_h3 = int(1.70 * 100)
        output_channels_h4 = int(1.00 * 100)
    elif params['kernel_config'][0] == 'b':
        output_channels_h2 = int(1.00 * 100)
        output_channels_h3 = int(1.70 * 100)
        output_channels_h4 = int(2.89 * 100)
    elif params['kernel_config'][0] == 'c':
        output_channels_h2 = int(1.00 * 50)
        output_channels_h3 = int(3.42 * 50)
        output_channels_h4 = int(11.67 * 50)
    elif params['kernel_config'][0] == 'd':
        output_channels_h2 = int(11.67 * 50)
        output_channels_h3 = int(3.42 * 50)
        output_channels_h4 = int(1.00 * 50)
    else:
        raise RuntimeError('Unknown kernel config')

    if params.get('rate', None) is not None:
        params['log_init_learning_rate'][0] += numpy.array([params['rate']])
        print params['log_init_learning_rate'][0]

    # Trainers are cached per combination of parameters that cannot be changed
    # on an already-built model (kernel sizes and kernel configuration).
    fixed_params = (params['kernel_size_h2'][0],
                    params['kernel_size_h3'][0],
                    params['kernel_config'][0])
    if 'cached_trainer' + str(fixed_params) not in cache:
        train_params = {
            'train_start': params['start'],
            'train_stop': params['stop'],
            'valid_start': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': params.get('epochs', 1),
            'max_batches': 10,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': output_channels_h2,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': output_channels_h3,
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': output_channels_h4,
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),
            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }
        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()
        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = params.get('epochs', 100)
            train_obj.extensions.append(
                MonitorBasedSaveBest('valid_y_misclass', params['save']))
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        # cache['cached_trainer' + str(fixed_params)] = train_obj
    else:
        # Reuse the cached trainer and re-initialise its layers with the newly
        # requested hyperparameters.
        train_obj = cache['cached_trainer' + str(fixed_params)]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()
        model = train_obj.model
        model_params = dict([(param.name, param) for param in model.get_params()])
        rng = model.rng
        update_conv_layer(model.layers[0], params['l_ir_h2'][0],
                          params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0],
                          params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0],
                          params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0],
                             params['max_norm_y'][0], model_params, rng)
        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            params['init_momentum'][0].astype(numpy.float32))

    # Optionally warm-start from a pre-trained model.
    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print 'loading pre-trained model'
        pretrained_model = serial.load(pretrained_model_path)
        print 'loading done'
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    train_obj.main_loop(do_setup=False)

    if params.get('savelast', False):
        serial.save(params['save'] + 'f', train_obj.model, on_overwrite='backup')

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
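# update_conv_layer and update_softmax_layer are defined elsewhere in the
# repository and are not shown here. The sketch below is an assumption about
# what the convolutional variant does when a cached trainer is reused:
# re-draw the layer's kernels with the new init range and update its norm
# constraint. Illustrative only; the real helpers may also reset biases via
# model_params and handle the softmax layer analogously.
def _update_conv_layer_sketch(layer, l_irange, max_norm, model_params, rng):
    irange = 10 ** l_irange
    W, = layer.transformer.get_params()  # convolution kernels (shared variable)
    W.set_value(
        rng.uniform(-irange, irange, W.get_value().shape).astype(W.dtype))
    layer.max_kernel_norm = max_norm     # norm constraint applied on updates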
def main(job_id, requested_params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # Add parameters that are not currently being tuned but could potentially be tuned.
    params = additional_args
    params.update(requested_params)

    output_channels_h2 = int(1.00 * 50)
    output_channels_h3 = int(3.42 * 50)
    output_channels_h4 = int(11.67 * 50)

    # Dropout hyperparameters arrive in tenths; convert them to include probabilities.
    dropout_h2 = float(params['dropout_h2'][0]) / 10
    dropout_h3 = float(params['dropout_h3'][0]) / 10
    dropout_h4 = float(params['dropout_h4'][0]) / 10
    dropout_y = float(params['dropout_y'][0]) / 10

    if params.get('rate', None) is not None:
        params['log_init_learning_rate'][0] += numpy.array([params['rate']])

    # Trainers are cached per combination of parameters that cannot be changed
    # on an already-built model (kernel sizes and dropout settings).
    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0],
                    params['dropout_h2'][0], params['dropout_h3'][0],
                    params['dropout_h4'][0], params['dropout_y'][0])
    if 'cached_trainer' + str(fixed_params) not in cache:
        train_params = {
            'train_start': params['start'],
            'train_stop': params['stop'],
            'valid_start': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': params.get('epochs', 1),
            'max_batches': 50,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': output_channels_h2,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'dropout_h2': dropout_h2,
            'dscale_h2': params['dfac_h2'][0] * 1.0 / dropout_h2,
            'w_lr_sc_h2': math.pow(dropout_h2, 2),
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),
            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': output_channels_h3,
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'dropout_h3': dropout_h3,
            'dscale_h3': params['dfac_h3'][0] * 1.0 / dropout_h3,
            'w_lr_sc_h3': math.pow(dropout_h3, 2),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),
            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': output_channels_h4,
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'dropout_h4': dropout_h4,
            'dscale_h4': params['dfac_h4'][0] * 1.0 / dropout_h4,
            'w_lr_sc_h4': math.pow(dropout_h4, 2),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),
            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'dropout_y': dropout_y,
            'dscale_y': 1.0 / dropout_y,
            'w_lr_sc_y': math.pow(dropout_y, 2),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }
        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()
        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = params.get('epochs', 100)
            train_obj.extensions.append(
                MonitorBasedSaveBest('valid_y_misclass', params['save']))
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        # cache['cached_trainer' + str(fixed_params)] = train_obj
    else:
        # Reuse the cached trainer and re-initialise its layers with the newly
        # requested hyperparameters.
        train_obj = cache['cached_trainer' + str(fixed_params)]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()
        model = train_obj.model
        model_params = dict([(param.name, param) for param in model.get_params()])
        rng = model.rng
        update_conv_layer(model.layers[0], params['l_ir_h2'][0],
                          params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0],
                          params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0],
                          params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0],
                             params['max_norm_y'][0], model_params, rng)
        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            params['init_momentum'][0].astype(numpy.float32))

    # Optionally warm-start from a pre-trained model.
    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print 'loading pre-trained model'
        pretrained_model = serial.load(pretrained_model_path)
        print 'loading done'
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    train_obj.main_loop(do_setup=False)

    if params.get('savelast', False):
        serial.save(params['save'] + 'f', train_obj.model, on_overwrite='backup')

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
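# Worked example of the dropout-derived quantities built above (illustrative
# values only, not taken from the experiments): a Spearmint dropout setting of
# 8 becomes an include probability p = 8 / 10.0 = 0.8; the activation scale is
# dfac / p (for example 1.0 / 0.8 = 1.25 when dfac == 1.0, and simply 1 / p in
# the variants without a dfac parameter); the per-layer learning-rate scale is
# p ** 2 = 0.64.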
def main(job_id, params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # Add parameters that are not currently being tuned but could potentially be tuned.
    params.update(additional_args)

    dropout_h2 = float(params['dropout_h2'][0]) / 10
    dropout_h3 = float(params['dropout_h3'][0]) / 10
    dropout_h4 = float(params['dropout_h4'][0]) / 10
    dropout_y = float(params['dropout_y'][0]) / 10

    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0],
                    params['dropout_h2'][0], params['dropout_h3'][0],
                    params['dropout_h4'][0], params['dropout_y'][0])
    if 'cached_trainer' + str(fixed_params) not in cache:
        train_params = {
            'train_stop': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': 1,
            'max_batches': 50,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'save_file': 'result',
            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': 1 * k,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'dropout_h2': dropout_h2,
            'dscale_h2': 1.0 / dropout_h2,
            'w_lr_sc_h2': math.pow(dropout_h2, 2),
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),
            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': int(1.7 * k),
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'dropout_h3': dropout_h3,
            'dscale_h3': 1.0 / dropout_h3,
            'w_lr_sc_h3': math.pow(dropout_h3, 2),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),
            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': int(2.5 * k),
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'dropout_h4': dropout_h4,
            'dscale_h4': 1.0 / dropout_h4,
            'w_lr_sc_h4': math.pow(dropout_h4, 2),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),
            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'dropout_y': dropout_y,
            'dscale_y': 1.0 / dropout_y,
            'w_lr_sc_y': math.pow(dropout_y, 2),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }
        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()
        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            del train_obj.algorithm.termination_criterion._criteria[:]
            train_obj.extensions.append(
                MonitorBasedSaveBest('valid_y_misclass', 'best_model.pkl'))
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        # cache['cached_trainer' + str(fixed_params)] = train_obj
    else:
        train_obj = cache['cached_trainer' + str(fixed_params)]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()
        model = train_obj.model
        model_params = dict([(param.name, param) for param in model.get_params()])
        rng = model.rng
        update_conv_layer(model.layers[0], params['l_ir_h2'][0],
                          params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0],
                          params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0],
                          params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0],
                             params['max_norm_y'][0], model_params, rng)
        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            params['init_momentum'][0].astype(numpy.float32))

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    train_obj.main_loop(do_setup=False)

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
def main(job_id, params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # Add parameters that are not currently being tuned but could potentially be tuned.
    params.update(additional_args)

    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0])
    if 'cached_trainer' + str(fixed_params) not in cache:
        train_params = {
            'train_stop': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': 1,
            'max_batches': 10,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'save_file': 'result',
            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': 1 * k,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': int(1.7 * k),
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': int(2.5 * k),
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),
            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }
        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()
        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            del train_obj.algorithm.termination_criterion._criteria[:]
            train_obj.extensions.append(
                MonitorBasedSaveBest('valid_y_misclass', 'best_model.pkl'))
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        cache['cached_trainer' + str(fixed_params)] = train_obj
    else:
        train_obj = cache['cached_trainer' + str(fixed_params)]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()
        model = train_obj.model
        model_params = dict([(param.name, param) for param in model.get_params()])
        rng = model.rng
        update_conv_layer(model.layers[0], params['l_ir_h2'][0],
                          params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0],
                          params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0],
                          params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0],
                             params['max_norm_y'][0], model_params, rng)
        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            params['init_momentum'][0].astype(numpy.float32))

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    train_obj.main_loop(do_setup=False)

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
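# The functions above follow the Spearmint objective interface: Spearmint
# imports the experiment module and repeatedly calls main(job_id, params) with
# a dict of numpy arrays, one per hyperparameter. The wrapper below is an
# assumption about how a persistent cache dict could be threaded through so a
# built trainer survives across evaluations in the same worker process; it is
# illustrative, not the repository's actual entry point.
_trainer_cache = {}

def spearmint_main_sketch(job_id, spearmint_params):
    # Delegate to one of the main(job_id, params, cache) variants above,
    # reusing the module-level cache between Spearmint evaluations.
    return main(job_id, spearmint_params, _trainer_cache)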