def test_load_train_file():
    """
    Loads a YAML file with and without environment variables.
    """
    environ = {'PYLEARN2_DATA_PATH': '/just/a/test/path/'}
    load_train_file(yaml_path + 'test_model.yaml')
    load_train_file(yaml_path + 'test_model.yaml', environ=environ)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", required=True,
                        help="The name of the yaml file produced")
    parser.add_argument("--template", required=True, help="YAML template")
    parser.add_argument("--hparams", required=True,
                        help="Hyper-parameters configuration")
    parser.add_argument("--force", action='store_true',
                        help="Force overwriting of the old yaml file produced")
    parser.add_argument("--range", nargs=2, type=int,
                        help="Subrange of files to execute")
    options = parser.parse_args()

    out = options.out
    template = options.template
    hparams = options.hparams
    force = options.force
    print options

    # Generate a list of hyper-parameter names and a list of
    # hyper-parameter values
    hpnames, hpvalues = generate_params(hparamfile=hparams,
                                        generate="uniform",
                                        search_mode="fix-grid-search")

    # Write the template with each hyper-parameter setting to
    # successive files and return the file names
    files = write_files(template="".join(open(template, "r")),
                        hpnames=hpnames, hpvalues=hpvalues,
                        save_path=out, force=force)

    if options.range:
        if options.range[1] == -1:
            options.range[1] = len(files)
        assert options.range[0] < options.range[1]
        iterator = xrange(*options.range)
    else:
        iterator = xrange(0, len(files))

    print list(iterator)
    print_error_message("errors\n", out, "w")

    from pylearn2.utils import serial
    for i in iterator:
        f = files[i]
        print f
        try:
            serial.load_train_file(f).main_loop()
        except BaseException as e:
            print traceback.format_exc()
            print e
            print_error_message("%s : %s\n" % (f, str(e)), out)
def test_mnist():
    """
    Test the mnist.yaml file from the maxout paper on random input
    """
    skip_if_no_gpu()
    train = load_train_file(
        os.path.join(pylearn2.__path__[0],
                     "scripts/papers/maxout/mnist.yaml"))

    # Load fake MNIST data
    init_value = control.load_data
    control.load_data = [False]
    train.dataset = MNIST(which_set='train', one_hot=1,
                          axes=['c', 0, 1, 'b'], start=0, stop=100)
    train.algorithm._set_monitoring_dataset(train.dataset)
    control.load_data = init_value

    # Train briefly and prevent saving
    train.algorithm.termination_criterion = EpochCounter(max_epochs=1)
    train.extensions.pop(0)
    train.save_freq = 0
    train.main_loop()
def train_again(yaml):
    '''
    Relaunch training of a model with the conditions specified by the YAML.

    Looks for the model file defined by the save path and replaces the
    freshly instantiated model with the one that was trained before.
    -------------------------------------------------------------------
    yaml : string, filename
        YAML file defining the experiment to be continued
    '''
    context = serial.load_train_file(yaml)
    print "\tLoaded YAML"

    # Load the trained model
    model_file = context.save_path
    if not os.path.isfile(model_file):
        model_file = context.extensions[-1].save_path
    with open(model_file, 'r') as m_f:
        trained_model = pkl.load(m_f)

    # Define the continuing one
    new_model = push_monitor(trained_model, 'trained_model',
                             transfer_experience=True)

    # Define it as the model to be trained
    context.model = new_model

    # Train again
    context.main_loop()
def limited_epoch_train(file_path, max_epochs=1):
    try:
        train = load_train_file(file_path)
        train.algorithm.termination_criterion = EpochCounter(
            max_epochs=max_epochs)
        train.main_loop()
    except NoDataPathError:
        raise SkipTest("PYLEARN2_DATA_PATH environment variable not defined")
def main(argv):
    try:
        opts, args = getopt.getopt(argv, '')
        student_yaml = args[0]
        load_layer = int(args[1])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Load the student
    student = serial.load_train_file(student_yaml)

    # Load hints
    if student.algorithm.cost.hints is not None:
        student_layers = list(zip(*student.algorithm.cost.hints)[0])
        teacher_layers = list(zip(*student.algorithm.cost.hints)[1])
        n_hints = len(student_layers)
    else:
        n_hints = 0

    hint_path = (student.save_path[0:-4] + "_hintlayer" +
                 str(load_layer) + ".pkl")
    for ext in range(len(student.extensions)):
        if isinstance(student.extensions[ext], MonitorBasedSaveBest):
            hint_path = (student.extensions[ext].save_path[0:-9] +
                         "_hintlayer" + str(load_layer) + "_best.pkl")

    # Load the pretrained student network and copy its lower layers
    pretrained_model = serial.load(hint_path)
    student.model.layers[0:load_layer + 1] = \
        pretrained_model.layers[0:load_layer + 1]
    del pretrained_model

    student.save_path = (student.save_path[0:-4] + "_hint" +
                         str(load_layer) + "_softmax.pkl")
    for ext in range(len(student.extensions)):
        if isinstance(student.extensions[ext], MonitorBasedSaveBest):
            student.extensions[ext].save_path = \
                student.save_path[0:-4] + "_best.pkl"

    student.main_loop()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", required=True,
                        help="The name of the yaml file produced")
    parser.add_argument("--template", required=True, help="YAML template")
    parser.add_argument("--hparams", required=True,
                        help="Hyper-parameters configuration")
    parser.add_argument("--force", action='store_true',
                        help="Force overwriting of the old yaml file produced")
    options = parser.parse_args()

    out = options.out
    template = options.template
    hparams = options.hparams
    force = options.force
    print options

    # Generate a list of hyper-parameter names and a list of
    # hyper-parameter values
    hpnames, hpvalues = generate_params(hparamfile=hparams,
                                        generate="uniform",
                                        search_mode="fix-grid-search")

    # Write the template with each hyper-parameter setting to
    # successive files and return the file names
    files = write_files(template="".join(open(template, "r")),
                        hpnames=hpnames, hpvalues=hpvalues,
                        save_path=out, force=force)

    print_error_message("errors\n", out, "w")

    from pylearn2.utils import serial
    for i in xrange(0, len(files)):
        f = files[i]
        try:
            serial.load_train_file(f).main_loop()
        except BaseException as e:
            print traceback.format_exc()
            print e
            print_error_message("%s : %s\n" % (f, str(e)), out)
def instantiate_decoder_from_yaml(yaml_filepath):
    decoder = serial.load_train_file(yaml_filepath)
    assert isinstance(decoder, Pylearn2KaldiDecoderProvider)
    assert isinstance(decoder.model, Model)
    assert (isinstance(decoder.preprocessor, OnlinePreprocessor) or
            decoder.preprocessor is None)
    return decoder
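A minimal usage sketch, assuming the Kaldi decoder provider classes above are importable; the YAML file name is a placeholder:

# Hypothetical example: build a decoder from a YAML definition and
# inspect the model the provider loaded ('decoder.yaml' is a placeholder).
decoder = instantiate_decoder_from_yaml('decoder.yaml')
print 'decoder model:', decoder.model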
def main(dataset='data/pylearn2/mnist.pkl.gz',
         nn_config="lib/pylearn2_log.yaml"):
    print '... loading and setting up data'
    load_data(dataset)
    print '... building the model structure'
    train_obj = serial.load_train_file(nn_config)
    print '... training the model'
    train_obj.main_loop()
def main(argv, freeze):
    try:
        opts, args = getopt.getopt(argv, '')
        yaml = args[0]
        model = args[1]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Load the yaml
    train = serial.load_train_file(yaml)

    # Load the pretrained model with the bad sigmoid output
    with open(model, 'r') as fo:
        model = pkl.load(fo)

    # Optionally freeze everything but the last two layers
    if freeze:
        for i in range(0, len(model.layers) - 2):
            model.freeze(model.layers[i].get_params())

    # Replace the second-to-last layer with a conv elemwise layer
    layer = ConvElemwise(layer_name='out',
                         output_channels=1,
                         kernel_shape=(1, 1),
                         irange=0.05,
                         nonlinearity=IdentityConvNonlinearity(),
                         max_kernel_norm=7.9,
                         tied_b=1)
    layer.set_mlp(model)
    layer.set_input_space(model.layers[-3].get_output_space())
    model.layers[-2] = layer

    # Replace the last layer with a real sigmoid
    layer = SigmoidExtended(layer_name='y', n_classes=1)
    layer.set_mlp(model)
    layer.set_input_space(model.layers[-2].get_output_space())
    model.layers[-1] = layer

    train.model = push_monitor(model, "old")
    print train.model
    train.main_loop()
def _load(self, fname):
    """
    Internal load.
    """
    # Load the train object
    train_obj = serial.load_train_file(fname)
    if not isinstance(train_obj, Train):
        raise ValueError('Top-level object must be a pylearn2.Train')

    # Prepare for training
    pre_train(train_obj)
    self.train_obj = train_obj
    self.crt_num_steps = 0
def limited_epoch_train(file_path, max_epochs=1):
    """
    Train a given YAML file for a limited number of epochs.

    Parameters
    ----------
    file_path : str
        The path to the YAML file to be trained.
    max_epochs : int
        The number of epochs to train this YAML file for. Defaults to 1.
    """
    train = load_train_file(file_path)
    train.algorithm.termination_criterion = EpochCounter(
        max_epochs=max_epochs)
    train.main_loop()
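A brief usage sketch; the experiment YAML path below is a placeholder:

# Hypothetical smoke test: run the experiment for two epochs only.
limited_epoch_train('experiment.yaml', max_epochs=2)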
def main(argv):
    try:
        opts, args = getopt.getopt(argv, '')
        model_yaml = args[0]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Load the model
    model = serial.load_train_file(model_yaml)
    result = numberMult(model)
    print 'Number of multiplications is %i' % (result)
def main(argv):
    try:
        opts, args = getopt.getopt(argv, '')
        model_yaml = args[0]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Load the model
    model = serial.load_train_file(model_yaml)
    result = numberParams(model)
    print 'Number of parameters is %i' % (result)
def test_train_example():
    """
    Tests that the train example script runs correctly.
    """
    path = pylearn2.__path__[0]
    train_example_path = path + '/scripts/train_example'
    cwd = os.getcwd()
    try:
        os.chdir(train_example_path)
        train_yaml_path = 'cifar_grbm_smd.yaml'
        train_object = load_train_file(train_yaml_path)

        # Make the termination criterion really lax so the test
        # won't run for long
        train_object.algorithm.termination_criterion.prop_decrease = 0.5
        train_object.algorithm.termination_criterion.N = 1
        train_object.main_loop()
    finally:
        os.chdir(cwd)
def test_train_example():
    """
    Tests that the grbm_smd example script runs correctly.
    """
    assert config.mode != "DEBUG_MODE"
    path = pylearn2.__path__[0]
    train_example_path = os.path.join(path, 'scripts', 'tutorials',
                                      'grbm_smd')
    cwd = os.getcwd()
    try:
        os.chdir(train_example_path)
        train_yaml_path = os.path.join(train_example_path,
                                       'cifar_grbm_smd.yaml')
        train_object = load_train_file(train_yaml_path)

        # Make the termination criterion really lax so the test
        # won't run for long
        train_object.algorithm.termination_criterion.prop_decrease = 0.5
        train_object.algorithm.termination_criterion.N = 1
        train_object.main_loop()
    finally:
        os.chdir(cwd)
def train_again(yaml):
    '''
    Relaunch training of a model with the conditions specified by the YAML.

    Looks for the model file defined by the save path and replaces the
    freshly instantiated model with the one that was trained before.
    -------------------------------------------------------------------
    yaml : string, filename
        YAML file defining the experiment to be continued
    '''
    context = serial.load_train_file(yaml)
    print "\tLoaded YAML"

    # Find the MonitorBasedSaveBest extension
    pos = None
    for ext in range(len(context.extensions)):
        if isinstance(context.extensions[ext], MonitorBasedSaveBest):
            pos = ext
    if pos is None:
        raise AssertionError('No MonitorBasedSaveBest extension '
                             'in the model!')

    # Load the trained model
    model_file = context.save_path
    if not os.path.isfile(model_file):
        model_file = context.extensions[pos].save_path
    with open(model_file, 'r') as m_f:
        trained_model = pkl.load(m_f)

    # Define the continuing one
    new_model = push_monitor(trained_model, 'trained_model',
                             transfer_experience=True)

    # Define it as the model to be trained
    context.model = new_model
    context.save_path = (context.extensions[pos].save_path[0:-4] +
                         "_continue.pkl")
    context.extensions[pos].save_path = context.save_path[0:-4] + "_best.pkl"

    # Train again
    context.main_loop()
def test_train_example():
    """
    Tests that the grbm_smd example script runs correctly.
    """
    assert config.mode != "DEBUG_MODE"
    if 'TRAVIS' in os.environ and os.environ['TRAVIS'] == '1':
        raise SkipTest()
    path = pylearn2.__path__[0]
    train_example_path = path + '/scripts/tutorials/grbm_smd'
    cwd = os.getcwd()
    try:
        os.chdir(train_example_path)
        train_yaml_path = 'cifar_grbm_smd.yaml'
        train_object = load_train_file(train_yaml_path)

        # Make the termination criterion really lax so the test
        # won't run for long
        train_object.algorithm.termination_criterion.prop_decrease = 0.5
        train_object.algorithm.termination_criterion.N = 1
        train_object.main_loop()
    finally:
        os.chdir(cwd)
def test_train_example():
    """
    Tests that the train example script runs correctly.
    """
    assert config.mode != "DEBUG_MODE"
    if 'TRAVIS' in os.environ and os.environ['TRAVIS'] == '1':
        raise SkipTest()
    path = pylearn2.__path__[0]
    train_example_path = path + '/scripts/train_example'
    cwd = os.getcwd()
    try:
        os.chdir(train_example_path)
        train_yaml_path = 'cifar_grbm_smd.yaml'
        train_object = load_train_file(train_yaml_path)

        # Make the termination criterion really lax so the test
        # won't run for long
        train_object.algorithm.termination_criterion.prop_decrease = 0.5
        train_object.algorithm.termination_criterion.N = 1
        train_object.main_loop()
    finally:
        os.chdir(cwd)
def prepare_decoding_pipeline(options):
    # Prepare the decoder from yaml, or simply instantiate an empty
    # instance with model and preprocessor set to None
    decoder = prepare_decoder(options.decoder_yaml)

    if decoder.model is None:
        if options.model_pkl is None:
            raise Exception('Not specified how to build or load the model')
        model = serial.load(options.model_pkl)
        decoder.model = model

    if decoder.preprocessor is None:
        if options.prepr_yaml is None:
            log.warning("Preprocessor is empty; feats will be fed into the "
                        "model as provided from the pipe or as read from "
                        "the archive.")
            preprocessor = None
        else:
            preprocessor = serial.load_train_file(options.prepr_yaml)
        decoder.preprocessor = preprocessor

    if (options.model_pytables is not None) or \
            (options.model_pytables_sd is not None):
        params = load_model_params_from_pytables(options.model_pytables,
                                                 options.model_pytables_sd)
        # TODO: cross-check that parameter keys match 1:1 with the model
        decoder.set_model_params(params)

    log_priors = None
    if options.priors_path is not None:
        log_priors = load_kaldi_priors(options.priors_path)
    decoder.build_fwdprop_function(log_priors)

    assert isinstance(decoder, Pylearn2KaldiDecoderProvider)
    assert isinstance(decoder.model, Model)
    assert (isinstance(decoder.preprocessor, OnlinePreprocessor) or
            decoder.preprocessor is None)
    assert decoder.fprop is not None

    return decoder
def test_mnist_pi():
    """
    Test the mnist_pi.yaml file from the maxout paper on random input
    """
    train = load_train_file(
        os.path.join(pylearn2.__path__[0],
                     "scripts/papers/maxout/mnist_pi.yaml"))

    # Load fake MNIST data
    init_value = control.load_data
    control.load_data = [False]
    train.dataset = MNIST(which_set='train', start=0, stop=100)
    train.algorithm._set_monitoring_dataset(train.dataset)
    control.load_data = init_value

    # Train briefly and prevent saving
    train.algorithm.termination_criterion = EpochCounter(max_epochs=1)
    train.extensions.pop(0)
    train.save_freq = 0
    train.main_loop()
def train_vecs(self, source_vecs, target_vecs):
    self.build_datasets(source_vecs, target_vecs)

    from pylearn2.space import VectorSpace
    from pylearn2.utils.serial import load_train_file

    # TODO allow overrides via parameters
    train = load_train_file(self.network_cfg)

    # Change the input layer size
    model = train.model
    if not isinstance(model, mlp.MLP):
        raise RuntimeError("Provided network config does not use an "
                           "MLP model -- not supported by this "
                           "translation model code")
    input_size = self.source_vsm.layer1_size
    model.set_input_space(VectorSpace(dim=input_size))

    # Change the output layer size
    final_layer = model.layers[-1]
    if not isinstance(final_layer, mlp.Linear):
        raise RuntimeError("Provided network config does not have "
                           "a linear output layer -- not supported "
                           "by this translation model code")
    output_size = self.target_vsm.layer1_size

    # TODO is this sufficient for the linear layer? Might need to
    # call some setter which updates internal state
    final_layer.dim = output_size

    # Now begin training
    train.main_loop()
    self.network = train.model
    self.make_network_fn()
def train(config, level_name=None, timestamp=None, time_budget=None,
          verbose_logging=None, debug=None):
    """
    Trains a given YAML file.

    Parameters
    ----------
    config : str
        A YAML configuration file specifying the training procedure.
    level_name : bool, optional
        Display the log level (e.g. DEBUG, INFO) for each logged message.
    timestamp : bool, optional
        Display human-readable timestamps for each logged message.
    time_budget : int, optional
        Time budget in seconds. Stop training at the end of an epoch if
        more than this number of seconds has elapsed.
    verbose_logging : bool, optional
        Display timestamp, log level and source logger for every logged
        message (implies timestamp and level_name are True).
    debug : bool, optional
        Display any DEBUG-level log messages, False by default.
    """
    train_obj = serial.load_train_file(config)
    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False

    # Undo our custom logging setup.
    restore_defaults()

    # Set up the root logger with a custom handler that logs stdout for
    # INFO and DEBUG and stderr for WARNING, ERROR, CRITICAL.
    root_logger = logging.getLogger()
    if verbose_logging:
        formatter = logging.Formatter(fmt="%(asctime)s %(name)s "
                                          "%(levelname)s %(message)s")
        handler = CustomStreamHandler(formatter=formatter)
    else:
        if timestamp:
            prefix = '%(asctime)s '
        else:
            prefix = ''
        formatter = CustomFormatter(prefix=prefix, only_from='pylearn2')
        handler = CustomStreamHandler(formatter=formatter)
    root_logger.addHandler(handler)

    # Set the root logger level.
    if debug:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)

    if iterable:
        for number, subobj in enumerate(iter(train_obj)):
            # Publish a variable indicating the training phase.
            phase_variable = 'PYLEARN2_TRAIN_PHASE'
            phase_value = 'phase%d' % (number + 1)
            os.environ[phase_variable] = phase_value

            # Execute this training phase.
            subobj.main_loop(time_budget=time_budget)

            # Clean up, in case there's a lot of memory used that's
            # necessary for the next phase.
            del subobj
            gc.collect()
    else:
        train_obj.main_loop(time_budget=time_budget)
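A minimal sketch of calling train() above directly, assuming it is importable; the YAML path and time budget are placeholders:

# Hypothetical invocation: timestamped log lines, stop after the first
# epoch that finishes beyond ~3600 seconds of training.
train('experiment.yaml', timestamp=True, time_budget=3600)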
def main():
    parser = argparse.ArgumentParser(description='Pylearn2 lab.')
    parser.add_argument('-s', '--save', action='store_true',
                        help='Save the resulting images')
    parser.add_argument('-q', '--quit', action='store_true',
                        help='Quit after plotting instead of dropping '
                             'into IPython')
    parser.add_argument('directory', type=str,
                        help='Which results directory to use')
    args = parser.parse_args()

    result_prefix = '/u/yosinski/s/galatea/fish/results/'
    result_dir = os.path.join(result_prefix, args.directory)

    print 'loading train object...'
    train = serial.load_train_file(os.path.join(result_dir, 'model.yaml'))
    print 'loading saved model...'
    model = serial.load(os.path.join(result_dir, 'model.pkl'))
    print 'done.'

    print 'model was trained on:'
    print model.dataset_yaml_src

    if train.algorithm.cost is not None:
        data_specs = train.algorithm.cost.get_data_specs(model)
    else:
        data_specs = train.model.get_default_cost().get_data_specs(
            train.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
    flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

    num_frames = model.num_frames
    num_batches = 100
    batch_size = (train.algorithm.batch_size
                  if train.algorithm.batch_size else 20 * num_frames)

    train_dataset = train.dataset
    valid_dataset = train.algorithm.monitoring_dataset['valid']

    rng = train.algorithm.rng
    if not is_stochastic(train.algorithm.train_iteration_mode):
        rng = None

    train_iterator = train_dataset.iterator(
        mode=train.algorithm.train_iteration_mode,
        batch_size=batch_size,
        data_specs=flat_data_specs,
        return_tuple=True,
        rng=rng,
        num_batches=num_batches * 10)
    valid_iterator = valid_dataset.iterator(
        mode=train.algorithm.train_iteration_mode,
        batch_size=batch_size,
        data_specs=flat_data_specs,
        return_tuple=True,
        # No rng override
        num_batches=num_batches * 10)

    train_batches = [train_iterator.next() for ii in range(num_batches)]
    valid_batches = [valid_iterator.next() for ii in range(num_batches)]

    print 'got batches with shape:'
    for dat in train_batches[0]:
        print '  ', dat.shape

    #########################
    # Plot costs over time
    #########################
    ch_train_objective = model.monitor.channels['train_objective']
    ch_valid_objective = model.monitor.channels['valid_objective']

    x_vals = ch_train_objective.epoch_record
    plot(x_vals, ch_train_objective.val_record, 'b-')
    plot(x_vals, ch_valid_objective.val_record, 'r-')
    legend(('train', 'valid'))
    if args.save:
        savefig(os.path.join(result_dir, 'costs_lin.png'))
        savefig(os.path.join(result_dir, 'costs_lin.pdf'))
        gca().set_yscale('log')
        savefig(os.path.join(result_dir, 'costs_log.png'))
        savefig(os.path.join(result_dir, 'costs_log.pdf'))
        gca().set_yscale('linear')

    #########################
    # Compute some accuracies
    #########################
    try:
        model.fns.feat_to_compout
    except AttributeError:
        model.redo_theano()

    all_acc_id = []
    all_xy_errs = []
    print 'Training set:'
    print '    acc_id\tx_err\ty_err'
    for bb, batch in enumerate(train_batches):
        feat, ids, xy = batch
        idsN_floatX = array(ids.argmax(1), dtype=theano.config.floatX)
        acc_id = model.fns.wiskott_id_accuracy(feat, idsN_floatX)
        all_acc_id.append(acc_id)
        xy_errs = model.fns.wiskott_xy_errors(feat, xy[:, 0:2])
        all_xy_errs.append(xy_errs)
        print '%2d:\t%g,\t%g,\t%g' % (bb, acc_id, xy_errs[0], xy_errs[1])

    #########################
    # Embed
    #########################
    if not args.quit:
        # Start an IPython shell
        ipshell()
    print 'done.'
    space.validate(data)
    inputs, targets = data
    outputs = model.logistic_regression(inputs)
    loss = -(targets * T.log(outputs)).sum(axis=1)
    return loss.mean()


class LogisticRegression(Model):
    def __init__(self, nvis, nclasses):
        super(LogisticRegression, self).__init__()
        self.nvis = nvis
        self.nclasses = nclasses

        W_value = numpy.random.uniform(size=(self.nvis, self.nclasses))
        self.W = sharedX(W_value, 'W')
        b_value = numpy.zeros(self.nclasses)
        self.b = sharedX(b_value, 'b')
        self._params = [self.W, self.b]

        self.input_space = VectorSpace(dim=self.nvis)
        self.output_space = VectorSpace(dim=self.nclasses)

    def logistic_regression(self, inputs):
        return T.nnet.softmax(T.dot(inputs, self.W) + self.b)


train_obj = serial.load_train_file('../files/log_reg.yaml')
train_obj.main_loop()
import numpy as np
from pylearn2.utils import serial
from theano import function

CODE_DIR = '/home/nico/Code/kaggle/GenderWrite/'
DATA_DIR = '/home/nico/datasets/Kaggle/GenderWrite/'

# Load the configuration file and train the model
train_obj = serial.load_train_file(CODE_DIR + 'gwmaxout.yaml')
train_obj.main_loop()


# Generate model output
def get_output(model, tdata, layerindex=-1, batch_size=100):
    Xb = model.get_input_space().make_batch_theano()
    ymf = model.fprop(Xb, return_all=True)
    data = tdata.get_topological_view()
    propagate = function([Xb], ymf)
def main():
    config_yaml = open("toy.yaml").read()
    trainer = serial.load_train_file(config_yaml % dict(save_path='tmp.tmp'))
    trainer.main_loop()
#!/usr/bin/env python
import pylearn2.utils.serial as serial

if __name__ == '__main__':
    import sys
    assert len(sys.argv) == 2
    serial.load_train_file(sys.argv[1]).main_loop()
from pylearn2.utils import serial

train_obj = serial.load_train_file('small_dataset.yaml')
train_obj.main_loop()
    parser = argparse.ArgumentParser(
        description="Launch an experiment from a YAML configuration file.",
        epilog='\n'.join(__doc__.strip().split('\n')[1:]).strip(),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('config', action='store',
                        choices=None,
                        help='A YAML configuration file specifying the '
                             'training procedure')
    return parser


if __name__ == "__main__":
    parser = make_argument_parser()
    args = parser.parse_args()
    train_obj = serial.load_train_file(args.config)
    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False
    if iterable:
        for number, subobj in enumerate(iter(train_obj)):
            # Publish a variable indicating the training phase.
            phase_variable = 'PYLEARN2_TRAIN_PHASE'
            phase_value = 'phase%d' % (number + 1)
            os.environ[phase_variable] = phase_value
            os.putenv(phase_variable, phase_value)

            # Execute this training phase.
            subobj.main_loop()
OUT = "/yaml/test.yaml" TEMPLATE = DIR+"template.yaml" HPARAMS = "hparams.conf" if __name__ == "__main__": # for transformation in ['translate','scale','rotate','flip','gaussian','sharpen','denoize','occlusion','halfface']: # for transformation in ['scale','rotate','flip','gaussian','sharpen','denoize','occlusion','halfface']: for transformation in ['denoize','sharpen']: out = DIR+transformation+OUT t_template = "".join(open(DIR+transformation+"/"+transformation+".yaml",'r')) # Generates a list of hyper-parameter names and a list of # hyper-parameter values hpnames, hpvalues = generate_params(hparamfile=DIR+transformation+"/"+transformation+".conf", generate="log-uniform", search_mode="fix-grid-search") template = "".join(open(TEMPLATE,'r')) % {'transformations': t_template,'save_path':'%(save_path)s'} # Writes template with each hyper-parameter settings in # succesive files and returns the name of the files files = write_files(template=template,hpnames=hpnames, hpvalues=hpvalues,save_path=out,force=True) # files = write_files(template="".join(open(TEMPLATE),'r'),hpnames=hpnames, # hpvalues=hpvalues,save_path=OUT) for f in files: serial.load_train_file(f).main_loop()
def create_model(yaml_path):
    from pylearn2.models.model import Model

    model = serial.load_train_file(yaml_path)
    assert isinstance(model, Model)
    return model
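A short usage sketch; the YAML path is a placeholder:

# Hypothetical example: instantiate a Model from its YAML description
# and list its trainable parameters.
model = create_model('model.yaml')
print model.get_params()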
                        '(implies -T).')
    parser.add_argument('--debug', '-D', action='store_true',
                        help='Display any DEBUG-level log messages, '
                             'suppressed by default.')
    parser.add_argument('config', action='store',
                        choices=None,
                        help='A YAML configuration file specifying the '
                             'training procedure')
    return parser


if __name__ == "__main__":
    parser = make_argument_parser()
    args = parser.parse_args()
    train_obj = serial.load_train_file(args.config)
    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False

    # Undo our custom logging setup.
    restore_defaults()

    # Set up the root logger with a custom handler that logs stdout for
    # INFO and DEBUG and stderr for WARNING, ERROR, CRITICAL.
    root_logger = logging.getLogger()
    if args.verbose_logging:
        formatter = logging.Formatter(fmt="%(asctime)s %(name)s "
                                          "%(levelname)s %(message)s")
        handler = CustomStreamHandler(formatter=formatter)
#! /usr/bin/env python
from pylearn2.utils import serial
import sys

if __name__ == "__main__":
    train_obj = serial.load_train_file(sys.argv[1])
    train_obj.main_loop()
from __future__ import print_function

from pylearn2.utils import serial
import matplotlib.pyplot as plt
import numpy as np

train_obj = serial.load_train_file('pylearn_svm.yaml')
train_obj.main_loop()

W, b = train_obj.model.get_param_values()
print(W)
print(b)

X = train_obj.dataset.X
x = np.linspace(-10, 10, 200)
f_x = (-1.0 * W[0][0] / W[1][0]) * x - (b[0][0] / W[1][0])
plt.plot(x, f_x)
plt.scatter(X[:, 0], X[:, 1])
plt.show()
def main(args=None):
    usage = ("pool_adaptation.py [options] <si-model-dir> <sa-model-dir> "
             "<feats-scp> <targets-pdf>")
    parser = OptionParser()
    parser.add_option("--adapt-yaml", dest="adapt_yaml", default="",
                      help="Provide the adaptation yaml template to "
                           "start with")
    parser.add_option("--freeze-means", dest="freeze_means", default=False,
                      help="Skip means while updating pools")
    parser.add_option("--freeze-betas", dest="freeze_betas", default=False,
                      help="Skip precisions while updating pools")
    parser.add_option("--freeze-amp", dest="freeze_amp", default="true",
                      help="Skip activation function amplitudes while "
                           "updating pools")
    parser.add_option("--freeze-slopes", dest="freeze_slopes",
                      default="true",
                      help="Skip activation function slopes while "
                           "updating pools")
    parser.add_option("--freeze-layer-ids", dest="freeze_layer_ids",
                      default="",
                      help="Update params only in these layers, "
                           "i.e. --freeze-layer-ids 012")
    parser.add_option("--job", dest="JOB", default=0,
                      help="JOB ID used to store the model in")
    parser.add_option("--debug", dest="debug", default=False,
                      help="Print activations and shapes in text format "
                           "rather than binary Kaldi archives")
    (options, args) = parser.parse_args(args=args)

    print options.adapt_yaml
    print options.freeze_means
    print options.freeze_betas
    print options.freeze_amp
    print options.freeze_slopes
    print options.freeze_layer_ids
    print options.JOB
    print 'ARGS: ', args

    if len(args) != 5:
        print usage
        exit(1)

    si_model_dir = args[1]
    sa_model_dir = args[2]
    feats_scp = args[3]
    targets_pdf = args[4]

    model_yaml = "%s/adapt_final%s.yaml" % (si_model_dir, options.JOB)
    model_params = "%s/cnn_best.h5" % si_model_dir

    if not os.path.isfile(options.adapt_yaml):
        raise Exception('File %s not found' % options.adapt_yaml)
    if not os.path.isfile(model_params):
        raise Exception('File %s not found' % model_params)

    # Fill in the adaptation template and write the concrete yaml
    vars = {}
    vars['adapt_flist'] = feats_scp
    vars['adapt_pdfs'] = targets_pdf
    vars['adapt_lr'] = 0.05
    vars['adapt_momentum'] = 0.5
    vars['sa_dir'] = sa_model_dir
    vars['JOB'] = options.JOB

    adapt_template = open(options.adapt_yaml, 'r').read()
    adapt_template_str = adapt_template % vars
    f = open(model_yaml, 'w')
    f.write(adapt_template_str)
    f.close()

    print 'Building model %s' % model_yaml
    train_obj = serial.load_train_file(model_yaml)

    print 'Loading params from %s' % model_params
    params = ModelPyTables.pytables_to_params(model_params, name='Model')
    train_obj.model.set_params(params)

    # Build the regex matching all parameters that should stay frozen
    freeze_regex = 'softmax_[Wb]|h[0-9]_[Wb]|nlrf_[Wb]'
    if options.freeze_layer_ids != '':
        layers = options.freeze_layer_ids
        freeze_regex = "%s|g[%s]p_u|g[%s]p_beta" % (freeze_regex,
                                                    layers, layers)
    if options.freeze_means == 'true':
        freeze_regex = "%s|g[0-9]p_u" % (freeze_regex)
    if options.freeze_betas == 'true':
        freeze_regex = "%s|g[0-9]p_beta" % (freeze_regex)
    if options.freeze_amp == 'true':
        freeze_regex = "%s|g[0-9]p_amp" % (freeze_regex)
    if options.freeze_slopes == 'true':
        freeze_regex = "%s|g[0-9]p_arg" % (freeze_regex)

    model_params = train_obj.model.get_params()
    params_to_freeze = {}
    for param in model_params:
        if re.match(freeze_regex, str(param)) is not None:
            if param not in params_to_freeze:
                params_to_freeze[param] = param

    if len(params_to_freeze) == len(model_params):
        print 'None of the parameters were set to be updated. ' \
              'Freeze list is', params_to_freeze
        exit(0)

    train_obj.model.freeze(params_to_freeze.values())
    print 'Will update these params only: ', train_obj.model.get_params()
    train_obj.main_loop()

    # Unfreeze so get_params will return all model params
    train_obj.model.freeze_set = set([])
    print 'Unfrozen params are ', train_obj.model.get_params()