def get_segmenter_function(params_loc, img_size, NCV=1, version=1, param_file_key=None):
    shape = (None, 1, img_size, img_size)
    input_var = T.tensor4('input')
    if NCV > 1:
        expr = 0
        params_files = filter(lambda s: 'fcn_v{}'.format(version) in s,
                              os.listdir(params_loc))
        if param_file_key is not None:
            params_files = filter(lambda s: param_file_key in s, params_files)
        for pfn in params_files:
            net, _, output_det = build_fcn_segmenter(input_var, shape, version)
            u.load_params(net['output'], os.path.join(params_loc, pfn))
            cv = int(pfn.split('_')[-1][1])
            if cv == NCV:
                expr = expr + output_det * NCV
            else:
                expr = expr + output_det
            print 'loaded {}'.format(pfn)
        assert(len(params_files) == NCV + 1)
        expr = expr / NCV / 2
        print 'loaded {} in ensemble'.format(len(params_files))
    else:
        net, _, output_det = build_fcn_segmenter(input_var, shape, version)
        u.load_params(net['output'], params_loc)
        expr = output_det
        print 'loaded indiv function {}'.format(params_loc)
    return theano.function([input_var], expr)
def get_segmenter_function(params_loc, img_size, ensemble=False, version=2,
                           param_file_key='.npz', weight_full_params=0.33):
    shape = (None, 1, img_size, img_size)
    input_var = T.tensor4('input')
    if ensemble:
        expr = 0
        params_files = filter(lambda s: 'v{}'.format(version) in s,
                              os.listdir(params_loc))
        if param_file_key is not None:
            params_files = filter(lambda s: param_file_key in s, params_files)
        full_params_indices = [i for i, a in enumerate(params_files) if 'f-1' in a]
        if len(full_params_indices) > 0:
            wt_norm = (1. - weight_full_params) / (len(params_files) - len(full_params_indices))
            wt_full = weight_full_params / len(full_params_indices)
            params_weights = [(wt_norm if i not in full_params_indices else wt_full)
                              for i in xrange(len(params_files))]
        else:
            params_weights = [1. / len(params_files)] * len(params_files)
        for pfn, w in zip(params_files, params_weights):
            net, _, output_det = build_fcn_segmenter(input_var, shape, version)
            u.load_params(net['output'], os.path.join(params_loc, pfn))
            expr = expr + w * output_det
            print 'loaded {} wt {}'.format(pfn, w)
        print 'loaded {} in ensemble'.format(len(params_files))
    else:
        net, _, output_det = build_fcn_segmenter(input_var, shape, version)
        u.load_params(net['output'], params_loc)
        expr = output_det
        print 'loaded indiv function {}'.format(params_loc)
    return theano.function([input_var], expr)
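# A hedged usage sketch (not part of the original source): how the compiled
# ensemble segmenter above might be applied to a batch of grayscale images.
# The weights directory 'weights/fcn' and the image size are placeholder
# assumptions; Theano expects float32 input of shape (N, 1, H, W).
import numpy as np

segment_fn = get_segmenter_function('weights/fcn', img_size=256,
                                    ensemble=True, version=2)
batch = np.random.rand(4, 1, 256, 256).astype('float32')
masks = segment_fn(batch)  # per-pixel segmentation scores for each image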
def encoder_decoder(paramsfile, specstr, channels=3, layersplit='encode',
                    shape=(64, 64), poolinv=False):
    inp = T.tensor4('inputs')
    w, h = shape
    build_fn = build_cae if poolinv else build_cae_nopoolinv
    network = build_fn(inp, shape=shape, channels=channels, specstr=specstr)
    u.load_params(network, paramsfile)
    laylist = nn.layers.get_all_layers(network)
    enc_layer_idx = next(i for i in xrange(len(laylist)) if laylist[i].name == layersplit)
    enc_layer = laylist[enc_layer_idx]
    return (lambda x: nn.layers.get_output(enc_layer, inputs=x,
                                           deterministic=True).eval(),
            lambda x: nn.layers.get_output(network, inputs={
                laylist[0]: np.zeros((x.shape[0], channels, w, h),
                                     dtype=theano.config.floatX),
                enc_layer: x}, deterministic=True).eval().reshape(-1, channels, w, h))
def plot_random_GLCMs(glcms, n_per_genre, seed=11):
    """This function plots some random GLCMs from a `glcms` dictionary for each genre

    Arguments:
        glcms {dict} -- a dictionary containing the tuples of glcms
        n_per_genre {int} -- the number of glcms to plot per genre
        seed {int} -- the random seed used for sampling
    """
    R = np.random.RandomState(seed)
    config = load_config()
    params = load_params()
    _, genres, _, _, n_pieces_per_genre, _ = extract_from_config(config, params)
    n_genres = len(genres)
    random_tracks = R.randint(0, n_pieces_per_genre, (n_genres, n_per_genre))

    fig = plt.figure(figsize=(int(1.25 * n_genres), int(2.5 * n_genres)))
    subplot = 0
    for g in range(n_genres):
        for t in range(n_per_genre):
            subplot += 1
            map_index = int(g * n_pieces_per_genre + random_tracks[g, t])
            data_to_plot = glcms["train"][map_index]
            ax = fig.add_subplot(n_genres, n_per_genre, subplot)
            ax.set_title(f"-- {data_to_plot[-1]} --")
            plt.imshow(data_to_plot[1], cmap="gray", interpolation="nearest")
            ax.axis("off")
    plt.tight_layout()
    plt.show()
def main(args):
    config_path = join('{}_logs'.format(args.train_dir), args.config_file)
    config_name = args.config_name
    override_params = args.override_params
    params = utils.load_params(config_path, config_name,
                               override_params=override_params)
    params.train_dir = args.train_dir
    params.data_dir = args.data_dir
    params.num_gpus = args.n_gpus
    params.start_new_model = False

    # Set up environment variables before doing any other global initialization
    # to make sure it uses the appropriate environment variables.
    utils.set_default_param_values_and_env_vars(params)

    # Set up logging and log the version.
    utils.setup_logging(params.logging_verbosity)

    # Print the loaded parameters.
    pp = pprint.PrettyPrinter(indent=2, compact=True)
    logging.info(pp.pformat(params.values()))

    logging.info("Pytorch version: {}.".format(torch.__version__))
    logging.info("Hostname: {}.".format(socket.gethostname()))

    core_eval = __import__('core.{}'.format(config_name), fromlist=[''])
    evaluate = core_eval.Evaluator(params)
    evaluate.run()
def main(_):
    if not FLAGS.train_dir or not FLAGS.data_dir:
        raise ValueError("train_dir and data_dir need to be set.")
    config_path = join('{}_logs'.format(FLAGS.train_dir), FLAGS.config_file)
    config_name = FLAGS.config_name
    override_params = FLAGS.override_params
    params = utils.load_params(config_path, config_name,
                               override_params=override_params)
    params.train_dir = FLAGS.train_dir
    params.data_dir = FLAGS.data_dir
    params.num_gpus = FLAGS.n_gpus
    params.start_new_model = False

    if FLAGS.backend.lower() in ('tensorflow', 'tf'):
        from neuralnet.tensorflow.eval import Evaluator
    elif FLAGS.backend.lower() in ('pytorch', 'py', 'torch'):
        from neuralnet.pytorch.eval import Evaluator
    else:
        raise ValueError("Backend not recognised. Choose between Tensorflow and Pytorch.")

    evaluate = Evaluator(params)
    evaluate.run()
def plot_one_map(data_map, map_type, quantized=False):
    """This function plots one map (either a spectrogram or a mel map)

    Arguments:
        data_map {tuple} -- a tuple of the form (file_name, numpy_map, piece_id, genre)
        map_type {string} -- one of ('spectrogram', 'mel_map') for the type of map
            to print (affects the y-scale and text)
        quantized {boolean} -- whether the input maps have already been quantized
            or not (will handle the color scale)
    """
    params = load_params()
    frame_length = int(params[map_type]["frame_length_in_s"] * params["sampling_rate"])
    hop_length = int((1 - params[map_type]["overlap"]) * frame_length)
    plt.title(f"""
    ---------------- {map_type.capitalize()} ----------------
    ------- Genre - {data_map[-1]} -------
    """)
    y_axis = "log" if map_type == "spectrogram" else "mel"
    librosa.display.specshow(data_map[1],
                             sr=params["sampling_rate"],
                             hop_length=hop_length,
                             x_axis="time",
                             y_axis=y_axis)
    colorbar_format = "%i" if quantized else "%+2.0f dB"
    plt.colorbar(format=colorbar_format)
def create_initial_workteam(kfp_client, experiment_id, region, sagemaker_client,
                            test_file_dir, download_dir):
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))
    test_params["Arguments"]["team_name"] = workteam_name = (
        utils.generate_random_string(5) + "-" + test_params["Arguments"]["team_name"])

    # First create a workteam using a separate pipeline and get the name, arn
    # of the workteam created.
    create_workteamjob(
        kfp_client,
        test_params,
        experiment_id,
        region,
        sagemaker_client,
        download_dir,
    )
    workteam_arn = sagemaker_utils.get_workteam_arn(sagemaker_client, workteam_name)

    return workteam_name, workteam_arn
def test_terminate_trainingjob(kfp_client, experiment_id, region, sagemaker_client):
    test_file_dir = "resources/config/simple-mnist-training"
    download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated_test_terminate"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))

    input_job_name = test_params["Arguments"]["job_name"] = (
        utils.generate_random_string(4) + "-terminate-job")

    run_id, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        60,
        "running",
    )

    print(f"Terminating run: {run_id} where Training job_name: {input_job_name}")
    kfp_client_utils.terminate_run(kfp_client, run_id)

    response = sagemaker_utils.describe_training_job(sagemaker_client, input_job_name)
    assert response["TrainingJobStatus"] in ["Stopping", "Stopped"]

    utils.remove_dir(download_dir)
def reload(depot, folder, tag, layer):
    """Reload a model and its schedule from a depot directory."""
    import utils

    cwd = os.getcwd()
    path = join(depot, folder)
    os.chdir(path)
    for f in os.listdir('.'):
        if f.endswith("schedule"):
            sched_f = f
    sched_f = open(sched_f)
    sched = cPickle.load(sched_f)
    sched_f.close()
    if layer >= 0:
        ltype = sched['stack'][layer]['type']
        params = utils.load_params(tag + ".params")
        shape = params[layer]['shape']
        model = ltype.__new__(ltype)
        model.__init__(shape=shape, **sched)
        model.reload(params[layer]['params'])
    os.chdir(cwd)
    return model, sched
def load_model( embed_map=None, path_to_model=PATH_TO_MODEL, # model opts (.pkl) path_to_params=PATH_TO_PARAMS, # model params (.npz) path_to_dictionary=PATH_TO_DICTIONARY, path_to_word2vec=PATH_TO_WORD2VEC ): """ Load all model components + apply vocab expansion """ # Load the worddict print 'Loading dictionary...' with open(path_to_dictionary, 'rb') as f: worddict = pkl.load(f) # Create inverted dictionary print 'Creating inverted dictionary...' word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' # Load model options print 'Loading model options...' with open(path_to_model, 'rb') as f: options = pkl.load(f) # Load parameters print 'Loading model parameters...' params = init_params(options) params = load_params(path_to_params, params) tparams = init_tparams(params) # Extractor functions print 'Compiling encoder...' trng = RandomStreams(1234) trng, x, x_mask, ctx, emb = build_encoder(tparams, options) f_enc = theano.function([x, x_mask], ctx, name='f_enc') f_emb = theano.function([x], emb, name='f_emb') trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options) f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v') # Load word2vec, if applicable if embed_map == None: print 'Loading word2vec embeddings...' embed_map = load_googlenews_vectors(path_to_word2vec) # Lookup table using vocab expansion trick print 'Creating word lookup tables...' table = lookup_table(options, embed_map, worddict, word_idict, f_emb) # Store everything we need in a dictionary print 'Packing up...' model = {} model['options'] = options model['table'] = table model['f_w2v'] = f_w2v return model
def load_checkpoint(model_dir, epoch=None, eval_=False):
    """Load checkpoint from model directory. Checkpoints should be stored in
    `model_dir/checkpoints/model-epochX.pt`, where `X` is the epoch number.

    Parameters:
        model_dir (str): path to model directory
        epoch (int): epoch number; set to None for last available epoch
        eval_ (bool): PyTorch evaluation mode; set to True for testing

    Returns:
        net (torch.nn.Module): PyTorch checkpoint at `epoch`
        epoch (int): epoch number
    """
    if epoch is None:  # get last epoch
        ckpt_dir = os.path.join(model_dir, 'checkpoints')
        epochs = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
        epoch = np.sort(epochs)[-1]
    ckpt_path = os.path.join(model_dir, 'checkpoints', 'model-epoch{}.pt'.format(epoch))
    params = utils.load_params(model_dir)
    print('Loading checkpoint: {}'.format(ckpt_path))
    state_dict = torch.load(ckpt_path)
    net = load_architectures(params['arch'], params['fd'])
    net.load_state_dict(state_dict)
    del state_dict
    if eval_:
        net.eval()
    return net, epoch
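# A hedged usage sketch (not part of the original source): restore the most
# recent checkpoint for evaluation. The directory './saved_models/mcr2' is a
# placeholder assumption.
net, last_epoch = load_checkpoint('./saved_models/mcr2', epoch=None, eval_=True)
print('restored checkpoint from epoch', last_epoch)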
def main_sim():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch", type=str,
                        help="temp directory in which to save data")
    args = parser.parse_args()
    dir_scratch = args.dir_scratch
    path_params = dir_scratch + "params.json"
    dir_data = dir_scratch + "data/"
    dir_sim = dir_scratch + "sim/"
    dir_train = dir_scratch + "train/"
    dir_spatial = dir_scratch + "spatial/"

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    if rank == 0:
        params = load_params(path_params)
        jobs = make_jobs_sim(params, dir_data, dir_sim, dir_train, dir_spatial)
        jobs = split(jobs, size)
    else:
        jobs = None

    jobs = comm.scatter(jobs, root=0)
    for job in jobs:
        worker_sim(job)
def test_load_params(self):
    w1, b1, w2, b2, w3, b3 = load_params()
    self.assertEqual(w1.shape, (256, 1024))
    self.assertEqual(b1.shape, (154, 256))
    self.assertEqual(w2.shape, (256, 256))
    self.assertEqual(b2.shape, (154, 256))
    self.assertEqual(w3.shape, (23, 256))
    self.assertEqual(b3.shape, (154, 23))
def main_ts():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch", type=str,
                        help="temp directory in which to save data")
    args = parser.parse_args()
    dir_scratch = args.dir_scratch
    path_params = dir_scratch + "params.json"
    dir_data = dir_scratch + "data/"
    dir_ts = dir_scratch + "ts/"

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Use a quarter of the cores for merging to keep memory within bounds
    size_half = int(size / 4)
    if size_half == 0:
        size_half = 1

    print(rank, "hello")

    # load jobs from json in the root process
    if rank == 0:
        params = load_params(path_params)
        jobs = params['ts']
        jobs_ts = add_paths_to_jobs(jobs, dir_data, dir_ts)
        make_work_data(jobs_ts, dir_data, dir_ts)
        jobs_ts = split(jobs_ts, size)
    else:
        jobs_ts = None

    jobs_ts = comm.scatter(jobs_ts, root=0)
    for j in jobs_ts:
        worker_ts(j)

    print(rank, "barrier")
    comm.Barrier()

    if rank == 0:
        jobs_merge = add_paths_to_jobs(jobs, dir_data, dir_ts)
        jobs_merge = make_jobs_merge(jobs_merge, dir_data, dir_ts)
        jobs_merge = split(jobs_merge, size_half, size)
    else:
        jobs_merge = None

    jobs_merge = comm.scatter(jobs_merge, root=0)
    for j in jobs_merge:
        worker_merge(j)

    print(rank, "barrier")
    comm.Barrier()
def test_create_endpoint(kfp_client, experiment_id, boto3_session, sagemaker_client, test_file_dir): download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated")) test_params = utils.load_params( utils.replace_placeholders( os.path.join(test_file_dir, "config.yaml"), os.path.join(download_dir, "config.yaml"), )) # Generate random prefix for model, endpoint config and endpoint name # to avoid errors if resources with same name exists test_params["Arguments"]["model_name"] = test_params["Arguments"][ "endpoint_config_name"] = test_params["Arguments"][ "endpoint_name"] = input_endpoint_name = ( utils.generate_random_string(5) + "-" + test_params["Arguments"]["model_name"]) print(f"running test with model/endpoint name: {input_endpoint_name}") _, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline( kfp_client, experiment_id, test_params["PipelineDefinition"], test_params["Arguments"], download_dir, test_params["TestName"], test_params["Timeout"], ) try: outputs = {"sagemaker-deploy-model": ["endpoint_name"]} output_files = minio_utils.artifact_download_iterator( workflow_json, outputs, download_dir) output_endpoint_name = utils.read_from_file_in_tar( output_files["sagemaker-deploy-model"]["endpoint_name"], "endpoint_name.txt") print(f"endpoint name: {output_endpoint_name}") # Verify output from pipeline is endpoint name assert output_endpoint_name == input_endpoint_name # Verify endpoint is running assert (sagemaker_utils.describe_endpoint( sagemaker_client, input_endpoint_name)["EndpointStatus"] == "InService") # Validate the model for use by running a prediction result = run_predict_mnist(boto3_session, input_endpoint_name, download_dir) print(f"prediction result: {result}") assert json.dumps(result, sort_keys=True) == json.dumps( test_params["ExpectedPrediction"], sort_keys=True) utils.remove_dir(download_dir) finally: # delete endpoint sagemaker_utils.delete_endpoint(sagemaker_client, input_endpoint_name)
def load_model( path_to_model='/home/shunan/Code/skip-thoughts/experiments/amazon/amazon_model_bi.npz', path_to_dictionary='/home/shunan/Code/skip-thoughts/experiments/amazon/word_dicts.pkl', embed_map=None): """ Load all model components + apply vocab expansion """ # Load the worddict print 'Loading dictionary...' with open(path_to_dictionary, 'rb') as f: worddict = pkl.load(f) # Create inverted dictionary print 'Creating inverted dictionary...' word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' # Load model options print 'Loading model options...' with open('%s.pkl' % path_to_model, 'rb') as f: options = pkl.load(f) # Load parameters print 'Loading model parameters...' params = init_params(options) params = load_params(path_to_model, params) tparams = init_tparams(params) # Extractor functions print 'Compiling encoder...' trng = RandomStreams(1234) trng, x, x_mask, ctx, emb = build_encoder(tparams, options) f_enc = theano.function([x, x_mask], ctx, name='f_enc') f_emb = theano.function([x], emb, name='f_emb') trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options) f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v') # Load word2vec, if applicable # if embed_map == None: # print 'Loading word2vec embeddings...' # embed_map = load_googlenews_vectors(path_to_word2vec) # Lookup table using vocab expansion trick print 'Creating word lookup tables...' table = lookup_table(options, embed_map, worddict, word_idict, f_emb) # Store everything we need in a dictionary print 'Packing up...' model = {} model['options'] = options model['table'] = table model['f_w2v'] = f_w2v # model is just a dict. return model
def test_trainingjob(kfp_client, experiment_id, region, sagemaker_client, test_file_dir): download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated")) test_params = utils.load_params( utils.replace_placeholders( os.path.join(test_file_dir, "config.yaml"), os.path.join(download_dir, "config.yaml"), )) _, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline( kfp_client, experiment_id, test_params["PipelineDefinition"], test_params["Arguments"], download_dir, test_params["TestName"], test_params["Timeout"], ) outputs = { "sagemaker-training-job": ["job_name", "model_artifact_url", "training_image"] } output_files = minio_utils.artifact_download_iterator( workflow_json, outputs, download_dir) # Verify Training job was successful on SageMaker training_job_name = utils.read_from_file_in_tar( output_files["sagemaker-training-job"]["job_name"]) print(f"training job name: {training_job_name}") train_response = sagemaker_utils.describe_training_job( sagemaker_client, training_job_name) assert train_response["TrainingJobStatus"] == "Completed" # Verify model artifacts output was generated from this run model_artifact_url = utils.read_from_file_in_tar( output_files["sagemaker-training-job"]["model_artifact_url"]) print(f"model_artifact_url: {model_artifact_url}") assert model_artifact_url == train_response["ModelArtifacts"][ "S3ModelArtifacts"] assert training_job_name in model_artifact_url # Verify training image output is an ECR image training_image = utils.read_from_file_in_tar( output_files["sagemaker-training-job"]["training_image"]) print(f"Training image used: {training_image}") if "ExpectedTrainingImage" in test_params.keys(): assert test_params["ExpectedTrainingImage"] == training_image else: assert f"dkr.ecr.{region}.amazonaws.com" in training_image assert not argo_utils.error_in_cw_logs( workflow_json["metadata"]["name"] ), "Found the CloudWatch error message in the log output. Check SageMaker to see if the job has failed." utils.remove_dir(download_dir)
def train(params=None):
    os.makedirs(params['ckpt_path'], exist_ok=True)
    device = torch.device("cuda")

    train_dataset = HDRDataset(params['dataset'], params=params,
                               suffix=params['dataset_suffix'])
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'],
                              shuffle=True)

    model = HDRPointwiseNN(params=params)
    ckpt = get_latest_ckpt(params['ckpt_path'])
    if ckpt:
        print('Loading previous state:', ckpt)
        state_dict = torch.load(ckpt)
        state_dict, _ = load_params(state_dict)
        model.load_state_dict(state_dict)
    model.to(device)

    mseloss = torch.nn.MSELoss()
    optimizer = Adam(model.parameters(), params['lr'])

    count = 0
    for e in range(params['epochs']):
        model.train()
        for i, (low, full, target) in enumerate(train_loader):
            optimizer.zero_grad()
            low = low.to(device)
            full = full.to(device)
            t = target.to(device)
            res = model(low, full)

            total_loss = mseloss(res, t)
            total_loss.backward()

            if (count + 1) % params['log_interval'] == 0:
                _psnr = psnr(res, t).item()
                loss = total_loss.item()
                print(e, count, loss, _psnr)

            optimizer.step()

            if (count + 1) % params['ckpt_interval'] == 0:
                print('@@ MIN:', torch.min(res), 'MAX:', torch.max(res))
                model.eval().cpu()
                ckpt_model_filename = "ckpt_" + str(e) + '_' + str(count) + ".pth"
                ckpt_model_path = os.path.join(params['ckpt_path'], ckpt_model_filename)
                state = save_params(model.state_dict(), params)
                torch.save(state, ckpt_model_path)
                test(ckpt_model_path)
                model.to(device).train()
            count += 1
def save_model(self, netG, avg_param_G, netsD, epoch):
    self.epoch_tracker.write(epoch)
    backup_para = copy_G_params(netG)
    checkpoint = {
        'epoch': epoch,
        'netG': None,
        'netsD_0': None,
        'netsD_1': None,
        'netsD_2': None,
    }
    load_params(netG, avg_param_G)
    checkpoint['netG'] = netG.state_dict()
    load_params(netG, backup_para)

    for i in range(len(netsD)):
        netD = netsD[i]
        checkpoint['netsD_{}'.format(i)] = netD.state_dict()

    torch.save(checkpoint, self.model_file_name.format(epoch))
    print('Save G/Ds models.')
def test_workteamjob( kfp_client, experiment_id, region, sagemaker_client, test_file_dir ): download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated")) test_params = utils.load_params( utils.replace_placeholders( os.path.join(test_file_dir, "config.yaml"), os.path.join(download_dir, "config.yaml"), ) ) # Generate random prefix for workteam_name to avoid errors if resources with same name exists test_params["Arguments"]["team_name"] = workteam_name = ( utils.generate_random_string(5) + "-" + test_params["Arguments"]["team_name"] ) try: workflow_json = create_workteamjob( kfp_client, test_params, experiment_id, region, sagemaker_client, download_dir, ) outputs = {"sagemaker-private-workforce": ["workteam_arn"]} output_files = minio_utils.artifact_download_iterator( workflow_json, outputs, download_dir ) response = sagemaker_utils.describe_workteam(sagemaker_client, workteam_name) # Verify WorkTeam was created in SageMaker assert response["Workteam"]["CreateDate"] is not None assert response["Workteam"]["WorkteamName"] == workteam_name # Verify WorkTeam arn artifact was created in Minio and matches the one in SageMaker workteam_arn = utils.read_from_file_in_tar( output_files["sagemaker-private-workforce"]["workteam_arn"] ) assert response["Workteam"]["WorkteamArn"] == workteam_arn finally: workteams = sagemaker_utils.list_workteams(sagemaker_client)["Workteams"] workteam_names = list(map((lambda x: x["WorkteamName"]), workteams)) # Check workteam was successfully created if workteam_name in workteam_names: sagemaker_utils.delete_workteam(sagemaker_client, workteam_name) # Delete generated files only if the test is successful utils.remove_dir(download_dir)
def display_random_audio_pieces(pieces, n_genre=2, n_tracks_per_genre=2,
                                n_short_term=2, seed=11):
    """This function takes in a dictionary (`pieces`) of audio pieces and displays
    the audio of randomly selected pieces in `pieces`

    Arguments:
        pieces {dict} -- a dictionary containing audio pieces
        n_genre {int} -- the number of genres to randomly select
        n_tracks_per_genre {int} -- the number of tracks to randomly sample in each
            sampled genre
        n_short_term {int} -- the number of pieces of the tracks sampled to randomly
            sample
        seed {int} -- the random seed used for sampling
    """
    R = np.random.RandomState(seed)
    config = load_config()
    params = load_params()
    train_root, genres, n_train_per_genre, _, _, sampling_rate = extract_from_config(
        config, params)

    random_genres = R.choice(genres, n_genre, replace=False)
    random_tracks = R.choice(n_train_per_genre, (n_genre, n_tracks_per_genre),
                             replace=False)
    random_pieces = R.choice(params["divide"]["number_pieces"],
                             (n_genre, n_tracks_per_genre, n_short_term),
                             replace=False)

    print(f"Genres picked are: {random_genres} \n")
    for g in range(n_genre):
        current_genre = random_genres[g]
        track_list = os.listdir(os.path.join(train_root, current_genre))
        print(f" -- Tracks for genre - {current_genre} -- \n ")
        for t in range(n_tracks_per_genre):
            track = track_list[random_tracks[g, t]]
            print("Full track:")
            print(track)
            IPython.display.display(
                IPython.display.Audio(os.path.join(train_root, current_genre, track)))
            start_index = find_short_term_pieces_for(pieces, track)
            print(start_index)
            print("Random short-term pieces of the track:")
            for index in random_pieces[g, t]:
                print(f"piece {index}")
                IPython.display.display(
                    IPython.display.Audio(pieces["train"][start_index + index][1],
                                          rate=sampling_rate))
def load_model(embed_map=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print('Loading dictionary...')
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print('Creating inverted dictionary...')
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print('Loading model options...')
    with open(f'{path_to_model}.pkl', 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print('Loading model parameters...')
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print('Compiling encoder...')
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map is None:
        print('Loading word2vec embeddings...')
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print('Creating word lookup tables...')
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
def main_train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch", type=str,
                        help="temp directory in which to save data")
    args = parser.parse_args()
    dir_scratch = args.dir_scratch
    path_params = dir_scratch + "params.json"
    dir_data = dir_scratch + "data/"
    dir_train = dir_scratch + "train/"

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Use half the cores for merging to keep memory within bounds
    size_half = int(size / 2)
    if size_half == 0:
        size_half = 1

    # load jobs from json in the root process
    if rank == 0:
        params = load_params(path_params)
        jobs = params['models']
        jobs = add_paths_to_jobs(jobs, dir_data, dir_train)
        jobs = add_varsets_to_modeljobs(jobs)
        jobs = add_nsim_to_jobs(jobs, params['nsim'])
        train_start = params['times']['train_start']
        train_end = params['times']['train_end']
        allvars = get_model_vars(jobs)
        print("ALLVARS:")
        for v in allvars:
            print(v)
        make_data_subset(allvars, train_start, train_end, dir_data, dir_train)
        jobs = split(jobs, size)
    else:
        jobs = None

    jobs = comm.scatter(jobs, root=0)
    for job in jobs:
        worker_train(job)
def load_params_from_file(self, fname: str):
    """
    Loads and sets model parameters from file.

    :param fname: Path to load parameters from.
    """
    assert self._is_built
    utils.check_condition(
        os.path.exists(fname),
        "No model parameter file found under %s. "
        "This is either not a model directory or the first training "
        "checkpoint has not happened yet." % fname)
    self.params, _ = utils.load_params(fname)
    logger.info('Loaded params from "%s"', fname)
def reload(self, depot, folder, tag):
    """
    Reload schedule and parameters from depot/folder/tag.params.
    depot is an absolute path.
    """
    from utils import load_params
    from os.path import join
    from gnumpy import as_garray

    file_prefix = join(depot, folder, tag)
    params = load_params(file_prefix + ".params")
    params_stack = params['Stack']['params']
    self.params = as_garray(params_stack)
    for layer, (c1, c2) in izip(self, izip(self.cuts[:-1], self.cuts[1:])):
        layer.p = self.params[c1:c2]
def main(image_path, results_path, iterations, validation_iterations,
         kernels_per_dim, params_file, l1reg, base_lr, batches, checkpoint_path,
         lr_div, lr_mult, disable_train_pis, disable_train_gammas, radial_as,
         quiet):
    orig = plt.imread(image_path)
    if orig.dtype == np.uint8:
        orig = orig.astype(np.float32) / 255.

    if params_file is not None:
        init_params = load_params(params_file)
    else:
        init_params = None

    if results_path is not None:
        if os.path.exists(results_path):
            shutil.rmtree(results_path)
        os.mkdir(results_path)

    loss_plotter = LossPlotter(path=results_path + "/loss.png", quiet=quiet)
    image_plotter = ImagePlotter(
        path=results_path,
        options=['orig', 'reconstruction', 'gating', 'pis_hist'],
        quiet=quiet)
    logger = ModelLogger(path=results_path)

    smoe = Smoe(orig, kernels_per_dim, init_params=init_params,
                train_pis=not disable_train_pis,
                train_gammas=not disable_train_gammas,
                radial_as=radial_as,
                start_batches=batches)

    optimizer1 = tf.train.AdamOptimizer(base_lr)
    optimizer2 = tf.train.AdamOptimizer(base_lr / lr_div)
    optimizer3 = tf.train.AdamOptimizer(base_lr * lr_mult)

    # optimizers have to be set before the restore
    smoe.set_optimizer(optimizer1, optimizer2, optimizer3)

    if checkpoint_path is not None:
        smoe.restore(checkpoint_path)

    smoe.train(iterations, val_iter=validation_iterations, pis_l1=l1reg,
               callbacks=[loss_plotter.plot, image_plotter.plot, logger.log])

    save_model(smoe, results_path + "/params_best.pkl", best=True)
    save_model(smoe, results_path + "/params_last.pkl", best=False)
def load_model(path_to_model=default_model):
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_model + '.dictionary.pkl', 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open(path_to_model + '.pkl', 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling image encoder...'
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling sentence encoder...'
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['worddict'] = worddict
    model['word_idict'] = word_idict
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc

    return model
def load_model(path_to_model=default_model): """ Load all model components """ print path_to_model # Load the worddict print 'Loading dictionary...' with open('%s.dictionary.pkl'%path_to_model, 'rb') as f: worddict = pkl.load(f) # Create inverted dictionary print 'Creating inverted dictionary...' word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' # Load model options print 'Loading model options...' with open('%s.pkl'%path_to_model, 'rb') as f: options = pkl.load(f) # Load parameters print 'Loading model parameters...' params = init_params(options) params = load_params(path_to_model, params) tparams = init_tparams(params) # Extractor functions print 'Compiling sentence encoder...' trng = RandomStreams(1234) trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options) f_senc = theano.function([x, x_mask], sentences, name='f_senc') print 'Compiling image encoder...' trng, [im], images = build_image_encoder(tparams, options) f_ienc = theano.function([im], images, name='f_ienc') # Store everything we need in a dictionary print 'Packing up...' model = {} model['options'] = options model['worddict'] = worddict model['word_idict'] = word_idict model['f_senc'] = f_senc model['f_ienc'] = f_ienc return model
def load_model(): """ Load all model components """ print path_to_model # Load the worddict print "Loading dictionary..." with open("%s.dictionary.pkl" % path_to_model, "rb") as f: worddict = pkl.load(f) # Create inverted dictionary print "Creating inverted dictionary..." word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = "<eos>" word_idict[1] = "UNK" # Load model options print "Loading model options..." with open("%s.pkl" % path_to_model, "rb") as f: options = pkl.load(f) # Load parameters print "Loading model parameters..." params = init_params(options) params = load_params(path_to_model, params) tparams = init_tparams(params) # Extractor functions print "Compiling sentence encoder..." trng = RandomStreams(1234) trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options) f_senc = theano.function([x, x_mask], sentences, name="f_senc") print "Compiling image encoder..." trng, [im], images = build_image_encoder(tparams, options) f_ienc = theano.function([im], images, name="f_ienc") # Store everything we need in a dictionary print "Packing up..." model = {} model["options"] = options model["worddict"] = worddict model["word_idict"] = word_idict model["f_senc"] = f_senc model["f_ienc"] = f_ienc return model
def get_best_model(return_weights=False):
    params = utils.load_params()
    model = MyAlexNet(params).to(device=params.device)
    checkpoint = os.path.join(config.model_dir, 'last.pth.tar')
    utils.load_checkpoint(checkpoint, model, params)
    if return_weights:
        weights = {
            "conv1": model.conv1[0].weight.data,
            "conv2": model.conv2[0].weight.data,
            "conv3": model.conv3[0].weight.data,
            "conv4": model.conv4[0].weight.data,
            "conv5": model.conv5[0].weight.data,
        }
        return weights
    return model, params
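# A hedged usage sketch (not in the original source): fetch the stored
# convolution weights, e.g. to visualize the first-layer filters. Assumes the
# checkpoint referenced above exists on disk.
weights = get_best_model(return_weights=True)
print(weights["conv1"].shape)  # (out_channels, in_channels, kernel_h, kernel_w)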
def load_params(self, name="", dir_path="", epoch=None):
    params, aux_states, param_loading_path = load_params(dir_path=dir_path,
                                                         epoch=epoch, name=name)
    logging.info('Loading params from \"%s\" to %s' % (param_loading_path, self.name))
    for k, v in params.items():
        if k in self.params:
            logging.debug('   Loading %s %s' % (k, str(v.shape)))
            self.params[k][:] = v
        else:
            logging.warn("Found unused param in the saved model file: %s" % k)
    for k, v in aux_states.items():
        self.aux_states[k][:] = v
def run_mlstm1900_multiple_sequences():
    sequences = [
        "MKLVTITJ",
        "MKLVDIAJ",
        "MKLVTIAJ",
        "MKLRKIAJ",
        "MKLVTIMJ",
    ]
    params = load_params()
    x = get_embeddings(sequences)
    h_final, c_final, out = mlstm1900(params, x)
    print(out.shape)
    print(h_final.shape)
    print(c_final.shape)
def gen_testloss(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)

    # csv
    headers = ["epoch", "step", "loss", "discrimn_loss_e", "compress_loss_e",
               "discrimn_loss_t", "compress_loss_t"]
    csv_path = utils.create_csv(args.model_dir, 'losses_test.csv', headers)
    print('writing to:', csv_path)

    # load data
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset, batch_size=params['bs'], shuffle=False,
                            num_workers=4)

    # save loss
    criterion = MaximalCodingRateReduction(gam1=params['gam1'], gam2=params['gam2'],
                                           eps=params['eps'])
    for epoch, ckpt_path in enumerate(ckpt_paths):
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        for step, (batch_imgs, batch_lbls) in enumerate(testloader):
            features = net(batch_imgs.cuda())
            loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                                   num_classes=len(testset.classes))
            utils.save_state(args.model_dir, epoch, step, loss.item(),
                             *loss_empi, *loss_theo, filename='losses_test.csv')
    print("Finished generating test loss.")
def load_model(path_to_model=PATH_TO_MODEL,          # model opts (.pkl)
               path_to_params=PATH_TO_PARAMS,        # model params (.npz)
               path_to_dictionary=PATH_TO_DICTIONARY):
    """
    Load a trained model for decoding
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Sampler.
    trng = RandomStreams(1234)
    f_init, f_next = build_sampler(tparams, options, trng)

    # Pack everything up
    dec = dict()
    dec['options'] = options
    dec['trng'] = trng
    dec['worddict'] = worddict
    dec['word_idict'] = word_idict
    dec['tparams'] = tparams
    dec['f_init'] = f_init
    dec['f_next'] = f_next

    return dec
def test_trainingjob(kfp_client, experiment_id, sagemaker_client, test_file_dir): download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated")) test_params = utils.load_params( utils.replace_placeholders( os.path.join(test_file_dir, "config.yaml"), os.path.join(download_dir, "config.yaml"), )) test_params["Arguments"]["hyperparameters"] = json.dumps( test_params["Arguments"]["hyperparameters"]) test_params["Arguments"]["channels"] = json.dumps( test_params["Arguments"]["channels"]) run_id, status, workflow_json = kfp_client_utils.compile_run_monitor_pipeline( kfp_client, experiment_id, test_params["PipelineDefinition"], test_params["Arguments"], download_dir, test_params["TestName"], test_params["Timeout"], ) outputs = {"sagemaker-training-job": ["job_name", "model_artifact_url"]} output_files = minio_utils.artifact_download_iterator( workflow_json, outputs, download_dir) # Verify Training job was successful on SageMaker training_job_name = utils.extract_information( output_files["sagemaker-training-job"]["job_name"], "job_name.txt") print(f"training job name: {training_job_name}") train_response = sagemaker_utils.describe_training_job( sagemaker_client, training_job_name.decode()) assert train_response["TrainingJobStatus"] == "Completed" # Verify model artifacts output was generated from this run model_artifact_url = utils.extract_information( output_files["sagemaker-training-job"]["model_artifact_url"], "model_artifact_url.txt", ) print(f"model_artifact_url: {model_artifact_url}") assert (model_artifact_url.decode() == train_response["ModelArtifacts"] ["S3ModelArtifacts"]) assert (train_response["ModelArtifacts"]["S3ModelArtifacts"] in model_artifact_url.decode())
def gen_accuracy(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)

    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    for epoch, ckpt_path in enumerate(ckpt_paths):
        if epoch % 5 != 0:
            continue
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)

        # load data
        train_transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'], train_transforms, train=True)
        trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
        train_features, train_labels = tf.get_features(net, trainloader, verbose=False)

        test_transforms = tf.load_transforms('test')
        testset = tf.load_trainset(params['data'], test_transforms, train=False)
        testloader = DataLoader(testset, batch_size=500, num_workers=4)
        test_features, test_labels = tf.get_features(net, testloader, verbose=False)

        acc_train, acc_test = svm(args, train_features, train_labels,
                                  test_features, test_labels)
        utils.save_state(args.model_dir, epoch, acc_train, acc_test,
                         filename='accuracy.csv')
    print("Finished generating accuracy.")
def load_model(path_to_model):
    """
    Load all model components
    """
    print path_to_model

    # Load model
    print 'Loading model'
    with open(path_to_model + '.pkl', 'rb') as f:
        model = pkl.load(f)
    options = model['options']

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model + '.npz', params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling error computation...'
    [s, im], errs = build_errors(options)
    f_err = theano.function([s, im], errs, name='f_err')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    model['f_err'] = f_err

    return model
def deconvoluter(params_fn, specstr, shape):
    input_var = T.tensor4('input')
    decnet = build_deconv_net(input_var, shape=shape, specstr=specstr)
    u.load_params(decnet, params_fn)
    return theano.function([input_var], nn.layers.get_output(decnet))
def train(dim_word=100, # word vector dimensionality dim=1000, # the number of LSTM units encoder='gru', decoder='gru_cond', n_words_src=30000, n_words=30000, patience=10, # early stopping patience max_epochs=5000, finish_after=10000000, # finish after this many updates dispFreq=100, decay_c=0., # L2 regularization penalty alpha_c=0., # alignment regularization clip_c=-1., # gradient clipping threshold lrate=1., # learning rate maxlen=100, # maximum length of the description optimizer='rmsprop', batch_size=16, saveto='model.npz', saveFreq=1000, # save the parameters after every saveFreq updates datasets=[ '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok', '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok'], picked_train_idxes_file=r'', use_dropout=False, reload_=False, overwrite=False, preload='', sort_by_len=False, convert_embedding=True, dump_before_train=False, ): # Model options model_options = locals().copy() if reload_: lrate *= 0.5 # load dictionaries and invert them # reload options if reload_ and os.path.exists(preload): print 'Reloading model options' with open(r'.\model\en2fr.iter160000.npz.pkl', 'rb') as f: model_options = pkl.load(f) print 'Configuration from fy' vocab_en_filename = './data/dic/en2fr_en_vocabs_top1M.pkl' vocab_fr_filename = './data/dic/en2fr_fr_vocabs_top1M.pkl' map_filename = './data/dic/mapFullVocab2Top1MVocab.pkl' lr_discount_freq = 80000 print 'Done' print 'Loading data' text_iterator = TextIterator( datasets[0], datasets[1], vocab_en_filename, vocab_fr_filename, batch_size, maxlen, n_words_src, n_words, ) # sys.stdout.flush() # train_data_x = pkl.load(open(datasets[0], 'rb')) # train_data_y = pkl.load(open(datasets[1], 'rb')) # # if len(picked_train_idxes_file) != 0: # picked_idxes = pkl.load(open(picked_train_idxes_file, 'rb')) # train_data_x = [train_data_x[id] for id in picked_idxes] # train_data_y = [train_data_y[id] for id in picked_idxes] # # print 'Total train:', len(train_data_x) # print 'Max len:', max([len(x) for x in train_data_x]) # sys.stdout.flush() # # if sort_by_len: # slen = np.array([len(s) for s in train_data_x]) # sidx = slen.argsort() # # _sbuf = [train_data_x[i] for i in sidx] # _tbuf = [train_data_y[i] for i in sidx] # # train_data_x = _sbuf # train_data_y = _tbuf # print len(train_data_x[0]), len(train_data_x[-1]) # sys.stdout.flush() # train_batch_idx = get_minibatches_idx(len(train_data_x), batch_size, shuffle=False) # else: # train_batch_idx = get_minibatches_idx(len(train_data_x), batch_size, shuffle=True) print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(preload): print 'Reloading model parameters' params = load_params(preload, params) # for k, v in params.iteritems(): # print '>', k, v.shape, v.dtype # Only convert parameters when reloading if convert_embedding: # ================= # Convert input and output embedding parameters with a exist word embedding # ================= print 'Convert input and output embedding' temp_Wemb = params['Wemb'] orig_emb_mean = np.mean(temp_Wemb, axis=0) params['Wemb'] = np.tile(orig_emb_mean, [params['Wemb'].shape[0], 1]) # Load vocabulary map dicts and do mapping with open(map_filename, 'rb') as map_file: map_en = pkl.load(map_file) map_fr = pkl.load(map_file) for full, top in map_en.iteritems(): emb_size = temp_Wemb.shape[0] if full < emb_size and top < emb_size: params['Wemb'][top] = temp_Wemb[full] print 'Convert input embedding done' temp_ff_logit_W = params['ff_logit_W'] temp_Wemb_dec = params['Wemb_dec'] temp_b 
= params['ff_logit_b'] orig_ff_logit_W_mean = np.mean(temp_ff_logit_W, axis=1) orig_Wemb_dec_mean = np.mean(temp_Wemb_dec, axis=0) orig_b_mean = np.mean(temp_b) params['ff_logit_W'] = np.tile(orig_ff_logit_W_mean, [params['ff_logit_W'].shape[1], 1]).T params['ff_logit_b'].fill(orig_b_mean) params['Wemb_dec'] = np.tile(orig_Wemb_dec_mean, [params['Wemb_dec'].shape[0], 1]) for full, top in map_en.iteritems(): emb_size = temp_Wemb.shape[0] if full < emb_size and top < emb_size: params['ff_logit_W'][:, top] = temp_ff_logit_W[:, full] params['ff_logit_b'][top] = temp_b[full] params['Wemb_dec'][top] = temp_Wemb[full] print 'Convert output embedding done' # for k, v in params.iteritems(): # print '>', k, v.shape, v.dtype # ================ # End Convert # ================ tparams = init_tparams(params) trng, use_noise, \ x, x_mask, y, y_mask, \ opt_ret, \ cost, x_emb = \ build_model(tparams, model_options) inps = [x, x_mask, y, y_mask] print 'Building sampler' f_init, f_next = build_sampler(tparams, model_options, trng, use_noise) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=profile) f_x_emb = theano.function([x, x_mask], x_emb, profile=profile) print 'Done' sys.stdout.flush() cost = cost.mean() # apply L2 regularization on weights if decay_c > 0.: decay_c = theano.shared(np.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # regularize the alpha weights if alpha_c > 0. and not model_options['decoder'].endswith('simple'): alpha_c = theano.shared(np.float32(alpha_c), name='alpha_c') alpha_reg = alpha_c * ( (tensor.cast(y_mask.sum(0) // x_mask.sum(0), 'float32')[:, None] - opt_ret['dec_alphas'].sum(0)) ** 2).sum(1).mean() cost += alpha_reg # after all regularizers - compile the computational graph for cost print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=profile) print 'Done' print 'Computing gradient...', grads = tensor.grad(cost, wrt=itemlist(tparams)) print 'Done' sys.stdout.flush() # apply gradient clipping here if clip_c > 0.: g2 = 0. for g in grads: g2 += (g ** 2).sum() new_grads = [] for g in grads: new_grads.append(tensor.switch(g2 > (clip_c ** 2), g / tensor.sqrt(g2) * clip_c, g)) grads = new_grads # compile the optimizer, the actual computational graph is compiled here lr = tensor.scalar(name='lr') print 'Building optimizers...', f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Done' print 'Optimization' best_p = None bad_counter = 0 uidx = 0 if reload_: m = re.search('.+iter(\d+?)\.npz', preload) if m: uidx = int(m.group(1)) print 'uidx', uidx, 'l_rate', lrate estop = False history_errs = [] # reload history if dump_before_train: print 'Dumping before train...', saveto_uidx = '{}.iter{}.npz'.format( os.path.splitext(saveto)[0], uidx) np.savez(saveto_uidx, history_errs=history_errs, uidx=uidx, **unzip(tparams)) print 'Done' if saveFreq == -1: saveFreq = len(train[0]) / batch_size for eidx in xrange(max_epochs): n_samples = 0 # for i, batch_idx in train_batch_idx: # # x = [train_data_x[id] for id in batch_idx] # y = [train_data_y[id] for id in batch_idx] for i, (x, y) in enumerate(text_iterator): n_samples += len(x) uidx += 1 use_noise.set_value(1.) 
x, x_mask, y, y_mask = prepare_data(x, y) if x is None: print 'Minibatch with zero sample under length ', maxlen uidx -= 1 continue ud_start = time.time() # compute cost, grads and copy grads to shared variables cost = f_grad_shared(x, x_mask, y, y_mask) # do the update on parameters f_update(lrate) ud = time.time() - ud_start # check for bad numbers, usually we remove non-finite elements # and continue training - but not done here if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1., 1., 1. # discount reward if lr_discount_freq > 0 and np.mod(uidx, lr_discount_freq) == 0: lrate *= 0.5 print 'Discount learning rate to {} at iteration {}'.format(lrate, uidx) # verbose if np.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud sys.stdout.flush() if np.mod(uidx, saveFreq) == 0: # save with uidx if not overwrite: # print 'Saving the model at iteration {}...'.format(uidx), saveto_uidx = '{}.iter{}.npz'.format( os.path.splitext(saveto)[0], uidx) np.savez(saveto_uidx, history_errs=history_errs, uidx=uidx, **unzip(tparams)) # print 'Done' # sys.stdout.flush() # generate some samples with the model and display them # finish after this many updates if uidx >= finish_after: print 'Finishing after %d iterations!' % uidx estop = True break print 'Seen %d samples' % n_samples if estop: break if best_p is not None: zipp(best_p, tparams) use_noise.set_value(0.) return 0.
        nest.SetStatus(voltmeter, [{"to_file": True, "withtime": True,
                                    'label': self.params['exc_volt_fn']}])
        nest.DivergentConnect(voltmeter, self.nrns)

    def run(self):
        # R U N
        nest.Simulate(self.params['t_sim'])


if __name__ == '__main__':
    if len(sys.argv) == 2:
        # load existing parameters
        # params = utils.load_params(os.path.abspath(sys.argv[1]))
        params_json = utils.load_params(os.path.abspath(sys.argv[1]))
        # this is necessary because json stores information in unicode,
        # but SLI (the NEST interpreter) does not understand unicode ...
        params = utils.convert_to_NEST_conform_dict(params_json)
    else:
        GP = simulation_parameters.global_parameters()
        params = GP.params
        # write_parameters_to_file MUST be called before every simulation
        GP.write_parameters_to_file()

    sim = Simulation(params)
    sim.setup()
    sim.create_cells()
    sim.create_input_spiketrains()
    sim.record()
    sim.run()
max_epoch = (0 if CV else num_epochs)
for i in xrange(ntimes):
    if not CV and i != 5:
        continue
    if i == 5:
        num_epochs = max_epoch + 1
        print("full train data use {:d} epochs".format(num_epochs))
    nn.layers.set_all_param_values(net['output'], init0)
    data = u.DataH5PyStreamer(os.path.join(c.data_sunnybrook, datafile),
                              batch_size=batch_size, folds=(5, i))
    hist, best_epoch = u.train_with_hdf5(
        data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn,
        max_per_epoch=-1,
        tr_transform=lambda x: du.segmenter_data_transform(
            x, shift=shi, rotate=rot, scale=sca, normalize_pctwise=pct_norm_tr),
        te_transform=lambda x: du.segmenter_data_transform(
            x, normalize_pctwise=pct_norm, istest=True),
        last_layer=net['output'],
        save_best_params_to=c.params_dir + '/fcn_v{}_{}_f{}.npz'.format(version, vvv, i))
    if i < 5 and best_epoch > max_epoch:
        max_epoch = best_epoch

if CV:
    for pfn in ['fcn_v{}_{}_f{}.npz'.format(version, vvv, i)
                for i in xrange(ntimes - 1)]:  # !!!!CHANGE
        u.load_params(net['output'], os.path.join(c.params_dir, pfn))
        testfold = int(pfn.split('_')[-1][1])
        data = u.DataH5PyStreamer(os.path.join(c.data_sunnybrook, datafile),
                                  batch_size=16, folds=(5, testfold))
        streamer = data.streamer(training=False, shuffled=True)
        accs = []
        for imb in streamer.get_epoch_iterator():
            x, y = du.segmenter_data_transform(imb, normalize_pctwise=pct_norm)
            accs.append(acc_fn(x, y))
        print pfn, np.mean(accs)
def main(model_options): print 'Loading data' dp = data_provider() dp.load_data(model_options['batch_size'], model_options['word_count_threshold']) dp.build_word_vocab() dp.group_train_captions_by_length() model_options['vocab_size'] = dp.get_word_vocab_size() print 'Building model' # This create the initial parameters as numpy ndarrays. generator = caption_generator() params = generator.init_params(model_options) save_n = {} save_n['checkpoint'] = 0 save_n['prediction'] = 0 # reload a saved checkpoint if model_options['reload_checkpoint_path']: _, save_n['checkpoint'] = utils.load_params(model_options['reload_checkpoint_path'], params) print 'Reloaded checkpoint from', model_options['reload_checkpoint_path'] # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. tparams = utils.init_tparams(params) # use_noise is for dropout sents, mask, imgs, gt_sents, use_noise, cost = generator.build_model(tparams) grads = tensor.grad(cost, wrt=tparams.values()) lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizers[model_options['optimizer']](lr, tparams, grads, sents, mask, imgs, gt_sents, cost) imgs_to_predict, predicted_indices, predicted_prob = generator.predict(tparams) f_pred = theano.function([imgs_to_predict], predicted_indices, name='f_pred') f_pred_prob = theano.function([imgs_to_predict], predicted_prob, name='f_pred_prob') train_iter = dp.train_iterator kf_valid = KFold(len(dp.split['val']), n_folds=len(dp.split['val']) / model_options['batch_size'], shuffle=False) if model_options['use_dropout'] == 1: use_noise.set_value(1.) else: use_noise.set_value(0.) print 'Optimization' uidx = 0 lrate = model_options['lrate'] # display print time duration dp_start = time.time() for eidx in xrange(model_options['max_epochs']): print 'Epoch ', eidx for batch_data in train_iter: uidx += 1 # preparing the mini batch data pd_start = time.time() sents, sents_mask, imgs, gt_sents = dp.prepare_train_batch_data(batch_data) pd_duration = time.time() - pd_start if sents is None: print 'Minibatch is empty' continue # training on the mini batch ud_start = time.time() cost = f_grad_shared(sents, sents_mask, imgs, gt_sents) f_update(lrate) ud_duration = time.time() - ud_start # Numerical stability check if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' if numpy.mod(uidx, model_options['disp_freq']) == 0: dp_duration = time.time() - dp_start print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'Prepare data ', pd_duration, 'Update data ', ud_duration, '{0}_iter_time {1}'.format(model_options['disp_freq'], dp_duration) dp_start = time.time() # Log validation loss + checkpoint the model with the best validation log likelihood if numpy.mod(uidx, model_options['valid_freq']) == 0: scores = validate_and_save_checkpoint(model_options, dp, params, tparams, f_pred, f_pred_prob, kf_valid, save_n) print scores print 'Performing final validation' scores = validate_and_save_checkpoint(model_options, dp, params, tparams, f_pred, f_pred_prob, kf_valid, save_n) print scores print 'Done!!!'
def train(dim_word_desc=400,        # word vector dimensionality
          dim_word_q=400,
          dim_word_ans=600,
          dim_proj=300,
          dim=400,                  # the number of LSTM units
          encoder_desc='lstm',
          encoder_desc_word='lstm',
          encoder_desc_sent='lstm',
          use_dq_sims=False,
          eyem=None,
          learn_h0=False,
          use_desc_skip_c_g=False,
          debug=False,
          encoder_q='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          clip_c=-1.,
          lrate=0.01,
          n_words_q=49145,
          n_words_desc=115425,
          n_words_ans=409,
          pkl_train_files=None,
          pkl_valid_files=None,
          maxlen=2000,              # maximum length of the description
          optimizer='rmsprop',
          batch_size=2,
          vocab=None,
          valid_batch_size=16,
          use_elu_g=False,
          saveto='model.npz',
          model_dir=None,
          ms_nlayers=3,
          validFreq=1000,
          saveFreq=1000,            # save the parameters after every saveFreq updates
          datasets=[None],
          truncate=400,
          momentum=0.9,
          use_bidir=False,
          cost_mask=None,
          valid_datasets=['/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5',
                          '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'],
          dropout_rate=0.5,
          use_dropout=True,
          reload_=True,
          **opt_ds):

    ensure_dir_exists(model_dir)
    mpath = os.path.join(model_dir, saveto)
    mpath_best = os.path.join(model_dir, prfx("best", saveto))
    mpath_last = os.path.join(model_dir, prfx("last", saveto))
    mpath_stats = os.path.join(model_dir, prfx("stats", saveto))

    # Model options
    model_options = locals().copy()
    model_options['use_sent_reps'] = opt_ds['use_sent_reps']
    stats = defaultdict(list)

    del model_options['eyem']
    del model_options['cost_mask']

    if cost_mask is not None:
        cost_mask = sharedX(cost_mask)

    # reload options and parameters
    if reload_:
        print "Reloading the model."
        if os.path.exists(mpath_best):
            print "Reloading the best model from %s." % mpath_best
            with open(os.path.join(mpath_best, '%s.pkl' % mpath_best), 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath_best, params)
        elif os.path.exists(mpath):
            print "Reloading the model from %s." % mpath
            with open(os.path.join(mpath, '%s.pkl' % mpath), 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath, params)
        else:
            raise IOError("Couldn't open the file.")
    else:
        print "Not reloading the model; initializing from scratch."
        params = init_params(model_options)

    if datasets[0]:
        print "Short dataset", datasets[0]

    print 'Loading data'
    print 'Building model'
    if pkl_train_files is None or pkl_valid_files is None:
        train, valid, test = load_data(path=datasets[0],
                                       valid_path=valid_datasets[0],
                                       test_path=valid_datasets[1],
                                       batch_size=batch_size,
                                       **opt_ds)
    else:
        train, valid, test = load_pkl_data(train_file_paths=pkl_train_files,
                                           valid_file_paths=pkl_valid_files,
                                           batch_size=batch_size,
                                           vocab=vocab,
                                           eyem=eyem,
                                           **opt_ds)

    tparams = init_tparams(params)
    trng, use_noise, inps_d, \
        opt_ret, \
        cost, errors, ent_errors, ent_derrors, probs = \
        build_model(tparams,
                    model_options,
                    prepare_data if not opt_ds['use_sent_reps'] else prepare_data_sents,
                    valid,
                    cost_mask=cost_mask)

    alphas = opt_ret['dec_alphas']

    if opt_ds['use_sent_reps']:
        inps = [inps_d["desc"],
                inps_d["word_mask"],
                inps_d["q"],
                inps_d['q_mask'],
                inps_d['ans'],
                inps_d['wlen'],
                inps_d['slen'],
                inps_d['qlen'],
                inps_d['ent_mask']]
    else:
        inps = [inps_d["desc"],
                inps_d["word_mask"],
                inps_d["q"],
                inps_d['q_mask'],
                inps_d['ans'],
                inps_d['wlen'],
                inps_d['qlen'],
                inps_d['ent_mask']]

    outs = [cost, errors, probs, alphas]
    if ent_errors:
        outs += [ent_errors]
    if ent_derrors:
        outs += [ent_derrors]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, outs, profile=profile)
    print 'Done'

    # apply weight decay to the feed-forward connections
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            if "logit" in kk or "ff" in kk:
                weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Computing gradient...',
    grads = safe_grad(cost, itemlist(tparams))
    print 'Done'

    # gradient clipping by global norm
    if clip_c > 0.:
        g2 = get_norms(grads)
        for p, g in grads.iteritems():
            grads[p] = tensor.switch(g2 > (clip_c ** 2),
                                     (g / tensor.sqrt(g2 + 1e-8)) * clip_c,
                                     g)
    inps.pop()

    if optimizer.lower() == "adasecant":
        learning_rule = Adasecant(delta_clip=25.0,
                                  use_adagrad=True,
                                  grad_clip=0.25,
                                  gamma_clip=0.)
    elif optimizer.lower() == "rmsprop":
        learning_rule = RMSPropMomentum(init_momentum=momentum)
    elif optimizer.lower() == "adam":
        learning_rule = Adam()
    elif optimizer.lower() == "adadelta":
        learning_rule = AdaDelta()

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # NOTE: learning_rule is cleared here, so the eval(optimizer) branch below is always taken.
    learning_rule = None

    if learning_rule:
        f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr,
                                                          grads=grads,
                                                          inp=inps,
                                                          cost=cost,
                                                          errors=errors)
    else:
        f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost, errors)
    print 'Done'

    print 'Optimization'
    history_errs = []
    # reload history
    if reload_ and os.path.exists(mpath):
        history_errs = list(numpy.load(mpath)['history_errs'])

    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    best_found = False
    uidx = 0
    estop = False

    train_cost_ave, train_err_ave, \
        train_gnorm_ave = reset_train_vals()

    for eidx in xrange(max_epochs):
        n_samples = 0
        if train.done:
            train.reset()

        for d_, q_, a, em in train:
            n_samples += len(a)
            uidx += 1
            use_noise.set_value(1.)

            if opt_ds['use_sent_reps']:
                # mask the description and the question
                d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(d_, q_)
                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask, q, q_mask, a,
                                                           dlen, slen, qlen)
            else:
                d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_)
                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask, q, q_mask, a,
                                                           dlen, qlen)

            upnorm = f_update(lrate)
            ud = time.time() - ud_start

            # collect the running-average train stats
            train_cost_ave = running_ave(train_cost_ave, cost)
            train_err_ave = running_ave(train_err_ave, errors)
            train_gnorm_ave = running_ave(train_gnorm_ave, gnorm)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                import ipdb; ipdb.set_trace()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, ' Update ', uidx, \
                    ' Cost ', cost, ' UD ', ud, \
                    ' UpNorm ', upnorm[0].tolist(), \
                    ' GNorm ', gnorm, \
                    ' Pnorm ', pnorm, ' Terrors ', errors

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None and best_found:
                    numpy.savez(mpath_best, history_errs=history_errs, **best_p)
                    pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb'))
                else:
                    params = unzip(tparams)
                    numpy.savez(mpath, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % mpath, 'wb'))

                pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb'))
                print 'Done'
                print_param_norms(tparams)

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                if valid.done:
                    valid.reset()

                valid_costs, valid_errs, valid_probs, \
                    valid_alphas, error_ent, error_dent = \
                    eval_model(f_log_probs,
                               prepare_data if not opt_ds['use_sent_reps']
                               else prepare_data_sents,
                               model_options,
                               valid,
                               use_sent_rep=opt_ds['use_sent_reps'])

                valid_alphas_ = numpy.concatenate(
                    [va.argmax(0) for va in valid_alphas.tolist()], axis=0)
                valid_err = valid_errs.mean()
                valid_cost = valid_costs.mean()
                valid_alpha_ent = -negentropy(valid_alphas)

                mean_valid_alphas = valid_alphas_.mean()
                std_valid_alphas = valid_alphas_.std()
                mean_valid_probs = valid_probs.argmax(1).mean()
                std_valid_probs = valid_probs.argmax(1).std()

                history_errs.append([valid_cost, valid_err])

                stats['train_err_ave'].append(train_err_ave)
                stats['train_cost_ave'].append(train_cost_ave)
                stats['train_gnorm_ave'].append(train_gnorm_ave)
                stats['valid_errs'].append(valid_err)
                stats['valid_costs'].append(valid_cost)
                stats['valid_err_ent'].append(error_ent)
                stats['valid_err_desc_ent'].append(error_dent)
                stats['valid_alphas_mean'].append(mean_valid_alphas)
                stats['valid_alphas_std'].append(std_valid_alphas)
                stats['valid_alphas_ent'].append(valid_alpha_ent)
                stats['valid_probs_mean'].append(mean_valid_probs)
                stats['valid_probs_std'].append(std_valid_probs)

                if uidx == 0 or valid_err <= numpy.array(history_errs)[:, 1].min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                    best_found = True
                else:
                    best_found = False

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()

                print "============================"
                print '\t>>>Valid error: ', valid_err, \
                    ' Valid cost: ', valid_cost
                print '\t>>>Valid pred mean: ', mean_valid_probs, \
                    ' Valid pred std: ', std_valid_probs
                print '\t>>>Valid alphas mean: ', mean_valid_alphas, \
                    ' Valid alphas std: ', std_valid_alphas, \
                    ' Valid alpha negent: ', valid_alpha_ent, \
                    ' Valid error ent: ', error_ent, \
                    ' Valid error desc ent: ', error_dent
                print "============================"
                print "Running average train stats "
                print '\t>>>Train error: ', train_err_ave, \
                    ' Train cost: ', train_cost_ave, \
                    ' Train grad norm: ', train_gnorm_ave
                print "============================"

                train_cost_ave, train_err_ave, \
                    train_gnorm_ave = reset_train_vals()

        print 'Seen %d samples' % n_samples
        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid.reset()
    valid_cost, valid_error, valid_probs, \
        valid_alphas, error_ent = \
        eval_model(f_log_probs,
                   prepare_data if not opt_ds['use_sent_reps'] else prepare_data_sents,
                   model_options,
                   valid,
                   use_sent_rep=opt_ds['use_sent_reps'])

    print "Final eval results: "
    print 'Valid error: ', valid_error.mean()
    print 'Valid cost: ', valid_cost.mean()
    print '\t>>>Valid pred mean: ', valid_probs.mean(), \
        ' Valid pred std: ', valid_probs.std(), \
        ' Valid error ent: ', error_ent

    params = copy.copy(best_p)
    numpy.savez(mpath_last,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err, valid_cost
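
# train() above rescales all gradients when their squared global norm g2 exceeds clip_c**2
# (tensor.switch(g2 > clip_c**2, g / sqrt(g2 + 1e-8) * clip_c, g)). A small numpy sketch of
# the same global-norm clipping rule, for reference only and not part of the original code:
import numpy as np

def clip_by_global_norm(grads, clip_c):
    g2 = sum(float((g ** 2).sum()) for g in grads)    # squared global norm across all params
    if clip_c > 0. and g2 > clip_c ** 2:
        scale = clip_c / np.sqrt(g2 + 1e-8)
        return [g * scale for g in grads]
    return grads

grads = [np.array([3.0, 4.0]), np.array([12.0])]      # global norm = 13
clipped = clip_by_global_norm(grads, clip_c=1.0)
print np.sqrt(sum((g ** 2).sum() for g in clipped))   # ~1.0 after clipping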
def build_fn(args, embeddings):
    """
        Build training and testing functions.
    """
    in_x1 = T.imatrix('x1')
    in_x2 = T.imatrix('x2')
    in_mask1 = T.matrix('mask1')
    in_mask2 = T.matrix('mask2')
    in_l = T.matrix('l')
    in_y = T.ivector('y')

    l_in1 = lasagne.layers.InputLayer((None, None), in_x1)
    l_mask1 = lasagne.layers.InputLayer((None, None), in_mask1)
    l_emb1 = lasagne.layers.EmbeddingLayer(l_in1, args.vocab_size,
                                           args.embedding_size, W=embeddings)

    l_in2 = lasagne.layers.InputLayer((None, None), in_x2)
    l_mask2 = lasagne.layers.InputLayer((None, None), in_mask2)
    l_emb2 = lasagne.layers.EmbeddingLayer(l_in2, args.vocab_size,
                                           args.embedding_size, W=l_emb1.W)

    network1 = nn_layers.stack_rnn(l_emb1, l_mask1, args.num_layers, args.hidden_size,
                                   grad_clipping=args.grad_clipping,
                                   dropout_rate=args.dropout_rate,
                                   only_return_final=(args.att_func == 'last'),
                                   bidir=args.bidir,
                                   name='d',
                                   rnn_layer=args.rnn_layer)

    network2 = nn_layers.stack_rnn(l_emb2, l_mask2, args.num_layers, args.hidden_size,
                                   grad_clipping=args.grad_clipping,
                                   dropout_rate=args.dropout_rate,
                                   only_return_final=True,
                                   bidir=args.bidir,
                                   name='q',
                                   rnn_layer=args.rnn_layer)

    args.rnn_output_size = args.hidden_size * 2 if args.bidir else args.hidden_size

    if args.att_func == 'mlp':
        att = nn_layers.MLPAttentionLayer([network1, network2], args.rnn_output_size,
                                          mask_input=l_mask1)
    elif args.att_func == 'bilinear':
        att = nn_layers.BilinearAttentionLayer([network1, network2], args.rnn_output_size,
                                               mask_input=l_mask1)
    elif args.att_func == 'avg':
        att = nn_layers.AveragePoolingLayer(network1, mask_input=l_mask1)
    elif args.att_func == 'last':
        att = network1
    elif args.att_func == 'dot':
        att = nn_layers.DotProductAttentionLayer([network1, network2], mask_input=l_mask1)
    else:
        raise NotImplementedError('att_func = %s' % args.att_func)

    network = lasagne.layers.DenseLayer(att, args.num_labels,
                                        nonlinearity=lasagne.nonlinearities.softmax)

    if args.pre_trained is not None:
        dic = utils.load_params(args.pre_trained)
        lasagne.layers.set_all_param_values(network, dic['params'], trainable=True)
        del dic['params']
        logging.info('Loaded pre-trained model: %s' % args.pre_trained)
        for dic_param in dic.iteritems():
            logging.info(dic_param)

    logging.info('#params: %d' % lasagne.layers.count_params(network, trainable=True))
    for layer in lasagne.layers.get_all_layers(network):
        logging.info(layer)

    # Test functions
    test_prob = lasagne.layers.get_output(network, deterministic=True) * in_l
    test_prediction = T.argmax(test_prob, axis=-1)
    acc = T.sum(T.eq(test_prediction, in_y))
    test_fn = theano.function([in_x1, in_mask1, in_x2, in_mask2, in_l, in_y], acc)

    # Train functions
    train_prediction = lasagne.layers.get_output(network) * in_l
    train_prediction = train_prediction / \
        train_prediction.sum(axis=1).reshape((train_prediction.shape[0], 1))
    train_prediction = T.clip(train_prediction, 1e-7, 1.0 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(train_prediction, in_y).mean()
    # TODO: lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2)
    params = lasagne.layers.get_all_params(network, trainable=True)

    if args.optimizer == 'sgd':
        updates = lasagne.updates.sgd(loss, params, args.learning_rate)
    elif args.optimizer == 'adam':
        updates = lasagne.updates.adam(loss, params)
    elif args.optimizer == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params)
    else:
        raise NotImplementedError('optimizer = %s' % args.optimizer)
    train_fn = theano.function([in_x1, in_mask1, in_x2, in_mask2, in_l, in_y],
                               loss, updates=updates)

    return train_fn, test_fn, params
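
# build_fn() above multiplies the softmax output by the candidate mask in_l and renormalizes,
# so probability mass is restricted to entities that actually occur in each passage. A
# numpy-only sketch of that masking/renormalization step (illustrative, not the model code;
# the helper name `mask_and_renormalize` is hypothetical):
import numpy as np

def mask_and_renormalize(probs, cand_mask, eps=1e-7):
    # probs: (batch, num_labels) softmax output; cand_mask: 0/1 matrix of valid candidates
    masked = probs * cand_mask
    masked = masked / masked.sum(axis=1, keepdims=True)
    return np.clip(masked, eps, 1.0 - eps)

probs = np.array([[0.5, 0.3, 0.2]])
mask = np.array([[1.0, 0.0, 1.0]])        # label 1 never appears in this passage
print mask_and_renormalize(probs, mask)   # -> [[0.714..., 1e-07, 0.285...]]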
def main(data_file='', num_epochs=10, batch_size=128, L=2, z_dim=256, n_hid=1500,
         binary='false', img_size=64, init_from='', save_to='params',
         split_layer='conv7', pxsh=0.5, specstr=c.pf_cae_specstr,
         cae_weights=c.pf_cae_params, deconv_weights=c.pf_deconv_params):
    binary = binary.lower() == 'true'

    # pre-trained function for extracting convolutional features from images
    cae = m.build_cae(input_var=None, specstr=specstr, shape=(img_size, img_size))
    laydict = dict((l.name, l) for l in nn.layers.get_all_layers(cae))
    convshape = nn.layers.get_output_shape(laydict[split_layer])
    convs_from_img, _ = m.encoder_decoder(cae_weights, specstr=specstr,
                                          layersplit=split_layer,
                                          shape=(img_size, img_size))
    # pre-trained function for returning to images from convolutional features
    img_from_convs = m.deconvoluter(deconv_weights, specstr=specstr, shape=convshape)

    # Create VAE model
    print("Building model and compiling functions...")
    print("L = {}, z_dim = {}, n_hid = {}, binary={}".format(L, z_dim, n_hid, binary))
    input_var = T.tensor4('inputs')
    c, w, h = convshape[1], convshape[2], convshape[3]
    l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
        m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=n_hid,
                    shape=(w, h), channels=c)

    if len(init_from) > 0:
        print("loading from {}".format(init_from))
        u.load_params(l_x, init_from)

    # build loss, updates, training, prediction functions
    loss, _ = u.build_vae_loss(input_var, *l_tup, deterministic=False, binary=binary, L=L)
    test_loss, test_prediction = u.build_vae_loss(input_var, *l_tup, deterministic=True,
                                                  binary=binary, L=L)
    lr = theano.shared(nn.utils.floatX(1e-5))
    params = nn.layers.get_all_params(l_x, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], test_prediction)

    # run training loop
    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center)
        return convs_from_img(floatx_ims)

    print("training for {} epochs".format(num_epochs))
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs,
                             train_fn=train_fn, test_fn=val_fn,
                             tr_transform=lambda x: data_transform(x[0], do_center=False),
                             te_transform=lambda x: data_transform(x[0], do_center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    orig_feats = data_transform(imb, do_center=True)
    reconstructed_feats = ae_fn(orig_feats).reshape(orig_feats.shape)
    orig_feats_deconv = img_from_convs(orig_feats)
    reconstructed_feats_deconv = img_from_convs(reconstructed_feats)
    for i in range(reconstructed_feats_deconv.shape[0]):
        u.get_image_pair(orig_feats_deconv, reconstructed_feats_deconv, index=i, shift=pxsh)\
            .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('vae_convs_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(l_x, os.path.join(save_to, 'vae_convs_{}.npz'.format(hist[-1, -1])))
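
# main() above fits a VAE on conv-feature maps rather than raw pixels; build_vae_loss is the
# usual VAE objective. As a reference, a minimal numpy sketch of the two ingredients such an
# objective combines -- the reparameterization trick and the KL term against a standard
# normal prior -- under the common convention that the encoder outputs log sigma. The helper
# names below are hypothetical and not part of the original code.
import numpy as np

def reparameterize(z_mu, z_log_sigma, rng=np.random):
    eps = rng.standard_normal(z_mu.shape)
    return z_mu + np.exp(z_log_sigma) * eps           # z = mu + sigma * eps

def kl_to_standard_normal(z_mu, z_log_sigma):
    # KL(N(mu, sigma^2) || N(0, 1)) summed over latent dimensions
    return 0.5 * np.sum(np.exp(2 * z_log_sigma) + z_mu ** 2 - 1 - 2 * z_log_sigma, axis=1)

mu = np.zeros((4, 8))
ls = np.zeros((4, 8))
print kl_to_standard_normal(mu, ls)   # all zeros: the posterior matches the prior exactly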
def main(save_to='params',
         dataset='mm',
         kl_loss='true',      # use kl-div in z-space instead of mse
         diffs='false',
         seq_length=30,
         num_epochs=1,
         lstm_n_hid=1024,
         max_per_epoch=-1):
    kl_loss = kl_loss.lower() == 'true'
    diffs = diffs.lower() == 'true'

    # set up functions for data pre-processing and model training
    input_var = T.tensor4('inputs')

    # different experimental setup for moving mnist vs pulp fiction datasets
    if dataset == 'pf':
        img_size = 64
        cae_weights = c.pf_cae_params
        cae_specstr = c.pf_cae_specstr
        split_layer = 'conv7'
        inpvar = T.tensor4('input')
        net = m.build_cae(inpvar, specstr=cae_specstr, shape=(img_size, img_size))
        convs_from_img, _ = m.encoder_decoder(cae_weights, specstr=cae_specstr,
                                              layersplit=split_layer,
                                              shape=(img_size, img_size), poolinv=True)
        laydict = dict((l.name, l) for l in nn.layers.get_all_layers(net))
        zdec_in_shape = nn.layers.get_output_shape(laydict[split_layer])
        deconv_weights = c.pf_deconv_params
        vae_weights = c.pf_vae_params
        img_from_convs = m.deconvoluter(deconv_weights, specstr=cae_specstr,
                                        shape=zdec_in_shape)
        L = 2
        vae_n_hid = 1500
        binary = False
        z_dim = 256
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=vae_n_hid,
                        shape=(zdec_in_shape[2], zdec_in_shape[3]),
                        channels=zdec_in_shape[1])
        u.load_params(l_x, vae_weights)
        datafile = 'data/pf.hdf5'
        frame_skip = 3        # use every 3rd frame in the sequence
        z_decode_layer = l_x_mu_list[0]
        pixel_shift = 0.5
        samples_per_image = 4
        tr_batch_size = 16    # must be a multiple of samples_per_image
    elif dataset == 'mm':
        img_size = 64
        cvae_weights = c.mm_cvae_params
        L = 2
        vae_n_hid = 1024
        binary = True
        z_dim = 32
        zdec_in_shape = (None, 1, img_size, img_size)
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vcae(input_var, L=L, z_dim=z_dim, n_hid=vae_n_hid, binary=binary,
                         shape=(zdec_in_shape[2], zdec_in_shape[3]),
                         channels=zdec_in_shape[1])
        u.load_params(l_x, cvae_weights)
        datafile = 'data/moving_mnist.hdf5'
        frame_skip = 1
        w, h = img_size, img_size   # size of the raw input image in the hdf5 file
        z_decode_layer = l_x_list[0]
        pixel_shift = 0
        samples_per_image = 1
        tr_batch_size = 128   # must be a multiple of samples_per_image

    # functions for moving to/from image or conv-space, and z-space
    z_mat = T.matrix('z')
    zenc = theano.function([input_var], nn.layers.get_output(l_z_mu, deterministic=True))
    zdec = theano.function([z_mat],
                           nn.layers.get_output(z_decode_layer, {l_z_mu: z_mat},
                                                deterministic=True)
                           .reshape((-1, zdec_in_shape[1]) + zdec_in_shape[2:]))
    zenc_ls = theano.function([input_var], nn.layers.get_output(l_z_ls, deterministic=True))

    # functions for encoding sequences of z's
    print 'compiling functions'
    z_var = T.tensor3('z_in')
    z_ls_var = T.tensor3('z_ls_in')
    tgt_mu_var = T.tensor3('z_tgt')
    tgt_ls_var = T.tensor3('z_ls_tgt')
    learning_rate = theano.shared(nn.utils.floatX(1e-4))

    # separate function definitions if we are using MSE and predicting only z, or KL
    # divergence and predicting both mean and sigma of z
    if kl_loss:
        def kl(p_mu, p_sigma, q_mu, q_sigma):
            return 0.5 * T.sum(T.sqr(p_sigma) / T.sqr(q_sigma)
                               + T.sqr(q_mu - p_mu) / T.sqr(q_sigma)
                               - 1 + 2 * T.log(q_sigma) - 2 * T.log(p_sigma))
        lstm, _ = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        z_mu_expr, z_ls_expr = nn.layers.get_output([lstm['output_mu'], lstm['output_ls']])
        z_mu_expr_det, z_ls_expr_det = nn.layers.get_output(
            [lstm['output_mu'], lstm['output_ls']], deterministic=True)
        loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr, T.exp(z_ls_expr))
        te_loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr_det, T.exp(z_ls_expr_det))
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], loss,
                                   updates=updates)
        test_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], te_loss)
    else:
        lstm, _ = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output']),
                                           tgt_mu_var).mean()
        te_loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output'],
                                                                   deterministic=True),
                                              tgt_mu_var).mean()
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, tgt_mu_var], loss, updates=updates)
        test_fn = theano.function([z_var, tgt_mu_var], te_loss)

    if dataset == 'pf':
        z_from_img = lambda x: zenc(convs_from_img(x))
        z_ls_from_img = lambda x: zenc_ls(convs_from_img(x))
        img_from_z = lambda z: img_from_convs(zdec(z))
    elif dataset == 'mm':
        z_from_img = zenc
        z_ls_from_img = zenc_ls
        img_from_z = zdec

    # training loop
    print('training for {} epochs'.format(num_epochs))
    nbatch = (seq_length + 1) * tr_batch_size * frame_skip / samples_per_image
    data = u.DataH5PyStreamer(datafile, batch_size=nbatch)

    # for taking arrays of uint8 (non square) and converting them to batches of sequences
    def transform_data(ims_batch, center=False):
        imb = u.raw_to_floatX(ims_batch, pixel_shift=pixel_shift,
                              center=center)[np.random.randint(frame_skip)::frame_skip]
        zbatch = np.zeros((tr_batch_size, seq_length + 1, z_dim),
                          dtype=theano.config.floatX)
        zsigbatch = np.zeros((tr_batch_size, seq_length + 1, z_dim),
                             dtype=theano.config.floatX)
        for i in xrange(samples_per_image):
            chunk = tr_batch_size / samples_per_image
            if diffs:
                zf = z_from_img(imb).reshape((chunk, seq_length + 1, -1))
                zbatch[i*chunk:(i+1)*chunk, 1:] = zf[:, 1:] - zf[:, :-1]
                if kl_loss:
                    zls = z_ls_from_img(imb).reshape((chunk, seq_length + 1, -1))
                    zsigbatch[i*chunk:(i+1)*chunk, 1:] = zls[:, 1:] - zls[:, :-1]
            else:
                zbatch[i*chunk:(i+1)*chunk] = \
                    z_from_img(imb).reshape((chunk, seq_length + 1, -1))
                if kl_loss:
                    zsigbatch[i*chunk:(i+1)*chunk] = \
                        z_ls_from_img(imb).reshape((chunk, seq_length + 1, -1))
        if kl_loss:
            return (zbatch[:, :-1, :], zsigbatch[:, :-1, :],
                    zbatch[:, 1:, :], zsigbatch[:, 1:, :])
        return zbatch[:, :-1, :], zbatch[:, 1:, :]

    # we need sequences of images, so we do not shuffle data during training
    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn,
                             train_shuffle=False, max_per_epoch=max_per_epoch,
                             tr_transform=lambda x: transform_data(x[0], center=False),
                             te_transform=lambda x: transform_data(x[0], center=True))
    hist = np.asarray(hist)
    u.save_params(lstm['output'], os.path.join(save_to, 'lstm_{}.npz'.format(hist[-1, -1])))

    # build functions to sample from the LSTM:
    # separate cell_init and hid_init from the other learned model parameters
    all_param_values = nn.layers.get_all_param_values(lstm['output'])
    init_indices = [i for i, p in enumerate(nn.layers.get_all_params(lstm['output']))
                    if 'init' in str(p)]
    init_values = [all_param_values[i] for i in init_indices]
    params_noinit = [p for i, p in enumerate(all_param_values) if i not in init_indices]

    # build model without learnable init values, and load non-init parameters
    if kl_loss:
        lstm_sample, state_vars = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid,
                                            training=False)
    else:
        lstm_sample, state_vars = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid,
                                           training=False)
    nn.layers.set_all_param_values(lstm_sample['output'], params_noinit)

    # extract layers representing the hidden and cell states, and have sample_fn
    # return their outputs
    state_layers_keys = [k for k in lstm_sample.keys()
                         if 'hidfinal' in k or 'cellfinal' in k]
    state_layers_keys = sorted(state_layers_keys)
    state_layers_keys = sorted(state_layers_keys, key=lambda x: int(x.split('_')[1]))
    state_layers = [lstm_sample[s] for s in state_layers_keys]
    if kl_loss:
        sample_fn = theano.function(
            [z_var, z_ls_var] + state_vars,
            nn.layers.get_output([lstm_sample['output_mu'],
                                  lstm_sample['output_ls']] + state_layers,
                                 deterministic=True))
    else:
        sample_fn = theano.function(
            [z_var] + state_vars,
            nn.layers.get_output([lstm_sample['output']] + state_layers,
                                 deterministic=True))

    from images2gif import writeGif
    from PIL import Image

    # sample approximately 30 different generated video sequences
    te_stream = data.streamer(training=True, shuffled=False)
    interval = data.ntrain / data.batch_size / 30
    for idx, imb in enumerate(te_stream.get_epoch_iterator()):
        if idx % interval != 0:
            continue
        z_tup = transform_data(imb[0], center=True)
        seg_idx = np.random.randint(z_tup[0].shape[0])
        if kl_loss:
            z_in, z_ls_in = z_tup[0], z_tup[1]
            z_last, z_ls_last = z_in[seg_idx:seg_idx+1], z_ls_in[seg_idx:seg_idx+1]
            z_vars = [z_last, z_ls_last]
        else:
            z_in = z_tup[0]
            z_last = z_in[seg_idx:seg_idx+1]
            z_vars = [z_last]
        images = []
        state_values = [np.dot(np.ones((z_last.shape[0], 1), dtype=theano.config.floatX), s)
                        for s in init_values]
        output_list = sample_fn(*(z_vars + state_values))

        # use the whole sequence of predictions for the primed part of the output
        z_pred = output_list[0]
        state_values = output_list[2 if kl_loss else 1:]
        rec = img_from_z(z_pred.reshape(-1, z_dim))
        for k in xrange(rec.shape[0]):
            images.append(Image.fromarray(u.get_picture_array(rec, index=k,
                                                              shift=pixel_shift)))

        # slice the last prediction to feed back into the lstm
        z_pred = z_pred[:, -1:, :]
        if kl_loss:
            z_ls_pred = output_list[1][:, -1:, :]
            z_vars = [z_pred, z_ls_pred]
        else:
            z_vars = [z_pred]
        for i in xrange(30):   # predict 30 frames after the end of the priming video
            output_list = sample_fn(*(z_vars + state_values))
            z_pred = output_list[0]
            state_values = output_list[2 if kl_loss else 1:]
            rec = img_from_z(z_pred.reshape(-1, z_dim))
            images.append(Image.fromarray(u.get_picture_array(rec, index=0,
                                                              shift=pixel_shift)))
            if kl_loss:
                z_ls_pred = output_list[1]
                z_vars = [z_pred, z_ls_pred]
            else:
                z_vars = [z_pred]
        writeGif("sample_{}.gif".format(idx), images, duration=0.1, dither=0)
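
# The kl() helper inside main() above is the KL divergence between two diagonal Gaussians,
# KL(p || q) = 0.5 * sum(sigma_p^2/sigma_q^2 + (mu_q - mu_p)^2/sigma_q^2 - 1
#                        + 2*log sigma_q - 2*log sigma_p).
# A small numpy version for sanity-checking the Theano expression (reference only, not part
# of the original code):
import numpy as np

def kl_diag_gaussians(p_mu, p_sigma, q_mu, q_sigma):
    return 0.5 * np.sum(p_sigma ** 2 / q_sigma ** 2
                        + (q_mu - p_mu) ** 2 / q_sigma ** 2
                        - 1 + 2 * np.log(q_sigma) - 2 * np.log(p_sigma))

mu = np.zeros(5)
sig = np.ones(5)
print kl_diag_gaussians(mu, sig, mu, sig)         # 0.0 when p == q
print kl_diag_gaussians(mu, sig, mu + 1.0, sig)   # 2.5 = 0.5 * 5 * 1^2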