def main():
    # load config file
    config = run_utils.load_config(CONFIG_NAME)
    config_test = run_utils.load_config(TEST_CONFIG_NAME)

    # Find data_dir
    data_dir = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dir is None:
        print("ERROR: Data directory not found!")
        exit()
    else:
        print("Using data from {}".format(data_dir))
    dataset_raw_dirpath = os.path.join(
        data_dir, config_test["dataset_raw_partial_dirpath"])

    if not os.path.exists(config_test["disp_maps_dir"]):
        os.makedirs(config_test["disp_maps_dir"])

    for images_info in config_test["images_info_list"]:
        for number in images_info["numbers"]:
            image_info = {
                "city": images_info["city"],
                "number": number,
            }
            generate_disp_maps(dataset_raw_dirpath, image_info,
                               config_test["disp_map_params"],
                               config_test["thresholds"],
                               config_test["disp_maps_dir"])
def main(): # --- Process args --- # args = get_args() config = run_utils.load_config(args.config) if config is None: print_utils.print_error( "ERROR: cannot continue without a config file. Exiting now...") exit() distribution = "uniform" params = { "n": args.sample_count, "f": args.frequency, "s": args.noise_std, "d": distribution, } sobol_generator = rand_utils.SobolGenerator() # sobol_generator = None generate_test(config, params, split_name="train", seed=0, sobol_generator=sobol_generator) generate_test(config, params, split_name="val", seed=1, sobol_generator=sobol_generator) generate_test(config, params, split_name="test", seed=2, sobol_generator=sobol_generator)
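# A hypothetical sketch of the get_args() helper assumed above. The flag names
# are inferred from the attributes accessed on `args` (config, sample_count,
# frequency, noise_std); defaults are placeholders, not taken from the source.
import argparse


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, required=True,
                        help="Path to the config file")
    parser.add_argument("--sample_count", type=int, default=1000,
                        help="Number of samples n to generate")
    parser.add_argument("--frequency", type=float, default=1.0,
                        help="Signal frequency f")
    parser.add_argument("--noise_std", type=float, default=0.0,
                        help="Standard deviation s of the added noise")
    return parser.parse_args()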
def reboot_run(run_dir, used_gpus):
    cfg_file = pjoin(run_dir, 'cfg.json')
    # Read in cluster we should be using
    cfg = load_config(cfg_file)
    cluster = ''.join(c for c in cfg['host'] if not c.isdigit())

    run_args = '--cfg_file %s' % cfg_file
    cmd = 'cd %s; source ~/.bashrc; nohup %s runNNet.py %s' % (
        CTC_DIR, PYTHON_CMD, run_args)
    print cmd

    gpu_node = None
    while not gpu_node:
        all_free_gpus = get_all_free_gpus(cluster)
        print all_free_gpus
        gpu_node, gpu = get_next_free_gpu_sequential(all_free_gpus, used_gpus,
                                                     FLAGGED_GPUS)
        if not gpu_node:
            print 'No free GPUs, waiting for a bit'
            time.sleep(SLEEP_SEC)

    # Log to file for debugging
    log_file = pjoin(RUN_DIR, '%s.txt' % str(TimeString()))
    print 'Logging to %s' % log_file
    run_gpu_job(gpu_node, gpu, cmd, blocking=False,
                stdout=open(log_file, 'w'))

    used_gpus.add(gpu_node + '_' + str(gpu))
    time.sleep(SLEEP_SEC)
def main():
    # load config file
    config_test = run_utils.load_config(TEST_CONFIG_NAME)

    # Find data_dir
    data_dir = python_utils.choose_first_existing_path(
        config_test["data_dir_candidates"])
    if data_dir is None:
        print("ERROR: Data directory not found!")
        exit()
    else:
        print("Using data from {}".format(data_dir))
    dataset_raw_dirpath = os.path.join(
        data_dir, config_test["dataset_raw_partial_dirpath"])

    output_dir = config_test["align_dir"] + OUTPUT_DIRNAME_EXTENTION
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for images_info in config_test["images_info_list"]:
        for number in images_info["numbers"]:
            image_info = {
                "city": images_info["city"],
                "number": number,
            }
            test_image(RUNS_DIRPATH, dataset_raw_dirpath, image_info,
                       config_test["disp_maps_dir"],
                       config_test["disp_map_params"]["disp_map_count"],
                       config_test["disp_map_params"]["disp_max_abs_value"],
                       config_test["batch_size"], DS_FAC_LIST, RUN_NAME_LIST,
                       config_test["model_disp_max_abs_value"],
                       config_test["thresholds"], output_dir,
                       config_test["output_shapefiles"])
def test(opts):
    old_opts = CfgStruct(**load_config(opts.cfg_file))

    logging.basicConfig(filename=pjoin(opts.output_dir, 'test.log'),
                        level=logging.DEBUG)
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.info('Running on %s' % get_hostname())

    with open(old_opts.in_file, 'r') as fid:
        pickle.load(fid)  # SGD data, not needed
        print 'rawDim:', old_opts.rawDim, 'inputDim:', old_opts.inputDim,\
            'layerSize:', old_opts.layerSize, 'numLayers:', old_opts.numLayers,\
            'maxUttLen:', old_opts.maxUttLen
        print 'temporalLayer:', old_opts.temporalLayer, 'outputDim:', old_opts.outputDim

        alisDir = opts.alisDir if opts.alisDir else opts.dataDir
        loader = dl.DataLoader(opts.dataDir, old_opts.rawDim,
                               old_opts.inputDim, alisDir)
        nn = rnnet.NNet(old_opts.inputDim, old_opts.outputDim,
                        old_opts.layerSize, old_opts.numLayers,
                        old_opts.maxUttLen,
                        temporalLayer=old_opts.temporalLayer, train=False)
        nn.initParams()
        nn.fromFile(fid)

    # FIXME Different output directory specific to test set
    out_dir = pjoin(SCAIL_DATA_DIR,
                    'ctc_loglikes_%s_%s' % (DATASET, DATA_SUBSET))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for i in range(opts.startFile, opts.numFiles + 1):
        writeLogLikes(loader, nn, i, out_dir, writePickle=True)
def main():
    # TODO: pick center pixel when computing gradients
    # TODO: solve bug.= (look at output)
    # TODO: display input polygons as well in final plot
    # TODO: find theta (rotation) that minimizes k(.,.) in closed form
    # TODO: measure k(., .) with different models trained at different rounds

    args = get_args()

    # load overwrite_config file
    overwrite_config = run_utils.load_config(args.config)
    if args.runs_dirpath is not None:
        overwrite_config["runs_dirpath"] = args.runs_dirpath
    overwrite_config["input_res"] = PATCH_RES
    overwrite_config["batch_size"] = 1

    # Find data_dir
    if args.data_dirpath is None:
        data_dirpath = python_utils.choose_first_existing_path(
            overwrite_config["data_dir_candidates"])
        if data_dirpath is None:
            print("ERROR: Data directory not found!")
            exit()
        else:
            print("Using data from {}".format(data_dirpath))
    else:
        data_dirpath = args.data_dirpath

    raw_dirpath = os.path.join(data_dirpath, DATASET_NAME, "raw")

    # Get all tiles
    tile_info_list_filepath = "{}.tile_info_list.npy".format(
        args.output_dirname)
    try:
        print("Loading tile_info_list from disk...")
        tile_info_list = np.load(tile_info_list_filepath)
    except FileNotFoundError:
        tile_info_list = read.get_tile_info_list(raw_dirpath=raw_dirpath)
        # Sample patches in each tile
        pool_size = 4
        with Pool(pool_size) as p:
            params_list = [(raw_dirpath, tile_info, args.ds_fac, PATCH_RES,
                            PATCH_PER_TILE, SEED)
                           for tile_info in tile_info_list]
            tile_info_list = list(
                tqdm(p.imap(sample_patches, params_list),
                     total=len(params_list), desc="Sample patches: "))
        np.save(tile_info_list_filepath, tile_info_list)

    # tile_info_list = tile_info_list[-60:-50]  # TODO: remove to take all tiles

    if args.mode == "compute":
        compute(args, raw_dirpath, overwrite_config, tile_info_list)
    elif args.mode == "individual":
        individual(args, raw_dirpath, tile_info_list)
def main(_): working_dir = os.path.dirname(os.path.abspath(__file__)) # print FLAGS print("#--- FLAGS: ---#") print("config: {}".format(FLAGS.config)) print("new_run: {}".format(FLAGS.new_run)) print("init_run_name: {}".format(FLAGS.init_run_name)) print("run_name: {}".format(FLAGS.run_name)) print("batch_size: {}".format(FLAGS.batch_size)) # load config file config = run_utils.load_config(FLAGS.config) # Check config setting coherences assert len(config["level_loss_coefs_params"]) == config["pool_count"], \ "level_loss_coefs_params ({} elements) must have model_res_levels ({}) elements".format( len(config["level_loss_coefs_params"]), config["pool_count"]) tfrecords_dirpath_list = [ os.path.join(working_dir, tfrecords_dirpath) for tfrecords_dirpath in config["tfrecords_partial_dirpath_list"] ] ds_repeat_list = config["ds_repeat_list"] # setup init run directory of one is specified: if FLAGS.init_run_name is not None: init_run_dirpath = run_utils.setup_run_dir(config["runs_dirname"], FLAGS.init_run_name) else: init_run_dirpath = None # setup run directory: runs_dir = os.path.join(working_dir, config["runs_dirname"]) current_run_dirpath = run_utils.setup_run_dir(runs_dir, FLAGS.run_name, FLAGS.new_run) # save config in logs directory run_utils.save_config(config, current_run_dirpath) # save FLAGS FLAGS_filepath = os.path.join(current_run_dirpath, "FLAGS.json") python_utils.save_json( FLAGS_filepath, { "run_name": FLAGS.run_name, "new_run": FLAGS.new_run, "batch_size": FLAGS.batch_size }) train(config, tfrecords_dirpath_list, init_run_dirpath, current_run_dirpath, FLAGS.batch_size, ds_repeat_list)
def main(): # --- Process args --- # args = get_args() config = run_utils.load_config(args.config) if config is None: print_utils.print_error( "ERROR: cannot continue without a config file. Exiting now...") exit() if args.batch_size is not None: config["batch_size"] = args.batch_size distribution = "uniform" dataset_params = { "n": args.sample_count, "f": args.frequency, "s": args.noise_std, "d": distribution, } # Find data_dir data_dirpath = python_utils.choose_first_existing_path( config["data_dir_candidates"]) if data_dirpath is None: print_utils.print_error("ERROR: Data directory not found!") exit() data_dirpath = os.path.expanduser(data_dirpath) print_utils.print_info("Using data from {}".format(data_dirpath)) root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"]) sobol_generator = rand_utils.SobolGenerator() train_ds = Synthetic1DDataset(root_dir=root_dir, params=dataset_params, split_name="train", sobol_generator=sobol_generator, transform=torchvision.transforms.Compose([ transforms.ToTensor(), transforms.ToDevice(device="cuda") ])) train_dl = DataLoader(train_ds, batch_size=config["batch_size"], shuffle=True, num_workers=4) for i_batch, sample_batched in enumerate(train_dl): print( i_batch, sample_batched['density'].max(), # sample_batched['gt'], # sample_batched['noise'], )
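# Note: ToDevice is not part of torchvision.transforms, so the snippet above
# assumes a project-local `transforms` module. A minimal sketch of what such a
# custom transform might look like (names and sample layout are assumptions,
# not taken from the source):
import torch


class ToDevice(object):
    """Move every tensor in a sample dict to the given device."""

    def __init__(self, device="cuda"):
        self.device = torch.device(device)

    def __call__(self, sample):
        return {key: value.to(self.device) if torch.is_tensor(value) else value
                for key, value in sample.items()}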
def main(): # --- Process args --- # args = get_args() config = run_utils.load_config(args.config) if config is None: print_utils.print_error( "ERROR: cannot continue without a config file. Exiting now...") exit() if args.runs_dirpath is not None: config["runs_dirpath"] = args.runs_dirpath if args.mode == "image": plot_stats.plot_stats(config, args.run_name, args.source_idx_list) elif args.mode == "1d": plot_stats_1d.plot(config, args.run_name)
def main(): # --- Process args --- # args = get_args() config = run_utils.load_config(args.config) if config is None: print_utils.print_error( "ERROR: cannot continue without a config file. Exiting now...") exit() if args.batch_size is not None: config["batch_size"] = args.batch_size if args.exps_dirpath is not None: config["exps_dirpath"] = args.exps_dirpath distribution = "uniform" params = { "run_count": args.run_count, "sample_count": args.sample_count, "frequency": args.frequency, "noise_std": args.noise_std, "distribution": distribution, } stats_params = { "neighbors_t": args.neighbors_t, "neighbors_n": args.neighbors_n, } working_dir = os.path.dirname(os.path.abspath(__file__)) # Setup exp directory: exps_dir = os.path.join(working_dir, config["exps_dirpath"]) exp_dirpath = run_utils.setup_run_dir(exps_dir, args.exp_name, args.new_exp) # Launch experiments launch_experiments(config, exp_dirpath, args.new_exp, args.recompute_stats, params, stats_params) # Aggregate results aggregate_results(exp_dirpath, params, stats_params)
def main():
    # TODO: pick center pixel when computing gradients
    # TODO: solve bug.= (look at output)
    # TODO: display input polygons as well in final plot
    # TODO: find theta (rotation) that minimizes k(.,.) in closed form
    # TODO: measure k(., .) with different models trained at different rounds

    args = get_args()

    # load overwrite_config file
    overwrite_config = run_utils.load_config(args.config)

    # Find data_dir
    data_dir = python_utils.choose_first_existing_path(
        overwrite_config["data_dir_candidates"])
    if data_dir is None:
        print("ERROR: Data directory not found!")
        exit()
    else:
        print("Using data from {}".format(data_dir))
    raw_dirpath = os.path.join(data_dir, DATASET_NAME, "raw")

    # Get all tiles
    print("Loading tile_info_list from disk...")
    tile_info_list_filepath = "{}.tile_info_list.npy".format(
        args.output_dirname)
    tile_info_list = np.load(tile_info_list_filepath)

    # tile_info_list = tile_info_list[-60:-50]  # TODO: remove to take all tiles

    if args.mode == "overall":
        print("Plot overall histogram of neighbors_soft:")
        fig_name = args.output_dirname + ".overall_hist"
        plot_neighbors_soft_hist(raw_dirpath, tile_info_list,
                                 args.output_dirname, fig_name)
    elif args.mode == "individual":
        plot_similarities(raw_dirpath, tile_info_list, args.output_dirname,
                          args.individual_selection, args.k)
def main():
    # load config file
    config_test = run_utils.load_config(TEST_CONFIG_NAME)

    # # Handle FLAGS
    # if FLAGS.batch_size is not None:
    #     batch_size = FLAGS.batch_size
    # else:
    #     batch_size = config_test["batch_size"]
    # print("#--- Used params: ---#")
    # print("batch_size: {}".format(FLAGS.batch_size))

    # Find data_dir
    data_dir = python_utils.choose_first_existing_path(
        config_test["data_dir_candidates"])
    if data_dir is None:
        print("ERROR: Data directory not found!")
        exit()
    else:
        print("Using data from {}".format(data_dir))
    dataset_raw_dirpath = os.path.join(
        data_dir, config_test["dataset_raw_partial_dirpath"])

    output_dir_stem = config_test["align_dir"]

    for images_info in config_test["images_info_list"]:
        for number in images_info["numbers"]:
            image_info = {
                "city": images_info["city"],
                "number": number,
            }
            measure_image(dataset_raw_dirpath, image_info,
                          PERFECT_GT_POLYGONS_DIRNAME,
                          GT_POLYGONS_DIRNAME_LIST, THRESHOLDS,
                          output_dir_stem)
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option('--cfg_file', dest='cfg_file', default=None,
                      help='File with settings from previously trained net')
    parser.add_option("--test", action="store_true", dest="test",
                      default=False)

    # Architecture
    parser.add_option("--layerSize", dest="layerSize", type="int",
                      default=1824)
    parser.add_option("--numLayers", dest="numLayers", type="int", default=5)
    parser.add_option("--temporalLayer", dest="temporalLayer", type="int",
                      default=3)

    # Optimization
    parser.add_option("--momentum", dest="momentum", type="float",
                      default=0.95)
    parser.add_option("--epochs", dest="epochs", type="int", default=20)
    parser.add_option("--step", dest="step", type="float", default=1e-5)
    parser.add_option("--anneal", dest="anneal", type="float", default=1.3,
                      help="Sets (learning rate := learning rate / anneal) after each epoch.")
    parser.add_option('--reg', dest='reg', type='float', default=0.0,
                      help='lambda for L2 regularization of the weight matrices')

    # Data
    parser.add_option("--dataDir", dest="dataDir", type="string",
                      default=TRAIN_DATA_DIR['fbank'])
    parser.add_option('--alisDir', dest='alisDir', type='string',
                      default=TRAIN_ALIS_DIR)
    parser.add_option('--startFile', dest='startFile', type='int', default=1,
                      help='Start file for running testing')
    parser.add_option("--numFiles", dest="numFiles", type="int", default=384)
    parser.add_option("--inputDim", dest="inputDim", type="int",
                      default=41 * 15)
    parser.add_option("--rawDim", dest="rawDim", type="int", default=41 * 15)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=35)
    parser.add_option("--maxUttLen", dest="maxUttLen", type="int",
                      default=MAX_UTT_LEN)

    # Save/Load
    parser.add_option('--save_every', dest='save_every', type='int',
                      default=10,
                      help='During training, save parameters every x number of files')
    parser.add_option('--run_desc', dest='run_desc', type='string',
                      default='', help='Description of experiment run')

    (opts, args) = parser.parse_args(args)

    if opts.cfg_file:
        cfg = load_config(opts.cfg_file)
    else:
        cfg = vars(opts)

    # These config values should be updated every time
    cfg['host'] = get_hostname()
    cfg['git_rev'] = get_git_revision()
    cfg['pid'] = os.getpid()

    # Create experiment output directory
    if not opts.cfg_file:
        time_string = str(TimeString())
        output_dir = pjoin(RUN_DIR, time_string)
        cfg['output_dir'] = output_dir
        if not os.path.exists(output_dir):
            print 'Creating %s' % output_dir
            os.makedirs(output_dir)
        opts.cfg_file = pjoin(output_dir, 'cfg.json')
    else:
        output_dir = cfg['output_dir']

    cfg['output_dir'] = output_dir
    cfg['in_file'] = pjoin(output_dir, 'params.pk')
    cfg['out_file'] = pjoin(output_dir, 'params.pk')
    cfg['test'] = opts.test
    if opts.test:
        cfg['dataDir'] = opts.dataDir
        cfg['numFiles'] = opts.numFiles
        cfg['startFile'] = opts.startFile
    if 'reg' not in cfg:
        cfg['reg'] = 0.0

    # Logging
    logging.basicConfig(filename=pjoin(output_dir, 'train.log'),
                        level=logging.DEBUG)
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.info('Running on %s' % cfg['host'])

    # seed for debugging, turn off when stable
    np.random.seed(33)
    import random
    random.seed(33)

    if 'CUDA_DEVICE' in os.environ:
        cm.cuda_set_device(int(os.environ['CUDA_DEVICE']))
    else:
        cm.cuda_set_device(0)  # Default

    opts = CfgStruct(**cfg)

    # Testing
    if opts.test:
        test(opts)
        return

    alisDir = opts.alisDir if opts.alisDir else opts.dataDir
    loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir)
    nn = rnnet.NNet(opts.inputDim, opts.outputDim, opts.layerSize,
                    opts.numLayers, opts.maxUttLen,
                    temporalLayer=opts.temporalLayer, reg=opts.reg)
    nn.initParams()

    SGD = sgd.SGD(nn, opts.maxUttLen, alpha=opts.step, momentum=opts.momentum)

    # Dump config
    cfg['param_count'] = nn.paramCount()
    dump_config(cfg, opts.cfg_file)

    # Training
    epoch_file = pjoin(output_dir, 'epoch')
    if os.path.exists(epoch_file):
        start_epoch = int(open(epoch_file, 'r').read()) + 1
    else:
        start_epoch = 0

    # Load model if specified
    if os.path.exists(opts.in_file):
        with open(opts.in_file, 'r') as fid:
            SGD.fromFile(fid)
            SGD.alpha = SGD.alpha / (opts.anneal ** start_epoch)
            nn.fromFile(fid)

    num_files_file = pjoin(output_dir, 'num_files')

    for k in range(start_epoch, opts.epochs):
        perm = np.random.permutation(opts.numFiles) + 1
        loader.loadDataFileAsynch(perm[0])

        file_start = 0
        if k == start_epoch:
            if os.path.exists(num_files_file):
                file_start = int(open(num_files_file, 'r').read().strip())
                logger.info('Starting from file %d, epoch %d' %
                            (file_start, start_epoch))
        else:
            open(num_files_file, 'w').write(str(file_start))

        for i in xrange(file_start, perm.shape[0]):
            start = time.time()
            data_dict, alis, keys, sizes = loader.getDataAsynch()
            # Prefetch
            if i + 1 < perm.shape[0]:
                loader.loadDataFileAsynch(perm[i + 1])
            SGD.run(data_dict, alis, keys, sizes)
            end = time.time()
            logger.info('File time %f' % (end - start))

            # Save parameters and cost
            if (i + 1) % opts.save_every == 0:
                logger.info('Saving parameters')
                with open(opts.out_file, 'wb') as fid:
                    SGD.toFile(fid)
                    nn.toFile(fid)
                open(num_files_file, 'w').write('%d' % (i + 1))
                logger.info('Done saving parameters')
                with open(pjoin(output_dir, 'last_cost'), 'w') as fid:
                    if opts.reg > 0.0:
                        fid.write(str(SGD.expcost[-1] - SGD.regcost[-1]))
                    else:
                        fid.write(str(SGD.expcost[-1]))

        # Save epoch completed
        open(pjoin(output_dir, 'epoch'), 'w').write(str(k))
        # Save parameters for the epoch
        with open(opts.out_file + '.epoch{0:02}'.format(k), 'wb') as fid:
            SGD.toFile(fid)
            nn.toFile(fid)

        SGD.alpha = SGD.alpha / opts.anneal

    # Run now complete, touch sentinel file
    touch_file(pjoin(output_dir, 'sentinel'))
for d in run_dirs:
    alive = False
    log_file = pjoin(d, 'train.log')
    cfg_file = pjoin(d, 'cfg.json')
    if not os.path.exists(cfg_file):
        # Definitely delete it
        shutil.rmtree(d)
        continue

    alive = file_alive(log_file, max_dur_sec=60 * 60)
    if not alive and not os.path.exists(pjoin(d, 'sentinel')):
        run = os.path.basename(d)
        print 'loading config'
        print cfg_file
        cfg = load_config(cfg_file)
        print 'loaded config'
        host = cfg['host']
        pid = cfg['pid']
        print 'Killing run %s, PID %s on %s' % (run, cfg['pid'], cfg['host'])
        # Kill children (due to async data loader)
        run_cpu_job(host, 'pkill -TERM -P %s' % pid)
        # Kill process
        run_cpu_job(host, 'kill -9 %s' % pid)
        if args.clear_dirs:
            print 'Clearing %s' % d
            shutil.rmtree(d)
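# `file_alive` is assumed to report whether a log file was modified recently
# enough for the run to still be considered active. A minimal sketch of such a
# helper (the name and behaviour are inferred from its use above, not taken
# from the source):
import os
import time


def file_alive(path, max_dur_sec):
    """Return True if `path` exists and was modified within `max_dur_sec` seconds."""
    if not os.path.exists(path):
        return False
    return (time.time() - os.path.getmtime(path)) < max_dur_sec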
def runSeq(opts):
    fid = open(opts.out_file, 'w')
    phone_map = get_char_map(opts.dataDir)
    print phone_map
    print len(phone_map)

    alisDir = opts.alisDir if opts.alisDir else opts.dataDir
    loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir)

    hyps = list()
    refs = list()
    hypscores = list()
    refscores = list()
    numphones = list()
    subsets = list()
    alignments = list()

    if MODEL_TYPE != 'ngram':
        cfg_file = '/deep/u/zxie/rnnlm/13/cfg.json'
        params_file = '/deep/u/zxie/rnnlm/13/params.pk'
        #cfg_file = '/deep/u/zxie/dnn/11/cfg.json'
        #params_file = '/deep/u/zxie/dnn/11/params.pk'

        cfg = load_config(cfg_file)
        model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
        opt_hps = OptimizerHyperparams()
        model_hps.set_from_dict(cfg)
        opt_hps.set_from_dict(cfg)

        clm = model_class(None, model_hps, opt_hps, train=False, opt='nag')
        with open(params_file, 'rb') as fin:
            clm.from_file(fin)
    else:
        from srilm import LM
        from decoder_config import LM_ARPA_FILE
        print 'Loading %s...' % LM_ARPA_FILE
        clm = LM(LM_ARPA_FILE)
        print 'Done.'
    #clm = None

    for i in range(opts.start_file, opts.start_file + opts.numFiles):
        data_dict, alis, keys, _ = loader.loadDataFileDict(i)
        # For later alignments
        keys = sorted(keys)

        # For Switchboard filter
        if DATA_SUBSET == 'eval2000':
            if SWBD_SUBSET == 'swbd':
                keys = [k for k in keys if k.startswith('sw')]
            elif SWBD_SUBSET == 'callhome':
                keys = [k for k in keys if k.startswith('en')]

        ll_file = pjoin(LIKELIHOODS_DIR, 'loglikelihoods_%d.pk' % i)
        ll_fid = open(ll_file, 'rb')
        probs_dict = pickle.load(ll_fid)

        # Parallelize decoding over utterances
        print 'Decoding utterances in parallel, n_jobs=%d, file=%d' % (
            NUM_CPUS, i)
        decoded_utts = Parallel(n_jobs=NUM_CPUS)(delayed(decode_utterance)(
            k, probs_dict[k], alis[k], phone_map, lm=clm) for k in keys)

        for k, (hyp, ref, hypscore, refscore, align) in zip(keys, decoded_utts):
            if refscore is None:
                refscore = 0.0
            if hypscore is None:
                hypscore = 0.0
            hyp = replace_contractions(hyp)
            fid.write(k + ' ' + ' '.join(hyp) + '\n')

            hyps.append(hyp)
            refs.append(ref)
            hypscores.append(hypscore)
            refscores.append(refscore)
            numphones.append(len(alis[k]))
            subsets.append('callhm' if k.startswith('en') else 'swbd')
            alignments.append(align)

    fid.close()

    # Pickle some values for computeStats.py
    pkid = open(opts.out_file.replace('.txt', '.pk'), 'wb')
    pickle.dump(hyps, pkid)
    pickle.dump(refs, pkid)
    pickle.dump(hypscores, pkid)
    pickle.dump(refscores, pkid)
    pickle.dump(numphones, pkid)
    pickle.dump(subsets, pkid)
    pickle.dump(alignments, pkid)
    pkid.close()
def compute_grads(raw_dirpath, runs_dirpath, run_name, ds_fac,
                  overwrite_config, tile_info_list, polygon_dirname,
                  output_dirname, output_filepath_format):
    # -- Params:

    # Setup run dir and load config file
    run_dir = run_utils.setup_run_dir(runs_dirpath, run_name)
    _, checkpoints_dir = run_utils.setup_run_subdirs(run_dir)

    config = run_utils.load_config(config_dirpath=run_dir)

    # --- Instantiate model
    output_res = model.MapAlignModel.get_output_res(
        overwrite_config["input_res"], config["pool_count"])
    map_align_model = model.MapAlignModel(
        config["model_name"], overwrite_config["input_res"],
        config["add_image_input"], config["image_channel_count"],
        config["image_feature_base_count"], config["add_poly_map_input"],
        config["poly_map_channel_count"],
        config["poly_map_feature_base_count"],
        config["common_feature_base_count"], config["pool_count"],
        config["add_disp_output"], config["disp_channel_count"],
        config["add_seg_output"], config["seg_channel_count"], output_res,
        overwrite_config["batch_size"], config["loss_params"],
        config["level_loss_coefs_params"], config["learning_rate_params"],
        config["weight_decay"], config["image_dynamic_range"],
        config["disp_map_dynamic_range_fac"], config["disp_max_abs_value"])
    map_align_model.setup_compute_grads()  # Add ops to compute gradients

    saver = tf.train.Saver(save_relative_paths=True)
    with tf.Session() as sess:
        # Restore checkpoint
        restore_checkpoint_success = map_align_model.restore_checkpoint(
            sess, saver, checkpoints_dir)
        if not restore_checkpoint_success:
            sys.exit('No checkpoint found in {}'.format(checkpoints_dir))

        # Compute patch count
        patch_total_count = 0
        for tile_info in tile_info_list:
            patch_total_count += len(tile_info["bbox_list"])

        pbar = tqdm(total=patch_total_count,
                    desc="Computing patch gradients: ")
        for tile_info in tile_info_list:
            # --- Path setup:
            unused_filepath = output_filepath_format.format(
                dir=raw_dirpath, fold=tile_info["fold"],
                out_dir=output_dirname, tile="", b0=0, b1=0, b2=0, b3=0,
                out_name="", ext="")
            os.makedirs(os.path.dirname(unused_filepath), exist_ok=True)
            tile_name = read.IMAGE_NAME_FORMAT.format(
                city=tile_info["city"], number=tile_info["number"])

            # Compute grads for that image
            additional_args = {
                "overwrite_polygon_dir_name": polygon_dirname,
            }
            # t = time.clock()
            image, metadata, polygons = read.load_gt_data(
                raw_dirpath, tile_info["city"], tile_info["number"],
                additional_args=additional_args)
            # t_read = time.clock() - t

            # Downsample
            image, polygons = process_utils.downsample_data(
                image, metadata, polygons, ds_fac,
                config["reference_pixel_size"])
            spatial_shape = image.shape[:2]

            # Draw polygon map
            # t = time.clock()
            polygon_map = polygon_utils.draw_polygon_map(
                polygons, spatial_shape, fill=True, edges=True, vertices=True)
            # t_draw = time.clock() - t

            t_grads = 0
            t_save = 0
            for bbox in tile_info["bbox_list"]:
                p_im = image[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
                p_polygon_map = polygon_map[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
                # p_polygons = polygon_utils.crop_polygons_to_patch_if_touch(polygons, bbox)

                # Grad compute
                t = time.clock()
                grads = map_align_model.compute_grads(sess, p_im,
                                                      p_polygon_map)
                t_grads += time.clock() - t

                # Saving
                t = time.clock()
                flattened_grads_x = get_flattened_gradients(grads["x"])
                flattened_grads_y = get_flattened_gradients(grads["y"])
                flattened_grads = np.stack(
                    [flattened_grads_x, flattened_grads_y], axis=-1)

                # # Save patch for later visualization
                # im_filepath = output_filepath_format.format(
                #     dir=raw_dirpath, fold=tile_info["fold"],
                #     out_dir=output_dirname, tile=tile_name,
                #     b0=bbox[0], b1=bbox[1], b2=bbox[2], b3=bbox[3],
                #     out_name="image", ext="png")
                # skimage.io.imsave(im_filepath, p_im)

                # # Save polygons as well
                # polygons_filepath = output_filepath_format.format(
                #     dir=raw_dirpath, fold=tile_info["fold"],
                #     out_dir=output_dirname, tile=tile_name,
                #     b0=bbox[0], b1=bbox[1], b2=bbox[2], b3=bbox[3],
                #     out_name="polygons", ext="npy")
                # np.save(polygons_filepath, p_polygons)

                # Save grads
                grads_filepath = output_filepath_format.format(
                    dir=raw_dirpath, fold=tile_info["fold"],
                    out_dir=output_dirname, tile=tile_name,
                    b0=bbox[0], b1=bbox[1], b2=bbox[2], b3=bbox[3],
                    out_name="grads", ext="npy")
                np.save(grads_filepath, flattened_grads)
                t_save += time.clock() - t

            pbar.update(len(tile_info["bbox_list"]))
            pbar.set_postfix(t_grads=t_grads, t_save=t_save)
        pbar.close()
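# `get_flattened_gradients` is assumed to turn the per-variable gradient
# arrays returned by compute_grads into a single 1-D vector. A minimal sketch
# of such a helper (the exact structure of `grads` is an assumption, not taken
# from the source):
import numpy as np


def get_flattened_gradients(grad_list):
    """Concatenate a list of gradient arrays into one flat 1-D vector."""
    return np.concatenate([np.asarray(g).ravel() for g in grad_list])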
def inference(runs_dirpath, ori_image, ori_metadata, ori_disp_polygons,
              model_disp_max_abs_value, batch_size, scale_factor, run_name):
    # Setup run dir and load config file
    run_dir = run_utils.setup_run_dir(runs_dirpath, run_name)
    _, checkpoints_dir = run_utils.setup_run_subdirs(run_dir)

    config = run_utils.load_config(
        config_dirpath=os.path.dirname(os.path.realpath(__file__)))
    # run_dir) why would there be a second config in run dir??

    # Downsample
    image, disp_polygons = downsample_data(ori_image, ori_metadata,
                                           ori_disp_polygons, scale_factor,
                                           config["reference_pixel_size"])
    spatial_shape = image.shape[:2]

    # Draw displaced polygon map
    # disp_polygons_to_rasterize = []
    disp_polygons_to_rasterize = disp_polygons
    disp_polygon_map = polygon_utils.draw_polygon_map(
        disp_polygons_to_rasterize, spatial_shape, fill=True, edges=True,
        vertices=True)

    # Compute output_res
    output_res = model.MapAlignModel.get_output_res(config["input_res"],
                                                    config["pool_count"])
    # print("output_res: {}".format(output_res))

    map_align_model = model.MapAlignModel(
        config["model_name"], config["input_res"], config["add_image_input"],
        config["image_channel_count"], config["image_feature_base_count"],
        config["add_poly_map_input"], config["poly_map_channel_count"],
        config["poly_map_feature_base_count"],
        config["common_feature_base_count"], config["pool_count"],
        config["add_disp_output"], config["disp_channel_count"],
        config["add_seg_output"], config["seg_channel_count"], output_res,
        batch_size, config["loss_params"], config["level_loss_coefs_params"],
        config["learning_rate_params"], config["weight_decay"],
        config["image_dynamic_range"], config["disp_map_dynamic_range_fac"],
        model_disp_max_abs_value)

    pred_field_map, segmentation_image = map_align_model.inference(
        image, disp_polygon_map, checkpoints_dir)

    # --- Align disp_polygon according to pred_field_map --- #
    aligned_disp_polygons = disp_polygons
    # First remove polygons that are not fully inside the inner_image
    padding = (spatial_shape[0] - pred_field_map.shape[0]) // 2
    bounding_box = [
        padding, padding, spatial_shape[0] - padding,
        spatial_shape[1] - padding
    ]
    # aligned_disp_polygons = polygon_utils.filter_polygons_in_bounding_box(aligned_disp_polygons, bounding_box)  # TODO: reimplement? But also filter out ori_gt_polygons for comparison
    aligned_disp_polygons = polygon_utils.transform_polygons_to_bounding_box_space(
        aligned_disp_polygons, bounding_box)

    # Then apply displacement field map to aligned_disp_polygons
    aligned_disp_polygons = polygon_utils.apply_disp_map_to_polygons(
        pred_field_map, aligned_disp_polygons)

    # Restore polygons to original image space
    bounding_box = [
        -padding, -padding, spatial_shape[0] + padding,
        spatial_shape[1] + padding
    ]
    aligned_disp_polygons = polygon_utils.transform_polygons_to_bounding_box_space(
        aligned_disp_polygons, bounding_box)

    # Add padding to segmentation_image
    final_segmentation_image = np.zeros(
        (spatial_shape[0], spatial_shape[1], segmentation_image.shape[2]))
    final_segmentation_image[padding:-padding,
                             padding:-padding, :] = segmentation_image

    # --- Upsample outputs --- #
    final_segmentation_image, aligned_disp_polygons = upsample_data(
        final_segmentation_image, ori_metadata, aligned_disp_polygons,
        scale_factor, config["reference_pixel_size"])

    return aligned_disp_polygons, final_segmentation_image
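# A hypothetical usage sketch of inference() above. The file paths, metadata
# layout and argument values are placeholders chosen for illustration, not
# taken from the source.
import skimage.io

ori_image = skimage.io.imread("tiles/bloomington22.png")  # placeholder tile image
ori_metadata = {"pixelsize": 0.3}                         # assumed metadata layout
ori_disp_polygons = []                                    # misaligned polygons to correct

aligned_polygons, seg_image = inference(
    runs_dirpath="runs", ori_image=ori_image, ori_metadata=ori_metadata,
    ori_disp_polygons=ori_disp_polygons, model_disp_max_abs_value=4,
    batch_size=12, scale_factor=8, run_name="ds_fac_8")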
def main(): # --- Process args --- # args = get_args() config = run_utils.load_config(args.config) if config is None: print_utils.print_error( "ERROR: cannot continue without a config file. Exiting now...") exit() print_utils.print_info("Using downscaling factors: {}".format(args.ds_fac)) run_name_list = [RUN_NAME_FORMAT.format(ds_fac) for ds_fac in args.ds_fac] # --- Read image --- # print_utils.print_info("Reading image...") image_filepath = get_abs_path(args.image) image, image_metadata = read_image(image_filepath, args.pixelsize) image = clip_image(image, 0, 255) # hist = np.histogram(image) # print_hist(hist) im_min, im_max = get_min_max(image, std_factor=3) # print("min: {}, max: {}".format(im_min, im_max)) image = stretch_image(image, im_min, im_max, 0, 255) image = clip_image(image, 0, 255) # hist = np.histogram(image) # print_hist(hist) print("Image stats:") print("\tShape: {}".format(image.shape)) print("\tMin: {}".format(image.min())) print("\tMax: {}".format(image.max())) # --- Read shapefile if it exists --- # if args.shapefile is not None: shapefile_filepath = get_abs_path(args.shapefile) gt_polygons = get_shapefile_annotations(image_filepath, shapefile_filepath) else: # --- Load or fetch OSM building data --- # gt_polygons = get_osm_annotations(image_filepath) # --- Print polygon info --- # print("Polygons stats:") print("\tCount: {}".format(len(gt_polygons))) print("\tMin: {}".format(min([polygon.min() for polygon in gt_polygons]))) print("\tMax: {}".format(max([polygon.max() for polygon in gt_polygons]))) if not check_polygons_in_image(image, gt_polygons): print_utils.print_error( "ERROR: polygons are not inside the image. This is most likely due to using the wrong projection when reading the input shapefile. Aborting..." ) exit() print_utils.print_info("Aligning building annotations...") aligned_polygons = test.test_align_gt(args.runs_dirpath, image, image_metadata, gt_polygons, args.batch_size, args.ds_fac, run_name_list, config["disp_max_abs_value"], output_shapefiles=False) print_utils.print_info("Saving aligned building annotations...") save_annotations(args.image, aligned_polygons)
    # Higher alpha -> more and more like most likely sequence
    probs = probs ** alpha
    probs = probs / sum(probs)
    w = np.random.choice(range(model.hps.output_size), p=probs)
    char = chars[w]
    return char


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('cfg_file',
                        help='config file with run data for model to use')
    args = parser.parse_args()

    cfg = load_config(args.cfg_file)
    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()
    model_hps.set_from_dict(cfg)
    opt_hps.set_from_dict(cfg)
    cfg = CfgStruct(**cfg)

    SAMPLES = 100
    SAMPLE_LENGTH = 100  # PARAM
    ALPHA = 1.0  # FIXME PARAM
    LM_ORDER = CONTEXT + 1

    with open(CHAR_CORPUS_VOCAB_FILE, 'rb') as fin:
        char_inds = pickle.load(fin)
def main(_):
    working_dir = os.path.dirname(os.path.abspath(__file__))
    config_dir = os.path.dirname(os.path.realpath(__file__))

    # print FLAGS
    print("#--- FLAGS: ---#")
    print("config: {}".format(FLAGS.config))
    print("new_run: {}".format(FLAGS.new_run))
    print("init_run_name: {}".format(FLAGS.init_run_name))
    print("run_name: {}".format(FLAGS.run_name))
    print("batch_size: {}".format(FLAGS.batch_size))
    print("ds_fac: {}".format(FLAGS.ds_fac))

    # load config file
    config = run_utils.load_config(FLAGS.config, config_dir)

    # Check config setting coherences
    assert len(config["level_loss_coefs_params"]) == config["pool_count"], \
        "level_loss_coefs_params ({} elements) must have model_res_levels ({}) elements".format(
            len(config["level_loss_coefs_params"]), config["pool_count"])

    # Find data_dir
    data_dir = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dir is None:
        print("ERROR: Data directory not found!")
        exit()
    else:
        print("Using data from {}".format(data_dir))

    # Setup dataset dirpaths
    tfrecords_dirpath_list = [
        os.path.join(data_dir, tfrecords_dirpath)
        for tfrecords_dirpath in config["tfrecords_partial_dirpath_list"]
    ]

    # Overwrite config ds_fac if FLAGS specify them
    if FLAGS.ds_fac is not None:
        ds_fac_list = [FLAGS.ds_fac]
        ds_repeat_list = [1]
    else:
        ds_fac_list = config["ds_fac_list"]
        ds_repeat_list = config["ds_repeat_list"]

    # setup init run directory if one is specified:
    if FLAGS.init_run_name is not None:
        init_run_dirpath = run_utils.setup_run_dir(config["runs_dirname"],
                                                   FLAGS.init_run_name)
    else:
        init_run_dirpath = None

    # setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirname"])
    current_run_dirpath = run_utils.setup_run_dir(runs_dir, FLAGS.run_name,
                                                  FLAGS.new_run)

    # save config in logs directory
    run_utils.save_config(config, current_run_dirpath)

    # save FLAGS
    FLAGS_filepath = os.path.join(current_run_dirpath, "FLAGS.json")
    python_utils.save_json(
        FLAGS_filepath, {
            "run_name": FLAGS.run_name,
            "new_run": FLAGS.new_run,
            "batch_size": FLAGS.batch_size,
            "ds_fac": FLAGS.ds_fac,
        })

    train(config, tfrecords_dirpath_list, init_run_dirpath,
          current_run_dirpath, FLAGS.batch_size, ds_fac_list, ds_repeat_list)