def test_ncbi_stockholm(
    namespace_base,
    path_ncbi_stockholm,
    path_ncbi_stockholm_targets,
    tmp_path,
):
    """ncfp collects correct coding sequences for NCBI/Stockholm input.

    This test was added as a check for the fix in issue 31.
    """
    # Modify default arguments
    infile = path_ncbi_stockholm
    outdir = tmp_path / "ncbi_stockholm"
    args = modify_namespace(namespace_base, infname=infile, outdirname=outdir, stockholm=True)

    # Run ersatz command-line
    ncfp.run_main(args)

    # Compare output (should be no skipped files)
    check_files(
        outdir,
        path_ncbi_stockholm_targets,
        ("ncfp_aa.fasta", "ncfp_nt.fasta"),
    )
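# The ncfp tests below all funnel through the same check_files() comparison
# helper, whose implementation is not shown in this collection. A minimal
# sketch of what such a helper might look like follows; the body and the
# assumption that it simply diffs each named output file against the copy in
# the targets directory are inferred from how it is called, not taken from
# the ncfp test suite itself.
from pathlib import Path


def check_files_sketch(outdir, targets_dir, filenames):
    """Assert that each named output file matches the corresponding target file (hypothetical)."""
    for fname in filenames:
        generated = (Path(outdir) / fname).read_text()
        expected = (Path(targets_dir) / fname).read_text()
        assert generated == expected, f"{fname} differs from target"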
def test_small_stockholm_use_protein_id(
    namespace_base,
    path_uniprot_stockholm_small,
    path_uniprot_stockholm_small_use_proteinid_targets,
    tmp_path,
):
    """ncfp collects correct coding sequences for small UniProt/Stockholm input."""
    # Modify default arguments
    infile = path_uniprot_stockholm_small
    outdir = tmp_path / "small_stockholm_use_proteinid"
    args = modify_namespace(
        namespace_base,
        infname=infile,
        outdirname=outdir,
        stockholm=True,
        use_protein_ids=True,
    )

    # Run ersatz command-line
    ncfp.run_main(args)

    # Compare output (should be no skipped files)
    check_files(
        outdir,
        path_uniprot_stockholm_small_use_proteinid_targets,
        ("ncfp_aa.fasta", "ncfp_nt.fasta"),
    )
def run_wps_geogrid(work_root, wps_root, config, args):
    wps_work_dir = os.path.abspath(work_root) + '/wps'
    if not os.path.isdir(wps_work_dir):
        os.mkdir(wps_work_dir)
    os.chdir(wps_work_dir)

    cli.notice(f'Run geogrid.exe at {wps_work_dir} ...')
    if not os.path.isfile('GEOGRID.TBL'):
        run(f'ln -sf {wps_root}/geogrid/GEOGRID.TBL.ARW {wps_work_dir}/GEOGRID.TBL')
    expected_files = ['geo_em.d{:02d}.nc'.format(i + 1) for i in range(config['domains']['max_dom'])]
    if not check_files(expected_files):
        run('rm -f geo_em.d*.nc')
        submit_job(f'{wps_root}/geogrid/src/geogrid.exe', args.np, config, args, logfile='geogrid.log.0000', wait=True)
        if not check_files(expected_files):
            cli.error(f'Failed! Check output {os.path.abspath(wps_work_dir)}/geogrid.out.0000')
        cli.notice('Succeeded.')
    else:
        cli.notice('File geo_em.*.nc already exist.')
    run(f'ls -l {wps_work_dir}/geo_em.*.nc')
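# The WRF/WPS/WRFDA driver functions in this collection repeatedly guard work
# behind check_files(expected_files). The helper itself is defined elsewhere
# in those scripts' utils module; the sketch below is an assumption based only
# on how it is called here (a list of paths in, a bool out, an optional
# fatal flag as in the WRFDA build step, and tolerance for a bare string path
# as in run_wrfda_update_bc). It is not the scripts' actual implementation.
import os


def check_files_sketch(expected_files, fatal=False):
    """Return True only when every expected file exists and is non-empty (hypothetical)."""
    if isinstance(expected_files, str):
        expected_files = [expected_files]
    missing = [f for f in expected_files if not os.path.isfile(f) or os.path.getsize(f) == 0]
    if missing and fatal:
        raise RuntimeError(f'Missing or empty files: {missing}')
    return not missing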
def run_wrfplus_ad(work_root, wrfplus_root, config, args):
    start_time = config['custom']['start_time']
    end_time = config['custom']['end_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)
    max_dom = config['domains']['max_dom']

    wrf_work_dir = os.path.abspath(work_root) + '/wrf'
    if not os.path.isdir(wrf_work_dir):
        cli.error(f'WRF work directory {wrf_work_dir} does not exist!')

    wrfplus_work_dir = os.path.abspath(work_root) + '/wrfplus'
    if not os.path.isdir(wrfplus_work_dir):
        cli.error(f'WRFPLUS has not been configured! Run config_wrfplus.py first.')
    os.chdir(wrfplus_work_dir)

    if os.path.isfile(f'{wrf_work_dir}/wrfinput_d01_{start_time_str}'):
        run(f'ln -sf {wrf_work_dir}/wrfinput_d01 .')
    elif os.path.isfile(f'{wrf_work_dir}/wrfout_d01_{start_time_str}'):
        run(f'ln -sf {wrf_work_dir}/wrfout_d01_{start_time_str} wrfinput_d01')
    run(f'ln -sf {wrf_work_dir}/wrfbdy_d01 .')
    if not os.path.isfile('final_sens_d01'):
        cli.error('There is no final_sens_d01 file!')

    version = wrf_version(wrfplus_root)

    cli.stage(f'Run WRFPLUS at {wrfplus_work_dir} ...')
    expected_files = ['wrfout_d{:02d}_{}'.format(i + 1, start_time_str) for i in range(max_dom)]
    expected_files.append(f'init_sens_d01_{start_time_str}')
    if not check_files(expected_files) or args.force:
        run('rm -f wrfout_*')
        run(f'ln -sf {wrfplus_root}/run/LANDUSE.TBL .')
        run(f'ln -sf {wrfplus_root}/run/VEGPARM.TBL .')
        run(f'ln -sf {wrfplus_root}/run/SOILPARM.TBL .')
        run(f'ln -sf {wrfplus_root}/run/GENPARM.TBL .')
        run(f'ln -sf {wrfplus_root}/run/RRTM_DATA_DBL RRTM_DATA')
        run(f'ln -sf {wrfplus_root}/run/ETAMPNEW_DATA_DBL ETAMPNEW_DATA')
        if version >= Version('4.0'):
            cmd = f'{wrfplus_root}/run/wrfplus.exe'
        else:
            cmd = f'{wrfplus_root}/run/wrf.exe'
        retries = 0
        while True:
            submit_job(cmd, args.np, config, args, wait=True)
            if os.path.isfile(f'gradient_wrfplus_d01_{start_time_str}'):
                run(f'mv gradient_wrfplus_d01_{start_time_str} init_sens_d01_{start_time_str}')
            if not check_files(expected_files):
                if retries == 10:
                    cli.error(f'Failed! Check output {os.path.abspath(wrfplus_work_dir)}/rsl.error.0000.')
                retries = retries + 1
                cli.warning('Failed to run wrfplus, retry it!')
            else:
                break
        cli.notice('Succeeded.')
    else:
        cli.notice('File wrfout_* already exist.')
    run(f'ls -l {wrfplus_work_dir}/wrfout_*')
def test_basic_ncbi(namespace_base, path_ncbi, path_ncbi_targets, tmp_path):
    """ncfp collects correct coding sequences for basic NCBI input."""
    # Modify default arguments
    infile = path_ncbi
    outdir = tmp_path / "basic_ncbi"
    args = modify_namespace(namespace_base, infname=infile, outdirname=outdir)

    # Run ersatz command-line
    ncfp.run_main(args)

    # Compare output
    check_files(outdir, path_ncbi_targets, ("ncfp_aa.fasta", "ncfp_nt.fasta"))
def test_alternative_start(namespace_base, path_altstart, path_altstart_targets, tmp_path):
    """ncfp collects correct coding sequences for NCBI input with alternative start codon."""
    infile = path_altstart
    outdir = tmp_path / "alternative_start"
    args = modify_namespace(namespace_base, infname=infile, outdirname=outdir, alternative_start_codon=True)

    # Run ersatz command-line
    ncfp.run_main(args)

    # Compare output
    check_files(outdir, path_altstart_targets, ("ncfp_aa.fasta", "ncfp_nt.fasta"))
def load_csv_into_db(ld):
    """
    Load tsv, lst, csv files into the temporary tables.
    Intermediate step to store tmp raw data before storing it into the real tables.
    :param ld: the directory of files downloaded from the FTP server provided by federfarma.co
    :return: None
    """
    if check_db_empty():
        logger.info("___Ready to load lst file into table___")
        if check_files(ld, 'lst') and check_tables():
            for entry in ASSOCIATION_LIST:
                try:
                    file = os.path.join(ld, '{0}.{1}'.format(entry[0], 'lst'))
                    copy_str = "\copy %s from %s CSV DELIMITER '~' HEADER;" % (entry[1], file)
                    command = 'export PGPASSWORD=%s\n psql -U %s -h %s -p %s -d %s -c "%s"' % \
                              (PGPASS, PGUSER, PGHOST, PGPORT, PGDBNAME, copy_str)
                    logger.info('___SQLExec: {0}___'.format(command))
                    subprocess.check_call(command, shell=True)
                except subprocess.CalledProcessError:
                    logger.error("___Failed to invoke psql for: {0}___".format(entry))
                    logger.info("___Now truncate all the tmp tables!!!___")
                    truncate_tables()
                    raise psycopg2.DatabaseError
        else:
            logger.error('___Check Tables: {0} --- Check Files: {1}___'.format(check_tables(), check_files(ld, 'lst')))
            raise Exception('___Tables or files not present in db or filesystem!!!___')
    else:
        logger.error('___DB is not empty!!!___')
        raise Exception('Object DB', 'not empty')
def test_basic_uniprot(namespace_base, path_uniprot, path_uniprot_targets, tmp_path, mock_basic_uniprot):
    """ncfp collects correct coding sequences for basic UniProt input."""
    # Modify default arguments
    infile = path_uniprot
    outdir = tmp_path / "basic_uniprot"
    args = modify_namespace(namespace_base, infname=infile, outdirname=outdir)

    # Run ersatz command-line
    ncfp.run_main(args)

    # Compare output
    check_files(
        outdir,
        path_uniprot_targets,
        ("ncfp_aa.fasta", "ncfp_nt.fasta", "skipped.fasta"),
    )
def run_met(work_root, met_root, config, args):
    start_time = config['custom']['start_time']
    end_time = config['custom']['end_time']
    max_dom = config['domains']['max_dom']
    start_time_str = start_time.format('YYYY-MM-DD_HH:mm:ss')
    end_time_str = end_time.format('YYYY-MM-DD_HH:mm:ss')

    upp_work_dir = work_root + '/upp'
    if not os.path.isdir(upp_work_dir):
        cli.error('UPP is not run successfully!')

    met_work_dir = work_root + '/met'
    if not os.path.isdir(met_work_dir):
        os.makedirs(met_work_dir)
    os.chdir(met_work_dir)

    cli.stage('Prepare observation file.')
    expected_files = ['ob.nc']
    if not check_files(expected_files) or args.force:
        if args.littler_root:
            if 'obs' in config['custom']:
                if 'little_r' in config['custom']['obs']:
                    dir_pattern = config['custom']['obs']['little_r']['dir_pattern']
                    file_pattern = config['custom']['obs']['little_r']['file_pattern']
                    obs_dir = Template(dir_pattern).render(obs_time=start_time)
                    obs_file = Template(file_pattern).render(obs_time=start_time)
                    if not os.path.isfile(f'{args.littler_root}/{obs_dir}/{obs_file}'):
                        cli.error(f'Observation {args.littler_root}/{obs_dir}/{obs_file} does not exist!')
                    run(f'{met_root}/bin/ascii2nc -format little_r {args.littler_root}/{obs_dir}/{obs_file} ob.nc')
        elif args.prepbufr_root:
            pass
        if not check_files(('ob.nc')):
            cli.error('Failed to prepare netCDF observation file!')
    run(f'ls -l {met_work_dir}/ob.nc')

    cli.stage('Prepare configuration file.')
    expected_files = ['foo']
    if not check_files(expected_files) or args.force:
        run(f'cp -f {met_root}/share/met/config/PointStatConfig_default PointStatConfig')

    cli.notice('Succeeded.')
def main(args):
    runinfo = RunInfo(args)
    utils.check_files([
        'iter_0/ref.out', 'input.fa', 'init.pdb', 'disulf.def',
        't000_.3mers', 't000_.9mers',
        '%s/source/bin/extract_pdbs%s' % (ROSETTAPATH, ROSETTASUFFIX),
        '%s/source/bin/rosetta_scripts%s' % (ROSETTAPATH, ROSETTASUFFIX),
        PARALLEL
    ])

    for it in range(runinfo.opt.niter):
        runinfo.update_phase('iter_%d/PHASE' % it)
        if os.path.exists('iter_%d/DONE' % it):
            continue
        runinfo.it = it

        print("Preparing for iter %d..." % (it))
        gen_iter(runinfo)

        os.chdir('iter_%d' % it)
        print('%s/source/bin/extract_pdbs%s' % (ROSETTAPATH, ROSETTASUFFIX) +
              ' -in:file:silent ref.out 1> /dev/null 2>/dev/null')
        os.system('%s/source/bin/extract_pdbs%s' % (ROSETTAPATH, ROSETTASUFFIX) +
                  ' -in:file:silent ref.out 1> /dev/null 2>/dev/null')
        runinfo.update_dcut()

        if runinfo.is_recomb_iter(it):
            # make a separate logic
            #run_iter_recomb(runinfo)
            pass
        else:
            # DL portion comes here
            prepick(runinfo)
            run_iter(runinfo)
            postpick(runinfo)
        os.chdir(runinfo.curr)

    finalize(runinfo)
    os.chdir(runinfo.curr)
def test_basic_stockholm(namespace_base, path_stockholm, path_stockholm_targets, tmp_path):
    """ncfp collects correct coding sequences for basic UniProt/Stockholm input."""
    # Modify default arguments
    infile = path_stockholm
    outdir = tmp_path / "basic_stockholm"
    args = modify_namespace(namespace_base, infname=infile, outdirname=outdir, stockholm=True)

    # Run ersatz command-line
    ncfp.run_main(args)

    # Compare output (should be no skipped files)
    check_files(
        outdir,
        path_stockholm_targets,
        ("ncfp_aa.fasta", "ncfp_nt.fasta"),
    )
def main():
    if SemSyncManager().is_green():  # Check the EXP_SEM_SYNC table to verify whether the files can be fetched
        ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        SemSyncManager().write_flags(1, 1, ts, '')
        FtpCrudStuff(HOST, USER, PASSWORD).dirlist()
        FtpCrudStuff(HOST, USER, PASSWORD).getfiles(SAVE_TO, deleteremotefiles=False)  # Get files from FTP server
        ld = DirUtils(ROOT_DIR).get_last_created_dir()
        truncate_tables()
        if check_tables() and check_files(ld, 'zip'):  # Go on if all tables and all files are present
            if ZipUtils(ld).unzipfiles():  # Unzip all files within the last created dir
                DirUtils(ld).delete_file_with_extension('.zip')  # Delete all .zip files
                for f in DirUtils(ld).listfiletype(['lst']):
                    # Clean each file from spaces and strange stuff
                    # if f[0] in ['FARMACIE_UNIFARM.lst', 'FORNITORE_FARMACIA_UNIFARM.lst', 'CAUSALE_UNIFARM.lst']:
                    #     cleanfile([os.path.join(ld, f[0])], '\s*~', False)
                    # else:
                    cleanfile('s/\s*~\s*/~/g', os.path.join(ld, f[0]))
                try:
                    load_csv_into_db(ld)  # Load .lst files into Postgres temporary tables
                    ts2 = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    SemSyncManager().write_flags(1, 0, ts, ts2)  # Write the right flag into the EXP_SEM_SYNC table
                    ExpLogKeysManager().write_flag_import(synckey_list())  # Write the right flag into EXP_LOG_KEYS
                    store_sync_keys(synckey_list())  # Save the acquired keys
                    flow_temp_data()  # Transfer the data from the tmp tables to the real ones
                except (Exception, psycopg2.DatabaseError):
                    logger.error('___An error occurred during db import___')
                    SemSyncManager().write_flags(0, 1, '', '')
                    shutil.rmtree(ld, ignore_errors=True)
                    sys.exit(1)
            else:
                shutil.rmtree(ld, ignore_errors=True)
                SemSyncManager().write_flags(0, 1, '', '')
                logger.error('___All files were deleted and nothing was loaded into the temp DB! Read the log stack please!___')
                sys.exit(1)
        else:
            SemSyncManager().write_flags(0, 1, '', '')
            shutil.rmtree(ld, ignore_errors=True)
            logger.error('___Check Tables: {0} --- Check Files: {1}___'.format(check_tables(), check_files(ld, 'zip')))
            sys.exit(1)
    else:
        logger.error('___Unable to start import. Sem sync is not green!___')
        sys.exit(1)
    trainer = MUNIT_Trainer(config)
elif opts.trainer == 'UNIT':
    trainer = UNIT_Trainer(config)
else:
    sys.exit("Only support MUNIT|UNIT")
trainer.cuda()

#train_loader_a, train_loader_b, test_loader_a, test_loader_b, folders = get_all_data_loaders(config)
(train_loader_a, tr_a), (train_loader_b, tr_b), (test_loader_a, test_a), (test_loader_b, test_b), folders = get_all_data_loaders_better(config)

if opts.check_files and False:
    print("Checking files...")
    for folder in folders:
        print(folder)
        utils.check_files(folder, delete_bad=False)
    print("Done checking files.")

#print(train_loader_a.dataset[0])
train_display_images_a = torch.stack([train_loader_a.dataset[i][0] for i in range(display_size)]).cuda()
train_display_images_b = torch.stack([train_loader_b.dataset[i][0] for i in range(display_size)]).cuda()
test_display_images_a = torch.stack([test_loader_a.dataset[i][0] for i in range(display_size)]).cuda()
test_display_images_b = torch.stack([test_loader_b.dataset[i][0] for i in range(display_size)]).cuda()

# Setup logger and output folders
model_name = os.path.splitext(os.path.basename(opts.config))[0]
    trainer = MUNIT_Trainer(config)
elif opts.trainer == 'UNIT':
    trainer = UNIT_Trainer(config)
else:
    sys.exit("Only support MUNIT|UNIT")
#trainer.cuda()

#train_loader_a, train_loader_b, test_loader_a, test_loader_b, folders = get_all_data_loaders(config)
(train_loader_a, tr_a), (train_loader_b, tr_b), (test_loader_a, test_a), (test_loader_b, test_b), folders = get_all_data_loaders_better(config)

if opts.check_files and False:
    print("Checking files...")
    for folder in folders:
        print(folder)
        utils.check_files(folder)
    print("Done checking files.")

# Setup logger and output folders
model_name = os.path.splitext(os.path.basename(opts.config))[0]
log_dir = utils.increment_path(name="Run", base_path=os.path.join(opts.output_path + "/logs", model_name), make_directory=True)
train_writer = tensorboardX.SummaryWriter(log_dir)
output_directory = os.path.join(opts.output_path + "/outputs", model_name)
output_directory = utils.increment_path(name="Run", base_path=output_directory, make_directory=True)
checkpoint_directory, image_directory = prepare_sub_folder(output_directory)
shutil.copy(opts.config, os.path.join(output_directory, 'config.yaml'))  # copy config file to output folder

# Start training
iterations = trainer.resume(checkpoint_directory, hyperparameters=config) if opts.resume else 0
def build_wrf(wrf_root, wps_root, wrfplus_root, wrfda_root, args):
    if not 'HDF5' in os.environ:
        res = subprocess.run(['which', 'h5dump'], stdout=subprocess.PIPE)
        if res.returncode == 0:
            os.environ['HDF5'] = os.path.dirname(os.path.dirname(res.stdout.decode('utf-8')))
            cli.notice(f'Set HDF5 to {os.environ["HDF5"]}')
    if not 'HDF5' in os.environ:
        cli.warning('HDF5 environment variable is not set')

    if not 'NETCDF' in os.environ:
        res = subprocess.run(['which', 'nf-config'], stdout=subprocess.PIPE)
        if res.returncode == 0:
            os.environ['NETCDF'] = os.path.dirname(os.path.dirname(res.stdout.decode('utf-8')))
            res = subprocess.run(['nf-config', '--includedir'], stdout=subprocess.PIPE)
            os.environ['NETCDF_INC'] = res.stdout.decode('utf-8').strip()
            res = subprocess.run(['nf-config', '--flibs'], stdout=subprocess.PIPE)
            os.environ['NETCDF_LIB'] = re.search(r'-L([^ ]*)', res.stdout.decode('utf-8'))[1]
            cli.notice(f'Set NETCDF_INC to {os.environ["NETCDF_INC"]}')
            cli.notice(f'Set NETCDF_LIB to {os.environ["NETCDF_LIB"]}')
    if not 'NETCDF' in os.environ:
        cli.warning('NETCDF environment variable is not set!')

    if not 'JASPERINC' in os.environ or not 'JASPERLIB' in os.environ:
        if 'JASPER_ROOT' in os.environ:
            os.environ['JASPERINC'] = os.environ['JASPER_ROOT'] + '/include'
            os.environ['JASPERLIB'] = os.environ['JASPER_ROOT'] + '/lib'
            cli.notice(f'Set JASPERINC to {os.environ["JASPERINC"]}.')
            cli.notice(f'Set JASPERLIB to {os.environ["JASPERLIB"]}.')
        else:
            cli.error('JASPERINC and JASPERLIB environment variables are not set!')

    if not 'LIBPNG_ROOT' in os.environ:
        cli.warning('LIBPNG_ROOT environment variable is not set. Library PNG may not be found!')

    if not 'WRFIO_NCD_LARGE_FILE_SUPPORT' in os.environ:
        os.environ['WRFIO_NCD_LARGE_FILE_SUPPORT'] = '1'
        cli.notice('Set WRFIO_NCD_LARGE_FILE_SUPPORT to 1.')

    if args.rttov:
        os.environ['RTTOV'] = args.rttov
        cli.notice(f'Use RTTOV in {args.rttov}.')

    # ---------------------------------------------------------------------------------
    # WRF
    os.chdir(wrf_root)
    version = wrf_version(wrf_root)
    if version <= Version('3.6.1'):
        os.environ['BUFR'] = '1'
    # Fix possible code bugs.
    if Version('3.6.1') <= version <= Version('3.8.1'):
        edit_file('phys/module_cu_g3.F', [['integer, dimension \(12\) :: seed', 'integer, dimension (33) :: seed']])
    if args.force:
        run('./clean -a 1> /dev/null 2>&1')
    expected_exe_files = ('main/wrf.exe', 'main/real.exe', 'main/ndown.exe', 'main/tc.exe')
    if not check_files(expected_exe_files):
        cli.notice('Configure WRF ...')
        if args.use_grib:
            cli.notice('Set GRIB2 flag.')
            edit_file('./arch/Config.pl', [
                ['\$I_really_want_to_output_grib2_from_WRF = "FALSE"', '$I_really_want_to_output_grib2_from_WRF = "TRUE"']
            ])
        if args.use_hyb:
            child = pexpect.spawn('./configure -hyb', encoding='utf-8')
        else:
            child = pexpect.spawn('./configure', encoding='utf-8')
        child.expect('Enter selection.*')
        if platform.system() == 'Darwin':
            if args.compiler_suite == 'gnu':
                child.sendline('15')
        else:
            if args.compiler_suite == 'intel':
                if args.openmp:
                    child.sendline('16')  # INTEL (ifort/icc) dm+sm
                else:
                    child.sendline('15')  # INTEL (ifort/icc) dmpar
            elif args.compiler_suite == 'gnu':
                if args.openmp:
                    child.sendline('35')  # GNU (gfortran/gcc) dm+sm
                else:
                    child.sendline('34')  # GNU (gfortran/gcc) dmpar
            elif args.compiler_suite == 'pgi':
                if args.openmp:
                    child.sendline('55')  # PGI (pgf90/pgcc) dm+sm
                else:
                    child.sendline('54')  # PGI (pgf90/pgcc) dmpar
        child.expect('Compile for nesting.*:')
        child.sendline('1')
        if platform.system() == 'Darwin':
            child.expect('This build of WRF will use NETCDF4 with HDF5 compression')
        child.wait()
        if args.compiler_suite == 'intel':
            edit_file('./configure.wrf', [
                ['mpif90', 'mpiifort'],
                ['mpicc', 'mpiicc']
            ])
        elif args.compiler_suite == 'pgi':
            edit_file('./configure.wrf', [
                ['pgf90', 'pgfortran'],
                ['mpif90', 'mpifort']
            ])
        # Fix for OpenMPI.
        edit_file('./configure.wrf', [
            ['DM_CC\s*=\s*mpicc\s*$', 'DM_CC = mpicc -DMPI2_SUPPORT\n']
        ])
        cli.notice('Compile WRF ...')
        if args.debug:
            if args.compiler_suite == 'intel':
                debug_options = '-O0 -g -traceback'
            elif args.compiler_suite == 'gnu':
                debug_options = '-O0 -g -fbacktrace'
            edit_file('configure.wrf', [
                ['FCFLAGS\s*=\s*\$\(FCOPTIM\)\s*\$\(FCBASEOPTS\)', f'FCFLAGS = {debug_options} $(FCBASEOPTS)']
            ])
        if args.verbose:
            run(f'./compile em_real')
        else:
            run(f'./compile em_real 1> compile.out 2>&1')
        if check_files(expected_exe_files):
            cli.notice('Succeeded.')
        else:
            if args.verbose:
                cli.error('Failed!')
            else:
                cli.error(f'Failed! Check {wrf_root}/compile.out')
    else:
        cli.notice('WRF is already built.')

    # ---------------------------------------------------------------------------------
    # WPS
    os.chdir(wps_root)
    if args.force:
        run('./clean -a 1> /dev/null 2>&1')
    expected_exe_files = ('geogrid/src/geogrid.exe', 'metgrid/src/metgrid.exe', 'ungrib/src/ungrib.exe')
    if not check_files(expected_exe_files):
        cli.notice('Configure WPS ...')
        child = pexpect.spawn('./configure')
        child.expect('Enter selection.*')
        if args.compiler_suite == 'intel':
            child.sendline('19')  # Linux x86_64, Intel compiler (dmpar)
        elif args.compiler_suite == 'gnu':
            child.sendline('3')   # Linux x86_64, gfortran (dmpar)
        elif args.compiler_suite == 'pgi':
            child.sendline('7')
        child.wait()
        if args.compiler_suite == 'intel':
            edit_file('./configure.wps', [
                ['mpif90', 'mpiifort'],
                ['mpicc', 'mpiicc']
            ])
        elif args.compiler_suite == 'pgi':
            edit_file('./configure.wps', [
                ['pgf90', 'pgfortran'],
                ['mpif90', 'mpifort']
            ])
        else:
            run('sed -i "s/mpicc -cc=.*/mpicc/" configure.wps')
            run('sed -i "s/mpif90 -f90=.*/mpif90/" configure.wps')
        run('sed -i "s/WRF_DIR\s*=.*/WRF_DIR = ..\/WRF/" configure.wps')
        if 'LIBPNG_ROOT' in os.environ:
            run(f'sed -i "s@COMPRESSION_LIBS\s*=\(.*\)@COMPRESSION_LIBS = \\1 -L{os.environ["LIBPNG_ROOT"]}/lib@" configure.wps')
            run(f'sed -i "s@COMPRESSION_INC\s*=\(.*\)@COMPRESSION_INC = \\1 -I{os.environ["LIBPNG_ROOT"]}/include@" configure.wps')
        if args.compiler_suite == 'gnu':
            # Fix for gfortran 9.1.0.
            edit_file('ungrib/src/ngl/g2/intmath.f', [['iand\(i,i-1\)/=0', 'iand(i,i-1_8)/=0']], return_on_first_match=True)
            edit_file('ungrib/src/ngl/g2/intmath.f', [['iand\(i,i-1\)/=0', 'iand(i,i-1_4)/=0']], return_on_first_match=True)
            edit_file('ungrib/src/ngl/g2/intmath.f', [['iand\(i,i-1\)/=0', 'iand(i,i-1_2)/=0']], return_on_first_match=True)
            edit_file('ungrib/src/ngl/g2/intmath.f', [['iand\(i,i-1\)/=0', 'iand(i,i-1_1)/=0']], return_on_first_match=True)
        # Fix for OpenMPI.
        edit_file('./configure.wps', [
            ['DM_CC\s*=\s*mpicc\s*$', 'DM_CC = mpicc -DMPI2_SUPPORT\n']
        ])
        cli.notice('Compile WPS ...')
        if args.verbose:
            run('./compile')
        else:
            run('./compile 1> compile.out 2>&1')
        if check_files(expected_exe_files):
            cli.notice('Succeeded.')
        else:
            if args.verbose:
                cli.error('Failed!')
            else:
                cli.error(f'Failed! Check {wps_root}/compile.out')
    else:
        cli.notice('WPS is already built.')

    # ---------------------------------------------------------------------------------
    # WRFPLUS
    os.chdir(wrfplus_root)
    if args.force:
        run('./clean -a 1> /dev/null 2>&1')
    if Version('3.6.1') <= version <= Version('3.9.1'):
        edit_file('phys/module_cu_g3.F', [['integer, dimension \(12\) :: seed', 'integer, dimension (33) :: seed']])
        if version == Version('3.6.1'):
            line_number = 841
        elif version == Version('3.8.1'):
            line_number = 855
        elif version == Version('3.9.1'):
            line_number = 856
        else:
            cli.error('Find out the wrong OpenMP directive in WRFPLUS/main/module_wrf_top.F!')
        edit_file('main/module_wrf_top.F', [[line_number, ' !$OMP DEFAULT (SHARED) PRIVATE ( ij )\n']])
    if version >= Version('4.0'):
        expected_exe_files = ('main/wrfplus.exe')
    else:
        expected_exe_files = ('main/wrf.exe')
    if not check_files(expected_exe_files):
        cli.notice('Configure WRFPLUS ...')
        if args.use_grib:
            cli.notice('Set GRIB2 flag.')
            edit_file('./arch/Config.pl', [
                ['\$I_really_want_to_output_grib2_from_WRF = "FALSE"', '$I_really_want_to_output_grib2_from_WRF = "TRUE"']
            ])
        child = pexpect.spawn('./configure wrfplus')
        child.expect('Enter selection.*')
        if args.compiler_suite == 'intel':
            if version <= Version('3.6.1'):
                child.sendline('8')
            else:
                child.sendline('34')
        elif args.compiler_suite == 'gnu':
            child.sendline('18')
        elif args.compiler_suite == 'pgi':
            child.sendline('28')
        child.wait()
        if args.compiler_suite == 'intel':
            edit_file('./configure.wrf', [
                ['mpif90', 'mpiifort'],
                ['mpicc', 'mpiicc'],
                ['override-limits', 'qoverride-limits']
            ])
        # Fix for OpenMPI.
        edit_file('./configure.wrf', [
            ['DM_CC\s*=\s*mpicc\s*$', 'DM_CC = mpicc -DMPI2_SUPPORT\n']
        ])
        cli.notice('Compile WRFPLUS ...')
        if args.debug:
            if args.compiler_suite == 'intel':
                debug_options = '-O0 -g -traceback'
            elif args.compiler_suite == 'gnu':
                debug_options = '-O0 -g -fbacktrace'
            edit_file('configure.wrf', [
                ['FCFLAGS\s*=\s*\$\(FCOPTIM\)\s*\$\(FCBASEOPTS\)', f'FCFLAGS = {debug_options} $(FCBASEOPTS)']
            ])
        if version >= Version('4.0'):
            build_target = 'wrfplus'
        else:
            build_target = 'wrf'
        if args.verbose:
            run(f'./compile {build_target}')
        else:
            run(f'./compile {build_target} 1> compile.out 2>&1')
        if check_files(expected_exe_files):
            cli.notice('Succeeded.')
        else:
            if args.verbose:
                cli.error('Failed!')
            else:
                cli.error(f'Failed! Check {wrfplus_root}/compile.out')
    else:
        cli.notice('WRFPLUS is already built.')

    # ---------------------------------------------------------------------------------
    # WRFDA
    os.chdir(wrfda_root)
    os.environ['WRFPLUS_DIR'] = wrfplus_root
    if args.force:
        run('./clean -a 1> /dev/null 2>&1')
    if Version('3.6.1') <= version <= Version('3.9.1'):
        cli.warning(f'Fix {wrfda_root}/var/da/da_define_structures/da_zero_y.inc')
        edit_file('var/da/da_define_structures/da_zero_y.inc', [
            [', value \)', ', value_ )'],
            [':: value$', ':: value_\nreal value'],
            ['if \(.not.\(present\(value\)\)\) value = 0.0', '''
if (.not.(present(value_))) then
   value = 0.0
else
   value = value_
end if
''']
        ])
    if version == Version('4.1.1'):
        cli.warning(f'Fix {wrfda_root}/share/input_wrf.F')
        edit_file('share/input_wrf.F', [
            ['FUNCTION check_which_switch', 'FUNCTION check_which_switch1']
        ])
    expected_exe_files = [
        'var/build/da_advance_time.exe', 'var/build/da_bias_airmass.exe', 'var/build/da_bias_scan.exe',
        'var/build/da_bias_sele.exe', 'var/build/da_bias_verif.exe', 'var/build/da_rad_diags.exe',
        'var/build/da_tune_obs_desroziers.exe', 'var/build/da_tune_obs_hollingsworth1.exe',
        'var/build/da_tune_obs_hollingsworth2.exe', 'var/build/da_update_bc_ad.exe', 'var/build/da_update_bc.exe',
        'var/build/da_verif_grid.exe', 'var/build/da_verif_obs.exe', 'var/build/da_wrfvar.exe',
        'var/build/gen_be_addmean.exe', 'var/build/gen_be_cov2d3d_contrib.exe', 'var/build/gen_be_cov2d.exe',
        'var/build/gen_be_cov3d2d_contrib.exe', 'var/build/gen_be_cov3d3d_bin3d_contrib.exe',
        'var/build/gen_be_cov3d3d_contrib.exe', 'var/build/gen_be_cov3d.exe', 'var/build/gen_be_diags.exe',
        'var/build/gen_be_diags_read.exe', 'var/build/gen_be_ensmean.exe', 'var/build/gen_be_ensrf.exe',
        'var/build/gen_be_ep1.exe', 'var/build/gen_be_ep2.exe', 'var/build/gen_be_etkf.exe',
        'var/build/gen_be_hist.exe', 'var/build/gen_be_stage0_gsi.exe', 'var/build/gen_be_stage0_wrf.exe',
        'var/build/gen_be_stage1_1dvar.exe', 'var/build/gen_be_stage1.exe', 'var/build/gen_be_stage1_gsi.exe',
        'var/build/gen_be_stage2_1dvar.exe', 'var/build/gen_be_stage2a.exe', 'var/build/gen_be_stage2.exe',
        'var/build/gen_be_stage2_gsi.exe', 'var/build/gen_be_stage3.exe', 'var/build/gen_be_stage4_global.exe',
        'var/build/gen_be_stage4_regional.exe', 'var/build/gen_be_vertloc.exe', 'var/build/gen_mbe_stage2.exe',
        'var/obsproc/src/obsproc.exe']
    if not check_files(expected_exe_files):
        cli.notice('Configure WRFDA ...')
        if args.use_grib:
            cli.notice('Set GRIB2 flag.')
            edit_file('./arch/Config.pl', [
                ['\$I_really_want_to_output_grib2_from_WRF = "FALSE"', '$I_really_want_to_output_grib2_from_WRF = "TRUE"']
            ])
        child = pexpect.spawn('./configure 4dvar')
        child.expect('Enter selection.*')
        if args.compiler_suite == 'intel':
            child.sendline('8')
        elif args.compiler_suite == 'gnu':
            child.sendline('18')
        elif args.compiler_suite == 'pgi':
            child.sendline('28')
        child.wait()
        if args.compiler_suite == 'intel':
            edit_file('./configure.wrf', [
                ['mpif90', 'mpiifort'],
                ['mpicc', 'mpiicc']
            ])
        # Fix for OpenMPI.
        edit_file('./configure.wrf', [
            ['DM_CC\s*=\s*mpicc\s*$', 'DM_CC = mpicc -DMPI2_SUPPORT\n']
        ])
        cli.notice('Compile WRFDA ...')
        if args.debug:
            if args.compiler_suite == 'intel':
                debug_options = '-O0 -g -traceback'
            elif args.compiler_suite == 'gnu':
                debug_options = '-O0 -g -fbacktrace'
            edit_file('configure.wrf', [
                ['FCFLAGS\s*=\s*\$\(FCOPTIM\)\s*\$\(FCBASEOPTS\)', f'FCFLAGS = {debug_options} $(FCBASEOPTS)']
            ])
        if args.verbose:
            run(f'./compile all_wrfvar')
        else:
            run(f'./compile all_wrfvar 1> compile.out 2>&1')
        if check_files(expected_exe_files, fatal=True):
            cli.notice('Succeeded.')
        else:
            if args.verbose:
                cli.error('Failed!')
            else:
                cli.error(f'Failed! Check {wrfda_root}/compile.out')
    else:
        cli.notice('WRFDA is already built.')
def run_wrfda_3dvar(work_root, wrfda_root, config, args, wrf_work_dir=None, force=False, tag=None, fg=None):
    start_time = config['custom']['start_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)
    max_dom = config['domains']['max_dom']

    if not wrf_work_dir:
        if tag != None:
            wrf_work_dir = f'{work_root}/wrf_{tag}'
        else:
            wrf_work_dir = f'{work_root}/wrf'

    if tag != None:
        obsproc_work_dir = f'{work_root}/wrfda_{tag}/obsproc'
    else:
        obsproc_work_dir = f'{work_root}/wrfda/obsproc'

    if max_dom > 1:
        dom_str = 'd' + str(config['custom']['wrfda']['dom'] + 1).zfill(2)
        if tag != None:
            wrfda_work_dir = f'{work_root}/wrfda_{tag}/{dom_str}'
        else:
            wrfda_work_dir = f'{work_root}/wrfda/{dom_str}'
    else:
        dom_str = 'd01'
        if tag != None:
            wrfda_work_dir = f'{work_root}/wrfda_{tag}'
        else:
            wrfda_work_dir = f'{work_root}/wrfda'
    if not os.path.isdir(wrfda_work_dir):
        os.mkdir(wrfda_work_dir)
    os.chdir(wrfda_work_dir)

    cli.stage(f'Run da_wrfvar.exe at {wrfda_work_dir} ...')

    if os.path.isfile(f'wrfvar_output_{start_time_str}') and not args.force and not force:
        run(f'ls -l wrfvar_output_{start_time_str}')
        cli.notice(f'wrfvar_output_{start_time_str} already exists.')
        return

    run(f'ln -sf {wrfda_root}/run/LANDUSE.TBL {wrfda_work_dir}')

    if not os.path.isfile('namelist.input'):
        cli.error('namelist.input has not been generated! Run config_wrfda.py.')

    # BE matrix
    if 'cv_options' in config['wrfvar7']:
        be_work_dir = os.path.dirname(os.path.abspath(work_root)) + '/be/' + dom_str
        if not os.path.isdir(be_work_dir):
            be_work_dir = os.path.dirname(os.path.abspath(work_root)) + '/../be/' + dom_str
        if config['wrfvar7']['cv_options'] == 5:
            if not os.path.isfile(f'{be_work_dir}/be.dat.cv5'):
                cli.error(f'BE matrix {be_work_dir}/be.dat.cv5 does not exist!')
            run(f'ln -sf {be_work_dir}/be.dat.cv5 be.dat')
        elif config['wrfvar7']['cv_options'] == 6:
            if not os.path.isfile(f'{be_work_dir}/be.dat.cv6'):
                cli.error(f'BE matrix {be_work_dir}/be.dat.cv6 does not exist!')
            run(f'ln -sf {be_work_dir}/be.dat.cv6 be.dat')
        elif config['wrfvar7']['cv_options'] == 7:
            if not os.path.isfile(f'{be_work_dir}/be.dat.cv7'):
                cli.error(f'BE matrix {be_work_dir}/be.dat.cv7 does not exist!')
            run(f'ln -sf {be_work_dir}/be.dat.cv7 be.dat')
    if not os.path.exists('./be.dat'):
        run(f'ln -sf {wrfda_root}/var/run/be.dat.cv3 be.dat')

    # First guess
    # TODO: Assume there is only one domain to be assimilated.
    if fg != None:
        run(f'ln -sf {fg} {wrfda_work_dir}/fg')
    else:
        expected_files = ['{}/wrfout_d{:02d}_{}'.format(wrf_work_dir, i + 1, start_time_str) for i in range(max_dom)]
        if check_files(expected_files):
            run(f'ln -sf {wrf_work_dir}/wrfout_{dom_str}_{start_time_str} {wrfda_work_dir}/fg')
        else:
            expected_files = ['{}/wrfinput_d{:02d}_{}'.format(wrf_work_dir, i + 1, start_time_str) for i in range(max_dom)]
            if not check_files(expected_files):
                cli.error('real.exe or da_update_bc.exe wasn\'t executed successfully!')
            run(f'ln -sf {wrf_work_dir}/wrfinput_{dom_str}_{start_time_str} {wrfda_work_dir}/fg')

    # Observation data
    if config['custom']['wrfda']['type'] == '3dvar':
        if 'use_radarobs' in config['wrfvar4'] and config['wrfvar4']['use_radarobs']:
            # Radar data
            run(f'rm -f ob.*')
            for obs_radar_file in glob(f'{args.littler_root}/{start_time.format("YYYYMMDD")}/obs.radar.*'):
                radar_time = pendulum.from_format(os.path.basename(obs_radar_file).split('.')[2], 'YYYYMMDDHHmm')
                if radar_time == start_time:
                    run(f'ln -sf {obs_radar_file} ob.radar')
            if os.path.isfile(f'wrfvar_output_{start_time_str}'):
                cli.notice('Use previous analysis data as the background.')
                run(f'mv wrfvar_output_{start_time_str} wrfvar_output_conv_{start_time_str}')
                run(f'ln -sf wrfvar_output_conv_{start_time_str} fg')
        elif 'conv_obs' in config['custom']:
            if 'dir_pattern' in config['custom']['conv_obs']:
                obs_dir = Template(config['custom']['conv_obs']['dir_pattern']).render(obs_time=start_time)
            if 'file_pattern' in config['custom']['conv_obs']:
                obs_file = Template(config['custom']['conv_obs']['file_pattern']).render(obs_time=start_time)
            if config['wrfvar3']['ob_format'] == 1:
                run(f'ln -sf {args.prepbufr_root}/{obs_dir}/{obs_file} ob.bufr')
            elif config['wrfvar3']['ob_format'] == 2:
                run(f'ln -sf {args.prepbufr_root}/{obs_dir}/{obs_file} ob.ascii')
        elif config['wrfvar3']['ob_format'] == 2 and os.path.isfile(f'{obsproc_work_dir}/obs_gts_{start_time.format(datetime_fmt)}.3DVAR'):
            # LITTLE_R conventional data
            run(f'ln -sf {obsproc_work_dir}/obs_gts_{start_time.format(datetime_fmt)}.3DVAR ob.ascii')
        elif config['wrfvar3']['ob_format'] == 1 and config['custom']['wrfda']['prepbufr_source'] == 'gdas':
            # PREPBUFR conventional data
            gdas_file_path = f'{args.prepbufr_root}/gdas.{start_time.format("YYYYMMDD")}/gdas.t{start_time.hour:02}z.prepbufr.nr'
            if not os.path.isfile(gdas_file_path):
                cli.error(f'{gdas_file_path} does not exist!')
            run(f'ln -sf {gdas_file_path} ob.bufr')

    if os.path.isfile(f'{wrfda_work_dir}/wrfvar_output_{start_time_str}') and not args.force:
        cli.notice(f'{wrfda_work_dir}/wrfvar_output_{start_time_str} already exists.')
        return

    submit_job(f'{wrfda_root}/var/build/da_wrfvar.exe', min(20, args.np), config, args, wait=True)

    expected_files = [f'wrfvar_output', 'statistics']
    if not check_files(expected_files):
        # Check if the failure is caused by parallel computing, e.g. cv_options is zero in some process.
        if search_files('rsl.error.*', 'Invalid CV option chosen: cv_options = 0'):
            cli.warning('Failed to run da_wrfvar.exe in parallel. Try to run in serial.')
            submit_job(f'{wrfda_root}/var/build/da_wrfvar.exe', 1, config, args, wait=True)
            if not check_files(expected_files):
                cli.error(f'Still failed! See {wrfda_work_dir}/rsl.error.0000.')
        else:
            cli.error(f'Failed! See {wrfda_work_dir}/rsl.error.0000.')
    else:
        print(open('statistics').read())
        run(f'ncl -Q {scripts_root}/../plots/plot_cost_grad_fn.ncl')
        run(f'cp wrfvar_output wrfvar_output_{start_time_str}')
        cli.notice('Succeeded.')
def main():
    """
    Main program for ADI data reduction, configured with a call to
    adiparam.GetConfig(), which brings up a GUI to set parameters.

    The pipeline is currently designed for SEEDS data taken without
    an occulting mask.

    You must have scipy, numpy, pyephem, multiprocessing, and matplotlib
    installed to use this pipeline.
    """

    parser = optparse.OptionParser(usage=__doc__)
    parser.add_option("-p", "--prefix", dest="prefix", default="HICA",
                      help="Specify raw file name prefix (default=%default)")
    opts, args = parser.parse_args()

    exec_path = os.path.dirname(os.path.realpath(__file__))
    filesetup, adipar, locipar = GetConfig(prefix=opts.prefix)

    nframes = len(filesetup.framelist)
    ngroup = 1 + int((nframes - 1) / locipar.max_n)

    flat = pyf.open(filesetup.flat)
    if filesetup.pixmask is not None:
        hotpix = pyf.open(filesetup.pixmask)
    else:
        hotpix = None

    dimy, dimx = pyf.open(filesetup.framelist[0])[-1].data.shape
    mem, ncpus, storeall = utils.config(nframes, dimy * dimx)

    if filesetup.scale_phot:
        x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3)
        window = (x**2 + y**2 < 2.51**2) * 1.0
        window /= np.sum(window)
        ref_phot, ref_psf = photometry.calc_phot(filesetup, adipar, flat, hotpix, mem, window)
    else:
        ref_psf = None
        ref_phot = None

    ################################################################
    # WCS coordinates are not reliable in HiCIAO data with the image
    # rotator off.  Compute parallactic angle.  Otherwise, trust the
    # WCS coordinates.
    ################################################################

    if 'HICA' in filesetup.framelist[0]:
        pa = np.asarray([transform.get_pa(frame) * -1 * np.pi / 180 for frame in filesetup.framelist])
    else:
        pa = np.ones(len(filesetup.framelist))
        for i in range(len(filesetup.framelist)):
            cd2_1 = pyf.open(filesetup.framelist[i])[0].header['cd2_1']
            cd2_2 = pyf.open(filesetup.framelist[i])[0].header['cd2_2']
            pa[i] = -np.arctan2(cd2_1, cd2_2)

    fullframe = re.sub("-C.*fits", ".fits", filesetup.framelist[0])
    try:
        objname = pyf.open(fullframe)[0].header['OBJECT']
    except:
        objname = "Unknown_Object"
    objname = re.sub(' ', '_', objname)
    np.savetxt(filesetup.output_dir + '/' + objname + '_palist.dat', pa)

    dr_rms = None

    ####################################################################
    # Default save/resume points: destriping, recentering, final files
    # Configuration gives the option to skip the destriping step (only
    # performing a flat-field), the dewarping, and the centering.
    ####################################################################

    if np.all(utils.check_files(filesetup, ext="_r")):
        print "\nResuming reduction from recentered files."
        if ngroup == 1:
            flux = utils.read_files(filesetup, ext="_r")
        else:
            flux = utils.read_files(filesetup, ext="_r")
    else:
        if storeall and np.all(utils.check_files(filesetup, ext="_ds")):
            flux = utils.read_files(filesetup, ext="_ds")
        elif not np.all(utils.check_files(filesetup, ext="_ds")):
            flux = parallel._destripe(filesetup, flat, hotpix, mem, adipar,
                                      write_files=True, storeall=storeall,
                                      full_destripe=adipar.full_destripe,
                                      do_horiz=adipar.full_destripe)
        else:
            flux = None

        if adipar.dewarp:
            flux = parallel._dewarp(filesetup, mem, flux=flux, storeall=storeall)

        if adipar.do_centroid:
            centers, dr_rms = centroid.fit_centroids(filesetup, flux, pa,
                                                     storeall=storeall,
                                                     objname=objname,
                                                     method=adipar.center,
                                                     psf_dir=exec_path + '/psfref',
                                                     ref_psf=ref_psf)
            #centers = np.ndarray((nframes, 2))
            #centers[:, 0] = 1026 - 128
            #centers[:, 1] = 949 + 60
            #dr_rms = 30

            np.savetxt(filesetup.output_dir + '/' + objname + '_centers.dat', centers)

            ####################################################################
            # Recenter the data onto a square array of the largest dimension
            # such that the entire array has data
            ####################################################################

            mindim = min(dimy - centers[:, 0].max(), centers[:, 0].min(),
                         dimx - centers[:, 1].max(), centers[:, 1].min())
            mindim = int(mindim) * 2 - 1
            flux = parallel._rotate_recenter(filesetup, flux, storeall=storeall,
                                             centers=centers, newdimen=mindim,
                                             write_files=True)
            nframes = len(filesetup.framelist)

    ####################################################################
    # Perform scaled PCA on the flux array; alternatively, read in an
    # array of principal components.  Neither is currently used.
    ####################################################################

    if False:
        pcapath = '/scr/wakusei1/users/tbrandt'
        flux, pca_arr = pca.pca(flux, ncomp=20, nread=2, dosub=True,
                                pcadir=pcapath + '/psfref')
        for i in range(nframes):
            out = pyf.HDUList(pyf.PrimaryHDU(flux[i].astype(np.float32),
                                             pyf.open(filesetup.framelist[i])[0].header))
            rootfile = re.sub('.*/', '', filesetup.framelist[i])
            out.writeto(filesetup.reduce_dir + '/' + re.sub('.fits', '_r.fits', rootfile),
                        clobber=True)
        if dr_rms is None:
            dr_rms = 20
    elif False:
        pca_dir = '.'
        npca = 40
        pca_arr = np.zeros((npca, flux.shape[1], flux.shape[2]), np.float32)
        for i in range(npca):
            tmp = pyf.open(pca_dir + '/pcacomp_' + str(i) + '.fits')[0].data
            dy, dx = [tmp.shape[0] // 2, tmp.shape[1] // 2]
            pca_arr[i, yc - dy:yc + dy + 1, xc - dx:xc + dx + 1] = tmp
    else:
        pca_arr = None

    ####################################################################
    # Find the n closest matches to each frame.  Not currently used.
    ####################################################################

    if False:
        corr = pca.allcorr(range(int(locipar.rmax)), flux, n=80)
        ngroup = 1
    else:
        corr = None

    ####################################################################
    # Subtract a radial profile from each frame.  Not currently used.
    ####################################################################

    if False:
        flux = parallel._radialsub(filesetup, flux, mode='median',
                                   center=None, rmax=None, smoothwidth=0)

    ####################################################################
    # Run LOCI if that ADI reduction method is chosen
    ####################################################################

    partial_sub = None
    full_pa = pa.copy()
    full_framelist = [frame for frame in filesetup.framelist]
    for igroup in range(ngroup):

        if ngroup > 1:
            filesetup.framelist = full_framelist[igroup::ngroup]
            if np.all(utils.check_files(filesetup, ext="_r")):
                flux = utils.read_files(filesetup, ext="_r")
            else:
                print "Unable to read recentered files for LOCI."
                sys.exit()
            pa = full_pa[igroup::ngroup]

        x = np.arange(flux.shape[1]) - flux.shape[1] // 2
        x, y = np.meshgrid(x, x)
        r = np.sqrt(x**2 + y**2)

        if adipar.adi == 'LOCI':

            ################################################################
            # Set the maximum radius at which to perform LOCI
            ################################################################

            deltar = np.sqrt(np.pi * locipar.fwhm**2 / 4 * locipar.npsf)
            rmax = int(flux.shape[1] // 2 - deltar - 50)
            locipar.rmax = min(locipar.rmax, rmax)

            if dr_rms is None:
                nf, dy, dx = flux.shape
                fluxmed = np.median(flux, axis=0)[dy // 2 - 100:dy // 2 + 101,
                                                  dx // 2 - 100:dx // 2 + 101]
                sat = fluxmed > 0.7 * fluxmed.max()
                r2 = r[dy // 2 - 100:dy // 2 + 101, dx // 2 - 100:dx // 2 + 101]**2
                dr_rms = np.sqrt(np.sum(r2 * sat) / np.sum(sat))

            ################################################################
            # This is regular LOCI
            ################################################################

            if locipar.feedback == 0:
                partial_sub = loci.loci(flux, pa, locipar, mem, mode='LOCI',
                                        pca_arr=None, r_ex=dr_rms, corr=corr,
                                        method='matrix', do_partial_sub=True,
                                        sub_dir=exec_path)

            ################################################################
            # The next block runs LOCI once, de-rotates, takes the median,
            # and re-rotates to each frame's position angle.  It then runs
            # LOCI again to over-correct the result.  Not recommended for
            # SEEDS data with AO188.
            ################################################################

            else:
                fluxref = np.ndarray(flux.shape, np.float32)
                fluxref[:] = flux
                loci.loci(fluxref, pca_arr, pa, locipar, mem, mode='LOCI',
                          r_ex=dr_rms, pca_arr=pca_arr, corr=corr,
                          method='matrix', do_partial_sub=False)

                for i in range(flux.shape[0]):
                    np.putmask(fluxref[i], r > locipar.rmax - 1, 0)
                    np.putmask(fluxref[i], r < dr_rms + 1, 0)
                locipar.rmax -= 100
                fluxref = parallel._rotate_recenter(filesetup, fluxref, theta=pa)
                for i in range(flux.shape[0]):
                    np.putmask(fluxref[i], r > locipar.rmax - 1, 0)
                    np.putmask(fluxref[i], r < dr_rms + 1, 0)
                locipar.rmax -= 100
                fluxmed = np.median(fluxref, axis=0)
                for i in range(flux.shape[0]):
                    fluxref[i] = fluxmed * locipar.feedback
                fluxref = parallel._rotate_recenter(filesetup, fluxref, theta=-pa)
                loci.loci(flux, pa, locipar, mem, mode='refine', fluxref=fluxref,
                          pca_arr=pca_arr, rmin=dr_rms, r_ex=dr_rms)

            ################################################################
            # Mask saturated areas (< dr_rms), do median subtraction at radii
            # beyond the limit of the LOCI reduction
            ################################################################

            fluxmed = np.median(flux, axis=0)
            for i in range(flux.shape[0]):
                np.putmask(flux[i], r < dr_rms + 2, 0)
                np.putmask(flux[i], r > locipar.rmax - 1, flux[i] - fluxmed)

        ####################################################################
        # Alternative to LOCI: median PSF subtraction
        ####################################################################

        elif adipar.adi == 'median':
            medpsf = np.median(flux, axis=0)
            for i in range(flux.shape[0]):
                flux[i] -= medpsf

        else:
            print "Error: ADI reduction method " + adipar.adi + " not recognized."
            #sys.exit(1)

        ####################################################################
        # Derotate, combine flux array using mean/median hybrid (see
        # Brandt+ 2012), measure standard deviation at each radius
        ####################################################################

        if igroup == 0:
            newhead = utils.makeheader(flux[0], pyf.open(fullframe)[0].header,
                                       full_framelist, adipar, locipar)
            flux = parallel._rotate_recenter(filesetup, flux, theta=pa)
            fluxtmp, noise = combine.meanmed(flux)
            fluxbest = fluxtmp / ngroup
            if partial_sub is not None:
                partial_sub_tot = partial_sub / ngroup
        else:
            flux = parallel._rotate_recenter(filesetup, flux, theta=pa)
            fluxtmp, noise = combine.meanmed(flux)
            fluxbest += fluxtmp / ngroup
            if partial_sub is not None:
                partial_sub_tot += partial_sub / ngroup

    filesetup.framelist = full_framelist
    if partial_sub is not None:
        partial_sub = partial_sub_tot

    ####################################################################
    # Rescale all arrays to 2001x2001 so that the center is pixel number
    # (1000, 1000) indexed from 0.  Use NaN to pad arrays.
    ####################################################################

    fluxbest = utils.arr_resize(fluxbest)
    if partial_sub is not None:
        partial_sub = utils.arr_resize(partial_sub, newdim=fluxbest.shape[0]).astype(np.float32)
        fluxbest /= partial_sub
        out = pyf.HDUList(pyf.PrimaryHDU(partial_sub))
        out.writeto('partial_sub2.fits', clobber=True)

    x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3)
    window = (x**2 + y**2 < 2.51**2) * 1.0
    window /= np.sum(window)
    fluxbest = signal.convolve2d(fluxbest, window, mode='same')
    noise = combine.radprof(fluxbest, mode='std', smoothwidth=2, sigrej=4.5)[0]

    r = utils.arr_resize(r)
    if dr_rms is not None:
        np.putmask(fluxbest, r < dr_rms + 3, np.nan)
    np.putmask(fluxbest, r > locipar.rmax - 2, np.nan)
    fluxsnr = (fluxbest / noise).astype(np.float32)

    ####################################################################
    # 5-sigma sensitivity maps--just multiply by the scaled aperture
    # photometry of the central star
    ####################################################################

    if partial_sub is not None:
        sensitivity = noise * 5 / partial_sub

        ####################################################################
        # Photometry of the central star
        ####################################################################

        if filesetup.scale_phot:
            #ref_phot = photometry.calc_phot(filesetup, adipar, flat,
            #                                hotpix, mem, window)[0]
            sensitivity /= ref_phot
            fluxbest /= ref_phot
            noise /= ref_phot

        sig_sens = combine.radprof(sensitivity, mode='std', smoothwidth=0)[0]
        outfile = open(filesetup.output_dir + '/' + objname + '_5sigma_sensitivity.dat', 'w')
        for i in range(sig_sens.shape[0] // 2, sig_sens.shape[0]):
            iy = sig_sens.shape[0] // 2
            if np.isfinite(sensitivity[iy, i]):
                outfile.write('%8d %12.5e %12.5e %12e\n' %
                              (i - iy, sensitivity[iy, i], sig_sens[iy, i], partial_sub[iy, i]))
        outfile.close()
    else:
        np.savetxt(filesetup.output_dir + '/' + objname + '_noiseprofile.dat',
                   noise[noise.shape[0] // 2, noise.shape[1] // 2:].T)

    ####################################################################
    # Write the output fits files.
    ####################################################################

    snr = pyf.HDUList(pyf.PrimaryHDU(fluxsnr.astype(np.float32), newhead))
    final = pyf.HDUList(pyf.PrimaryHDU(fluxbest.astype(np.float32), newhead))
    if partial_sub is not None:
        contrast = pyf.HDUList(pyf.PrimaryHDU(sensitivity.astype(np.float32), newhead))

    name_base = filesetup.output_dir + '/' + objname
    snr.writeto(name_base + '_snr.fits', clobber=True)
    final.writeto(name_base + '_final.fits', clobber=True)
    if partial_sub is not None:
        contrast.writeto(name_base + '_5sigma_sensitivity.fits', clobber=True)
def start(self):
    # temp
    advance_dirs = {
        'Merged_vcf': '{analydir}/Advance/{newjob}/Merged_vcf',
        'ACMG': '{analydir}/Advance/{newjob}/ACMG',
        'FilterSV': '{analydir}/Advance/{newjob}/FilterSV',
        'FilterCNV': '{analydir}/Advance/{newjob}/FilterCNV',
        'Noncoding': '{analydir}/Advance/{newjob}/Noncoding',
        'ModelF': '{analydir}/Advance/{newjob}/ModelF',
        'Share': '{analydir}/Advance/{newjob}/Share',
        'Denovo': '{analydir}/Advance/{newjob}/Denovo',
        'Linkage': '{analydir}/Advance/{newjob}/Linkage',
        'ROH': '{analydir}/Advance/{newjob}/ROH',
        'Network': '{analydir}/Advance/{newjob}/Network',
        'Pathway': '{analydir}/Advance/{newjob}/Pathway',
        'PPI': '{analydir}/Advance/{newjob}/PPI',
        'HLA': '{analydir}/Advance/{newjob}/HLA',
        'SiteAS': '{analydir}/Advance/{newjob}/SiteAS',
        'GeneAS': '{analydir}/Advance/{newjob}/GeneAS',
        'IntegrateResult': '{analydir}/Advance/{newjob}/IntegrateResult',
        'Disease': '{analydir}/Advance/{newjob}/Disease',
        'BriefResults': '{analydir}/Advance/{newjob}/BriefResults',
    }

    for k, v in advance_dirs.iteritems():
        self.args.update({k: v.format(**self.args)})

    # print self.args['SiteAS']
    # exit()
    # print self.analy_array
    print 'hello, {}'.format(self.username)

    # Require rawdata or not
    qc_status = utils.get_status('qc', self.startpoint, config.ANALYSIS_POINTS)
    mapping_status = utils.get_status('bwa_mem', self.startpoint, config.ANALYSIS_POINTS)

    print 'qc status:', qc_status
    print 'mapping status:', mapping_status

    ANALY_DICT = utils.get_analysis_dict(self.analy_array, config.ANALYSIS_CODE)
    self.args.update({'ANALY_DICT': ANALY_DICT})
    # print ANALY_DICT.keys();exit()

    softwares = utils.get_softwares(self.analy_array, self.args['ANALY_DICT'], self.args, self.seqstrag)
    # pprint(softwares);exit()
    self.args.update({'softwares': softwares})

    # check inputs
    self.queues = utils.check_queues(self.queues, self.username)
    self.args.update({'queues': self.queues})

    # use sentieon specific queues if needed
    if 'sentieon' in softwares.values():
        print 'add sentieon_queues'
        sentieon_queues = self.queues
        if config.CONFIG.has_option('resource', 'sentieon_queues'):
            sentieon_queues = config.CONFIG.get('resource', 'sentieon_queues').split(',')
            sentieon_queues = utils.check_queues(sentieon_queues, self.username)
            if not sentieon_queues:
                sentieon_queues = self.queues
        self.args.update({'sentieon_queues': sentieon_queues})
    # print self.args['sentieon_queues'];exit()
    # print sentieon_queues;exit()

    utils.check_analy_array(self.seqstrag, self.analy_array, config.ANALYSIS_CODE)
    utils.check_files(self.pn, self.samp_info, self.samp_list)
    newTR = utils.check_target_region(config.CONFIG, self.seqstrag, self.refgenome, self.rawTR)
    self.args.update({'TR': newTR})

    print 'analysis items:'
    for analysis_code in self.analy_array:
        print utils.color_text(
            '{:4} {}'.format(analysis_code, config.ANALYSIS_CODE[analysis_code][0]), 'yellow')

    # Analysis start point
    if self.startpoint:
        if self.startpoint in config.ANALYSIS_POINTS:
            print 'start point: {}'.format(utils.color_text(self.startpoint))
        else:
            print '[error] invalid startpoint: {}'.format(utils.color_text(self.startpoint))
            print 'maybe you want to choose: {}'.format(
                utils.color_text(
                    process.extractOne(self.startpoint, config.ANALYSIS_POINTS.keys())[0], 'cyan'))
            print 'available startpoints are as follows:\n {}'.format(
                ' '.join(config.ANALYSIS_POINTS.keys()))
            exit(1)

    is_advance = max(self.analy_array) > 6.1
    project = utils.Project(self.analydir, self.samp_info, self.samp_info_done,
                            self.samp_list, self.qc_list, qc_status, mapping_status, is_advance)

    # Extract sample_info
    print 'extract sample information...'
    fenqi, tissue, disease_name, sample_infos, sample_infos_all, sample_done = project.get_sample_infos(
        self.samp_list, self.samp_info, self.samp_info_done, is_advance)

    database = '{}/project/DisGeNet.json'.format(config.CONFIG.get('software', 'soft_dir'))
    disease_ids = utils.get_disease_id(disease_name, database)
    self.args.update({
        'disease_name': disease_name,
        'disease_ids': disease_ids,
    })

    sample_infos_waiting = {
        sampleid: infos
        for sampleid, infos in sample_infos.iteritems() if sampleid not in sample_done
    }
    self.args.update({'sample_infos_waiting': sample_infos_waiting})
    # print sample_infos_waiting
    # exit()

    # print 'fenqi:', fenqi
    # print 'tissue:', tissue
    # exit()

    sample_lists = project.get_sample_lists
    # print sample_lists
    # print sample_infos.keys()
    # print sample_infos_all.keys()
    # for sample in sample_infos:
    #     print sample, sample_infos[sample]['familyid']
    # exit()

    if mapping_status == 'waiting':
        sample_lists = project.update_qc_list()

    print ' report number: {}'.format(utils.color_text(fenqi))
    if disease_name:
        print ' disease name: {}'.format(utils.color_text(disease_name))
        print ' disease id: {}'.format(utils.color_text(disease_ids))
    if tissue:
        print ' tissue: {}'.format(utils.color_text(tissue))
    print ' samples ({}): {}'.format(len(sample_infos), utils.color_text(sample_infos.keys()))
    if sample_done:
        print ' samples done({}): {}'.format(len(sample_done), utils.color_text(sample_done))

    # Update qc_list and extract sample_list
    # print 'update qc_list...'
    # print json.dumps(sample_lists, indent=2)

    # set memory according to seqstrag
    print 'set analysis memory...'
    if self.seqstrag == 'WGS':
        print 'update memory for WGS...'
        for analysis, memory in config.ANALYSIS_MEM_WGS.items():
            if analysis in config.ANALYSIS_POINTS:
                config.ANALYSIS_POINTS[analysis][0] = memory
    # exit()

    # ===========================================================
    # ===========================================================
    print '>>> pipeline start...'
    mutation_soft, sv_soft, cnv_soft, denovo_soft = [
        softwares[each] for each in ('mutation', 'sv', 'cnv', 'denovo')
    ]
    print ' mutation_soft:{}, sv_soft:{}, cnv_soft:{}, denovo_soft:{}'.format(
        mutation_soft, sv_soft, cnv_soft, denovo_soft)

    # QC
    if ANALY_DICT['quality_control'] and qc_status == 'waiting':
        utils.print_color('> QC', 'white')
        QC(self.args, self.jobs, self.orders, sample_lists, config).start()

    # Mapping
    if ANALY_DICT['mapping']:
        utils.print_color('> Mapping', 'white')
        Mapping(self.args, self.jobs, self.orders, sample_lists, sample_infos,
                config, qc_status, mapping_status).start()

    # Mutation
    if ANALY_DICT['snpindel_call']:
        utils.print_color('> Mutation', 'white')
        Mutation(self.args, self.jobs, self.orders, sample_lists, sample_infos, config).start()

    # SV
    if ANALY_DICT['sv_call']:
        utils.print_color('> SV', 'white')
        SV(self.args, self.jobs, self.orders, sample_infos, config).start()

    # CNV
    if ANALY_DICT['cnv_call']:
        utils.print_color('> CNV', 'white')
        CNV(self.args, self.jobs, self.orders, sample_infos, config).start()

    # FilterDB
    if ANALY_DICT['filter']:
        utils.print_color('> FilterDB', 'white')
        FilterDB(self.args, self.jobs, self.orders, mutation_soft, sv_soft, cnv_soft,
                 sample_infos, config, disease_name, tissue, ANALY_DICT).start()

    # ModelF
    if ANALY_DICT['filter_model']:
        utils.print_color('> Model', 'white')
        FilterModel(self.args, self.jobs, self.orders, mutation_soft, sv_soft,
                    cnv_soft, sample_infos, config).start()

    # Denovo
    if ANALY_DICT['denovo']:
        utils.print_color('> Denovo', 'white')
        Denovo(self.args, self.jobs, self.orders, mutation_soft, sv_soft, cnv_soft,
               denovo_soft, sample_infos, config, ANALY_DICT).start()

    # Linkage
    if ANALY_DICT['linkage']:
        utils.print_color('> Linkage', 'white')
        Linkage(self.args, self.jobs, self.orders, mutation_soft, sv_soft, cnv_soft,
                denovo_soft, sample_infos_all, config, ANALY_DICT).start()

    # IntegrateResult
    if any(ANALY_DICT[analysis] for analysis in ['filter', 'filter_model', 'denovo', 'phenolyzer']):
        utils.print_color('> IntegrateResult', 'white')
        IntegrateResult(self.args, self.jobs, self.orders, config).start()

    # ROH
    if ANALY_DICT['roh']:
        utils.print_color('> ROH', 'white')
        ROH(self.args, self.jobs, self.orders, sample_infos, mutation_soft, config).start()

    # OTHER
    other = Other(self.args, self.jobs, self.orders, config, disease_name)

    # IBD
    if any(ANALY_DICT[each] for each in ['filter_model', 'linkage', 'denovo']) and len(sample_infos_waiting) > 1:
        utils.print_color('> IBD', 'white')
        other.ibd()

    # Network
    if ANALY_DICT['phenolyzer']:
        utils.print_color('> Phenolyzer', 'white')
        other.phenolyzer()

    # Pathway
    if ANALY_DICT['pathway']:
        utils.print_color('> Pathway', 'white')
        other.pathway()

    # PPI
    if ANALY_DICT['ppi']:
        utils.print_color('> PPI', 'white')
        other.ppi()

    # SiteAS
    if ANALY_DICT['site_association']:
        utils.print_color('> SiteAS', 'white')
        Association(self.args, self.jobs, self.orders, config).site_association()

    # GeneAS
    if ANALY_DICT['gene_association']:
        utils.print_color('> GeneAS', 'white')
        Association(self.args, self.jobs, self.orders, config).gene_association()

    # HLA
    if ANALY_DICT['hla']:
        utils.print_color('> HLA', 'white')
        HLA(self.args, self.jobs, self.orders, sample_lists, sample_infos, config, qc_status).start()

    # result and report
    utils.print_color('> Result', 'white')
    Result(self.args, self.jobs, self.orders, config).start()

    utils.print_color('> Report', 'white')
    Report(self.args, self.jobs, self.orders, config).start()

    # job summary
    print 'length of jobs waiting/total: {}/{}'.format(
        len([job for job in self.jobs if job.get('status') == 'waiting']), len(self.jobs))

    utils.write_job(self.analydir, self.newjob, self.jobs, self.orders)
    print '{:-^80}'.format(' all done ')
from train import initialize_for_train, train
from utils import check_files
from test import initialize_for_test, get_prediction
from params import Params

# initialize parameters
params = Params()

# check the existence of train / test data and create necessary folder
check_files(params)

# collect all inputs for training
data_loader, vocab_size, encoder, decoder, optimizer = initialize_for_train(params)

# training
train(params, data_loader, vocab_size, encoder, decoder, optimizer)

# collect all inputs for testing
data_loader, encoder, decoder = initialize_for_test(params)

# testing
get_prediction(data_loader, encoder, decoder, params)
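# The comment above says check_files(params) verifies the train/test data and
# creates the needed folder, but its body is not included here. The sketch
# below is a hypothetical illustration of that behaviour only; the attribute
# names (train_data_path, test_data_path, output_dir) are assumptions and are
# not taken from this project's Params class.
import os


def check_files_sketch(params):
    """Hypothetical: fail early if data is missing, then create the output folder."""
    for path in (params.train_data_path, params.test_data_path):
        if not os.path.exists(path):
            raise FileNotFoundError('Required data not found: {}'.format(path))
    os.makedirs(params.output_dir, exist_ok=True)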
def run_trg(workpath, predf, opt=['cons', 'aggr']):
    # check files/options
    utils.check_files([predf, 'init.pdb', 't000_.3mers', 't000_.9mers'])
    if not os.path.exists('input.fa'):
        utils.pdb2fa('init.pdb', outfa='input.fa', gap=False)
    if not os.path.exists('disulf.def'):
        os.system('echo "1 1" > disulf.def')

    extraopt = ''
    if os.path.exists('native.pdb'):
        extraopt += '-native %s/native.pdb' % CURR

    # read in options
    nstruct = CONFIGS['nstruct_div']
    njobs = CONFIGS['njobs_div']
    nproc = CONFIGS['nproc']

    os.system('mkdir %s 2>/dev/null' % workpath)
    os.chdir(workpath)

    # Restraint (or "cst") generation using estogram2cst script
    if not os.path.exists('cons.cst'):
        print('CMD: python %s/estogram2cst.py %s/%s %s/init.pdb cons -weakcst spline -reference_correction > cons.cstgen.txt' % (SCRIPTPATH, CURR, predf, CURR))
        os.system('python %s/estogram2cst.py %s/%s %s/init.pdb cons -weakcst spline -reference_correction > cons.cstgen.txt' % (SCRIPTPATH, CURR, predf, CURR))
    if not os.path.exists('aggr.cst'):
        aggropt = '-exulr_from_harm -reference_correction -pcore 0.8 0.8 0.9'
        print('CMD: python %s/estogram2cst.py %s/%s %s/init.pdb aggr -weakcst spline %s > aggr.cstgen.txt' % (SCRIPTPATH, CURR, predf, CURR, aggropt))
        os.system('python %s/estogram2cst.py %s/%s %s/init.pdb aggr -weakcst spline %s > aggr.cstgen.txt' % (SCRIPTPATH, CURR, predf, CURR, aggropt))

    # Append input cst if exists
    if os.path.exists('../input.fa.cst'):
        os.system('cat ../input.fa.cst >> cons.fa.cst')
        os.system('cat ../input.fa.cst >> aggr.fa.cst')
    if os.path.exists('../input.cen.cst'):
        os.system('cat ../input.cen.cst >> cons.cst')
        os.system('cat ../input.cen.cst >> aggr.cst')

    ## Put all jobs at 'alljobs.all' and run altogether
    jobs = open('alljobs.all', 'w')
    sh = '%s/rosetta_scripts/runhybrid.sh' % SCRIPTPATH

    # Setup Aggressive sampling jobs
    if 'aggr' in opt:
        outsilent = 'aggr.out'
        pdbstr = " template1=partial.init.aggr.pdb"
        scriptvars = pdbstr + ' cencst=aggr.cst facst=aggr.fa.cst cst_weight=0.2 cst_fa_weight=0.2 scriptdir=%s' % SCRIPTPATH
        for k in range(njobs):
            prefix = 'aggr.%d' % k
            jobs.write('%s %d %s "%s" %s %s "%s"\n' % (sh, nstruct, 'mut.xml', scriptvars, outsilent, prefix, extraopt))

    # Setup Conservative sampling jobs
    if 'cons' in opt:
        outsilent = 'cons.out'
        pdbstr = "' template1=partial.init.cons.pdb'"
        scriptvars = pdbstr + ' cencst=cons.cst facst=cons.fa.cst cst_weight=0.2 cst_fa_weight=1.0 scriptdir=%s' % SCRIPTPATH
        for k in range(njobs):
            prefix = 'cons.%d' % k
            jobs.write('%s %d %s "%s" %s %s "%s"\n' % (sh, nstruct, 'mut.xml', scriptvars, outsilent, prefix, extraopt))
    jobs.close()

    # Launch jobs through gnu parallel
    print('CMD: %s -j %d :::: alljobs.all' % (PARALLEL, nproc))
    os.system('%s -j %d :::: alljobs.all' % (PARALLEL, nproc))

    # Check the number of files produced
    ncons = len(os.popen('grep ^SCORE cons.out').readlines())
    naggr = len(os.popen('grep ^SCORE aggr.out').readlines())
    n_to_sample_per_opt = int(nstruct * njobs * 0.9)
    if ncons < n_to_sample_per_opt or naggr < n_to_sample_per_opt:
        sys.exit("Insufficient decoys generated: %d/%d in cons/aggr.out, terminate!" % (ncons, naggr))

    # clear logs if sampled numbers are okay
    os.system('rm cons*log aggr*log')

    # post process -- pick 50 from generated decoys
    Q = np.mean(np.load(CURR + '/' + predf)['lddt'])
    f = max(0.0, (Q - 0.6) / 0.4)
    dcut = max(0.2, (1.0 - f) * 0.3)
    print('CMD: %s/rosetta_scripts/pick_from_div.sh %6.2f' % (SCRIPTPATH, dcut))
    os.system('%s/rosetta_scripts/pick_from_div.sh %6.2f' % (SCRIPTPATH, dcut))
    QScorer.main('pick.out', 'pick.Q.out')
    os.chdir(CURR)
def run_wrfda_update_bc(work_root, wrfda_root, update_lowbc, config, args, wrf_work_dir=None, wrfbdy=None, tag=None): start_time = config['custom']['start_time'] datetime_fmt = 'YYYY-MM-DD_HH:mm:ss' start_time_str = start_time.format(datetime_fmt) max_dom = config['domains']['max_dom'] if not wrf_work_dir: if tag != None: wrf_work_dir = f'{work_root}/wrf_{tag}' else: wrf_work_dir = f'{work_root}/wrf' if max_dom > 1: dom_str = 'd' + str(config['custom']['wrfda']['dom'] + 1).zfill(2) if tag != None: wrfda_work_dir = f'{work_root}/wrfda_{tag}/{dom_str}' else: wrfda_work_dir = f'{work_root}/wrfda/{dom_str}' else: dom_str = 'd01' if tag != None: wrfda_work_dir = f'{work_root}/wrfda_{tag}' else: wrfda_work_dir = f'{work_root}/wrfda' if not os.path.isdir(wrfda_work_dir): os.mkdir(wrfda_work_dir) os.chdir(wrfda_work_dir) if not wrfbdy: wrfbdy = f'{wrf_work_dir}/wrfbdy_{dom_str}' cli.stage(f'Run WRFDA update_bc at {wrfda_work_dir} ...') expected_files = [wrfbdy, f'wrfvar_output_{start_time_str}', 'fg'] if not check_files(expected_files): print(expected_files) cli.error( 'run_wrfda_update_bc: da_wrfvar.exe or real.exe wasn\'t executed successfully!' ) run(f'ln -sf {wrfbdy} wrfbdy_{dom_str}') run(f'ln -sf wrfvar_output_{start_time_str} wrfvar_output') parame_in = f90nml.read(f'{wrfda_root}/var/test/update_bc/parame.in') parame_in['control_param']['wrf_input'] = './fg' if update_lowbc: cli.notice('Update only low boundary condition.') parame_in['control_param']['low_bdy_only'] = True parame_in.write(f'{wrfda_work_dir}/parame.in', force=True) if update_lowbc: expected_file = f'wrfbdy_{dom_str}_{start_time_str}.low_updated' else: expected_file = f'wrfbdy_{dom_str}_{start_time_str}.lateral_updated' if not check_files(expected_file) or args.force: submit_job(f'{wrfda_root}/var/build/da_update_bc.exe', 1, config, args, wait=True) run(f'cp wrfbdy_{dom_str} {expected_file}') else: run(f'ls -l {expected_file}') cli.notice('Succeeded.')
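These WRF workflow scripts repeatedly pass a list (or, as in run_wrfda_update_bc above, a single path) of expected outputs to check_files before deciding whether to rerun an executable. The helper itself lives elsewhere in the package; the following is only a plausible sketch, assuming it tests that every listed file exists and is non-empty.

# Sketch of the list-based check_files used by these workflow scripts
# (assumption, not the package's actual utils implementation).
import os

def check_files(expected_files):
    """Return True only if every expected file exists and has non-zero size."""
    if isinstance(expected_files, str):
        expected_files = [expected_files]
    return all(os.path.isfile(f) and os.path.getsize(f) > 0 for f in expected_files)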
def proc_event(eve): for lat in np.arange(minlat, Mlat, 1.): for lon in np.arange(minlon, Mlon, 1.): if utils.check_files(net, paramdic, eve, lat, lon): print('Already computed this event') continue try: inv = client.get_stations( starttime=stime, endtime=etime, station="*", channel="*H*", network="IU,N4,US,II", level="response", latitude=lat, longitude=lon, maxradius=paramdic['station_radius']) except: print('No stations for: ' + str(lat) + ' ' + str(lon)) continue inv = utils.scrub_inventory(inv) if len(inv[0]) < 3: print('too few stations') continue ## Get the data try: st, bad_stas = utils.get_data(inv, eve, paramdic, model, client) st = utils.proc_data(st, inv, paramdic, eve) except: return for comp in ['Z', 'R']: if len(st.select(component=comp)) < 3: continue stack, not_used = utils.comp_stack(st, comp) if len(stack) == 0: print('Bad event') continue ## We have a pretty plot showing all of the components try: #if True: utils.pretty_plot(st, stack, eve, not_used, comp, inv, paramdic) ## Add the lats and lons for book keeping. utils.write_event_results(st, net, stack, eve, not_used, comp, inv, paramdic, lat, lon) except: print('Problem') print(eve) continue print('Finished: ' + str(lat) + ' ' + str(lon)) print(eve) return
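Here utils.check_files(net, paramdic, eve, lat, lon) is used purely as an "already computed" guard before any data are downloaded. Its implementation is not shown; the sketch below assumes results are written to per-event files whose names encode the grid point, which is an invented layout for illustration only.

# Assumed layout -- the real utils.check_files may key its results differently.
import os

def check_files(net, paramdic, eve, lat, lon):
    """Return True if results for this event and grid point appear to exist already."""
    eve_id = str(eve.origins[0].time)  # assumes an ObsPy Event with at least one origin
    fname = f"{net}_{eve_id}_{lat:.1f}_{lon:.1f}.csv"
    return os.path.isfile(os.path.join(paramdic.get('result_dir', '.'), fname))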
def run_real(work_root, wps_work_dir, wrf_root, config, args, tag=None):
    start_time = config['custom']['start_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)
    max_dom = config['domains']['max_dom']

    if not os.path.isdir(wps_work_dir):
        cli.error(f'WPS work directory {wps_work_dir} does not exist!')
    if tag != None:
        wrf_work_dir = f'{work_root}/wrf_{tag}'
    else:
        wrf_work_dir = f'{work_root}/wrf'
    if not os.path.isdir(wrf_work_dir): os.mkdir(wrf_work_dir)
    os.chdir(wrf_work_dir)

    cli.stage(f'Run real.exe at {wrf_work_dir} ...')

    expected_files = ['wrfinput_d{:02d}_{}'.format(i + 1, start_time_str) for i in range(max_dom)]
    expected_files.append('wrfbdy_d01')
    if not check_files(expected_files) or args.force:
        run('rm -f wrfinput_* met_em.*.nc')
        run(f'ln -sf {wps_work_dir}/met_em.*.nc .')
        try:
            dataset = Dataset(glob('met_em.*.nc')[0])
        except:
            cli.error('Failed to open one of the met_em.*.nc files!')
        # Check met_em file.
        if not 'num_st_layers' in dataset.dimensions or dataset.dimensions['num_st_layers'].size == 0:
            cli.error('Failed to run ungrib and metgrid because num_metgrid_soil_levels is zero!')
        namelist_input = f90nml.read('./namelist.input')
        namelist_input['domains']['num_metgrid_levels'] = dataset.dimensions['num_metgrid_levels'].size
        namelist_input['physics']['num_land_cat'] = dataset.getncattr('NUM_LAND_CAT')
        if 'num_st_layers' in dataset.dimensions:
            namelist_input['domains']['num_metgrid_soil_levels'] = dataset.dimensions['num_st_layers'].size
        else:
            cli.warning(f'Dimension num_st_layers is not in {dataset.filepath()}! Set num_metgrid_soil_levels to 0.')
            namelist_input['domains']['num_metgrid_soil_levels'] = 0
        dataset.close()
        namelist_input.write('./namelist.input', force=True)

        submit_job(f'{wrf_root}/run/real.exe', args.np, config, args, wait=True)

        for i in range(max_dom):
            if not os.path.isfile('wrfinput_d{0:02d}'.format(i + 1)):
                # Check if the failure is caused by parallel computing.
                cli.warning('Failed to run real.exe in parallel. Try to run in serial.')
                submit_job(f'{wrf_root}/run/real.exe', 1, config, args, wait=True)
                if not os.path.isfile('wrfinput_d{0:02d}'.format(i + 1)):
                    cli.error(f'Still failed to generate wrfinput_d{i + 1:02d}! See {wrf_work_dir}/rsl.error.0000.')
            run('ln -sf wrfinput_d{0:02d} wrfinput_d{0:02d}_{1}'.format(i + 1, start_time_str))
        if os.path.isfile('wrfbdy_d01'):
            run(f'ln -sf wrfbdy_d01 wrfbdy_d01_{start_time_str}')
        cli.notice('Succeeded.')
    else:
        run('ls -l wrfinput_* wrfbdy_*')
        cli.notice('File wrfinput_* already exist.')
def build_upp(wrf_root, upp_root, args):
    if wrf_root != None:
        os.environ['WRF_DIR'] = wrf_root

    if not 'HDF5' in os.environ:
        res = subprocess.run(['which', 'h5dump'], stdout=subprocess.PIPE)
        if res.returncode == 0:
            os.environ['HDF5'] = os.path.dirname(os.path.dirname(res.stdout.decode('utf-8')))
            cli.notice(f'Set HDF5 to {os.environ["HDF5"]}')
    if not 'HDF5' in os.environ:
        cli.warning('HDF5 environment variable is not set!')

    if not 'NETCDF' in os.environ:
        res = subprocess.run(['which', 'ncdump'], stdout=subprocess.PIPE)
        if res.returncode == 0:
            os.environ['NETCDF'] = os.path.dirname(os.path.dirname(res.stdout.decode('utf-8')))
            cli.notice(f'Set NETCDF to {os.environ["NETCDF"]}')
    if not 'NETCDF' in os.environ:
        cli.warning('NETCDF environment variable is not set!')

    if not 'JASPERINC' in os.environ or not 'JASPERLIB' in os.environ:
        if 'JASPER_ROOT' in os.environ:
            os.environ['JASPERINC'] = os.environ['JASPER_ROOT'] + '/include'
            os.environ['JASPERLIB'] = os.environ['JASPER_ROOT'] + '/lib'
            cli.notice(f'Set JASPERINC to {os.environ["JASPERINC"]}.')
            cli.notice(f'Set JASPERLIB to {os.environ["JASPERLIB"]}.')
        else:
            cli.error('JASPERINC and JASPERLIB environment variables are not set!')

    version = upp_version(args.upp_root)

    if version < Version('4.1'):
        expected_exe_files = ('bin/copygb.exe', 'bin/ndate.exe', 'bin/unipost.exe')
    else:
        # Trailing comma makes this a one-element tuple rather than a bare string.
        expected_exe_files = ('exec/unipost.exe',)

    if not check_files(expected_exe_files):
        if not args.nceplibs_root:
            args.nceplibs_root = f'{os.path.dirname(args.upp_root)}/NCEPLIBS'
        if not os.path.isdir(args.nceplibs_root):
            cli.error('NCEPLIBS is not ready!')
        os.environ['NCEPLIBS_DIR'] = args.nceplibs_root

    if not check_files(expected_exe_files):
        os.chdir(upp_root)
        if args.force: run('./clean -a &> /dev/null')
        cli.notice('Configure UPP ...')
        child = pexpect.spawn('./configure')
        child.expect('Enter selection.*')
        if args.compiler_suite == 'intel':
            child.sendline('4')   # Linux x86_64, Intel compiler (dmpar)
        elif args.compiler_suite == 'gnu':
            child.sendline('8')   # Linux x86_64, gfortran compiler (dmpar)
        elif args.compiler_suite == 'pgi':
            child.sendline('14')  # Linux x86_64, PGI compiler: -f90=pgf90 (dmpar)
        child.wait()

        if args.compiler_suite == 'intel':
            edit_file('./configure.upp', [
                ['mpif90', 'mpiifort'],
                ['mpicc', 'mpiicc']
            ])

        if 'LIBPNG_ROOT' in os.environ:
            edit_file('./configure.upp', [
                ['-lpng', f'-L{os.environ["LIBPNG_ROOT"]}/lib -lpng'],
                ['GRIB2SUPT_INC\s*=\s*(.*)', f'GRIB2SUPT_INC = \\1 -I{os.environ["LIBPNG_ROOT"]}/include']
            ])

        cli.notice('Compile UPP ...')
        run('./compile &> compile.out')
        if check_files(expected_exe_files):
            cli.notice('Succeeded.')
        else:
            cli.error(f'Failed! Check {upp_root}/compile.out')
    else:
        cli.notice('UPP is already built.')
def run_wrf(work_root, wrf_root, config, args, wrfda_work_dir=None, tag=None): start_time = config['custom']['start_time'] end_time = config['custom']['end_time'] datetime_fmt = 'YYYY-MM-DD_HH:mm:ss' start_time_str = start_time.format(datetime_fmt) end_time_str = end_time.format(datetime_fmt) max_dom = config['domains']['max_dom'] if not wrfda_work_dir: if tag != None: wrfda_work_dir = f'{work_root}/wrfda_{tag}' else: wrfda_work_dir = f'{work_root}/wrfda' elif not os.path.isdir(wrfda_work_dir): cli.error(f'run_wrf: {wrfda_work_dir} does not exist!') if tag != None: wrf_work_dir = f'{work_root}/wrf_{tag}' else: wrf_work_dir = f'{work_root}/wrf' if not os.path.isdir(wrf_work_dir): cli.error(f'run_wrf: {wrf_work_dir} does not exist!') os.chdir(wrf_work_dir) all_wrfda_ok = True for dom_idx in range(max_dom): dom_str = 'd' + str(dom_idx + 1).zfill(2) if not copy_wrfda_output(dom_str, start_time_str, wrfda_work_dir): all_wrfda_ok = False break if not all_wrfda_ok: cli.warning('Do not use data assimilation.') expected_files = ['wrfinput_d{:02d}_{}'.format(i + 1, start_time_str) for i in range(max_dom)] expected_files.append(f'wrfbdy_d01_{start_time_str}') if not check_files(expected_files): cli.error('real.exe wasn\'t executed successfully!') for i in range(max_dom): run('ln -sf wrfinput_d{0:02d}_{1} wrfinput_d{0:02d}'.format(i + 1, start_time_str)) run(f'ln -sf wrfbdy_d01_{start_time_str} wrfbdy_d01') cli.stage(f'Run wrf.exe at {wrf_work_dir} ...') expected_files = ['wrfout_d{:02d}_{}'.format(i + 1, end_time_str) for i in range(max_dom)] if not check_files(expected_files) or args.force: run('rm -f wrfout_*') run(f'ln -sf {wrf_root}/run/LANDUSE.TBL .') run(f'ln -sf {wrf_root}/run/ozone_plev.formatted .') run(f'ln -sf {wrf_root}/run/ozone_lat.formatted .') run(f'ln -sf {wrf_root}/run/ozone.formatted .') run(f'ln -sf {wrf_root}/run/RRTM_DATA_DBL RRTM_DATA') run(f'ln -sf {wrf_root}/run/RRTMG_LW_DATA .') run(f'ln -sf {wrf_root}/run/RRTMG_SW_DATA .') run(f'ln -sf {wrf_root}/run/VEGPARM.TBL .') run(f'ln -sf {wrf_root}/run/SOILPARM.TBL .') run(f'ln -sf {wrf_root}/run/GENPARM.TBL .') retries = 0 while True: submit_job(f'{wrf_root}/run/wrf.exe', args.np, config, args, wait=True) if not check_files(expected_files): if retries == 0: cli.error(f'Failed! Check output {os.path.abspath(wrf_work_dir)}/rsl.error.0000.') retries = retries + 1 cli.warning(f'Failed to run wrf, retry it! {retries}') else: break cli.notice('Succeeded.') else: cli.notice('File wrfout_* already exist.') run(f'ls -l {wrf_work_dir}/wrfout_*')
f'--> 3 Quitter.\n' files = f'Choisir un set de donnees:\n' \ f'--> 1 data_20.csv (set de 20 action).\n' \ f'--> 2 dataset1.csv (set de 1000 actions Sienna).\n' \ f'--> 3 dataset2.csv (set de 1000 actions Sienna).\n' if __name__ == "__main__": running = True print(welcome) while running: choice = check_menu(input('Entrer le numero du menu: ')) solutions = Solutions() if choice == 1: print(files) file = check_files( input('Entrer le numero du fichier a analyser: \n'), files) brute_force = Bruteforce(solutions) brute_force.knapsack_bruteforce([ Action(action).convert_to_cents() for action in load_csv(PATH, file) ], MAX_COST) solutions.add_profit() solutions.sort_by_best_profit() print(solutions) elif choice == 2: print(files) file = check_files( input('Entrer le numero du fichier a analyser: \n'), files) kp_dynamic = KpDynamic(solutions) kp_dynamic.knapsack([ Action(action).convert_to_cents()
# check proper input
try:
    donors_input = sys.argv[1]
    percentile_input = sys.argv[2]
    donors_repeat_file = sys.argv[3]
except IndexError:
    print("!!! Please input the files using the correct format: 1) donation input file, 2) percentile input file, 3) output file")
    print("Eg: python ./src/donation-analytics.py ./input/itcont.txt ./input/percentile.txt ./output/repeat_donors.txt")
    sys.exit()

# test input or output file read and write availability
check_files(donors_input, percentile_input, donors_repeat_file)

write_repeat_file = open(donors_repeat_file, 'w')

with open(percentile_input, 'r') as per_input:
    try:
        percentile = int(per_input.read())
        #print(percentile)
        if (percentile > 100 or percentile < 0):
            raise ValueError("The input percentile %d is not valid (0-100)" % (percentile))
    except ValueError:
        print("The percentile value is not correct. \n")

num, repeat_num = 0, 0
donor_id_dict = []
recepient_dict = []
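In this script check_files guards the I/O before any processing starts. A minimal sketch, assuming it only verifies that the two input files are readable and that the output location is writable (this project's real helper may do more):

# Illustrative sketch only; not the project's actual check_files.
import os
import sys

def check_files(donors_input, percentile_input, donors_repeat_file):
    """Exit with a message if the inputs cannot be read or the output cannot be written."""
    for path in (donors_input, percentile_input):
        if not os.access(path, os.R_OK):
            print("Cannot read input file: %s" % path)
            sys.exit(1)
    out_dir = os.path.dirname(donors_repeat_file) or '.'
    if not os.access(out_dir, os.W_OK):
        print("Cannot write to output location: %s" % donors_repeat_file)
        sys.exit(1)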
def _check_files(self, dir=None, exts=None): return check_files(os.path.join(self.project_path, dir), exts)
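This wrapper just prefixes the project path; the module-level check_files(path, exts) it delegates to is not shown. One plausible reading, assuming it returns the files under the directory whose extensions match, is sketched below.

# Assumed behaviour of the module-level check_files this wrapper delegates to.
import os

def check_files(directory, exts=None):
    """Return the files in `directory` whose extension is in `exts` (all files if exts is None)."""
    if not os.path.isdir(directory):
        return []
    names = os.listdir(directory)
    if exts is None:
        return names
    return [n for n in names if os.path.splitext(n)[1] in exts]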
def build_gsi(wrf_root, gsi_root, args): # Check environment. if not 'HDF5' in os.environ: res = subprocess.run(['which', 'h5dump'], stdout=subprocess.PIPE) if res.returncode == 0: os.environ['HDF5'] = os.path.dirname( os.path.dirname(res.stdout.decode('utf-8'))) cli.notice(f'Set HDF5 to {os.environ["HDF5"]}') if not 'HDF5' in os.environ: cli.warning('HDF5 environment variable is not set') if not 'NETCDF' in os.environ: res = subprocess.run(['which', 'ncdump'], stdout=subprocess.PIPE) if res.returncode == 0: os.environ['NETCDF'] = os.path.dirname( os.path.dirname(res.stdout.decode('utf-8'))) cli.notice(f'Set NETCDF to {os.environ["NETCDF"]}') if not 'NETCDF' in os.environ: cli.warning('NETCDF environment variable is not set!') if not os.getenv('LAPACK_PATH') and args.compiler_suite != 'intel': cli.error('Shell variable LAPACK_PATH is not set!') version = gsi_version(args.gsi_root) if version <= Version('3.6'): # 3.7 changes: Added wrf interface as a library (wrflib). No need to compile WRF with GSI and EnKF. if not os.path.isdir(args.wrf_root): cli.error(f'WRF directory {args.wrf_root} does not exist!') os.chdir(args.wrf_root) expected_exe_files = ('main/wrf.exe') if not check_files(expected_exe_files): cli.error('WRF has not been built! Build it first.') os.chdir(args.gsi_root) if args.force: run('rm -rf build') if not os.path.isdir('build'): os.mkdir('build') os.chdir('build') if version == Version('3.6'): expected_exe_files = ('bin/gsi.x', 'lib/libbacio_v2.0.1.a', 'lib/libbufr_v10.2.5.a', 'lib/libcrtm_v2.2.3.a', 'lib/libenkfdeplib.a', 'lib/libenkflib.a', 'lib/libgsilib_shrd.a', 'lib/libgsilib_wrf.a', 'lib/libnemsio_v2.2.1.a', 'lib/libsfcio_v1.1.0.a', 'lib/libsigio_v2.0.1.a', 'lib/libsp_v2.0.2.a', 'lib/libw3emc_v2.2.0.a', 'lib/libw3nco_v2.0.6.a') elif version == Version('3.7'): expected_exe_files = ('bin/enkf_wrf.x', 'bin/gsi.x', 'lib/libbacio_v2.0.1.a', 'lib/libbufr_v10.2.5.a', 'lib/libcrtm_v2.2.3.a', 'lib/libenkfdeplib.a', 'lib/libenkflib.a', 'lib/libgsilib_shrd.a', 'lib/libgsilib_wrf.a', 'lib/libnemsio_v2.2.1.a', 'lib/libsfcio_v1.1.0.a', 'lib/libsigio_v2.0.1.a', 'lib/libsp_v2.0.2.a', 'lib/libw3emc_v2.2.0.a', 'lib/libw3nco_v2.0.6.a') if not check_files(expected_exe_files): cmake_args = f'-DBUILD_ENKF=ON -DBUILD_CORELIBS=ON -DUSE_WRF=ON -DBUILD_WRF=ON -DBUILD_GFS=OFF ' if version == Version('3.6'): cli.notice('Fix GSI 3.6!') edit_file('../cmake/Modules/FindCORELIBS.cmake', [[ '\${CMAKE_SOURCE_DIR}/libsrc', '${CMAKE_SOURCE_DIR}/lib/libsrc' ]]) if args.compiler_suite == 'gnu': edit_file('../cmake/Modules/setCompilerFlags.cmake', [[ 'set\(BACIO_Fortran_FLAGS " -O3 -fconvert=big-endian -ffree-form', 'set(BACIO_Fortran_FLAGS " -O3 -fconvert=big-endian' ]]) elif args.compiler_suite == 'intel': edit_file('../cmake/Modules/setCompilerFlags.cmake', [[ 'set \(BACIO_Fortran_FLAGS "-O3 -free -assume nocc_omp', 'set(BACIO_Fortran_FLAGS " -O3 -assume nocc_omp' ]]) edit_file('../core-libs/sigio/CMakeLists.txt', [['\*\.f\)', '*.f90)']]) edit_file('../src/hybrid_ensemble_isotropic.F90', [['stop\(123\)', 'stop 123']]) edit_file('../src/setupoz.f90', [[ 'my_head%ij\(1\),my_head%wij\(1\)\)', 'my_head%ij,my_head%wij)' ]]) cmake_args += f'-DWRFPATH={args.wrf_root}' if version == Version('3.7'): cli.notice('Fix GSI 3.7!') edit_file('../src/setuplight.f90', [['my_head%wij\(1\)\)', 'my_head%wij)']]) cli.warning( 'GSI 3.7 has bug when rerun cmake, so clean all build files.') run('rm -rf ../build/*') cmake_args += '-DBUILD_UTIL_COM=ON' # Fix not-found -lnetcdf -lnetcdff. 
edit_file('../cmake/Modules/setCompilerFlags.cmake', [['-lnetcdf -lnetcdff', '']]) cli.notice('Configure GSI ...') if args.compiler_suite == 'gnu': cc = 'gcc' cxx = 'g++' fc = 'gfortran' elif args.compiler_suite == 'intel': cc = 'mpiicc' cxx = 'mpiicpc' fc = 'mpiifort' if args.verbose: run(f'CC={cc} CXX={cxx} FC={fc} cmake .. {cmake_args}') else: run(f'CC={cc} CXX={cxx} FC={fc} cmake .. {cmake_args} &> cmake.out' ) cli.notice('Compile GSI ...') if args.verbose: run('make') else: run('make &> make.out') if check_files(expected_exe_files): cli.notice('Succeeded.') else: if args.verbose: cli.error('Failed') else: cli.error(f'Failed! Check {args.gsi_root}/build/make.out') else: cli.notice('GSI has already been built.') if version == Version('3.6'): os.chdir(f'{args.gsi_root}/util/bufr_tools') if args.force: run('make clean') expected_exe_files = ( 'bufr_append_sample.exe', 'bufr_decode_radiance.exe', 'bufr_decode_sample.exe', 'bufr_encode_sample.exe', 'prepbufr_append_retrieve.exe', 'prepbufr_append_surface.exe', 'prepbufr_append_upperair.exe', 'prepbufr_decode_all.exe', 'prepbufr_encode_surface.exe', 'prepbufr_encode_upperair.exe', 'prepbufr_inventory.exe') if not check_files(expected_exe_files): edit_file('makefile', [['^\s*FC\s*=.*$', f'FC = {fc}'], ['-I\.\./\.\./dtc', '-I../../build'], ['-L\.\./\.\./dtc', '-L../../build'], ['-lbufr_i4r8', '-lbufr_v10.2.5']]) cli.notice('Compile bufr_tools ...') if args.verbose: run('make') else: run('make &> make.out') if check_files(expected_exe_files): cli.notice('Succeeded.') else: if args.verbose: cli.error('Failed!') else: cli.error( f'Failed! Check {args.gsi_root}/util/bufr_tools/make.out' ) else: cli.notice('GSI bufr_tools has been built.') os.chdir(f'{args.gsi_root}/util/Analysis_Utilities/read_diag/') expected_exe_files = ('read_diag_conv.exe', 'read_diag_conv_ens.exe', 'read_diag_rad.exe') if not check_files(expected_exe_files): edit_file('makefile', [[ 'include \.\./\.\./\.\./dtc/configure.gsi', '' ], ['\$\(SFC\)', fc], ['-I\.\./\.\./\.\./dtc', '-I../../../build'], [ '-L\.\./\.\./\.\./src -lgsi', '-L../../../build/lib -lgsilib_shrd' ], [ 'FLAGS= \$\(FFLAGS_DEFAULT\)', 'FLAGS = -fconvert=big-endian' ]]) cli.notice('Compile read_diag ...') if args.verbose: run('make') else: run('make &> make.out') if check_files(expected_exe_files): cli.notice('Succeeded.') else: if args.verbose: cli.error('Failed') else: cli.error( f'Failed! Check {args.gsi_root}/util/Analysis_Utilities/read_diag/make.out' ) else: cli.notice('GSI read_diag has been built.')
def run_wrfda_obsproc(work_root, wrfda_root, littler_root, config, args, wrf_work_dir=None, tag=None): start_time = config['custom']['start_time'] datetime_fmt = 'YYYY-MM-DD_HH:mm:ss' start_time_str = start_time.format(datetime_fmt) if not wrf_work_dir: if tag != None: wrf_work_dir = f'{work_root}/wrf_{tag}' else: wrf_work_dir = f'{work_root}/wrf' if tag != None: wrfda_work_dir = f'{work_root}/wrfda_{tag}/obsproc' else: wrfda_work_dir = f'{work_root}/wrfda/obsproc' if not os.path.isdir(wrfda_work_dir): os.mkdir(wrfda_work_dir) os.chdir(wrfda_work_dir) cli.notice('Use builtin obserr.') run(f'ln -sf {wrfda_root}/var/obsproc/obserr.txt {wrfda_work_dir}') # Use d01 domain extent. if check_files([f'{wrf_work_dir}/wrfinput_d01_{start_time_str}']): ncfile = Dataset(f'{wrf_work_dir}/wrfinput_d01_{start_time_str}', 'r') iproj = ncfile.getncattr('MAP_PROJ') phic = ncfile.getncattr('CEN_LAT') xlonc = ncfile.getncattr('CEN_LON') moad_cen_lat = ncfile.getncattr('MOAD_CEN_LAT') standard_lon = ncfile.getncattr('STAND_LON') ncfile.close() else: iproj = config['geogrid']['map_proj'] phic = config['geogrid']['ref_lat'] xlonc = config['geogrid']['ref_lon'] moad_cen_lat = config['geogrid']['ref_lat'] standard_lon = config['geogrid']['ref_lon'] output_format = get_value(config, ['custom', 'obsproc', 'output_format'], default=2) time_window = get_value(config, ['custom', 'wrfda', 'time_window'], default=360) if has_key(config, ('custom', 'da', 'type')): if config['custom']['da']['type'] == '3dvar': namelist_obsproc = f90nml.read( f'{wrfda_root}/var/obsproc/namelist.obsproc.3dvar.wrfvar-tut') else: cli.error('Currently, we only support 3DVar...') else: namelist_obsproc = f90nml.read( f'{wrfda_root}/var/obsproc/namelist.obsproc.3dvar.wrfvar-tut') namelist_obsproc['record1'][ 'obs_gts_filename'] = f'obs.gts.{start_time.format("YYYYMMDDHHmm")}' namelist_obsproc['record2']['time_window_min'] = start_time.subtract( minutes=time_window / 2).format('YYYY-MM-DD_HH:mm:ss') namelist_obsproc['record2']['time_analysis'] = start_time.format( 'YYYY-MM-DD_HH:mm:ss') namelist_obsproc['record2']['time_window_max'] = start_time.add( minutes=time_window / 2).format('YYYY-MM-DD_HH:mm:ss') namelist_obsproc['record3']['max_number_of_obs'] = 1200000 namelist_obsproc['record7']['PHIC'] = phic namelist_obsproc['record7']['XLONC'] = xlonc namelist_obsproc['record7']['MOAD_CEN_LAT'] = moad_cen_lat namelist_obsproc['record7']['STANDARD_LON'] = standard_lon namelist_obsproc['record8']['NESTIX'] = config['geogrid']['e_sn'] namelist_obsproc['record8']['NESTJX'] = config['geogrid']['e_we'] namelist_obsproc['record8']['DIS'] = config['geogrid']['dx'] namelist_obsproc['record9']['OUTPUT_OB_FORMAT'] = output_format namelist_obsproc.write('./namelist.obsproc', force=True) cli.stage(f'Run obsproc.exe at {wrfda_work_dir} ...') expected_files = [ f'obs_gts_{start_time.format("YYYY-MM-DD_HH:mm:ss")}.3DVAR' ] if not check_files(expected_files) or args.force: run('rm -f obs_gts_*') if has_key(config, ('custom', 'littler')): if 'dir_pattern' in config['custom'][ 'littler'] and 'file_pattern' in config['custom'][ 'littler']: dir_name = Template( config['custom']['littler']['dir_pattern']).render( time=start_time) file_name = Template( config['custom']['littler']['file_pattern']).render( time=start_time) littler_path = f'{littler_root}/{dir_name}/{file_name}' else: cli.error( 'No dir_pattern and file_pattern in custom->littler section!' 
) else: littler_path = f'{littler_root}/{start_time.format("YYYYMMDD")}/obs.gts.{start_time.format("YYYYMMDDHHmm")}' if os.path.exists(littler_path): run(f'ln -sf {littler_path} {wrfda_work_dir}/obs.gts.{start_time.format("YYYYMMDDHHmm")}' ) else: cli.error(f'Failed! {littler_path} Not Found.') submit_job(f'{wrfda_root}/var/obsproc/obsproc.exe', 1, config, args, wait=True) if not check_files(expected_files): cli.error(f'Failed!') cli.notice('Succeeded.') else: cli.notice('File obs_gts_* already exist.') run('ls -l obs_gts_*')
def main(): """ Module to execute the entire package from data retrieval to model performance metrics @:param: None :return: Post process results """ # Importing inhibitor notation data # The SMILES and InChI logs of the same material have identical indices # Creating and joining the SMILES and InChI dataframes along the same index utils.check_files() df_compounds_smiles = utils.create_dataframe( 'data/chemical_notation_' 'data/compounds_smiles.txt', 'smiles') df_compounds_smiles.rename(columns={'ID': 'CID'}, inplace=True) df_compounds_smiles.sort_values(by='CID', inplace=True) # Importing inhibitor activity data activity = pd.read_csv('data/activity_data/AID_743255_datatable.csv') activity = utils.clean_activity_dataframe(activity) # Merging activity data and compound notation data df = activity.merge(df_compounds_smiles) df.sort_values(by='CID', inplace=True) df.reset_index(drop=True, inplace=True) # Drop non-descriptor columns before feature space reduction df_target = df.drop(['SMILES', 'CID', 'Phenotype'], axis=1) # Extracting molecular descriptors for all compounds # print("Sending data for descriptor calculation") # utils.extract_all_descriptors(df, 'SMILES') # Importing feature sets df_charge = pd.DataFrame.from_csv('data/df_charge.csv') df_basak = pd.DataFrame.from_csv('data/df_basak.csv') df_con = pd.DataFrame.from_csv('data/df_con.csv') df_estate = pd.DataFrame.from_csv('data/df_estate.csv') df_constitution = pd.DataFrame.from_csv('data/df_constitution.csv') df_property = pd.DataFrame.from_csv('data/df_property.csv') df_kappa = pd.DataFrame.from_csv('data/df_kappa.csv') df_moe = pd.DataFrame.from_csv('data/df_moe.csv') print("Joining dataframes") df_descriptor = df_kappa.join(df_moe).join(df_constitution).\ join(df_property).join(df_charge).join(df_estate).join(df_con).join( df_basak) print("Joining dataframes done") print("Checking dataframe for NaN, infinite or too large values") df_descriptor = utils.remove_nan_infinite(df_descriptor) # Transform all column values to mean 0 and unit variance print("Transforming dataframe using mean and variance") df_descriptor = utils.transform_dataframe(df_descriptor) print("Transforming dataframe using mean and variance done") # Feature selection and space reduction print("Selecting best features in dataframe") df_features = utils.select_features(df_descriptor, df_target) print("Selecting best features in dataframe done") df = df_features.join(df_target) # Data to training task # Type check inputs for sanity if df is None: raise ValueError('df is None') if not isinstance(df, pd.DataFrame): raise TypeError('df is not a dataframe') if TARGET_COLUMN is None: raise ValueError('target_column is None') if not isinstance(TARGET_COLUMN, basestring): raise TypeError('target_column is not a string') if TARGET_COLUMN not in df.columns: raise ValueError('target_column (%s) is not a valid column name' % TARGET_COLUMN) # Train, validation and test split df_train, df_test = sklearn.cross_validation.train_test_split( df, test_size=0.25) # Remove the classification column from the dataframe x_train = df_train.drop(TARGET_COLUMN, 1) x_test = df_test.drop(TARGET_COLUMN, 1) y_train = pd.DataFrame(df_train[TARGET_COLUMN]) y_test = pd.DataFrame(df_test[TARGET_COLUMN]) with open(XY_PICKLE, 'wb') as results: pickle.dump(x_train, results, pickle.HIGHEST_PROTOCOL) pickle.dump(x_test, results, pickle.HIGHEST_PROTOCOL) pickle.dump(y_train, results, pickle.HIGHEST_PROTOCOL) pickle.dump(y_test, results, pickle.HIGHEST_PROTOCOL) models.run_models(x_train, y_train, 
x_test, y_test) post_process.results()
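utils.check_files() is called with no arguments at the top of main(), before any data is read. A sketch under the assumption that it simply asserts the presence of the input files referenced later in the function (the file list below is inferred from the paths used in main(), not taken from the real utils module):

# Sketch only -- the required-file list is an inference, not the actual implementation.
import os

REQUIRED_FILES = (
    'data/chemical_notation_data/compounds_smiles.txt',
    'data/activity_data/AID_743255_datatable.csv',
)

def check_files():
    """Raise early if any required input file is missing."""
    missing = [f for f in REQUIRED_FILES if not os.path.isfile(f)]
    if missing:
        raise IOError('Missing input files: %s' % ', '.join(missing))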
def main(): """ Main program for ADI data reduction, configured with a call to adiparam.GetConfig(), which brings up a GUI to set parameters. The pipeline is currently designed for SEEDS data taken without an occulting mask. You must have scipy, numpy, pyephem, multiprocessing, and matplotlib installed to use this pipeline. """ parser = optparse.OptionParser(usage=__doc__) parser.add_option("-p", "--prefix", dest="prefix", default="HICA", help="Specify raw file name prefix (default=%default)") opts, args = parser.parse_args() exec_path = os.path.dirname(os.path.realpath(__file__)) filesetup, adipar, locipar = GetConfig(prefix=opts.prefix) nframes = len(filesetup.framelist) ngroup = 1 + int((nframes - 1) / locipar.max_n) flat = pyf.open(filesetup.flat) if filesetup.pixmask is not None: hotpix = pyf.open(filesetup.pixmask) else: hotpix = None dimy, dimx = pyf.open(filesetup.framelist[0])[-1].data.shape mem, ncpus, storeall = utils.config(nframes, dimy * dimx) if filesetup.scale_phot: x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3) window = (x**2 + y**2 < 2.51**2) * 1.0 window /= np.sum(window) ref_phot, ref_psf = photometry.calc_phot(filesetup, adipar, flat, hotpix, mem, window) else: ref_psf = None ref_phot = None ################################################################ # WCS coordinates are not reliable in HiCIAO data with the image # rotator off. Compute parallactic angle. Otherwise, trust the # WCS coordinates. ################################################################ if 'HICA' in filesetup.framelist[0]: pa = np.asarray([transform.get_pa(frame) * -1 * np.pi / 180 for frame in filesetup.framelist]) else: pa = np.ones(len(filesetup.framelist)) for i in range(len(filesetup.framelist)): cd2_1 = pyf.open(filesetup.framelist[i])[0].header['cd2_1'] cd2_2 = pyf.open(filesetup.framelist[i])[0].header['cd2_2'] pa[i] = -np.arctan2(cd2_1, cd2_2) fullframe = re.sub("-C.*fits", ".fits", filesetup.framelist[0]) try: objname = pyf.open(fullframe)[0].header['OBJECT'] except: objname = "Unknown_Object" objname = re.sub(' ', '_', objname) np.savetxt(filesetup.output_dir + '/' + objname + '_palist.dat', pa) dr_rms = None #################################################################### # Default save/resume points: destriping, recentering, final files # Configuration gives the option to skip the destriping step (only # performing a flat-field), the dewarping, and the centering. #################################################################### if np.all(utils.check_files(filesetup, ext="_r")): print "\nResuming reduction from recentered files." 
if ngroup == 1: flux = utils.read_files(filesetup, ext="_r") else: flux = utils.read_files(filesetup, ext="_r") else: if storeall and np.all(utils.check_files(filesetup, ext="_ds")): flux = utils.read_files(filesetup, ext="_ds") elif not np.all(utils.check_files(filesetup, ext="_ds")): flux = parallel._destripe(filesetup, flat, hotpix, mem, adipar, write_files=True, storeall=storeall, full_destripe=adipar.full_destripe, do_horiz=adipar.full_destripe) else: flux = None if adipar.dewarp: flux = parallel._dewarp(filesetup, mem, flux=flux, storeall=storeall) if adipar.do_centroid: centers, dr_rms = centroid.fit_centroids(filesetup, flux, pa, storeall=storeall, objname=objname, method=adipar.center, psf_dir=exec_path+'/psfref', ref_psf=ref_psf) #centers = np.ndarray((nframes, 2)) #centers[:, 0] = 1026 - 128 #centers[:, 1] = 949 + 60 #dr_rms = 30 np.savetxt(filesetup.output_dir + '/' + objname + '_centers.dat', centers) #################################################################### # Recenter the data onto a square array of the largest dimension # such that the entire array has data #################################################################### mindim = min(dimy - centers[:, 0].max(), centers[:, 0].min(), dimx - centers[:, 1].max(), centers[:, 1].min()) mindim = int(mindim) * 2 - 1 flux = parallel._rotate_recenter(filesetup, flux, storeall=storeall, centers=centers, newdimen=mindim, write_files=True) nframes = len(filesetup.framelist) #################################################################### # Perform scaled PCA on the flux array; alternatively, read in an # array of principal components. Neither is currently used. #################################################################### if False: pcapath = '/scr/wakusei1/users/tbrandt' flux, pca_arr = pca.pca(flux, ncomp=20, nread=2, dosub=True, pcadir=pcapath + '/psfref') for i in range(nframes): out = pyf.HDUList(pyf.PrimaryHDU(flux[i].astype(np.float32), pyf.open(filesetup.framelist[i])[0].header)) rootfile = re.sub('.*/', '', filesetup.framelist[i]) out.writeto(filesetup.reduce_dir + '/' + re.sub('.fits', '_r.fits', rootfile), clobber=True) if dr_rms is None: dr_rms = 20 elif False: pca_dir = '.' npca = 40 pca_arr = np.zeros((npca, flux.shape[1], flux.shape[2]), np.float32) for i in range(npca): tmp = pyf.open(pca_dir + '/pcacomp_' + str(i) + '.fits')[0].data dy, dx = [tmp.shape[0] // 2, tmp.shape[1] // 2] pca_arr[i, yc - dy:yc + dy + 1, xc - dx:xc + dx + 1] = tmp else: pca_arr = None #################################################################### # Find the n closest matches to each frame. Not currently used. #################################################################### if False: corr = pca.allcorr(range(int(locipar.rmax)), flux, n=80) ngroup = 1 else: corr = None #################################################################### # Subtract a radial profile from each frame. Not currently used. 
#################################################################### if False: flux = parallel._radialsub(filesetup, flux, mode='median', center=None, rmax=None, smoothwidth=0) #################################################################### # Run LOCI if that ADI reduction method is chosen #################################################################### partial_sub = None full_pa = pa.copy() full_framelist = [frame for frame in filesetup.framelist] for igroup in range(ngroup): if ngroup > 1: filesetup.framelist = full_framelist[igroup::ngroup] if np.all(utils.check_files(filesetup, ext="_r")): flux = utils.read_files(filesetup, ext="_r") else: print "Unable to read recentered files for LOCI." sys.exit() pa = full_pa[igroup::ngroup] x = np.arange(flux.shape[1]) - flux.shape[1] // 2 x, y = np.meshgrid(x, x) r = np.sqrt(x**2 + y**2) if adipar.adi == 'LOCI': ################################################################ # Set the maximum radius at which to perform LOCI ################################################################ deltar = np.sqrt(np.pi * locipar.fwhm**2 / 4 * locipar.npsf) rmax = int(flux.shape[1] // 2 - deltar - 50) locipar.rmax = min(locipar.rmax, rmax) if dr_rms is None: nf, dy, dx = flux.shape fluxmed = np.median(flux, axis=0)[dy // 2 - 100:dy // 2 + 101, dx // 2 - 100:dx // 2 + 101] sat = fluxmed > 0.7 * fluxmed.max() r2 = r[dy//2 - 100:dy//2 + 101, dx//2 - 100:dx//2 + 101]**2 dr_rms = np.sqrt(np.sum(r2 * sat) / np.sum(sat)) ################################################################ # This is regular LOCI ################################################################ if locipar.feedback == 0: partial_sub = loci.loci(flux, pa, locipar, mem, mode='LOCI', pca_arr=None, r_ex=dr_rms, corr=corr, method='matrix', do_partial_sub=True, sub_dir=exec_path) ################################################################ # The next block runs LOCI once, de-rotates, takes the median, # and re-rotates to each frame's position angle. It then runs # LOCI again to over-correct the result. Not recommended for # SEEDS data with AO188. 
################################################################ else: fluxref = np.ndarray(flux.shape, np.float32) fluxref[:] = flux loci.loci(fluxref, pca_arr, pa, locipar, mem, mode='LOCI', r_ex=dr_rms, pca_arr=pca_arr, corr=corr, method='matrix', do_partial_sub=False) for i in range(flux.shape[0]): np.putmask(fluxref[i], r > locipar.rmax - 1, 0) np.putmask(fluxref[i], r < dr_rms + 1, 0) locipar.rmax -= 100 fluxref = parallel._rotate_recenter(filesetup, fluxref, theta=pa) for i in range(flux.shape[0]): np.putmask(fluxref[i], r > locipar.rmax - 1, 0) np.putmask(fluxref[i], r < dr_rms + 1, 0) locipar.rmax -= 100 fluxmed = np.median(fluxref, axis=0) for i in range(flux.shape[0]): fluxref[i] = fluxmed * locipar.feedback fluxref = parallel._rotate_recenter(filesetup, fluxref, theta=-pa) loci.loci(flux, pa, locipar, mem, mode='refine', fluxref=fluxref, pca_arr=pca_arr, rmin=dr_rms, r_ex=dr_rms) ################################################################ # Mask saturated areas (< dr_rms), do median subtraction at radii # beyond the limit of the LOCI reduction ################################################################ fluxmed = np.median(flux, axis=0) for i in range(flux.shape[0]): np.putmask(flux[i], r < dr_rms + 2, 0) np.putmask(flux[i], r > locipar.rmax - 1, flux[i] - fluxmed) #################################################################### # Alternative to LOCI: median PSF subtraction #################################################################### elif adipar.adi == 'median': medpsf = np.median(flux, axis=0) for i in range(flux.shape[0]): flux[i] -= medpsf else: print "Error: ADI reduction method " + adipar.adi + " not recognized." #sys.exit(1) #################################################################### # Derotate, combine flux array using mean/median hybrid (see # Brandt+ 2012), measure standard deviation at each radius #################################################################### if igroup == 0: newhead = utils.makeheader(flux[0], pyf.open(fullframe)[0].header, full_framelist, adipar, locipar) flux = parallel._rotate_recenter(filesetup, flux, theta=pa) fluxtmp, noise = combine.meanmed(flux) fluxbest = fluxtmp / ngroup if partial_sub is not None: partial_sub_tot = partial_sub / ngroup else: flux = parallel._rotate_recenter(filesetup, flux, theta=pa) fluxtmp, noise = combine.meanmed(flux) fluxbest += fluxtmp / ngroup if partial_sub is not None: partial_sub_tot += partial_sub / ngroup filesetup.framelist = full_framelist if partial_sub is not None: partial_sub = partial_sub_tot #################################################################### # Rescale all arrays to 2001x2001 so that the center is pixel number # (1000, 1000) indexed from 0. Use NaN to pad arrays. 
#################################################################### fluxbest = utils.arr_resize(fluxbest) if partial_sub is not None: partial_sub = utils.arr_resize(partial_sub, newdim=fluxbest.shape[0]).astype(np.float32) fluxbest /= partial_sub out = pyf.HDUList(pyf.PrimaryHDU(partial_sub)) out.writeto('partial_sub2.fits', clobber=True) x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3) window = (x**2 + y**2 < 2.51**2) * 1.0 window /= np.sum(window) fluxbest = signal.convolve2d(fluxbest, window, mode='same') noise = combine.radprof(fluxbest, mode='std', smoothwidth=2, sigrej=4.5)[0] r = utils.arr_resize(r) if dr_rms is not None: np.putmask(fluxbest, r < dr_rms + 3, np.nan) np.putmask(fluxbest, r > locipar.rmax - 2, np.nan) fluxsnr = (fluxbest / noise).astype(np.float32) #################################################################### # 5-sigma sensitivity maps--just multiply by the scaled aperture # photometry of the central star #################################################################### if partial_sub is not None: sensitivity = noise * 5 / partial_sub #################################################################### # Photometry of the central star #################################################################### if filesetup.scale_phot: #ref_phot = photometry.calc_phot(filesetup, adipar, flat, # hotpix, mem, window)[0] sensitivity /= ref_phot fluxbest /= ref_phot noise /= ref_phot sig_sens = combine.radprof(sensitivity, mode='std', smoothwidth=0)[0] outfile = open(filesetup.output_dir + '/' + objname + '_5sigma_sensitivity.dat', 'w') for i in range(sig_sens.shape[0] // 2, sig_sens.shape[0]): iy = sig_sens.shape[0] // 2 if np.isfinite(sensitivity[iy, i]): outfile.write('%8d %12.5e %12.5e %12e\n' % (i - iy, sensitivity[iy, i], sig_sens[iy, i], partial_sub[iy, i])) outfile.close() else: np.savetxt(filesetup.output_dir + '/' + objname + '_noiseprofile.dat', noise[noise.shape[0] // 2, noise.shape[1] // 2:].T) #################################################################### # Write the output fits files. #################################################################### snr = pyf.HDUList(pyf.PrimaryHDU(fluxsnr.astype(np.float32), newhead)) final = pyf.HDUList(pyf.PrimaryHDU(fluxbest.astype(np.float32), newhead)) if partial_sub is not None: contrast = pyf.HDUList(pyf.PrimaryHDU(sensitivity.astype(np.float32), newhead)) name_base = filesetup.output_dir + '/' + objname snr.writeto(name_base + '_snr.fits', clobber=True) final.writeto(name_base + '_final.fits', clobber=True) if partial_sub is not None: contrast.writeto(name_base + '_5sigma_sensitivity.fits', clobber=True)
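In this pipeline utils.check_files(filesetup, ext=...) is always wrapped in np.all(...), so it presumably returns one boolean per frame indicating whether the intermediate file with that suffix already exists in the reduce directory. A hedged sketch consistent with how the "_r" files are written above (attribute names follow the filesetup usage elsewhere in this script):

# Sketch of the per-frame existence check; an assumption, not the pipeline's actual helper.
import os
import re

def check_files(filesetup, ext="_r"):
    """Return a list of booleans, one per raw frame, telling whether the
    corresponding <name><ext>.fits file already exists in reduce_dir."""
    exists = []
    for frame in filesetup.framelist:
        rootfile = re.sub('.*/', '', frame)
        reduced = os.path.join(filesetup.reduce_dir,
                               re.sub('.fits', ext + '.fits', rootfile))
        exists.append(os.path.isfile(reduced))
    return exists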