def test_compile_update_catalogs(app, status, warning):
    app.builder.compile_update_catalogs()
    catalog_dir = locale_dir / app.config.language / 'LC_MESSAGES'
    expect = set(x.replace('.po', '.mo')
                 for x in find_files(catalog_dir, '.po'))
    actual = set(find_files(catalog_dir, '.mo'))
    assert actual  # not empty
    assert actual == expect
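# Most snippets in this collection call a `find_files` helper that is not
# shown. A minimal sketch of such a helper follows; it is an assumption, not
# any of the original implementations, and the callers are not consistent
# about its contract (some pass (directory, pattern), others
# (pattern, directory), and the Sphinx tests above match on a bare suffix
# like '.po' rather than a glob).
import fnmatch
import os


def find_files(root, pattern='*'):
    """Recursively yield paths under `root` whose basename matches `pattern`."""
    for dirpath, _dirnames, filenames in os.walk(str(root)):
        for filename in filenames:
            if fnmatch.fnmatch(filename, pattern):
                yield os.path.join(dirpath, filename)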
def plot_wise(cat_path):
    for catfile in find_files(cat_path, "*merged+wise.csv"):
        print("\nreading catalog: {}".format(catfile))
        df = pd.read_csv(catfile)
        # convert to magnitudes
        nbadflux = (df.flux <= 0).sum()
        # (was a `try: assert ... except:` anti-pattern; a plain check suffices)
        if nbadflux > 0:
            print("warning: {} negative flux source(s)".format(nbadflux))
        ch = catfile.split('/')[-1].split('_')[1]
        mags = spz_jy_to_mags(df.flux * 1e-3, float(ch))
        if ch == '1':
            plt.scatter(df.W1mag, mags)
            plt.xlabel('W1 [mag]')
            plt.ylabel('I1 [mag]')
        elif ch == '2':
            plt.scatter(df.W2mag, mags)
            plt.xlabel('W2 [mag]')
            plt.ylabel('I2 [mag]')
        ax = plt.gca()
        xlim, ylim = ax.get_xlim(), ax.get_ylim()
        plt.plot([-5, ylim[1] * 2], [-5, ylim[1] * 2], 'r-')
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        reg = catfile.split('/')[-1].split('_')[0]
        name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=120)
        plt.close()
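# `spz_jy_to_mags` is another helper the catalog snippets assume. A plausible
# sketch, assuming it converts Spitzer/IRAC flux densities in Jy to Vega
# magnitudes using the IRAC zero-magnitude flux densities of Reach et al.
# (2005): 280.9 Jy for channel 1 and 179.7 Jy for channel 2. The zero points
# actually used by the original pipeline are not shown, so treat these as
# illustrative.
import numpy as np

IRAC_ZERO_POINTS_JY = {1: 280.9, 2: 179.7}  # assumed values


def spz_jy_to_mags(flux_jy, channel):
    """Convert IRAC flux density [Jy] to a Vega magnitude for the given channel."""
    return -2.5 * np.log10(flux_jy / IRAC_ZERO_POINTS_JY[int(channel)])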
def check_n_in_aper(radius_factor=1, k=100):
    for catfile in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):
        print()
        print(catfile)
        names = open(catfile).readline().split()[1:]
        cat = np.recfromtxt(catfile, names=names)
        xscfile = catfile.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')
        print(xscfile)
        names = open(xscfile).read().split('\n')[76].split('|')[1:-1]
        xsc = np.recfromtxt(xscfile, skip_header=80, names=names)
        n_in_aper = []
        coords = radec_to_coords(cat.ra, cat.dec)
        kdt = KDT(coords)
        for i in range(xsc.size):
            r_deg = xsc.r_ext[i] / 3600.
            idx, ds = spherematch2(xsc.ra[i], xsc.dec[i], cat.ra, cat.dec,
                                   kdt, tolerance=radius_factor * r_deg, k=k)
            n_in_aper.append(ds.size)
        for pair in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print(pair)
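# `radec_to_coords`, `KDT`, and `spherematch2` come from the photometry
# package and are not shown. A rough sketch of the underlying idea, as an
# assumption: place the catalog on the unit sphere, build a KD-tree on the
# 3-D coordinates, and match within an angular tolerance by converting the
# tolerance to a chord length. Names and signatures are illustrative only.
import numpy as np
from scipy.spatial import cKDTree

KDT = cKDTree  # the snippets build the tree as KDT(coords)


def radec_to_coords(ra_deg, dec_deg):
    """Convert RA/Dec [deg] to unit vectors on the sphere, shape (n, 3)."""
    ra, dec = np.radians(ra_deg), np.radians(dec_deg)
    return np.column_stack([np.cos(dec) * np.cos(ra),
                            np.cos(dec) * np.sin(ra),
                            np.sin(dec)])


def spherematch2(ra1, dec1, ra2, dec2, kdt, tolerance, k=100):
    """Indices into (ra2, dec2) within `tolerance` degrees of (ra1, dec1)."""
    point = radec_to_coords(np.atleast_1d(ra1), np.atleast_1d(dec1))[0]
    chord = 2.0 * np.sin(np.radians(tolerance) / 2.0)
    dists, idx = kdt.query(point, k=k, distance_upper_bound=chord)
    good = np.isfinite(dists)  # unmatched neighbors come back as inf
    ds = 2.0 * np.degrees(np.arcsin(dists[good] / 2.0))  # chord -> angle
    return idx[good], ds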
def readme(config: dict, app_logger: logger.Logger) -> bool:
    """
    Display contents of readme file located within migration directory.

    Return False if readme file doesn't exist.

    :param config: pymigrate configuration.
    :param app_logger: pymigrate configured logger.
    :return: True on success, False otherwise.
    """
    app_logger.log_with_ts('Running readme action', logger.Levels.DEBUG)
    migration_dir = os.path.join(os.pardir, config['PROJECT_DIR'],
                                 config['MIGRATIONS_DIR'],
                                 config['MIGRATION_ID'])
    readme_files = util.find_files('readme*', migration_dir, False)
    if len(readme_files) != 0:
        for readme_file in readme_files:
            with open(readme_file, 'r') as f:
                print("Contents of {0}".format(readme_file))
                print(f.read())
    else:
        app_logger.log_with_ts("No readme files found", logger.Levels.ERROR)
        return False
    return True
def dir_walk(target_dir=None, quiet=None):
    '''recursively walk a directory containing cti and return the stats'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ',
                   Bar(marker=RotatingMarker()), ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(),
                        'exploit_targets': set(), 'incidents': set(),
                        'indicators': set(), 'threat_actors': set(),
                        'ttps': set()}
    cooked_cybox_objs = dict()
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                if k not in cooked_cybox_objs:
                    cooked_cybox_objs[k] = set()
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i)
        except Exception:
            # skip files that fail to parse (the original's bare `except: next`
            # merely evaluated the `next` builtin and never continued the loop;
            # `i` was also undefined without the enumerate above)
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
def pylintChecker():
    filelist = util.find_files("../", "*.py")
    for f in filelist:
        print(f)
        # (pylint_stdout, pylint_stderr) = lint.py_run("../analyzer.py", True)
        # print(pylint_stdout.readlines())
        # print(pylint_stderr.readlines())
    return True
def setup_test():
    # delete remnants left over after failed build
    root.rmtree(True)
    (rootdir / 'roots' / 'test-intl').copytree(root)
    # copy all catalogs into locale layout directory
    for po in find_files(root, '.po'):
        copy_po = (locale_dir / 'en' / 'LC_MESSAGES' / po)
        if not copy_po.parent.exists():
            copy_po.parent.makedirs()
        shutil.copy(root / po, copy_po)
def parseDir(path):
    # set up class and results dictionary
    log.info("Performing recursive search for smali files")
    classes = {}
    sharedobj_strings = {}
    for smali in util.find_files(path, '*.smali'):
        # (the original began this loop body with a stray `continue`,
        # which skipped the parsing below entirely)
        log.info("Parsing " + smali)
        f = open(smali, 'r')
        smali_class = parseSmaliFiles(f)
        classes[smali_class['ClassName']] = smali_class
    for sharedobj in util.find_files(path, '*.so'):
        log.info("Processing: " + sharedobj)
        # (the original also ran parseSmaliFiles on the shared object and
        # discarded the result; that looks like a copy-paste leftover)
        sharedobj_strings[sharedobj] = util.unique_strings_from_file(sharedobj)
    log.info("Parsing Complete")
    return {'classes': classes, 'sharedobjs': sharedobj_strings}
def plot_sdss(cat_path):
    for catfile in find_files(cat_path, "*merged+sdss.txt"):
        # for now ignore the channel 2 files
        if catfile.split('/')[-1].split('_')[1] != '1':
            continue
        print("\nreading catalog: {}".format(catfile))
        df = pd.read_table(catfile, sep=' ')
        # get rid of negative flux sources, if any
        df = df[df.flux > 0]
        # convert to magnitudes
        mags = spz_jy_to_mags(df.flux * 1e-3, 1)
        # print counts per magnitude bin
        for i in range(10, 15):
            sc = ((df.cl == 3) & (mags > i) & (mags < i + 1)).sum()
            xc = ((df.xsc == 1) & (mags > i) & (mags < i + 1)).sum()
            msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
            print(msg.format(i, i + 1, sc, xc))
        # print number of sources agreed upon
        agree = ((df.xsc == 1) & (df.cl == 3)).sum()
        disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
        na = ((df.xsc == 1) & (df.cl == 0)).sum()
        print("{} 2MASS XSC sources classified as galaxies by SDSS".format(agree))
        print("{} 2MASS XSC sources classified as stars by SDSS".format(disagree))
        print("{} 2MASS XSC sources not matched to SDSS".format(na))
        # plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
        xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
        sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
        # mags[xsc_gals].hist(label='2MASS XSC', normed=True)
        # mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
        plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
                 bins=5, label=['2MASS', 'SDSS'])
        plt.xlabel('IRAC1 [mag]')
        plt.ylabel('Number Count')
        reg = catfile.split('/')[-1].split('_')[0]
        plt.title('{} Extended Sources / Galaxies'.format(reg))
        plt.legend(loc=2)
        name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=100)
        plt.close()
        print("created file: {}".format(outpath))
def check_archive(folder, mask, wrapper, file_mask, use_crc, output=None,
                  force=False):
    """Check folder for all matching archives and extract matching files from them"""
    if not output:
        output = os.path.join(CONFIG["DATA"],
                              os.path.basename(os.path.normpath(folder)))
    if not force and os.path.exists(output):
        print("Output path {} already exists - not extracting".format(output))
        return
    print("Extracting to " + output)
    for path in find_files(folder, file_mask):
        with wrapper(path) as archive:
            for member in archive.infolist():
                if fnmatch.fnmatch(member.filename, mask):
                    check_file(output, archive, member, use_crc)
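# A hypothetical invocation of check_archive: zipfile.ZipFile satisfies the
# `wrapper` contract used above (a context manager exposing infolist(), whose
# members carry a .filename), so pulling every shapefile out of all zips in a
# folder might look like this. The paths are made up for illustration.
import zipfile

check_archive('/data/incoming', '*.shp', zipfile.ZipFile, '*.zip',
              use_crc=False, output='/data/extracted/shapefiles')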
def db_update(config: dict, app_logger: logger.Logger) -> bool:
    """
    Check migrations directory for new migrations since last run and update
    migrations database.

    :param config: pymigrate configuration
    :param app_logger: instance of configured logger
    :return:
    """
    app_logger.log_with_ts('Starting migration database update process', logger.Levels.DEBUG)
    migrations_directory_path = os.path.join(os.pardir, config['PROJECT_DIR'], config['MIGRATIONS_DIR'])
    migration_ids = [migration_id for migration_id in os.listdir(migrations_directory_path)
                     if os.path.isdir(os.path.join(migrations_directory_path, migration_id))]
    migrations_from_db = get_statuses(migrations_directory_path + '/migrations.db', app_logger)
    branch = git.get_branch(migrations_directory_path)
    app_logger.log_with_ts('Got git branch: {0}'.format(branch), logger.Levels.DEBUG)
    # TODO: handle io, sqlite db exceptions
    with sqlite3.connect(migrations_directory_path + '/migrations.db') as conn:
        c = conn.cursor()
        for migration_id, status in migrations_from_db.items():
            if migration_id not in migration_ids:
                app_logger.log_with_ts('Migration {0} is missing on disk, marking it ABSENT'.format(migration_id),
                                       logger.Levels.DEBUG)
                c.execute("UPDATE migrations SET presence='ABSENT' where migration_id='{0}'".format(migration_id))
            elif status == 'ABSENT':
                # present on disk again (the original repeated the
                # `migration_id not in migration_ids` test here, which made
                # this branch unreachable)
                app_logger.log_with_ts('Migration re-appeared: {0}'.format(migration_id), logger.Levels.DEBUG)
                c.execute("UPDATE migrations SET presence='PRESENT' where migration_id='{0}'".format(migration_id))
        for migration_id in migration_ids:
            if migration_id not in migrations_from_db:
                app_logger.log_with_ts('New migration detected: {0}'.format(migration_id), logger.Levels.DEBUG)
                c.execute("INSERT INTO migrations VALUES ('{0}', 'PENDING', 'PRESENT', '{1}')".format(migration_id,
                                                                                                      branch))
            # Set migration status MANUAL if readme.* is present
            check_query = "SELECT status from migrations where migration_id='{0}'"
            readme_files = util.find_files('readme*', migrations_directory_path + '/' + migration_id, False)
            if len(readme_files) != 0 and os.path.isfile(readme_files[0]) and \
                    c.execute(check_query.format(migration_id)).fetchone()[0].replace('\n', '') not in (
                        Status.DONE.name, Status.FAILED.name, Status.SKIP.name):
                app_logger.log_with_ts('Readme file detected for migration: {0}'.format(migration_id),
                                       logger.Levels.DEBUG)
                c.execute("UPDATE migrations SET status='MANUAL' where migration_id='{0}'".format(migration_id))
    return True
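# The queries above interpolate migration_id straight into the SQL text.
# sqlite3 supports parameterized statements, which avoid quoting bugs and SQL
# injection; a safer equivalent of the presence update, as a sketch:
c.execute("UPDATE migrations SET presence=? WHERE migration_id=?",
          ('ABSENT', migration_id))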
def run_migration(migration_id: str, config: dict, app_logger: logger.Logger) -> bool:
    """
    Run migration.

    :param migration_id: id of migration to run
    :param config: pymigrate configuration
    :param app_logger: instance of configured logger
    """
    migration_dir = os.path.join(os.pardir,
                                 config['PROJECT_DIR'] + '/' + config['MIGRATIONS_DIR'] + '/' + migration_id)
    app_logger.log_with_ts("Running migration {0} from directory {1}".format(migration_id, migration_dir),
                           logger.Levels.DEBUG)
    # we do not expect more than one migrate* exec
    # TODO: maybe we should exec only migrate.sh if it exists and not touch other migrate* executables there
    migrate_executable = util.find_files('migrate*', migration_dir, True).pop()
    tmp_file = '/tmp/.migration_runner_stream.tmp'
    cmd = migrate_executable + " {0} ".format(config['ENVIRONMENT'])
    with io.open(tmp_file, 'wb') as writer, io.open(tmp_file, 'rb', 1) as reader:
        child = subprocess.Popen(cmd, shell=True, stdout=writer, stderr=subprocess.STDOUT, env=config)
        print('stdout:')
        while child.poll() is None:
            print(bytes(reader.read()).decode())
            time.sleep(0.5)
        print(bytes(reader.read()).decode())
        exit_code = child.returncode
    app_logger.log_with_ts("Migration executable exit code: {0}".format(exit_code), logger.Levels.DEBUG)
    os.remove(tmp_file)
    if int(exit_code) == 0:
        app_logger.log_with_ts("Migration is considered DONE", logger.Levels.DEBUG)
        set_status_done(migration_id, app_logger,
                        os.path.join(os.pardir, config['PROJECT_DIR'] + '/' + config['MIGRATIONS_DIR']))
        return True
    else:
        app_logger.log_with_ts("Migration is considered FAILED", logger.Levels.DEBUG)
        set_status_failed(migration_id, app_logger,
                          os.path.join(os.pardir, config['PROJECT_DIR'] + '/' + config['MIGRATIONS_DIR']))
        return False
def match_wise(cat_path, sdss=True):
    if sdss:
        search_pattern = "*merged+sdss.txt"
    else:
        search_pattern = "*merged.txt"
    for catfile in find_files(cat_path, search_pattern):
        # read pipeline catalog
        print("\nreading catalog: {}".format(catfile))
        cat = pd.read_table(catfile, sep=' ')
        # retrieve WISE data from ViZieR if not already downloaded
        ch = catfile.split('/')[-1].split('_')[1]
        if sdss:
            outpath = catfile.replace('{}_merged+sdss.txt'.format(ch), 'wise.vot')
        else:
            outpath = catfile.replace('{}_merged.txt'.format(ch), 'wise.vot')
        if not os.path.isfile(outpath):
            cntr_ra = np.median(cat.ra)
            cntr_dec = np.median(cat.dec)
            # get source from one corner of the mosaic to calculate radius
            c1 = (cat.ra.min(), cat.dec[cat.ra == cat.ra.min()].values[0])
            # make radius 10% bigger just to be on safe side
            radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
            url = get_url(cntr_ra, cntr_dec, radius)
            print("retrieving URL: {}".format(url))
            handler = urllib2.urlopen(url)
            raw = handler.read()
            with open(outpath, 'wb') as f:
                f.write(raw)
            print("created file: {}".format(outpath))
        # parse VOTable
        print("reading VOTable: {}".format(outpath))
        table = parse_single_table(outpath)
        # if this is one of the southern hemisphere regions, delete and continue
        if table.array.size == 0:
            os.remove(outpath)
            print("no WISE coverage")
            continue
        # get unmasked array
        wise = table.array.data
        # make sure sky coverage is big enough
        assert wise['RAJ2000'].min() < cat.ra.min()
        assert wise['RAJ2000'].max() > cat.ra.max()
        assert wise['DEJ2000'].min() < cat.dec.min()
        assert wise['DEJ2000'].max() > cat.dec.max()
        # match to catalog
        tol = 2 / 3600.
        if cat.shape[0] < wise.shape[0]:
            idx1, idx2, ds = spherematch(cat.ra, cat.dec,
                                         wise['RAJ2000'], wise['DEJ2000'],
                                         tolerance=tol)
        else:
            idx2, idx1, ds = spherematch(wise['RAJ2000'], wise['DEJ2000'],
                                         cat.ra, cat.dec, tolerance=tol)
        print("matched {} out of {} sources with {} arcsec tolerance".format(
            ds.size, cat.shape[0], tol * 3600))
        # add WISE to the catalog
        if ch == '1':
            cat['W1mag'] = np.repeat(np.nan, cat.shape[0])
            cat['e_W1mag'] = np.repeat(np.nan, cat.shape[0])
            cat['W1mag'][idx1] = wise['W1mag'][idx2]
            cat['e_W1mag'][idx1] = wise['e_W1mag'][idx2]
        elif ch == '2':
            cat['W2mag'] = np.repeat(np.nan, cat.shape[0])
            cat['e_W2mag'] = np.repeat(np.nan, cat.shape[0])
            cat['W2mag'][idx1] = wise['W2mag'][idx2]
            cat['e_W2mag'][idx1] = wise['e_W2mag'][idx2]
        else:
            print("unexpected error adding WISE data")
        # write to new file
        outpath = catfile.replace('.txt', '+wise.csv')
        # fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
        # hdr = ' '.join(names)+' cl'
        # np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
        cat.to_csv(outpath, index=False, float_format='%.8f')
        print("created file: {}".format(outpath))
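# `great_circle_distance` is assumed to return the angular separation in
# degrees between two sky positions given in degrees. A standard haversine
# sketch that would fit the usage above:
import numpy as np


def great_circle_distance(ra1, dec1, ra2, dec2):
    """Angular separation [deg] between (ra1, dec1) and (ra2, dec2) [deg]."""
    ra1, dec1, ra2, dec2 = map(np.radians, (ra1, dec1, ra2, dec2))
    h = (np.sin((dec2 - dec1) / 2) ** 2 +
         np.cos(dec1) * np.cos(dec2) * np.sin((ra2 - ra1) / 2) ** 2)
    return np.degrees(2 * np.arcsin(np.sqrt(h)))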
def find_vct_files(root_dir):
    """Return a chronologically sorted list of VCT file paths in root_dir."""
    files = util.find_files(root_dir)
    files = parse_file_list(files)
    return seaflowfile.sorted_files(files)
def merge_subarray(vg_dir, bcdphot_dir):
    out_dir = vg_dir.replace('clean', 'plots_catalogs')
    os.mkdir(out_dir)
    hdr_files = find_files(bcdphot_dir, '*combined_hdr_*xsc_cor.txt')
    # hdr_file = list(hdr_files)[0]
    for hdr_file in hdr_files:
        reg, ch = hdr_file.split('/')[-1].split('_')[:2]
        sub_file = '/'.join([vg_dir, "d{}_ch{}_agg.csv".format(reg, ch)])
        hdr_names = open(hdr_file).readline().split()[1:]
        hdr = np.recfromtxt(hdr_file, names=hdr_names)
        sub = np.recfromcsv(sub_file)
        # sub.flux *= 1e-3    # convert from uJy to mJy
        idx1, idx2, ds = spherematch(sub.ra, sub.dec, hdr.ra, hdr.dec,
                                     tolerance=3 / 3600.)
        df = pd.DataFrame({'sub_flux': sub.flux[idx1],
                           'hdr_flux': hdr.flux[idx2]})
        slope = fit_line(df, int(ch))
        with open("{}/linefits.txt".format(out_dir), 'a') as f:
            f.write("{} {} {}\n".format(reg, ch, slope))
        fig = df.plot(x='hdr_flux', y='sub_flux', kind='scatter')
        fig.plot([0, fig.get_xlim()[1]], [0, slope * fig.get_xlim()[1]], 'r-')
        fig.set_title("region {} channel {}".format(reg, ch))
        fig.text(fig.get_xlim()[1] * 0.2, fig.get_ylim()[1] * 0.8,
                 "slope: {0:3f}".format(slope), fontsize=24)
        plt.savefig("{}/{}_{}_linefit.png".format(out_dir, reg, ch), dpi=300)
        plt.close()
        # now save the (uncorrected) matched data to disk
        sub_matched = pd.DataFrame.from_records(sub[idx1])
        # rename the columns
        sub_matched.columns = ['sub_' + i for i in sub_matched.columns.tolist()]
        # set hdr_matched dataframe index equal to sub_matched index, this is
        # necessary for concatenation using pandas.concat
        hdr_matched = pd.DataFrame.from_records(hdr[idx2]).set_index(sub_matched.index)
        # rename the columns
        hdr_matched.columns = ['hdr_' + i for i in hdr_matched.columns.tolist()]
        # concatenate
        concat = pd.concat([sub_matched, hdr_matched], 1)
        # # convert subarray flux to mJy
        # concat.sub_flux = concat.sub_flux*1e3
        # concat.sub_unc = concat.sub_unc*1e3
        concat.to_csv("{}/{}_{}_hdr_vs_sub.csv".format(out_dir, reg, ch),
                      index=False, float_format='%.8f')
        # now correct all the subarray flux values with the slope
        sub.flux /= slope
        # now merge hdr and subarray into one dataset:
        # want to keep all the hdr photometry that is not saturated, and
        # keep only the subarray photometry above the hdr saturation limit
        cutoff = get_cutoff(ch)
        bad = hdr.flux > cutoff
        hdr_subset = pd.DataFrame.from_records(hdr[~bad])
        bad = sub.flux < cutoff
        sub_subset = pd.DataFrame.from_records(sub[~bad])
        # add n_obs column to subarray data so it has same format as hdr
        sub_subset['n_obs'] = 4
        # add column indicating whether it came from subarray
        hdr_subset['sub'] = np.zeros(hdr_subset.shape[0]).astype(int)
        sub_subset['sub'] = np.ones(sub_subset.shape[0]).astype(int)
        # concatenate them
        concat = pd.concat([hdr_subset, sub_subset], 0, ignore_index=True)
        # get rid of the 'id' field since it is no longer relevant
        # but add a column indicating if it was a 2MASS XSC measurement
        concat['xsc'] = np.zeros(concat.shape[0]).astype(int)
        concat.xsc[concat.id < 1] = 1
        concat = concat.drop('id', 1)
        # apply 1% flux reduction to correct for stray light (only to >100 mJy sources)
        concat.flux[concat.flux > 100] *= 0.99
        concat.unc[concat.flux > 100] *= 0.99
        # write to disk
        concat.to_csv("{}/{}_{}_merged.txt".format(out_dir, reg, ch),
                      index=False, sep=' ', float_format='%.8f')
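# `fit_line` and `get_cutoff` are pipeline helpers not shown here. A guess at
# the spirit of fit_line, assuming it returns a least-squares slope through
# the origin relating hdr_flux to sub_flux (the channel argument may select
# flux ranges or outlier cuts in the real code; it is ignored in this sketch):
import numpy as np


def fit_line(df, ch):
    """Best-fit slope b for sub_flux ~ b * hdr_flux, constrained through the origin."""
    x, y = df.hdr_flux.values, df.sub_flux.values
    return float(np.dot(x, y) / np.dot(x, x))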
import importlib
import re

from util import find_files

modules = []
for file in find_files("modules"):
    if file.endswith(".py") and not file == "__init__.py":
        filename = 'modules.' + file[:-3]
        modules.append(importlib.import_module(filename))

# for module in modules:
#     print(module)

pattern = re.compile(r'(http://i.imgur.com/(.*))(\?.*)?')
result = pattern.search("http://i.imgur.com/test")
# using regex here instead of BeautifulSoup because we are parsing a url, not html
if result:
    print(result.group(0))
    print(len(result.groups()))
def run_xsc_phot(bcdphot_out_path, mosaic_path):
    replaced = {}
    for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):
        print("\n======================================================")
        print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
        print("------------------------------------------------------")
        outpath = cat.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')
        # retrieve 2mass data if file doesn't already exist (from previous run)
        if not os.path.isfile(outpath):
            # get url and retrieve data
            url = query_2mass_xsc_polygon(*get_region_corners(cat))
            print("\ndownloading 2MASS photometry from: {}".format(url))
            text = urllib2.urlopen(url).read()
            # write to disk
            with open(outpath, 'w') as f:
                f.write(text)
            print("\ncreated file: {}".format(outpath))
        # read back in as recarray
        print("\nreading: {}".format(outpath))
        names = open(outpath).read().split('\n')[76].split('|')[1:-1]
        da = np.recfromtxt(outpath, skip_header=80, names=names)
        # write input file for xsc_phot.pro
        infile_outpath = '/'.join(cat.split('/')[:-1]) + '/xsc.txt'
        with open(infile_outpath, 'w') as w:
            for i in range(da.shape[0]):
                w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i],
                                               da.dec[i], da.r_ext[i]))
        print("\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))
        # locate the FITS mosaic file for xsc_phot.pro to do photometry on
        reg, ch = cat.split('/')[-1].split('_')[:2]
        mosaicfile = filter(lambda x: 'dirbe{}/ch{}/long/full/Combine'
                            .format(reg, ch) in x,
                            find_files(mosaic_path, '*mosaic.fits'))[0]
        print("\nfound mosaic file: {}".format(mosaicfile))
        # spawn IDL subprocess running xsc_phot.pro and catch stdout in file
        outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
        if not os.path.isfile(outpath):
            outfile = open(outpath, 'w')
            print("\nspawning xsc_phot.pro IDL subprocess")
            cmd = "xsc_phot,'" + mosaicfile + "','" + infile_outpath + "','long'"
            rc = subprocess.call(['/usr/local/itt/idl71/bin/idl', '-quiet', '-e', cmd],
                                 stderr=subprocess.PIPE, stdout=outfile)
            outfile.close()
        # read in output to recarray
        print("\nreading: {}".format(outpath))
        phot = np.recfromtxt(outpath, names=['id', 'flux', 'unc', 'sky', 'skyunc'])
        # make sure rows are aligned
        assert (da.designation == phot.id).all()
        # ignore xsc sources we got a NaN or negative flux for
        bad = np.isnan(phot.flux) | (phot.flux < 0)
        print("\naper.pro returned NaN or negative flux for {} sources".format(bad.sum()))
        if bad.sum() > 0:
            for i in phot[bad].id:
                print(i)
            outpath = cat.replace('combined_hdr_catalog.txt', 'xsc_nan_phot.csv')
            with open(outpath, 'w') as f:
                w = csv.writer(f)
                w.writerow(da.dtype.names)
                w.writerows(da[bad].tolist())
            print('\ncreated file: {}'.format(outpath))
        phot = phot[~bad]
        da = da[~bad]
        # read in pipeline catalog
        print("\nreading: {}".format(cat))
        names = open(cat).readline().split()[1:]
        c = np.recfromtxt(cat, names=names)
        # loop through xsc sources and find matches in pipeline catalog
        print("\nfinding records associated with XSC sources in pipeline catalog")
        c_flux_total = []
        n_in_aper = []
        c_idx = []
        coords = radec_to_coords(c.ra, c.dec)
        kdt = KDT(coords)
        for i in range(phot.size):
            radius = da.r_ext[i] / 3600.
            # idx1, idx2, ds = spherematch(da.ra[i], da.dec[i],
            #     c.ra, c.dec, tolerance=radius)
            idx, ds = spherematch2(da.ra[i], da.dec[i], c.ra, c.dec,
                                   kdt, tolerance=radius, k=500)
            # c_flux_total.append(c.flux[idx2].sum())
            # n_in_aper.append(c.flux[idx2].size)
            # c_idx.append(idx2.tolist())
            c_flux_total.append(c.flux[idx].sum())
            n_in_aper.append(ds.size)
            c_idx.append(idx.tolist())
        print("\nhistogram of source counts in r_ext aperture")
        for pair in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print(pair)
        # create new version of catalog file with xsc-associated entries replaced
        c_idx = np.array(flatten(c_idx))
        print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
        replaced[cat] = {'old': c_idx.size, 'new': phot.size}
        replaced[cat]['hist'] = [(i, n_in_aper.count(i)) for i in set(n_in_aper)]
        c = np.delete(c, c_idx)
        newrows = np.rec.array([(-i, da.ra[i], da.dec[i], phot.flux[i],
                                 phot.unc[i], 1) for i in range(phot.size)],
                               dtype=c.dtype)
        newcat = np.hstack((c, newrows))
        # write new version of catalog to disk
        fmt = ['%i'] + ['%0.8f'] * 2 + ['%.4e'] * 2 + ['%i']
        outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
        np.savetxt(outpath, newcat, fmt=fmt, header=' '.join(names))
        print('\ncreated file: {}'.format(outpath))
        # make plot of total old vs. new flux
        plt.scatter(c_flux_total, phot.flux)
        ylim = plt.gca().get_ylim()
        plt.xlim(*ylim)
        plt.plot(ylim, ylim, 'r-')
        plt.xlabel('old flux [mJy]')
        plt.ylabel('new flux [mJy]')
        name = ' '.join(cat.split('/')[-1].split('_')[:2])
        plt.title(name)
        outpath = cat.replace('combined_hdr_catalog.txt', 'xsc_new_vs_old_phot.png')
        plt.savefig(outpath, dpi=200)
        plt.close()
        print('\ncreated file: {}'.format(outpath))
    outfile = 'xsc_replaced.json'
    json.dump(replaced, open(outfile, 'w'))
    print("\ncreated file: {}".format(outfile))
    print("\nremoved / added")
    for k, v in replaced.iteritems():
        print("{} {} {}".format(k.split('/')[-1], v['old'], v['new']))
    m = np.mean([i['old'] / float(i['new']) for i in replaced.values()])
    print("average ratio: {}".format(m))
    print("\nK mag and r_ext of sources with NaN photometry:")
    for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
        reg = i.split('/')[-1]
        rec = np.recfromcsv(i)
        bad_id = rec.designation.tolist()
        bad_k = rec.k_m_k20fe.tolist()
        bad_r_ext = rec.r_ext.tolist()
        print(reg)
        print("\tid\t\t\tKmag\tr_ext")
        if type(bad_id) is list:
            seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
            for j, k, l in seq:
                print("\t{}\t{}\t{}".format(j, k, l))
        else:
            print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
def is_plain_json_list(fname):
    if not os.path.exists(fname):
        return -1
    with open(fname, 'r') as fh:
        cont = fh.read(1024)
    cont = cont.strip()
    if not cont:
        return True
    return cont[0] == '{'


def compress_and_rename_old(fname):
    if file_is_bzip2(fname):
        return  # compressed already
    plain = is_plain_json_list(fname)
    if plain == -1:
        return  # file does not exist?
    if not plain:
        return  # compressed already
    if file_age_in_seconds(fname) < N_DAYS * 86400:
        return  # not old
    os.system('bzip2 "%s"' % fname)
    os.rename(fname + '.bz2', fname)
    print(' File compressed: %s' % fname)


if __name__ == '__main__':
    for output_file in find_files(ITEMS_DIR, '*.jl'):
        compress_and_rename_old(output_file)
    for log_file in find_files(LOGS_DIR, '*.log'):
        compress_and_rename_old(log_file)
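# `file_is_bzip2` and `file_age_in_seconds` are not shown above. Plausible
# sketches, assuming the former checks the bzip2 magic bytes and the latter
# uses the file's modification time:
import os
import time


def file_is_bzip2(fname):
    """True if the file starts with the bzip2 magic header 'BZh'."""
    with open(fname, 'rb') as fh:
        return fh.read(3) == b'BZh'


def file_age_in_seconds(fname):
    """Seconds since the file was last modified."""
    return time.time() - os.path.getmtime(fname)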
def show(request):
    files = find_files()  # renamed from `list`, which shadowed the builtin
    return render(request, "index.html", {"all_info": files})
def dir_walk(target_dir=None, quiet=None):
    '''recursively walk a directory containing cti and return the stats'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ',
                   Bar(marker=RotatingMarker()), ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(),
                        'exploit_targets': set(), 'incidents': set(),
                        'indicators': set(), 'threat_actors': set(),
                        'ttps': set()}
    # one empty set per supported CybOX object type
    cybox_object_types = [
        'AccountObjectType', 'AddressObjectType', 'APIObjectType',
        'ArchiveFileObjectType', 'ARPCacheObjectType', 'ArtifactObjectType',
        'ASObjectType', 'CodeObjectType', 'CustomObjectType',
        'DeviceObjectType', 'DiskObjectType', 'DiskPartitionObjectType',
        'DNSCacheObjectType', 'DNSQueryObjectType', 'DNSRecordObjectType',
        'DomainNameObjectType', 'EmailMessageObjectType', 'FileObjectType',
        'GUIDialogboxObjectType', 'GUIObjectType', 'GUIWindowObjectType',
        'HostnameObjectType', 'HTTPSessionObjectType', 'ImageFileObjectType',
        'LibraryObjectType', 'LinkObjectType', 'LinuxPackageObjectType',
        'MemoryObjectType', 'MutexObjectType', 'NetworkConnectionObjectType',
        'NetworkFlowObjectType', 'NetworkPacketObjectType',
        'NetworkRouteEntryObjectType', 'NetRouteObjectType',
        'NetworkSocketObjectType', 'NetworkSubnetObjectType',
        'PDFFileObjectType', 'PipeObjectType', 'PortObjectType',
        'ProcessObjectType', 'ProductObjectType', 'SemaphoreObjectType',
        'SMSMessageObjectType', 'SocketAddressObjectType', 'SystemObjectType',
        'UnixFileObjectType', 'UnixNetworkRouteEntryObjectType',
        'UnixPipeObjectType', 'UnixProcessObjectType',
        'UnixUserAccountObjectType', 'UnixVolumeObjectType', 'URIObjectType',
        'URLHistoryObjectType', 'UserAccountObjectType',
        'UserSessionObjectType', 'VolumeObjectType', 'WhoisObjectType',
        'WindowsComputerAccountObjectType',
        'WindowsCriticalSectionObjectType', 'WindowsDriverObjectType',
        'WindowsEventLogObjectType', 'WindowsEventObjectType',
        'WindowsExecutableFileObjectType', 'WindowsFilemappingObjectType',
        'WindowsFileObjectType', 'WindowsHandleObjectType',
        'WindowsHookObjectType', 'WindowsKernelHookObjectType',
        'WindowsKernelObjectType', 'WindowsMailslotObjectType',
        'WindowsMemoryPageRegionObjectType', 'WindowsMutexObjectType',
        'WindowsNetworkRouteEntryObjectType', 'WindowsNetworkShareObjectType',
        'WindowsPipeObjectType', 'WindowsPrefetchObjectType',
        'WindowsProcessObjectType', 'WindowsRegistryKeyObjectType',
        'WindowsSemaphoreObjectType', 'WindowsServiceObjectType',
        'WindowsSystemObjectType', 'WindowsSystemRestoreObjectType',
        'WindowsTaskObjectType', 'WindowsThreadObjectType',
        'WindowsUserAccountObjectType', 'WindowsVolumeObjectType',
        'WindowsWaitableTimerObjectType', 'X509CertificateObjectType',
    ]
    cooked_cybox_objs = {name: set() for name in cybox_object_types}
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i)
        except Exception:
            # skip unparseable files (was a bare `except: next`, which never
            # actually continued the loop; `i` also needed the enumerate above)
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
def do_run():
    """Unpack eFRI data"""
    ## Lambert conformal conic projection
    MNR_LAMBERT = arcpy.SpatialReference(
        'Projected Coordinate Systems/National Grids/Canada/NAD 1983 CSRS Ontario MNR Lambert')
    ## base file name for data
    BASE_NAME = r'eFRI'
    ## File types to input from
    FIND_MASK = '*.zip'
    if len(sys.argv) > 1:
        BASE_NAME = sys.argv[1]
    if len(sys.argv) > 2:
        FIND_MASK = sys.argv[2]
    ## Where to unzip input files to
    OUT_GDBS = os.path.join(OUT_DIR, r'{}_gdbs'.format(BASE_NAME))
    ## Where to unzip exterior files
    UNZIP_FIRST = os.path.join(OUT_DIR, BASE_NAME)
    ## Where to unzip nested zipped files
    UNZIP_SECOND = os.path.join(OUT_DIR, r'{}_1'.format(BASE_NAME))
    #
    check_zip(INPUT_DIR, '*', file_mask=FIND_MASK, output=UNZIP_FIRST)
    # make sure we unzip any zips that were in the zips
    check_zip(UNZIP_FIRST, '*', output=UNZIP_SECOND)
    gdbs = find_dirs(UNZIP_SECOND, '*.gdb')
    #
    roots = sorted(map(os.path.basename, gdbs))
    for i in xrange(len(roots)):
        if roots[i] in roots[i + 1:]:
            print 'Error: duplicate directory name - ' + roots[i]
            sys.exit(-1)
    #
    ensure_dir(OUT_GDBS)
    #
    def try_move(x, move_to):
        """Try to move and do nothing on failure"""
        try:
            shutil.move(x, move_to)
        except:
            # must have moved a parent directory already
            pass
    #
    map(lambda x: try_move(x, OUT_GDBS), gdbs)
    #
    OUT_ZIPS = os.path.join(OUT_DIR, r'{}_zips'.format(BASE_NAME))
    ensure_dir(OUT_ZIPS)
    zips = find_files(UNZIP_SECOND, '*.zip')
    map(lambda x: try_move(x, OUT_ZIPS), zips)
    #
    UNZIP_THIRD = os.path.join(OUT_DIR, r'{}_2'.format(BASE_NAME))
    check_zip(OUT_ZIPS, '*', output=UNZIP_THIRD)
    gdbs = find_dirs(UNZIP_THIRD, '*.gdb')
    map(lambda x: try_move(x, OUT_GDBS), gdbs)
    #
    arcpy.env.overwriteOutput = True
    arcpy.env.addOutputsToMap = False
    #
    #~ # only find gdbs that end in -2D or _2D since those are the ones we care about
    #~ # missing WhiteRiver since it's '2D_FRI.gdb'
    #~ gdbs = find_dirs(OUT_GDBS, '*[_-]2D.gdb')
    #~ gdbs = find_dirs(OUT_GDBS, '*.gdb')
    # HACK: only use gdbs with > 2 characters in name so we omit the '2D' and '3D' duplicates of Algonquin
    gdbs = sorted(find_dirs(OUT_GDBS, '???*.gdb'))
    ## Directory to output to
    FINAL_DIR = ensure_dir(r'C:\FireGUARD\data\GIS\intermediate\fuels')
    ## GDB to output to
    OUT_GDB = checkGDB(FINAL_DIR, "{}_LIO.gdb".format(BASE_NAME))
    ## GDB to output shapefiles of simplified bounds to
    OUT_GDB_COVERAGE = checkGDB(FINAL_DIR, "{}_LIO_coverage.gdb".format(BASE_NAME))
    #
    def findName(ds):
        """Find simplified name to use for dataset"""
        name = ds.replace('-', '_')
        if name.endswith('_w'):
            name = name[:-2]
        ignore = ['eFRI', '2D', 'Final', 'FRI', 'Dataset', 'Block', 'forest',
                  'Forest', 'FOREST', 'PP', '_', 'Topology', 'SMLS']
        # remove all numbers
        ignore += map(str, list(xrange(10)))
        for r in ignore:
            name = name.replace(r, '')
        # HACK: fix known abbreviations
        names = {'GCF': 'GordonCosens', 'CA': 'Caribou',
                 'DRMatawin': 'DogRiverMatawin', 'Hrst': 'Hearst',
                 'MagpieThunderH': 'Magpie', 'PANA': 'Pukaskwa',
                 'ARF': 'AbitibiRiver', 'BA': 'Bancroft'}
        if name in names.keys():
            name = names[name]
        if name.isupper():
            name = name.capitalize()
        name = name.replace('lake', 'Lake')
        return name
    #
    def copyForest(gdb):
        """Copy from gdb"""
        print "Processing " + str(gdb)
        arcpy.env.workspace = gdb
        gdb_name = os.path.basename(gdb)
        try:
            ds = arcpy.ListDatasets()[0]
        except:
            # this is an empty folder, so skip
            return None
        if gdb_name.startswith('pp_FRI_FIMv2'):
            name = findName(re.match('pp_FRI_FIMv2_[^_]*_', gdb_name)
                            .group(0).replace('pp_FRI_FIMv2', ''))
        else:
            name = findName(ds)
        # HACK: if name consists of only those things we replace then look at gdb name
        if 0 == len(name):
            name = findName(gdb_name.replace('.gdb', ''))
        def mkForest(_):
            arcpy.env.workspace = os.path.join(gdb, ds)
            feats = arcpy.ListFeatureClasses()
            # HACK: assume feature with most rows is the forest polygon
            counts = map(lambda x: int(arcpy.GetCount_management(x)[0]), feats)
            forest = feats[counts.index(max(counts))]
            arcpy.CopyFeatures_management(forest, _)
        forest = check_make(os.path.join(OUT_GDB, name), mkForest)
        # using the coverage from the gdb is giving us the WMU, not the area covered by the data
        outline = check_make(os.path.join(OUT_GDB_COVERAGE, name),
                             lambda _: arcpy.Dissolve_management(forest, _, '#'))
        if 'name' not in map(lambda x: x.name, arcpy.ListFields(outline)):
            arcpy.AddField_management(outline, 'name', "TEXT")
        arcpy.CalculateField_management(outline, 'name', '"{}"'.format(name), 'PYTHON')
        return name
    #
    def mkAll(_):
        ## list of names after copying from gdbs
        out = [x for x in sorted(map(copyForest, gdbs)) if x is not None]
        arcpy.env.outputCoordinateSystem = MNR_LAMBERT
        ## Merge all outlines together to make total area covered shape
        arcpy.Merge_management(';'.join(out), _)
    env_push()
    arcpy.env.workspace = OUT_GDB_COVERAGE
    ALL = arcpy.MakeFeatureLayer_management(check_make('ALL', mkAll))
    env_pop()
    # this is for updated example map services on test gis server
    if BASE_NAME == 'eFRI':
        fri_status = arcpy.MakeFeatureLayer_management(
            os.path.join(os.path.join(GIS_DIR, r'input\fuels\LIO'),
                         r'FRI_STATUS_FT.shp'))
        # copy to service if we're doing eFRI data
        copy_to_server(ALL, 'eFRIdata')
        copy_to_server(fri_status, 'eFRIplanned')
        arcpy.SelectLayerByLocation_management(
            fri_status, "HAVE_THEIR_CENTER_IN", ALL,
            invert_spatial_relationship="INVERT")
        # HACK: can't think of a better way to do this
        arcpy.SelectLayerByAttribute_management(fri_status, "REMOVE_FROM_SELECTION",
                                                "UNIT_NAME like '%Nipigon%'")
        copy_to_server(fri_status, 'eFRIplanned_select')
import os

import numpy as np

import multiprocess
import sorting  # assumed project-local module providing the pipeline used below
import util

INPUT_ROOT = '/gpfs/milgram/project/chang/pg496/nn_all_raw_files'
OUTPUT_ROOT = '/gpfs/milgram/project/chang/pg496/nn_all_raw_files_ms4_sorting_stringent'
PARALLEL = True
NUM_PARALLEL_PROCESSES = 8


def sort_several(files):
    for file in files:
        sorting.matlab_source_file_default_pipeline(INPUT_ROOT, OUTPUT_ROOT, file)


def create_sorting_task(files):
    # bind `files` at creation time so each task sorts its own file set
    return (lambda f: lambda: sort_several(f))(files)


if __name__ == '__main__':
    _, src_filenames, _ = util.find_files(INPUT_ROOT, '.mat')
    filename_sets = np.array_split(src_filenames, NUM_PARALLEL_PROCESSES)
    fs = [create_sorting_task(f) for f in filename_sets]
    if PARALLEL:
        multiprocess.run_tasks(multiprocess.make_tasks(fs))
    else:
        for f in fs:
            f()
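# `multiprocess` above is a project-local helper, not the PyPI package of the
# same name. A minimal sketch of what make_tasks/run_tasks might do with the
# standard library, purely as an assumption; it relies on a fork-based start
# method (as on the Linux cluster the paths above suggest), since the tasks
# are closures and would not pickle under spawn.
import multiprocessing


def make_tasks(fs):
    """Wrap zero-argument callables in Process objects."""
    return [multiprocessing.Process(target=f) for f in fs]


def run_tasks(tasks):
    """Start all tasks, then wait for every one to finish."""
    for t in tasks:
        t.start()
    for t in tasks:
        t.join()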
# NOTE: in this fragment the per-file fix-up appears before the argparse
# setup; it presumably ran inside the (elided) loop over files_to_fix. It is
# kept here as found.
with open(backup_file, 'r') as original:
    with open(fixed_file, 'w') as fixed:
        content = regex.sub(
            lambda x: regex_sub_dict[x.string[x.start():x.end()]],
            original.read())
        fixed.write(content)

parser = argparse.ArgumentParser(description='Fix input files')
parser.add_argument('--input-file-dir', required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    files_to_fix = find_files(args.input_file_dir)
    print('Found the following files to fix:\n{}'.format('\n'.join(files_to_fix)))
    regex_sub_dict = {
        "\n,": ",",
        "\n\n": "\n",
        "\0": "",
    }
    # Make a backup dir for the original files to be fixed
    backup_dir = args.input_file_dir + '/backup'
    if not os.path.exists(backup_dir):
        print('Creating {} directory'.format(backup_dir))
        os.mkdir(backup_dir)
parser.add_argument('--pwd', required=True)
parser.add_argument('--host', required=False, default='localhost')
parser.add_argument('--port', required=False, default=5432)
parser.add_argument('--db', required=False, default='postgres')
parser.add_argument('--input-file-dir', required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    engine = create_engine(
        "postgresql://{user}:{pwd}@{host}:{port}/{db}".format(
            user=args.user, pwd=args.pwd, host=args.host,
            port=args.port, db=args.db))
    files_to_load = find_files(args.input_file_dir)
    marketing_files = [args.input_file_dir + '/' + file
                       for file in files_to_load if 'marketing' in file]
    user_files = [args.input_file_dir + '/' + file
                  for file in files_to_load if 'user' in file]
    Session = sessionmaker(bind=engine)
    session = Session()
    Base.metadata.create_all(engine, checkfirst=True)
    for file in marketing_files:
def get_actual():
    return set(find_files(catalog_dir, '.mo'))
def test_compile_specific_catalogs(app, status, warning):
    app.builder.compile_specific_catalogs(['admonitions'])
    catalog_dir = locale_dir / app.config.language / 'LC_MESSAGES'
    actual = set(find_files(catalog_dir, '.mo'))
    assert actual == {'admonitions.mo'}
def plot_spz_vs_wise_sdss_class(cat_path, plot_style='scatter'):
    # (lists renamed from ch1/ch2, which the loop variables below shadowed)
    ch1_files = list(find_files(cat_path, "*merged+sdss+wise.csv"))[::2]
    ch2_files = list(find_files(cat_path, "*merged+sdss+wise.csv"))[1::2]
    for ch1, ch2 in zip(ch1_files, ch2_files):
        reg1 = ch1.split('/')[-1].split('_')[0]
        reg2 = ch2.split('/')[-1].split('_')[0]
        assert reg1 == reg2
        print("\nreading catalog: {}".format(ch1))
        print("reading catalog: {}".format(ch2))
        df1 = pd.read_csv(ch1)
        df2 = pd.read_csv(ch2)
        # convert to magnitudes
        mags1 = spz_jy_to_mags(df1.flux * 1e-3, 1)
        mags2 = spz_jy_to_mags(df2.flux * 1e-3, 2)
        # match ch1 / ch2
        idx1, idx2 = match_cats(df1, df2, tol=2 / 3600.)
        # save matched catalogs
        matched1 = df1.loc[idx1]
        matched2 = df2.loc[idx2]
        ch1_cols = [i + '_1' for i in df1.columns.tolist()]
        ch2_cols = [i + '_2' for i in df2.columns.tolist()]
        # matched1.columns = ch1_cols
        # matched2.columns = ch2_cols
        # matched = pd.concat([matched1, matched2], 1, ignore_index=True)    # weird error
        matched = np.concatenate([matched1.values, matched2.values], 1)
        df_matched = pd.DataFrame(matched, columns=ch1_cols + ch2_cols)
        df_matched['I1'] = mags1[idx1].values
        df_matched['I2'] = mags2[idx2].values
        outpath = '/'.join(ch1.split('/')[:-1]) + '/{}_2ch_matched+sdss.csv'.format(reg1)
        df_matched.to_csv(outpath, index=False, float_format='%.8f')
        print("created file: {}".format(outpath))
        # identify SDSS galaxies and stars
        galaxies = (df1.cl[idx1].values == 3) & (df2.cl[idx2].values == 3)
        stars = (df1.cl[idx1].values == 6) & (df2.cl[idx2].values == 6)
        # plot I1-I2 vs. W1-W2
        color1 = df1.W1mag[idx1].values - df2.W2mag[idx2].values
        color2 = mags1[idx1].values - mags2[idx2].values
        # galaxies
        name = '{}_I1-I2_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(color1[galaxies], color2[galaxies], outpath,
             'W1-W2 [mag]', 'I1-I2 [mag]',
             plot_style=plot_style, plot_type='color-color')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace('galaxies', 'stars')
        plot(color1[stars], color2[stars], outpath,
             'W1-W2 [mag]', 'I1-I2 [mag]',
             plot_style=plot_style, plot_type='color-color')
        # plot I1-W1 vs. I2-W2
        color1 = mags1[idx1].values - df1.W1mag[idx1].values
        color2 = mags2[idx2].values - df2.W2mag[idx2].values
        # galaxies
        name = '{}_I1-W1_vs_I2-W2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(color1[galaxies], color2[galaxies], outpath,
             'I1-W1 [mag]', 'I2-W2 [mag]',
             plot_style=plot_style, plot_type='color-color')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace('galaxies', 'stars')
        plot(color1[stars], color2[stars], outpath,
             'I1-W1 [mag]', 'I2-W2 [mag]',
             plot_style=plot_style, plot_type='color-color')
        # plot spz color-magnitude diagrams
        color = mags1[idx1].values - mags2[idx2].values
        mags = mags1[idx1].values
        # galaxies
        name = '{}_I1_vs_I1-I2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(mags[galaxies], color[galaxies], outpath,
             'I1 [mag]', 'I1-I2 [mag]',
             plot_style=plot_style, plot_type='color-mag')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace('galaxies', 'stars')
        plot(mags[stars], color[stars], outpath,
             'I1 [mag]', 'I1-I2 [mag]',
             plot_style=plot_style, plot_type='color-mag')
        # plot wise color-magnitude diagrams
        color = df1.W1mag[idx1].values - df2.W2mag[idx2].values
        mags = df1.W1mag[idx1].values
        # galaxies
        name = '{}_W1_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(mags[galaxies], color[galaxies], outpath,
             'W1 [mag]', 'W1-W2 [mag]',
             plot_style=plot_style, plot_type='color-mag')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace('galaxies', 'stars')
        plot(mags[stars], color[stars], outpath,
             'W1 [mag]', 'W1-W2 [mag]',
             plot_style=plot_style, plot_type='color-mag')
        # plot I1 vs I2
        mags1_matched = mags1[idx1].values
        mags2_matched = mags2[idx2].values
        # galaxies
        name = '{}_I1_vs_I2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(mags1_matched[galaxies], mags2_matched[galaxies], outpath,
             'I1 [mag]', 'I2 [mag]',
             plot_style=plot_style, plot_type='mag-mag')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace('galaxies', 'stars')
        plot(mags1_matched[stars], mags2_matched[stars], outpath,
             'I1 [mag]', 'I2 [mag]',
             plot_style=plot_style, plot_type='mag-mag')
def m2c_generator(max_num_sample):
    '''
    m2c generator.

    Input  : a testing sample index
    Output : chord label (n, 16),
             monophonic melody label (n, 2),
             BPM (float)

    Average elapsed time for one sample: 0.16 sec
    '''
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cpu_device = torch.device('cpu')

    # Load Data
    chord_dic = pd.read_pickle(CONFIG_ALL['data']['chord_dic'])

    # prepare features
    all_files = find_files(CONFIG_ALL['data']['test_dir'], '*.mid')
    input_dic = []
    for i_file in all_files:
        m_embed = midi_feature(i_file, sampling_fac=2)
        m_embed = np.reshape(m_embed, (1, m_embed.shape[0], m_embed.shape[1]))
        input_dic.append({'midi': i_file, 'm_embed': m_embed})
    print 'Total number of files: ', len(input_dic)

    # build model
    model = BiRNN(CONFIG_ALL['model']['input_size'],
                  CONFIG_ALL['model']['lstm_hidden_size'],
                  CONFIG_ALL['model']['fc_hidden_size'],
                  CONFIG_ALL['model']['num_layers'],
                  CONFIG_ALL['model']['num_classes_cf'],
                  CONFIG_ALL['model']['num_classes_c'], device).to(device)

    # Load Model
    path = os.path.join(CONFIG_ALL['model']['log_dir'],
                        CONFIG_ALL['model']['exp_name'], 'models/',
                        CONFIG_ALL['model']['eval_model'])
    model.load_state_dict(torch.load(path))

    # Test the model
    with torch.no_grad():
        while True:
            test_idx = yield
            if test_idx >= max_num_sample or test_idx < 0:
                print "Invalid sample index"
                continue

            m_embedding = input_dic[test_idx]['m_embed']
            out_cf, out_c = model(
                torch.tensor(m_embedding, dtype=torch.float).to(device))
            out_c = out_c.data.cpu().numpy()
            _, pred_cf = torch.max(out_cf.data, 1)
            pred_cf = pred_cf.data.cpu().numpy()

            i_out_tn1 = -1
            i_out_tn2 = -1
            i_out_t = -1
            predicted = []
            c_threshold = 0.825
            f_threshold = 0.35

            for idx, i_out in enumerate(out_c):
                # Seventh chords:
                # T_chord_label = [0, 1, 2, 3, 4, 5, 102, 103, 104]
                # D_chord_label = [77, 78, 79, 55, 56, 57]
                # R_chord_label = [132]

                # Triad chords
                T_chord_label = [0, 1, 37]
                D_chord_label = [20, 28]
                R_chord_label = [48]
                O_chord_label = [
                    i for i in range(0, 48)
                    if i not in T_chord_label + D_chord_label + R_chord_label
                ]

                # beam-like search that avoids emitting the same chord
                # more than twice in a row
                if pred_cf[idx] == 0:
                    L = np.argsort(
                        -np.asarray([i_out[i] for i in T_chord_label]))
                    if i_out_tn1 == T_chord_label[L[0]] and \
                            i_out_tn2 == T_chord_label[L[0]]:
                        i_out_t = T_chord_label[L[1]]
                    else:
                        i_out_t = T_chord_label[L[0]]
                elif pred_cf[idx] == 1:
                    i_out_t = D_chord_label[np.argmax(
                        [i_out[i] for i in D_chord_label])]
                elif pred_cf[idx] == 3:
                    L = np.argsort(
                        -np.asarray([i_out[i] for i in O_chord_label]))
                    if i_out_tn1 == O_chord_label[L[0]] and \
                            i_out_tn2 == O_chord_label[L[0]]:
                        i_out_t = O_chord_label[L[1]]
                    else:
                        i_out_t = O_chord_label[L[0]]
                else:
                    i_out_t = 48

                predicted.append(i_out_t)
                i_out_tn2 = i_out_tn1
                i_out_tn1 = i_out_t

            # Write file to midi
            midi_original = pretty_midi.PrettyMIDI(input_dic[test_idx]['midi'])
            midi_chord = pro_chordlabel_to_midi(
                predicted,
                chord_dic,
                inv_beat_resolution=CONFIG_ALL['data']['chord_resolution'],
                constant_tempo=midi_original.get_tempo_changes()[1])
            midi_chord.instruments[0].name = "Predicted_w_func"
            midi_original.instruments.append(midi_chord.instruments[0])

            out_path = os.path.join('eval_test/', str(test_idx) + '.mid')
            ensure_dir(out_path)
            midi_original.write(out_path)
            print "Write files to: ", out_path

            out_mc = midi_to_list(midi_original, predicted)
            yield {
                'melody': out_mc['melody'],
                'chord': out_mc['chord'],
                'BPM': float(midi_original.get_tempo_changes()[1])
            }
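# A minimal, runnable sketch of the send/next protocol m2c_generator expects.
# Because the generator has two yield points (one to receive the sample
# index, one to emit the result), the caller must prime it with next() once
# and re-arm it with next() after every result. The toy generator below is a
# stand-in that mirrors only the control flow, not the model inference.
def _two_yield_protocol_demo():
    while True:
        idx = yield          # receive a sample index from send()
        yield {'idx': idx}   # emit the result for that index

gen = _two_yield_protocol_demo()
next(gen)            # prime: advance to the first `idx = yield`
print(gen.send(3))   # {'idx': 3}
next(gen)            # re-arm: advance past the result yield
print(gen.send(7))   # {'idx': 7}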
def match_sdss(cat_path):
    for catfile in find_files(cat_path, "*merged.txt"):

        # read pipeline catalog
        print("\nreading catalog: {}".format(catfile))
        cat = pd.read_table(catfile, sep=' ')

        # retrieve SDSS data from ViZieR if not already downloaded
        ch = catfile.split('/')[-1].split('_')[1]
        outpath = catfile.replace('{}_merged.txt'.format(ch), 'sdss.vot')
        if not os.path.isfile(outpath):
            cntr_ra = np.median(cat.ra)
            cntr_dec = np.median(cat.dec)
            # get source from one corner of the mosaic to calculate radius
            c1 = (cat.ra.min(), cat.dec[cat.ra == cat.ra.min()].values[0])
            # make radius 10% bigger just to be on the safe side
            radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
            url = get_url(cntr_ra, cntr_dec, radius)
            print("retrieving URL: {}".format(url))
            handler = urllib2.urlopen(url)
            raw = handler.read()
            with open(outpath, 'wb') as f:
                f.write(raw)
            print("created file: {}".format(outpath))

        # parse VOTable
        print("reading VOTable: {}".format(outpath))
        table = parse_single_table(outpath)

        # if this is one of the southern hemisphere regions, delete and continue
        if table.array.size == 0:
            os.remove(outpath)
            print("outside of SDSS coverage")
            continue

        # make sure no missing data
        for name in table.array.dtype.names:
            assert table.array[name].mask.sum() == 0

        # get unmasked array
        sdss = table.array.data

        # make sure sky coverage is big enough
        assert sdss['RAJ2000'].min() < cat.ra.min()
        assert sdss['RAJ2000'].max() > cat.ra.max()
        assert sdss['DEJ2000'].min() < cat.dec.min()
        assert sdss['DEJ2000'].max() > cat.dec.max()

        # match to catalog
        assert cat.shape[0] < sdss.shape[0]
        tol = 2 / 3600.
        idx1, idx2, ds = spherematch(cat.ra, cat.dec,
                                     sdss['RAJ2000'], sdss['DEJ2000'],
                                     tolerance=tol)
        print("matched {} out of {} sources with {} arcsec tolerance".format(
            ds.size, cat.shape[0], tol * 3600))

        # create vector of star/galaxy class (0=missing, 3=galaxy, 6=star)
        cl = np.zeros(cat.shape[0]).astype('int')
        cl[idx1] = sdss['cl'][idx2]

        # add the column to the dataset and write to new file
        cat['cl'] = cl
        outpath = catfile.replace('merged.txt', 'merged+sdss.txt')
        cat.to_csv(outpath, index=False, sep=' ', float_format='%.8f')
        print("created file: {}".format(outpath))
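# A hedged, self-contained sketch of what the spherematch-style helpers used
# throughout (radec_to_coords / KDT / spherematch2) are assumed to do: map
# RA/Dec onto unit vectors, index them with a KD-tree, and convert an angular
# tolerance into the equivalent 3D chord length for the range query. Names
# and data here are illustrative, not the pipeline's actual implementation.
import numpy as np
from scipy.spatial import cKDTree

def radec_to_xyz(ra_deg, dec_deg):
    # unit vectors on the celestial sphere
    ra, dec = np.radians(ra_deg), np.radians(dec_deg)
    return np.column_stack([np.cos(dec) * np.cos(ra),
                            np.cos(dec) * np.sin(ra),
                            np.sin(dec)])

cat_xyz = radec_to_xyz(np.array([10.0, 10.001]), np.array([41.0, 41.0]))
tree = cKDTree(cat_xyz)
tol_deg = 2 / 3600.  # 2 arcsec, as in the matching calls above
chord = 2 * np.sin(np.radians(tol_deg) / 2)  # angle -> 3D chord length
idx = tree.query_ball_point(radec_to_xyz(np.array([10.0]),
                                         np.array([41.0]))[0], chord)
print(idx)  # only the first source lies within 2 arcsec -> [0]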
def find_evt_files(root_dir):
    """Return a chronologically sorted list of EVT or OPP file paths in root_dir."""
    files = util.find_files(root_dir)
    files = parse_file_list(files)
    return seaflowfile.sorted_files(files)
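# Hypothetical usage of find_evt_files (the path is illustrative): collect
# the EVT/OPP files under a cruise directory in chronological order.
evt_files = find_evt_files('cruises/SCOPE_1/evt')
print(len(evt_files))
print(evt_files[:3])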
def run_xsc_phot(bcdphot_out_path, mosaic_path):
    replaced = {}
    for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

        print("\n======================================================")
        print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
        print("------------------------------------------------------")

        outpath = cat.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')
        # retrieve 2mass data if file doesn't already exist (from previous run)
        if not os.path.isfile(outpath):
            # get url and retrieve data
            url = query_2mass_xsc_polygon(*get_region_corners(cat))
            print("\ndownloading 2MASS photometry from: {}".format(url))
            text = urllib2.urlopen(url).read()
            # write to disk
            with open(outpath, 'w') as f:
                f.write(text)
            print("\ncreated file: {}".format(outpath))

        # read back in as recarray
        print("\nreading: {}".format(outpath))
        names = open(outpath).read().split('\n')[76].split('|')[1:-1]
        da = np.recfromtxt(outpath, skip_header=80, names=names)

        # write input file for xsc_phot.pro
        infile_outpath = '/'.join(cat.split('/')[:-1]) + '/xsc.txt'
        with open(infile_outpath, 'w') as w:
            for i in range(da.shape[0]):
                w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i],
                                               da.dec[i], da.r_ext[i]))
        print("\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

        # locate the FITS mosaic file for xsc_phot.pro to do photometry on
        reg, ch = cat.split('/')[-1].split('_')[:2]
        mosaicfile = filter(lambda x: 'dirbe{}/ch{}/long/full/Combine'
                            .format(reg, ch) in x,
                            find_files(mosaic_path, '*mosaic.fits'))[0]
        print("\nfound mosaic file: {}".format(mosaicfile))

        # spawn IDL subprocess running xsc_phot.pro and catch stdout in file
        outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
        if not os.path.isfile(outpath):
            outfile = open(outpath, 'w')
            print("\nspawning xsc_phot.pro IDL subprocess")
            cmd = "xsc_phot,'" + mosaicfile + "','" + infile_outpath + "','long'"
            rc = subprocess.call(
                ['/usr/local/itt/idl71/bin/idl', '-quiet', '-e', cmd],
                stderr=subprocess.PIPE, stdout=outfile)
            outfile.close()

        # read in output to recarray
        print("\nreading: {}".format(outpath))
        phot = np.recfromtxt(outpath,
                             names=['id', 'flux', 'unc', 'sky', 'skyunc'])

        # make sure rows are aligned
        assert (da.designation == phot.id).all()

        # ignore xsc sources we got a NaN or negative flux for
        bad = np.isnan(phot.flux) | (phot.flux < 0)
        print("\naper.pro returned NaN or negative flux for {} sources"
              .format(bad.sum()))
        if bad.sum() > 0:
            for i in phot[bad].id:
                print(i)
            outpath = cat.replace('combined_hdr_catalog.txt',
                                  'xsc_nan_phot.csv')
            with open(outpath, 'w') as f:
                w = csv.writer(f)
                w.writerow(da.dtype.names)
                w.writerows(da[bad].tolist())
            print('\ncreated file: {}'.format(outpath))
        phot = phot[~bad]
        da = da[~bad]

        # read in pipeline catalog
        print("\nreading: {}".format(cat))
        names = open(cat).readline().split()[1:]
        c = np.recfromtxt(cat, names=names)

        # loop through xsc sources and find matches in pipeline catalog
        print("\nfinding records associated with XSC sources in pipeline catalog")
        c_flux_total = []
        n_in_aper = []
        c_idx = []
        coords = radec_to_coords(c.ra, c.dec)
        kdt = KDT(coords)
        for i in range(phot.size):
            radius = da.r_ext[i] / 3600.
            idx, ds = spherematch2(da.ra[i], da.dec[i], c.ra, c.dec,
                                   kdt, tolerance=radius, k=500)
            c_flux_total.append(c.flux[idx].sum())
            n_in_aper.append(ds.size)
            c_idx.append(idx.tolist())
        print("\nhistogram of source counts in r_ext aperture")
        for i in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print i

        # create new version of catalog file with xsc-associated entries replaced
        c_idx = np.array(flatten(c_idx))
        print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
        replaced[cat] = {'old': c_idx.size, 'new': phot.size}
        replaced[cat]['hist'] = [(i, n_in_aper.count(i))
                                 for i in set(n_in_aper)]
        c = np.delete(c, c_idx)
        newrows = np.rec.array([(-i, da.ra[i], da.dec[i], phot.flux[i],
                                 phot.unc[i], 1) for i in range(phot.size)],
                               dtype=c.dtype)
        newcat = np.hstack((c, newrows))

        # write new version of catalog to disk
        fmt = ['%i'] + ['%0.8f'] * 2 + ['%.4e'] * 2 + ['%i']
        outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
        np.savetxt(outpath, newcat, fmt=fmt, header=' '.join(names))
        print('\ncreated file: {}'.format(outpath))

        # make plot of total old vs. new flux
        plt.scatter(c_flux_total, phot.flux)
        ylim = plt.gca().get_ylim()
        plt.xlim(*ylim)
        plt.plot(ylim, ylim, 'r-')
        plt.xlabel('old flux [mJy]')
        plt.ylabel('new flux [mJy]')
        name = ' '.join(cat.split('/')[-1].split('_')[:2])
        plt.title(name)
        outpath = cat.replace('combined_hdr_catalog.txt',
                              'xsc_new_vs_old_phot.png')
        plt.savefig(outpath, dpi=200)
        plt.close()
        print('\ncreated file: {}'.format(outpath))

    outfile = 'xsc_replaced.json'
    json.dump(replaced, open(outfile, 'w'))
    print("\ncreated file: {}".format(outfile))
    print("\nremoved / added")
    for k, v in replaced.iteritems():
        print k.split('/')[-1], v['old'], v['new']
    m = np.mean([i['old'] / float(i['new']) for i in replaced.values()])
    print("average ratio: {}".format(m))

    print("\nK mag and r_ext of sources with NaN photometry:")
    for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
        reg = i.split('/')[-1]
        rec = np.recfromcsv(i)
        bad_id = rec.designation.tolist()
        bad_k = rec.k_m_k20fe.tolist()
        bad_r_ext = rec.r_ext.tolist()
        print reg
        print("\tid\t\t\tKmag\tr_ext")
        if type(bad_id) is list:
            seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
            for j, k, l in seq:
                print("\t{}\t{}\t{}".format(j, k, l))
        else:
            print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
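# A minimal, self-contained sketch (toy data, illustrative dtype) of the
# replace step in run_xsc_phot: drop the catalog rows matched to an extended
# source, then append one aggregate row per source via np.hstack on
# structured arrays sharing the same dtype.
import numpy as np

cat = np.rec.array([(1, 10.0), (2, 10.1), (3, 10.2)],
                   dtype=[('id', int), ('flux', float)])
matched_idx = np.array([0, 1])  # rows falling inside the XSC aperture
new = np.rec.array([(-1, 20.5)], dtype=cat.dtype)  # replacement photometry
cat = np.delete(cat, matched_idx)   # remove the matched point sources
cat = np.hstack((cat, new))         # append the aggregate XSC row
print("{} {}".format(cat['id'], cat['flux']))  # [ 3 -1] [10.2 20.5]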
def dir_walk(target_dir=None, quiet=None):
    '''recursively walk a directory containing cti and return the stats'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ',
                   Bar(marker=RotatingMarker()), ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {k: set() for k in [
        'campaigns', 'courses_of_action', 'exploit_targets', 'incidents',
        'indicators', 'threat_actors', 'ttps']}
    cooked_cybox_objs = {k: set() for k in [
        'AccountObjectType', 'AddressObjectType', 'APIObjectType',
        'ArchiveFileObjectType', 'ARPCacheObjectType', 'ArtifactObjectType',
        'ASObjectType', 'CodeObjectType', 'CustomObjectType',
        'DeviceObjectType', 'DiskObjectType', 'DiskPartitionObjectType',
        'DNSCacheObjectType', 'DNSQueryObjectType', 'DNSRecordObjectType',
        'DomainNameObjectType', 'EmailMessageObjectType', 'FileObjectType',
        'GUIDialogboxObjectType', 'GUIObjectType', 'GUIWindowObjectType',
        'HostnameObjectType', 'HTTPSessionObjectType', 'ImageFileObjectType',
        'LibraryObjectType', 'LinkObjectType', 'LinuxPackageObjectType',
        'MemoryObjectType', 'MutexObjectType', 'NetworkConnectionObjectType',
        'NetworkFlowObjectType', 'NetworkPacketObjectType',
        'NetworkRouteEntryObjectType', 'NetRouteObjectType',
        'NetworkSocketObjectType', 'NetworkSubnetObjectType',
        'PDFFileObjectType', 'PipeObjectType', 'PortObjectType',
        'ProcessObjectType', 'ProductObjectType', 'SemaphoreObjectType',
        'SMSMessageObjectType', 'SocketAddressObjectType', 'SystemObjectType',
        'UnixFileObjectType', 'UnixNetworkRouteEntryObjectType',
        'UnixPipeObjectType', 'UnixProcessObjectType',
        'UnixUserAccountObjectType', 'UnixVolumeObjectType', 'URIObjectType',
        'URLHistoryObjectType', 'UserAccountObjectType',
        'UserSessionObjectType', 'VolumeObjectType', 'WhoisObjectType',
        'WindowsComputerAccountObjectType',
        'WindowsCriticalSectionObjectType', 'WindowsDriverObjectType',
        'WindowsEventLogObjectType', 'WindowsEventObjectType',
        'WindowsExecutableFileObjectType', 'WindowsFilemappingObjectType',
        'WindowsFileObjectType', 'WindowsHandleObjectType',
        'WindowsHookObjectType', 'WindowsKernelHookObjectType',
        'WindowsKernelObjectType', 'WindowsMailslotObjectType',
        'WindowsMemoryPageRegionObjectType', 'WindowsMutexObjectType',
        'WindowsNetworkRouteEntryObjectType', 'WindowsNetworkShareObjectType',
        'WindowsPipeObjectType', 'WindowsPrefetchObjectType',
        'WindowsProcessObjectType', 'WindowsRegistryKeyObjectType',
        'WindowsSemaphoreObjectType', 'WindowsServiceObjectType',
        'WindowsSystemObjectType', 'WindowsSystemRestoreObjectType',
        'WindowsTaskObjectType', 'WindowsThreadObjectType',
        'WindowsUserAccountObjectType', 'WindowsVolumeObjectType',
        'WindowsWaitableTimerObjectType', 'X509CertificateObjectType']}
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = \
                process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                # track the file index so the progress bar advances
                progress.update(i)
        except Exception:
            # skip files that fail to parse rather than aborting the walk
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
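# A hedged usage sketch for dir_walk (the directory path is hypothetical):
# walk a corpus of STIX XML and report how many unique objects of each
# top-level STIX type were collected.
stix_stats, cybox_stats = dir_walk('/data/cti-corpus', quiet=True)
for name, ids in sorted(stix_stats.items()):
    print('{}: {}'.format(name, len(ids)))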