Example #1
def test_compile_update_catalogs(app, status, warning):
    app.builder.compile_update_catalogs()

    catalog_dir = locale_dir / app.config.language / 'LC_MESSAGES'
    expect = set(
        [x.replace('.po', '.mo') for x in find_files(catalog_dir, '.po')])
    actual = set(find_files(catalog_dir, '.mo'))
    assert actual  # not empty
    assert actual == expect
Example #2
def test_compile_update_catalogs(app, status, warning):
    app.builder.compile_update_catalogs()

    catalog_dir = locale_dir / app.config.language / 'LC_MESSAGES'
    expect = set([
        x.replace('.po', '.mo')
        for x in find_files(catalog_dir, '.po')
    ])
    actual = set(find_files(catalog_dir, '.mo'))
    assert actual  # not empty
    assert actual == expect
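Note: the two tests above (and the setup_test snippets further down) come from the Sphinx i18n test suite and rely on a suffix-based find_files(directory, suffix) helper plus module-level root/locale_dir path objects that are not shown on this page. A minimal sketch of such a helper, assuming it yields paths relative to the starting directory (the body is an assumption; only the call shape comes from the examples):

import os

def find_files(root, suffix=None):
    # walk `root` and yield relative paths whose names end with `suffix`
    for dirpath, _dirs, files in os.walk(str(root)):
        for name in files:
            if suffix is None or name.endswith(suffix):
                yield os.path.relpath(os.path.join(dirpath, name), str(root))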
Example #3
def plot_wise(cat_path):

	for catfile in find_files(cat_path, "*merged+wise.csv"):

		print("\nreading catalog: {}".format(catfile))
		df = pd.read_csv(catfile)

		# convert to magnitudes
		nbadflux = (df.flux <= 0).sum()
		if nbadflux > 0:
			print("warning: {} negative flux source(s)".format(nbadflux))
		ch = catfile.split('/')[-1].split('_')[1]
		mags = spz_jy_to_mags(df.flux*1e-3, float(ch))
		if ch == '1':
			plt.scatter(df.W1mag, mags)
			plt.xlabel('W1 [mag]')
			plt.ylabel('I1 [mag]')
		elif ch == '2':
			plt.scatter(df.W2mag, mags)
			plt.xlabel('W2 [mag]')
			plt.ylabel('I2 [mag]')
		ax = plt.gca()
		xlim, ylim = ax.get_xlim(), ax.get_ylim()
		plt.plot([-5, ylim[1]*2], [-5, ylim[1]*2], 'r-')
		ax.set_xlim(xlim) ; ax.set_ylim(ylim)
		reg = catfile.split('/')[-1].split('_')[0]
		name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
		outpath = '/'.join(catfile.split('/')[:-1]+[name])
		plt.savefig(outpath, dpi=120)
		plt.close()
Example #4
def check_n_in_aper(radius_factor=1, k=100):

    for catfile in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

        print
        print catfile
        names = open(catfile).readline().split()[1:]
        cat = np.recfromtxt(catfile, names=names)

        xscfile = catfile.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')
        print xscfile
        names = open(xscfile).read().split('\n')[76].split('|')[1:-1]
        xsc = np.recfromtxt(xscfile, skip_header=80, names=names)

        n_in_aper = []
        coords = radec_to_coords(cat.ra, cat.dec)
        kdt = KDT(coords)
        for i in range(xsc.size):
            r_deg = xsc.r_ext[i] / 3600.

            idx, ds = spherematch2(xsc.ra[i],
                                   xsc.dec[i],
                                   cat.ra,
                                   cat.dec,
                                   kdt,
                                   tolerance=radius_factor * r_deg,
                                   k=k)
            n_in_aper.append(ds.size)
        for i in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print i
Example #5
def readme(config: dict, app_logger: logger.Logger) -> bool:
    """
    Display contents of readme file located within migration directory. Return False if readme file doesn't exist.

    :param config: pymigrate configuration.
    :param app_logger: pymigrate configured logger.

    :return: True on success, False otherwise.
    """
    app_logger.log_with_ts('Running readme action', logger.Levels.DEBUG)

    migration_dir = os.path.join(
        os.pardir, config['PROJECT_DIR'] + '/' + config['MIGRATIONS_DIR'] +
        '/' + config['MIGRATION_ID'])

    readme_files = util.find_files('readme*', migration_dir, False)
    if len(readme_files) != 0:
        for readme_file in readme_files:
            with open(readme_file, 'r') as f:
                print("Contents of {0}".format(readme_file))
                print(f.read())
    else:
        app_logger.log_with_ts("No readme files found", logger.Levels.ERROR)
        return False
    return True
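The pymigrate examples (this one and the db_update / run_migration snippets below) call util.find_files with the glob pattern first, a directory second, and a boolean third, and they expect a list back. A possible fnmatch-based sketch of that variant, offered only as an assumption about the helper's behavior:

import fnmatch
import os

def find_files(pattern, directory, recursive=False):
    # assumed behavior: case-insensitive glob match on file names under `directory`
    matches = []
    for dirpath, _dirs, files in os.walk(directory):
        matches.extend(os.path.join(dirpath, name) for name in files
                       if fnmatch.fnmatch(name.lower(), pattern.lower()))
        if not recursive:
            break
    return matches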
Example #6
def check_n_in_aper(radius_factor=1, k=100):

	for catfile in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

		print
		print catfile
		names = open(catfile).readline().split()[1:]
		cat = np.recfromtxt(catfile, names=names)

		xscfile = catfile.replace('combined_hdr_catalog.txt','2mass_xsc.tbl')
		print xscfile
		names = open(xscfile).read().split('\n')[76].split('|')[1:-1]
		xsc = np.recfromtxt(xscfile, skip_header=80, names=names)

		n_in_aper = []
		coords = radec_to_coords(cat.ra, cat.dec)
		kdt = KDT(coords)
		for i in range(xsc.size):
			r_deg = xsc.r_ext[i]/3600.

			idx, ds = spherematch2(xsc.ra[i], xsc.dec[i], cat.ra, cat.dec,
				kdt, tolerance=radius_factor*r_deg, k=k)
			n_in_aper.append(ds.size)
		for i in [(i,n_in_aper.count(i)) for i in set(n_in_aper)]:
			print i
Example #7
def dir_walk(target_dir=None, quiet=None):
    '''recursively walk a directory containing cti and return the stats'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
                   ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(), \
                        'exploit_targets': set(), 'incidents': set(), \
                        'indicators': set(), 'threat_actors': set(), \
                        'ttps': set()}
    cooked_cybox_objs = dict()
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = \
                process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                if not k in cooked_cybox_objs.keys():
                    cooked_cybox_objs[k] = set()
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i)
        except Exception:
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
Example #8
def dir_walk(target_dir=None, quiet=None):
    '''recursively walk a directory containing cti and return the stats'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = [
            'Directory Walk: ',
            Percentage(), ' ',
            Bar(marker=RotatingMarker()), ' ',
            ETA()
        ]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(), \
                        'exploit_targets': set(), 'incidents': set(), \
                        'indicators': set(), 'threat_actors': set(), \
                        'ttps': set()}
    cooked_cybox_objs = dict()
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = \
                process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                if not k in cooked_cybox_objs.keys():
                    cooked_cybox_objs[k] = set()
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i)
        except Exception:
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
Example #9
def plot_wise(cat_path):

    for catfile in find_files(cat_path, "*merged+wise.csv"):

        print("\nreading catalog: {}".format(catfile))
        df = pd.read_csv(catfile)

        # convert to magnitudes
        nbadflux = (df.flux <= 0).sum()
        if nbadflux > 0:
            print("warning: {} negative flux source(s)".format(nbadflux))
        ch = catfile.split('/')[-1].split('_')[1]
        mags = spz_jy_to_mags(df.flux * 1e-3, float(ch))
        if ch == '1':
            plt.scatter(df.W1mag, mags)
            plt.xlabel('W1 [mag]')
            plt.ylabel('I1 [mag]')
        elif ch == '2':
            plt.scatter(df.W2mag, mags)
            plt.xlabel('W2 [mag]')
            plt.ylabel('I2 [mag]')
        ax = plt.gca()
        xlim, ylim = ax.get_xlim(), ax.get_ylim()
        plt.plot([-5, ylim[1] * 2], [-5, ylim[1] * 2], 'r-')
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        reg = catfile.split('/')[-1].split('_')[0]
        name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=120)
        plt.close()
Example #10
def pylintChecker():
    filelist = util.find_files("../", "*.py")
    for f in filelist:
        print f
    #(pylint_stdout,pylint_stderr) = lint.py_run("../analyzer.py",True)
    #print pylint_stdout.readlines()
    #print pylint_stderr.readlines()
    return True
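The commented-out lines suggest the author intended to run pylint programmatically rather than just listing the files. Older pylint releases expose that through the epylint wrapper (it has since been dropped from pylint), so treat this sketch as illustrative only:

from pylint import epylint as lint

def run_pylint(path):
    # run pylint on `path` and capture its report as lines of text
    pylint_stdout, pylint_stderr = lint.py_run(path, return_std=True)
    return pylint_stdout.readlines()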
Example #11
def setup_test():
    # delete remnants left over after failed build
    root.rmtree(True)
    (rootdir / 'roots' / 'test-intl').copytree(root)
    # copy all catalogs into locale layout directory
    for po in find_files(root, '.po'):
        copy_po = (locale_dir / 'en' / 'LC_MESSAGES' / po)
        if not copy_po.parent.exists():
            copy_po.parent.makedirs()
        shutil.copy(root / po, copy_po)
Example #12
def setup_test():
    # delete remnants left over after failed build
    root.rmtree(True)
    (rootdir / 'roots' / 'test-intl').copytree(root)
    # copy all catalogs into locale layout directory
    for po in find_files(root, '.po'):
        copy_po = (locale_dir / 'en' / 'LC_MESSAGES' / po)
        if not copy_po.parent.exists():
            copy_po.parent.makedirs()
        shutil.copy(root / po, copy_po)
Example #13
def parseDir(path):
    # set up class and results dictionary
    log.info("Performing recursive search for smali files")
    classes = {}
    sharedobj_strings = {}

    for smali in util.find_files(path, '*.smali'):
        log.info("Parsing " + smali)
        f = open(smali, 'r')
        smali_class = parseSmaliFiles(f)
        classes[smali_class['ClassName']] = smali_class

    for sharedobj in util.find_files(path, '*.so'):
        log.info("Processing: " + sharedobj)
        f = open(sharedobj, 'r')
        smali_class = parseSmaliFiles(f)
        sharedobj_strings[sharedobj] = util.unique_strings_from_file(sharedobj)

    log.info("Parsing Complete")
    return {'classes': classes, 'sharedobjs': sharedobj_strings}
Example #14
def plot_sdss(cat_path):
    for catfile in find_files(cat_path, "*merged+sdss.txt"):

        # for now ignore the channel 2 files
        if catfile.split('/')[-1].split('_')[1] != '1':
            continue

        print("\nreading catalog: {}".format(catfile))
        df = pd.read_table(catfile, sep=' ')

        # get rid of negative flux sources, if any
        df = df[df.flux > 0]

        # convert to magnitudes
        mags = spz_jy_to_mags(df.flux * 1e-3, 1)

        # print counts per magnitude bin
        for i in range(10, 15):
            sc = ((df.cl == 3) & (mags > i) & (mags < i + 1)).sum()
            xc = ((df.xsc == 1) & (mags > i) & (mags < i + 1)).sum()
            msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
            print(msg.format(i, i + 1, sc, xc))

        # print number of sources agreed upon
        agree = ((df.xsc == 1) & (df.cl == 3)).sum()
        disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
        na = ((df.xsc == 1) & (df.cl == 0)).sum()
        msg = "{} 2MASS XSC sources classified as galaxies by SDSS"
        print(msg.format(agree))
        msg = "{} 2MASS XSC sources classified as stars by SDSS"
        print(msg.format(disagree))
        msg = "{} 2MASS XSC sources not matched to SDSS"
        print(msg.format(na))

        # plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
        xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
        sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
        # mags[xsc_gals].hist(label='2MASS XSC', normed=True)
        # mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
        plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
                 bins=5,
                 label=['2MASS', 'SDSS'])
        plt.xlabel('IRAC1 [mag]')
        plt.ylabel('Number Count')
        reg = catfile.split('/')[-1].split('_')[0]
        plt.title('{} Extended Sources / Galaxies'.format(reg))
        plt.legend(loc=2)
        name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=100)
        plt.close()
        print("created file: {}".format(outpath))
Example #15
def check_archive(folder, mask, wrapper, file_mask, use_crc, output=None, force=False):
    """Check folder for all matching archives and extract matching files from them"""
    if not output:
        output = os.path.join(CONFIG["DATA"], os.path.basename(os.path.normpath(folder)))
    if not force and os.path.exists(output):
        print "Output path {} already exists - not extracting".format(output)
        return
    print "Extracting to " + output
    for path in find_files(folder, file_mask):
        with wrapper(path) as archive:
            for member in archive.infolist():
                if fnmatch.fnmatch(member.filename, mask):
                    check_file(output, archive, member, use_crc)
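check_archive treats `wrapper` as any context-manager archive class with an infolist() method, which is exactly zipfile.ZipFile's interface. A hypothetical invocation (the paths and masks here are made up for illustration):

import zipfile

# extract every CSV member from the *.zip archives under some folder,
# verifying CRCs and forcing extraction even if the output already exists
check_archive("data/raw", "*.csv", zipfile.ZipFile, "*.zip", use_crc=True, force=True)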
Example #16
def plot_sdss(cat_path):
	for catfile in find_files(cat_path, "*merged+sdss.txt"):

		# for now ignore the channel 2 files
		if catfile.split('/')[-1].split('_')[1] != '1':
			continue

		print("\nreading catalog: {}".format(catfile))
		df = pd.read_table(catfile, sep=' ')

		# get rid of negative flux sources, if any
		df = df[df.flux > 0]

		# convert to magnitudes
		mags = spz_jy_to_mags(df.flux*1e-3, 1)

		# print counts per magnitude bin
		for i in range(10,15):
			sc = ((df.cl == 3) & (mags > i) & (mags < i+1)).sum()
			xc = ((df.xsc == 1) & (mags > i) & (mags < i+1)).sum() 
			msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
			print(msg.format(i, i+1, sc, xc))

		# print number of sources agreed upon
		agree = ((df.xsc == 1) & (df.cl == 3)).sum()
		disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
		na = ((df.xsc == 1) & (df.cl == 0)).sum()
		msg = "{} 2MASS XSC sources classified as galaxies by SDSS"
		print(msg.format(agree))
		msg = "{} 2MASS XSC sources classified as stars by SDSS"
		print(msg.format(disagree))
		msg = "{} 2MASS XSC sources not matched to SDSS"
		print(msg.format(na))

		# plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
		xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
		sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
		# mags[xsc_gals].hist(label='2MASS XSC', normed=True)
		# mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
		plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
			bins=5, label=['2MASS', 'SDSS'])
		plt.xlabel('IRAC1 [mag]')
		plt.ylabel('Number Count')
		reg = catfile.split('/')[-1].split('_')[0]
		plt.title('{} Extended Sources / Galaxies'.format(reg))
		plt.legend(loc=2)
		name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
		outpath = '/'.join(catfile.split('/')[:-1]+[name])
		plt.savefig(outpath, dpi=100)
		plt.close()
		print("created file: {}".format(outpath))
Example #17
def parseDir(path):
    # set up class and results dictionary
    log.info("Performing recursive search for smali files")
    classes = {}
    sharedobj_strings = {}

    for smali in util.find_files(path, '*.smali'):
        log.info("Parsing " + smali)
        f = open(smali, 'r')
        smali_class = parseSmaliFiles(f)
        classes[smali_class['ClassName']] = smali_class

    for sharedobj in util.find_files(path, '*.so'):
        log.info("Processing: " + sharedobj)
        f = open(sharedobj, 'r')
        smali_class = parseSmaliFiles(f)
        sharedobj_strings[sharedobj] =  util.unique_strings_from_file(sharedobj)


    log.info("Parsing Complete")
    return { 'classes' : classes,
             'sharedobjs' : sharedobj_strings }
Example #18
def db_update(config: dict, app_logger: logger.Logger) -> bool:
    """
    Check migrations directory for new migrations since last run and update migrations database.

    :param config: pymigrate configuration
    :param app_logger: instance of configured logger

    :return:
    """
    app_logger.log_with_ts('Starting migration database update process', logger.Levels.DEBUG)
    migrations_directory_path = os.path.join(os.pardir, config['PROJECT_DIR'] + '/' + config['MIGRATIONS_DIR'])
    migration_ids = [migration_id for migration_id in os.listdir(migrations_directory_path) if
                     os.path.isdir(os.path.join(migrations_directory_path, migration_id))]

    migrations_from_db = get_statuses(migrations_directory_path + '/migrations.db', app_logger)
    branch = git.get_branch(migrations_directory_path)
    app_logger.log_with_ts('Got git branch: {0}'.format(branch), logger.Levels.DEBUG)

    # TODO: handle io, sqlite db exceptions
    with sqlite3.connect(migrations_directory_path + '/migrations.db') as conn:
        c = conn.cursor()
        for migration_id, status in migrations_from_db.items():
            if migration_id not in migration_ids:
                app_logger.log_with_ts('Migration {0} is missing on disk, marking it ABSENT'.format(migration_id),
                                       logger.Levels.DEBUG)
                c.execute(
                    "UPDATE migrations SET presence='ABSENT' where migration_id='{0}'".format(migration_id))
            elif status == 'ABSENT':
                app_logger.log_with_ts('Migration re-appeared: {0}'.format(migration_id), logger.Levels.DEBUG)
                c.execute("UPDATE migrations SET presence='PRESENT' where migration_id='{0}'".format(migration_id))

        for migration_id in migration_ids:
            if migration_id not in migrations_from_db:
                app_logger.log_with_ts('New migration detected: {0}'.format(migration_id), logger.Levels.DEBUG)
                c.execute(
                    "INSERT INTO migrations VALUES ('{0}', 'PENDING', 'PRESENT', '{1}')".format(migration_id, branch))

            # Set migration status MANUAL if readme.* is present
            check_query = "SELECT status from migrations where migration_id='{0}'"
            readme_files = util.find_files('readme*', migrations_directory_path + '/' + migration_id, False)
            if len(readme_files) != 0 and os.path.isfile(readme_files[0]) and \
                            c.execute(check_query.format(migration_id)).fetchone()[0].replace('\n', '') not in (
                            Status.DONE.name, Status.FAILED.name, Status.SKIP.name):
                app_logger.log_with_ts('Readme file detected for migration: {0}'.format(migration_id),
                                       logger.Levels.DEBUG)
                c.execute("UPDATE migrations SET status='MANUAL' where migration_id='{0}'".format(migration_id))
    return True
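The UPDATE/INSERT statements above splice migration_id and branch into the SQL with str.format. Since the values come from local directory names this generally works, but sqlite3's parameter binding is the safer equivalent; a small sketch of the same UPDATE written that way:

# equivalent statement using sqlite3 parameter binding instead of string formatting
c.execute("UPDATE migrations SET presence='ABSENT' WHERE migration_id=?", (migration_id,))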
Example #19
def run_migration(migration_id: str, config: dict, app_logger: logger.Logger) -> bool:
    """
    Run migration :param migration_id:.

    :param migration_id: id of migration to run
    :param config: pymigrate configuration
    :param app_logger: instance of configured logger
    """
    migration_dir = os.path.join(os.pardir, config['PROJECT_DIR'] +
                                 '/' + config['MIGRATIONS_DIR'] +
                                 '/' + migration_id)
    app_logger.log_with_ts("Running migration {0} from directory {1}".format(migration_id, migration_dir),
                           logger.Levels.DEBUG)

    # we do not expect more than one migrate* exec
    # TODO: may be we shall exec only migrate.sh if it exists and don't touch other migrate* executables there
    migrate_executable = util.find_files('migrate*', migration_dir, True).pop()
    tmp_file = '/tmp/.migration_runner_stream.tmp'
    cmd = migrate_executable + " {0} ".format(config['ENVIRONMENT'])
    with io.open(tmp_file, 'wb') as writer, io.open(tmp_file, 'rb', 1) as reader:
        child = subprocess.Popen(cmd,
                                 shell=True,
                                 stdout=writer,
                                 stderr=subprocess.STDOUT,
                                 env=config)
        print('stdout:')
        while child.poll() is None:
            print(bytes(reader.read()).decode())
            time.sleep(0.5)
        print(bytes(reader.read()).decode())
        exit_code = child.returncode
        app_logger.log_with_ts("Migration executable exit code: {0}".format(exit_code), logger.Levels.DEBUG)
        os.remove(tmp_file)

    if int(exit_code) == 0:
        app_logger.log_with_ts("Migration is considered DONE", logger.Levels.DEBUG)
        set_status_done(migration_id, app_logger, os.path.join(os.pardir,
                                                               config['PROJECT_DIR'] + '/' +
                                                               config['MIGRATIONS_DIR']))
        return True
    else:
        app_logger.log_with_ts("Migration is considered FAILED", logger.Levels.DEBUG)
        set_status_failed(migration_id, app_logger, os.path.join(os.pardir,
                                                                 config['PROJECT_DIR'] + '/' +
                                                                 config['MIGRATIONS_DIR']))
        return False
Example #20
def match_wise(cat_path, sdss=True):
	if sdss:
		search_pattern = "*merged+sdss.txt"
	else:
		search_pattern = "*merged.txt"

	for catfile in find_files(cat_path, search_pattern):

		# read pipeline catalog
		print("\nreading catalog: {}".format(catfile))
		cat = pd.read_table(catfile, sep=' ')

		# retrieve WISE data from ViZieR if not already downloaded
		ch = catfile.split('/')[-1].split('_')[1]
		if sdss:
			outpath = catfile.replace('{}_merged+sdss.txt'.format(ch), 'wise.vot')
		else:
			outpath = catfile.replace('{}_merged.txt'.format(ch), 'wise.vot')
		if not os.path.isfile(outpath):
			cntr_ra = np.median(cat.ra)
			cntr_dec = np.median(cat.dec)
			# get source from one corner of the mosaic to calculate radius
			c1 = (cat.ra.min(), cat.dec[cat.ra==cat.ra.min()].values[0])
			# make radius 10% bigger just to be on safe side
			radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
			url = get_url(cntr_ra, cntr_dec, radius)
			print("retrieving URL: {}".format(url))
			handler = urllib2.urlopen(url)
			raw = handler.read()
			with open(outpath,'wb') as f:
				f.write(raw)
			print("created file: {}".format(outpath))

		# parse VOTable
		print("reading VOTable: {}".format(outpath))
		table = parse_single_table(outpath)

		# if this is one of the southern hemisphere regions, delete and continue
		if table.array.size == 0:
			os.remove(outpath)
			print("no WISE coverage")
			continue

		# get unmasked array
		wise = table.array.data

		# make sure sky coverage is big enough
		assert wise['RAJ2000'].min() < cat.ra.min()
		assert wise['RAJ2000'].max() > cat.ra.max()
		assert wise['DEJ2000'].min() < cat.dec.min()
		assert wise['DEJ2000'].max() > cat.dec.max()

		# match to catalog
		tol = 2/3600.
		if cat.shape[0] < wise.shape[0]:
			idx1, idx2, ds = spherematch(cat.ra, cat.dec, 
				wise['RAJ2000'], wise['DEJ2000'], tolerance = tol)
		else:
			idx2, idx1, ds = spherematch(wise['RAJ2000'], wise['DEJ2000'],
				cat.ra, cat.dec, tolerance = tol)
		print("matched {} out of {} sources with {} arcsec tolerance".format(ds.size, 
			cat.shape[0], tol*3600))

		# add WISE to the catalog
		if ch == '1':
			cat['W1mag'] = np.repeat(np.nan, cat.shape[0])
			cat['e_W1mag'] = np.repeat(np.nan, cat.shape[0])
			cat['W1mag'][idx1] = wise['W1mag'][idx2]
			cat['e_W1mag'][idx1] = wise['e_W1mag'][idx2]
		elif ch == '2':
			cat['W2mag'] = np.repeat(np.nan, cat.shape[0])
			cat['e_W2mag'] = np.repeat(np.nan, cat.shape[0])
			cat['W2mag'][idx1] = wise['W2mag'][idx2]
			cat['e_W2mag'][idx1] = wise['e_W2mag'][idx2]
		else:
			print("unexpected error adding WISE data")

		# write to new file
		outpath = catfile.replace('.txt', '+wise.csv')
		# fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
		# hdr = ' '.join(names)+' cl'
		# np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
		cat.to_csv(outpath, index=False, float_format='%.8f')
		print("created file: {}".format(outpath))
Example #21
def find_vct_files(root_dir):
    """Return a chronologically sorted list of VCT file paths in root_dir."""
    files = util.find_files(root_dir)
    files = parse_file_list(files)
    return seaflowfile.sorted_files(files)
Example #22
def merge_subarray(vg_dir, bcdphot_dir):
    out_dir = vg_dir.replace('clean', 'plots_catalogs')
    os.mkdir(out_dir)

    hdr_files = find_files(bcdphot_dir, '*combined_hdr_*xsc_cor.txt')
    # hdr_file = list(hdr_files)[0]
    for hdr_file in hdr_files:
        reg, ch = hdr_file.split('/')[-1].split('_')[:2]
        sub_file = '/'.join([vg_dir, "d{}_ch{}_agg.csv".format(reg, ch)])

        hdr_names = open(hdr_file).readline().split()[1:]
        hdr = np.recfromtxt(hdr_file, names=hdr_names)
        sub = np.recfromcsv(sub_file)
        # sub.flux *= 1e-3	# convert from uJy to mJy

        idx1, idx2, ds = spherematch(sub.ra,
                                     sub.dec,
                                     hdr.ra,
                                     hdr.dec,
                                     tolerance=3 / 3600.)
        df = pd.DataFrame({
            'sub_flux': sub.flux[idx1],
            'hdr_flux': hdr.flux[idx2]
        })

        slope = fit_line(df, int(ch))
        with open("{}/linefits.txt".format(out_dir), 'a') as f:
            f.write("{} {} {}\n".format(reg, ch, slope))

        fig = df.plot(x='hdr_flux', y='sub_flux', kind='scatter')
        fig.plot([0, fig.get_xlim()[1]], [0, slope * fig.get_xlim()[1]], 'r-')
        fig.set_title("region {} channel {}".format(reg, ch))
        fig.text(fig.get_xlim()[1] * 0.2,
                 fig.get_ylim()[1] * 0.8,
                 "slope: {0:3f}".format(slope),
                 fontsize=24)
        plt.savefig("{}/{}_{}_linefit.png".format(out_dir, reg, ch), dpi=300)
        plt.close()

        # now save the (uncorrected) matched data to disk
        sub_matched = pd.DataFrame.from_records(sub[idx1])
        # rename the columns
        cols = sub_matched.columns.tolist()
        cols_new = ['sub_' + i for i in cols]
        sub_matched.columns = cols_new
        # set hdr_matched dataframe index equal to sub_matched index, this is
        # necessary for concatenation using pandas.concat
        hdr_matched = pd.DataFrame.from_records(hdr[idx2]).set_index(
            sub_matched.index)
        # rename the columns
        cols = hdr_matched.columns.tolist()
        cols_new = ['hdr_' + i for i in cols]
        hdr_matched.columns = cols_new
        # concatenate
        concat = pd.concat([sub_matched, hdr_matched], 1)
        # # convert subarray flux to mJy
        # concat.sub_flux = concat.sub_flux*1e3
        # concat.sub_unc = concat.sub_unc*1e3
        concat.to_csv("{}/{}_{}_hdr_vs_sub.csv".format(out_dir, reg, ch),
                      index=False,
                      float_format='%.8f')

        # now correct all the subarray flux values with the slope
        sub.flux /= slope

        # now merge hdr and subarray into one dataset:
        # want to keep all the hdr photometry that is not saturated, and
        # keep only the subarray photometry above the hdr saturation limit
        cutoff = get_cutoff(ch)
        bad = hdr.flux > cutoff
        hdr_subset = pd.DataFrame.from_records(hdr[~bad])
        bad = sub.flux < cutoff
        sub_subset = pd.DataFrame.from_records(sub[~bad])
        # add n_obs column to subarray data so it has same format as hdr
        sub_subset['n_obs'] = 4
        # add column indicating whether if it came from subarray
        hdr_subset['sub'] = np.zeros(hdr_subset.shape[0]).astype(int)
        sub_subset['sub'] = np.ones(sub_subset.shape[0]).astype(int)
        # concatenate them
        concat = pd.concat([hdr_subset, sub_subset], 0, ignore_index=True)
        # get rid of the 'id' field since it is no longer relevant
        # but add a column indicating if it was a 2MASS XSC measurement
        concat['xsc'] = np.zeros(concat.shape[0]).astype(int)
        concat.xsc[concat.id < 1] = 1
        concat = concat.drop('id', 1)
        # apply 1% flux reduction to correct for stray light (only to >100 mJy sources)
        concat.flux[concat.flux > 100] *= 0.99
        concat.unc[concat.flux > 100] *= 0.99
        # write to disk
        concat.to_csv("{}/{}_{}_merged.txt".format(out_dir, reg, ch),
                      index=False,
                      sep=' ',
                      float_format='%.8f')
Example #23
def match_wise(cat_path, sdss=True):
    if sdss:
        search_pattern = "*merged+sdss.txt"
    else:
        search_pattern = "*merged.txt"

    for catfile in find_files(cat_path, search_pattern):

        # read pipeline catalog
        print("\nreading catalog: {}".format(catfile))
        cat = pd.read_table(catfile, sep=' ')

        # retrieve WISE data from ViZieR if not already downloaded
        ch = catfile.split('/')[-1].split('_')[1]
        if sdss:
            outpath = catfile.replace('{}_merged+sdss.txt'.format(ch),
                                      'wise.vot')
        else:
            outpath = catfile.replace('{}_merged.txt'.format(ch), 'wise.vot')
        if not os.path.isfile(outpath):
            cntr_ra = np.median(cat.ra)
            cntr_dec = np.median(cat.dec)
            # get source from one corner of the mosaic to calculate radius
            c1 = (cat.ra.min(), cat.dec[cat.ra == cat.ra.min()].values[0])
            # make radius 10% bigger just to be on safe side
            radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
            url = get_url(cntr_ra, cntr_dec, radius)
            print("retrieving URL: {}".format(url))
            handler = urllib2.urlopen(url)
            raw = handler.read()
            with open(outpath, 'wb') as f:
                f.write(raw)
            print("created file: {}".format(outpath))

        # parse VOTable
        print("reading VOTable: {}".format(outpath))
        table = parse_single_table(outpath)

        # if this is one of the southern hemisphere regions, delete and continue
        if table.array.size == 0:
            os.remove(outpath)
            print("no WISE coverage")
            continue

        # get unmasked array
        wise = table.array.data

        # make sure sky coverage is big enough
        assert wise['RAJ2000'].min() < cat.ra.min()
        assert wise['RAJ2000'].max() > cat.ra.max()
        assert wise['DEJ2000'].min() < cat.dec.min()
        assert wise['DEJ2000'].max() > cat.dec.max()

        # match to catalog
        tol = 2 / 3600.
        if cat.shape[0] < wise.shape[0]:
            idx1, idx2, ds = spherematch(cat.ra,
                                         cat.dec,
                                         wise['RAJ2000'],
                                         wise['DEJ2000'],
                                         tolerance=tol)
        else:
            idx2, idx1, ds = spherematch(wise['RAJ2000'],
                                         wise['DEJ2000'],
                                         cat.ra,
                                         cat.dec,
                                         tolerance=tol)
        print("matched {} out of {} sources with {} arcsec tolerance".format(
            ds.size, cat.shape[0], tol * 3600))

        # add WISE to the catalog
        if ch == '1':
            cat['W1mag'] = np.repeat(np.nan, cat.shape[0])
            cat['e_W1mag'] = np.repeat(np.nan, cat.shape[0])
            cat['W1mag'][idx1] = wise['W1mag'][idx2]
            cat['e_W1mag'][idx1] = wise['e_W1mag'][idx2]
        elif ch == '2':
            cat['W2mag'] = np.repeat(np.nan, cat.shape[0])
            cat['e_W2mag'] = np.repeat(np.nan, cat.shape[0])
            cat['W2mag'][idx1] = wise['W2mag'][idx2]
            cat['e_W2mag'][idx1] = wise['e_W2mag'][idx2]
        else:
            print("unexpected error adding WISE data")

        # write to new file
        outpath = catfile.replace('.txt', '+wise.csv')
        # fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
        # hdr = ' '.join(names)+' cl'
        # np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
        cat.to_csv(outpath, index=False, float_format='%.8f')
        print("created file: {}".format(outpath))
Example #24
import importlib, re
from util import find_files

modules = []

for file in find_files("modules"):
	if file.endswith(".py") and not file == "__init__.py":
		filename = 'modules.'+file[:-3]
		modules.append(importlib.import_module(filename))

"""
for module in modules:
	print module
"""

pattern = re.compile(r'(http://i.imgur.com/(.*))(\?.*)?')

result = pattern.search("http://i.imgur.com/test") # using regex here instead of BeautifulSoup because we are parsing a url, not html
if result:
	print result.group(0)
	print len(result.groups())
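As written, the dots in the pattern are unescaped and the greedy (.*) swallows any query string, so the optional (\?.*)? group can never capture anything. A tightened version, offered only as a suggestion:

import re

# escape the literal dots and stop the path group at the first '?'
# so the optional query-string group can actually match
pattern = re.compile(r'(http://i\.imgur\.com/([^?\s]*))(\?\S*)?')

result = pattern.search("http://i.imgur.com/test?foo=1")
if result:
    print(result.group(1))  # http://i.imgur.com/test
    print(result.group(3))  # ?foo=1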
Example #25
def run_xsc_phot(bcdphot_out_path, mosaic_path):
    replaced = {}
    for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

        print("\n======================================================")
        print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
        print("------------------------------------------------------")
        outpath = cat.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')

        # retrieve 2mass data if file doesn't already exist (from previous run)
        if not os.path.isfile(outpath):
            # get url and retrieve data
            url = query_2mass_xsc_polygon(*get_region_corners(cat))
            print("\ndownloading 2MASS photometry from: {}".format(url))
            text = urllib2.urlopen(url).read()
            # write to disk
            with open(outpath, 'w') as f:
                f.write(text)
            print("\ncreated file: {}".format(outpath))

        # read back in as recarray
        print("\nreading: {}".format(outpath))
        names = open(outpath).read().split('\n')[76].split('|')[1:-1]
        da = np.recfromtxt(outpath, skip_header=80, names=names)

        # write input file for xsc_phot.pro
        infile_outpath = '/'.join(cat.split('/')[:-1]) + '/xsc.txt'
        with open(infile_outpath, 'w') as w:
            for i in range(da.shape[0]):
                w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i],
                                               da.dec[i], da.r_ext[i]))
        print(
            "\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

        # locate the FITS mosaic file for xsc_phot.pro to do photometry on
        reg, ch = cat.split('/')[-1].split('_')[:2]
        mosaicfile = filter(lambda x: 'dirbe{}/ch{}/long/full/Combine'\
         .format(reg,ch) in x, find_files(mosaic_path, '*mosaic.fits'))[0]
        print("\nfound mosaic file: {}".format(mosaicfile))

        # spawn IDL subprocess running xsc_phot.pro and catch stdout in file
        outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
        if not os.path.isfile(outpath):
            outfile = open(outpath, 'w')
            print("\nspawning xsc_phot.pro IDL subprocess")
            cmd = "xsc_phot,'" + mosaicfile + "','" + infile_outpath + "','long'"
            rc = subprocess.call(
                ['/usr/local/itt/idl71/bin/idl', '-quiet', '-e', cmd],
                stderr=subprocess.PIPE,
                stdout=outfile)
            outfile.close()

        # read in output to recarray
        print("\nreading: {}".format(outpath))
        phot = np.recfromtxt(outpath,
                             names=['id', 'flux', 'unc', 'sky', 'skyunc'])

        # make sure rows are aligned
        assert (da.designation == phot.id).all()

        # ignore xsc sources we got a NaN or negative flux for
        bad = np.isnan(phot.flux) | (phot.flux < 0)
        print("\naper.pro returned NaN or negative flux for {} sources".format(
            bad.sum()))
        if bad.sum() > 0:
            for i in phot[bad].id:
                print(i)
            outpath = cat.replace('combined_hdr_catalog.txt',
                                  'xsc_nan_phot.csv')
            with open(outpath, 'w') as f:
                w = csv.writer(f)
                w.writerow(da.dtype.names)
                w.writerows(da[bad].tolist())
            print('\ncreated file: {}'.format(outpath))
        phot = phot[~bad]
        da = da[~bad]

        # read in pipeline catalog
        print("\nreading: {}".format(cat))
        names = open(cat).readline().split()[1:]
        c = np.recfromtxt(cat, names=names)

        # loop through xsc sources and find matches in pipeline catalog
        print(
            "\nfinding records associated with XSC sources in pipeline catalog"
        )
        c_flux_total = []
        n_in_aper = []
        c_idx = []
        coords = radec_to_coords(c.ra, c.dec)
        kdt = KDT(coords)
        for i in range(phot.size):
            radius = da.r_ext[i] / 3600.
            # idx1, idx2, ds = spherematch(da.ra[i], da.dec[i],
            # 	c.ra, c.dec, tolerance=radius)
            idx, ds = spherematch2(da.ra[i],
                                   da.dec[i],
                                   c.ra,
                                   c.dec,
                                   kdt,
                                   tolerance=radius,
                                   k=500)
            # c_flux_total.append(c.flux[idx2].sum())
            # n_in_aper.append(c.flux[idx2].size)
            # c_idx.append(idx2.tolist())
            c_flux_total.append(c.flux[idx].sum())
            n_in_aper.append(ds.size)
            c_idx.append(idx.tolist())
        print("\nhistogram of source counts in r_ext aperture")
        for i in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print i

        # create new version of catalog file with xsc-associated entries replaced
        c_idx = np.array(flatten(c_idx))
        print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
        replaced[cat] = {'old': c_idx.size, 'new': phot.size}
        replaced[cat]['hist'] = [(i, n_in_aper.count(i))
                                 for i in set(n_in_aper)]
        c = np.delete(c, c_idx)
        newrows = np.rec.array([(-i, da.ra[i], da.dec[i],
         phot.flux[i], phot.unc[i], 1) for i in \
         range(phot.size)], dtype=c.dtype)
        newcat = np.hstack((c, newrows))

        # write new version of catalog to disk
        fmt = ['%i'] + ['%0.8f'] * 2 + ['%.4e'] * 2 + ['%i']
        outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
        np.savetxt(outpath, newcat, fmt=fmt, header=' '.join(names))
        print('\ncreated file: {}'.format(outpath))

        # make plot of total old vs. new flux
        plt.scatter(c_flux_total, phot.flux)
        ylim = plt.gca().get_ylim()
        plt.xlim(*ylim)
        max_y = ylim[1]
        plt.plot(ylim, ylim, 'r-')
        plt.xlabel('old flux [mJy]')
        plt.ylabel('new flux [mJy]')
        name = ' '.join(cat.split('/')[-1].split('_')[:2])
        plt.title(name)
        outpath = cat.replace('combined_hdr_catalog.txt',
                              'xsc_new_vs_old_phot.png')
        plt.savefig(outpath, dpi=200)
        plt.close()
        print('\ncreated file: {}'.format(outpath))

    outfile = 'xsc_replaced.json'
    json.dump(replaced, open(outfile, 'w'))
    print("\ncreated file: {}".format(outfile))
    print("\nremoved / added")
    for k, v in replaced.iteritems():
        print k.split('/')[-1], v['old'], v['new']
    m = np.mean([i['old'] / float(i['new']) for i in replaced.values()])
    print("average ratio: {}".format(m))
    print("\nK mag and r_ext of sources with NaN photometry:")
    for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
        reg = i.split('/')[-1]
        rec = np.recfromcsv(i)
        bad_id = rec.designation.tolist()
        bad_k = rec.k_m_k20fe.tolist()
        bad_r_ext = rec.r_ext.tolist()
        print reg
        print("\tid\t\t\tKmag\tr_ext")
        if type(bad_id) is list:
            seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
            for j, k, l in seq:
                print("\t{}\t{}\t{}".format(j, k, l))
        else:
            print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
Example #26
def is_plain_json_list(fname):
    if not os.path.exists(fname):
        return -1
    with open(fname, 'r') as fh:
        cont = fh.read(1024)
    cont = cont.strip()
    if not cont:
        return True
    return cont[0] == '{'


def compress_and_rename_old(fname):
    if file_is_bzip2(fname):
        return  # compressed already
    if not is_plain_json_list(fname):
        return  # compressed already
    if is_plain_json_list(fname) == -1:
        return  # file does not exist?
    if file_age_in_seconds(fname) < N_DAYS * 86400:
        return  # not old
    os.system('bzip2 "%s"' % fname)
    os.rename(fname + '.bz2', fname)
    print '  File compressed:', fname


if __name__ == '__main__':
    for output_file in find_files(ITEMS_DIR, '*.jl'):
        compress_and_rename_old(output_file)
    for log_file in find_files(LOGS_DIR, '*.log'):
        compress_and_rename_old(log_file)
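This housekeeping script also depends on file_is_bzip2 and file_age_in_seconds helpers that are not shown here. Plausible sketches of both, assuming the usual magic-byte and mtime checks (the names come from the calls above; the bodies are guesses):

import os
import time

def file_is_bzip2(fname):
    # assumed helper: look for the bzip2 magic bytes at the start of the file
    with open(fname, 'rb') as fh:
        return fh.read(3) == b'BZh'

def file_age_in_seconds(fname):
    # assumed helper: seconds since the file was last modified
    return time.time() - os.path.getmtime(fname)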
Example #27
def show(request):
    files = find_files()
    return render(request, "index.html", {"all_info": files})
Example #28
def dir_walk(target_dir=None, quiet=None):
    '''recursively walk a directory containing cti and return the stats'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
                   ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(), \
                        'exploit_targets': set(), 'incidents': set(), \
                        'indicators': set(), 'threat_actors': set(), \
                        'ttps': set()}
    cooked_cybox_objs = {'AccountObjectType': set(),
                      'AddressObjectType': set(),
                      'APIObjectType': set(),
                      'ArchiveFileObjectType': set(),
                      'ARPCacheObjectType': set(),
                      'ArtifactObjectType': set(),
                      'ASObjectType': set(),
                      'CodeObjectType': set(),
                      'CustomObjectType': set(),
                      'DeviceObjectType': set(),
                      'DiskObjectType': set(),
                      'DiskPartitionObjectType': set(),
                      'DNSCacheObjectType': set(),
                      'DNSQueryObjectType': set(),
                      'DNSRecordObjectType': set(),
                      'DomainNameObjectType': set(),
                      'EmailMessageObjectType': set(),
                      'FileObjectType': set(),
                      'GUIDialogboxObjectType': set(),
                      'GUIObjectType': set(),
                      'GUIWindowObjectType': set(),
                      'HostnameObjectType': set(),
                      'HTTPSessionObjectType': set(),
                      'ImageFileObjectType': set(),
                      'LibraryObjectType': set(),
                      'LinkObjectType': set(),
                      'LinuxPackageObjectType': set(),
                      'MemoryObjectType': set(),
                      'MutexObjectType': set(),
                      'NetworkConnectionObjectType': set(),
                      'NetworkFlowObjectType': set(),
                      'NetworkPacketObjectType': set(),
                      'NetworkRouteEntryObjectType': set(),
                      'NetRouteObjectType': set(),
                      'NetworkSocketObjectType': set(),
                      'NetworkSubnetObjectType': set(),
                      'PDFFileObjectType': set(),
                      'PipeObjectType': set(),
                      'PortObjectType': set(),
                      'ProcessObjectType': set(),
                      'ProductObjectType': set(),
                      'SemaphoreObjectType': set(),
                      'SMSMessageObjectType': set(),
                      'SocketAddressObjectType': set(),
                      'SystemObjectType': set(),
                      'UnixFileObjectType': set(),
                      'UnixNetworkRouteEntryObjectType': set(),
                      'UnixPipeObjectType': set(),
                      'UnixProcessObjectType': set(),
                      'UnixUserAccountObjectType': set(),
                      'UnixVolumeObjectType': set(),
                      'URIObjectType': set(),
                      'URLHistoryObjectType': set(),
                      'UserAccountObjectType': set(),
                      'UserSessionObjectType': set(),
                      'VolumeObjectType': set(),
                      'WhoisObjectType': set(),
                      'WindowsComputerAccountObjectType': set(),
                      'WindowsCriticalSectionObjectType': set(),
                      'WindowsDriverObjectType': set(),
                      'WindowsEventLogObjectType': set(),
                      'WindowsEventObjectType': set(),
                      'WindowsExecutableFileObjectType': set(),
                      'WindowsFilemappingObjectType': set(),
                      'WindowsFileObjectType': set(),
                      'WindowsHandleObjectType': set(),
                      'WindowsHookObjectType': set(),
                      'WindowsKernelHookObjectType': set(),
                      'WindowsKernelObjectType': set(),
                      'WindowsMailslotObjectType': set(),
                      'WindowsMemoryPageRegionObjectType': set(),
                      'WindowsMutexObjectType': set(),
                      'WindowsNetworkRouteEntryObjectType': set(),
                      'WindowsNetworkShareObjectType': set(),
                      'WindowsPipeObjectType': set(),
                      'WindowsPrefetchObjectType': set(),
                      'WindowsProcessObjectType': set(),
                      'WindowsRegistryKeyObjectType': set(),
                      'WindowsSemaphoreObjectType': set(),
                      'WindowsServiceObjectType': set(),
                      'WindowsSystemObjectType': set(),
                      'WindowsSystemRestoreObjectType': set(),
                      'WindowsTaskObjectType': set(),
                      'WindowsThreadObjectType': set(),
                      'WindowsUserAccountObjectType': set(),
                      'WindowsVolumeObjectType': set(),
                      'WindowsWaitableTimerObjectType': set(),
                      'X509CertificateObjectType': set(),
    }
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = \
                process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i)
        except Exception:
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
Example #29
def do_run():
    """Unpack eFRI data"""
    ## Lambert conformal conic projection
    MNR_LAMBERT = arcpy.SpatialReference(
        'Projected Coordinate Systems/National Grids/Canada/NAD 1983 CSRS Ontario MNR Lambert'
    )
    ## base file name for data
    BASE_NAME = r'eFRI'
    ## File types to input from
    FIND_MASK = '*.zip'
    if len(sys.argv) > 1:
        BASE_NAME = sys.argv[1]
    if len(sys.argv) > 2:
        FIND_MASK = sys.argv[2]
    ## Where to unzip input files to
    OUT_GDBS = os.path.join(OUT_DIR, r'{}_gdbs'.format(BASE_NAME))
    ## Where to unzip exterior files
    UNZIP_FIRST = os.path.join(OUT_DIR, BASE_NAME)
    ## Where to unzip nested zipped files
    UNZIP_SECOND = os.path.join(OUT_DIR, r'{}_1'.format(BASE_NAME))
    #
    check_zip(INPUT_DIR, '*', file_mask=FIND_MASK, output=UNZIP_FIRST)
    # make sure we unzip any zips that were in the zips
    check_zip(UNZIP_FIRST, '*', output=UNZIP_SECOND)
    gdbs = find_dirs(UNZIP_SECOND, '*.gdb')
    #
    roots = sorted(map(os.path.basename, gdbs))
    for i in xrange(len(roots)):
        if roots[i] in roots[i + 1:]:
            print 'Error: duplicate directory name - ' + roots[i]
            sys.exit(-1)
    #
    ensure_dir(OUT_GDBS)

    #
    def try_move(x, move_to):
        """Try to move and do nothing on failure"""
        try:
            shutil.move(x, move_to)
        except:
            # must have moved a parent directory already
            pass

    #
    map(lambda x: try_move(x, OUT_GDBS), gdbs)
    #
    OUT_ZIPS = os.path.join(OUT_DIR, r'{}_zips'.format(BASE_NAME))
    ensure_dir(OUT_ZIPS)
    zips = find_files(UNZIP_SECOND, '*.zip')
    map(lambda x: try_move(x, OUT_ZIPS), zips)
    #
    UNZIP_THIRD = os.path.join(OUT_DIR, r'{}_2'.format(BASE_NAME))
    check_zip(OUT_ZIPS, '*', output=UNZIP_THIRD)
    gdbs = find_dirs(UNZIP_THIRD, '*.gdb')
    map(lambda x: try_move(x, OUT_GDBS), gdbs)
    #
    arcpy.env.overwriteOutput = True
    arcpy.env.addOutputsToMap = False
    #
    #~ # only find gdbs that end in -2D or _2D since those are the ones we care about
    # missing WhiteRiver since it's '2D_FRI.gdb'
    #~ gdbs = find_dirs(OUT_GDBS, '*[_-]2D.gdb')
    #~ gdbs = find_dirs(OUT_GDBS, '*.gdb')
    # HACK: only use gdbs with > 2 characters in name so we omit the '2D' and '3D' duplicates of Algonquin
    gdbs = sorted(find_dirs(OUT_GDBS, '???*.gdb'))
    ## Directory to output to
    FINAL_DIR = ensure_dir(r'C:\FireGUARD\data\GIS\intermediate\fuels')
    ## GDB to output to
    OUT_GDB = checkGDB(FINAL_DIR, "{}_LIO.gdb".format(BASE_NAME))
    ## GDB to output shapefiles of simplified bounds to
    OUT_GDB_COVERAGE = checkGDB(FINAL_DIR,
                                "{}_LIO_coverage.gdb".format(BASE_NAME))

    #
    def findName(ds):
        """Find simplified name to use for dataset"""
        name = ds.replace('-', '_')
        if name.endswith('_w'):
            name = name[:-2]
        ignore = [
            'eFRI', '2D', 'Final', 'FRI', 'Dataset', 'Block', 'forest',
            'Forest', 'FOREST', 'PP', '_', 'Topology', 'SMLS'
        ]
        # remove all numbers
        ignore += map(str, list(xrange(10)))
        for r in ignore:
            name = name.replace(r, '')
        # HACK: fix known abbreviations
        names = {
            'GCF': 'GordonCosens',
            'CA': 'Caribou',
            'DRMatawin': 'DogRiverMatawin',
            'Hrst': 'Hearst',
            'MagpieThunderH': 'Magpie',
            'PANA': 'Pukaskwa',
            'ARF': 'AbitibiRiver',
            'BA': 'Bancroft'
        }
        if name in names.keys():
            name = names[name]
        if name.isupper():
            name = name.capitalize()
        name = name.replace('lake', 'Lake')
        return name

    #
    def copyForest(gdb):
        """Copy from gdb"""
        print "Processing " + str(gdb)
        arcpy.env.workspace = gdb
        gdb_name = os.path.basename(gdb)
        try:
            ds = arcpy.ListDatasets()[0]
        except:
            # this is an empty folder, so skip
            return None
        if gdb_name.startswith('pp_FRI_FIMv2'):
            name = findName(
                re.match('pp_FRI_FIMv2_[^_]*_',
                         gdb_name).group(0).replace('pp_FRI_FIMv2', ''))
        else:
            name = findName(ds)
        # HACK: if name consists of only those things we replace then look at gdb name
        if 0 == len(name):
            name = findName(gdb_name.replace('.gdb', ''))

        def mkForest(_):
            arcpy.env.workspace = os.path.join(gdb, ds)
            feats = arcpy.ListFeatureClasses()
            # HACK: assume feature with most rows is the forest polygon
            counts = map(lambda x: int(arcpy.GetCount_management(x)[0]), feats)
            forest = feats[counts.index(max(counts))]
            arcpy.CopyFeatures_management(forest, _)

        forest = check_make(os.path.join(OUT_GDB, name), mkForest)
        # using the coverage from the gdb is giving us the WMU, not the area covered by the data
        outline = check_make(
            os.path.join(OUT_GDB_COVERAGE, name),
            lambda _: arcpy.Dissolve_management(forest, _, '#'))
        if 'name' not in map(lambda x: x.name, arcpy.ListFields(outline)):
            arcpy.AddField_management(outline, 'name', "TEXT")
            arcpy.CalculateField_management(outline, 'name',
                                            '"{}"'.format(name), 'PYTHON')
        return name

    #
    def mkAll(_):
        ## list of names after copying from gdbs
        out = [x for x in sorted(map(copyForest, gdbs)) if x is not None]
        arcpy.env.outputCoordinateSystem = MNR_LAMBERT
        ## Merge all outlines together to make total area covered shape
        arcpy.Merge_management(';'.join(out), _)

    env_push()
    arcpy.env.workspace = OUT_GDB_COVERAGE
    ALL = arcpy.MakeFeatureLayer_management(check_make('ALL', mkAll))
    env_pop()
    # this is for updated example map services on test gis server
    if BASE_NAME == 'eFRI':
        fri_status = arcpy.MakeFeatureLayer_management(
            os.path.join(os.path.join(GIS_DIR, r'input\fuels\LIO'),
                         r'FRI_STATUS_FT.shp'))
        # copy to service if we're doing eFRI data
        copy_to_server(ALL, 'eFRIdata')
        copy_to_server(fri_status, 'eFRIplanned')
        arcpy.SelectLayerByLocation_management(
            fri_status,
            "HAVE_THEIR_CENTER_IN",
            ALL,
            invert_spatial_relationship="INVERT")
        # HACK: can't think of a better way to do this
        arcpy.SelectLayerByAttribute_management(fri_status,
                                                "REMOVE_FROM_SELECTION",
                                                "UNIT_NAME like '%Nipigon%'")
        copy_to_server(fri_status, 'eFRIplanned_select')
Example #30
import util
import multiprocess
import numpy as np
import os
import sorting  # assumed: module providing matlab_source_file_default_pipeline used below

INPUT_ROOT = '/gpfs/milgram/project/chang/pg496/nn_all_raw_files'
OUTPUT_ROOT = '/gpfs/milgram/project/chang/pg496/nn_all_raw_files_ms4_sorting_stringent'
PARALLEL = True
NUM_PARALLEL_PROCESSES = 8


def sort_several(files):
    for file in files:
        sorting.matlab_source_file_default_pipeline(INPUT_ROOT, OUTPUT_ROOT,
                                                    file)


def create_sorting_task(files):
    return (lambda f: lambda: sort_several(f))(files)


if __name__ == '__main__':
    _, src_filenames, _ = util.find_files(INPUT_ROOT, '.mat')
    filename_sets = np.array_split(src_filenames, NUM_PARALLEL_PROCESSES)
    fs = [create_sorting_task(f) for f in filename_sets]

    if PARALLEL:
        multiprocess.run_tasks(multiprocess.make_tasks(fs))
    else:
        for f in fs:
            f()
Example #31
    with open(backup_file, 'r') as original:
        with open(fixed_file, 'w') as fixed:
            content = regex.sub(
                lambda x: regex_sub_dict[x.string[x.start(): x.end()]],
                original.read()
            )
            fixed.write(content)


parser = argparse.ArgumentParser(description='Fix input files')
parser.add_argument('--input-file-dir', required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    files_to_fix = find_files(args.input_file_dir)
    
    print('Found the following files to fix:\n{}'.format('\n'.join(files_to_fix)))

    regex_sub_dict = {
        "\n,": ",",
        "\n\n": "\n",
        "\0": ""
    }

    # Make a backup dir for the original files to be fixed
    backup_dir = args.input_file_dir + '/backup'
    if not os.path.exists(backup_dir):
        print('Creating {} directory'.format(backup_dir))
        os.mkdir(backup_dir)
Example #32
parser.add_argument('--pwd', required=True)
parser.add_argument('--host', required=False, default='localhost')
parser.add_argument('--port', required=False, default=5432)
parser.add_argument('--db', required=False, default='postgres')
parser.add_argument('--input-file-dir', required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    engine = create_engine(
        "postgresql://{user}:{pwd}@{host}:{port}/{db}".format(user=args.user,
                                                              pwd=args.pwd,
                                                              host=args.host,
                                                              port=args.port,
                                                              db=args.db))

    files_to_load = find_files(args.input_file_dir)

    marketing_files = [
        args.input_file_dir + '/' + file for file in files_to_load
        if 'marketing' in file
    ]
    user_files = [
        args.input_file_dir + '/' + file for file in files_to_load
        if 'user' in file
    ]

    Session = sessionmaker(bind=engine)
    session = Session()
    Base.metadata.create_all(engine, checkfirst=True)

    for file in marketing_files:
Exemplo n.º 33
0
def merge_subarray(vg_dir, bcdphot_dir):
	out_dir = vg_dir.replace('clean','plots_catalogs')
	os.mkdir(out_dir)

	hdr_files = find_files(bcdphot_dir, '*combined_hdr_*xsc_cor.txt')
	# hdr_file = list(hdr_files)[0]
	for hdr_file in hdr_files:
		reg, ch = hdr_file.split('/')[-1].split('_')[:2]
		sub_file = '/'.join([vg_dir, "d{}_ch{}_agg.csv".format(reg, ch)])

		hdr_names = open(hdr_file).readline().split()[1:]
		hdr = np.recfromtxt(hdr_file, names=hdr_names)
		sub = np.recfromcsv(sub_file)
		# sub.flux *= 1e-3	# convert from uJy to mJy

		idx1, idx2, ds = spherematch(sub.ra, sub.dec, hdr.ra, hdr.dec, tolerance=3/3600.)
		df = pd.DataFrame({'sub_flux': sub.flux[idx1], 'hdr_flux':hdr.flux[idx2]})

		slope = fit_line(df, int(ch))
		with open("{}/linefits.txt".format(out_dir),'a') as f:
			f.write("{} {} {}\n".format(reg, ch, slope))

		fig = df.plot(x='hdr_flux',y='sub_flux', kind='scatter')
		fig.plot([0, fig.get_xlim()[1]], [0, slope * fig.get_xlim()[1]], 'r-')
		fig.set_title("region {} channel {}".format(reg, ch))
		fig.text(fig.get_xlim()[1]*0.2, fig.get_ylim()[1]*0.8, 
			"slope: {0:3f}".format(slope), fontsize=24)
		plt.savefig("{}/{}_{}_linefit.png".format(out_dir, reg, ch), dpi=300)
		plt.close()

		# now save the (uncorrected) matched data to disk
		sub_matched = pd.DataFrame.from_records(sub[idx1])
		# rename the columns
		cols = sub_matched.columns.tolist()
		cols_new = ['sub_'+i for i in cols]
		sub_matched.columns = cols_new
		# set hdr_matched dataframe index equal to sub_matched index, this is
		# necessary for concatenation using pandas.concat
		hdr_matched = pd.DataFrame.from_records(hdr[idx2]).set_index(sub_matched.index)
		# rename the columns
		cols = hdr_matched.columns.tolist()
		cols_new = ['hdr_'+i for i in cols]
		hdr_matched.columns = cols_new
		# concatenate
		concat = pd.concat([ sub_matched, hdr_matched ], 1)
		# # convert subarray flux to mJy
		# concat.sub_flux = concat.sub_flux*1e3
		# concat.sub_unc = concat.sub_unc*1e3
		concat.to_csv("{}/{}_{}_hdr_vs_sub.csv".format(out_dir, reg, ch), 
			index=False, float_format='%.8f')

		# now correct all the subarray flux values with the slope
		sub.flux /= slope

		# now merge hdr and subarray into one dataset:
		# want to keep all the hdr photometry that is not saturated, and
		# keep only the subarray photometry above the hdr saturation limit
		cutoff = get_cutoff(ch)
		bad = hdr.flux > cutoff
		hdr_subset = pd.DataFrame.from_records(hdr[~bad])
		bad = sub.flux < cutoff
		sub_subset = pd.DataFrame.from_records(sub[~bad])
		# add n_obs column to subarray data so it has same format as hdr
		sub_subset['n_obs'] = 4
		# add column indicating whether if it came from subarray
		hdr_subset['sub'] = np.zeros(hdr_subset.shape[0]).astype(int)
		sub_subset['sub'] = np.ones(sub_subset.shape[0]).astype(int)
		# concatenate them
		concat = pd.concat([ hdr_subset, sub_subset ], 0, ignore_index=True)
		# get rid of the 'id' field since it is no longer relevant
		# but add a column indicating if it was a 2MASS XSC measurement
		concat['xsc'] = np.zeros(concat.shape[0]).astype(int)
		concat.xsc[concat.id < 1] = 1
		concat = concat.drop('id', 1)
		# apply 1% flux reduction to correct for stray light (only to >100 mJy sources);
		# compute the mask once so the uncertainty correction uses the pre-scaling fluxes
		bright = concat.flux > 100
		concat.loc[bright, 'flux'] *= 0.99
		concat.loc[bright, 'unc'] *= 0.99
		# write to disk
		concat.to_csv("{}/{}_{}_merged.txt".format(out_dir, reg, ch), 
			index=False, sep=' ', float_format='%.8f')
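
The merge step keeps HDR photometry below the saturation cutoff and subarray photometry above it before stacking the two catalogs. A toy sketch of that selection on made-up DataFrames; the column names, values, and cutoff are assumptions (the real cutoff comes from get_cutoff(ch)):

import pandas as pd

CUTOFF = 450.0  # hypothetical saturation limit in mJy

hdr = pd.DataFrame({'ra': [10.1, 10.2, 10.3], 'flux': [120.0, 900.0, 30.0]})
sub = pd.DataFrame({'ra': [10.2, 10.4], 'flux': [880.0, 15.0]})

# Keep unsaturated HDR sources and only the bright subarray sources.
hdr_subset = hdr[hdr.flux <= CUTOFF].copy()
sub_subset = sub[sub.flux >= CUTOFF].copy()

# Flag the origin of each row before stacking them into one catalog.
hdr_subset['sub'] = 0
sub_subset['sub'] = 1
merged = pd.concat([hdr_subset, sub_subset], axis=0, ignore_index=True)
print(merged)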
Exemplo n.º 34
0
def get_actual():
    return set(find_files(catalog_dir, '.mo'))
Exemplo n.º 35
0
def test_compile_specific_catalogs(app, status, warning):
    app.builder.compile_specific_catalogs(['admonitions'])

    catalog_dir = locale_dir / app.config.language / 'LC_MESSAGES'
    actual = set(find_files(catalog_dir, '.mo'))
    assert actual == set(['admonitions.mo'])
Exemplo n.º 36
0
def plot_spz_vs_wise_sdss_class(cat_path, plot_style='scatter'):

	ch1 = list(find_files(cat_path, "*merged+sdss+wise.csv"))[::2]
	ch2 = list(find_files(cat_path, "*merged+sdss+wise.csv"))[1::2]

	for ch1, ch2 in zip(ch1, ch2):

		reg1 = ch1.split('/')[-1].split('_')[0]
		reg2 = ch2.split('/')[-1].split('_')[0]
		assert reg1 == reg2

		print("\nreading catalog: {}".format(ch1))
		print("reading catalog: {}".format(ch2))
		df1 = pd.read_csv(ch1)
		df2 = pd.read_csv(ch2)

		# convert to magnitudes
		mags1 = spz_jy_to_mags(df1.flux*1e-3, 1)
		mags2 = spz_jy_to_mags(df2.flux*1e-3, 2)

		# match ch1 / ch2
		idx1, idx2 = match_cats(df1, df2, tol=2/3600.)

		# save matched catalogs
		matched1 = df1.loc[idx1]
		matched2 = df2.loc[idx2]
		ch1_cols = [i+'_1' for i in df1.columns.tolist()]
		ch2_cols = [i+'_2' for i in df2.columns.tolist()]
		# matched1.columns = ch1_cols	
		# matched2.columns = ch2_cols
		# matched = pd.concat([matched1, matched2], 1, ignore_index=True)	# weird error
		matched = np.concatenate([matched1.values, matched2.values], 1)
		df_matched = pd.DataFrame(matched, columns=ch1_cols+ch2_cols)
		df_matched['I1'] = mags1[idx1].values
		df_matched['I2'] = mags2[idx2].values
		outpath = '/'.join(ch1.split('/')[:-1])+'/{}_2ch_matched+sdss.csv'.format(reg1)
		df_matched.to_csv(outpath, index=False, float_format='%.8f')
		print("created file: {}".format(outpath))

		# identify SDSS galaxies and stars
		galaxies = (df1.cl[idx1].values == 3) & (df2.cl[idx2].values == 3)
		stars = (df1.cl[idx1].values == 6) & (df2.cl[idx2].values == 6)

		# plot I1-I2 vs. W1-W2
		color1 = df1.W1mag[idx1].values - df2.W2mag[idx2].values
		color2 = mags1[idx1].values - mags2[idx2].values
		# galaxies
		name = '{}_I1-I2_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(color1[galaxies], color2[galaxies], outpath, 'W1-W2 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-color')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(color1[stars], color2[stars], outpath, 'W1-W2 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-color')

		# plot I1-W1 vs. I2-W2
		color1 = mags1[idx1].values - df1.W1mag[idx1].values
		color2 = mags2[idx2].values - df2.W2mag[idx2].values
		# galaxies
		name = '{}_I1-W1_vs_I2-W2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(color1[galaxies], color2[galaxies], outpath, 'I1-W1 [mag]', 'I2-W2 [mag]', 
			plot_style=plot_style, plot_type='color-color')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(color1[stars], color2[stars], outpath, 'I1-W1 [mag]', 'I2-W2 [mag]', 
			plot_style=plot_style, plot_type='color-color')

		# plot spz color-magnitude diagrams
		color = mags1[idx1].values - mags2[idx2].values
		mags = mags1[idx1].values
		# galaxies
		name = '{}_I1_vs_I1-I2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(mags[galaxies], color[galaxies], outpath, 'I1 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(mags[stars], color[stars], outpath, 'I1 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')

		# plot wise color-magnitude diagrams
		color = df1.W1mag[idx1].values - df2.W2mag[idx2].values
		mags = df1.W1mag[idx1].values
		# galaxies
		name = '{}_W1_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(mags[galaxies], color[galaxies], outpath, 'W1 [mag]', 'W1-W2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(mags[stars], color[stars], outpath, 'W1 [mag]', 'W1-W2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')
	
		# plot I1 vs I2
		mags1_matched = mags1[idx1].values
		mags2_matched = mags2[idx2].values
		# galaxies
		name = '{}_I1_vs_I2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(mags1_matched[galaxies], mags2_matched[galaxies], outpath, 'I1 [mag]', 'I2 [mag]', 
			plot_style=plot_style, plot_type='mag-mag')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(mags1_matched[stars], mags2_matched[stars], outpath, 'I1 [mag]', 'I2 [mag]', 
			plot_style=plot_style, plot_type='mag-mag')
Exemplo n.º 37
0
def plot_spz_vs_wise_sdss_class(cat_path, plot_style='scatter'):

    ch1 = list(find_files(cat_path, "*merged+sdss+wise.csv"))[::2]
    ch2 = list(find_files(cat_path, "*merged+sdss+wise.csv"))[1::2]

    for ch1, ch2 in zip(ch1, ch2):

        reg1 = ch1.split('/')[-1].split('_')[0]
        reg2 = ch2.split('/')[-1].split('_')[0]
        assert reg1 == reg2

        print("\nreading catalog: {}".format(ch1))
        print("reading catalog: {}".format(ch2))
        df1 = pd.read_csv(ch1)
        df2 = pd.read_csv(ch2)

        # convert to magnitudes
        mags1 = spz_jy_to_mags(df1.flux * 1e-3, 1)
        mags2 = spz_jy_to_mags(df2.flux * 1e-3, 2)

        # match ch1 / ch2
        idx1, idx2 = match_cats(df1, df2, tol=2 / 3600.)

        # save matched catalogs
        matched1 = df1.loc[idx1]
        matched2 = df2.loc[idx2]
        ch1_cols = [i + '_1' for i in df1.columns.tolist()]
        ch2_cols = [i + '_2' for i in df2.columns.tolist()]
        # matched1.columns = ch1_cols
        # matched2.columns = ch2_cols
        # matched = pd.concat([matched1, matched2], 1, ignore_index=True)	# weird error
        matched = np.concatenate([matched1.values, matched2.values], 1)
        df_matched = pd.DataFrame(matched, columns=ch1_cols + ch2_cols)
        df_matched['I1'] = mags1[idx1].values
        df_matched['I2'] = mags2[idx2].values
        outpath = '/'.join(
            ch1.split('/')[:-1]) + '/{}_2ch_matched+sdss.csv'.format(reg1)
        df_matched.to_csv(outpath, index=False, float_format='%.8f')
        print("created file: {}".format(outpath))

        # identify SDSS galaxies and stars
        galaxies = (df1.cl[idx1].values == 3) & (df2.cl[idx2].values == 3)
        stars = (df1.cl[idx1].values == 6) & (df2.cl[idx2].values == 6)

        # plot I1-I2 vs. W1-W2
        color1 = df1.W1mag[idx1].values - df2.W2mag[idx2].values
        color2 = mags1[idx1].values - mags2[idx2].values
        # galaxies
        name = '{}_I1-I2_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(color1[galaxies],
             color2[galaxies],
             outpath,
             'W1-W2 [mag]',
             'I1-I2 [mag]',
             plot_style=plot_style,
             plot_type='color-color')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace(
            'galaxies', 'stars')
        plot(color1[stars],
             color2[stars],
             outpath,
             'W1-W2 [mag]',
             'I1-I2 [mag]',
             plot_style=plot_style,
             plot_type='color-color')

        # plot I1-W1 vs. I2-W2
        color1 = mags1[idx1].values - df1.W1mag[idx1].values
        color2 = mags2[idx2].values - df2.W2mag[idx2].values
        # galaxies
        name = '{}_I1-W1_vs_I2-W2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(color1[galaxies],
             color2[galaxies],
             outpath,
             'I1-W1 [mag]',
             'I2-W2 [mag]',
             plot_style=plot_style,
             plot_type='color-color')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace(
            'galaxies', 'stars')
        plot(color1[stars],
             color2[stars],
             outpath,
             'I1-W1 [mag]',
             'I2-W2 [mag]',
             plot_style=plot_style,
             plot_type='color-color')

        # plot spz color-magnitude diagrams
        color = mags1[idx1].values - mags2[idx2].values
        mags = mags1[idx1].values
        # galaxies
        name = '{}_I1_vs_I1-I2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(mags[galaxies],
             color[galaxies],
             outpath,
             'I1 [mag]',
             'I1-I2 [mag]',
             plot_style=plot_style,
             plot_type='color-mag')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace(
            'galaxies', 'stars')
        plot(mags[stars],
             color[stars],
             outpath,
             'I1 [mag]',
             'I1-I2 [mag]',
             plot_style=plot_style,
             plot_type='color-mag')

        # plot wise color-magnitude diagrams
        color = df1.W1mag[idx1].values - df2.W2mag[idx2].values
        mags = df1.W1mag[idx1].values
        # galaxies
        name = '{}_W1_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(mags[galaxies],
             color[galaxies],
             outpath,
             'W1 [mag]',
             'W1-W2 [mag]',
             plot_style=plot_style,
             plot_type='color-mag')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace(
            'galaxies', 'stars')
        plot(mags[stars],
             color[stars],
             outpath,
             'W1 [mag]',
             'W1-W2 [mag]',
             plot_style=plot_style,
             plot_type='color-mag')

        # plot I1 vs I2
        mags1_matched = mags1[idx1].values
        mags2_matched = mags2[idx2].values
        # galaxies
        name = '{}_I1_vs_I2_galaxies_plot_style.png'.format(reg1)
        name = name.replace('plot_style', plot_style)
        outpath = '/'.join(ch1.split('/')[:-1] + [name])
        plot(mags1_matched[galaxies],
             mags2_matched[galaxies],
             outpath,
             'I1 [mag]',
             'I2 [mag]',
             plot_style=plot_style,
             plot_type='mag-mag')
        # stars
        outpath = '/'.join(ch1.split('/')[:-1] + [name]).replace(
            'galaxies', 'stars')
        plot(mags1_matched[stars],
             mags2_matched[stars],
             outpath,
             'I1 [mag]',
             'I2 [mag]',
             plot_style=plot_style,
             plot_type='mag-mag')
Exemplo n.º 38
0
def m2c_generator(max_num_sample):
    '''
        m2c generator.
        Input  : a test sample index (delivered via .send())
        Output : chord label (n, 16)
                 monophonic melody label (n, 2)
                 BPM (float)
        Average elapsed time for one sample: 0.16 sec
    '''
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cpu_device = torch.device('cpu')

    # Load Data
    chord_dic = pd.read_pickle(CONFIG_ALL['data']['chord_dic'])

    # prepare features
    all_files = find_files(CONFIG_ALL['data']['test_dir'], '*.mid')
    input_dic = []
    for i_file in all_files:
        _ = midi_feature(i_file, sampling_fac=2)
        _ = np.reshape(_, (1, _.shape[0], _.shape[1]))
        input_dic.append({'midi': i_file, 'm_embed': _})
    print('Total number of files: {}'.format(len(input_dic)))

    # training
    model = BiRNN(CONFIG_ALL['model']['input_size'],
                  CONFIG_ALL['model']['lstm_hidden_size'],
                  CONFIG_ALL['model']['fc_hidden_size'],
                  CONFIG_ALL['model']['num_layers'],
                  CONFIG_ALL['model']['num_classes_cf'],
                  CONFIG_ALL['model']['num_classes_c'], device).to(device)

    # Load Model
    path = os.path.join(CONFIG_ALL['model']['log_dir'],
                        CONFIG_ALL['model']['exp_name'], 'models/',
                        CONFIG_ALL['model']['eval_model'])
    model.load_state_dict(torch.load(path))

    # Test the model
    with torch.no_grad():
        while True:
            test_idx = yield

            if test_idx >= max_num_sample or test_idx < 0:
                print "Invalid sample index"
                continue
            m_embedding = input_dic[test_idx]['m_embed']
            out_cf, out_c = model(
                torch.tensor(m_embedding, dtype=torch.float).to(device))

            out_c = out_c.data.cpu().numpy()

            _, pred_cf = torch.max(out_cf.data, 1)
            pred_cf = pred_cf.data.cpu().numpy()

            i_out_tn1 = -1
            i_out_tn2 = -1
            i_out_tn3 = -1
            i_out_t = -1

            predicted = []
            c_threshold = 0.825
            f_threshold = 0.35
            #ochord_threshold = 1.0

            for idx, i_out in enumerate(out_c):
                # Seventh chord
                #T_chord_label = [0, 1, 2, 3, 4, 5, 102, 103, 104]
                #D_chord_label = [77, 78, 79, 55, 56, 57]
                #R_chord_label = [132]

                # Triad Chord
                T_chord_label = [0, 1, 37]
                D_chord_label = [20, 28]
                R_chord_label = [48]

                O_chord_label = [
                    i for i in range(0, 48)
                    if i not in T_chord_label and i not in D_chord_label
                    and i not in R_chord_label
                ]

                # Beam-search-style tie-break: avoid repeating the same chord too long
                if pred_cf[idx] == 0:
                    L = np.argsort(
                        -np.asarray([i_out[i] for i in T_chord_label]))
                    if i_out_tn1 == T_chord_label[
                            L[0]] and i_out_tn2 == T_chord_label[L[0]]:
                        i_out_t = T_chord_label[L[1]]
                    else:
                        i_out_t = T_chord_label[L[0]]

                elif pred_cf[idx] == 1:
                    i_out_t = D_chord_label[np.argmax(
                        [i_out[i] for i in D_chord_label])]

                elif pred_cf[idx] == 3:
                    L = np.argsort(
                        -np.asarray([i_out[i] for i in O_chord_label]))
                    if i_out_tn1 == O_chord_label[
                            L[0]] and i_out_tn2 == O_chord_label[L[0]]:
                        i_out_t = O_chord_label[L[1]]
                    else:
                        i_out_t = O_chord_label[L[0]]

                else:
                    i_out_t = 48

                predicted.append(i_out_t)
                i_out_tn2 = i_out_tn1
                i_out_tn1 = i_out_t
                i_out_last = i_out

            # Write file to midi
            midi_original = pretty_midi.PrettyMIDI(input_dic[test_idx]['midi'])
            midi_chord = pro_chordlabel_to_midi(
                predicted,
                chord_dic,
                inv_beat_resolution=CONFIG_ALL['data']['chord_resolution'],
                constant_tempo=midi_original.get_tempo_changes()[1])
            midi_chord.instruments[0].name = "Predicted_w_func"
            midi_original.instruments.append(midi_chord.instruments[0])

            out_path = os.path.join('eval_test/', str(test_idx) + '.mid')
            ensure_dir(out_path)
            midi_original.write(out_path)
            print "Write Files to : ", out_path

            out_mc = midi_to_list(midi_original, predicted)

            yield {
                'melody': out_mc['melody'],
                'chord': out_mc['chord'],
                'BPM': float(midi_original.get_tempo_changes()[1])
            }
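
m2c_generator is driven through the coroutine send protocol: prime it with next(), send a sample index to receive the result dict, then advance once more before the next request. A hedged usage sketch of that driving loop; it assumes m2c_generator and its configuration are importable, and the index values are arbitrary:

gen = m2c_generator(max_num_sample=100)
next(gen)                         # run setup and pause at the first bare `yield`
for test_idx in (0, 5, 42):
    result = gen.send(test_idx)   # resume, process one sample, get the dict back
    print(result['BPM'], len(result['chord']), len(result['melody']))
    next(gen)                     # step past the result yield, ready for the next index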
Exemplo n.º 39
0
def match_sdss(cat_path):
	for catfile in find_files(cat_path, "*merged.txt"):

		# read pipeline catalog
		print("\nreading catalog: {}".format(catfile))
		cat = pd.read_table(catfile, sep=' ')

		# retrieve SDSS data from ViZieR if not already downloaded
		ch = catfile.split('/')[-1].split('_')[1]
		outpath = catfile.replace('{}_merged.txt'.format(ch), 'sdss.vot')
		if not os.path.isfile(outpath):
			cntr_ra = np.median(cat.ra)
			cntr_dec = np.median(cat.dec)
			# get source from one corner of the mosaic to calculate radius
			c1 = (cat.ra.min(), cat.dec[cat.ra==cat.ra.min()].values[0])
			# make radius 10% bigger just to be on safe side
			radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
			url = get_url(cntr_ra, cntr_dec, radius)
			print("retrieving URL: {}".format(url))
			handler = urllib2.urlopen(url)
			raw = handler.read()
			with open(outpath,'wb') as f:
				f.write(raw)
			print("created file: {}".format(outpath))

		# parse VOTable
		print("reading VOTable: {}".format(outpath))
		table = parse_single_table(outpath)

		# if this is one of the southern hemisphere regions, delete and continue
		if table.array.size == 0:
			os.remove(outpath)
			print("outside of SDSS coverage")
			continue

		# make sure no missing data
		for name in table.array.dtype.names:
			assert table.array[name].mask.sum() == 0

		# get unmasked array
		sdss = table.array.data

		# make sure sky coverage is big enough
		assert sdss['RAJ2000'].min() < cat.ra.min()
		assert sdss['RAJ2000'].max() > cat.ra.max()
		assert sdss['DEJ2000'].min() < cat.dec.min()
		assert sdss['DEJ2000'].max() > cat.dec.max()

		# match to catalog
		assert cat.shape[0] < sdss.shape[0]
		tol = 2/3600.
		idx1, idx2, ds = spherematch(cat.ra, cat.dec, 
			sdss['RAJ2000'], sdss['DEJ2000'], tolerance = tol)
		print("matched {} out of {} sources with {} arcsec tolerance".format(ds.size, 
			cat.shape[0], tol*3600))

		# create vector of star/galaxy class (0=missing, 3=galaxy, 6=star)
		cl = np.zeros(cat.shape[0]).astype('int')
		cl[idx1] = sdss['cl'][idx2]

		# add the column to the dataset
		cat['cl'] = cl

		# write to new file
		outpath = catfile.replace('merged.txt', 'merged+sdss.txt')
		# fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
		# hdr = ' '.join(names)+' cl'
		# np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
		cat.to_csv(outpath, index=False, sep=' ', float_format='%.8f')
		print("created file: {}".format(outpath))
Exemplo n.º 40
0
def find_evt_files(root_dir):
    """Return a chronologically sorted list of EVT or OPP file paths in root_dir."""
    files = util.find_files(root_dir)
    files = parse_file_list(files)
    return seaflowfile.sorted_files(files)
Exemplo n.º 41
0
def run_xsc_phot(bcdphot_out_path, mosaic_path):
	replaced = {}
	for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

		print("\n======================================================")
		print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
		print("------------------------------------------------------")
		outpath = cat.replace('combined_hdr_catalog.txt','2mass_xsc.tbl')

		# retrieve 2mass data if file doesn't already exist (from previous run)
		if not os.path.isfile(outpath):
			# get url and retrieve data
			url = query_2mass_xsc_polygon(*get_region_corners(cat))
			print("\ndownloading 2MASS photometry from: {}".format(url))
			text = urllib2.urlopen(url).read()
			# write to disk
			with open(outpath, 'w') as f:
				f.write(text)
			print("\ncreated file: {}".format(outpath))

		# read back in as recarray	
		print("\nreading: {}".format(outpath))
		names = open(outpath).read().split('\n')[76].split('|')[1:-1]
		da = np.recfromtxt(outpath, skip_header=80, names=names)

		# write input file for xsc_phot.pro
		infile_outpath = '/'.join(cat.split('/')[:-1])+'/xsc.txt'
		with open(infile_outpath,'w') as w:
			for i in range(da.shape[0]):
				w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i], da.dec[i], da.r_ext[i]))
		print("\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

		# locate the FITS mosaic file for xsc_phot.pro to do photometry on
		reg, ch = cat.split('/')[-1].split('_')[:2]
		pattern = 'dirbe{}/ch{}/long/full/Combine'.format(reg, ch)
		mosaicfile = [x for x in find_files(mosaic_path, '*mosaic.fits') if pattern in x][0]
		print("\nfound mosaic file: {}".format(mosaicfile))

		# spawn IDL subprocess running xsc_phot.pro and catch stdout in file
		outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
		if not os.path.isfile(outpath):
			outfile = open(outpath,'w')
			print("\nspawning xsc_phot.pro IDL subprocess")
			cmd = "xsc_phot,'"+mosaicfile+"','"+infile_outpath+"','long'"
			rc = subprocess.call(['/usr/local/itt/idl71/bin/idl','-quiet','-e',cmd], 
				stderr = subprocess.PIPE, stdout = outfile)
			outfile.close()

		# read in output to recarray
		print("\nreading: {}".format(outpath))
		phot = np.recfromtxt(outpath, names=['id','flux','unc','sky','skyunc'])

		# make sure rows are aligned
		assert (da.designation == phot.id).all()

		# ignore xsc sources we got a NaN or negative flux for
		bad = np.isnan(phot.flux) | (phot.flux < 0)
		print("\naper.pro returned NaN or negative flux for {} sources".format(bad.sum()))
		if bad.sum() > 0:
			for i in phot[bad].id:
				print(i)
			outpath = cat.replace('combined_hdr_catalog.txt','xsc_nan_phot.csv')
			with open(outpath,'w') as f:
				w = csv.writer(f)
				w.writerow(da.dtype.names)
				w.writerows(da[bad].tolist())
			print('\ncreated file: {}'.format(outpath))
		phot = phot[~bad]
		da = da[~bad]

		# read in pipeline catalog
		print("\nreading: {}".format(cat))
		names = open(cat).readline().split()[1:]
		c = np.recfromtxt(cat, names=names)

		# loop through xsc sources and find matches in pipeline catalog
		print("\nfinding records associated with XSC sources in pipeline catalog")
		c_flux_total = []
		n_in_aper = []
		c_idx = []
		coords = radec_to_coords(c.ra, c.dec)
		kdt = KDT(coords)
		for i in range(phot.size):
			radius = da.r_ext[i]/3600.
			# idx1, idx2, ds = spherematch(da.ra[i], da.dec[i], 
			# 	c.ra, c.dec, tolerance=radius)
			idx, ds = spherematch2(da.ra[i], da.dec[i], c.ra, c.dec,
				kdt, tolerance=radius, k=500)
			# c_flux_total.append(c.flux[idx2].sum())
			# n_in_aper.append(c.flux[idx2].size)
			# c_idx.append(idx2.tolist())
			c_flux_total.append(c.flux[idx].sum())
			n_in_aper.append(ds.size)
			c_idx.append(idx.tolist())
		print("\nhistogram of source counts in r_ext aperture")
		for i in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
			print(i)

		# create new version of catalog file with xsc-associated entries replaced
		c_idx = np.array(flatten(c_idx))
		print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
		replaced[cat] = {'old':c_idx.size, 'new':phot.size}
		replaced[cat]['hist'] = [(i,n_in_aper.count(i)) for i in set(n_in_aper)]
		c = np.delete(c, c_idx)
		newrows = np.rec.array([(-i, da.ra[i], da.dec[i], 
			phot.flux[i], phot.unc[i], 1) for i in \
			range(phot.size)], dtype=c.dtype)
		newcat = np.hstack((c, newrows))

		# write new version of catalog to disk
		fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']
		outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
		np.savetxt(outpath, newcat, fmt = fmt, header = ' '.join(names))
		print('\ncreated file: {}'.format(outpath))

		# make plot of total old vs. new flux
		plt.scatter(c_flux_total, phot.flux)
		ylim = plt.gca().get_ylim()
		plt.xlim(*ylim)
		max_y = ylim[1]
		plt.plot(ylim, ylim, 'r-')
		plt.xlabel('old flux [mJy]')
		plt.ylabel('new flux [mJy]')
		name = ' '.join(cat.split('/')[-1].split('_')[:2])
		plt.title(name)
		outpath = cat.replace('combined_hdr_catalog.txt','xsc_new_vs_old_phot.png')
		plt.savefig(outpath, dpi=200)
		plt.close()
		print('\ncreated file: {}'.format(outpath))

	outfile = 'xsc_replaced.json'
	json.dump(replaced, open(outfile,'w'))
	print("\ncreated file: {}".format(outfile))
	print("\nremoved / added")
	for k, v in replaced.items():
		print("{} {} {}".format(k.split('/')[-1], v['old'], v['new']))
	m = np.mean([i['old']/float(i['new']) for i in replaced.values()])
	print("average ratio: {}".format(m))
	print("\nK mag and r_ext of sources with NaN photometry:")
	for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
		reg = i.split('/')[-1]
		rec = np.recfromcsv(i)
		bad_id = rec.designation.tolist()
		bad_k = rec.k_m_k20fe.tolist()
		bad_r_ext = rec.r_ext.tolist()
		print(reg)
		print("\tid\t\t\tKmag\tr_ext")
		if type(bad_id) is list:
			seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
			for j,k,l in seq:
				print("\t{}\t{}\t{}".format(j,k,l))
		else:
			print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
Exemplo n.º 42
0
def match_sdss(cat_path):
    for catfile in find_files(cat_path, "*merged.txt"):

        # read pipeline catalog
        print("\nreading catalog: {}".format(catfile))
        cat = pd.read_table(catfile, sep=' ')

        # retrieve SDSS data from ViZieR if not already downloaded
        ch = catfile.split('/')[-1].split('_')[1]
        outpath = catfile.replace('{}_merged.txt'.format(ch), 'sdss.vot')
        if not os.path.isfile(outpath):
            cntr_ra = np.median(cat.ra)
            cntr_dec = np.median(cat.dec)
            # get source from one corner of the mosaic to calculate radius
            c1 = (cat.ra.min(), cat.dec[cat.ra == cat.ra.min()].values[0])
            # make radius 10% bigger just to be on safe side
            radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
            url = get_url(cntr_ra, cntr_dec, radius)
            print("retrieving URL: {}".format(url))
            handler = urllib2.urlopen(url)
            raw = handler.read()
            with open(outpath, 'wb') as f:
                f.write(raw)
            print("created file: {}".format(outpath))

        # parse VOTable
        print("reading VOTable: {}".format(outpath))
        table = parse_single_table(outpath)

        # if this is one of the southern hemisphere regions, delete and continue
        if table.array.size == 0:
            os.remove(outpath)
            print("outside of SDSS coverage")
            continue

        # make sure no missing data
        for name in table.array.dtype.names:
            assert table.array[name].mask.sum() == 0

        # get unmasked array
        sdss = table.array.data

        # make sure sky coverage is big enough
        assert sdss['RAJ2000'].min() < cat.ra.min()
        assert sdss['RAJ2000'].max() > cat.ra.max()
        assert sdss['DEJ2000'].min() < cat.dec.min()
        assert sdss['DEJ2000'].max() > cat.dec.max()

        # match to catalog
        assert cat.shape[0] < sdss.shape[0]
        tol = 2 / 3600.
        idx1, idx2, ds = spherematch(cat.ra,
                                     cat.dec,
                                     sdss['RAJ2000'],
                                     sdss['DEJ2000'],
                                     tolerance=tol)
        print("matched {} out of {} sources with {} arcsec tolerance".format(
            ds.size, cat.shape[0], tol * 3600))

        # create vector of star/galaxy class (0=missing, 3=galaxy, 6=star)
        cl = np.zeros(cat.shape[0]).astype('int')
        cl[idx1] = sdss['cl'][idx2]

        # add the column to the dataset
        cat['cl'] = cl

        # write to new file
        outpath = catfile.replace('merged.txt', 'merged+sdss.txt')
        # fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
        # hdr = ' '.join(names)+' cl'
        # np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
        cat.to_csv(outpath, index=False, sep=' ', float_format='%.8f')
        print("created file: {}".format(outpath))
Exemplo n.º 43
0
def dir_walk(target_dir=None, quiet=None):
    '''Recursively walk a directory of CTI (STIX/CybOX) XML files and return per-object-type stats.'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = [
            'Directory Walk: ',
            Percentage(), ' ',
            Bar(marker=RotatingMarker()), ' ',
            ETA()
        ]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(), \
                        'exploit_targets': set(), 'incidents': set(), \
                        'indicators': set(), 'threat_actors': set(), \
                        'ttps': set()}
    cooked_cybox_objs = {
        'AccountObjectType': set(),
        'AddressObjectType': set(),
        'APIObjectType': set(),
        'ArchiveFileObjectType': set(),
        'ARPCacheObjectType': set(),
        'ArtifactObjectType': set(),
        'ASObjectType': set(),
        'CodeObjectType': set(),
        'CustomObjectType': set(),
        'DeviceObjectType': set(),
        'DiskObjectType': set(),
        'DiskPartitionObjectType': set(),
        'DNSCacheObjectType': set(),
        'DNSQueryObjectType': set(),
        'DNSRecordObjectType': set(),
        'DomainNameObjectType': set(),
        'EmailMessageObjectType': set(),
        'FileObjectType': set(),
        'GUIDialogboxObjectType': set(),
        'GUIObjectType': set(),
        'GUIWindowObjectType': set(),
        'HostnameObjectType': set(),
        'HTTPSessionObjectType': set(),
        'ImageFileObjectType': set(),
        'LibraryObjectType': set(),
        'LinkObjectType': set(),
        'LinuxPackageObjectType': set(),
        'MemoryObjectType': set(),
        'MutexObjectType': set(),
        'NetworkConnectionObjectType': set(),
        'NetworkFlowObjectType': set(),
        'NetworkPacketObjectType': set(),
        'NetworkRouteEntryObjectType': set(),
        'NetRouteObjectType': set(),
        'NetworkSocketObjectType': set(),
        'NetworkSubnetObjectType': set(),
        'PDFFileObjectType': set(),
        'PipeObjectType': set(),
        'PortObjectType': set(),
        'ProcessObjectType': set(),
        'ProductObjectType': set(),
        'SemaphoreObjectType': set(),
        'SMSMessageObjectType': set(),
        'SocketAddressObjectType': set(),
        'SystemObjectType': set(),
        'UnixFileObjectType': set(),
        'UnixNetworkRouteEntryObjectType': set(),
        'UnixPipeObjectType': set(),
        'UnixProcessObjectType': set(),
        'UnixUserAccountObjectType': set(),
        'UnixVolumeObjectType': set(),
        'URIObjectType': set(),
        'URLHistoryObjectType': set(),
        'UserAccountObjectType': set(),
        'UserSessionObjectType': set(),
        'VolumeObjectType': set(),
        'WhoisObjectType': set(),
        'WindowsComputerAccountObjectType': set(),
        'WindowsCriticalSectionObjectType': set(),
        'WindowsDriverObjectType': set(),
        'WindowsEventLogObjectType': set(),
        'WindowsEventObjectType': set(),
        'WindowsExecutableFileObjectType': set(),
        'WindowsFilemappingObjectType': set(),
        'WindowsFileObjectType': set(),
        'WindowsHandleObjectType': set(),
        'WindowsHookObjectType': set(),
        'WindowsKernelHookObjectType': set(),
        'WindowsKernelObjectType': set(),
        'WindowsMailslotObjectType': set(),
        'WindowsMemoryPageRegionObjectType': set(),
        'WindowsMutexObjectType': set(),
        'WindowsNetworkRouteEntryObjectType': set(),
        'WindowsNetworkShareObjectType': set(),
        'WindowsPipeObjectType': set(),
        'WindowsPrefetchObjectType': set(),
        'WindowsProcessObjectType': set(),
        'WindowsRegistryKeyObjectType': set(),
        'WindowsSemaphoreObjectType': set(),
        'WindowsServiceObjectType': set(),
        'WindowsSystemObjectType': set(),
        'WindowsSystemRestoreObjectType': set(),
        'WindowsTaskObjectType': set(),
        'WindowsThreadObjectType': set(),
        'WindowsUserAccountObjectType': set(),
        'WindowsVolumeObjectType': set(),
        'WindowsWaitableTimerObjectType': set(),
        'X509CertificateObjectType': set(),
    }
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = \
                process_stix_pkg(stix_package)
            for k in raw_stix_objs.keys():
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs.keys():
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i)
        except Exception:
            # skip files that fail to parse instead of aborting the walk
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
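
A hedged usage sketch for the tuple dir_walk returns: both elements map object-type names to sets of collected entries, so per-type counts can be printed directly (the directory path below is an assumption):

stix_stats, cybox_stats = dir_walk('cti_corpus', quiet=True)
for name, ids in sorted(stix_stats.items()):
    print('{:25s} {:d}'.format(name, len(ids)))
for name, ids in sorted(cybox_stats.items()):
    if ids:  # only show CybOX object types that actually appeared
        print('{:45s} {:d}'.format(name, len(ids)))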