Пример #1
0
def cluster_month_over_month_train(path_to_train, rad):
    """ Gather the per-lunation training fits and cluster them across months.
    For every lunation index in range(-825, 14) whose training .trans file
    exists next to path_to_train, the matching
    'train_result_<lunation>_orbelem.pickle' is loaded. All loaded fits are
    then handed to cluster_months, and its two results are pickled.
    ---------
    Args: path_to_train; str, a path whose directory holds the .trans files
            and the per-lunation orbelem pickles; outputs go there as well.
          rad; passed through to cluster_months as its rad keyword.
    ---------
    Returns: None, writes 'train_final_results.pickle' and
                'train_final_results_cid.pickle' to that directory.
    """
    home_dir = os.path.dirname(path_to_train)
    print('Starting month over month cluster run...')

    fit_dicts = []
    for n in range(-825, 14):
        center = str(util.lunation_center(n))
        train_file = os.path.join(
            home_dir,
            'UnnObs_Training_1_line_A_ec_{}_pm15.0_r2.5.trans'.format(center))

        # Only lunations that have a training file have a stored fit.
        if not os.path.isfile(train_file):
            continue

        # Get the previously calc'd result
        result_path = os.path.join(
            home_dir, 'train_result_{}_orbelem.pickle'.format(center))
        with open(result_path, 'rb') as handle:
            fit_dicts.append(pickle.load(handle))

    print('Data loaded...')
    final_dict, final_dict_cid = cluster_months(fit_dicts, rad=rad)

    outputs = (
        ('train_final_results.pickle', final_dict),
        ('train_final_results_cid.pickle', final_dict_cid),
    )
    for out_name, payload in outputs:
        with open(os.path.join(home_dir, out_name), 'wb') as handle:
            pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print('Run finished!')
Пример #2
0
def tune(moons, nside, home_dir, g_gdots=g_gdots, dts=np.arange(5, 30, 5),
        radii=np.arange(0.0001, 0.0100, 0.0001), mincount=3):
    """ This function takes in the g_gdots grid we iterate over, the dt's and radii we
    want to try, and the moons we want to include in the training run.  NOTE: it is
    not recommended to run this over the whole dataset, as that would be very slow,
    and likely not provide very exciting results. This function will take the values
    we pass and create pickle files for further analysis.

    For each moon, train_clusters is run once per healpix pixel and the results are
    collected in a dict keyed by pixel index. That dict is pickled next to the
    .trans file, with the trailing 'trans' of its name replaced by 'train.pickle'.
    ---------
    Args: moons; list, is a list of values of your choosing between -825 to 14,
            representing the different moons for lunation center. A recommended,
            relatively dense patch to start with is [-11, -14, -17, -20, -23].
          nside; int, the number of sides for the healpix dividing.
          home_dir; str, the path to the directory where we want to have the .trans
                        files, and where we want to output the pickles
          g_gdots; list of tuples of pairs, our grid of g and gdot.
          dts; array of floats to use to scale the velocity in relation to position
          radii; array of floats to use to search the radius of the given sizes in the
                    KD tree
          mincount; int, the minimum number of tracklets it takes to be considered a cluster.
    ---------
    Returns: None, writes pickles to given home_dir
    """
    abs_home_dir = os.path.abspath(home_dir)
    trans_suffix = 'trans'

    # One training run (and one output pickle) per requested lunation center.
    for n in moons:
        lunation = util.lunation_center(n)
        infilename = os.path.join(abs_home_dir, 'UnnObs_Training_1_line_A_ec_%.1lf_pm15.0_r2.5.trans' % (lunation))
        # Drop exactly the trailing 'trans'. str.rstrip('trans') would strip
        # any run of the characters t/r/a/n/s and only worked here because
        # the name has a '.' right before the suffix.
        pickle_filename = infilename[:-len(trans_suffix)] + 'train.pickle' # removed _v2 after train.

        pix_runs = {}
        for pix in range(hp.nside2npix(nside)):
            # Do the training run
            pix_runs[pix] = train_clusters([pix], infilename, lunation,
                                           g_gdots=g_gdots, dts=dts, radii=radii,
                                           mincount=mincount)

        # Write the output to a pickle
        with open(pickle_filename, 'wb') as handle:
            pickle.dump(pix_runs, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print('Find the best velocity / position scaling, our dt value.')
Пример #3
0
def run_itf(path_to_itf, pixels, g_gdots, dt, cr):
    """ Run find_clusters over every per-lunation ITF .trans file.
    For each lunation index in range(-825, 14) whose ITF file exists in the
    directory of path_to_itf, the first value returned by find_clusters is
    pickled as 'itf_result_<lunation>_initial.pickle' in that directory.
    A percent-complete counter is written to stdout while the loop runs.
    ---------
    Args: path_to_itf; str, a path whose directory holds the ITF .trans files.
          pixels; passed through to find_clusters.
          g_gdots; passed to find_clusters as g_gdots.
          dt; passed to find_clusters as dt.
          cr; passed to find_clusters as rad.
    ---------
    Returns: None, writes one pickle per lunation that has an ITF file.
    """
    home_dir = os.path.dirname(path_to_itf)
    print('Starting run...')

    moon_indices = range(-825, 14)
    total = len(moon_indices)
    for done, n in enumerate(moon_indices):
        # Percent complete
        sys.stdout.write("\r%d%%" % (done * 1. / total * 100))
        sys.stdout.flush()

        center = util.lunation_center(n)
        itf_file = os.path.join(
            home_dir,
            'itf_new_1_line_ec_{}_pm15.0_r2.5.trans'.format(str(center)))
        if not os.path.isfile(itf_file):
            continue

        # Only the raw results are stored; the cluster ids are discarded.
        itf_raw_results, _ = find_clusters(pixels,
                                           itf_file,
                                           center,
                                           g_gdots=g_gdots,
                                           dt=dt,
                                           rad=cr)

        out_path = os.path.join(
            home_dir, 'itf_result_{}_initial.pickle'.format(str(center)))
        with open(out_path, 'wb') as handle:
            pickle.dump(itf_raw_results,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
Пример #4
0
def postprocessing_train(path_to_train, pixels, nside):
    """ Run postprocessing on every per-lunation training result.
    For each lunation index in range(-825, 14) whose training .trans file
    exists in the directory of path_to_train, the stored
    'train_result_<lunation>_coc.pickle' is loaded, passed to postprocessing
    (with orb_elms=True, gi=0.4, gdoti=0.0), and the result is pickled as
    'train_result_<lunation>_orbelem.pickle'. Progress is written to stdout.
    ---------
    Args: path_to_train; str, a path whose directory holds the .trans files
            and the coc pickles; outputs go there as well.
          pixels; passed through to postprocessing.
          nside; passed through to postprocessing.
    ---------
    Returns: None, writes one orbelem pickle per lunation with a training file.
    """
    home_dir = os.path.dirname(path_to_train)
    print('Starting postprocessing run...')

    moon_indices = range(-825, 14)
    total = len(moon_indices)
    for done, n in enumerate(moon_indices):
        # Percent complete
        sys.stdout.write("\r%d%%" % (done * 1. / total * 100))
        sys.stdout.flush()

        center = str(util.lunation_center(n))
        train_file = os.path.join(
            home_dir,
            'UnnObs_Training_1_line_A_ec_{}_pm15.0_r2.5.trans'.format(center))
        if not os.path.isfile(train_file):
            continue

        # Get the previously calc'd result
        coc_path = os.path.join(
            home_dir, 'train_result_{}_coc.pickle'.format(center))
        with open(coc_path, 'rb') as handle:
            clust_counter = pickle.load(handle)

        # Note: postprocessing receives the lunation index n, not the center.
        fit_dict = postprocessing(train_file, clust_counter, pixels, nside, n,
                                  orb_elms=True, gi=0.4, gdoti=0.0)

        orbelem_path = os.path.join(
            home_dir, 'train_result_{}_orbelem.pickle'.format(center))
        with open(orbelem_path, 'wb') as handle:
            pickle.dump(fit_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
Пример #5
0
def separate_time_windows(tracklets, sortedTracklets, tracklets_jd_dict, file_stem,
                          n_begin=-825, n_end=14, dt=15., suff='.mpc'):
    """ Sweep through the tracklets once, outputting them into a sequence of
    overlapping time ranges that can be processed separately.

    A tracklet is written to the file of every lunation index n in
    [n_begin, n_end) whose center lies within dt of the tracklet's jd_tdb.
    Each output file is named
    '<file_stem without .mpc>_<lunation center>_pm<dt><suff>' and starts with
    a header line. All output files are closed before returning, including
    when an error interrupts the sweep.
    -------
    Args: tracklets; dict, the first item returned from get_sorted_tracklets.
            Maps a designation to the lines written for it.
          sortedTracklets; list, the second item returned from get_sorted_tracklets.
            The sweep only moves forward in time, so this is presumably
            ordered by jd_tdb -- verify against get_sorted_tracklets.
          tracklets_jd_dict; dict, the third item returned from get_sorted_tracklets.
            Maps a designation to its jd_tdb.
          file_stem; str, path to the related mpc file that we are splitting up.
          n_begin; int, the beginning index for the lunar centers.
          n_end; int, the end index (exclusive) for the lunar centers.
          dt; float, half-width of each time window around a lunation center,
                in the same units as jd_tdb. Also appears in the file name.
          suff; str, the suffix of the file, normally ".mpc" but sometimes ".txt"
    --------
    Returns: None; it just writes files to the directory you specify.
    """
    header = '#trackletID yr   mn dy      obsCode mag filter  jd_tdb       x_target     y_target     z_target      x_obs       y_obs        z_obs     '

    t_center = util.lunation_center(n_begin)
    files = {}  # lunation index -> currently open output file

    try:
        for desig in sortedTracklets:
            jd_tdb = tracklets_jd_dict[desig]

            # Advance the earliest active window until it can contain this
            # tracklet, closing each window we leave behind.
            while jd_tdb > t_center + dt:
                finished = files.pop(n_begin, None)
                if finished is not None:
                    finished.close()
                n_begin += 1
                t_center = util.lunation_center(n_begin)

            for n in range(n_begin, n_end):
                # Windows are visited in increasing time order, so once one
                # starts after this tracklet, all later ones do too.
                if jd_tdb < util.lunation_center(n) - dt:
                    break
                if n not in files:
                    outfile = file_stem.replace('.mpc', '') + '_' + str(
                        util.lunation_center(n)) + '_pm' + str(dt) + suff
                    files[n] = open(outfile, 'w')
                    files[n].write(header + '\n')
                for line in tracklets[desig]:
                    files[n].write(line)
    finally:
        # Windows still open at the end of the sweep were previously never
        # closed, which could leave their buffered output unwritten.
        for handle in files.values():
            handle.close()
Пример #6
0
def cluster_clusters_itf(path_to_itf, pixels, nside, dt, cr, new_rad):
    """ Run cluster_clusters on every per-lunation ITF initial result.
    For each lunation index in range(-825, 14) whose ITF .trans file exists
    in the directory of path_to_itf, the stored
    'itf_result_<lunation>_initial.pickle' is loaded and passed to
    cluster_clusters (with gi=0.4, gdoti=0.0, maxiter=200). The first value it
    returns is pickled as 'itf_result_<lunation>_coc.pickle'. Progress is
    written to stdout.
    ---------
    Args: path_to_itf; str, a path whose directory holds the ITF .trans files
            and the initial pickles; outputs go there as well.
          pixels; passed through to cluster_clusters.
          nside; passed through to cluster_clusters.
          dt; passed to cluster_clusters as dt.
          cr; passed to cluster_clusters as rad.
          new_rad; passed to cluster_clusters as new_rad.
    ---------
    Returns: None, writes one coc pickle per lunation that has an ITF file.
    """
    home_dir = os.path.dirname(path_to_itf)
    print('Starting cluster clusters run...')

    moon_indices = range(-825, 14)
    total = len(moon_indices)
    for done, n in enumerate(moon_indices):
        # Percent complete
        sys.stdout.write("\r%d%%" % (done * 1. / total * 100))
        sys.stdout.flush()

        center = str(util.lunation_center(n))
        itf_file = os.path.join(
            home_dir, 'itf_new_1_line_ec_{}_pm15.0_r2.5.trans'.format(center))
        if not os.path.isfile(itf_file):
            continue

        # Get the previously calc'd result
        initial_path = os.path.join(
            home_dir, 'itf_result_{}_initial.pickle'.format(center))
        with open(initial_path, 'rb') as handle:
            clust_counter = pickle.load(handle)

        # Only the counter is stored; the returned ids are discarded.
        coc_counter, _ = cluster_clusters(itf_file, clust_counter, pixels,
                                          nside, n, dt=dt, rad=cr,
                                          new_rad=new_rad, gi=0.4, gdoti=0.0,
                                          maxiter=200)

        coc_path = os.path.join(
            home_dir, 'itf_result_{}_coc.pickle'.format(center))
        with open(coc_path, 'wb') as handle:
            pickle.dump(coc_counter,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
Пример #7
0
def run_train(path_to_train, pixels, g_gdots, dt, cr):
    """ Run find_clusters over every per-lunation training .trans file.
    For each lunation index in range(-825, 14) whose training file exists in
    the directory of path_to_train, the first value returned by find_clusters
    is pickled as 'train_result_<lunation>_initial.pickle' in that directory.
    A percent-complete counter is written to stdout while the loop runs.
    ---------
    Args: path_to_train; str, a path whose directory holds the .trans files.
          pixels; passed through to find_clusters.
          g_gdots; passed to find_clusters as g_gdots.
          dt; passed to find_clusters as dt.
          cr; passed to find_clusters as rad.
    ---------
    Returns: None, writes one pickle per lunation that has a training file.
    """
    home_dir = os.path.dirname(path_to_train)
    print('Starting initial run...')

    moon_indices = range(-825, 14)
    total = len(moon_indices)
    for done, n in enumerate(moon_indices):
        # Percent complete
        sys.stdout.write("\r%d%%" % (done * 1. / total * 100))
        sys.stdout.flush()

        center = util.lunation_center(n)
        train_file = os.path.join(
            home_dir,
            'UnnObs_Training_1_line_A_ec_{}_pm15.0_r2.5.trans'.format(
                str(center)))
        if not os.path.isfile(train_file):
            continue

        # Only the raw results are stored; the cluster ids are discarded.
        train_raw_results, _ = find_clusters(pixels,
                                             train_file,
                                             center,
                                             g_gdots=g_gdots,
                                             dt=dt,
                                             rad=cr)

        out_path = os.path.join(
            home_dir, 'train_result_{}_initial.pickle'.format(str(center)))
        with open(out_path, 'wb') as handle:
            pickle.dump(train_raw_results,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
Пример #8
0
def index_positions(n, r_func, file_stem, dt=45., nside=8):
    """
    Does the transformations on the data using the date of the n-th new
    moon as the reference time.

    It is reading and processing the entire *.mpc file.

    This does the heliocentric transformation for the assumed radius function,
    r_func.

    It then does light-time correction.

    And it appends a healpix number on each line in order to be able to quickly
    select data from a given region of sky.

    This generates a file called *.trans, and it incorporates
    the distance assumed in the file name.
    ---------
    Args: n; int, lunation index; its lunation center is the reference time.
          r_func; callable taking a time and returning the assumed distance.
          file_stem; str, path of the related mpc file; '.mpc' is removed and
                '_<lunation center>_pm<dt>' is appended to build file names.
          dt; float, the window value that appears in the input and output
                file names. It is not used in the calculation.
          nside; int, healpix nside used to compute the pixel index.
    ---------
    Returns: 0 if the input file cannot be opened, otherwise None.
    """
    t_ref = util.lunation_center(n)
    stem = file_stem.replace('.mpc', '') + '_' + str(t_ref) + '_pm' + str(dt)
    infilename = stem + '.mpc'

    # A missing/unreadable input simply means there is nothing to do for
    # this lunation. The handle opened here is the one used for reading, so
    # it is always closed (the old existence probe leaked its handle).
    try:
        infile = open(infilename, 'r')
    except IOError:
        return 0

    with infile:
        r_ref = r_func(t_ref)
        r_name = "_r%.1lf" % (r_ref)
        outfilename = stem + r_name + '.trans'

        with open(outfilename, 'w') as outfile:
            for line in infile:
                if line.startswith('#'):
                    header = line.rstrip()
                    outfile.write(
                        header +
                        '          dt         x_cor       y_cor        z_cor       pix \n'
                    )
                    continue

                # Fixed-width fields; the first 43 characters precede jd_tdb
                # and are not needed here.
                jd_tdb = float(line[43:57])

                x_target, y_target, z_target = line[57:97].split()
                r_target = np.array(
                    [float(x_target),
                     float(y_target),
                     float(z_target)])

                x_obs, y_obs, z_obs = line[97:135].split()
                r_obs = np.array([float(x_obs), float(y_obs), float(z_obs)])

                # This should be a function from here
                # Adjust positions, starting from zero light-time delay.
                light_time = 0.0
                r_prev = r_func(jd_tdb - light_time)
                rho_r_p, rho_r_m = adjust_position(r_prev, r_target, r_obs)
                light_time = rho_r_p[0] / MPC_library.Constants.speed_of_light

                # Do light-time iterations.
                # Probably don't need to do this at this point, because it is
                # being re-done in a later step.
                # NOTE(review): each pass recomputes the position with the
                # same r_prev and only then updates r_prev, so the loop can
                # run at most once and never changes rho_r_p. Behavior is
                # kept as-is here; confirm the intended iteration before
                # changing the numerics.
                while (np.abs(r_func(jd_tdb - light_time) - r_prev) > 1e-8):
                    rho_r_p, rho_r_m = adjust_position(r_prev, r_target, r_obs)
                    light_time = rho_r_p[0] / MPC_library.Constants.speed_of_light
                    r_prev = r_func(jd_tdb - light_time)

                # to here
                xp, yp, zp = rho_r_p[1]

                # Calculate HEALPix index
                pix = hp.vec2pix(nside, xp, yp, zp, nest=True)

                outstring = line.rstrip() + " %13.6lf %12.7lf %12.7lf %12.7lf %5d\n"% \
                      (light_time, xp, yp, zp, pix)

                outfile.write(outstring)
Пример #9
0
# Demo script fragment: evaluate the tuned parameters on the labelled
# training file, then set up the paths for the ITF run.
# NOTE(review): nside, dt, cr, BASE_DIR and n are expected to be defined
# earlier in the script (not visible in this excerpt).
pixels = range(hp.nside2npix(nside))

infilename = os.path.join(
    BASE_DIR,
    'demo_train/UnnObs_Training_1_line_A_ec_labelled_2457308.5_pm15.0_r2.5.trans'
)

print('Based on our tuning, the best dt is {0} and best cluster radius is {1}'.
      format(dt, cr))

true_count_set, mergedCounter_dict, mergedTime_dict = accessible_clusters(
    pixels, infilename=infilename)
true_count = len(true_count_set)
print('True count of clusters: {}'.format(true_count))

right, wrong, ids_right, ids_wrong = test_clusters(pixels, infilename,
                                                   util.lunation_center(n),
                                                   dt=dt, rad=cr)

# The message reports percentages, so scale the fractions by 100 (they were
# previously printed as raw fractions). With no true clusters the rates are
# undefined; report NaN instead of dividing by zero.
if true_count:
    pct_right = 100.0 * right / true_count
    pct_wrong = 100.0 * wrong / true_count
else:
    pct_right = pct_wrong = float('nan')

print(
    'Using our optimal parameters we got {0} percent of clusters with {1} percent errors.'
    .format(pct_right, pct_wrong))
print('We got', right, 'right and', wrong, 'wrong out of total', true_count)

print(
    'Now that we have shown our performance on training data, lets run on the ITF.'
)

itf_file = os.path.join(
    BASE_DIR, 'demo_itf/itf_new_1_line_ec_2457308.5_pm15.0_r2.5.trans')
# Replace exactly the '.trans' suffix. str.rstrip('.trans') strips a set of
# characters, not a suffix, and only gave the right name by coincidence.
itf_pickle = itf_file[:-len('.trans')] + '.pickle'
itf_n = -14
Пример #10
0
def plot_tune_results(moons, home_dir):
    """ This function plots the related analysis plots of number of clusters,
    number of errors, and approx AUC.  NOTE: Only run this after you run tune()

    For each moon, the pickle written by tune() is loaded, accessible_clusters
    is run on the matching .trans file, and the three plots are drawn.
    ---------
    Args: moons; list, is a list of values of your choosing between -825 to 14,
            representing the different moons for lunation center. A recommended,
            relatively dense patch to start with is [-11, -14, -17, -20, -23].
          home_dir; str, the path to the directory where we want to have the
            pickles from the tune() run.
    ---------
    Returns: None, plots the related visualizations.
    Raises: FileNotFoundError if the pickle for a moon does not exist.
    """
    abs_home_dir = os.path.abspath(home_dir)

    for n in moons:
        # Must match the file name used by tune(); the '_ec' part was missing
        # here, so the pickle written by tune() could never be found.
        infilename = os.path.join(abs_home_dir, 'UnnObs_Training_1_line_A_ec_%.1lf_pm15.0_r2.5.trans' % (util.lunation_center(n)))
        # Replace exactly the trailing 'trans' (rstrip would strip a
        # character set, not a suffix).
        pickle_filename = infilename[:-len('trans')] + 'train.pickle'

        if not os.path.isfile(pickle_filename):
            raise FileNotFoundError('Cannot find this file. Hint: make sure you have run the tune() function first!')
        with open(pickle_filename, 'rb') as handle:
            pix_runs = pickle.load(handle)

        # The file is no longer needed once loaded, so the analysis runs
        # outside the with-block.
        true_count_dict, _, _ = accessible_clusters(list(pix_runs.keys()), infilename=infilename)
        true_count = sum(true_count_dict.values())

        visual.number_clusters_plot(pix_runs, true_count)
        visual.number_errors_plot(pix_runs)
        visual.auc_plot(pix_runs, true_count)
Пример #11
0
def find_cluster_radius(moons, home_dir, dt, max_tol=1e-3):
    """ This function finds the optimal cluster radius, given a value for dt
    and a maximum tolerable error rate.  The max error rate defaults to 0.1%.

    For each moon, the pickle written by tune() is loaded and the radius
    indices are scanned in order. At each index the errors, clusters and true
    counts are summed over all pixels. Indices whose error rate is below
    max_tol are printed; the first index that is not below max_tol is recorded
    for that moon and the scan stops. The result is the mean of the recorded
    radii (numpy yields nan if no moon recorded one).
    ---------
    Args: moons; list, is a list of values of your choosing between -825 to 14,
            representing the different moons for lunation center. A recommended,
            relatively dense patch to start with is [-11, -14, -17, -20, -23].
          home_dir; str, the path to the directory where we want to have the
            pickles from the tune() run.
          dt; float, the dt you decided to use based on the previous plots, or
            subject matter knowledge. Used as a key into each pixel's results.
          max_tol; float, the maximum relative error we tolerate in our output.
            defaults to 1e-3 or 0.1%
    ---------
    Returns: float, the optimal cluster radius (for finding the most clusters),
                while remaining under the specified error rate.
    Raises: FileNotFoundError if the pickle for a moon does not exist.
    """
    abs_home_dir = os.path.abspath(home_dir)

    print('Now that we have set dt={}, lets calculate the best cluster radius.'.format(dt))

    training_dict = {}
    for n in moons:
        infilename = os.path.join(abs_home_dir, 'UnnObs_Training_1_line_A_ec_%.1lf_pm15.0_r2.5.trans' % (util.lunation_center(n)))
        # Replace exactly the trailing 'trans' (rstrip would strip a
        # character set, not a suffix).
        pickle_filename = infilename[:-len('trans')] + 'train.pickle'

        if not os.path.isfile(pickle_filename):
            raise FileNotFoundError('Cannot find this file. Hint: make sure you have run the tune() function first!')
        with open(pickle_filename, 'rb') as handle:
            pix_runs = pickle.load(handle)

        true_count_dict, _, _ = accessible_clusters(list(pix_runs.keys()), infilename=infilename)

        # 99 matches the length of tune()'s default radii grid.
        # NOTE(review): a different radii grid would need a different bound.
        for i in range(99):
            errs = 0
            clusts = 0
            trues = 0
            # pix_runs is keyed directly by pixel index (see tune()), so it is
            # indexed with pix itself rather than through a `pixels` name that
            # is not defined in this function.
            for pix in list(pix_runs.keys()):
                run = pix_runs[pix][dt]
                clusts += run[1][i]
                errs += run[2][i]
                trues += true_count_dict[pix]
            if float(errs)/trues < max_tol:
                # The radius is read from the last pixel visited.
                print(i, pix_runs[pix][dt][0][i], errs, clusts, trues)
            else:
                training_dict[n] = pix_runs[pix][dt][0][i], errs, clusts, trues
                break

    cluster_radius = np.mean([v[0] for v in training_dict.values()])

    return cluster_radius
Пример #12
0
def test_lunation():
    """ Check the lunation center returned for moon index -11. """
    expected_center = 2457397.5
    assert util.lunation_center(-11) == expected_center