def cluster_month_over_month_train(path_to_train, rad):
    """
    Combine the per-lunation training fits into one month-over-month
    clustering run.

    For every lunation index in range(-825, 14) whose training .trans file
    exists in the directory of path_to_train, the matching
    'train_result_<lunation>_orbelem.pickle' is loaded. The collected results
    are passed to cluster_months and both of its return values are pickled
    into the same directory.
    ---------
    Args: path_to_train; str, a path whose directory holds the .trans files
              and the per-lunation orbelem pickles.
          rad; passed through unchanged as the rad keyword of cluster_months.
    ---------
    Returns: None, writes train_final_results.pickle and
             train_final_results_cid.pickle to that directory.
    """
    home_dir = os.path.dirname(path_to_train)
    print('Starting month over month cluster run...')

    monthly_fits = []
    for moon in range(-825, 14):
        center = str(util.lunation_center(moon))
        trans_path = os.path.join(
            home_dir,
            'UnnObs_Training_1_line_A_ec_{}_pm15.0_r2.5.trans'.format(center))
        # Only lunations that actually have a training file are considered.
        if not os.path.isfile(trans_path):
            continue
        # Get the previously calc'd result
        fit_path = os.path.join(
            home_dir, 'train_result_{}_orbelem.pickle'.format(center))
        with open(fit_path, 'rb') as src:
            monthly_fits.append(pickle.load(src))
    print('Data loaded...')

    final_dict, final_dict_cid = cluster_months(monthly_fits, rad=rad)
    outputs = (
        ('train_final_results.pickle', final_dict),
        ('train_final_results_cid.pickle', final_dict_cid),
    )
    for out_name, payload in outputs:
        with open(os.path.join(home_dir, out_name), 'wb') as sink:
            pickle.dump(payload, sink, protocol=pickle.HIGHEST_PROTOCOL)
    print('Run finished!')
def tune(moons, nside, home_dir, g_gdots=g_gdots, dts=np.arange(5, 30, 5),
         radii=np.arange(0.0001, 0.0100, 0.0001), mincount=3):
    """
    Run train_clusters over a grid of dt and radius values for the chosen
    moons, one healpix pixel at a time, and pickle the results for further
    analysis.

    NOTE: it is not recommended to run this over the whole dataset, as that
    would be very slow, and likely not provide very exciting results.
    ---------
    Args: moons; list, is a list of values of your choosing between -825 to
              14, representing the different moons for lunation center. A
              recommended, relatively dense patch to start with is
              [-11, -14, -17, -20, -23].
          nside; int, the number of sides for the healpix dividing.
          home_dir; str, the path to the directory where we want to have the
              .trans files, and where we want to output the pickles
          g_gdots; list of tuples of pairs, our grid of g and gdot.
          dts; array of floats to use to scale the velocity in relation to
              position
          radii; array of floats to use to search the radius of the given
              sizes in the KD tree
          mincount; int, the minimum number of tracklets it takes to be
              considered a cluster.
    ---------
    Returns: None, writes one '<trans file stem>.train.pickle' per moon to
             the given home_dir. Each pickle holds a dict keyed by pixel
             index whose values are whatever train_clusters returned.
    """
    abs_home_dir = os.path.abspath(home_dir)

    # One training run (and one pickle) per requested lunation center.
    for n in moons:
        lunation = util.lunation_center(n)
        infilename = os.path.join(
            abs_home_dir,
            'UnnObs_Training_1_line_A_ec_%.1lf_pm15.0_r2.5.trans' % (lunation))
        # str.rstrip('trans') strips a *set of characters*, which only
        # happened to work for this file name; splitext removes exactly the
        # '.trans' extension and yields the same '<stem>.train.pickle' name.
        pickle_filename = os.path.splitext(infilename)[0] + '.train.pickle'

        pix_runs = {}
        for pix in range(hp.nside2npix(nside)):
            # Do the training run
            pix_runs[pix] = train_clusters(
                [pix], infilename, lunation,
                g_gdots=g_gdots, dts=dts, radii=radii, mincount=mincount)

        # Write the output to a pickle
        with open(pickle_filename, 'wb') as handle:
            pickle.dump(pix_runs, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print('Find the best velocity / position scaling, our dt value.')
def run_itf(path_to_itf, pixels, g_gdots, dt, cr):
    """
    Run the initial cluster search over the whole ITF file set.

    For every lunation index in range(-825, 14) whose ITF .trans file exists
    in the directory of path_to_itf, find_clusters is called and the first of
    its two return values is pickled to 'itf_result_<lunation>_initial.pickle'
    in that directory. A percentage progress counter is written to stdout.
    ---------
    Args: path_to_itf; str, a path whose directory holds the ITF .trans files.
          pixels; passed through unchanged to find_clusters.
          g_gdots; passed to find_clusters as g_gdots.
          dt; passed to find_clusters as dt.
          cr; passed to find_clusters as rad.
    ---------
    Returns: None, writes the pickles described above.
    """
    home_dir = os.path.dirname(path_to_itf)
    moon_indices = range(-825, 14)
    n_moons = len(moon_indices)

    print('Starting run...')
    for prog, n in enumerate(moon_indices):
        # Percent complete
        sys.stdout.write("\r%d%%" % (prog * 1. / n_moons * 100))
        sys.stdout.flush()

        center = util.lunation_center(n)
        itf_file = os.path.join(
            home_dir,
            'itf_new_1_line_ec_{}_pm15.0_r2.5.trans'.format(str(center)))
        if not os.path.isfile(itf_file):
            continue

        # Only the raw results are persisted; the cluster ids are discarded.
        itf_raw_results, _ = find_clusters(
            pixels, itf_file, center, g_gdots=g_gdots, dt=dt, rad=cr)

        result_path = os.path.join(
            home_dir, 'itf_result_{}_initial.pickle'.format(str(center)))
        with open(result_path, 'wb') as handle:
            pickle.dump(itf_raw_results, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
def postprocessing_train(path_to_train, pixels, nside):
    """
    Post-process the cluster-of-clusters results of the training data.

    For every lunation index in range(-825, 14) whose training .trans file
    exists in the directory of path_to_train, the matching
    'train_result_<lunation>_coc.pickle' is loaded, handed to postprocessing
    (with orb_elms=True, gi=0.4, gdoti=0.0), and the result is pickled to
    'train_result_<lunation>_orbelem.pickle' in the same directory.
    ---------
    Args: path_to_train; str, a path whose directory holds the training
              .trans files and the coc pickles.
          pixels; passed through unchanged to postprocessing.
          nside; passed through unchanged to postprocessing.
    ---------
    Returns: None, writes the orbelem pickles described above.
    """
    home_dir = os.path.dirname(path_to_train)
    total = len(range(-825, 14))

    print('Starting postprocessing run...')
    for prog, n in enumerate(range(-825, 14)):
        # Percent complete
        percent = prog * 1. / total * 100
        sys.stdout.write("\r%d%%" % percent)
        sys.stdout.flush()

        center_tag = str(util.lunation_center(n))
        train_file = os.path.join(
            home_dir,
            'UnnObs_Training_1_line_A_ec_{}_pm15.0_r2.5.trans'.format(
                center_tag))
        if os.path.isfile(train_file):
            # Get the previously calc'd result
            coc_path = os.path.join(
                home_dir, 'train_result_{}_coc.pickle'.format(center_tag))
            with open(coc_path, 'rb') as src:
                clust_counter = pickle.load(src)

            # Note that postprocessing receives the lunation index n itself,
            # not the lunation center.
            fit_dict = postprocessing(
                train_file, clust_counter, pixels, nside, n,
                orb_elms=True, gi=0.4, gdoti=0.0)

            orbelem_path = os.path.join(
                home_dir, 'train_result_{}_orbelem.pickle'.format(center_tag))
            with open(orbelem_path, 'wb') as sink:
                pickle.dump(fit_dict, sink, protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
def separate_time_windows(tracklets, sortedTracklets, tracklets_jd_dict, file_stem,
                          n_begin=-825, n_end=14, dt=15., suff='.mpc'):
    """
    Sweep through the tracklets once, outputting them into a sequence of
    overlapping time ranges that can be processed separately.

    A tracklet is written to the file of every lunation index n (from the
    current n_begin up to, but excluding, n_end) scanned until the first n
    whose window starts after the tracklet, i.e. the first n with
    jd_tdb < util.lunation_center(n) - dt. A window's file is closed once
    the sweep has moved past the end of that window, which relies on
    sortedTracklets being ordered by time.
    -------
    Args: tracklets; dict, the first item returned from get_sorted_tracklets.
              Maps a tracklet designation to the lines written for it.
          sortedTracklets; list, the second item returned from
              get_sorted_tracklets.
          tracklets_jd_dict; dict, the third item returned from
              get_sorted_tracklets. Maps a designation to its jd_tdb.
          file_stem; str, path to the related mpc file that we are splitting
              up. Every '.mpc' in it is removed to build the output names.
          n_begin; int, the beginning index for the lunar centers.
          n_end; int, the end index for the lunar centers (exclusive).
          dt; float, half-width of each time window around its lunation
              center, in the same units as jd_tdb. It also appears in the
              output file names as '_pm<dt>'.
          suff; str, the suffix of the file, normally ".mpc" but sometimes
              ".txt"
    --------
    Returns: None; it just writes files named
             '<stem>_<lunation center>_pm<dt><suff>'.
    """
    t_center = util.lunation_center(n_begin)
    out_stem = file_stem.replace('.mpc', '')
    header = '#trackletID yr mn dy obsCode mag filter jd_tdb x_target y_target z_target x_obs y_obs z_obs '

    # Maps lunation index -> currently open output file.
    files = {}
    try:
        for desig in sortedTracklets:
            jd_tdb = tracklets_jd_dict[desig]

            # Advance the earliest active window until it can contain this
            # tracklet, closing each window we leave behind.
            while jd_tdb > t_center + dt:
                finished = files.pop(n_begin, None)
                if finished is not None:
                    finished.close()
                n_begin += 1
                t_center = util.lunation_center(n_begin)

            for n in range(n_begin, n_end):
                if jd_tdb < util.lunation_center(n) - dt:
                    break
                if n not in files:
                    outfile = out_stem + '_' + str(
                        util.lunation_center(n)) + '_pm' + str(dt) + suff
                    files[n] = open(outfile, 'w')
                    files[n].write(header + '\n')
                for line in tracklets[desig]:
                    files[n].write(line)
    finally:
        # The windows still active when the sweep ends (or fails) were never
        # closed by the loop above; close them so their data is flushed.
        for handle in files.values():
            handle.close()
def cluster_clusters_itf(path_to_itf, pixels, nside, dt, cr, new_rad):
    """
    Run the cluster-of-clusters step over the initial ITF results.

    For every lunation index in range(-825, 14) whose ITF .trans file exists
    in the directory of path_to_itf, the matching
    'itf_result_<lunation>_initial.pickle' is loaded and handed to
    cluster_clusters (with gi=0.4, gdoti=0.0, maxiter=200). The first of its
    two return values is pickled to 'itf_result_<lunation>_coc.pickle' in the
    same directory.
    ---------
    Args: path_to_itf; str, a path whose directory holds the ITF .trans files
              and the initial pickles.
          pixels; passed through unchanged to cluster_clusters.
          nside; passed through unchanged to cluster_clusters.
          dt; passed to cluster_clusters as dt.
          cr; passed to cluster_clusters as rad.
          new_rad; passed to cluster_clusters as new_rad.
    ---------
    Returns: None, writes the coc pickles described above.
    """
    home_dir = os.path.dirname(path_to_itf)
    lunation_indices = range(-825, 14)

    def _in_home(file_name):
        # Resolve a file name against the directory of path_to_itf.
        return os.path.join(home_dir, file_name)

    print('Starting cluster clusters run...')
    for prog, n in enumerate(lunation_indices):
        # Percent complete
        done = prog * 1. / len(lunation_indices) * 100
        sys.stdout.write("\r%d%%" % done)
        sys.stdout.flush()

        tag = str(util.lunation_center(n))
        itf_file = _in_home(
            'itf_new_1_line_ec_{}_pm15.0_r2.5.trans'.format(tag))
        if not os.path.isfile(itf_file):
            continue

        # Get the previously calc'd result
        with open(_in_home('itf_result_{}_initial.pickle'.format(tag)),
                  'rb') as src:
            clust_counter = pickle.load(src)

        # Only the counter is persisted; the returned ids are not used here.
        coc_counter, _ = cluster_clusters(
            itf_file, clust_counter, pixels, nside, n,
            dt=dt, rad=cr, new_rad=new_rad,
            gi=0.4, gdoti=0.0, maxiter=200)

        with open(_in_home('itf_result_{}_coc.pickle'.format(tag)),
                  'wb') as sink:
            pickle.dump(coc_counter, sink, protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
def run_train(path_to_train, pixels, g_gdots, dt, cr):
    """
    Run the initial cluster search over the whole Training file set.

    For every lunation index in range(-825, 14) whose training .trans file
    exists in the directory of path_to_train, find_clusters is called and the
    first of its two return values is pickled to
    'train_result_<lunation>_initial.pickle' in that directory. A percentage
    progress counter is written to stdout.
    ---------
    Args: path_to_train; str, a path whose directory holds the training
              .trans files.
          pixels; passed through unchanged to find_clusters.
          g_gdots; passed to find_clusters as g_gdots.
          dt; passed to find_clusters as dt.
          cr; passed to find_clusters as rad.
    ---------
    Returns: None, writes the pickles described above.
    """
    home_dir = os.path.dirname(path_to_train)
    span = range(-825, 14)
    span_len = len(span)

    print('Starting initial run...')
    for step, moon in enumerate(span):
        # Percent complete
        progress = step * 1. / span_len * 100
        sys.stdout.write("\r%d%%" % progress)
        sys.stdout.flush()

        t_center = util.lunation_center(moon)
        train_file = os.path.join(
            home_dir,
            'UnnObs_Training_1_line_A_ec_{}_pm15.0_r2.5.trans'.format(
                str(t_center)))
        if os.path.isfile(train_file):
            found = find_clusters(
                pixels, train_file, t_center,
                g_gdots=g_gdots, dt=dt, rad=cr)
            # find_clusters yields (raw results, cluster ids); only the raw
            # results are written out.
            train_raw_results, train_clust_ids = found

            pickle_path = os.path.join(
                home_dir,
                'train_result_{}_initial.pickle'.format(str(t_center)))
            with open(pickle_path, 'wb') as sink:
                pickle.dump(train_raw_results, sink,
                            protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.write("\r%d%%" % 100)
    print('\n')
    print('Run finished!')
def index_positions(n, r_func, file_stem, dt=45., nside=8):
    """
    Does the transformations on the data using the date of the n-th new
    moon as the reference time.

    It is reading and processing the entire *.mpc file. This does the
    heliocentric transformation for the assumed radius function, r_func. It
    then does light-time correction. And it appends a healpix number on each
    line in order to be able to quickly select data from a given region of
    sky.

    This generates a file called *.trans, and it incorporates the distance
    assumed in the file name.
    ---------
    Args: n; int, index of the lunation whose center is the reference time.
          r_func; callable, takes a time and returns the assumed radius.
          file_stem; str, path of the related mpc file; every '.mpc' in it
              is removed to build the input and output file names.
          dt; float, only used to build the '_pm<dt>' part of the input and
              output file names.
          nside; int, healpix nside used to compute the pixel index (nested
              ordering).
    ---------
    Returns: 0 if the input file cannot be opened, otherwise None after the
             .trans file has been written.
    """
    stem = file_stem.replace('.mpc', '') + '_' + str(
        util.lunation_center(n)) + '_pm' + str(dt)
    infilename = stem + '.mpc'

    # A missing/unreadable input is not an error for callers: report it with
    # the 0 return value. The handle opened here is the one we read from, so
    # nothing is left open behind us.
    try:
        infile = open(infilename, 'r')
    except IOError:
        return 0

    with infile:
        t_ref = util.lunation_center(n)
        r_ref = r_func(t_ref)
        r_name = "_r%.1lf" % (r_ref)
        outfilename = stem + r_name + '.trans'

        with open(outfilename, 'w') as outfile:
            for line in infile:
                if line.startswith('#'):
                    header = line.rstrip()
                    outfile.write(
                        header + ' dt x_cor y_cor z_cor pix \n'
                    )
                    continue

                # Fixed-width columns of the .mpc line.
                jd_tdb = float(line[43:57])
                x_target, y_target, z_target = line[57:97].split()
                r_target = np.array(
                    [float(x_target), float(y_target), float(z_target)])
                x_obs, y_obs, z_obs = line[97:135].split()
                r_obs = np.array([float(x_obs), float(y_obs), float(z_obs)])

                # This should be a function from here
                # Adjust positions, starting from zero light-time. The local
                # is named light_time so it no longer overwrites the dt
                # parameter; it is the value written to the 'dt' column.
                r_prev = r_func(jd_tdb)
                rho_r_p, rho_r_m = adjust_position(r_prev, r_target, r_obs)
                light_time = rho_r_p[0] / MPC_library.Constants.speed_of_light

                # Do light-time iterations.
                # Probably don't need to do this at this point, because it is
                # being re-done in a later step.
                # NOTE(review): the body ends by setting r_prev to
                # r_func(jd_tdb - light_time), which is exactly what the
                # condition compares against, so (assuming r_func is
                # deterministic) this runs at most once. Kept as-is so the
                # numbers written to the file do not change.
                while (np.abs(r_func(jd_tdb - light_time) - r_prev) > 1e-8):
                    rho_r_p, rho_r_m = adjust_position(
                        r_prev, r_target, r_obs)
                    light_time = (rho_r_p[0] /
                                  MPC_library.Constants.speed_of_light)
                    r_prev = r_func(jd_tdb - light_time)
                # to here

                xp, yp, zp = rho_r_p[1]
                # Calculate HEALPix index
                pix = hp.vec2pix(nside, xp, yp, zp, nest=True)
                outstring = line.rstrip() + " %13.6lf %12.7lf %12.7lf %12.7lf %5d\n" % \
                    (light_time, xp, yp, zp, pix)
                outfile.write(outstring)
# Score the tuned dt / cluster radius on the labelled demo training file,
# then set up the inputs for the ITF run. Relies on nside, dt, cr, n and
# BASE_DIR having been defined before this point.
pixels = range(hp.nside2npix(nside))
infilename = os.path.join(
    BASE_DIR,
    'demo_train/UnnObs_Training_1_line_A_ec_labelled_2457308.5_pm15.0_r2.5.trans'
)
print('Based on our tuning, the best dt is {0} and best cluster radius is {1}'.
      format(dt, cr))

true_count_set, mergedCounter_dict, mergedTime_dict = accessible_clusters(
    pixels, infilename=infilename)
true_count = len(true_count_set)
print('True count of clusters: {}'.format(true_count))

right, wrong, ids_right, ids_wrong = test_clusters(
    pixels, infilename, util.lunation_center(n), dt=dt, rad=cr)
# The message says "percent", so scale the fractions by 100 before printing.
print(
    'Using our optimal parameters we got {0} percent of clusters with {1} percent errors.'
    .format(100.0 * right / true_count, 100.0 * wrong / true_count))
print('We got', right, 'right and', wrong, 'wrong out of total', true_count)

print(
    'Now that we have shown our performance on training data, lets run on the ITF.'
)
itf_file = os.path.join(
    BASE_DIR, 'demo_itf/itf_new_1_line_ec_2457308.5_pm15.0_r2.5.trans')
# str.rstrip('.trans') strips a set of characters rather than the suffix and
# only worked by accident for this name; splitext drops exactly '.trans'.
itf_pickle = os.path.splitext(itf_file)[0] + '.pickle'
itf_n = -14
def plot_tune_results(moons, home_dir):
    """
    This function plots the related analysis plots of number of clusters,
    number of errors, and approx AUC.

    NOTE: Only run this after you run tune()
    ---------
    Args: moons; list, is a list of values of your choosing between -825 to
              14, representing the different moons for lunation center. A
              recommended, relatively dense patch to start with is
              [-11, -14, -17, -20, -23].
          home_dir; str, the path to the directory where we want to have the
              pickles from the tune() run.
    ---------
    Returns: None, plots the related visualizations.
    ---------
    Raises: FileNotFoundError, if the pickle for one of the moons is missing.
    """
    abs_home_dir = os.path.abspath(home_dir)

    for n in moons:
        # Must be the same name tune() uses (including the '_ec_' part),
        # otherwise the pickle written by tune() can never be found.
        infilename = os.path.join(
            abs_home_dir,
            'UnnObs_Training_1_line_A_ec_%.1lf_pm15.0_r2.5.trans'
            % (util.lunation_center(n)))
        # splitext removes exactly the '.trans' extension; str.rstrip would
        # strip a set of characters instead.
        pickle_filename = os.path.splitext(infilename)[0] + '.train.pickle'
        if not os.path.isfile(pickle_filename):
            raise FileNotFoundError('Cannot find this file. Hint: make sure you have run the tune() function first!')

        with open(pickle_filename, 'rb') as handle:
            pix_runs = pickle.load(handle)

        # Only the per-pixel true counts are needed for the plots.
        true_count_dict, _, _ = accessible_clusters(
            list(pix_runs.keys()), infilename=infilename)
        true_count = sum(true_count_dict.values())

        visual.number_clusters_plot(pix_runs, true_count)
        visual.number_errors_plot(pix_runs)
        visual.auc_plot(pix_runs, true_count)
def find_cluster_radius(moons, home_dir, dt, max_tol=1e-3):
    """
    This function finds the cluster radius to use, given a value for dt and
    a maximum tolerable error rate. The max error rate defaults to 0.1%.

    For each moon the radii tried by tune() are scanned in order, summing
    errors and clusters over all pixels. The scan stops at the first radius
    whose error rate (errors / true clusters) is no longer below max_tol and
    that radius is recorded for the moon. The result is the mean of the
    recorded radii.
    ---------
    Args: moons; list, is a list of values of your choosing between -825 to
              14, representing the different moons for lunation center. A
              recommended, relatively dense patch to start with is
              [-11, -14, -17, -20, -23].
          home_dir; str, the path to the directory where we want to have the
              pickles from the tune() run.
          dt; float, the dt you decided to use based on the previous plots,
              or subject matter knowledge. Must be one of the dt keys stored
              in the tune() pickles.
          max_tol; float, the maximum relative error we tolerate in our
              output. defaults to 1e-3 or 0.1%
    ---------
    Returns: float, the mean over moons of the recorded cluster radius. A
             moon whose error rate stays below max_tol for every radius
             records nothing; if no moon records a radius the mean of an
             empty list (nan) is returned.
    ---------
    Raises: FileNotFoundError, if the pickle for one of the moons is missing.
            ValueError, if a moon has no true clusters, so that no error
            rate can be computed for it.
    """
    abs_home_dir = os.path.abspath(home_dir)
    print('Now that we have set dt={}, lets calculate the best cluster radius.'.format(dt))

    training_dict = {}
    for n in moons:
        infilename = os.path.join(
            abs_home_dir,
            'UnnObs_Training_1_line_A_ec_%.1lf_pm15.0_r2.5.trans'
            % (util.lunation_center(n)))
        # splitext removes exactly the '.trans' extension; str.rstrip would
        # strip a set of characters instead.
        pickle_filename = os.path.splitext(infilename)[0] + '.train.pickle'
        if not os.path.isfile(pickle_filename):
            raise FileNotFoundError('Cannot find this file. Hint: make sure you have run the tune() function first!')

        with open(pickle_filename, 'rb') as handle:
            pix_runs = pickle.load(handle)

        pix_keys = list(pix_runs.keys())
        true_count_dict, _, _ = accessible_clusters(
            pix_keys, infilename=infilename)

        # The number of true clusters does not depend on the radius.
        trues = sum(true_count_dict[pix] for pix in pix_keys)
        if trues == 0:
            raise ValueError(
                'No true clusters found for moon {}; cannot compute an '
                'error rate.'.format(n))

        # pix_runs[pix][dt] is indexed as (radii, cluster counts, error
        # counts). The pickle is keyed by pixel index, so index it with pix
        # directly instead of going through a module-level `pixels`. The
        # radii are read from the last pixel, as before, and their length
        # replaces the hard-coded 99.
        radii = pix_runs[pix_keys[-1]][dt][0]
        for i, radius in enumerate(radii):
            clusts = sum(pix_runs[pix][dt][1][i] for pix in pix_keys)
            errs = sum(pix_runs[pix][dt][2][i] for pix in pix_keys)
            if float(errs) / trues < max_tol:
                print(i, radius, errs, clusts, trues)
            else:
                training_dict[n] = radius, errs, clusts, trues
                break

    cluster_radius = np.mean([v[0] for v in training_dict.values()])
    return cluster_radius
def test_lunation():
    """Check util.lunation_center against its known value for index -11."""
    expected_center = 2457397.5
    computed_center = util.lunation_center(-11)
    assert computed_center == expected_center