def apass_zone_to_dat(save_dir, filter_config, zone_container_filename):
    """Average every container in a zone and write the result to a .dat file.

    With APASS data, we average on a per-container basis: each container
    found in the leaves of the zone's tree contributes one averaged record.

    save_dir: directory holding the zone's JSON tree and data files; the
        output <zone name>.dat file is written there as well.
    filter_config: passed through unchanged to average_container.
    zone_container_filename: name of the zone's container file; only used to
        derive the zone ID.
    """
    # Local import so this block does not depend on the file's import header.
    import os

    zone_id = apass.zone_from_name(zone_container_filename)
    zone_name = apass.name_zone(zone_id)
    print("Processing zone " + zone_name)

    # Load the zone's tree and data from disk. os.path.join is used for both
    # paths so the result is correct whether or not save_dir ends with a
    # path separator.
    zone_json = os.path.join(save_dir, apass.name_zone_json_file(zone_id))
    zone_tree = QuadTreeNode.from_file(zone_json, leafClass=RectLeaf)
    zone.load_zone_data(zone_tree, save_dir)

    # One averaged record per container, in leaf order.
    averages = [
        average_container(container, filter_config)
        for leaf in zone_tree.get_leaves()
        for container in leaf.containers
    ]

    # Write out the average information.
    dat_filename = os.path.join(save_dir, zone_name + ".dat")
    averages = dat.dicts_to_ndarray(averages, dat_type="apass")
    dat.write_dat(dat_filename, averages, dat_type="apass")
def purge_nights(night_names, save_dir, filename):
    """Purge all data belonging to the given nights from one zone's files.

    The zone's contribution file is consulted first; only nights that appear
    both there and in night_names are purged from the zone's fredbin file,
    its containerized fredbin file and, finally, the contribution file
    itself.

    night_names: names of the nights to remove.
    save_dir: directory containing the zone's files.
    filename: a zone file name; only used to derive the zone ID.

    Returns a list with the value returned by purge_nights_from_file for the
    fredbin file and for the container file (in that order), or an empty
    list when the zone has no data from any of the requested nights.
    """
    results = []

    # Load the zone contribution file.
    zone_id = apass.zone_from_name(filename)
    contrib_filename = save_dir + '/' + apass.name_zone_contrib_file(zone_id)
    contrib_data = read_contrib_file(contrib_filename)

    # Find common nights between the contrib file and the nights to be
    # purged. If none exist there is nothing to do.
    common_nights = np.intersect1d(contrib_data['night_name'], night_names)
    if len(common_nights) == 0:
        print("No purge-able nights for zone %i" % (zone_id))
        return results

    # Remove the data from the fredbin and containerized fredbin files.
    fredbin_file = save_dir + '/' + apass.name_zone_file(zone_id)
    results.append(purge_nights_from_file(common_nights, fredbin_file))

    container_file = save_dir + '/' + apass.name_zone_container_file(zone_id)
    results.append(purge_nights_from_file(common_nights, container_file))

    # Now drop the purged nights from the contrib file, keeping every row
    # whose night is not among the purged ones.
    purged = np.in1d(contrib_data['night_name'], common_nights)
    contrib_data = contrib_data[~purged]
    write_contrib_file(contrib_filename, contrib_data)

    # Previously the function fell off the end here and returned None, so
    # the collected results were lost on the only path that produced any.
    return results
def zone_to_rects(save_dir, filename):
    """Process an APASS zone file into overlapping rectangles.

    Reads the (binary) zone file, builds a quadtree covering the zone,
    inserts every datum into it, numbers the resulting containers, merges
    containers on the zone's borders, and writes the border information,
    the zone data and the zone -> container mapping into save_dir.

    save_dir: directory to which the output files are written.
    filename: path of the zone's fredbin file.

    Relies on the module-level ``tree_file`` naming the global tree's JSON
    file. Returns None; if an insert raises RuntimeError the function
    reports possible data corruption and returns without writing anything.
    """
    zone_id = apass.zone_from_name(filename)
    global tree_file

    # Read in the (binary) data file.
    data = read_fredbin(filename)
    print("Processing '%s' which has %i data points " % (filename, data.size))

    # Find the bounds of this zone using the first data point in the file.
    # NOTE: data[0] requires the file to contain at least one record.
    global_tree = QuadTreeNode.from_file(tree_file, leafClass=IDLeaf)
    ra, dec = apass.get_coords(data[0])
    zone_bounds = global_tree.find_leaf(ra, dec).rect

    # Build a tree for the zone.
    zone_tree = QuadTreeNode(zone_bounds, 0, parent=None)
    zone_tree.split_until(apass.zone_depth, leafClass=RectLeaf)

    # Insert the data into the tree, building up containers (rectangles) in
    # the process.
    for datum in np.nditer(data):
        # Copy so the stored value is independent of the iterator's buffer.
        datum = datum.copy()
        ra, dec = apass.get_coords(datum)
        try:
            zone_tree.insert(ra, dec, datum)
        except RuntimeError:
            print("ERROR: Potential data corruption in " + filename)
            print(
                "ERROR: Check file, remove the zone directory, and re-run this program"
            )
            return

    # Number the (leaf) containers, then merge those on the zone's borders.
    number_containers(zone_tree, zone_id=zone_id)
    zone_border_info = merge_containers_on_borders(zone_tree)

    # Write out the containers that were on the border.
    border_filename = save_dir + '/' + apass.name_zone_border_file(zone_id)
    save_border_info(border_filename, zone_border_info)

    zone.save_zone_data(zone_tree, save_dir)

    # Save the zone -> container mapping.
    json_filename = save_dir + '/' + apass.name_zone_json_file(zone_id)
    QuadTreeNode.to_file(zone_tree, json_filename)
def save_zone(save_dir, zone_dict):
    """Save the zone data to disk and release the zone's lock.

    The zone_dict format matches the format found in load_zone. The keys
    read here are 'json_filename', 'border_info_filename', 'tree',
    'border_info', 'loaded' and 'lock'.

    Nothing is written unless zone_dict['loaded'] is true. The lock is
    released in every case, including when a lookup or one of the save steps
    raises, so a failed save cannot leave the zone locked.
    """
    lock = zone_dict['lock']
    try:
        json_file = zone_dict['json_filename']
        border_info_file = zone_dict['border_info_filename']
        tree = zone_dict['tree']
        border_info = zone_dict['border_info']

        # Only write back zones whose load succeeded.
        if zone_dict['loaded']:
            save_zone_data(tree, save_dir)
            save_border_info(border_info_file, border_info)
            QuadTreeNode.to_file(tree, json_file)
    finally:
        lock.release()
def sro_zone_to_dat(save_dir, filter_config, zone_container_filename):
    """Average every container in a zone by field and record field overlaps.

    For each container in the zone, the data is averaged per field
    (average_by_field) and the averages are appended to the zone's .dat
    output. Whenever one container yields averages for more than one field,
    an edge between each pair of those fields is added to (or updated in) a
    graph: the edge's 'line_ids' list collects the pair of output line
    numbers and its 'weight' counts how many such pairs were seen.

    save_dir: directory holding the zone's JSON tree and data files; the
        <zone name>.dat and <zone name>.p outputs are written there.
    filter_config: passed through unchanged to average_by_field.
    zone_container_filename: name of the zone's container file; only used to
        derive the zone ID.
    """
    # Local import so this block does not depend on the file's import header.
    import os

    zone_id = apass.zone_from_name(zone_container_filename)
    zone_name = apass.name_zone(zone_id)
    print("Processing zone " + zone_name)

    # Graph of fields that share at least one container in this zone.
    G = nx.Graph()

    # Load the zone's tree and data from disk and get the leaves.
    # os.path.join keeps the path correct whether or not save_dir ends with
    # a path separator.
    zone_json = os.path.join(save_dir, apass.name_zone_json_file(zone_id))
    zone_tree = QuadTreeNode.from_file(zone_json, leafClass=RectLeaf)
    zone.load_zone_data(zone_tree, save_dir)
    leaves = zone_tree.get_leaves()

    # Average the data and populate the graph with shared-container
    # information. line_number is the index of each average in the output.
    line_number = 0
    averages = []
    for leaf in leaves:
        for container in leaf.containers:
            c_aves = average_by_field(container, filter_config)
            averages.extend(c_aves)

            # (output line number, field ID) for each average of this
            # container.
            rows = []
            for c_ave in c_aves:
                rows.append((line_number, c_ave['field_id']))
                line_number += 1

            # Every pair of rows is one overlap between two fields. With
            # fewer than two rows there are no pairs and nothing is added.
            for (src_line, src_field), (dst_line, dst_field) in \
                    itertools.combinations(rows, 2):
                if not G.has_edge(src_field, dst_field):
                    G.add_edge(src_field, dst_field, line_ids=[], weight=0)
                edge = G[src_field][dst_field]
                edge['line_ids'].append((src_line, dst_line))
                edge['weight'] += 1

    # Write out the average information.
    dat_filename = os.path.join(save_dir, zone_name + ".dat")
    averages = dat.dicts_to_ndarray(averages, dat_type="sro")
    dat.write_dat(dat_filename, averages, dat_type="sro")

    # Save the graph to a pickle file.
    # NOTE(review): write_gpickle is not available in recent networkx
    # releases; confirm the pinned networkx version still provides it.
    graph_filename = os.path.join(save_dir, zone_name + ".p")
    nx.write_gpickle(G, graph_filename)
def main():
    """Plot the data of each given APASS zone, optionally with container borders.

    For every input file the zone's original and containerized data are read
    from the directory of the first input, the zone's tree is loaded, each
    leaf rectangle is outlined, and both data sets are drawn as scatter
    plots (containerized data in green).
    """
    parser = argparse.ArgumentParser(
        description='Plots a zone file and its data')
    parser.add_argument('input', nargs='+',
                        help="Zone JSON files to be plotted")
    parser.add_argument('--show-containers', default=False,
                        action="store_true",
                        help="Plot borders for containers")
    args = parser.parse_args()

    # All zone files are looked up next to the first input file.
    save_dir = os.path.dirname(os.path.realpath(args.input[0])) + "/"

    for filename in args.input:
        zone_id = apass.zone_from_name(filename)
        print("Plotting zone " + apass.name_zone(zone_id))

        # Original zone data; it is only plotted, not restored to the tree.
        raw_points = fred.read_fredbin(save_dir + apass.name_zone_file(zone_id))
        print("Zone file has " + str(raw_points.size) + " entries")

        # Containerized zone data.
        container_points = fred.read_fredbin(
            save_dir + apass.name_zone_container_file(zone_id))
        print("Zone container file has " + str(container_points.size) +
              " entries")

        # The zone's tree and its leaves.
        tree_path = save_dir + apass.name_zone_json_file(zone_id)
        leaves = QuadTreeNode.from_file(
            tree_path, leafClass=RectLeaf).get_leaves()

        container_count = sum(len(leaf.containers) for leaf in leaves)
        print("Zone contains a total of " + str(container_count) +
              " containers")

        _, axes = plt.subplots(1)

        # Outline every leaf and, on request, the containers inside it.
        for leaf in leaves:
            bounds = leaf.rect
            origin = (bounds.x_min, bounds.y_min)
            width = bounds.x_max - bounds.x_min
            height = bounds.y_max - bounds.y_min
            axes.add_patch(
                patches.Rectangle(origin, width, height, fill=False))

            if args.show_containers:
                plot_containers(leaf, axes)

        # Original data first, containerized data on top in green.
        plt.scatter(raw_points['ra'], raw_points['dec'])
        plt.scatter(container_points['ra'], container_points['dec'],
                    color="green")

        plt.show()
def main():
    """Insert the data of each input zone file into a quadtree of rectangles.

    Parses the command line, configures the module-level ``error_filename``
    and ``tree_file``, runs zone_to_rects (through zone_to_rects_wrapper) on
    every input file, either serially (--debug) or through a worker pool,
    and finally writes a list of the container files that were modified.
    """
    global error_filename
    global tree_file

    parser = argparse.ArgumentParser(
        description='Inserts zone data into a quadtree data structure.')
    parser.add_argument('save_dir',
                        help="Directory to which output files should be saved")
    parser.add_argument('input', nargs='+',
                        help="Input files which will be split into zonefiles")
    # A single job is the default; pass -j N to process files in parallel.
    parser.add_argument('-j', '--jobs', type=int, help="Parallel jobs",
                        default=1)
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Run in debug mode")
    args = parser.parse_args()

    start = time.time()

    # NOTE(review): os.path.dirname only yields the directory itself when
    # save_dir is given with a trailing slash; without one it yields the
    # parent directory. Confirm this is the intended calling convention.
    save_dir = os.path.dirname(args.save_dir)

    # Configure globals.
    error_filename = save_dir + "/error_zone_to_rects.txt"
    tree_file = save_dir + "/global.json"

    # Construct a partial to serve as the function to call in serial or
    # parallel mode below.
    ztr_func = partial(zone_to_rects_wrapper, zone_to_rects, save_dir)

    if args.debug:
        # Single-process mode for development and debugging.
        for filename in args.input:
            ztr_func(filename)
    else:
        # Farm the work out to a pool and wait for all of it to finish.
        pool = Pool(args.jobs)
        pool.imap(ztr_func, args.input)
        pool.close()
        pool.join()

    # Write out a file containing information on the containers modified.
    mod_file = save_dir + "/zone-to-rects-modified-files.txt"
    with open(mod_file, 'w') as outfile:
        for filename in args.input:
            fredbin_filename = os.path.basename(filename)
            zone_id = apass.zone_from_name(fredbin_filename)
            container_filename = apass.name_zone_container_file(zone_id)
            # save_dir carries no trailing separator (see dirname above), so
            # one must be inserted to produce a usable path.
            outfile.write(save_dir + "/" + container_filename + "\n")

    print("A list of modified files has been written to %s" % (mod_file))

    end = time.time()
    print("Time elapsed: %is" % (int(end - start)))