import logging
import os

import xarray as xr

# Helpers referenced below (get_location_metadata_by_store, get_SAN_directories,
# get_deployment_data, get_SAN_samples, compile_datasets, SessionManager,
# execute_unlimited_query, san_threadpool, SAN_LOCATION_NAME, CASS_LOCATION_NAME)
# are assumed to be imported from elsewhere in this package.

log = logging.getLogger(__name__)


def fetch_full_san_data(stream_key, time_range, location_metadata=None):
    """
    Given a time range and stream key, generate all data in the interval using data from the SAN.
    :param stream_key:
    :param time_range:
    :param location_metadata:
    :return: an xarray Dataset concatenated along 'index', or None if no data was found
    """
    if location_metadata is None:
        location_metadata = get_location_metadata_by_store(stream_key, time_range, SAN_LOCATION_NAME)
    # get which bins we can gather data from
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded DataSAN")
        return None
    data = []
    next_index = 0
    for time_bin in location_metadata.bin_list:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments in this bin
            for deployment in os.listdir(direct):
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    # -1 asks get_deployment_data for every point in the deployment
                    new_data = get_deployment_data(full_path, stream_key.stream_name, -1, time_range,
                                                   index_start=next_index)
                    if new_data is not None:
                        data.append(new_data)
                        # keep track of indexes so they are unique in the final dataset
                        next_index += len(new_data['index'])
    if not data:
        return None
    return xr.concat(data, dim='index')
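# A hedged usage sketch: the helper below is hypothetical (not part of this
# module's API) and only illustrates the function's contract, namely an xarray
# Dataset concatenated along 'index', or None when nothing was found.
def _example_fetch_and_report(stream_key, time_range):
    """Hypothetical caller showing how a fetch_full_san_data result might be used."""
    dataset = fetch_full_san_data(stream_key, time_range)
    if dataset is None:
        log.warning("No SAN data found for %s in %s", stream_key, time_range)
        return 0
    # 'index' is the concatenation dimension, so its size is the total point count
    return dataset['index'].size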
def fetch_nsan_data(stream_key, time_range, num_points=1000, location_metadata=None):
    """
    Given a time range and stream key, generate evenly spaced times over the interval using data from the SAN.
    :param stream_key:
    :param time_range:
    :param num_points:
    :param location_metadata:
    :return: a compiled dataset of the sampled points
    """
    if location_metadata is None:
        location_metadata = get_location_metadata_by_store(stream_key, time_range, SAN_LOCATION_NAME)
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded SAN")
        return None
    to_sample = get_SAN_samples(num_points, location_metadata)
    # To get the data, grab the first file in each deployment directory whose name
    # matches, and take a random selection of particles from that file if they
    # fall within the time range.
    missed = 0
    data = []
    next_index = 0
    futures = []
    for time_bin, num_data_points in to_sample:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments in this bin; pair each future
            # with its requested point count so the missed-point accounting below
            # uses the right target rather than the loop variable's last value
            for deployment in os.listdir(direct):
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    futures.append((num_data_points,
                                    san_threadpool.apply_async(get_deployment_data,
                                                               (full_path, stream_key.stream_name,
                                                                num_data_points, time_range),
                                                               kwds={'index_start': next_index})))
        else:
            missed += num_data_points
    for num_data_points, future in futures:
        new_data = future.get()
        if new_data is None:
            missed += num_data_points
            continue
        count = len(new_data['index'])
        missed += (num_data_points - count)
        data.append(new_data)
        # track how many indices have been consumed (note: every task above was
        # submitted before any results were collected, so each saw index_start=0)
        next_index += count
    if missed:
        log.warning("SAN: Failed to produce {:d} points due to nature of sampling".format(missed))
    return compile_datasets(data)
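# For illustration only: get_SAN_samples (defined elsewhere in this package)
# decides how many points to request from each time bin. A minimal sketch of
# that idea, assuming a plain even split of num_points across bins (the real
# helper may weight bins differently, e.g. by particle count), might look like:
def _example_even_samples(num_points, bin_list):
    """Hypothetical helper: distribute num_points as evenly as possible across bins."""
    per_bin, remainder = divmod(num_points, max(len(bin_list), 1))
    # the first `remainder` bins each absorb one leftover point
    return [(time_bin, per_bin + (1 if i < remainder else 0))
            for i, time_bin in enumerate(bin_list)]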
def fetch_all_data(stream_key, time_range, location_metadata=None):
    """
    Given a time range, fetch all records from the starting hour to the ending hour.
    :param stream_key:
    :param time_range:
    :param location_metadata:
    :return: a (cols, rows) tuple of query columns and all fetched rows
    """
    if location_metadata is None:
        location_metadata = get_location_metadata_by_store(stream_key, time_range, CASS_LOCATION_NAME)
    cols = SessionManager.get_query_columns(stream_key.stream.name)
    rows = []
    for bin_num in location_metadata.bin_list:
        rows.extend(execute_unlimited_query(stream_key, cols, bin_num, time_range))
    return cols, rows
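# A hedged consumption sketch: fetch_all_data returns raw query columns and row
# tuples rather than a dataset. The helper below is hypothetical, and the pandas
# usage is one illustrative way a caller might tabulate the result, not
# necessarily how this package post-processes it.
def _example_tabulate_rows(stream_key, time_range):
    """Hypothetical caller: build a DataFrame from fetch_all_data's (cols, rows)."""
    import pandas as pd
    cols, rows = fetch_all_data(stream_key, time_range)
    # each row is a sequence aligned with cols, so it maps directly onto a table
    return pd.DataFrame(rows, columns=cols)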