def finder_from_spec(spec, node_spoof=None):
    """Build a `Finder` restricted to the instrument and times of a dataspec.

    The database is only queried on MPI rank 0; every other rank gets `None`.

    Parameters
    ----------
    spec : dict
        Dataspec dictionary. Must contain ``"instrument"`` (the instrument
        name) and ``"timerange"`` (either a single mapping with ``"start"``
        and ``"end"`` entries, or a list of such mappings). May contain
        ``"node_spoof"``.
    node_spoof : optional
        Passed through to the `Finder` constructor. When `None`, the
        ``"node_spoof"`` entry of `spec` is used if it is present.

    Returns
    -------
    fi : ch_util.finder.Finder or None
        The configured finder on rank 0, `None` elsewhere.
    """
    # Read the required keys on every rank so a malformed spec fails everywhere
    instrument = spec["instrument"]
    intervals = spec["timerange"]

    if not mpiutil.rank0:
        return None

    # Look up the instrument record
    inst_obj = di.ArchiveInst.select().where(di.ArchiveInst.name == instrument).get()

    # A single interval may be given bare; normalise to a list
    if not isinstance(intervals, list):
        intervals = [intervals]

    # Outer bounds that enclose every requested interval
    earliest = min(interval["start"] for interval in intervals)
    latest = max(interval["end"] for interval in intervals)

    # An explicit argument takes precedence over the value in the spec
    if node_spoof is None and "node_spoof" in spec:
        node_spoof = spec["node_spoof"]

    fi = finder.Finder(node_spoof=node_spoof)

    # First bound the search, then add each individual interval
    fi.set_time_range(earliest, latest)
    for interval in intervals:
        fi.include_time_interval(interval["start"], interval["end"])

    # Only keep acquisitions from the requested instrument
    fi.filter_acqs(di.ArchiveAcq.inst == inst_obj)

    return fi
def set_acq_list(self):
    """Set the finder objects and acquisition lists for this time range.

    Four attributes are assigned:

    - ``night_finder`` / ``night_acq_list``: a finder, and its query results,
      for pathfinder correlator data between ``self.t1`` and ``self.t2`` with
      daytime excluded.
    - ``finder`` / ``acq_list``: a finder, and its query results, for the same
      time range with the sun excluded, using tolerances that vary with the
      time of year of ``self.t1``.
    """

    # Base finder: pathfinder correlator data within [t1, t2]
    base = finder.Finder(node_spoof=_DEFAULT_NODE_SPOOF)
    base.filter_acqs((data_index.ArchiveInst.name == "pathfinder"))
    base.only_corr()
    base.set_time_range(self.t1, self.t2)

    # Night-only variant, built from an independent copy of the base finder
    night = copy.deepcopy(base)
    night.exclude_daytime()

    self.night_finder = night
    self.night_acq_list = night.get_results()

    # Fractional month of the start time (day of month scaled by 30)
    start = ephemeris.unix_to_datetime(self.t1)
    month = start.month + float(start.day) / 30.0

    def _seasonal_tolerance(rise_month, fall_month, steepness, span, floor):
        # Smooth step up around `rise_month` and back down around
        # `fall_month`; the result lies between `floor` and `floor + span`.
        rising = (
            np.arctan((month - rise_month) * steepness) + np.pi / 2.0
        ) * span / np.pi + floor
        falling = (
            np.pi / 2.0 - np.arctan((month - fall_month) * steepness)
        ) * span / np.pi + floor
        return np.minimum(rising, falling)

    # Tolerance handed to exclude_sun as `time_delta`: 1500 to 12000
    # (presumably seconds -- TODO confirm), widest between months 3 and 11
    transit_tol = _seasonal_tolerance(3.0, 11.0, 3.0, 10500.0, 1500.0)

    # Tolerance handed to exclude_sun as `time_delta_rise_set`: 6000 to 8100,
    # widest between months 4 and 10
    rise_set_tol = _seasonal_tolerance(4.0, 10.0, 5.0, 2100.0, 6000.0)

    base.exclude_sun(time_delta=transit_tol, time_delta_rise_set=rise_set_tol)

    self.finder = base
    self.acq_list = base.get_results()
def get_results(self, src, tdelt=2800):
    """Return the acquisitions that cover transits of `src`.

    When ``self.finder`` is not `None`, a deep copy of it is used so the
    stored finder is left untouched. Otherwise a new finder for pathfinder
    correlator data between ``self.t1`` and ``self.t2`` is created. In both
    cases the finder is restricted to transits of `src` and then queried.

    Parameters
    ----------
    src
        Source passed to the finder's ``include_transits``.
    tdelt : optional
        Passed to ``include_transits`` as ``time_delta``. Default is 2800.

    Returns
    -------
    The value returned by the finder's ``get_results``.
    """
    if self.finder is None:
        # No finder prepared yet: build one covering [t1, t2]
        transit_finder = finder.Finder(node_spoof=_DEFAULT_NODE_SPOOF)
        transit_finder.filter_acqs((data_index.ArchiveInst.name == "pathfinder"))
        transit_finder.only_corr()
        transit_finder.set_time_range(self.t1, self.t2)
    else:
        # Work on a copy so the transit restriction does not leak into self.finder
        transit_finder = copy.deepcopy(self.finder)

    transit_finder.include_transits(src, time_delta=tdelt)

    return transit_finder.get_results()
def setup(self):
    """Query the database, group the files by acquisition, and store them.

    The query runs on rank 0 of ``self.comm`` only. Each acquisition found
    contributes one list of files, unless:

    - it has fewer than ``min_num_files`` files, in which case it is skipped;
    - it has more than ``max_num_files`` files, in which case it is split
      into several contiguous groups of roughly equal size.

    The resulting list of file lists is broadcast to all ranks and saved in
    ``self.files``.
    """
    from ch_util import layout
    from chimedb import data_index as di

    def _choose_group_size(nfiles, target, tolerance):
        # Pick a group size close to `target` that leaves only a small
        # remainder when dividing `nfiles`. Candidates are searched outward
        # from `target` in both directions simultaneously.
        if nfiles % target < tolerance:
            return target

        lower, upper = target - 1, target + 1
        while nfiles % lower > tolerance and nfiles % upper > tolerance:
            lower -= 1
            upper += 1

        return lower if nfiles % lower < nfiles % upper else upper

    # Only rank 0 talks to the database; everyone else receives the broadcast
    files = None

    if self.comm.rank == 0:
        layout.connect_database()

        fi = finder.Finder(node_spoof=self.node_spoof)
        fi.only_corr()
        if self.accept_all_global_flags:
            fi.accept_all_global_flags()
        fi.set_time_range(self.start_time, self.end_time)
        fi.filter_acqs(di.ArchiveInst.name == self.instrument)

        files = []

        for acq_index, _ in enumerate(fi.acqs):
            # Flatten the file lists (first element of every result)
            acq_files = [
                fname
                for result in fi.get_results_acq(acq_index)
                for fname in result[0]
            ]
            count = len(acq_files)

            # Too few files: drop this acquisition
            if self.min_num_files is not None and count < self.min_num_files:
                continue

            # Small enough to be kept as a single group
            if self.max_num_files is None or count <= self.max_num_files:
                files.append(acq_files)
                continue

            # Too many files: split into groups of roughly equal size,
            # tolerating a remainder of up to 10% of the maximum
            tolerance = max(1, int(0.10 * self.max_num_files))
            group_size = _choose_group_size(count, self.max_num_files, tolerance)

            ngroup = count // group_size
            offset = (count % group_size) // 2

            # Interior boundaries are shifted by half the remainder; the outer
            # boundaries are pinned so the first and last groups absorb it
            edges = [offset + gg * group_size for gg in range(ngroup + 1)]
            edges[0] = 0
            edges[-1] = count

            files.extend(acq_files[lo:hi] for lo, hi in zip(edges[:-1], edges[1:]))

    # Broadcast the files to the other nodes
    files = self.comm.bcast(files, root=0)
    self.comm.Barrier()

    self.files = files
def setup(self):
    """Fetch the files in the specified run.

    The database is queried on MPI rank 0 only. The global flag named
    ``self.run_name`` in the "run" category is looked up, and the start and
    end of its event, together with its instrument, define the finder query.
    The resulting file list is broadcast to every rank.

    Returns
    -------
    files : list
        Sorted list of files to load.

    Raises
    ------
    RuntimeError
        On rank 0, if no active run with this name is found, if more than
        one is found, or if the run has no instrument set.
    """
    from ch_util import layout
    from chimedb import data_index as di

    files = None

    # Query the database on rank=0 only, and broadcast to everywhere else
    if mpiutil.rank0:
        layout.connect_database()

        cat_run = (
            layout.global_flag_category.select()
            .where(layout.global_flag_category.name == "run")
            .get()
        )

        # Find run in database
        run_query = layout.global_flag.select().where(
            layout.global_flag.category == cat_run,
            layout.global_flag.name == self.run_name,
        )

        # Make sure we only have flags with active events
        run_query = (
            run_query.join(layout.graph_obj)
            .join(layout.event)
            .where(layout.event.active)
        )

        # Count once: every call to count() is a separate database query
        nrun = run_query.count()
        if nrun == 0:
            raise RuntimeError("Run %s not found in database" % self.run_name)
        if nrun > 1:
            raise RuntimeError(
                "Multiple global flags found in database for run %s" % self.run_name
            )

        run = run_query.get()

        # Fetch run start and end time
        run_event = run.event().get()
        start, end = run_event.start.time, run_event.end.time

        # Fetch the instrument
        if run.inst is None:
            raise RuntimeError("Instrument is not specified in database.")
        inst_obj = run.inst

        # Create a finder object limited to the time span of the run
        fi = finder.Finder(node_spoof=self.node_spoof)
        fi.only_corr()
        fi.set_time_range(start, end)

        # Only include the required instrument
        fi.filter_acqs(di.ArchiveAcq.inst == inst_obj)

        # Pull out the results and extract all the files
        results = fi.get_results()
        files = sorted(fname for result in results for fname in result[0])

    files = mpiutil.world.bcast(files, root=0)

    # Make sure all nodes have container before return
    mpiutil.world.Barrier()

    return files
def setup(self):
    """Query the database and fetch the files.

    The query is built and executed on MPI rank 0 only; the result is then
    broadcast to every rank.

    Returns
    -------
    files : list
        If ``self.return_intervals`` is false, a sorted flat list of the
        files of every finder result. Otherwise the finder results
        themselves, sorted by ``result[1][0]``.

    Raises
    ------
    ValueError
        On rank 0, if neither ``start_time`` nor ``start_csd`` is set, or if
        the number of ``time_delta`` values given for ``include_transits`` /
        ``exclude_transits`` is greater than one but smaller than the number
        of sources.
    """

    def _apply_transits(apply, sources, time_delta, option):
        # Call `apply` (the finder's include_transits or exclude_transits)
        # once per source. `time_delta` may be empty (no window is passed),
        # a single value (shared by all sources) or one value per source.
        ntime_delta = len(time_delta)

        if (ntime_delta > 1) and (ntime_delta < len(sources)):
            raise ValueError(
                "Must specify `time_delta` for each source in "
                "`%s` or provide single value for all sources." % option
            )

        for ss, src in enumerate(sources):
            tdelta = time_delta[ss % ntime_delta] if ntime_delta > 0 else None
            # Sources given by name are looked up in the ephemeris catalogue
            bdy = ephemeris.source_dictionary[src] if isinstance(src, str) else src
            apply(bdy, time_delta=tdelta)

    files = None

    # Query the database on rank=0 only, and broadcast to everywhere else
    if mpiutil.rank0:
        if self.run_name:
            # NOTE(review): this returns on rank 0 before the broadcast below
            # is reached, while the other ranks still enter it -- confirm
            # that QueryRun takes care of the collective communication.
            return self.QueryRun()

        layout.connect_database()

        f = finder.Finder(node_spoof=self.node_spoof)

        f.filter_acqs(di.AcqType.name == self.acqtype)

        if self.instrument is not None:
            f.filter_acqs(di.ArchiveInst.name == self.instrument)

        if self.accept_all_global_flags:
            f.accept_all_global_flags()

        # Use start and end times if set, or try and use the start and end CSDs
        if self.start_time:
            st, et = self.start_time, self.end_time
        elif self.start_csd:
            st = ephemeris.csd_to_unix(self.start_csd)
            et = (
                ephemeris.csd_to_unix(self.end_csd)
                if self.end_csd is not None
                else None
            )
        else:
            # Without this the call below would fail on unbound locals
            raise ValueError(
                "Either `start_time` or `start_csd` must be set to define "
                "the time range of the query."
            )

        f.set_time_range(st, et)

        if self.start_RA and self.end_RA:
            f.include_RA_interval(self.start_RA, self.end_RA)
        elif self.start_RA or self.end_RA:
            # Exactly one end of the RA interval was given
            self.log.warning(
                "One but not both of start_RA and end_RA are set. Ignoring both."
            )

        # NOTE(review): unlike the guarded call above, this filter is applied
        # even when self.instrument is None -- confirm whether it is needed.
        f.filter_acqs(di.ArchiveInst.name == self.instrument)

        if self.exclude_daytime:
            f.exclude_daytime()

        if self.exclude_sun:
            f.exclude_sun(
                time_delta=self.exclude_sun_time_delta,
                time_delta_rise_set=self.exclude_sun_time_delta_rise_set,
            )

        if self.include_transits:
            _apply_transits(
                f.include_transits,
                self.include_transits,
                self.include_transits_time_delta,
                "include_transits",
            )

        if self.exclude_transits:
            _apply_transits(
                f.exclude_transits,
                self.exclude_transits,
                self.exclude_transits_time_delta,
                "exclude_transits",
            )

        if self.source_26m:
            f.include_26m_obs(self.source_26m)

        if len(self.exclude_data_flag_types) > 0:
            f.exclude_data_flag_type(self.exclude_data_flag_types)

        results = f.get_results()

        if not self.return_intervals:
            # Flatten to a plain sorted list of files
            files = [fname for result in results for fname in result[0]]
            files.sort()
        else:
            # Keep the full results, ordered by result[1][0]
            files = results
            files.sort(key=lambda x: x[1][0])

    files = mpiutil.world.bcast(files, root=0)

    # Make sure all nodes have container before return
    mpiutil.world.Barrier()

    return files