Python Finder示例，ch_util.finder.Finder Python示例

示例#1

0

显示文件

def finder_from_spec(spec, node_spoof=None):
    """Build a `Finder` limited to the instrument and time ranges of a dataspec.

    Parameters
    ----------
    spec : dict
        Dataspec dictionary. The entries read here are ``"instrument"``
        (matched against `di.ArchiveInst.name`), ``"timerange"`` (either a
        single dict with ``"start"`` and ``"end"`` entries, or a list of such
        dicts) and, optionally, ``"node_spoof"``.
    node_spoof : optional
        Forwarded to `finder.Finder`. When it is None, ``spec["node_spoof"]``
        is used instead if that entry exists.

    Returns
    -------
    fi : ch_util.finder.Finder or None
        The configured finder where `mpiutil.rank0` is true, None everywhere
        else.
    """

    instrument_name = spec["instrument"]
    intervals = spec["timerange"]

    # Only the rank flagged by `mpiutil.rank0` talks to the database; all other
    # ranks hand back None.
    if not mpiutil.rank0:
        return None

    # Look up the database row for the requested instrument
    inst_query = di.ArchiveInst.select().where(
        di.ArchiveInst.name == instrument_name
    )
    inst_obj = inst_query.get()

    # A single interval is promoted to a one-element list
    if not isinstance(intervals, list):
        intervals = [intervals]

    # Outer bounds that enclose every requested interval
    earliest = min(iv["start"] for iv in intervals)
    latest = max(iv["end"] for iv in intervals)

    # An explicit argument wins over the value stored in the dataspec
    if node_spoof is None:
        node_spoof = spec.get("node_spoof")

    fi = finder.Finder(node_spoof=node_spoof)

    # First bound the search, then add each individual interval
    fi.set_time_range(earliest, latest)
    for iv in intervals:
        fi.include_time_interval(iv["start"], iv["end"])

    # Keep only acquisitions that belong to the requested instrument
    fi.filter_acqs(di.ArchiveAcq.inst == inst_obj)

    return fi

示例#2

0

显示文件

    def set_acq_list(self):
        """Populate the night-time and sun-flagged finders and acquisition lists.

        Four attributes are set:

        - ``night_finder`` / ``night_acq_list``: a finder, and the result of
          its ``get_results()``, covering night-time data between ``self.t1``
          and ``self.t2`` (daytime excluded).
        - ``finder`` / ``acq_list``: a finder, and the result of its
          ``get_results()``, covering data between ``self.t1`` and
          ``self.t2`` with sunrise, sun transit and sunset removed.
        """

        # Common starting point: "pathfinder" correlator data in [t1, t2]
        base = finder.Finder(node_spoof=_DEFAULT_NODE_SPOOF)
        base.filter_acqs((data_index.ArchiveInst.name == "pathfinder"))
        base.only_corr()
        base.set_time_range(self.t1, self.t2)

        # Night-only selection is made on an independent copy so that `base`
        # can still be used for the sun-flagged selection below
        night = copy.deepcopy(base)
        night.exclude_daytime()

        self.night_finder = night
        self.night_acq_list = night.get_results()

        # Fractional month of t1 (the day is folded in as day / 30)
        month = ephemeris.unix_to_datetime(self.t1).month
        day = ephemeris.unix_to_datetime(self.t1).day
        frac_month = month + float(day) / 30.0

        def _seasonal_window(rise_month, fall_month, steepness, span, floor):
            # Smaller of a rising and a falling arctan step in fractional
            # month, each scaled to the range [floor, floor + span].
            rising = (np.arctan(
                (frac_month - rise_month) * steepness) + np.pi / 2.0) * span / np.pi + floor
            falling = (np.pi / 2.0 - np.arctan(
                (frac_month - fall_month) * steepness)) * span / np.pi + floor
            return np.minimum(rising, falling)

        # Window around sun transit, and window around sunrise / sunset.
        # NOTE(review): presumably both are in seconds -- confirm against
        # Finder.exclude_sun.
        transit_window = _seasonal_window(3.0, 11.0, 3.0, 10500.0, 1500.0)
        rise_set_window = _seasonal_window(4.0, 10.0, 5.0, 2100.0, 6000.0)

        base.exclude_sun(time_delta=transit_window,
                         time_delta_rise_set=rise_set_window)

        self.finder = base
        self.acq_list = base.get_results()

示例#3

0

显示文件

    def get_results(self, src, tdelt=2800):
        """Return the acquisitions that contain transits of `src`.

        When ``self.finder`` is not None a deep copy of it is used, so the
        stored finder is left untouched. Otherwise a new finder is created
        for "pathfinder" correlator data between ``self.t1`` and ``self.t2``.
        In both cases the finder is further restricted to transits of `src`
        before it is queried.

        Parameters
        ----------
        src
            Source handed to ``include_transits``.
        tdelt : optional
            Handed to ``include_transits`` as ``time_delta``. Default 2800.

        Returns
        -------
        The value returned by the finder's ``get_results()``.
        """

        if self.finder is None:
            # Nothing cached: build the base selection from scratch
            query = finder.Finder(node_spoof=_DEFAULT_NODE_SPOOF)
            query.filter_acqs((data_index.ArchiveInst.name == "pathfinder"))
            query.only_corr()
            query.set_time_range(self.t1, self.t2)
        else:
            # Work on a copy so the transit restriction does not leak into
            # the finder stored on the instance
            query = copy.deepcopy(self.finder)

        query.include_transits(src, time_delta=tdelt)
        return query.get_results()

示例#4

0

显示文件

    def setup(self):
        """Query the database for correlator files and store them in ``self.files``.

        The query runs only where ``self.comm.rank == 0``; the resulting list
        is then broadcast to every rank of ``self.comm``. ``self.files`` ends
        up as a list of file lists: one entry per acquisition, or several
        entries when an acquisition holds more than ``self.max_num_files``
        files and is split into groups. Acquisitions with fewer than
        ``self.min_num_files`` files are dropped.
        """
        from ch_util import layout
        from chimedb import data_index as di

        def _choose_group_size(n, m, accept):
            # Find a group size close to `m` that leaves a small remainder
            # when `n` files are divided into groups of that size.
            if (n % m) < accept:
                return m
            lower, upper = m - 1, m + 1
            # Widen the search symmetrically around `m` until one of the two
            # candidates leaves a remainder no larger than `accept`
            while ((n % lower) > accept) and ((n % upper) > accept):
                lower -= 1
                upper += 1
            return lower if (n % lower) < (n % upper) else upper

        # Only rank 0 queries the database; everyone else receives the result
        files = None
        if self.comm.rank == 0:

            layout.connect_database()

            fi = finder.Finder(node_spoof=self.node_spoof)
            fi.only_corr()
            if self.accept_all_global_flags:
                fi.accept_all_global_flags()
            fi.set_time_range(self.start_time, self.end_time)
            fi.filter_acqs(di.ArchiveInst.name == self.instrument)

            files = []
            for index, _ in enumerate(fi.acqs):

                # Flatten the per-interval file lists of this acquisition
                acq_files = []
                for interval in fi.get_results_acq(index):
                    acq_files.extend(interval[0])
                count = len(acq_files)

                # Too few files: skip the acquisition altogether
                if (self.min_num_files is not None) and (count < self.min_num_files):
                    continue

                # Small enough to be kept as a single group
                if (self.max_num_files is None) or (count <= self.max_num_files):
                    files.append(acq_files)
                    continue

                # Otherwise split into groups of roughly max_num_files files,
                # tolerating a remainder of up to ~10% of that size
                size = _choose_group_size(
                    count,
                    self.max_num_files,
                    max(1, int(0.10 * self.max_num_files)),
                )

                n_groups = count // size
                offset = (count % size) // 2

                # Group boundaries; the first and last groups absorb the
                # leftover files at either end
                edges = [offset + k * size for k in range(n_groups + 1)]
                edges[0], edges[-1] = 0, count

                files.extend(
                    acq_files[lo:hi] for lo, hi in zip(edges[:-1], edges[1:])
                )

        # Share the result with all ranks and wait until everyone has it
        files = self.comm.bcast(files, root=0)
        self.comm.Barrier()

        self.files = files

示例#5

0

显示文件

    def setup(self):
        """Fetch the files in the specified run.

        Where `mpiutil.rank0` is true, the global flag of category "run"
        named ``self.run_name`` is looked up, its event start and end times
        and its instrument are read, and a `Finder` restricted to correlator
        data of that instrument within that time range is queried. The
        sorted file list is then broadcast to all ranks.

        Returns
        -------
        files : list
            Sorted list of files to load, as broadcast from rank 0.

        Raises
        ------
        RuntimeError
            Raised inside the `mpiutil.rank0` branch if no active global flag
            matches the run name, if more than one does, or if the run has no
            instrument set.
        """
        from ch_util import layout
        from chimedb import data_index as di

        files = None

        # Query the database on rank=0 only, and broadcast to everywhere else
        if mpiutil.rank0:

            layout.connect_database()

            cat_run = (
                layout.global_flag_category.select()
                .where(layout.global_flag_category.name == "run")
                .get()
            )

            # Find run in database
            run_query = layout.global_flag.select().where(
                layout.global_flag.category == cat_run,
                layout.global_flag.name == self.run_name,
            )

            # Make sure we only have flags with active events
            run_query = (
                run_query.join(layout.graph_obj)
                .join(layout.event)
                .where(layout.event.active)
            )

            # Count once so both checks are made against the same number and
            # the COUNT query is not issued a second time
            n_runs = run_query.count()
            if n_runs == 0:
                raise RuntimeError("Run %s not found in database" % self.run_name)
            if n_runs > 1:
                raise RuntimeError(
                    "Multiple global flags found in database for run %s" % self.run_name
                )

            run = run_query.get()

            # Fetch run start and end time
            run_event = run.event().get()
            start, end = run_event.start.time, run_event.end.time

            # Fetch the instrument
            if run.inst is None:
                raise RuntimeError("Instrument is not specified in database.")
            inst_obj = run.inst

            # Create a finder object limited to correlator data in the run's
            # time range
            fi = finder.Finder(node_spoof=self.node_spoof)
            fi.only_corr()
            fi.set_time_range(start, end)

            # Only include the required instrument
            fi.filter_acqs(di.ArchiveAcq.inst == inst_obj)

            # Pull out the results and extract all the files
            results = fi.get_results()
            files = sorted(fname for result in results for fname in result[0])

        files = mpiutil.world.bcast(files, root=0)

        # Make sure all ranks have the file list before returning
        mpiutil.world.Barrier()

        return files

示例#6

0

显示文件

    def setup(self):
        """Query the database and fetch the files.

        The query is built and executed only where `mpiutil.rank0` is true;
        the result is then broadcast to all ranks. If ``self.run_name`` is
        set, the work is delegated to ``self.QueryRun()`` instead.

        Returns
        -------
        files : list
            When ``self.return_intervals`` is false, a sorted flat list of
            files to load. Otherwise the finder results themselves, sorted
            by ``x[1][0]`` (presumably the interval start time -- confirm
            against `Finder.get_results`).

        Raises
        ------
        ValueError
            If more than one ``time_delta`` is given for
            ``include_transits`` / ``exclude_transits`` but fewer than the
            number of sources.
        """

        def _apply_transits(apply, sources, time_delta, option):
            # Call `apply` (include_transits or exclude_transits) once per
            # source, pairing each source with its time_delta.
            ntime_delta = len(time_delta)
            if 1 < ntime_delta < len(sources):
                raise ValueError(
                    "Must specify `time_delta` for each source in "
                    "`%s` or provide single value for all sources." % option
                )
            for ss, src in enumerate(sources):
                # A single time_delta is reused for every source; an empty
                # list means no time_delta is supplied at all
                tdelta = time_delta[ss % ntime_delta] if ntime_delta > 0 else None
                # Source names are resolved through the ephemeris catalogue
                bdy = ephemeris.source_dictionary[src] if isinstance(src, str) else src
                apply(bdy, time_delta=tdelta)

        files = None

        # Query the database on rank=0 only, and broadcast to everywhere else
        if mpiutil.rank0:

            # NOTE(review): this returns from inside the rank-0 branch, before
            # the bcast/Barrier below; it relies on QueryRun performing the
            # matching collective calls itself -- confirm.
            if self.run_name:
                return self.QueryRun()

            layout.connect_database()

            f = finder.Finder(node_spoof=self.node_spoof)

            f.filter_acqs(di.AcqType.name == self.acqtype)

            # The instrument filter is applied here only, and only when an
            # instrument was actually requested.
            if self.instrument is not None:
                f.filter_acqs(di.ArchiveInst.name == self.instrument)

            if self.accept_all_global_flags:
                f.accept_all_global_flags()

            # Use start and end times if set, or try and use the start and end CSDs
            # NOTE(review): st/et stay unbound if neither start_time nor
            # start_csd is set, so set_time_range below would fail.
            if self.start_time:
                st, et = self.start_time, self.end_time
            elif self.start_csd:
                st = ephemeris.csd_to_unix(self.start_csd)
                et = (
                    ephemeris.csd_to_unix(self.end_csd)
                    if self.end_csd is not None
                    else None
                )

            f.set_time_range(st, et)

            if self.start_RA and self.end_RA:
                f.include_RA_interval(self.start_RA, self.end_RA)
            elif self.start_RA or self.end_RA:
                # Exactly one of the two bounds was given
                self.log.warning(
                    "One but not both of start_RA and end_RA are set. Ignoring both."
                )

            if self.exclude_daytime:
                f.exclude_daytime()

            if self.exclude_sun:
                f.exclude_sun(
                    time_delta=self.exclude_sun_time_delta,
                    time_delta_rise_set=self.exclude_sun_time_delta_rise_set,
                )

            if self.include_transits:
                _apply_transits(
                    f.include_transits,
                    self.include_transits,
                    self.include_transits_time_delta,
                    "include_transits",
                )

            if self.exclude_transits:
                _apply_transits(
                    f.exclude_transits,
                    self.exclude_transits,
                    self.exclude_transits_time_delta,
                    "exclude_transits",
                )

            if self.source_26m:
                f.include_26m_obs(self.source_26m)

            if len(self.exclude_data_flag_types) > 0:
                f.exclude_data_flag_type(self.exclude_data_flag_types)

            results = f.get_results()
            if not self.return_intervals:
                # Flatten the per-interval file lists into one sorted list
                files = [fname for result in results for fname in result[0]]
                files.sort()
            else:
                files = results
                files.sort(key=lambda x: x[1][0])

        files = mpiutil.world.bcast(files, root=0)

        # Make sure all ranks have the file list before returning
        mpiutil.world.Barrier()

        return files