Пример #1
0
 def get_valid_period(self, ts, suggested_start, suggested_end):
     """
     Determine the period to use given suggested start and end dates (generally the dates for an iteration).
     A suggested date is used whenever it is specified, even if it is outside the period of the time series.
     When a suggestion is None, the corresponding date from the time series is used instead.
     New DateTime instances are created to protect against changing the original dates.
     No reconciliation is attempted, for example when the time series start is later than a suggested end.
     :param ts: Time series of interest, may be None.
     :param suggested_start: Suggested start date, or None to use the start of the time series.
     :param suggested_end: Suggested end date, or None to use the end of the time series.
     :return: A new TSLimits instance carrying the period dates.  A date for which neither a suggestion
     nor a time series date is available is not passed to the setter.
     """
     dates = TSLimits()

     start = suggested_start
     if (start is None) and (ts is not None):
         start = ts.get_date1()
     if start is not None:
         # The date is passed positionally to the setter (the limits setter for date2 names its
         # parameter 'date2', not 'date_time') and by keyword to DateTime, whose first positional
         # argument is used elsewhere in this code as a behavior flag.
         dates.set_date1(DateTime(date_time=start))

     end = suggested_end
     if (end is None) and (ts is not None):
         end = ts.get_date2()
     if end is not None:
         dates.set_date2(DateTime(date_time=end))

     return dates
Пример #2
0
 def set_date2(self, t):
     """
     Store the last date in the period as a new DateTime built from the given date.
     Unless the data interval base is irregular, the precision of the stored date is
     set to the data interval base.
     :param t: Last date in period; None leaves the current value unchanged.
     """
     if t is None:
         return
     end_date = DateTime(date_time=t)
     self.date2 = end_date
     if self.data_interval_base != TimeInterval.IRREGULAR:
         # For irregular data the precision of the DateTime itself is kept.
         end_date.set_precision(self.data_interval_base)
Пример #3
0
 def set_date2_original(self, t):
     """
     Store the last date of the original data as a new DateTime built from the given date.
     Unless the data interval base is irregular, the precision of the stored date is
     set to the data interval base.
     :param t: Last date in period in the original data; None leaves the current value unchanged.
     """
     if t is None:
         return
     original_end = DateTime(date_time=t)
     self.date2_original = original_end
     if self.data_interval_base != TimeInterval.IRREGULAR:
         # Irregular data keep whatever precision the DateTime already has.
         original_end.set_precision(self.data_interval_base)
Пример #4
0
 def set_date1(self, t):
     """
     Store the first date in the period as a new DateTime built from the given date.
     Unless the data interval base is irregular, the precision of the stored date is
     set to the data interval base.
     :param t: First date in period; None leaves the current value unchanged.
     """
     if t is None:
         return
     start_date = DateTime(date_time=t)
     self.date1 = start_date
     if self.data_interval_base != TimeInterval.IRREGULAR:
         # Irregular data keep whatever precision the DateTime already has.
         start_date.set_precision(self.data_interval_base)
Пример #5
0
    def allocate_data_space(self, value=None):
        """
        Allocate the data space.  The start and end dates and the interval multiplier should have been set.
        Data are stored as one list per calendar month of the period, each holding one value per day
        of that month.  When data flags are enabled a parallel structure of empty strings is allocated.
        :param value: The value to initialize the time series, if None use the time series missing value.
        A value of 0 (or 0.0) is a valid initial value and is used as given.
        :return: 0 if successful, 1 if failure (dates not set, interval multiplier other than 1,
        or a period that does not span at least one month).
        """
        logger = logging.getLogger(__name__)

        # Only None means "use the missing value"; a truthiness test would also discard 0.0.
        if value is None:
            value = self.missing

        if not self.date1 or not self.date2:
            logger.warning("No dates set for memory allocation.")
            return 1
        if self.data_interval_mult != 1:
            # Do not know how to handle N-day interval...
            message = "Only know how to handle 1-day data, not " + str(self.data_interval_mult) + "Day"
            logger.warning(message)
            return 1

        # Number of calendar months touched by the period, inclusive of the first and last month.
        nmonths = ((self.date2.get_year() - self.date1.get_year()) * 12 +
                   (self.date2.get_month() - self.date1.get_month()) + 1)

        if nmonths <= 0:
            logger.warning("TS has 0 months POR, maybe dates haven't been set yet")
            return 1

        # Placeholders only, each month gets its own independent list below
        # (multiplying a nested list would make every month share one inner list).
        self.data = [None] * nmonths
        if self.has_data_flags:
            self.data_flags = [None] * nmonths

        # Set the counter date to match the starting month.  This date is used
        # to determine the number of days in each month.
        date = DateTime(DateTime.DATE_FAST)
        date.set_month(self.date1.get_month())
        date.set_year(self.date1.get_year())

        for imon in range(nmonths):
            # Handle 1-day data, otherwise a failure was returned above.
            # Here would change the number of values if N-day was supported.
            nvals = TimeUtil.num_days_in_month_from_datetime(date)

            # Fill with the initial value for each day in the month.
            self.data[imon] = [value] * nvals
            if self.has_data_flags:
                self.data_flags[imon] = [""] * nvals

            date.add_month(1)

        nactual = DayTS.calculate_data_size(self.date1, self.date2, self.data_interval_mult)
        self.set_data_size(nactual)

        return 0
Пример #6
0
 def set_min_value_date(self, min_value_date):
     """
     Record the date of the minimum data value as a new DateTime built from the given date,
     then call check_dates().  check_dates() is called even when no date is given.
     @param min_value_date The date corresponding to the minimum data value; None leaves
     the stored date unchanged.
     """
     date_given = min_value_date is not None
     if date_given:
         self.min_value_date = DateTime(date_time=min_value_date)
     self.check_dates()
Пример #7
0
 def get_date2(self):
     """
     Get the last date in the period of record.
     :return: A new DateTime built from the stored last date, or None when no last date is stored.
     """
     last_date = self.date2
     return DateTime(date_time=last_date) if last_date is not None else None
Пример #8
0
 def set_non_missing_data_date2(self, date):
     """
     Record the date of the last non-missing data value as a new DateTime built from the given date,
     then call check_dates().  check_dates() is called even when no date is given.
     @param date The date for the last non-missing data value; None leaves the stored date unchanged.
     """
     date_given = date is not None
     if date_given:
         self.non_missing_data_date2 = DateTime(date_time=date)
     self.check_dates()
Пример #9
0
 def set_date2(self, date2):
     """
     Record the last date for the time series as a new DateTime built from the given date,
     then call check_dates().  check_dates() is called even when no date is given.
     This date is used for memory allocation.
     @param date2 The last date for the time series; None leaves the stored date unchanged.
     """
     date_given = date2 is not None
     if date_given:
         self.date2 = DateTime(date_time=date2)
     self.check_dates()
Пример #10
0
 def get_date2_original(self):
     """
     Get the last date in the original period of record.
     :return: A new DateTime built from the stored original last date (per the original notes,
     generally equal to or later than the period actually read), or None when none is stored.
     """
     original_end = self.date2_original
     return DateTime(date_time=original_end) if original_end is not None else None
Пример #11
0
 def get_non_missing_data_date2(self):
     """
     Get the date of the last non-missing data value in the time series.
     @return A new DateTime built from the stored date, or None when no date is stored.
     """
     stored = self.non_missing_data_date2
     if stored is not None:
         return DateTime(date_time=stored)
     return stored
Пример #12
0
 def get_min_value_date(self):
     """
     Get the date of the minimum data value for the time series.
     @return A new DateTime built from the stored date, or None when no date is stored.
     """
     stored = self.min_value_date
     if stored is not None:
         return DateTime(date_time=stored)
     return stored
Пример #13
0
 def get_date2(self):
     """
     Get the last date for the time series according to the memory allocation.
     @return A new DateTime built from the stored last date, or None when no date is stored.
     """
     stored = self.date2
     if stored is not None:
         return DateTime(date_time=stored)
     return stored
Пример #14
0
    def __init__(self, limits=None):
        """
        Construct the limits, either with default values or as a copy of another instance.
        All data members are first set to None (data units to an empty string) and initialize()
        is called.  When another instance is given, its values are then copied.
        :param limits: Limits instance to copy, or None for default construction.  Dates are copied
        as new DateTime instances; the time series reference is shared, not copied.
        """

        # Data members...

        self.ts = None  # Time series being studied.
        self.date1 = None
        self.date2 = None
        self.flags = None  # Flags to control behavior.
        self.max_value = None
        self.max_value_date = None
        self.mean = None
        self.median = None
        self.min_value = None
        self.min_value_date = None
        self.missing_data_count = None
        self.non_missing_data_count = None
        self.non_missing_data_date1 = None
        self.non_missing_data_date2 = None
        self.skew = None
        # Named consistently with the copy logic below, which reads and writes 'std_dev'.
        self.std_dev = None
        self.sum = None
        self.data_units = ""  # Data units (just copy from TS at the time of creation).

        self.found = False

        self.initialize()
        if limits is None:
            return

        # Copy constructor.  Each date must be assigned to the instance (not a local name),
        # otherwise the copied value is silently discarded.
        if limits.date1 is not None:
            self.date1 = DateTime(date_time=limits.date1)
        if limits.date2 is not None:
            self.date2 = DateTime(date_time=limits.date2)
        self.max_value = limits.max_value
        if limits.max_value_date is not None:
            self.max_value_date = DateTime(date_time=limits.max_value_date)
        self.min_value = limits.min_value
        if limits.min_value_date is not None:
            self.min_value_date = DateTime(date_time=limits.min_value_date)
        if limits.non_missing_data_date1 is not None:
            self.non_missing_data_date1 = DateTime(date_time=limits.non_missing_data_date1)
        if limits.non_missing_data_date2 is not None:
            self.non_missing_data_date2 = DateTime(date_time=limits.non_missing_data_date2)
        self.non_missing_data_count = limits.non_missing_data_count
        self.missing_data_count = limits.missing_data_count
        self.mean = limits.mean
        self.median = limits.median
        self.sum = limits.sum
        self.found = limits.found
        self.flags = limits.flags
        self.skew = limits.skew
        self.std_dev = limits.std_dev
        self.ts = limits.ts
Пример #15
0
    def calculate_data_limits(self, ts, start0, end0, refresh_flag):
        """
        Calculate the total data limits for a time series between two dates and store them in this instance.
        The following are determined and set through the setters of this instance: the period dates,
        maximum and minimum values with their dates, first and last non-missing data dates, missing and
        non-missing counts, sum, mean, and (when enough values exist) median, standard deviation and skew.
        This code was taken from the TSUtil.getDataLimits method.
        @param ts Time series of interest.
        @param start0 Starting date for the check, or None to use the start of the time series.
        @param end0 Ending date for the check, or None to use the end of the time series.
        @param refresh_flag Indicates whether the time series should be refreshed first
        (in general this is used only within the TS package and the version of this
        routine without the flag should be called).
        @exception Exception with message "Error computing limits." if the time series is None, has
        no data, has no non-missing values in the period, or any other error occurs.
        """
        logger = logging.getLogger(__name__)
        debug = False

        try:
            # Main try...
            if ts is None:
                message = "NULL time series"
                logger.warning(message)
                raise ValueError(message)

            # Running results.  The sum starts as the missing value and is reset by the first real value.
            missing = ts.get_missing()
            total = missing
            missing_count = 0
            non_missing_count = 0
            found = False
            max_value = 0.0
            min_value = 0.0
            max_date = None
            min_date = None
            non_missing_data_date1 = None
            non_missing_data_date2 = None

            # Get valid date limits because the ones passed in may have been None...
            valid_dates = self.get_valid_period(ts, start0, end0)
            start = valid_dates.get_date1()
            end = valid_dates.get_date2()

            # Make sure that the time series has current limits...
            base = ts.get_data_interval_base()
            mult = ts.get_data_interval_mult()
            if refresh_flag:
                # Force a refresh of the time series.
                ts.refresh()

            # Get the variables that are used often in this function.
            ts_date1 = ts.get_date1()
            ts_date2 = ts.get_date2()

            # Figure out if we are treating data <= 0 as missing...
            ignore_lezero = (self.flags & TSLimits.IGNORE_LESS_THAN_OR_EQUAL_ZERO) != 0

            # Loop through the dates and get max and min data values
            # TODO SAM 2010-06-15 Need to consolidate code to use iterator
            if base == TimeInterval.IRREGULAR:
                # The data points are retrieved as a list and scanned directly.
                data_array = ts.get_data()
                if data_array is None:
                    message = "Null data for " + str(ts)
                    logger.warning(message)
                    raise ValueError(message)

                for ptr in data_array:
                    date = ptr.get_date()

                    # NOTE(review): this scan is bounded by the time series dates while the backward
                    # scan below is bounded by start/end - confirm that the difference is intended.
                    if date.less_than(ts_date1):
                        # Still looking for data...
                        continue
                    elif date.greater_than(ts_date2):
                        # No need to continue processing...
                        break

                    value = ptr.get_data_value()

                    if ts.is_data_missing(value) or (ignore_lezero and (value <= 0.0)):
                        # The value is missing
                        missing_count += 1
                        continue

                    # Else, data value is not missing...
                    if ts.is_data_missing(total):
                        # Reset the sum...
                        total = value
                    else:
                        # Add to the sum...
                        total += value
                    non_missing_count += 1

                    if found:
                        # Already found the first non-missing point so all that is needed is to check
                        # the limits.  These should only result in new DateTime a few times...
                        if value > max_value:
                            max_value = value
                            max_date = DateTime(date_time=date)
                        if value < min_value:
                            min_value = value
                            min_date = DateTime(date_time=date)
                    else:
                        # Set the limits to the first value found...
                        max_value = value
                        max_date = DateTime(date_time=date)
                        min_value = value
                        min_date = max_date
                        non_missing_data_date1 = max_date
                        non_missing_data_date2 = max_date
                        found = True

                # Now search backwards to find the last non-missing date.
                # Every point, including the first one in the list, must be considered.
                if found:
                    for ptr in reversed(data_array):
                        date = ptr.get_date()
                        value = ptr.get_data_value()
                        if date.greater_than(end):
                            # Have not found data...
                            continue
                        elif date.less_than(start):
                            # Passed start...
                            break
                        if not ts.is_data_missing(value) and (not ignore_lezero or (value > 0.0)):
                            # Found the one date we are after...
                            non_missing_data_date2 = DateTime(date_time=date)
                            break
            else:
                # A regular TS... easier to iterate...
                # First loop through and find the data limits and the minimum non-missing date.
                # The date is advanced at the end of each pass so that the loop condition is checked
                # before a value is read and no value beyond the end date is processed.
                t = DateTime(date_time=start, flag=DateTime.DATE_FAST)
                while t.less_than_or_equal_to(end):
                    value = ts.get_data_value(t)

                    if ts.is_data_missing(value) or (ignore_lezero and (value <= 0.0)):
                        # The value is missing
                        missing_count += 1
                    else:
                        # Data value is not missing...
                        if ts.is_data_missing(total):
                            # Reset the sum...
                            total = value
                        else:
                            # Add to the sum...
                            total += value
                        non_missing_count += 1

                        if found:
                            # Already found the first non-missing point so only check the limits.
                            if value > max_value:
                                max_value = value
                                max_date = DateTime(date_time=t)
                            if value < min_value:
                                min_value = value
                                # The date is only updated when a new minimum is found.
                                min_date = DateTime(date_time=t)
                        else:
                            # First non-missing point so set the initial values...
                            date = DateTime(date_time=t)
                            max_value = value
                            max_date = date
                            min_value = value
                            min_date = date
                            non_missing_data_date1 = date
                            non_missing_data_date2 = date
                            found = True

                    t.add_interval(base, mult)

                # Now loop backwards and find the last non-missing value...
                if found:
                    t = DateTime(date_time=end, flag=DateTime.DATE_FAST)
                    while t.greater_than_or_equal_to(start):
                        value = ts.get_data_value(t)
                        if not ts.is_data_missing(value) and (not ignore_lezero or (value > 0.0)):
                            # The value is not missing...
                            non_missing_data_date2 = DateTime(date_time=t)
                            break
                        t.add_interval(base, -mult)

            # TODO SAM 2010-06-15 This is a performance hit, but not too bad
            # TODO SAM 2010-06-15 Consider treating other statistics similarly but need to define unit tests
            # TODO SAM 2010-06-15 This code would need to be changed if doing Lag-1 correlation because order matters
            # For newly added statistics, use helper method to get data, ignoring missing...
            data_array = self.to_array(ts, start, end, 0, False)
            n_data_array = len(data_array)
            if ignore_lezero:
                # Move values <= 0 to the end of the array and shrink the size that is used.
                # The position is only advanced when the current value is kept, so that a value
                # swapped in from the end is checked as well.
                i = 0
                while i < n_data_array:
                    if data_array[i] <= 0.0:
                        n_data_array -= 1
                        data_array[i], data_array[n_data_array] = data_array[n_data_array], data_array[i]
                    else:
                        i += 1

            if n_data_array > 0:
                self.set_median(MathUtil.median(n_data_array, data_array))

            # These statistics are best-effort: a failure leaves the statistic unset.
            if n_data_array > 1:
                try:
                    self.set_std_dev(MathUtil.standard_deviation(n_data_array, data_array))
                except Exception as e:
                    # Likely due to small sample size
                    logger.debug("Unable to compute standard deviation: " + str(e))
            if n_data_array > 2:
                try:
                    self.set_skew(MathUtil.skew(n_data_array, data_array))
                except Exception as e:
                    # Likely due to small sample size
                    logger.debug("Unable to compute skew: " + str(e))

            if not found:
                message = "\"" + ts.get_identifier_string() + "\": problems finding limits, whole POR missing!"
                logger.warning(message)
                raise ValueError(message)

            if debug:
                logger.debug("Overall date limits are: " + str(start) + " to " + str(end))
                logger.debug("Found limits to be: " + str(min_value) + " on " + str(min_date) + " to " +
                             str(max_value) + " on " + str(max_date))
                logger.debug("Found non-missing data dates to be: " + str(non_missing_data_date1) + " -> " +
                             str(non_missing_data_date2))

            # Set the basic information...
            self.set_date1(start)
            self.set_date2(end)
            self.set_max_value(max_value, max_date)
            self.set_min_value(min_value, min_date)
            self.set_non_missing_data_date1(non_missing_data_date1)
            self.set_non_missing_data_date2(non_missing_data_date2)
            self.set_missing_data_count(missing_count)
            self.set_non_missing_data_count(non_missing_count)
            if not ts.is_data_missing(total) and (non_missing_count > 0):
                mean = total/float(non_missing_count)
            else:
                mean = missing
            self.set_sum(total)
            self.set_mean(mean)
        except Exception as e:
            message = "Error computing limits."
            logger.warning(message)
            # Put in debug because output sometimes is overwhelming when data are not available.
            if debug:
                logger.warning(e)
            raise Exception(message)
Пример #16
0
class TS(object):
    """
    This class is the base class for all time series classes.
    """
    def __init__(self, ts=None):
        """
        Set every data member to its default value and then call initialize().
        :param ts: Not referenced in the body of this constructor.
        """
        # General string to use for status of the time series (use as appropriate by
        # high-level code).  This value is volatile - do not assume its value will remain
        # for long periods.  This value is not used much now that the GRTS package has been updated.
        self.status = None

        # Beginning date/time for data, at a precision appropriate for the data.
        # Missing data may be included in the period.
        self.date1 = None

        # Original starting date/time for data, at a precision appropriate for the data.
        # For example, this may be used to indicate the period in a database, which is
        # different than the period that was actually queried and saved in memory.
        self.date1_original = None

        # Ending date/time for data, at a precision appropriate for the data.
        # Missing data may be included in the period.
        self.date2 = None

        # Original ending date/time for data, at a precision appropriate for the data.
        # For example, this may be used to indicate the period in a database, which is
        # different than the period that was actually queried and saved in memory.
        self.date2_original = None

        # The data interval base. See TimeInterval.HOUR, etc.
        self.data_interval_base = None

        # The base interval multiplier (what to multiply the interval base by to get the
        # real interval).  For example 15-minute data would have
        # an interval base of TimeInterval.MINUTE and an interval multiplier of 15.
        self.data_interval_mult = None

        # The data interval in the original data source (for example, the source may be
        # in days but the current time series is in months).
        self.data_interval_base_original = None

        # The data interval multiplier in the original data source.
        self.data_interval_mult_original = None

        # Number of data values inclusive of date1 and date2.  Set in the
        # allocate_data_space() method.  This is useful for general information.
        self.data_size = None

        # Data units. A list of units and conversions is typically maintained in the DataUnits* classes.
        self.data_units = None

        # Units in the original data source (e.g., the current data may be in CFS and the
        # original data were in CMS).
        self.data_units_original = None

        # Indicates whether data flags are being used with data.  If enabled, the derived
        # classes that store data should override the allocate_data_space()
        # method to create a data array to track the data flags.
        self.has_data_flags = False

        # Indicate whether data flags should use String.intern()
        # - used in Java to reuse string instances
        # self.internDataFlagStrings = True

        # Version of the data format (mainly for use with files).
        self.version = None

        # Input source information.  Filename if read from file or perhaps a database
        # name and table (e.g., HydroBase.daily_flow).  This is the actual location read,
        # which should not be confused with the TSIdent storage name (which may not be fully expanded).
        self.input_name = None

        # Time series identifier, which provides a unique and absolute handle on the time series.
        # An alias is provided within the TSIdent class.
        self.tsid = TSIdent()

        # Indicates whether the time series data have been modified by setting
        # data values.  Call refresh() to update the limits.  This is not used with header data.
        self.dirty = None

        # Indicates whether the time series is editable.  This primarily applies to the
        # data (not the header information).  UI components can check to verify whether
        # users should be able to edit the time series.  It is not intended to be checked
        # by low-level code (manipulation is always granted).
        self.editable = False

        # A short description (e.g, "XYZ gage at ABC river").
        self.description = None

        # Comments that describe the data.  This can be anything from an original data
        # source.  Sometimes the comments are created on the fly to generate a standard
        # header (e.g., describe drainage area).
        self.comments = []

        # List of metadata about data flags.  This provides a description about flags
        # encountered in the time series.
        self.dataFlagMetadataList = []

        # History of time series.  This is not the same as the comments but instead
        # chronicles how the time series is manipulated in memory.  For example the first
        # genesis note may be about how the time series was read.  The second may
        # indicate how it was filled.  Many TSUtil methods add to the genesis.
        self.genesis = []

        # TODO SAM 2010-09-21 Evaluate whether generic "Attributable" interface should be implemented instead.
        # Properties for the time series beyond the built-in properties.  For example, location
        # information like county and state can be set as a property.
        self.property_HashMap = None

        # The missing data value. Default for some legacy formats is -999.0 but increasingly NaN is used.
        self.missing = None

        # Lower bound on the missing data value (for quick comparisons and when missing data ranges are used).
        self.missingl = None

        # Upper bound on the missing data value (for quick comparisons and when missing data ranges are used).
        self.missingu = None

        # Limits of the data. This also contains the date limits other than the original dates.
        # self.data_limits = TSLimits()

        # Limits of the original data. Currently only used by apps like TSTool.
        # self.data_limits_original = TSLimits()

        # Legend to show when plotting or tabulating a time series. This is generally a short legend.
        self.legend = None

        # Legend to show when plotting or tabulating a time series.  This is usually a
        # long legend.  This may be phased out now that the GRTS package has been phased in for visualization.
        self.extended_legend = None

        # Indicates whether time series is enabled (used to "comment" out of plots, etc).
        # This may be phased out.
        self.enabled = None

        # Indicates whether time series is selected (e.g., as result of a query).
        # Often time series might need to be programmatically selected (e.g., with TSTool
        # selectTimeSeries() command) to simplify output by other commands.
        self.selected = None

        # Used for troubleshooting
        self.debug = False

        # Remaining setup is delegated to initialize(), which is defined outside of this constructor.
        self.initialize()

    def add_to_genesis(self, genesis):
        """
        Append one entry to the genesis list, the running notes on how the time series
        was read and manipulated (for example by methods that fill data or change the period).
        :param genesis: Comment string to add to genesis information; None is ignored.
        """
        if genesis is None:
            return
        self.genesis.append(genesis)

    def allocate_data_space(self):
        """
        Placeholder for allocating the data space of the time series.
        This base implementation allocates nothing: it logs a warning and reports failure.
        It is meant to be overridden in derived classes that store data for specific intervals,
        which need the data interval base and multiplier and the period dates to be set first
        (and the data flag setting, if flags are used).
        :return: Always 1 (failure) from this base implementation; overriding methods return
        0 if successful allocating memory, non-zero if failure.
        """
        message = "TS.allocate_data_space() is virtual, define in derived classes."
        logging.getLogger(__name__).warning(message)
        return 1

    def get_data_interval_base(self):
        """
        Get the base of the data interval (see TimeInterval.HOUR, etc.).
        :return: The stored data interval base.
        """
        interval_base = self.data_interval_base
        return interval_base

    def get_data_interval_mult(self):
        """
        Get the multiplier that is applied to the data interval base.
        :return: The stored data interval multiplier.
        """
        interval_mult = self.data_interval_mult
        return interval_mult

    def get_data_units(self):
        """
        Get the units of the data.
        :return: The stored data units.
        """
        units = self.data_units
        return units

    def get_data_units_original(self):
        """
        Get the units of the data in the original data source.
        :return: The stored original data units.
        """
        original_units = self.data_units_original
        return original_units

    def get_date1(self):
        """
        Get the first date in the period of record.  A new DateTime is built from the
        stored date, so changing the returned object does not change the time series.
        :return: A copy of the first date in the period of record, or None if it is not set.
        """
        return None if self.date1 is None else DateTime(date_time=self.date1)

    def get_date1_original(self):
        """
        Get the first date in the original period of record.  A new DateTime is built
        from the stored date, so the caller cannot change the time series through it.
        :return: A copy of the first date of the original data source (generally equal to or
        earlier than the time series that is actually read), or None if it is not set.
        """
        return None if self.date1_original is None else DateTime(date_time=self.date1_original)

    def get_date2(self):
        """
        Get the last date in the period of record.  A new DateTime is built from the
        stored date, so changing the returned object does not change the time series.
        :return: A copy of the last date in the period of record, or None if it is not set.
        """
        return None if self.date2 is None else DateTime(date_time=self.date2)

    def get_date2_original(self):
        """
        Get the last date in the original period of record.  A new DateTime is built
        from the stored date, so the caller cannot change the time series through it.
        :return: A copy of the last date of the original data source (generally equal to or
        later than the time series that is actually read), or None if it is not set.
        """
        return None if self.date2_original is None else DateTime(date_time=self.date2_original)

    def get_description(self):
        """
        Get the description of the time series (this is not the comments).
        :return: The time series description, as stored by set_description().
        """
        return self.description

    def get_identifier(self):
        """
        Get the identifier of the time series.  The stored object itself is returned, not a copy.
        :return: The time series identifier (a TSIdent when set through set_identifier()).
        """
        return self.tsid

    def get_identifier_string(self):
        """
        Get the identifier of the time series in string form.
        :return: The result of to_string(False) on the time series identifier.
        """
        identifier = self.tsid
        return identifier.to_string(False)

    def get_location(self):
        """
        Get the location part of the time series identifier.  The location type is not included.
        :return: The location, as returned by get_location() of the time series identifier.
        """
        identifier = self.tsid
        return identifier.get_location()

    def initialize(self):
        """
        Reset the data members to their default values.  Description, units and the
        missing data value go through their setters; everything else is assigned directly.
        The time series identifier (tsid), the data type and the data limits are not
        initialized here.
        """
        # Text metadata starts out empty.
        self.version = self.input_name = ""
        self.legend = self.extended_legend = ""

        # No data points yet.
        self.data_size = 0

        # Default intervals for the current data and for the original data.
        self.data_interval_base, self.data_interval_mult = 0, 1
        self.data_interval_base_original, self.data_interval_mult_original = 1, 0

        self.set_description("")

        # Each list is a separate new object (comments and genesis must not share one list).
        self.comments = []
        self.genesis = []

        self.set_data_units("")
        self.set_data_units_original("")
        self.set_missing(-999.0)

        # Status flags.
        self.dirty = self.enabled = True
        self.selected = self.editable = False

    def is_data_missing(self, value):
        """
        Determine whether a data value is missing for this time series.
        A value is missing if it is NaN, or if it lies within the inclusive range
        [missingl, missingu] that set_missing() maintains around the missing data value.
        NaN cannot be found with == (NaN never compares equal, not even to itself), so
        it is tested explicitly.  Because NaN is always reported as missing, code that
        uses time series data should not try to treat NaN differently from missing.
        :param value: Value to check.
        :return: True if the data value is missing, False if not.
        """
        if math.isnan(value) or (self.missingl <= value <= self.missingu):
            return True
        return False

    def set_data_interval(self, base, mult):
        """
        Store the data interval of the time series.
        :param base: Base interval (see TimeInterval.*)
        :param mult: Multiplier applied to the base interval.
        """
        self.data_interval_base, self.data_interval_mult = base, mult

    def set_data_interval_original(self, base, mult):
        """
        Store the data interval of the original data.
        :param base: Base interval (see TimeInterval.*)
        :param mult: Multiplier applied to the base interval.
        """
        self.data_interval_base_original, self.data_interval_mult_original = base, mult

    def set_data_size(self, data_size):
        """
        Store the number of data points, counted over the full period.
        This is intended to be called by refresh().
        :param data_size: Number of data points in the time series.
        """
        self.data_size = data_size

    def set_data_type(self, data_type):
        """
        Set the data type, which is stored in the time series identifier.
        Nothing happens if the data type is None or if there is no identifier.
        :param data_type: Data type abbreviation
        """
        if (data_type is None) or (self.tsid is None):
            return
        self.tsid.set_type(type=data_type)

    def set_data_units(self, data_units):
        """
        Store the units of the data.  A None value is ignored, leaving the current units unchanged.
        :param data_units: Data units abbreviation
        """
        if data_units is None:
            return
        self.data_units = data_units

    def set_data_units_original(self, units):
        """
        Store the units of the original data.  A None value is ignored, leaving the current units unchanged.
        :param units: Data units abbreviation
        """
        if units is None:
            return
        self.data_units_original = units

    def set_date1(self, t):
        """
        Set the first date in the period.  The time series keeps its own copy of the date.
        Unless the time series is irregular, the precision of the copy is set from the
        data interval base.  A None date is ignored.
        :param t: First date in period
        """
        if t is None:
            return
        self.date1 = DateTime(date_time=t)
        if self.data_interval_base == TimeInterval.IRREGULAR:
            # For irregular, rely on the DateTime precision
            return
        self.date1.set_precision(self.data_interval_base)

    def set_date1_original(self, t):
        """
        Set the first date in the period of the original data.  The time series keeps its
        own copy of the date.  Unless the time series is irregular, the precision of the
        copy is set from the data interval base.  A None date is ignored.
        :param t: First date in period in the original data.
        """
        if t is None:
            return
        self.date1_original = DateTime(date_time=t)
        if self.data_interval_base == TimeInterval.IRREGULAR:
            # For irregular, rely on the DateTime precision
            return
        self.date1_original.set_precision(self.data_interval_base)

    def set_date2(self, t):
        """
        Set the last date in the period.  The time series keeps its own copy of the date.
        Unless the time series is irregular, the precision of the copy is set from the
        data interval base.  A None date is ignored.
        :param t: Last date in period
        """
        if t is None:
            return
        self.date2 = DateTime(date_time=t)
        if self.data_interval_base == TimeInterval.IRREGULAR:
            # For irregular, rely on the DateTime precision
            return
        self.date2.set_precision(self.data_interval_base)

    def set_date2_original(self, t):
        """
        Set the last date in the period of the original data.  The time series keeps its
        own copy of the date.  Unless the time series is irregular, the precision of the
        copy is set from the data interval base.  A None date is ignored.
        :param t: Last date in period in the original data.
        """
        if t is None:
            return
        self.date2_original = DateTime(date_time=t)
        if self.data_interval_base == TimeInterval.IRREGULAR:
            # For irregular, rely on the DateTime precision.
            return
        self.date2_original.set_precision(self.data_interval_base)

    def set_description(self, description):
        """
        Store the description.  A None value is ignored, leaving the current description unchanged.
        :param description: Time series description (this is not the comments).
        """
        if description is None:
            return
        self.description = description

    def set_identifier(self, tsident):
        """
        Set the time series identifier from a TSIdent object or from an identifier string.
        In both cases a new TSIdent is created, so the time series does not share the
        object that was passed in.  Only the identifier is set; the separate data fields
        (like data type) are not.  A None identifier is ignored.
        :param tsident: Time series identifier as TSIdent object, or as str
        :raises ValueError: if tsident is neither None, a TSIdent, nor a str.
        """
        # The logger is only needed when troubleshooting is turned on.
        debug_logger = logging.getLogger(__name__) if self.debug else None

        if tsident is None:
            return

        if isinstance(tsident, TSIdent):
            if debug_logger is not None:
                debug_logger.debug("Before setting TSIdent to \"" + str(tsident) +
                                   "\" using TSIdent, tsid=" + str(self.tsid))
            # Copy the identifier that was passed in.
            self.tsid = TSIdent(tsident=tsident)
            if debug_logger is not None:
                debug_logger.debug("After setting TSIdent to \"" + str(tsident) +
                                   "\" using TSIdent, tsid=" + str(self.tsid))
            return

        if isinstance(tsident, str):
            if debug_logger is not None:
                debug_logger.debug("Before setting TSIdent to \"" + str(tsident) +
                                   "\" using str, tsid=" + str(self.tsid))
            # Create the identifier from the string.
            self.tsid = TSIdent(identifier=tsident)
            if debug_logger is not None:
                debug_logger.debug("After setting TSIdent to \"" + str(tsident) +
                                   "\" using str, tsid=" + str(self.tsid))
            return

        raise ValueError("Parameter type " + str(type(tsident)) +
                         " for set_identifier() is not supported.")

    def set_input_name(self, input_name):
        """
        Store the input name (file or database table).  A None value is ignored,
        leaving the current input name unchanged.
        :param input_name: the input name
        """
        if input_name is None:
            return
        self.input_name = input_name

    def set_missing(self, missing):
        """
        Set the missing data value for the time series, together with the lower and
        upper bounds (missingl, missingu) that is_data_missing() compares against.
        For an ordinary value the bounds are the value -.001 and +.001, to allow for
        precision truncation.  If the value is the largest float, the upper bound is the
        value itself.  If the value is NaN, both bounds are NaN so that no numeric value
        (for example -999) falls inside the range; NaN itself is detected separately by
        is_data_missing().
        :param missing: Missing data value for time series.
        """
        self.missing = missing
        if math.isnan(missing):
            # A NaN range never matches a number, so values like -999 are not treated as missing.
            self.missingl = math.nan
            self.missingu = math.nan
            return
        # Set a range on the missing value check that is slightly on each side of the value
        self.missingl = missing - .001
        if missing == sys.float_info.max:
            # Do not go above the largest float for the upper bound.
            self.missingu = missing
        else:
            self.missingu = missing + .001

    def set_property(self, property_name, property_value):
        """
        Store a property of the time series under its name (the name is case-specific).
        The property dictionary is created the first time a property is set.  An
        existing property with the same name is replaced.
        :param property_name: name of property being set.
        :param property_value: property object corresponding to the property name.
        """
        properties = self.property_HashMap
        if properties is None:
            properties = {}
            self.property_HashMap = properties
        properties[property_name] = property_value
Пример #17
0
    def to_array(self, ts, start_date=None, end_date=None, month_index=None, include_months=None, include_missing=True,
                 match_other_nonmissing=None, paired_ts=None, return_type=None):
        """
        Return a list containing the data values (or the date/times) of a time series for
        the specified period.  If the start date or end date is None, the corresponding
        date of the time series is used (see get_valid_period()).

        :param ts: Time series to convert data to list format.
        :param start_date: Date corresponding to the first date of the returned list, or None.
        :param end_date: Date corresponding to the last date of the returned list, or None.
        :param month_index: Single month of interest (1=Jan, 12=Dec).  A non-zero value takes
            precedence over include_months.  None or zero leaves include_months as passed.
        :param include_months: List of months (1=Jan, 12=Dec) to process.  None or an empty
            list means all months are processed.
        :param include_missing: Indicate whether missing values should be included in the
            result.  Not used for the regular interval case when paired_ts is given.
        :param match_other_nonmissing: Only used when paired_ts is given.  If true, a value is
            transferred when it is non-missing in both ts and paired_ts.  Otherwise a value is
            transferred when it is non-missing in ts and missing in paired_ts.
        :param paired_ts: Optional second time series used to select the values of ts.  It
            requires a regular interval that matches the interval of ts.
        :param return_type: TSToArrayReturnType.DATA_VALUE (the default when None) to return
            data values, or TSToArrayReturnType.DATE_TIME to return the year, absolute month
            or absolute day of each value for 1Year, 1Month and 1Day intervals.
        :return: The list of values, in date order.  The list is empty if the data size
            calculated for the period is zero.  None is returned if an irregular time series
            has no data list.
        :raises ValueError: if paired_ts is used with an irregular interval or with a different
            interval than ts, if date/times are requested for an interval other than 1Year,
            1Month or 1Day, or if a month to include is not in the range 1-12.
        """
        # A single month of interest is shorthand for a one-entry include_months list.  It is
        # handled here, rather than by calling to_array() again, because a repeated call
        # with month_index left at None would never terminate.
        if (month_index is not None) and (month_index != 0):
            include_months = [month_index]

        if paired_ts is not None:
            if not TimeInterval.is_regular_interval(ts.get_data_interval_base()):
                raise ValueError(
                    "Irregular interval time series cannot have data array extracted using paired time series.")
            if not self.intervals_match(ts, paired_ts):
                raise ValueError(
                    "Time series from which to extract data has a different interval than paired time series.")

        # Get valid dates because the ones passed in may have been None...
        valid_dates = self.get_valid_period(ts, start_date, end_date)
        start = valid_dates.get_date1()
        end = valid_dates.get_date2()

        interval_base = ts.get_data_interval_base()
        interval_mult = ts.get_data_interval_mult()
        # The size is only needed to detect an empty period, because the result list grows as needed.
        size = self.calculate_data_size(start, end, interval_base, interval_mult)

        if return_type is None:
            return_type = TSToArrayReturnType.DATA_VALUE
        if return_type == TSToArrayReturnType.DATE_TIME:
            # Only 1Year, 1Month, 1Day intervals are supported
            supported_bases = (TimeInterval.YEAR, TimeInterval.MONTH, TimeInterval.DAY)
            if (interval_mult != 1) or (interval_base not in supported_bases):
                raise ValueError(
                    "Interval must be Year, Month, or Day (no multiplier) to return date/time as array.")

        # Position (month - 1) in the mask is True if the month is to be processed.
        if (include_months is None) or (len(include_months) == 0):
            include_months_mask = [True] * 12
        else:
            include_months_mask = [False] * 12
            for included_month in include_months:
                if not 1 <= included_month <= 12:
                    raise ValueError(
                        "Month to include (" + str(included_month) + ") must be in the range 1-12.")
                include_months_mask[included_month - 1] = True

        if size == 0:
            return []

        data_array = []

        def transfer(value, date):
            # Append the list entry for one data point, according to the requested return type.
            if return_type == TSToArrayReturnType.DATA_VALUE:
                data_array.append(value)
            elif return_type == TSToArrayReturnType.DATE_TIME:
                if interval_base == TimeInterval.YEAR:
                    data_array.append(date.get_year())
                elif interval_base == TimeInterval.MONTH:
                    data_array.append(date.get_absolute_month())
                elif interval_base == TimeInterval.DAY:
                    # NOTE(review): for regular intervals this call used to be commented out
                    # under a TODO (smalers 2020-01-04) while the value was still counted.
                    # It is enabled here to agree with the irregular case - confirm that
                    # DateTime.get_absolute_day() is implemented.
                    data_array.append(date.get_absolute_day())

        if interval_base == TimeInterval.IRREGULAR:
            # Get the data and loop through the list...
            alltsdata = ts.get_data()
            if alltsdata is None:
                # No data for the time series...
                return None
            for tsdata in alltsdata:
                date = tsdata.get_date()
                if date.greater_than(end):
                    # Past the end of where we want to go so quit...
                    break
                if not date.greater_than_or_equal_to(start):
                    continue
                if not include_months_mask[date.get_month() - 1]:
                    continue
                value = tsdata.get_data_value()
                if include_missing or not ts.is_data_missing(value):
                    transfer(value, date)
        else:
            # Regular, increment the data by interval.  The increment is the last statement of
            # the loop so that every date from start through end, and no later date, is processed.
            date = DateTime(date_time=start)
            while date.less_than_or_equal_to(end):
                # Skip the data if not in a requested month.
                if include_months_mask[date.get_month() - 1]:
                    value = ts.get_data_value(date)
                    is_missing = ts.is_data_missing(value)
                    if paired_ts is None:
                        do_transfer = include_missing or not is_missing
                    elif is_missing:
                        # Value in "ts" time series MUST be non-missing
                        do_transfer = False
                    else:
                        is_missing2 = paired_ts.is_data_missing(paired_ts.get_data_value(date))
                        if match_other_nonmissing:
                            # Want non-missing in both "ts" and "paired_ts"
                            do_transfer = not is_missing2
                        else:
                            # Want non-missing in "ts" and missing in "paired_ts"
                            do_transfer = is_missing2
                    if do_transfer:
                        transfer(value, date)
                date.add_interval(interval_base, interval_mult)

        # The list only holds the transferred values, so there is nothing to cut down.
        return data_array
Пример #18
0
    def get_period_from_ts(tslist, por_flag):
        """
        Determine the period limits for a list of time series.

        Example of period of record (POR) calculation:
            ------------------------    TS1
              -------------------       TS2
                        --------------  TS3

            --------------------------  MAX_POR
                        ---------       MIN_POR

        The starting dates come from the first time series in the list that supply a
        start and an end.  They are then widened (TSUtil.MAX_POR) or narrowed
        (TSUtil.MIN_POR) using the time series after the first list position.  None
        entries, and time series without both dates, are ignored in that comparison.
        :param tslist: A list of time series of interest.
        :param por_flag: TSUtil.MAX_POR or TSUtil.MIN_POR.
        :return: A new TSLimits with the start and end dates of the period (copies of the dates).
        :raises ValueError: if the list is None or empty, if por_flag is not one of the two
        flags, if a start and an end date cannot be found, or if the resulting start is
        after the resulting end (the periods do not overlap).
        """

        logger = logging.getLogger(__name__)
        debug = False

        def fail(message):
            # Log the problem and raise it to the caller.
            logger.warning(message)
            raise ValueError(message)

        period_start = None
        period_end = None

        if tslist is None:
            fail("Unable to get period for time series - time series list is null")

        series_count = len(tslist)
        if debug:
            logger.debug("Getting " + str(por_flag) + "-flag limits for " + str(series_count) + " time series")

        if series_count == 0:
            fail("Unable to get period for time series - time series list is zero size")
        if not ((por_flag == TSUtil.MIN_POR) or (por_flag == TSUtil.MAX_POR)):
            fail("Unknown option for TSUtil.getPeriodForTS" + str(por_flag))

        # Find the dates to start the comparison with, stopping as soon as both are known...

        null_series_count = 0
        for candidate in tslist:
            if candidate is None:
                null_series_count += 1
                continue
            candidate_start = candidate.get_date1()
            if candidate_start is not None:
                period_start = candidate_start
            candidate_end = candidate.get_date2()
            if candidate_end is not None:
                period_end = candidate_end
            if (period_start is not None) and (period_end is not None):
                # Done looking for starting date/times
                break

        if debug:
            logger.debug("Starting comparison dates " + str(period_start) + " " + str(period_end))

        if (period_start is None) or (period_end is None):
            fail("Unable to get period (all null dates) from " + str(series_count) +
                 " time series (" + str(null_series_count) + " null time series).")

        # Compare against the time series after the first list position...

        for position in range(1, series_count):
            other = tslist[position]
            if other is None:
                # Ignore the time series...
                continue
            other_start = other.get_date1()
            other_end = other.get_date2()
            if (other_start is None) or (other_end is None):
                continue
            if debug:
                logger.debug("Comparison dates " + str(other_start) + " " + str(other_end))
            if por_flag == TSUtil.MAX_POR:
                # Widen the period.
                if other_start.less_than(period_start):
                    period_start = DateTime(date_time=other_start)
                if other_end.greater_than(period_end):
                    period_end = DateTime(date_time=other_end)
            elif por_flag == TSUtil.MIN_POR:
                # Narrow the period.
                if other_start.greater_than(period_start):
                    period_start = DateTime(date_time=other_start)
                if other_end.less_than(period_end):
                    period_end = DateTime(date_time=other_end)

        # If the time series do not overlap, then the limits may be reversed, which is an error...
        if period_start.greater_than(period_end):
            fail("Periods do not overlap.  Can't determine minimum period.")

        if debug:
            if por_flag == TSUtil.MAX_POR:
                logger.debug("Maximum POR limits are " + str(period_start) + " to " + str(period_end))
            elif por_flag == TSUtil.MIN_POR:
                logger.debug("Minimum POR limits are " + str(period_start) + " to " + str(period_end))

        # Return the dates as new instances so that what is in the time series is not changed...

        limits = TSLimits()
        limits.set_date1(DateTime(date_time=period_start))
        limits.set_date2(DateTime(date_time=period_end))
        limits.set_limits_found(True)
        return limits