def test_read_no_size_end():
    """Reading with only a start column fails: an end or size column is needed."""
    expected = 'Either `time_bin_end_column` or `time_bin_size_column` should be provided.'

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(
            CSV_FILE,
            time_bin_start_column='time_start',
            format='csv',
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_read_end_time_missing():
    """Naming a bin-end column that is absent from the file raises ValueError."""
    expected = "Bin end time column 'missing' not found in the input data."

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(
            CSV_FILE,
            time_bin_start_column='time_start',
            time_bin_end_column="missing",
            format='csv',
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_read_time_unit_missing():
    """Giving a bin-size column without ``time_bin_size_unit`` raises ValueError."""
    expected = ("The bin size unit should be specified as an astropy Unit "
                "using ``time_bin_size_unit``.")

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(
            CSV_FILE,
            time_bin_start_column='time_start',
            time_bin_size_column="bin_size",
            format='csv',
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_read_both_extra_bins():
    """Passing both an end column and a size column is rejected."""
    expected = "Cannot specify both `time_bin_end_column` and `time_bin_size_column`."

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(
            CSV_FILE,
            time_bin_start_column='time_start',
            time_bin_end_column='END',
            time_bin_size_column='bin_size',
            format='csv',
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_read_start_time_missing():
    """Naming a bin-start column that is absent from the file raises ValueError."""
    expected = "Bin start time column 'abc' not found in the input data."

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(
            CSV_FILE,
            time_bin_start_column='abc',
            time_bin_size_column='bin_size',
            time_bin_size_unit=u.second,
            format='csv',
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_initialization_time_bin_invalid():
    """Construction fails with TypeError when the time_bin_* arguments are incomplete."""
    table_data = [[1, 4, 3]]

    # Case 1: no start time at all.
    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(data=table_data)
    message = excinfo.value.args[0]
    assert message == "'time_bin_start' has not been specified"

    # Case 2: a start time, but neither a bin size nor a bin end.
    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(time_bin_start='2016-03-22T12:30:31',
                         data=table_data)
    message = excinfo.value.args[0]
    assert message == "Either 'time_bin_size' or 'time_bin_end' should be specified"
def test_read():
    """Read the CSV fixture once via an end column and once via a size column."""
    data_columns = ['A', 'B', 'C', 'D', 'E', 'F']

    # Bins defined by start + end columns: the unused 'bin_size' column is kept.
    by_end = BinnedTimeSeries.read(
        CSV_FILE,
        time_bin_start_column='time_start',
        time_bin_end_column='time_end',
        format='csv',
    )
    assert by_end.colnames == ['time_bin_start', 'time_bin_size', 'bin_size'] + data_columns
    assert len(by_end) == 10
    assert by_end['B'].sum() == 1151.54

    # Bins defined by start + size columns: the unused 'time_end' column is kept.
    by_size = BinnedTimeSeries.read(
        CSV_FILE,
        time_bin_start_column='time_start',
        time_bin_size_column='bin_size',
        time_bin_size_unit=u.second,
        format='csv',
    )
    assert by_size.colnames == ['time_bin_start', 'time_bin_size', 'time_end'] + data_columns
    assert len(by_size) == 10
    assert by_size['B'].sum() == 1151.54
def test_uneven_non_contiguous():
    """Uneven, non-contiguous bins built from lists of start times and bin sizes."""
    starts = [
        '2016-03-22T12:30:31',
        '2016-03-22T12:30:38',
        '2016-03-22T12:34:40',
    ]
    sizes = [5, 100, 2] * u.s

    ts = BinnedTimeSeries(time_bin_start=starts,
                          time_bin_size=sizes,
                          data=[[1, 4, 3]])

    expected_start = [
        '2016-03-22T12:30:31.000',
        '2016-03-22T12:30:38.000',
        '2016-03-22T12:34:40.000',
    ]
    expected_center = [
        '2016-03-22T12:30:33.500',
        '2016-03-22T12:31:28.000',
        '2016-03-22T12:34:41.000',
    ]
    expected_end = [
        '2016-03-22T12:30:36.000',
        '2016-03-22T12:32:18.000',
        '2016-03-22T12:34:42.000',
    ]

    assert_equal(ts.time_bin_start.isot, expected_start)
    assert_equal(ts.time_bin_center.isot, expected_center)
    assert_equal(ts.time_bin_end.isot, expected_end)
def test_uneven_contiguous():
    """Uneven, contiguous bins built from a list of start times and one end time."""
    starts = [
        '2016-03-22T12:30:31',
        '2016-03-22T12:30:32',
        '2016-03-22T12:30:40',
    ]
    final_end = '2016-03-22T12:30:55'

    ts = BinnedTimeSeries(time_bin_start=starts,
                          time_bin_end=final_end,
                          data=[[1, 4, 3]])

    expected_start = [
        '2016-03-22T12:30:31.000',
        '2016-03-22T12:30:32.000',
        '2016-03-22T12:30:40.000',
    ]
    expected_center = [
        '2016-03-22T12:30:31.500',
        '2016-03-22T12:30:36.000',
        '2016-03-22T12:30:47.500',
    ]
    expected_end = [
        '2016-03-22T12:30:32.000',
        '2016-03-22T12:30:40.000',
        '2016-03-22T12:30:55.000',
    ]

    assert_equal(ts.time_bin_start.isot, expected_start)
    assert_equal(ts.time_bin_center.isot, expected_center)
    assert_equal(ts.time_bin_end.isot, expected_end)
def test_initialization_time_bin_size():
    """A unit-less time_bin_size is rejected; a TimeDelta is accepted."""
    start = "2016-03-22T12:30:31"

    # A bare number carries no unit, so construction must fail.
    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(data={"time": ["2016-03-22T12:30:31"]},
                         time_bin_start=start,
                         time_bin_size=1)
    message = excinfo.value.args[0]
    assert message == "'time_bin_size' should be a Quantity or a TimeDelta"

    # A TimeDelta is accepted and exposed as a Quantity.
    one_day = TimeDelta(1, format="jd")
    ts = BinnedTimeSeries(data={"time": ["2016-03-22T12:30:31"]},
                          time_bin_start=start,
                          time_bin_size=one_day)
    assert isinstance(ts.time_bin_size, u.quantity.Quantity)
def test_uneven_non_contiguous_full():
    """Uneven, non-contiguous bins built from explicit start and end time lists."""
    starts = [
        '2016-03-22T12:30:31',
        '2016-03-22T12:30:33',
        '2016-03-22T12:30:40',
    ]
    ends = [
        '2016-03-22T12:30:32',
        '2016-03-22T12:30:35',
        '2016-03-22T12:30:41',
    ]

    ts = BinnedTimeSeries(time_bin_start=starts,
                          time_bin_end=ends,
                          data=[[1, 4, 3]])

    expected_start = [
        '2016-03-22T12:30:31.000',
        '2016-03-22T12:30:33.000',
        '2016-03-22T12:30:40.000',
    ]
    expected_center = [
        '2016-03-22T12:30:31.500',
        '2016-03-22T12:30:34.000',
        '2016-03-22T12:30:40.500',
    ]
    expected_end = [
        '2016-03-22T12:30:32.000',
        '2016-03-22T12:30:35.000',
        '2016-03-22T12:30:41.000',
    ]

    assert_equal(ts.time_bin_start.isot, expected_start)
    assert_equal(ts.time_bin_center.isot, expected_center)
    assert_equal(ts.time_bin_end.isot, expected_end)
def test_initialization_time_bin_both():
    """A time_bin_* value given both as a table column and as a keyword is rejected."""
    # 'time_bin_start' supplied twice.
    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(
            data={"time_bin_start": ["2016-03-22T12:30:31"]},
            time_bin_start="2016-03-22T12:30:31",
        )
    message = excinfo.value.args[0]
    assert message == ("'time_bin_start' has been given both in the table "
                       "and as a keyword argument")

    # 'time_bin_size' supplied twice.
    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(
            data={"time_bin_size": ["2016-03-22T12:30:31"]},
            time_bin_size=[1] * u.s,
        )
    message = excinfo.value.args[0]
    assert message == ("'time_bin_size' has been given both in the table "
                       "and as a keyword argument")
def test_initialization_time_bin_start_scalar():
    """A scalar time_bin_start given without time_bin_size raises TypeError."""
    expected = "'time_bin_start' is scalar, so 'time_bin_size' is required"

    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(
            data={"time": ["2016-03-22T12:30:31"]},
            time_bin_start=Time(1, format='mjd'),
            time_bin_end=Time(1, format='mjd'),
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_empty_initialization_invalid():
    """Adding a non-time column first to an empty series raises ValueError."""
    expected = ("BinnedTimeSeries object is invalid - expected "
                "'time_bin_start' as the first column but found 'flux'")

    series = BinnedTimeSeries()
    with pytest.raises(ValueError) as excinfo:
        series['flux'] = [1, 2, 3]

    message = excinfo.value.args[0]
    assert message == expected
def test_initialization_non_scalar_time():
    """Array-valued time_bin_start: wrong length, or no size/end, is rejected."""
    # Two start times against a one-row table.
    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries(
            data={"time": ["2016-03-22T12:30:31"]},
            time_bin_start=["2016-03-22T12:30:31", "2016-03-22T12:30:32"],
            time_bin_size=1 * u.s,
            time_bin_end=Time(1, format='mjd'),
        )
    message = excinfo.value.args[0]
    assert message == "Length of 'time_bin_start' (2) should match table length (1)"

    # Matching length, but neither bin size nor bin end supplied.
    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(
            data={"time": ["2016-03-22T12:30:31"]},
            time_bin_start=["2016-03-22T12:30:31"],
            time_bin_size=None,
            time_bin_end=None,
        )
    message = excinfo.value.args[0]
    assert message == "Either 'time_bin_size' or 'time_bin_end' should be specified"
def test_initialization_n_bins():
    """An n_bins value that disagrees with the input data length raises TypeError."""
    expected = ("'n_bins' has been given and it is not the "
                "same length as the input data.")

    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(
            data={"time": ["2016-03-22T12:30:31"]},
            time_bin_start=Time(1, format='mjd'),
            time_bin_size=1 * u.s,
            time_bin_end=Time(1, format='mjd'),
            n_bins=10,
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_initialization_n_bins_invalid_arguments():
    """Passing n_bins together with a non-scalar time_bin_start raises TypeError."""
    expected = ("'n_bins' cannot be specified if 'time_bin_start' or "
                "'time_bin_size' are not scalar'")
    array_start = Time([1, 2, 3], format='cxcsec')

    with pytest.raises(TypeError) as excinfo:
        BinnedTimeSeries(time_bin_start=array_start,
                         time_bin_size=1 * u.s,
                         n_bins=10)

    message = excinfo.value.args[0]
    assert message == expected
def test_read():
    """Read the CSV fixture once via an end column and once via a size column."""
    # Bins defined by start + end columns: the unused 'bin_size' column is kept.
    read_with_end = BinnedTimeSeries.read(
        CSV_FILE,
        time_bin_start_column='time_start',
        time_bin_end_column='time_end',
        format='csv',
    )
    expected_with_end = [
        'time_bin_start', 'time_bin_size', 'bin_size',
        'A', 'B', 'C', 'D', 'E', 'F',
    ]
    assert read_with_end.colnames == expected_with_end
    assert len(read_with_end) == 10
    assert read_with_end['B'].sum() == 1151.54

    # Bins defined by start + size columns: the unused 'time_end' column is kept.
    read_with_size = BinnedTimeSeries.read(
        CSV_FILE,
        time_bin_start_column='time_start',
        time_bin_size_column='bin_size',
        time_bin_size_unit=u.second,
        format='csv',
    )
    expected_with_size = [
        'time_bin_start', 'time_bin_size', 'time_end',
        'A', 'B', 'C', 'D', 'E', 'F',
    ]
    assert read_with_size.colnames == expected_with_size
    assert len(read_with_size) == 10
    assert read_with_size['B'].sum() == 1151.54
def test_periodogram(cls):
    """Check that ``cls.from_timeseries`` wires a BinnedTimeSeries through correctly.

    Only the plumbing (times, values, uncertainties) is verified here; the
    numerical results of the periodogram classes are covered by the tests in
    astropy.timeseries.periodograms.
    """
    ts = BinnedTimeSeries(
        time_bin_start='2016-03-22T12:30:31',
        time_bin_size=3 * u.s,
        data=[[1, 4, 3], [3, 4, 3]],
        names=['a', 'b'],
    )

    # No uncertainty given: bin centres are used as times and dy stays unset.
    without_dy = cls.from_timeseries(ts, 'a')
    assert isinstance(without_dy, cls)
    assert_allclose(without_dy.t.jd, ts.time_bin_center.jd)
    assert_equal(without_dy.y, ts['a'])
    assert without_dy.dy is None

    # Uncertainty taken from another column of the series.
    column_dy = cls.from_timeseries(ts, 'a', uncertainty='b')
    assert_quantity_allclose(column_dy.dy, ts['b'])

    # Uncertainty given as a plain number.
    constant_dy = cls.from_timeseries(ts, 'a', uncertainty=0.1)
    assert_allclose(constant_dy.dy, 0.1)
def test_even_contiguous():
    """Even, contiguous bins built from a scalar start time and a bin width."""
    ts = BinnedTimeSeries(
        time_bin_start='2016-03-22T12:30:31',
        time_bin_size=3 * u.s,
        data=[[1, 4, 3]],
    )

    expected_start = [
        '2016-03-22T12:30:31.000',
        '2016-03-22T12:30:34.000',
        '2016-03-22T12:30:37.000',
    ]
    expected_center = [
        '2016-03-22T12:30:32.500',
        '2016-03-22T12:30:35.500',
        '2016-03-22T12:30:38.500',
    ]
    expected_end = [
        '2016-03-22T12:30:34.000',
        '2016-03-22T12:30:37.000',
        '2016-03-22T12:30:40.000',
    ]

    assert_equal(ts.time_bin_start.isot, expected_start)
    assert_equal(ts.time_bin_center.isot, expected_center)
    assert_equal(ts.time_bin_end.isot, expected_end)
def aggregate_downsample(time_series, *, time_bin_size=None, time_bin_start=None,
                         time_bin_end=None, n_bins=None, aggregate_func=None):
    """
    Downsample a time series by binning values into bins with a fixed size or
    custom sizes, using a single function to combine the values in the bin.

    Parameters
    ----------
    time_series : :class:`~astropy.timeseries.TimeSeries`
        The time series to downsample.
    time_bin_size : `~astropy.units.Quantity` or `~astropy.time.TimeDelta` ['time'], optional
        The time interval for the binned time series - this is either a scalar
        value (in which case all time bins will be assumed to have the same
        duration) or as an array of values (in which case each time bin can
        have a different duration). If this argument is provided,
        ``time_bin_end`` should not be provided.
    time_bin_start : `~astropy.time.Time` or iterable, optional
        The start time for the binned time series - this can be either given
        directly as a `~astropy.time.Time` array or as any iterable that
        initializes the `~astropy.time.Time` class. This can also be a scalar
        value if ``time_bin_size`` or ``time_bin_end`` is provided.
        Defaults to the first time in the sampled time series.
    time_bin_end : `~astropy.time.Time` or iterable, optional
        The times of the end of each bin - this can be either given directly as
        a `~astropy.time.Time` array or as any iterable that initializes the
        `~astropy.time.Time` class. This can only be given if ``time_bin_start``
        is provided or its default is used. If ``time_bin_end`` is scalar and
        ``time_bin_start`` is an array, time bins are assumed to be contiguous;
        the end of each bin is the start of the next one, and ``time_bin_end`` gives
        the end time for the last bin.  If ``time_bin_end`` is an array and
        ``time_bin_start`` is scalar, bins will be contiguous. If both ``time_bin_end``
        and ``time_bin_start`` are arrays, bins do not need to be contiguous.
        If this argument is provided, ``time_bin_size`` should not be provided.
    n_bins : int, optional
        The number of bins to use. Defaults to the number needed to fit all
        the original points. If both ``time_bin_start`` and ``time_bin_size``
        are provided and are scalar values, this determines the total bins
        within that interval. If ``time_bin_start`` is an iterable, this
        parameter will be ignored.
    aggregate_func : callable, optional
        The function to use for combining points in the same bin. Defaults
        to np.nanmean.

    Returns
    -------
    binned_time_series : :class:`~astropy.timeseries.BinnedTimeSeries`
        The downsampled time series.

    Raises
    ------
    TypeError
        If ``time_series`` is not a ``TimeSeries``, if ``time_bin_size`` is
        given but is neither a ``Quantity`` nor a ``TimeDelta``, or if
        ``time_bin_start`` is scalar and none of ``n_bins``,
        ``time_bin_size`` and ``time_bin_end`` is given.

    Warns
    -----
    AstropyUserWarning
        If array-valued bins overlap, or if a column is skipped because it is
        neither a `numpy.ndarray` nor a `~astropy.units.Quantity`.
    """

    if not isinstance(time_series, TimeSeries):
        raise TypeError("time_series should be a TimeSeries")

    if time_bin_size is not None and not isinstance(time_bin_size, (u.Quantity, TimeDelta)):
        raise TypeError("'time_bin_size' should be a Quantity or a TimeDelta")

    # Anything that is not already a time object is passed through ``Time``.
    if time_bin_start is not None and not isinstance(time_bin_start, (Time, TimeDelta)):
        time_bin_start = Time(time_bin_start)

    if time_bin_end is not None and not isinstance(time_bin_end, (Time, TimeDelta)):
        time_bin_end = Time(time_bin_end)

    # Use the table sorted by time
    ts_sorted = time_series.iloc[:]

    # If start time is not provided, it is assumed to be the start of the timeseries
    if time_bin_start is None:
        time_bin_start = ts_sorted.time[0]

    # Total duration of the timeseries is needed for determining either
    # `time_bin_size` or `nbins` in the case of scalar `time_bin_start`
    if time_bin_start.isscalar:
        time_duration = (ts_sorted.time[-1] - time_bin_start).sec

    if time_bin_size is None and time_bin_end is None:
        if time_bin_start.isscalar:
            if n_bins is None:
                raise TypeError("With single 'time_bin_start' either 'n_bins', "
                                "'time_bin_size' or time_bin_end' must be provided")
            else:
                # Split the whole duration evenly into the requested `n_bins`
                time_bin_size = time_duration / n_bins * u.s
        else:
            # Array of starts only: bins are closed off at whichever is later,
            # the last sample or the last bin start.
            time_bin_end = np.maximum(ts_sorted.time[-1], time_bin_start[-1])

    if time_bin_start.isscalar:
        if time_bin_size is not None:
            if time_bin_size.isscalar:
                # Determine the number of bins
                if n_bins is None:
                    bin_size_sec = time_bin_size.to_value(u.s)
                    n_bins = int(np.ceil(time_duration/bin_size_sec))
        elif time_bin_end is not None:
            if not time_bin_end.isscalar:
                # Convert start time to an array and populate using `time_bin_end`
                scalar_start_time = time_bin_start
                time_bin_start = time_bin_end.replicate(copy=True)
                time_bin_start[0] = scalar_start_time
                time_bin_start[1:] = time_bin_end[:-1]

    # Check for overlapping bins, and warn if they are present
    if time_bin_end is not None:
        if (not time_bin_end.isscalar and not time_bin_start.isscalar and
                np.any(time_bin_start[1:] < time_bin_end[:-1])):
            warnings.warn("Overlapping bins should be avoided since they "
                          "can lead to double-counting of data during binning.",
                          AstropyUserWarning)

    binned = BinnedTimeSeries(time_bin_size=time_bin_size,
                              time_bin_start=time_bin_start,
                              time_bin_end=time_bin_end,
                              n_bins=n_bins)

    if aggregate_func is None:
        aggregate_func = np.nanmean

    # Start and end times of the binned timeseries
    bin_start = binned.time_bin_start
    bin_end = binned.time_bin_end

    # Set `n_bins` to match the length of `time_bin_start` if
    # `n_bins` is unspecified or if `time_bin_start` is an iterable
    if n_bins is None or not time_bin_start.isscalar:
        n_bins = len(bin_start)

    # Find the subset of the table that is inside the union of all bins
    keep = ((ts_sorted.time >= bin_start[0]) & (ts_sorted.time <= bin_end[-1]))

    # Find out indices to be removed because of uncontiguous bins
    for ind in range(n_bins-1):
        delete_indices = np.where(np.logical_and(ts_sorted.time > bin_end[ind],
                                                 ts_sorted.time < bin_start[ind+1]))
        keep[delete_indices] = False

    subset = ts_sorted[keep]

    # Figure out which bin each row falls in by sorting with respect
    # to the bin end times
    indices = np.searchsorted(bin_end, ts_sorted.time[keep])

    # For time == bin_start[i+1] == bin_end[i], let bin_start takes precedence
    if len(indices) and np.all(bin_start[1:] >= bin_end[:-1]):
        indices_start = np.searchsorted(subset.time,
                                        bin_start[bin_start <= ts_sorted.time[-1]])
        indices[indices_start] = np.arange(len(indices_start))

    # Determine rows where values are defined
    if len(indices):
        groups = np.hstack([0, np.nonzero(np.diff(indices))[0] + 1])
    else:
        groups = np.array([])

    # Find unique indices to determine which rows in the final time series
    # will not be empty.
    unique_indices = np.unique(indices)

    # Add back columns
    for colname in subset.colnames:

        if colname == 'time':
            continue

        values = subset[colname]

        # FIXME: figure out how to avoid the following, if possible
        if not isinstance(values, (np.ndarray, u.Quantity)):
            # Fill in the column name; previously the literal "{0}" was emitted.
            warnings.warn("Skipping column {0} since it has a mix-in type"
                          .format(colname), AstropyUserWarning)
            continue

        if isinstance(values, u.Quantity):
            # Bins without data stay NaN.
            data = u.Quantity(np.repeat(np.nan, n_bins), unit=values.unit)
            data[unique_indices] = u.Quantity(reduceat(values.value, groups, aggregate_func),
                                              values.unit, copy=False)
        else:
            # Bins without data stay masked.
            data = np.ma.zeros(n_bins, dtype=values.dtype)
            data.mask = 1
            data[unique_indices] = reduceat(values, groups, aggregate_func)
            data.mask[unique_indices] = 0

        binned[colname] = data

    return binned
def setup_method(self, method):
    """Build the BinnedTimeSeries fixture and record its time attribute name."""
    series = BinnedTimeSeries(
        time_bin_start=INPUT_TIME,
        time_bin_size=3 * u.s,
        data=PLAIN_TABLE,
    )
    self.series = series
    self.time_attr = 'time_bin_start'
def test_read_empty():
    """Reading through the default Table readers without a start column fails."""
    expected = ('``time_bin_start_column`` should be provided since the '
                'default Table readers are being used.')

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(CSV_FILE, format='csv')

    message = excinfo.value.args[0]
    assert message == expected
def test_read_no_size_end():
    """Reading with only a start column fails: an end or size column is needed."""
    read_kwargs = {'time_bin_start_column': 'time_start', 'format': 'csv'}

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(CSV_FILE, **read_kwargs)

    message = excinfo.value.args[0]
    assert message == ('Either `time_bin_end_column` or '
                       '`time_bin_size_column` should be provided.')
def test_read_both_extra_bins():
    """Passing both an end column and a size column is rejected."""
    read_kwargs = {
        'time_bin_start_column': 'time_start',
        'time_bin_end_column': 'END',
        'time_bin_size_column': 'bin_size',
        'format': 'csv',
    }

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(CSV_FILE, **read_kwargs)

    message = excinfo.value.args[0]
    assert message == ("Cannot specify both `time_bin_end_column` and "
                       "`time_bin_size_column`.")
def test_read_size_missing():
    """Naming a bin-size column that is absent from the file raises ValueError."""
    expected = "Bin size column 'missing' not found in the input data."

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(
            CSV_FILE,
            time_bin_start_column='time_start',
            time_bin_size_column="missing",
            time_bin_size_unit=u.second,
            format='csv',
        )

    message = excinfo.value.args[0]
    assert message == expected
def test_read_time_unit_missing():
    """Giving a bin-size column without ``time_bin_size_unit`` raises ValueError."""
    read_kwargs = {
        'time_bin_start_column': 'time_start',
        'time_bin_size_column': "bin_size",
        'format': 'csv',
    }

    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(CSV_FILE, **read_kwargs)

    message = excinfo.value.args[0]
    assert message == ("The bin size unit should be specified as an astropy "
                       "Unit using ``time_bin_size_unit``.")
def test_empty_initialization():
    """An empty series accepts 'time_bin_start' as its first column."""
    series = BinnedTimeSeries()
    start_times = Time([1, 2, 3], format='mjd')
    series['time_bin_start'] = start_times
def aggregate_downsample(time_series, *, time_bin_size=None, time_bin_start=None,
                         n_bins=None, aggregate_func=None):
    """
    Downsample a time series by binning values into bins with a fixed size,
    using a single function to combine the values in the bin.

    Parameters
    ----------
    time_series : :class:`~astropy.timeseries.TimeSeries`
        The time series to downsample.
    time_bin_size : `~astropy.units.Quantity`
        The time interval for the binned time series.
    time_bin_start : `~astropy.time.Time`, optional
        The start time for the binned time series. Defaults to the first
        time in the sampled time series.
    n_bins : int, optional
        The number of bins to use. Defaults to the number needed to fit all
        the original points.
    aggregate_func : callable, optional
        The function to use for combining points in the same bin. Defaults
        to np.nanmean.

    Returns
    -------
    binned_time_series : :class:`~astropy.timeseries.BinnedTimeSeries`
        The downsampled time series.

    Raises
    ------
    TypeError
        If ``time_series`` is not a ``TimeSeries`` or ``time_bin_size`` is
        not a `~astropy.units.Quantity` (including when it is left as `None`).

    Warns
    -----
    AstropyUserWarning
        If a column is skipped because it is neither a `numpy.ndarray` nor a
        `~astropy.units.Quantity`.
    """

    if not isinstance(time_series, TimeSeries):
        raise TypeError("time_series should be a TimeSeries")

    if not isinstance(time_bin_size, u.Quantity):
        raise TypeError("time_bin_size should be a astropy.unit quantity")

    bin_size_sec = time_bin_size.to_value(u.s)

    # Use the table sorted by time (named so as not to shadow the builtin
    # ``sorted``)
    ts_sorted = time_series.iloc[:]

    # Determine start time if needed
    if time_bin_start is None:
        time_bin_start = ts_sorted.time[0]

    # Find the relative time since the start time, in seconds
    relative_time_sec = (ts_sorted.time - time_bin_start).sec

    # Determine the number of bins if needed
    if n_bins is None:
        n_bins = int(np.ceil(relative_time_sec[-1] / bin_size_sec))

    if aggregate_func is None:
        aggregate_func = np.nanmean

    # Determine the bins: n_bins + 1 edges, in seconds relative to the start
    relative_bins_sec = np.cumsum(np.hstack([0, np.repeat(bin_size_sec, n_bins)]))
    bins = time_bin_start + relative_bins_sec * u.s

    # Find the subset of the table that is inside the bins
    keep = ((relative_time_sec >= relative_bins_sec[0]) &
            (relative_time_sec < relative_bins_sec[-1]))
    subset = ts_sorted[keep]

    # Figure out which bin each row falls in - the -1 is because items
    # falling in the first bins will have index 1 but we want that to be 0
    indices = np.searchsorted(relative_bins_sec, relative_time_sec[keep]) - 1
    # Add back the first time.
    indices[relative_time_sec[keep] == relative_bins_sec[0]] = 0

    # Create new binned time series
    binned = BinnedTimeSeries(time_bin_start=bins[:-1], time_bin_end=bins[-1])

    # Determine rows where values are defined
    groups = np.hstack([0, np.nonzero(np.diff(indices))[0] + 1])

    # Find unique indices to determine which rows in the final time series
    # will not be empty.
    unique_indices = np.unique(indices)

    # Add back columns
    for colname in subset.colnames:

        if colname == 'time':
            continue

        values = subset[colname]

        # FIXME: figure out how to avoid the following, if possible
        if not isinstance(values, (np.ndarray, u.Quantity)):
            # Fill in the column name; previously the literal "{0}" was emitted.
            warnings.warn("Skipping column {0} since it has a mix-in type"
                          .format(colname), AstropyUserWarning)
            continue

        if isinstance(values, u.Quantity):
            # Bins without data stay NaN.
            data = u.Quantity(np.repeat(np.nan, n_bins), unit=values.unit)
            data[unique_indices] = u.Quantity(reduceat(values.value, groups, aggregate_func),
                                              values.unit, copy=False)
        else:
            # Bins without data stay masked.
            data = np.ma.zeros(n_bins, dtype=values.dtype)
            data.mask = 1
            data[unique_indices] = reduceat(values, groups, aggregate_func)
            data.mask[unique_indices] = 0

        binned[colname] = data

    return binned
def test_read_empty():
    """Reading through the default Table readers without a start column fails."""
    with pytest.raises(ValueError) as excinfo:
        BinnedTimeSeries.read(CSV_FILE, format='csv')

    raised = excinfo.value
    assert raised.args[0] == ('``time_bin_start_column`` should be provided '
                              'since the default Table readers are being used.')