Exemplo n.º 1
0
    def setUp(self):
        """Create the series, frame and dict fixtures used by the tests.

        TS1..TS4 are series indexed by business days starting 2009-12-11;
        DF1 and DICT1 both hold TS2 and TS4 under the keys 'x1' and 'x2'.
        """
        def bday_series(values):
            # One index entry per value, stepping by business day.
            stamps = DateRange(datetime(2009, 12, 11), periods=len(values),
                               offset=datetools.bday)
            return Series(values, index=stamps)

        self.TS1 = bday_series([3, 1, 4])
        self.TS2 = bday_series([1, 5, 9, 2, 6])
        self.TS3 = bday_series([5, np.nan, 3])
        self.TS4 = bday_series([np.nan, 5, 8, 9, 7])

        # The frame and the plain dict are built from separate dict objects.
        self.DF1 = DataFrame(data={'x1': self.TS2, 'x2': self.TS4})
        self.DICT1 = {'x1': self.TS2, 'x2': self.TS4}
Exemplo n.º 2
0
    def get_metric_statistics(self,
                              project_id,
                              end_time,
                              metric_name,
                              namespace,
                              period,
                              start_time,
                              statistics,
                              unit=None,
                              dimensions=None):
        """
        Return the list of statistics for the metric matching the given
        conditions.

        The result is a ``(datapoints, unit)`` pair. Each datapoint is an
        ``(index label, values)`` entry; rows whose values are all NaN are
        left out. When ``unit`` is falsy or the string "None", the metric's
        stored unit is looked up instead (falling back to "None").
        """
        # Rolling window length: ``period`` arrives in seconds, the index
        # below advances one minute at a time.
        window = period / 60

        # Both bounds are truncated to whole minutes.
        first_minute = start_time.replace(second=0, microsecond=0)
        last_minute = end_time.replace(second=0, microsecond=0)

        # The analysis range starts one window earlier than the reported one.
        lookback_start = first_minute - datetools.Minute() * window
        report_range = DateRange(first_minute, last_minute,
                                 offset=datetools.Minute())
        analysis_range = DateRange(lookback_start, last_minute,
                                   offset=datetools.Minute())

        # load default unit for metric from database
        if not unit or unit == "None":
            metric_key = self.cass.get_metric_key(project_id=project_id,
                                                  namespace=namespace,
                                                  metric_name=metric_name,
                                                  dimensions=dimensions)
            unit = (self.cass.get_metric_unit(metric_key)
                    if metric_key else "None")

        # load statistics data from database
        stats = self.cass.get_metric_statistics(project_id=project_id,
                                                namespace=namespace,
                                                metric_name=metric_name,
                                                start_time=lookback_start,
                                                end_time=end_time,
                                                period=period,
                                                statistics=statistics,
                                                dimensions=dimensions)

        table = DataFrame(index=report_range)

        for name, raw in zip(statistics, stats):
            rolling = self.ROLLING_FUNC_MAP[name]
            rolled = rolling(TimeSeries(raw, index=analysis_range), window,
                             min_periods=0)
            # Keep every ``window``-th rolled value.
            table[name] = rolled.ix[::window]
            LOG.debug("stat %s\n%s" % (name, table[name]))

        def non_empty_row(label):
            # Implicitly returns None for rows with no values left, which
            # the filter below discards.
            row = table.ix[label].dropna()
            if len(row):
                return label, row

        datapoints = filter(None,
                            (non_empty_row(label) for label in table.index))
        return datapoints, unit
Exemplo n.º 3
0
def rountrip_archive(N, K=50, iterations=10):
    """Time saving and re-loading an N x K random DataFrame.

    Parameters
    ----------
    N : int
        Number of rows; the frame is indexed by N one-minute steps starting
        at 1/1/2000.
    K : int
        Number of random float columns. One extra string column (201) is
        added on top of these.
    iterations : int
        Number of repetitions handed to ``timeit``; the printed figure is
        the per-iteration average.

    The scratch files are removed before and after the measurement.
    """
    def remove_if_present(path):
        # Best-effort cleanup: a scratch file that is missing (or cannot be
        # removed) must not abort the benchmark. Only OS-level failures are
        # ignored, so KeyboardInterrupt and genuine bugs still surface.
        try:
            os.unlink(path)
        except OSError:
            pass

    # Create data
    arr = np.random.randn(N, K)
    # lar = la.larry(arr)
    dma = pandas.DataFrame(
        arr, DateRange('1/1/2000', periods=N, offset=datetools.Minute()))
    dma[201] = 'bar'

    # filenames
    filename_numpy = '/Users/wesm/tmp/numpy.npz'
    filename_larry = '/Users/wesm/tmp/archive.hdf5'
    filename_pandas = '/Users/wesm/tmp/pandas_tmp'
    scratch_files = (filename_numpy, filename_larry, filename_pandas)

    # Delete old files
    for path in scratch_files:
        remove_if_present(path)

    # Time a round trip save and load
    # numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr)
    # numpy_time = timeit(numpy_f, iterations) / iterations

    # larry_f = lambda: larry_roundtrip(filename_larry, lar, lar)
    # larry_time = timeit(larry_f, iterations) / iterations

    pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pandas_time = timeit(pandas_f, iterations) / iterations
    print('pandas (HDF5) %7.4f seconds' % pandas_time)

    # NOTE(review): this times pandas_roundtrip a second time although the
    # label says "pickle" -- presumably a pickle-specific round trip was
    # intended; confirm before trusting the second figure.
    pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pickle_time = timeit(pickle_f, iterations) / iterations
    print('pandas (pickle) %7.4f seconds' % pickle_time)

    # print('Numpy (npz)   %7.4f seconds' % numpy_time)
    # print('larry (HDF5)  %7.4f seconds' % larry_time)

    # Delete old files
    for path in scratch_files:
        remove_if_present(path)
Exemplo n.º 4
0
    def _make_predict_dates(self):
        """Build the index for the prediction window and store it.

        The result is written to ``self.data.predict_dates``:

        * with a frequency: a date index from ``predict_start`` to
          ``predict_end`` at that frequency;
        * without a frequency and integer bounds: an integer ``Index``
          covering ``predict_start`` through ``predict_end``;
        * otherwise (in sample): the matching slice of the existing dates.
        """
        data = self.data
        first = data.predict_start
        last = data.predict_end
        freq = data.freq

        if freq is None:
            if isinstance(first, int) and isinstance(last, int):
                from pandas import Index
                predict_dates = Index(lrange(first, last + 1))
            else:
                # No frequency and non-integer bounds: we're in sample, so
                # slice the known dates by position.
                known = self.data.dates
                lo = self._get_dates_loc(known, first)
                hi = self._get_dates_loc(known, last)
                # NOTE(review): the original author was unsure whether this
                # slice should include the end position -- confirm.
                predict_dates = known[lo:hi + 1]
        else:
            pandas_freq = _freq_to_pandas[freq]
            try:
                from pandas import DatetimeIndex
                predict_dates = DatetimeIndex(start=first, end=last,
                                              freq=pandas_freq)
            except ImportError:
                # pandas without DatetimeIndex: use the DateRange API.
                from pandas import DateRange
                predict_dates = DateRange(first, last,
                                          offset=pandas_freq).values
        self.data.predict_dates = predict_dates
Exemplo n.º 5
0
    def setUp(self):
        """Create the sparse frame fixtures shared by the tests.

        ``frame`` and ``iframe`` are built from the same dict (the latter
        with ``default_kind='integer'``); ``zframe`` and ``fill_frame`` are
        built from ``frame``'s values with NaN replaced by 0 and 2
        respectively, using that same number as the default fill value.
        """
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]
        }
        self.dates = DateRange('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)
        self.iframe = SparseDataFrame(self.data, index=self.dates,
                                      default_kind='integer')

        def filled_frame(fill):
            # Work on a copy so the values of self.frame stay untouched.
            dense = self.frame.values.copy()
            dense[np.isnan(dense)] = fill
            return SparseDataFrame(dense,
                                   columns=['A', 'B', 'C', 'D'],
                                   default_fill_value=fill,
                                   index=self.dates)

        self.zframe = filled_frame(0)
        self.fill_frame = filled_frame(2)

        self.empty = SparseDataFrame()
Exemplo n.º 6
0
def parse_lutkepohl_data(path): # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file. The file is expected to contain a
        leading block ending in ``*/``, followed by a header line matching
        ``<YEAR F N>`` (F a single character keyed into the Q/M/A offsets
        below, N a number), followed by a row of column names and the data.

    Returns
    -------
    data : structured ndarray
        The table as read by ``np.genfromtxt(..., names=True)``.
    date_range : pandas date index
        ``len(data)`` dates at the frequency given in the header, starting
        at period N of YEAR.

    Raises
    ------
    IndexError
        If the file ends before ``*/`` or the header line is found.
    KeyError
        If the frequency character is not one of Q, M or A.
    """

    from collections import deque
    from datetime import datetime
    import pandas
    import pandas.core.datetools as dt
    import re
    from statsmodels.compatnp.py3k import asbytes

    # Raw string: \w and \d are regex classes, not string escapes.
    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))

    # Read all lines up front and close the handle straight away.
    with open(path, 'rb') as handle:
        lines = deque(handle)

    # Consume the leading block. The line holding '*/' is popped but not
    # counted, so to_skip ends up one less than the lines consumed.
    to_skip = 0
    while asbytes('*/') not in lines.popleft():
        to_skip += 1

    # Consume lines up to and including the <YEAR F N> header.
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    # +1 makes up for the uncounted '*/' line.
    data = np.genfromtxt(path, names=True, skip_header=to_skip+1)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        asbytes('Q') : dt.BQuarterEnd(),
        asbytes('M') : dt.BMonthEnd(),
        asbytes('A') : dt.BYearEnd()
    }

    offset = offsets[freq]

    # First period end on or after Jan 1 of the year, advanced by
    # (start_point - 1) periods.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    try:
        from pandas import DatetimeIndex   # pylint: disable=E0611
        date_range = DatetimeIndex(start=start_date, freq=offset, periods=n)
    except ImportError:
        # pandas without DatetimeIndex: use the DateRange API.
        from pandas import DateRange
        date_range = DateRange(start_date, offset=offset, periods=n)

    return data, date_range
Exemplo n.º 7
0
    def _get_range(self):
        """Return a one-minute-step date range around the current UTC minute.

        The range runs from ``left_offset`` seconds before to
        ``right_offset`` seconds after the current time truncated to the
        minute.
        """
        current_minute = datetime.utcnow().replace(second=0, microsecond=0)
        window_start = current_minute - timedelta(seconds=self.left_offset)
        window_end = current_minute + timedelta(seconds=self.right_offset)
        return DateRange(window_start, window_end, offset=datetools.Minute())
Exemplo n.º 8
0
 def _make_predict_dates(self):
     """Store the prediction window's dates on ``self._data.predict_dates``.

     The dates run from ``predict_start`` to ``predict_end`` at the pandas
     offset mapped from the data's frequency.
     """
     from pandas import DateRange
     model_data = self._data
     first, last, freq = (model_data.predict_start,
                          model_data.predict_end,
                          model_data.freq)
     span = DateRange(first, last, offset=_freq_to_pandas[freq])
     self._data.predict_dates = span.values
Exemplo n.º 9
0
def test_predict_freq():
    """Predicted dates must have the same frequency as the model's dates."""
    x = np.arange(1, 36.)

    def predicted_dates(index, expected_freq, first, last):
        # Fit the index, check the detected frequency, then ask the model
        # for the dates of the [first, last] prediction window.
        model = TimeSeriesModel(Series(x, index=index))
        npt.assert_(model.data.freq == expected_freq)
        model._get_predict_start(first)
        model._get_predict_end(last)
        model._make_predict_dates()
        return model.data.predict_dates

    if _pandas_08x:
        from pandas import date_range

        # there's a bug in pandas up to 0.10.2 for YearBegin, so the
        # year-end frequency "A-APR" is used here instead of "AS-APR"
        result = predicted_dates(
            date_range("1972-4-30", "2006-4-30", freq="A-APR"),
            "A-APR", "2006-4-30", "2016-4-30")
        npt.assert_equal(
            result, date_range("2006-4-30", "2016-4-30", freq="A-APR"))
    else:
        from pandas import DateRange, datetools

        result = predicted_dates(
            DateRange("1972-1-1", "2007-1-1", offset=datetools.yearEnd),
            "A", "2006-12-31", "2016-12-31")
        npt.assert_array_equal(
            result,
            DateRange("2006-12-31", "2016-12-31", offset=datetools.yearEnd))
Exemplo n.º 10
0
    def test_timeseries_preepoch(self):
        """Round-trip a series whose index starts in 1940.

        Skipped on Python 2 releases older than 2.7, and when the round
        trip raises OverflowError.
        """
        major, minor = sys.version_info[:2]
        if major == 2 and minor < 7:
            raise nose.SkipTest

        stamps = DateRange('1/1/1940', '1/1/1960')
        series = Series(np.random.randn(len(stamps)), index=stamps)
        try:
            self._check_roundtrip(series, tm.assert_series_equal)
        except OverflowError:
            raise nose.SkipTest('known failer on some windows platforms')
Exemplo n.º 11
0
def panel_data2():
    """Return a 9-row frame with columns A-D holding NaN gaps.

    The rows are indexed by ``DateRange('1/1/2011', periods=9)``.
    """
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5],
        'B': [0, 1, 2, 3, 4, 5, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan]
    }
    return DataFrame(columns, index=DateRange('1/1/2011', periods=9))
Exemplo n.º 12
0
def panel_data3():
    """Return a 10-row frame with columns A-D holding NaN gaps.

    The rows are indexed by ``DateRange('1/1/2011', periods=10)`` shifted
    by -2.
    """
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan]
    }
    shifted_index = DateRange('1/1/2011', periods=10).shift(-2)
    return DataFrame(columns, index=shifted_index)
Exemplo n.º 13
0
 def _make_predict_dates(self):
     """Store the prediction window's dates on ``self._data.predict_dates``.

     The dates run from ``predict_start`` to ``predict_end`` at the pandas
     frequency mapped from the data's frequency. ``DatetimeIndex`` is used
     when pandas provides it, otherwise the ``DateRange`` API.

     Raises KeyError if the data's frequency has no entry in
     ``_freq_to_pandas``.
     """
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     pandas_freq = _freq_to_pandas[freq]
     try:
         from pandas import DatetimeIndex
         dates = DatetimeIndex(start=dtstart, end=dtend, freq=pandas_freq)
     except ImportError:
         # pandas without DatetimeIndex: use the DateRange API.
         from pandas import DateRange
         dates = DateRange(dtstart, dtend, offset=pandas_freq).values
     # Without this assignment the computed dates would be thrown away.
     self._data.predict_dates = dates
Exemplo n.º 14
0
def test_infer_freq():
    """Check ``_infer_freq`` on full date ranges and on short slices."""
    from pandas import DateRange
    first = datetime(2008, 12, 31)
    last = datetime(2012, 9, 30)

    codes = ('B', 'D', 'W', 'M', 'A', 'Q')
    values = {}
    for code in codes:
        values[code] = DateRange(first, last,
                                 offset=_freq_to_pandas[code]).values

    # Business-day slices: one is expected to be seen as 'B', the leading
    # one as plain daily.
    assert _infer_freq(values['B'][2:5]) == 'B'
    assert _infer_freq(values['B'][:3]) == 'D'

    # Full ranges map back to their own code.
    for code in codes:
        assert _infer_freq(values[code]) == code

    # So do the first three entries of every non-business range.
    for code in codes[1:]:
        assert _infer_freq(values[code][:3]) == code
Exemplo n.º 15
0
def test_infer_freq():
    """Check ``_infer_freq`` on full date ranges and on two-entry slices."""
    from pandas import DateRange
    first = datetime(2008, 12, 31)
    last = datetime(2012, 9, 30)

    codes = ('B', 'D', 'W', 'M', 'A', 'Q')
    values = {}
    for code in codes:
        values[code] = DateRange(first, last,
                                 offset=_freq_to_pandas[code]).values

    # Full ranges map back to their own code.
    for code in codes:
        npt.assert_string_equal(_infer_freq(values[code]), code)

    # Business-day slices: one is expected to be seen as 'B', the leading
    # one as plain daily.
    npt.assert_string_equal(_infer_freq(values['B'][2:4]), 'B')
    npt.assert_string_equal(_infer_freq(values['B'][:2]), 'D')

    # The first two entries of every non-business range keep their code.
    for code in codes[1:]:
        npt.assert_string_equal(_infer_freq(values[code][:2]), code)
Exemplo n.º 16
0
def _idx_from_dates(d1, d2, freq):
    """
    Returns an index offset from datetimes d1 and d2. d1 is expected to be the
    last date in a date series and d2 is the out of sample date.

    Notes
    -----
    Rounds down the index if the end date is before the next date at freq.
    Does not check the start date to see whether it is on the offset but
    assumes that it is.
    """
    from pandas import DateRange
    span = DateRange(d1, d2, offset=_freq_to_pandas[freq])
    # The span counts d1 itself, hence the -1.
    return len(span) - 1
Exemplo n.º 17
0
    def test_setitem_ndarray(self):
        """Assigning a 2-D ndarray to a Panel item stores those values."""
        from pandas import DateRange, datetools

        n_lon, n_lat = 72, 36
        month_ends = DateRange(start=datetime(2009, 1, 1),
                               end=datetime(2009, 12, 31),
                               offset=datetools.MonthEnd())
        longitudes = np.linspace(-177.5, 177.5, n_lon)
        latitudes = np.linspace(-87.5, 87.5, n_lat)
        panel = Panel(items=month_ends, major_axis=longitudes,
                      minor_axis=latitudes)

        block = np.random.randn(n_lon * n_lat).reshape((n_lon, n_lat))
        feb_end = datetime(2009, 2, 28)
        panel[feb_end] = block

        assert_almost_equal(panel[feb_end].values, block)
Exemplo n.º 18
0
def test_keyerror_start_date():
    """A start date before the model's first date must raise ValueError."""
    x = np.arange(1, 36.)

    if _pandas_08x:
        from pandas import date_range

        # there's a bug in pandas up to 0.10.2 for YearBegin, so the
        # year-end frequency "A-APR" is used here instead of "AS-APR"
        dates = date_range("1972-4-30", "2006-4-30", freq="A-APR")
    else:
        from pandas import DateRange, datetools
        dates = DateRange("1972-1-1", "2007-1-1", offset=datetools.yearEnd)

    model = TimeSeriesModel(Series(x, index=dates))
    npt.assert_raises(ValueError, model._get_predict_start, "1970-4-30")
Exemplo n.º 19
0
    def test_shift(self):
        """Sparse ``shift`` must match the dense result and return a new object."""
        def make_series(index):
            return SparseSeries([nan, 1., 2., 3., nan, nan], index=index)

        series = make_series(np.arange(6))

        # A zero shift still yields a distinct, equal object.
        unshifted = series.shift(0)
        self.assert_(unshifted is not series)
        assert_sp_series_equal(unshifted, series)

        _dense_series_compare(series, lambda s: s.shift(1))
        _dense_series_compare(series, lambda s: s.shift(-2))

        # Date-indexed series: shift by a time rule and by an offset.
        series = make_series(DateRange('1/1/2000', periods=6))
        _dense_series_compare(series,
                              lambda s: s.shift(2, timeRule='WEEKDAY'))
        _dense_series_compare(series,
                              lambda s: s.shift(2, offset=datetools.bday))
Exemplo n.º 20
0
    def test_constructor(self):
        """Exercise the SparseSeries constructor with the fixture series."""
        # sanity-check the fixtures first
        for fixture, index_type in ((self.bseries, BlockIndex),
                                    (self.iseries, IntIndex)):
            self.assert_(np.isnan(fixture.fill_value))
            self.assert_(isinstance(fixture.sp_index, index_type))

        self.assertEquals(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))

        # building from a SparseSeries reproduces it
        for original in (self.bseries, self.iseries, self.zbseries):
            assert_sp_series_equal(SparseSeries(original), original)

        # a date index turns the result into a sparse time series
        stamps = DateRange('1/1/2000', periods=len(self.bseries))
        dated = SparseSeries(self.bseries, index=stamps)
        self.assert_(isinstance(dated, SparseTimeSeries))

        # building from a dense Series keeps the sparse values
        from_dense = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, from_dense.sp_values)

        # pass dict?

        # by default the input array is shared: writes show up in it
        values = np.ones(len(self.bseries.sp_values))
        shared = SparseSeries(values, sparse_index=self.bseries.sp_index)
        shared.sp_values[:5] = 97
        self.assert_(values[0] == 97)

        # with copy=True the input array is left alone
        copied = SparseSeries(values,
                              sparse_index=self.bseries.sp_index,
                              copy=True)
        copied.sp_values[:5] = 100
        self.assert_(values[0] == 97)
Exemplo n.º 21
0
    def setUp(self):
        """Create block- and integer-kind sparse series for each test data set.

        The ``z``-prefixed fixtures use a fill value of 0; ``btseries`` is
        the first data set on a date index; ``ts`` aliases ``bseries``.
        """
        def block_and_integer(arr, index, **extra):
            # The same data, once per sparse index kind.
            return (SparseSeries(arr, index=index, kind='block', **extra),
                    SparseSeries(arr, index=index, kind='integer', **extra))

        arr, index = _test_data1()
        self.bseries, self.iseries = block_and_integer(arr, index)
        self.bseries.name = 'bseries'
        self.ts = self.bseries

        stamps = DateRange('1/1/2011', periods=len(index))
        self.btseries = SparseSeries(arr, index=stamps, kind='block')

        arr, index = _test_data2()
        self.bseries2, self.iseries2 = block_and_integer(arr, index)

        arr, index = _test_data1_zero()
        self.zbseries, self.ziseries = block_and_integer(arr, index,
                                                         fill_value=0)

        arr, index = _test_data2_zero()
        self.zbseries2, self.ziseries2 = block_and_integer(arr, index,
                                                           fill_value=0)
Exemplo n.º 22
0
def test_infer_freq():
    """Check ``_infer_freq`` under both the newer and the older pandas API.

    With ``_pandas_08x`` set, weekly/annual/quarterly ranges are expected to
    come back with anchored names (W-SUN, A-DEC, Q-DEC); otherwise with the
    plain codes.
    """
    d1 = datetime(2008, 12, 31)
    d2 = datetime(2012, 9, 30)

    if _pandas_08x:
        def build(code):
            return DatetimeIndex(start=d1, end=d2,
                                 freq=_freq_to_pandas[code]).values
        anchored = (('W', 'W-SUN'), ('A', 'A-DEC'), ('Q', 'Q-DEC'))
    else:
        from pandas import DateRange

        def build(code):
            return DateRange(d1, d2, offset=_freq_to_pandas[code]).values
        anchored = (('W', 'W'), ('A', 'A'), ('Q', 'Q'))

    values = {}
    for code in ('B', 'D', 'W', 'M', 'A', 'Q'):
        values[code] = build(code)

    # Version-dependent names: full ranges first, then three-entry slices.
    for code, expected in anchored:
        assert _infer_freq(values[code]) == expected
    for code, expected in anchored:
        assert _infer_freq(values[code][:3]) == expected

    # Version-independent expectations.
    assert _infer_freq(values['B'][2:5]) == 'B'
    assert _infer_freq(values['B'][:3]) == 'D'
    for code in ('B', 'D', 'M'):
        assert _infer_freq(values[code]) == code
    for code in ('D', 'M'):
        assert _infer_freq(values[code][:3]) == code
Exemplo n.º 23
0
 def test_can_serialize_dates(self):
     """Round-trip a frame whose index holds the ``.date()`` of each range entry."""
     plain_dates = [stamp.date()
                    for stamp in DateRange('1/1/2000', '1/30/2000')]
     payload = np.random.randn(len(plain_dates), 4)
     frame = DataFrame(payload, index=plain_dates)
     self._check_roundtrip(frame, tm.assert_frame_equal)
Exemplo n.º 24
0
    def daysLeft(self, date):
        """Return the number of business days from ``date`` to expiration.

        The count covers every business day between ``date`` and
        ``self.expirationDate()``, with both ends included when they fall
        on a business day.
        """
        try:
            # Newer pandas no longer ships DateRange; bdate_range is the
            # business-day replacement.
            from pandas import bdate_range
        except ImportError:
            # Old pandas releases without bdate_range.
            from pandas import DateRange
            return len(DateRange(date, self.expirationDate()))
        return len(bdate_range(date, self.expirationDate()))
Exemplo n.º 25
0
 def daysLeft(self, date):
     """Return the length of the date range from ``date`` to expiration.

     Described by the original author as the business days left until the
     expiration date.
     """
     return len(DateRange(date, self.expirationDate()))