Пример #1
0
    def setUp(self):
        """Create the series, frame and dict fixtures used by the tests.

        ``TS1`` to ``TS4`` are series indexed by a ``DateRange`` that starts
        on 2009-12-11 with the ``datetools.bday`` offset and has one period
        per value; ``TS3`` and ``TS4`` contain NaN entries.  ``DF1`` (a
        DataFrame) and ``DICT1`` (a plain dict) both map ``'x1'`` to ``TS2``
        and ``'x2'`` to ``TS4``.
        """
        def bday_series(values):
            # One index period per value, so index and data always line up.
            index = DateRange(datetime(2009, 12, 11), periods=len(values),
                              offset=datetools.bday)
            return Series(values, index=index)

        self.TS1 = bday_series([3, 1, 4])
        self.TS2 = bday_series([1, 5, 9, 2, 6])
        self.TS3 = bday_series([5, np.nan, 3])
        self.TS4 = bday_series([np.nan, 5, 8, 9, 7])

        # The frame and the dict are built from separate dict objects.
        self.DF1 = DataFrame(data={'x1': self.TS2, 'x2': self.TS4})
        self.DICT1 = {'x1': self.TS2, 'x2': self.TS4}
Пример #2
0
    def get_metric_statistics(self,
                              project_id,
                              end_time,
                              metric_name,
                              namespace,
                              period,
                              start_time,
                              statistics,
                              unit=None,
                              dimensions=None):
        """
        Return the list of statistics datapoints for the metric matching the
        given conditions, together with the unit.

        ``period`` is given in seconds and is converted to whole minutes
        internally.  ``statistics`` is a sequence of keys into
        ``self.ROLLING_FUNC_MAP``.  When ``unit`` is falsy or the string
        "None", the unit is looked up through ``self.cass``; if no metric
        key is found it stays "None".

        Returns a ``(datapoints, unit)`` tuple.  ``datapoints`` is a list of
        ``(index, values)`` pairs, one per minute between ``start_time`` and
        ``end_time`` (both truncated to the minute); rows that are empty
        after dropping NaN values are omitted.
        """
        def to_datapoint(df, idx):
            # Returns None (implicitly) when the row has no non-NaN value.
            datapoint = df.ix[idx].dropna()
            if len(datapoint):
                return idx, datapoint

        # Floor division keeps the minute count an integer on Python 3 as
        # well; the offset arithmetic and the slice step below need an int.
        period_min = period // 60  # convert sec to min

        end_idx = end_time.replace(second=0, microsecond=0)
        start_idx = start_time.replace(second=0, microsecond=0)
        # The analysis window starts one period earlier than the requested
        # window; the rolling functions are applied over this wider range.
        start_ana_idx = start_idx - datetools.Minute() * period_min
        daterange = DateRange(start_idx, end_idx, offset=datetools.Minute())
        daterange_ana = DateRange(start_ana_idx,
                                  end_idx,
                                  offset=datetools.Minute())

        # load default unit for metric from database
        if unit == "None" or not unit:
            metric_key = self.cass.get_metric_key(project_id=project_id,
                                                  namespace=namespace,
                                                  metric_name=metric_name,
                                                  dimensions=dimensions)

            if metric_key:
                unit = self.cass.get_metric_unit(metric_key)
            else:
                unit = "None"

        # load statistics data from database (period is still in seconds)
        stats = self.cass.get_metric_statistics(project_id=project_id,
                                                namespace=namespace,
                                                metric_name=metric_name,
                                                start_time=start_ana_idx,
                                                end_time=end_time,
                                                period=period,
                                                statistics=statistics,
                                                dimensions=dimensions)

        stat = DataFrame(index=daterange)

        for statistic, series in zip(statistics, stats):
            func = self.ROLLING_FUNC_MAP[statistic]
            ts = TimeSeries(series, index=daterange_ana)
            rolled_ts = func(ts, period_min, min_periods=0)
            # Keep every ``period_min``-th rolled value.
            stat[statistic] = rolled_ts.ix[::period_min]
            LOG.debug("stat %s\n%s" % (statistic, stat[statistic]))

        # Build a real list so the result is the same type on Python 2 and 3.
        candidates = (to_datapoint(stat, i) for i in stat.index)
        ret = [datapoint for datapoint in candidates if datapoint]
        return ret, unit
Пример #3
0
def rountrip_archive(N, K=50, iterations=10):
    """Time round-trip saves and loads of a random ``N`` x ``K`` DataFrame.

    Parameters
    ----------
    N : int
        Number of rows; the frame is indexed by a minute-frequency
        ``DateRange`` of ``N`` periods starting at 1/1/2000.
    K : int, optional
        Number of random columns (a string column ``201`` is added too).
    iterations : int, optional
        Number of repetitions passed to ``timeit``; the printed figure is
        the total divided by this count.

    The temporary files are removed before and after the run.  Nothing is
    returned; timings are printed.
    """
    def remove_if_present(path):
        # Best-effort cleanup: a missing file is expected here, but only
        # OS-level errors are ignored so that e.g. KeyboardInterrupt still
        # propagates.
        try:
            os.unlink(path)
        except OSError:
            pass

    # Create data
    arr = np.random.randn(N, K)
    # lar = la.larry(arr)
    dma = pandas.DataFrame(
        arr, DateRange('1/1/2000', periods=N, offset=datetools.Minute()))
    dma[201] = 'bar'

    # filenames
    filename_numpy = '/Users/wesm/tmp/numpy.npz'
    filename_larry = '/Users/wesm/tmp/archive.hdf5'
    filename_pandas = '/Users/wesm/tmp/pandas_tmp'
    all_files = (filename_numpy, filename_larry, filename_pandas)

    # Delete old files
    for path in all_files:
        remove_if_present(path)

    # Time a round trip save and load
    # numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr)
    # numpy_time = timeit(numpy_f, iterations) / iterations

    # larry_f = lambda: larry_roundtrip(filename_larry, lar, lar)
    # larry_time = timeit(larry_f, iterations) / iterations

    pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pandas_time = timeit(pandas_f, iterations) / iterations
    print('pandas (HDF5) %7.4f seconds' % pandas_time)

    # NOTE(review): this times the same pandas_roundtrip call as above even
    # though it is reported as "pickle" -- confirm whether a pickle-specific
    # round-trip helper was intended here.
    pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pickle_time = timeit(pickle_f, iterations) / iterations
    print('pandas (pickle) %7.4f seconds' % pickle_time)

    # print('Numpy (npz)   %7.4f seconds' % numpy_time)
    # print('larry (HDF5)  %7.4f seconds' % larry_time)

    # Delete old files
    for path in all_files:
        remove_if_present(path)
Пример #4
0
    def _make_predict_dates(self):
        """Compute the prediction dates and store them on ``self.data``.

        Three cases are handled:

        * ``freq`` is set: build a date index from ``predict_start`` to
          ``predict_end`` at the mapped pandas frequency.
        * ``freq`` is None and both bounds are ints: build an integer
          ``Index`` covering ``predict_start`` .. ``predict_end`` inclusive.
        * otherwise: slice the existing ``self.data.dates`` between the
          located start and end positions (in-sample prediction).
        """
        data = self.data
        first = data.predict_start
        last = data.predict_end
        freq = data.freq

        if freq is None:
            if isinstance(first, int) and isinstance(last, int):
                from pandas import Index
                dates = Index(lrange(first, last + 1))
            else:
                # Bounds are not integers, so we are in sample.
                dates = self.data.dates
                lo = self._get_dates_loc(dates, first)
                hi = self._get_dates_loc(dates, last)
                dates = dates[lo:hi + 1]  # is this index inclusive?
        else:
            pandas_freq = _freq_to_pandas[freq]
            try:
                from pandas import DatetimeIndex
                dates = DatetimeIndex(start=first, end=last,
                                      freq=pandas_freq)
            except ImportError:
                # Fallback for pandas versions without DatetimeIndex.
                from pandas import DateRange
                dates = DateRange(first, last, offset=pandas_freq).values

        self.data.predict_dates = dates
Пример #5
0
    def setUp(self):
        """Build the sparse frame fixtures.

        ``frame`` and ``iframe`` are built from the raw column dict (the
        latter with ``default_kind='integer'``); ``zframe`` and
        ``fill_frame`` are built from a copy of ``frame``'s values with NaN
        replaced by 0 and 2 respectively, using the same number as
        ``default_fill_value``.  ``empty`` is a frame built with no
        arguments.
        """
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]
        }
        self.dates = DateRange('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)
        self.iframe = SparseDataFrame(self.data,
                                      index=self.dates,
                                      default_kind='integer')

        def frame_filled_with(fill):
            # Work on a copy so self.frame's own values are left untouched.
            dense = self.frame.values.copy()
            dense[np.isnan(dense)] = fill
            return SparseDataFrame(dense,
                                   columns=['A', 'B', 'C', 'D'],
                                   default_fill_value=fill,
                                   index=self.dates)

        self.zframe = frame_filled_with(0)
        self.fill_frame = frame_filled_with(2)

        self.empty = SparseDataFrame()
Пример #6
0
def parse_lutkepohl_data(path): # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    The file is expected to begin with a comment block terminated by a line
    containing ``*/``, followed by a line matching ``<YEAR FN>`` where ``F``
    is a one-character frequency code (``Q``, ``M`` or ``A``) and ``N`` the
    starting period within the year.

    Parameters
    ----------
    path : str
        Location of the data file.

    Returns
    -------
    data : ndarray
        Result of ``np.genfromtxt(..., names=True)`` on the rows that follow
        the header.
    date_range : DatetimeIndex or DateRange
        One date per data row at the business quarter/month/year-end offset
        selected by the frequency code.
    """

    from collections import deque
    from datetime import datetime
    import pandas
    import pandas.core.datetools as dt
    import re
    from statsmodels.compatnp.py3k import asbytes

    # Raw string: \w and \d are regex classes, not string escapes.
    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))
    # Read all lines up front and close the file handle right away.
    with open(path, 'rb') as fin:
        lines = deque(fin)

    # Count the lines of the leading comment block.
    to_skip = 0
    while asbytes('*/') not in lines.popleft():
        to_skip += 1

    # Keep counting until the "<year freq start>" descriptor line is found.
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=to_skip+1)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        asbytes('Q') : dt.BQuarterEnd(),
        asbytes('M') : dt.BMonthEnd(),
        asbytes('A') : dt.BYearEnd()
    }

    # create an instance
    offset = offsets[freq]

    # Move forward (start_point - 1) periods from the first period end of
    # the year.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    try:
        from pandas import DatetimeIndex   # pylint: disable=E0611
        date_range = DatetimeIndex(start=start_date, freq=offset, periods=n)
    except ImportError:
        # Fallback for pandas versions without DatetimeIndex.
        from pandas import DateRange
        date_range = DateRange(start_date, offset=offset, periods=n)

    return data, date_range
Пример #7
0
    def _get_range(self):
        """Return a minute-offset ``DateRange`` around the current UTC time.

        The current UTC time is truncated to the minute; the range runs from
        ``self.left_offset`` seconds before it to ``self.right_offset``
        seconds after it.
        """
        current_minute = datetime.utcnow().replace(second=0, microsecond=0)
        lower = current_minute - timedelta(seconds=self.left_offset)
        upper = current_minute + timedelta(seconds=self.right_offset)
        return DateRange(lower, upper, offset=datetools.Minute())
Пример #8
0
 def _make_predict_dates(self):
     """Build the prediction dates and store them on ``self._data``.

     The dates are the ``.values`` of a ``DateRange`` from ``predict_start``
     to ``predict_end`` at the pandas offset mapped from ``freq``.
     """
     from pandas import DateRange
     model_data = self._data
     first = model_data.predict_start
     last = model_data.predict_end
     offset = _freq_to_pandas[model_data.freq]
     date_span = DateRange(first, last, offset=offset)
     self._data.predict_dates = date_span.values
Пример #9
0
def test_predict_freq():
    """Check that predicted dates have the same frequency as the data."""
    values = np.arange(1, 36.)

    if not _pandas_08x:
        # Older pandas: DateRange with a year-end offset.
        from pandas import DateRange, datetools
        index = DateRange("1972-1-1", "2007-1-1", offset=datetools.yearEnd)
        model = TimeSeriesModel(Series(values, index=index))
        npt.assert_(model.data.freq == "A")

        # Return values are not needed here; presumably these calls record
        # the prediction bounds on the model -- confirm against the model.
        model._get_predict_start("2006-12-31")
        model._get_predict_end("2016-12-31")
        model._make_predict_dates()

        predicted = model.data.predict_dates
        expected = DateRange("2006-12-31",
                             "2016-12-31",
                             offset=datetools.yearEnd)
        npt.assert_array_equal(predicted, expected)
        return

    from pandas import date_range

    # there's a bug in pandas up to 0.10.2 for YearBegin, so "A-APR" is
    # used in place of "AS-APR".
    index = date_range("1972-4-30", "2006-4-30", freq="A-APR")
    model = TimeSeriesModel(Series(values, index=index))
    npt.assert_(model.data.freq == "A-APR")

    # Return values are not needed here; presumably these calls record the
    # prediction bounds on the model -- confirm against the model.
    model._get_predict_start("2006-4-30")
    model._get_predict_end("2016-4-30")
    model._make_predict_dates()

    predicted = model.data.predict_dates
    expected = date_range("2006-4-30", "2016-4-30", freq="A-APR")
    npt.assert_equal(predicted, expected)
Пример #10
0
    def test_timeseries_preepoch(self):
        """Round-trip a random series indexed from 1940 to 1960.

        Skipped on Python 2 before 2.7, and when the round trip raises
        OverflowError.
        """
        major, minor = sys.version_info[0], sys.version_info[1]
        if major == 2 and minor < 7:
            raise nose.SkipTest

        index = DateRange('1/1/1940', '1/1/1960')
        series = Series(np.random.randn(len(index)), index=index)
        try:
            self._check_roundtrip(series, tm.assert_series_equal)
        except OverflowError:
            raise nose.SkipTest('known failer on some windows platforms')
Пример #11
0
def panel_data2():
    """Return a 9-row, 4-column DataFrame with NaN runs in every column.

    The index is a ``DateRange`` of 9 periods starting at 1/1/2011.
    """
    dates = DateRange('1/1/2011', periods=9)
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5],
        'B': [0, 1, 2, 3, 4, 5, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan]
    }
    return DataFrame(columns, index=dates)
Пример #12
0
def panel_data3():
    """Return a 10-row, 4-column DataFrame with NaN runs in every column.

    The index is a ``DateRange`` of 10 periods starting at 1/1/2011,
    shifted by -2.
    """
    dates = DateRange('1/1/2011', periods=10)
    shifted_dates = dates.shift(-2)
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan]
    }
    return DataFrame(columns, index=shifted_dates)
Пример #13
0
 def _make_predict_dates(self):
     """Build the prediction dates and store them on ``self._data``.

     The dates run from ``predict_start`` to ``predict_end`` at the pandas
     frequency mapped from ``freq``.  ``DatetimeIndex`` is used when pandas
     provides it; otherwise the ``.values`` of a ``DateRange`` are used.
     """
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     pandas_freq = _freq_to_pandas[freq]
     try:
         from pandas import DatetimeIndex
         dates = DatetimeIndex(start=dtstart, end=dtend, freq=pandas_freq)
     except ImportError:
         # Fallback for pandas versions without DatetimeIndex.  The
         # "except X as/nothing" form parses on both Python 2.6+ and 3,
         # unlike the old "except X, err" spelling.
         from pandas import DateRange
         dates = DateRange(dtstart, dtend, offset=pandas_freq).values
     # Store the result; without this the computed dates were discarded.
     self._data.predict_dates = dates
Пример #14
0
def test_infer_freq():
    """Check ``_infer_freq`` on full date arrays and on 3-element slices.

    Each frequency code should be inferred from its own date array; a
    leading slice of business days is expected to be inferred as daily.
    """
    from pandas import DateRange
    begin = datetime(2008, 12, 31)
    finish = datetime(2012, 9, 30)

    codes = ('B', 'D', 'W', 'M', 'A', 'Q')
    arrays = dict(
        (code, DateRange(begin, finish, offset=_freq_to_pandas[code]).values)
        for code in codes)

    # Business-day special cases.
    assert _infer_freq(arrays['B'][2:5]) == 'B'
    assert _infer_freq(arrays['B'][:3]) == 'D'

    # Whole arrays.
    for code in codes:
        assert _infer_freq(arrays[code]) == code

    # First three entries ('B' is covered by the special cases above).
    for code in codes[1:]:
        assert _infer_freq(arrays[code][:3]) == code
Пример #15
0
def test_infer_freq():
    """Check ``_infer_freq`` on full date arrays and on 2-element slices.

    Each frequency code should be inferred from its own date array; the
    first two business days are expected to be inferred as daily.
    """
    from pandas import DateRange
    begin = datetime(2008, 12, 31)
    finish = datetime(2012, 9, 30)

    codes = ('B', 'D', 'W', 'M', 'A', 'Q')
    arrays = dict(
        (code, DateRange(begin, finish, offset=_freq_to_pandas[code]).values)
        for code in codes)

    # Whole arrays.
    for code in codes:
        npt.assert_string_equal(_infer_freq(arrays[code]), code)

    # Business-day special cases.
    npt.assert_string_equal(_infer_freq(arrays['B'][2:4]), 'B')
    npt.assert_string_equal(_infer_freq(arrays['B'][:2]), 'D')

    # First two entries of every other frequency.
    for code in codes[1:]:
        npt.assert_string_equal(_infer_freq(arrays[code][:2]), code)
Пример #16
0
def _idx_from_dates(d1, d2, freq):
    """
    Returns an index offset from datetimes d1 and d2. d1 is expected to be the
    last date in a date series and d2 is the out of sample date.

    Notes
    -----
    Rounds down the index if the end date is before the next date at freq.
    Does not check the start date to see whether it is on the offset but
    assumes that it is.
    """
    from pandas import DateRange
    # The range includes d1 itself, hence the "- 1".
    span = DateRange(d1, d2, offset=_freq_to_pandas[freq])
    return len(span) - 1
Пример #17
0
    def test_setitem_ndarray(self):
        """Assign a 72x36 ndarray to one Panel item and read it back."""
        from pandas import DateRange, datetools

        month_ends = DateRange(start=datetime(2009, 1, 1),
                               end=datetime(2009, 12, 31),
                               offset=datetools.MonthEnd())
        lons = np.linspace(-177.5, 177.5, 72)
        lats = np.linspace(-87.5, 87.5, 36)
        panel = Panel(items=month_ends, major_axis=lons, minor_axis=lats)

        payload = np.random.randn(72 * 36).reshape((72, 36))
        item = datetime(2009, 2, 28)
        panel[item] = payload

        assert_almost_equal(panel[item].values, payload)
Пример #18
0
def test_keyerror_start_date():
    """A predict start of "1970-4-30", before the index, raises ValueError."""
    values = np.arange(1, 36.)

    if _pandas_08x:
        from pandas import date_range

        # there's a bug in pandas up to 0.10.2 for YearBegin, so "A-APR" is
        # used in place of "AS-APR".
        index = date_range("1972-4-30", "2006-4-30", freq="A-APR")
    else:
        from pandas import DateRange, datetools
        index = DateRange("1972-1-1", "2007-1-1", offset=datetools.yearEnd)

    model = TimeSeriesModel(Series(values, index=index))

    npt.assert_raises(ValueError, model._get_predict_start, "1970-4-30")
Пример #19
0
    def test_shift(self):
        """Compare sparse ``shift`` against the dense result.

        Covers a zero shift (must return a new, equal object), positive and
        negative integer shifts, and shifts given by time rule and offset on
        a date-indexed series.
        """
        int_indexed = SparseSeries([nan, 1., 2., 3., nan, nan],
                                   index=np.arange(6))

        unshifted = int_indexed.shift(0)
        self.assert_(unshifted is not int_indexed)
        assert_sp_series_equal(unshifted, int_indexed)

        _dense_series_compare(int_indexed, lambda s: s.shift(1))
        _dense_series_compare(int_indexed, lambda s: s.shift(-2))

        date_indexed = SparseSeries([nan, 1., 2., 3., nan, nan],
                                    index=DateRange('1/1/2000', periods=6))
        _dense_series_compare(date_indexed,
                              lambda s: s.shift(2, timeRule='WEEKDAY'))
        _dense_series_compare(date_indexed,
                              lambda s: s.shift(2, offset=datetools.bday))
Пример #20
0
    def test_constructor(self):
        """Exercise the ``SparseSeries`` constructor with several inputs."""
        # Sanity-check the fixtures first.
        fixture_kinds = ((self.bseries, BlockIndex),
                         (self.iseries, IntIndex))
        for fixture, index_type in fixture_kinds:
            self.assert_(np.isnan(fixture.fill_value))
            self.assert_(isinstance(fixture.sp_index, index_type))

        self.assertEquals(self.zbseries.fill_value, 0)
        assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))

        # Constructing from an existing SparseSeries gives an equal series.
        originals = (self.bseries, self.iseries, self.zbseries)
        rebuilt = [SparseSeries(original) for original in originals]
        for copy_, original in zip(rebuilt, originals):
            assert_sp_series_equal(copy_, original)

        # A date index yields a sparse time series.
        dates = DateRange('1/1/2000', periods=len(self.bseries))
        dated = SparseSeries(self.bseries, index=dates)
        self.assert_(isinstance(dated, SparseTimeSeries))

        # Constructing from a dense Series.
        from_dense = SparseSeries(self.bseries.to_dense())
        assert_equal(self.bseries.sp_values, from_dense.sp_values)

        # pass dict?

        # By default the data is not copied: writing through the sparse
        # series is visible in the source array.
        values = np.ones(len(self.bseries.sp_values))
        shared = SparseSeries(values, sparse_index=self.bseries.sp_index)
        shared.sp_values[:5] = 97
        self.assert_(values[0] == 97)

        # With copy=True the source array is left untouched.
        copied = SparseSeries(values,
                              sparse_index=self.bseries.sp_index,
                              copy=True)
        copied.sp_values[:5] = 100
        self.assert_(values[0] == 97)
Пример #21
0
    def setUp(self):
        """Build block- and integer-kind sparse series fixtures.

        Four data sources are used; the two ``*_zero`` sources produce
        series with ``fill_value=0``.  ``btseries`` is the first data set on
        a date index, and ``ts`` is an alias for ``bseries``.
        """
        def build_pair(source, block_attr, int_attr, **extra):
            # Store a 'block' and an 'integer' series of the same data.
            values, labels = source()
            setattr(self, block_attr,
                    SparseSeries(values, index=labels, kind='block',
                                 **extra))
            setattr(self, int_attr,
                    SparseSeries(values, index=labels, kind='integer',
                                 **extra))

        values, labels = _test_data1()
        dates = DateRange('1/1/2011', periods=len(labels))

        self.bseries = SparseSeries(values, index=labels, kind='block')
        self.bseries.name = 'bseries'
        self.ts = self.bseries
        self.btseries = SparseSeries(values, index=dates, kind='block')
        self.iseries = SparseSeries(values, index=labels, kind='integer')

        build_pair(_test_data2, 'bseries2', 'iseries2')
        build_pair(_test_data1_zero, 'zbseries', 'ziseries', fill_value=0)
        build_pair(_test_data2_zero, 'zbseries2', 'ziseries2', fill_value=0)
Пример #22
0
def test_infer_freq():
    """Check ``_infer_freq`` under both old and new pandas date APIs.

    With ``_pandas_08x`` the weekly/annual/quarterly frequencies are
    expected with their anchor suffix (``W-SUN``, ``A-DEC``, ``Q-DEC``);
    otherwise the bare codes are expected.  Business-day, daily and monthly
    expectations are the same in both cases.
    """
    begin = datetime(2008, 12, 31)
    finish = datetime(2012, 9, 30)

    if _pandas_08x:
        def dates_for(code):
            return DatetimeIndex(start=begin, end=finish,
                                 freq=_freq_to_pandas[code]).values
        weekly, annual, quarterly = 'W-SUN', 'A-DEC', 'Q-DEC'
    else:
        from pandas import DateRange

        def dates_for(code):
            return DateRange(begin, finish,
                             offset=_freq_to_pandas[code]).values
        weekly, annual, quarterly = 'W', 'A', 'Q'

    b, d, w, m, a, q = [dates_for(code)
                        for code in ('B', 'D', 'W', 'M', 'A', 'Q')]

    # Version-dependent expectations: full arrays first, then slices.
    anchored = ((w, weekly), (a, annual), (q, quarterly))
    for dates, label in anchored:
        assert _infer_freq(dates) == label
    for dates, label in anchored:
        assert _infer_freq(dates[:3]) == label

    # Version-independent expectations.
    assert _infer_freq(b[2:5]) == 'B'
    assert _infer_freq(b[:3]) == 'D'
    assert _infer_freq(b) == 'B'
    assert _infer_freq(d) == 'D'
    assert _infer_freq(m) == 'M'
    assert _infer_freq(d[:3]) == 'D'
    assert _infer_freq(m[:3]) == 'M'
Пример #23
0
 def test_can_serialize_dates(self):
     """Round-trip a random frame indexed by ``date`` objects."""
     dates = [stamp.date() for stamp in DateRange('1/1/2000', '1/30/2000')]
     values = np.random.randn(len(dates), 4)
     self._check_roundtrip(DataFrame(values, index=dates),
                           tm.assert_frame_equal)
Пример #24
0
    def daysLeft(self, date):
        """ business days to expiration date """
        # NOTE: this import will fail with pandas 0.14 and higher, where
        # DateRange is deprecated and replaced by DatetimeIndex.
        from pandas import DateRange

        # The count is the length of the range from `date` to expiration.
        return len(DateRange(date, self.expirationDate()))
Пример #25
0
 def daysLeft(self, date):
     """ business days to expiration date """
     # The count is the length of the range from `date` to expiration.
     expiry = self.expirationDate()
     return len(DateRange(date, expiry))