Example #1
def mktimerange(
        time_resolution: TimeResolution,
        date_from: Union[datetime, str],
        date_to: Union[datetime, str] = None) -> Tuple[Timestamp, Timestamp]:
    """
    Compute appropriate time ranges for monthly and annual time resolutions.
    date_from is floored to the respective "begin of month/year" and
    date_to is ceiled to the respective "end of month/year" boundary.

    Args:
        time_resolution: time resolution as enumeration
        date_from: datetime string or object
        date_to: datetime string or object

    Returns:
        Tuple of two Timestamps: "date_from" and "date_to"
    """

    if date_to is None:
        date_to = date_from

    if time_resolution == TimeResolution.ANNUAL:
        date_from = pd.to_datetime(date_from) - YearBegin(1)
        date_to = pd.to_datetime(date_to) + YearEnd(1)

    elif time_resolution == TimeResolution.MONTHLY:
        date_from = pd.to_datetime(date_from) - MonthBegin(1)
        date_to = pd.to_datetime(date_to) + MonthEnd(1)

    else:
        raise NotImplementedError(
            "mktimerange only implemented for annual and monthly time ranges")

    return date_from, date_to
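The floor/ceil behaviour this relies on is plain anchored-offset arithmetic; a minimal illustration (not part of the original snippet):

import pandas as pd
from pandas.tseries.offsets import YearBegin, YearEnd, MonthBegin, MonthEnd

ts = pd.to_datetime("2019-05-14")
ts - YearBegin(1)   # -> Timestamp('2019-01-01 00:00:00')
ts + YearEnd(1)     # -> Timestamp('2019-12-31 00:00:00')
ts - MonthBegin(1)  # -> Timestamp('2019-05-01 00:00:00')
ts + MonthEnd(1)    # -> Timestamp('2019-05-31 00:00:00')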
Example #2
def ls_returns_sheet(self, cur_day=None):
    if cur_day is None:
        cur_day = pd.to_datetime(
            data_source.trade_calendar.get_latest_trade_days(
                datetime.today().strftime("%Y%m%d")))
    else:
        cur_day = pd.to_datetime(cur_day)
    dates = [
        cur_day,
        cur_day.to_period('W').start_time, cur_day + MonthBegin(-1),
        cur_day + QuarterBegin(-1), cur_day + MonthBegin(-6),
        cur_day + YearBegin(-1), cur_day + YearBegin(-2)
    ]
    returns = list(map(lambda x: self.ls_range_pct(x, cur_day), dates)) + \
              [self.ls_annual_return, self.ls_total_return]
    # Column labels below (Chinese): daily return, week to date, month to date,
    # quarter to date, last 6 months, year to date, last 2 years,
    # annualized return, since inception.
    return pd.DataFrame([returns],
                        columns=[
                            '日回报', '本周以来', '本月以来', '本季以来', '近6个月', '今年以来',
                            '近两年', '年化回报', '成立以来'
                        ])
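For reference, this is roughly what the anchor dates above evaluate to for a mid-month trade day (illustrative values, not part of the original snippet):

import pandas as pd
from pandas.tseries.offsets import MonthBegin, YearBegin

cur_day = pd.Timestamp("2021-08-13")   # a Friday
cur_day.to_period('W').start_time      # -> Timestamp('2021-08-09 00:00:00'), Monday of that week
cur_day + MonthBegin(-1)               # -> Timestamp('2021-08-01 00:00:00')
cur_day + YearBegin(-1)                # -> Timestamp('2021-01-01 00:00:00')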
Example #3
def _split_by_year(tile, time_dim='time'):
    start_range = tile.sources[time_dim][0].data
    end_range = tile.sources[time_dim][-1].data

    for date in pd.date_range(start=YearBegin(normalize=True).rollback(start_range),
                              end=end_range,
                              freq='AS',
                              normalize=True):
        sources_slice = tile.sources.loc[{time_dim: slice(date, YearEnd(normalize=True).rollforward(date))}]
        year_str = '{0:%Y}'.format(date)
        yield year_str, Tile(sources=sources_slice, geobox=tile.geobox)
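A minimal sketch of the rollback/rollforward calls used above (illustrative, not part of the original snippet):

import pandas as pd
from pandas.tseries.offsets import YearBegin, YearEnd

start = pd.Timestamp('2017-05-20 13:00')
YearBegin(normalize=True).rollback(start)     # -> Timestamp('2017-01-01 00:00:00')
YearEnd(normalize=True).rollforward(start)    # -> Timestamp('2017-12-31 00:00:00')
pd.date_range(start='2017-01-01', end='2019-06-30', freq='AS', normalize=True)
# -> DatetimeIndex(['2017-01-01', '2018-01-01', '2019-01-01'], freq='AS-JAN')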
Example #4
def to_offset(self) -> DateOffset:
    if self.value == "H":
        return Hour(1)
    elif self.value == "D":
        return Day(1)
    elif self.value == "W-MON":
        return Week(1, weekday=0)
    elif self.value == "MS":
        return MonthBegin(1)
    elif self.value == "QS-DEC":
        return QuarterBegin(startingMonth=10)
    elif self.value == "AS":
        return YearBegin(1)
    raise NotImplementedError(self.value)
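These offsets can be passed directly as date_range frequencies; a quick, illustrative check (not part of the original snippet):

import pandas as pd
from pandas.tseries.offsets import MonthBegin, Week, YearBegin

pd.date_range("2021-01-01", periods=3, freq=MonthBegin(1))
# -> DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01'], ...)
pd.date_range("2021-01-01", periods=3, freq=Week(1, weekday=0))
# -> Mondays: 2021-01-04, 2021-01-11, 2021-01-18
pd.date_range("2021-01-01", periods=3, freq=YearBegin(1))
# -> DatetimeIndex(['2021-01-01', '2022-01-01', '2023-01-01'], ...)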
Example #5
def timestamp_rollforward_rollback():
    """ How to roll a date forward (to the end of a period) or backward (to its beginning) """
    now = datetime(2014, 4, 15)
    print("Current time is:", now)
    now = now + 3 * Day()
    print("Adding 3 days to now:", now)

    offset = MonthEnd()
    now = offset.rollforward(now)
    print("Rolling forward to last day of the month:", now)

    offset = MonthBegin()
    now = offset.rollback(now)
    print("Rolling back to first day of the month:", now)

    ts = pd.Series(np.random.randn(20),
                   index=pd.date_range('1/1/2000', periods=20, freq='4d'))
    print("Original Time Series is:\n", ts)

    offset = YearBegin()
    ts = ts.groupby(offset.rollforward).mean()
    print("Time Series after rolling forward to year begin:\n", ts)
Example #6
def get_dividends(start, end, **kwargs):
    start = (pd.to_datetime(start) - YearBegin(1)).strftime("%Y%m%d")
    end = (pd.to_datetime(end) + YearEnd(1)).strftime("%Y%m%d")
    raw_dividends = uqer_db.run_api(
        "EquDivGet",
        beginDate=start,
        endDate=end,
        field=["endDate", "ticker", "publishDate", "perCashDiv"])
    raw_dividends.dropna(inplace=True)
    raw_dividends['endDate'] = (raw_dividends['endDate'].str.replace(
        '-', '')).astype('int')
    raw_dividends['publishDate'] = (raw_dividends['publishDate'].str.replace(
        '-', '')).astype('int')
    raw_dividends['ticker'] = raw_dividends['ticker'].astype('int')
    raw_dividends.sort_values(['ticker', 'endDate', 'publishDate'],
                              inplace=True)
    raw_dividends.rename(columns={
        'ticker': 'IDs',
        'endDate': 'date',
        'publishDate': 'ann_dt',
        'perCashDiv': 'dividend'
    },
                         inplace=True)
    h5db.save_h5file(raw_dividends, 'cash_div', '/dividends/')
Example #7
class TestYearBegin(Base):
    _offset = YearBegin

    def test_misspecified(self):
        with pytest.raises(ValueError, match="Month must go from 1 to 12"):
            YearBegin(month=13)

    offset_cases = []
    offset_cases.append((YearBegin(), {
        datetime(2008, 1, 1): datetime(2009, 1, 1),
        datetime(2008, 6, 30): datetime(2009, 1, 1),
        datetime(2008, 12, 31): datetime(2009, 1, 1),
        datetime(2005, 12, 30): datetime(2006, 1, 1),
        datetime(2005, 12, 31): datetime(2006, 1, 1)}))

    offset_cases.append((YearBegin(0), {
        datetime(2008, 1, 1): datetime(2008, 1, 1),
        datetime(2008, 6, 30): datetime(2009, 1, 1),
        datetime(2008, 12, 31): datetime(2009, 1, 1),
        datetime(2005, 12, 30): datetime(2006, 1, 1),
        datetime(2005, 12, 31): datetime(2006, 1, 1)}))

    offset_cases.append((YearBegin(3), {
        datetime(2008, 1, 1): datetime(2011, 1, 1),
        datetime(2008, 6, 30): datetime(2011, 1, 1),
        datetime(2008, 12, 31): datetime(2011, 1, 1),
        datetime(2005, 12, 30): datetime(2008, 1, 1),
        datetime(2005, 12, 31): datetime(2008, 1, 1)}))

    offset_cases.append((YearBegin(-1), {
        datetime(2007, 1, 1): datetime(2006, 1, 1),
        datetime(2007, 1, 15): datetime(2007, 1, 1),
        datetime(2008, 6, 30): datetime(2008, 1, 1),
        datetime(2008, 12, 31): datetime(2008, 1, 1),
        datetime(2006, 12, 29): datetime(2006, 1, 1),
        datetime(2006, 12, 30): datetime(2006, 1, 1)}))

    offset_cases.append((YearBegin(-2), {
        datetime(2007, 1, 1): datetime(2005, 1, 1),
        datetime(2008, 6, 30): datetime(2007, 1, 1),
        datetime(2008, 12, 31): datetime(2007, 1, 1)}))

    offset_cases.append((YearBegin(month=4), {
        datetime(2007, 4, 1): datetime(2008, 4, 1),
        datetime(2007, 4, 15): datetime(2008, 4, 1),
        datetime(2007, 3, 1): datetime(2007, 4, 1),
        datetime(2007, 12, 15): datetime(2008, 4, 1),
        datetime(2012, 1, 31): datetime(2012, 4, 1)}))

    offset_cases.append((YearBegin(0, month=4), {
        datetime(2007, 4, 1): datetime(2007, 4, 1),
        datetime(2007, 3, 1): datetime(2007, 4, 1),
        datetime(2007, 12, 15): datetime(2008, 4, 1),
        datetime(2012, 1, 31): datetime(2012, 4, 1)}))

    offset_cases.append((YearBegin(4, month=4), {
        datetime(2007, 4, 1): datetime(2011, 4, 1),
        datetime(2007, 4, 15): datetime(2011, 4, 1),
        datetime(2007, 3, 1): datetime(2010, 4, 1),
        datetime(2007, 12, 15): datetime(2011, 4, 1),
        datetime(2012, 1, 31): datetime(2015, 4, 1)}))

    offset_cases.append((YearBegin(-1, month=4), {
        datetime(2007, 4, 1): datetime(2006, 4, 1),
        datetime(2007, 3, 1): datetime(2006, 4, 1),
        datetime(2007, 12, 15): datetime(2007, 4, 1),
        datetime(2012, 1, 31): datetime(2011, 4, 1)}))

    offset_cases.append((YearBegin(-3, month=4), {
        datetime(2007, 4, 1): datetime(2004, 4, 1),
        datetime(2007, 3, 1): datetime(2004, 4, 1),
        datetime(2007, 12, 15): datetime(2005, 4, 1),
        datetime(2012, 1, 31): datetime(2009, 4, 1)}))

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        offset, cases = case
        for base, expected in compat.iteritems(cases):
            assert_offset_equal(offset, base, expected)

    on_offset_cases = [(YearBegin(), datetime(2007, 1, 3), False),
                       (YearBegin(), datetime(2008, 1, 1), True),
                       (YearBegin(), datetime(2006, 12, 31), False),
                       (YearBegin(), datetime(2006, 1, 2), False)]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        offset, dt, expected = case
        assert_onOffset(offset, dt, expected)
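Outside the test harness, the same behaviour can be checked directly; note that recent pandas spells the membership test is_on_offset (the older name was onOffset). Illustrative only:

from datetime import datetime
from pandas.tseries.offsets import YearBegin

YearBegin().is_on_offset(datetime(2008, 1, 1))   # -> True
YearBegin().is_on_offset(datetime(2007, 1, 3))   # -> False
datetime(2007, 12, 15) + YearBegin(month=4)      # -> Timestamp('2008-04-01 00:00:00')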
Example #8
def test_misspecified(self):
    with pytest.raises(ValueError, match="Month must go from 1 to 12"):
        YearBegin(month=13)
Example #9
def create_data():
    """ create the pickle/msgpack data """

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
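The point of this fixture is pickle compatibility of the offsets; the round trip itself is simple to verify (illustrative, not part of the original snippet):

import pickle
from pandas.tseries.offsets import YearBegin, MonthEnd

pickle.loads(pickle.dumps(YearBegin(2))) == YearBegin(2)    # -> True
pickle.loads(pickle.dumps(MonthEnd(1))) == MonthEnd(1)      # -> True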
Example #10
def time_for_next_update(last_time, freq='D', num=9, is_end=False):
    """前次更新后下一次更新时间

    Arguments:
        last_time {obj} -- 上次时间

    Keyword Arguments:
        freq {str} -- 更新周期 (default: {'D'})
        num {int} -- 日级别以下为单位数,以上为小时数 (default: {9})
        is_end {bool} -- 是否为周期尾部 (default: {False})

    Raises:
        TypeError: 不能识别的周期类型

    Returns:
        Timestamp -- 下一次更新时间

    Notes:
        一、 freq < D
            `num`代表周期数
            上一时点`normalize`后移动`num`周期,不考虑开始及结束问题
        二、 freq in D、B
            `num`代表小时
            对于历史时间,上一时点`normalize`后一律移动到下一个周期,且将小时调整到指定的num
            如上一时点其日期为当前日期,且在其`normalize`及调整小时后的值晚于上一时点,则取调整后的值
        三、 freq > D 开始及结束才有效
            `num`无效
            如周初、周末、月初、月末、季初、季末、年初、年末
            此时num数字不起作用
    """
    valid_freq = ('B', 'D', 'W', 'M', 'Q', 'H', 'MIN')
    if pd.isnull(last_time):
        return pd.Timestamp(MARKET_START)
    assert isinstance(
        last_time, pd.Timestamp), f'Type error: expected Timestamp, got {type(last_time)}'
    now = pd.Timestamp.now(tz=last_time.tz)
    assert last_time <= now, 'last_time must not be later than the current time'
    freq = freq.upper()
    if freq == 'MIN':
        offset = Minute(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'H':
        offset = Hour(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'D':
        # the hour of day must be taken into account here
        limit = last_time.floor(freq).replace(hour=num)
        if last_time < limit:
            return limit
        else:
            offset = Day()
            return offset.apply(last_time.floor(freq)).replace(hour=num)
    if freq == 'B':
        offset = BDay()
        # business day
        if last_time.weekday() in range(0, 5):
            # the hour of day must be taken into account here
            limit = last_time.normalize().replace(hour=num)
            if last_time < limit:
                return limit
            else:
                return offset.apply(last_time.normalize()).replace(hour=num)
        else:
            return offset.apply(last_time.normalize()).replace(hour=num)
    if freq == 'W':
        nw = last_time.normalize() + pd.Timedelta(weeks=1)
        if is_end:
            return nw + pd.Timedelta(days=7-nw.weekday()) - pd.Timedelta(nanoseconds=1)
        else:
            return nw - pd.Timedelta(days=nw.weekday())
    if freq == 'M':
        if is_end:
            offset = MonthEnd(n=2)
            res = offset.apply(last_time.normalize())
            if last_time.is_month_end:
                res = offset.rollback(res)
            return res
        else:
            offset = MonthBegin()
            return offset.apply(last_time.normalize())
    if freq == 'Q':
        if is_end:
            offset = QuarterEnd(n=2, startingMonth=3, normalize=True)
            res = offset.apply(last_time)
            if last_time.is_quarter_end:
                offset = QuarterEnd(n=-1, startingMonth=3, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = QuarterBegin(n=1, normalize=True, startingMonth=1)
            return offset.apply(last_time)
    if freq == 'Y':
        if last_time.year == now.year:
            if is_end:
                return last_time.normalize().replace(year=now.year, month=12, day=31)
            else:
                return last_time.normalize().replace(year=now.year, month=1, day=1)
        if is_end:
            offset = YearEnd(normalize=True, month=12, n=2)
            res = offset.apply(last_time)
            if last_time.is_year_end:
                offset = YearEnd(n=-1, month=12, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = YearBegin(normalize=True, month=1, n=1)
            return offset.apply(last_time)
    raise ValueError('Unrecognized frequency; expected one of {}, got {}'.format(
        valid_freq, freq))
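For example, the 'M' branches reduce to plain offset arithmetic; a sketch of what they return for a mid-month last_time (newer pandas prefers `ts + offset` over the deprecated offset.apply(ts)):

import pandas as pd
from pandas.tseries.offsets import MonthBegin, MonthEnd

last = pd.Timestamp('2023-05-17 10:30')
last.normalize() + MonthBegin()    # -> Timestamp('2023-06-01 00:00:00')  (freq='M', is_end=False)
last.normalize() + MonthEnd(2)     # -> Timestamp('2023-06-30 00:00:00')  (freq='M', is_end=True)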
Example #11
#! coding: utf-8
from django.conf import settings
from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day

# Transformations
VALUE = 'value'
CHANGE = 'change'
PCT_CHANGE = 'percent_change'
CHANGE_YEAR_AGO = 'change_a_year_ago'
PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago'
CHANGE_BEG_YEAR = 'change_since_beginning_of_year'
PCT_CHANGE_BEG_YEAR = 'percent_change_since_beginning_of_year'

# Pandas freqs
PANDAS_YEAR = YearBegin()
PANDAS_SEMESTER = MonthBegin(6)
PANDAS_QUARTER = QuarterBegin(startingMonth=1)
PANDAS_MONTH = MonthBegin()
PANDAS_WEEK = Day(7)
PANDAS_DAY = Day()

# Frequencies, *in order* from largest to smallest
PANDAS_FREQS = [
    PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK,
    PANDAS_DAY
]

IDENTIFIER = "identifier"
DATASET_IDENTIFIER = "dataset_identifier"
DOWNLOAD_URL = "downloadURL"
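A quick illustration of the step each of these frequencies takes (not part of the original module):

import pandas as pd
from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day

t = pd.Timestamp('2020-01-01')
t + YearBegin()                    # -> Timestamp('2021-01-01 00:00:00')
t + QuarterBegin(startingMonth=1)  # -> Timestamp('2020-04-01 00:00:00')
t + MonthBegin(6)                  # -> Timestamp('2020-07-01 00:00:00'), the "semester" step
t + Day(7)                         # -> Timestamp('2020-01-08 00:00:00')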
Example #12
def bin_df(df):
    years = df['$date_to'] - YearBegin(1)
    df.index = pd.DatetimeIndex(years, ambiguous='infer').floor('D')
    return df.groupby(level=0).count()
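Subtracting YearBegin(1) floors each timestamp to the start of its year but keeps the time of day, which is why bin_df still calls .floor('D'); an illustrative check (not part of the original snippet):

import pandas as pd
from pandas.tseries.offsets import YearBegin

s = pd.Series(pd.to_datetime(['2021-03-05', '2021-11-30 18:00']))
s - YearBegin(1)
# -> 2021-01-01 00:00:00 and 2021-01-01 18:00:00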
Example #13
def create_data():
    """create the pickle data"""
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    mi = {
        "reg2":
        MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }

    series = {
        "float":
        Series(data["A"]),
        "int":
        Series(data["B"]),
        "mixed":
        Series(data["E"]),
        "ts":
        Series(np.arange(10).astype(np.int64),
               index=date_range("20130101", periods=10)),
        "mi":
        Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        "dup":
        Series(np.arange(5).astype(np.float64),
               index=["A", "B", "C", "D", "A"]),
        "cat":
        Series(Categorical(["foo", "bar", "baz"])),
        "dt":
        Series(date_range("20130101", periods=5)),
        "dt_tz":
        Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period":
        Series([Period("2000Q1")] * 5),
    }

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float":
        DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int":
        DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed":
        DataFrame({k: data[k]
                   for k in ["A", "B", "C", "D"]}),
        "mi":
        DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup":
        DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                  columns=["A", "B", "A"]),
        "cat_onecol":
        DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float":
        DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup":
        mixed_dup_df,
        "dt_mixed_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
Example #14
from flask import Flask, render_template, request, make_response
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay, YearEnd, YearBegin
from decimal import Decimal
import requests
from bs4 import BeautifulSoup
import base64
import json

app = Flask(__name__)

today = pd.to_datetime("today")
todays_date = str(today).split(" ")[0][5:]
yearBegin = today - YearBegin()
yearEnd = today + YearEnd()
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
holidays = USFederalHolidayCalendar().holidays(start=today,
                                               end=yearEnd,
                                               return_name=True)
holidayDates = holidays.index
# pd.date_range replaces the removed DatetimeIndex(start=..., end=..., freq=...) constructor
workable_days_total = Decimal(
    len(pd.date_range(start=yearBegin, end=yearEnd, freq=us_bd)))
worked_days_todate = Decimal(
    len(pd.date_range(start=yearBegin, end=today, freq=us_bd)))
progress = round((worked_days_todate / workable_days_total) * 100, 0)
workable_days_remaining = Decimal(
    len(pd.date_range(start=today, end=yearEnd, freq=us_bd)))
days_off = ""

Example #15
def create_data():
    """ create the pickle/msgpack data """

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
                                          names=['first', 'second']))

    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                                         names=['one',
                                                                'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k]
                                  for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                                                  names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A':
                     Categorical(['foo', 'bar', 'baz']),
                     'B':
                     np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     },
                     index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     },
                     index=range(5)))

    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Example #16
def create_data():
    """ create the pickle/msgpack data """

    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5,
                                                     2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    off = {
        'DateOffset': DateOffset(years=1),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Example #17
    def tickValues(self, minVal, maxVal, size):
                
        minVal, maxVal = sorted((minVal, maxVal))
        
        # upon opening, we don't want any tick values
        if minVal == 0 and maxVal == 1:
            return [(0,[]), (0,[])]
        
        self._freqs = ['YEARLY', 'MONTHLY', 'DAILY', 'HOURLY', 'MINUTELY', 'SECONDLY']
        self.minticks = 5

        self.maxticks = {'YEARLY': 11, 'MONTHLY': 12, 'DAILY': 11, 'HOURLY': 12,
                         'MINUTELY': 11, 'SECONDLY': 11}
        self.interval_multiples = True
        self.intervald = {
            'YEARLY': [1, 2, 4, 5, 10, 20, 40, 50, 100],
            'MONTHLY': [1, 2, 3, 4, 6],
            'DAILY': [1, 2, 3, 7, 14],
            'HOURLY': [1, 2, 3, 4, 6, 12],
            'MINUTELY': [1, 5, 10, 15, 30],
            'SECONDLY': [1, 5, 10, 15, 30],
            }

        minDate = datetime.fromtimestamp(minVal)
        maxDate = datetime.fromtimestamp(maxVal)
        delta   = relativedelta(maxDate, minDate)

        numYears    = (delta.years * 1.0)
        numMonths   = (numYears * 12.0) + delta.months
        numDays     = (numMonths * 31.0) + delta.days
        numHours    = (numDays * 24.0) + delta.hours
        numMinutes  = (numHours * 60.0) + delta.minutes
        numSeconds  = (numMinutes * 60.0) + delta.seconds
        numMicroseconds = (numSeconds * 1e6) + delta.microseconds

        nums = [numYears, numMonths, numDays, numHours, numMinutes,
                numSeconds, numMicroseconds]

        for (freq, num) in zip(self._freqs, nums):
            # If this particular frequency doesn't give enough ticks, continue
            if num < self.minticks:
                continue

            # Find the first available interval that doesn't give too many
            # ticks
            for interval in self.intervald[freq]:
                if num <= interval * (self.maxticks[freq] - 1):
                    break
            else:
                # We went through the whole loop without breaking, default to
                # the last interval in the list and raise a warning
                warnings.warn('AutoDateLocator was unable to pick an '
                              'appropriate interval for this date range. '
                              'It may be necessary to add an interval value '
                              "to the AutoDateLocator's intervald dictionary."
                              ' Defaulting to {0}.'.format(interval))

            # Set some parameters as appropriate
            self._freq = freq

            break
        else:
            raise ValueError('No sensible date limit could be found')
        
        baseDate = datetime(minDate.year, minDate.month, minDate.day)
        if freq == 'YEARLY':
            offset = DateOffset(years=interval)
            majorTicks = date_range(baseDate + YearBegin(-1), maxDate, freq=offset)
        if freq == 'MONTHLY':
            offset = DateOffset(months=interval)
            majorTicks = date_range(baseDate + MonthBegin(-1), maxDate, freq=offset)
        if freq == 'WEEKLY':
            offset = DateOffset(weeks=interval)
            majorTicks = date_range(baseDate + DateOffset(days=-1), maxDate, freq=offset)
        if freq == 'DAILY':
            offset = DateOffset(days=interval)
            majorTicks = date_range(baseDate + DateOffset(days=-1), maxDate, freq=offset)
        if freq == 'HOURLY':
            offset = DateOffset(hours=interval)
            majorTicks = date_range(baseDate + DateOffset(days=-1), maxDate, freq=offset)
        
        majorTicks  = majorTicks.tz_localize('US/Eastern')
        ticks       = [ (0, majorTicks.asi8 / 1e9), (0,[])]
        return ticks
Example #18
    'A-JAN' : YearEnd(month=1),
    'A-FEB' : YearEnd(month=2),
    'A-MAR' : YearEnd(month=3),
    'A-APR' : YearEnd(month=4),
    'A-MAY' : YearEnd(month=5),
    'A-JUN' : YearEnd(month=6),
    'A-JUL' : YearEnd(month=7),
    'A-AUG' : YearEnd(month=8),
    'A-SEP' : YearEnd(month=9),
    'A-OCT' : YearEnd(month=10),
    'A-NOV' : YearEnd(month=11),
    'A-DEC' : YearEnd(month=12),
    'A'     : YearEnd(month=12),

    # Annual - Calendar (start)
    'AS-JAN' : YearBegin(month=1),
    'AS'     : YearBegin(month=1),
    'AS-FEB' : YearBegin(month=2),
    'AS-MAR' : YearBegin(month=3),
    'AS-APR' : YearBegin(month=4),
    'AS-MAY' : YearBegin(month=5),
    'AS-JUN' : YearBegin(month=6),
    'AS-JUL' : YearBegin(month=7),
    'AS-AUG' : YearBegin(month=8),
    'AS-SEP' : YearBegin(month=9),
    'AS-OCT' : YearBegin(month=10),
    'AS-NOV' : YearBegin(month=11),
    'AS-DEC' : YearBegin(month=12),

    # Annual - Business
    'BA-JAN' : BYearEnd(month=1),
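These anchored aliases can be passed straight to date_range; for example (illustrative only; pandas 2.2+ prefers the 'YS-*'/'YE-*' spellings):

import pandas as pd

pd.date_range('2020-01-15', periods=3, freq='AS-APR')
# -> DatetimeIndex(['2020-04-01', '2021-04-01', '2022-04-01'], freq='AS-APR')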
Example #19
        return jsonify(data=data, x=cols[0], y=cols[1], stats=stats)
    except BaseException as e:
        return jsonify(
            dict(error=str(e), traceback=str(traceback.format_exc())))


DATE_RANGES = {
    'W':
    lambda today: today - Day(today.dayofweek),
    'M':
    lambda today: today if today.is_month_start else today - MonthBegin(),
    'Q':
    lambda today: today
    if today.is_quarter_start else today - QuarterBegin(startingMonth=1),
    'Y':
    lambda today: today if today.is_year_start else today - YearBegin(),
}
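What these lambdas resolve to for a mid-week, mid-month date (illustrative, not part of the original module):

import pandas as pd
from pandas.tseries.offsets import Day, MonthBegin, YearBegin

today = pd.Timestamp('2023-05-17')   # a Wednesday
today - Day(today.dayofweek)         # -> Timestamp('2023-05-15 00:00:00'), Monday of the current week
today - MonthBegin()                 # -> Timestamp('2023-05-01 00:00:00')
today - YearBegin()                  # -> Timestamp('2023-01-01 00:00:00')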


@dtale.route('/coverage')
@swag_from('swagger/dtale/views/coverage.yml')
def find_coverage():
    """
    Flask route which returns coverage information (counts) for a column grouped by other column(s)

    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param col: string from flask.request.args['col'] containing name of a column in your dataframe
    :param filters (deprecated): JSON string from flask.request.args['filters'] with filtering information from group
           drilldown [
        {name: col1, prevFreq: Y, freq: Q, date: YYYY-MM-DD},
        ...