Exemplo n.º 1
0
def reindex_to_calendar(calendar, data, freq='1d'):
    """Reindex ``data`` onto the sessions (or minutes) of ``calendar``.

    The covered range runs from the first to the last index label of
    ``data``, each normalised to midnight. Labels that are not
    ``pd.Timestamp`` instances are interpreted as minutes since the epoch.

    Args:
        calendar: Object providing ``sessions_in_range(start, end)`` and
            ``minutes_for_sessions_in_range(start, end)``; both results
            must support ``tz_localize(None)``.
        data: Frame to align. For ``freq='1d'`` it must contain an ``id``
            column. For any other ``freq`` its index is localised as
            Asia/Shanghai time and converted to naive UTC before
            reindexing.
        freq: ``'1d'`` selects the daily path; every other value selects
            the minute path.

    Returns:
        The reindexed frame after it has been passed through ``fillna``.
        An empty ``data`` is returned unchanged, because there is no range
        to reindex over.
    """
    if data.empty:
        # ``data.index[[0, -1]]`` would raise IndexError on an empty index.
        return data

    start_session, end_session = data.index[[0, -1]]
    if not isinstance(start_session, pd.Timestamp):
        start_session = pd.Timestamp(start_session, unit='m')
        end_session = pd.Timestamp(end_session, unit='m')

    start_session = start_session.normalize()
    end_session = end_session.normalize()

    if freq == '1d':
        all_sessions = calendar.sessions_in_range(
            start_session, end_session).tz_localize(None)
        df = data.reindex(all_sessions, copy=False)
        df = fillna(df)
        # Assign the filled column back instead of calling an in-place
        # fill on ``df.id``: the chained form is silently ignored when
        # pandas Copy-on-Write is active.
        df['id'] = df['id'].ffill()
        # NOTE(review): attribute-style assignment only updates a column
        # when ``day`` already exists in ``df`` -- confirm against callers.
        df.day = df.index.values.astype('datetime64[m]').astype(np.int64)
    else:
        all_minutes = calendar.minutes_for_sessions_in_range(
            start_session, end_session).tz_localize(None)
        utc_index = data.index.tz_localize(
            pytz.timezone('Asia/Shanghai')).tz_convert('UTC').tz_localize(None)
        # Re-label a shallow copy so the caller's frame keeps its own index.
        relabeled = data.copy(deep=False)
        relabeled.index = utc_index
        df = relabeled.reindex(all_minutes, copy=False)
        df = fillna(df)

    return df
Exemplo n.º 2
0
def reindex_to_calendar(calendar, data, freq='1d', start_session=None, end_session=None):
    """Reindex ``data`` onto the sessions (or minutes) of ``calendar``.

    Args:
        calendar: Object providing ``sessions_in_range(start, end)`` and
            ``minutes_for_sessions_in_range(start, end)``; both results
            must support ``tz_localize(None)``.
        data: Frame to align. For ``freq='1d'`` it must contain an ``id``
            column. For any other ``freq`` its index is localised as
            Asia/Shanghai time and converted to naive UTC before
            reindexing.
        freq: ``'1d'`` selects the daily path; every other value selects
            the minute path.
        start_session: First session of the target range. Defaults to the
            first index label of ``data``.
        end_session: Last session of the target range. Defaults to the
            last index label of ``data``.

    Bounds that are not ``pd.Timestamp`` instances are interpreted as
    minutes since the epoch; both bounds are normalised to midnight.

    Returns:
        ``None`` when ``data`` is empty, otherwise the reindexed frame
        after it has been passed through ``fillna``.
    """
    if data.empty:
        return None

    # Default each bound on its own: previously a caller-supplied
    # ``end_session`` was discarded when ``start_session`` was None, and a
    # lone ``start_session`` crashed on ``None.normalize()``.
    if start_session is None:
        start_session = data.index[0]
    if end_session is None:
        end_session = data.index[-1]

    if not isinstance(start_session, pd.Timestamp):
        start_session = pd.Timestamp(start_session, unit='m')
    if not isinstance(end_session, pd.Timestamp):
        end_session = pd.Timestamp(end_session, unit='m')

    start_session = start_session.normalize()
    end_session = end_session.normalize()

    if freq == '1d':
        all_sessions = calendar.sessions_in_range(start_session, end_session).tz_localize(None)
        df = data.reindex(all_sessions, copy=False)
        df = fillna(df)
        # Forward-fill, then back-fill any leading gaps. The result is
        # assigned back because a chained in-place fill on ``df.id`` is
        # silently ignored when pandas Copy-on-Write is active.
        df['id'] = df['id'].ffill().bfill()
        # NOTE(review): attribute-style assignment only updates a column
        # when ``day`` already exists in ``df`` -- confirm against callers.
        df.day = df.index.values.astype('datetime64[m]').astype(np.int64)
    else:
        all_minutes = calendar.minutes_for_sessions_in_range(start_session, end_session).tz_localize(None)
        utc_index = data.index.tz_localize(pytz.timezone('Asia/Shanghai')).tz_convert('UTC').tz_localize(None)
        # Re-label a shallow copy so the caller's frame keeps its own index.
        relabeled = data.copy(deep=False)
        relabeled.index = utc_index
        df = relabeled.reindex(all_minutes, copy=False)
        df = fillna(df)

    return df
Exemplo n.º 3
0
    def gen_symbols_data(symbol_map, freq='1d'):
        """Yield ``(int(symbol), bars)`` for every symbol in ``symbol_map``.

        For each symbol the generator resumes from the day after the date
        recorded in ``dates_json[freq]`` (or from ``start_session`` when
        nothing is recorded), fetches bars up to the latest usable
        session, aligns them with ``reindex_to_calendar`` and, for daily
        data, appends them to ``SESSION_BAR_TABLE`` before yielding the
        symbol's full stored history.

        Uses names from the enclosing scope: ``session_bars``,
        ``calendar``, ``eg``, ``start_session``, ``end_session``,
        ``dates_json`` and ``dates_path``.

        Args:
            symbol_map: Mapping or Series whose values are symbols
                convertible with ``int()``.
            freq: ``'1d'`` for daily bars; ``'1m'`` additionally enables
                the long-range fetcher selection below.

        Yields:
            ``(int(symbol), DataFrame)`` tuples. After each freshly
            fetched symbol the consumer resumes the generator, and only
            then is ``dates_json`` rewritten to ``dates_path``.
        """
        if not session_bars.has_table(SESSION_BAR_TABLE):
            Base.metadata.create_all(
                session_bars.connect(),
                checkfirst=True,
                tables=[Base.metadata.tables[SESSION_BAR_TABLE]])

        def load_stored_bars(symbol):
            # Read every stored bar of ``symbol``, oldest first.
            stored = pd.read_sql(
                "select * from {} where id = {} order by day ASC ".format(
                    SESSION_BAR_TABLE, int(symbol)),
                session_bars,
                index_col='day')
            stored.index = pd.to_datetime(stored.index)
            return stored

        default_fetch = partial(fetch_single_equity, eg)
        sessions = calendar.all_sessions

        now = pd.to_datetime('now', utc=True)
        if end_session >= now.normalize():
            end = now.normalize()
            # Before 15:05 Shanghai time, fall back to the previous day.
            if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                end = end - pd.Timedelta('1 D')
        else:
            end = end_session
        # Snap ``end`` back to the latest session that is not after it.
        # The length check covers an ``end`` beyond the last known
        # session, where indexing with ``end_idx`` would raise IndexError.
        end_idx = sessions.searchsorted(end)
        if end_idx >= len(sessions) or sessions[end_idx] > end:
            end = sessions[end_idx - 1]

        # ``items()`` works for dicts and Series alike; ``iteritems()``
        # was removed from pandas in 2.0.
        for _, symbol in symbol_map.items():
            try:
                last_stored = dates_json[freq][symbol]
            except KeyError:
                # Nothing recorded yet: fetch the whole requested range.
                start = start_session
            else:
                start = pd.to_datetime(last_stored,
                                       utc=True) + pd.Timedelta('1 D')
                start = sessions[sessions.searchsorted(start)]
                if start > end:
                    # Already up to date. The symbol is known to be in
                    # ``dates_json[freq]`` on this path.
                    if freq == '1d':
                        yield int(symbol), load_stored_bars(symbol)
                    else:
                        yield int(symbol), pd.DataFrame()
                    continue

            # Pick the fetcher per symbol. Rebinding a shared variable
            # here would leak the long-range fetcher to later symbols.
            fetch = default_fetch
            if freq == '1m':
                single_distance = calendar.session_distance(start, end)
                if single_distance >= 100:
                    fetch = eg.get_k_data

            data = reindex_to_calendar(
                calendar,
                fetch(symbol, start, end, freq),
                start_session=start,
                end_session=end,
                freq=freq,
            )
            if data is None or data.empty:
                if freq == '1d' and symbol in dates_json[freq]:
                    yield int(symbol), load_stored_bars(symbol)
                continue

            if freq == '1d':
                # Padding cannot fill a leading NaN, so seed the first
                # close from the most recent stored bar when there is one.
                if data.close.isnull().iloc[0]:
                    latest = pd.read_sql(
                        "select * from {} where id = {} order by day desc limit 1 "
                        .format(SESSION_BAR_TABLE, int(symbol)),
                        session_bars,
                        index_col='day')
                    if latest.empty:
                        data = data[data.close.notnull()]
                    else:
                        # Single positional write; the chained form
                        # ``data["close"][0] = ...`` may hit a temporary.
                        data.iloc[0, data.columns.get_loc("close")] = (
                            latest["close"].iloc[0])
                        # Keep the returned frame when ``fillna`` provides
                        # one, so a non-in-place fill is not lost.
                        filled = fillna(data)
                        if filled is not None:
                            data = filled
                data.to_sql(SESSION_BAR_TABLE,
                            session_bars.connect(),
                            if_exists='append',
                            index_label='day')
                if symbol in dates_json[freq]:
                    # Yield full history, not just the newly added bars.
                    data = load_stored_bars(symbol)
            dates_json[freq][symbol] = data.index[-1].strftime('%Y%m%d')
            yield int(symbol), data

            # Persist progress once the consumer has asked for the next
            # item.
            with open(dates_path, 'w') as f:
                json.dump(dates_json, f)
Exemplo n.º 4
0
    def gen_symbols_data(symbol_map, freq='1d'):
        """Generate ``(int(symbol), bars)`` pairs for the symbols in ``symbol_map``.

        Each symbol is fetched from the day after its entry in
        ``dates_json[freq]`` (or from ``start_session`` if it has no
        entry) up to the latest usable session. The result is aligned via
        ``reindex_to_calendar``; daily bars are appended to
        ``SESSION_BAR_TABLE`` and the symbol's stored history is yielded.

        Depends on these names from the enclosing scope:
        ``session_bars``, ``calendar``, ``eg``, ``start_session``,
        ``end_session``, ``dates_json`` and ``dates_path``.

        Args:
            symbol_map: Mapping or Series whose values are symbols
                convertible with ``int()``.
            freq: ``'1d'`` for daily bars; with ``'1m'`` the fetcher is
                chosen per symbol by session distance.

        Yields:
            ``(int(symbol), DataFrame)`` tuples. ``dates_json`` is written
            to ``dates_path`` after the consumer resumes the generator
            following a freshly fetched symbol.
        """
        if not session_bars.has_table(SESSION_BAR_TABLE):
            Base.metadata.create_all(session_bars.connect(), checkfirst=True,
                                     tables=[Base.metadata.tables[SESSION_BAR_TABLE]])

        def read_history(symbol):
            # Load all stored bars for ``symbol`` in ascending day order.
            history = pd.read_sql(
                "select * from {} where id = {} order by day ASC ".format(SESSION_BAR_TABLE, int(symbol)),
                session_bars, index_col='day')
            history.index = pd.to_datetime(history.index)
            return history

        regular_fetch = partial(fetch_single_equity, eg)
        all_sessions = calendar.all_sessions

        now = pd.to_datetime('now', utc=True)
        if end_session >= now.normalize():
            end = now.normalize()
            # Earlier than 15:05 in Shanghai: use the previous day instead.
            if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                end = end - pd.Timedelta('1 D')
        else:
            end = end_session
        # Move ``end`` back to the last session at or before it. When
        # ``end`` lies past the final session, ``searchsorted`` returns
        # ``len(all_sessions)`` and indexing with it would raise.
        end_idx = all_sessions.searchsorted(end)
        if end_idx >= len(all_sessions) or all_sessions[end_idx] > end:
            end = all_sessions[end_idx - 1]

        # ``iteritems()`` no longer exists in pandas >= 2.0; ``items()``
        # is available on both dicts and Series.
        for _, symbol in symbol_map.items():
            try:
                recorded = dates_json[freq][symbol]
            except KeyError:
                # No recorded date for this symbol: start from the beginning.
                start = start_session
            else:
                start = pd.to_datetime(recorded, utc=True) + pd.Timedelta('1 D')
                start = all_sessions[all_sessions.searchsorted(start)]
                if start > end:
                    # Nothing new to fetch; the symbol is necessarily
                    # present in ``dates_json[freq]`` here.
                    if freq == '1d':
                        yield int(symbol), read_history(symbol)
                    else:
                        yield int(symbol), pd.DataFrame()
                    continue

            # Choose the fetcher for this symbol only, so a long-range
            # symbol does not change the fetcher used for later ones.
            fetch = regular_fetch
            if freq == '1m':
                single_distance = calendar.session_distance(start, end)
                if single_distance >= 100:
                    fetch = eg.get_k_data

            data = reindex_to_calendar(
                calendar,
                fetch(symbol, start, end, freq),
                start_session=start, end_session=end,
                freq=freq,
            )
            if data is None or data.empty:
                if freq == '1d' and symbol in dates_json[freq]:
                    yield int(symbol), read_history(symbol)
                continue

            if freq == '1d':
                if data.close.isnull().iloc[0]:  # padding fill error if the first is NaN
                    last_bar = pd.read_sql(
                        "select * from {} where id = {} order by day desc limit 1 ".format(SESSION_BAR_TABLE, int(symbol)),
                        session_bars, index_col='day')
                    if last_bar.empty:
                        data = data[data.close.notnull()]
                    else:
                        # Write the seed value with one positional setter;
                        # chained ``data["close"][0] = ...`` can be lost.
                        data.iloc[0, data.columns.get_loc("close")] = last_bar["close"].iloc[0]
                        # Use the frame ``fillna`` returns when it returns
                        # one, so the fill survives if it is not in place.
                        refilled = fillna(data)
                        if refilled is not None:
                            data = refilled
                data.to_sql(SESSION_BAR_TABLE, session_bars.connect(), if_exists='append', index_label='day')
                if symbol in dates_json[freq]:
                    # Hand back the complete stored history for the symbol.
                    data = read_history(symbol)
            dates_json[freq][symbol] = data.index[-1].strftime('%Y%m%d')
            yield int(symbol), data

            # Runs when the consumer requests the next item.
            with open(dates_path, 'w') as f:
                json.dump(dates_json, f)