Example #1
0
    def ingest(environ, asset_db_writer, minute_bar_writer, daily_bar_writer,
               adjustment_writer, calendar, start_session, end_session, cache,
               show_progress, output_dir):
        """Zipline bundle ingest entry point.

        Writes daily and/or minute bars (depending on ``interval``) for every
        asset returned by ``list_assets()``, then writes the per-asset
        metadata gathered while the bar data was consumed.

        NOTE(review): depends on names from the enclosing scope that are not
        visible here: ``interval``, ``metadata_df``, ``df_generator``,
        ``asset_to_sid_map`` and ``list_assets`` — confirm their contracts
        against the surrounding module.
        """

        # Map each asset to a zipline SID via the asset finder.
        assets_to_sids = asset_to_sid_map(asset_db_writer.asset_finder,
                                          list_assets())

        def minute_data_generator():
            # Unpacking trick: each item yielded by df_generator is a tuple
            # whose head (sid_df) is forwarded to the bar writer, while the
            # tail is star-assigned INTO the row of ``metadata`` indexed by
            # sid_df[0] — i.e. draining this generator populates ``metadata``
            # as a side effect.  ``metadata`` is a closure variable bound in
            # the loop below before this generator is consumed.
            return (sid_df
                    for (sid_df, *metadata.iloc[sid_df[0]]
                         ) in df_generator(interval='1m',
                                           start=start_session,
                                           end=end_session,
                                           assets_to_sids=assets_to_sids))

        def daily_data_generator():
            # Same side-effecting unpacking as minute_data_generator, but for
            # daily ('1d') bars.
            return (sid_df
                    for (sid_df, *metadata.iloc[sid_df[0]]
                         ) in df_generator(interval='1d',
                                           start=start_session,
                                           end=end_session,
                                           assets_to_sids=assets_to_sids))

        for _interval in interval:
            # Fresh metadata frame per interval; the generators above close
            # over this name and fill it row-by-row while the writer drains
            # them.
            metadata = metadata_df()
            if _interval == '1d':
                daily_bar_writer.write(daily_data_generator(),
                                       assets=assets_to_sids.values(),
                                       show_progress=True)
            elif _interval == '1m':
                minute_bar_writer.write(minute_data_generator(),
                                        assets=assets_to_sids.values(),
                                        show_progress=True)

            # Drop the ticker rows which have missing sessions in their data sets
            metadata.dropna(inplace=True)

            asset_db_writer.write(equities=metadata)
            print(metadata)  # NOTE(review): looks like leftover debug output
            # No splits/dividends collected here, so write empty adjustment
            # tables (zipline requires the adjustment DB to exist).
            adjustment_writer.write()
Example #2
0
def csvdir_bundle(environ,
                  asset_db_writer,
                  minute_bar_writer,
                  daily_bar_writer,
                  adjustment_writer,
                  calendar,
                  start_session,
                  end_session,
                  cache,
                  show_progress,
                  output_dir,
                  tframes=None,
                  csvdir=None):
    """Build a zipline data bundle from a directory of csv files.

    Parameters
    ----------
    environ : mapping
        Environment; ``CSVDIR`` is read when *csvdir* is not given.
    tframes : iterable of str, optional
        Timeframes to ingest ('daily' and/or 'minute').  Defaults to
        whichever of the two subdirectories exist under *csvdir*.
    csvdir : str, optional
        Root directory containing ``daily/`` and/or ``minute/``
        subdirectories of ``<symbol>.csv*`` files.

    Raises
    ------
    ValueError
        If no csv directory can be resolved, it is not a directory, no
        timeframe subdirectories exist, or a timeframe directory holds no
        csv files.
    """
    if not csvdir:
        csvdir = environ.get('CSVDIR')
        if not csvdir:
            raise ValueError("CSVDIR environment variable is not set")

    if not os.path.isdir(csvdir):
        raise ValueError(f"{csvdir} is not a directory")

    if not tframes:
        # Only ingest the timeframes that actually exist on disk.
        tframes = {"daily", "minute"}.intersection(os.listdir(csvdir))

        if not tframes:
            raise ValueError("'daily' and 'minute' directories "
                             f"not found in '{csvdir}'")

    # Accumulators filled as a side effect by _pricing_iter while the bar
    # writer drains it.
    divs_splits = {
        'divs':
        DataFrame(columns=[
            'sid', 'amount', 'ex_date', 'record_date', 'declared_date',
            'pay_date'
        ]),
        'splits':
        DataFrame(columns=['sid', 'ratio', 'effective_date'])
    }
    for tframe in tframes:
        ddir = os.path.join(csvdir, tframe)

        # '<name>.csv' and '<name>.csv.gz' both map to symbol '<name>'.
        symbols = sorted(
            item.split('.csv')[0] for item in os.listdir(ddir)
            if '.csv' in item)
        if not symbols:
            raise ValueError(f"no <symbol>.csv* files found in {ddir}")

        # Pre-allocate one metadata row per symbol; _pricing_iter fills the
        # rows in as it reads each csv.
        dtype = [('start_date', 'datetime64[ns]'),
                 ('end_date', 'datetime64[ns]'),
                 ('auto_close_date', 'datetime64[ns]'), ('symbol', 'object')]
        metadata = DataFrame(empty(len(symbols), dtype=dtype))

        if tframe == 'minute':
            writer = minute_bar_writer
        else:
            writer = daily_bar_writer

        assets_to_sids = asset_to_sid_map(asset_db_writer.asset_finder,
                                          symbols)

        writer.write(_pricing_iter(ddir,
                                   symbols,
                                   metadata,
                                   divs_splits,
                                   show_progress,
                                   assets_to_sids=assets_to_sids),
                     show_progress=show_progress)

        # Hardcode the exchange to "CSVDIR" for all assets and (elsewhere)
        # register "CSVDIR" to resolve to the NYSE calendar, because these
        # are all equities and thus can use the NYSE calendar.
        metadata['exchange'] = "CSVDIR"

        asset_db_writer.write(equities=metadata)

        # pd.concat inside _pricing_iter widens 'sid' to float; restore int
        # before handing the frames to the adjustment writer.
        divs_splits['divs']['sid'] = divs_splits['divs']['sid'].astype(int)
        divs_splits['splits']['sid'] = divs_splits['splits']['sid'].astype(int)
        adjustment_writer.write(splits=divs_splits['splits'],
                                dividends=divs_splits['divs'])
Example #3
0
    def ingest(environ, asset_db_writer, minute_bar_writer, daily_bar_writer,
               adjustment_writer, calendar, start_session, end_session, cache,
               show_progress, output_dir):
        """Zipline bundle ingest entry point.

        Writes daily and/or minute bars (depending on ``interval``) for every
        asset returned by ``list_assets()``, then writes the collected asset
        metadata and the splits/dividends accumulated while draining the bar
        generators.

        NOTE(review): depends on names from the enclosing scope that are not
        visible here: ``interval``, ``metadata_df``, ``df_generator``,
        ``asset_to_sid_map`` and ``list_assets``.
        """

        # Accumulators filled as a side effect by df_generator while the bar
        # writers drain the generators below.
        divs_splits = {
            'divs':
            pd.DataFrame(columns=[
                'sid', 'amount', 'ex_date', 'record_date', 'declared_date',
                'pay_date'
            ]),
            'splits':
            pd.DataFrame(columns=['sid', 'ratio', 'effective_date'])
        }

        assets_to_sids = asset_to_sid_map(asset_db_writer.asset_finder,
                                          list_assets())

        def minute_data_generator():
            # Unpacking trick: each item yielded by df_generator is a tuple
            # whose head (sid_df) is forwarded to the bar writer, while the
            # tail is star-assigned INTO the row of ``metadata`` indexed by
            # sid_df[0] — draining this generator populates ``metadata`` as
            # a side effect.
            return (sid_df
                    for (sid_df, *metadata.iloc[sid_df[0]]
                         ) in df_generator(interval='1m',
                                           start=start_session,
                                           end=end_session,
                                           assets_to_sids=assets_to_sids,
                                           divs_splits=divs_splits))

        def daily_data_generator():
            # Same side-effecting unpacking as minute_data_generator, for
            # daily bars.
            # NOTE(review): uses label-based .loc where the minute generator
            # uses positional .iloc — verify this asymmetry is intentional.
            return (sid_df
                    for (sid_df, *metadata.loc[sid_df[0]]
                         ) in df_generator(interval='1d',
                                           start=start_session,
                                           end=end_session,
                                           assets_to_sids=assets_to_sids,
                                           divs_splits=divs_splits))

        metadata = metadata_df(assets_to_sids)

        assets = list_assets()
        for _interval in interval:
            if _interval == '1d':
                daily_bar_writer.write(daily_data_generator(),
                                       assets=assets_to_sids.values(),
                                       show_progress=True,
                                       invalid_data_behavior='raise')
            elif _interval == '1m':
                minute_bar_writer.write(minute_data_generator(),
                                        show_progress=True)

        # Drop the ticker rows which have missing sessions in their data sets
        metadata.dropna(inplace=True)
        asset_db_writer.write(equities=metadata)

        # pd.concat inside df_generator widens 'sid' to float; restore int
        # and parse the date columns before writing the adjustment DB.
        # (Was ``np.int`` — that alias was removed in NumPy 1.24; the
        # builtin ``int`` is the documented replacement and matches the
        # usage in csvdir_bundle.)
        divs_splits['splits']['sid'] = divs_splits['splits']['sid'].astype(
            int)
        divs_splits['divs']['sid'] = divs_splits['divs']['sid'].astype(int)
        divs_splits['divs']['ex_date'] = pd.to_datetime(
            divs_splits['divs']['ex_date'], utc=True)
        divs_splits['divs']['pay_date'] = pd.to_datetime(
            divs_splits['divs']['pay_date'], utc=True)

        adjustment_writer.write(splits=divs_splits['splits'],
                                dividends=divs_splits['divs'])

        print(metadata)  # NOTE(review): looks like leftover debug output