示例#1
0
    def __init__(self, start_t=date(2010, 1, 1), end_t=date(2016, 12, 31)):
        """Load market data for [start_t, end_t], using a local pickle cache.

        The data is read from a gzip-compressed pickle under /tmp when one
        exists for this date range; otherwise it is fetched from the Arctic
        chunkstore and written to the cache for next time.

        NOTE(review): the cache key encodes only the date range; it should
        eventually also hash self.FEATURES so a FEATURES change invalidates
        the cache (see TODO below).
        """
        # TODO: keyed on start/end only for now; the plan is a single pickle
        # holding the full data set.
        # BUG FIX: the original built the file name from start_t.isocalendar()
        # (a tuple like (2010, 53, 5)) but end_t.isoformat(); both bounds now
        # use isoformat() consistently.
        cache_file_path = os.path.join(
            "/tmp",
            f"TestTSDataGenerator_{start_t.isoformat()}_{end_t.isoformat()}.pkl"
        )
        self.original_data: pd.DataFrame = None
        self.df_x_to_loop: pd.DataFrame = None
        self.df_y_to_loop: pd.DataFrame = None

        if os.path.isfile(cache_file_path):
            self.original_data = pd.read_pickle(cache_file_path,
                                                compression="gzip")
        else:  # cache miss: read from Arctic and cache the result
            arctic_store = Arctic(get_mongo_admin_conn_str())
            lib_name = "jy_chn_equity_otvn_chunkstore"
            lib_chunk_store = arctic_store[lib_name]
            symbol_name = "mkt_data"
            self.original_data: pd.DataFrame = lib_chunk_store.read(
                symbol_name,
                chunk_range=pd.date_range(start_t, end_t),
                filter_data=True,
                columns=self.FEATURES)
            self.original_data.to_pickle(cache_file_path,
                                         compression="gzip",
                                         protocol=4)
示例#2
0
    def save_to_chunkstore_per_symbol(self):
        """Rebuild the per-symbol chunkstore library from all close prices.

        Drops and recreates the library, then writes one daily-chunked
        series per instrument column, logging progress every other symbol.
        """
        lib_name = "jy_equity_mkt_data"
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store[lib_name]

        prices = self.load_all_close_price()

        # Wide frame: one column per instrument ("o"), indexed by time ("t").
        pivoted = prices.pivot_table(values="close_price",
                                     index="t",
                                     columns="o",
                                     aggfunc=np.mean)
        pivoted.index.rename("date", inplace=True)

        for count, symbol in enumerate(pivoted.columns, start=1):
            series = pivoted.loc[:, symbol].dropna(axis=0)
            lib_chunk_store.write(symbol,
                                  series,
                                  chunker=DateChunker(),
                                  chunk_size="D")
            if count % 2 == 0:
                print(f"{count}:{symbol}")
示例#3
0
    def save_to_arctic(self):
        """Write each instrument's close-price series into the VersionStore.

        see https://github.com/manahl/arctic/blob/master/howtos/201507_demo_pydata.py
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        closeprice_lib = arctic_store["jy_equity_closeprice"]

        raw = self.load_all_close_price()

        # Wide frame: one column per instrument ("o"), indexed by time ("t").
        wide = raw.pivot_table(values="close_price",
                               index="t",
                               columns="o",
                               aggfunc=np.mean)

        for seq, symbol in enumerate(wide.columns, start=1):
            series = wide.loc[:, symbol].dropna(axis=0)
            closeprice_lib.write(symbol, series)
            print(f"{seq}:{symbol}")

        print(closeprice_lib.list_symbols())
示例#4
0
 def show_chunk_store_info(self):
     """Print symbols, metadata and chunk ranges of the chunkstore library."""
     store = Arctic(get_mongo_admin_conn_str())
     chunk_lib = store["jy_otv_chunkstore"]
     print("list_symbols")
     print(chunk_lib.list_symbols())
     print("get_info")
     print(chunk_lib.get_info("close_price"))
     print("chunk_ranges")
     print(list(chunk_lib.get_chunk_ranges("close_price")))
示例#5
0
 def read_all_data_from_arctic(self):
     """Read every symbol from the close-price library and report throughput.

     Fixes vs. original: list_symbols() is fetched once instead of twice
     (it cost an extra server round-trip), elapsed time is sampled once so
     the printed Time and Rows/s agree, the dated %-formatting is an
     f-string, and the useless trailing `pass` is gone.
     """
     arctic_store = Arctic(get_mongo_admin_conn_str())
     closeprice_lib = arctic_store["jy_equity_closeprice"]
     start = time.time()
     symbols = closeprice_lib.list_symbols()
     rows_read = 0
     for s in symbols:
         rows_read += len(closeprice_lib.read(s).data)
     elapsed = time.time() - start
     print(f"Symbols: {len(symbols)} Rows: {rows_read}  "
           f"Time: {elapsed}  Rows/s: {rows_read / elapsed}")
示例#6
0
    def convert_mkt_history_data(self):
        """Recreate the OTVN chunkstore library and load it period by period."""
        arctic_store = Arctic(get_mongo_admin_conn_str())
        lib_name = "jy_chn_equity_otvn_chunkstore"

        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store[lib_name]

        # Hard-coded date ranges for now; a more elegant generator is possible.
        periods = [
            (date(1990, 1, 1), date(2000, 1, 15)),
            (date(2000, 1, 15), date(2010, 1, 15)),
            (date(2010, 1, 15), date(2020, 1, 1)),
        ]
        # Benchmarks showed "M" is the best overall chunk_size for daily data.
        for idx, (period_start, period_end) in enumerate(periods):
            self._convert_period_equity_mkt_data_to_arctic(
                period_start, period_end, lib_chunk_store, "mkt_data", "M",
                idx == 0)
示例#7
0
    def save_to_arctic_v2(self):
        """Store the whole pivoted close-price frame under a single symbol.

        see https://github.com/manahl/arctic/blob/master/howtos/201507_demo_pydata.py
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        closeprice_lib = arctic_store["jy_equity_closeprice_v2"]

        raw = self.load_all_close_price()

        # Wide frame: one column per instrument ("o"), indexed by time ("t").
        wide = raw.pivot_table(values="close_price",
                               index="t",
                               columns="o",
                               aggfunc=np.mean)
        closeprice_lib.write("close_price", wide)
        print(closeprice_lib.list_symbols())
 def __init__(self,
              lib_name: str = _ARCTIC_BINARY_LIBRARY,
              mongo_db: str = "auto"):
     """One instance operates on exactly one Arctic library.

     mongo_db:
         "auto"     intended to pick the Google or local Mongo depending on
                    whether we run inside colab (currently falls back to the
                    admin connection string)
         "google"   use the Google-hosted Mongo
         "intranet" use the Mongo in the server room
     """
     # The admin connection string is hard-coded as the default for now.
     # It is always resolved first (matching the original call sequence),
     # then overridden for the explicit targets.
     default_conn = get_mongo_admin_conn_str()
     overrides = {
         "google": get_google_mongo_conn_str,
         "intranet": get_intranet_mongo_conn_str,
     }
     conn_str = overrides[mongo_db]() if mongo_db in overrides else default_conn
     self._store = Arctic(conn_str)
     if not self._store.library_exists(lib_name):
         self._store.initialize_library(lib_name, VERSION_STORE)
     self._lib = self._store[lib_name]
示例#9
0
    def save_to_arctic_tickstore(self):
        """Attempt to store the pivoted close prices in a TickStore.

        NOTE: marked "not work" by the original author — writing this frame
        to a TickStore did not succeed; kept for reference.
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library("jy_otv_tickstore")
        arctic_store.initialize_library("jy_otv_tickstore",
                                        lib_type=TICK_STORE)
        lib_tick_store = arctic_store["jy_otv_tickstore"]

        raw = self.load_all_close_price()

        # Wide frame: one column per instrument ("o"), indexed by time ("t").
        wide = raw.pivot_table(values="close_price",
                               index="t",
                               columns="o",
                               aggfunc=np.mean)

        # TickStore requires a timezone-aware index.
        wide.index = wide.index.tz_localize("Asia/Shanghai")

        lib_tick_store.write("close_price", wide)
示例#10
0
    def read_from_chunkstore(self, start: date, end: date, cols: List[str]):
        """Read `cols` of "mkt_data" between `start` and `end` from Arctic.

        Returns the filtered pd.DataFrame and logs the wall-clock read time.

        Benchmark notes (kept from earlier measurements):
          [5Y (2012/01/15 - 2017/12/15)] 3,906,128 rows, [ALL] 9,688,283 rows
            chunk_size = Y : 16.74 s (5Y), 38.89 s (ALL)
            chunk_size = M : 16.83 s (5Y), 39.73 s (ALL)
            chunk_size = D : 23.75 s (5Y), 74.94 s (ALL)
          Reading two extra value columns adds only ~0.5 s.
          For comparison: MySQL takes ~120 s to read 10Y (2000-2009,
          3,187,574 rows); writing those rows to Arctic takes ~77 s.
          (A local gzip pickle reads 9,595,866 rows in ~0.48 s.)
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        lib_name = "jy_chn_equity_otvn_chunkstore"
        lib_chunk_store = arctic_store[lib_name]

        symbol_name = "mkt_data"

        run_start = time.time()
        data: pd.DataFrame = lib_chunk_store.read(
            symbol_name,
            chunk_range=pd.date_range(start, end),
            filter_data=True,
            columns=cols)
        logger.info(f"total read time {time.time() - run_start} ")

        return data
示例#11
0
    def save_to_chunkstore(self):
        """Write all close prices, sorted by index, into a fresh chunkstore."""
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library("jy_otv_chunkstore")
        arctic_store.initialize_library("jy_otv_chunkstore",
                                        lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store["jy_otv_chunkstore"]

        prices = self.load_all_close_price()
        prices.sort_index(axis=0, ascending=True, inplace=True)
        print(prices)

        write_start = time.time()
        # Monthly chunks: best overall read/write trade-off for daily data.
        lib_chunk_store.write("close_price", prices, chunk_size="M")
        print(f"total write time {time.time()-write_start} ")
 def __init__(self):
     """Connect to Arctic with generous (10-minute) Mongo timeouts."""
     timeout_ms = 600 * 1000  # 10 minutes, for both connect and server selection
     self.arctic_store = Arctic(get_mongo_admin_conn_str(),
                                connectTimeoutMS=timeout_ms,
                                serverSelectionTimeoutMS=timeout_ms)