def test_cal_sam_minute(self):
    """Verify cal_sam_minute_new agrees with cal_sam_minute, then time both."""
    # --- correctness: compare the two implementations on random calendar times ---
    sample_count = 1000
    calendar = get_min_cal()

    def make_cases():
        # Yield (timestamp, sam_minutes) pairs drawn at random from the minute calendar.
        for t in np.random.choice(calendar, size=sample_count, replace=True):
            minutes = np.random.choice([1, 2, 3, 4, 5, 6])
            ts = pd.Timestamp(
                datetime(
                    2021,
                    month=3,
                    day=3,
                    hour=t.hour,
                    minute=t.minute,
                    second=t.second,
                    microsecond=t.microsecond,
                )
            )
            yield ts, minutes

    for case in make_cases():
        assert cal_sam_minute(*case) == cal_sam_minute_new(*case)

    # --- performance: run each implementation over a fresh batch of cases ---
    cases = list(make_cases())
    with TimeInspector.logt():
        for case in cases:
            cal_sam_minute(*case)
    with TimeInspector.logt():
        for case in cases:
            cal_sam_minute_new(*case)
def test_handler_storage(self):
    """Time random fetches against a plain handler and a hash-storage handler."""
    # two handlers over the same data: one default, one with hashing storage
    plain_handler = TestHandler(**self.data_handler_kwargs)
    hashed_handler = TestHandler(**self.data_handler_kwargs, infer_processors=["HashStockFormat"])

    fetch_start_time = "2019-01-01"
    fetch_end_time = "2019-12-31"
    instruments = D.instruments(market=self.market)
    instruments = D.list_instruments(
        instruments=instruments, start_time=fetch_start_time, end_time=fetch_end_time, as_list=True
    )

    def _random_fetch(handler):
        # 100 single-stock fetches followed by 100 five-stock fetches,
        # matching the original benchmark's random-call order exactly.
        for _ in range(100):
            idx = np.random.randint(len(instruments), size=1)[0]
            handler.fetch(
                selector=(instruments[idx], slice(fetch_start_time, fetch_end_time)), level=None
            )
        for _ in range(100):
            idxs = np.random.randint(len(instruments), size=5)
            stocks = [instruments[i] for i in idxs]
            handler.fetch(
                selector=(stocks, slice(fetch_start_time, fetch_end_time)), level=None
            )

    with TimeInspector.logt("random fetch with DataFrame Storage"):
        _random_fetch(plain_handler)
    with TimeInspector.logt("random fetch with HasingStock Storage"):
        _random_fetch(hashed_handler)
def _to_excel(
    reports: Union[pd.DataFrame, List[pd.DataFrame]],
    sheet_names: Union[str, List[str]],
    file_path: Path,
):
    """Write one or more report DataFrames to an Excel workbook.

    Args:
        reports: a single DataFrame or a list of DataFrames to write.
        sheet_names: the sheet name(s) matching ``reports`` one-to-one.
        file_path: target ``.xlsx`` path; appended to (replacing same-named
            sheets) if it already exists, created otherwise.

    Raises:
        ValueError: if the number of reports and sheet names differ.
    """
    if not isinstance(reports, list):
        reports = [reports]
    # BUG FIX: normalize sheet_names independently of reports — previously a
    # list of reports paired with a single sheet-name string would zip the
    # DataFrames against the characters of that string.
    if isinstance(sheet_names, str):
        sheet_names = [sheet_names]
    if len(reports) != len(sheet_names):
        raise ValueError("reports and sheet_names must have the same length")
    if file_path.exists():
        # append mode with sheet replacement keeps repeated runs idempotent
        mode = 'a'
        if_sheet_exists = 'replace'
    else:
        mode = 'w'
        if_sheet_exists = None  # only valid (non-None) in append mode
    with pd.ExcelWriter(file_path, mode=mode, if_sheet_exists=if_sheet_exists) as writer:
        for report_df, sheet_name in zip(reports, sheet_names):
            with TimeInspector.logt(f"write {sheet_name} sheet to excel"):
                report_df.to_excel(
                    writer,
                    sheet_name=sheet_name,
                    float_format="%.4f",
                    merge_cells=False,
                    freeze_panes=(1, 2),
                )
def get_all_data(self) -> pd.DataFrame:
    """Concatenate every TDX financial file under ``<tdx_dir>/cw`` into one DataFrame."""
    cw_dir = self.tdx_dir.joinpath("cw")
    gpcw_path = cw_dir.joinpath("gpcw.txt")
    # remember the index file's modification date as an int like 20210303
    self.updated_date = int(pd.to_datetime(gpcw_path.stat().st_mtime_ns).strftime('%Y%m%d'))
    file_df = pd.read_csv(gpcw_path, header=None, usecols=[0])
    from pytdx.reader.history_financial_reader import HistoryFinancialReader
    with TimeInspector.logt("get all cn financial files ......"):
        frames = [
            HistoryFinancialReader().get_df(cw_dir.joinpath(name))
            for name in file_df.iloc[:, 0].to_list()
        ]
        df = pd.concat(frames)
    # find columns that are zero for every row — they carry no information
    all_zero_mask = (df == 0).all(axis=0)
    unused_cols = all_zero_mask[all_zero_mask].index.to_list()
    df = df.rename(columns={'col314': 'date', 'report_date': 'period'})
    return df.drop(columns=unused_cols)
def __init__(self, dataset: pd.DataFrame):
    """Store the dataset and eagerly compute its statistics (timed)."""
    self._dataset = dataset
    # log how long the statistics pass takes
    with TimeInspector.logt("calc_stat_values"):
        self.calc_stat_values()
def collect(self, artifacts_key=None, rec_filter_func=None, only_exist=True) -> dict:
    """
    Collect different artifacts based on recorder after filtering.

    Args:
        artifacts_key (str or List, optional): the artifacts key you want to get. If None, use the default.
        rec_filter_func (Callable, optional): filter the recorder by return True or False. If None, use the default.
        only_exist (bool, optional): if only collect the artifacts when a recorder really has.
            If True, the recorder with exception when loading will not be collected. But if False, it will raise the exception.

    Returns:
        dict: the dict after collected like {artifact: {rec_key: object}}

    Raises:
        NotImplementedError: if ``self.experiment`` is neither an ``Experiment`` nor a callable.
    """
    if artifacts_key is None:
        artifacts_key = self.artifacts_key
    if rec_filter_func is None:
        rec_filter_func = self.rec_filter_func
    if isinstance(artifacts_key, str):
        artifacts_key = [artifacts_key]

    collect_dict = {}
    # filter records
    if isinstance(self.experiment, Experiment):
        with TimeInspector.logt("Time to `list_recorders` in RecorderCollector"):
            recs = list(self.experiment.list_recorders(**self.list_kwargs).values())
    elif isinstance(self.experiment, Callable):
        recs = self.experiment()
    else:
        # BUG FIX: previously fell through with `recs` unbound, so an unsupported
        # experiment type surfaced as a confusing NameError below.
        raise NotImplementedError(f"Unsupported experiment type: {type(self.experiment)}")
    recs = [rec for rec in recs if rec_filter_func is None or rec_filter_func(rec)]

    logger = get_module_logger("RecorderCollector")
    for rec in recs:
        rec_key = self.rec_key_func(rec)
        for key in artifacts_key:
            if self.ART_KEY_RAW == key:
                artifact = rec
            else:
                try:
                    artifact = rec.load_object(self.artifacts_path[key])
                except Exception:
                    if only_exist:
                        # only collect existing artifact
                        continue
                    # bare `raise` preserves the original traceback (was `raise e`)
                    raise
            # give user some warning if the values are overridden
            cdd = collect_dict.setdefault(key, {})
            if rec_key in cdd:
                logger.warning(
                    f"key '{rec_key}' is duplicated. Previous value will be overridden. Please check your `rec_key_func`"
                )
            cdd[rec_key] = artifact

    return collect_dict