Example #1
    def test_cal_sam_minute(self):
        # test the correctness of the code
        random_n = 1000
        cal = get_min_cal()

        def gen_args():
            for time in np.random.choice(cal, size=random_n, replace=True):
                sam_minutes = np.random.choice([1, 2, 3, 4, 5, 6])
                dt = pd.Timestamp(
                    datetime(
                        2021,
                        month=3,
                        day=3,
                        hour=time.hour,
                        minute=time.minute,
                        second=time.second,
                        microsecond=time.microsecond,
                    ))
                args = dt, sam_minutes
                yield args

        for args in gen_args():
            assert cal_sam_minute(*args) == cal_sam_minute_new(*args)

        # test the performance of the code

        args_l = list(gen_args())

        with TimeInspector.logt("cal_sam_minute"):
            for args in args_l:
                cal_sam_minute(*args)

        with TimeInspector.logt("cal_sam_minute_new"):
            for args in args_l:
                cal_sam_minute_new(*args)
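
The example above validates the new implementation against the old one, then times both over identical arguments. A minimal, self-contained sketch of the `TimeInspector.logt` context manager in isolation, assuming it comes from `qlib.log` (the project these snippets appear to originate from) and that `logt` accepts a label:

from qlib.log import TimeInspector

def slow_sum(n):
    # deliberately slow work so the timing is visible
    return sum(i * i for i in range(n))

# logt logs the wall-clock time spent inside the block under the given label
with TimeInspector.logt("slow_sum benchmark"):
    slow_sum(1_000_000)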
Example #2
    def test_handler_storage(self):
        # init data handler
        data_handler = TestHandler(**self.data_handler_kwargs)

        # init data handler with hashing storage
        data_handler_hs = TestHandler(**self.data_handler_kwargs,
                                      infer_processors=["HashStockFormat"])

        fetch_start_time = "2019-01-01"
        fetch_end_time = "2019-12-31"
        instruments = D.instruments(market=self.market)
        instruments = D.list_instruments(instruments=instruments,
                                         start_time=fetch_start_time,
                                         end_time=fetch_end_time,
                                         as_list=True)

        with TimeInspector.logt("random fetch with DataFrame Storage"):

            # single stock
            for i in range(100):
                random_index = np.random.randint(len(instruments), size=1)[0]
                fetch_stock = instruments[random_index]
                data_handler.fetch(selector=(fetch_stock,
                                             slice(fetch_start_time,
                                                   fetch_end_time)),
                                   level=None)

            # multi stocks
            for i in range(100):
                random_indices = np.random.randint(len(instruments), size=5)
                fetch_stocks = [
                    instruments[_index] for _index in random_indices
                ]
                data_handler.fetch(selector=(fetch_stocks,
                                             slice(fetch_start_time,
                                                   fetch_end_time)),
                                   level=None)

        with TimeInspector.logt("random fetch with HasingStock Storage"):

            # single stock
            for i in range(100):
                random_index = np.random.randint(len(instruments), size=1)[0]
                fetch_stock = instruments[random_index]
                data_handler_hs.fetch(selector=(fetch_stock,
                                                slice(fetch_start_time,
                                                      fetch_end_time)),
                                      level=None)

            # multi stocks
            for i in range(100):
                random_indices = np.random.randint(len(instruments), size=5)
                fetch_stocks = [
                    instruments[_index] for _index in random_indices
                ]
                data_handler_hs.fetch(selector=(fetch_stocks,
                                                slice(fetch_start_time,
                                                      fetch_end_time)),
                                      level=None)
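
The benchmark shape here (N random lookups against each of two storage backends, each wrapped in one labeled timing block) can be reproduced without qlib. A rough sketch in which a pandas DataFrame index and a plain dict stand in, hypothetically, for the DataFrame and hashing storage formats:

import time

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
keys = [f"SH{i:06d}" for i in range(10_000)]
df = pd.DataFrame({"value": rng.random(len(keys))}, index=keys)
hashed = df["value"].to_dict()  # stand-in for a hashed per-stock storage

start = time.perf_counter()
for _ in range(100):
    _ = df.loc[keys[rng.integers(len(keys))]]
print(f"DataFrame lookup: {time.perf_counter() - start:.4f}s")

start = time.perf_counter()
for _ in range(100):
    _ = hashed[keys[rng.integers(len(keys))]]
print(f"dict lookup:      {time.perf_counter() - start:.4f}s")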
Example #3
def _to_excel(
        reports: Union[pd.DataFrame, List[pd.DataFrame]],
        sheet_names: Union[str, List[str]],
        file_path: Path
):
    if not isinstance(reports, list):
        reports = [reports]
        sheet_names = [sheet_names]

    if file_path.exists():
        mode = 'a'
        if_sheet_exists = 'replace'
    else:
        mode = 'w'
        if_sheet_exists = None

    with pd.ExcelWriter(file_path, mode=mode, if_sheet_exists=if_sheet_exists) as writer:
        for report_df, sheet_name in zip(reports, sheet_names):
            with TimeInspector.logt(f"write {sheet_name} sheet to excel"):
                report_df.to_excel(writer, sheet_name=sheet_name, float_format="%.4f", merge_cells=False,
                                   freeze_panes=(1, 2))
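
A hypothetical call sequence for `_to_excel`, assuming pandas >= 1.3 with openpyxl installed (append mode and `if_sheet_exists` need both). The first call creates the file; the second reuses it and replaces the sheet of the same name:

from pathlib import Path

import pandas as pd

returns = pd.DataFrame({"daily_return": [0.011, -0.024, 0.007]})
risk = pd.DataFrame({"volatility": [0.15, 0.17, 0.16]})

_to_excel([returns, risk], ["returns", "risk"], Path("report.xlsx"))
# A later run replaces the existing "returns" sheet instead of failing:
_to_excel(returns, "returns", Path("report.xlsx"))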
Example #4
    def get_all_data(self) -> pd.DataFrame:
        cw_dir = self.tdx_dir.joinpath("cw")
        gpcw_path = cw_dir.joinpath("gpcw.txt")

        self.updated_date = int(
            pd.to_datetime(gpcw_path.stat().st_mtime_ns).strftime('%Y%m%d'))
        file_df = pd.read_csv(gpcw_path, header=None, usecols=[0])

        from pytdx.reader.history_financial_reader import HistoryFinancialReader

        with TimeInspector.logt("get all cn financial files  ......"):
            df = pd.concat([
                HistoryFinancialReader().get_df(cw_dir.joinpath(file_path))
                for file_path in file_df.iloc[:, 0].to_list()
            ])

        # find the invalid (all-zero) columns
        unit_bool = df == 0
        col_bool = unit_bool.all(axis=0)
        unused_col = col_bool[col_bool].index.to_list()
        df = df.rename(columns={'col314': 'date', 'report_date': 'period'})
        return df.drop(columns=unused_col)
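
The cleanup step at the end is a general pandas idiom: find the columns that are zero in every row, then drop them. A standalone illustration with made-up data:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [0, 0], "c": [0, 3]})
all_zero = (df == 0).all(axis=0)                  # boolean Series, one entry per column
unused_cols = all_zero[all_zero].index.to_list()  # -> ['b']
print(df.drop(columns=unused_cols))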
Example #5
    def __init__(self, dataset: pd.DataFrame):
        self._dataset = dataset
        with TimeInspector.logt("calc_stat_values"):
            self.calc_stat_values()
Example #6
    def collect(self,
                artifacts_key=None,
                rec_filter_func=None,
                only_exist=True) -> dict:
        """
        Collect different artifacts from the recorders that pass the filter.

        Args:
            artifacts_key (str or List, optional): the artifact key(s) to collect. If None, use the default.
            rec_filter_func (Callable, optional): a filter that keeps a recorder when it returns True. If None, use the default.
            only_exist (bool, optional): whether to collect an artifact only when a recorder actually has it.
                If True, recorders that raise an exception while loading are skipped; if False, the exception is re-raised.

        Returns:
            dict: the collected artifacts, shaped like {artifact: {rec_key: object}}
        """
        if artifacts_key is None:
            artifacts_key = self.artifacts_key
        if rec_filter_func is None:
            rec_filter_func = self.rec_filter_func

        if isinstance(artifacts_key, str):
            artifacts_key = [artifacts_key]

        collect_dict = {}
        # filter records

        if isinstance(self.experiment, Experiment):
            with TimeInspector.logt(
                    "Time to `list_recorders` in RecorderCollector"):
                recs = list(
                    self.experiment.list_recorders(
                        **self.list_kwargs).values())
        elif isinstance(self.experiment, Callable):
            recs = self.experiment()

        recs = [
            rec for rec in recs
            if rec_filter_func is None or rec_filter_func(rec)
        ]

        logger = get_module_logger("RecorderCollector")
        for rec in recs:
            rec_key = self.rec_key_func(rec)
            for key in artifacts_key:
                if self.ART_KEY_RAW == key:
                    artifact = rec
                else:
                    try:
                        artifact = rec.load_object(self.artifacts_path[key])
                    except Exception as e:
                        if only_exist:
                            # only collect existing artifact
                            continue
                        raise e
                # warn the user if a previously collected value is overridden
                cdd = collect_dict.setdefault(key, {})
                if rec_key in cdd:
                    logger.warning(
                        f"key '{rec_key}' is duplicated. The previous value will be overridden. Please check your `rec_key_func`."
                    )
                cdd[rec_key] = artifact

        return collect_dict
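
The merge at the bottom (`setdefault` for one sub-dict per artifact key, plus a warning before a duplicate recorder key is overridden) does not depend on qlib. A minimal sketch with the standard `logging` module; the function and names are hypothetical:

import logging

logger = logging.getLogger("collector")

def collect_into(collect_dict, art_key, rec_key, artifact):
    # one sub-dict per artifact key; warn when a recorder key is about to be overridden
    cdd = collect_dict.setdefault(art_key, {})
    if rec_key in cdd:
        logger.warning("key '%s' is duplicated; the previous value will be overridden.", rec_key)
    cdd[rec_key] = artifact

collected = {}
collect_into(collected, "pred", "rec1", "first")
collect_into(collected, "pred", "rec1", "second")  # triggers the warning
print(collected)  # {'pred': {'rec1': 'second'}}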