def pred_autocorr(pred: pd.Series, lag=1, inst_col="instrument", date_col="datetime"):
    """pred_autocorr.

    For every date, correlate the cross-section of predictions (one value per
    instrument) with the cross-section found `lag` rows earlier.

    Limitation:
    - If the datetime is not sequential densely, the correlation will be
      calculated based on adjacent dates. (some users may expect NaN)

    :param pred: pd.Series with following format
                instrument  datetime
                SH600000    2016-01-04   -0.000403
                            2016-01-05   -0.000753
                            2016-01-06   -0.021801
                            2016-01-07   -0.065230
                            2016-01-08   -0.062465
        If a pd.DataFrame is passed, only its first column is used and a
        warning is logged.
    :type pred: pd.Series
    :param lag: number of rows (dates) to shift by before correlating
    :param inst_col: name of the index level that is unstacked into columns
    :param date_col: not used by the current implementation
    :return: pd.Series of correlations indexed by the remaining (date) level,
        sorted by index; rows without a shifted counterpart yield NaN
    """
    if isinstance(pred, pd.DataFrame):
        # Log BEFORE narrowing to a Series: a Series has no `.columns`, so
        # formatting the message after the reassignment raises AttributeError.
        get_module_logger("pred_autocorr").warning(f"Only the first column in {pred.columns} of `pred` is kept")
        pred = pred.iloc[:, 0]

    # Rows become dates, columns become instruments.
    pred_ustk = pred.sort_index().unstack(inst_col)
    shifted = pred_ustk.shift(lag)

    # Walk the current and the shifted frame in lockstep, one date at a time.
    corr_s = {
        idx: cur.corr(prev)
        for (idx, cur), (_, prev) in zip(pred_ustk.iterrows(), shifted.iterrows())
    }
    return pd.Series(corr_s).sort_index()
def __init__(
    self,
    strategies: Union[OnlineStrategy, List[OnlineStrategy]],
    trainer: Trainer = None,
    begin_time: Union[str, pd.Timestamp] = None,
    freq="day",
):
    """
    Set up an OnlineManager. At least one OnlineStrategy is required.

    Args:
        strategies (Union[OnlineStrategy, List[OnlineStrategy]]): a single OnlineStrategy or a list of them;
            a single instance is wrapped into a one-element list.
        trainer (Trainer): the trainer used to train tasks. When None, a TrainerR() is created.
        begin_time (Union[str,pd.Timestamp], optional): the time the manager starts from.
            When None, the maximum of D.calendar(freq=freq) is used.
        freq (str, optional): data frequency. Defaults to "day".
    """
    self.logger = get_module_logger(self.__class__.__name__)

    # Normalise `strategies` so the attribute is always a list.
    self.strategies = strategies if isinstance(strategies, list) else [strategies]
    self.freq = freq

    # Fall back to the last calendar entry when no start time is supplied.
    start = D.calendar(freq=self.freq).max() if begin_time is None else begin_time
    self.begin_time = pd.Timestamp(start)
    self.cur_time = self.begin_time

    # History of online models, a dict like {pd.Timestamp, {strategy, [online_models]}}.
    self.history = {}

    self.trainer = TrainerR() if trainer is None else trainer
    self.signals = None
    self.status = self.STATUS_NORMAL
def __init__(self, task_pool: str):
    """
    Create a TaskManager bound to one task pool.

    The MongoDB url and database name must be declared before this is called.
    An instance serves a specific task pool, while the static methods of this
    module serve the whole MongoDB.

    Parameters
    ----------
    task_pool: str
        the name of Collection in MongoDB
    """
    database = get_mongodb()
    # The collection is looked up as an attribute of the database object.
    self.task_pool = getattr(database, task_pool)
    logger_name = self.__class__.__name__
    self.logger = get_module_logger(logger_name)
def __init__(self, task_pool: str = None):
    """
    Create a TaskManager, optionally bound to one task pool.

    The MongoDB url and database name must be declared before this is called.

    Parameters
    ----------
    task_pool: str
        the name of Collection in MongoDB. When None, `self.task_pool`
        is not set by this constructor.
    """
    database = get_mongodb()
    self.mdb = database
    if task_pool is not None:
        # The collection is looked up as an attribute of the database object.
        self.task_pool = getattr(database, task_pool)
    logger_name = self.__class__.__name__
    self.logger = get_module_logger(logger_name)
def run_task(
    task_func: Callable,
    task_pool: str,
    query: dict = None,
    force_release: bool = False,
    before_status: str = TaskManager.STATUS_WAITING,
    after_status: str = TaskManager.STATUS_DONE,
    **kwargs,
):
    """
    While the task pool is not empty (has tasks in `before_status`), use task_func to fetch and run tasks in task_pool

    After running this method, here are 4 situations (before_status -> after_status):

        STATUS_WAITING -> STATUS_DONE: use task["def"] as `task_func` param

        STATUS_WAITING -> STATUS_PART_DONE: use task["def"] as `task_func` param

        STATUS_PART_DONE -> STATUS_PART_DONE: use task["res"] as `task_func` param

        STATUS_PART_DONE -> STATUS_DONE: use task["res"] as `task_func` param

    Parameters
    ----------
    task_func : Callable
        def (task_def, **kwargs) -> <res which will be committed>

        the function to run the task
    task_pool : str
        the name of the task pool (Collection in MongoDB)
    query: dict
        will use this dict to query task_pool when fetching task.
        None (the default) means an empty query.
    force_release : bool
        if True, `task_func` is executed in a single-worker
        `ProcessPoolExecutor` and its result is waited for; this is intended
        to force the release of resources held by the task.
    before_status : str:
        the tasks in before_status will be fetched and trained. Can be STATUS_WAITING, STATUS_PART_DONE.
    after_status : str:
        the tasks after trained will become after_status. Can be STATUS_DONE, STATUS_PART_DONE
        (see the transitions listed above).
    kwargs
        the params for `task_func`

    Returns
    -------
    bool
        True if at least one task was run and committed, False otherwise.

    Raises
    ------
    ValueError
        if a task was fetched and `before_status` is neither STATUS_WAITING
        nor STATUS_PART_DONE.
    """
    # A fresh dict per call: a `{}` literal in the signature would be a single
    # object shared by every call that relies on the default.
    if query is None:
        query = {}

    tm = TaskManager(task_pool)

    ever_run = False

    while True:
        with tm.safe_fetch_task(status=before_status, query=query) as task:
            if task is None:
                # nothing left to fetch in `before_status`
                break
            get_module_logger("run_task").info(task["def"])
            # when fetching `WAITING` task, use task["def"] to train
            if before_status == TaskManager.STATUS_WAITING:
                param = task["def"]
            # when fetching `PART_DONE` task, use task["res"] to train because the middle result has been saved to task["res"]
            elif before_status == TaskManager.STATUS_PART_DONE:
                param = task["res"]
            else:
                raise ValueError("The fetched task must be `STATUS_WAITING` or `STATUS_PART_DONE`!")
            if force_release:
                # run in a dedicated child process and block until it finishes
                with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
                    res = executor.submit(task_func, param, **kwargs).result()
            else:
                res = task_func(param, **kwargs)
            tm.commit_task_res(task, res, status=after_status)
            ever_run = True

    return ever_run
def __init__(self, record: Recorder, *args, **kwargs):
    """
    Store the recorder and create a logger named after the concrete class.

    Parameters
    ----------
    record : Recorder
        the recorder kept on the instance as `self.record`
    *args, **kwargs
        accepted but not used by this constructor
    """
    self.record = record
    logger_name = self.__class__.__name__
    self.logger = get_module_logger(logger_name)