def train_with_sigana(uri_path: str = None):
    """Fit the CSI300 GBDT task and run signal analysis (SigAnaRecord).

    Parameters
    ----------
    uri_path : str, optional
        Tracking URI handed to ``R.start``; ``None`` uses the framework default.

    Returns
    -------
    pred_score : pandas.DataFrame
        Predicted scores loaded from the signal record.
    performance : dict
        IC / rank-IC results keyed by ``"ic"`` and ``"ric"``.
    uri_path : str
        The experiment URI actually used for this run.
    """
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])

    # Everything (params, fit, records) is logged inside one experiment context.
    with R.start(experiment_name="workflow_with_sigana", uri=uri_path):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        recorder = R.get_recorder()

        # First persist the raw prediction signal ...
        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()
        pred_score = signal_rec.load("pred.pkl")

        # ... then derive IC / rank-IC from the stored signal.
        ana_rec = SigAnaRecord(recorder)
        ana_rec.generate()
        ic = ana_rec.load("ic.pkl")
        ric = ana_rec.load("ric.pkl")

        uri_path = R.get_uri()

    performance = {"ic": ic, "ric": ric}
    return pred_score, performance, uri_path
def _train_model(self, model, dataset):
    """Fit *model* on *dataset* inside a fresh "train" experiment.

    Logs the flattened task parameters, saves the fitted model under
    ``params.pkl``, and generates the prediction signal record.
    """
    with R.start(experiment_name="train"):
        R.log_params(**flatten_dict(self.task))
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # prediction: persist the model's signal under the current recorder
        recorder = R.get_recorder()
        SignalRecord(model, dataset, recorder).generate()
def train(uri_path: str = None):
    """Fit the CSI300 GBDT task and record signal + signal-analysis artifacts.

    Parameters
    ----------
    uri_path : str, optional
        Tracking URI handed to ``R.start``; ``None`` uses the framework default.

    Returns
    -------
    pred_score : pandas.DataFrame
        Predicted scores loaded from the signal record.
    performance : dict
        IC / rank-IC results keyed by ``"ic"`` and ``"ric"``.
    rid : str
        Id of the recorder that holds the artifacts.
    """
    # model initiaiton
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])

    # To test __repr__
    print(dataset)
    print(R)

    # start exp
    with R.start(experiment_name="workflow", uri=uri_path):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        R.save_objects(trained_model=model)

        # prediction
        recorder = R.get_recorder()
        # To test __repr__
        print(recorder)
        # To test get_local_dir
        print(recorder.get_local_dir())
        rid = recorder.id

        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()
        pred_score = signal_rec.load("pred.pkl")

        # calculate ic and ric
        ana_rec = SigAnaRecord(recorder)
        ana_rec.generate()
        ic = ana_rec.load("ic.pkl")
        ric = ana_rec.load("ric.pkl")

    performance = {"ic": ic, "ric": ric}
    return pred_score, performance, rid
def train_mse(uri_path: str = None):
    """Fit the CSI300 GBDT task and generate an MSE signal record.

    Parameters
    ----------
    uri_path : str, optional
        Tracking URI handed to ``R.start``; ``None`` uses the framework default.

    Returns
    -------
    str
        The experiment URI used for this run.
    """
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])

    with R.start(experiment_name="workflow", uri=uri_path):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        recorder = R.get_recorder()

        # The base signal must exist before the MSE record can be derived.
        SignalRecord(recorder=recorder, model=model, dataset=dataset).generate()
        mse_rec = SignalMseRecord(recorder)
        mse_rec.generate()

        run_uri = R.get_uri()
    return run_uri
def train():
    """Fit the configured task and record signal + signal-analysis artifacts.

    Returns
    -------
    pred_score : pandas.DataFrame
        Predicted scores loaded from the signal record.
    performance : dict
        IC / rank-IC results keyed by ``"ic"`` and ``"ric"``.
    rid : str
        Id of the recorder that holds the artifacts.
    """
    # model initiaiton
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # To test __repr__
    print(dataset)
    print(R)

    # start exp
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        # prediction
        recorder = R.get_recorder()
        # To test __repr__
        print(recorder)
        rid = recorder.id

        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()
        pred_score = signal_rec.load()

        # calculate ic and ric
        ana_rec = SigAnaRecord(recorder)
        ana_rec.generate()
        ic = ana_rec.load(ana_rec.get_path("ic.pkl"))
        ric = ana_rec.load(ana_rec.get_path("ric.pkl"))

    performance = {"ic": ic, "ric": ric}
    return pred_score, performance, rid
        # Tail of the backtest cost configuration (the enclosing dict opens
        # before this chunk — NOTE(review): confirm the full structure upstream).
        "close_cost": 0.0015,
        "min_cost": 5,
        "return_order": True,
    },
}

# model initialization
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

# NOTE: This line is optional
# It demonstrates that the dataset can be used standalone.
example_df = dataset.prepare("train")
print(example_df.head())

# start exp: params, fit, saved model, and records all live in one experiment
with R.start(experiment_name="workflow"):
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(**{"params.pkl": model})

    # prediction: persist the model's signal under the current recorder
    recorder = R.get_recorder()
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest. If users want to use backtest based on their own prediction,
    # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
    par = PortAnaRecord(recorder, port_analysis_config)
    par.generate()
def get_update_data(self, dataset: Dataset) -> pd.DataFrame:
    """Return ``self.old_data`` with a freshly generated label spliced in.

    A new label frame is produced from *dataset* and substituted into the
    matching index range of the sorted cached data via ``_replace_range``.
    """
    fresh_label = SignalRecord.generate_label(dataset)
    return _replace_range(self.old_data.sort_index(), fresh_label)
def get_update_data(self, dataset: Dataset) -> pd.DataFrame:
    """Return ``self.old_data`` merged with a freshly generated label.

    Rows are appended and, where index entries collide, the newly generated
    label wins (``keep="last"``); the result is index-sorted.
    """
    fresh_label = SignalRecord.generate_label(dataset)
    merged = pd.concat([self.old_data, fresh_label], axis=0)
    deduped = merged[~merged.index.duplicated(keep="last")]
    return deduped.sort_index()
def main(xargs):
    """Train a QuantTransformer on Alpha360 features, then record signal and
    portfolio-analysis artifacts.

    Parameters
    ----------
    xargs : argparse.Namespace-like
        Only ``xargs.market`` is read here (instrument universe for the handler).
    """
    # Dataset: Alpha360 handler over 2008-2020 with normalization fitted on
    # the 2008-2014 window only (no leakage into valid/test segments).
    dataset_config = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha360",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": {
                    "start_time": "2008-01-01",
                    "end_time": "2020-08-01",
                    "fit_start_time": "2008-01-01",
                    "fit_end_time": "2014-12-31",
                    "instruments": xargs.market,
                    # Inference-time processors: robust z-score then fill NaNs.
                    "infer_processors": [
                        {
                            "class": "RobustZScoreNorm",
                            "kwargs": {"fields_group": "feature", "clip_outlier": True},
                        },
                        {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
                    ],
                    # Training-only processors: drop rows with NaN labels,
                    # then cross-sectional rank-normalize the label.
                    "learn_processors": [
                        {"class": "DropnaLabel"},
                        {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},
                    ],
                    # Label: 1-day-ahead return measured from t+1 to t+2 close.
                    "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
                },
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    }
    model_config = {
        "class": "QuantTransformer",
        "module_path": "trade_models",
        "kwargs": {
            "loss": "mse",
            # NOTE(review): GPU is passed as the string "0" — confirm the model
            # expects a string device id rather than an int.
            "GPU": "0",
            "metric": "loss",
        },
    }
    task = {"model": model_config, "dataset": dataset_config}

    model = init_instance_by_config(model_config)
    dataset = init_instance_by_config(dataset_config)

    # start exp to train model
    with R.start(experiment_name="train_tt_model"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        R.save_objects(trained_model=model)

        # prediction
        recorder = R.get_recorder()
        print(recorder)
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # backtest. If users want to use backtest based on their own prediction,
        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        # NOTE(review): `port_analysis_config` is not defined in this function —
        # presumably a module-level config; verify it exists at call time.
        par = PortAnaRecord(recorder, port_analysis_config)
        par.generate()