# Rolling-window data preparation example for Qlib, an AI-oriented Quant
# investment platform (https://github.com/microsoft/qlib).
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import pickle
from datetime import datetime

import fire

import qlib
from qlib.constant import REG_CN
from qlib.data.dataset.handler import DataHandlerLP
from qlib.tests.data import GetData
from qlib.utils import init_instance_by_config


class RollingDataWorkflow:
    """Prepare rolling train/valid/test datasets on top of a shared pre-handler.

    The workflow dumps one ``Alpha158`` handler covering the whole data range,
    then builds ``rolling_cnt`` one-year-shifted dataset windows that reuse the
    dumped handler's data instead of recomputing features every round.
    """

    # Instrument universe and overall data range for the experiment.
    MARKET = "csi300"
    start_time = "2010-01-01"
    end_time = "2019-12-31"
    # Number of rolling windows (each shifted forward by one year).
    rolling_cnt = 5

    def _init_qlib(self):
        """Initialize qlib: fetch the CN daily dataset if absent, then init."""
        provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
        GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
        qlib.init(provider_uri=provider_uri, region=REG_CN)

    def _dump_pre_handler(self, path):
        """Build an Alpha158 handler over the full range and pickle it to *path*.

        The handler is configured with ``dump_all=True`` so the pickle carries
        the computed data, letting rolling handlers reuse it later.
        """
        handler_config = {
            "class": "Alpha158",
            "module_path": "qlib.contrib.data.handler",
            "kwargs": {
                "start_time": self.start_time,
                "end_time": self.end_time,
                "instruments": self.MARKET,
                # No processors here: normalization is applied per rolling window.
                "infer_processors": [],
                "learn_processors": [],
            },
        }
        pre_handler = init_instance_by_config(handler_config)
        pre_handler.config(dump_all=True)
        pre_handler.to_pickle(path)

    def _load_pre_handler(self, path):
        """Load a handler previously written by :meth:`_dump_pre_handler`.

        NOTE: ``pickle.load`` executes arbitrary code from the file — only
        load files this workflow itself produced.
        """
        with open(path, "rb") as file_dataset:
            pre_handler = pickle.load(file_dataset)
        return pre_handler

    @staticmethod
    def _shift(date_tuple, years):
        """Return ``datetime(*date_tuple)`` with its year advanced by *years*."""
        return datetime(date_tuple[0] + years, *date_tuple[1:])

    def rolling_process(self):
        """Run the rolling workflow: init qlib, dump/reload the pre-handler,
        then prepare each one-year-shifted train/valid/test window."""
        self._init_qlib()
        self._dump_pre_handler("pre_handler.pkl")
        pre_handler = self._load_pre_handler("pre_handler.pkl")

        # Base (offset-0) window; later rounds shift every boundary by 1 year.
        train_start_time = (2010, 1, 1)
        train_end_time = (2012, 12, 31)
        valid_start_time = (2013, 1, 1)
        valid_end_time = (2013, 12, 31)
        test_start_time = (2014, 1, 1)
        test_end_time = (2014, 12, 31)

        dataset_config = {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "RollingDataHandler",
                    "module_path": "rolling_handler",
                    "kwargs": {
                        "start_time": datetime(*train_start_time),
                        "end_time": datetime(*test_end_time),
                        "fit_start_time": datetime(*train_start_time),
                        "fit_end_time": datetime(*train_end_time),
                        "infer_processors": [
                            {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature"}},
                        ],
                        "learn_processors": [
                            {"class": "DropnaLabel"},
                            {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
                        ],
                        # Reuse the dumped handler's data instead of recomputing.
                        "data_loader_kwargs": {
                            "handler_config": pre_handler,
                        },
                    },
                },
                "segments": {
                    "train": (datetime(*train_start_time), datetime(*train_end_time)),
                    "valid": (datetime(*valid_start_time), datetime(*valid_end_time)),
                    "test": (datetime(*test_start_time), datetime(*test_end_time)),
                },
            },
        }
        dataset = init_instance_by_config(dataset_config)

        for rolling_offset in range(self.rolling_cnt):
            print(f"===========rolling{rolling_offset} start===========")
            if rolling_offset:
                # Shift every boundary forward by `rolling_offset` years and
                # reconfigure the dataset/handler before refitting.
                dataset.config(
                    handler_kwargs={
                        "start_time": self._shift(train_start_time, rolling_offset),
                        "end_time": self._shift(test_end_time, rolling_offset),
                        "processor_kwargs": {
                            "fit_start_time": self._shift(train_start_time, rolling_offset),
                            "fit_end_time": self._shift(train_end_time, rolling_offset),
                        },
                    },
                    segments={
                        "train": (
                            self._shift(train_start_time, rolling_offset),
                            self._shift(train_end_time, rolling_offset),
                        ),
                        "valid": (
                            self._shift(valid_start_time, rolling_offset),
                            self._shift(valid_end_time, rolling_offset),
                        ),
                        "test": (
                            self._shift(test_start_time, rolling_offset),
                            self._shift(test_end_time, rolling_offset),
                        ),
                    },
                )
                # IT_FIT_SEQ: fit processors sequentially on the new window.
                dataset.setup_data(
                    handler_kwargs={
                        "init_type": DataHandlerLP.IT_FIT_SEQ,
                    }
                )

            dtrain, dvalid, dtest = dataset.prepare(["train", "valid", "test"])
            print(dtrain, dvalid, dtest)
            ## print or dump data
            print(f"===========rolling{rolling_offset} end===========")


if __name__ == "__main__":
    # Expose the workflow as a CLI, e.g.: `python workflow.py rolling_process`.
    fire.Fire(RollingDataWorkflow)