2021-03-25 16:14:22 +08:00
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import qlib
2021-03-25 19:58:55 +08:00
import fire
2021-03-25 16:14:22 +08:00
import pickle
2021-03-25 19:54:52 +08:00
from datetime import datetime
2022-01-09 21:39:46 +08:00
from qlib . constant import REG_CN
2021-03-25 16:14:22 +08:00
from qlib . data . dataset . handler import DataHandlerLP
2021-05-28 13:24:47 +08:00
from qlib . utils import init_instance_by_config
2021-03-25 16:14:22 +08:00
from qlib . tests . data import GetData
2021-03-25 19:59:22 +08:00
2021-03-30 00:38:15 +08:00
class RollingDataWorkflow:
    """Demo workflow that rolls a train/valid/test window forward one year at a time.

    Features are produced once by an ``Alpha158`` handler, pickled to disk, and
    then passed to a ``RollingDataHandler`` (loaded from the ``rolling_handler``
    module). For every rolling step after the first, the handler's time range,
    the processors' fit range and the dataset segments are shifted by the
    step index in years.
    """

    MARKET = "csi300"
    start_time = "2010-01-01"
    end_time = "2019-12-31"
    rolling_cnt = 5  # number of windows iterated by rolling_process

    @staticmethod
    def _shift_years(date_parts, years):
        """Return ``datetime(year + years, month, day, ...)`` for a date tuple.

        Parameters
        ----------
        date_parts : tuple
            ``(year, month, day[, ...])`` positional arguments for ``datetime``.
        years : int
            Number of years added to the first element.

        Raises
        ------
        ValueError
            If the shifted date does not exist (e.g. Feb 29 moved to a
            non-leap year); this comes straight from ``datetime``.
        """
        year, *rest = date_parts
        return datetime(year + years, *rest)

    def _init_qlib(self):
        """Fetch the CN dataset into the provider directory and initialize qlib."""
        provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
        # exists_skip=True presumably avoids re-downloading data that is
        # already present -- confirm against GetData.qlib_data.
        GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
        qlib.init(provider_uri=provider_uri, region=REG_CN)

    def _dump_pre_handler(self, path):
        """Build the ``Alpha158`` handler for the full time range and pickle it.

        The handler is created with empty processor lists, configured with
        ``dump_all=True`` and written to ``path`` via ``to_pickle``.
        """
        handler_config = {
            "class": "Alpha158",
            "module_path": "qlib.contrib.data.handler",
            "kwargs": {
                "start_time": self.start_time,
                "end_time": self.end_time,
                "instruments": self.MARKET,
                "infer_processors": [],
                "learn_processors": [],
            },
        }
        pre_handler = init_instance_by_config(handler_config)
        pre_handler.config(dump_all=True)
        pre_handler.to_pickle(path)

    def _load_pre_handler(self, path):
        """Unpickle and return the object stored at ``path``.

        SECURITY: ``pickle.load`` can execute arbitrary code. Only pass files
        this workflow wrote itself (see ``_dump_pre_handler``), never
        untrusted input.
        """
        with open(path, "rb") as file_dataset:
            return pickle.load(file_dataset)

    def rolling_process(self):
        """Run the rolling demo: prepare and print ``rolling_cnt`` yearly windows."""
        self._init_qlib()
        self._dump_pre_handler("pre_handler.pkl")
        pre_handler = self._load_pre_handler("pre_handler.pkl")

        # Boundaries of the first window as (year, month, day) tuples; later
        # windows add the rolling offset to the year component.
        train_start_time = (2010, 1, 1)
        train_end_time = (2012, 12, 31)
        valid_start_time = (2013, 1, 1)
        valid_end_time = (2013, 12, 31)
        test_start_time = (2014, 1, 1)
        test_end_time = (2014, 12, 31)

        shift = self._shift_years

        def segments_for(offset):
            """Return the train/valid/test segments moved forward by ``offset`` years."""
            return {
                "train": (shift(train_start_time, offset), shift(train_end_time, offset)),
                "valid": (shift(valid_start_time, offset), shift(valid_end_time, offset)),
                "test": (shift(test_start_time, offset), shift(test_end_time, offset)),
            }

        dataset_config = {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "RollingDataHandler",
                    "module_path": "rolling_handler",
                    "kwargs": {
                        "start_time": shift(train_start_time, 0),
                        "end_time": shift(test_end_time, 0),
                        "fit_start_time": shift(train_start_time, 0),
                        "fit_end_time": shift(train_end_time, 0),
                        "infer_processors": [
                            {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature"}},
                        ],
                        "learn_processors": [
                            {"class": "DropnaLabel"},
                            {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
                        ],
                        # The unpickled handler object is handed to the data loader.
                        "data_loader_kwargs": {
                            "handler_config": pre_handler,
                        },
                    },
                },
                "segments": segments_for(0),
            },
        }
        dataset = init_instance_by_config(dataset_config)

        for rolling_offset in range(self.rolling_cnt):
            print(f"===========rolling {rolling_offset} start===========")
            # Offset 0 uses the dataset exactly as instantiated above; only the
            # later windows need to be reconfigured and set up again.
            if rolling_offset:
                window_start = shift(train_start_time, rolling_offset)
                dataset.config(
                    handler_kwargs={
                        "start_time": window_start,
                        "end_time": shift(test_end_time, rolling_offset),
                        "processor_kwargs": {
                            "fit_start_time": window_start,
                            "fit_end_time": shift(train_end_time, rolling_offset),
                        },
                    },
                    segments=segments_for(rolling_offset),
                )
                dataset.setup_data(
                    handler_kwargs={
                        "init_type": DataHandlerLP.IT_FIT_SEQ,
                    }
                )

            dtrain, dvalid, dtest = dataset.prepare(["train", "valid", "test"])
            print(dtrain, dvalid, dtest)
            ## print or dump data
            print(f"===========rolling {rolling_offset} end===========")
2021-03-25 19:59:22 +08:00
2021-03-25 16:14:22 +08:00
if __name__ == "__main__":
    # Hand the workflow class to Python Fire so it can be driven from the
    # command line (e.g. the `rolling_process` method).
    fire.Fire(RollingDataWorkflow)