def get_dv(start=20170101, end=20180101):
    """Build a daily DataView for the symbols returned for index 000906.SH.

    The factor fields are read through a ``LocalDataService`` rooted at
    ``D:/data``; ``total_mv`` and ``ncf_oper_ttm`` are then appended through
    a ``RemoteDataService``.

    Args:
        start: First date of the window (the default is written as a
            ``YYYYMMDD`` integer).
        end: Last date of the window, same format as ``start``.

    Returns:
        The prepared ``DataView`` including the two remotely added fields.
    """
    import warnings

    import jaqs_fxdayu
    jaqs_fxdayu.patch_all()
    from jaqs_fxdayu.data import DataView
    from jaqs_fxdayu.data.dataservice import LocalDataService
    from jaqs_fxdayu.data.dataservice import RemoteDataService

    warnings.filterwarnings("ignore")

    # Fields requested from the local store.
    fields = ','.join(
        ['fcffps', 'lt_borrow', 'st_borrow', 'LCAP', 'end_bal_cash'])

    local_service = LocalDataService(fp=r'D:/data')
    members = local_service.query_index_member("000906.SH", start, end)
    unique_symbols = list(set(members))  # drop duplicate symbols

    view = DataView()
    view.init_from_config(
        {
            'start_date': start,
            'end_date': end,
            'symbol': ','.join(unique_symbols),
            'fields': fields,
            'freq': 1,
            "prepare_fields": True,
        },
        data_api=local_service,
    )
    view.prepare_data()

    # Total market value and the second extra field come from the remote
    # service rather than from the local store.
    remote_service = RemoteDataService()
    remote_service.init_from_config({
        "remote.data.address": "tcp://data.tushare.org:8910",
        "remote.data.username": "******",
        "remote.data.password": "******",
    })
    for extra_field in ('total_mv', 'ncf_oper_ttm'):
        view.add_field(extra_field, remote_service)

    return view
def test_write():
    """Fetch a year of daily data for three symbols, check it, and save it.

    After ``prepare_data`` the shapes of ``data_d`` and ``dates`` are
    asserted, and the dataview is written to ``daily_path``.
    """
    service = RemoteDataService()
    service.init_from_config(data_config)

    view = DataView()
    view.init_from_config(
        {
            'start_date': 20160601,
            'end_date': 20170601,
            'symbol': '600030.SH,000063.SZ,000001.SZ',
            'fields': 'open,close,high,low,volume,pb,net_assets,pcf_ncf',
            'freq': 1,
        },
        data_api=service,
    )
    view.prepare_data()

    assert view.data_d.shape == (281, 48)
    assert view.dates.shape == (281, )

    # TODO
    # The string below is a no-op expression statement that records a
    # warning text; it is kept verbatim.
    """ PerformanceWarning: your performance may suffer as PyTables will pickle object types that it cannot map directly to c-types [inferred_type->mixed,key->block1_values] [items->[('000001.SZ', 'int_income'), ('000001.SZ', 'less_handling_chrg_comm_exp'), ('000001.SZ', 'net_int_income'), ('000001.SZ', 'oper_exp'), ('000001.SZ', 'symbol'), ('000063.SZ', 'int_income'), ('000063.SZ', 'less_handling_chrg_comm_exp'), ('000063.SZ', 'net_int_income'), ('000063.SZ', 'oper_exp'), ('000063.SZ', 'symbol'), ('600030.SH', 'int_income'), ('600030.SH', 'less_handling_chrg_comm_exp'), ('600030.SH', 'net_int_income'), ('600030.SH', 'oper_exp'), ('600030.SH', 'symbol')]] """

    view.save_dataview(folder_path=daily_path)
def save_dataview():
    """Download data for the 000300.SH universe and save it to disk.

    Connects a ``RemoteDataService`` to the quantOS endpoint, prepares a
    ``DataView`` for 20140101-20180101 and writes it to ``dataview_folder``.
    """
    service = RemoteDataService()
    service.init_from_config({
        "remote.data.address": "tcp://data.quantOS.org:8910",
        "remote.data.username": "******",
        "remote.data.password": "******",
    })

    view = DataView()
    view.init_from_config(
        {
            'start_date': 20140101,
            'end_date': 20180101,
            'universe': '000300.SH',
            'fields': "volume,pb,pe,ps,roe,float_mv,sw1",
            'freq': 1,
            'timeout': 180,
        },
        service,
    )
    view.prepare_data()

    # Save the data files to the given path so they can be loaded directly
    # next time.
    view.save_dataview(dataview_folder)
def save_dataview():
    """Download data for the 000300.SH universe and save it to disk.

    Connects a ``RemoteDataService`` to the tushare endpoint, prepares a
    ``DataView`` for 20140101-20180101 and writes it to ``dataview_folder``.
    """
    service = RemoteDataService()
    service.init_from_config({
        "remote.data.address": "tcp://data.tushare.org:8910",
        "remote.data.username": "******",
        "remote.data.password": "******",  # QuantOS API token
    })

    view = DataView()
    view.init_from_config(
        {
            'start_date': 20140101,
            'end_date': 20180101,
            # 'universe': '000300.SH' selects the CSI 300 as the stock pool.
            'universe': '000300.SH',
            # 'fields' lists the data to fetch: pb, pe and ps are the
            # price-to-book, price-to-earnings and price-to-sales ratios,
            # float_mv is the float market value and sw1 is the Shenwan
            # level-1 industry classification.
            'fields': "pb,pe,ps,float_mv,sw1,roe",
            'freq': 1,
        },
        service,
    )
    view.prepare_data()

    # Save the data files to the given path so they can be loaded directly
    # next time.
    view.save_dataview(dataview_folder)
def test_q_add_formula():
    """Check that formulas can be added on top of the quarterly dataview.

    Loads the dataview stored at ``quarterly_path``, adds ``myvar1`` and
    then ``myvar2`` (whose formula refers to ``myvar1``), and asserts that
    both resulting time series are non-empty.
    """
    dv = DataView()
    # The dataview is always loaded from quarterly_path; the previously
    # assigned (and never used) local folder path has been removed because
    # it suggested a different source.
    dv.load_dataview(folder_path=quarterly_path)

    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    # The second formula builds on the field added just above.
    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
def test_add_formula():
    """Each added formula should widen ``data_d`` by one column per symbol.

    The symbol count is the number of first-level labels in the columns of
    ``data_d``; the row count must stay unchanged.
    """
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    base_rows, base_cols = view.data_d.shape
    symbol_count = len(view.data_d.columns.levels[0])

    view.add_formula('myvar1', 'Delta(high - close, 1)',
                     is_quarterly=False, add_data=True)
    after_first = (base_rows, base_cols + symbol_count)
    assert view.data_d.shape == after_first

    # The second formula refers to the field created by the first one.
    view.add_formula('myvar2', 'myvar1 - close',
                     is_quarterly=False, add_data=True)
    after_second = (base_rows, base_cols + 2 * symbol_count)
    assert view.data_d.shape == after_second
def test_add_field():
    """Adding ``total_share`` should widen ``data_d`` by one column per symbol.

    The symbol count is the number of first-level labels in the columns of
    ``data_d``.
    """
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    rows_before, cols_before = view.data_d.shape
    symbol_count = len(view.data_d.columns.levels[0])

    remote = RemoteDataService()
    remote.init_from_config(data_config)
    view.add_field('total_share', remote)

    expected_shape = (rows_before, cols_before + symbol_count)
    assert view.data_d.shape == expected_shape
def test_q_add_field():
    """Adding ``debttoassets`` should widen ``data_q`` by one column per symbol.

    Note that the symbol count is read from the columns of ``data_d`` while
    the shape being checked is that of ``data_q``.
    """
    view = DataView()
    view.load_dataview(folder_path=quarterly_path)

    rows_before, cols_before = view.data_q.shape
    symbol_count = len(view.data_d.columns.levels[0])

    remote = RemoteDataService()
    remote.init_from_config(data_config)
    view.add_field("debttoassets", remote)

    expected_shape = (rows_before, cols_before + symbol_count)
    assert view.data_q.shape == expected_shape
def test_add_formula_directly():
    """Add a formula right after ``prepare_data`` and check the frame shape.

    No save/load round trip is involved: the formula is added to the
    dataview that was just prepared from the remote service.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    view = DataView()
    view.init_from_config(
        {
            'start_date': 20160601,
            'end_date': 20170601,
            'symbol': '600030.SH,000063.SZ,000001.SZ',
            'fields': 'open,close',
            'freq': 1,
        },
        data_api=remote,
    )
    view.prepare_data()

    view.add_formula("myfactor", 'close / open', is_quarterly=False)
    assert view.data_d.shape == (281, 36)
def test_q():
    """Prepare a dataview with the listed fields and save it to ``quarterly_path``."""
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    # Adjacent literals are concatenated into one comma-separated string.
    requested_fields = ('open,close,'
                        'pb,net_assets,'
                        'total_oper_rev,oper_exp,'
                        'cash_paid_invest,'
                        'capital_stk,'
                        'roe')

    view = DataView()
    view.init_from_config(
        {
            'start_date': 20160609,
            'end_date': 20170601,
            'symbol': '600030.SH,000063.SZ,000001.SZ',
            'fields': requested_fields,
            'freq': 1,
        },
        data_api=remote,
    )
    view.prepare_data()
    view.save_dataview(folder_path=quarterly_path)
def test_load():
    """Load the daily dataview and check metadata, ``get_snapshot`` and ``get_ts``."""
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    # Stored metadata.
    assert view.start_date == 20160601
    expected_symbols = set('000001.SZ,600030.SH,000063.SZ'.split(','))
    assert set(view.symbol) == expected_symbols

    pair = '600030.SH,000063.SZ'

    # get_snapshot: one row per requested symbol, one column per field.
    snapshot = view.get_snapshot(20170504, symbol=pair, fields='close,pb')
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == {'600030.SH', '000063.SZ'}

    # get_ts: one column per requested symbol over the date range.
    series = view.get_ts('close', symbol=pair,
                         start_date=20170101, end_date=20170302)
    assert series.shape == (38, 2)
    assert set(series.columns.values) == {'600030.SH', '000063.SZ'}
    assert series.index.values[-1] == 20170302
def get_dv(start=20170101, end=20180101):
    """Build a daily DataView for the symbols returned for index 000906.SH.

    All fields are read through a ``LocalDataService`` rooted at
    ``../data``.

    Args:
        start: First date of the window (the default is written as a
            ``YYYYMMDD`` integer).
        end: Last date of the window, same format as ``start``.

    Returns:
        The prepared ``DataView``.
    """
    # NOTE: the jaqs_fxdayu.patch_all() call is disabled in this variant.
    import warnings

    from jaqs_fxdayu.data import DataView
    from jaqs_fxdayu.data.dataservice import LocalDataService

    warnings.filterwarnings("ignore")

    # Fields requested from the local store.
    fields = ','.join([
        'BBI', 'EPS', 'PE', 'PS', 'ACCA', 'CTOP', 'MA10RegressCoeff12',
        'AR', 'BR', 'ARBR', 'np_parent_comp_ttm', 'total_share', 'bps',
        'volume',
    ])

    local_service = LocalDataService(fp=r'../data')
    members = local_service.query_index_member("000906.SH", start, end)
    unique_symbols = list(set(members))  # drop duplicate symbols

    view = DataView()
    view.init_from_config(
        {
            'start_date': start,
            'end_date': end,
            'symbol': ','.join(unique_symbols),
            'fields': fields,
            'freq': 1,
            "prepare_fields": True,
        },
        data_api=local_service,
    )
    view.prepare_data()
    return view
from jaqs_fxdayu.data import DataView from jaqs_fxdayu.data import RemoteDataService # 远程数据服务类 data_config = { "remote.data.address": "tcp://data.quantos.org:8910", "remote.data.username": "******", "remote.data.password": "******" } # step 2 ds = RemoteDataService() ds.init_from_config(data_config) dv = DataView() sub_period = [(20190130, 20190630)] file_name = ['data'] universe = '000300.SH' # step 3 for i in range(len(sub_period)): props = { 'start_date': sub_period[i][0], 'end_date': sub_period[i][1], 'universe': universe, 'fields': "pb,pe,oper_exp,sw1", 'report_type': '408003000',
# NOTE(review): factor_list, start, end and stock_symbol are not defined in
# this snippet; they must already exist when it runs.
check_factor = ','.join(factor_list)

import os
import warnings

import numpy as np
import pandas as pd

import jaqs_fxdayu
jaqs_fxdayu.patch_all()
from jaqs_fxdayu.data import DataView
from jaqs.data import RemoteDataService
from jaqs_fxdayu.data.dataservice import LocalDataService

warnings.filterwarnings("ignore")

# Local data store and the dataview that will read from it.
dataview_folder = 'E:/data/data'
dv = DataView()
ds = LocalDataService(fp=dataview_folder)

# Only the volume field is requested here; this replaces the earlier value
# of check_factor.
factor_list = ['volume']
check_factor = ','.join(factor_list)

dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True,
}
dv.init_from_config(dv_props, data_api=ds)
# 1. Load the locally saved data
import os
import warnings

import numpy as np

import jaqs_fxdayu
jaqs_fxdayu.patch_all()
from jaqs_fxdayu.data import DataView
from jaqs_fxdayu.data import RemoteDataService

warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)

# 2. Factor performance, using roe_pb as the example factor
# roe_pb is the custom roe/pb factor built earlier (the earlier roe_pb step
# has to be run first).
factor = dv.get_ts('roe_pb')
print(factor.tail())

# Read the inputs:
mask = dv.get_ts('mask_index_member')  # whether in the index constituents
can_enter = dv.get_ts('can_enter')  # whether it can be bought
can_exit = dv.get_ts('can_exit')  # whether it can be sold
price = dv.get_ts('close_adj')  # price
group = dv.get_ts('group')  # classification information

print(can_enter.shape)
print(group.shape)

# Define functions:
# Fields to load into the dataview.
factor_list = ['volume', 'pb', 'roe', 'close']
check_factor = ','.join(factor_list)

import jaqs_fxdayu
jaqs_fxdayu.patch_all()
from jaqs_fxdayu.data import DataView
from jaqs_fxdayu.data import RemoteDataService
from jaqs_fxdayu.data.dataservice import LocalDataService
import os
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from pandas import Series, DataFrame

# Raw string: '\d' is not a valid escape sequence, so the plain literal
# triggers an invalid-escape warning on current Python versions. The value
# is unchanged.
dataview_folder = r'D:\data'
dv = DataView()
ds = LocalDataService(fp=dataview_folder)

# NOTE(review): start, end and stock_symbol are not defined in this snippet;
# they must already exist when it runs.
dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True,
}

# Initialise and load once; the duplicated init/prepare pair that used to
# follow repeated exactly the same calls with the same arguments.
dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()

# Add the sw1 field and read it back as a time series.
dv.add_field('sw1')
sw1 = dv.get_ts('sw1')
import warnings

from jaqs_fxdayu.data import DataView

warnings.filterwarnings("ignore")

dataview_folder = '../Factor'
dv = DataView()
dv.load_dataview(dataview_folder)

# First call: evaluate the formula and look at the head of the result only.
dv.add_formula(
    "Divert",
    "Corr(volume,close_adj,20)",
    is_quarterly=False,
).head()

# Second call: add the result to the dataview dv so it can be reused
# afterwards.
dv.add_formula(
    "Divert",
    "Corr(volume,close_adj,20)",
    is_quarterly=False,
    add_data=True,
)
dv.get_ts("Divert").head()
'end_date': 20180101, 'universe': '000300.SH', 'fields': "volume,pb,pe,ps,roe,float_mv,sw1", 'freq': 1, 'timeout': 180 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) factor = dv.get_ts("ps") factor.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), factor.index)) #索引调整为datetime日期格式 factor = factor.stack() #处理成MultiIndex格式(alphalens分析因子必须的格式) print(factor.head()) def change_index(df): df.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index)) #索引调整为datetime日期格式
# 1. Load the locally saved data
import os
import warnings

import numpy as np

import jaqs_fxdayu
jaqs_fxdayu.patch_all()
from jaqs_fxdayu.data import DataView
from jaqs_fxdayu.data import RemoteDataService

warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'  # folder holding the saved dataview
dv = DataView()
# Per the original note, a "Dataview loaded successfully" message indicates
# that loading worked.
dv.load_dataview(dataview_folder)
print(dv.fields)  # show the fields available in dv

# 2. Filter out suspended and limit-up/limit-down stocks (can buy / can sell)
# mask_index_member is True for entries to filter out; can_enter and
# can_exit are True when tradable.
from jaqs_fxdayu.util import dp
from jaqs.data.dataapi import DataApi

# Table where 1 means in the index constituents and 0 means not.
A = dv.get_ts('index_member')
# Table from which it can be told whether a stock is tradable (or suspended).
B = dv.get_ts('trade_status')


def mask_index():
    """Return a boolean table that is True where ``index_member`` equals 0."""
    membership = dv.get_ts('index_member')
    # Signal filter condition: not an index constituent.
    return membership == 0
import warnings

from jaqs_fxdayu.util import dp

warnings.filterwarnings("ignore")

# Connection settings for the remote data service.
data_config = {
    "remote.data.address": "tcp://data.tushare.org:8910",
    "remote.data.username": "******",
    "remote.data.password": "******",
}

# NOTE(review): RemoteDataService and DataView are not imported in this
# snippet; they must already be in scope.
ds = RemoteDataService()
ds.init_from_config(data_config)
DV = DataView()

start, end = 20100101, 20161231

# De-duplicated symbols returned by dp.index_cons for 000906.SH over the
# window.
stock_symbol = list(
    set(dp.index_cons(ds, "000906.SH", start, end).symbol.values))

zz800_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': 'pb,pe,ps,float_mv,sw1,volume',
    'freq': 1,
    "prepare_fields": True,
}
'end_date': 20180101, 'universe': '000300.SH', #'universe': '000300.SH'表示标的股票池取的是沪深300 'fields': "pb,pe,ps,float_mv,sw1,roe", #'fields'表示取的数据,pb,pe,ps分别表示市净率,市盈率,市销率;float_mv代表流通市值,sw1表示申万1级行业分类的数据 'freq': 1 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) print(dv.fields) #查看dv中取得的数据 print(dv.get_ts("pb").head()) #查看dv中取得的市净率 #3_因子分析 #step1:因子收集及处理 import pandas as pd from datetime import datetime factor = dv.get_ts("pb") factor.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), factor.index)) # 改时间索引: 将整数类型的日期转成datetime的格式的日期datetime.strptime (注意pd.Index,I要大写) factor = factor.stack() #修改成Mutiindex格式(Alphalen因子分析必要) 得到一窜Series
def test_dataview_universe():
    """Prepare a universe-based dataview, then exercise benchmark and field edits.

    The benchmark is re-assigned from a copy, a truncated benchmark is
    assigned with any ``ValueError`` ignored, and finally some fields are
    removed.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    # Adjacent literals are concatenated into one comma-separated string.
    requested_fields = ('open,high,low,close,vwap,volume,turnover,'
                        'sw1,zz2,'
                        'roe,net_assets,'
                        'total_oper_rev,oper_exp,tot_profit,int_income')

    view = DataView()
    view.init_from_config(
        {
            'start_date': 20170227,
            'end_date': 20170327,
            'universe': '000016.SH',
            'fields': requested_fields,
            'freq': 1,
        },
        remote,
    )
    view.prepare_data()

    # Assigning back an identical copy of the benchmark.
    benchmark_copy = view.data_benchmark.copy()
    view.data_benchmark = benchmark_copy

    # Assigning a benchmark without its first three rows; a ValueError from
    # the assignment is deliberately ignored.
    try:
        view.data_benchmark = benchmark_copy.iloc[3:]
    except ValueError:
        pass

    # Field removal: several fields at once, then a single one.
    view.remove_field('roe,net_assets')
    view.remove_field('close')
check_factor = ','.join(factor_list) import jaqs_fxdayu jaqs_fxdayu.patch_all() from jaqs_fxdayu.data import DataView from jaqs.data import RemoteDataService from jaqs_fxdayu.data.dataservice import LocalDataService import os import numpy as np import pandas as pd import warnings warnings.filterwarnings("ignore") dataview_folder = 'E:/data/data' dv = DataView() ds = LocalDataService(fp=dataview_folder) factor_list = ['volume'] check_factor = ','.join(factor_list) dv_props = {'start_date': start, 'end_date': end, 'symbol':','.join(stock_symbol), 'fields': check_factor, 'freq': 1, "prepare_fields": True} dv.init_from_config(dv_props, data_api=ds) dv.prepare_data() for name in FactorList:
# 1. Initialisation
import warnings

from jaqs_fxdayu.data import DataView

warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)

# Filter definitions
import numpy as np


def mask_index_member():
    """Return a boolean table that is True where ``index_member`` is not > 0."""
    membership = dv.get_ts('index_member')
    # Signal filter condition: not an index constituent.
    return ~(membership > 0)


def limit_up_down():
    """Build the can-buy / can-sell tables (not suspended, not at a limit).

    Adds the ``up_limit`` and ``down_limit`` fields to ``dv`` as a side
    effect.

    Returns:
        A ``(can_enter, can_exit)`` tuple.
    """
    suspended = dv.get_ts('trade_status') == u'停牌'

    # Limit up: the close rose more than 9.5% over the previous close.
    dv.add_formula(
        'up_limit',
        '(close - Delay(close, 1)) / Delay(close, 1) > 0.095',
        is_quarterly=False,
        add_data=True,
    )
    # Limit down: the close fell more than 9.5% below the previous close.
    dv.add_formula(
        'down_limit',
        '(close - Delay(close, 1)) / Delay(close, 1) < -0.095',
        is_quarterly=False,
        add_data=True,
    )

    not_limit_up = dv.get_ts('up_limit') < 1
    enter_ok = np.logical_and(not_limit_up, ~suspended)  # not limit-up, not suspended
    not_limit_down = dv.get_ts('down_limit') < 1
    exit_ok = np.logical_and(not_limit_down, ~suspended)  # not limit-down, not suspended
    return enter_ok, exit_ok


mask = mask_index_member()
can_enter, can_exit = limit_up_down()
#1_初始化 from jaqs_fxdayu.data import DataView import warnings warnings.filterwarnings("ignore") dataview_folder = 'G:/data/hs300_2' dv = DataView() dv.load_dataview(dataview_folder) dv.add_formula("momentum", "Return(close_adj, 20)", is_quarterly=False, add_data=True) #直接使用内置的函数,添加新因子,可能之前已经添加过了 dv.get_ts("momentum").head() print(dv.fields) #查看dv中取得的数据 import numpy as np #定义过滤条件 def mask_index_member(): df_index_member = dv.get_ts('index_member') mask_index_member = ~(df_index_member > 0) #定义信号过滤条件-非指数成分 return mask_index_member def limit_up_down(): # 定义可买卖条件——未停牌、未涨跌停 trade_status = dv.get_ts('trade_status') mask_sus = trade_status == u'停牌' # 涨停 dv.add_formula('up_limit',
def test_q_get():
    """``get`` on a quarterly field should be indexed by every date from ``start_date`` on."""
    view = DataView()
    view.load_dataview(folder_path=quarterly_path)

    result = view.get("", 0, 0, 'total_oper_rev')

    # All dates of the dataview that are not earlier than its start date.
    expected_dates = set(view.dates[view.dates >= view.start_date])
    assert set(result.index.values) == expected_dates
def get_(folder):
    """Load a saved dataview and return the inputs for signal analysis.

    Step 2: factor data pre-processing.

    Args:
        folder: Path passed to ``DataView.load_dataview``.

    Returns:
        A tuple ``(signal, price, mask, group, can_enter, can_exit,
        benchmark)``.
    """
    # Load the dataview data set.
    view = DataView()
    view.load_dataview(folder)

    pb_signal = view.get_ts("pb")
    adj_close = view.get_ts("close_adj")
    industry = view.get_ts("sw1")

    # Signal filter condition: not an index constituent.
    non_member = view.get_ts('index_member') == 0

    # Buy/sell conditions: not suspended and not at a price limit.
    tradable = view.get_ts('trade_status') == 1  # can be traded

    # Limit up: the open is more than 9.5% above the previous close.
    limit_up = view.add_formula(
        'up_limit',
        '(open - Delay(close, 1)) / Delay(close, 1) > 0.095',
        is_quarterly=False,
    )
    # Limit down: the open is more than 9.5% below the previous close.
    limit_down = view.add_formula(
        'down_limit',
        '(open - Delay(close, 1)) / Delay(close, 1) < -0.095',
        is_quarterly=False,
    )

    enter_ok = np.logical_and(limit_up < 1, tradable)  # not limit-up, not suspended
    exit_ok = np.logical_and(limit_down < 1, tradable)  # not limit-down, not suspended

    return (pb_signal, adj_close, non_member, industry,
            enter_ok, exit_ok, view.data_benchmark)
'universe': '000300.SH', 'fields': "volume,pb,pe,ps,roe,float_mv,sw1", 'freq': 1, 'timeout': 180 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) print(dv.get_ts("pe").head()) import numpy as np #定义信号过滤条件-非指数成分 def mask_index_member(): df_index_member = dv.get_ts('index_member') mask_index_member = df_index_member == 0 return mask_index_member # 定义可买卖条件——未停牌、未涨跌停
# NOTE(review): group2_code, group, dp, api, symbols, start and end are
# defined outside this snippet.
# Per the original note, this table holds category codes rather than names.
print(group2_code.tail())

# source='zz' selects the CSI (zhongzheng) classification; only the
# classification standard differs.
group3 = dp.daily_sec_industry(
    api, symbols, start, end,
    source='zz',
    value="industry1_name",
)
print(group3.tail())

# Same classification standard, but industry1_code returns the category
# codes instead of the names.
group3_code = dp.daily_sec_industry(
    api, symbols, start, end,
    source='zz',
    value="industry1_code",
)
print(group3_code.tail())

# 3. Append the data and save
dataview_folder = 'G:/data/hs300'  # folder holding the saved dataview
dv = DataView()
dv.load_dataview(dataview_folder)  # load from that folder
dv.append_df(group, 'group')  # add group to dv under the name 'group'
dv.save_dataview('G:/data/hs300')  # save

# Read the data back
print(dv.get_ts('group').tail())
A = dv.get_ts('group')  # kept in a variable for easier inspection
import warnings

from jaqs_fxdayu.data import DataView
from jaqs_fxdayu.data import signal_function_mod as sfm
from jaqs_fxdayu.research.signaldigger import process

warnings.filterwarnings("ignore")

dataview_folder = '../Factor'
dv = DataView()
dv.load_dataview(dataview_folder)

# Adjusted price series.
Open = dv.get_ts("open_adj")
High = dv.get_ts("high_adj")
Low = dv.get_ts("low_adj")
Close = dv.get_ts("close_adj")

# Entries where trade_status equals 0 are masked.
trade_status = dv.get_ts('trade_status')
mask_sus = trade_status == 0

# Drop the data from suspension periods before computing the indicator.
open_masked = process._mask_df(Open, mask=mask_sus)
high_masked = process._mask_df(High, mask=mask_sus)
low_masked = process._mask_df(Low, mask=mask_sus)
close_masked = process._mask_df(Close, mask=mask_sus)

# Compute CCI from the masked series and store it in dv as 'CCI'.
cci = sfm.ta(
    ta_method='CCI',
    ta_column=0,
    Open=open_masked,
    High=high_masked,
    Low=low_masked,
    Close=close_masked,
    Volume=None,
)
dv.append_df(cci, 'CCI')