Пример #1
0
def download(startDt, endDt):
    """Fetch every dataset in ``DATA_NAMES`` day by day and store it as CSV.

    For each date from ``startDt`` to ``endDt`` (inclusive, stepped with
    ``utils_common.dtAdd(dt, 1)``) every name in ``DATA_NAMES`` is called with
    the date, and a non-empty result is written to
    ``<config.LOCAL_PATH_RAW>/<name>.csv``.

    The first non-empty chunk of *each* file truncates the file and writes the
    header; later chunks are appended without a header. (Previously a single
    counter was shared by all datasets, so only the very first file written
    got a header and was truncated.)

    Args:
        startDt: first date; sliced with ``[:7]`` to obtain the month prefix
            that is printed as progress.
        endDt: last date (inclusive), compared with ``<=`` against the cursor.
    """
    filenames = [
        os.path.join(config.LOCAL_PATH_RAW, dn + '.csv') for dn in DATA_NAMES
    ]
    # Files that have already received their first chunk during this run.
    started = set()
    lastMon = ''
    dt = startDt
    while dt <= endDt:
        # Progress output: print the month prefix once whenever it changes.
        if dt[:7] != lastMon:
            lastMon = dt[:7]
            print(lastMon)

        for dataname, filename in zip(DATA_NAMES, filenames):
            # NOTE(review): eval() resolves the fetcher by name and relies on
            # the local variable `dt`; DATA_NAMES must only ever contain
            # trusted identifiers.
            df = eval(dataname + '(dt)')

            if df.shape[0] > 0:
                isFirstChunk = filename not in started
                df.to_csv(filename,
                          mode='w' if isFirstChunk else 'a',
                          header=isFirstChunk,
                          index=False)
                started.add(filename)
        dt = utils_common.dtAdd(dt, 1)
Пример #2
0
def main(modelPath, startDt, endDt, initMoney, strategy, reportPath,
         testStartDt):
    """Run ``daily_test`` for every date in a range, one report file per date.

    The label/feature frame is loaded through ``dataio`` and restricted to the
    index entries contained in the combination (``&``) of the turnover-rank
    filter and the max-continuous-close-day filter. For each date ``dt`` from
    ``startDt`` to ``endDt`` (inclusive) the model file ``xgb_<dt>`` under
    ``modelPath`` is evaluated, with everything ``daily_test`` prints captured
    into ``<reportPath>/xgb_<dt>``.

    Args:
        modelPath: directory holding the ``xgb_<dt>`` model files.
        startDt: first date; concatenated to ``'xgb_'``, so it must be a str.
        endDt: last date (inclusive).
        initMoney: forwarded to ``daily_test``.
        strategy: forwarded to ``daily_test``.
        reportPath: directory receiving one report file per date.
        testStartDt: forwarded to ``daily_test``.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    idxFilter = np.asarray([st in secFilter for st in df.index])

    df = df[idxFilter]

    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values

    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        stdout = sys.stdout

        with open(os.path.join(reportPath, filename), 'w') as fout:
            sys.stdout = fout
            try:
                daily_test(feature, dts, index, initMoney, testStartDt,
                           os.path.join(modelPath, filename), strategy)
            finally:
                # Always hand stdout back, even when daily_test raises;
                # otherwise later prints would target a closed file.
                sys.stdout = stdout
        dt = utils_common.dtAdd(dt, 1)
Пример #3
0
def main(modelPath, startDt, endDt, initMoney, strategy, reportPath, testStartDt):
    """Evaluate the per-day models over a date range, writing one report per day.

    Loads the label/feature frame via ``dataio``, keeps only index entries
    present in ``turnoverFilter & maxContinousCloseDayFilter``, then for each
    date from ``startDt`` to ``endDt`` (inclusive) calls ``daily_test`` on the
    model file ``<modelPath>/xgb_<dt>`` while ``sys.stdout`` is redirected to
    ``<reportPath>/xgb_<dt>``.

    Args:
        modelPath: directory containing the ``xgb_<dt>`` model files.
        startDt: first date (str; it is concatenated to ``'xgb_'``).
        endDt: last date, inclusive.
        initMoney: passed through to ``daily_test``.
        strategy: passed through to ``daily_test``.
        reportPath: directory where the report files are created.
        testStartDt: passed through to ``daily_test``.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    idxFilter = np.asarray([st in secFilter for st in df.index])

    df = df[idxFilter]

    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values

    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        savedStdout = sys.stdout

        with open(os.path.join(reportPath, filename), 'w') as fout:
            sys.stdout = fout
            try:
                daily_test(feature, dts, index, initMoney, testStartDt,
                           os.path.join(modelPath, filename),
                           strategy)
            finally:
                # Restore stdout even on failure so that the traceback and
                # any later output do not go to the (closed) report file.
                sys.stdout = savedStdout
        dt = utils_common.dtAdd(dt, 1)
Пример #4
0
def download(startDt, endDt):
    """Download all ``DATA_NAMES`` datasets for a date range into CSV files.

    Walks from ``startDt`` to ``endDt`` (inclusive) using
    ``utils_common.dtAdd(dt, 1)``. For each date, each name in ``DATA_NAMES``
    is invoked with the date; a non-empty result is written to
    ``os.path.join(config.LOCAL_PATH_RAW, name + '.csv')``.

    Each file is truncated and given a header on its own first non-empty
    chunk and appended to afterwards. The earlier implementation used one
    counter for all datasets, so every file except the first one written was
    appended to without a header.

    Args:
        startDt: first date; ``dt[:7]`` is printed as a month progress marker.
        endDt: last date, inclusive.
    """
    filenames = [os.path.join(config.LOCAL_PATH_RAW, dn + '.csv')
                 for dn in DATA_NAMES]
    # Per-file bookkeeping: which outputs already hold a chunk from this run.
    writtenFiles = set()
    lastMon = ''
    dt = startDt
    while dt <= endDt:
        # Print the month prefix only when it changes.
        if dt[:7] != lastMon:
            lastMon = dt[:7]
            print(lastMon)

        for dataname, filename in zip(DATA_NAMES, filenames):
            # NOTE(review): eval() looks the fetcher up by name and reads the
            # local `dt`; keep DATA_NAMES restricted to trusted identifiers.
            df = eval(dataname + '(dt)')

            if df.shape[0] > 0:
                if filename in writtenFiles:
                    fmode, wheader = 'a', False
                else:
                    fmode, wheader = 'w', True
                df.to_csv(filename, mode=fmode, header=wheader, index=False)
                writtenFiles.add(filename)
        dt = utils_common.dtAdd(dt, 1)
Пример #5
0
def main(dtStart, dtEnd, savePath):
    """Train one regression model per date and save it under ``savePath``.

    Data comes from ``loadDataReg()``. For every date from ``dtStart`` to
    ``dtEnd`` (inclusive, advanced with ``utils_common.dtAdd``) the date is
    printed and ``daily_train_reg`` is asked to write ``<savePath>/xgb_<date>``.
    """
    label, feature, dts = loadDataReg()

    day = dtStart
    while day <= dtEnd:
        modelName = 'xgb_' + day
        print(day)
        target = os.path.join(savePath, modelName)
        daily_train_reg(feature, label, dts, day, target)
        day = utils_common.dtAdd(day, 1)
Пример #6
0
def main(dtStart, dtEnd, savePath):
    """Train one model per date via the ``daily_train`` module.

    Loads data with ``daily_train.loadData()`` and, for each date between
    ``dtStart`` and ``dtEnd`` inclusive, prints the date and calls
    ``daily_train.daily_train`` with the output path ``<savePath>/xgb_<date>``.
    """
    label, feature, dts = daily_train.loadData()

    current = dtStart
    while current <= dtEnd:
        outName = 'xgb_' + current
        print(current)
        outPath = os.path.join(savePath, outName)
        daily_train.daily_train(feature, label, dts, current, outPath)
        current = utils_common.dtAdd(current, 1)
Пример #7
0
def main(dtStart, dtEnd, savePath):
    """Run ``daily_train_reg`` for each date in ``[dtStart, dtEnd]``.

    The training data is obtained once from ``loadDataReg()``. Each iteration
    prints the date and passes ``os.path.join(savePath, 'xgb_' + date)`` as the
    save location; the date cursor moves forward via ``utils_common.dtAdd``.
    """
    label, feature, dts = loadDataReg()

    cursor = dtStart
    while cursor <= dtEnd:
        fileStem = 'xgb_' + cursor
        print(cursor)
        daily_train_reg(
            feature,
            label,
            dts,
            cursor,
            os.path.join(savePath, fileStem),
        )
        cursor = utils_common.dtAdd(cursor, 1)
Пример #8
0
def getDecFactors(dtStart, dtEnd):
    """Collect decision factors and re-key each one to the next available date.

    ``getDecFactorSingle`` is queried for every date from
    ``utils_common.dtAdd(dtStart, -7)`` through ``dtEnd``; dates for which it
    returns ``None`` are dropped. The surviving dates are sorted, and each
    factor is stored under the date that follows its own in that sorted list.
    The factor of the last date therefore has no target and is omitted.

    Returns:
        dict mapping the following date -> factor of the preceding date.
    """
    collected = {}
    day = utils_common.dtAdd(dtStart, -7)
    while day <= dtEnd:
        factor = getDecFactorSingle(day)
        if factor is not None:
            collected[day] = factor
        day = utils_common.dtAdd(day, 1)

    # Pair every date with its successor and shift the factor forward.
    ordered = sorted(collected)
    return {nxt: collected[prev] for prev, nxt in zip(ordered, ordered[1:])}
Пример #9
0
def getDecFactors(dtStart, dtEnd):
    """Return decision factors keyed by the date after the one they belong to.

    Dates from seven ``dtAdd`` steps before ``dtStart`` up to ``dtEnd`` are
    probed with ``getDecFactorSingle``; ``None`` results are skipped. After
    sorting the remaining dates, the factor found for position ``i`` is
    returned under the date at position ``i + 1``. The last date's own factor
    is not part of the result.
    """
    byDate = {}
    probe = utils_common.dtAdd(dtStart, -7)
    while probe <= dtEnd:
        found = getDecFactorSingle(probe)
        if found is not None:
            byDate[probe] = found
        probe = utils_common.dtAdd(probe, 1)

    # Shift: each factor moves to the next date that has data.
    dates = sorted(byDate)
    shifted = {}
    for pos in range(1, len(dates)):
        shifted[dates[pos]] = byDate[dates[pos - 1]]
    return shifted
Пример #10
0
def daily_train(feature, label, dts, dtEnd, savePath):
    """Fit an XGBoost classifier on the window before ``dtEnd`` and pickle it.

    Rows are selected where ``dts`` lies in
    ``[dtAdd(dtEnd, -config.TRAINING_DAYS), dtEnd)``. Sample weights come from
    ``utils_common.getWeight`` applied to the selected dates.

    Args:
        feature: array indexable by a boolean mask; the training features.
        label: array indexable by the same mask; the training labels.
        dts: per-row dates, comparable element-wise with ``dtEnd``.
        dtEnd: exclusive upper bound of the training window.
        savePath: file the fitted model is pickled to.
    """
    windowStart = utils_common.dtAdd(dtEnd, -config.TRAINING_DAYS)
    idx = (dts < dtEnd) & (dts >= windowStart)

    feature = feature[idx]
    label = label[idx]

    weight = utils_common.getWeight(dts[idx], config.WEIGHTER)

    cls = xgb.XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=500)
    print('training sample: {0}'.format(feature.shape))
    print(weight.shape)
    cls.fit(feature, label, sample_weight=weight)

    print('train end')
    # pickle requires a binary file object; the context manager also
    # guarantees that the file is flushed and closed.
    with open(savePath, 'wb') as fout:
        pickle.dump(cls, fout)
Пример #11
0
def daily_train_reg(feature, label, dts, dtEnd, savePath):
    """Fit an XGBoost regressor on the window before ``dtEnd`` and pickle it.

    Training rows are those whose date in ``dts`` falls in
    ``[dtAdd(dtEnd, -config.TRAINING_DAYS), dtEnd)``; they are weighted with
    ``utils_common.getWeight(..., config.WEIGHTER)``.

    Args:
        feature: array indexable by a boolean mask; the training features.
        label: array indexable by the same mask; the regression targets.
        dts: per-row dates, comparable element-wise with ``dtEnd``.
        dtEnd: exclusive upper bound of the training window.
        savePath: file the fitted model is pickled to.
    """
    windowStart = utils_common.dtAdd(dtEnd, -config.TRAINING_DAYS)
    idx = (dts < dtEnd) & (dts >= windowStart)

    feature = feature[idx]
    label = label[idx]

    weight = utils_common.getWeight(dts[idx], config.WEIGHTER)

    cls = xgb.XGBRegressor(max_depth=11, learning_rate=0.04, n_estimators=200)
    print('training sample: {0}'.format(feature.shape))
    print(weight.shape)
    cls.fit(feature, label, sample_weight=weight)

    print('train end')
    # Binary mode is required by pickle; `with` closes the handle reliably.
    with open(savePath, 'wb') as fout:
        pickle.dump(cls, fout)
Пример #12
0
def completeFundETFConsGet():
    """Fill date gaps in the 'FundETFConsGet' table and write the result.

    For every ``secID`` in the table, each trade date that exists in the
    'MktEqudAdjAfGet' table, is not earlier than the security's first date and
    is missing for that security gets a copy of the security's rows from the
    closest earlier date found within 19 backward ``dtAdd`` steps, with
    ``tradeDate`` rewritten to the missing date. The original rows plus all
    copies are written to ``os.path.join(getdb(), 'FundETFConsGet')``.
    """
    # Local import: pandas.concat replaces the per-row DataFrame.append, which
    # was quadratic and no longer exists in pandas >= 2.0.
    import pandas as pd

    df = getdf('FundETFConsGet')
    dfMarket = getdf('MktEqudAdjAfGet')

    secIDs = df.index.get_level_values('secID').values
    tradeDates = df.index.get_level_values('tradeDate').values
    usecs = np.unique(secIDs)

    udtMarket = np.unique(dfMarket.index.get_level_values('tradeDate').values)

    frames = [df]
    for secID in usecs:
        idx = secIDs == secID
        dts = tradeDates[idx]
        known = set(dts)  # O(1) membership instead of scanning the array
        minDt = np.min(dts)
        missDt = {dt for dt in udtMarket if dt not in known and dt >= minDt}

        print('{0}:{1}'.format(secID, json.dumps(list(missDt))))

        for dt in missDt:
            # Look back up to 19 steps for a date this security has data on
            # (step 0 is the missing date itself and can never match).
            preDt = None
            for i in range(20):
                candidate = utils_common.dtAdd(dt, -i)
                if candidate in known:
                    preDt = candidate
                    break
            if preDt is None:
                print('cannot find previous date, sec: {0}'.format(secID))
                # NOTE(review): this abandons ALL remaining missing dates of
                # the security, and missDt is an unordered set, so which ones
                # are skipped is arbitrary. `continue` may have been intended;
                # kept as-is to preserve existing behavior.
                break

            addDf = df[idx & (tradeDates == preDt)].reset_index()
            addDf.loc[:, 'tradeDate'] = dt
            addDf = addDf.set_index(config.DATA_NAMES['FundETFConsGet'])
            frames.append(addDf)

    dfComp = pd.concat(frames)
    dfComp.to_csv(os.path.join(getdb(), 'FundETFConsGet'))
Пример #13
0
def backtest(initMoney, dtStart, dtEnd, strategy, decFactor, outputfile=None):
    """Replay ``strategy`` day by day on a ``BacktestTrader`` and print statistics.

    The run covers ``dtStart``..``dtEnd`` clamped to the smallest and largest
    key of ``decFactor`` (the sentinel key ``'9999-99-99'`` is ignored). On
    each trading day the strategy is invoked at ``'open'`` and then at
    ``'close'``; afterwards the market value, the daily gain, and the running
    result of ``utils_common.sharpRatio`` are printed.

    Args:
        initMoney: passed to ``BacktestTrader``.
        dtStart: first date (str; it is concatenated into the banner line).
        dtEnd: last date, inclusive (str).
        strategy: object providing ``handle(dt, phase, trader, decFactorDt)``.
        decFactor: mapping date -> per-day decision data; days without an
            entry are handled with ``None``.
        outputfile: optional path; when given, one ``"<dt>,<value>"`` line is
            written per trading day.

    Raises:
        ValueError: if ``decFactor`` holds no date key besides the sentinel
            (raised by ``max`` on an empty list).
    """
    print('dt start: ' + dtStart + ', dt end:' + dtEnd)

    totals = []
    gains = []
    tReturns = []

    trader = BacktestTrader(initMoney)
    totals.append(trader.getMarketValue())

    # Build a real list: dict.keys() is a view without .remove() on Python 3.
    dts = [d for d in decFactor.keys() if d != '9999-99-99']
    dtEnd = min(dtEnd, max(dts))
    dt = max(dtStart, min(dts))

    numDt = 0
    fout = open(outputfile, 'w') if outputfile is not None else None
    try:
        while dt <= dtEnd:
            if not trader.isTradingDay(dt):
                dt = utils_common.dtAdd(dt, 1)
                continue

            decFactorDt = decFactor[dt] if dt in decFactor else None
            trader.setTime(dt, 'open')
            strategy.handle(dt, 'open', trader, decFactorDt)

            trader.setTime(dt, 'close')
            strategy.handle(dt, 'close', trader, decFactorDt)

            totals.append(trader.getMarketValue())
            gains.append((totals[-1] - totals[-2]) / totals[-2])
            tReturns.append((totals[-1] - totals[0]) / totals[0])

            print('dt: {0}, value: {1}, gains: {2}, sharp: {3}'.format(
                dt, totals[-1], gains[-1],
                utils_common.sharpRatio(tReturns, 0.1 / 200)))

            if fout is not None:
                fout.write('{0},{1}\n'.format(dt, totals[-1]))

            dt = utils_common.dtAdd(dt, 1)
            numDt += 1
    finally:
        # Close the report even if the strategy or the trader raises.
        if fout is not None:
            fout.close()

    print('total value: ' + str(totals[-1]))
    print('max loss: ' + str(utils_common.minSumSubList(gains)))
    posDays = len([g for g in gains if g > 0])
    totalDays = numDt
    # Guard against a range that contains no trading day at all.
    posRate = float(posDays) / totalDays if totalDays else 0.0
    print('posive days: {0}/{1}'.format(posRate, totalDays))
    print('sharp ratio : {0}'.format(utils_common.sharpRatio(tReturns, 0.1 / 200)))
    print(totalDays)
Пример #14
0
def backtest(initMoney, dtStart, dtEnd, strategy, decFactor, outputfile=None):
    """Simulate ``strategy`` over a date range and print performance figures.

    ``dtStart``/``dtEnd`` are clamped to the minimum/maximum key of
    ``decFactor``, not counting the sentinel key ``'9999-99-99'``. Non-trading
    days (per ``trader.isTradingDay``) are skipped. On every trading day
    ``strategy.handle`` is called for ``'open'`` and ``'close'``, then the
    market value, the day-over-day gain and the running
    ``utils_common.sharpRatio`` value are printed.

    Args:
        initMoney: passed to ``BacktestTrader``.
        dtStart: first date (str; used in string concatenation).
        dtEnd: last date, inclusive (str).
        strategy: object providing ``handle(dt, phase, trader, decFactorDt)``.
        decFactor: mapping date -> decision data for that day; ``None`` is
            passed to the strategy for dates without an entry.
        outputfile: optional path of a ``"<dt>,<value>"`` per-day log.

    Raises:
        ValueError: if ``decFactor`` has no date key other than the sentinel
            (``max`` of an empty list).
    """
    print('dt start: ' + dtStart + ', dt end:' + dtEnd)

    totals = []
    gains = []
    tReturns = []

    trader = BacktestTrader(initMoney)

    totals.append(trader.getMarketValue())

    numDt = 0
    # A list comprehension works on both Python 2 and 3; the previous
    # `decFactor.keys().remove(...)` fails on Python 3 dict views.
    dts = [key for key in decFactor.keys() if key != '9999-99-99']

    maxDt = max(dts)
    dtEnd = min(dtEnd, maxDt)
    dt = max(dtStart, min(dts))

    report = None
    if outputfile is not None:
        report = open(outputfile, 'w')

    try:
        while dt <= dtEnd:
            if not trader.isTradingDay(dt):
                dt = utils_common.dtAdd(dt, 1)
                continue

            decFactorDt = decFactor[dt] if dt in decFactor else None
            trader.setTime(dt, 'open')
            strategy.handle(dt, 'open', trader, decFactorDt)

            trader.setTime(dt, 'close')
            strategy.handle(dt, 'close', trader, decFactorDt)

            totals.append(trader.getMarketValue())
            gains.append((totals[-1] - totals[-2]) / totals[-2])
            tReturns.append((totals[-1] - totals[0]) / totals[0])

            print('dt: {0}, value: {1}, gains: {2}, sharp: {3}'.format(
                dt, totals[-1], gains[-1],
                utils_common.sharpRatio(tReturns, 0.1 / 200)))

            if report is not None:
                report.write('{0},{1}\n'.format(dt, totals[-1]))

            dt = utils_common.dtAdd(dt, 1)
            numDt += 1
    finally:
        # Never leak the report handle, whatever happens inside the loop.
        if report is not None:
            report.close()

    print('total value: ' + str(totals[-1]))
    print('max loss: ' + str(utils_common.minSumSubList(gains)))
    posDays = len([i for i in gains if i > 0])
    totalDays = numDt
    # Avoid ZeroDivisionError when the range held no trading day.
    if totalDays:
        posRate = float(posDays) / totalDays
    else:
        posRate = 0.0
    print('posive days: {0}/{1}'.format(posRate, totalDays))
    print('sharp ratio : {0}'.format(
        utils_common.sharpRatio(tReturns, 0.1 / 200)))
    print(totalDays)
Пример #15
0
    #turnoverFilter = dataio.getHS300Filter();

    print(df.shape[0])

    print('prepare...')
    dt = df.index.get_level_values('tradeDate')

    label = np.squeeze(df[[LABEL]].values)
    feature = df[FEATURE_SELECT].values

    idxFilter = np.asarray([True if st in secFilter else False \
                            for st in df.index])
    print('begin')
    for iy, year in enumerate(YEARS[:-1]):
        if len(year) < 10:
            dtTrainStart = utils_common.dtAdd(year + '-01-01',
                                              -config.TRAINING_DAYS)
        else:
            dtTrainStart = utils_common.dtAdd(year, -config.TRAINING_DAYS)
            pass
        idxTrain = (dt < year) & (dt >= dtTrainStart)  #&idxFilter;
        idxTest = (dt >= year) & (dt < YEARS[iy + 1])  #&idxFilter;
        labelTrain = label[idxTrain]
        labelTest = label[idxTest]
        featureTrain = feature[idxTrain]
        featureTest = feature[idxTest]
        dtTest = dt[idxTest]
        dtTrain = dt[idxTrain]
        weightTrain = getWeight(dtTrain)

        validate(labelTrain, labelTest, featureTrain, featureTest, dtTest,
                 weightTrain)
Пример #16
0
    def extract(self):
        """Produce walk-forward predictions, re-keyed to the next trade date.

        Rows with ``self.labelName_ > -1`` are loaded and passed through
        ``dataio.joinTurnoverRank``. Starting at '2014-01-01', consecutive
        windows of ``self.iterval_`` ``dtAdd`` steps are processed: the model
        is trained (``self.trainTestReg``) on the ``config.TRAINING_DAYS``
        before the window and predicts the rows inside it. Finally every
        prediction's ``tradeDate`` is replaced by the next distinct trade date
        present in the predictions; the latest date becomes '9999-99-99'.

        Returns:
            DataFrame indexed by ('secID', 'tradeDate') with a single column
            named after this class.
        """
        frame = dataio.getLabelAndFeature(self.labelName_,
                                          config.FEATURE_SELECT)
        frame = frame[frame[self.labelName_] > -1]
        frame = dataio.joinTurnoverRank(frame)

        dts = frame.index.get_level_values('tradeDate')
        indices = frame.index.values
        label = np.squeeze(frame[[self.labelName_]].values)
        feature = frame[config.FEATURE_SELECT].values

        lastDate = np.max(dts.values)
        chunks = []
        windowStart = '2014-01-01'
        while windowStart <= lastDate:
            windowEnd = utils_common.dtAdd(windowStart, self.iterval_)

            print('start: {0}, end: {1}'.format(windowStart, windowEnd))

            trainFrom = utils_common.dtAdd(windowStart, -config.TRAINING_DAYS)
            idxTrain = (dts < windowStart) & (dts >= trainFrom)
            idxTest = (dts >= windowStart) & (dts <= windowEnd)

            weight = utils_common.getWeight(dts[idxTrain], config.WEIGHTER)
            pred = self.trainTestReg(label[idxTrain], feature[idxTrain],
                                     feature[idxTest], weight)

            testIndex = pd.MultiIndex.from_tuples(
                indices[idxTest], names=['secID', 'tradeDate'])
            chunks.append(pd.DataFrame(pred,
                                       index=testIndex,
                                       columns=[self.__class__.__name__]))
            windowStart = utils_common.dtAdd(windowEnd, 1)

        merged = pd.concat(chunks)

        # Map each distinct trade date to the one that follows it.
        udt = np.sort(
            np.unique(merged.index.get_level_values('tradeDate').values))
        nextDate = dict(zip(udt[:-1], udt[1:]))
        maxDt = np.max(udt)

        merged.reset_index(inplace=True)

        arr = merged.values
        idxDt = merged.columns.get_loc('tradeDate')

        print(merged.shape)
        for row in arr:
            # `row` is a view into `arr`, so this rewrites the date in place.
            cur = row[idxDt]
            row[idxDt] = '9999-99-99' if cur == maxDt else nextDate[cur]

        result = pd.DataFrame(arr, columns=merged.columns)
        result.set_index(['secID', 'tradeDate'], inplace=True)
        return result
Пример #17
0
    def extract(self):
        """Generate rolling out-of-sample predictions shifted to the next date.

        Loads label and features (rows where ``self.labelName_ > -1``), joins
        the turnover rank, and walks forward from '2014-01-01' in windows of
        ``self.iterval_`` ``dtAdd`` steps. For each window
        ``self.trainTestReg`` is trained on the preceding
        ``config.TRAINING_DAYS`` and predicts the window's rows. Afterwards
        each row's ``tradeDate`` is replaced by the next distinct date among
        the predictions, and rows on the last date get '9999-99-99'.

        Returns:
            DataFrame indexed by ('secID', 'tradeDate') whose only column is
            named ``self.__class__.__name__``.
        """
        data = dataio.getLabelAndFeature(self.labelName_, config.FEATURE_SELECT)
        data = data[data[self.labelName_] > -1]
        data = dataio.joinTurnoverRank(data)

        dts = data.index.get_level_values('tradeDate')
        indices = data.index.values
        label = np.squeeze(data[[self.labelName_]].values)
        feature = data[config.FEATURE_SELECT].values

        dtMax = np.max(dts.values)
        columnName = self.__class__.__name__
        levelNames = ['secID', 'tradeDate']

        pieces = []
        dtStart = '2014-01-01'
        while dtStart <= dtMax:
            dtEnd = utils_common.dtAdd(dtStart, self.iterval_)
            print('start: {0}, end: {1}'.format(dtStart, dtEnd))

            trainLower = utils_common.dtAdd(dtStart, -config.TRAINING_DAYS)
            idxTrain = (dts < dtStart) & (dts >= trainLower)
            idxTest = (dts >= dtStart) & (dts <= dtEnd)

            labelTrain = label[idxTrain]
            featureTrain = feature[idxTrain]
            featureTest = feature[idxTest]
            weight = utils_common.getWeight(dts[idxTrain], config.WEIGHTER)

            pred = self.trainTestReg(labelTrain, featureTrain, featureTest,
                                     weight)

            piece = pd.DataFrame(
                pred,
                index=pd.MultiIndex.from_tuples(indices[idxTest],
                                                names=levelNames),
                columns=[columnName])
            pieces.append(piece)
            dtStart = utils_common.dtAdd(dtEnd, 1)

        allPred = pd.concat(pieces)

        # successor[d] is the distinct trade date right after d.
        udt = np.sort(
            np.unique(allPred.index.get_level_values('tradeDate').values))
        successor = {}
        for pos in range(udt.shape[0] - 1):
            successor[udt[pos]] = udt[pos + 1]
        maxDt = np.max(udt)

        allPred.reset_index(inplace=True)

        arr = allPred.values
        idxDt = allPred.columns.get_loc('tradeDate')

        print(allPred.shape)
        for rowNo in range(allPred.shape[0]):
            current = arr[rowNo, idxDt]
            if current != maxDt:
                arr[rowNo, idxDt] = successor[current]
            else:
                arr[rowNo, idxDt] = '9999-99-99'

        shifted = pd.DataFrame(arr, columns=allPred.columns)
        shifted.set_index(levelNames, inplace=True)
        return shifted
Пример #18
0
    #turnoverFilter = dataio.getHS300Filter();

    print(df.shape[0]);
    
    print('prepare...');
    dt = df.index.get_level_values('tradeDate');

    label = np.squeeze(df[[LABEL]].values);
    feature = df[FEATURE_SELECT].values;

    idxFilter = np.asarray([True if st in secFilter else False \
                            for st in df.index]);
    print('begin');
    for iy,year in enumerate(YEARS[:-1]):
        if len(year)<10:
            dtTrainStart = utils_common.dtAdd(year+'-01-01',-config.TRAINING_DAYS);
        else:
            dtTrainStart = utils_common.dtAdd(year,-config.TRAINING_DAYS);
            pass
        idxTrain = (dt<year)&(dt>=dtTrainStart)#&idxFilter;
        idxTest = (dt>=year)&(dt<YEARS[iy+1])#&idxFilter;
        labelTrain = label[idxTrain];
        labelTest = label[idxTest];
        featureTrain = feature[idxTrain];
        featureTest = feature[idxTest];
        dtTest = dt[idxTest];
        dtTrain = dt[idxTrain];
        weightTrain = getWeight(dtTrain);
        
        validate(labelTrain,labelTest,featureTrain,featureTest,dtTest,weightTrain);
        pass;