# Example #1
# 0
def createModel(data):
    """Forecast 60 values per row of *data* with R's ``auto.arima``.

    Every row yielded by ``DataFrame.iter_row(data)`` is converted to an R
    numeric vector, wrapped in an R ``ts`` object (``start=2``,
    ``frequency=15``) and handed to the R helper ``arima_data`` defined
    below, which fits ``auto.arima`` and returns the mean of a 60-step
    forecast.  The per-row forecasts are stacked into a frame with the
    columns ``Ret_121_pred`` .. ``Ret_180_pred``; that frame is printed and
    written to ``tmp1.csv``.

    Args:
        data: Row source accepted by ``DataFrame.iter_row``.

    Returns:
        None.  The results are only printed and written to disk.

    NOTE(review): a second ``createModel(data, param)`` is defined further
    down in this file; if both live in the same module the later definition
    rebinds the name.
    """
    print('Create_model')
    importr('forecast')
    # Define the R-side helper once; it is looked up by name below.
    robj.r('''
            arima_data <- function(data){
               
               best_arima = auto.arima(data,trace=F,stepwise=T)
               forecast = forecast.Arima(best_arima,h=60,level=c(99.5))
               output = forecast$mean
               return (output)
               
               
            }
            
   ''')
    features_names = ["Ret_%d_pred" % (i) for i in range(121, 181)]

    # Resolve the R callables once instead of on every row.
    as_numeric = robj.r('as.numeric')
    make_ts = robj.r('ts')
    arima_data = robj.r('arima_data')

    rows = []
    for count, series in enumerate(DataFrame.iter_row(data), 1):
        if count % 100 == 0:
            # Progress indicator, one line every 100 rows.
            print(count)
        series = make_ts(as_numeric(series), start=2, frequency=15)
        forecast = np.array(as_numeric(arima_data(series)))
        row = pd.DataFrame(forecast).T
        row.columns = features_names
        rows.append(row)

    # Concatenate once rather than inside the loop, which re-copied the
    # accumulated frame on every iteration.  The empty frame stays first so
    # an empty input still yields the expected columns.
    predict = pd.concat([pd.DataFrame(columns=features_names)] + rows, axis=0)
    print(predict)
    predict.to_csv("tmp1.csv")
def createModel(data, param):
    """Forecast per-row returns with ``auto.arima`` and score the result.

    Every row yielded by ``DataFrame.iter_row(data)`` is converted to an R
    numeric vector, wrapped in an R ``ts`` object (``start=2``,
    ``frequency=param['frequency']``) and passed to the R helper
    ``arima_data`` defined below, which returns the mean of a 60-step
    forecast.  The forecasts (columns ``Ret_121_pred`` .. ``Ret_180_pred``)
    are joined with the module-level ``raw_data``, two daily predictions
    are derived, and the frame is scored through ``WMAE_model``.

    Files written: ``raw.csv``, ``predict.csv``, ``data.raw.csv`` and
    ``data.csv``.

    Args:
        data: Row source accepted by ``DataFrame.iter_row``.
        param: Mapping that provides the integer ``'frequency'`` used for
            the R time series.

    Returns:
        dict: ``{'loss': <float>, 'status': STATUS_OK}``.

    Relies on module-level names not defined in this function:
    ``raw_data``, ``price_train``, ``transform_format``, ``WMAE_model`` and
    ``STATUS_OK``.
    """
    print('Create_model')
    importr('forecast')
    # NOTE(review): in the R source below, ``max.order=10,`` is followed by
    # a second comma (an empty argument), and ``stationary=T`` is given to
    # ``forecast.Arima`` rather than ``auto.arima``.  Both look like typos;
    # the R code is left untouched here -- confirm the intent before
    # changing it.
    robj.r('''
            arima_data <- function(data){
               
               best_arima = auto.arima(data,trace=F,stepwise=T,max.P=8,max.Q=8,max.p=10,max.q=10,max.order=10,
               ,start.p=1,start.q=0,start.P=1,start.Q=0,seasonal=T,ic=('bic'))
               forecast = forecast.Arima(best_arima,h=60,level=c(99.5),stationary=T)
               output = forecast$mean
               return (output)
               
               
            }
            
   ''')
    frequency = param['frequency']
    print('the frequency is %d' % (frequency))
    features_names = ["Ret_%d_pred" % (i) for i in range(121, 181)]

    # Resolve the R callables once instead of on every row.
    as_numeric = robj.r('as.numeric')
    make_ts = robj.r('ts')
    arima_data = robj.r('arima_data')

    rows = []
    for count, series in enumerate(DataFrame.iter_row(data), 1):
        if count % 100 == 0:
            # Progress indicator, one line every 100 rows.
            print(count)
        series = make_ts(as_numeric(series), start=2, frequency=frequency)
        forecast = np.array(as_numeric(arima_data(series)))
        row = pd.DataFrame(forecast).T
        row.columns = features_names
        rows.append(row)

    # Each one-row frame carries index label 0, so stacking them as-is
    # produced an all-zero index and the index-based join/assignment below
    # could not line rows up one-to-one.  ignore_index relabels the rows
    # 0..n-1 in processing order.
    # Assumes raw_data and price_train use a default 0..n-1 index in the
    # same row order as ``data`` -- TODO confirm against the loaders.
    # A single concat also avoids re-copying the accumulated frame on every
    # iteration.
    predict = pd.concat([pd.DataFrame(columns=features_names)] + rows,
                        axis=0, ignore_index=True)

    # Dump the intermediate frames for inspection.
    raw_data.to_csv("raw.csv")
    predict.to_csv("predict.csv")
    # Kept in its own local so the ``data`` argument is not rebound.
    merged = predict.join(raw_data, rsuffix='_2')
    merged.to_csv("data.raw.csv")
    merged['Ret_120_price'] = price_train['Ret_120_price']
    # Presumably adds 'Ret_180_price' in place (it is read below but never
    # assigned in this function) -- verify against transform_format.
    transform_format(merged)

    ret_minus_two = merged['Ret_MinusTwo']
    ret_from_prices = 1 - (1.0 / ((1.0 / (1 - merged['Ret_MinusOne'])) *
                                  merged['Ret_120_price'] *
                                  merged['Ret_180_price']))
    merged['Ret_PlusOne_pred'] = 0.5 * ret_minus_two + 0.5 * ret_from_prices
    merged['Ret_PlusTwo_pred'] = (0.5 * ret_from_prices +
                                  0.5 * merged['Ret_PlusOne_pred'])
    merged.to_csv("data.csv")

    # Presumably fills the 'error' column in place -- verify against
    # WMAE_model.
    WMAE_model(merged)
    # NOTE(review): the denominator is hard-coded (looks like 40000 rows
    # times 62 predictions per row) -- confirm it matches the data scored.
    mase = np.sum(merged['error']) / (40000 * 62)
    print('loss:%f' % (mase))
    return {'loss': mase, 'status': STATUS_OK}