Example #1
0
import numpy as np

# data_loader, synthetic_data_generator, predictive_model, performance,
# output_visualization and synthetic_ranking_agreement are assumed to be
# defined elsewhere in this repo.
def main(args):
  """SRA-TSTR main function.

  Args:
    - data_name: data to be used in this experiment
    - num_fold: the number of folds (iterations)
    - test_fraction: the fraction of testing data
    - prob_flip: the fraction of testing labels to be flipped
    - metric_name: metric used to evaluate the predictive models
  """
    
  # Diverse predictive models
  models = ['logisticregression', 'randomforest', 'gaussiannb',
            'bernoullinb', 'svmlin', 'extra trees', 'lda', 'adaboost',
            'bagging', 'gbm', 'nn', 'xgb']
  
  performance_trtr = np.zeros([len(models), args.num_fold])
  performance_tstr = np.zeros([len(models), args.num_fold])
  performance_tsts = np.zeros([len(models), args.num_fold])
  
  for i in range(args.num_fold):
                   
    # Load original data
    train_x_real, train_y_real, test_x_real, test_y_real = \
      data_loader(args.data_name, args.test_fraction)
    # Generate synthetic data
    train_x_synth, train_y_synth, test_x_synth, test_y_synth = \
      synthetic_data_generator(train_x_real, train_y_real,
                               test_x_real, test_y_real, args.prob_flip)
        
    for j in range(len(models)):
      
      model_name = models[j]            
      
      print('num fold: ' + str(i+1) + ', predictive model: ' + model_name)
          
      _, test_y_hat_trtr = predictive_model(train_x_real, train_y_real, 
                                            test_x_real, model_name)
      _, test_y_hat_tstr = predictive_model(train_x_synth, train_y_synth, 
                                            test_x_real, model_name)
      _, test_y_hat_tsts = predictive_model(train_x_synth, train_y_synth, 
                                            test_x_synth, model_name)
                  
      performance_trtr[j, i] = performance(test_y_real, test_y_hat_trtr, 
                                           args.metric_name)
      performance_tstr[j, i] = performance(test_y_real, test_y_hat_tstr, 
                                           args.metric_name)
      performance_tsts[j, i] = performance(test_y_synth, test_y_hat_tsts, 
                                           args.metric_name)
                        
  print('TSTR Performance (' + args.metric_name + ')')
  output_visualization(performance_trtr, performance_tstr, models)
  
  result = synthetic_ranking_agreement(np.mean(performance_trtr, 1),
                                       np.mean(performance_tsts, 1))
  print('SRA Performance: ' + str(result))
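The snippet above calls synthetic_ranking_agreement without defining it. A minimal sketch, assuming the standard pairwise-agreement definition of SRA (the fraction of model pairs whose relative ordering is the same under real and synthetic evaluation); the name matches the call above, but this body is hypothetical, not the original implementation:

import numpy as np

def synthetic_ranking_agreement(perf_real, perf_synth):
  """Fraction of model pairs ordered the same way by both performance vectors."""
  k = len(perf_real)
  agree, total = 0, 0
  for i in range(k):
    for j in range(i + 1, k):
      total += 1
      # same sign of the pairwise difference means the ranking agrees
      if (perf_real[i] - perf_real[j]) * (perf_synth[i] - perf_synth[j]) > 0:
        agree += 1
  return agree / total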
Example #2
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# performance() and the global config object `para` are assumed to be defined elsewhere.
def portfolio_test(meanDf):
    sharp_list = []
    ret_list = []
    std_list = []
    mdd_list = []
    compare = pd.DataFrame()
    for oneleg in tqdm(range(len(meanDf.columns))):
        portfolioDF = pd.DataFrame()
        portfolioDF['ret'] = meanDf.iloc[:, oneleg]
        portfolioDF['nav'] = (portfolioDF['ret'] + 1).cumprod()
        performance_df = performance(portfolioDF, para)
        # performance() returns a one-row frame; pull out each scalar metric
        sharp_list.append(performance_df.iloc[0, 0])
        ret_list.append(performance_df.iloc[0, 1])
        std_list.append(performance_df.iloc[0, 2])
        mdd_list.append(performance_df.iloc[0, 3])
        compare[str(oneleg)] = portfolioDF['nav']
    performanceDf = pd.concat([
        pd.Series(sharp_list),
        pd.Series(ret_list),
        pd.Series(std_list),
        pd.Series(mdd_list)
    ],
                              axis=1,
                              sort=True)
    performanceDf.columns = ['Sharp', 'RetYearly', 'STD', 'MDD']
    compare.index = meanDf.index
    plt.plot(range(len(compare.iloc[1:, 1])), compare.iloc[1:, :])
    plt.title(para.factor)
    plt.grid(True)
    plt.legend(compare.columns)  # plt.plot() attaches no labels, so pass them explicitly
    plt.savefig(para.factor + '_performance_nav.png')
    plt.show()
    return performanceDf, compare
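performance() is external here. A minimal sketch of a compatible helper, assuming daily returns and the four columns consumed above; the annualization factor and column order are assumptions, not the original implementation:

import numpy as np
import pandas as pd

def performance(portfolioDF, para):
    """One-row frame: Sharpe ratio, annualized return, annualized std, max drawdown."""
    # para is accepted for interface compatibility; unused in this sketch
    ret = portfolioDF['ret']
    nav = portfolioDF['nav']
    ann = 252  # assumed number of trading periods per year
    ret_yearly = ret.mean() * ann
    std = ret.std() * np.sqrt(ann)
    sharpe = ret_yearly / std if std > 0 else np.nan
    mdd = (1 - nav / nav.cummax()).max()  # max drawdown along the NAV path
    return pd.DataFrame([[sharpe, ret_yearly, std, mdd]],
                        columns=['Sharp', 'RetYearly', 'STD', 'MDD'])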
Example #3
0
    def send_feedback(self, X, feature_names, reward, truth):
        """ Return outlier labels as part of the feedback loop.
        
        Arguments:
            - X: input data
            - feature_names
            - reward
            - truth: outlier labels
        """
        self.label = truth
        self._labels.append(self.label)
        self._labels = flatten(self._labels)

        scores = performance(self._labels,
                             self._predictions,
                             roll_window=self.roll_window)
        stats = outlier_stats(self._labels,
                              self._predictions,
                              roll_window=self.roll_window)

        convert = flatten([scores, stats])
        metric = []
        for c in convert:  # convert from np to native python type to jsonify
            metric.append(np.asarray(c).item())  # np.asscalar was removed in NumPy 1.23
        self.metric = metric

        return
Example #4
0
    def send_feedback(self, X, feature_names, reward, truth):
        """ Return outlier labels as part of the feedback loop.

        Parameters
        ----------
        X : array of the features sent in the original predict request
        feature_names : array of feature names. May be None if not available.
        reward : float, the reward
        truth : array with correct value (optional)
        """
        _ = super().send_feedback(X, feature_names, reward, truth)

        # historical reconstruction errors and predictions
        self._mse.append(self.mse)
        self._mse = flatten(self._mse)
        self._predictions.append(self.prediction)
        self._predictions = flatten(self._predictions)

        # target labels
        self.label = truth
        self._labels.append(self.label)
        self._labels = flatten(self._labels)

        # performance metrics
        scores = performance(self._labels, self._predictions, roll_window=self.roll_window)
        stats = outlier_stats(self._labels, self._predictions, roll_window=self.roll_window)

        convert = flatten([scores, stats])
        metric = []
        for c in convert:  # convert from np to native python type to jsonify
            metric.append(np.asarray(c).item())  # np.asscalar was removed in NumPy 1.23
        self.metric = metric

        return []
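Both send_feedback variants above lean on an external flatten() helper. A plausible sketch, assuming it flattens one level of nesting while letting scalars pass through (hypothetical, not the original):

import numpy as np

def flatten(items):
    """Flatten one level of nesting; scalars pass through unchanged."""
    flat = []
    for item in items:
        if isinstance(item, (list, tuple, np.ndarray)):
            flat.extend(np.asarray(item).ravel().tolist())
        else:
            flat.append(item)
    return flat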
Example #5
0
    def portfolio_test(self):
        # portfolio_test computes the performance indices of the portfolio
        # https://blog.csdn.net/weixin_42294255/article/details/103836548
        sharp_list = []
        ret_list = []
        std_list = []
        mdd_list = []
        r2var_list = []
        cr2var_list = []
        compare = pd.DataFrame()
        for oneleg in tqdm(range(len(self.df.columns))):
            portfolioDF = pd.DataFrame()
            portfolioDF['ret'] = self.df.iloc[:, oneleg]
            portfolioDF['nav'] = (portfolioDF['ret'] + 1).cumprod()
            performance_df = performance(portfolioDF, para)
            # performance_df_anl = performance_anl(portfolioDF, para)
            # performance() returns a one-row frame; pull out each scalar metric
            sharp_list.append(performance_df.iloc[0, 0])
            ret_list.append(performance_df.iloc[0, 1])
            std_list.append(performance_df.iloc[0, 2])
            mdd_list.append(performance_df.iloc[0, 3])
            r2var_list.append(performance_df.iloc[0, 4])
            cr2var_list.append(performance_df.iloc[0, 5])
            compare[str(oneleg)] = portfolioDF['nav']
        performanceDf = pd.concat([pd.Series(sharp_list),
                                   pd.Series(ret_list),
                                   pd.Series(std_list),
                                   pd.Series(mdd_list),
                                   pd.Series(r2var_list),
                                   pd.Series(cr2var_list)],
                                  axis=1, sort=True)
        performanceDf.columns = ['Sharp',
                                 'RetYearly',
                                 'STD',
                                 'MDD',
                                 'R2VaR',
                                 'R2CVaR']
        compare.index = self.df.index
        plt.plot(range(len(compare) - 1), compare.iloc[1:, :])
        plt.title(para.factor + '_' + para.factor2)
        # plt.xticks([0, 25, 50, 75, 100, 125],
        #            ['2009/12/31', '2011/01/31', '2013/02/28', '2015/03/31', '2017/04/30', '2020/04/30'])
        # plt.grid(True)
        # plt.xlim((0, 125))
        plt.xticks([0, 25, 50, 65],
                   ['2014/12/31', '2016/12/30', '2018/12/31', '2020/04/30'])
        plt.grid(True)
        plt.xlim((0, 65))
        plt.legend(compare.columns)  # plt.plot() attaches no labels, so pass them explicitly
        plt.savefig(para.result_path + para.factor + '_' + para.factor2 + '_' + para.weightMethod + '_performance_nav.png')
        plt.show()
        return performanceDf, compare
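The two extra columns, R2VaR and R2CVaR, plausibly denote return-to-VaR and return-to-CVaR ratios. A hypothetical sketch of how performance() might produce them, assuming 95% historical VaR and expected shortfall on the return series:

import numpy as np

def var_ratios(ret, ret_yearly, alpha=0.95):
    var = -np.percentile(ret, 100 * (1 - alpha))  # historical Value-at-Risk
    cvar = -ret[ret <= -var].mean()               # expected shortfall (CVaR)
    return ret_yearly / var, ret_yearly / cvar    # R2VaR, R2CVaR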
Example #6
0
    def portfolio_test(self):
        sharp_list = []
        ret_list = []
        std_list = []
        mdd_list = []
        r2var_list = []
        cr2var_list = []
        anl = []
        compare = pd.DataFrame()
        for oneleg in tqdm(range(len(self.meanDf.columns))):
            portfolioDF = pd.DataFrame()
            portfolioDF['ret'] = self.meanDf.iloc[:, oneleg]
            portfolioDF['nav'] = (portfolioDF['ret'] + 1).cumprod()
            performance_df = performance(portfolioDF, para)
            performance_df_anl = performance_anl(portfolioDF, para)
            # performance() returns a one-row frame; pull out each scalar metric
            sharp_list.append(performance_df.iloc[0, 0])
            ret_list.append(performance_df.iloc[0, 1])
            std_list.append(performance_df.iloc[0, 2])
            mdd_list.append(performance_df.iloc[0, 3])
            r2var_list.append(performance_df.iloc[0, 4])
            cr2var_list.append(performance_df.iloc[0, 5])
            # first column of the performance_anl() output for this leg
            anl.append(np.array(performance_df_anl.iloc[:, 0].T))
            compare[str(oneleg)] = portfolioDF['nav']
        performanceDf = pd.concat([pd.Series(sharp_list),
                                   pd.Series(ret_list),
                                   pd.Series(std_list),
                                   pd.Series(mdd_list),
                                   pd.Series(r2var_list),
                                   pd.Series(cr2var_list)],
                                  axis=1, sort=True)
        performanceDf.columns = ['Sharp',
                                 'RetYearly',
                                 'STD',
                                 'MDD',
                                 'R2VaR',
                                 'R2CVaR']
        anlDf = pd.DataFrame(anl)
        print(anlDf)
        compare.index = self.meanDf.index
        plt.plot(range(len(compare) - 1), compare.iloc[1:, :])
        plt.title(para.factor)
        plt.xticks([0, 25, 50, 75, 100, 125],
                   ['2009/12/31', '2011/01/31', '2013/02/28', '2015/03/31', '2017/04/30', '2020/04/30'])
        plt.grid(True)
        plt.xlim((0, 125))
        plt.legend(compare.columns)  # plt.plot() attaches no labels, so pass them explicitly
        plt.savefig(para.result_path + para.factor + '_' + para.weightMethod +
                    '_' + para.normalize + '_performance_nav.png')
        plt.show()
        return performanceDf, compare
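performance_anl() is also external; its name and the per-row append above suggest a per-year breakdown of returns. A hypothetical sketch, assuming portfolioDF carries a DatetimeIndex:

def performance_anl(portfolioDF, para):
    """One column of compound returns per calendar year (hypothetical)."""
    # para is accepted for interface compatibility; unused in this sketch
    ret = portfolioDF['ret']
    yearly = (ret + 1).groupby(ret.index.year).prod() - 1  # compound return per year
    return yearly.to_frame('RetAnnual')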
def main(args):
    """Time-series prediction main function.
  
  Args:
    - train_rate: training data ratio
    - seq_len: sequence length
    - task: classification or regression
    - model_type: rnn, lstm, gru, or attention
    - h_dim: hidden state dimensions
    - n_layer: number of layers
    - batch_size: the number of samples in each mini-batch
    - epoch: the number of iterations
    - learning_rate: learning rates
    - metric_name: mse or mae
  """
    # Load data
    train_x, train_y, test_x, test_y = data_loader(args.train_rate,
                                                   args.seq_len)

    # Model training / testing
    model_parameters = {
        'task': args.task,
        'model_type': args.model_type,
        'h_dim': args.h_dim,
        'n_layer': args.n_layer,
        'batch_size': args.batch_size,
        'epoch': args.epoch,
        'learning_rate': args.learning_rate
    }

    if args.model_type in ['rnn', 'lstm', 'gru']:
        general_rnn = GeneralRNN(model_parameters)
        general_rnn.fit(train_x, train_y)
        test_y_hat = general_rnn.predict(test_x)
    elif args.model_type == 'attention':
        basic_attention = Attention(model_parameters)
        basic_attention.fit(train_x, train_y)
        test_y_hat = basic_attention.predict(test_x)
    else:
        raise ValueError('model_type must be rnn, lstm, gru, or attention')

    # Evaluation
    result = performance(test_y, test_y_hat, args.metric_name)
    print('Performance (' + args.metric_name + '): ' + str(result))
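A hypothetical CLI wrapper for main(); the flags mirror the docstring above, and every default value here is an illustrative assumption, not taken from the original repo:

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--train_rate', type=float, default=0.8)
    parser.add_argument('--seq_len', type=int, default=7)
    parser.add_argument('--task', choices=['classification', 'regression'], default='regression')
    parser.add_argument('--model_type', choices=['rnn', 'lstm', 'gru', 'attention'], default='lstm')
    parser.add_argument('--h_dim', type=int, default=10)
    parser.add_argument('--n_layer', type=int, default=3)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--metric_name', choices=['mse', 'mae'], default='mse')
    main(parser.parse_args())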