def __init__(self, balance, tickers=None, base_model=None, n_prev=2,
             wait=100, steps_ahead=100, k=5, envelope='proportional',
             log=False, commission=.0002, flat_rate=8):
    """Set up the strategy and the forecasting model it relies on.

    Args:
        balance: Forwarded unchanged to the parent strategy constructor.
        tickers: Ticker names to trade. When ``None`` the list is obtained
            from ``s_and_p_names('2014-1-1', '2015-11-02')``.
        base_model: Estimator wrapped by ``TimeSeriesRegressor``. When
            ``None`` a fresh ``LinearRegression()`` is created for this
            instance.
        n_prev: Passed to ``TimeSeriesRegressor`` as ``n_prev``.
        wait: Forwarded unchanged to the parent strategy constructor.
        steps_ahead: Stored on the instance as ``self.steps_ahead``.
        k: Stored on the instance as ``self.k``.
        envelope: Stored on the instance as ``self.envelope``.
        log: Forwarded unchanged to the parent strategy constructor.
        commission: Forwarded unchanged to the parent strategy constructor.
        flat_rate: Forwarded unchanged to the parent strategy constructor.
    """
    if tickers is None:
        tickers = s_and_p_names('2014-1-1', '2015-11-02')
    # A default written as ``base_model=LinearRegression()`` is evaluated
    # once, when the function is defined, so every strategy instance would
    # share (and refit) the same estimator object. Build one per instance.
    if base_model is None:
        base_model = LinearRegression()
    super(TSEBuyAndHoldStrategy, self).__init__(
        balance, tickers, log=log, commission=commission, wait=wait,
        flat_rate=flat_rate)
    self.model = TimeSeriesRegressor(base_model, n_ahead=1, n_prev=n_prev)
    self.steps_ahead = steps_ahead
    self.k = k
    self.envelope = envelope
def get_data_dependece(X, data_sizes, folds=20, test_size=30, n_prev=2,
                       log=True):
    """Measure how forecast-based column ranking performs per training size.

    For every entry of ``data_sizes`` and every train/test pair produced by
    ``cascade_cv``, a ``TimeSeriesRegressor(LinearRegression())`` is fitted
    on the training rows and asked to forecast ``len(X_test)`` steps. The
    columns are ranked by forecast change (last forecast row minus first,
    largest first). For each rank the realised change of that column over
    the test rows is recorded, minus the mean realised change of all
    columns.

    Args:
        X: Table indexed with ``.iloc`` and exposing ``.shape``
            (presumably a pandas DataFrame with one column per series --
            confirm against callers).
        data_sizes: Iterable of training sizes handed to ``cascade_cv``.
        folds: Number of folds requested from ``cascade_cv``; also the
            size of the second axis of the result.
        test_size: Passed to ``cascade_cv`` as ``test_size``.
        n_prev: Passed to ``TimeSeriesRegressor`` as ``n_prev``.
        log: When true, print a progress line for every fold.

    Returns:
        Array of shape ``(len(data_sizes), folds, X.shape[1])`` where entry
        ``[i, j, k]`` belongs to the column ranked ``k``-th by the forecast
        in fold ``j`` of data size ``i``.
        NOTE(review): the array is created with ``np.empty``; if
        ``cascade_cv`` yields fewer than ``folds`` pairs the remaining
        slots stay uninitialised.
    """
    n_series = X.shape[1]
    bests = np.empty((len(data_sizes), folds, n_series))

    def changes(window):
        # Per-column move between the first and the last row.
        window = np.asarray(window)
        return window[-1] - window[0]

    for i, data_size in enumerate(data_sizes):
        pairs = cascade_cv(len(X), folds, data_size=data_size,
                           test_size=test_size, number=True)
        for j, pair in enumerate(pairs):
            if log:
                print('data size: {} trial {} '.format(data_size, j))
            X_train = np.array(X.iloc[pair[0], :])
            X_test = np.array(X.iloc[pair[1], :])

            tsr = TimeSeriesRegressor(LinearRegression(), n_prev=n_prev)
            tsr.fit(X_train)
            fc = tsr.forecast(X_train, len(X_test))

            # Column indices ordered from largest to smallest forecast move.
            best_is = changes(fc).argsort()[::-1]
            # The realised changes do not depend on the rank, so compute
            # them once per fold instead of once per column.
            test_changes = changes(X_test)
            bests[i, j, :] = test_changes[best_is] - np.mean(test_changes)
    return bests
class TSEBuyAndHoldStrategy(InformedBuyAndHoldStrategy):
    """Buy-and-hold strategy that selects stocks from a time-series forecast.

    The forecasting model is a ``TimeSeriesRegressor`` wrapped around
    ``base_model``; ``choose_stocks`` ranks the series by forecast change
    and weights the top ``k`` according to ``envelope``.
    """

    def __init__(self, balance, tickers=None, base_model=None, n_prev=2,
                 wait=100, steps_ahead=100, k=5, envelope='proportional',
                 log=False, commission=.0002, flat_rate=8):
        """Set up the strategy and the forecasting model it relies on.

        Args:
            balance: Forwarded unchanged to the parent constructor.
            tickers: Ticker names to trade. When ``None`` the list is
                obtained from ``s_and_p_names('2014-1-1', '2015-11-02')``.
            base_model: Estimator wrapped by ``TimeSeriesRegressor``. When
                ``None`` a fresh ``LinearRegression()`` is created for
                this instance.
            n_prev: Passed to ``TimeSeriesRegressor`` as ``n_prev``.
            wait: Forwarded unchanged to the parent constructor.
            steps_ahead: Number of steps requested from the forecast in
                ``choose_stocks``.
            k: Number of top-ranked series kept by ``choose_stocks``.
            envelope: Weighting scheme: ``'proportional'``,
                ``'log_proportional'`` or ``'uniform'``.
            log: Forwarded unchanged to the parent constructor.
            commission: Forwarded unchanged to the parent constructor.
            flat_rate: Forwarded unchanged to the parent constructor.
        """
        if tickers is None:
            tickers = s_and_p_names('2014-1-1', '2015-11-02')
        # A default written as ``base_model=LinearRegression()`` is
        # evaluated once at definition time, so all instances would share
        # (and refit) one estimator object. Build one per instance instead.
        if base_model is None:
            base_model = LinearRegression()
        super(TSEBuyAndHoldStrategy, self).__init__(
            balance, tickers, log=log, commission=commission, wait=wait,
            flat_rate=flat_rate)
        self.model = TimeSeriesRegressor(base_model, n_ahead=1,
                                         n_prev=n_prev)
        self.steps_ahead = steps_ahead
        self.k = k
        self.envelope = envelope

    def choose_stocks(self):
        """Pick the ``k`` series with the largest forecast change.

        The model is refitted on ``self.observed_data`` and forecast
        ``self.steps_ahead`` steps ahead. Series are ranked by the
        difference between the last and first forecast rows.

        Returns:
            Tuple ``(top_tickers, top_weights)``: the entries of
            ``self.names`` for the selected series and their weights,
            clipped at zero and scaled to sum to one.

        Raises:
            ValueError: If ``self.envelope`` is not one of the supported
                names.
        """
        self.model.fit(self.observed_data)
        fc = np.asarray(
            self.model.forecast(self.observed_data, self.steps_ahead))
        # Forecast move of every series over the forecast horizon.
        changes = fc[-1] - fc[0]
        top_k = changes.argsort()[::-1][:self.k]
        # NOTE(review): assumes self.names (set outside this class) accepts
        # an integer index array, e.g. a numpy array of tickers -- confirm.
        top_tickers = self.names[top_k]

        if self.envelope == 'proportional':
            top_weights = changes[top_k]
        elif self.envelope == 'log_proportional':
            # Non-positive changes give nan/-inf here; they are clipped to
            # zero below.
            top_weights = np.log(changes[top_k])
        elif self.envelope == 'uniform':
            # Sized by the selection, which is shorter than k when fewer
            # than k series are available.
            top_weights = np.ones(len(top_k))
        else:
            raise ValueError("Chose a proper strategy name")

        # Clip negative weights to zero. ``np.array(map(...))`` only works
        # on Python 2 (on Python 3 ``map`` is lazy and yields a 0-d object
        # array). ``np.fmax`` also maps NaN to 0, which is what the
        # element-wise ``max(0, w)`` did.
        top_weights = np.fmax(top_weights, 0)
        # NOTE(review): if every weight was clipped to zero the sum is 0 and
        # the division yields NaN weights; decide on a fallback with the
        # caller's expectations in mind.
        top_weights = top_weights / float(top_weights.sum())
        return top_tickers, top_weights

    def __str__(self):
        """Return the human-readable name of the strategy."""
        return "TSE Buy and Hold Strategy"
def get_data_dependece(X, data_sizes, folds=20, test_size=30, n_prev=2,
                       log=True):
    """Measure how forecast-based column ranking performs per training size.

    For every entry of ``data_sizes`` and every train/test pair produced by
    ``cascade_cv``, a ``TimeSeriesRegressor(LinearRegression())`` is fitted
    on the training rows and asked to forecast ``len(X_test)`` steps. The
    columns are ranked by forecast change (last forecast row minus first,
    largest first). For each rank the realised change of that column over
    the test rows is recorded, minus the mean realised change of all
    columns.

    Args:
        X: Table indexed with ``.iloc`` and exposing ``.shape``
            (presumably a pandas DataFrame with one column per series --
            confirm against callers).
        data_sizes: Iterable of training sizes handed to ``cascade_cv``.
        folds: Number of folds requested from ``cascade_cv``; also the
            size of the second axis of the result.
        test_size: Passed to ``cascade_cv`` as ``test_size``.
        n_prev: Passed to ``TimeSeriesRegressor`` as ``n_prev``.
        log: When true, print a progress line for every fold.

    Returns:
        Array of shape ``(len(data_sizes), folds, X.shape[1])`` where entry
        ``[i, j, k]`` belongs to the column ranked ``k``-th by the forecast
        in fold ``j`` of data size ``i``.
        NOTE(review): the array is created with ``np.empty``; if
        ``cascade_cv`` yields fewer than ``folds`` pairs the remaining
        slots stay uninitialised.
    """
    n_series = X.shape[1]
    bests = np.empty((len(data_sizes), folds, n_series))

    def changes(window):
        # Per-column move between the first and the last row.
        window = np.asarray(window)
        return window[-1] - window[0]

    for i, data_size in enumerate(data_sizes):
        pairs = cascade_cv(len(X), folds, data_size=data_size,
                           test_size=test_size, number=True)
        for j, pair in enumerate(pairs):
            if log:
                print('data size: {} trial {} '.format(data_size, j))
            X_train = np.array(X.iloc[pair[0], :])
            X_test = np.array(X.iloc[pair[1], :])

            tsr = TimeSeriesRegressor(LinearRegression(), n_prev=n_prev)
            tsr.fit(X_train)
            fc = tsr.forecast(X_train, len(X_test))

            # Column indices ordered from largest to smallest forecast move.
            best_is = changes(fc).argsort()[::-1]
            # The realised changes do not depend on the rank, so compute
            # them once per fold instead of once per column.
            test_changes = changes(X_test)
            bests[i, j, :] = test_changes[best_is] - np.mean(test_changes)
    return bests
def __init__(self, balance, tickers=None, base_model=None, n_prev=2,
             wait=100, steps_ahead=100, k=5, envelope='proportional',
             log=False, commission=.0002, flat_rate=8):
    """Set up the strategy and the forecasting model it relies on.

    Args:
        balance: Forwarded unchanged to the parent strategy constructor.
        tickers: Ticker names to trade. When ``None`` the list is obtained
            from ``s_and_p_names('2014-1-1', '2015-11-02')``.
        base_model: Estimator wrapped by ``TimeSeriesRegressor``. When
            ``None`` a fresh ``LinearRegression()`` is created for this
            instance.
        n_prev: Passed to ``TimeSeriesRegressor`` as ``n_prev``.
        wait: Forwarded unchanged to the parent strategy constructor.
        steps_ahead: Stored on the instance as ``self.steps_ahead``.
        k: Stored on the instance as ``self.k``.
        envelope: Stored on the instance as ``self.envelope``.
        log: Forwarded unchanged to the parent strategy constructor.
        commission: Forwarded unchanged to the parent strategy constructor.
        flat_rate: Forwarded unchanged to the parent strategy constructor.
    """
    if tickers is None:
        tickers = s_and_p_names('2014-1-1', '2015-11-02')
    # A default written as ``base_model=LinearRegression()`` is evaluated
    # once, when the function is defined, so every strategy instance would
    # share (and refit) the same estimator object. Build one per instance.
    if base_model is None:
        base_model = LinearRegression()
    super(TSEBuyAndHoldStrategy, self).__init__(
        balance, tickers, log=log, commission=commission, wait=wait,
        flat_rate=flat_rate)
    self.model = TimeSeriesRegressor(base_model, n_ahead=1, n_prev=n_prev)
    self.steps_ahead = steps_ahead
    self.k = k
    self.envelope = envelope
class TSEBuyAndHoldStrategy(InformedBuyAndHoldStrategy):
    """Buy-and-hold strategy that selects stocks from a time-series forecast.

    The forecasting model is a ``TimeSeriesRegressor`` wrapped around
    ``base_model``; ``choose_stocks`` ranks the series by forecast change
    and weights the top ``k`` according to ``envelope``.
    """

    def __init__(self, balance, tickers=None, base_model=None, n_prev=2,
                 wait=100, steps_ahead=100, k=5, envelope='proportional',
                 log=False, commission=.0002, flat_rate=8):
        """Set up the strategy and the forecasting model it relies on.

        Args:
            balance: Forwarded unchanged to the parent constructor.
            tickers: Ticker names to trade. When ``None`` the list is
                obtained from ``s_and_p_names('2014-1-1', '2015-11-02')``.
            base_model: Estimator wrapped by ``TimeSeriesRegressor``. When
                ``None`` a fresh ``LinearRegression()`` is created for
                this instance.
            n_prev: Passed to ``TimeSeriesRegressor`` as ``n_prev``.
            wait: Forwarded unchanged to the parent constructor.
            steps_ahead: Number of steps requested from the forecast in
                ``choose_stocks``.
            k: Number of top-ranked series kept by ``choose_stocks``.
            envelope: Weighting scheme: ``'proportional'``,
                ``'log_proportional'`` or ``'uniform'``.
            log: Forwarded unchanged to the parent constructor.
            commission: Forwarded unchanged to the parent constructor.
            flat_rate: Forwarded unchanged to the parent constructor.
        """
        if tickers is None:
            tickers = s_and_p_names('2014-1-1', '2015-11-02')
        # A default written as ``base_model=LinearRegression()`` is
        # evaluated once at definition time, so all instances would share
        # (and refit) one estimator object. Build one per instance instead.
        if base_model is None:
            base_model = LinearRegression()
        super(TSEBuyAndHoldStrategy, self).__init__(
            balance, tickers, log=log, commission=commission, wait=wait,
            flat_rate=flat_rate)
        self.model = TimeSeriesRegressor(base_model, n_ahead=1,
                                         n_prev=n_prev)
        self.steps_ahead = steps_ahead
        self.k = k
        self.envelope = envelope

    def choose_stocks(self):
        """Pick the ``k`` series with the largest forecast change.

        The model is refitted on ``self.observed_data`` and forecast
        ``self.steps_ahead`` steps ahead. Series are ranked by the
        difference between the last and first forecast rows.

        Returns:
            Tuple ``(top_tickers, top_weights)``: the entries of
            ``self.names`` for the selected series and their weights,
            clipped at zero and scaled to sum to one.

        Raises:
            ValueError: If ``self.envelope`` is not one of the supported
                names.
        """
        self.model.fit(self.observed_data)
        fc = np.asarray(
            self.model.forecast(self.observed_data, self.steps_ahead))
        # Forecast move of every series over the forecast horizon.
        changes = fc[-1] - fc[0]
        top_k = changes.argsort()[::-1][:self.k]
        # NOTE(review): assumes self.names (set outside this class) accepts
        # an integer index array, e.g. a numpy array of tickers -- confirm.
        top_tickers = self.names[top_k]

        if self.envelope == 'proportional':
            top_weights = changes[top_k]
        elif self.envelope == 'log_proportional':
            # Non-positive changes give nan/-inf here; they are clipped to
            # zero below.
            top_weights = np.log(changes[top_k])
        elif self.envelope == 'uniform':
            # Sized by the selection, which is shorter than k when fewer
            # than k series are available.
            top_weights = np.ones(len(top_k))
        else:
            raise ValueError("Chose a proper strategy name")

        # Clip negative weights to zero. ``np.array(map(...))`` only works
        # on Python 2 (on Python 3 ``map`` is lazy and yields a 0-d object
        # array). ``np.fmax`` also maps NaN to 0, which is what the
        # element-wise ``max(0, w)`` did.
        top_weights = np.fmax(top_weights, 0)
        # NOTE(review): if every weight was clipped to zero the sum is 0 and
        # the division yields NaN weights; decide on a fallback with the
        # caller's expectations in mind.
        top_weights = top_weights / float(top_weights.sum())
        return top_tickers, top_weights

    def __str__(self):
        """Return the human-readable name of the strategy."""
        return "TSE Buy and Hold Strategy"
from TimeSeriesEstimator import TimeSeriesRegressor, time_series_split


def mse(X1, X2, multioutput='raw_values'):
    """Return the root of the mean squared difference between X1 and X2.

    Despite the name, the square root is applied, so the result is a
    root-mean-squared error.

    Args:
        X1: First array.
        X2: Second array, subtracted element-wise from ``X1``.
        multioutput: ``'raw_values'`` returns one value per column (the
            mean is taken over axis 0); ``'uniform_average'`` returns the
            mean of those per-column values.

    Raises:
        ValueError: If ``multioutput`` is neither of the supported names
            (previously the function silently returned ``None``).
    """
    if multioutput not in ('raw_values', 'uniform_average'):
        raise ValueError(
            "multioutput must be 'raw_values' or 'uniform_average', "
            "got {!r}".format(multioutput))
    per_column = np.mean((X1 - X2)**2, axis=0)**.5
    if multioutput == 'raw_values':
        return per_column
    return np.mean(per_column)


# Load the data set and split it into a training and a test part.
X = datasets('sp500')
names = list(X.columns.values)
X_train, X_test = time_series_split(X)

# Fit a linear model on the previous n_prev observations.
n_prev = 2
tsr = TimeSeriesRegressor(LinearRegression(), n_prev=n_prev)
tsr.fit(X_train)

# Simulate 200 noisy forecast paths over the length of the test set, then
# request a second forecast combined with 'mean'.
fc = tsr.forecast(X_train, len(X_test), noise=.2, n_paths=200)
fc_mean = tsr.forecast(X_train, len(X_test), noise=.2, n_paths=200,
                       combine='mean')
# or, for speed, average the paths that were already simulated:
# fc_mean = np.mean(fc, axis=0)

# Plot column 1: every simulated path (faint red), the mean forecast and
# the test-set values.
# NOTE(review): fc is indexed with three axes here, presumably
# (path, step, series) -- confirm against TimeSeriesRegressor.forecast.
plt.plot(np.transpose(fc[:, :, 1]), 'r', alpha=.05)
plt.plot(np.transpose(fc_mean[:, 1]), 'b', label='Mean Forecast')
plt.plot(X_test[:, 1], 'g', label='Actual Price')
plt.legend()
plt.xlabel('days')
# NOTE(review): this chunk starts mid-script. `i`, `pred_test` and `y_test`
# are defined before the visible source; the two plot calls below were
# presumably the body of a per-column loop -- confirm against the full file.
plt.plot(pred_test[:, i], 'r', label="Predicted")
plt.plot(y_test[n_prev:, i], 'b--', label="Actual")
# The actual series is sliced from n_prev because the first predicted point
# needed n_prev steps of data.
# plt.title("Testing performance of " + titles[i])
# plt.legend(loc='lower left')
plt.gcf().set_size_inches(15, 6)
plt.show()

# Load the data set, split it, fit a linear model on the previous n_prev
# observations and forecast over the length of the test set.
X = datasets('sp500')
X_train, X_test = time_series_split(X)
n_prev = 3
tsr = TimeSeriesRegressor(LinearRegression(), n_prev=n_prev)
tsr.fit(X_train)
fc = tsr.forecast(X_train, len(X_test))


def changes(X):
    """Return, for every column of X, the last-row value minus the first."""
    return np.array([X[-1, i] - X[0, i] for i in range(X.shape[1])])


# Disabled scatter plot of log forecast change against log actual change:
#X_test_change = np.log(changes(X_test))
#fc_change = np.log(changes(fc))
#plt.plot(np.linspace(0,8),np.linspace(0,8),'b')
#plt.plot(X_test_change,fc_change, 'ro')

# Draw the forecast of at most the first 16 columns on a 4x4 subplot grid.
for i in range(min(16,X_train.shape[1])):
    plt.subplot(4,4,i+1)
    plt.plot(fc[:, i],'r')
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# KFold now lives in sklearn.model_selection with a different constructor
# signature. Left untouched because no use of KFold is visible here.
from sklearn.cross_validation import KFold


def mse(X1, X2, multioutput="raw_values"):
    """Return the root of the mean squared difference between X1 and X2.

    Despite the name, the square root is applied, so the result is a
    root-mean-squared error.

    Args:
        X1: First array.
        X2: Second array, subtracted element-wise from ``X1``.
        multioutput: ``"raw_values"`` returns one value per column (the
            mean is taken over axis 0); ``"uniform_average"`` returns the
            mean of those per-column values.

    Raises:
        ValueError: If ``multioutput`` is neither of the supported names
            (previously the function silently returned ``None``).
    """
    if multioutput not in ("raw_values", "uniform_average"):
        raise ValueError(
            "multioutput must be 'raw_values' or 'uniform_average', "
            "got {!r}".format(multioutput)
        )
    per_column = np.mean((X1 - X2) ** 2, axis=0) ** 0.5
    if multioutput == "raw_values":
        return per_column
    return np.mean(per_column)


# Load the data set and split it into a training and a test part.
X = datasets("sp500")
names = list(X.columns.values)
X_train, X_test = time_series_split(X)

# Fit a linear model on the previous n_prev observations.
n_prev = 2
tsr = TimeSeriesRegressor(LinearRegression(), n_prev=n_prev)
tsr.fit(X_train)

# Simulate 200 noisy forecast paths over the length of the test set, then
# request a second forecast combined with "mean".
fc = tsr.forecast(X_train, len(X_test), noise=0.2, n_paths=200)
fc_mean = tsr.forecast(X_train, len(X_test), noise=0.2, n_paths=200, combine="mean")
# or, for speed, average the paths that were already simulated:
# fc_mean = np.mean(fc, axis=0)

# Plot column 1: every simulated path (faint red), the mean forecast and
# the test-set values.
# NOTE(review): fc is indexed with three axes here, presumably
# (path, step, series) -- confirm against TimeSeriesRegressor.forecast.
plt.plot(np.transpose(fc[:, :, 1]), "r", alpha=0.05)
plt.plot(np.transpose(fc_mean[:, 1]), "b", label="Mean Forecast")
plt.plot(X_test[:, 1], "g", label="Actual Price")
plt.legend()
plt.xlabel("days")
plt.ylabel("Price")
plt.title("Forecasting Alcoa (AA)")
plt.show()