def train(): x_data, y_data = read(os.getcwd() + "\datasets\\2\parkinsons_updrs.data.csv") reduces = [y[0] for y in y_data] x_data = feature_selection(np.array(x_data), np.array(reduces)).tolist() x_data, m, s = normalize_data(x_data) x_train, x_validation, x_test = divide_data_set(x_data) y_train, y_validation, y_test = divide_data_set(y_data) motor_x_train = np.array([[y[0]] for y in y_train]) total_x_train = np.array([[y[1]] for y in y_train]) motor_x_test = np.array([[y[0]] for y in y_test]) total_x_test = np.array([[y[1]] for y in y_test]) x_test = np.array(x_test) y_test = np.array(y_test) print("\t\tFinished divide data...") n_x_train = np.array(x_train) n_y_train = np.array([[y[0]] for y in y_train]) n_y_train = np.array(y_train) print("\t\tTraining neural network...") # Step 6: train network path = input("Input output file name for neural network(enter for end): ") while path != "": for i in range(0, 10): net = create_neural_network(n_x_train, n_y_train, path=os.getcwd() + "\\" + path, lr=(i + 1) / 10) test(x_test, y_test, net, i) path = input( "Input output file name for neural network(enter for end): ")
def train(): x_data, y_data = read(os.getcwd() + "\datasets\\2\parkinsons_updrs.data-sorted.csv") # Step 2: feature selection reduces = [y[0] for y in y_data] x_data = feature_selection(np.array(x_data), np.array(reduces)).tolist() # Step 3: normalization x_data, m, s = normalize_data(x_data) #x_data, mean, sqrt = normalize_data(y_data) y_data, maximums = minmax_normalize(y_data) # Step 4: find min and max value for each attribute minmax = find_minmax(x_data) # Step 5: divide data x_train, x_validation, x_test = divide_data_set(x_data) y_train, y_validation, y_test = divide_data_set(y_data) n_x_train = x_train[0:1000] n_y_train = [[y[0]] for y in y_train[0:1000]] # Step 6: train network net, res = create_neural_network(n_x_train, n_y_train, minmax, os.getcwd() + "\\ds2-10i753h1o-800e-.net") #test(x_test, y_test, net, 0,means,sqrt) test(x_test, y_test, net, maximums)
def fit(self, X_train, y_train):
    X_train_nor = util.normalize_data(X_train)
    class_count = len(np.unique(y_train, False, False, True)[0])
    self.initAll(X_train_nor.shape[0], X_train_nor.shape[1], class_count)

    # Prepend a bias row of ones; columns are training instances
    X_modified_train = np.vstack((np.ones(self.train_size), X_train_nor.transpose()))

    while self.itr < 3:
        self.count = self.count + 1
        if self.count > 20:
            self.reset()

        delta_W = np.zeros((self.class_count, self.W_size))
        for t in range(0, self.train_size):
            # Compute the logits for instance t
            o = np.zeros(self.class_count)
            for i in range(0, self.class_count):
                for j in range(0, self.W_size):
                    o[i] = o[i] + self.W[i][j] * X_modified_train[j][t]
            y = self.softmax(o)

            # Accumulate the gradient (r - y) * x, with r the one-hot target
            for i in range(0, self.class_count):
                r = 1 if int(y_train[t]) == i else 0
                for j in range(0, self.W_size):
                    delta_W[i][j] = delta_W[i][j] + (r - y[i]) * X_modified_train[j][t]

        # Batch update of the weights
        for i in range(0, self.class_count):
            for j in range(0, self.W_size):
                self.W[i][j] = self.W[i][j] + self.factor * delta_W[i][j]
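# The fit method above (and the predict method later in this collection) calls
# self.softmax(o), which is not included in the snippet. A minimal, numerically
# stable sketch of such a helper (an assumption about its behavior, not the
# original implementation):
def softmax(self, o):
    shifted = np.asarray(o) - np.max(o)  # subtract the max logit for numerical stability
    exps = np.exp(shifted)
    return exps / np.sum(exps)           # probabilities summing to 1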
def plot_norm_data_vertical_lines(df_orders, portvals, portvals_bm,
                                  plot_vertical_lines=True, save_fig=True,
                                  fig_name="plot.png"):
    """Plots portvals and portvals_bm, showing vertical lines for orders

    Parameters:
    df_orders: A dataframe that contains portfolio orders
    portvals: A dataframe with one column containing daily portfolio value
    portvals_bm: A dataframe with one column containing daily benchmark value
    plot_vertical_lines: Whether to draw vertical lines at buy and sell dates
    save_fig: Whether to save the plot or not
    fig_name: The name of the saved figure

    Returns:
    Plot a chart of the portfolio and benchmark performances
    """
    # Normalize data
    portvals = normalize_data(portvals)
    portvals_bm = normalize_data(portvals_bm)
    df = portvals_bm.join(portvals)

    # Plot the normalized benchmark and portfolio
    plt.plot(df.loc[:, "Benchmark"], label="Benchmark")
    plt.plot(df.loc[:, "Portfolio"], label="Portfolio")

    # Plot the vertical lines for buy and sell signals
    if plot_vertical_lines:
        for date in df_orders.index:
            if df_orders.loc[date, "Shares"] > 0:
                plt.axvline(date, color='g', linestyle='--')
            elif df_orders.loc[date, "Shares"] < 0:
                plt.axvline(date, color='r', linestyle='--')

    plt.title("Portfolio vs. Benchmark")
    plt.xlabel("Date")
    plt.ylabel("Normalized prices")
    plt.legend()

    # Set figure size
    fig = plt.gcf()
    fig.set_size_inches(12, 6)

    if save_fig:
        plt.savefig(fig_name)
    else:
        plt.show()
def get_data(x_data, y_data):
    # Feature selection driven by the first target column, then z-score normalization
    reduces = [y[0] for y in y_data]
    fs = feature_selection(np.array(x_data), np.array(reduces))
    n_x_data, means_x, sqrt_x = normalize_data(fs.tolist())

    x_train, x_validation, x_test = divide_data_set(n_x_data)
    y_train, y_validation, y_test = divide_data_set(y_data)
    return x_train, x_validation, x_test, y_train, y_validation, y_test
def simulate_analytical_timetable_vary_mu(
        self, timetable: np.ndarray, theta: float = 0.5,
        mu: Callable[[float], float] = None, sigma: float = 0.05,
        n_paths: int = 10, re_normalize: bool = True,
        init_condition: float = 0) -> np.ndarray:
    """
    Analytically simulate the OU paths with a given timetable per path.

    Simulate paths following the drifted OU model:
        dX = theta * (mu(t) - X) * dt + sigma * dW,
    where mu(t) is a function of time, and theta, sigma are constants.

    Method used is the full analytical solution by [Doob (1942)]:
        X_t = X_0 * exp(-theta * t) + mu(t) * (1 - exp(-theta * t))
              + sigma * exp(-theta * t) * Normal(0, exp(2 * theta * t) - 1) / sqrt(2 * theta)

    :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for
        calculation.
    :param theta: (float) Optional. Mean-reverting speed. Defaults to 0.5.
    :param mu: (func) Optional. Mean-reverting level as a function of time. Defaults to the
        constant function 0. Note that the time unit should be consistent with other inputs,
        and t=0 at the beginning of the simulation.
    :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
    :param n_paths: (float) Optional. Number of paths in the simulation. Needs to be >=2.
        Defaults to 10.
    :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each
        time advancement. This will only be triggered if n_paths >= 2. Defaults to True.
    :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
    :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
    """

    # Initialize
    n_points = len(timetable)
    simulated_paths = np.zeros(shape=(n_points, n_paths))
    simulated_paths[0, :] += init_condition
    if mu is None:  # Default mu(t) = 0
        mu = lambda t: 0 * t

    # 1. Draw from Gaussians with mean = 0.
    gaussians = np.random.normal(loc=0, scale=1, size=(n_points - 1, n_paths))

    # 2. Re-normalize the Gaussians per point or instance (row-wise)
    if re_normalize and n_paths >= 2:
        gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

    # 3. Scale the time-transformed Wiener process: generate Gaussians with
    #    mean = 0, var = exp(2 theta t) - 1
    trans_gaussian = np.zeros_like(gaussians)
    trans_gaussian[0] = (np.sqrt(np.exp(2 * theta * timetable[1])
                                 - np.exp(2 * theta * timetable[0])) * gaussians[0])
    for i in range(1, n_points - 1):
        trans_gaussian[i, :] = (trans_gaussian[i - 1, :]
                                + np.sqrt(np.exp(2 * theta * timetable[i])
                                          - np.exp(2 * theta * timetable[i - 1])) * gaussians[i, :])

    # 4. Construct the paths exactly.
    for i in range(1, n_points):
        exp_decay = np.exp(-theta * timetable[i])
        simulated_paths[i, :] = (simulated_paths[0, :] * exp_decay
                                 + mu(timetable[i]) * (1 - exp_decay)
                                 + sigma * exp_decay * trans_gaussian[i - 1, :] / np.sqrt(2 * theta))

    # 5. Output the array.
    return simulated_paths
def plot_data_vert(df_orders, portvals, portvals_bm, insample=True):
    # Plot portfolio vs. benchmark, using vertical lines for the orders

    # Normalize data
    portvals = normalize_data(portvals)
    portvals_bm = normalize_data(portvals_bm)

    df = portvals_bm.join(portvals, lsuffix='Benchmark', rsuffix='Portfolio')
    df.fillna(method="ffill", inplace=True)
    df.fillna(method="bfill", inplace=True)
    df.rename(columns={'0Benchmark': 'Benchmark', '0Portfolio': 'Portfolio'}, inplace=True)

    plt.plot(df.loc[:, "Portfolio"], label='Portfolio', linewidth=1.2, color='black')
    plt.plot(df.loc[:, "Benchmark"], label='Benchmark', linewidth=1.2, color='b')

    # Plot vertical lines at the order dates
    for date in df_orders.index:
        if df_orders.loc[date, "Order"] == "BUY":
            plt.axvline(x=date, color='g', linestyle='--')
        else:
            plt.axvline(x=date, color='r', linestyle='--')

    plt.title("Portfolio vs. Benchmark")
    plt.xlabel("Date")
    plt.xticks(rotation=50)
    plt.ylabel("Normalized prices")
    plt.legend(loc="upper left")

    if insample:
        filename = "portfolio_vs_benchmark(InSample).jpg"
    else:
        filename = "portfolio_vs_benchmark(OutSample).jpg"
    plt.savefig(filename)
    plt.clf()
    plt.cla()
def simulate_analytical_timetable(self, timetable: np.ndarray, r: float = 0,
                                  sigma: float = 0.05, n_paths: int = 10,
                                  re_normalize: bool = True,
                                  init_condition: float = 0) -> np.ndarray:
    """
    Analytically simulate the Brownian motions according to a given timetable.

    Use the analytical solution to simulate the paths following the drifted Brownian model:
        dX = r * dt + sigma * dW,
    where r, sigma are constants. It turns out that the analytical approach is equivalent to
    the discrete case: dX is drawn from Gaussian(r*dt, sigma) for each advancement.

    :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for
        calculation.
    :param r: (float) Optional. Drift rate. Defaults to 0.
    :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
    :param n_paths: (float) Optional. Number of paths in the simulation. Needs to be >=2.
        Defaults to 10.
    :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each
        time advancement. This will only be triggered if n_paths >= 2. Defaults to True.
    :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
    :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
    """

    # Initialize
    n_points = len(timetable)
    simulated_paths = np.zeros(shape=(n_points, n_paths))
    simulated_paths[0, :] += init_condition

    # 1. Draw from Gaussians with mean = 0. The mean r*dt is added back later.
    gaussians = np.random.normal(loc=0, scale=1, size=(n_points - 1, n_paths))

    # 2. Re-normalize the Gaussians per point or instance (row-wise)
    if re_normalize and n_paths >= 2:
        gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

    # 3. Add back the drift term r*dt, and rescale with sigma.
    exact_advancements = np.zeros_like(gaussians)
    for i in range(n_points - 1):
        dt = timetable[i + 1] - timetable[i]
        sqdt = np.sqrt(dt)
        exact_advancements[i, :] = sigma * gaussians[i, :] * sqdt + r * dt

    # 4. Construct the paths exactly.
    for i in range(1, n_points):
        simulated_paths[i, :] = simulated_paths[i - 1, :] + exact_advancements[i - 1, :]

    # 5. Output the array.
    return simulated_paths
def simulate_discrete_timetable(self, timetable: np.ndarray, r: float = 0,
                                sigma: float = 0.05, n_paths: int = 10,
                                re_normalize: bool = True,
                                init_condition: float = 0) -> np.ndarray:
    """
    Discretely simulate the Brownian motions according to a given timetable.

    Use Euler advancement to simulate the paths following the drifted Brownian model:
        dX = r * dt + sigma * dW,
    where r, sigma are constants. It happens that the discrete result is exact.

    :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for
        calculation.
    :param r: (float) Optional. Drift rate. Defaults to 0.
    :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
    :param n_paths: (float) Optional. Number of paths in the simulation. Needs to be >=2.
        Defaults to 10.
    :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each
        time advancement. This will only be triggered if n_paths >= 2. Defaults to True.
    :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
    :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
    """

    # Initialize
    n_points = len(timetable)
    simulated_paths = np.zeros(shape=(n_points, n_paths))
    simulated_paths[0, :] += init_condition

    # 1. Draw from the Gaussian distribution accordingly.
    gaussians = np.random.normal(loc=0, scale=1, size=(n_points - 1, n_paths))

    # 2. Re-normalize the Gaussians per point or instance (normalize every row)
    if re_normalize and n_paths >= 2:
        gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

    # 3. Construct the paths via Euler advancement.
    for i in range(1, n_points):
        dt = timetable[i] - timetable[i - 1]
        sqdt = np.sqrt(dt)
        simulated_paths[i, :] = (simulated_paths[i - 1, :]
                                 + r * dt + sigma * gaussians[i - 1, :] * sqdt)

    # 4. Output the array.
    return simulated_paths
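# A minimal usage sketch for the two Brownian simulators above. The enclosing class and its
# constructor are not shown in this snippet, so `BrownianSimulator()` below is a hypothetical
# stand-in for however the class is actually instantiated.
sim = BrownianSimulator()  # hypothetical instance of the enclosing class
times = np.linspace(0, 1, 253)  # one year of daily time points, including t=0
paths = sim.simulate_discrete_timetable(times, r=0.02, sigma=0.1, n_paths=50)
print(paths.shape)  # (253, 50): one row per time point, one column per path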
def predict(self, x):
    X_test_nor = util.normalize_data(x)
    # Prepend a bias row of ones; columns are test instances
    X_modified_test = np.vstack((np.ones(X_test_nor.shape[0]), X_test_nor.transpose()))

    y_pred = []
    for t in range(0, X_test_nor.shape[0]):
        # Compute the logits for instance t, then pick the most probable class
        o = np.zeros(self.class_count)
        for i in range(0, self.class_count):
            for j in range(0, self.W_size):
                o[i] = o[i] + self.W[i][j] * X_modified_test[j][t]
        y = self.softmax(o)
        y_pred.append(self.get_response_class(y))
    return y_pred
def test_results(weights):
    print("Testing with weights...\n")
    start_date = '2014-01-02'
    end_date = '2016-08-31'
    dates = pd.date_range(start_date, end_date)
    spy = util.get_data(['SPY'], dates)
    normed_spy = util.normalize_data(spy)
    expected = get_expected(dates)
    correct_count = 0
    test_inputs = create_inputs(dates, training=False)

    # Compare the model output against the expected label for each date
    for date, row in test_inputs.iterrows():
        if expected.loc[date][0] == output(date, test_inputs, weights):  # .loc replaces deprecated .ix
            correct_count += 1

    print("Correct count: {}, Total: {}, Percent Correct: {}".format(
        correct_count, test_inputs.shape[0],
        (correct_count / test_inputs.shape[0]) * 100))
def simulate_discrete_timetable_vary_mu(
        self, timetable: np.ndarray, theta: float = 0.5,
        mu: Callable[[float], float] = None, sigma: float = 0.05,
        n_paths: int = 10, re_normalize: bool = True,
        init_condition: float = 0) -> np.ndarray:
    """
    Discretely simulate the OU paths with a given timetable per path.

    Use Euler advancement to simulate the paths following the drifted OU model:
        dX = theta * (mu(t) - X) * dt + sigma * dW,
    where mu(t) is a function of time, and theta, sigma are constants. Assume t=0 at the
    beginning.

    :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for
        calculation.
    :param theta: (float) Optional. Mean-reverting speed. Defaults to 0.5.
    :param mu: (func) Optional. Mean-reverting level as a function of time. Defaults to the
        constant function 0. Note that the time unit should be consistent with other inputs,
        and t=0 at the beginning of the simulation.
    :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
    :param n_paths: (float) Optional. Number of paths in the simulation. Needs to be >=2.
        Defaults to 10.
    :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each
        time advancement. This will only be triggered if n_paths >= 2. Defaults to True.
    :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
    :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
    """

    # Initialize
    n_points = len(timetable)
    simulated_paths = np.zeros(shape=(n_points, n_paths))
    simulated_paths[0, :] += init_condition
    if mu is None:  # Default mu(t) = 0
        mu = lambda t: 0 * t

    # 1. Draw from the Gaussian distribution accordingly.
    gaussians = np.random.normal(loc=0, scale=1, size=(n_points - 1, n_paths))

    # 2. Re-normalize the Gaussians per point or instance (normalize every row)
    if re_normalize and n_paths >= 2:
        gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

    # 3. Construct the paths via Euler advancement.
    for i in range(1, n_points):
        dt = timetable[i] - timetable[i - 1]
        sqdt = np.sqrt(dt)
        increments = (theta * (mu(timetable[i]) - simulated_paths[i - 1, :]) * dt
                      + sigma * gaussians[i - 1, :] * sqdt)
        simulated_paths[i, :] = simulated_paths[i - 1, :] + increments

    # 4. Output the array.
    return simulated_paths
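# A minimal usage sketch for the OU simulators above, with a time-varying mean-reverting level.
# The enclosing class is not shown in this snippet, so `OUSimulator()` is a hypothetical
# stand-in for however the class is actually instantiated; the seasonal mu below is only an
# illustrative choice.
sim = OUSimulator()  # hypothetical instance of the enclosing class
times = np.linspace(0, 1, 253)
seasonal_mu = lambda t: 0.1 * np.sin(2 * np.pi * t)  # example mean-reverting level mu(t)
ou_paths = sim.simulate_discrete_timetable_vary_mu(times, theta=2.0, mu=seasonal_mu,
                                                   sigma=0.05, n_paths=20)
print(ou_paths.shape)  # (253, 20)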
def load_data(dataset, dataset_type='txt', normalize=False, print_data=False):
    """Get Data."""
    print(f'Loading Data from file... {dataset}')
    data, nrows, ncols = util.get_data_as_matrix(dataset, Path(__file__),
                                                 filetype=dataset_type)
    mu_rowvec = None
    sigma_rowvec = None

    if print_data:
        print('First 10 rows of the dataset:')
        print('\n'.join(
            f'rownum={i} : '
            f'feature_matrix_row={j}, output_row={k}'
            for i, j, k in util.iterate_matrix(data, ((0, 10), ),
                                               ((0, ncols - 1), (ncols - 2, ncols)))))

    if normalize:
        _, mu_rowvec, sigma_rowvec = \
            util.normalize_data(data[:, 0:np.shape(data)[1] - 1])

        if print_data:
            print(f'mu={mu_rowvec}')
            print(f'sigma={sigma_rowvec}')
            print('First 10 rows of the dataset (After Normalization):')
            print('\n'.join(
                f'rownum={i} : '
                f'feature_matrix_row={j}, output_row={k}'
                for i, j, k in util.iterate_matrix(data, ((0, 10), ),
                                                   ((0, ncols - 1), (ncols - 1, ncols)))))

    feature_matrix = np.append(np.ones(shape=(nrows, 1)), data[:, 0:ncols - 1], axis=1)
    output_colvec = np.reshape(data[:, ncols - 1], newshape=(nrows, 1))

    return data, feature_matrix, output_colvec, \
        nrows, ncols - 1, mu_rowvec, sigma_rowvec
def load_mnist(dataset):
    """Load MNIST dataset

    Parameters
    ----------
    dataset : string
        address of the pickled MNIST dataset

    Returns
    -------
    X : numpy.ndarray
        normalized and ZCA-whitened training set
    """
    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, _, _ = pickle.load(f)
    f.close()

    X = train_set[0].T
    X = util.normalize_data(X)
    X = util.ZCA_whitening(X)

    return X.T
def create_inputs(dates, training=True):
    inputs = pd.DataFrame(index=dates)
    spy = util.get_data(['SPY'], dates)
    normed_spy = util.normalize_data(spy)  # SPY normalized
    inputs = inputs.join(spy)

    # Build the inputs
    input_distance_from_upper, input_distance_from_lower = \
        input_distance_from_upper_and_lower_bollinger_band(normed_spy)
    dollar_over_euro = download_indices_csv(dates, "data/indices", "DEXUSEU.csv",
                                            ["DATE", "DEXUSEU"])  # dollar / euro
    ten_year_treasury_bond = download_indices_csv(dates, "data/indices", "DGS10.csv",
                                                  ["DATE", "DGS10"])
    boa_high_yield_options = download_indices_csv(dates, "data/indices", "BAMLH0A0HYM2.csv",
                                                  ["DATE", "BAMLH0A0HYM2"])

    # Join inputs together
    inputs = inputs.join(input_distance_from_upper)
    inputs = inputs.join(input_distance_from_lower)
    inputs = inputs.join(dollar_over_euro)
    inputs = inputs.join(ten_year_treasury_bond)
    inputs = inputs.join(boa_high_yield_options)

    # Drop rows with missing values in any input column
    inputs = inputs.dropna(subset=["SPY"])
    inputs = inputs.dropna(subset=["Distance From Upper"])
    inputs = inputs.dropna(subset=["Distance From Lower"])
    inputs = inputs.dropna(subset=["DEXUSEU"])
    inputs = inputs.dropna(subset=["DGS10"])
    inputs = inputs.dropna(subset=["BAMLH0A0HYM2"])
    inputs = inputs.iloc[:, 1:]  # .iloc replaces deprecated .ix

    if training:
        inputs = inputs.shift(1)  # shift returns a new DataFrame, so assign it back

    return inputs
# Read image

## MNIST
# train_set, valid_set, test_set=util.load_mnist("mnist.pkl.gz");
# X=util.sample_patches_mnist(train_set, 5000, 16);

### CIFAR-10
data_set = util.load_CIFAR_batch("./cifar-10/data_batch_1")
data_x = data_set[0] / 255.0
data_x = np.mean(data_x, axis=3)
X = util.sample_patches_cifar10(data_x, 5000, 16)

# Normalization and whitening
X = util.normalize_data(X)
print "[MESSAGE] Data is normalized"
X = util.ZCA_whitening(X)
print "[MESSAGE] Data is whitened"

# plt.figure(1);
# for i in xrange(100):
#     plt.subplot(10, 10, i + 1);
#     plt.imshow(X[:, i].reshape(16, 16), cmap=plt.get_cmap('gray'), interpolation='nearest');
#     plt.axis('off')
#
# plt.show();

# K-means procedure
def get_features1(self, prices, symbol, print_data=False):
    '''
    Compute technical indicators and use them as features to be fed into a Q-learner.

    :param prices: Adj Close prices dataframe
    :param symbol: The symbol to compute features for
    :param print_data: Whether to add extra columns for printing/plotting
        (renamed from `print` to avoid shadowing the built-in)
    :return: Normalized features dataframe
    '''
    # Fill NaN values if any
    prices.fillna(method="ffill", inplace=True)
    prices.fillna(method="bfill", inplace=True)
    prices.fillna(1.0, inplace=True)

    # Price
    adj_close = prices[symbol]

    # Compute momentum
    mom = get_momentum(adj_close, window=10)

    # Compute RSI
    rsi = get_RSI(adj_close)

    # Compute rolling mean
    rm = get_rolling_mean(adj_close, window=10)

    # Compute rolling standard deviation
    rstd = get_rolling_std(adj_close, window=10)

    # Compute upper and lower Bollinger bands
    upper_band, lower_band = get_bollinger_bands(rm, rstd)

    # Compute SMA
    sma = get_sma_indicator(adj_close, rm)

    df = prices.copy()
    df['Momentum'] = mom
    df['Adj. Close/SMA'] = adj_close / sma
    df['middle_band'] = rm

    # Delete 'Adj Close' column
    del df[symbol]
    if set(['cash']).issubset(df.columns):
        del df['cash']

    # Normalize dataframe
    df_norm = normalize_data(df)

    # Add Adj Close, Bollinger Bands and RSI if printing
    if print_data:
        df_norm['Adj Close'] = prices[symbol]
        df_norm['upper_band'] = upper_band
        df_norm['lower_band'] = lower_band
        df_norm['middle_band'] = rm
        df_norm['RSI'] = rsi

    # Drop NaN
    df_norm.dropna(inplace=True)

    return df_norm
def _simulate_over_grid(self, theta: float, g: Callable[[float], float], sigma: float,
                        lambda_j: float, mu_j: Callable[[float], float], sigma_j: float,
                        dt: float, n_points: int, n_paths: int, re_normalize: bool,
                        init_condition: float, jump_timestamp: List[np.ndarray],
                        jump_size: List[list]) -> np.ndarray:
    """
    The OU-Jump simulation engine with a given timestamp and size for jumps.

    The simulation is carried out discretely from the relation:
        X_t = g(t) + Y_t
        dY_t = -theta * Y_t * dt + sigma * dW + logJ_t * dN_t
    where g(t) is the deterministic drift, logJ_t ~ Normal(mu_j, sigma_j**2), and
    N_t ~ Poisson(lambda_j). Each path is simulated according to the following:
        X_t = g(t) + (X_0 - g(0)) * exp(-theta * t)
              + sigma * int{_0^t}{exp(-theta * (t - tau)) dW_tau}
              + int{_0^t}{exp(-theta * (t - tau)) * logJ_t dN_tau}
    The two stochastic integration terms are evaluated in the Ito sense (function evaluated
    at the beginning of each discretization interval) over the grid length dt.

    :param theta: (float) Mean-reverting speed.
    :param g: (Callable[[float], float]) The deterministic drift function for the OU-Jump process.
    :param sigma: (float) Standard deviation for the Brownian motion.
    :param lambda_j: (float) Jump rate.
    :param mu_j: (Callable[[float], float]) Jump average as a function of time.
    :param sigma_j: (float) The jump std constant.
    :param dt: (float) Time advancement step.
    :param n_points: (int) Number of steps per simulated path, including the initial condition.
    :param n_paths: (float) Number of paths in the simulation. Needs to be >=2.
    :param re_normalize: (bool) Whether to renormalize the Gaussians sampled at each time
        advancement. This will only be triggered if n_paths >= 2.
    :param init_condition: (float) Initial start position for every path.
    :param jump_timestamp: (List[np.ndarray]) The times where the jumps occur.
    :param jump_size: (List[list]) The size of each jump.
    :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
    """

    # Initialize the timetable. In this case it is a grid.
    total_time = dt * n_points
    timetable = np.arange(start=0, stop=total_time, step=dt)

    # Initialize simulated_paths
    simulated_paths = np.zeros(shape=(n_points, n_paths))
    simulated_paths[0, :] += init_condition

    # 1. Handle the Brownian part.
    # Draw from the Gaussian distribution accordingly.
    gaussians = np.random.normal(loc=0, scale=1, size=(n_points - 1, n_paths))

    # Re-normalize the Gaussians per point or instance (normalize every row)
    if re_normalize and n_paths >= 2:
        gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

    # 2. Generate the paths for the jump diffusion process using discretization with interval dt.
    sqdt = np.sqrt(dt)
    for path in range(n_paths):
        # Precalculate part of the Brownian and jump stochastic integrals for faster calculation
        cumu_brownian_precalculate = np.cumsum([
            np.exp(theta * timetable[j]) * gaussians[j][path] * sqdt
            for j in range(n_points - 1)
        ]) * sigma
        cumu_jump_precalculate = np.cumsum([
            np.exp(theta * jump_timestamp[path][j]) * jump_size[path][j]
            for j in range(len(jump_timestamp[path]))
        ])

        # Initialize variables for the jump process
        jump_counter = 0  # Count the number of jumps. Serves as a pointer.
        cumu_jump = 0  # Initialize the cumulative jump integral value
        jump_timestamp_path = set(jump_timestamp[path])  # Hash set for faster lookup

        for point in range(1, n_points):
            cur_time = timetable[point]  # Current time
            pre_time = timetable[point - 1]  # Time one step back

            # Calculate the deterministic drift part, Brownian part and jump part separately
            drift_part = g(cur_time) + (simulated_paths[0, path] - g(0)) * np.exp(-theta * cur_time)

            # Brownian increment. Note the stochastic integral is taken in the Ito sense
            cumu_brownian = np.exp(-theta * pre_time) * cumu_brownian_precalculate[point - 1]

            # Jump increment. Note the stochastic integral is taken in the Ito sense
            if cur_time in jump_timestamp_path:  # If this is the time for a jump
                cumu_jump = cumu_jump_precalculate[jump_counter] * np.exp(-theta * pre_time)
                jump_counter += 1

            # Assemble together for this path
            simulated_paths[point, path] = drift_part + cumu_brownian + cumu_jump

    return simulated_paths
    fm_transpose_fm = fm_transpose_fm + \
        regularization_param * diag_matrix
    fm_transpose_fm_pinv = np.linalg.pinv(fm_transpose_fm)
    fm_transpose_output_colvec = feature_matrix.transpose() @ output_colvec

    return fm_transpose_fm_pinv @ fm_transpose_output_colvec


if __name__ == '__main__':
    DATASET = 'resources/data/city_dataset_97_2.txt'
    print(f'Gradient Descent For Dataset : {DATASET}')
    data, nrows, ncols = util.\
        get_data_as_matrix(DATASET, Path(__file__))

    util.normalize_data(data[:, 0:ncols - 1])
    output = data[:, ncols - 1:ncols]
    features = np.append(np.ones(shape=(nrows, 1)), data[:, 0:ncols - 1], axis=1)

    theta, cost_history = \
        gradient_descent(features, output, nrows, ncols - 1,
                         theta_colvec=np.zeros(shape=(ncols, 1)),
                         alpha=0.03, num_iters=1500,
                         debug=True, debug_print=True)
    print(f'theta(Gradient Descent)={theta}')
    print(f'cost_history={cost_history}')
    print(f'{"*" * 80}')

    optimal_theta, optimal_alpha_val, optimal_cost_val, \
if __name__ == "__main__": start_date = dt.datetime(2008, 1, 1) end_date = dt.datetime(2009, 12, 31) # Get NYSE trading dates dates = get_exchange_days(start_date, end_date, dirpath="../data/dates_lists", filename="NYSE_dates.txt") symbols = ["AAPL"] # Get stock data and normalize it df_price = get_data(symbols, dates) norm_price = normalize_data(df_price) window = 20 num_std = 2 for symbol in symbols: # Compute rolling mean rolling_mean = norm_price[symbol].rolling(window=window).mean() # Compute rolling standard deviation rolling_std = norm_price[symbol].rolling(window=window).std() # Get momentum momentum = get_momentum(norm_price[symbol], window) # Plot momentum plot_momentum(
def plot_norm_data_vertical_lines(df_orders, portvals, portvals_bm, vert_lines=False):
    """Plots portvals and portvals_bm, showing vertical lines for buy and sell orders

    Parameters:
    df_orders: A dataframe that contains portfolio orders
    portvals: A dataframe with one column containing daily portfolio value
    portvals_bm: A dataframe with one column containing daily benchmark value
    vert_lines: Whether to draw vertical lines at buy and sell dates

    Returns:
    Plot a chart of the portfolio and benchmark performances
    """
    # Normalize data
    portvals = normalize_data(portvals)
    portvals_bm = normalize_data(portvals_bm)
    df = portvals_bm.join(portvals)

    # Min range
    if df.loc[:, "Benchmark"].min() < df.loc[:, "Portfolio"].min():
        min_range = df.loc[:, "Benchmark"].min()
    else:
        min_range = df.loc[:, "Portfolio"].min()

    # Max range
    if df.loc[:, "Benchmark"].max() > df.loc[:, "Portfolio"].max():
        max_range = df.loc[:, "Benchmark"].max()
    else:
        max_range = df.loc[:, "Portfolio"].max()

    # Plot the normalized benchmark and portfolio
    trace_bench = go.Scatter(x=df.index, y=df.loc[:, "Benchmark"], name="Benchmark",
                             line=dict(color='#17BECF'), opacity=0.8)
    trace_porfolio = go.Scatter(x=df.index, y=df.loc[:, "Portfolio"], name="Portfolio",
                                line=dict(color='#000000'), opacity=0.8)
    data = [trace_bench, trace_porfolio]

    # Plot the vertical lines for buy and sell signals
    shapes = list()
    if vert_lines:
        buy_line = []
        sell_line = []
        for date in df_orders.index:
            if df_orders.loc[date, "Order"] == "BUY":
                buy_line.append(date)
            else:
                sell_line.append(date)

        # Vertical lines extend slightly above the maximum value
        line_size = max_range + (max_range * 10 / 100)

        # Buy lines (green, dashed)
        for i in buy_line:
            shapes.append({
                'type': 'line', 'xref': 'x', 'yref': 'y',
                'x0': i, 'y0': 0, 'x1': i, 'y1': line_size,
                'line': {'color': 'rgb(0, 102, 34)', 'width': 1, 'dash': 'dash'},
            })

        # Sell lines (red, dashed)
        for i in sell_line:
            shapes.append({
                'type': 'line', 'xref': 'x', 'yref': 'y',
                'x0': i, 'y0': 0, 'x1': i, 'y1': line_size,
                'line': {'color': 'rgb(255, 0, 0)', 'width': 1, 'dash': 'dash'},
            })

    layout = dict(
        autosize=True,
        shapes=shapes,
        margin=go.Margin(l=50, r=50, b=100, t=100, pad=4),
        title="Portfolio vs Benchmark",
        xaxis=dict(
            title='Dates',
            rangeselector=dict(buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(step='all')
            ])),
            range=[portvals.index[0], portvals.index[-1]]),
        yaxis=dict(title='Normalized Prices',
                   range=[min_range - (min_range * 10 / 100),
                          max_range + (max_range * 10 / 100)]),
    )

    fig = dict(data=data, layout=layout)
    iplot(fig)
from util import get_data, normalize_data

if __name__ == "__main__":
    start_val = 1000000

    # Define date range
    start_date = '2015-10-02'
    end_date = '2017-10-17'
    symbols = ['SPY', 'XOM', 'GOOG', 'ALRM']
    allocs = [0.4, 0.4, 0.1, 0.1]

    # Read data
    dates = pd.date_range(start_date, end_date)
    symbols = ['SPY', 'ALRM', 'GOOG', 'XOM']
    df = get_data(symbols, dates)

    normed = normalize_data(df)
    alloced = normed * allocs
    pos_vals = alloced * start_val
    port_val = pos_vals.sum(axis=1)  # Portfolio values

    daily_rets = (port_val / port_val.shift(1)) - 1  # Compute daily returns
    daily_rets = daily_rets.iloc[1:]  # First row value is 0, so get rid of it (.iloc replaces deprecated .ix)

    # Compute statistics
    cumulative = (port_val[-1] / port_val[0]) - 1
    print("Cumulative Return=", cumulative)
    mean = daily_rets.mean()
    print("Average Daily Return (Mean)=", mean)
    std = daily_rets.std()
    print("Risk (St.Dev)=", std)
""" if __name__ == '__main__': """ CONFIG vars """ FIRST = True USE_CUDA = True TRAIN = True NUM_EPOCHS = 15 # Parse cmdline args args = parse_cmd_args() # Get train, val and test data if FIRST: train, val, test = normalize_data('../data/') np.save('train_x', train[0]) np.save('train_y', train[1]) np.save('val_x', val[0]) np.save('val_y', val[1]) np.save('test_x', test) else: train = (np.load('train_x.npy'), np.load('train_y.npy')) val = (np.load('val_x.npy'), np.load('val_y.npy')) test = np.load('test_x.npy') print("Data Normalization Complete!") # Find Initializer if args.init == 1: initializer = nn.init.xavier_normal
plt.cla() if __name__ == "__main__": symbols = ['SPY', 'AAPL', 'GOOG', 'IBM', 'XOM'] dates = ['2012-01-01', '2012-12-28'] df = util.get_data(symbols, pd.date_range(dates[0], dates[1])) symbol = 'GOOG' #Plot Price/SMA price = df[symbol] sma = get_rolling_mean(price) price_sma = get_price_sma(price) price = util.normalize_data(price) sma = get_rolling_mean(price) plot_data(title=str(symbol) + " Price SMA", xlabel="Date", ylabel="Price (Normalized)", kwargs={ 'Price': price, 'SMA': sma }) #Plot Bollinger Bands plot_bolinger_bands(df, symbol) #Plot RSI prices = df[symbol]
def get_data_ds3(x_data, y_data):
    n_x_data, means_x, sqrt_x = normalize_data(x_data)
    x_train, x_validation, x_test = divide_data_set(n_x_data)
    y_train, y_validation, y_test = divide_data_set(y_data)
    return x_train, x_validation, x_test, y_train, y_validation, y_test