Example #1
def train():
    # os, numpy (np) and the helper functions used below are assumed to be
    # imported in the surrounding module.
    x_data, y_data = read(os.path.join(os.getcwd(), "datasets", "2",
                                       "parkinsons_updrs.data.csv"))

    reduces = [y[0] for y in y_data]
    x_data = feature_selection(np.array(x_data), np.array(reduces)).tolist()

    x_data, m, s = normalize_data(x_data)

    x_train, x_validation, x_test = divide_data_set(x_data)
    y_train, y_validation, y_test = divide_data_set(y_data)

    # These arrays hold target values, so name them accordingly
    motor_y_train = np.array([[y[0]] for y in y_train])
    total_y_train = np.array([[y[1]] for y in y_train])
    motor_y_test = np.array([[y[0]] for y in y_test])
    total_y_test = np.array([[y[1]] for y in y_test])
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    print("\t\tFinished dividing data...")
    n_x_train = np.array(x_train)
    n_y_train = np.array(y_train)  # the motor-only assignment this overwrote was dead code and is removed
    print("\t\tTraining neural network...")
    # Step 6: train network
    path = input("Output file name for the neural network (press Enter to finish): ")
    while path != "":
        for i in range(10):
            net = create_neural_network(n_x_train,
                                        n_y_train,
                                        path=os.path.join(os.getcwd(), path),
                                        lr=(i + 1) / 10)
            test(x_test, y_test, net, i)
        path = input(
            "Output file name for the neural network (press Enter to finish): ")
Example #2
def train():
    # Step 1: read data
    x_data, y_data = read(os.path.join(os.getcwd(), "datasets", "2",
                                       "parkinsons_updrs.data-sorted.csv"))

    # Step 2: feature selection
    reduces = [y[0] for y in y_data]
    x_data = feature_selection(np.array(x_data), np.array(reduces)).tolist()
    # Step 3: normalization
    x_data, m, s = normalize_data(x_data)
    #x_data, mean, sqrt = normalize_data(y_data)
    y_data, maximums = minmax_normalize(y_data)

    # Step 4: find min and max value for each attribute
    minmax = find_minmax(x_data)
    # Step 5: divide data
    x_train, x_validation, x_test = divide_data_set(x_data)
    y_train, y_validation, y_test = divide_data_set(y_data)

    n_x_train = x_train[0:1000]
    n_y_train = [[y[0]] for y in y_train[0:1000]]
    # Step 6: train network
    net, res = create_neural_network(n_x_train, n_y_train, minmax,
                                     os.path.join(os.getcwd(),
                                                  "ds2-10i753h1o-800e-.net"))
    #test(x_test, y_test, net, 0,means,sqrt)
    test(x_test, y_test, net, maximums)
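
The helper functions used in Examples #1 and #2 (read, feature_selection, normalize_data, divide_data_set, minmax_normalize, find_minmax) are not shown. From the call sites, normalize_data appears to z-score each feature and return the scaled data plus per-feature means and standard deviations, while minmax_normalize scales each column by its maximum; minimal sketches under those assumptions (not the original helpers):

import numpy as np

def normalize_data(x_data):
    # A sketch only: z-score each column and return (data, means, stds),
    # matching the unpacking `x_data, m, s = normalize_data(x_data)` above.
    arr = np.asarray(x_data, dtype=float)
    means = arr.mean(axis=0)
    stds = arr.std(axis=0)
    stds[stds == 0] = 1.0  # guard against constant features
    return ((arr - means) / stds).tolist(), means, stds

def minmax_normalize(data):
    # Scale each column by its maximum and return the maxima so predictions
    # can later be mapped back to the original units.
    maximums = [max(col) for col in zip(*data)]
    return [[v / m for v, m in zip(row, maximums)] for row in data], maximums

def find_minmax(data):
    # Per-column (min, max) pairs, e.g. for configuring network inputs.
    return [(min(col), max(col)) for col in zip(*data)]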
Example #3
    def fit(self, X_train, y_train):
        X_train_nor = util.normalize_data(X_train)
        class_count = len(np.unique(y_train))
        self.initAll(X_train_nor.shape[0], X_train_nor.shape[1], class_count)
        # Prepend a bias row of ones, giving shape (W_size, train_size)
        X_modified_train = np.vstack(
            (np.ones(self.train_size), X_train_nor.transpose()))

        # self.itr is assumed to be advanced inside reset() or elsewhere;
        # otherwise this loop would not terminate.
        while self.itr < 3:
            self.count += 1
            if self.count > 20:
                self.reset()
            delta_W = np.zeros((self.class_count, self.W_size))
            for t in range(self.train_size):
                # Linear scores o_i = sum_j W[i][j] * x[j][t] for sample t
                o = np.zeros(self.class_count)
                for i in range(self.class_count):
                    for j in range(self.W_size):
                        o[i] += self.W[i][j] * X_modified_train[j][t]

                y = self.softmax(o)

                # Accumulate the gradient (r - y_i) * x_j, with r the
                # one-hot indicator of the true class of sample t
                for i in range(self.class_count):
                    r = 1 if int(y_train[t]) == i else 0
                    for j in range(self.W_size):
                        delta_W[i][j] += (r - y[i]) * X_modified_train[j][t]

            # Gradient-ascent update on the weights
            for i in range(self.class_count):
                for j in range(self.W_size):
                    self.W[i][j] += self.factor * delta_W[i][j]
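
The triple loop above costs O(train_size * class_count * W_size) per pass. For comparison, the same batch update can be written with matrix products; a sketch (the shapes mirror the attributes above, and the stable softmax is an assumption, since self.softmax is not shown):

import numpy as np

def softmax_gradient_step(W, X_mod, y_labels, factor):
    # W:        (class_count, W_size) weights
    # X_mod:    (W_size, train_size) inputs with a leading bias row of ones
    # y_labels: (train_size,) integer class labels
    logits = W @ X_mod
    logits -= logits.max(axis=0, keepdims=True)  # numerical stability
    probs = np.exp(logits)
    probs /= probs.sum(axis=0, keepdims=True)    # column-wise softmax
    # One-hot targets: R[i, t] = 1 iff sample t belongs to class i
    R = (np.arange(W.shape[0])[:, None] == y_labels[None, :]).astype(float)
    delta_W = (R - probs) @ X_mod.T              # same sum as the loops above
    return W + factor * delta_W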
Example #4
def plot_norm_data_vertical_lines(df_orders,
                                  portvals,
                                  portvals_bm,
                                  plot_vertical_lines=True,
                                  save_fig=True,
                                  fig_name="plot.png"):
    """Plots portvals and portvals_bm, showing vertical lines for orders

    Parameters:
    df_orders: A dataframe that contains portfolio orders
    portvals: A dataframe with one column containing daily portfolio value
    portvals_bm: A dataframe with one column containing daily benchmark value
    plot_vertical_lines: Whether to draw vertical lines at buy and sell dates
    save_fig: Whether to save the plot or not
    fig_name: The name of the saved figure

    Returns: Plots a chart of the portfolio and benchmark performances
    """
    # Normalize data
    portvals = normalize_data(portvals)
    portvals_bm = normalize_data(portvals_bm)
    df = portvals_bm.join(portvals)

    # Plot the normalized benchmark and portfolio
    plt.plot(df.loc[:, "Benchmark"], label="Benchmark")
    plt.plot(df.loc[:, "Portfolio"], label="Portfolio")

    # Plot the vertical lines for buy and sell signals
    if plot_vertical_lines:
        for date in df_orders.index:
            if df_orders.loc[date, "Shares"] > 0:
                plt.axvline(date, color='g', linestyle='--')
            elif df_orders.loc[date, "Shares"] < 0:
                plt.axvline(date, color='r', linestyle='--')

    plt.title("Portfolio vs. Benchmark")
    plt.xlabel("Date")
    plt.ylabel("Normalized prices")
    plt.legend()

    # Set figure size
    fig = plt.gcf()
    fig.set_size_inches(12, 6)

    if save_fig:
        plt.savefig(fig_name)
    else:
        plt.show()
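
In this example (and the plotting examples below) normalize_data is applied to price/value DataFrames rather than feature matrices. In that context it presumably rescales each series to start at 1.0; a minimal sketch under that assumption:

def normalize_data(df):
    # A sketch only: divide every column by its first row so all series
    # start at 1.0 and are directly comparable on one axis.
    return df / df.iloc[0]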
Example #5
def get_data(x_data, y_data):
    reduces = [y[0] for y in y_data]
    fs = feature_selection(np.array(x_data), np.array(reduces))
    n_x_data, means_x, sqrt_x = normalize_data(fs.tolist())

    x_train, x_validation, x_test = divide_data_set(n_x_data)
    y_train, y_validation, y_test = divide_data_set(y_data)
    return x_train, x_validation, x_test, y_train, y_validation, y_test
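
divide_data_set appears throughout these examples but is never shown. A minimal sketch, assuming an order-preserving train/validation/test split (the 60/20/20 fractions are an assumption):

def divide_data_set(data, train_frac=0.6, val_frac=0.2):
    # Split into train / validation / test without shuffling.
    n = len(data)
    i = int(n * train_frac)
    j = int(n * (train_frac + val_frac))
    return data[:i], data[i:j], data[j:]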
Example #6
    def simulate_analytical_timetable_vary_mu(
            self, timetable: np.ndarray, theta: float = 0.5, mu: Callable[[float], float] = None, sigma: float = 0.05,
            n_paths: int = 10, re_normalize: bool = True, init_condition: float = 0) -> np.ndarray:
        """
        Analytically simulate the OU paths with a given timetable per path.

        Simulate the paths following the drifted OU model:
            dX = theta * (mu(t) - X) * dt + sigma * dW,
        where mu(t) is a function of time, and theta, sigma are constants. The method used is the full analytical
        solution by [Doob (1942)]:
            X_t = X_0 * exp(-theta * t) + mu(t) * (1 - exp(-theta * t))
                + sigma * exp(-theta * t) * Normal(0, exp(2 * theta * t) - 1) / sqrt(2 * theta)

        :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for the calculation.
        :param theta: (float) Optional. Mean-reverting speed. Defaults to 0.5.
        :param mu: (func) Optional. Mean-reverting level as a function of time. Defaults to the constant function 0.
            Note that the time unit should be consistent with other inputs, and t=0 at the beginning of the
            simulation.
        :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
        :param n_paths: (int) Optional. Number of paths in the simulation. Needs to be >=2. Defaults to 10.
        :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each time advancement.
            This will only be triggered if n_paths >= 2. Defaults to True.
        :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
        :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
        """
     
        # Initialize
        n_points = len(timetable)
        simulated_paths = np.zeros(shape=(n_points, n_paths))
        simulated_paths[0, :] += init_condition
        if mu is None:  # Default mu(t) = 0
            mu = lambda t: 0 * t

        # 1. Draw from standard Gaussians; the OU structure is layered on below.
        gaussians = np.random.normal(loc=0, scale=1, size=(n_points - 1, n_paths))

        # 2. Re-normalize the Gaussians per point or instance (row-wise)
        if re_normalize and n_paths >= 2:
            gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

        # 3. Build the time-transformed Wiener process: Gaussians with mean=0, var=exp(2 theta t) - 1
        trans_gaussian = np.zeros_like(gaussians)
        trans_gaussian[0] = (np.sqrt(np.exp(2 * theta * timetable[1]) -
                                     np.exp(2 * theta * timetable[0])) * gaussians[0])
        for i in range(1, n_points - 1):
            trans_gaussian[i, :] = (trans_gaussian[i - 1, :] +
                                    np.sqrt(np.exp(2 * theta * timetable[i]) -
                                            np.exp(2 * theta * timetable[i - 1])) * gaussians[i, :])

        # 4. Construct the paths exactly from the analytical solution.
        for i in range(1, n_points):
            exp_decay = np.exp(-theta * timetable[i])
            simulated_paths[i, :] = (simulated_paths[0, :] * exp_decay + mu(timetable[i]) * (1 - exp_decay)
                                     + sigma * exp_decay * trans_gaussian[i - 1, :] / np.sqrt(2 * theta))

        # 5. Output the array.
        return simulated_paths
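
A standalone sketch of the same analytical construction for a constant mu, useful as a sanity check of the Doob formula above (the numbers and expected values are illustrative only):

import numpy as np

theta, sigma, mu_level, x0 = 2.0, 0.1, 0.05, 1.0
t = np.linspace(0.0, 5.0, 1001)
z = np.random.normal(size=(len(t) - 1, 10000))
# Accumulate Normal(0, exp(2 theta t) - 1) increments, as in step 3 above
scales = np.sqrt(np.diff(np.exp(2 * theta * t)))[:, None]
trans = np.cumsum(scales * z, axis=0)
decay = np.exp(-theta * t[1:])[:, None]
paths = x0 * decay + mu_level * (1 - decay) + sigma * decay * trans / np.sqrt(2 * theta)
print(paths[-1].mean())  # ~ mu_level = 0.05 once the initial condition decays
print(paths[-1].std())   # ~ sigma / sqrt(2 * theta) = 0.05 (stationary std)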
Example #7
def plot_data_vert(df_orders, portvals, portvals_bm, insample=True):
    # Plot data using vertical lines for the orders
    # Normalize data
    portvals = normalize_data(portvals)
    portvals_bm = normalize_data(portvals_bm)

    df = portvals_bm.join(portvals, lsuffix='Benchmark', rsuffix='Portfolio')
    df.fillna(method="ffill", inplace=True)
    df.fillna(method="bfill", inplace=True)
    df.rename(columns={
        '0Benchmark': 'Benchmark',
        '0Portfolio': 'Portfolio'
    },
              inplace=True)

    plt.plot(df.loc[:, "Portfolio"],
             label='Portfolio',
             linewidth=1.2,
             color='black')
    plt.plot(df.loc[:, "Benchmark"],
             label='Benchmark',
             linewidth=1.2,
             color='b')

    # Plot Vertical Lines
    for date in df_orders.index:
        if df_orders.loc[date, "Order"] == "BUY":
            plt.axvline(x=date, color='g', linestyle='--')
        else:
            plt.axvline(x=date, color='r', linestyle='--')

    plt.title("Portfolio vs. Benchmark")
    plt.xlabel("Date")
    plt.xticks(rotation=50)
    plt.ylabel("Normalized prices")
    plt.legend(loc="upper left")

    if insample:
        filename = "portfolio_vs_benchmark(InSample).jpg"
    else:
        filename = "portfolio_vs_benchmark(OutSample).jpg"
    plt.savefig(filename)
    plt.clf()
    plt.cla()
Example #8
    def simulate_analytical_timetable(self,
                                      timetable: np.ndarray,
                                      r: float = 0,
                                      sigma: float = 0.05,
                                      n_paths: int = 10,
                                      re_normalize: bool = True,
                                      init_condition: float = 0) -> np.ndarray:
        """
        Analytically simulate the Brownian motions according to a given timetable.

        Use the analytical solution to simulate the paths following the drifted Brownian model: dX = r*dt + sigma * dW,
        where r, sigma are constants. It turns out that the analytical approach is equivalent to the discrete case.

        dX is drawn from Gaussian(r*dt, sigma**2 * dt) for each advancement.

        :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for the calculation.
        :param r: (float) Optional. Drift rate. Defaults to 0.
        :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
        :param n_paths: (int) Optional. Number of paths in the simulation. Needs to be >=2. Defaults to 10.
        :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each time advancement.
            This will only be triggered if n_paths >= 2. Defaults to True.
        :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
        :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
        """

        # Initialize
        n_points = len(timetable)
        simulated_paths = np.zeros(shape=(n_points, n_paths))
        simulated_paths[0, :] += init_condition

        # 1. Draw from Gaussians accordingly with mean = 0. Will add back the mean = r*dt later.
        gaussians = np.random.normal(loc=0,
                                     scale=1,
                                     size=(n_points - 1, n_paths))

        # 2. Re-normalize the Gaussian per point or instance (row-wise)
        if re_normalize and n_paths >= 2:
            gaussians = util.normalize_data(data_matrix=gaussians,
                                            act_on='row')

        # 3. Add back the drift term r*dt, and rescale with sigma.
        exact_advancements = np.zeros_like(gaussians)
        for i in range(n_points - 1):
            dt = timetable[i + 1] - timetable[i]
            sqdt = np.sqrt(dt)
            exact_advancements[i, :] = sigma * gaussians[i, :] * sqdt + r * dt

        # 4. Construct the paths exactly.
        for i in range(1, n_points):
            simulated_paths[i, :] = (simulated_paths[i - 1, :]
                                     + exact_advancements[i - 1, :])

        # 5. Output the array.
        return simulated_paths
Example #9
    def simulate_discrete_timetable(self,
                                    timetable: np.ndarray,
                                    r: float = 0,
                                    sigma: float = 0.05,
                                    n_paths: int = 10,
                                    re_normalize: bool = True,
                                    init_condition: float = 0) -> np.ndarray:
        """
        Discretely simulate the Brownian motions according to a given timetable.

        Use Euler advancement to simulate the paths following the drifted Brownian model: dX = r*dt + sigma * dW,
        where r, sigma are constants. It happens that the discrete result is exact for this model.

        :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for the calculation.
        :param r: (float) Optional. Drift rate. Defaults to 0.
        :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
        :param n_paths: (int) Optional. Number of paths in the simulation. Needs to be >=2. Defaults to 10.
        :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each time advancement.
            This will only be triggered if n_paths >= 2. Defaults to True.
        :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
        :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
        """

        # Initialize
        n_points = len(timetable)
        simulated_paths = np.zeros(shape=(n_points, n_paths))
        simulated_paths[0, :] += init_condition

        # 1. Draw from the Gaussian distribution accordingly.
        gaussians = np.random.normal(loc=0,
                                     scale=1,
                                     size=(n_points - 1, n_paths))

        # 2. Re-normalize the Gaussian per point or instance (normalize every row)
        if re_normalize and n_paths >= 2:
            gaussians = util.normalize_data(data_matrix=gaussians,
                                            act_on='row')

        # 3. Construct the paths via Euler advancement.
        for i in range(1, n_points):
            dt = timetable[i] - timetable[i - 1]
            sqdt = np.sqrt(dt)
            simulated_paths[i, :] = (simulated_paths[i - 1, :]
                                     + r * dt + sigma * gaussians[i - 1, :] * sqdt)

        # 4. Output the array.
        return simulated_paths
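
Because the drifted Brownian increments are exact, Examples #8 and #9 should produce identical paths when driven by the same Gaussian draws. A standalone sketch of that equivalence (illustrative, not part of the original module):

import numpy as np

r, sigma, n_paths = 0.01, 0.05, 10
timetable = np.linspace(0.0, 1.0, 253)
dts = np.diff(timetable)
z = np.random.normal(size=(len(dts), n_paths))

# Analytical: exact increments accumulated in one shot
incr = r * dts[:, None] + sigma * np.sqrt(dts)[:, None] * z
paths_analytical = np.vstack([np.zeros(n_paths), np.cumsum(incr, axis=0)])

# Discrete: Euler advancement, one step at a time
paths_discrete = np.zeros((len(timetable), n_paths))
for i in range(1, len(timetable)):
    paths_discrete[i] = (paths_discrete[i - 1] + r * dts[i - 1]
                         + sigma * np.sqrt(dts[i - 1]) * z[i - 1])

assert np.allclose(paths_analytical, paths_discrete)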
Example #10
    def predict(self, x):
        X_test_nor = util.normalize_data(x)
        # Prepend a bias row of ones, matching the layout used in fit()
        X_modified_test = np.vstack(
            (np.ones(X_test_nor.shape[0]), X_test_nor.transpose()))
        y_pred = []
        for t in range(X_test_nor.shape[0]):
            # Linear scores for sample t
            o = np.zeros(self.class_count)
            for i in range(self.class_count):
                for j in range(self.W_size):
                    o[i] += self.W[i][j] * X_modified_test[j][t]
            y = self.softmax(o)
            y_pred.append(self.get_response_class(y))
        return y_pred
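
self.softmax is referenced in both fit() and predict() but is not shown in these excerpts; a standard numerically stable version (a sketch) would be:

import numpy as np

def softmax(o):
    # Subtract the max first: softmax is invariant to adding a constant
    # to every logit, and the shift avoids overflow in np.exp.
    e = np.exp(o - np.max(o))
    return e / e.sum()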
Example #11
def test_results(weights):
	print("Testing with weights...\n")
	start_date = '2014-01-02'
	end_date = '2016-08-31'
	dates = pd.date_range(start_date, end_date)
	spy = util.get_data(['SPY'], dates)
	normed_spy = util.normalize_data(spy)
	expected = get_expected(dates)
	correct_count = 0
	test_inputs = create_inputs(dates, training=False)
	# Compare predicted outputs against the expected labels
	for date, row in test_inputs.iterrows():
		if expected.loc[date][0] == output(date, test_inputs, weights):
			correct_count += 1

	print("Correct count: {}, Total: {}, Percent Correct: {}".format(
		correct_count, test_inputs.shape[0],
		correct_count / test_inputs.shape[0] * 100))
Example #12
    def simulate_discrete_timetable_vary_mu(
            self, timetable: np.ndarray, theta: float = 0.5, mu: Callable[[float], float] = None, sigma: float = 0.05,
            n_paths: int = 10, re_normalize: bool = True, init_condition: float = 0) -> np.ndarray:
        """
        Discretely simulate the OU paths according to a given timetable.

        Use Euler advancement to simulate the paths following the drifted OU model:
            dX = theta * (mu(t) - X) * dt + sigma * dW,
        where mu(t) is a function of time, and theta, sigma are constants. Assume t=0 at the beginning.

        :param timetable: (np.ndarray) 1D numpy array that indicates the times of interest for the calculation.
        :param theta: (float) Optional. Mean-reverting speed. Defaults to 0.5.
        :param mu: (func) Optional. Mean-reverting level as a function of time. Defaults to the constant function 0.
            Note that the time unit should be consistent with other inputs, and t=0 at the beginning of the
            simulation.
        :param sigma: (float) Optional. Standard deviation for the Brownian motion. Defaults to 0.05.
        :param n_paths: (int) Optional. Number of paths in the simulation. Needs to be >=2. Defaults to 10.
        :param re_normalize: (bool) Optional. Whether to renormalize the Gaussians sampled at each time advancement.
            This will only be triggered if n_paths >= 2. Defaults to True.
        :param init_condition: (float) Optional. Initial start position for every path. Defaults to 0.
        :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
        """
        
        # Initialize
        n_points = len(timetable)
        simulated_paths = np.zeros(shape=(n_points, n_paths))
        simulated_paths[0, :] += init_condition
        if mu is None:  # Default mu(t) = 0
            mu = lambda t: 0 * t
        
        # 1. Draw from the Gaussian distribution accordingly.
        gaussians = np.random.normal(loc=0, scale=1, size=(n_points-1, n_paths))

        # 2. Re-normalize the Gaussian per point or instance (normalize every row)
        if re_normalize and n_paths >= 2:
            gaussians = util.normalize_data(data_matrix=gaussians, act_on='row')

        # 3. Construct the paths via Euler advancement.
        for i in range(1, n_points):
            dt = timetable[i] - timetable[i - 1]
            sqdt = np.sqrt(dt)
            increments = (theta * (mu(timetable[i]) - simulated_paths[i - 1, :]) * dt
                          + sigma * gaussians[i - 1, :] * sqdt)
            simulated_paths[i, :] = simulated_paths[i - 1, :] + increments

        # 4. Output the array.
        return simulated_paths
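
As a quick numerical check of the Euler scheme above (standalone and illustrative only): for a constant mu the simulated ensemble should approach the stationary OU distribution, with mean mu and variance sigma**2 / (2 * theta), once t is large relative to 1/theta.

import numpy as np

theta, sigma, mu_level = 2.0, 0.1, 0.05
t = np.linspace(0.0, 5.0, 2001)
x = np.zeros(20000)
for i in range(1, len(t)):  # Euler advancement, as in step 3 above
    dt = t[i] - t[i - 1]
    x += theta * (mu_level - x) * dt + sigma * np.sqrt(dt) * np.random.normal(size=x.size)
print(x.mean())  # ~ mu_level = 0.05
print(x.std())   # ~ sigma / sqrt(2 * theta) = 0.05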
Example #13
def load_data(dataset, dataset_type='txt', normalize=False, print_data=False):
    """Get Data."""
    print(f'Loading Data from file... {dataset}')
    data, nrows, ncols = util.get_data_as_matrix(dataset,
                                                 Path(__file__),
                                                 filetype=dataset_type)

    mu_rowvec = None
    sigma_rowvec = None

    if print_data:
        print('First 10 rows of the dataset:')
        print('\n'.join(
            f'rownum={i} : '
            f'feature_matrix_row={j}, output_row={k}'
            for i, j, k in util.iterate_matrix(
                data, ((0, 10), ), ((0, ncols - 1), (ncols - 2, ncols)))))

    if normalize:
        # normalize_data is assumed to modify the feature columns in place,
        # returning the per-column means and standard deviations.
        _, mu_rowvec, sigma_rowvec = \
            util.normalize_data(data[:, 0:np.shape(data)[1] - 1])

        if print_data:
            print(f'mu={mu_rowvec}')
            print(f'sigma={sigma_rowvec}')
            print('First 10 rows of the dataset (After Normalization):')
            print('\n'.join(
                f'rownum={i} : '
                f'feature_matrix_row={j}, output_row={k}'
                for i, j, k in util.iterate_matrix(
                    data, ((0, 10), ), ((0, ncols - 1), (ncols - 1, ncols)))))

    feature_matrix = np.append(np.ones(shape=(nrows, 1)),
                               data[:, 0:ncols - 1],
                               axis=1)
    output_colvec = np.reshape(data[:, ncols - 1], newshape=(nrows, 1))

    return data, feature_matrix, output_colvec, \
        nrows, ncols - 1, mu_rowvec, sigma_rowvec
Example #14
def load_mnist(dataset):
    """Load the MNIST dataset.

    Parameters
    ----------
    dataset : string
        path of the pickled MNIST dataset (e.g. mnist.pkl.gz)

    Returns
    -------
    X : numpy.ndarray
        normalized and ZCA-whitened training images, one row per sample
    """
    import gzip
    import pickle

    # Load the dataset (only the training split is used)
    with gzip.open(dataset, 'rb') as f:
        train_set, _, _ = pickle.load(f)

    X = train_set[0].T          # arrange as features x samples
    X = util.normalize_data(X)
    X = util.ZCA_whitening(X)

    return X.T
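
util.ZCA_whitening is also not shown. A common implementation (a sketch, assuming the input is arranged as features x samples and already zero-centred by normalize_data):

import numpy as np

def zca_whitening(X, eps=1e-5):
    # Covariance across samples for a (features x samples) matrix
    cov = np.cov(X)
    U, S, _ = np.linalg.svd(cov)
    # Rotate into the eigenbasis, rescale each direction, rotate back
    W = U @ np.diag(1.0 / np.sqrt(S + eps)) @ U.T
    return W @ X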
def create_inputs(dates, training=True):
	inputs = pd.DataFrame(index=dates)
	spy = util.get_data(['SPY'], dates)
	normed_spy = util.normalize_data(spy) # SPY Normalized
	inputs = inputs.join(spy)

	#Set the inputs
	input_distance_from_upper, input_distance_from_lower = input_distance_from_upper_and_lower_bollinger_band(normed_spy)

	dollar_over_euro = download_indices_csv(dates, "data/indices", "DEXUSEU.csv", ["DATE", "DEXUSEU"]) # dollar / euro
	ten_year_treasury_bond = download_indices_csv(dates, "data/indices", "DGS10.csv", ["DATE", "DGS10"])
	boa_high_yield_options = download_indices_csv(dates, "data/indices", "BAMLH0A0HYM2.csv", ["DATE", "BAMLH0A0HYM2"])
	inputs = inputs.join(input_distance_from_upper)
	inputs = inputs.join(input_distance_from_lower)
	inputs = inputs.join(dollar_over_euro)
	inputs = inputs.join(ten_year_treasury_bond)
	inputs = inputs.join(boa_high_yield_options)

	# Join inputs together
	inputs = inputs.dropna(subset=["SPY"])
	inputs = inputs.dropna(subset=["Distance From Upper"])
	inputs = inputs.dropna(subset=["Distance From Lower"])
	inputs = inputs.dropna(subset=["DEXUSEU"])
	inputs = inputs.dropna(subset=["DGS10"])
	inputs = inputs.dropna(subset=["BAMLH0A0HYM2"])
	inputs = inputs.iloc[:, 1:]

	if training:
		inputs = inputs.shift(1)  # shift returns a new frame; reassign it
		# input_distance_from_upper.shift(1)
		# input_distance_from_lower.shift(1)
		# dollar_over_euro.shift(1)
		# ten_year_treasury_bond.shift(1)
		# boa_high_yield_options.shift(1)

	return inputs
Example #17
# Read images

## MNIST
# train_set, valid_set, test_set = util.load_mnist("mnist.pkl.gz")
# X = util.sample_patches_mnist(train_set, 5000, 16)

## CIFAR-10
data_set = util.load_CIFAR_batch("./cifar-10/data_batch_1")
data_x = data_set[0] / 255.0        # scale pixel values to [0, 1]
data_x = np.mean(data_x, axis=3)    # convert RGB to grayscale by channel mean

X = util.sample_patches_cifar10(data_x, 5000, 16)

# Normalization and whitening

X = util.normalize_data(X)
print("[MESSAGE] Data is normalized")

X = util.ZCA_whitening(X)
print("[MESSAGE] Data is whitened")

# plt.figure(1)
# for i in range(100):
#     plt.subplot(10, 10, i + 1)
#     plt.imshow(X[:, i].reshape(16, 16), cmap=plt.get_cmap('gray'), interpolation='nearest')
#     plt.axis('off')
#
# plt.show()

# K-means procedure
Example #18
    def get_features1(self, prices, symbol, print_data=False):
        '''
        Compute technical indicators and use them as features to be fed
        into a Q-learner.

        :param prices: Adj Close prices dataframe
        :param symbol: the symbol whose indicators are computed
        :param print_data: whether to add extra data for printing to the dataframe
        :return: normalized features dataframe
        '''

        # Fill NAN values if any
        prices.fillna(method="ffill", inplace=True)
        prices.fillna(method="bfill", inplace=True)
        prices.fillna(1.0, inplace=True)

        # Price
        adj_close = prices[symbol]

        # Compute Momentum
        mom = get_momentum(adj_close, window=10)

        # Compute RSI
        rsi = get_RSI(adj_close)

        # Compute rolling mean
        rm = get_rolling_mean(adj_close, window=10)

        # Compute rolling standard deviation
        rstd = get_rolling_std(adj_close, window=10)

        # Compute upper and lower bands
        upper_band, lower_band = get_bollinger_bands(rm, rstd)

        # Compute SMA
        sma = get_sma_indicator(adj_close, rm)

        df = prices.copy()
        df['Momentum'] = mom
        df['Adj. Close/SMA'] = adj_close/sma
        df['middle_band'] = rm

        # Delete 'Adj Close' column
        del df[symbol]

        if set(['cash']).issubset(df.columns):
            del df['cash']

        # Normalize dataframe
        df_norm = normalize_data(df)

        # Add Adj Close, Bollinger Bands and RSI if printing
        if print_data:
            df_norm['Adj Close'] = prices[symbol]
            df_norm['upper_band'] = upper_band
            df_norm['lower_band'] = lower_band
            df_norm['middle_band'] = rm
            df_norm['RSI'] = rsi

        # Drop NaN
        df_norm.dropna(inplace=True)

        return df_norm
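
get_momentum is used here and again in Example #20 but is not defined in these excerpts; a common definition (a sketch) is the fractional price change over a lookback window:

def get_momentum(price, window=10):
    # Fractional change of price relative to `window` periods earlier
    return price / price.shift(window) - 1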
Example #19
    def _simulate_over_grid(self, theta: float, g: Callable[[float], float],
                            sigma: float, lambda_j: float,
                            mu_j: Callable[[float], float], sigma_j: float,
                            dt: float, n_points: int, n_paths: int,
                            re_normalize: bool, init_condition: float,
                            jump_timestamp: List[np.ndarray],
                            jump_size: List[list]) -> np.ndarray:
        """
        The OU-Jump simulation engine with a given timestamp and size for jumps.

        The simulation is carried out discretely from the relation:
            X_t = g(t) + Y_t
            d Y_t = - theta * Y_t * dt + sigma * dW + logJ_t * dN_t
        where g(t) is the deterministic drift, and logJ_t ~ Normal(mu_j, sigma_j**2), N_t ~ Poisson(lambda_j).
        Each path is simulated according to the following:
            X_t = g(t) + (X_0 - g(0))*exp(-theta*t) + sigma*int{_0^t}{exp(-theta(t-tau)), d W_tau}
            + int{_0^t}{exp(-theta(t - tau)) logJ_t, d N_tau}
        The two stochastic integration terms are evaluated in the Ito sense (the integrand evaluated at the beginning
        of each discretization interval) over the grid length dt.

        :param theta: (float) Mean-reverting speed.
        :param g: (Callable[[float], float]) The deterministic drift function for the OU-Jump process.
        :param sigma: (float) Standard deviation for the Brownian motion.
        :param lambda_j: (float) Jump rate.
        :param mu_j: (Callable[[float], float]) Jump average as a function of time.
        :param sigma_j: (float) The jump std constant.
        :param dt: (float) Time advancement step.
        :param n_points: (int) Number of steps per simulated path, including the initial condition.
        :param n_paths: (int) Number of paths in the simulation. Needs to be >=2.
        :param re_normalize: (bool) Whether to renormalize the Gaussians sampled at each time advancement.
            This will only be triggered if n_paths >= 2.
        :param init_condition: (float) Initial start position for every path.
        :param jump_timestamp: (List[np.ndarray]) The times where the jumps occur, per path.
        :param jump_size: (List[list]) The size of each jump, per path.
        :return: (np.ndarray) The simulated paths, dimension is (n_points, n_paths).
        """

        # Initialize the timetable. In this case it is a grid.
        total_time = dt * n_points
        timetable = np.arange(start=0, stop=total_time, step=dt)
        # Initialize simulated_paths
        simulated_paths = np.zeros(shape=(n_points, n_paths))
        simulated_paths[0, :] += init_condition

        # 1. Handle the Brownian part
        # Draw from the Gaussian distribution accordingly.
        gaussians = np.random.normal(loc=0,
                                     scale=1,
                                     size=(n_points - 1, n_paths))
        # Re-normalize the Gaussian per point or instance (normalize every row)
        if re_normalize and n_paths >= 2:
            gaussians = util.normalize_data(data_matrix=gaussians,
                                            act_on='row')

        # 2. Generate the paths for the jump diffusion process using discretization with interval dt.
        sqdt = np.sqrt(dt)
        for path in range(n_paths):
            # Precalculate part of the Brownian and jump stoch integral for faster calculation
            cumu_brownian_precalculate = np.cumsum([
                np.exp(theta * timetable[j]) * gaussians[j][path] * sqdt
                for j in range(n_points - 1)
            ]) * sigma
            cumu_jump_precalculate = np.cumsum([
                np.exp(theta * jump_timestamp[path][j]) * jump_size[path][j]
                for j in range(len(jump_timestamp[path]))
            ])
            # Initializing variables for the jump process
            jump_counter = 0  # Count the number of jumps. Serve as a pointer.
            cumu_jump = 0  # Initialize the cumulative jump integral value
            jump_timestamp_path = set(
                jump_timestamp[path])  # Hash for faster calculation

            for point in range(1, n_points):
                cur_time = timetable[point]  # Current time
                pre_time = timetable[point - 1]  # Time one step back

                # Calculate the deterministic drift part, Brownian part and jump part separately
                drift_part = g(cur_time) + (simulated_paths[0, path] -
                                            g(0)) * np.exp(-theta * cur_time)
                # Brownian increment. Note the stoch integral is taken in the Ito's sense
                cumu_brownian = np.exp(
                    -theta * pre_time) * cumu_brownian_precalculate[point - 1]
                # Jump increment. Note the stoch integral is taken in the Ito's sense
                if cur_time in jump_timestamp_path:  # If this is the time for a jump
                    cumu_jump = cumu_jump_precalculate[jump_counter] * np.exp(
                        -theta * pre_time)
                    jump_counter += 1
                # Assemble together for this path
                simulated_paths[point,
                                path] = drift_part + cumu_brownian + cumu_jump

        return simulated_paths
Example #20
        fm_transpose_fm = fm_transpose_fm + \
            regularization_param * diag_matrix

    fm_transpose_fm_pinv = np.linalg.pinv(fm_transpose_fm)
    fm_transpose_output_colvec = feature_matrix.transpose() @ output_colvec

    return fm_transpose_fm_pinv @ fm_transpose_output_colvec


if __name__ == '__main__':
    DATASET = 'resources/data/city_dataset_97_2.txt'
    print(f'Gradient Descent For Dataset : {DATASET}')
    data, nrows, ncols = util.get_data_as_matrix(DATASET, Path(__file__))

    # normalize_data is assumed to modify the feature columns in place
    util.normalize_data(data[:, 0:ncols - 1])

    output = data[:, ncols - 1:ncols]
    features = np.append(np.ones(shape=(nrows, 1)),
                         data[:, 0:ncols - 1],
                         axis=1)
    theta, cost_history = \
        gradient_descent(features, output, nrows, ncols - 1,
                         theta_colvec=np.zeros(shape=(ncols, 1)),
                         alpha=0.03, num_iters=1500,
                         debug=True, debug_print=True)
    print(f'theta(Gradient Descent)={theta}')
    print(f'cost_history={cost_history}')

    print(f'{"*" * 80}')
    optimal_theta, optimal_alpha_val, optimal_cost_val, \

if __name__ == "__main__":
    start_date = dt.datetime(2008, 1, 1)
    end_date = dt.datetime(2009, 12, 31)

    # Get NYSE trading dates
    dates = get_exchange_days(start_date,
                              end_date,
                              dirpath="../data/dates_lists",
                              filename="NYSE_dates.txt")

    symbols = ["AAPL"]
    # Get stock data and normalize it
    df_price = get_data(symbols, dates)
    norm_price = normalize_data(df_price)
    window = 20
    num_std = 2

    for symbol in symbols:
        # Compute rolling mean
        rolling_mean = norm_price[symbol].rolling(window=window).mean()

        # Compute rolling standard deviation
        rolling_std = norm_price[symbol].rolling(window=window).std()

        # Get momentum
        momentum = get_momentum(norm_price[symbol], window)

        # Plot momentum
        plot_momentum(
Example #22
def plot_norm_data_vertical_lines(df_orders,
                                  portvals,
                                  portvals_bm,
                                  vert_lines=False):
    """Plots portvals and portvals_bm, showing vertical lines for buy and sell orders

    Parameters:
    df_orders: A dataframe that contains portfolio orders
    portvals: A dataframe with one column containing daily portfolio value
    portvals_bm: A dataframe with one column containing daily benchmark value
    vert_lines: Whether to draw vertical lines at buy and sell dates

    Returns: Plots a chart of the portfolio and benchmark performances
    """
    # Normalize data
    portvals = normalize_data(portvals)
    portvals_bm = normalize_data(portvals_bm)
    df = portvals_bm.join(portvals)

    # Y-axis range across both series
    min_range = min(df.loc[:, "Benchmark"].min(), df.loc[:, "Portfolio"].min())
    max_range = max(df.loc[:, "Benchmark"].max(), df.loc[:, "Portfolio"].max())

    # Plot the normalized benchmark and portfolio
    trace_bench = go.Scatter(x=df.index,
                             y=df.loc[:, "Benchmark"],
                             name="Benchmark",
                             line=dict(color='#17BECF'),
                             opacity=0.8)

    trace_portfolio = go.Scatter(x=df.index,
                                 y=df.loc[:, "Portfolio"],
                                 name="Portfolio",
                                 line=dict(color='#000000'),
                                 opacity=0.8)

    data = [trace_bench, trace_portfolio]

    # Plot the vertical lines for buy and sell signals
    shapes = list()
    if vert_lines:
        buy_line = []
        sell_line = []
        for date in df_orders.index:
            if df_orders.loc[date, "Order"] == "BUY":
                buy_line.append(date)
            else:
                sell_line.append(date)
        # Vertical lines
        line_size = max_range + (max_range * 10 / 100)

        # Buy line
        for i in buy_line:
            shapes.append({
                'type': 'line',
                'xref': 'x',
                'yref': 'y',
                'x0': i,
                'y0': 0,
                'x1': i,
                'y1': line_size,
                'line': {
                    'color': 'rgb(0, 102, 34)',
                    'width': 1,
                    'dash': 'dash',
                },
            })
        # Sell line
        for i in sell_line:
            shapes.append({
                'type': 'line',
                'xref': 'x',
                'yref': 'y',
                'x0': i,
                'y0': 0,
                'x1': i,
                'y1': line_size,
                'line': {
                    'color': 'rgb(255, 0, 0)',
                    'width': 1,
                    'dash': 'dash',
                },
            })

    layout = dict(
        autosize=True,
        shapes=shapes,
        margin=go.Margin(l=50, r=50, b=100, t=100, pad=4),
        title="Portfolio vs Benchmark",
        xaxis=dict(
            title='Dates',
            rangeselector=dict(buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(step='all')
            ])),
            range=[portvals.index[0], portvals.index[-1]]),
        yaxis=dict(title='Normalized Prices',
                   range=[
                       min_range - (min_range * 10 / 100),
                       max_range + (max_range * 10 / 100)
                   ]),
    )

    fig = dict(data=data, layout=layout)
    iplot(fig)
Example #23
import pandas as pd

from util import get_data, normalize_data

if __name__ == "__main__":
    start_val = 1000000
    # Define date range
    start_date = '2015-10-02'
    end_date = '2017-10-17'
    symbols = ['SPY', 'XOM', 'GOOG', 'ALRM']
    allocs = [0.4, 0.4, 0.1, 0.1]

    # Read data (keep the symbol order aligned with allocs above)
    dates = pd.date_range(start_date, end_date)
    df = get_data(symbols, dates)

    normed = normalize_data(df)
    alloced = normed * allocs
    pos_vals = alloced * start_val
    port_val = pos_vals.sum(axis=1)  # Portfolio values

    daily_rets = (port_val /
                  port_val.shift(1)) - 1  # Compute daily returns
    daily_rets = daily_rets.iloc[1:]  # First row is NaN, so get rid of it

    # Compute statistics
    cumulative = (port_val.iloc[-1] / port_val.iloc[0]) - 1
    print("Cumulative Return=", cumulative)
    mean = daily_rets.mean()
    print("Average Daily Return (Mean)=", mean)
    std = daily_rets.std()
    print("Risk (St.Dev)=", std)
Example #24
if __name__ == '__main__':
    # CONFIG vars
    FIRST = True
    USE_CUDA = True
    TRAIN = True
    NUM_EPOCHS = 15

    # Parse cmdline args
    args = parse_cmd_args()

    # Get train, val and test data
    if FIRST:
        train, val, test = normalize_data('../data/')
        np.save('train_x', train[0])
        np.save('train_y', train[1])
        np.save('val_x', val[0])
        np.save('val_y', val[1])
        np.save('test_x', test)
    else:
        train = (np.load('train_x.npy'), np.load('train_y.npy'))
        val = (np.load('val_x.npy'), np.load('val_y.npy'))
        test = np.load('test_x.npy')

    print("Data Normalization Complete!")

    # Find Initializer
    if args.init == 1:
        initializer = nn.init.xavier_normal
Example #25
    plt.cla()


if __name__ == "__main__":
    symbols = ['SPY', 'AAPL', 'GOOG', 'IBM', 'XOM']
    dates = ['2012-01-01', '2012-12-28']
    df = util.get_data(symbols, pd.date_range(dates[0], dates[1]))

    symbol = 'GOOG'

    # Plot Price/SMA (normalize first so price and SMA share one scale)
    price = util.normalize_data(df[symbol])
    sma = get_rolling_mean(price)
    price_sma = get_price_sma(price)

    plot_data(title=str(symbol) + " Price SMA",
              xlabel="Date",
              ylabel="Price (Normalized)",
              kwargs={
                  'Price': price,
                  'SMA': sma
              })

    #Plot Bollinger Bands
    plot_bolinger_bands(df, symbol)

    #Plot RSI
    prices = df[symbol]
Example #26
def get_data_ds3(x_data, y_data):
    n_x_data, means_x, sqrt_x = normalize_data(x_data)

    x_train, x_validation, x_test = divide_data_set(n_x_data)
    y_train, y_validation, y_test = divide_data_set(y_data)
    return x_train, x_validation, x_test, y_train, y_validation, y_test