# Streamlit sidebar controls: dataset, classifier, and optional feature scaling.
dataset_name = st.sidebar.selectbox("Select Dataset",
                                    ("Iris", "Breast Cancer", "Wine"))

classifier = st.sidebar.selectbox("Select Classifiers",
                                  ("KNN", "SVM", "Random Forest"))

scaling = st.sidebar.checkbox("Scaling?")

# Load the selected dataset and show its basic statistics.
X, y = utilities.get_dataset(dataset_name)
st.write("Shape of the data:", X.shape)
st.write("Number of Classes:", len(np.unique(y)))

# Render classifier-specific hyper-parameter widgets and collect their values.
params = utilities.add_parameter_ui(classifier)

# Build the chosen classifier with the selected hyper-parameters.
clf = utilities.get_classifier(classifier, params)

# Optionally scale the features before training/evaluation.
if scaling:
    X = utilities.scale_data(X)

# Make predictions and get accuracy
accuracy = utilities.classification(X, y, clf)
st.write("**Classifier:** ", classifier)  # fixed typo: was "Classifer"
st.write("**Accuracy:** ", accuracy)

# Plot the components of the data
utilities.plot_data(X, y)
# Example #2
# 0
import os

import numpy as np
import pandas as pd
from sklearn import tree

import utilities as util

# Work from the directory that holds the data file.
os.chdir('E:/decision-trees')

# Space-separated file with no header row: columns are addressed by index.
frame = pd.read_csv("tamu.txt", sep=' ', header=None)

# Quick look at the dataframe (dimensions + column dtypes).
frame.shape
frame.info()

# Features are columns 1 and 0 (in that order); the label is column 2.
X = np.array(frame[[1, 0]])
y = np.array(frame[2])

util.plot_data(X, y)

# A depth-1 decision tree ("stump") with a fixed seed for reproducibility.
stump = tree.DecisionTreeClassifier(random_state=2017, max_depth=1)
stump.fit(X, y)
util.plot_decision_boundary(lambda grid: stump.predict(grid), X, y)
# Example #3
# 0
# -*- coding: utf-8 -*-
"""
Binary classification of the two-circles toy dataset with a small MLP.

Created on Mon Sep 11 11:30:39 2017

@author: venkat
"""

import os
os.getcwd()
os.chdir("E:\\deep_learning")
from utilities import plot_data, plot_confusion_matrix, plot_loss_accuracy, plot_decision_boundary
from sklearn.datasets import make_moons, make_circles
from keras.models import Sequential
from keras.layers import Dense

# Two concentric circles: a dataset that is not linearly separable.
x, y = make_circles(n_samples=1000, noise=0.05, random_state=0, factor=0.3)

plot_data(x, y)

# 2 -> 4 -> 2 -> 1 feed-forward network; tanh hidden units, sigmoid output.
net = Sequential()
net.add(Dense(units=4, activation='tanh', input_shape=(2, )))
net.add(Dense(units=2, activation='tanh'))
net.add(Dense(units=1, activation='sigmoid'))

net.compile(optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'])
fit_history = net.fit(x, y, epochs=1, verbose=0)
plot_decision_boundary(lambda pts: net.predict(pts), x, y)
plot_confusion_matrix(net, x, y)
from utilities import plot_data, plot_confusion_matrix, plot_loss_accuracy, plot_decision_boundary
from sklearn.datasets import make_moons, make_circles
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import plot_model

# Two concentric circles: not linearly separable, so the single perceptron
# below is expected to underfit while an MLP can succeed.
X, y = make_circles(n_samples=1000, noise=0.05, factor=0.3, random_state=0)
#X, y = make_moons(n_samples=1000, noise=0.05, random_state=0)
plot_data(X, y)

#single perceptron model for binary classification
model1 = Sequential()
model1.add(Dense(1, input_shape=(2, ), activation='sigmoid'))

model1.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
plot_model(model1, show_shapes=True, to_file='model1.png')

history1 = model1.fit(X, y, verbose=0, epochs=100)
plot_loss_accuracy(history1)
plot_decision_boundary(lambda x: model1.predict(x), X, y)

# Sequential.predict_classes() was deprecated and later removed from Keras;
# thresholding predict() at 0.5 is the documented replacement and yields the
# same (n, 1) int array for a single sigmoid output unit.
y_pred = (model1.predict(X, verbose=0) > 0.5).astype("int32")
plot_confusion_matrix(model1, X, y)

#mlp model for binary classification
model2 = Sequential()
model2.add(Dense(4, input_shape=(2, ), activation='tanh'))
model2.add(Dense(2, activation='tanh'))
model2.add(Dense(1, activation='sigmoid'))
# NOTE(review): model2 is built but never compiled or fit here — presumably
# the snippet was truncated; confirm against the original source.
# Example #5
# 0
import os

import numpy as np
import pandas as pd
from sklearn.manifold import TSNE

import utilities as util

# Move to the folder containing the Titanic csv files.
os.chdir('D:/Data/DataScience/Practice/titanic')

raw = pd.read_csv("train.csv")

# Exploratory look: dimensions and column dtypes.
raw.shape
raw.info()

# One-hot encode the categorical columns.
encoded = pd.get_dummies(raw, columns=['Sex', 'Pclass', 'Embarked'])
encoded.shape
encoded.info()

# Drop identifiers, free-text/sparse columns, and the target to form the
# feature matrix.
X_train = encoded.drop(
    ['PassengerId', 'Name', 'Age', 'Ticket', 'Cabin', 'Survived'],
    axis=1,
    inplace=False)
X_train.shape

# Project the features to 2-D with t-SNE and colour points by survival.
embedding = TSNE(perplexity=30.0, n_components=2,
                 n_iter=10000).fit_transform(X_train)

util.plot_data(embedding, np.array(encoded['Survived']))
# Example #6
# 0
        # Measurements for timestep t (presumably 'r' = range readings and
        # 'b' = bearing readings — TODO confirm against data's producer).
        r_t = data['r'][t]
        b_t = data['b'][t]

        # One EKF localization step. The indexing below implies e.localize
        # returns (corrected state, corrected covariance, predicted state,
        # predicted covariance) — NOTE(review): verify against e.localize.
        localization = e.localize(u_t, r_t, b_t)
        temp_corr = localization[0].T
        temp_pred = localization[2].T
        temp_P = localization[1]
        temp_P_1 = localization[3]

        # Accumulate per-step states as rows: initialize the arrays on the
        # first iteration, then stack subsequent rows underneath.
        if t == 0:
            prediction = np.array([temp_corr])
            roboterr_data = np.array([temp_pred])
        else:
            prediction = np.vstack((prediction, np.array([temp_corr])))
            roboterr_data = np.vstack((roboterr_data, np.array([temp_pred])))

        # Visualize landmarks, the trajectory so far, and both covariances.
        plot_data(data['l'], ground_truth[:t + 1, :], prediction,
                  roboterr_data, temp_P, temp_P_1, t)

        print("Odometery Norm:", np.linalg.norm(ground_truth[t] - temp_pred))
        print("Corrected Norm:", np.linalg.norm(ground_truth[t] - temp_corr),
              '\n')

    # NOTE(review): sqrt of an elementwise square is just the absolute value,
    # so these are mean absolute errors despite the "Mean Square Error" labels
    # printed below.
    mean_odometry_error = np.mean(
        np.sqrt((ground_truth[:iterations] - roboterr_data[:iterations])**2))
    mean_corrected_error = np.mean(
        np.sqrt((ground_truth[:iterations] - prediction[:iterations])**2))

    print("Mean Square Error in Odometry: {}".format(mean_odometry_error))
    print(
        "Mean Square Error in EKF Correction: {}".format(mean_corrected_error))
# Example #7
# 0
def test_experiment_one(n_days=21,
                        data_size=12,
                        train_size=0.7,
                        max_k=50,
                        max_trade_size=0.1,
                        years_to_go_back=2,
                        initial_investment=10000,
                        gen_plot=False,
                        verbose=False,
                        savelogs=False):
    """Backtest the ML allocation strategy over one year of history.

    Re-runs the allocator every ``n_days`` trading days, starting
    ``years_to_go_back`` years ago, and compares the resulting portfolio
    against a buy-and-hold 50/50 allocation and SPY.

    :param n_days: Days between re-allocations (and forecast horizon).
    :param data_size: Months of data fed to the ML model.
    :param train_size: Fraction of data used for training.
    :param max_k: Maximum number of neighbors for kNN.
    :param max_trade_size: Max fraction of the portfolio per transaction.
    :param years_to_go_back: How many years back the test period starts.
    :param initial_investment: Starting cash for the ML portfolio.
    :param gen_plot: Whether to plot the normalized portfolio values.
    :param verbose: Whether to print progress information.
    :param savelogs: Whether to persist intermediate logs.
    :return: Tuple of total returns (conservative, ML, SPY).
    """
    import calendar  # local import: only needed for the day-of-month clamp

    today = dt.date.today()
    yr = today.year - years_to_go_back
    mo = today.month - 1  # Just temporary, take out 1 when data download is fixed.
    da = today.day - 1
    # Guard against underflow (January -> month 0, the 1st -> day 0), which
    # made dt.datetime raise ValueError, and clamp the day to the month length.
    if mo < 1:
        mo = 12
        yr -= 1
    if da < 1:
        da = 1
    da = min(da, calendar.monthrange(yr, mo)[1])

    start_date = dt.datetime(yr, mo, da)
    end_date = dt.datetime(yr + 1, mo, da)

    myport = ['AAPL', 'GLD']
    myalloc = [0.5, 0.5]

    # Portfolio values for Holding the Same Allocation (conservative case)
    actual_prices = util.load_data(myport, start_date, end_date)
    # forward- then back-fill missing prices (fillna(method=...) is deprecated)
    actual_prices = actual_prices.ffill().bfill()
    prices_SPY = actual_prices['SPY']
    actual_prices = actual_prices[myport]

    adr_cons, vol_cons, sharpe_cons, pv_cons = util.compute_returns(
        actual_prices, myalloc, sf=252.0, rfr=0.0)

    # Portfolio values for the Machine Learner: re-run the allocator
    # every n_days trading days across the year.
    ml_allocs = []
    ml_trade_dates = []

    for i in range(int(252 / n_days)):
        test_date = start_date + dt.timedelta(weeks=round(i * 52 * n_days /
                                                          252))

        if verbose:
            print(('EXPERIMENT %i - %s') %
                  (i, str(test_date.strftime("%m/%d/%Y"))))

        myalloc, trade_date = run_today(end_date=test_date,
                                        n_days=n_days,
                                        data_size=data_size,
                                        myport=myport,
                                        allocations=myalloc,
                                        train_size=train_size,
                                        max_k=max_k,
                                        max_trade_size=max_trade_size,
                                        gen_plot=gen_plot,
                                        verbose=verbose,
                                        savelogs=savelogs)

        ml_allocs.append(myalloc)
        ml_trade_dates.append(trade_date)

    ml_allocations = pd.DataFrame(data=ml_allocs,
                                  index=ml_trade_dates,
                                  columns=myport)
    all_dates = actual_prices.index

    actual_prices['Cash'] = 1.0

    # Holdings are share counts (cash has price 1.0); start fully in cash.
    ml_holdings = pd.DataFrame(data=0.0, index=all_dates, columns=myport)
    ml_holdings['Cash'] = 0.0
    # .ix was removed from pandas; use .loc with an explicit index label.
    ml_holdings.loc[ml_holdings.index[0], 'Cash'] = initial_investment
    values = ml_holdings * actual_prices
    porvals = values.sum(axis=1)

    # NOTE(review): 'Cash' is set to initial_investment on day one and never
    # reduced when shares are "bought" below — verify this is intended.
    for index, allocation in ml_allocations.iterrows():
        if index < ml_holdings.index.min():
            index = ml_holdings.index.min()
        tomorrow = ml_holdings.index.get_loc(index) + 1

        # Rebalance from the next day onward according to this allocation.
        # tomorrow is a positional offset, so index with .iloc (.loc with an
        # integer slice is invalid against a date index in modern pandas).
        for symbol in myport:
            ml_holdings.iloc[tomorrow:, ml_holdings.columns.get_loc(
                symbol)] = porvals.loc[index] * allocation[
                    symbol] / actual_prices.loc[index, symbol]

        values = ml_holdings * actual_prices
        porvals = values.sum(axis=1)

    if gen_plot:
        # Compare daily portfolio value with SPY using a normalized plot.
        df_temp = pd.concat([pv_cons, porvals, prices_SPY],
                            keys=['Conservative', 'ML', 'SPY'],
                            axis=1)
        df_temp = df_temp / df_temp.iloc[0]  # .ix was removed from pandas
        util.plot_data(df_temp, 'Daily portfolio value and SPY', 'Date',
                       'Normalized Price')

    # Total return over the period; use .iloc for positional access (plain
    # [] with a date index no longer falls back to positions).
    ret_cons = (pv_cons.iloc[-1] / pv_cons.iloc[0]) - 1
    ret_porvals = (porvals.iloc[-1] / porvals.iloc[0]) - 1
    ret_SPY = (prices_SPY.iloc[-1] / prices_SPY.iloc[0]) - 1

    return ret_cons, ret_porvals, ret_SPY
# Example #8
# 0
def run_today(start_date=dt.datetime(2015, 1, 1),
              end_date=dt.datetime(2017, 1, 1),
              n_days=21,
              data_size=12,
              myport=None,
              allocations=None,
              train_size=0.7,
              max_k=50,
              max_trade_size=0.1,
              gen_plot=False,
              verbose=False,
              savelogs=False):
    """Forecast fund returns, optimize allocations, and generate trades.

    :param start_date: Beginning of time period (recomputed from end_date and
        data_size below, so the passed value is effectively ignored).
    :param end_date: End of time period
    :param n_days: Number of days into the future to predict the daily returns of a fund
    :param data_size: The number of months of data to use in the machine learning model.
    :param myport: The funds available in your portfolio (defaults to ['AAPL', 'GOOG']).
    :param allocations: The percentage of your portfolio invested in the funds
        (defaults to [0.5, 0.5]).
    :param train_size: The percentage of data used for training the ML model, remained used for testing.
    :param max_k: Maximum number of neighbors used in kNN
    :param max_trade_size: The maximum percentage of your portfolio permitted to be traded in any one transaction.
    :param gen_plot: Boolean to see if you want to plot results
    :param verbose: Boolean to print out information during execution of application.
    :param savelogs: Boolean to persist intermediate logs.
    :return: Tuple of (new allocations, trade date).
    """
    # Mutable default arguments are shared across calls; use None sentinels so
    # every invocation gets fresh lists (caller-visible behavior unchanged).
    if myport is None:
        myport = ['AAPL', 'GOOG']
    if allocations is None:
        allocations = [0.5, 0.5]

    start_date = calc_start_date(
        end_date,
        data_size)  #end_date - dt.timedelta(weeks=int(data_size * 52/12))

    # 1) Forecast n_days-ahead returns for each fund with the kNN learner.
    if verbose: print('-' * 20 + '\nFORECAST\n' + '-' * 20)
    forecast = fc.forecast(start_date,
                           end_date,
                           symbols=myport,
                           train_size=train_size,
                           n_days=n_days,
                           max_k=max_k,
                           gen_plot=gen_plot,
                           verbose=verbose,
                           savelogs=savelogs)

    # 2) Optimize target allocations against the forecast.
    if verbose: print('\n' + '-' * 20 + '\nOPTIMIZE\n' + '-' * 20)
    target_allocations = opt.optimize_return(forecast,
                                             myport,
                                             allocations,
                                             gen_plot=gen_plot,
                                             verbose=verbose,
                                             savelogs=savelogs)

    # 3) Turn the gap between current and target allocations into orders,
    # capped at max_trade_size per transaction.
    if verbose: print('\n' + '-' * 20 + '\nORDERS\n' + '-' * 20)
    trade_date = forecast.index.max()
    orders = td.create_orders(myport,
                              allocations,
                              target_allocations,
                              trade_date=trade_date,
                              max_trade_size=max_trade_size,
                              verbose=verbose,
                              savelogs=savelogs)

    if verbose: print(orders)

    # Apply the orders to a copy so the caller's allocation list is untouched.
    new_allocations = allocations.copy()
    for i in range(orders.shape[0]):
        index = myport.index(orders.loc[i, 'Symbol'])

        if orders.loc[i, 'Action'] == 'SELL':
            new_allocations[index] -= orders.loc[i, 'Quantity']
        else:
            new_allocations[index] += orders.loc[i, 'Quantity']

    # Risk/return statistics for the current, target and new portfolios.
    adr_current, vol_current, sr_current, pv_current = util.compute_returns(
        forecast, allocations=allocations)
    adr_target, vol_target, sr_target, pv_target = util.compute_returns(
        forecast, allocations=target_allocations)
    adr_new, vol_new, sr_new, pv_new = util.compute_returns(
        forecast, allocations=new_allocations)

    if verbose:
        print("Portfolios:", "Current", "Target", "New")
        print("Daily return: %.5f %.5f %.5f" %
              (adr_current, adr_target, adr_new))
        print("Daily Risk: %.5f %.5f %.5f" %
              (vol_current, vol_target, vol_new))
        print("Sharpe Ratio: %.5f %.5f %.5f" % (sr_current, sr_target, sr_new))
        print("Return vs Risk: %.5f %.5f %.5f" %
              (adr_current / vol_current, adr_target / vol_target,
               adr_new / vol_new))
        print("\nALLOCATIONS\n" + "-" * 40)
        print("Symbol", "Current", "Target", 'New')
        for i, symbol in enumerate(myport):
            print("%s %.3f %.3f %.3f" %
                  (symbol, allocations[i], target_allocations[i],
                   new_allocations[i]))

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:

        fig, ax = plt.subplots()
        ax.scatter(vol_current, adr_current, c='green', s=15,
                   alpha=0.5)  # Current portfolio
        ax.scatter(vol_target, adr_target, c='red', s=15, alpha=0.5)  # ef
        ax.scatter(vol_new, adr_new, c='black', s=25, alpha=0.75)  # ef
        ax.set_xlabel('St. Dev. Daily Returns')
        ax.set_ylabel('Mean Daily Returns')
        ax.grid()
        ax.grid(linestyle=':')
        fig.tight_layout()
        plt.show()

        df_temp = pd.concat([pv_current, pv_target, pv_new],
                            keys=['Current', 'Target', 'New'],
                            axis=1)
        # .ix was removed from pandas; .iloc[0] is the positional equivalent.
        df_temp = df_temp / df_temp.iloc[0]
        util.plot_data(df_temp, 'Forecasted Daily portfolio value and SPY',
                       'Date-21', 'Normalized Price')

    if False:  # meh was going to plot portfolio values for the last year but trying something else now
        prior_prices = util.load_data(myport, start_date, end_date)
        # fillna(method=...) is deprecated; ffill/bfill are the replacements.
        prior_prices = prior_prices.ffill().bfill()

        prior_prices = prior_prices[myport]  # prices of portfolio symbols

        forecast_prices = forecast * prior_prices

        time_span = pd.date_range(forecast.index.min(),
                                  end_date + dt.timedelta(days=n_days * 2))
        forecast_prices = forecast_prices.reindex(time_span)
        forecast_prices = forecast_prices.shift(periods=n_days * 2)
        forecast_prices = forecast_prices.dropna()

        forecast_prices = pd.concat([prior_prices, forecast_prices], axis=0)

        adr_current, vol_current, sr_current, pv_current = util.compute_returns(
            forecast_prices, allocations=allocations)
        adr_target, vol_target, sr_target, pv_target = util.compute_returns(
            forecast_prices, allocations=target_allocations)
        adr_new, vol_new, sr_new, pv_new = util.compute_returns(
            forecast_prices, allocations=new_allocations)

        df_temp = pd.concat([pv_current, pv_target, pv_new],
                            keys=['Current', 'Target', 'New'],
                            axis=1)
        df_temp = df_temp / df_temp.iloc[0]
        util.plot_data(df_temp, 'Daily portfolio value and SPY', 'Date',
                       'Normalized Price')

    return new_allocations, trade_date
# Example #9
# 0
def do_linear_search(test=False, test_dim=32):
    """
    Sweep patch dimensions linearly and profile the EDSR model.

    Loads the EDSR super-resolution model on CUDA, runs it across patch
    dimensions 1..max_dim (read from ../config.toml), and — unless in test
    mode — aggregates mean/std of processing time and GPU memory, saving
    plots and a CSV summary into a timestamped folder under results/.

    Parameters
    ----------
    test : bool
        If True, run a single dimension (test_dim) and skip the aggregate
        plotting/saving.
    test_dim : int
        Patch dimension used when test is True.

    Returns
    -------
    None.

    """
    logger = ut.get_logger()

    device = "cuda"
    model_name = "EDSR"
    config = toml.load("../config.toml")
    run = config["run"]
    # fall back to scale 4 when the config value is empty/falsy
    scale = int(config["scale"]) if config["scale"] else 4
    # device information
    _, device_name = ut.get_device_details()
    total, _, _ = ut.get_gpu_details(
        device, "\nDevice info:", logger, print_details=False
    )
    log_message = (
        "\nDevice: "
        + device
        + "\tDevice name: "
        + device_name
        + "\tTotal memory: "
        + str(total)
    )
    logger.info(log_message)

    ut.clear_cuda(None, None)

    # Log GPU memory before and after the model becomes resident on device.
    state = "Before loading model: "
    total, used, _ = ut.get_gpu_details(device, state, logger, print_details=True)

    model = md.load_edsr(device=device)

    state = "After loading model: "
    total, used, _ = ut.get_gpu_details(device, state, logger, print_details=True)

    # =============================================================================
    #     file = open("temp_max_dim.txt", "r")
    #     line = file.read()
    #     max_dim = int(line.split(":")[1])
    # =============================================================================
    # NOTE(review): config was already loaded above; this re-reads the same file.
    config = toml.load("../config.toml")
    max_dim = int(config["max_dim"])
    if test == False:
        # full sweep over dimensions 1..max_dim
        detailed_result, memory_used, memory_free = result_from_dimension_range(
            device, logger, config, model, 1, max_dim
        )
    else:
        # single-dimension smoke test
        detailed_result, memory_used, memory_free = result_from_dimension_range(
            device, logger, config, model, test_dim, test_dim
        )
    if test == False:
        # get mean
        # get std
        mean_time, std_time = ut.get_mean_std(detailed_result)
        mean_memory_used, std_memory_used = ut.get_mean_std(memory_used)
        mean_memory_free, std_memory_free = ut.get_mean_std(memory_free)

        # make folder for saving results
        plt_title = "Model: {} | GPU: {} | Memory: {} MB".format(
            model_name, device_name, total
        )
        # timestamp-derived folder name with ':' and spaces replaced by '_'
        date = "_".join(str(time.ctime()).split())
        date = "_".join(date.split(":"))
        foldername = date
        os.mkdir("results/" + foldername)
        # plot data
        ut.plot_data(
            foldername,
            "dimension_vs_meantime",
            mean_time,
            "Dimensionn of Patch(nxn)",
            "Mean Processing Time: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean time",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdtime",
            std_time,
            "Dimension n of Patch(nxn)",
            "Std of Processing Time: LR -> SR, Scale: {} ( {} runs )".format(
                scale, run
            ),
            mode="std time",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_meanmemoryused",
            mean_memory_used,
            "Dimension n of Patch(nxn)",
            "Mean Memory used: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean memory used",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdmemoryused",
            std_memory_used,
            "Dimension n of Patch(nxn)",
            "Std Memory Used: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="std memory used",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_meanmemoryfree",
            mean_memory_free,
            "Dimension n of Patch(nxn)",
            "Mean Memory Free: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean memory free",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdmemoryfree",
            std_memory_free,
            "Dimension n of Patch(nxn)",
            "Std Memory Free: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="std memory free",
            title=plt_title,
        )
        # save data
        ut.save_csv(
            foldername,
            "total_stat",
            device,
            device_name,
            total,
            mean_time,
            std_time,
            mean_memory_used,
            std_memory_used,
            mean_memory_free,
            std_memory_free,
        )