Example No. 1
def create_cast_plot():
    """Build the campaign-cast figure from the prepared links and nodes."""
    links, nodes = get_data()
    dnd = create_graph_object(links, nodes)
    fig = create_plot(links, nodes, dnd)

    # fig.write_html("output/campaign-cast.html")

    return fig
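A minimal usage sketch, writing the figure to the path hinted at by the commented-out line above (this assumes the object returned by create_plot is a Plotly figure, which is what write_html implies):

fig = create_cast_plot()
fig.write_html("output/campaign-cast.html")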
Example No. 2
def main():

    # Initialize data and set X and y
    mcd_main = get_data()
    T40 = drop_nulls(mcd_main, ['T40.4'])
    T40_complete = impute_df(T40, KNN(5))
    y = T40_complete['T40.4']
    X = T40_complete.drop(columns=[
        'T40.4', 'year', 'county_code', 'T40.7',
        'poverty_rate_native_american', 'poverty_rate_pacific_islander',
        'college_degree', 'poverty_rate'
    ])

    # Create regression models for comparison
    l1_ratio = np.linspace(0.1, 1, 100)

    cv = 5  # number of cross-validation folds

    alphas = np.linspace(0.1, 100, 100)
    elastic = LinearDataset(X,
                            y,
                            ElasticNetCV(l1_ratio=l1_ratio, cv=cv),
                            name='ElasticNet')

    ridge = LinearDataset(X, y, RidgeCV(cv=cv, alphas=alphas), name='Ridge')
    lasso = LinearDataset(X, y, LassoCV(cv=cv), name='Lasso')
    linear = LinearDataset(X, y, LinearRegression(), name='Linear')

    models = [linear, elastic, ridge, lasso]

    # Compare models
    coef_matrix, error_matrix = model_comparison(X, y, models)

    print(tabulate(coef_matrix.round(2), headers='keys', tablefmt='pipe'))
    print(tabulate(error_matrix.round(2), headers='keys', tablefmt='pipe'))
    all_plot_actual_predicted(models)

    # Plot coefficient path for selected model
    fig, ax = plt.subplots()
    lasso.plot_coeff_paths(ax=ax, c_title='Lasso ')
    plt.show()
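The LinearDataset and model_comparison helpers are not shown in this snippet. As a rough, simplified stand-in (not the project's actual helper), a comparison over plain (name, estimator) pairs could look like this:

from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd

def model_comparison_sketch(X, y, named_estimators, cv=5):
    # Fit each estimator, then tabulate its coefficients and cross-validated RMSE.
    coefs, errors = {}, {}
    for name, est in named_estimators:
        est.fit(X, y)
        coefs[name] = pd.Series(est.coef_, index=X.columns)
        mse = -cross_val_score(est, X, y, scoring='neg_mean_squared_error', cv=cv)
        errors[name] = np.sqrt(mse).mean()
    return pd.DataFrame(coefs), pd.Series(errors, name='CV RMSE').to_frame()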
Example No. 3
def get_baseline(all=True):  # note: the 'all' parameter shadows the builtin of the same name
    years, past_values, values = get_data()
    train_x, train_y, test_x, test_y = train_test_split(past_values, values)

    # Persistence baseline: use the previous value as the prediction.
    pred = train_x
    train_score = mean_squared_error(train_y, pred)
    print('Baseline Training Score: RMSE: %s' %
          '{:,.0f}'.format(math.sqrt(train_score)))

    pred = test_x
    test_score = mean_squared_error(test_y, pred)
    print('Baseline Test Score: RMSE: %s' %
          '{:,.0f}'.format(math.sqrt(test_score)))

    bttscore = 'RMSE: %s/%s' % ('{:,.0f}'.format(
        math.sqrt(train_score)), '{:,.0f}'.format(math.sqrt(test_score)))

    if all:
        plot_y = [i for i in train_y] + [x for x in test_y]
        plot_pred = [i for i in train_x] + [x for x in test_x]
    else:
        plot_y = [None for i in train_y] + [x for x in test_y]
        plot_pred = [None for i in train_x] + [x for x in test_x]
    return np.array(plot_y), np.array(plot_pred), np.array(years), bttscore
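A possible way to use what get_baseline returns, assuming matplotlib.pyplot is imported as plt:

plot_y, plot_pred, years, score = get_baseline(all=True)
plt.plot(years, plot_y, label='actual')
plt.plot(years, plot_pred, label='baseline (previous value)')
plt.title('Persistence baseline, %s' % score)
plt.legend()
plt.show()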
Example No. 4
        xa = 'ploth'
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size %s' % (script, xa))
        exit(1)
    try:
        plot = sys.argv[4]
    except IndexError:
        plot = False

    return name, int(epochs), int(batches), plot


if __name__ == '__main__':
    X, Y = get_data()
    train_x, train_y, test_x, test_y = prep_data(X, Y)
    # Getting our command line parameters
    name, epochs, batches, plot = get_params()
    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                           batches, test_x, test_y)
    # Save models and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)
    # Test our model on both data that has been seen
    # (training data set) and unseen (test data set)
    print('Scores for %s' % title)
    # Notice that we need to specify batch_size in evaluate when we're
    # using LSTM.
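    # A hedged sketch of the evaluation that likely follows here: batch_size
    # should be passed explicitly and match the training batch size when the
    # model is an LSTM.
    train_scores = model.evaluate(train_x, train_y, batch_size=batches, verbose=2)
    test_scores = model.evaluate(test_x, test_y, batch_size=batches, verbose=2)
    print('Train:', train_scores, 'Test:', test_scores)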
Example No. 5
from prep import get_data, KMeansSoft
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans


def get_cost(m, X, responsibilities):
    # Responsibility-weighted sum of Euclidean distances from each point to
    # every cluster mean.
    k = len(m)
    dist = np.array([[x] * k for x in X])
    dist = (np.sum(((dist - m) ** 2), axis=2) ** 0.5) * responsibilities
    return dist.sum()


if __name__ == '__main__':
    X, y = get_data(num_clusters=10, num_samples=200, num_features=100)
    ks = [1, 2, 3, 5, 8, 15, 30]
    costs = []
    for k in ks:
        kmeans = KMeansSoft(k)
        # Reference fit with scikit-learn's KMeans (not used further here).
        KMeans_2 = KMeans(n_clusters=k).fit(X)
        y_pred, m, responsibilities = kmeans.fit(X, 5)
        cost = get_cost(m, X, responsibilities)
        costs.append(cost)
    # Plot the cost against k to look for an "elbow".
    plt.plot(ks, costs)
    plt.xlabel('k')
    plt.ylabel('cost')
    plt.show()
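KMeansSoft itself is not shown here. Purely as an assumption about what its responsibilities might look like, one common "soft" assignment scheme weights each point-to-mean distance exponentially and normalizes over clusters:

def soft_responsibilities(X, m, beta=1.0):
    # r[n, k] is proportional to exp(-beta * ||x_n - m_k||^2), normalized over k.
    d2 = ((X[:, None, :] - m[None, :, :]) ** 2).sum(axis=2)
    r = np.exp(-beta * d2)
    return r / r.sum(axis=1, keepdims=True)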
Example No. 6
import numpy as np

import os

# Assumed imports for this snippet (module locations inferred from the sibling
# examples): the Keras model loader and the project-local helpers.
from keras.models import load_model
from prep import get_data
from train import confs, get_params

if __name__ == '__main__':
    name, epochs, batches, _ = get_params(script='predict.py')
    model, _ = confs[name]
    mname = 'models/model-%s-%d-%d.h5' % (name, epochs, batches)
    # Loading the model.
    if os.path.exists(mname):
        model = load_model(mname)
        print('Model loaded!')
    else:
        print("Can't find %s model, train it first using 'train.py %s %d %d'" %
              (mname, name, epochs, batches))
        exit(1)  # without a trained model there is nothing to predict with
    years, _, values = get_data()
    # We're using the last value of our dataset
    # as a base for prediction.
    values, years = list(values), list(years)
    predict_on_value = values[-1:]
    predict_for_year = years[-1] + 'next'
    # This is where magic happens,
    # we get the predicted value
    x = model.predict(np.array(predict_on_value))
    # Estimating the error band: the 0.02 multiplier reflects the roughly
    # 2-3% error we saw in our best models.
    error = x[0][0] * 0.02
    print('Prediction from %s based on %s' %
          (years[-1], "{:,.0f}".format(predict_on_value[0])))
    print(
        'Prediction for %s is %s +/- %s' %
        (predict_for_year, '{:,.0f}'.format(x[0][0]), '{:,.0f}'.format(error)))
Example No. 7
        '-m',
        nargs='+',
        type=int,
        default=[1],
    )

    parser.add_argument(
        '--lspn',
        '-l',
        type=str2bool,
        default='true',
    )

    FLAGS, unparsed = parser.parse_known_args()

    data, ncat = get_data(FLAGS.dataset)

    # min_sample_leaf is the minimum number of samples at leaves
    # Define which values of min_sample_leaf to test
    min_sample_leaves = FLAGS.msl
    min_sample_leaves = [
        msl for msl in min_sample_leaves if msl < data.shape[0] / 10
    ]

    # filepath = os.path.join('missing', FLAGS.dataset + '_test.csv')
    meanspath = os.path.join('missing', FLAGS.dataset + '_means.csv')
    cispath = os.path.join('missing', FLAGS.dataset + '_cis.csv')
    Path('missing').mkdir(parents=True, exist_ok=True)

    df_all = pd.DataFrame()
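
The str2bool helper used for the --lspn flag is not defined in this snippet; a typical argparse helper of that name (an assumption about this project, with argparse assumed to be imported since the snippet already builds a parser) looks like:

def str2bool(v):
    # Map common true/false strings to a boolean for argparse.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')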

Example No. 8
if __name__ == '__main__':
    import sys
    style_img_name, content_img_name, out_img_name = sys.argv[1:4]
    # How much of the content should remain "visible":
    # a higher weight keeps more content detail.
    content_weight = 1
    try:
        content_weight = int(sys.argv[4])
    except IndexError:
        pass

    # Get style and image tensors.
    style_img, content_img = get_data(style_img_name, content_img_name)
    # Create input image.
    input_img = content_img.clone()

    # Load pretrained network.
    cnn = models.vgg19(pretrained=True).features.to(device).eval()

    # Get blended image.
    output = run_style_transfer(cnn,
                                style_img,
                                content_img,
                                input_img,
                                content_weight=content_weight)

    print('Saving an output image to %s...' % out_img_name)
    # Remove an extra dimension that we needed to add
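    # (Assumed completion of the step above.) Squeeze the batch dimension and
    # save the tensor as an image file; torchvision's save_image handles the
    # conversion from a float tensor in [0, 1].
    from torchvision.utils import save_image
    save_image(output.squeeze(0).cpu(), out_img_name)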
Example No. 9
                guess, _ = get_category(output, categories)
                stats_total[cat] += 1
                if guess == cat:
                    stats_correct[cat] += 1
        for c in categories:
            print('Test accuracy for %s on %d (%d correct) words: %d %%' %
                  (c, stats_total[c], stats_correct[c],
                   100 * stats_correct[c] / stats_total[c]))

if __name__ == '__main__':
    # Initialize our language detector
    rnn = RNN(n_letters, n_categories)
    # Initialize optimizer
    optimizer = torch.optim.Adam(rnn.parameters())
    # Initialize our loss function
    loss_function = nn.CrossEntropyLoss()
    # Get training data
    print('Getting training data...')
    categories, train_words = get_data()
    # Train using 10000 words chosen randomly for
    # each language, in general we get around 50% words
    # for each language.
    train(rnn, optimizer, loss_function, 10000, categories, train_words)
    # Get test data, don't include words from training set.
    print('Getting test data...')
    test_categories, test_words = get_data_test(
        exclude_words=[train_words[c] for c in categories])
    # Test our model on totally fresh and unique list of words.
    test(rnn, optimizer, test_categories, test_words)
    # Save our model, so we can use it for detection later.
    torch.save(rnn.state_dict(), 'model.ckpt')
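get_category is not shown in this snippet; a typical implementation for picking the predicted language from the network output (an assumption, modeled on the standard PyTorch char-RNN tutorial) is:

def get_category(output, categories):
    # Pick the highest-scoring class from the network's output tensor.
    top_value, top_index = output.topk(1)
    category_index = top_index[0].item()
    return categories[category_index], category_index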
Example No. 10
    for i in range(n):
        pair = random.choice(pairs)
        print('Question in %s: %s' % (ilang.name, pair[0].ljust(20)))
        print('Question in %s: %s' % (olang.name, pair[1].ljust(20)))
        output_words = test(encoder, decoder, pair[0], ilang, olang)
        output_sentence = ' '.join(output_words).strip()
        tick = 'V' if output_sentence == pair[1] else 'X'
        print('Our guess:%s %s' % (output_sentence.ljust(20), tick))
        print('')


if __name__ == '__main__':
    hidden_size = hidden_size  # no-op; hidden_size is presumably defined by an earlier import
    # Get maximum of 100 sentences.
    # Remember that in prep.py we only get questions that match specific criteria.
    pairs, input_lang, output_lang = get_data('en', 'spa', limit=100)
    # Building two GRUs, encoder and decoder.
    encoder = EncoderGRU(input_lang.n_words, hidden_size).to(device)
    decoder = DecoderGRU(hidden_size, output_lang.n_words).to(device)
    print('Training models...')
    train_all(pairs,
              encoder,
              decoder,
              input_lang,
              output_lang,
              900,
              print_every=100)
    print('Saving both models...')
    torch.save(encoder.state_dict(), 'encoder.ckpt')
    torch.save(decoder.state_dict(), 'decoder.ckpt')
    print('Testing with random data...')
Example No. 11
import pandas as pd
from prep import get_data
from stats import check_chi, check_normal_dist
import seaborn as sns
from scipy.stats import ttest_ind

df = get_data()

check_normal_dist(df)  # 2 columns not normally distributed: [employeecount, standardhours]
df.employeecount.describe()  # all == 1
df.standardhours.describe()  # all == 80
df.drop(columns=["employeecount", "standardhours"], inplace=True)

for col in df.columns:
    if df[col].dtype.name == "category":
        print(f"checking: {col}")
        check_chi(df.attrition, df[col])

# jobrole: dependent, H0 rejected.
# joblevel: dependent, H0 rejected.
# overtime: dependent, H0 rejected.
# department: dependent, H0 rejected.
# maritalstatus: dependent, H0 rejected.
# businesstravel: dependent, H0 rejected.
# jobinvolvement: dependent, H0 rejected.
# educationfield: dependent, H0 rejected.
# jobsatisfaction: dependent, H0 rejected.
# worklifebalance: dependent, H0 rejected.
# stockoptionlevel: dependent, H0 rejected.
# environmentsatisfaction: dependent, H0 rejected.
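check_chi comes from the local stats module and is not shown here; a plausible sketch of the test behind the "H0 rejected" lines above (an assumption, not the project's actual helper) is a chi-square test of independence on the contingency table:

from scipy.stats import chi2_contingency

def check_chi_sketch(a, b, alpha=0.05):
    # Chi-square test of independence between two categorical series.
    table = pd.crosstab(a, b)
    chi2, p, dof, _ = chi2_contingency(table)
    verdict = 'dependent, H0 rejected.' if p < alpha else 'independent, H0 not rejected.'
    print(f'{b.name}: chi2={chi2:.2f}, p={p:.4f} -> {verdict}')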
Example No. 12
def get_params(script='train.py'):
    """
    Get command line parameters.
    """
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size' % sys.argv[0])
        exit(1)
    return name, int(epochs), int(batches)


if __name__ == '__main__':
    # Getting our command line parameters
    name, epochs, batches = get_params()
    train_x, train_y, test_x, test_y, inputs, max_length, t = get_data(
        do_cleanup=True, filter_stopwords=True)
    print('Train/Test data length:', len(train_x), len(test_x))
    model, name, mp = train_model(name, train_x, train_y, epochs, batches,
                                  inputs, max_length, test_x, test_y)
    # Save model to use for classification later on
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    with open(mname + '-tokenizer.pickle', 'wb') as ts:
        pickle.dump(t, ts)
    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)
    # Test our model on both data that has been seen
    # (training data set) and unseen (test data set)
    print('Evaluation for %s' % title)
    loss, acc = model.evaluate(train_x, train_y, verbose=2)
    print('Train Accuracy: %.2f%%' % (acc * 100))
    loss, acc = model.evaluate(test_x, test_y, verbose=2)
    print('Test Accuracy: %.2f%%' % (acc * 100))
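A hedged sketch of how the saved model and tokenizer might be reused later for classification, using the mname and max_length values from the training run (the Keras import paths are assumptions):

import pickle
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

model = load_model(mname + '.h5')
with open(mname + '-tokenizer.pickle', 'rb') as ts:
    t = pickle.load(ts)
sequences = t.texts_to_sequences(['some new text to classify'])
x = pad_sequences(sequences, maxlen=max_length)
print(model.predict(x))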
Example No. 13
        self.s = 0
        self.h = 0

    def train(self, X, y):
        n = X.shape[0]

        # Average word-frequency profile for each class
        # (the /100 suggests the raw features are percentages).
        self.spam = (X[y == 1] / 100).sum(axis=0) / n
        self.ham = (X[y == 0] / 100).sum(axis=0) / n

        # Class counts, used as (unnormalized) priors in score().
        self.s = len(X[y == 1])
        self.h = len(X[y == 0])

    def score(self, X, y):
        # Predict the class with the larger log-likelihood plus log-prior:
        # row 0 is ham, row 1 is spam, and argmax over rows gives the label.
        y_pred = np.argmax(np.vstack((X @ np.log(self.ham) + np.log(self.h),
                                      X @ np.log(self.spam) + np.log(self.s))),
                           axis=0)
        return (y == y_pred).mean()


if __name__ == '__main__':
    X, y = get_data()
    X_train, y_train = X[:3000], y[:3000]
    X_test, y_test = X[3000:], y[3000:]

    nb = NB()

    nb.train(X_train, y_train)
    score = nb.score(X_test, y_test)
    print(score)
Example No. 14

def train_model(train_x, train_y, epochs, batches):
    model = get_mlp(train_x.shape[1])

    model.compile(loss='mean_squared_error',
                  optimizer='adam', metrics=['mse', 'mape'])
    model.fit(train_x, train_y, verbose=2,
              epochs=epochs, batch_size=batches)

    return model


def r2_score(y_test, y_pred):
    # R^2 = 1 - SS_res / SS_tot; SS_tot uses the mean of the observed values.
    return 1 - sum((y_test - y_pred) ** 2) / sum((y_test - y_test.mean()) ** 2)


if __name__ == '__main__':
    data = get_data()
    X, Y, years = get_xy(data, 2)
    train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.2)
    epochs, batches = 64, 2
    model = train_model(train_x, train_y, epochs, batches)

    y_pred = np.array(model.predict(test_x)).flatten()
    print(r2_score(test_y, y_pred))
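    # An aside (assuming scikit-learn is available): the hand-rolled R^2 above
    # should agree with scikit-learn's implementation.
    from sklearn.metrics import r2_score as sk_r2_score
    print('sklearn r2:', sk_r2_score(test_y, y_pred))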

    y_plt = np.array(model.predict(X).flatten())
    plt.plot(years, y_plt, label='predicted')
    plt.plot(years, Y, label='actual')
    plt.legend()
    plt.show()
Example No. 15
        'Amazon': 450,
        'Apple': 50,
        'Dell': 20,
        'Facebook': 55,
        'Google': 410,
        'Microsoft': 45,
        'Tesla': 150,
        'Twitter': 20,
        'Wallmart': 40
    }

    for cname in comp_list:

        data_path = 'data/' + cname + '/' + cname + '.csv'

        X, Y = get_data(data_path)
        train_x, train_y, test_x, test_y = prep_data(X, Y)
        # Getting our command line parameters
        #name, epochs, batches, plot=get_params()
        name = "default"
        epochs = optimal_epochs_map[cname]
        batches = 1
        plot = "ploth"
        plot = False

        # Do the training
        model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                               batches, test_x, test_y)
        # Save models and the training history for later use
        mname = 'models/' + cname + '/model-%s-%d-%d' % (name, epochs, batches)
        model.save(mname + '.h5')
Example No. 16
# Core Dash imports assumed by the app code below.
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
import dash_table
from dash.dependencies import Input, Output

# Sub Modules
from prep import get_data

from homepage import Homepage
from appA import AppA, build_graphA1, build_graphA2, build_graphA3, split_filter_part
from appB import AppB, core_layoutB, build_graphBA1, build_graphBA2, build_graphBA3
#from appB import AppB, core_layoutB, build_graphBA1, build_graphBA2, build_graphBA3 #real B

from interact import AppC, build_graphC1

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.COSMO])

app.config.suppress_callback_exceptions = True

df, df_num, df_noncumun_whole, noncumun_dfs = get_data()

app.layout = html.Div(
    [dcc.Location(id='url', refresh=False),
     html.Div(id='page-content')])


#NavBar
@app.callback(Output('page-content', 'children'), [Input('url', 'pathname')])
def display_page(pathname):
    if pathname == '/Pipe-dreams':
        return AppA()
    if pathname == '/Scattergories':
        return AppB()
    if pathname == '/Interact':
        return AppC()
    # Assumed fallback: any other path (including '/') shows the home page.
    return Homepage()
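The snippet ends before the server is started; the usual Dash entry point (assumed to follow in the original file) is:

if __name__ == '__main__':
    app.run_server(debug=True)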
Example No. 17
    except ValueError:
        print('Usage: %s model_name epochs batch_size filename' % sys.argv[0])
        exit(1)
    filename = None
    if predict:
        try:
            filename = sys.argv[4]
        except IndexError:
            pass
    return name, int(epochs), int(batches), filename


if __name__ == '__main__':
    # Getting our command line parameters
    name, epochs, batches, _ = get_params()
    # Getting our images correctly converted
    # to the right format of arrays/matrices.
    train_x, train_y, inputs, classes = get_data()
    # Time for training!
    model, name, mp, train_x, train_y, test_x, test_y = train_model(
        name, train_x, train_y, epochs, batches, inputs, classes)
    # Save model to use for classification later on.
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)
    print('Evaluation for %s' % title)
    loss, acc = model.evaluate(train_x, train_y, verbose=2)
    print('Train accuracy: %.2f%%' % (acc * 100))
    loss, acc = model.evaluate(test_x, test_y, verbose=2)
    print('Test accuracy: %.2f%%' % (acc * 100))
Example No. 18
    # Loss is our loss or cost function - mean_squared_error
    # is a good choice assuming we don't have a lot of "outliers"
    # in our dataset.
    # Adam optimizer works great for most problems.
    #
    # Metrics are the loss metrics we want available for each epoch,
    # so we can review how we are doing at each training stage.
    # mse is mean_squared_error, mape is mean_absolute_percentage_error.
    model.compile(loss='mean_squared_error', optimizer='adam',
                  metrics=['mse', 'mape'])
    # Here we start the training.
    history = model.fit(train_x, train_y, verbose=2,
                        epochs=epochs, batch_size=batches)
    return model, name, mparams, history

if __name__ == '__main__':
    # Getting data formatted as a supervised problem
    years, past_values, values = get_data()
    X, Y = past_values, values
    # Split data into two parts: one for training, one for testing.
    # The test part won't be seen by the model during training, so it
    # gives us some idea of how the model performs on unseen data.
    train_x, train_y, test_x, test_y = train_test_split(X, Y)
    # Getting our command line parameters
    name, epochs, batches, plot = get_params()
    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs, batches)
    # Save models and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    with open(mname + '-history.pickle', 'wb') as ms:
        pickle.dump(history.history, ms)
    print()
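    # A hedged aside: the pickled history can be reloaded later to inspect the
    # training curve, e.g. (matplotlib imported here just for the sketch):
    import matplotlib.pyplot as plt
    with open(mname + '-history.pickle', 'rb') as ms:
        hist = pickle.load(ms)
    plt.plot(hist['loss'], label='training loss (MSE)')
    plt.legend()
    plt.show()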
Example No. 19
as well as a custom CNN model if it's available.

The custom CNN model has to be available in the model.ckpt file.
You can generate that file by running the ./train.py script.
"""
import torch
from torchvision import transforms, models
from PIL import Image
import os.path
from ast import literal_eval

from train import BeaverNet
from prep import get_data

# Getting the mapping between classes index and
# their names.
_, _, cifar100_classes = get_data()


def get_imgnet_classes():
    """
    Get labels/classes for ImageNet (AlexNet).
    Source: https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a
    """
    # The file holds a plain Python dict literal, so literal_eval parses it
    # without the risks of eval().
    return literal_eval(open('imagenet1000_clsid_to_human.txt').read())


def prep_pretrained(imgf):
    """
    Process an image so it can be used with
    pre-trained models available in PyTorch
    (including AlexNet).