Example #1
def load_dataset(id=None):
    """Load and return the embeddings of the new dataset
    """
    if not id:
        return jsonify({})
    model.load_data(id)
    return jsonify({})
Example #2
def runTests():
    model.load_data('data/train.csv', 'data/test.csv')

    methodPreprocess_list = [1, 2]

    methodFeatureExtraction = [1, 2]
    maxFeatures_list = [16000, 20000, 25000]
    ngrams_list = [(1, 3), (1, 4), (1, 5), (1, 2)]
    maxdf_list = [1.0, 0.95, 0.9, 0.85]
    mindf_list = [0.0001]
    binary_list = [True]

    for methodPreprocess in methodPreprocess_list:
        train_tweets, test_tweets = model.preprocess_data(
            method=methodPreprocess)
        for maxfeat in maxFeatures_list:
            for ng in ngrams_list:
                for maxd in maxdf_list:
                    for mind in mindf_list:
                        for bin in binary_list:
                            for featureExt in methodFeatureExtraction:
                                X, y, test, feature_names = model.feature_extraction(
                                    train_tweets,
                                    test_tweets,
                                    maxFeatures=maxfeat,
                                    ngrams=ng,
                                    maxdf=maxd,
                                    mindf=mind,
                                    isbinary=bin,
                                    method=featureExt)
                                print('\n\n\n')
                                print(
                                    "#########################################################################"
                                )
                                print(
                                    "##############################################"
                                )
                                print(
                                    'Params preprocessing and features extraction:'
                                )
                                print(
                                    '{0}, {1}, {2}, {3}, {4}, {5}, {6}'.format(
                                        methodPreprocess, maxfeat, ng, maxd,
                                        mind, bin, featureExt))
                                print(
                                    "##############################################"
                                )
                                gsearch(X, y)
                                print(
                                    "#########################################################################"
                                )
                                print('\n\n\n')
Example #3
def make_plots(whichx, fname):

    x, y, xerr, yerr = load_data(whichx)

    with h5py.File("%s_samples.h5" % whichx) as f:
        samp = f["samples"][...]
    m, c, sig = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
               zip(*np.percentile(samp, [16, 50, 84], axis=0)))
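    # each of m, c and sig is (median, upper error, lower error) from the 16th/50th/84th percentiles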
    pars = [m[0], c[0], sig[0]]
    print(pars)

    plt.clf()
    plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt="k.", capsize=0, ecolor=".7")
    plt.plot(x, model1(pars, x), "k")
    ndraws = 100
    p0s = np.random.choice(samp[:, 0], ndraws)
    p1s = np.random.choice(samp[:, 1], ndraws)
    p2s = np.random.choice(samp[:, 2], ndraws)
    for i in range(ndraws):
        y = p0s[i] * x + p1s[i]
        plt.plot(x, (y + p2s[i]), "k", alpha=.1)
    plt.savefig("mcmc_%s_%s" % (whichx, fname))
    labels = ["$m$", "$c$", "$\sigma$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
Example #4
def test_load_data():
    X, y = model.load_data(file_path)

    assert type(X) == np.ndarray
    assert X[0] == "data/IMG/center_2016_12_01_13_30_48_287.jpg"
    N = X.shape[0]
    assert y.shape == (N, )
Example #5
def make_plots(whichx, fname):

    x, y, xerr, yerr = load_data(whichx)

    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][...]
    m, c, sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0]]
    print(pars)

    plt.clf()
    plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt="k.", capsize=0, ecolor=".7")
    plt.plot(x, model1(pars, x), "k")
    ndraws = 100
    p0s = np.random.choice(samp[:, 0], ndraws)
    p1s = np.random.choice(samp[:, 1], ndraws)
    p2s = np.random.choice(samp[:, 2], ndraws)
    for i in range(ndraws):
        y = p0s[i] * x + p1s[i]
        plt.plot(x, (y + p2s[i]), "k", alpha=.1)
    plt.savefig("mcmc_%s_%s" % (whichx, fname))
    labels = ["$m$", "$c$", "$\sigma$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
Example #6
def main():
    """
    main
    """
    models = {
        "resnet": model.ResNet50Benchmark,
        "mobilenet": model.ResNet50Benchmark,
        "vgg": model.ResNet50Benchmark,
        "googlenet": model.ResNet50Benchmark,
        "shufflenet": model.ResNet50Benchmark,
        "MobileNet_SSD": model.ResNet50Benchmark,
        "deeplab": model.ResNet50Benchmark,
        "rcnn": model.RcnnBenchmark,
        "yolo": model.YoloBenchmark,
        "transformer": model.TransformerBenchmark,
        "bert": model.BertBenchmark
    }
    args = parse_args()
    model = models.get(args.model)()
    model.set_config(use_gpu=args.device == 'gpu',
                     model_dir=args.model_dir,
                     model_filename=args.model_filename,
                     params_filename=args.params_filename,
                     use_tensorrt=args.use_tensorrt,
                     use_anakin=args.use_anakin,
                     model_precision=args.model_precision)
    tensor = model.load_data(args.filename)
    warmup = args.warmup
    repeat = args.repeat
    model.run(tensor, warmup, repeat)
Example #7
def split_batch_worker_task(ps, worker_index, num_workers, batch_size=64):
    # Download ds.
    ds = model.load_data()

    # Initialize the model.
    net = selected_model()
    keys = net.get_weights()[0]
    block_size = batch_size // num_workers
    start = worker_index * block_size
    end = batch_size if worker_index == num_workers - 1 else start + block_size
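    # this worker handles the slice [start, end) of each batch; the last worker also takes the remainder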

    worker_iter = 0

    while True:
        if ray.get(ps.blocked.remote(worker_iter)):
            continue

        # Get the current weights from the parameter server.
        weights = ray.get(ps.pull.remote(keys))
        net.set_weights(keys, weights)

        # Compute an update and push it to the parameter server.
        xs, ys = ds.train.next_batch(batch_size)
        xs = xs[start:end]
        ys = ys[start:end]
        gradients = net.compute_update(xs, ys)
        ps.push.remote(keys, gradients, worker_index, worker_iter)

        worker_iter += 1
Example #8
def main():
    # dataset has format like [user_id, song_id, play_count]
    file = 'train_triplets.txt'

    print("Loading data...")
    load_data(file)

    print("Starting evaluation...")
    calc_neighbours()
    print("Finished evaluations.")

    print_top_songs_for_user(1)

    print("Starting cross validation...")
    print("RMSE result: ", str(rmse(train_set, test_set)))
    print("MAE result: ", str(mae(train_set, test_set)))
    print("NDCG result: ", str(ndcg(train_set, test_set)))
Example #9
def test_generator():
    X, y = model.load_data(file_path)
    X, y = next(model.generator(X, y, train=False))

    assert type(X) == np.ndarray
    assert X.shape[1:] == (160, 320, 3)
    N = len(X)
    assert y.shape == (N, )
Example #10
 def __init__(self,
              worker_index,
              num_workers,
              batch_size=256,
              learning_rate=1e-4):
     self.worker_index = worker_index
     self.num_workers = num_workers
     self.batch_size = batch_size
     self.block_size = batch_size // num_workers
     self.ds = model.load_data()
     self.net = selected_model(learning_rate)
Example #11
def main():
    in_arg = get_input_args()  # Creates and returns command line arguments

    print('\nData Directory:\n', in_arg.data_directory, '\n')

    print('Optional Command Line Arguments:\n',
          'Save Checkpoint [--save_dir]: ', in_arg.save_dir, '\n',
          'Pretrained Network [--arch]: ', in_arg.arch, '\n',
          'Learning Rate [--learning_rate]: ', in_arg.learning_rate, '\n',
          'Hidden Units [--hidden_units]: ', in_arg.hidden_units, '\n',
          'Epochs [--epochs]: ', in_arg.epochs, '\n', 'GPU [--gpu]: ',
          in_arg.gpu, '\n')

    if 'checkpoints' not in listdir(
    ):  # makes checkpoints folder if it doesn't already exist
        mkdir('checkpoints')

    train_dir, valid_dir, test_dir = util.get_data(
        in_arg.data_directory
    )  # Returns Train, Validation and Test Directories

    transformed_train, transformed_valid, transformed_test = mod.transform_data(
        train_dir, valid_dir, test_dir)  # Returns transformed datasets

    train_loader, valid_loader, test_loader = mod.load_data(
        transformed_train, transformed_valid,
        transformed_test)  # Returns Data loaders

    model = mod.build_model(
        util.label_count(train_dir), in_arg.hidden_units, in_arg.arch,
        transformed_train.class_to_idx)  # Returns built model

    epochs = in_arg.epochs  # Epochs initially set by command line argument in_arg.epochs.  Can be changed with m.load_checkpoint()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=in_arg.learning_rate)

    use_gpu = mod.use_gpu(model,
                          in_arg.gpu)  # Returns True or False for GPU use

    mod.train(
        model, criterion, optimizer, train_loader, valid_loader, use_gpu,
        in_arg.epochs
    )  # Trains the model.  Prints Training Loss, Validation Loss & Validation Accuracy

    mod.save_checkpoint(
        in_arg.arch,
        model.classifier.state_dict(), transformed_train.class_to_idx,
        util.label_count(train_dir), in_arg.hidden_units, in_arg.epochs,
        in_arg.save_dir
    )  # Saves classifier and other model parameters to checkpoint
Example #12
def make_plots(whichx, fname):

    x, y, xerr, yerr = load_data(whichx)

    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][...]
    m, c, sig, Y, V, P = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
               zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0], Y[0], V[0], P[0]]
    print(pars)

    labels = ["$m$", "$c$", "$\sigma$", "$Y$", "$V$", "$P$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
Example #13
def make_plots(whichx, fname):

    x, y, xerr, yerr = load_data(whichx)

    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][...]
    m, c, sig, Y, V, P = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                             zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0], Y[0], V[0], P[0]]
    print(pars)

    labels = ["$m$", "$c$", "$\sigma$", "$Y$", "$V$", "$P$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
Example #14
def worker_task(ps, worker_index, num_workers, batch_size=64):
    # Download ds.
    ds = model.load_data()

    # Initialize the model.
    net = selected_model()
    keys = net.get_weights()[0]
    while True:
        # Get the current weights from the parameter server.
        weights = ray.get(ps.pull.remote(keys))
        net.set_weights(keys, weights)

        # Compute an update and push it to the parameter server.
        xs, ys = ds.train.next_batch(batch_size)
        gradients = net.compute_update(xs, ys)
        ps.push.remote(keys, gradients)
Example #15
def MCMC(whichx, nsamp, fname, nd, bigdata, burnin=500, run=500):

    rho_pars = [-2., 6., .0065]
    logg_pars = [-1.850, 7., .0065]
    pars_init = logg_pars
    if whichx == "rho":
        pars_init = rho_pars

    x, y, xerr, yerr = load_data(whichx, nd=nd, bigdata=True)

    # format data and generate samples
    obs = np.vstack((x, y))
    u = np.vstack((xerr, yerr))
    up = np.vstack((xerr, yerr))
    um = np.vstack((xerr*.5, yerr*.5))
#     s = generate_samples_log(obs, up, um, nsamp) # FIXME
    s = generate_samples(obs, u, nsamp) # FIXME
#     if nsamp == 1:
#         s[0, :, :] = x
#         s[1, :, :] = y
#     print np.shape(s)
#     assert 0

    # set up and run emcee
    ndim, nwalkers = len(pars_init), 32
    pos = [pars_init + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(s, obs, u))
    print "burning in..."
    pos, _, _, = sampler.run_mcmc(pos, burnin)
    sampler.reset()
    print "production run..."
    sampler.run_mcmc(pos, run)
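    # discard the first 50 steps of each walker as extra burn-in, then flatten the chains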
    samp = sampler.chain[:, 50:, :].reshape((-1, ndim))
    m, c, sig = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
               zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0]]

    # save samples
    f = h5py.File("%s_samples_%s.h5" % (whichx, fname), "w")
    data = f.create_dataset("samples", np.shape(samp))
    data[:, 0] = samp[:, 0]
    data[:, 1] = samp[:, 1]
    data[:, 2] = samp[:, 2]
    f.close()
Example #16
def MCMC(whichx, nsamp, fname, nd, bigdata, burnin=500, run=500):

    rho_pars = [-2., 6., .0065]
    logg_pars = [-1.850, 7., .0065]
    pars_init = logg_pars
    if whichx == "rho":
        pars_init = rho_pars

    x, y, xerr, yerr = load_data(whichx, nd=nd, bigdata=True)

    # format data and generate samples
    obs = np.vstack((x, y))
    u = np.vstack((xerr, yerr))
    up = np.vstack((xerr, yerr))
    um = np.vstack((xerr * .5, yerr * .5))
    #     s = generate_samples_log(obs, up, um, nsamp) # FIXME
    s = generate_samples(obs, u, nsamp)  # FIXME
    #     if nsamp == 1:
    #         s[0, :, :] = x
    #         s[1, :, :] = y
    #     print np.shape(s)
    #     assert 0

    # set up and run emcee
    ndim, nwalkers = len(pars_init), 32
    pos = [pars_init + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(s, obs, u))
    print "burning in..."
    pos, _, _, = sampler.run_mcmc(pos, burnin)
    sampler.reset()
    print "production run..."
    sampler.run_mcmc(pos, run)
    samp = sampler.chain[:, 50:, :].reshape((-1, ndim))
    m, c, sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0]]

    # save samples
    f = h5py.File("%s_samples_%s.h5" % (whichx, fname), "w")
    data = f.create_dataset("samples", np.shape(samp))
    data[:, 0] = samp[:, 0]
    data[:, 1] = samp[:, 1]
    data[:, 2] = samp[:, 2]
    f.close()
Example #17
def predict_frame(frame: pd.DataFrame) -> str:
    """Takes in a pd.DataFrame from an uploaded .csv file.

    [For the purposes of the exercise] -- Drops the Y column for predictions.

    Reindexes the DataFrame according to a one-hot encoded representation of
    all of the columns.

    Applies the loaded logistic regression model across the reshaped numpy
    array representation of each row, creating a new pd.Series column in the
    process. Rounds the values for nicer viewing.

    ---
    The reason why reindex_frame is used for the application and is not
    returned is to reduce the load on to_json() when it returns the resulting
    JSON string from the predictions (instead of returning all of the OHE cols
    as well, it returns the original columns only).
    ---

    Finally, returns the pd.DataFrame as a JSON string back to the /predict/csv
    API route.

    Arguments:
        frame {pd.DataFrame} -- pd.DataFrame of the uploaded .csv file.

    Returns:
        str -- Record-oriented JSON string containing all original columns.

    """
    model = joblib.load("models/LogReg.pkl")
    frame = load_data(frame)
    frame.drop("y", axis=1, inplace=True)
    reindex_frame = frame.reindex(columns=TRAINING_COLUMNS.columns,
                                  fill_value=0)
    frame["yes_prob"] = reindex_frame.apply(
        lambda x: model.predict_proba(np.array(x).reshape(1, -1))[0][1],
        axis=1).round(3)
    return frame.to_json(orient="records")
Example #18
def make_plots(whichx, fname):

    x, y, xerr, yerr = load_data(whichx)

    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][:, :-1]

    if fname == "f_extra" or "short":
        m, c, ln_sig, lnf = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
                   zip(*np.percentile(samp, [16, 50, 84], axis=0)))
        pars = [m[0], c[0], ln_sig[0], lnf[0]]
        labels = ["$m$", "$c$", "$\sigma$", "$f$"]
    else:
        m, c, ln_sig = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
                   zip(*np.percentile(samp, [16, 50, 84], axis=0)))
        pars = [m[0], c[0], ln_sig[0]]
        labels = ["$m$", "$c$", "$\ln(\sigma)$"]

    print(pars)

    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
Example #19
def make_plots(whichx, fname):

    x, y, xerr, yerr = load_data(whichx)

    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][:, :-1]

    if fname == "f_extra" or "short":
        m, c, ln_sig, lnf = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samp, [16, 50, 84], axis=0)))
        pars = [m[0], c[0], ln_sig[0], lnf[0]]
        labels = ["$m$", "$c$", "$\sigma$", "$f$"]
    else:
        m, c, ln_sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                           zip(*np.percentile(samp, [16, 50, 84], axis=0)))
        pars = [m[0], c[0], ln_sig[0]]
        labels = ["$m$", "$c$", "$\ln(\sigma)$"]

    print(pars)

    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
Example #20
    request = "INSERT INTO {0} (label, mean, median, sd, variance, iqr, mode, min, max) " \
              "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)".format(db_setup.table_name)
    values = (dataset_label, features[columns[0]], features[columns[1]],
              features[columns[2]], features[columns[3]], features[columns[4]],
              features[columns[5]], features[columns[6]], features[columns[7]])

    print(dataset_label)

    db_setup.cursor.execute(request, values)

    db_setup.db.commit()
    db_setup.close_db()

    model.train()
    test_data = pd.DataFrame(columns=model.get_columns())


if __name__ == '__main__':
    getAllLabels()
    model.load_data()
    model.prepare_data()
    model.load_models()

    app.run(host=config.HOST_ADDRESS,
            port=config.HOST_PORT,
            debug=config.DEBUG,
            threaded=config.THREADED)
    model.save_models()
    print("Server Closed")
Example #21
#!/usr/bin/env python3
import mxnet as mx
from model import load_data, prepare_data, load_LSTM_net, load_CNN_net, train_k_fold, plot_k_fold, train_on_all_data
from mxnet import gluon
from mxnet.gluon import loss as gloss

"Prepare the data"
all_data = load_data()
train_data = all_data[0:int(len(all_data) * 0.75)]
test_data = all_data[int(len(all_data) * 0.75):]
vocab, train_features, train_labels, test_features, test_labels = prepare_data(
    train_data, test_data, 80)

"Load the LSTM model, set model parameters"
embed_size, num_hiddens, num_layers, ctx, bidirectional, fasttext = 300, 60, 2, mx.gpu(
), True, True
net = load_LSTM_net(fasttext, vocab, embed_size, num_hiddens, num_layers, ctx,
                    bidirectional)  # load the LSTM Model

# "Load the CNN model, set model parameters"
# embed_size, kernel_sizes, nums_channels, ctx, fasttext = 300, [2, 3, 4, 5], [65, 65, 65, 65], mx.gpu(), True
# net = load_CNN_net(fasttext, vocab, embed_size, kernel_sizes, nums_channels, ctx)

"Define training parameter"
learning_rate, num_epochs, K, batch_size = 0.005, 65, 5, 64
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': learning_rate})
loss = gloss.SoftmaxCrossEntropyLoss()

"K-fold cross-validation"
result_train, result_valid = train_k_fold(net, K, train_features, train_labels,
Example #22
if __name__ == "__main__":

    plotpar = {'axes.labelsize': 18,
               'text.fontsize': 26,
               'legend.fontsize': 18,
               'xtick.labelsize': 18,
               'ytick.labelsize': 18,
               'text.usetex': True}
    plt.rcParams.update(plotpar)

    whichx = str(sys.argv[1]) # should be either "rho" or "logg"
    fname = str(sys.argv[2]) # mixture, f_extra, f, test, simple

#     x, y, xerr, yerr = load_data(whichx, bigdata=True)
    x, y, xerr, yerr = load_data(whichx, bigdata=False)

    # load chains
    with h5py.File("%s_samples_%s.h5" % (whichx, fname), "r") as f:
        samples = f["samples"][...]
    samples = samples.T

    fractional, extra = False, False
    if fname == "f":
       fractional = True
    elif fname == "f_extra" or "short":
       extra = True
    make_flicker_plot(x, xerr, y, yerr, samples, whichx, fname, 10000,
                              fractional=fractional, extra=extra)
#     make_inverse_flicker_plot(x, xerr, y, yerr, samples, whichx, fname, 1000,
#                               fractional=fractional, extra=extra)
Example #23
#!/usr/bin/env python3
import mxnet as mx
from model import load_data, create_idx_dataset, get_centers_and_contexts, get_negatives, get_batch, check_lenghth, create_data_iter, train

# User define
batch_size = 512
max_window_size = 3
k = 5  # number of negative sampling
lr = 0.001  # learning rate
num_epoch = 25
embed_size = 150

raw_dataset = load_data()
counter, idx_to_token, token_to_idx, num_tokens, subsampled_dataset = create_idx_dataset(
    raw_dataset, 0.75)
all_centers, all_contexts = get_centers_and_contexts(subsampled_dataset,
                                                     max_window_size)
all_negatives = get_negatives(counter, num_tokens, all_centers, all_contexts,
                              0.75, idx_to_token,
                              k)  # negative sampling k words
check_lenghth(all_centers, all_contexts, all_negatives)
data_iter = create_data_iter(get_batch, batch_size, all_centers, all_contexts,
                             all_negatives)
train(embed_size, idx_to_token, lr, num_epoch, mx.gpu(), data_iter, batch_size)
Example #24
import model
from scipy.sparse import coo_matrix, hstack
#
#model.load_data('data/train.csv', 'data/test.csv')
#train_tweets, test_tweets = model.preprocess_data(method=1)
#dictFeatures_train, dictFeatures_test, y = model.new_extractFeatures(train_tweets, test_tweets)
#results, s = model.new_gsearchRidge(dictFeatures_train, dictFeatures_test, y, nSplits = 3, testSize=0.4)
#
#
#
#clfs = model.new_train3(dictFeatures_train, y)
#train_prediction, l = model.new_predict3(clfs, dictFeatures_train, y=y)
#test_prediction, l = model.new_predict3(clfs, dictFeatures_test, y=None, clfsLabels=l)
#model.saveResults('output/xxx1.csv', test_prediction)

model.load_data('data/train.csv', 'data/test.csv')
train_tweets, test_tweets = model.preprocess_data(method=1)
emo_train, emo_test, sent_train, sent_test, k_train, k_test, w_train, w_test = model.new_extract_addicional_features(
)

l_tfidf = []
l_cv = []

print()
print()
for mF in [
        12500, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 55000,
        60000
]:
    print('Max Features = {0}'.format(mF))
    for maxd in [0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2]:
Example #25
def prepare():
    model.load_data()
    model.prepare_data()
    model.load_models()
Example #26
from model import load_data,NeuralNetwork

x_train, x_test, y_train, y_test = load_data(random_state = 41)

#show_data(x_train, y_train)

model = NeuralNetwork()
model.create_posla_net()

model.train(x_train= x_train, y_train = y_train, epochs= 50, learning_rate=1e-4, batch_size= 256)

# model.evaluate(x_test, y_test)
# model.show_resualt()
# #model.save_model(path = './model_data/VGG_model_e50_lr4_bs256.h5')
#model.load_model(path = './model_data/VGG_model.h5')
model.show_prediction(x_test, y_test)
model.evaluate(x_test, y_test)
Example #27
__author__ = 'zhengwang'

from model import load_data, NeuralNetwork

input_size = 120 * 320
data_path = "training_data/*.npz"

X_train, X_valid, y_train, y_valid = load_data(input_size, data_path)

# train a neural network
layer_sizes = [input_size, 32, 4]
nn = NeuralNetwork()
nn.create(layer_sizes)
nn.train(X_train, y_train)

# evaluate on train data
train_accuracy = nn.evaluate(X_train, y_train)
print("Train accuracy: ", "{0:.2f}%".format(train_accuracy * 100))

# evaluate on validation data
validation_accuracy = nn.evaluate(X_valid, y_valid)
print("Validation accuracy: ", "{0:.2f}%".format(validation_accuracy * 100))

# save model
model_path = "saved_model/nn_model.xml"
nn.save_model(model_path)
Example #28
# From now on, everything the model does is in the directory
# corresponding to this particular dataset
os.chdir(args.dataset)

# path to saved version of trained model
load_path = os.path.join('checkpoints', 'checkpoint')

# check if a model has been previously trained
already_trained = os.path.exists(load_path)
if not (args.train or already_trained):
    check_if_ok_to_continue('Model has not been trained. '
                            'Train it now (this may take several hours)? ')
    args.train = True

dataset = model.load_data(args.dataset)
if args.train:
    model.run_training(dataset)

# predict a rating for the user
if args.user_id and (args.movie or args.top):
    instance = dataset.get_ratings(args.user_id)
    ratings = data.unnormalize(instance.ravel())
    output = model.predict(instance, dataset).ravel()
    if args.movie:
        col = dataset.get_col(args.movie)
        rating = output[col]

        # purty stars
        num_stars = int(round(rating * 2))
        stars = ''.join(u'\u2605' for _ in range(num_stars))
Example #29
def MCMC(whichx, nsamp, fname, nd, extra, f, bigdata, burnin=500, run=1000):
    """
    nsamp (int) = number of samples.
    whichx (str) = logg or rho.
    fname (str) = the name for saving all output
    nd (int) = number of data points (for truncation).
    If this is zero, all the data are used.
    bigdata (boolean) which data file to use.
    """

    # set initial parameters
    if extra:
        rho_pars = [-1.793214679, 5.34215688, 0.02334097, .0002600777]
        logg_pars = [-1.02143776, 5.69156647, .24239756, .049233887]
    else:
        rho_pars = [-1.69293833, 5.1408906, .0065]
        logg_pars = [-1.05043614, 5.66819525, .0065]
    pars_init = logg_pars
    if whichx == "rho":
        pars_init = rho_pars

    # load the data
    x, y, xerr, yerr = load_data(whichx, nd=nd, bigdata=False)

    # format data and generate samples
    obs = np.vstack((x, y))
    u = np.vstack((xerr, yerr))
    up = np.vstack((xerr, yerr))
    um = np.vstack((xerr*.5, yerr*.5))
    s = generate_samples(obs, u, nsamp)

    # set up and run emcee
    ndim, nwalkers = len(pars_init), 32
    pos = [pars_init + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(s, obs, u, extra, f))
    print "burning in..."
    pos, _, _, _ = sampler.run_mcmc(pos, burnin)
    sampler.reset()
    print "production run..."
    sampler.run_mcmc(pos, run)

    # load likelihood
    lls = sampler.blobs
    flat_lls = np.reshape(lls, (np.shape(lls)[0]*np.shape(lls)[1]))
    samp = np.vstack((sampler.chain[:, :, :].reshape(-1, ndim).T, flat_lls)).T

    sa = samp.T[0]
    print(type(sa))
    print(np.isfinite(sa))
    print(sa)
    print(np.shape(sa), np.shape(sa[np.isfinite(sa)]))
    # save samples
    f = h5py.File("%s_samples_%s.h5" % (whichx, fname), "w")
    data = f.create_dataset("samples", np.shape(samp))
    data[:, 0] = samp[:, 0]
    data[:, 1] = samp[:, 1]
    data[:, 2] = samp[:, 2]
    data[:, 3] = samp[:, 3]
    print "samp shape = ", np.shape(samp), np.shape(data)
    if extra:
        data[:, 4] = samp[:, 4]
    f.close()
Example #30
    plotpar = {
        'axes.labelsize': 18,
        'text.fontsize': 26,
        'legend.fontsize': 18,
        'xtick.labelsize': 18,
        'ytick.labelsize': 18,
        'text.usetex': True
    }
    plt.rcParams.update(plotpar)

    whichx = str(sys.argv[1])  # should be either "rho" or "logg"
    fname = str(sys.argv[2])  # mixture, f_extra, f, test, simple

    #     x, y, xerr, yerr = load_data(whichx, bigdata=True)
    x, y, xerr, yerr = load_data(whichx, bigdata=False)

    # load chains
    with h5py.File("%s_samples_%s.h5" % (whichx, fname), "r") as f:
        samples = f["samples"][...]
    samples = samples.T

    fractional, extra = False, False
    if fname == "f":
        fractional = True
    elif fname in ("f_extra", "short"):
        extra = True
    make_flicker_plot(x,
                      xerr,
                      y,
                      yerr,
Example #31
import tensorflow as tf
from model import load_data, load_loopable_model

x, y, output, keep_prob = load_loopable_model()

loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=y))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

data_x, data_y = load_data(x, y)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess.run(init)

    correct_prediction = tf.equal(tf.argmax(output, 1), y)

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.histogram("normal/accuracy", accuracy)
    try:
        for i in range(9999):
            sess.run(train_step,
                     feed_dict={
                         x: data_x,
                         y: data_y,
                         keep_prob: 0.75
                     })

            train_accuracy = sess.run(accuracy,
                                      feed_dict={
Example #32
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input
import numpy as np
from model import MnistModel, load_data

# Press the green button in the gutter to run the script.
if __name__ == '__main__':

    inputs = Input(shape=(28, 28))
    mnist = MnistModel(inputs=inputs)

    # plot_model(mnist, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

    (x_train, y_train), (x_test, y_test) = load_data()

    x_pred = x_train[:15]
    y_pred = y_train[:15]

    mnist.my_compile()
    mnist.my_fit(x=x_train, y=y_train)
    mnist.summary()
    acc = mnist.evaluate(x=x_test, y=y_test)
    print(acc)

    y_hat = mnist.predict(x_pred)

    print(np.argmax(y_hat))

    # # Plot a random sample of 10 test images, their predicted labels and ground truth
Example #33
import model
import process
from datetime import datetime, timedelta
from sys import exit

model.load_data()

if not model.is_running_challenge():
    create_challenge = input('No challenge running. Create a new one? (y/n)')

    if create_challenge == 'y':
        duration = input('What is the duration of the new challenge (in weeks): ')
        model.initialize_challenge(duration)
    else:
        exit()

start_date = model.get_reference_date()
end_date = start_date + timedelta(weeks=model.get_duration())

if datetime.now() > end_date:
    print('Current challenge is over!')
    print('Last challenge started at {:%Y-%m-%d} '
          'and finished at {:%Y-%m-%d}'.format(start_date, end_date))

    my_acc = model.get_accumulated_minutes()
    opp_acc = process.compute_acc(model.get_reference_date(), duration=model.get_duration(), reference_end=end_date)

    if my_acc > opp_acc:
        print('You won it!')
    else:
        print('You lost it :(... better luck next time!')
Example #34
def train_wgan(batch_size, epochs, image_shape):

    enc_model_1 = model.make_encoder()
    img = Input(shape=input_shape)
    z = enc_model_1(img)
    encoder1 = Model(img, z)

    z = Input(shape=(latent_dim,))
    modelG = model.construct_generator()
    gen_img = modelG(z)
    generator = Model(z, gen_img)
    critic = model.construct_critic(image_shape)

    critic.trainable = False
    img = Input(shape=input_shape)
    z = encoder1(img)

    img_ = generator(z)
    real = critic(img_)
    optimizer = RMSprop(0.0002)
    gan = Model(img, [real, img_])
    gan.compile(loss=[model.wasserstein_loss, 'mean_absolute_error'], optimizer=optimizer, metrics=None)

    X_train = model.load_data(168, 224)
    number_of_batches = int(X_train.shape[0] / batch_size)

    generator_iterations = 0
    d_loss = 0

    for epoch in range(epochs):

        current_batch = 0

        while current_batch < number_of_batches:

            start_time = time.time()
            # In the first 25 epochs, the critic is updated 100 times
            # for each generator update. In the other epochs the default value is 5
            if generator_iterations < 25 or (generator_iterations + 1) % 500 == 0:
                critic_iterations = 100
            else:
                critic_iterations = 5

            # Update the critic a number of critic iterations
            for critic_iteration in range(critic_iterations):

                if current_batch > number_of_batches:
                    break

                # real_images = dataset_generator.next()
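                # instead, draw a random mini-batch of real images directly from X_train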
                it_index = np.random.randint(0, number_of_batches - 1)
                real_images = X_train[it_index * batch_size:(it_index + 1) * batch_size]

                current_batch += 1

                # The last batch is smaller than the other ones, so we need to
                # take that into account
                current_batch_size = real_images.shape[0]
                # Generate images
                z = encoder1.predict(real_images)
                generated_images = generator.predict(z)
                # generated_images = generator.predict(noise)

                # Wasserstein critic targets: +1 for real images, -1 for generated images
                real_y = np.ones(current_batch_size)
                fake_y = np.ones(current_batch_size) * -1
                # print('real_y', real_y)

                # Let's train the critic
                critic.trainable = True

                # Clip the weights to small numbers near zero
                for layer in critic.layers:
                    weights = layer.get_weights()
                    weights = [np.clip(w, -0.01, 0.01) for w in weights]
                    layer.set_weights(weights)

                d_real = critic.train_on_batch(real_images, real_y)
                d_fake = critic.train_on_batch(generated_images, fake_y)

                d_loss = d_real - d_fake

            # Update the generator
            critic.trainable = False
            itt_index = np.random.randint(0, number_of_batches - 1)
            imgs = X_train[itt_index * batch_size:(itt_index + 1) * batch_size]
            # We try to mislead the critic by giving the opposite labels
            fake_yy = np.ones(current_batch_size)
            g_loss = gan.train_on_batch(imgs, [fake_yy, imgs])

            time_elapsed = time.time() - start_time
            print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f Loss_G_imgs: %f -> %f s'
                  % (epoch, epochs, current_batch, number_of_batches, generator_iterations,
                     d_loss, g_loss[0], g_loss[1], time_elapsed))

            generator_iterations += 1
Example #35
from flask import Flask, render_template

from flask_compress import Compress
from flask_cors import CORS

from api import backend_api
from model import load_data

app = Flask(__name__, static_folder='static', template_folder='static')
CORS(app)

app.config.from_object('config')
app.register_blueprint(backend_api)

comp = Compress(app)


@app.route('/')
def index():
    return render_template('index.html')


if __name__ == "__main__":
    graph_file_path = 'data/reddit_graphs.pkl'
    graph_embeddings_path = 'data/reddit_embeddings.pkl'

    load_data(graph_file_path, graph_embeddings_path)

    app.run(port=8000, debug=True)
Example #36
    resids = y - (alpha+beta*x)
    normed_resids = resids / np.sqrt(yerr**2 + sigma**2)
    np.savetxt("normed_resids_%s.txt" % whichx, np.transpose(normed_resids))
    plt.clf()
    plt.hist(normed_resids, 20, histtype="stepfilled", color="w")
    plt.xlabel("Normalised residuals")
    plt.savefig("residual_hist_%s" % whichx)

if __name__ == "__main__":

    plotpar = {'axes.labelsize': 18,
               'text.fontsize': 26,
               'legend.fontsize': 18,
               'xtick.labelsize': 18,
               'ytick.labelsize': 18,
               'text.usetex': True}
    plt.rcParams.update(plotpar)

    whichx = str(sys.argv[1]) # should be either "rho" or "logg"

    x, y, xerr, yerr = load_data(whichx, bigdata=True)

    # load chains
    fname = "test"
    with h5py.File("%s_samples_%s.h5" % (whichx, fname), "r") as f:
        samples = f["samples"][...]
    samples = samples.T

    make_flicker_plot(x, xerr, y, yerr, samples, whichx)
    make_inverse_flicker_plot(x, xerr, y, yerr, samples, whichx, 200)
Example #37
from model import load_data, NeuralNetwork

input_size = 120 * 320
data_path = "training_data/*.npz"

X_train, X_valid, y_train, y_valid = load_data(input_size, data_path)

# train a neural network
layer_sizes = [input_size, 32, 4]
nn = NeuralNetwork()
nn.create(layer_sizes)
nn.train(X_train, y_train)

# evaluate on train data
train_accuracy = nn.evaluate(X_train, y_train)
print("Train accuracy: ", "{0:.2f}%".format(train_accuracy * 100))

# evaluate on validation data
validation_accuracy = nn.evaluate(X_valid, y_valid)
print("Validation accuracy: ", "{0:.2f}%".format(validation_accuracy * 100))

# save model
model_path = "saved_model/nn_model.xml"
nn.save_model(model_path)
Example #38
import numpy as np
import model as md

x_input, y_input, n, k, wh = md.get_data()
model = md.Model(n, k, wh)

print(n, k, wh)
#model.try_load()

x_train, y_train, x_test, y_test = md.load_data(x_input, y_input)

for lr in [0.1, 0.01, 0.001]:
    model.train(x_train,
                y_train,
                x_test,
                y_test,
                batch_size=100,
                learning_late=lr)
    model.model_test(x_test, y_test)
Example #39
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_data', type=str)
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('save_model', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts',
                        type=int,
                        default=10,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    expert_data = load_data(args.expert_data)
    obs_data = np.array(expert_data['observations'])
    a_data = np.array(expert_data['actions'])

    batch_size = 16
    env = gym.make(args.envname)

    net_param = dict()
    net_param['d1'] = 128
    net_param['d2'] = 64
    net_param['d3'] = 32

    bc = BCModel(net_param=net_param,
                 batch_size=batch_size,
                 input_size=env.observation_space.shape[0],
                 action_size=env.action_space.shape[0],
                 epoch=20)

    for i in range(5):
        print('-------' + str(i) + '--------')
        n = obs_data.shape[0]
        obs_data, a_data = shuffle(obs_data, a_data, random_state=0)

        train_num = int(0.7 * n)
        x_train = np.reshape(obs_data[:train_num],
                             newshape=[-1, env.observation_space.shape[0]])
        y_train = np.reshape(a_data[:train_num],
                             newshape=[-1, env.action_space.shape[0]])
        x_test = np.reshape(obs_data[train_num:],
                            newshape=[-1, env.observation_space.shape[0]])
        y_test = np.reshape(a_data[train_num:],
                            newshape=[-1, env.action_space.shape[0]])

        with tf.Session() as sess:

            tf_util.initialize()

            if i > 0:
                bc.load(args.envname + '_dag', sess)
            bc.fit(x_train, y_train, sess)
            bc.save(args.envname + '_dag', sess)
            max_steps = args.max_timesteps or env.spec.timestep_limit

            returns = []
            observations = []
            actions = []
            for i in range(args.num_rollouts):

                obs = env.reset()
                done = False
                totalr = 0.
                steps = 0
                while not done:
                    bc_action = bc.predict([obs], sess)
                    ex_action = policy_fn(obs[None, :])

                    observations.append(obs)
                    actions.append(ex_action)
                    obs, r, done, _ = env.step(bc_action)
                    totalr += r
                    steps += 1
                    if args.render:
                        env.render()
                    #if steps % 100 == 0: print("%i/%i"%(steps, max_steps))
                    if steps >= max_steps:
                        break
                returns.append(totalr)

            print('returns', returns)
            print('mean return', np.mean(returns))
            print('std of return', np.std(returns))

            obs_data = np.concatenate((obs_data, observations))
            a_data = np.concatenate((a_data, actions))

    env.close()
Example #40
def evaluateModel(model=None, model_name=None, inp_dir=None, work_dir=None, selection_dir=None):

    X, y, dates = getFeatures()

    all_data = load_data(inp_dir=inp_dir, work_dir=work_dir)

    all_models = {
        'all': model
    }

    results = pd.DataFrame(columns=['date', 'y_pred', 'y', 'diff'])
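    # predict every date with model_predict and record the absolute error against the true value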
    for idx, d in enumerate(dates):
        date = pd.to_datetime(d)
        error = False
        answ = None
        try:
            answ = model_predict('all', str(date.year), str(date.month), str(date.day), test=False, all_data=all_data, all_models=all_models)
        except:
            print('system error:' + str(sys.exc_info()[1]))
            error = True
        y_pred = None
        diff = None
        yt = y[idx]
        if not error:
            y_pred = answ['y_pred'][0]

            diff = abs(y_pred - yt)

        results = results.append({
            'date': date,
            'y_pred': y_pred,
            'y': yt,
            'diff': diff
        }, ignore_index=True)
        # take only the last dates
    today = dt.datetime.today()

    monname = "{}-results-{}-{}-{}".format(model_name, today.year, today.month, today.day)
    results.to_csv(join(selection_dir, monname + ".csv"))

    rmse = mean_squared_error(results['y'].values, results['y_pred'].values, squared=False)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(15, 8)
    ax.set_title(' error distribution  ')
    sns.distplot(results['diff'], bins=50, color='#008899', ax=ax)
    fig.savefig(join(selection_dir, monname) + '.png', dpi=200)

    statistics_path = join(selection_dir, '{}_statistics.csv'.format(model_name))
    statDF = pd.DataFrame()
    today_iso = today.strftime('%y-%m-%d')

    mse = mean_squared_error(results['y'].values, results['y_pred'].values)
    if exists(statistics_path):
        statDF = pd.read_csv(statistics_path)
        found = statDF[statDF['date'] == today_iso]
        if(found.shape[0] > 0):
            statDF.loc[statDF['date'] == today_iso, ['mse']] = mse
    else:
        statDF = statDF.append(
            {
                'date': today_iso,
                'mse': mse
            }, ignore_index=True
        )
    statDF.to_csv(statistics_path)