def load_dataset(id=None):
    """Load the embeddings of the requested dataset into the model."""
    if not id:
        return jsonify({})
    model.load_data(id)
    return jsonify({})
def runTests():
    model.load_data('data/train.csv', 'data/test.csv')
    methodPreprocess_list = [1, 2]
    methodFeatureExtraction = [1, 2]
    maxFeatures_list = [16000, 20000, 25000]
    ngrams_list = [(1, 3), (1, 4), (1, 5), (1, 2)]
    maxdf_list = [1.0, 0.95, 0.9, 0.85]
    mindf_list = [0.0001]
    binary_list = [True]

    for methodPreprocess in methodPreprocess_list:
        train_tweets, test_tweets = model.preprocess_data(method=methodPreprocess)
        for maxfeat in maxFeatures_list:
            for ng in ngrams_list:
                for maxd in maxdf_list:
                    for mind in mindf_list:
                        # `isbin` avoids shadowing the built-in `bin`
                        for isbin in binary_list:
                            for featureExt in methodFeatureExtraction:
                                X, y, test, feature_names = model.feature_extraction(
                                    train_tweets, test_tweets,
                                    maxFeatures=maxfeat, ngrams=ng, maxdf=maxd,
                                    mindf=mind, isbinary=isbin,
                                    method=featureExt)
                                print('\n\n\n')
                                print('#' * 73)
                                print('#' * 46)
                                print('Params preprocessing and feature extraction:')
                                print('{0}, {1}, {2}, {3}, {4}, {5}, {6}'.format(
                                    methodPreprocess, maxfeat, ng, maxd, mind,
                                    isbin, featureExt))
                                print('#' * 46)
                                gsearch(X, y)
                                print('#' * 73)
                                print('\n\n\n')
def make_plots(whichx, fname):
    x, y, xerr, yerr = load_data(whichx)
    with h5py.File("%s_samples.h5" % whichx) as f:
        samp = f["samples"][...]
    m, c, sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0]]
    print(pars)

    plt.clf()
    plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt="k.", capsize=0, ecolor=".7")
    plt.plot(x, model1(pars, x), "k")

    # overplot a random sample of posterior draws
    ndraws = 100
    p0s = np.random.choice(samp[:, 0], ndraws)
    p1s = np.random.choice(samp[:, 1], ndraws)
    p2s = np.random.choice(samp[:, 2], ndraws)
    for i in range(ndraws):
        y_draw = p0s[i] * x + p1s[i]
        plt.plot(x, y_draw + p2s[i], "k", alpha=.1)
    plt.savefig("mcmc_%s_%s" % (whichx, fname))

    labels = ["$m$", "$c$", r"$\sigma$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
def test_load_data():
    X, y = model.load_data(file_path)
    assert type(X) == np.ndarray
    assert X[0] == "data/IMG/center_2016_12_01_13_30_48_287.jpg"
    N = X.shape[0]
    assert y.shape == (N,)
def make_plots(whichx, fname):
    x, y, xerr, yerr = load_data(whichx)
    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][...]
    m, c, sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0]]
    print(pars)

    plt.clf()
    plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt="k.", capsize=0, ecolor=".7")
    plt.plot(x, model1(pars, x), "k")

    # overplot a random sample of posterior draws
    ndraws = 100
    p0s = np.random.choice(samp[:, 0], ndraws)
    p1s = np.random.choice(samp[:, 1], ndraws)
    p2s = np.random.choice(samp[:, 2], ndraws)
    for i in range(ndraws):
        y_draw = p0s[i] * x + p1s[i]
        plt.plot(x, y_draw + p2s[i], "k", alpha=.1)
    plt.savefig("mcmc_%s_%s" % (whichx, fname))

    labels = ["$m$", "$c$", r"$\sigma$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
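# Hedged sketch, not part of the original file: model1 is called in the two
# make_plots functions above but is not defined in these snippets. Judging by
# the posterior draws plotted in the loop (p0s[i] * x + p1s[i] plus a scatter
# term), it is presumably the straight-line model below; the exact signature
# is an assumption.
def model1(pars, x):
    m, c = pars[0], pars[1]  # slope and intercept; any extra parameters are ignored
    return m * x + c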
def main(): """ main """ models = { "resnet": model.ResNet50Benchmark, "mobilenet": model.ResNet50Benchmark, "vgg": model.ResNet50Benchmark, "googlenet": model.ResNet50Benchmark, "shufflenet": model.ResNet50Benchmark, "MobileNet_SSD": model.ResNet50Benchmark, "deeplab": model.ResNet50Benchmark, "rcnn": model.RcnnBenchmark, "yolo": model.YoloBenchmark, "transformer": model.TransformerBenchmark, "bert": model.BertBenchmark } args = parse_args() model = models.get(args.model)() model.set_config(use_gpu=args.device == 'gpu', model_dir=args.model_dir, model_filename=args.model_filename, params_filename=args.params_filename, use_tensorrt=args.use_tensorrt, use_anakin=args.use_anakin, model_precision=args.model_precision) tensor = model.load_data(args.filename) warmup = args.warmup repeat = args.repeat model.run(tensor, warmup, repeat)
def split_batch_worker_task(ps, worker_index, num_workers, batch_size=64):
    # Download ds.
    ds = model.load_data()
    # Initialize the model.
    net = selected_model()
    keys = net.get_weights()[0]

    # Each worker processes its own slice of every batch.
    block_size = batch_size // num_workers
    start = worker_index * block_size
    end = batch_size if worker_index == num_workers - 1 else start + block_size

    worker_iter = 0
    while True:
        # Spin until the parameter server unblocks this iteration.
        if ray.get(ps.blocked.remote(worker_iter)):
            continue
        # Get the current weights from the parameter server.
        weights = ray.get(ps.pull.remote(keys))
        net.set_weights(keys, weights)
        # Compute an update and push it to the parameter server.
        xs, ys = ds.train.next_batch(batch_size)
        xs = xs[start:end]
        ys = ys[start:end]
        gradients = net.compute_update(xs, ys)
        ps.push.remote(keys, gradients, worker_index, worker_iter)
        worker_iter += 1
def main():
    # dataset has format like [user_id, song_id, play_count]
    file = 'train_triplets.txt'
    print("Loading data...")
    load_data(file)

    print("Starting evaluation...")
    calc_neighbours()
    print("Finished evaluations.")
    print_top_songs_for_user(1)

    print("Starting cross validation...")
    print("RMSE result: ", str(rmse(train_set, test_set)))
    print("MAE result: ", str(mae(train_set, test_set)))
    print("NDCG result: ", str(ndcg(train_set, test_set)))
def test_generator():
    X, y = model.load_data(file_path)
    X, y = next(model.generator(X, y, train=False))
    assert type(X) == np.ndarray
    assert X.shape[1:] == (160, 320, 3)
    N = len(X)
    assert y.shape == (N,)
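# Hedged usage sketch, not taken from the original project: only the
# model.generator(X, y, train=...) signature and the (160, 320, 3) batch shape
# come from the test above. The Keras network `net`, batch size, and epoch
# count are assumptions for illustration.
def train_with_generator(net, file_path, batch_size=32, epochs=5):
    X, y = model.load_data(file_path)
    train_gen = model.generator(X, y, train=True)
    valid_gen = model.generator(X, y, train=False)
    steps = max(1, len(X) // batch_size)
    # fit_generator consumes the (images, steering) batches yielded above.
    net.fit_generator(train_gen, steps_per_epoch=steps,
                      validation_data=valid_gen, validation_steps=steps,
                      epochs=epochs)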
def __init__(self, worker_index, num_workers, batch_size=256,
             learning_rate=1e-4):
    self.worker_index = worker_index
    self.num_workers = num_workers
    self.batch_size = batch_size
    self.block_size = batch_size // num_workers
    self.ds = model.load_data()
    self.net = selected_model(learning_rate)
def main():
    in_arg = get_input_args()  # Creates and returns command line arguments

    print('\nData Directory:\n', in_arg.data_directory, '\n')
    print('Optional Command Line Arguments:\n',
          'Save Checkpoint [--save_dir]: ', in_arg.save_dir, '\n',
          'Pretrained Network [--arch]: ', in_arg.arch, '\n',
          'Learning Rate [--learning_rate]: ', in_arg.learning_rate, '\n',
          'Hidden Units [--hidden_units]: ', in_arg.hidden_units, '\n',
          'Epochs [--epochs]: ', in_arg.epochs, '\n',
          'GPU [--gpu]: ', in_arg.gpu, '\n')

    if 'checkpoints' not in listdir():  # make a checkpoints folder if it doesn't already exist
        mkdir('checkpoints')

    # Returns train, validation and test directories
    train_dir, valid_dir, test_dir = util.get_data(in_arg.data_directory)

    # Returns transformed datasets
    transformed_train, transformed_valid, transformed_test = mod.transform_data(
        train_dir, valid_dir, test_dir)

    # Returns data loaders
    train_loader, valid_loader, test_loader = mod.load_data(
        transformed_train, transformed_valid, transformed_test)

    # Returns the built model
    model = mod.build_model(util.label_count(train_dir), in_arg.hidden_units,
                            in_arg.arch, transformed_train.class_to_idx)

    # Epochs initially set by the --epochs argument; can be changed with m.load_checkpoint()
    epochs = in_arg.epochs
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=in_arg.learning_rate)
    use_gpu = mod.use_gpu(model, in_arg.gpu)  # Returns True or False for GPU use

    # Trains the model; prints training loss, validation loss and validation accuracy
    mod.train(model, criterion, optimizer, train_loader, valid_loader, use_gpu,
              in_arg.epochs)

    # Saves the classifier and other model parameters to a checkpoint
    mod.save_checkpoint(in_arg.arch, model.classifier.state_dict(),
                        transformed_train.class_to_idx, util.label_count(train_dir),
                        in_arg.hidden_units, in_arg.epochs, in_arg.save_dir)
def make_plots(whichx, fname):
    x, y, xerr, yerr = load_data(whichx)
    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][...]
    m, c, sig, Y, V, P = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                             zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0], Y[0], V[0], P[0]]
    print(pars)

    labels = ["$m$", "$c$", r"$\sigma$", "$Y$", "$V$", "$P$"]
    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
def worker_task(ps, worker_index, num_workers, batch_size=64):
    # Download ds.
    ds = model.load_data()
    # Initialize the model.
    net = selected_model()
    keys = net.get_weights()[0]
    while True:
        # Get the current weights from the parameter server.
        weights = ray.get(ps.pull.remote(keys))
        net.set_weights(keys, weights)
        # Compute an update and push it to the parameter server.
        xs, ys = ds.train.next_batch(batch_size)
        gradients = net.compute_update(xs, ys)
        ps.push.remote(keys, gradients)
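# Hedged sketch of the parameter-server actor that the worker tasks above talk
# to. It is not the original implementation: only the pull(keys) / push(keys,
# gradients) interface is inferred from the ps.*.remote calls, and the
# apply_update method on selected_model is an assumption.
import ray


@ray.remote
class ParameterServer(object):
    def __init__(self):
        self.net = selected_model()

    def pull(self, keys):
        # Return the current values for the requested weight keys.
        all_keys, all_values = self.net.get_weights()
        weights = dict(zip(all_keys, all_values))
        return [weights[key] for key in keys]

    def push(self, keys, gradients, *args):
        # Apply a gradient update; extra args (worker index / iteration) are
        # accepted so the split-batch variant above can call this too.
        self.net.apply_update(keys, gradients)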
def MCMC(whichx, nsamp, fname, nd, bigdata, burnin=500, run=500):
    rho_pars = [-2., 6., .0065]
    logg_pars = [-1.850, 7., .0065]
    pars_init = logg_pars
    if whichx == "rho":
        pars_init = rho_pars
    x, y, xerr, yerr = load_data(whichx, nd=nd, bigdata=True)

    # format data and generate samples
    obs = np.vstack((x, y))
    u = np.vstack((xerr, yerr))
    up = np.vstack((xerr, yerr))
    um = np.vstack((xerr * .5, yerr * .5))
    # s = generate_samples_log(obs, up, um, nsamp)  # FIXME
    s = generate_samples(obs, u, nsamp)  # FIXME
    # if nsamp == 1:
    #     s[0, :, :] = x
    #     s[1, :, :] = y
    # print(np.shape(s))
    # assert 0

    # set up and run emcee
    ndim, nwalkers = len(pars_init), 32
    pos = [pars_init + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(s, obs, u))
    print("burning in...")
    pos, _, _ = sampler.run_mcmc(pos, burnin)
    sampler.reset()
    print("production run...")
    sampler.run_mcmc(pos, run)

    samp = sampler.chain[:, 50:, :].reshape((-1, ndim))
    m, c, sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(samp, [16, 50, 84], axis=0)))
    pars = [m[0], c[0], sig[0]]

    # save samples
    f = h5py.File("%s_samples_%s.h5" % (whichx, fname), "w")
    data = f.create_dataset("samples", np.shape(samp))
    data[:, 0] = samp[:, 0]
    data[:, 1] = samp[:, 1]
    data[:, 2] = samp[:, 2]
    f.close()
def predict_frame(frame: pd.DataFrame) -> str:
    """Score an uploaded .csv file with the saved logistic regression model.

    For the purposes of the exercise the Y column is dropped before predicting.
    The DataFrame is reindexed to the one-hot encoded training columns, the
    loaded model is applied row by row to produce a new ``yes_prob`` column,
    and the probabilities are rounded for nicer viewing.

    The reindexed frame is only used for prediction and is not returned: this
    keeps the payload small, since to_json() then serialises the original
    columns rather than every one-hot encoded column.

    Arguments:
        frame {pd.DataFrame} -- DataFrame of the uploaded .csv file.

    Returns:
        str -- Record-oriented JSON string containing all original columns
               plus the predicted probability, returned to the /predict/csv
               API route.
    """
    model = joblib.load("models/LogReg.pkl")
    frame = load_data(frame)
    frame.drop("y", axis=1, inplace=True)
    reindex_frame = frame.reindex(columns=TRAINING_COLUMNS.columns, fill_value=0)
    frame["yes_prob"] = reindex_frame.apply(
        lambda x: model.predict_proba(np.array(x).reshape(1, -1))[0][1],
        axis=1).round(3)
    return frame.to_json(orient="records")
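# Hedged sketch of the /predict/csv route mentioned in the docstring above; it
# is not taken from the original project. The Flask app object, HTTP method,
# and upload field name ("file") are assumptions for illustration.
from flask import Flask, request
import pandas as pd

app = Flask(__name__)


@app.route("/predict/csv", methods=["POST"])
def predict_csv():
    # Read the uploaded .csv into a DataFrame and return the JSON predictions.
    frame = pd.read_csv(request.files["file"])
    return predict_frame(frame)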
def make_plots(whichx, fname):
    x, y, xerr, yerr = load_data(whichx)
    with h5py.File("%s_samples_%s.h5" % (whichx, fname)) as f:
        samp = f["samples"][:, :-1]
    if fname in ("f_extra", "short"):  # was `fname == "f_extra" or "short"`, which is always true
        m, c, ln_sig, lnf = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                                zip(*np.percentile(samp, [16, 50, 84], axis=0)))
        pars = [m[0], c[0], ln_sig[0], lnf[0]]
        labels = ["$m$", "$c$", r"$\sigma$", "$f$"]
    else:
        m, c, ln_sig = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                           zip(*np.percentile(samp, [16, 50, 84], axis=0)))
        pars = [m[0], c[0], ln_sig[0]]
        labels = ["$m$", "$c$", r"$\ln(\sigma)$"]
    print(pars)

    plt.clf()
    fig = triangle.corner(samp, labels=labels)
    fig.savefig("triangle_%s_%s" % (whichx, fname))
request = "INSERT INTO {0} (label, mean, median, sd, variance, iqr, mode, min, max) " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)".format(db_setup.table_name) values = (dataset_label, features[columns[0]], features[columns[1]], features[columns[2]], features[columns[3]], features[columns[4]], features[columns[5]], features[columns[6]], features[columns[7]]) print(dataset_label) db_setup.cursor.execute(request, values) db_setup.db.commit() db_setup.close_db() model.train() test_data = pd.DataFrame(columns=model.get_columns()) if __name__ == '__main__': getAllLabels() model.load_data() model.prepare_data() model.load_models() app.run(host=config.HOST_ADDRESS, port=config.HOST_PORT, debug=config.DEBUG, threaded=config.THREADED) model.save_models() print("Server Closed")
#!/usr/bin/env python3
import mxnet as mx
from model import load_data, prepare_data, load_LSTM_net, load_CNN_net, train_k_fold, plot_k_fold, train_on_all_data
from mxnet import gluon
from mxnet.gluon import loss as gloss

# Prepare the data
all_data = load_data()
train_data = all_data[0:int(len(all_data) * 0.75)]
test_data = all_data[int(len(all_data) * 0.75):]
vocab, train_features, train_labels, test_features, test_labels = prepare_data(
    train_data, test_data, 80)

# Load the LSTM model and set model parameters
embed_size, num_hiddens, num_layers, ctx, bidirectional, fasttext = 300, 60, 2, mx.gpu(), True, True
net = load_LSTM_net(fasttext, vocab, embed_size, num_hiddens, num_layers, ctx,
                    bidirectional)  # load the LSTM model

# # Load the CNN model and set model parameters
# embed_size, kernel_sizes, nums_channels, ctx, fasttext = 300, [2, 3, 4, 5], [65, 65, 65, 65], mx.gpu(), True
# net = load_CNN_net(fasttext, vocab, embed_size, kernel_sizes, nums_channels, ctx)

# Define training parameters
learning_rate, num_epochs, K, batch_size = 0.005, 65, 5, 64
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': learning_rate})
loss = gloss.SoftmaxCrossEntropyLoss()

# K-fold cross-validation
result_train, result_valid = train_k_fold(net, K, train_features, train_labels,
if __name__ == "__main__": plotpar = {'axes.labelsize': 18, 'text.fontsize': 26, 'legend.fontsize': 18, 'xtick.labelsize': 18, 'ytick.labelsize': 18, 'text.usetex': True} plt.rcParams.update(plotpar) whichx = str(sys.argv[1]) # should be either "rho" or "logg" fname = str(sys.argv[2]) # mixture, f_extra, f, test, simple # x, y, xerr, yerr = load_data(whichx, bigdata=True) x, y, xerr, yerr = load_data(whichx, bigdata=False) # load chains with h5py.File("%s_samples_%s.h5" % (whichx, fname), "r") as f: samples = f["samples"][...] samples = samples.T fractional, extra = False, False if fname == "f": fractional = True elif fname == "f_extra" or "short": extra = True make_flicker_plot(x, xerr, y, yerr, samples, whichx, fname, 10000, fractional=fractional, extra=extra) # make_inverse_flicker_plot(x, xerr, y, yerr, samples, whichx, fname, 1000, # fractional=fractional, extra=extra)
#!/usr/bin/env python3
import mxnet as mx
from model import load_data, create_idx_dataset, get_centers_and_contexts, get_negatives, get_batch, check_lenghth, create_data_iter, train

# User-defined hyperparameters
batch_size = 512
max_window_size = 3
k = 5  # number of negative samples
lr = 0.001  # learning rate
num_epoch = 25
embed_size = 150

raw_dataset = load_data()
counter, idx_to_token, token_to_idx, num_tokens, subsampled_dataset = create_idx_dataset(
    raw_dataset, 0.75)
all_centers, all_contexts = get_centers_and_contexts(subsampled_dataset,
                                                     max_window_size)
all_negatives = get_negatives(counter, num_tokens, all_centers, all_contexts,
                              0.75, idx_to_token, k)  # negative sampling: k words
check_lenghth(all_centers, all_contexts, all_negatives)
data_iter = create_data_iter(get_batch, batch_size, all_centers, all_contexts,
                             all_negatives)
train(embed_size, idx_to_token, lr, num_epoch, mx.gpu(), data_iter, batch_size)
import model
from scipy.sparse import coo_matrix, hstack

# model.load_data('data/train.csv', 'data/test.csv')
# train_tweets, test_tweets = model.preprocess_data(method=1)
# dictFeatures_train, dictFeatures_test, y = model.new_extractFeatures(train_tweets, test_tweets)
# results, s = model.new_gsearchRidge(dictFeatures_train, dictFeatures_test, y, nSplits=3, testSize=0.4)
#
# clfs = model.new_train3(dictFeatures_train, y)
# train_prediction, l = model.new_predict3(clfs, dictFeatures_train, y=y)
# test_prediction, l = model.new_predict3(clfs, dictFeatures_test, y=None, clfsLabels=l)
# model.saveResults('output/xxx1.csv', test_prediction)

model.load_data('data/train.csv', 'data/test.csv')
train_tweets, test_tweets = model.preprocess_data(method=1)
emo_train, emo_test, sent_train, sent_test, k_train, k_test, w_train, w_test = model.new_extract_addicional_features()

l_tfidf = []
l_cv = []
print()
print()
for mF in [12500, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000,
           55000, 60000]:
    print('Max Features = {0}'.format(mF))
    for maxd in [0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2]:
def prepare():
    model.load_data()
    model.prepare_data()
    model.load_models()
from model import load_data, NeuralNetwork

x_train, x_test, y_train, y_test = load_data(random_state=41)
# show_data(x_train, y_train)

model = NeuralNetwork()
model.create_posla_net()
model.train(x_train=x_train, y_train=y_train, epochs=50,
            learning_rate=1e-4, batch_size=256)
# model.evaluate(x_test, y_test)
# model.show_resualt()
# model.save_model(path='./model_data/VGG_model_e50_lr4_bs256.h5')
# model.load_model(path='./model_data/VGG_model.h5')

model.show_prediction(x_test, y_test)
model.evaluate(x_test, y_test)
__author__ = 'zhengwang'

from model import load_data, NeuralNetwork

input_size = 120 * 320
data_path = "training_data/*.npz"
X_train, X_valid, y_train, y_valid = load_data(input_size, data_path)

# train a neural network
layer_sizes = [input_size, 32, 4]
nn = NeuralNetwork()
nn.create(layer_sizes)
nn.train(X_train, y_train)

# evaluate on train data
train_accuracy = nn.evaluate(X_train, y_train)
print("Train accuracy: ", "{0:.2f}%".format(train_accuracy * 100))

# evaluate on validation data
validation_accuracy = nn.evaluate(X_valid, y_valid)
print("Validation accuracy: ", "{0:.2f}%".format(validation_accuracy * 100))

# save model
model_path = "saved_model/nn_model.xml"
nn.save_model(model_path)
# From now on, everything the model does is in the directory
# corresponding to this particular dataset
os.chdir(args.dataset)

# path to saved version of trained model
load_path = os.path.join('checkpoints', 'checkpoint')

# check if a model has been previously trained
already_trained = os.path.exists(load_path)
if not (args.train or already_trained):
    check_if_ok_to_continue('Model has not been trained. '
                            'Train it now (this may take several hours)? ')
    args.train = True

dataset = model.load_data(args.dataset)
if args.train:
    model.run_training(dataset)

# predict a rating for the user
if args.user_id and (args.movie or args.top):
    instance = dataset.get_ratings(args.user_id)
    ratings = data.unnormalize(instance.ravel())
    output = model.predict(instance, dataset).ravel()

    if args.movie:
        col = dataset.get_col(args.movie)
        rating = output[col]

        # purty stars
        num_stars = int(round(rating * 2))
        stars = ''.join(u'\u2605' for _ in range(num_stars))
def MCMC(whichx, nsamp, fname, nd, extra, f, bigdata, burnin=500, run=1000):
    """
    nsamp (int) = number of samples.
    whichx (str) = logg or rho.
    fname (str) = name used for saving all output.
    nd (int) = number of data points (for truncation). If zero, all data are used.
    bigdata (bool) = which data file to use.
    """
    # set initial parameters
    if extra:
        rho_pars = [-1.793214679, 5.34215688, 0.02334097, .0002600777]
        logg_pars = [-1.02143776, 5.69156647, .24239756, .049233887]
    else:
        rho_pars = [-1.69293833, 5.1408906, .0065]
        logg_pars = [-1.05043614, 5.66819525, .0065]
    pars_init = logg_pars
    if whichx == "rho":
        pars_init = rho_pars

    # load the data
    x, y, xerr, yerr = load_data(whichx, nd=nd, bigdata=False)

    # format data and generate samples
    obs = np.vstack((x, y))
    u = np.vstack((xerr, yerr))
    up = np.vstack((xerr, yerr))
    um = np.vstack((xerr * .5, yerr * .5))
    s = generate_samples(obs, u, nsamp)

    # set up and run emcee
    ndim, nwalkers = len(pars_init), 32
    pos = [pars_init + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(s, obs, u, extra, f))
    print("burning in...")
    pos, _, _, _ = sampler.run_mcmc(pos, burnin)
    sampler.reset()
    print("production run...")
    sampler.run_mcmc(pos, run)

    # flatten the chain and append the log-likelihood blobs
    lls = sampler.blobs
    flat_lls = np.reshape(lls, (np.shape(lls)[0] * np.shape(lls)[1]))
    samp = np.vstack((sampler.chain[:, :, :].reshape(-1, ndim).T, flat_lls)).T
    sa = samp.T[0]
    print(type(sa))
    print(np.isfinite(sa))
    print(sa)
    print(np.shape(sa), np.shape(sa[np.isfinite(sa)]))

    # save samples
    f = h5py.File("%s_samples_%s.h5" % (whichx, fname), "w")
    data = f.create_dataset("samples", np.shape(samp))
    data[:, 0] = samp[:, 0]
    data[:, 1] = samp[:, 1]
    data[:, 2] = samp[:, 2]
    data[:, 3] = samp[:, 3]
    print("samp shape = ", np.shape(samp), np.shape(data))
    if extra:
        data[:, 4] = samp[:, 4]
    f.close()
plotpar = {'axes.labelsize': 18,
           'text.fontsize': 26,
           'legend.fontsize': 18,
           'xtick.labelsize': 18,
           'ytick.labelsize': 18,
           'text.usetex': True}
plt.rcParams.update(plotpar)

whichx = str(sys.argv[1])  # should be either "rho" or "logg"
fname = str(sys.argv[2])  # mixture, f_extra, f, test, simple

# x, y, xerr, yerr = load_data(whichx, bigdata=True)
x, y, xerr, yerr = load_data(whichx, bigdata=False)

# load chains
with h5py.File("%s_samples_%s.h5" % (whichx, fname), "r") as f:
    samples = f["samples"][...]
samples = samples.T

fractional, extra = False, False
if fname == "f":
    fractional = True
elif fname in ("f_extra", "short"):  # was `== "f_extra" or "short"`, which is always true
    extra = True

make_flicker_plot(x, xerr, y, yerr,
import tensorflow as tf
from model import load_data, load_loopable_model

x, y, output, keep_prob = load_loopable_model()

loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=y))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

data_x, data_y = load_data(x, y)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess.run(init)

    correct_prediction = tf.equal(tf.argmax(output, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.histogram("normal/accuracy", accuracy)

    try:
        for i in range(9999):
            sess.run(train_step,
                     feed_dict={x: data_x, y: data_y, keep_prob: 0.75})
            train_accuracy = sess.run(accuracy, feed_dict={
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.layers import Input

from model import MnistModel, load_data

if __name__ == '__main__':
    inputs = Input(shape=(28, 28))
    mnist = MnistModel(inputs=inputs)
    # plot_model(mnist, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

    (x_train, y_train), (x_test, y_test) = load_data()
    x_pred = x_train[:15]
    y_pred = y_train[:15]

    mnist.my_compile()
    mnist.my_fit(x=x_train, y=y_train)
    mnist.summary()

    acc = mnist.evaluate(x=x_test, y=y_test)
    print(acc)

    y_hat = mnist.predict(x_pred)
    print(np.argmax(y_hat))

    # Plot a random sample of 10 test images, their predicted labels and ground truth
import model
import process
from datetime import datetime, timedelta
from sys import exit

model.load_data()

if not model.is_running_challenge():
    create_challenge = input('No challenge running. Create a new one? (y/n) ')
    if create_challenge == 'y':
        duration = input('What is the duration of the new challenge (in weeks): ')
        model.initialize_challenge(duration)
    else:
        exit()

start_date = model.get_reference_date()
end_date = start_date + timedelta(weeks=model.get_duration())

if datetime.now() > end_date:
    print('Current challenge is over!')
    print('Last challenge started at {:%Y-%m-%d} '
          'and finished at {:%Y-%m-%d}'.format(start_date, end_date))

    my_acc = model.get_accumulated_minutes()
    opp_acc = process.compute_acc(model.get_reference_date(),
                                  duration=model.get_duration(),
                                  reference_end=end_date)
    if my_acc > opp_acc:
        print('You won it!')
    else:
        print('You lost it :(... better luck next time!')
def train_wgan(batch_size, epochs, image_shape):
    enc_model_1 = model.make_encoder()
    img = Input(shape=input_shape)
    z = enc_model_1(img)
    encoder1 = Model(img, z)

    z = Input(shape=(latent_dim,))
    modelG = model.construct_generator()
    gen_img = modelG(z)
    generator = Model(z, gen_img)

    critic = model.construct_critic(image_shape)
    critic.trainable = False

    img = Input(shape=input_shape)
    z = encoder1(img)
    img_ = generator(z)
    real = critic(img_)

    optimizer = RMSprop(0.0002)
    gan = Model(img, [real, img_])
    gan.compile(loss=[model.wasserstein_loss, 'mean_absolute_error'],
                optimizer=optimizer, metrics=None)

    X_train = model.load_data(168, 224)
    number_of_batches = int(X_train.shape[0] / batch_size)
    generator_iterations = 0
    d_loss = 0

    for epoch in range(epochs):
        current_batch = 0
        while current_batch < number_of_batches:
            start_time = time.time()

            # In the first 25 epochs, the critic is updated 100 times
            # for each generator update. In the other epochs the default value is 5
            if generator_iterations < 25 or (generator_iterations + 1) % 500 == 0:
                critic_iterations = 100
            else:
                critic_iterations = 5

            # Update the critic a number of critic iterations
            for critic_iteration in range(critic_iterations):
                if current_batch > number_of_batches:
                    break
                # real_images = dataset_generator.next()
                it_index = np.random.randint(0, number_of_batches - 1)
                real_images = X_train[it_index * batch_size:(it_index + 1) * batch_size]
                current_batch += 1

                # The last batch is smaller than the other ones, so we need to
                # take that into account
                current_batch_size = real_images.shape[0]

                # Generate images
                z = encoder1.predict(real_images)
                generated_images = generator.predict(z)
                # generated_images = generator.predict(noise)

                # Add some noise to the labels that will be fed to the critic
                real_y = np.ones(current_batch_size)
                fake_y = np.ones(current_batch_size) * -1
                # print('real_y', real_y)

                # Let's train the critic
                critic.trainable = True

                # Clip the weights to small numbers near zero
                for layer in critic.layers:
                    weights = layer.get_weights()
                    weights = [np.clip(w, -0.01, 0.01) for w in weights]
                    layer.set_weights(weights)

                d_real = critic.train_on_batch(real_images, real_y)
                d_fake = critic.train_on_batch(generated_images, fake_y)
                d_loss = d_real - d_fake

            # Update the generator
            critic.trainable = False
            itt_index = np.random.randint(0, number_of_batches - 1)
            imgs = X_train[itt_index * batch_size:(itt_index + 1) * batch_size]

            # We try to mislead the critic by giving the opposite labels
            fake_yy = np.ones(current_batch_size)
            g_loss = gan.train_on_batch(imgs, [fake_yy, imgs])

            time_elapsed = time.time() - start_time
            print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f Loss_G_imgs: %f -> %f s'
                  % (epoch, epochs, current_batch, number_of_batches,
                     generator_iterations, d_loss, g_loss[0], g_loss[1],
                     time_elapsed))
            generator_iterations += 1
from flask import Flask, render_template
from flask_compress import Compress
from flask_cors import CORS

from api import backend_api
from model import load_data

app = Flask(__name__, static_folder='static', template_folder='static')
CORS(app)
app.config.from_object('config')
app.register_blueprint(backend_api)
comp = Compress(app)


@app.route('/')
def index():
    return render_template('index.html')


if __name__ == "__main__":
    graph_file_path = 'data/reddit_graphs.pkl'
    graph_embeddings_path = 'data/reddit_embeddings.pkl'
    load_data(graph_file_path, graph_embeddings_path)
    app.run(port=8000, debug=True)
    resids = y - (alpha + beta * x)
    normed_resids = resids / np.sqrt(yerr**2 + sigma**2)
    # fill in the filename placeholder (was missing `% whichx`)
    np.savetxt("normed_resids_%s.txt" % whichx, np.transpose(normed_resids))

    plt.clf()
    plt.hist(normed_resids, 20, histtype="stepfilled", color="w")
    plt.xlabel("Normalised residuals")
    plt.savefig("residual_hist_%s" % whichx)


if __name__ == "__main__":

    plotpar = {'axes.labelsize': 18,
               'text.fontsize': 26,
               'legend.fontsize': 18,
               'xtick.labelsize': 18,
               'ytick.labelsize': 18,
               'text.usetex': True}
    plt.rcParams.update(plotpar)

    whichx = str(sys.argv[1])  # should be either "rho" or "logg"
    x, y, xerr, yerr = load_data(whichx, bigdata=True)

    # load chains
    fname = "test"
    with h5py.File("%s_samples_%s.h5" % (whichx, fname), "r") as f:
        samples = f["samples"][...]
    samples = samples.T

    make_flicker_plot(x, xerr, y, yerr, samples, whichx)
    make_inverse_flicker_plot(x, xerr, y, yerr, samples, whichx, 200)
import numpy as np
import model as md

x_input, y_input, n, k, wh = md.get_data()
model = md.Model(n, k, wh)
print(n, k, wh)
# model.try_load()

x_train, y_train, x_test, y_test = md.load_data(x_input, y_input)
for lr in [0.1, 0.01, 0.001]:
    model.train(x_train, y_train, x_test, y_test, batch_size=100,
                learning_late=lr)
model.model_test(x_test, y_test)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_data', type=str)
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('save_model', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=10,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    expert_data = load_data(args.expert_data)
    obs_data = np.array(expert_data['observations'])
    a_data = np.array(expert_data['actions'])

    batch_size = 16
    env = gym.make(args.envname)

    net_param = dict()
    net_param['d1'] = 128
    net_param['d2'] = 64
    net_param['d3'] = 32
    bc = BCModel(net_param=net_param, batch_size=batch_size,
                 input_size=env.observation_space.shape[0],
                 action_size=env.action_space.shape[0], epoch=20)

    for i in range(5):
        print('-------' + str(i) + '--------')
        n = obs_data.shape[0]
        obs_data, a_data = shuffle(obs_data, a_data, random_state=0)
        train_num = int(0.7 * n)
        x_train = np.reshape(obs_data[:train_num],
                             newshape=[-1, env.observation_space.shape[0]])
        y_train = np.reshape(a_data[:train_num],
                             newshape=[-1, env.action_space.shape[0]])
        x_test = np.reshape(obs_data[train_num:],
                            newshape=[-1, env.observation_space.shape[0]])
        y_test = np.reshape(a_data[train_num:],
                            newshape=[-1, env.action_space.shape[0]])

        with tf.Session() as sess:
            tf_util.initialize()
            if i > 0:
                bc.load(args.envname + '_dag', sess)
            bc.fit(x_train, y_train, sess)
            bc.save(args.envname + '_dag', sess)

            max_steps = args.max_timesteps or env.spec.timestep_limit
            returns = []
            observations = []
            actions = []
            # use `rollout` here so the DAgger iteration counter `i` is not shadowed
            for rollout in range(args.num_rollouts):
                obs = env.reset()
                done = False
                totalr = 0.
                steps = 0
                while not done:
                    bc_action = bc.predict([obs], sess)
                    ex_action = policy_fn(obs[None, :])
                    observations.append(obs)
                    actions.append(ex_action)
                    obs, r, done, _ = env.step(bc_action)
                    totalr += r
                    steps += 1
                    if args.render:
                        env.render()
                    # if steps % 100 == 0: print("%i/%i" % (steps, max_steps))
                    if steps >= max_steps:
                        break
                returns.append(totalr)

            print('returns', returns)
            print('mean return', np.mean(returns))
            print('std of return', np.std(returns))

        # aggregate the expert-labelled states into the training set (DAgger)
        obs_data = np.concatenate((obs_data, observations))
        a_data = np.concatenate((a_data, actions))

    env.close()
def evaluateModel(model=None, model_name=None, inp_dir=None, work_dir=None,
                  selection_dir=None):
    X, y, dates = getFeatures()
    all_data = load_data(inp_dir=inp_dir, work_dir=work_dir)
    all_models = {'all': model}

    results = pd.DataFrame(columns=['date', 'y_pred', 'y', 'diff'])
    for idx, d in enumerate(dates):
        date = pd.to_datetime(d)
        error = False
        answ = None
        try:
            answ = model_predict('all', str(date.year), str(date.month),
                                 str(date.day), test=False, all_data=all_data,
                                 all_models=all_models)
        except:
            print('system error: ' + str(sys.exc_info()[1]))
            error = True
        y_pred = None
        diff = None
        yt = y[idx]
        if not error:
            y_pred = answ['y_pred'][0]
            diff = abs(y_pred - yt)
        results = results.append({'date': date, 'y_pred': y_pred, 'y': yt,
                                  'diff': diff}, ignore_index=True)

    # name the output files after the model and today's date
    today = dt.datetime.today()
    monname = "{}-results-{}-{}-{}".format(model_name, today.year, today.month,
                                           today.day)
    results.to_csv(join(selection_dir, monname + ".csv"))

    rmse = mean_squared_error(results['y'].values, results['y_pred'].values,
                              squared=False)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(15, 8)
    ax.set_title(' error distribution ')
    sns.distplot(results['diff'], bins=50, color='#008899', ax=ax)
    fig.savefig(join(selection_dir, monname) + '.png', dpi=200)

    statistics_path = join(selection_dir, '{}_statistics.csv'.format(model_name))
    # give the frame its columns so the date lookup below works on the first run
    statDF = pd.DataFrame(columns=['date', 'mse'])
    today_iso = today.strftime('%y-%m-%d')
    mse = mean_squared_error(results['y'].values, results['y_pred'].values)
    if exists(statistics_path):
        statDF = pd.read_csv(statistics_path)
    found = statDF[statDF['date'] == today_iso]
    if found.shape[0] > 0:
        statDF.loc[statDF['date'] == today_iso, ['mse']] = mse
    else:
        statDF = statDF.append({'date': today_iso, 'mse': mse},
                               ignore_index=True)
    statDF.to_csv(statistics_path)