def sgd_ons():
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)
    results = []

    alpha = 0.1
    gamma = 0.1
    radius = 100
    T = 10000

    _, logger = train_sgd_proj(
        a=x_train,
        b=y_train,
        a_test=x_test,
        b_test=y_test,
        T=T,
        radius=radius,
        alpha=alpha,
    )
    results.append(logger)

    _, logger = train_ons(
        a=x_train,
        b=y_train,
        a_test=x_test,
        b_test=y_test,
        T=T,
        gamma=gamma,
        radius=radius,
        alpha=alpha,
    )
    results.append(logger)

    plot_results_(results)
    quit()
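
# `train_sgd_proj` and `train_ons` are imported from `algos` and are not part of
# this excerpt. The `radius` argument presumably constrains the iterates to an
# L2 ball; a minimal sketch of such a projection step, for illustration only
# (the project's own implementation may differ):
def project_l2_ball_sketch(w, radius):
    import numpy as np
    norm = np.linalg.norm(w)
    # Scale the point back onto the ball's surface if it lies outside.
    return w if norm <= radius else w * (radius / norm)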
def gd_gd_proj_z():
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)
    results = []

    for radius in [0.5, 1.0, 10, 100, 500]:
        alpha = 0.33
        # gamma = 0.1
        # radius = 100
        T = 1000

        _, logger = train_gd(
            a=x_train,
            b=y_train,
            a_test=x_test,
            b_test=y_test,
            T=T,
            # gamma=gamma,
            # radius=radius,
            alpha=alpha,
        )
        results.append(logger)

        _, logger = train_gd_proj(
            a=x_train,
            b=y_train,
            a_test=x_test,
            b_test=y_test,
            T=T,
            # gamma=gamma,
            radius=radius,
            alpha=alpha,
        )
        results.append(logger)

    plot_results_(results)
def plot_hogwild():
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    # --- 2. plot hogwild for various values of K
    n_runs = 3
    n_workers = 8
    T = 1000000
    alpha = 0.33
    beta = 0.37
    theta = 0.2
    results = [
        AvgLogger([
            train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test,
                          T=T, alpha=alpha, beta=beta, K=K, theta=theta,
                          n_processes=n_workers, sequential=False, seed=s)[1]
            for s in range(n_runs)
        ]) for K in [3, 10, 50]
    ]

    # --- 1. plot comparison between SGD and hogwild, fixed K
    # n_runs = 3
    # n_workers = 8
    # T = 1000000
    # alpha = 0.33
    # beta = 0.37
    # theta = 0.2
    # results = [AvgLogger([
    #     train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test,
    #                   T=T, alpha=alpha, beta=beta, K=K, theta=theta,
    #                   n_processes=n_workers, sequential=False, seed=s)[1]
    #     for s in range(n_runs)
    # ]) for K in [3]]
    # results.append(AvgLogger([
    #     train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test,
    #                   T=T, alpha=alpha, beta=beta, K=3, theta=theta,
    #                   n_processes=n_workers, sequential=True, seed=s)[1]
    #     for s in range(n_runs)
    # ]))
    # results.append(AvgLogger([
    #     train_sgd(a=x_train, b=y_train, a_test=x_test, b_test=y_test,
    #               T=T, alpha=alpha, return_avg=True, seed=s)[1]
    #     for s in range(n_runs)
    # ]))

    plot_results(
        results,
        add_to_title=rf" ($\alpha={alpha}, \beta={beta}, \theta={theta}$, n_runs={n_runs})")
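
# `AvgLogger` is imported from elsewhere in the project and not shown in this
# excerpt. A minimal sketch of what it is assumed to do (average the recorded
# test-error curves of several seeded runs); the attribute name `test_errors`
# is an assumption, not the project's actual interface:
class AvgLoggerSketch:
    def __init__(self, loggers):
        import numpy as np
        # Element-wise mean of the per-iteration test-error curves across runs.
        self.test_errors = np.mean([lg.test_errors for lg in loggers], axis=0)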
def sgd_ons_var():
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)
    results = []

    n_runs = 5
    T = 1000
    alpha = 0.1
    radius = 100
    gamma = 0.1

    results.append(
        AvgLogger([
            train_ons(
                a=x_train,
                b=y_train,
                a_test=x_test,
                b_test=y_test,
                T=T,
                gamma=gamma,
                radius=radius,
                seed=s,
                alpha=alpha,
            )[1] for s in range(n_runs)
        ]))
    results.append(
        AvgLogger([
            train_sgd_proj(
                a=x_train,
                b=y_train,
                a_test=x_test,
                b_test=y_test,
                T=T,
                alpha=alpha,
                radius=radius,
                seed=s,
            )[1] for s in range(n_runs)
        ]))

    plot_results_(results, add_to_title=rf" - $\alpha={alpha}$, n_runs={n_runs}")
import os
from datetime import datetime
from pathlib import Path

import pandas as pd

from algos import train_hogwild, train_sgd
from data_utils import load_processed_data

if __name__ == "__main__":
    dir_data = Path(__file__).resolve().parents[1].joinpath("data/")
    save_folder = dir_data.joinpath("../csv_results")
    try:
        os.mkdir(save_folder)
    except FileExistsError:
        pass

    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    n_runs = 1
    T_config = 2017521
    alpha = 0.33
    beta = 0.37
    theta = 0.2
    K = 3

    results = {
        "algo": [],
        "time": [],
        "test_error": [],
        "T": [],
        "n_workers": [],
        "K": [],
    }
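
    # NOTE: the actual benchmarking loop is not part of this excerpt. What follows
    # is a minimal, hypothetical sketch of how a single run could be recorded and
    # the table dumped to CSV; the logger attribute `test_errors` and the output
    # filename are assumptions, not documented interfaces of this project.
    start = datetime.now()
    _, logger = train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test,
                              T=T_config, alpha=alpha, beta=beta, K=K, theta=theta,
                              n_processes=4, sequential=False, seed=0)
    results["algo"].append("hogwild")
    results["time"].append((datetime.now() - start).total_seconds())
    results["test_error"].append(getattr(logger, "test_errors", [None])[-1])
    results["T"].append(T_config)
    results["n_workers"].append(4)
    results["K"].append(K)
    pd.DataFrame(results).to_csv(save_folder.joinpath("benchmark_results.csv"),
                                 index=False)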
    202313, 520268, 628267, 760933, 761105, 761274, 767884, 767948, 768051,
    778196, 781774, 790989, 791094, 913179, 1073703, 1132513, 1132676,
    1140226, 1141794, 1237426, 1241905, 1387080, 1388043, 1570724, 1585962,
    1586097
]  # all errors in training

START = 761200
WINDOW = 200

r_filename = "training_data.csv"
p_filename = "training_data_ps.csv"

if not PREPROCESSED:
    array, labels = load_raw_data(r_filename, START + WINDOW)
else:
    array, labels = load_processed_data(p_filename, START + WINDOW)

# Shift the data by one row so that array2[t] holds the sample from t - 1.
array2 = np.zeros(shape=array.shape)
array2[1:, :] = np.copy(array[:-1, :])

array3 = array[START:, :]

if not os.path.exists("images"):
    os.makedirs("images")

# Min-max normalise each feature column before rendering the window as an image.
col_min = array3.min(axis=0)
col_max = array3.max(axis=0)
array4 = (array3 - col_min) / (col_max - col_min)

scipy.misc.toimage(array4, cmin=-1.0, cmax=1.0).save('images/data1.jpg')
def main(argv):
    if len(argv) < 3:
        print("Correct arguments: <model> <data_file> |-<i(interactive)>|")
        exit()

    model_file = argv[1]
    weights_file = argv[1] + ".h5"
    data_file = argv[2]
    if not (os.path.exists(model_file) and os.path.exists(weights_file)
            and os.path.exists(data_file)):
        print("One of the specified files {}, {}, {} doesn't exist".format(
            model_file, weights_file, data_file))
        exit()

    print("# Loading data from file {}".format(data_file))
    (X, y_test) = load_processed_data(data_file, TEST_ROWS)
    print("### Loaded {} test rows".format(X.shape[0]))
    print("## X_test shape: ", X.shape)
    print("## Y_test shape: ", y_test.shape)
    # y_train = np.random.choice([0, 1], size=y_train.shape, p=[0.99, 0.01])

    # Modifying labels for time-series prediction
    print("### Modifying labels")
    nonzero_test = np.count_nonzero(y_test)
    print("# Number of non-error labels: {}".format(y_test.shape[0] - nonzero_test))
    print("# Number of error labels: {}".format(nonzero_test))
    y_test = warp_labels(y_test, PREDICTION_LENGTH, WINDOW_SIZE)
    nonzero_test = np.count_nonzero(y_test)
    print("## Labels modified")
    print("# Number of non-error labels: {}".format(y_test.shape[0] - nonzero_test))
    print("# Number of error labels: {}".format(nonzero_test))
    print("### Modified labels to signal errors in the next {} samples.".format(
        PREDICTION_LENGTH))

    # Modifying x's to be 3D tensors (sliding windows of WINDOW_SIZE rows)
    X = make_timeseries_instances(X, WINDOW_SIZE)
    print("### Modified data to tensors with height {}".format(WINDOW_SIZE))

    # Add the single channel dimension expected by the convolutional model
    X = np.expand_dims(X, axis=3)
    y_test = y_test[:X.shape[0]]

    print("### Loading the model from file {}".format(model_file))
    json_file = open(model_file, 'r')
    model_json = json_file.read()
    json_file.close()
    model = model_from_json(model_json)
    print("### Loading weights from file {}".format(weights_file))
    model.load_weights(weights_file)
    print("### Loaded model from disk")
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    print("### Evaluating the model")
    if len(argv) < 4:
        score = model.evaluate(X, y_test, verbose=1)
        print("#### Results ####")
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
    elif "-i" in argv:
        print("Enter sample numbers for prediction:")
        while True:
            line = sys.stdin.readline()
            i = int(line) - PREDICTION_LENGTH
            if 0 < i < X.shape[0]:
                prediction = model.predict(X[i, ].reshape(
                    1, WINDOW_SIZE, PCA_TARGET_SIZE, 1))
                value = 0 if math.isnan(np.sum(prediction)) else np.sum(prediction)
                if value > 0.0001:
                    print("Will fail in {}".format(PREDICTION_LENGTH))
                else:
                    print("Will not fail in {}".format(PREDICTION_LENGTH))
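
# `make_timeseries_instances` is defined elsewhere in the project. A minimal
# sketch of the assumed behaviour (one sample per sliding window of WINDOW_SIZE
# consecutive rows), shown only for illustration; the project's own
# implementation may differ:
def make_timeseries_instances_sketch(X, window_size):
    import numpy as np
    # Stack rows [i, i + window_size) for every valid start index i, giving an
    # array of shape (n - window_size + 1, window_size, n_features).
    return np.array([X[i:i + window_size]
                     for i in range(X.shape[0] - window_size + 1)])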
PREPROCESSED = True

training_filename = 'training_data.csv'
test_filename = 'test_data.csv'
prep_training_filename = 'training_data_ps.csv'
prep_test_filename = 'test_data_ps.csv'

import sys

# Usually we will use pre-processed data; the raw path below is for a special case.
if PREPROCESSED:
    # Normal turn of events
    print("# Loading prepared data from files {} and {}".format(
        prep_training_filename, prep_test_filename))
    (x_train, y_train), (x_test, y_test) = \
        load_processed_data(prep_training_filename, TRAIN_ROWS), \
        load_processed_data(prep_test_filename, TEST_ROWS)
else:
    # Loading raw, unprocessed data
    print("# Loading raw data from files {} and {}".format(
        training_filename, test_filename))
    (x_train, y_train), (x_test, y_test) = \
        load_raw_data(training_filename, TRAIN_ROWS), \
        load_raw_data(test_filename, TEST_ROWS)

    # PCA dimensionality reduction: fit on the training set only and reuse the
    # same projection for the test set, so both live in the same feature space.
    pca = decomposition.PCA(n_components=PCA_TARGET_SIZE)
    pca.fit(x_train)
    x_train = pca.transform(x_train)
    x_test = pca.transform(x_test)
    print("# Reduced data to {} dimensions".format(PCA_TARGET_SIZE))
                                  sampler=sampler.SubsetRandomSampler(range(NUM_VAL)))

    optimizer = optim.Adadelta(model.parameters())
    train_model(model, optimizer, loader_train, loader_val, epoches=2)


# Upload data to memory
# data_utils.proccess_data()
regular_train_images, regular_test_images, regular_val_images,\
    regular_train_labels, regular_test_labels, regular_val_labels,\
    inverted_train_images, inverted_test_images, inverted_val_images,\
    inverted_train_labels, inverted_test_labels, inverted_val_labels,\
    train_one_group_images, test_one_group_images, val_one_group_images,\
    train_one_group_labels, test_one_group_labels, val_one_group_labels = \
    data_utils.load_processed_data()

# Check for GPU availability:
device = my_models.device_gpu_cpu()
print('using device:', device)
dtype = torch.float32  # we will be using float

# Constant to control how frequently we print train loss
print_every = 100

# Create models:
model = my_models.model_2()
my_models.test_model_size(model, dtype)  # test model size output

scene_1()