def test_compare_accuracy_against_reference(self, tarantella_framework, model_runners,
                                            micro_batch_size, number_epochs, nbatches):
  batch_size = micro_batch_size * tarantella_framework.get_size()
  nsamples = nbatches * batch_size
  tnt_model_runner, reference_model_runner = model_runners

  # reuse model with its initial weights
  tnt_model_runner.reset_weights()
  reference_model_runner.reset_weights()

  # verify that both models have identical weights
  tnt_initial_weights = tnt_model_runner.get_weights()
  reference_initial_weights = reference_model_runner.get_weights()
  util.compare_weights(tnt_initial_weights, reference_initial_weights, 1e-6)

  # train reference model
  (ref_train_dataset, ref_test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                                            train_size=nsamples,
                                                            train_batch_size=batch_size,
                                                            test_size=10000,
                                                            test_batch_size=batch_size)
  reference_model_runner.train_model(ref_train_dataset, number_epochs)
  reference_loss_accuracy = reference_model_runner.evaluate_model(ref_test_dataset)

  # train Tarantella model
  (train_dataset, test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                                    train_size=nsamples,
                                                    train_batch_size=batch_size,
                                                    test_size=10000,
                                                    test_batch_size=batch_size)
  tnt_model_runner.train_model(train_dataset, number_epochs)
  tnt_loss_accuracy = tnt_model_runner.evaluate_model(test_dataset)

  rank = tarantella_framework.get_rank()
  logging.getLogger().info("[Rank %d] Tarantella [loss, accuracy] = %s" % (rank, str(tnt_loss_accuracy)))
  logging.getLogger().info("[Rank %d] Reference  [loss, accuracy] = %s" % (rank, str(reference_loss_accuracy)))

  # losses might not be identical, so compare within a tolerance
  assert np.isclose(tnt_loss_accuracy[0], reference_loss_accuracy[0], atol=1e-2)
  assert np.isclose(tnt_loss_accuracy[1], reference_loss_accuracy[1], atol=1e-2)
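# For reference, a minimal sketch of what the util.compare_weights helper used above
# might look like (hypothetical; the actual test utility may differ):
import numpy as np

def compare_weights(weights, reference_weights, tolerance):
    # both arguments are lists of numpy weight tensors, as returned by model.get_weights()
    assert len(weights) == len(reference_weights)
    for w, w_ref in zip(weights, reference_weights):
        assert np.allclose(w, w_ref, atol=tolerance)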
def main():
    batch_size = 32   # the number of training examples in one forward/backward pass
    num_classes = 10  # number of CIFAR-10 dataset classes
    epochs = 3        # number of forward and backward passes over all the training examples
    '''
    dataset contains the hyperparameters for loading data and the dataset itself:
    dataset = {
        'batch_size': batch_size,
        'num_classes': num_classes,
        'epochs': epochs,
        'x_train': x_train,
        'x_test': x_test,
        'y_train': y_train,
        'y_test': y_test
    }
    '''
    dataset = load_dataset(batch_size, num_classes, epochs)

    num_population = 4
    num_generation = 4
    num_offspring = 2

    # run the genetic algorithm and keep the best model obtained
    optCNN = genetic_algorithm(num_population, num_generation, num_offspring, dataset)

    # retrain the best model and plot its training and validation loss and accuracy
    num_epoch = 3
    model = optCNN.build_model()
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    history = model.fit(dataset['x_train'], dataset['y_train'],
                        batch_size=dataset['batch_size'],
                        epochs=num_epoch,
                        validation_data=(dataset['x_test'], dataset['y_test']),
                        shuffle=True)
    optCNN.model = model                              # model
    optCNN.fitness = history.history['val_loss'][-1]  # fitness

    print("\n\n-------------------------------------")
    print("The initial CNN has been evolved successfully into the individual", optCNN.name)
    print("-------------------------------------\n")

    daddy = load_network('parent_0')
    model = tf.keras.models.load_model('parent_0.h5')

    print("\n\n-------------------------------------")
    print("Summary of initial CNN")
    model.summary()
    print("Fitness of initial CNN:", daddy.fitness)
    print("\n\n-------------------------------------")
    print("Summary of evolved individual")
    optCNN.model.summary()
    print("Fitness of the evolved individual:", optCNN.fitness)
    print("-------------------------------------\n")

    plot_training(history)
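# For context, a minimal sketch of the plot_training helper called above (hypothetical;
# it assumes the model was compiled with metrics=['accuracy'], so the History object
# carries 'accuracy'/'val_accuracy' alongside the losses):
import matplotlib.pyplot as plt

def plot_training(history):
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
    ax_loss.plot(history.history['loss'], label='train')
    ax_loss.plot(history.history['val_loss'], label='validation')
    ax_loss.set_title('loss')
    ax_loss.legend()
    ax_acc.plot(history.history['accuracy'], label='train')
    ax_acc.plot(history.history['val_accuracy'], label='validation')
    ax_acc.set_title('accuracy')
    ax_acc.legend()
    plt.show()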
def preprocessing():
    df = load_dataset('ionosphere_csv.csv')        # load data
    X = df.drop(['class'], axis=1)
    X = (X - X.min()) / (X.max() - X.min())        # min-max normalize the features
    X = X.replace(np.nan, 0)
    y = df['class'].transform(lambda x: 1 if x == 'g' else 0)  # binary-encode the target
    return X, y
def preprocessing():
    categorical_columns = ['Orientation', 'Glazing Area Distribution']
    categories = {1: 'uniform', 2: 'north', 3: 'east', 4: 'south', 5: 'west'}
    target = 'Heating Load'

    df = load_dataset('EnergyEfficiency_data.csv')            # load data
    df = one_hot_encode(df, categorical_columns, categories)  # one-hot encode categorical columns
    df = df.drop('Glazing Area Distribution_0', axis=1)
    df = (df - df.min()) / (df.max() - df.min())              # min-max normalize the data

    # split the data into features and target
    X = df.drop(target, axis=1)
    y = df[target]
    return X, y
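# A minimal sketch of the one_hot_encode helper assumed above (hypothetical; the real
# helper may name the dummy columns differently -- the later drop of
# 'Glazing Area Distribution_0' suggests raw numeric codes survive as column suffixes):
import pandas as pd

def one_hot_encode(df, columns, categories):
    for column in columns:
        # map known numeric codes to their names, leaving unmapped codes as-is
        codes = df[column].map(lambda c: categories.get(c, c))
        dummies = pd.get_dummies(codes, prefix=column)
        df = df.drop(column, axis=1).join(dummies)
    return df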
def test_metrics_names_after_fit(self):
  tnt_model = tnt.Model(mnist.lenet5_model_generator())
  tnt_model.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss="sparse_categorical_crossentropy",
                    metrics=["sparse_categorical_accuracy"])
  train_dataset, _, _ = util.load_dataset(mnist.load_mnist_dataset,
                                          train_size=24,
                                          train_batch_size=24)
  tnt_model.fit(train_dataset)
  assert tnt_model.metrics_names == ["loss", "sparse_categorical_accuracy"]
def train_val_dataset_generator():
  micro_batch_size = 64
  nbatches = 1
  batch_size = micro_batch_size * tnt.get_size()
  nsamples = nbatches * batch_size
  train_dataset, val_dataset, _ = util.load_dataset(mnist.load_mnist_dataset,
                                                    train_size=nsamples,
                                                    train_batch_size=batch_size,
                                                    val_size=nsamples,
                                                    val_batch_size=batch_size)
  return train_dataset, val_dataset
def test_reset_metrics(self):
  tnt_model = tnt.Model(mnist.lenet5_model_generator())
  tnt_model.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss="sparse_categorical_crossentropy",
                    metrics=["sparse_categorical_accuracy"])
  train_dataset, _, _ = util.load_dataset(mnist.load_mnist_dataset,
                                          train_size=60,
                                          train_batch_size=60)
  tnt_model.fit(train_dataset)
  assert all(float(m.result()) != 0 for m in tnt_model.metrics)

  tnt_model.reset_metrics()
  assert all(float(m.result()) == 0 for m in tnt_model.metrics)
def load_reference_datasets(batch_size, num_batches, num_test_batches):
  util.set_tf_random_seed()
  train_size = num_batches * batch_size
  test_size = num_test_batches * batch_size
  train_dataset, val_dataset, test_dataset = util.load_dataset(mnist.load_mnist_dataset,
                                                               train_size=train_size,
                                                               train_batch_size=batch_size,
                                                               val_size=test_size,
                                                               val_batch_size=batch_size,
                                                               test_size=test_size,
                                                               test_batch_size=batch_size,
                                                               shuffle=True)
  return {"train": train_dataset,
          "val": val_dataset,
          "test": test_dataset}
def main():
    (x_train, y_train) = utilities.load_dataset()

    if os.path.isfile('trained_model.json'):
        print("Model found, loading...")
        model_def = load_model('trained_model')
    else:
        input_shape = [384, 256, 3]
        output_shape = 3
        model_def = build_inference_graph(input_shape, utilities.hyper_param, output_shape)
        model_def.summary()
        model_def.compile(optimizer=Adam(lr=0.0001), loss='mse', metrics=['accuracy'])
        model_def.fit(x_train, y_train, batch_size=16, epochs=10)
        save_model(model_def, 'trained_model')
def test_compare_accuracy_optimizers(self, tarantella_framework, mnist_model_runner,
                                     optimizer, micro_batch_size, nbatches):
  batch_size = micro_batch_size * tarantella_framework.get_size()
  nsamples = nbatches * batch_size
  (number_epochs, lr) = mnist.get_hyperparams(optimizer)
  (train_dataset, test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                                    train_size=nsamples,
                                                    train_batch_size=batch_size,
                                                    test_size=10000,
                                                    test_batch_size=batch_size)
  mnist_model_runner.compile_model(optimizer(learning_rate=lr))
  mnist_model_runner.reset_weights()
  mnist_model_runner.train_model(train_dataset, number_epochs)

  results = mnist_model_runner.evaluate_model(test_dataset)
  util.check_accuracy_greater(results[1], 0.91)
def main():
    (x_train, y_train) = ut.load_dataset()
    print("Dataset loaded...")

    model_def = tm.load_model('trained_model')
    y_predicted = model_def.predict(x_train)
    np.savetxt("gt", y_train)
    np.savetxt("pred", y_predicted)

    path_input_image = '../../Dataset/GehlerShi_input/'
    path_output = '../../Dataset/Prediction/'

    file_names = []
    for i in range(1, 569):
        file_names.append('00' + ut.zero_string(4 - ut.nr_digits(i)) + str(i))

    for index, file_name in enumerate(file_names):
        image_blob = Image.open(os.path.join(path_input_image, file_name + ".png"))
        gt_luminance = y_train[index]
        pred_luminance = y_predicted[index]
        white_bal_groundtruth = to_pil(white_balance(image_blob, gt_luminance))
        white_bal_prediction = to_pil(white_balance(image_blob, pred_luminance))
        white_bal_groundtruth.save(os.path.join(path_output, file_name + "_gt.png"))
        white_bal_prediction.save(os.path.join(path_output, file_name + "_pred.png"))
def test_compare_sgd_momentum(self, tarantella_framework, mnist_model_runner,
                              lr, nesterov, momentum, micro_batch_size,
                              nbatches, number_epochs):
  batch_size = micro_batch_size * tarantella_framework.get_size()
  nsamples = nbatches * batch_size
  (train_dataset, test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                                    train_size=nsamples,
                                                    train_batch_size=batch_size,
                                                    test_size=10000,
                                                    test_batch_size=batch_size)
  mnist_model_runner.compile_model(keras.optimizers.SGD(learning_rate=lr,
                                                        momentum=momentum,
                                                        nesterov=nesterov))
  mnist_model_runner.reset_weights()
  mnist_model_runner.train_model(train_dataset, number_epochs)

  results = mnist_model_runner.evaluate_model(test_dataset)
  util.check_accuracy_greater(results[1], 0.91)
def test_compare_weights_across_ranks(self, tarantella_framework, model_runner,
                                      micro_batch_size, nbatches, number_epochs):
  comm_size = tarantella_framework.get_size()
  batch_size = micro_batch_size * comm_size
  nsamples = nbatches * batch_size
  (train_dataset, _) = util.load_dataset(mnist.load_mnist_dataset,
                                         train_size=nsamples,
                                         train_batch_size=batch_size,
                                         test_size=0,
                                         test_batch_size=batch_size)
  model_runner.reset_weights()
  model_runner.train_model(train_dataset, number_epochs)
  final_weights = model_runner.get_weights()

  # broadcast the weights from the master rank to all the participating ranks
  model_runner.model._broadcast_weights()
  reference_rank_weights = model_runner.get_weights()
  util.compare_weights(final_weights, reference_rank_weights, 1e-6)
def test_cifar_alexnet(self, tarantella_framework, cifar_model_runner,
                       optimizer, micro_batch_size, nbatches):
  batch_size = micro_batch_size * tarantella_framework.get_size()
  nsamples = nbatches * batch_size
  (number_epochs, lr) = cifar.get_hyperparams(optimizer)
  (train_dataset, test_dataset) = util.load_dataset(cifar.load_cifar_dataset,
                                                    train_size=nsamples,
                                                    train_batch_size=batch_size,
                                                    test_size=10000,
                                                    test_batch_size=batch_size)
  if optimizer.__name__ == 'SGD':
    cifar_model_runner.compile_model(optimizer(learning_rate=lr, momentum=0.9))
  else:
    cifar_model_runner.compile_model(optimizer(learning_rate=lr))
  cifar_model_runner.reset_weights()
  cifar_model_runner.train_model(train_dataset, number_epochs)

  results = cifar_model_runner.evaluate_model(test_dataset)
  util.check_accuracy_greater(results[1], 0.5)
def main():
    x_train_sel, x_train_hyp, y_train_hyp = ut.load_dataset()
    print(x_train_sel.shape)
    print(y_train_hyp.shape)

    # Train the hypothesis network
    if os.path.isfile('trained_model_hyp.json'):
        print("Model found, loading...")
        model_def_hyp = load_model('trained_model_hyp')
    else:
        input_shape = [44, 44, 2]
        output_shape = 2  # one output along each of the two branches
        model_def_hyp = build_inference_graph(input_shape, ut.hyper_param_hyp, output_shape)
        model_def_hyp.summary()
        # Define the loss for Hyp-Net
        print(x_train_hyp.shape)
        print(y_train_hyp.shape)
        model_def_hyp.compile(optimizer=Adam(lr=0.0001), loss=hyp_loss)
        model_def_hyp.fit(x_train_hyp, y_train_hyp, batch_size=1, epochs=10)
        save_model(model_def_hyp, 'trained_model_hyp')

    # We need the inference output of Hyp-Net to train Sel-Net
    y_train_sel_a, y_train_sel_b = model_def_hyp.predict(x_train_hyp)
    y_train_sel = prepare_sel_data(y_train_sel_a, y_train_sel_b, y_train_hyp)

    # Train the selection network
    if os.path.isfile('trained_model_sel.json'):
        print("Model found, loading...")
        # note: the original assigned this to model_def_hyp, which looked like a bug
        model_def_sel = load_model('trained_model_sel')
    else:
        input_shape = [44, 44, 2]
        output_shape = 2
        model_def_sel = build_inference_graph(input_shape, ut.hyper_param_sel, output_shape)
        model_def_sel.summary()
        model_def_sel.compile(optimizer=Adam(lr=0.0001),
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])
        # note: the original referenced an undefined x_train here; x_train_sel matches the loaded data
        model_def_sel.fit(x_train_sel, y_train_sel, batch_size=1, epochs=10)
        save_model(model_def_sel, 'trained_model_sel')
    data[columns] = data[columns].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

    # Discretization
    for column in columns:
        data[column] = pd.cut(data[column], m, labels=False)
    data['classes'] = classi
    return data


if __name__ == "__main__":
    # Load the original dataset
    print("Loading dataset...")
    dataset = util.load_dataset()
    print(dataset)

    # Extract features from the dataset
    print("Extracting useful features from the dataset...")
    dataset = features_extraction(dataset)
    print('Dataset with extracted features')
    print(dataset[:100])

    # Sample the dataset
    print("Sampling dataset...")
    dataset_sampled = sample_dataset(dataset)
    print(dataset_sampled)

    # Save the dataframe as a pickle
    print('Saving dataset_sampled to pickle object...')
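    # The save call itself is missing from this excerpt; a plausible completion
    # (the filename is hypothetical):
    dataset_sampled.to_pickle('dataset_sampled.pkl')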
                       weights_initializer=tf.random_normal_initializer(stddev=0.02),
                       scope='d_conv4')
    conv4 = utils.lrelu(conv4)
    conv5 = tcl.conv2d(conv4, 1, 4, 1,
                       activation_fn=tf.identity,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02),
                       scope='d_conv5')
    return conv5


# training parameters
batch_size = cfg.batch_size
lr = 0.0002
train_epoch = 2
rotate = False

# load dataset
train_set, train_setY, val_set, val_setY, test_set, test_setY = utils.load_dataset(name='MNIST')

# variables: input
x = tf.placeholder(tf.float32, shape=(cfg.batch_size, 28, 28, 1))
z = tf.placeholder(tf.float32, shape=(cfg.batch_size, 1, 1, 100))
isTrain = tf.placeholder(dtype=tf.bool)
fixed_z_ = np.random.normal(0, 1, (cfg.batch_size, 1, 1, 100))

# networks: generator
G_z = generator(z, isTrain)
flatG_z = tf.reshape(G_z, [batch_size, -1])

# networks: discriminator
D_real = discriminator(x, isTrain)
D_fake = discriminator(G_z, isTrain, reuse=True)
# get chromosome info
chr_lst = get_chr_info(vp_info['genome'], property='chr_name')
chr_size = get_chr_info(vp_info['genome'], property='chr_size')
n_chr = len(chr_lst)

# load RE positions
print('Loading 1st ResEnz: {:s}'.format(vp_info['res_enzyme']))
re1_pos_lst = get_re_info(re_name=vp_info['res_enzyme'], property='pos', genome=vp_info['genome'])
if ('second_cutter' in vp_info) and isinstance(vp_info['second_cutter'], str):
    print('Loading 2nd ResEnz: {:s}'.format(vp_info['second_cutter']))
    re2_pos_lst = get_re_info(re_name=vp_info['second_cutter'], property='pos', genome=vp_info['genome'])
else:
    re2_pos_lst = [np.empty(0, dtype=int)] * n_chr

# load data
data_pd = load_dataset(vp_info, target_field='frg_np', verbose=True, data_path=inp_args.dataset_dir)
data = data_pd[['chr', 'pos', '#read']].values.astype('int32')
del data_pd
vp_info['#rd_all'] = np.sum(data[:, 2])

# downsampling, if requested
if inp_args.downsample is not None:
    # TODO: add other types of downsampling, such as #captures
    assert inp_args.downsample[:4] == 'nmap'
    n_map = int(float(inp_args.downsample[4:]))
    print('Downsampling #mapped: from {:,d} mapped fragments to {:0,.0f} fragments.'.format(np.sum(data[:, 2]), n_map))
    idx_set = np.repeat(np.arange(data.shape[0]), data[:, 2])
    # note: only covered restriction fragments are downsampled here;
    # empty restriction fragments are never selected
    ds_set = np.random.choice(idx_set, size=n_map, replace=False)
    del idx_set
    rf_uid, rf_frq = np.unique(ds_set, return_counts=True)
def train(epochs=100, batch_size=512, validation_split=0.1,
          drop_probability=0.5, extra_training=True, save=False):
    # These take some time to load, and need at least 650 MB of memory
    print("Loading dataset...")
    Y, features = load_dataset()

    print("Saving item list...")
    with open("anime_list.txt", 'w') as f:
        for anime in list(Y.columns):
            f.write(anime + "\n")

    print("Creating feature and target data...")
    # This mask drops (with 50% probability at the default drop_probability) values across our dataset
    mask = np.random.randint(1 // drop_probability + 1, size=Y.shape)
    X = Y * mask
    # Convert each user's item ratings to user features
    X = X @ features
    # normalize each user's feature row
    X = X.apply(lambda x: x / x.max(), axis=1)

    print("Defining model...")
    ## Model definition and training
    inp = Input(shape=(X.shape[1], ))
    x = Dense(64, activation='relu')(inp)
    x = Dense(128, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(Y.shape[1], activation='linear')(x)
    model = Model(inp, out)
    model.summary()
    model.compile(SGD(lr=0.01, momentum=0.9, decay=1e-2), loss='mse')

    print("Training model...")
    h = model.fit(X, Y, batch_size, epochs=epochs, validation_split=validation_split)

    plt.figure(figsize=(12, 8))
    # note: the plot ranges were hard-coded to 100/120 epochs in the original,
    # which broke for any other epoch count
    plt.plot(np.arange(0, epochs), h.history['loss'], label="train_loss", color='blue')
    plt.plot(np.arange(0, epochs), h.history['val_loss'], label='val_loss', color='orange')

    if extra_training:
        print("Extra model training...")
        model.compile(SGD(lr=0.001, momentum=0.9, decay=1e-3), loss='mse')
        h2 = model.fit(X, Y, batch_size // 2, epochs=epochs // 5,
                       validation_split=validation_split)
        plt.plot(np.arange(epochs, epochs + epochs // 5), h2.history['loss'], color='blue')
        plt.plot(np.arange(epochs, epochs + epochs // 5), h2.history['val_loss'], color='orange')
    plt.legend()
    plt.show()

    if save:
        print("Saving model...")
        model.save('weights/' + save)
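# Example invocation (hypothetical; note that 'save' doubles as the output filename
# under weights/, so passing a string both enables and names the save):
if __name__ == "__main__":
    train(epochs=100, batch_size=512, extra_training=True, save="recommender.h5")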
# Package imports
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy

from utilities import load_dataset

## ---------------------------------------------------------
##  PREPARE DATASET
## ---------------------------------------------------------

# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Reshape the training and test examples into (features, samples) matrices
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Standardize the dataset to pixel values in [0, 1]
train_set_x = train_set_x_flatten / 255.
test_set_x = test_set_x_flatten / 255.

## ---------------------------------------------------------
##  SETUP LOGISTIC REGRESSION ALGORITHM
## ---------------------------------------------------------

# Compute the sigmoid function of z
# Arguments: z -- a scalar or numpy array of any size
# Return:    s -- sigmoid(z)
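# The header comments above document a sigmoid helper whose body is missing from
# this excerpt; a minimal implementation consistent with that description:
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s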
import argparse

import numpy as np

from cnn_model import CNNModel
from utilities import load_dataset

parser = argparse.ArgumentParser()
parser.add_argument("--use_evaluation_dataset",
                    help="use evaluation dataset",
                    action="store_true")
args = parser.parse_args()

if args.use_evaluation_dataset:
    test_dataset_path = "/tmp/deers_and_trucks_evaluation"
else:
    test_dataset_path = "data/deers_and_trucks_test"

# Load the dataset.
images_test, cls_test = load_dataset(test_dataset_path)
n_classes = 2
cls_names = ["deers", "trucks"]

# Encode the labels as one hot.
cls_test_one_hot_encoded = np.eye(n_classes, dtype=float)[cls_test]

# Create a convolutional neural network.
model = CNNModel(is_training=False)

# Load the saved model.
model.load("model/")

# Create a dictionary for evaluating the network on the full validation data.
testing_dict = model.make_dictionary(images_test, cls_test_one_hot_encoded)
def train_on_unified_dataset():
    # Name of the classifier to use in the learning process and its serialization path
    classifierType = "svm"
    modelFilePath = "models/" + classifierType + ".pickle"

    # Categories of the classes to use in the model
    categories = {'Positive': 'pos', 'Negative': 'neg'}
    counter = 0

    # -------------------------------------------------------------------------------------------------------------------
    # Load the data-set. It is loaded as a dictionary in which each element
    # contains the content of one example file
    dataset_labels, dataset = load_dataset('datasets/Twitter')

    # ------------------------------------------------------------------------------------------------------------------
    # Call csv_dict_list for each named csv and merge the result into the data-set
    dataset_labels2, dataset2, counter = csv_dict_list("datasets/ATT.csv", counter)
    dataset_labels += dataset_labels2
    dataset.update(dataset2)

    dataset_labels3, dataset3, counter = csv_dict_list("datasets/HTL.csv", counter)
    dataset_labels += dataset_labels3
    dataset.update(dataset3)

    dataset_labels4, dataset4, counter = csv_dict_list("datasets/MOV.csv", counter)
    dataset_labels += dataset_labels4
    dataset.update(dataset4)

    dataset_labels5, dataset5, counter = csv_dict_list("datasets/PROD.csv", counter)
    dataset_labels += dataset_labels5
    dataset.update(dataset5)

    dataset_labels6, dataset6, counter = csv_dict_list("datasets/RES.csv", counter)
    dataset_labels += dataset_labels6
    dataset.update(dataset6)

    # Preprocess the data-set
    dataset = preprocessing(dataset)

    # Feature extraction: tf-idf transformation
    count_vect, X_train_tfidf, tfidf_transformer = tf_idf_features(dataset)

    # An object from the sentiment analysis module to use in training and testing
    sent_anal = sentiment_analysis()

    # Train a classifier
    print('Training a classifier is in progress ...')
    classifier = sent_anal.sentiment_analysis_train(X_train_tfidf, dataset_labels,
                                                    classifierType, modelFilePath)

    # Cross validation
    build_pipeline(dataset, dataset_labels, 'accuracy')
    print('Training done')
    print('-------------------------------------------------------------')

    # Load the serialized classifier (uncomment if a classifier already exists)
    classifier = readSerializedClassifier(modelFilePath)

    # Test a new input example
    print('Testing a new data example ...')
    input_text = ['الحياة صعبة شباب']  # Arabic: "Life is hard, guys"
    filtered_input_text = list()
    filtered_input_text.append(preprocessing(''.join(input_text)))
    sent_anal.sentiment_analysis_test(filtered_input_text, classifier, count_vect,
                                      X_train_tfidf, tfidf_transformer, categories)
# This exercise has been inspired by Magnus Erik Hvass Pedersen's tutorial on CNN:
# https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/02_Convolutional_Neural_Network.ipynb
import argparse
import time
from datetime import timedelta

import numpy as np

from batchmaker import Batchmaker
from cnn_model import CNNModel
from utilities import load_dataset, plot_images

DATASET_PATH = "data/deers_and_trucks"

# Load the dataset.
images_train, cls_train = load_dataset(DATASET_PATH)
n_classes = 2
cls_names = ["deers", "trucks"]

# Plot a few samples if not disabled.
parser = argparse.ArgumentParser()
parser.add_argument("--disable_visualization",
                    help="disable image visualization",
                    action="store_true")
args = parser.parse_args()

if not args.disable_visualization:
    plot_images(images_train[0:9], np.asarray(cls_names)[cls_train[0:9]])

# Encode the labels as one hot.
cls_train_one_hot_encoded = np.eye(n_classes, dtype=float)[cls_train]
# The main method of the script. The script trains the classifier and tests it on new input data
# -----------------------------------------------------------------------------------------------------------------------
if __name__ == "__main__":
    # Name of the classifier to use in the learning process and its serialization path
    classifierType = "svm"
    modelFilePath = "models/" + classifierType + ".pickle"

    # Categories of the classes to use in the model
    categories = {'Positive': 'pos', 'Negative': 'neg'}
    counter = 0

    # -------------------------------------------------------------------------------------------------------------------
    # Load the data-set. It is loaded as a dictionary in which each element
    # contains the content of one example file
    dataset_labels, dataset = load_dataset('datasets/Twitter')

    # ------------------------------------------------------------------------------------------------------------------
    # Call csv_dict_list, passing the named csv, and merge the result into the data-set
    dataset_labels2, dataset2, counter = csv_dict_list("datasets/ATT.csv", counter)
    dataset_labels += dataset_labels2
    dataset.update(dataset2)

    # Preprocess the data-set
    dataset = preprocessing(dataset)

    # Feature extraction: tf-idf transformation
    count_vect, X_train_tfidf, tfidf_transformer = tf_idf_features(dataset)

    # An object from the sentiment analysis module to use in training and testing