def train_face_classifier(ntrain, ntest, orientations, wrap180, model_save_file):
    """Fit a logistic-regression face classifier and report its accuracy.

    Loads `ntrain` training and `ntest` testing descriptors, fits logistic
    parameters, saves them to `model_save_file` (numpy format), plots the
    misclassified examples for both splits, and prints the classification
    rate on each.
    """
    print("Loading training data...")
    feats_train, labels_train = get_training_data(ntrain, orientations, wrap180=wrap180)
    print("Finished loading training data.")
    print("Loading test data...")
    feats_test, labels_test = get_testing_data(ntest, orientations, wrap180=wrap180)
    print("Finished loading test data.")

    print("Start training...")
    t0 = time.time()
    params, _ = logistic_fit(feats_train, labels_train)
    print("Training took {} seconds.".format(time.time() - t0))
    # Persist the fitted parameters so the model can be reloaded later.
    np.save(model_save_file, params)

    # Evaluate on the training split.
    probs_train = logistic_prob(feats_train, params)
    plot_errors(probs_train, labels_train, is_training=True)
    train_rate = classification_rate(probs_train, labels_train)
    print("Training classification rate: {}".format(train_rate))

    # Evaluate on the held-out test split.
    probs_test = logistic_prob(feats_test, params)
    plot_errors(probs_test, labels_test, is_training=False)
    test_rate = classification_rate(probs_test, labels_test)
    print("Testing classification rate: {}".format(test_rate))
def locations(path=None):
    """Print each training label with its frequency and return the labels.

    `path` is accepted but unused — kept for interface compatibility.
    Returns the distinct labels in first-seen order.
    """
    _, labels = get_training_data()
    counts = Counter(labels)
    for label, count in counts.items():
        print("{}: {}".format(label, count))
    # Iterating a Counter yields its keys in the same order as .items().
    return list(counts)
def train_random_forest():
    """Fit a DictVectorizer + RandomForest pipeline and pickle it.

    Writes the fitted pipeline to ../data/rf.pkl and returns it.
    """
    model_path = "../data/rf.pkl"
    features, targets = get_training_data()
    pipeline = make_pipeline(
        DictVectorizer(sparse=False),
        RandomForestClassifier(n_estimators=100, class_weight="balanced"),
    )
    pipeline.fit(features, targets)
    with open(model_path, "wb") as fh:
        pickle.dump(pipeline, fh)
    return pipeline
def train():
    """Train a BentesModel with a variational (CE + beta*KL) objective.

    Runs an effectively unbounded training loop, printing per-step loss,
    validation cross-entropy, score histograms, and first-layer
    parameter/gradient dumps for debugging.

    NOTE(review): source formatting was collapsed; the per-step debug prints
    below are reconstructed as part of the training loop — confirm against
    the original layout.
    """
    PYRNG = Random(0)  # fixed seed so the train/test/val split is reproducible
    ttv_proportions = dict(test=0.001, train=.96, validation=0.039)
    # Whiten, add flips, mask regions outside circle, train/test/val split
    DATA = (get_training_data().to_gpu().normalize().enrich().mask_circle().
            test_train_validation(PYRNG, **ttv_proportions))
    # Fixed validation mini-set, reused for monitoring every step.
    VALDATA = DATA.validation.get_examples(50, PYRNG)
    VALIMGS = Variable(T.from_numpy(VALDATA.images).type(TP.FloatTensor))
    VALCLASSES = Variable(TP.LongTensor(VALDATA.is_iceberg))
    BETA = 1e1          # KL weight; annealed multiplicatively each step
    BETA_FACTOR = .9999
    BATCH_SIZE = 32
    model = BentesModel()
    if T.cuda.is_available():
        model = model.cuda()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9999)
    # Effectively "train forever"; stop manually.
    for i in range(1_000_000_000):
        scheduler.step()
        optimizer.zero_grad()
        batch = DATA.train.get_examples(BATCH_SIZE, PYRNG).rotate(PYRNG)
        imgvar = Variable(T.from_numpy(batch.images).type(TP.FloatTensor))
        result = model(imgvar)
        classvar = Variable(TP.LongTensor(batch.is_iceberg))
        # Loss = data term (cross-entropy) + annealed KL regularizer.
        accuracy = F.cross_entropy(result.activations, classvar)
        kl = T.mean(result.kl)
        loss = accuracy + BETA * kl
        loss.backward()
        # Validation forward pass is for logging only: no backward() is
        # called on valaccuracy, so the pending gradients are unaffected.
        valresult = model(VALIMGS)
        valaccuracy = F.cross_entropy(valresult.activations, VALCLASSES)
        optimizer.step()
        # `.data[0]` extracts a Python scalar (pre-0.4 PyTorch idiom).
        gf = lambda t: f'{t.data[0]:12.3f}'  # noqa: E731
        print(f'Step: {i:6d} CE: {gf(accuracy)} KL: {gf(kl)} loss: {gf(loss)} '
              f'val: {gf(valaccuracy)}')
        # Log-probability assigned to each example's true class.
        scores = (F.log_softmax(
            result.activations,
            dim=1).data.cpu().numpy()[list(range(BATCH_SIZE)),
                                      batch.is_iceberg])
        # Histogram of those scores, printed as (count, bin-edge) pairs.
        print(
            np.array(
                list(zip(*(s.astype(float) for s in np.histogram(scores))))).T)
        probs = F.softmax(result.activations).data.cpu().numpy().tolist()
        pprint(list(zip(batch.is_iceberg, probs)))
        BETA *= BETA_FACTOR  # anneal the KL weight
        # Debug dump: first-kernel weights and gradients of the first layer.
        print('first layer parameters/gradients for first kernel')
        print('convolution')
        print(model.layers[1].layer.weight[0])
        print(model.layers[1].layer.weight.grad[0])
        print('noise')
        print(model.layers[1].noise.weight[0])
        print(model.layers[1].noise.weight.grad[0])
        print('prior mean')
        print(model.layers[1].prior.mean[0])
        print(model.layers[1].prior.mean.grad[0])
        print('prior alpha')
        print(model.layers[1].prior.alpha[0])
        print(model.layers[1].prior.alpha.grad[0])
def train_model_neural_network():
    """Train and pickle the wifi-location model, returning the pipeline.

    NOTE(review): despite the name, this fits a DecisionTreeClassifier,
    not a neural network — confirm which model is intended.

    Raises:
        ValueError: if no wifi access points were collected for training.
    """
    model_path = "../data/nn.pkl"
    features, targets = get_training_data()
    if len(features) == 0:
        raise ValueError("No wifi access points have been found during training")
    pipeline = make_pipeline(
        DictVectorizer(sparse=False),
        DecisionTreeClassifier(max_depth=None, min_samples_split=2,
                               random_state=0),
    )
    pipeline.fit(features, targets)
    with open(model_path, "wb") as fh:
        pickle.dump(pipeline, fh)
    return pipeline
def build_training_data_loader(self) -> keras.InputData:
    """Build the training data loader (a tf.keras.Sequence).

    Augmentation settings are read from hyperparameters, each falling back
    to a no-op default when unset.
    """
    hparams = self.context.get_hparams()
    augmentation = dict(
        width_shift_range=hparams.get("width_shift_range", 0.0),
        height_shift_range=hparams.get("height_shift_range", 0.0),
        horizontal_flip=hparams.get("horizontal_flip", False),
    )
    # Return a tf.keras.Sequence.
    return get_training_data(
        data_directory=self.download_directory,
        batch_size=self.context.get_per_slot_batch_size(),
        **augmentation,
    )
def train_xgb():
    """Train and pickle the wifi-location classifier, returning the pipeline.

    NOTE(review): despite the name, this fits an MLPClassifier — an earlier
    GradientBoosting pipeline was abandoned (dead commented-out code removed
    here). Rename the function or swap the model to match intent.

    Writes the fitted pipeline to ../data/xgb.pkl.

    Raises:
        ValueError: if no wifi access points were collected for training.
    """
    model_file = "../data/xgb.pkl"
    X, y = get_training_data()
    if len(X) == 0:
        raise ValueError("No wifi access points have been found during training")
    lp = make_pipeline(
        DictVectorizer(sparse=False),
        MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,),
                      random_state=1),
    )
    lp.fit(X, y)
    with open(model_file, "wb") as f:
        pickle.dump(lp, f)
    return lp
def main():
    """Command-line entry point: build a background model and enroll clients.

    Parses a working directory and a project module name, imports
    `create_background_model` and `enroll` from that module, trains the
    background model on the training data, then enrolls every identity in
    the 'devel' and 'test' groups, writing one model file per client.
    (Python 2 syntax: `exec` statement, `iteritems`.)
    """
    parser = argparse.ArgumentParser(description=__doc__, epilog=__epilog__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-w', '--work-directory', metavar='DIR', default='tmp', help='Path to the working directory to use for storing files (defaults to `%(default)s\')')
    parser.add_argument('module', default='project', nargs='?', metavar='MODULE', help='Name of the module containing the code to execute')
    args = parser.parse_args()
    # Ensure the working directory exists before writing any model files.
    if not os.path.exists(args.work_directory):
        print("Creating directory `%s'..." % args.work_directory)
        os.makedirs(args.work_directory)
    else:
        print("Using existing directory `%s'..." % args.work_directory)
    print("Loading your project from `%s'..." % args.module)
    # SECURITY: exec on a user-supplied module name allows arbitrary code
    # execution; tolerable only because the name comes from the local
    # command line, never from untrusted input.
    exec 'from %s import create_background_model, enroll' % args.module
    from data import get_training_data, get_data
    background_filename = os.path.join(args.work_directory, 'background.model')
    print("Creating background model -> `%s'..." % background_filename)
    # Remove any stale background model before regenerating it.
    if os.path.exists(background_filename):
        os.unlink(background_filename)
    create_background_model(get_training_data(), background_filename)
    print("Training models...")
    for group in ('devel', 'test'):
        print("... for `%s' group ..." % group)
        data = get_data(group, 'enroll')
        # Python 2 dict iteration: identity (int) -> enrollment images.
        for identity, images in data.iteritems():
            filename = os.path.join(args.work_directory, 'client-%d.model' % identity)
            # Remove any stale per-client model before re-enrolling.
            if os.path.exists(filename):
                os.unlink(filename)
            print("Enrolling client %d -> `%s'..." % (identity, filename))
            enroll(images, background_filename, filename)
    print("All done. Models saved at directory `%s'." % args.work_directory)
    print("You can proceed with the evaluation using `compute_performance.py'.")
label = label.ravel() return ((pred > 0.5) == label).mean() # setting mx.random.seed(random.randint(1, 10000)) logging.basicConfig(level=logging.DEBUG) # create output dir try: os.makedirs(opt.data_path) except OSError: pass # get training data train_data = get_training_data(opt.batch_size) # get model g_net = get_generator() d_net = get_descriptor(CTX) # define loss function loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() # initialization g_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX) d_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX) g_trainer = gluon.Trainer( g_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT}) d_trainer = gluon.Trainer( d_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
from http.server import HTTPServer, BaseHTTPRequestHandler
from io import BytesIO
import json

# Load the training data once and reuse it. The original fetched
# data.get_training_data() twice — once into an unused variable and once
# directly into the classifier constructor — doing the expensive load twice.
import data
training_data = data.get_training_data()

# Instantiate the classifier on the (single) loaded training set.
import classifier
nltk_classifier = classifier.NLTKClassifier(training_data)


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Simple HTTP handler: classify the POSTed JSON 'message' field."""

    def do_POST(self):
        # Read exactly Content-Length bytes of the JSON request body.
        content_length = int(self.headers['Content-Length'])
        body = json.loads(self.rfile.read(content_length))
        classification = nltk_classifier.classify(body['message'])
        self.send_response(200)
        self.end_headers()
        response = BytesIO()
        response.write(classification.encode('utf-8'))
        self.wfile.write(response.getvalue())


# Run the server on port 8000 (blocks forever).
httpd = HTTPServer(('localhost', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()
import time from data import get_training_data import tensorflow as tf from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D from tensorflow.keras.callbacks import TensorBoard x_train, y_train = get_training_data() # Normalise the data x_train = x_train / 255.0 # Parameter options dense_layers = [0, 1, 2] layer_sizes = [32, 64, 128] conv_layers = [1, 2, 3] for num_dense_layers in dense_layers: for layer_size in layer_sizes: for num_conv_layers in conv_layers: NAME = f'{num_conv_layers}-conv-{layer_size}-nodes-{num_dense_layers}-dense-{int(time.time())}' tensorboard = TensorBoard(log_dir=f'logs/{NAME}') # Create model model = Sequential() # (3, 3) is the convolution kernel size (window size) model.add(Conv2D(layer_size, (3, 3), input_shape = x_train.shape[1:])) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) for _ in range(num_conv_layers - 1):
def main():
    """Train the U-Net segmenter, log to Weights & Biases, and summarize.

    Reads hyperparameters from CLI args (with fallbacks), trains with
    `fit_generator`, checkpoints the best model by training loss, and
    finally evaluates over data/all, storing a dataframe in run.summary.
    """
    # Get args
    args = parser.parse_args()
    # Init wandb; each CLI arg overrides the default when provided.
    run = wandb.init()
    run.config.learning_rate = args.learning_rate or 1e-4
    run.config.num_epochs = args.epochs or 100
    run.config.steps_per_epoch = args.steps or 300
    run.config.batch_size = args.batch_size or 8
    run.config.image_size = (288, 512)
    run.config.num_predictions = args.num_predictions or 24
    run.config.beta = args.beta or 50
    wandb.save('*.py')
    # Training generator applies augmentation_params; validation uses none ({}).
    training_data_generator = get_training_data(
        run.config.batch_size,
        'data/train',
        'images',
        'labels',
        augmentation_params,
        target_size=run.config.image_size)
    validation_data_generator = get_training_data(
        run.config.batch_size,
        'data/valid',
        'images',
        'labels', {},
        target_size=run.config.image_size)
    # Second, independent validation generator — presumably because
    # WandbCallback consumes it separately from fit_generator; verify.
    validation_data_generator_2 = get_training_data(
        run.config.batch_size,
        'data/valid',
        'images',
        'labels', {},
        target_size=run.config.image_size)
    os.makedirs('model', exist_ok=True)
    model = unet(image_size=run.config.image_size)
    metrics = ['accuracy', km.precision(), km.recall()]
    model.compile(
        optimizer=Adam(lr=run.config.learning_rate),
        loss=weighted_cross_entropy(run.config.beta),
        # loss='binary_crossentropy',
        metrics=metrics)
    # Save best model (by training loss, not validation loss).
    model_path = 'model/unet_witness.hdf5'
    model_checkpoint = ModelCheckpoint(model_path, monitor='loss', verbose=1,
                                       save_best_only=True)
    # Upload examples to W&B
    wandb_callback = WandbCallback(data_type='image',
                                   predictions=run.config.num_predictions,
                                   generator=validation_data_generator_2,
                                   save_model=True,
                                   monitor='loss',
                                   mode='min',
                                   labels=['void', 'puzzle'])
    # Save to tensorboard
    # NOTE(review): created but commented out of `callbacks` below — dead
    # unless re-enabled.
    tensorboard_callback = TensorBoard(log_dir=wandb.run.dir,
                                       histogram_freq=0,
                                       write_graph=True,
                                       write_images=True)
    callbacks = [
        model_checkpoint,
        wandb_callback,
        # tensorboard_callback,
    ]
    # NOTE(review): validation_steps is set to num_predictions — looks
    # accidental (a step count reusing a prediction count); confirm.
    model.fit_generator(training_data_generator,
                        validation_data=validation_data_generator,
                        validation_steps=run.config.num_predictions,
                        steps_per_epoch=run.config.steps_per_epoch,
                        epochs=run.config.num_epochs,
                        callbacks=callbacks)
    # Upload best model to W&B
    wandb.save(model_path)
    all_imgs = glob.glob('data/all/images/*.jpg')
    # Label paths mirror image paths: /images/x.jpg -> /labels/x.png
    all_labels = [
        f.replace('/images/', '/labels/').replace('.jpg', '.png')
        for f in all_imgs
    ]
    run.summary['results'] = make_segmentation_dataframe(
        model,
        all_imgs,
        all_labels,
        image_size=run.config.image_size,
        loss_function=weighted_cross_entropy(run.config.beta),
    )
def main(_):
    """Entry point: load batched training data, then run training.

    The unused positional argument matches the tf.app.run callback shape.
    """
    total_batch_size = FLAGS.batch_size * FLAGS.time_steps
    batches_x, batches_y = get_training_data(total_batch_size)
    run_training(batches_x, batches_y)
elif option == "-dropout_rates": dropout_rates = [float(r) for r in sys.argv[1].split(",")] del sys.argv[1] else: print sys.argv[0], ": invalid option", option sys.exit(1) np.seterr(over="ignore", divide="ignore") print "Neural Networks" print print "Reading data..." # reading the data, applying configured pre-processing, and adding 1.0 to each vector as a bias input X_train, T_train = data.get_training_data(ntrain, normalize=normalize, deskew=deskew, add_ones=True) X_test, T_test = data.get_testing_data(ntest, normalize=normalize, deskew=deskew, add_ones=True) print "{0} training data read".format(len(X_train)) print "{0} testing data read".format(len(X_test)) print input_dim = X_train.shape[1] output_dim = T_train.max() + 1 weights, errors, params = [], [], [] print "{0:40}\tV. Loss\t\tV. Error".format( "(Func, Hidden, Batch, Learn, Drop)")
import pickle import tensorflow as tf import tflearn import data train_x, train_y, words, classes = data.get_training_data() # Reset underlying graph data tf.reset_default_graph() def build_model(): # Init model net = tflearn.input_data(shape=[None, len(train_x[0])]) net = tflearn.fully_connected(net, 8) net = tflearn.fully_connected(net, 8) net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax') net = tflearn.regression(net) model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') return model if __name__ == '__main__': model = build_model() model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True) model.save('model.tflearn') # save all data structures pickle.dump(
# Network hyper-parameters.
# The input layer size must match the number of features and the output
# layer size must match the number of classes.
network_architecture = [4, 5, 3]
activation_function = activation.tanh
activation_function_back = activation.tanh_back
loss_metric = metrics.mse
learning_rate = 0.000001
training_epochs = 5000
metrics_period = 10000  # calculate metrics every `metrics_period` iterations
test_examples = 10  # number of test examples for the trained network

if __name__ == '__main__':
    features, labels = data.get_training_data()
    t0 = time.time()
    train.train(features, labels, activation_function,
                activation_function_back, network_architecture, loss_metric,
                learning_rate, training_epochs, metrics_period)
    elapsed = time.time() - t0
    print('\nTraining finished in {0:.1f} seconds\n'.format(elapsed))
    # An optional random test of the trained network (disabled) would draw
    # `test_examples` samples here.
def main(args): # Since we are doing batch normalization, we have to keep track of # whether we are training or testing. Also determines dropout # probability. training = tf.placeholder(tf.bool, name='training') example_sequence = tf.placeholder(tf.float32, [None, 700, 22]) example_profile = tf.placeholder(tf.float32, [None, 700, 22]) labels = tf.placeholder(tf.float32, [None, 700, 9]) with tf.device('/gpu:0'): model = MODEL_MAPPING[args.model_num](example_sequence, example_profile, training) losses = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=model.logits) loss = tf.reduce_mean(losses) tf.summary.scalar('loss', loss) accuracy = validate.accuracy(model.logits, labels) # We need to execute update_ops before training for batch # normalization. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): training_step = tf.train.AdamOptimizer(.001).minimize(loss) training_data_sequence, training_data_profile, training_labels = ( data.get_training_data(args.train_files[0], args.num_epochs, args.batch_size)) validation_step, validation_initializer = data.get_validation_data( args.eval_files[0], args.batch_size) (validation_data_sequence, validation_data_profile, validation_labels) = validation_step summary = tf.summary.merge_all() with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess: print "BEGINNING TRANING..." def validation_pass(): print "VALIDATING..." 
sess.run([validation_initializer]) losses = [] accuracies = [] count_val = 0 while True: print count_val count_val += 1 try: val_ex_seq, val_ex_prof, val_label = sess.run([ validation_data_sequence, validation_data_profile, validation_labels ]) _loss, _accuracy = sess.run( [loss, accuracy], feed_dict={ example_sequence: val_ex_seq, example_profile: val_ex_prof, labels: val_label, training: False }) losses.append(_loss) accuracies.append(_accuracy) except tf.errors.OutOfRangeError: break total_loss = sum(losses) / float(len(losses)) total_accuracy = sum(accuracies) / float(len(accuracies)) print " -- TOTAL LOSS: " + str(total_loss) print " -- TOTAL ACCURACY: " + str(total_accuracy) sess.run(tf.global_variables_initializer()) summary_writer = tf.summary.FileWriter(args.job_dir, sess.graph) step = 0 while True: try: step += 1 print step itr_ex_sequence, itr_ex_profile, itr_label = sess.run([ training_data_sequence, training_data_profile, training_labels ]) _, s, l = sess.run( [training_step, summary, loss], feed_dict={ example_sequence: itr_ex_sequence, example_profile: itr_ex_profile, labels: itr_label, training: True }) # Log every step for now summary_writer.add_summary(s, step) print "LOSS: " + str(l) # Validation if step % args.validation_step == 0: validation_pass() except tf.errors.OutOfRangeError: validation_pass() print("DONE TRAINING") break
ntest = int(sys.argv[1]); del sys.argv[1] elif option == "-deskew": deskew = int(sys.argv[1]); del sys.argv[1] elif option == "-normalize": normalize = int(sys.argv[1]); del sys.argv[1] elif option == "-lsquared": lsquared = float(sys.argv[1]); del sys.argv[1] else: print sys.argv[0], ": invalid option", option sys.exit(1) print "Gaussian Processes" print print "Reading data..." # reading the data and applying configured pre-processing steps X_train, T_train = data.get_training_data(ntrain, normalize=normalize, deskew=deskew) X_test, T_test = data.get_testing_data(ntest, normalize=normalize, deskew=deskew) print "{0} training data read".format(len(X_train)) print "{0} testing data read".format(len(X_test)) print # running a Gaussian process on training and testing sets, with "lsquared" T_predicted = gaussian_process(X_train, T_train, X_test, lsquared=lsquared) # evaluating the model performance on the testing set print "Testing Set Error: {0:.3f}".format( get_error_score(T_predicted, T_test) ) print
import numpy as np
from data import input_neurons, hidden_neurons, output_neurons
from data import get_test_data, get_training_data
from network import Network

network = Network([input_neurons, hidden_neurons, output_neurons])

# Uncomment this stretch to start the network with previously saved
# (well-performing) weights:
# saved_weights = np.load("weights.npy",mmap_mode=None, allow_pickle=True)
# network.weights = saved_weights

training_inputs = get_training_data()
test_inputs = get_test_data()
network.start_training(training_inputs, 1000, learning_rate=0.7,
                       test_inputs=test_inputs)

# The test set is laid out in noise-level sections; print a banner
# (no noise / minimal / medium / heavy noise / out-of-set) when each
# section starts.
_SECTION_BANNERS = {
    0: "\nSEM RUIDOS:",
    34: "\nRUIDO MÍNIMO:",
    54: "\nRUIDO MÉDIO:",
    74: "\nRUIDO AVANÇADO:",
    94: "\nNÃO FAZEM PARTE:",
}

print("\nRESULTADOS:")
for index, sample in enumerate(test_inputs):
    banner = _SECTION_BANNERS.get(index)
    if banner is not None:
        print(banner)
    network.identify(sample, log=True)