class StereotypeExecutor(object):

    def __init__(self):
        self.markov_chain = SimpleMarkovChain()
        self.markov_current_state = 'MakeResponse'  # there should be an initial state @ can be random
        self.inter_arrivals_manager = InterArrivalsManager()
        self.data_generator = DataGenerator()
        # self.data_generator.initialize_file_system()
        # self.sender

    def initialize_from_stereotype_recipe(self, stereotype_recipe):
        '''Initialize the Markov Chain states'''
        self.markov_chain.initialize_from_recipe(stereotype_recipe)
        self.markov_chain.calculate_chain_relative_probabilities()
        '''Initialize the inter-arrival times'''
        self.inter_arrivals_manager.initialize_from_recipe(stereotype_recipe)
        '''Initialize the file system'''
        self.data_generator.initialize_file_system()

    def get_waiting_time(self):
        return self.inter_arrivals_manager.get_waiting_time(self.markov_chain.previous_state,
                                                            self.markov_chain.current_state)

    def next_operation(self):
        '''Get the next operation to be done'''
        self.markov_chain.next_step_in_random_navigation()

    def execute(self):
        '''Do an execution step as a client'''
        raise NotImplementedError
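# Usage sketch (not from the original source): a minimal concrete executor and
# driving loop, assuming the chain exposes previous_state/current_state as used
# above. `EchoExecutor` and `stereotype_recipe` are hypothetical names for
# illustration only.
import time

class EchoExecutor(StereotypeExecutor):

    def execute(self):
        '''One step: advance the chain, then wait the sampled inter-arrival time.'''
        self.next_operation()
        time.sleep(self.get_waiting_time())

# executor = EchoExecutor()
# executor.initialize_from_stereotype_recipe(stereotype_recipe)
# for _ in range(10):
#     executor.execute()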
def main():
    init_means = [-1, 1]
    mean_dimensions = 10
    points_per_class = 250
    knc = KNeighborsClassifier(n_neighbors=5)

    means = [[init_mean for mean_dim in range(mean_dimensions)]
             for init_mean in init_means]
    variances = [np.eye(len(means[0])), np.eye(len(means[0]))]
    data = dg.generate_prob_mixture(class_means=means,
                                    class_variances=variances,
                                    num_components=5,
                                    num_desired_points=points_per_class,
                                    dim_uniform=2)
    class_0 = np.hstack((data[0], np.zeros((len(data[0]), 1))))
    class_1 = np.hstack((data[1], np.ones((len(data[0]), 1))))

    train_data_class_0, test_data_class_0 = split_train_test(class_0)
    train_data_class_1, test_data_class_1 = split_train_test(class_1)
    train_data = np.vstack((train_data_class_0, train_data_class_1))
    test_data = np.vstack((test_data_class_0, test_data_class_1))

    corr_ranked_features, _ = CorrelationCoefficient.rank_features(train_data[:, :-1], train_data[:, -1])
    relief_ranked_features, _ = Relief.rank_features(train_data[:, :-1], train_data[:, -1])

    knc.fit(train_data[:, :-1], train_data[:, -1])
    pred = pred_test_default = knc.predict(test_data[:, :-1])
    print len(np.where(pred != test_data[:, -1])[0])

    corr_train_removed_features = remove_features(train_data[:, :-1], corr_ranked_features)
    corr_test_removed_features = remove_features(test_data[:, :-1], corr_ranked_features)
    knc.fit(corr_train_removed_features, train_data[:, -1])
    pred = knc.predict(corr_test_removed_features)
    print len(np.where(pred != test_data[:, -1])[0])

    corr_train_removed_features = remove_features(train_data[:, :-1], relief_ranked_features)
    corr_test_removed_features = remove_features(test_data[:, :-1], relief_ranked_features)
    knc.fit(corr_train_removed_features, train_data[:, -1])
    pred = knc.predict(corr_test_removed_features)
    print len(np.where(pred != test_data[:, -1])[0])
    return
def main():
    class_means = np.array([[0, 0], [2.5, 2.5]])
    class_variances = [np.eye(2), np.eye(2)]
    num_components = 10
    num_desired_points_per_class = 200
    class_0, class_1 = DataGenerator.generate_gaussian_mixture(
        class_means, class_variances, num_components,
        num_desired_points_per_class)
    combined_dataset = np.vstack((class_0, class_1))
    combined_labels = np.hstack(
        (np.zeros(num_desired_points_per_class, dtype=np.int32),
         np.ones(num_desired_points_per_class, dtype=np.int32)))

    ln = LinearModel()
    ln.fit(train_data=combined_dataset, labels=combined_labels)
    r_linear_model = ln.predict(test_data=combined_dataset,
                                extra_params_dict={"threshold": 0.5})
    print_accuracy(r_linear_model, combined_labels)

    knn = KNN(num_neighbors=5)
    knn.fit(train_data=combined_dataset, labels=combined_labels)
    r_knn = knn.predict(test_data=combined_dataset)
    print_accuracy(r_knn, combined_labels)

    plot_results(
        combined_dataset,
        np.vstack((np.vstack((combined_labels[np.newaxis, :],
                              r_linear_model[np.newaxis, :])),
                   r_knn[np.newaxis, :])),
        titles=["gt", "lin", "knn"],
    )
    return
def main():
    means = [[-1, -1], [1.0, 1.0]]
    knn_models = [3, 5, 10]
    data_sizes = [10, 25, 50, 75, 100, 125, 150, 175, 200]
    points_per_class = 500
    data = dg.generate_gaussian_mixture(class_means=means,
                                        class_variances=np.eye(2),
                                        num_components=5,
                                        num_desired_points_per_class=points_per_class)
    class_0 = np.hstack((data[0], np.zeros((len(data[0]), 1))))
    class_1 = np.hstack((data[1], np.ones((len(data[0]), 1))))

    results_train = np.empty((len(knn_models), len(data_sizes)))
    results_test = np.empty((len(knn_models), len(data_sizes)))

    train_data_class_0, test_data_class_0 = split_train_test(class_0)
    train_data_class_1, test_data_class_1 = split_train_test(class_1)
    print 'train size, test size', len(train_data_class_1), len(test_data_class_1)
    train_data = np.vstack((train_data_class_0, train_data_class_1))
    test_data = np.vstack((test_data_class_0, test_data_class_1))

    for i, knn_model in enumerate(knn_models):
        kncs = KNeighborsClassifier(n_neighbors=knn_model)
        for j, data_size in enumerate(data_sizes):
            curr_train_class_0, curr_train_class_1 = train_data_class_0[:data_size], train_data_class_1[:data_size]
            curr_train_data = np.vstack((curr_train_class_0, curr_train_class_1))
            kncs.fit(curr_train_data[:, :2], curr_train_data[:, -1])
            predictions_train = kncs.predict(train_data[:, :2])
            predictions_test = kncs.predict(test_data[:, :2])
            results_train[i][j] = len(np.where(predictions_train != train_data[:, -1])[0]) / float(len(train_data))
            results_test[i][j] = len(np.where(predictions_test != test_data[:, -1])[0]) / float(len(test_data))

    plt.plot(data_sizes, results_test[0, :], 'r')
    plt.plot(data_sizes, results_test[1, :], 'b')
    plt.plot(data_sizes, results_test[2, :], 'g')
    plt.plot(data_sizes, results_train[0, :], 'r--')
    plt.plot(data_sizes, results_train[1, :], 'b--')
    plt.plot(data_sizes, results_train[2, :], 'g--')
    plt.show()
class MongoDataLoader():

    def __init__(self):
        self.mongo_client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT)
        self.db = self.mongo_client[MONGO_DB]
        self.commerces_col = self.db[MONGO_COMMERCE_COLLECTION]
        self.clients_col = self.db[MONGO_CLIENT_COLLECTION]
        self.transactions_col = self.db[MONGO_TRANSACTION_COLLECTION]
        self.gen = DataGenerator()

    def go(self, clients, commerces, transactions):
        self.__generate_and_save_clients(clients)
        self.__generate_and_save_commerces(commerces)
        self.__generate_and_save_transactions(transactions)

    def __generate_and_save_transactions(self, transactions):
        print "Generating Transactions"
        self.gen.generate_random_transaction_list(transactions)
        print "Saving Transactions"
        for transaction in self.gen.transaction_list:
            self.transactions_col.insert(transaction)

    def __generate_and_save_clients(self, quantity):
        print "Generating Clients"
        self.gen.generate_client_list(quantity)
        print "Saving Clients"
        for client in self.gen.client_list:
            self.clients_col.insert(client)

    def __generate_and_save_commerces(self, commerces):
        print "Generating Commerces"
        self.gen.generate_random_commerce_list(commerces)
        print "Saving Commerces"
        for commerce in self.gen.commerce_list:
            self.commerces_col.insert(commerce)
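# Usage sketch (assumption, not from the original source): populate Mongo with
# generated clients, commerces, and transactions via the loader above. The
# counts are arbitrary; the MONGO_* constants are assumed to be defined elsewhere.
# loader = MongoDataLoader()
# loader.go(clients=100, commerces=20, transactions=1000)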
def test_should_raise_error_when_channel_axis_is_not_present(self):
    images = np.random.rand(20, 64, 64)
    labels = np.random.rand(20)
    with self.assertRaises(ValueError) as e:
        DataGenerator().fit(images, labels)
    self.assertEqual("Channel Axis should have value", str(e.exception))
def test_should_resize_images_to_given_target_dimension(self):
    images = np.random.rand(20, 64, 64, 3)
    labels = np.random.rand(20)
    generator = DataGenerator().fit(images, labels)
    batch, _ = generator.get_next_batch(10, target_dimension=(28, 28))
    self.assertEqual(batch.shape, (10, 28, 28, 3))
sys.exit("Exit.") nevents=len(list(labels_train.keys())) partition = {'train' : list(labels_train.keys()), 'validation' : list(labels_test.keys())} # do_cache: copy from hdfs to local or not... cache_mode, cache_dir = do_cache(cache_path) TRAIN_PARAMS['cache_mode'] = cache_mode if cache_mode != 'nocache': TRAIN_PARAMS['images_uri'] = cache_dir ''' ************** GENERATORS ************** ''' training_generator = DataGenerator(**TRAIN_PARAMS).generate(labels_train, partition['train'], True) validation_generator = DataGenerator(**TRAIN_PARAMS).generate(labels_test, partition['validation'], True) # Horovod: initialize Horovod. hvd.init() print('[hvd] local_ip:%s, local_rank:%d' % (local_ip, hvd.local_rank())) # Horovod: pin GPU to be used to process local rank (one GPU per process) gpus = tf.config.experimental.list_physical_devices('GPU') for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) if gpus: tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') # Build model...
def main(unused_args):
    hparams = {
        'datasource': FLAGS.datasource,
        'num_classes_train': FLAGS.num_classes,
        'num_classes_val': FLAGS.num_classes,
        'num_classes_test': FLAGS.num_classes_test,
        'num_shot_train': FLAGS.num_shot_train,
        'num_shot_test': FLAGS.num_shot_test,
        'steps': FLAGS.steps,
        'meta_batch_size': FLAGS.meta_batch_size,
        'meta_lr': FLAGS.meta_lr,
        'notes': FLAGS.notes,
    }
    hparams = check_default_config(hparams)

    if FLAGS.train and not FLAGS.load:
        hparams['mode'] = 'train'
        save_string = [
            hparams['datasource'],
            str(hparams['num_classes_train']) + 'way',
            str(hparams['num_shot_train']) + 'shot',
            strftime('%y%m%d_%H%M'),
        ]
        save_folder = '_'.join(map(str, save_string)) + '/'
        os.makedirs(FLAGS.savepath + save_folder)
        hparams['savepath'] = FLAGS.savepath + save_folder
        save_config(hparams, FLAGS.savepath + save_folder)
    # elif FLAGS.test:
    #     hparams = load_config(FLAGS.savepath + 'config.json', test=True, notes=FLAGS.notes)

    if FLAGS.comet:
        experiment = Experiment(api_key=os.environ['COMETML_API_KEY'],
                                project_name='meta')
        experiment.log_multiple_params(hparams)

    if FLAGS.train and FLAGS.datasource in ['omniglot', 'miniimagenet', 'cifar']:
        num_shot_train = FLAGS.num_shot_train or 1
        num_shot_test = FLAGS.num_shot_test or 1
        data_generator = DataGenerator(
            datasource=FLAGS.datasource,
            num_classes=FLAGS.num_classes,
            num_samples_per_class=num_shot_train + num_shot_test,
            batch_size=FLAGS.meta_batch_size,
            test_set=False,
        )
        # Tensorflow queue for metatraining dataset
        # metatrain_image_tensor - (batch_size, num_classes * num_samples_per_class, 28 * 28)
        # metatrain_label_tensor - (batch_size, num_classes * num_samples_per_class, num_classes)
        metatrain_image_tensor, metatrain_label_tensor = data_generator.make_data_tensor(
            train=True, load=True, savepath='test.pkl')
        train_inputs = tf.slice(metatrain_image_tensor, [0, 0, 0],
                                [-1, FLAGS.num_classes * num_shot_train, -1])
        test_inputs = tf.slice(metatrain_image_tensor,
                               [0, FLAGS.num_classes * num_shot_train, 0],
                               [-1, -1, -1])
        train_labels = tf.slice(metatrain_label_tensor, [0, 0, 0],
                                [-1, FLAGS.num_classes * num_shot_train, -1])
        test_labels = tf.slice(metatrain_label_tensor,
                               [0, FLAGS.num_classes * num_shot_train, 0],
                               [-1, -1, -1])
        metatrain_input_tensors = {
            'train_inputs': train_inputs,  # batch_size, num_classes * (num_samples_per_class - update_batch_size), 28 * 28
            'train_labels': train_labels,  # batch_size, num_classes * (num_samples_per_class - update_batch_size), num_classes
            'test_inputs': test_inputs,  # batch_size, num_classes * update_batch_size, 28 * 28
            'test_labels': test_labels,  # batch_size, num_classes * update_batch_size, num_classes
        }

        data_generator = DataGenerator(
            datasource=FLAGS.datasource,
            num_classes=hparams['num_classes_val'],
            num_samples_per_class=num_shot_train + num_shot_test,
            batch_size=16,
            test_set=False,
        )
        # Tensorflow queue for metavalidation dataset
        metaval_image_tensor, metaval_label_tensor = data_generator.make_data_tensor(
            train=False)
        train_inputs = tf.slice(
            metaval_image_tensor, [0, 0, 0],
            [-1, hparams['num_classes_val'] * num_shot_train, -1])
        test_inputs = tf.slice(
            metaval_image_tensor,
            [0, hparams['num_classes_val'] * num_shot_train, 0], [-1, -1, -1])
        train_labels = tf.slice(
            metaval_label_tensor, [0, 0, 0],
            [-1, hparams['num_classes_val'] * num_shot_train, -1])
        test_labels = tf.slice(
            metaval_label_tensor,
            [0, hparams['num_classes_val'] * num_shot_train, 0], [-1, -1, -1])
        metaval_input_tensors = {
            'train_inputs': train_inputs,  # batch_size, num_classes * (num_samples_per_class - update_batch_size), 28 * 28
            'train_labels': train_labels,  # batch_size, num_classes * (num_samples_per_class - update_batch_size), num_classes
            'test_inputs': test_inputs,  # batch_size, num_classes * update_batch_size, 28 * 28
            'test_labels': test_labels,  # batch_size, num_classes * update_batch_size, num_classes
        }

        # Graph for metatraining
        # using scope reuse=tf.AUTO_REUSE, not sure if this is the best way to do it
        if FLAGS.datasource == 'miniimagenet':
            # model_metatrain = CNN_MiniImagenet('model', n_way=FLAGS.num_classes, layers=4, input_tensors=metatrain_input_tensors)
            model_metatrain = CNN_miniimagenet(
                'model',
                num_classes=FLAGS.num_classes,
                input_tensors=metatrain_input_tensors)
        elif FLAGS.datasource == 'cifar':
            model_metatrain = CNN_cifar('model',
                                        num_classes=FLAGS.num_classes,
                                        input_tensors=metatrain_input_tensors)
        else:
            model_metatrain = CNN_omniglot(
                'model',
                num_classes=FLAGS.num_classes,
                input_tensors=metatrain_input_tensors)
            # model_metatrain = CNN2('model', n_way=FLAGS.num_classes, layers=4, input_tensors=metatrain_input_tensors)

        # Graph for metavalidation
        if FLAGS.datasource == 'miniimagenet':
            # model_metaval = CNN_MiniImagenet('model', n_way=FLAGS.num_classes, layers=4, input_tensors=metaval_input_tensors)
            model_metaval = CNN_miniimagenet(
                'model',
                num_classes=hparams['num_classes_val'],
                input_tensors=metaval_input_tensors)
        elif FLAGS.datasource == 'cifar':
            model_metaval = CNN_cifar('model',
                                      num_classes=hparams['num_classes_val'],
                                      input_tensors=metaval_input_tensors)
        else:
            model_metaval = CNN_omniglot('model',
                                         num_classes=FLAGS.num_classes,
                                         input_tensors=metaval_input_tensors)
            # model_metaval = CNN2('model', n_way=FLAGS.num_classes, layers=4, input_tensors=metaval_input_tensors)

        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        if FLAGS.load:
            model_metatrain.load(sess, FLAGS.savepath, verbose=True)
            model_metaval.load(sess, FLAGS.savepath, verbose=True)
        tf.train.start_queue_runners()

        saved_metaval_loss = np.inf
        steps = FLAGS.steps or 40000
        try:
            for step in np.arange(steps):
                # metatrain_loss, metatrain_accuracy, _, _ = sess.run([model_metatrain.loss, model_metatrain.test_accuracy, model_metatrain.optimize, model_metatrain.ae_optimize], {model_metatrain.is_training: True})
                metatrain_loss, metatrain_accuracy, _ = sess.run(
                    [
                        model_metatrain.loss, model_metatrain.test_accuracy,
                        model_metatrain.optimize
                    ], {model_metatrain.is_training: True})
                if step > 0 and step % FLAGS.print_every == 0:
                    # model_metatrain.writer.add_summary(metatrain_summary, step)
                    print('Step #{} - Loss : {:.3f} - Acc : {:.3f}'.format(
                        step, metatrain_loss, metatrain_accuracy))
                    if FLAGS.comet:
                        experiment.log_metric("train_loss", metatrain_loss, step=step)
                        experiment.log_metric("train_accuracy", metatrain_accuracy, step=step)
                if step > 0 and (step % FLAGS.validate_every == 0 or step == (steps - 1)):
                    if step == (steps - 1):
                        print('Training complete!')
                    metaval_loss, metaval_accuracy = sess.run(
                        [model_metaval.loss, model_metaval.test_accuracy],
                        {model_metaval.is_training: False})
                    # model_metaval.writer.add_summary(metaval_summary, step)
                    print('Validation Results - Loss : {:.3f} - Acc : {:.3f}'.format(
                        metaval_loss, metaval_accuracy))
                    if FLAGS.comet:
                        experiment.log_metric("val_loss", metaval_loss, step=step)
                        experiment.log_metric("val_accuracy", metaval_accuracy, step=step)
                    if metaval_loss < saved_metaval_loss:
                        saved_metaval_loss = metaval_loss
                        if not FLAGS.load:
                            model_metatrain.save(sess, FLAGS.savepath + save_folder,
                                                 global_step=step, verbose=True)
                        else:
                            model_metatrain.save(sess, FLAGS.savepath,
                                                 global_step=step, verbose=True)
        # Catch Ctrl-C event and allow save option
        except KeyboardInterrupt:
            response = raw_input('\nSave latest model at Step #{}? (y/n)\n'.format(step))
            if response == 'y':
                model_metatrain.save(sess, FLAGS.savepath, global_step=step, verbose=True)
            else:
                print('Latest model not saved.')

    if FLAGS.test and FLAGS.datasource in ['omniglot', 'miniimagenet', 'cifar']:
        NUM_TEST_SAMPLES = 600
        num_classes_test = FLAGS.num_classes_test or FLAGS.num_classes
        num_shot_train = FLAGS.num_shot_train or 1
        num_shot_test = FLAGS.num_shot_test or 1
        data_generator = DataGenerator(
            datasource=FLAGS.datasource,
            num_classes=num_classes_test,
            num_samples_per_class=num_shot_train + num_shot_test,
            batch_size=1,  # use 1 for testing to calculate stdev and ci95
            test_set=True,
        )
        image_tensor, label_tensor = data_generator.make_data_tensor(train=False)
        train_inputs = tf.slice(image_tensor, [0, 0, 0],
                                [-1, num_classes_test * num_shot_train, -1])
        test_inputs = tf.slice(image_tensor,
                               [0, num_classes_test * num_shot_train, 0],
                               [-1, -1, -1])
        train_labels = tf.slice(label_tensor, [0, 0, 0],
                                [-1, num_classes_test * num_shot_train, -1])
        test_labels = tf.slice(label_tensor,
                               [0, num_classes_test * num_shot_train, 0],
                               [-1, -1, -1])
        input_tensors = {
            'train_inputs': train_inputs,  # batch_size, num_classes * (num_samples_per_class - update_batch_size), 28 * 28
            'train_labels': train_labels,  # batch_size, num_classes * (num_samples_per_class - update_batch_size), num_classes
            'test_inputs': test_inputs,  # batch_size, num_classes * update_batch_size, 28 * 28
            'test_labels': test_labels,  # batch_size, num_classes * update_batch_size, num_classes
        }
        if FLAGS.datasource == 'miniimagenet':
            # model = CNN_MiniImagenet('model', n_way=FLAGS.num_classes, layers=4, input_tensors=input_tensors)
            model = CNN_miniimagenet('model',
                                     num_classes=FLAGS.num_classes,
                                     input_tensors=input_tensors)
        elif FLAGS.datasource == 'cifar':
            model = CNN_cifar('model',
                              num_classes=FLAGS.num_classes,
                              input_tensors=input_tensors)
        else:
            model = CNN_omniglot('model',
                                 num_classes=FLAGS.num_classes,
                                 input_tensors=input_tensors)
            # model = CNN2('model', n_way=FLAGS.num_classes, layers=4, input_tensors=input_tensors)

        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        model.load(sess, FLAGS.savepath, verbose=True)
        tf.train.start_queue_runners()

        # BEGIN PLOT
        if FLAGS.plot:
            activations, labels = sess.run(
                [model.train_features, model.train_labels],
                {model.is_training: False})
            activations = activations.reshape([num_shot_train * FLAGS.num_classes, -1])
            from sklearn.manifold import TSNE
            from sklearn.decomposition import PCA
            pca = PCA(50)
            print('Compressing with PCA...')
            activations_50dim = pca.fit_transform(activations)
            tsne = TSNE()
            print('Compressing with tSNE...')
            activations_2dim = tsne.fit_transform(activations_50dim)
            labels = np.argmax(labels, axis=1)
            fig, ax = plt.subplots()
            for i in np.arange(FLAGS.num_classes):
                ax.scatter(activations_2dim[np.where(labels == i)][:, 0],
                           activations_2dim[np.where(labels == i)][:, 1],
                           s=5.)
            plt.show()
            quit()
        # END PLOT

        accuracy_list = []
        for task in np.arange(NUM_TEST_SAMPLES):
            accuracy = sess.run(model.test_accuracy, {model.is_training: False})
            accuracy_list.append(accuracy)
            if task > 0 and task % 100 == 0:
                print('Metatested on {} tasks...'.format(task))

        avg = np.mean(accuracy_list)
        stdev = np.std(accuracy_list)
        ci95 = 1.96 * stdev / np.sqrt(NUM_TEST_SAMPLES)
        print('\nEnd of Test!')
        print('Accuracy : {:.4f}'.format(avg))
        print('StdDev : {:.4f}'.format(stdev))
        print('95% Confidence Interval : {:.4f}'.format(ci95))
        if FLAGS.comet:
            experiment.log_metric("test_accuracy_mean", avg)
            experiment.log_metric("test_accuracy_stdev", stdev)
            experiment.log_metric("test_accuracy_ci95", ci95)

    if FLAGS.train and FLAGS.datasource in ['sinusoid', 'multimodal', 'step']:
        num_shot_train = FLAGS.num_shot_train or 10
        num_shot_test = FLAGS.num_shot_test or 10
        data_generator = DataGenerator(
            datasource=FLAGS.datasource,
            num_classes=None,
            num_samples_per_class=num_shot_train + num_shot_test,
            batch_size=FLAGS.meta_batch_size,
            test_set=None,
        )
        model = FFN('model')
        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()

        saved_loss = np.inf
        steps = FLAGS.steps or 50000
        try:
            for step in np.arange(steps):
                if FLAGS.datasource == 'multimodal':
                    batch_x, batch_y, amp, phase, slope, intercept, modes = data_generator.generate()
                    amp = amp * modes + (modes == False).astype(np.float32)
                elif FLAGS.datasource == 'step':
                    batch_x, batch_y, start_step = data_generator.generate()
                    amp = np.ones(batch_x.shape[0])
                else:
                    batch_x, batch_y, amp, phase = data_generator.generate()
                    amp = np.ones(batch_x.shape[0])
                train_inputs = batch_x[:, :num_shot_train, :]
                train_labels = batch_y[:, :num_shot_train, :]
                test_inputs = batch_x[:, num_shot_train:, :]
                test_labels = batch_y[:, num_shot_train:, :]
                feed_dict = {
                    model.train_inputs: train_inputs,
                    model.train_labels: train_labels,
                    model.test_inputs: test_inputs,
                    model.test_labels: test_labels,
                    model.amp: amp,  # use amplitude to scale loss
                }
                metatrain_postloss, _ = sess.run([model.loss, model.optimize], feed_dict)
                if step > 0 and step % FLAGS.print_every == 0:
                    # model.writer.add_summary(metatrain_summary, step)
                    print('Step #{} - PreLoss : {:.3f} - PostLoss : {:.3f}'.format(
                        step, 0., metatrain_postloss))
                if step == (steps - 1):
                    print('Training complete!')
                if metatrain_postloss < saved_loss:
                    saved_loss = metatrain_postloss
                    model.save(sess, FLAGS.savepath + save_folder,
                               global_step=step, verbose=True)
        # Catch Ctrl-C event and allow save option
        except KeyboardInterrupt:
            response = raw_input('\nSave latest model at Step #{}? (y/n)\n'.format(step))
            if response == 'y':
                model.save(sess, FLAGS.savepath, global_step=step, verbose=True)
            else:
                print('Latest model not saved.')

    if FLAGS.test and FLAGS.datasource in ['sinusoid', 'multimodal', 'step']:
        num_shot_train = FLAGS.num_shot_train or 10
        data_generator = DataGenerator(
            datasource=FLAGS.datasource,
            num_classes=None,
            num_samples_per_class=num_shot_train,
            batch_size=1,
            test_set=None,
        )
        model = FFN('model', num_train_samples=num_shot_train, num_test_samples=50)
        sess = tf.InteractiveSession()
        model.load(sess, FLAGS.savepath, verbose=True)

        if FLAGS.datasource == 'multimodal':
            train_inputs, train_labels, amp, phase, slope, intercept, modes = data_generator.generate()
            amp = amp * modes + (modes == False).astype(np.float32)
            x = np.arange(-5., 5., 0.2)
            if modes[0] == 0:
                y = slope * x + intercept
            else:
                y = amp * np.sin(x - phase)
        elif FLAGS.datasource == 'step':
            train_inputs, train_labels, start_step = data_generator.generate()
            x = np.arange(-5., 5., 0.2)
            y = np.ones_like(x) - (x < start_step).astype(np.float32) - (x > (start_step + 2)).astype(np.float32)
        else:
            train_inputs, train_labels, amp, phase = data_generator.generate()
            amp = 5.
            phase = 0.
            x = np.arange(5., 15., 0.2).reshape(1, -1, 1)
            y = amp * np.sin(x - phase).reshape(1, -1, 1)
            train_inputs = np.arange(5., 10., .5).reshape(1, -1, 1)
            # train_inputs = np.arange(-5., 0., .5).reshape(1, -1, 1)
            train_labels = amp * np.sin(train_inputs - phase)

        feed_dict = {
            model.train_inputs: train_inputs,
            model.train_labels: train_labels,
            model.test_inputs: x.reshape(1, -1, 1),
            model.test_labels: y.reshape(1, -1, 1),
        }
        postprediction, postloss = sess.run([model.predictions, model.plain_loss], feed_dict)
        print(postloss)

        fig, ax = plt.subplots()
        ax.plot(x.reshape(-1), y.reshape(-1),
                color='#2c3e50', linewidth=0.8, label='Truth')
        ax.scatter(train_inputs.reshape(-1), train_labels.reshape(-1),
                   color='#2c3e50', label='Training Set')
        ax.plot(x.reshape(-1), postprediction.reshape(-1),
                label='Prediction', color='#e74c3c', linestyle='--')
        ax.legend()
        ax.set_title(postloss)
        plt.show()
        if FLAGS.filename is not None:
            fig.savefig('figures/' + FLAGS.filename + '.png', dpi=72, bbox_inches='tight')
def _start_train(self, batch_size, model_base_dir, epochs, initial_epoch,
                 model_subdir, current_time, subset):
    print(f'Loss function selected: {self.loss_str}')
    print(f'Image augmentation strength selected: {self.image_aug_str}')
    print(f'Optimizer function selected: {self.optimizer_str}')
    print(f'Activation function selected: {self.activation_str}')
    print(f'Learning rate selected: {self.learningrate}')
    print(f'Filtering out annotations <= {self.kp_filtering_gt:d}')
    print()

    self.loss = get_loss_from_string(self.loss_str)
    self._compile_model()

    # insert logic here for pickled dataframes
    if self.pickle_name is not None:
        train_df, val_df, test_df = get_pickle(self.pickle_name)
    else:
        train_df, val_df = self.load_and_filter_annotations(
            DEFAULT_TRAIN_ANNOT_PATH, DEFAULT_VAL_ANNOT_PATH, subset)

    img_aug_strength = str_to_enum(ImageAugmentationStrength, self.image_aug_str)
    train_generator = DataGenerator(train_df, DEFAULT_TRAIN_IMG_PATH,
                                    self.inres, self.outres, self.num_stacks,
                                    shuffle=TRAIN_SHUFFLE,
                                    batch_size=batch_size,
                                    img_aug_strength=img_aug_strength)
    # Validation does not shuffle and does not augment images, by default.
    val_generator = DataGenerator(val_df, DEFAULT_VAL_IMG_PATH,
                                  self.inres, self.outres, self.num_stacks,
                                  shuffle=VAL_SHUFFLE,
                                  batch_size=batch_size,
                                  img_aug_strength=None)

    print('\n\n********* Unique Model Training ID. Add the args: --resume True --resume-subdir "training ID shown below" to resume training *********\n')
    print(model_subdir)
    print('\n******************************* END OF Unique Model Training ID *******************************\n\n')

    modelDir = os.path.join(model_base_dir, model_subdir)
    logsDir = os.path.join(DEFAULT_LOGS_BASE_DIR, model_subdir)

    # If this path is changed, the corresponding logic to resume should be updated in util.py
    modelSavePath = os.path.join(
        modelDir,
        f'{HPE_EPOCH_PREFIX}{{epoch:02d}}_val_loss_{{val_loss:.4f}}_train_loss_{{loss:.4f}}.hdf5')

    if not os.path.exists(modelDir):
        print(f"Model save directory created: {modelDir}")
        os.makedirs(modelDir)

    # Create callbacks
    mc_val = ModelCheckpoint(modelSavePath, monitor='val_loss')
    mc_train = ModelCheckpoint(modelSavePath, monitor='loss')
    csv_logger = CSVLogger(os.path.join(modelDir, 'csv_tr' + current_time + '.csv'))
    tb = TensorBoard(log_dir=logsDir, histogram_freq=0, write_graph=True, write_images=True)
    # TODO potentially add learning rate scheduler callback
    callbacks = [mc_val, mc_train, tb, csv_logger]

    architecture_json_file = os.path.join(
        modelDir,
        f'{HPE_HOURGLASS_STACKS_PREFIX}_{self.num_stacks:02d}_batchsize_{batch_size:03d}.json')
    if not os.path.exists(architecture_json_file):
        with open(architecture_json_file, 'w') as f:
            print(f"Model architecture json saved to: {architecture_json_file}")
            f.write(self.model.to_json())

    print(f"Model checkpoints saved to: {modelSavePath}")
    self.model.fit_generator(generator=train_generator,
                             validation_data=val_generator,
                             steps_per_epoch=len(train_generator),
                             validation_steps=len(val_generator),
                             epochs=epochs,
                             initial_epoch=initial_epoch,
                             callbacks=callbacks)
            sample['mistakes'] if 'mistakes' in sample else [],
            'predict': pred['mistakes'] if 'mistakes' in pred else []
        }, ensure_ascii=False, indent=4)
        F.write(s + '\n')
    F.close()
    precision = TP / (TP + FP + 1e-10)
    recall = TP / (TP + FN + 1e-10)
    accuracy = (TP + TN) / (TP + FP + TN + FN)
    f1 = 2 * precision * recall / (precision + recall)
    return f1, precision, recall, accuracy


BATCH_SIZE = 32
assert BATCH_SIZE <= len(train_data)
train_generator = DataGenerator(train_data, tokenizer, SEQ_LEN, BATCH_SIZE)
evaluator = Evaluate()

if __name__ == '__main__':
    initial_epoch = 0
    if initial_epoch > 0:
        model.load_weights('best_model.weights')
    model.fit(train_generator,
              epochs=20,
              initial_epoch=initial_epoch,
              callbacks=[evaluator])
    model.save_weights("last_model.weights")
else:
    model.load_weights('best_model.weights')
def main():
    data_generator = DataGenerator(datasource=args.datasource,
                                   batch_size=args.batch_size,
                                   random_sample=args.random_sample,
                                   disjoint_data=not args.not_disjoint_data,
                                   num_samples=args.num_samples,
                                   num_samples_range=args.num_samples_range,
                                   input_range=args.input_range,
                                   task_limit=args.task_limit)
    space_samples = data_generator.generate_space_sample()
    # x_plot = np.linspace(args.input_range[0], args.input_range[1], args.num_samples[-1]).reshape(-1, 1)
    # x_plot = np.linspace(x_min, x_max, n_observation).reshape(-1, 1)

    if args.load_model is not None:
        model = torch.load(args.load_model)
    elif args.model_layers is not None:
        model = CNP_Net(io_dims=data_generator.io_dims,
                        layers_dim={
                            'h': [8, 32, 128],
                            'g': [128, 64, 32, 16, 8]
                        })  # .float()
    else:
        model = CNP_Net(io_dims=data_generator.io_dims)  # .float()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.fig_show:
        fig = plt.figure()
        fig.show()
        ax = data_generator.make_fig_ax(fig)
        fig.canvas.draw()

    for t in range(args.max_epoch):
        # print(t)
        loss = 0
        # x, y = data_generator.generate_batch()
        for p in range(args.batch_size):
            # data
            # x, y = data_generator.generate_sample()
            train, test = data_generator.get_train_test_sample()
            # print('train shape', train)
            # print('test shape', test)
            x_train, y_train = train
            x_test, y_test = test
            # print('shapes', x_train.shape, y_train.shape, x_test.shape, y_test.shape)
            # if args.datasource == 'gp1d':
            #     gp = data_generator.gp
            #     gp.fit(x_train, y_train)
            #     # y_mu, y_cov = gp.predict(x_plot, return_cov=True)
            #     y_mu, y_cov = gp.predict(space_samples, return_cov=True)
            # elif args.datasource == 'branin':
            #     pass
            training_set = torch.cat(
                (torch.tensor(x_train), torch.tensor(y_train)), dim=1).float()
            test_set = torch.cat(
                (torch.tensor(x_test).float(), torch.tensor(y_test).float()),
                dim=1).float()
            # print('train, test', training_set.shape, test_set.shape)
            phi, log_prob = model(training_set, test_set)
            # print('phi', phi.shape)
            loss += -torch.sum(log_prob)
        loss = loss / args.batch_size

        if args.log:
            with open("logs/%s/log.txt" % args.log_folder, "a") as log_file:
                log_file.write("%5d\t%10.4f\n" % (t, loss.item()))

        if t % args.interval == 0:
            print('%5d' % t, '%10.4f' % loss.item())
            if args.fig_show:
                plt.clf()
                ax = data_generator.make_fig_ax(fig)
                # train, test points
                # data_generator.plot_data(fig, np.concatenate((x_test, y_test), axis=1))
                data_generator.scatter_data(ax, np.concatenate((x_test, y_test), axis=1), c='y')
                data_generator.scatter_data(ax, np.concatenate((x_train, y_train), axis=1), c='r')
            # if x_test.shape[1] == 1:
            #     plt.scatter(x_test, y_test, c='y')
            #     plt.scatter(x_train, y_train, c='r')
            #
            # # plot gp prediction (base line)
            # plot_fig(fig, x_plot, y_mu, y_cov)

            # plot model prediction
            # test_set = torch.cat((torch.tensor(x_plot),
            #                       torch.tensor(np.zeros(len(x_plot)).reshape(-1, 1))),
            #                      dim=1).float()
            test_set = torch.cat(
                (torch.tensor(space_samples),
                 torch.tensor(np.zeros(len(space_samples)).reshape(-1, 1))),
                dim=1).float()
            phi, _ = model(training_set, test_set)
            predict_y_mu = phi[:, :data_generator.io_dims[1]].data.numpy()
            predict_y_cov = phi[:, data_generator.io_dims[1]:].data.numpy()**2
            # predict_y_mu_, predict_y_cov_, _ = model(training_set, test_set)
            # predict_y_mu = predict_y_mu_.data.numpy()
            # predict_y_cov = np.diag(predict_y_cov_.data.numpy())**2
            # plot_fig(fig, x_plot, predict_y_mu, predict_y_cov, color='b')
            if args.fig_show:
                data_generator.plot_data(
                    ax,
                    np.concatenate((space_samples, predict_y_mu, predict_y_cov), axis=1))
                fig.canvas.draw()
            if args.log:
                plt.savefig('logs/%s/%05d.png' % (args.log_folder, t))
                torch.save(model, "logs/%s/%05d.pt" % (args.log_folder, t))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def main():
    if FLAGS.datasource == 'sinusoid':
        if FLAGS.train:
            test_num_updates = 5
        else:
            test_num_updates = 10
    else:
        if FLAGS.datasource == 'miniimagenet':
            if FLAGS.train == True:
                test_num_updates = 1  # eval on at least one update during training
            else:
                test_num_updates = 10
        else:
            test_num_updates = 10

    if FLAGS.train == False:
        orig_meta_batch_size = FLAGS.meta_batch_size
        # always use meta batch size of 1 when testing.
        FLAGS.meta_batch_size = 1

    # obtain the data_generator
    if FLAGS.datasource == 'sinusoid':
        data_generator = DataGenerator(FLAGS.update_batch_size * 2,
                                       FLAGS.meta_batch_size)
    else:
        if FLAGS.metatrain_iterations == 0 and FLAGS.datasource == 'miniimagenet':
            assert FLAGS.meta_batch_size == 1
            assert FLAGS.update_batch_size == 1
            data_generator = DataGenerator(1, FLAGS.meta_batch_size)  # only use one datapoint,
        else:
            if FLAGS.datasource == 'miniimagenet':
                # TODO - use 15 val examples for imagenet?
                if FLAGS.train:
                    data_generator = DataGenerator(
                        FLAGS.update_batch_size + 15, FLAGS.meta_batch_size
                    )  # only use one datapoint for testing to save memory
                else:
                    data_generator = DataGenerator(
                        FLAGS.update_batch_size * 2, FLAGS.meta_batch_size
                    )  # only use one datapoint for testing to save memory
            else:
                data_generator = DataGenerator(
                    FLAGS.update_batch_size * 2, FLAGS.meta_batch_size
                )  # only use one datapoint for testing to save memory

    dim_output = data_generator.dim_output
    if FLAGS.baseline == 'oracle':
        assert FLAGS.datasource == 'sinusoid'
        dim_input = 3
        FLAGS.pretrain_iterations += FLAGS.metatrain_iterations
        FLAGS.metatrain_iterations = 0
    else:
        dim_input = data_generator.dim_input

    if FLAGS.datasource == 'miniimagenet' or FLAGS.datasource == 'omniglot':
        tf_data_load = True
        num_classes = data_generator.num_classes

        if FLAGS.train:  # only construct training model if needed
            random.seed(5)
            image_tensor, label_tensor = data_generator.make_data_tensor()
            inputa = tf.slice(image_tensor, [0, 0, 0],
                              [-1, num_classes * FLAGS.update_batch_size, -1])
            inputb = tf.slice(image_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            labela = tf.slice(label_tensor, [0, 0, 0],
                              [-1, num_classes * FLAGS.update_batch_size, -1])
            labelb = tf.slice(label_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            input_tensors = {
                'inputa': inputa,
                'inputb': inputb,
                'labela': labela,
                'labelb': labelb
            }

        random.seed(6)
        image_tensor, label_tensor = data_generator.make_data_tensor(train=False)
        inputa = tf.slice(image_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        inputb = tf.slice(image_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        labela = tf.slice(label_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        labelb = tf.slice(label_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        metaval_input_tensors = {
            'inputa': inputa,
            'inputb': inputb,
            'labela': labela,
            'labelb': labelb
        }
    else:
        tf_data_load = False
        input_tensors = None

    model = MAML(dim_input, dim_output, test_num_updates=test_num_updates)
    if FLAGS.train or not tf_data_load:
        model.construct_model(input_tensors=input_tensors, prefix='metatrain_')
    if tf_data_load:
        model.construct_model(input_tensors=metaval_input_tensors, prefix='metaval_')
    # merge_all writes every summary to disk so TensorBoard can display them;
    # unless there are special requirements, this one line is enough to log the
    # usual training statistics.
    model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES), max_to_keep=10)

    sess = tf.InteractiveSession()

    if FLAGS.train == False:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    if FLAGS.train_update_batch_size == -1:
        FLAGS.train_update_batch_size = FLAGS.update_batch_size
    if FLAGS.train_update_lr == -1:
        FLAGS.train_update_lr = FLAGS.update_lr

    exp_string = 'cls_' + str(FLAGS.num_classes) + '.mbs_' + str(
        FLAGS.meta_batch_size) + '.ubs_' + str(
            FLAGS.train_update_batch_size) + '.numstep' + str(
                FLAGS.num_updates) + '.updatelr' + str(FLAGS.train_update_lr)

    if FLAGS.num_filters != 64:
        exp_string += 'hidden' + str(FLAGS.num_filters)
    if FLAGS.max_pool:
        exp_string += 'maxpool'
    if FLAGS.stop_grad:
        exp_string += 'stopgrad'
    if FLAGS.baseline:
        exp_string += FLAGS.baseline
    if FLAGS.norm == 'batch_norm':
        exp_string += 'batchnorm'
    elif FLAGS.norm == 'layer_norm':
        exp_string += 'layernorm'
    elif FLAGS.norm == 'None':
        exp_string += 'nonorm'
    else:
        print('Norm setting not recognized.')

    resume_itr = 0
    model_file = None

    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if FLAGS.resume or not FLAGS.train:
        model_file = tf.train.latest_checkpoint(FLAGS.logdir + '/' + exp_string)
        if FLAGS.test_iter > 0:
            model_file = model_file[:model_file.index('model')] + 'model' + str(FLAGS.test_iter)
        if model_file:
            ind1 = model_file.index('model')
            resume_itr = int(model_file[ind1 + 5:])
            print("Restoring model weights from " + model_file)
            saver.restore(sess, model_file)

    if FLAGS.train:
        train(model, saver, sess, exp_string, data_generator, resume_itr)
    else:
        test(model, saver, sess, exp_string, data_generator, test_num_updates)
def test_should_raise_error_when_time_delay_was_not_set_and_input_is_time_series(self):
    images = np.random.rand(10, 2, 64, 64, 3)
    labels = np.random.rand(10)
    with self.assertRaises(ValueError) as e:
        DataGenerator().fit(images, labels)
    self.assertEqual(
        "Images have time axis length 2 but time_delay parameter was set to None",
        str(e.exception))
def main(argv): seed = 10 print "seed", seed np.random.seed(seed) num_threads = 1 num_runs = 1 try: opts, args = getopt.getopt(argv,"g:f:a:b:c:s:m:t:r:") except getopt.GetoptError: print "Bad argument given to sgl_eval.py" sys.exit(2) settings = SGL_Settings() for opt, arg in opts: if opt == '-g': settings.expert_num_groups = int(arg) elif opt == '-f': settings.num_features = int(arg) elif opt == '-a': settings.train_size = int(arg) elif opt == '-b': settings.validate_size = int(arg) elif opt == '-c': settings.test_size = int(arg) elif opt == "-s": settings.snr = float(arg) elif opt == "-m": assert(arg in METHODS) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) print "TOTAL NUM RUNS %d" % num_runs settings.print_settings() sys.stdout.flush() data_gen = DataGenerator(settings) run_data = [] for i in range(num_runs): observed_data = data_gen.sparse_groups() run_data.append(Iteration_Data(i, observed_data, settings)) if settings.method != "SP" and num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes
def setUp(self):
    self.data = DataGenerator.load_json()
def test_can_convert_single_item(self):
    raw_data_item = [
        "SM7333269",
        "376-8006",
        "Enim. Road",
        "Raurkela Civil Township",
        "L95 4ZD",
        "Denise",
        "Bates",
        "Kerry",
        "Myers",
        "freehold",
        "good",
        "208648",
        {
            "type": "Feature",
            "crs": {
                "type": "name",
                "properties": {
                    "name": "urn:ogc:def:crs:EPSG:27700"
                }
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [[
                    [530857.01, 181500.00],
                    [530857.00, 181500.00],
                    [530857.00, 181500.00],
                    [530857.00, 181500.00],
                    [530857.01, 181500.00]
                ]]
            },
            "properties": {}
        }
    ]

    title = DataGenerator.convert_item(raw_data_item)

    self._check_title_structure(title, raw_data_item[0])
    expected_title = {
        'title_number': 'TEST_' + raw_data_item[0],
        'proprietors': [
            {'full_name': raw_data_item[5] + ' ' + raw_data_item[6]},
            {'full_name': raw_data_item[7] + ' ' + raw_data_item[8]}
        ],
        'property': {
            'address': {
                'house_number': raw_data_item[1],
                'road': raw_data_item[2],
                'town': raw_data_item[3],
                'postcode': raw_data_item[4]
            },
            'tenure': raw_data_item[9],
            'class_of_title': raw_data_item[10]
        },
        'payment': {
            'price_paid': raw_data_item[11],
            'titles': ['TEST_' + raw_data_item[0]]
        },
        'extent': {
            "type": "Feature",
            "crs": {
                "type": "name",
                "properties": {
                    "name": "urn:ogc:def:crs:EPSG:27700"
                }
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [[
                    [530857.01, 181500.00],
                    [530857.00, 181500.00],
                    [530857.00, 181500.00],
                    [530857.00, 181500.00],
                    [530857.01, 181500.00]
                ]]
            },
            "properties": {}
        }
    }
    self.assertEqual(expected_title, title)
def main(argv):
    seed = 10
    print "seed", seed
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "f:z:a:b:c:s:m:t:r:i")
    except getopt.GetoptError:
        sys.exit(2)

    settings = Sparse_Add_Models_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_funcs = int(arg)
        elif opt == '-z':
            settings.num_zero_funcs = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert(arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.big_init_set = True

    # SP does not care about initialization
    assert(not (settings.big_init_set == True and settings.method in ["SP", "SP0"]))

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    assert(settings.num_funcs <= len(settings.smooth_fcns))
    smooth_fcn_list = settings.smooth_fcns[:settings.num_funcs] + [const_zero] * settings.num_zero_funcs
    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
from models import VanillaConvAE, BetaConvVAE
from data_generator import DataGenerator

if __name__ == "__main__":
    # Load Data
    data_generator = DataGenerator(folder_name="data/dSprites/",
                                   image_size=100,
                                   data_split=0.8,
                                   number_to_augment=0)
    x_train, x_valid, x_test = data_generator.generate()
    # cherry_picked_data = data_generator.cherry_pick(folder_name="cherry_picked")

    # Vanilla Convolutional Autoencoder
    # vanilla_conv_ae = VanillaConvAE(data_generator=data_generator)
    # vanilla_conv_ae.build()
    # vanilla_conv_ae.compile(optimizer="adadelta", loss="binary_crossentropy")
    # vanilla_conv_ae.fit(x=x_train,
    #                     y=x_train,
    #                     epochs=150,
    #                     batch_size=128,
    #                     shuffle=True,
    #                     validation_data=(x_valid, x_valid))
    # encoded_imgs = vanilla_conv_ae.encode(x_test)
    # decoded_imgs = vanilla_conv_ae.predict(x_test)
    # vanilla_conv_ae.plot_learning_curve()
    # vanilla_conv_ae.reconstruct(encoded_imgs, decoded_imgs, x_test)
def main():
    test_num_updates = 10

    if FLAGS.train == False:
        orig_meta_batch_size = FLAGS.meta_batch_size
        FLAGS.meta_batch_size = 1

    if FLAGS.train == False:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    if FLAGS.train_update_batch_size == -1:
        FLAGS.train_update_batch_size = FLAGS.update_batch_size
    if FLAGS.train_update_lr == -1:
        FLAGS.train_update_lr = FLAGS.update_lr

    exp_string = 'cls_' + str(FLAGS.num_classes) + '.mbs_' + str(
        FLAGS.meta_batch_size) + '.ubs_' + str(
            FLAGS.train_update_batch_size) + '.numstep' + str(
                FLAGS.num_updates) + '.updatelr' + str(
                    FLAGS.train_update_lr) + '.poison_lr' + str(FLAGS.poison_lr)

    if FLAGS.num_filters != 64:
        exp_string += 'hidden' + str(FLAGS.num_filters)
    if FLAGS.max_pool:
        exp_string += 'maxpool'
    if FLAGS.baseline:
        exp_string += FLAGS.baseline
    if FLAGS.norm == 'batch_norm':
        exp_string += 'batchnorm'
    elif FLAGS.norm == 'layer_norm':
        exp_string += 'layernorm'
    elif FLAGS.norm == 'None':
        exp_string += 'nonorm'
    else:
        print('Norm setting not recognized.')

    num_images_per_class = FLAGS.update_batch_size * 3
    data_generator = DataGenerator(
        num_images_per_class, FLAGS.meta_batch_size
    )  # only use one datapoint for testing to save memory

    dim_output = data_generator.dim_output
    dim_input = data_generator.dim_input

    if FLAGS.mode == 'train_with_poison':
        print('Loading poison examples from %s' % FLAGS.poison_path)
        poison_example = np.load(FLAGS.poison_dir)
        # poison_example = np.load(FLAGS.logdir + '/' + exp_string + '/poisonx_%d.npy' % FLAGS.poison_itr)
    else:
        poison_example = None

    model = MAML(dim_input=dim_input,
                 dim_output=dim_output,
                 num_images_per_class=num_images_per_class,
                 num_classes=FLAGS.num_classes,
                 poison_example=poison_example)

    sess = tf.InteractiveSession()
    print('Session created')

    if FLAGS.datasource == 'omniglot':
        tf_data_load = True
        num_classes = data_generator.num_classes

        if FLAGS.train:  # only construct training model if needed
            random.seed(5)
            image_tensor, label_tensor = data_generator.make_data_tensor(
                train=True, poison=(model.poisonx, model.poisony), sess=sess)
            if FLAGS.reptile:
                inputa = image_tensor
                labela = label_tensor
            else:
                inputa = tf.slice(
                    image_tensor, [0, 0, 0],
                    [-1, num_classes * FLAGS.update_batch_size, -1])
                labela = tf.slice(
                    label_tensor, [0, 0, 0],
                    [-1, num_classes * FLAGS.update_batch_size, -1])
            inputb = tf.slice(image_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            labelb = tf.slice(label_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])

            image_tensor, label_tensor = data_generator.make_data_tensor(train=False)
            if FLAGS.mode == 'train_poison':
                inputa_test = tf.slice(
                    image_tensor, [0, 0, 0],
                    [-1, num_classes * FLAGS.update_batch_size, -1])
                inputb_test = tf.slice(
                    image_tensor, [0, num_classes * FLAGS.update_batch_size, 0],
                    [-1, -1, -1])
                labela_test = tf.slice(
                    label_tensor, [0, 0, 0],
                    [-1, num_classes * FLAGS.update_batch_size, -1])
                labelb_test = tf.slice(
                    label_tensor, [0, num_classes * FLAGS.update_batch_size, 0],
                    [-1, -1, -1])
                input_tensors = {
                    'inputa': inputa,
                    'inputb': inputb,
                    'labela': labela,
                    'labelb': labelb,
                    'inputa_test': inputa_test,
                    'inputb_test': inputb_test,
                    'labela_test': labela_test,
                    'labelb_test': labelb_test
                }
            else:
                input_tensors = {
                    'inputa': inputa,
                    'inputb': inputb,
                    'labela': labela,
                    'labelb': labelb
                }

        random.seed(6)
        image_tensor, label_tensor = data_generator.make_data_tensor(train=False)
        inputa = tf.slice(image_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        inputb = tf.slice(image_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        labela = tf.slice(label_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        labelb = tf.slice(label_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        metaval_input_tensors = {
            'inputa': inputa,
            'inputb': inputb,
            'labela': labela,
            'labelb': labelb
        }
    else:
        tf_data_load = False
        input_tensors = None

    if FLAGS.train or not tf_data_load:
        model.construct_model(input_tensors=input_tensors, prefix=FLAGS.mode)
    if tf_data_load:
        model.construct_model(input_tensors=metaval_input_tensors, prefix='metaval_')
    print('Model built')
    model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES), max_to_keep=10)

    resume_itr = 0
    model_file = None

    tf.train.start_queue_runners()
    tf.global_variables_initializer().run()

    if FLAGS.resume or not FLAGS.train:
        model_file = tf.train.latest_checkpoint(FLAGS.logdir + '/' + exp_string)
        if FLAGS.test_iter > 0:
            model_file = model_file[:model_file.index('model')] + 'model' + str(FLAGS.test_iter)
        if model_file:
            ind1 = model_file.index('model')
            resume_itr = int(model_file[ind1 + 5:])
            print("Restoring model weights from " + model_file)
            saver.restore(sess, model_file)

    test_params = [model, saver, sess, exp_string, data_generator, test_num_updates]
    test(model, saver, sess, exp_string, data_generator, test_num_updates)
    if FLAGS.train:
        train(model, saver, sess, exp_string, data_generator, resume_itr,
              test_params=test_params)
    else:
        test(model, saver, sess, exp_string, data_generator, test_num_updates)
import os
import sys
import pickle

import numpy as np

module_home = os.environ['NEURAL_PATH']
sys.path.insert(0, module_home)
from data_generator import DataGenerator

path = os.path.join(module_home, "datasets/toy_dataset/cnn_processed/")
batch_size = 32
trainData = DataGenerator(batch_size, path, 'training')
counter = 0
X, y = trainData.next()
for i in xrange(10):
    print "--------------"
    X, y = trainData.next()
    if (X[0][0].shape != (1948,)):
        print "X[0][0] instead of (1948,)"
        print X[0][0].shape
        print "--"
    if (X[1][0].shape != (20,)):
        print "X[1][0] instead of (20,)"
        print X[1][0].shape
        print "--"
    if (y[0].shape != (367,)):
        print "y[0] instead of (367,)"
        print y[0].shape
        print "--"
    if (X[0].shape != (batch_size, 1948)):
def sorted_fns(dir):
    return sorted(os.listdir(dir), key=lambda x: int(x.split('.')[0]))


if len(os.listdir('images')) != len(os.listdir('annotated')):
    generate_missing_json()

image_paths = [os.path.join('images', x) for x in sorted_fns('images')]
annot_paths = [os.path.join('annotated', x) for x in sorted_fns('annotated')]

if 'unet' in model_name:
    model = unet(pretrained=False, base=4)
elif 'fcn_8' in model_name:
    model = fcn_8(pretrained=False, base=4)

tg = DataGenerator(image_paths=image_paths,
                   annot_paths=annot_paths,
                   batch_size=3,
                   augment=True)

checkpoint = ModelCheckpoint(os.path.join('models', model_name + '.model'),
                             monitor='dice',
                             verbose=1,
                             mode='max',
                             save_best_only=True,
                             save_weights_only=False,
                             period=10)
train_val = TrainValTensorBoard(write_graph=True)
tb_mask = TensorBoardMask(log_freq=10)

model.fit_generator(generator=tg,
                    steps_per_epoch=len(tg),
def main():
    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)

    # Build up environment to prevent segfault
    if not FLAGS.train:
        if 'reach' in FLAGS.experiment:
            env = gym.make('ReacherMILTest-v1')
            ob = env.reset()
            # import pdb; pdb.set_trace()

    graph = tf.Graph()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(graph=graph, config=tf_config)

    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        'strides': [[1, 2, 2, 1]] * FLAGS.num_strides +
                   [[1, 1, 1, 1]] * (FLAGS.num_conv_layers - FLAGS.num_strides),
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    data_generator = DataGenerator()
    state_idx = data_generator.state_idx
    img_idx = range(
        len(state_idx),
        len(state_idx) + FLAGS.im_height * FLAGS.im_width * FLAGS.num_channels)
    # need to compute x_idx and img_idx from data_generator
    model = MIL(data_generator._dU,
                state_idx=state_idx,
                img_idx=img_idx,
                network_config=network_config)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = FLAGS.experiment + '.' + FLAGS.init + '_init.' + str(FLAGS.num_conv_layers) + '_conv' + '.' + str(FLAGS.num_strides) + '_strides' + '.' + str(FLAGS.num_filters) + '_filters' + \
                 '.' + str(FLAGS.num_fc_layers) + '_fc' + '.' + str(FLAGS.layer_size) + '_dim' + '.bt_dim_' + str(FLAGS.bt_dim) + '.mbs_' + str(FLAGS.meta_batch_size) + \
                 '.ubs_' + str(FLAGS.update_batch_size) + '.numstep_' + str(FLAGS.num_updates) + '.updatelr_' + str(FLAGS.train_update_lr)

    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    if FLAGS.conv_bt:
        exp_string += '.conv_bt'
    if FLAGS.all_fc_bt:
        exp_string += '.all_fc_bt'
    if FLAGS.fp:
        exp_string += '.fp'
    if FLAGS.learn_final_eept:
        exp_string += '.learn_ee_pos'
    if FLAGS.no_action:
        exp_string += '.no_action'
    if FLAGS.zero_state:
        exp_string += '.zero_state'
    if FLAGS.two_head:
        exp_string += '.two_heads'
    if FLAGS.two_arms:
        exp_string += '.two_arms'
    if FLAGS.temporal_conv_2_head:
        exp_string += '.1d_conv_act_' + str(FLAGS.temporal_num_layers) + '_' + str(FLAGS.temporal_num_filters)
        if FLAGS.temporal_conv_2_head_ee:
            exp_string += '_ee_' + str(FLAGS.temporal_num_layers_ee) + '_' + str(FLAGS.temporal_num_filters_ee)
        exp_string += '_' + str(FLAGS.temporal_filter_size) + 'x1_filters'
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'

    log_dir = FLAGS.log_dir + '/' + exp_string

    # put here for now
    if FLAGS.train:
        data_generator.generate_batches(noisy=FLAGS.use_noisy_demos)
        # data_generator.generate_batches()
        with graph.as_default():
            train_image_tensors = data_generator.make_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter)
            inputa = train_image_tensors[:, :FLAGS.update_batch_size * FLAGS.T, :]
            inputb = train_image_tensors[:, FLAGS.update_batch_size * FLAGS.T:, :]
            train_input_tensors = {'inputa': inputa, 'inputb': inputb}
            val_image_tensors = data_generator.make_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter, train=False)
            inputa = val_image_tensors[:, :FLAGS.update_batch_size * FLAGS.T, :]
            inputb = val_image_tensors[:, FLAGS.update_batch_size * FLAGS.T:, :]
            val_input_tensors = {'inputa': inputa, 'inputb': inputb}
        model.init_network(graph, input_tensors=train_input_tensors,
                           restore_iter=FLAGS.restore_iter)
        model.init_network(graph, input_tensors=val_input_tensors,
                           restore_iter=FLAGS.restore_iter, prefix='Validation_')
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=10)
        # Initialize variables.
        init_op = tf.global_variables_initializer()
        sess.run(init_op, feed_dict=None)
        # Start queue runners (used for loading videos on the fly)
        tf.train.start_queue_runners(sess=sess)

    if FLAGS.resume:
        model_file = tf.train.latest_checkpoint(log_dir)
        if FLAGS.restore_iter > 0:
            model_file = model_file[:model_file.index('model')] + 'model_' + str(FLAGS.restore_iter)
        if model_file:
            ind1 = model_file.index('model')
            resume_itr = int(model_file[ind1 + 6:])
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph, model, saver, sess, data_generator, log_dir,
              restore_itr=FLAGS.restore_iter)
    else:
        if 'reach' in FLAGS.experiment:
            generate_test_demos(data_generator)
            evaluate_vision_reach(env, graph, model, data_generator, sess,
                                  exp_string, FLAGS.record_gifs, log_dir)
        elif 'push' in FLAGS.experiment:
            evaluate_push(sess, graph, model, data_generator, exp_string,
                          log_dir, FLAGS.demo_file + '/',
                          save_video=FLAGS.record_gifs)
        else:
            raise NotImplementedError
def run(batch_id, source_file_name, output_file_name, accounts_file_name, contacts_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'AccountExternalId__c',
        'Owner.External_Id__c',
        'LeadSource',
        'CloseDate',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    # load accounts as dataset
    account_columns = [
        'External_Id__c',
        'Name',
        'BillingState',
        'Industry'
    ]
    account_dataset = data_gen.load_dataset('accounts', accounts_file_name, account_columns)
    accounts_by_id = account_dataset.group_by('External_Id__c')

    # load contacts as dataset
    contact_columns = [
        'External_Id__c',
        'FirstName',
        'LastName'
    ]
    contact_dataset = data_gen.load_dataset('contacts', contacts_file_name, contact_columns)
    contacts_by_id = contact_dataset.group_by('External_Id__c')

    # helper method to get account data
    def get_account_data(column_values, account_column_name):
        return accounts_by_id.get(column_values['ConvertedAccount.External_Id__c'])[0].get(account_column_name)

    # helper method to get contact data
    def get_contact_data(column_values, contact_column_name):
        return contacts_by_id.get(column_values['ConvertedContact.External_Id__c'])[0].get(contact_column_name)

    # rename columns
    data_gen.rename_column('External_Id__c', 'ConvertedOpportunity.External_Id__c')
    data_gen.rename_column('AccountExternalId__c', 'ConvertedAccount.External_Id__c')
    data_gen.rename_column('CloseDate', 'ConvertedDate__c')

    # generate converted lead at a random ratio
    data_gen.duplicate_rows(duplication_factor=lambda: choice([0, 1], p=[.75, .25]))

    # generate id
    data_gen.add_formula_column('External_Id__c',
                                formula=lambda: 'W_Lead.' + str(data_gen.current_row + 1))

    # generate create date
    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return oppty_create_date - timedelta(days=randint(0, 45))

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate status
    data_gen.add_formula_column('Status', formula=lead.lead_status)

    # generate is-converted flag
    data_gen.add_map_column('IsConverted', 'Status', {
        'Qualified - Convert': 'true',
        None: 'false'
    })

    # generate opportunity
    data_gen.add_map_column('ConvertedOpportunity.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedOpportunity.External_Id__c'],
        None: ''
    })

    # generate account
    data_gen.add_map_column('ConvertedAccount.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedAccount.External_Id__c'],
        None: ''
    })

    # generate contact
    data_gen.add_map_column('ConvertedContact.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedAccount.External_Id__c'].replace('W_Account', 'W_Contact'),
        None: ''
    })

    # generate converted date
    data_gen.add_map_column('ConvertedDate__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedDate__c'],
        None: ''
    })

    # generate name
    data_gen.add_map_column('FirstName', 'Status', {
        'Qualified - Convert': lambda cv: get_contact_data(cv, 'FirstName'),
        None: lambda: fake.first_name()
    })
    data_gen.add_map_column('LastName', 'Status', {
        'Qualified - Convert': lambda cv: get_contact_data(cv, 'LastName'),
        None: lambda: fake.last_name()
    })

    # generate company
    data_gen.add_map_column('Company', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'Name'),
        None: 'Not Applicable'
    })

    # generate industry
    data_gen.add_map_column('Industry', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'Industry'),
        None: ''
    })

    # generate state
    data_gen.add_map_column('State', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'BillingState'),
        None: ''
    })

    # generate is unread by owner
    data_gen.add_map_column('IsUnreadByOwner', 'Status', {
        'Qualified - Convert': 'false',
        None: lead.lead_is_unread_by_owner
    })

    # generate rating
    data_gen.add_formula_column('Rating', formula=lead.lead_rating)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c',
        'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c', 'Parent.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column('External_Id__c',
                                formula=lambda: 'W_KCSArticle_ViewStat.' + str(data_gen.current_row + 1))

    channels = [
        'App',
        'Desktop Site',
        'Mobile Site'
    ]
    data_gen.add_formula_column('Channel__c', channels)

    data_gen.add_formula_column('ViewCount__c', formula=lambda: randint(1, 100))
    data_gen.add_formula_column('NormalizedScore__c', formula=lambda: round(uniform(1, 10), 3))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Channel__c',
        'Parent.External_Id__c',
        'ViewCount__c',
        'NormalizedScore__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
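# --- Hedged sketch: the two scripts above lean on add_map_column /
# --- add_formula_column. The real DataGenerator is not shown; this is one
# --- plausible (eager, simplified) implementation of the semantics the call
# --- sites imply: map values keyed by another column, None as fallback, and
# --- values that may be constants, zero-arg callables, or callables taking
# --- the current row dict. MiniDataGenerator is a hypothetical name.
import random


class MiniDataGenerator:
    def __init__(self, rows):
        self.rows = rows  # list of dicts, one per output row

    def _apply(self, row, value):
        if callable(value):
            try:
                return value(row)   # one-arg callables receive the row dict (cv)
            except TypeError:
                return value()      # zero-arg callables, e.g. lambda: fake.first_name()
        return value

    def add_formula_column(self, name, formula):
        # a list is treated as a set of values to sample from, as the
        # 'Channel__c' call above suggests
        for row in self.rows:
            if isinstance(formula, list):
                row[name] = random.choice(formula)
            else:
                row[name] = self._apply(row, formula)

    def add_map_column(self, name, key_column, mapping):
        for row in self.rows:
            value = mapping.get(row.get(key_column), mapping.get(None))
            row[name] = self._apply(row, value)


# usage mirroring the lead script above
rows = [{'Status': 'Qualified - Convert'}, {'Status': 'Open'}]
gen = MiniDataGenerator(rows)
gen.add_map_column('IsConverted', 'Status', {'Qualified - Convert': 'true', None: 'false'})
assert [r['IsConverted'] for r in rows] == ['true', 'false']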
# model = multi_gpu_model(model, gpus=conf1.multi_gpu)
# model.compile(loss='mean_absolute_error',
#               optimizer=Adam(lr=conf1.lr))

# load previous model
if conf1.retrain != 0:
    model_path = os.path.join(conf1.model_dir, "md_%diters.h5" % conf1.retrain)
    model = load_model(model_path)

callback = TensorBoard(log_dir=conf1.logs)
callback.set_model(model)
train_names = ['train_loss', 'train_mae']
val_names = ['val_loss', 'val_mae']

# Data generators.
tr_gen = DataGenerator(batch_size=conf1.batch_size, gtype='train')
eval_te_gen = DataGenerator(batch_size=conf1.batch_size, gtype='test', te_max_iter=100)
eval_tr_gen = DataGenerator(batch_size=conf1.batch_size, gtype='test', te_max_iter=100)

# Train.
t1 = time.time()
iter = 0  # note: shadows the built-in iter()
epochs = 0
# while iter < conf1.iterations:
while epochs < conf1.epochs:
    random.shuffle(h5_train_list)
    print("Epoch number ---> %d" % epochs)
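# --- Hedged sketch: the train_names/val_names lists plus callback.set_model()
# --- above suggest manual TensorBoard logging via a write_log helper, a common
# --- TF1.x-era Keras pattern. write_log is not shown in the source; this is an
# --- assumed implementation using the callback's writer.
import tensorflow as tf


def write_log(callback, names, logs, step):
    # callback is the keras TensorBoard callback prepared with callback.set_model(model)
    for name, value in zip(names, logs):
        summary = tf.Summary()
        summary.value.add(tag=name, simple_value=float(value))
        callback.writer.add_summary(summary, step)
    callback.writer.flush()

# illustrative use inside the training loop:
# loss, mae = model.train_on_batch(batch_x, batch_y)
# write_log(callback, train_names, [loss, mae], iter)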
def train(args, subTrain, subTest, cv_split, img_rows=36, img_cols=36):
    print('================================')
    print('Train...')
    print('subTrain', subTrain)
    print('subTest', subTest)
    input_shape = (img_rows, img_cols, 3)

    path_of_video_tr = sort_video_list(args.data_dir, taskList, subTrain)
    path_of_video_test = sort_video_list(args.data_dir, taskList, subTest)
    path_of_video_tr = list(itertools.chain(*path_of_video_tr))  # flatten the list
    path_of_video_test = list(itertools.chain(*path_of_video_test))
    print('sample path: ', path_of_video_tr[0])
    nframe_per_video = get_nframe_video(path_of_video_tr[0])
    print('Train Length: ', len(path_of_video_tr))
    print('Test Length: ', len(path_of_video_test))
    print('nframe_per_video', nframe_per_video)

    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    with strategy.scope():
        if strategy.num_replicas_in_sync == 4:
            print("Using 4 GPUs for training")
            if args.temporal == 'CAN' or args.temporal == 'MT_CAN':
                args.batch_size = 32
            elif args.temporal == 'CAN_3D' or args.temporal == 'MT_CAN_3D':
                args.batch_size = 12
            elif args.temporal == 'TS_CAN' or args.temporal == 'MTTS_CAN':
                args.batch_size = 32
            elif args.temporal == 'Hybrid_CAN' or args.temporal == 'MT_Hybrid_CAN':
                args.batch_size = 16
            else:
                raise ValueError('Unsupported Model Type!')
        elif strategy.num_replicas_in_sync == 8:
            print('Using 8 GPUs for training!')
            args.batch_size = args.batch_size * 2
        elif strategy.num_replicas_in_sync == 2:
            args.batch_size = args.batch_size // 2
        else:
            raise Exception('Only supporting 2, 4 or 8 GPUs now. Please adjust learning rate in the training script!')

        if args.temporal == 'CAN':
            print('Using CAN!')
            model = CAN(args.nb_filters1, args.nb_filters2, input_shape,
                        dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                        nb_dense=args.nb_dense)
        elif args.temporal == 'MT_CAN':
            print('Using MT_CAN!')
            model = MT_CAN(args.nb_filters1, args.nb_filters2, input_shape,
                           dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                           nb_dense=args.nb_dense)
        elif args.temporal == 'CAN_3D':
            print('Using CAN_3D!')
            input_shape = (img_rows, img_cols, args.frame_depth, 3)
            model = CAN_3D(args.frame_depth, args.nb_filters1, args.nb_filters2, input_shape,
                           dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                           nb_dense=args.nb_dense)
        elif args.temporal == 'MT_CAN_3D':
            print('Using MT_CAN_3D!')
            input_shape = (img_rows, img_cols, args.frame_depth, 3)
            model = MT_CAN_3D(args.frame_depth, args.nb_filters1, args.nb_filters2, input_shape,
                              dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                              nb_dense=args.nb_dense)
        elif args.temporal == 'TS_CAN':
            print('Using TS_CAN!')
            input_shape = (img_rows, img_cols, 3)
            model = TS_CAN(args.frame_depth, args.nb_filters1, args.nb_filters2, input_shape,
                           dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                           nb_dense=args.nb_dense)
        elif args.temporal == 'MTTS_CAN':
            print('Using MTTS_CAN!')
            input_shape = (img_rows, img_cols, 3)
            model = MTTS_CAN(args.frame_depth, args.nb_filters1, args.nb_filters2, input_shape,
                             dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                             nb_dense=args.nb_dense)
        elif args.temporal == 'Hybrid_CAN':
            print('Using Hybrid_CAN!')
            input_shape_motion = (img_rows, img_cols, args.frame_depth, 3)
            input_shape_app = (img_rows, img_cols, 3)
            model = Hybrid_CAN(args.frame_depth, args.nb_filters1, args.nb_filters2,
                               input_shape_motion, input_shape_app,
                               dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                               nb_dense=args.nb_dense)
        elif args.temporal == 'MT_Hybrid_CAN':
            print('Using MT_Hybrid_CAN!')
            input_shape_motion = (img_rows, img_cols, args.frame_depth, 3)
            input_shape_app = (img_rows, img_cols, 3)
            model = MT_Hybrid_CAN(args.frame_depth, args.nb_filters1, args.nb_filters2,
                                  input_shape_motion, input_shape_app,
                                  dropout_rate1=args.dropout_rate1, dropout_rate2=args.dropout_rate2,
                                  nb_dense=args.nb_dense)
        else:
            raise ValueError('Unsupported Model Type!')

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=args.lr)
        if args.temporal == 'MTTS_CAN' or args.temporal == 'MT_Hybrid_CAN' or args.temporal == 'MT_CAN_3D' or \
                args.temporal == 'MT_CAN':
            losses = {"output_1": "mean_squared_error",
                      "output_2": "mean_squared_error"}
            loss_weights = {"output_1": 1.0, "output_2": 1.0}
            model.compile(loss=losses, loss_weights=loss_weights, optimizer=optimizer)
        else:
            model.compile(loss='mean_squared_error', optimizer=optimizer)
        print('learning rate: ', args.lr)

    # %% Create data generators
    training_generator = DataGenerator(path_of_video_tr, nframe_per_video, (img_rows, img_cols),
                                       batch_size=args.batch_size, frame_depth=args.frame_depth,
                                       temporal=args.temporal, respiration=args.respiration)
    validation_generator = DataGenerator(path_of_video_test, nframe_per_video, (img_rows, img_cols),
                                         batch_size=args.batch_size, frame_depth=args.frame_depth,
                                         temporal=args.temporal, respiration=args.respiration)

    # %% Checkpoint folders
    checkpoint_folder = str(os.path.join(args.save_dir, args.exp_name))
    if not os.path.exists(checkpoint_folder):
        os.makedirs(checkpoint_folder)
    cv_split_path = str(os.path.join(checkpoint_folder, "cv_" + str(cv_split)))

    # %% Callbacks
    if args.save_all == 1:
        save_best_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=cv_split_path + "_epoch{epoch:02d}_model.hdf5",
            save_best_only=False, verbose=1)
    else:
        save_best_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=cv_split_path + "_last_model.hdf5",
            save_best_only=False, verbose=1)
    csv_logger = tf.keras.callbacks.CSVLogger(filename=cv_split_path + '_train_loss_log.csv')
    hb_callback = HeartBeat(training_generator, validation_generator, args, str(cv_split), checkpoint_folder)

    # %% Model training and saving results
    history = model.fit(x=training_generator,
                        validation_data=validation_generator,
                        epochs=args.nb_epoch, verbose=1, shuffle=False,
                        callbacks=[csv_logger, save_best_callback, hb_callback],
                        validation_freq=4)
    val_loss_history = history.history['val_loss']
    val_loss = np.array(val_loss_history)
    np.savetxt((cv_split_path + '_val_loss_log.csv'), val_loss, delimiter=",")

    # (evaluate_generator is deprecated in newer TF; model.evaluate accepts a generator directly)
    score = model.evaluate_generator(generator=validation_generator, verbose=1)
    print('****************************************')
    if args.temporal == 'MTTS_CAN' or args.temporal == 'MT_Hybrid_CAN' or args.temporal == 'MT_CAN_3D' \
            or args.temporal == 'MT_CAN':
        print('Average Test Score: ', score[0])
        print('PPG Test Score: ', score[1])
        print('Respiration Test Score: ', score[2])
    else:
        print('Test score:', score)
    print('****************************************')

    print('Start saving predictions from the last epoch')
    training_generator = DataGenerator(path_of_video_tr, nframe_per_video, (img_rows, img_cols),
                                       batch_size=args.batch_size, frame_depth=args.frame_depth,
                                       temporal=args.temporal, respiration=args.respiration,
                                       shuffle=False)
    validation_generator = DataGenerator(path_of_video_test, nframe_per_video, (img_rows, img_cols),
                                         batch_size=args.batch_size, frame_depth=args.frame_depth,
                                         temporal=args.temporal, respiration=args.respiration,
                                         shuffle=False)
    yptrain = model.predict(training_generator, verbose=1)
    scipy.io.savemat(checkpoint_folder + '/yptrain_best_' + '_cv' + str(cv_split) + '.mat',
                     mdict={'yptrain': yptrain})
    yptest = model.predict(validation_generator, verbose=1)
    scipy.io.savemat(checkpoint_folder + '/yptest_best_' + '_cv' + str(cv_split) + '.mat',
                     mdict={'yptest': yptest})
    print('Finish saving the results from the last epoch')
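# --- Hedged sketch: a keras.utils.Sequence skeleton consistent with how
# --- DataGenerator is constructed above (video paths, frame count, image size,
# --- batch_size, frame_depth, temporal, respiration, shuffle). The loading
# --- logic is a placeholder assumption, not the project's implementation;
# --- VideoBatchGenerator is a hypothetical name.
import numpy as np
import tensorflow as tf


class VideoBatchGenerator(tf.keras.utils.Sequence):
    def __init__(self, video_paths, nframe_per_video, dim, batch_size=32,
                 frame_depth=10, temporal='TS_CAN', respiration=False, shuffle=True):
        self.video_paths = list(video_paths)
        self.nframe_per_video = nframe_per_video
        self.dim = dim
        self.batch_size = batch_size
        self.frame_depth = frame_depth
        self.temporal = temporal
        self.respiration = respiration
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # number of batches per epoch
        return len(self.video_paths) // self.batch_size

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.video_paths)

    def __getitem__(self, index):
        paths = self.video_paths[index * self.batch_size:(index + 1) * self.batch_size]
        # placeholder: the real generator would read video frames and the
        # corresponding physiological labels for each path
        x = np.zeros((len(paths), *self.dim, 3), dtype=np.float32)
        y = np.zeros((len(paths),), dtype=np.float32)
        return x, y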
def main():
    if FLAGS.train:
        test_num_updates = 20
    elif FLAGS.from_scratch:
        test_num_updates = 200
    else:
        test_num_updates = 50

    if FLAGS.train == False:
        orig_meta_batch_size = FLAGS.meta_batch_size
        # always use meta batch size of 1 when testing.
        FLAGS.meta_batch_size = 1

    sess = tf.InteractiveSession()
    if not FLAGS.dataset == 'imagenet':
        data_generator = DataGenerator(FLAGS.inner_update_batch_size_train + FLAGS.outer_update_batch_size,
                                       FLAGS.inner_update_batch_size_val + FLAGS.outer_update_batch_size,
                                       FLAGS.meta_batch_size)
    else:
        data_generator = DataGeneratorImageNet(FLAGS.inner_update_batch_size_train + FLAGS.outer_update_batch_size,
                                               FLAGS.inner_update_batch_size_val + FLAGS.outer_update_batch_size,
                                               FLAGS.meta_batch_size)

    dim_output_train = data_generator.dim_output_train
    dim_output_val = data_generator.dim_output_val
    dim_input = data_generator.dim_input
    tf_data_load = True
    num_classes_train = data_generator.num_classes_train
    num_classes_val = data_generator.num_classes_val

    if FLAGS.train:  # only construct training model if needed
        random.seed(5)
        image_tensor, label_tensor = data_generator.make_data_tensor()
        inputa = tf.slice(image_tensor, [0, 0, 0], [-1, num_classes_train * FLAGS.inner_update_batch_size_train, -1])
        inputb = tf.slice(image_tensor, [0, num_classes_train * FLAGS.inner_update_batch_size_train, 0], [-1, -1, -1])
        labela = tf.slice(label_tensor, [0, 0, 0], [-1, num_classes_train * FLAGS.inner_update_batch_size_train, -1])
        labelb = tf.slice(label_tensor, [0, num_classes_train * FLAGS.inner_update_batch_size_train, 0], [-1, -1, -1])
        input_tensors = {'inputa': inputa, 'inputb': inputb, 'labela': labela, 'labelb': labelb}

    random.seed(6)
    image_tensor, label_tensor = data_generator.make_data_tensor(train=False)
    inputa = tf.slice(image_tensor, [0, 0, 0], [-1, num_classes_val * FLAGS.inner_update_batch_size_val, -1])
    inputb = tf.slice(image_tensor, [0, num_classes_val * FLAGS.inner_update_batch_size_val, 0], [-1, -1, -1])
    labela = tf.slice(label_tensor, [0, 0, 0], [-1, num_classes_val * FLAGS.inner_update_batch_size_val, -1])
    labelb = tf.slice(label_tensor, [0, num_classes_val * FLAGS.inner_update_batch_size_val, 0], [-1, -1, -1])
    metaval_input_tensors = {'inputa': inputa, 'inputb': inputb, 'labela': labela, 'labelb': labelb}

    model = MAML(dim_input, dim_output_train, dim_output_val, test_num_updates=test_num_updates)
    if FLAGS.train or not tf_data_load:
        model.construct_model(input_tensors=input_tensors, prefix='metatrain_')
    if tf_data_load:
        model.construct_model(input_tensors=metaval_input_tensors, prefix='metaval_')
    model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES), max_to_keep=10)

    if FLAGS.debug:
        sess = tf_debug.LocalCLIDebugWrapperSession(sess)

    if FLAGS.train == False:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    if FLAGS.log_inner_update_batch_size_val == -1:
        FLAGS.log_inner_update_batch_size_val = FLAGS.inner_update_batch_size_val
    if FLAGS.train_update_lr == -1:
        FLAGS.train_update_lr = FLAGS.update_lr

    exp_string = ''
    exp_string += '.nu_' + str(FLAGS.num_updates) + '.ilr_' + str(FLAGS.train_update_lr)
    if FLAGS.meta_lr != 0.001:
        exp_string += '.olr_' + str(FLAGS.meta_lr)
    if FLAGS.mt_mode != 'gtgt':
        if FLAGS.partition_algorithm == 'hyperplanes':
            exp_string += '.m_' + str(FLAGS.margin)
        if FLAGS.partition_algorithm == 'kmeans' or FLAGS.partition_algorithm == 'kmodes':
            exp_string += '.k_' + str(FLAGS.num_clusters)
        exp_string += '.p_' + str(FLAGS.num_partitions)
        if FLAGS.scaled_encodings and FLAGS.num_partitions != 1:
            exp_string += '.scaled'
        if FLAGS.mt_mode == 'encenc':
            exp_string += '.ned_' + str(FLAGS.num_encoding_dims)
        elif FLAGS.mt_mode == 'semi':
            exp_string += '.pgtgt_' + str(FLAGS.p_gtgt)
    exp_string += '.mt_' + FLAGS.mt_mode
    exp_string += '.mbs_' + str(FLAGS.meta_batch_size) + \
                  '.nct_' + str(FLAGS.num_classes_train) + \
                  '.iubst_' + str(FLAGS.inner_update_batch_size_train) + \
                  '.iubsv_' + str(FLAGS.log_inner_update_batch_size_val) + \
                  '.oubs' + str(FLAGS.outer_update_batch_size)
    exp_string = exp_string[1:]  # get rid of leading period
    if FLAGS.on_encodings:
        exp_string += '.onenc'
        exp_string += '.nhl_' + str(FLAGS.num_hidden_layers)
    if FLAGS.num_filters != 64:
        exp_string += '.hidden' + str(FLAGS.num_filters)
    if FLAGS.max_pool:
        exp_string += '.maxpool'
    if FLAGS.stop_grad:
        exp_string += '.stopgrad'
    if FLAGS.norm == 'batch_norm':
        exp_string += '.batchnorm'
    elif FLAGS.norm == 'layer_norm':
        exp_string += '.layernorm'
    elif FLAGS.norm == 'None':
        exp_string += '.nonorm'
    else:
        print('Norm setting not recognized.')
    if FLAGS.resnet:
        exp_string += '.res{}parts{}'.format(FLAGS.num_res_blocks, FLAGS.num_parts_per_res_block)
    if FLAGS.miniimagenet_only:
        exp_string += '.mini'
    if FLAGS.suffix != '':
        exp_string += '.' + FLAGS.suffix

    resume_itr = 0
    model_file = None

    tf.global_variables_initializer().run()
    print(exp_string)

    if FLAGS.resume or not FLAGS.train:
        model_file = tf.train.latest_checkpoint(logdir + '/' + exp_string)
        if FLAGS.test_iter > 0:
            model_file = model_file[:model_file.index('model')] + 'model' + str(FLAGS.test_iter)
        if model_file:
            ind1 = model_file.index('model')
            resume_itr = int(model_file[ind1 + 5:])
            print("Restoring model weights from " + model_file)
            saver.restore(sess, model_file)
        else:
            print("No checkpoint found")

    if FLAGS.from_scratch:
        exp_string = ''
    if FLAGS.from_scratch and not os.path.isdir(logdir):
        os.makedirs(logdir)

    if FLAGS.train:
        train(model, saver, sess, exp_string, data_generator, resume_itr)
    else:
        test(model, saver, sess, exp_string, data_generator, test_num_updates)
from keras import Model
from keras.models import load_model

from data_generator import DataGenerator
from utils.categories_conversion_utils import *
from utils.directory_utils import *

model_name = "2018-03-07_20:51:35-2018-03-07_23:02:18"
model_path = "trained_models/" + model_name + ".h5"
model = load_model(model_path)

generator = DataGenerator()
_, test_data = split_data()

# only inspect the first generated batch
for batch_images, batch_labels in generator.generate_data(test_data):
    predictions = model.predict(batch_images)
    for i, prediction in enumerate(predictions):
        print("Original category: ", one_hot_to_category(batch_labels[i]))
        print("Predicted category: ", one_hot_to_category(prediction))
    break
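# --- Hedged sketch: one_hot_to_category (imported above from
# --- utils.categories_conversion_utils) presumably inverts a one-hot label or
# --- soft prediction vector to a category name. CATEGORIES is a hypothetical
# --- lookup list; the real mapping is not shown in the source.
import numpy as np

CATEGORIES = ["cat_0", "cat_1", "cat_2"]  # assumed label names


def one_hot_to_category(vector):
    # works for hard one-hot labels and for softmax prediction vectors alike
    return CATEGORIES[int(np.argmax(vector))]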
    # fragment of KMeans.draw_clusters(); assumes `color` was initialized to 0
    # earlier in the method
    for cluster in self.clusters:
        for point_2d in cluster:
            self.canvas.create_oval(point_2d.x - 2, point_2d.y - 2,
                                    point_2d.x + 2, point_2d.y + 2,
                                    fill=self.colors[color])
        color = color + 1
    self.canvas.pack()


bb1 = BoundingBox(100, 200, 100, 500)
bb2 = BoundingBox(300, 400, 300, 400)
bb3 = BoundingBox(600, 700, 100, 300)
bb4 = BoundingBox(600, 700, 500, 600)
bounding_boxes = [bb1, bb2, bb3, bb4]

window = Tk()
num_points_per_bb = 100
data_generator = DataGenerator(bounding_boxes, num_points_per_bb, window)
#points = data_generator.generate_points()
#data_generator.save_points_2_csv('points.csv')
points = data_generator.load_points_from_csv('points.csv')

canvas = Canvas(window, width=1024, height=768, bg='white')
k_means = KMeans(4, canvas)
k_means.run(points)
k_means.draw_clusters()
window.mainloop()
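# --- Hedged sketch: the BoundingBox / generate_points API the script above
# --- assumes -- uniform 2-D points sampled inside each box. The argument order
# --- (x_min, x_max, y_min, y_max) is inferred from the calls above and is an
# --- assumption, as is the Point2D type; this is not the project's actual code.
import random
from collections import namedtuple

Point2D = namedtuple('Point2D', ['x', 'y'])


class BoundingBox:
    def __init__(self, x_min, x_max, y_min, y_max):
        self.x_min, self.x_max = x_min, x_max
        self.y_min, self.y_max = y_min, y_max

    def sample(self):
        # one uniform random point inside the box
        return Point2D(random.uniform(self.x_min, self.x_max),
                       random.uniform(self.y_min, self.y_max))


def generate_points(bounding_boxes, num_points_per_bb):
    return [bb.sample() for bb in bounding_boxes for _ in range(num_points_per_bb)]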
def main(argv):
    num_threads = 1
    num_runs = 1

    try:
        # note: '-i' is accepted (no argument) but not handled below
        opts, args = getopt.getopt(argv, "g:f:a:b:c:s:m:t:r:i")
    except getopt.GetoptError:
        print "Bad argument given to sgl_eval.py"
        sys.exit(2)

    settings = SGL_Settings()
    for opt, arg in opts:
        if opt == '-g':
            settings.expert_num_groups = int(arg)
        elif opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert(arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.sparse_groups()
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
def train(args):
    logging.info('config=%s', json.dumps(vars(args)))

    # Arguments & parameters
    workspace = args.workspace
    cuda = args.cuda

    # Load model
    model_class, model_params = MODELS[args.model]
    model = model_class(**{k: args.model_params[k] for k in model_params if k in args.model_params})
    if args.train_model is not None:
        logging.info("continue training ...")
        model_path = os.path.join(workspace, 'logs', get_filename(__file__), args.train_model)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])
    logging.info("sequence length: {}".format(model.seq_len))

    if cuda:
        model.cuda()

    # Paths
    hdf5_path = os.path.join(workspace, 'data.h5')
    models_dir = os.path.join(workspace, 'models', get_filename(__file__))
    create_folder(models_dir)

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              target_device=args.target_device,
                              train_house_list=args.train_house_list,
                              validate_house_list=args.validate_house_list,
                              batch_size=args.batch_size,
                              seq_len=model.seq_len,
                              width=args.width,
                              binary_threshold=args.binary_threshold,
                              balance_threshold=args.balance_threshold,
                              balance_positive=args.balance_positive)

    # Optimizer
    learning_rate = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           betas=(0.9, 0.999), eps=1e-08, weight_decay=0.)

    iteration = 0
    train_bgn_time = time.time()

    for (batch_x, batch_y) in generator.generate():
        # stop after 300k iterations
        if iteration > 1000 * 300:
            break

        # Evaluate
        if iteration % 1000 == 0:
            train_fin_time = time.time()
            tr_result_dict = evaluate(model=model,
                                      generator=generator,
                                      data_type='train',
                                      max_iteration=args.validate_max_iteration,
                                      cuda=cuda,
                                      binary=args.binary_threshold is not None)
            va_result_dict = evaluate(model=model,
                                      generator=generator,
                                      data_type='validate',
                                      max_iteration=args.validate_max_iteration,
                                      cuda=cuda,
                                      binary=args.binary_threshold is not None)
            logging.info('train: {}'.format(tr_result_dict))
            logging.info('validate: {}'.format(va_result_dict))

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time
            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s, learning rate: {}'.format(
                    iteration, train_time, validate_time, learning_rate))
            logging.info('------------------------------------')
            train_bgn_time = time.time()

        # Reduce learning rate
        if iteration % 1000 == 0 and iteration > 0 and learning_rate > 5e-5:
            for param_group in optimizer.param_groups:
                # note: the decay is applied once per param group
                learning_rate *= 0.9
                param_group['lr'] = learning_rate

        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)

        # Forward
        forward_time = time.time()
        model.train()
        output = model(batch_x)

        # Loss
        if args.binary_threshold is not None:
            loss = loss_func_binary(output, batch_y)
        else:
            loss = loss_func(output, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        if args.max_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.max_norm)
        optimizer.step()

        # Save model (sparse checkpoint schedule)
        if (iteration > 1) and (iteration % 1000 == 0) and \
                ((iteration // 1000 + 4) // (((iteration // 1000 - 1) // 100 + 1) * 100) == 1):
            save_out_dict = {'iteration': iteration,
                             'state_dict': model.state_dict(),
                             'optimizer': optimizer.state_dict()}
            save_out_path = args.basename + '_{}_{}_iter_{}_wd_{}_sl_{}.tar'.format(
                args.target_device, args.model, iteration, args.width, model.seq_len)
            create_folder(os.path.dirname(save_out_path))
            torch.save(save_out_dict, save_out_path)
            logging.info('Save model to {}'.format(save_out_path))

        iteration += 1
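# --- Hedged sketch: move_data_to_gpu, used in the loop above, is presumably a
# --- small helper that converts numpy batches to torch tensors and moves them
# --- to the GPU when cuda is enabled. Assumed implementation, not the source's:
import numpy as np
import torch


def move_data_to_gpu(x, cuda):
    if isinstance(x, np.ndarray):
        x = torch.from_numpy(x)
    if cuda:
        x = x.cuda()
    return x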
debug_print = True

# ___ENVIRONMENT PARAMETERS___
environment_parameters = Parameters()

# ___CREATE TEST FOLDER___
path = create_test_folder('ai')
writeParameterToFile(environment_parameters, training_episodes, testing_episodes,
                     epsilon, epsilon_min, alpha, gamma, decay, replace_target_iter,
                     memory_size, batch_size, use_seed, path, test_seed)
if debug_print:
    print("Parameter written to Test folder.")

# ___DATA GENERATION___
generator = DataGenerator(use_seed, environment_parameters)
training_data = generator.generateDataSet(training_episodes, training_seed)
test_data = generator.generateDataSet(testing_episodes, test_seed)
if debug_print:
    print("Data generated.")

# ___CREATE ENVIRONMENT___
environment = Environment(training_episodes, testing_episodes, epsilon, epsilon_min,
                          alpha, gamma, decay, replace_target_iter, memory_size,
                          batch_size, training_data, test_data, environment_parameters)

# ___TRAIN___
loss, train_rewards = environment.train()
if debug_print:
    print("Agent trained.")
def test_should_raise_error_when_labels_and_samples_are_mis_matched(self):
    images = np.random.rand(20, 64, 64)
    with self.assertRaises(ValueError) as e:
        DataGenerator().fit(images, [1])
    self.assertEqual("Samples are not labeled properly", str(e.exception))
import json

import numpy as np

from data_generator import DataGenerator

data = DataGenerator("/Users/tkrollins/OneDrive/Courses/capstone/question-answering/data/interim/SQuAD-v1.1-dev_embeddings.json")

paragraphs = []
questions = []
# contexts = []
sentences = []

for article in data.children:
    paragraphs.append(len(article))
    for paragraph in article:
        sentences.append(len(paragraph.data))
        questions.append(len(paragraph))

print('Paragraphs Per Article')
print(f'MAX: {np.max(paragraphs)}')
print(f'MIN: {np.min(paragraphs)}')
print(f'MEAN: {np.mean(paragraphs)}')
print(f'STD: {np.std(paragraphs)}\n')

print('Questions Per Paragraph')
print(f'MAX: {np.max(questions)}')
print(f'MIN: {np.min(questions)}')
print(f'MEAN: {np.mean(questions)}')
print(f'STD: {np.std(questions)}\n')

# print('Paragraph Character Length')
def test_should_raise_error_when_time_delay_parameter_is_set_and_input_is_simple_images(self):
    images = np.random.rand(10, 64, 64, 3)
    labels = np.random.rand(10)
    with self.assertRaises(ValueError) as e:
        DataGenerator(time_delay=4).fit(images, labels)
    self.assertEqual("Time_delay parameter was set but Images say otherwise", str(e.exception))
def run_mcts(options):
    random.seed()
    cols = options["cols"]
    rows = options["rows"]
    n = options["n_tiles"]
    from_file = options["from_file"]
    n_sim = options["n_sim"]
    strategy = "avg_depth" if options["avg_depth"] else "max_depth"

    dg = DataGenerator(cols, rows)
    instances = []
    their_info = None
    count = 0
    if from_file:
        instances_from_file = dg.read_instances()
        for cols_rows, v in instances_from_file[n].items():
            for instance_from_file in v:
                instance = dg.transform_instance_visual_to_tiles_and_board(
                    cols_rows[1],
                    cols_rows[0],
                    instance_from_file.bins,
                    order_tiles=True,
                )
            # this is intentionally unindented; one instance per rows, cols, n_tiles
            if instance:
                instances.append((instance, instance_from_file))
        print(len(instances), "this many instances")

        for i, instance in enumerate(instances):
            if from_file:
                tiles, board = instance[0]
                their_info = instance[1]
            else:
                tiles, board = instance
                their_info = None
            if int(their_info.id) not in [22983]:
                continue
            for n_sim in [1000]:
                for strategy in ["avg_depth"]:
                    ret = run_one_simulation(
                        tiles,
                        board,
                        board.shape[1],
                        board.shape[0],
                        n_sim,
                        from_file,
                        strategy=strategy,
                        their_info=their_info,
                    )
        print(f"In total {count} will be solved")
    else:
        for i in range(1000):
            cols = random.randint(10, 20)
            rows = random.randint(10, 20)
            dg = DataGenerator(cols, rows)
            tiles, board = dg.gen_tiles_and_board(n, cols, rows, order_tiles=True, from_file=from_file)
            problem_identifier = uuid.uuid4()
            for n_sim in [5000]:
                for strategy in ["max_depth", "avg_depth"]:
                    run_one_simulation(
                        tiles,
                        board,
                        board.shape[1],
                        board.shape[0],
                        n_sim,
                        from_file,
                        strategy=strategy,
                        their_info=their_info,
                        problem_identifier=problem_identifier,
                    )
def test_should_raise_error_if_time_delay_is_not_matching_input_time_axis(self):
    images = np.random.rand(10, 4, 64, 64, 3)
    labels = np.random.rand(10)
    with self.assertRaises(ValueError) as e:
        DataGenerator(time_delay=5).fit(images, labels)
    self.assertEqual("Images have time axis length 4 but time_delay parameter was set to 5", str(e.exception))
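# --- Hedged sketch: the fit()-time validation that would satisfy the three
# --- tests above. The real DataGenerator.fit is not shown in the source; this
# --- is one consistent implementation of the checked error messages.
import numpy as np


class DataGenerator:
    def __init__(self, time_delay=None):
        self.time_delay = time_delay

    def fit(self, images, labels):
        images = np.asarray(images)
        # every sample must have a label
        if len(images) != len(labels):
            raise ValueError("Samples are not labeled properly")
        if self.time_delay is not None:
            # time_delay implies a 5-D input: (samples, time, height, width, channels)
            if images.ndim != 5:
                raise ValueError("Time_delay parameter was set but Images say otherwise")
            if images.shape[1] != self.time_delay:
                raise ValueError(
                    "Images have time axis length %d but time_delay parameter was set to %d"
                    % (images.shape[1], self.time_delay))
        return self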
def main(options):
    #N_TILES = 8
    #HEIGHT = 8
    #WIDTH = 8
    HEIGHT = 25
    WIDTH = 25
    N_TILES = 50
    for (WIDTH, HEIGHT) in [(30, 30)]:
        #for N_TILES in [50]:
        SCALAR_TILES = True
        predict_move_index = True
        g = Game(HEIGHT, WIDTH, N_TILES)
        dg = DataGenerator(WIDTH, HEIGHT)
        # from alpha.binpack.tensorflow.NNet import NNetWrapper as nn
        from alpha.binpack.keras.NNet import NNetWrapper as nn
        nnet = nn(g, scalar_tiles=SCALAR_TILES)
        n_tiles, height, width = N_TILES, HEIGHT, WIDTH

        if options['load_model']:
            nnet.load_checkpoint()
        else:
            # place tiles one by one
            # generate pair x and y where x is stack of state + tiles
            print('Preparing examples')
            N_EXAMPLES = 1000
            examples = get_n_examples(N_EXAMPLES, width, height, n_tiles, dg, scalar_tiles=SCALAR_TILES)
            if options['load_examples']:
                with open(f'models/train_examples_{N_TILES}_{HEIGHT}_{WIDTH}.pickle', 'rb') as f:
                    train_examples = pickle.load(f)
            else:
                train_examples = get_examples(examples, N_TILES, height, width, dg,
                                              return_binary_mask=True,
                                              predict_move_index=True,
                                              scalar_tiles=SCALAR_TILES)
                with open(f'models/train_examples_{N_TILES}_{HEIGHT}_{WIDTH}.pickle', 'wb') as f:
                    pickle.dump(train_examples, f)
            if options['load_val_examples']:
                with open(f'models/validation_examples_{N_TILES}_{HEIGHT}_{WIDTH}.pickle', 'rb') as f:
                    validation_examples = pickle.load(f)
            else:
                N_EXAMPLES = 100
                validation_examples = get_n_examples(N_EXAMPLES, width, height, n_tiles, dg,
                                                     scalar_tiles=SCALAR_TILES)
                validation_examples = get_examples(validation_examples, N_TILES, height, width, dg,
                                                   from_file=False,
                                                   return_binary_mask=True,
                                                   predict_move_index=True,
                                                   scalar_tiles=SCALAR_TILES,
                                                   shuffle_tiles_times=1)
                validation_examples = get_val_examples_not_in_overlap(train_examples, validation_examples)
                with open(f'models/validation_examples_{N_TILES}_{HEIGHT}_{WIDTH}.pickle', 'wb') as f:
                    pickle.dump(validation_examples, f)

        if not options['load_model']:
            print(f'In total: {len(train_examples)} train examples')
            print(f'In total: {len(validation_examples)} validation examples')

        if options['load_model']:
            nnet.load_checkpoint()
        else:
            nnet.train(train_examples, validation_examples)
            nnet.save_checkpoint()

        np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)}, linewidth=115)
        total_correct = 0
        total_random_correct = 0
        total_max_col_correct = 0
        total_max_row_correct = 0
        total_biggest_tile_correct = 0
        total_smallest_tile_correct = 0
        total_most_common_tile_correct = 0
        total_count = 0
        n_empty_tiles_with_fails = [0] * (N_TILES + 1)

        if False:
            # overlap was 39/1868 between val and train
            get_overlap_between_examples(train_examples, validation_examples)
            return
        if False:
            get_only_random_predictions(validation_examples)
            return

        for example in validation_examples:
            prediction = nnet.predict([example[0], example[1]])
            random_prediction = random.randint(0, SolutionChecker.get_n_nonplaced_tiles(example[1]) - 1)
            output_str = ''
            if VISUALIZE_PREDICTIONS:
                output_str += f'----------------------------------------------------------'
                output_str += '\n'
            if predict_move_index:
                _prediction = prediction
                if VISUALIZE_PREDICTIONS:
                    output_str += f'{prediction}\n'
                max_index = np.argmax(prediction)
                _prediction_index = max_index
                if SCALAR_TILES:
                    expected = np.argmax(example[2])
                else:
                    expected = np.argmax(example[1])
                # if not scalar_tiles:
                #     print('grid state')
                #     print(example[0][:, :, 0])
                #     print(state_to_tiles_dims(example[0], dg))
                #     print('expected')
                if SCALAR_TILES:
                    expected_tile = example[1][expected]
                    prediction_tile = example[1][_prediction_index]
                    if VISUALIZE_PREDICTIONS:
                        output_str += f'{example[1].tolist()}\n'
                else:
                    expected_tile = dg.get_matrix_tile_dims(example[0][:, :, expected + 1])
                    prediction_tile = dg.get_matrix_tile_dims(example[0][:, :, _prediction_index + 1])
                # print(expected, expected_tile)
                #print('prediction')
                # print(_prediction)
                #print(_prediction_index, prediction_tile)
                if VISUALIZE_PREDICTIONS:
                    output_str += f'{example[0]}\n'
                    output_str += f'expected: {expected_tile}, got: {prediction_tile}'
                    output_str += f'random: {example[1][random_prediction]}'

                if SCALAR_TILES:
                    widest_tile = example[1][0]
                    highest_tile = example[1][0]
                    biggest_tile = example[1][0]
                    smallest_tile = example[1][0]
                    counter = Counter()
                    for i, tile in enumerate(example[1]):
                        if tile[1] > widest_tile[1]:
                            widest_tile = tile
                        elif tile[1] == widest_tile[1]:
                            if tile[0] > widest_tile[0]:
                                widest_tile = tile
                        if tile[0] > highest_tile[0]:
                            highest_tile = tile
                        elif tile[0] == highest_tile[0]:
                            if tile[1] > highest_tile[1]:
                                highest_tile = tile
                        if tile[1] * tile[0] > (biggest_tile[1] * biggest_tile[0]):
                            biggest_tile = tile
                        if tile[1] * tile[0] < (smallest_tile[1] * smallest_tile[0]):
                            smallest_tile = tile
                        counter[tuple(tile.tolist())] += 1
                    if np.array_equal(expected_tile, widest_tile):
                        total_max_col_correct += 1
                    if np.array_equal(expected_tile, highest_tile):
                        total_max_row_correct += 1
                    if np.array_equal(expected_tile, biggest_tile):
                        total_biggest_tile_correct += 1
                    if np.array_equal(expected_tile, smallest_tile):
                        total_smallest_tile_correct += 1
                    most_common_tile = np.array(counter.most_common(1)[0][0])
                    if np.array_equal(most_common_tile, np.array([0, 0])):
                        most_common_tile = np.array(counter.most_common(2)[1][0])
                    if np.array_equal(expected_tile, most_common_tile):
                        total_most_common_tile_correct += 1
                    if VISUALIZE_PREDICTIONS:
                        output_str += f'max_tile: {widest_tile}\n'
                    if np.array_equal(expected_tile, prediction_tile):
                        total_correct += 1
                        # visualize predictions
                        #if not np.array_equal(expected_tile, widest_tile) and VISUALIZE_PREDICTIONS:
                        #    print(output_str)
                        if VISUALIZE_PREDICTIONS:
                            print(output_str)
                    else:
                        n_empty_tiles_with_fails[count_n_of_non_placed_tiles(example[1]) // 2] += 1
                    # print(example[1][random_prediction])
                    if np.array_equal(expected_tile, example[1][random_prediction]):
                        total_random_correct += 1
                else:
                    if expected_tile == prediction_tile:
                        total_correct += 1
                    else:
                        n_empty_tiles_with_fails[
                            count_n_of_non_placed_tiles(state_to_tiles_dims(example[0], dg)) // 2] += 1
                total_count += 1
            else:
                _prediction = np.reshape(prediction, (width, height))
                _prediction = get_prediction_masked(_prediction, example[0][:, :, 0])
                expected = np.reshape(example[1], (width, height))
                if VISUALIZE_PREDICTIONS:
                    # visualize predictions
                    # print('-' * 50)
                    # print('grid state')
                    # print(example[0][:, :, 0])
                    # print('expected')
                    # print(expected)
                    # print('prediction')
                    # print(_prediction)
                    pass

        if predict_move_index:
            with open(f"nn_results/custom_{N_TILES}_{width}_{height}.csv", 'w') as f:
                output_str = (
                    f'{width}-{height}\n'
                    f'In total guessed;{100 * (total_correct / total_count)}; {total_correct}/{total_count}\n'
                    f'Random baseline; {100 * (total_random_correct / total_count)}\n'
                    f'Max col tile baseline; {100 * (total_max_col_correct / total_count)}\n'
                    f'Max row tile baseline; {100 * (total_max_row_correct / total_count)}\n'
                    f'Most common tile baseline; {100 * (total_most_common_tile_correct / total_count)}\n'
                    f'Max area tile baseline; {100 * (total_biggest_tile_correct / total_count)}\n'
                    f'Min area tile baseline; {100 * (total_smallest_tile_correct / total_count)}'
                )
                f.write(output_str)
            print(output_str)
            print(n_empty_tiles_with_fails)
        print('-' * 100)

        if not PREDICT_FULL_EXAMPLES:
            # return
            continue

        tiles_left = []
        for example in examples:
            if SCALAR_TILES:
                state, tiles, _ = example
                tiles = get_tiles_with_orientation(tiles.tolist())
            else:
                tiles, _ = example
                # tiles = dg.get_matrix_tile_dims(tiles)
            grid = np.zeros((width, height))
            tiles_left.append(play_using_prediction(nnet, width, height, tiles, grid,
                                                    N_TILES, dg, predict_move_index,
                                                    scalar_tiles=SCALAR_TILES))
        # [0, 6, 4, 2, 2, 2, 0, 4, 4, 8, 6, 2, 2, 6, 6, 8, 6, 4, 4, 4]
        print(tiles_left)
        print(np.sum(tiles_left) / len(tiles_left))
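# --- Hedged sketch: the tile bookkeeping the evaluation above appears to
# --- assume. Placed tiles seem to be zeroed out to (0, 0) in the tile list
# --- (see the most_common([0, 0]) special case), so counting non-placed tiles
# --- means counting nonzero rows; the `// 2` divisions suggest each tile is
# --- stored in both orientations. This is an inference, not the project's code.
import numpy as np


def get_n_nonplaced_tiles(tiles):
    tiles = np.asarray(tiles)
    # a tile row is "placed" once it has been zeroed to (0, 0)
    return int(np.sum(np.any(tiles != 0, axis=1)))


assert get_n_nonplaced_tiles([[2, 3], [3, 2], [0, 0]]) == 2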
def main(argv):
    seed = 10
    print "seed", seed
    np.random.seed(seed)

    num_threads = 1
    num_runs = 30

    try:
        opts, args = getopt.getopt(argv, "f:z:a:b:c:s:m:r:t:")
    except getopt.GetoptError:
        print "Bad Arguments to python script"
        sys.exit(2)

    settings = Elastic_Net_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-z':
            settings.num_nonzero_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert(arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_correlated(settings.num_features, settings.num_nonzero_features)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method not in ["SP", "SP0"] and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
def main(argv):
    num_threads = 1
    num_runs = 1

    try:
        opts, args = getopt.getopt(argv, "d:z:f:g:a:v:s:m:t:r:i:")
    except getopt.GetoptError:
        print "Bad argument given"
        sys.exit(2)

    settings = Matrix_Completion_Group_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_groups = int(arg_split[0])
            settings.num_nonzero_col_groups = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-g':
            arg_split = arg.split(",")
            settings.num_row_groups = int(arg_split[0])
            settings.num_col_groups = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            assert (settings.train_perc + settings.validate_perc <= 1.0)
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert (arg in METHODS)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.gamma_to_row_col_m = float(arg)

    assert (settings.num_nonzero_s <= settings.num_rows and settings.num_nonzero_s <= settings.num_cols)
    settings.matrix_size = settings.num_rows * settings.num_cols

    print "TOTAL NUM RUNS %d" % num_runs
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion_groups(
            gamma_to_row_col_m=settings.gamma_to_row_col_m,
            feat_factor=settings.feat_factor)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print "Do multiprocessing"
        pool = Pool(num_threads)
        results = pool.map(fit_data_for_iter_safe, run_data)
    else:
        print "Avoiding multiprocessing"
        results = map(fit_data_for_iter_safe, run_data)

    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print "==========TOTAL RUNS %d============" % method_results.get_num_runs()
    method_results.print_results()
    print "num crashes %d" % num_crashes
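# --- Hedged sketch: fit_data_for_iter_safe, shared by the three evaluation
# --- scripts above, is presumably a crash-tolerant wrapper around the per-run
# --- fitting routine -- the callers count None results as crashes. Assumed
# --- shape (fit_data_for_iter stands in for the real fitting function):
import traceback


def fit_data_for_iter_safe(iter_data):
    try:
        return fit_data_for_iter(iter_data)
    except Exception:
        # swallow the crash so pool.map keeps going; None is counted as a crash
        traceback.print_exc()
        return None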
headers = { "Content-Type" : "application/json"} # I'm hardcoding the mint URL for the dev environment here for the moment. try: title_url = "http://localhost:8001/titles/%s" % title.get('title_number') print "Loading %s" % title_url res = requests.post(title_url, data=json.dumps(title), headers=headers) print "Response status code %s" % res.status_code except requests.exceptions.RequestException as e: print "Error %s" % e raise RuntimeError if __name__ == '__main__': quantity = 'all' if len(sys.argv) > 1: quantity = sys.argv[1] raw_data = DataGenerator.load_json() if quantity.isdigit(): n = int(quantity) if n <= len(raw_data): raw_data = raw_data[:n] print "Loading", len(raw_data), "titles" titles = map(DataGenerator.convert_item, raw_data) map(load_title, titles) print "Done loading", len(titles), "titles"
def main():
    if FLAGS.datasource == 'sinusoid':
        if FLAGS.train:
            test_num_updates = 5
        else:
            test_num_updates = 10
    else:
        if FLAGS.datasource == 'miniimagenet':
            if FLAGS.train == True:
                test_num_updates = 1  # eval on at least one update during training
            else:
                test_num_updates = 10
        else:
            test_num_updates = 10

    if FLAGS.train == False:
        orig_meta_batch_size = FLAGS.meta_batch_size
        # always use meta batch size of 1 when testing.
        FLAGS.meta_batch_size = 1

    if FLAGS.datasource == 'sinusoid':
        data_generator = DataGenerator(FLAGS.update_batch_size * 2, FLAGS.meta_batch_size)
    else:
        if FLAGS.metatrain_iterations == 0 and FLAGS.datasource == 'miniimagenet':
            assert FLAGS.meta_batch_size == 1
            assert FLAGS.update_batch_size == 1
            data_generator = DataGenerator(1, FLAGS.meta_batch_size)  # only use one datapoint,
        else:
            if FLAGS.datasource == 'miniimagenet':
                # TODO - use 15 val examples for imagenet?
                if FLAGS.train:
                    data_generator = DataGenerator(FLAGS.update_batch_size + 15, FLAGS.meta_batch_size)  # only use one datapoint for testing to save memory
                else:
                    data_generator = DataGenerator(FLAGS.update_batch_size * 2, FLAGS.meta_batch_size)  # only use one datapoint for testing to save memory
            else:
                data_generator = DataGenerator(FLAGS.update_batch_size * 2, FLAGS.meta_batch_size)  # only use one datapoint for testing to save memory

    dim_output = data_generator.dim_output
    if FLAGS.baseline == 'oracle':
        assert FLAGS.datasource == 'sinusoid'
        dim_input = 3
        FLAGS.pretrain_iterations += FLAGS.metatrain_iterations
        FLAGS.metatrain_iterations = 0
    else:
        dim_input = data_generator.dim_input

    if FLAGS.datasource == 'miniimagenet' or FLAGS.datasource == 'omniglot':
        tf_data_load = True
        num_classes = data_generator.num_classes

        if FLAGS.train:  # only construct training model if needed
            random.seed(5)
            image_tensor, label_tensor = data_generator.make_data_tensor()
            inputa = tf.slice(image_tensor, [0, 0, 0], [-1, num_classes * FLAGS.update_batch_size, -1])
            inputb = tf.slice(image_tensor, [0, num_classes * FLAGS.update_batch_size, 0], [-1, -1, -1])
            labela = tf.slice(label_tensor, [0, 0, 0], [-1, num_classes * FLAGS.update_batch_size, -1])
            labelb = tf.slice(label_tensor, [0, num_classes * FLAGS.update_batch_size, 0], [-1, -1, -1])
            input_tensors = {'inputa': inputa, 'inputb': inputb, 'labela': labela, 'labelb': labelb}

        random.seed(6)
        image_tensor, label_tensor = data_generator.make_data_tensor(train=False)
        inputa = tf.slice(image_tensor, [0, 0, 0], [-1, num_classes * FLAGS.update_batch_size, -1])
        inputb = tf.slice(image_tensor, [0, num_classes * FLAGS.update_batch_size, 0], [-1, -1, -1])
        labela = tf.slice(label_tensor, [0, 0, 0], [-1, num_classes * FLAGS.update_batch_size, -1])
        labelb = tf.slice(label_tensor, [0, num_classes * FLAGS.update_batch_size, 0], [-1, -1, -1])
        metaval_input_tensors = {'inputa': inputa, 'inputb': inputb, 'labela': labela, 'labelb': labelb}
    else:
        tf_data_load = False
        input_tensors = None

    model = MAML(dim_input, dim_output, test_num_updates=test_num_updates)
    if FLAGS.train or not tf_data_load:
        model.construct_model(input_tensors=input_tensors, prefix='metatrain_')
    if tf_data_load:
        model.construct_model(input_tensors=metaval_input_tensors, prefix='metaval_')
    model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES), max_to_keep=10)

    sess = tf.InteractiveSession()

    if FLAGS.train == False:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    if FLAGS.train_update_batch_size == -1:
        FLAGS.train_update_batch_size = FLAGS.update_batch_size
    if FLAGS.train_update_lr == -1:
        FLAGS.train_update_lr = FLAGS.update_lr

    exp_string = 'cls_' + str(FLAGS.num_classes) + '.mbs_' + str(FLAGS.meta_batch_size) + \
                 '.ubs_' + str(FLAGS.train_update_batch_size) + '.numstep' + str(FLAGS.num_updates) + \
                 '.updatelr' + str(FLAGS.train_update_lr)

    if FLAGS.num_filters != 64:
        exp_string += 'hidden' + str(FLAGS.num_filters)
    if FLAGS.max_pool:
        exp_string += 'maxpool'
    if FLAGS.stop_grad:
        exp_string += 'stopgrad'
    if FLAGS.baseline:
        exp_string += FLAGS.baseline
    if FLAGS.norm == 'batch_norm':
        exp_string += 'batchnorm'
    elif FLAGS.norm == 'layer_norm':
        exp_string += 'layernorm'
    elif FLAGS.norm == 'None':
        exp_string += 'nonorm'
    else:
        print('Norm setting not recognized.')

    resume_itr = 0
    model_file = None

    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if FLAGS.resume or not FLAGS.train:
        model_file = tf.train.latest_checkpoint(FLAGS.logdir + '/' + exp_string)
        if FLAGS.test_iter > 0:
            model_file = model_file[:model_file.index('model')] + 'model' + str(FLAGS.test_iter)
        if model_file:
            ind1 = model_file.index('model')
            resume_itr = int(model_file[ind1 + 5:])
            print("Restoring model weights from " + model_file)
            saver.restore(sess, model_file)

    if FLAGS.train:
        train(model, saver, sess, exp_string, data_generator, resume_itr)
    else:
        test(model, saver, sess, exp_string, data_generator, test_num_updates)