def train_model(dataset, mod_hypers, opt_hypers, dat_hypers, model_params=None):
    """Stochastic gradient descent optimization of a Discriminative RBM.

    Input
    -----
    dataset : tuple(tuple(np.ndarray))
        Training and validation sets
    mod_hypers : dictionary
        Model hyperparameters
    opt_hypers : dictionary
        Optimization hyperparameters
    dat_hypers : dictionary
        Dataset hyperparameters (input and class dimensions)
    model_params : list(np.ndarray)
        A list of model parameter values (optional)

    Output
    ------
    model_params : list(np.ndarray)
        List of all the model parameters (see class definition)
    valid_nll : float
        Validation set negative log-likelihood
    """
    # Read training, validation and test sets
    X_train, y_train = dataset[0]
    X_valid, y_valid = dataset[1]
    if opt_hypers['eval_test']:
        X_test, y_test = dataset[2]

    # We need these as well; note that n_visible = n_input + n_class
    n_input = dat_hypers['n_input']
    n_class = dat_hypers['n_class']

    # Train the DRBM
    model = DRBM(n_input=n_input, n_class=n_class, hypers=mod_hypers,
                 init_params=model_params)

    # The actual learning step
    if opt_hypers['opt_type'] == 'batch-gd':
        # Batch learning
        X_train, y_train = make_batches(X_train, y_train, opt_hypers['batch_size'])
        X_valid, y_valid = make_batches(X_valid, y_valid, X_valid.shape[0])
        if opt_hypers['eval_test']:
            X_test, y_test = make_batches(X_test, y_test, X_test.shape[0])
            dataset = ((X_train, y_train), (X_valid, y_valid), (X_test, y_test))
        else:
            dataset = ((X_train, y_train), (X_valid, y_valid))
        optimizer = sgd(opt_hypers)
        params, valid_score = optimizer.optimize(model, dataset)
    else:
        raise NotImplementedError

    return params, valid_score
def _samples_h(self, filepath, x_batches, num_samples=100, tile_shape=(10, 10)):
    '''Run inputs through the Mover output `h` and save a tiled image grid.'''
    if not os.path.exists(os.path.dirname(filepath)):
        os.makedirs(os.path.dirname(filepath))
    # round num_samples up to a whole number of batches
    num = ((num_samples - 1) // self.batch_size + 1) * self.batch_size
    x = np.zeros(
        [num, self.img_size[0], self.img_size[1], self.img_size[2]],
        dtype=np.float32)
    batches = make_batches(num, self.batch_size)
    for batch_idx, (batch_start, batch_end) in enumerate(batches):
        x_batch = x_batches[batch_start:batch_end]
        x[batch_start:batch_end] = self.tf_session.run(
            self.h, feed_dict={self.x: x_batch})
    idx = np.random.permutation(num)[:num_samples]
    x = (x[idx] + 1.0) / 2.0  # rescale from [-1, 1] to [0, 1]
    imgs = create_image_grid(x, img_size=self.img_size, tile_shape=tile_shape)
    spm.imsave(filepath, imgs)
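# The sampler above and the GAN fit/generate loops below call
# make_batches(size, batch_size) and unpack (batch_start, batch_end) pairs.
# A minimal sketch compatible with that usage, mirroring the Keras helper of
# the same name (the exact implementation is not shown in these snippets):
def make_batches(size, batch_size):
    """Return a list of (start, end) index tuples covering `size` items."""
    num_batches = (size + batch_size - 1) // batch_size  # ceiling division
    return [(i * batch_size, min(size, (i + 1) * batch_size))
            for i in range(num_batches)]

# Example: make_batches(10, 4) -> [(0, 4), (4, 8), (8, 10)]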
def test_and_compute_score(self, x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
    with tf.Session(config=utils.get_default_config()) as sess:
        saver = tf.train.Saver(tf.global_variables())
        check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
        if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
            message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message,
                                               self.running_mode, self.datetime)
            saver.restore(sess, check_point.model_checkpoint_path)
        else:
            raise Exception('Saved model not found.')

        # drop the tail so the test set is a whole number of batches
        testing_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
        testing_batches = utils.make_batches(testing_set, self.batch_size)

        average_test_loss = 0.0
        full_y_pred = np.array([])
        for batch_idx, (batch_start, batch_end) in enumerate(testing_batches):
            batch_x_opcode = x_test_opcode[batch_start:batch_end]
            batch_x_assembly = x_test_assembly[batch_start:batch_end]
            batch_y = y_test[batch_start:batch_end]
            batch_sequence_length = x_test_seq_len[batch_start:batch_end]

            feed_dict = {
                self.X_opcode: batch_x_opcode,
                self.X_assembly: batch_x_assembly,
                self.Y: batch_y,
                self.sequence_length: batch_sequence_length,
            }
            batch_test_loss = sess.run(self.loss, feed_dict=feed_dict)
            batch_y_pred = sess.run(self.y_pred_svm, feed_dict=feed_dict)
            full_y_pred = np.append(full_y_pred, batch_y_pred)
            average_test_loss += batch_test_loss / len(testing_batches)

        full_accuracy_score = mt.accuracy_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_pre_score = mt.precision_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_f1_score = mt.f1_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_recall_score = mt.recall_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_auc_score = mt.roc_auc_score(y_true=y_test[:testing_set], y_score=full_y_pred)

        message = "testing loss %.5f\n" % average_test_loss
        message += "accuracy %.2f\n" % (full_accuracy_score * 100)
        message += "compute score:\n"
        message += '\tprecision score %.5f\n' % (full_pre_score * 100)
        message += '\tf1 score %.5f\n' % (full_f1_score * 100)
        message += '\trecall score %.5f\n' % (full_recall_score * 100)
        message += '\tAUC score %.5f\n' % (full_auc_score * 100)
        message += "-----------------------------------------------------\n"
        message += "Finish computing score process.\n"
        utils.print_and_write_logging_file(self.logging_path, message,
                                           self.running_mode, self.datetime)
def _generate(self, num_samples=100):
    sess = self.tf_session
    batch_size = self.g_batch_size * self.num_gens
    # round num_samples up to a whole number of batches
    num = ((num_samples - 1) // batch_size + 1) * batch_size
    z = self.z_prior.sample([num, self.num_z]).astype(np.float32)
    x = np.zeros(
        [num, self.img_size[0], self.img_size[1], self.img_size[2]],
        dtype=np.float32)
    batches = make_batches(num, batch_size)
    for batch_idx, (batch_start, batch_end) in enumerate(batches):
        z_batch = z[batch_start:batch_end]
        x[batch_start:batch_end] = sess.run(self.sampler,
                                            feed_dict={self.z: z_batch})
    # group samples by generator, then keep every 7th sample per generator
    f_x = np.reshape(x, [
        self.num_gens, -1, self.img_size[0], self.img_size[1],
        self.img_size[2]
    ])
    f_x = f_x[:, 0::7, :, :, :]
    f_x = np.reshape(f_x, [
        num_samples, self.img_size[0], self.img_size[1], self.img_size[2]
    ])
    # rescale from [-1, 1] to [0, 1]
    x = (f_x + 1) / 2
    return x
@classmethod
def setUpClass(cls):
    # VertexFrequencyCluster
    # Custom window sizes
    cls.window_sizes = np.array([2, 4, 8, 24])
    data, cls.labels = make_batches(n_pts_per_cluster=100)
    cls.G = gt.Graph(data, sample_idx=cls.labels, use_pygsp=True)
    meld_op = meld.MELD()
    cls.EES = meld_op.fit_transform(G=cls.G, RES=cls.labels)
def test_mnn():
    data, labels = make_batches(n_pts_per_cluster=250)
    meld_op = meld.MELD(verbose=0)
    sample_densities = meld_op.fit_transform(data, labels, sample_idx=labels)
    sample_likelihoods = meld.utils.normalize_densities(sample_densities)
    meld.VertexFrequencyCluster().fit_transform(
        G=meld_op.graph,
        sample_indicator=meld_op.sample_indicators["expt"],
        likelihood=sample_likelihoods["expt"],
    )
def train(self, x_train, y_train, loss_fn, optimiser, metric, *,
          batch_size, epochs):
    for epoch in range(epochs):
        batches, labels = make_batches(x_train, y_train, batch_size,
                                       shuffle_data=True)
        for batch, label in zip(batches, labels):
            result = self(batch)
            self.backpropagate(result, label, loss_fn, optimiser)
        train_metric = metric(y_train, self(x_train))
        print(f'Epoch {epoch + 1}: {train_metric}')
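# The trainer above expects make_batches(x, y, batch_size, shuffle_data=...)
# to return two parallel sequences of mini-batches. The project's own helper
# is not shown; a minimal sketch consistent with that call is:
import numpy as np

def make_batches(x, y, batch_size, shuffle_data=False):
    """Split x and y into aligned mini-batches, optionally shuffling first."""
    if shuffle_data:
        idx = np.random.permutation(len(x))
        x, y = x[idx], y[idx]
    xs = [x[i:i + batch_size] for i in range(0, len(x), batch_size)]
    ys = [y[i:i + batch_size] for i in range(0, len(y), batch_size)]
    return xs, ys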
@classmethod
def setUpClass(cls):
    # VertexFrequencyCluster
    # Custom window sizes
    cls.window_sizes = np.array([2, 4, 8, 24])
    cls.data, cls.sample_labels = make_batches(n_pts_per_cluster=100)
    meld_op = meld.MELD(verbose=0)
    cls.densities = meld_op.fit_transform(
        cls.data, sample_labels=cls.sample_labels)
    cls.sample_indicators = meld_op.sample_indicators
    cls.likelihoods = meld.utils.normalize_densities(cls.densities)
    cls.G = meld_op.graph
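# In the MELD tests here, make_batches(n_pts_per_cluster=...) is a test
# fixture returning synthetic points plus a per-point sample label (later
# code indexes sample_indicators with "expt"). The real fixture lives in the
# test utilities; a rough stand-in with the same interface could be:
import numpy as np

def make_batches(n_pts_per_cluster=100, seed=42):
    """Two Gaussian clusters labelled 'ctrl' and 'expt' (assumed names)."""
    rng = np.random.default_rng(seed)
    data, labels = [], []
    for i, sample in enumerate(['ctrl', 'expt']):
        data.append(rng.normal(loc=3.0 * i, scale=1.0,
                               size=(n_pts_per_cluster, 2)))
        labels.extend([sample] * n_pts_per_cluster)
    return np.vstack(data), np.array(labels)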
def generate(self, num_samples=1000):
    zs = np.random.normal(0.0, 1.0, [num_samples, self.num_z])
    g = np.zeros([num_samples, 2])
    batches = make_batches(num_samples, self.batch_size)
    for batch_idx, (batch_start, batch_end) in enumerate(batches):
        g[batch_start:batch_end] = self.tf_session.run(
            self.x_g,
            feed_dict={
                self.z: np.reshape(zs[batch_start:batch_end],
                                   [batch_end - batch_start, self.num_z])
            })
    return g
def _generate(self, num_samples=100):
    # round num_samples up to a whole number of batches
    num = ((num_samples - 1) // self.batch_size + 1) * self.batch_size
    z = self.z_prior.sample([num, self.num_z]).astype(np.float32)
    x = np.zeros(
        [num, self.img_size[0], self.img_size[1], self.img_size[2]],
        dtype=np.float32)
    batches = make_batches(num, self.batch_size)
    for batch_idx, (batch_start, batch_end) in enumerate(batches):
        z_batch = z[batch_start:batch_end]
        x[batch_start:batch_end] = self.tf_session.run(
            self.g, feed_dict={self.z: z_batch})
    # keep a random subset of exactly num_samples, rescaled to [0, 1]
    idx = np.random.permutation(num)[:num_samples]
    return (x[idx] + 1.0) / 2.0
def preproc_predict(self, imgs, batch_size=32, augmentation_seed=None):
    """Preprocess images and predict with the model, batch by batch.

    Input:
        imgs: 4D float or int array of images
        batch_size: integer, size of the batch
    Returns:
        predictions: numpy array with predictions
            (num_images, len_model_output)
    """
    print('base_model preproc_predict!')
    batch_idx = make_batches(imgs.shape[0], batch_size)
    imgs_preds = np.zeros((imgs.shape[0], ) +
                          self.model.get_output_shape_at(0)[1:])
    print('Computing predictions with the shape {}'.format(imgs_preds.shape))

    # do some augmentation here
    use_augmentation = augmentation_seed is not None
    print('use_augmentation = %s and augmentation_seed = %s' %
          (use_augmentation, augmentation_seed))
    if use_augmentation:
        gen_args = dict(
            rotation_range=30,
            width_shift_range=0.15,
            height_shift_range=0.15,
            shear_range=0.1,
            zoom_range=0.15,
            channel_shift_range=0.15,
            data_format=K.image_data_format(),
            fill_mode='reflect',
            preprocessing_function=self.backend_class.normalize,
        )
        aug_gen = ImageDataGenerator(**gen_args)

    for sid, eid in batch_idx:
        if use_augmentation:
            # flow() yields whole batches; with batch_size covering the
            # slice, the first yield ([0], found experimentally) is enough
            preproc = aug_gen.flow(imgs[sid:eid], batch_size=batch_size,
                                   seed=augmentation_seed)
            assert len(preproc) == 1
            assert len(preproc[0]) <= batch_size
            preproc = preproc[0]
        else:
            preproc = self.backend_class.normalize(imgs[sid:eid])
        imgs_preds[sid:eid] = self.model.predict_on_batch(preproc)
    print('imgs_preds = %s' % imgs_preds)
    return imgs_preds
def eval_step(model, criterion, data, cell_line_info, drug_fingerprint_info):
    model.eval()
    step = 0
    l1, l2, CD, ED, pearson, r_squared = .0, .0, .0, .0, .0, .0
    if len(data) % parameters.batch_size == 0:
        batch_num = len(data) // parameters.batch_size
    else:
        batch_num = len(data) // parameters.batch_size + 1
    batches = utils.make_batches(data, cell_line_info, drug_fingerprint_info,
                                 parameters.batch_size, False)
    for batch in batches:
        cell_lines, drugs, targets, doses, times = batch
        with torch.no_grad():
            input_cell_lines = torch.cuda.FloatTensor(cell_lines)
            input_drugs = torch.cuda.FloatTensor(drugs)
            input_targets = torch.cuda.FloatTensor(targets)
            input_doses = torch.cuda.FloatTensor(doses)
            input_times = torch.cuda.FloatTensor(times)

            predicted = model(input_cell_lines, input_drugs, input_doses,
                              input_times)

            # accumulate L1, L2, cosine, Euclidean, Pearson and R^2 terms
            l1 += torch.abs(predicted - input_targets).sum()
            l2 += criterion(predicted, input_targets)
            CD += nn.functional.cosine_similarity(predicted, input_targets).sum()
            ED += torch.sqrt(torch.mul(predicted - input_targets,
                                       predicted - input_targets)
                             .sum(dim=1)).sum()
            for i in range(len(input_targets)):
                pearson += stats.pearsonr(input_targets[i], predicted[i])[0]
            r_squared += rsquared(input_targets, predicted).sum()
        step += 1
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        print("Process Validation Batch: [{}/{}]".format(step, batch_num))
    # cosine similarity is reported as cosine distance (1 - similarity)
    return (l1 / len(data), l2 / len(data), 1 - CD / len(data),
            ED / len(data), pearson / len(data), r_squared / len(data))
def train(self, ktrain, kval, n_epochs, batch_size, no_improve_lim):
    self.tloss, self.tacc, self.vloss, self.vacc = [], [], [], []
    self.min_val_loss = 100000
    self.no_improve_lim = no_improve_lim
    self.no_improve = 0
    self.max_val_acc = 0
    best_model = self.optimizer.target.copy()
    for epoch in range(n_epochs):
        kbtrain = utils.make_batches(ktrain, batch_size=batch_size,
                                     shuffle=True)
        self.n_batches = len(kbtrain)
        self.epoch = epoch
        self.tloss.append([])
        self.tacc.append([])
        for i_batch in range(self.n_batches):
            loss = self.model(Variable(kbtrain[i_batch][0]),
                              Variable(kbtrain[i_batch][1]),
                              test=False).loss
            self.tloss[epoch].append(loss.data[()])
            self.tacc[epoch].append(
                F.accuracy(self.model.y, kbtrain[i_batch][1]).data[()])
            self.print_report('train')
            self.model.cleargrads()
            loss.backward()
            self.optimizer.update()
        self.vloss.append(self.model(Variable(kval[0], volatile=True),
                                     Variable(kval[1], volatile=True),
                                     test=True).loss.data[()])
        self.vacc.append(F.accuracy(self.model.y, kval[1]).data[()])
        self.vcorrect = np.sum(F.argmax(self.model.y, axis=1).data == kval[1])
        self.vall = len(kval[1])
        if self.vloss[-1] < self.min_val_loss:  ## | (self.vacc[-1] > self.max_val_acc):
            best_model = self.optimizer.target.copy()
            self.min_val_loss = self.vloss[-1]
            self.max_val_acc = self.vacc[-1]
            self.no_improve = 0
        self.print_report('val')
        if self.no_improve >= self.no_improve_lim:
            print()
            print("Validation loss did not reduce in " +
                  str(self.no_improve_lim) + " iterations")
            print("Quit iteration loop")
            break
        self.no_improve += 1
        self.optimizer.new_epoch()
    self.model = best_model
def create_badges(data, layout):
    for batch in make_batches(layout.ordering_function(data),
                              layout.badge_per_sheet):
        if settings.printout.show_guidelines:
            draw_margins(layout)
            draw_guidelines(layout)
            draw_cutlines(layout)
            draw_page_borders(layout)
        layout.canvas.translate(0, layout.height_offset)
        for ticket_index, attendee in batch:
            write_verso(attendee, ticket_index, layout)
            layout.canvas.translate(layout.section_width, 0)
            write_recto(attendee, layout)
            layout.canvas.translate(-layout.section_width,
                                    -layout.height_offset)
        # finish the page; subsequent drawing goes on the next page
        layout.canvas.showPage()
    layout.canvas.save()
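# create_badges iterates batches whose elements are (ticket_index, attendee)
# pairs, so the make_batches used here plausibly enumerates the ordered
# attendees and chunks them one sheet at a time. A sketch under that
# assumption:
def make_batches(items, batch_size):
    """Yield lists of (index, item) pairs, batch_size items per batch."""
    indexed = list(enumerate(items))
    for start in range(0, len(indexed), batch_size):
        yield indexed[start:start + batch_size]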
def fit(self, x):
    with tf.device('/gpu:0'):
        if (not hasattr(self, 'epoch')) or self.epoch == 0:
            self._init()
            with self.tf_graph.as_default():
                self._build_model()
                self.tf_session.run(tf.global_variables_initializer())

        # drop the tail so the data is a whole number of batches
        num_data = x.shape[0] - x.shape[0] % self.d_batch_size
        batches = make_batches(num_data, self.d_batch_size)
        best_is = 0.0
        while self.epoch < self.num_epochs:
            for batch_idx, (batch_start, batch_end) in enumerate(batches):
                x_batch = x[batch_start:batch_end]
                if self.same_input:
                    # all generators share one z batch
                    z_batch = self.z_prior.sample(
                        [self.g_batch_size, self.num_z]).astype(np.float32)
                    z_batch = np.vstack([z_batch] * self.num_gens)
                else:
                    z_batch = self.z_prior.sample(
                        [self.g_batch_size * self.num_gens,
                         self.num_z]).astype(np.float32)

                # update discriminator D
                d_bin_loss, d_mul_loss, d_loss, _ = self.tf_session.run(
                    [self.d_bin_loss, self.d_mul_loss, self.d_loss,
                     self.d_opt],
                    feed_dict={self.x: x_batch, self.z: z_batch})

                # update generator G
                g_bin_loss, g_mul_loss, g_loss, _ = self.tf_session.run(
                    [self.g_bin_loss, self.g_mul_loss, self.g_loss,
                     self.g_opt],
                    feed_dict={self.z: z_batch})
                print('batch %d/%d' % (batch_idx, len(batches)))

            self.epoch += 1
            print("Epoch: [%4d/%4d] d_bin_loss: %.5f, d_mul_loss: %.5f, "
                  "d_loss: %.5f, g_bin_loss: %.5f, g_mul_loss: %.5f, "
                  "g_loss: %.5f" %
                  (self.epoch, self.num_epochs, d_bin_loss, d_mul_loss,
                   d_loss, g_bin_loss, g_mul_loss, g_loss))
            self._samples(self.sample_fp.format(epoch=self.epoch + 1))
            self._samples_by_gen(
                self.sample_by_gen_fp.format(epoch=self.epoch + 1))
def train_step(model, criterion, optimizer, data, pre_treatment,
               drug_fingerprint_info):
    model.train()
    step = 0
    loss = .0
    if len(data) % parameters.batch_size == 0:
        batch_num = len(data) // parameters.batch_size
    else:
        batch_num = len(data) // parameters.batch_size + 1
    batches = utils.make_batches(data, pre_treatment, drug_fingerprint_info,
                                 parameters.batch_size)
    for batch in batches:
        pre_treatments, drugs, targets, doses, times = batch
        input_pre_treatments = torch.cuda.FloatTensor(pre_treatments)
        input_drugs = torch.cuda.FloatTensor(drugs)
        input_targets = torch.cuda.FloatTensor(targets)
        input_doses = torch.cuda.FloatTensor(doses)
        input_times = torch.cuda.FloatTensor(times)

        # Optimizing
        optimizer.zero_grad()
        predicted = model(input_pre_treatments, input_drugs, input_doses,
                          input_times)
        _loss = criterion(predicted, input_targets).mean()
        _loss.backward()
        loss += _loss.item()
        optimizer.step()
        step += 1
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        print("Process Training Batch: [{}/{}]".format(step, batch_num))
    return loss / batch_num
def run_actor_critic_model(args, model, session, dataset, file_writer, saver,
                           epoch_num):
    with tf.device('/cpu:0'):
        batched_data, batched_labels, batched_seq_lens, batched_bbox = \
            utils.make_batches(dataset, batch_size=BATCH_SIZE)
        batch_accuracies = []
        for j in range(len(batched_data)):
            data_batch = batched_data[j]
            label_batch = batched_labels[j]
            seq_lens_batch = batched_seq_lens[j]
            bbox_batch = batched_bbox[j]
            if 'actor' in args.model:
                summary, loss, rewards, area_accuracy = \
                    model.run_pretrain_actor_batch(
                        args, session, data_batch, label_batch,
                        seq_lens_batch, bbox_batch)
                print("Loss of the current batch is {0}".format(loss))
                print("Finished batch {0}/{1}".format(j, len(batched_data)))
                print("Total rewards: {0}".format(rewards))
                print("Average area accuracy per sequence per batch: {0}"
                      .format(area_accuracy))
                batch_accuracies.append(area_accuracy)
            if 'critic' in args.model:
                summary, loss = model.run_pretrain_critic_batch(
                    args, session, data_batch, label_batch, seq_lens_batch,
                    bbox_batch, seq_lens_batch, seq_lens_batch)
                print("Loss of the current batch is {0}".format(loss))
            # if 'complete' in args.model:
            file_writer.add_summary(summary, j)

        # Record batch accuracies for test code
        if args.train == "train":
            # Checkpoint model - every epoch
            utils.save_checkpoint(args, session, saver, epoch_num)
        else:  # val or test
            test_accuracy = np.mean(batch_accuracies)
            print("Model {0} accuracy: {1}".format(args.train, test_accuracy))
def test_utils():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    meld_op = meld.MELD()
    sample_densities = meld_op.fit_transform(G, labels)
    sample_likelihoods = meld.utils.normalize_densities(sample_densities)
    meld.VertexFrequencyCluster().fit_predict(
        G=G,
        sample_indicator=meld_op.sample_indicators["expt"],
        likelihood=sample_likelihoods["expt"],
    )
    meld.utils.get_meld_cmap()

    # Test normalize_densities
    # Three samples
    densities = np.ones([100, 3])
    meld.utils.normalize_densities(sample_densities=densities)
    # Two samples
    densities = np.ones([100, 2])
    meld.utils.normalize_densities(sample_densities=densities)
def preproc_predict(self, imgs, batch_size=32):
    """Preprocess images and predict with the model, batch by batch.

    Input:
        imgs: 4D float or int array of images
        batch_size: integer, size of the batch
    Returns:
        predictions: numpy array with predictions
            (num_images, len_model_output)
    """
    print('base_model preproc_predict!')
    batch_idx = make_batches(imgs.shape[0], batch_size)
    imgs_preds = np.zeros((imgs.shape[0], ) +
                          self.model.get_output_shape_at(0)[1:])
    print('Computing predictions with the shape {}'.format(imgs_preds.shape))
    for sid, eid in batch_idx:
        preproc = self.backend_class.normalize(imgs[sid:eid])
        imgs_preds[sid:eid] = self.model.predict_on_batch(preproc)
    print('imgs_preds = %s' % imgs_preds)
    return imgs_preds
INIT_LEARNING_RATE = 0.001
WEIGHT_DECAY_RATE = 0.0005
MOMENTUM = 0.9
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
NUM_CHANNELS = 3
BATCH_SIZE = 50
N_LABELS = 2
DROPOUT = 0.50
LOGS_PATH = './tensorflow_logs/'
WEIGHT_PATH = 'vgg16_weights.npz'
TRAINSET_PATH = 'train.csv'
VALSET_PATH = 'val.csv'
ckpt_dir = "./ckpt_dir_config1"

# N_EPOCHS is assumed to be defined earlier in the script
train_image_batch, train_label_batch = make_batches(
    TRAINSET_PATH, N_EPOCHS, IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE,
    shuffle=False, training=True)
val_image_batch, val_label_batch = make_batches(
    VALSET_PATH, 1, IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE,
    shuffle=False, training=False)

# Count the number of training and test examples
num_train = count_images(TRAINSET_PATH)
num_val = count_images(VALSET_PATH)
print('Train_images: ', num_train)
print('Validation_images: ', num_val)

# Placeholders for tensorflow graph
learning_rate = tf.placeholder(tf.float32, [])
images_tf = tf.placeholder(
    tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 3], name="images")
labels_tf = tf.placeholder(
    tf.int64, [None], name='labels')  # the dimensions could be [None, N_CLASSES]

network = VGG16(N_LABELS, WEIGHT_PATH)
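# Here make_batches is expected to build a TF1 queue-based input pipeline
# from a CSV of (image_path, label) rows and return batch tensors. The
# project's implementation is not shown; a sketch using standard TF1 queue
# ops (the CSV layout and the pass-through `training` flag are assumptions):
def make_batches(csv_path, num_epochs, height, width, batch_size,
                 shuffle=False, training=False):
    filename_queue = tf.train.string_input_producer(
        [csv_path], num_epochs=num_epochs, shuffle=shuffle)
    _, row = tf.TextLineReader().read(filename_queue)
    img_path, label = tf.decode_csv(row, record_defaults=[[''], [0]])
    image = tf.image.decode_jpeg(tf.read_file(img_path),
                                 channels=NUM_CHANNELS)
    image = tf.image.resize_images(image, [height, width])
    # `training` could gate augmentation; it is unused in this sketch
    return tf.train.batch([image, tf.cast(label, tf.int64)],
                          batch_size=batch_size)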
def train(self, x_train_opcode, x_train_assembly, x_train_seq_len, y_train,
          x_valid_opcode, x_valid_assembly, x_valid_seq_len, y_valid,
          x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
    outFile = open(self.OutName, 'w')
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    with tf.Session(config=utils.get_default_config()) as sess:
        writer = tf.summary.FileWriter(self.graph_path, sess.graph)
        check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
        if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
            message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
            saver.restore(sess, check_point.model_checkpoint_path)
        else:
            message = "Create the model with fresh parameters\n"
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
            sess.run(tf.global_variables_initializer())

        #### Separate the dataset by class label
        x_train_opcode_0, x_train_opcode_1 = [], []
        x_train_assembly_0, x_train_assembly_1 = [], []
        y_train_0, y_train_1 = [], []
        x_train_seq_len_0, x_train_seq_len_1 = [], []
        for index, aLabel in enumerate(y_train):
            if aLabel == 0.0:
                x_train_opcode_0.append(x_train_opcode[index, :, :, :])
                x_train_assembly_0.append(x_train_assembly[index, :, :, :])
                y_train_0.append(y_train[index])
                x_train_seq_len_0.append(x_train_seq_len[index])
            else:
                x_train_opcode_1.append(x_train_opcode[index, :, :, :])
                x_train_assembly_1.append(x_train_assembly[index, :, :, :])
                y_train_1.append(y_train[index])
                x_train_seq_len_1.append(x_train_seq_len[index])
        x_train_opcode_0 = np.array(x_train_opcode_0)
        x_train_opcode_1 = np.array(x_train_opcode_1)
        x_train_assembly_0 = np.array(x_train_assembly_0)
        x_train_assembly_1 = np.array(x_train_assembly_1)

        # each batch takes half its examples from each class
        min_train_0_1 = min(x_train_opcode_0.shape[0], x_train_opcode_1.shape[0])
        training_set = min_train_0_1 - min_train_0_1 % (self.batch_size // 2)
        training_batches = utils.make_batches(training_set, (self.batch_size // 2))

        step_loss = 0.0  # average loss per epoch
        step_time = 0.0
        full_train_accuracy_score = []
        full_train_pre_score = []
        full_train_f1_score = []
        full_train_recall_score = []
        full_train_auc_score = []
        initial_step = self.global_step.eval()
        for step in range(initial_step, initial_step + self.num_train_steps):
            loss_per_batch = 0.0
            start_time = time.time()
            full_y_predic_train = np.array([])
            full_y_target_train = np.array([])
            for batch_idx, (batch_start, batch_end) in enumerate(training_batches):
                #### Build a class-balanced batch
                batch_x_opcode_0 = x_train_opcode_0[batch_start:batch_end]
                batch_x_assembly_0 = x_train_assembly_0[batch_start:batch_end]
                batch_y_0 = y_train_0[batch_start:batch_end]
                batch_sequence_length_0 = x_train_seq_len_0[batch_start:batch_end]

                batch_x_opcode_1 = x_train_opcode_1[batch_start:batch_end]
                batch_x_assembly_1 = x_train_assembly_1[batch_start:batch_end]
                batch_y_1 = y_train_1[batch_start:batch_end]
                batch_sequence_length_1 = x_train_seq_len_1[batch_start:batch_end]

                batch_x_opcode = np.concatenate((batch_x_opcode_0, batch_x_opcode_1), axis=0)
                batch_x_assembly = np.concatenate((batch_x_assembly_0, batch_x_assembly_1), axis=0)
                batch_y = batch_y_0 + batch_y_1
                batch_sequence_length = batch_sequence_length_0 + batch_sequence_length_1

                full_y_target_train = np.append(full_y_target_train, batch_y)
                feed_dict = {
                    self.X_opcode: batch_x_opcode,
                    self.X_assembly: batch_x_assembly,
                    self.Y: batch_y,
                    self.sequence_length: batch_sequence_length,
                }
                soutputs, sstates, sphi_x_tilde = sess.run(
                    [self.outputs, self.states, self.phi_x_tilde],
                    feed_dict=feed_dict)

                _, summary, batch_loss, batch_y_pred_train = sess.run(
                    [self.training_op, self.summary_op, self.loss, self.y_pred_svm],
                    feed_dict=feed_dict)
                full_y_predic_train = np.append(full_y_predic_train, batch_y_pred_train)
                # log roughly ten summaries per pass; max(1, ...) guards
                # against a ZeroDivisionError with fewer than ten batches
                if (batch_idx + 1) % max(1, len(training_batches) // 10) == 0:
                    writer.add_summary(summary, global_step=step)
                loss_per_batch += batch_loss / len(training_batches)

                # per-batch training metrics
                batch_train_accuracy_score = mt.accuracy_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_pre_score = mt.precision_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_f1_score = mt.f1_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_recall_score = mt.recall_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_auc_score = mt.roc_auc_score(y_true=full_y_target_train, y_score=full_y_predic_train)
                full_y_predic_train = np.array([])
                full_y_target_train = np.array([])

                full_train_accuracy_score.append(batch_train_accuracy_score)
                full_train_pre_score.append(batch_train_pre_score)
                full_train_f1_score.append(batch_train_f1_score)
                full_train_recall_score.append(batch_train_recall_score)
                full_train_auc_score.append(batch_train_auc_score)

            step_time += (time.time() - start_time)
            step_loss += loss_per_batch
            # if (step + 1) % 10 == 0:
            #     # Save checkpoint and zero timer and loss.
            #     checkpoint_path = os.path.join(self.checkpoint_path, "rnn_classifier_" + self.data_size + ".ckpt")
            #     saver.save(sess, checkpoint_path, global_step=step)
            if (step + 1) % self.display_step == 0:
                # Train metrics
                ave_train_accuracy_score = np.mean(full_train_accuracy_score)
                ave_train_pre_score = np.mean(full_train_pre_score)
                ave_train_f1_score = np.mean(full_train_f1_score)
                ave_train_recall_score = np.mean(full_train_recall_score)
                ave_train_auc_score = np.mean(full_train_auc_score)
                full_train_accuracy_score = []
                full_train_pre_score = []
                full_train_f1_score = []
                full_train_recall_score = []
                full_train_auc_score = []

                message = ("global step %d/%d step-time %.2fs average loss %.5f "
                           "acc %.2f pre %.2f f1 %.2f rec %.2f auc %.2f\n" % (
                               step, self.num_train_steps - 1, step_time, step_loss,
                               ave_train_accuracy_score, ave_train_pre_score,
                               ave_train_f1_score, ave_train_recall_score,
                               ave_train_auc_score))
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                outFile.write("%.2f\n" % (ave_train_accuracy_score * 100))
                outFile.write("%.2f\n" % (ave_train_pre_score * 100))
                outFile.write("%.2f\n" % (ave_train_f1_score * 100))
                outFile.write("%.2f\n" % (ave_train_recall_score * 100))
                outFile.write("%.2f\n" % (ave_train_auc_score * 100))

                # Validation metrics
                step_time, step_loss = 0.0, 0.0
                dev_set = x_valid_opcode.shape[0] - x_valid_opcode.shape[0] % self.batch_size
                dev_batches = utils.make_batches(dev_set, self.batch_size)
                average_dev_loss = 0.0
                full_y_pred_svm = np.array([])
                for batch_idx, (batch_start, batch_end) in enumerate(dev_batches):
                    valid_x_opcode = x_valid_opcode[batch_start:batch_end]
                    valid_x_assembly = x_valid_assembly[batch_start:batch_end]
                    valid_y = y_valid[batch_start:batch_end]
                    valid_seq_len = x_valid_seq_len[batch_start:batch_end]
                    feed_dict = {
                        self.X_opcode: valid_x_opcode,
                        self.X_assembly: valid_x_assembly,
                        self.Y: valid_y,
                        self.sequence_length: valid_seq_len,
                    }
                    batch_dev_loss, batch_y_pred = sess.run(
                        [self.loss, self.y_pred_svm], feed_dict=feed_dict)
                    full_y_pred_svm = np.append(full_y_pred_svm, batch_y_pred)
                    average_dev_loss += batch_dev_loss / len(dev_batches)

                message = "eval: accuracy_svm %.2f\n" % (
                    mt.accuracy_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: precision_svm %.2f\n" % (
                    mt.precision_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: f1_svm %.2f\n" % (
                    mt.f1_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: recall_svm %.2f\n" % (
                    mt.recall_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: roc_auc_svm %.2f\n" % (
                    mt.roc_auc_score(y_true=y_valid[:dev_set], y_score=full_y_pred_svm) * 100)
                message += "-----------------------------------------------------\n"
                outFile.write("%.2f\n" % (mt.accuracy_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.precision_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.f1_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.recall_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.roc_auc_score(y_true=y_valid[:dev_set], y_score=full_y_pred_svm) * 100))
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)

                # Test metrics
                step_time, step_loss = 0.0, 0.0
                test_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
                test_batches = utils.make_batches(test_set, self.batch_size)
                average_test_loss = 0.0
                full_y_pred_svm_test = np.array([])
                for batch_idx, (batch_start, batch_end) in enumerate(test_batches):
                    test_x_opcode = x_test_opcode[batch_start:batch_end]
                    test_x_assembly = x_test_assembly[batch_start:batch_end]
                    test_y = y_test[batch_start:batch_end]
                    test_seq_len = x_test_seq_len[batch_start:batch_end]
                    feed_dict = {
                        self.X_opcode: test_x_opcode,
                        self.X_assembly: test_x_assembly,
                        self.Y: test_y,
                        self.sequence_length: test_seq_len,
                    }
                    batch_test_loss, batch_y_pred_test = sess.run(
                        [self.loss, self.y_pred_svm], feed_dict=feed_dict)
                    full_y_pred_svm_test = np.append(full_y_pred_svm_test, batch_y_pred_test)
                    average_test_loss += batch_test_loss / len(test_batches)

                message = "test: accuracy_svm %.2f\n" % (
                    mt.accuracy_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: precision_svm %.2f\n" % (
                    mt.precision_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: f1_svm %.2f\n" % (
                    mt.f1_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: recall_svm %.2f\n" % (
                    mt.recall_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: roc_auc_svm %.2f\n" % (
                    mt.roc_auc_score(y_true=y_test[:test_set], y_score=full_y_pred_svm_test) * 100)
                message += "-----------------------------------------------------\n"
                outFile.write("%.2f\n" % (mt.accuracy_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.precision_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n"
                              % (mt.f1_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.recall_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.roc_auc_score(y_true=y_test[:test_set], y_score=full_y_pred_svm_test) * 100))
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)

        writer.close()
        message = "Finish training process.\n"
        utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
    outFile.close()
bkgPerBatch = bkgPerBatch.astype(int)

# fill lists of all events and files
b_events, b_files = [], []
for file, nEvents in bkgCounts.items():
    for evt in range(nEvents):
        b_events.append(evt)
        b_files.append(file)
e_events, e_files = [], []
for file, nEvents in eCounts.items():
    for evt in range(nEvents):
        e_events.append(evt)
        e_files.append(file)

# make batches
bkg_event_batches, bkg_file_batches = utils.make_batches(
    b_events, b_files, bkgPerBatch, num_batches)
e_event_batches, e_file_batches = utils.make_batches(
    e_events, e_files, ePerBatch, num_batches)

# create the discriminator
discriminator = build_discriminator(img_shape=(40, 40, 3), n_classes=2)
# create the generator
generator = build_generator(latent_dim)
# create the gan
gan_model = build_gan(generator, discriminator)

num_examples = num_batches * batch_size
print(utils.bcolors.YELLOW + 'Training on:' + utils.bcolors.ENDC)
print(utils.bcolors.GREEN + 'Number of examples: ' + utils.bcolors.ENDC,
      num_examples)
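# utils.make_batches here pairs a flat list of per-file event indices with
# their file ids and carves out num_batches consecutive slices of
# events_per_batch[i] events each. Judging by how the outputs are used (an
# event range plus a file list per batch), one plausible sketch is:
import numpy as np

def make_batches(events, files, events_per_batch, num_batches):
    event_batches, file_batches = [], []
    start = 0
    for i in range(num_batches):
        end = start + events_per_batch[i]
        # first and last event index covered by this batch, plus its files
        event_batches.append([events[start], events[end - 1]])
        file_batches.append(sorted(set(files[start:end])))
        start = end
    return np.array(event_batches), file_batches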
from utils import load_mnist, make_batches, display_mnist_image
from FullyConnectedNet import FullyConnectedNet
import numpy as np
from tqdm import tqdm

batch_size = 32
learning_rate = 0.005
n_epochs = 20

X_train, X_test, y_train, y_test = load_mnist()
X_train = X_train / 255
X_test = X_test / 255

np.random.seed(42)
train_batches = make_batches(X_train, y_train, batch_size)
test_batches = make_batches(X_test, y_test, batch_size)

model = FullyConnectedNet(learning_rate)
for i in tqdm(range(n_epochs)):
    epoch_loss = 0
    for x_batch, y_batch in train_batches:
        pred, loss = model.forward(x_batch.T, target=y_batch)
        model.backward()
        model.update_parameters()
        epoch_loss += loss
    print("training loss for epoch", (i + 1), "is", epoch_loss / y_train.size)

total_correct = 0
for x_batch, y_batch in test_batches:
    predicted, loss = model.forward(x_batch.T, target=y_batch)
    predicted_classes = np.argmax(predicted, axis=0)
bkgPerBatch = bkgPerBatch.astype(int)

# fill lists of all events and files
b_events, b_files = [], []
for file, nEvents in bkgCounts.items():
    for evt in range(nEvents):
        b_events.append(evt)
        b_files.append(file)
e_events, e_files = [], []
for file, nEvents in eCounts.items():
    for evt in range(nEvents):
        e_events.append(evt)
        e_files.append(file)

# make batches
bkg_event_batches, bkg_file_batches = utils.make_batches(
    b_events, b_files, bkgPerBatch, nBatches)
e_event_batches, e_file_batches = utils.make_batches(
    e_events, e_files, ePerBatch, nBatches)

# train/validation split
train_e_event_batches, val_e_event_batches, train_e_file_batches, val_e_file_batches = \
    train_test_split(e_event_batches, e_file_batches,
                     test_size=val_size, random_state=42)
train_bkg_event_batches, val_bkg_event_batches, train_bkg_file_batches, val_bkg_file_batches = \
    train_test_split(bkg_event_batches, bkg_file_batches,
                     test_size=val_size, random_state=42)

# count events in each batch
nSavedETrain = utils.count_events(train_e_file_batches, train_e_event_batches, eCounts)
nSavedEVal = utils.count_events(val_e_file_batches, val_e_event_batches, eCounts)
nSavedBkgTrain = utils.count_events(train_bkg_file_batches, train_bkg_event_batches, bkgCounts)
nSavedBkgVal = utils.count_events(val_bkg_file_batches, val_bkg_event_batches, bkgCounts)

# add background events to validation data
# to keep ratio e/bkg equal to that in original dataset
if nSavedEVal * 1.0 / (nSavedEVal + nSavedBkgVal) > fE:
## load data
train_x, train_y = ds.load('train')
valid_x, valid_y = ds.load('valid')

## setup model
idim = len(train_x[0][0])
odim = max(train_y) + 1
model = RNN(idim, 300, odim, 'lstm')

## setup optimizer
#train_w = utils.balance_prior(train_y)
params = model.get_theta()
#args, n_batches = utils.make_batches([train_x, train_y], None)
#opt = climin.Rprop(params, model.opt_fprime, args=args, init_step=0.0001)
args, n_batches = utils.make_batches([train_x, train_y], 30)
opt = climin.Adadelta(params, model.opt_fprime, offset=1e-6, args=args)
#args, n_batches = utils.make_batches([train_x, train_y], 30)
#opt = climin.rmsprop.RmsProp(params, model.opt_fprime, step_rate=0.01, args=args)

## perform optimization
epoch = 0
start = time.time()
for info in opt:
    if info['n_iter'] % n_batches == 0:
        epoch += 1
    # end
    if epoch == 100:
        break
    # print performance
    if epoch % 1 == 0:
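## climin optimizers consume `args` as an (ideally endless) iterable of
## (positional_args, kwargs) pairs, one per update. utils.make_batches above
## returns (args, n_batches); treating its second argument as a batch size,
## a compatible sketch (the real helper is not shown) is:
import itertools

def make_batches(data, batch_size):
    xs, ys = data
    if batch_size is None:
        # full-batch mode: feed the same (x, y) pair forever
        return itertools.repeat(((xs, ys), {})), 1
    slices = [((xs[i:i + batch_size], ys[i:i + batch_size]), {})
              for i in range(0, len(xs), batch_size)]
    return itertools.cycle(slices), len(slices)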
def fit(self, x):
    if (not hasattr(self, 'epoch')) or self.epoch == 0:
        self._init()
        with self.tf_graph.as_default():
            self._build_model()
            self.tf_session.run(tf.global_variables_initializer())
            if self.load():
                print('loaded the checkpoint!')
            else:
                print('cannot load the checkpoint; initialized all the variables')

    # drop the tail so the data is a whole number of batches
    num_data = x.shape[0] - x.shape[0] % self.d_batch_size
    batches = make_batches(num_data, self.d_batch_size)
    best_is = 0.0
    while self.epoch < self.num_epochs:
        for batch_idx, (batch_start, batch_end) in enumerate(batches):
            x_batch = x[batch_start:batch_end]
            if self.same_input:
                # all generators share one z batch
                z_batch = self.z_prior.sample(
                    [self.g_batch_size, self.num_z]).astype(np.float32)
                z_batch = np.vstack([z_batch] * self.num_gens)
            else:
                z_batch = self.z_prior.sample(
                    [self.g_batch_size * self.num_gens,
                     self.num_z]).astype(np.float32)

            # update discriminator D
            d_bin_loss, d_mul_loss, d_loss, _ = self.tf_session.run(
                [self.d_bin_loss, self.d_mul_loss, self.d_loss, self.d_opt],
                feed_dict={self.x: x_batch, self.z: z_batch})

            # update generator G
            g_bin_loss, g_mul_loss, g_loss, _ = self.tf_session.run(
                [self.g_bin_loss, self.g_mul_loss, self.g_loss, self.g_opt],
                feed_dict={self.z: z_batch})

        self.epoch += 1
        print("Epoch: [%4d/%4d] d_bin_loss: %.5f, d_mul_loss: %.5f, "
              "d_loss: %.5f, g_bin_loss: %.5f, g_mul_loss: %.5f, "
              "g_loss: %.5f" % (self.epoch, self.num_epochs, d_bin_loss,
                                d_mul_loss, d_loss, g_bin_loss, g_mul_loss,
                                g_loss))
        if self.epoch % 10 == 0:
            self._samples(self.sample_fp.format(epoch=self.epoch + 1))
            if not os.path.exists(self.checkpoint_dir):
                os.makedirs(self.checkpoint_dir)
            self.saver.save(
                self.tf_session,
                os.path.join(self.checkpoint_dir, "classifier_mode_checkpoint"))
    self._samples_by_gen(self.sample_by_gen_fp)
def test_mnn():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    meld_op = meld.MELD()
    EES = meld_op.fit_transform(G, labels)
    meld.VertexFrequencyCluster().fit_transform(G=G, RES=labels, EES=EES)
def fit(self, x):
    if (not hasattr(self, 'epoch')) or self.epoch == 0:
        self._init()
        with self.tf_graph.as_default():
            self._build_model()
            self.tf_session.run(tf.global_variables_initializer())

    # each step consumes one batch per Mover/Critic/Generator sub-update
    batch_idx_step = max(self.num_training_mover,
                         self.num_training_generator,
                         self.num_training_critic)
    num_data = x.shape[0] - x.shape[0] % (self.batch_size * batch_idx_step)
    # cap the work per epoch on very large datasets
    if num_data > 150000:
        num_data = self.num_iter_per_epoch
    batches = make_batches(num_data, self.batch_size * batch_idx_step)

    n_iter = 0.0
    while self.epoch < self.num_epochs:
        for batch_idx in np.arange(0, len(batches), batch_idx_step):
            n_iter += 1.0
            # linear learning-rate decay
            if self.decay:
                lr = self.learning_rate - self.learning_rate * n_iter / self.decay_steps
            else:
                lr = self.learning_rate
            # anneal gamma from gamma0 to gamma1 over gamma_steps epochs
            if self.epoch < self.gamma_steps:
                gamma = self.gamma0 + self.epoch * (self.gamma1 - self.gamma0) / self.gamma_steps
            else:
                gamma = self.gamma1

            # update Mover
            for it in range(self.num_training_mover):
                z_batch = self.z_prior.sample(
                    [self.batch_size, self.num_z]).astype(np.float32)
                batch_start, batch_end = batches[batch_idx + it]
                x_batch = x[batch_start:batch_end]
                self.tf_session.run(self.m_opt,
                                    feed_dict={self.gamma: gamma,
                                               self.lr: lr,
                                               self.x: x_batch,
                                               self.z: z_batch})

            # update Critic
            for it in range(self.num_training_critic):
                z_batch = self.z_prior.sample(
                    [self.batch_size, self.num_z]).astype(np.float32)
                batch_start, batch_end = batches[batch_idx + it]
                x_batch = x[batch_start:batch_end]
                self.tf_session.run(self.c_opt,
                                    feed_dict={self.gamma: gamma,
                                               self.lr: lr,
                                               self.x: x_batch,
                                               self.z: z_batch})

            # update Generator
            for _ in range(self.num_training_generator):
                z_batch = self.z_prior.sample(
                    [self.batch_size, self.num_z]).astype(np.float32)
                self.tf_session.run(self.g_opt,
                                    feed_dict={self.gamma: gamma,
                                               self.lr: lr,
                                               self.z: z_batch})

        self._samples(self.samples_fp.format(epoch=self.epoch + 1),
                      num_samples=100)
        idx = np.random.randint(
            x.shape[0], size=(100 // self.batch_size + 1) * self.batch_size)
        self._samples_h(self.samples_h_fp.format(epoch=self.epoch + 1),
                        x[idx], num_samples=100)
        print('Epoch %d: done.' % (self.epoch + 1))
        self.epoch += 1
def train(model, x, y, x_adv, aux_adv,
          batch_size=None, epochs=1, verbose=1, callbacks=None,
          validation_split=0., shuffle=True, class_weight=None,
          sample_weight=None):
    # Validate user data.
    x, y, sample_weights = model._standardize_user_data(
        x, y,
        sample_weight=sample_weight,
        class_weight=class_weight,
        batch_size=batch_size)
    ins = x + y + sample_weights

    # Prepare validation data.
    do_validation = False
    if validation_split and 0. < validation_split < 1.:
        do_validation = True
        if hasattr(x[0], 'shape'):
            split_at = int(x[0].shape[0] * (1. - validation_split))
        else:
            split_at = int(len(x[0]) * (1. - validation_split))
        x, val_x = (slice_arrays(x, 0, split_at), slice_arrays(x, split_at))
        y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at))
        sample_weights, val_sample_weights = (
            slice_arrays(sample_weights, 0, split_at),
            slice_arrays(sample_weights, split_at))
        val_ins = val_x + val_y + val_sample_weights
    else:
        val_ins = []

    # ________________________________________________________________________
    # Fit
    num_train_samples = x[0].shape[0]
    index_array = np.arange(num_train_samples)

    # Loop over epochs
    for epoch in range(epochs):
        print('Epoch {0}'.format(epoch))
        if shuffle:
            np.random.shuffle(index_array)
        batches = make_batches(num_train_samples, batch_size)
        # Loop over batches
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            print('.. batch {0}'.format(batch_index))
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_arrays(ins, batch_ids)
            assert isinstance(ins_batch, list) and len(ins_batch) == 1 + 2 + 2

            # Add noise (`add_noise` is expected to be defined in the
            # enclosing scope)
            if add_noise:
                noise = x_adv[np.random.randint(0, x_adv.shape[0],
                                                ins_batch[0].shape[0])]
                noise_reg = np.zeros_like(ins_batch[1]) + 100.  # mask_value is set to 100
                noise_discr = np.zeros_like(ins_batch[2])
                noise_reg_w = np.ones_like(ins_batch[3])
                noise_discr_w = np.ones_like(ins_batch[3])
                ins_noise = [noise, noise_reg, noise_discr,
                             noise_reg_w, noise_discr_w]
                ins_batch = merge_arrays(ins_batch, ins_noise)

            model._make_train_function()
            f = model.train_function
            outs = f(ins_batch)
        b_files.append(file)

#e_events, e_files = [], []
#for file, nEvents in eCounts.items():
#    for evt in range(nEvents):
#        e_events.append(evt)
#        e_files.append(file)

availableBkg = sum(list(bkgCounts.values()))
nBatches = int(availableBkg / batch_size)
bkgPerBatch = np.asarray([batch_size] * nBatches)
bkgPerBatch = bkgPerBatch.astype(int)
print(availableBkg, nBatches)

# make batches
bkg_event_batches, bkg_file_batches = utils.make_batches(
    b_events, b_files, bkgPerBatch, nBatches)
#e_event_batches, e_file_batches = utils.make_batches(e_events, e_files, ePerBatch, nBatches)

val_e_file_batches = [list(set([-1])) for _ in range(nBatches)]
val_e_event_batches = np.array([[0, 0]] * nBatches)

print(bkg_file_batches)
print(bkg_event_batches)

# reversed to save electrons
np.save(outputDir + 'e_files_testBatches', bkg_file_batches)
np.save(outputDir + 'e_events_testBatches', bkg_event_batches)
np.save(outputDir + 'bkg_files_testBatches', val_e_file_batches)
np.save(outputDir + 'bkg_events_testBatches', val_e_event_batches)