def save_generator_output(self, sess, e, fixed_z, fixed_y):
    """Run the generator on the fixed latent/label batch and save a grid image.

    The output filename encodes dataset type, GAN loss type and the 1-based
    epoch number.
    """
    gen_op = network.generator(self.latent_z,
                               y=self.inputs_y,
                               embed_y=True,
                               is_training=False,
                               use_bn=True)
    samples = sess.run(gen_op,
                       feed_dict={
                           self.latent_z: fixed_z,
                           self.inputs_y: fixed_y
                       })
    fn = '{:s}-{:s}-e{:03d}.png'.format(self.dataset_type,
                                        self.gan_loss_type, e + 1)
    utils.validation(samples, self.val_block_size,
                     os.path.join(self.assets_dir, fn))
    return
def test(model):
    """Extract features for every test batch and pass them to validation()."""
    feats = []
    targets = []
    for batch_id in dataloader.get_test_ids():
        batch_x, batch_y = dataloader.get_test_batch(batch_id)
        batch_x = batch_x.to(device)
        feat_arr = model(batch_x).cpu().detach().numpy()
        # one feature vector per sample
        feats.extend(feat_arr)
        targets += batch_y
    validation(feats, targets)
def main():
    """End-to-end pipeline: parse args, load data, build, train, test, save."""
    # Command-line configuration
    (data_dir, save_dir, arch, learning_rate, hidden_units, epochs,
     device) = processing_arguments()
    # Image datasets / loaders
    dataloaders, image_data = loading_data(data_dir)
    # Model, classifier, criterion and optimizer
    model, classifier, criterion, optimizer = prepare_model(
        arch, hidden_units, learning_rate)
    # Fit on the training split
    do_deep_training(model, dataloaders['train'], epochs, criterion,
                     optimizer, device)
    # Report loss and accuracy on the held-out split
    validation(model, dataloaders['test'], criterion, device)
    # Persist the checkpoint
    saving_model(arch, model, save_dir, image_data['train'], classifier,
                 optimizer, epochs)
def index(request):
    """Login page: render the form on GET, authenticate and route by role on POST."""
    if request.method == 'GET':
        return render(request, 'index.html', {'error_msg': ''})
    if request.method == 'POST':
        username = request.POST.get("username")
        password = request.POST.get("password")
        login_data = utils.validation(username, password)
        if not login_data['is_login']:
            # Authentication failed: redisplay the form with the reason.
            return render(request, 'index.html',
                          {'error_msg': login_data['error_msg']})
        request.session['is_login'] = True
        request.session['username'] = login_data['username']
        request.session['account'] = username
        request.session['user_type'] = login_data['user_type']
        # Route to the role-specific dashboard.
        summary_urls = {
            'accounts': '/account/summary',
            'information': '/info/summary',
            'finance': '/finance/summary',
            'tender': '/tender/summary',
            'admin': '/admin/summary',
        }
        user_type = login_data['user_type']
        if user_type in summary_urls:
            return redirect(summary_urls[user_type])
def post(self):
    """Handle the signup form: validate fields, reject duplicates, create the user."""
    username = self.request.get('username')
    password = self.request.get('password')
    verify = self.request.get('verify')
    email = self.request.get('email')

    # Field-level validation (format / password confirmation).
    error = utils.validation(username, password, verify, email)
    if error:
        self.template('signup.html', other_error=error,
                      username=username, email=email)
        return

    # Username must be unique.
    if User.query(User.username == username).get():
        error = "Sorry, this username already exists"
        self.template('signup.html', other_error=error,
                      username=username, email=email)
        return

    # Create the account and log the new user in via a signed cookie.
    hashed = utils.make_pw_hash(username, password)
    account = User(parent=user_key(), username=username,
                   pw_hash=hashed, email=email)
    # put() returns the model's Key; put().id() returns the entity's id.
    new_user_id = account.put().id()
    self.set_secure_cookie('user_id', str(new_user_id))
    logging.error("NEW USER WITH ID " + str(new_user_id) + " CREATED")
    self.redirect('/user/%s' % username)
}) else: sess.run(train_step, feed_dict={ x: batch_features, y: batch_labels, rate: 0.5 }) if step % 100 == 0: train_loss = sess.run(loss, feed_dict={ x: batch_features, y: batch_labels, rate: 1. }) accuracy, val_loss = utils.validation(sess, acc, loss, x, y, rate, config.VAL_ANNOTATION_FILE, config.VAL_DIR, 16) print( "step %s: the training loss is %s, validation loss is %s, validation accuracy is %s" % (step, train_loss, val_loss, accuracy)) if step % 1000 == 0: if not os.path.exists(config.CHECKDIR): os.mkdir(config.CHECKDIR) saver.save(sess, config.CHECKFILE, global_step=step) print('writing checkpoint at step %s' % step) step += 1
def GAN():
    """Build BEGAN train/valid/test graphs, then run training or testing
    depending on FLAGS.mode.

    Relies on module-level globals (not visible in this chunk): FLAGS,
    learning_rate, logs_dir, train_dir, test_dir, tick_interval, MAX_EPOCH,
    G (model module), mt (dataset module), utils, tf.
    """
    # Graph Part #
    print("Graph initialization...")
    with tf.device(FLAGS.device):
        # Three views of the same variables: reuse=None creates them,
        # reuse=True shares them for the validation and test graphs.
        with tf.variable_scope("model", reuse=None):
            m_train = G.BEGAN(batch_size=FLAGS.tr_batch_size,
                              is_training=True,
                              num_keys=FLAGS.num_keys,
                              input_length=FLAGS.hidden_state_size,
                              output_length=FLAGS.predict_size,
                              learning_rate=learning_rate)
        with tf.variable_scope("model", reuse=True):
            m_valid = G.BEGAN(batch_size=FLAGS.val_batch_size,
                              is_training=False,
                              num_keys=FLAGS.num_keys,
                              input_length=FLAGS.hidden_state_size,
                              output_length=FLAGS.predict_size,
                              learning_rate=learning_rate)
        with tf.variable_scope("model", reuse=True):
            m_test = G.BEGAN(batch_size=FLAGS.test_batch_size,
                             is_training=False,
                             num_keys=FLAGS.num_keys,
                             input_length=FLAGS.hidden_state_size,
                             output_length=FLAGS.predict_size,
                             learning_rate=learning_rate)
    print("Done")

    # Summary Part #
    print("Setting up summary op...")
    g_loss_ph = tf.placeholder(dtype=tf.float32)
    d_loss_ph = tf.placeholder(dtype=tf.float32)
    # NOTE(review): TensorBoard tag "discriminatr_loss" is misspelled; left
    # as-is because renaming it would change the logged tag name.
    loss_summary_op_d = tf.summary.scalar("discriminatr_loss", d_loss_ph)
    loss_summary_op_g = tf.summary.scalar("generator_loss", g_loss_ph)
    valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/',
                                                 max_queue=2)
    train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/',
                                                 max_queue=2)
    print("Done")

    # Model Save Part #
    print("Setting up Saver...")
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(logs_dir)
    print("Done")

    # Session Part #
    print("Setting up Data Reader...")
    # NOTE(review): the validation reader points at test_dir, not a separate
    # validation directory — confirm this is intentional.
    validation_dataset_reader = mt.Dataset(
        directory=test_dir,
        batch_size=FLAGS.val_batch_size,
        is_batch_zero_pad=FLAGS.is_batch_zero_pad,
        hidden_state_size=FLAGS.hidden_state_size,
        predict_size=FLAGS.predict_size,
        num_keys=FLAGS.num_keys,
        tick_interval=tick_interval,
        step=FLAGS.slice_step)
    test_dataset_reader = mt.Dataset(directory=test_dir,
                                     batch_size=FLAGS.test_batch_size,
                                     is_batch_zero_pad=FLAGS.is_batch_zero_pad,
                                     hidden_state_size=FLAGS.hidden_state_size,
                                     predict_size=FLAGS.predict_size,
                                     num_keys=FLAGS.num_keys,
                                     tick_interval=tick_interval,
                                     step=FLAGS.slice_step)
    print("done")

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    # Grow GPU memory on demand instead of grabbing it all up front.
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    if ckpt and ckpt.model_checkpoint_path:  # model restore
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
    else:
        sess.run(tf.global_variables_initializer()
                 )  # if the checkpoint doesn't exist, do initialization

    if FLAGS.mode == "train":
        train_dataset_reader = mt.Dataset(
            directory=train_dir,
            batch_size=FLAGS.tr_batch_size,
            is_batch_zero_pad=FLAGS.is_batch_zero_pad,
            hidden_state_size=FLAGS.hidden_state_size,
            predict_size=FLAGS.predict_size,
            num_keys=FLAGS.num_keys,
            tick_interval=tick_interval,
            step=FLAGS.slice_step)
        for itr in range(MAX_EPOCH):
            # One optimization step; run_epoch returns the feed_dict it used,
            # so the losses below are evaluated on the same batch.
            feed_dict = utils.run_epoch(train_dataset_reader,
                                        FLAGS.tr_batch_size, m_train, sess)
            if itr % 100 == 0:
                if FLAGS.use_began_loss:
                    train_loss_d, train_loss_g, train_pred = sess.run(
                        [m_train.loss_d, m_train.loss_g, m_train.predict],
                        feed_dict=feed_dict)
                    train_summary_str_d, train_summary_str_g = sess.run(
                        [loss_summary_op_d, loss_summary_op_g],
                        feed_dict={
                            g_loss_ph: train_loss_g,
                            d_loss_ph: train_loss_d
                        })
                    # NOTE(review): only the generator summary is written here;
                    # train_summary_str_d is computed but never added.
                    train_summary_writer.add_summary(train_summary_str_g, itr)
                    print("Step : %d TRAINING LOSS *****************" % (itr))
                    print("Dicriminator_loss: %g\nGenerator_loss: %g" %
                          (train_loss_d, train_loss_g))
            if itr % 1000 == 0:
                if FLAGS.use_began_loss:
                    valid_loss_d, valid_loss_g, valid_pred = utils.validation(
                        validation_dataset_reader, FLAGS.val_batch_size,
                        m_valid, FLAGS.hidden_state_size, FLAGS.predict_size,
                        sess, logs_dir, itr, tick_interval)
                    valid_summary_str_d, valid_summary_str_g = sess.run(
                        [loss_summary_op_d, loss_summary_op_g],
                        feed_dict={
                            g_loss_ph: valid_loss_g,
                            d_loss_ph: valid_loss_d
                        })
                    # NOTE(review): only the discriminator summary is written
                    # here; valid_summary_str_g is computed but never added.
                    valid_summary_writer.add_summary(valid_summary_str_d, itr)
                    print("Step : %d VALIDATION LOSS ***************" % (itr))
                    print("Dicriminator_loss: %g\nGenerator_loss: %g" %
                          (valid_loss_d, valid_loss_g))
            # Periodic qualitative test (skipped at itr 0) and checkpointing.
            if itr % 1000 == 0 and itr != 0:
                utils.test_model(test_dataset_reader, FLAGS.test_batch_size,
                                 m_test, FLAGS.predict_size, sess, logs_dir,
                                 itr, tick_interval, 5)
            if itr % 1000 == 0:
                saver.save(sess, logs_dir + "/model.ckpt", itr)
    if FLAGS.mode == "test":
        utils.test_model(test_dataset_reader, FLAGS.test_batch_size, m_test,
                         FLAGS.predict_size, sess, logs_dir, 9999,
                         tick_interval, 10)
import read_n_write as rw import utils import models base_dir = os.path.dirname(__file__) DATA_PATH = sys.argv[1] LOG_FOLDER = './model_log' CHECK_POINT_NAME = LOG_FOLDER + '/model.{epoch:04d}-{val_acc:.4f}.h5' LOG_NAME = LOG_FOLDER + '/log.csv' PATIENCE = 50 EPOCH = 2000 feats, lables, _ = rw.read_dataset(DATA_PATH, shuffle=True) X_train, Y_train, X_val, Y_val = utils.validation(feats, lables, 0.05) train_data_gen = ImageDataGenerator(rotation_range=40, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest') train_data_gen.fit(X_train) val_data_gen = ImageDataGenerator() callback = [ TensorBoard(), CSVLogger(LOG_NAME, append=True),
def train(self):
    """Train the GAN on MNIST with WGAN-style weight clipping, training the
    discriminator self.d_train_freq times per generator update.

    Saves a grid of generated samples periodically and a loss plot at the
    end. Reads instance attributes set up elsewhere in the class:
    val_block_size, d_train_freq, epochs, mnist_loader, batch_size, z_dim,
    inputs_x, inputs_z, d_weight_clip, d_opt, g_opt, d_loss, g_loss,
    print_every, save_every, assets_dir, dataset_type.
    """
    # Number of images in one saved sample grid (square block).
    val_size = self.val_block_size * self.val_block_size
    steps = 0
    losses = []
    # D runs every inner step but G only every d_train_freq steps, so the
    # epoch count is stretched to keep G updates comparable.
    new_epochs = self.d_train_freq * self.epochs
    start_time = time.time()
    with tf.Session() as sess:
        # reset tensorflow variables
        sess.run(tf.global_variables_initializer())

        # start training
        for e in range(new_epochs):
            for ii in range(self.mnist_loader.train.num_examples //
                            self.batch_size):
                # no need labels
                batch_x, _ = self.mnist_loader.train.next_batch(
                    self.batch_size)

                # rescale images to -1 ~ 1
                batch_x = np.reshape(batch_x, (-1, 28, 28, 1))
                batch_x = batch_x * 2.0 - 1.0

                # Sample random noise for G
                batch_z = np.random.uniform(-1, 1,
                                            size=(self.batch_size,
                                                  self.z_dim))
                fd = {self.inputs_x: batch_x, self.inputs_z: batch_z}

                # Run optimizers (train D more than G)
                _ = sess.run(self.d_weight_clip)
                _ = sess.run(self.d_opt, feed_dict=fd)
                if ii % self.d_train_freq == 0:
                    _ = sess.run(self.g_opt, feed_dict=fd)

                # print losses
                if steps % self.print_every == 0:
                    # At the end of each epoch, get the losses and print them out
                    train_loss_d = self.d_loss.eval({
                        self.inputs_x: batch_x,
                        self.inputs_z: batch_z
                    })
                    train_loss_g = self.g_loss.eval(
                        {self.inputs_z: batch_z})
                    print(
                        "Epoch {}/{}...".format(e + 1, self.epochs),
                        "Discriminator Loss: {:.4f}...".format(
                            train_loss_d),
                        "Generator Loss: {:.4f}".format(train_loss_g))
                    losses.append((train_loss_d, train_loss_g))
                steps += 1

            # save generation results at every epochs
            if e % (self.d_train_freq * self.save_every) == 0:
                val_z = np.random.uniform(-1, 1,
                                          size=(val_size, self.z_dim))
                # Reuse the generator graph in inference mode.
                val_out = sess.run(network.generator(self.inputs_z,
                                                     reuse=True,
                                                     is_training=False),
                                   feed_dict={self.inputs_z: val_z})
                # Epoch index in the filename is in un-stretched epochs.
                image_fn = os.path.join(
                    self.assets_dir,
                    '{:s}-val-e{:03d}.png'.format(
                        self.dataset_type, (e // self.d_train_freq + 1)))
                utils.validation(val_out, self.val_block_size, image_fn,
                                 color_mode='L')

    end_time = time.time()
    elapsed_time = end_time - start_time

    # save losses as image
    losses_fn = os.path.join(self.assets_dir,
                             '{:s}-losses.png'.format(self.dataset_type))
    utils.save_losses(losses, ['Discriminator', 'Generator'], elapsed_time,
                      losses_fn)
    return
batch_size=train_batch_size, shuffle=True, num_workers=8) val_data_loader = DataLoader(ValData(val_data_dir, val_filename), batch_size=val_batch_size, shuffle=False, num_workers=8) num_labeled = train_batch_size * len( lbl_train_data_loader) # number of labeled images num_unlabeled = train_batch_size * len( unlbl_train_data_loader) # number of unlabeled images # --- Previous PSNR and SSIM in testing --- # net.eval() old_val_psnr, old_val_ssim = validation(net, val_data_loader, device, category, exp_name) print('old_val_psnr: {0:.2f}, old_val_ssim: {1:.4f}'.format( old_val_psnr, old_val_ssim)) net.train() #intializing GPStruct gp_struct = GPStruct(num_labeled, num_unlabeled, train_batch_size, version, kernel_type) for epoch in range(epoch_start, num_epochs): psnr_list = [] start_time = time.time() adjust_learning_rate(optimizer, epoch, category=category) #------------------------------------------------------------------------------------------------------------- #Labeled phase if lambgp != 0 and use_GP_inlblphase == True: gp_struct.gen_featmaps(lbl_train_data_loader, net, device)
output = model.forward(images) loss = criterion(output, labels) loss.backward() optimizer.step() running_loss += loss.item() if steps % print_every == 0: # Make sure network is in eval mode for inference model.eval() # Turn off gradients for validation, saves memory and computations with torch.no_grad(): test_loss, accuracy = utils.validation(model, valid_dataloaders, criterion) print( "Epoch: {}/{}.. ".format(e + 1, epochs), "Training Loss: {:.3f}.. ".format(running_loss / print_every), "Validation Loss: {:.3f}.. ".format(test_loss / len(valid_dataloaders)), "Validation Accuracy: {:.3f}".format(accuracy)) running_loss = 0 # Make sure training is back on model.train() #save model class index
recon_batch, mu, logvar, Z = net(x) loss = net.loss_function(recon_batch, x, mu, logvar) if args.l1 > 0: loss += l1_regularization(net) * args.l1 loss.backward() optimizer.step() train_loss += loss.item() lr_scheduler.step() train_loss = train_loss / (batch_idx + 1) # val_loss, loss_mse = validation(net, val_loader, args.sigma, args) val_loss = validation(net, val_loader, args.sigma, args) logs['train_loss'].append(train_loss) logs['val_loss'].append(val_loss) # print('Train loss: {:5f} -- Val loss {:5f} --- loss_mse {:5f}'.format(train_loss, val_loss, loss_mse)) print('Train loss: {:5f} -- Val loss {:5f}'.format(train_loss, val_loss)) test_loss, test_psnr, img_pairs = test(net, test_loader, args) logs['test_loss'] = test_loss logs['test_psnr'] = test_psnr logs['img_pairs'] = img_pairs print('Test loss: {:5f} -- Test PSNR {:5f}'.format(test_loss, test_psnr)) torch.save( logs, '../weights/logs_{}_epoch_{}_lr_{}_sigma_{:5f}_bs_{}_pw_{}_wd_{:5f}_valloss_{:5f}_testloss_{:5f}_testpnsr_{:5f}.pth' .format(args.net, args.epochs, args.lr, args.sigma, args.batch_size,
def train(self):
    """Train the AC-GAN on MNIST: one D, G and auxiliary-classifier update
    per batch, with periodic sample grids and a final loss plot.

    Reads instance attributes set up elsewhere in the class: val_block_size,
    z_dim, y_dim, epochs, mnist_loader, batch_size, inputs_x, inputs_y,
    inputs_z, d_opt, g_opt, ac_opt, d_loss, g_loss, ac_loss, print_every,
    save_every, assets_dir, dataset_type.
    """
    # Fixed evaluation batch: val_block_size^2 noise vectors whose one-hot
    # labels cycle through the y_dim classes, so every saved grid shows the
    # same latents/labels and is comparable across epochs.
    n_fixed_samples = self.val_block_size * self.val_block_size
    fixed_z = np.random.uniform(-1, 1, size=(n_fixed_samples, self.z_dim))
    fixed_y = np.zeros(shape=[n_fixed_samples, self.y_dim])
    for s in range(n_fixed_samples):
        loc = s % self.y_dim
        fixed_y[s, loc] = 1

    steps = 0
    losses = []
    start_time = time.time()
    with tf.Session() as sess:
        # reset tensorflow variables
        sess.run(tf.global_variables_initializer())

        # start training
        for e in range(self.epochs):
            for ii in range(self.mnist_loader.train.num_examples //
                            self.batch_size):
                batch_x, batch_y = self.mnist_loader.train.next_batch(
                    self.batch_size)

                # rescale images to -1 ~ 1
                batch_x = np.reshape(batch_x, (-1, 28, 28, 1))
                batch_x = batch_x * 2.0 - 1.0

                # Sample random noise for G
                batch_z = np.random.uniform(-1, 1,
                                            size=(self.batch_size,
                                                  self.z_dim))
                fd = {
                    self.inputs_x: batch_x,
                    self.inputs_y: batch_y,
                    self.inputs_z: batch_z
                }

                # Run optimizers
                _ = sess.run(self.d_opt, feed_dict=fd)
                _ = sess.run(self.g_opt, feed_dict=fd)
                _ = sess.run(self.ac_opt, feed_dict=fd)

                # print losses
                if steps % self.print_every == 0:
                    # At the end of each epoch, get the losses and print them out
                    train_loss_d = self.d_loss.eval(fd)
                    train_loss_g = self.g_loss.eval(fd)
                    train_loss_ac = self.ac_loss.eval(fd)
                    print(
                        "Epoch {}/{}...".format(e + 1, self.epochs),
                        "Discriminator Loss: {:.4f}...".format(
                            train_loss_d),
                        "Generator Loss: {:.4f}...".format(train_loss_g),
                        "Auxilary Classifier Loss: {:.4f}...".format(
                            train_loss_ac))
                    losses.append(
                        (train_loss_d, train_loss_g, train_loss_ac))
                steps += 1

            # save generation results at every epochs
            if e % self.save_every == 0:
                # Generate from the fixed batch, reusing the graph in
                # inference mode.
                val_out = sess.run(network.generator(self.inputs_z,
                                                     y=self.inputs_y,
                                                     reuse=True,
                                                     is_training=False),
                                   feed_dict={
                                       self.inputs_y: fixed_y,
                                       self.inputs_z: fixed_z
                                   })
                image_fn = os.path.join(
                    self.assets_dir,
                    '{:s}-val-e{:03d}.png'.format(self.dataset_type,
                                                  e + 1))
                utils.validation(val_out, self.val_block_size, image_fn,
                                 color_mode='L')

    end_time = time.time()
    elapsed_time = end_time - start_time

    # save losses as image
    losses_fn = os.path.join(self.assets_dir,
                             '{:s}-losses.png'.format(self.dataset_type))
    utils.save_losses(losses, ['Discriminator', 'Generator', 'Auxilary'],
                      elapsed_time, losses_fn)
    return
batch_size=val_batch_size, shuffle=False, num_workers=24) # --- Define the network --- # net = DeRain_v2() # --- Multi-GPU --- # net = net.to(device) net = nn.DataParallel(net, device_ids=device_ids) # --- Load the network weight --- # net.load_state_dict(torch.load('./{}/{}_best'.format(exp_name, category))) # --- Use the evaluation model in testing --- # net.eval() if os.path.exists('./{}_results/{}/'.format(category, exp_name)) == False: os.mkdir('./{}_results/{}/'.format(category, exp_name)) os.mkdir('./{}_results/{}/rain/'.format(category, exp_name)) print('--- Testing starts! ---') start_time = time.time() val_psnr, val_ssim = validation(net, val_data_loader, device, category, exp_name, save_tag=True) end_time = time.time() - start_time print('val_psnr: {0:.2f}, val_ssim: {1:.4f}'.format(val_psnr, val_ssim)) print('validation time is {0:.4f}'.format(end_time))
loss = Rec_Loss1 + Rec_Loss2 loss.backward() optimizer.step() # --- To calculate average PSNR --- # psnr_list.extend(to_psnr(J, gt)) #if not (batch_id % 100): print( 'Epoch: {}, Iteration: {}, Loss: {}, Rec_Loss1: {}, Rec_loss2: {}'. format(epoch, batch_id, loss, Rec_Loss1, Rec_Loss2)) # --- Calculate the average training PSNR in one epoch --- # train_psnr = sum(psnr_list) / len(psnr_list) # --- Save the network parameters --- # torch.save(net.state_dict(), '/output/haze_current{}'.format(epoch)) # --- Use the evaluation model in testing --- # net.eval() val_psnr, val_ssim = validation(net, val_data_loader, device, category) one_epoch_time = time.time() - start_time print_log(epoch + 1, num_epochs, one_epoch_time, train_psnr, val_psnr, val_ssim, category) # --- update the network weight --- # #if val_psnr >= old_val_psnr: # torch.save(net.state_dict(), '{}_haze_best_{}_{}'.format(category, network_height, network_width)) # old_val_psnr = val_psnr
def train(model, output_size, optimizer_name, trainloader, validationloader, epochs, learning_rate, print_every, device, hidden_layers=None):
    """
    All models used with this training function will have a dropout layer
    between every hidden layer with p=0.1

    Inputs:
    - model: The model object that requires training.
    - output_size: The output size of the network - corresponds to the number of classes
    - optimizer_name: The string name of an optimizer class in torch.optim i.e. "Adam"
    - trainloader: DataLoader object representing the training data
    - validationloader: DataLoader object representing the validation data
    - epochs: Integer representing the number of epochs the model should run through.
    - learning_rate: Float representing the learning rate i.e. 0.001
    - print_every: Integer representing the frequency of printed statistics i.e. Every 50 iterations
    - device: torch device the model and batches are moved to
    - hidden_layers: Defaults to None, this creates a default CustomClassifier
      that will be added to the pretrained model. Otherwise, enter a list
      representing the hidden layer dimensions to be used for the CustomClassifier.

    Raises:
    - AttributeError: if optimizer_name is not a class in torch.optim.

    Outputs:
    - model: Trained model
    """
    model.to(device)  # run on GPU

    # freeze entire pre-loaded Network.
    for param in model.parameters():
        param.requires_grad = False

    ### Building out Custom Classifier Depending on pretrained network
    # input size to custom network: in_features of the first layer of the
    # pretrained classifier.
    input_size = list(model.classifier.named_children())[0][-1].in_features

    if hidden_layers:
        new_classifier = CustomClassifier(input_size, output_size,
                                          hidden_layers,
                                          [0.1] * len(hidden_layers))
    else:
        # default hidden layers; dropout probs set to 0.1 per layer
        default_layers = [input_size // 2, input_size // 4]
        new_classifier = CustomClassifier(input_size, output_size,
                                          default_layers,
                                          [0.1] * len(default_layers))
    new_classifier.to(device)

    # update pretrained model to use new classifier - transfer learning step.
    # parameters for this classifier are NOT frozen.
    model.classifier = new_classifier

    criterion = nn.NLLLoss()

    # Optimizer only concerned with classifier parameters.
    # FIX: look the optimizer class up by name instead of eval() on an
    # interpolated string — same result for valid names ("Adam", "SGD", ...)
    # but no arbitrary-code execution and no float->str->float round trip
    # for the learning rate.
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(model.classifier.parameters(), lr=learning_rate)

    steps = 0
    running_loss = 0
    for e in range(epochs):
        model.train()  # train mode.
        for images, labels in trainloader:
            steps += 1
            images, labels = images.to(device), labels.to(device)  # GPU copy

            # No need for image resize as the image is going through a
            # pre-trained network. Zero out the gradients for every batch.
            optimizer.zero_grad()

            output = model.forward(images)
            loss = criterion(output, labels)
            loss.backward()  # backprop over classifier parameters
            optimizer.step()  # take step

            running_loss += loss.item()

            if steps % print_every == 0:
                model.eval()  # removes dropout

                # no gradient tracking here.
                with torch.no_grad():
                    validation_loss, accuracy = validation(
                        model, validationloader, criterion, device)

                print(
                    "Epoch: {}/{}.. ".format(e + 1, epochs),
                    "Training Loss: {:.3f}.. ".format(running_loss /
                                                      print_every),
                    "Validation Loss: {:.3f}.. ".format(validation_loss),
                    "Validation Accuracy: {:.3f}".format(accuracy))

                running_loss = 0
                model.train()  # enter training mode again.
    return model
def run(
    task: str,
    rnd_seed: int,
    modality: str,
    results_dir: str,
    plots_dir: str,
    triplets_dir: str,
    device: torch.device,
    batch_size: int,
    embed_dim: int,
    epochs: int,
    window_size: int,
    sampling_method: str,
    lmbda: float,
    lr: float,
    p=None,
    show_progress: bool = True,
):
    """Train a SPoSE embedding on triplet data, resuming from the newest
    checkpoint in <results_dir>/model when one exists, and stop early once
    the training loss has plateaued over the last `window_size` epochs.

    Side effects: writes log files, periodic sparse-embedding text dumps,
    .tar checkpoints, final weights, plots, and a results.json summary.
    Relies on module-level globals (not visible in this chunk): multi_proc,
    n_gpus, local_rank, plus the imported helpers (utils, SPoSE, Adam, ...).
    """
    #initialise logger and start logging events
    logger = setup_logging(file='spose_optimization.log',
                           dir=f'./log_files/lmbda_{lmbda}/')
    logger.setLevel(logging.INFO)

    #load triplets into memory
    train_triplets, test_triplets = utils.load_data(
        device=device, triplets_dir=triplets_dir)
    n_items = utils.get_nitems(train_triplets)

    #load train and test mini-batches
    train_batches, val_batches = utils.load_batches(
        train_triplets=train_triplets,
        test_triplets=test_triplets,
        n_items=n_items,
        batch_size=batch_size,
        sampling_method=sampling_method,
        multi_proc=multi_proc,
        n_gpus=n_gpus,
        rnd_seed=rnd_seed,
        p=p,
    )
    print(
        f'\nNumber of train batches in current process: {len(train_batches)}\n'
    )

    ###############################
    ########## settings ###########
    ###############################
    temperature = torch.tensor(1.).to(device)
    model = SPoSE(in_size=n_items, out_size=embed_dim, init_weights=True)
    model.to(device)
    optim = Adam(model.parameters(), lr=lr)

    ################################################
    ############# Creating PATHs ###################
    ################################################
    print(f'...Creating PATHs')
    print()
    # Only the default locations get the modality/dim/lambda/seed layout;
    # caller-supplied directories are used as given.
    if results_dir == './results/':
        results_dir = os.path.join(results_dir, modality, f'{embed_dim}d',
                                   str(lmbda), f'seed{rnd_seed:02d}')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    if plots_dir == './plots/':
        plots_dir = os.path.join(plots_dir, modality, f'{embed_dim}d',
                                 str(lmbda), f'seed{rnd_seed}')
    if not os.path.exists(plots_dir):
        os.makedirs(plots_dir)

    model_dir = os.path.join(results_dir, 'model')

    ################################################
    ############## Data Parallelism ################
    ################################################
    if (multi_proc and n_gpus > 1):
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
        )

    #####################################################################
    ######### Load model from previous checkpoint, if available #########
    #####################################################################
    if os.path.exists(model_dir):
        models = sorted(
            [m.name for m in os.scandir(model_dir) if m.name.endswith('.tar')])
        if len(models) > 0:
            try:
                # Newest checkpoint: names sort lexicographically by the
                # zero-padded epoch number they are saved with below.
                PATH = os.path.join(model_dir, models[-1])
                #TODO: figure out whether line below is really necessary to load model's checkpoints for single-node multi-proc distrib training
                #torch.distributed.barrier()
                map_location = {
                    f'cuda:0': f'cuda:{local_rank}'
                } if (multi_proc and n_gpus > 1) else device
                checkpoint = torch.load(PATH, map_location=map_location)
                model.load_state_dict(checkpoint['model_state_dict'])
                optim.load_state_dict(checkpoint['optim_state_dict'])
                start = checkpoint['epoch'] + 1
                loss = checkpoint['loss']
                train_accs = checkpoint['train_accs']
                val_accs = checkpoint['val_accs']
                train_losses = checkpoint['train_losses']
                val_losses = checkpoint['val_losses']
                nneg_d_over_time = checkpoint['nneg_d_over_time']
                loglikelihoods = checkpoint['loglikelihoods']
                complexity_losses = checkpoint['complexity_costs']
                print(
                    f'...Loaded model and optimizer state dicts from previous run. Starting at epoch {start}.\n'
                )
            except RuntimeError:
                # Checkpoint incompatible with the current architecture:
                # fall back to training from scratch.
                print(
                    f'...Loading model and optimizer state dicts failed. Check whether you are currently using a different set of model parameters.\n'
                )
                start = 0
                train_accs, val_accs = [], []
                train_losses, val_losses = [], []
                loglikelihoods, complexity_losses = [], []
                nneg_d_over_time = []
        else:
            start = 0
            train_accs, val_accs = [], []
            train_losses, val_losses = [], []
            loglikelihoods, complexity_losses = [], []
            nneg_d_over_time = []
    else:
        os.makedirs(model_dir)
        start = 0
        train_accs, val_accs = [], []
        train_losses, val_losses = [], []
        loglikelihoods, complexity_losses = [], []
        nneg_d_over_time = []

    ################################################
    ################## Training ####################
    ################################################
    iter = 0
    results = {}
    logger.info(f'Optimization started for lambda: {lmbda}\n')

    for epoch in range(start, epochs):
        model.train()
        # Per-batch accumulators, averaged at the end of the epoch.
        batch_llikelihoods = torch.zeros(len(train_batches))
        batch_closses = torch.zeros(len(train_batches))
        batch_losses_train = torch.zeros(len(train_batches))
        batch_accs_train = torch.zeros(len(train_batches))
        for i, batch in enumerate(train_batches):
            optim.zero_grad()  #zero out gradients
            batch = batch.to(device)
            logits = model(batch)
            # Each consecutive group of 3 rows in logits is one
            # (anchor, positive, negative) triplet.
            anchor, positive, negative = torch.unbind(torch.reshape(
                logits, (-1, 3, embed_dim)),
                                                      dim=1)
            c_entropy = utils.trinomial_loss(anchor, positive, negative,
                                             task, temperature)
            l1_pen = l1_regularization(model).to(
                device)  #L1-norm to enforce sparsity (many 0s)
            # Under DDP the wrapped module lives at model.module.
            W = model.module.fc.weight if (
                multi_proc and n_gpus > 1) else model.fc.weight
            pos_pen = torch.sum(
                F.relu(-W)
            )  #positivity constraint to enforce non-negative values in embedding matrix
            complexity_loss = (lmbda / n_items) * l1_pen
            loss = c_entropy + 0.01 * pos_pen + complexity_loss
            loss.backward()
            optim.step()
            batch_losses_train[i] += loss.item()
            batch_llikelihoods[i] += c_entropy.item()
            batch_closses[i] += complexity_loss.item()
            batch_accs_train[i] += utils.choice_accuracy(
                anchor, positive, negative, task)
            iter += 1

        avg_llikelihood = torch.mean(batch_llikelihoods).item()
        avg_closs = torch.mean(batch_closses).item()
        avg_train_loss = torch.mean(batch_losses_train).item()
        avg_train_acc = torch.mean(batch_accs_train).item()

        loglikelihoods.append(avg_llikelihood)
        complexity_losses.append(avg_closs)
        train_losses.append(avg_train_loss)
        train_accs.append(avg_train_acc)

        ################################################
        ################ validation ####################
        ################################################
        avg_val_loss, avg_val_acc = utils.validation(model=model,
                                                     val_batches=val_batches,
                                                     task=task,
                                                     device=device)

        val_losses.append(avg_val_loss)
        val_accs.append(avg_val_acc)

        logger.info(f'Epoch: {epoch+1}/{epochs}')
        logger.info(f'Train acc: {avg_train_acc:.3f}')
        logger.info(f'Train loss: {avg_train_loss:.3f}')
        logger.info(f'Val acc: {avg_val_acc:.3f}')
        logger.info(f'Val loss: {avg_val_loss:.3f}\n')

        if show_progress:
            print(
                "\n========================================================================================================"
            )
            print(
                f'====== Epoch: {epoch+1}, Train acc: {avg_train_acc:.3f}, Train loss: {avg_train_loss:.3f}, Val acc: {avg_val_acc:.3f}, Val loss: {avg_val_loss:.3f} ======'
            )
            print(
                "========================================================================================================\n"
            )

        if (epoch + 1) % 10 == 0:
            # Every 10 epochs: dump the embedding matrix as text and record
            # how many dimensions are still non-negative.
            W = model.module.fc.weight if (
                multi_proc and n_gpus > 1) else model.fc.weight
            np.savetxt(
                os.path.join(results_dir,
                             f'sparse_embed_epoch{epoch+1:04d}.txt'),
                W.detach().cpu().numpy())
            logger.info(f'Saving model weights at epoch {epoch+1}')
            current_d = utils.get_nneg_dims(W)
            nneg_d_over_time.append((epoch + 1, current_d))
            print(
                "\n========================================================================================================"
            )
            print(
                f"========================= Current number of non-negative dimensions: {current_d} ========================="
            )
            print(
                "========================================================================================================\n"
            )

        #save model and optim parameters for inference or to resume training
        #PyTorch convention is to save checkpoints as .tar files
        if (multi_proc and n_gpus > 1):
            # Only rank 0 writes checkpoints in distributed runs.
            if local_rank == 0:
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optim_state_dict': optim.state_dict(),
                        'loss': loss,
                        'train_losses': train_losses,
                        'train_accs': train_accs,
                        'val_losses': val_losses,
                        'val_accs': val_accs,
                        'nneg_d_over_time': nneg_d_over_time,
                        'loglikelihoods': loglikelihoods,
                        'complexity_costs': complexity_losses,
                    },
                    os.path.join(model_dir, f'model_epoch{epoch+1:04d}.tar'))
        else:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optim_state_dict': optim.state_dict(),
                    'loss': loss,
                    'train_losses': train_losses,
                    'train_accs': train_accs,
                    'val_losses': val_losses,
                    'val_accs': val_accs,
                    'nneg_d_over_time': nneg_d_over_time,
                    'loglikelihoods': loglikelihoods,
                    'complexity_costs': complexity_losses,
                },
                os.path.join(model_dir, f'model_epoch{epoch+1:04d}.tar'))
        logger.info(f'Saving model parameters at epoch {epoch+1}\n')

        if (epoch + 1) > window_size:
            #check termination condition (we want to train until convergence)
            # Stop when the loss trend over the window is non-decreasing or
            # statistically flat.
            lmres = linregress(
                range(window_size),
                train_losses[(epoch + 1 - window_size):(epoch + 2)])
            if (lmres.slope > 0) or (lmres.pvalue > .1):
                break

    #save final model weights
    # NOTE(review): accesses model.fc directly — under DistributedDataParallel
    # the layer lives at model.module.fc; confirm for multi-proc runs.
    utils.save_weights_(results_dir, model.fc.weight)

    results = {
        'epoch': len(train_accs),
        'train_acc': train_accs[-1],
        'val_acc': val_accs[-1],
        'val_loss': val_losses[-1]
    }

    logger.info(
        f'\nOptimization finished after {epoch+1} epochs for lambda: {lmbda}\n'
    )
    logger.info(
        f'\nPlotting number of non-negative dimensions as a function of time for lambda: {lmbda}\n'
    )
    plot_nneg_dims_over_time(plots_dir=plots_dir,
                             nneg_d_over_time=nneg_d_over_time)
    logger.info(f'\nPlotting model performances over time for lambda: {lmbda}')
    #plot train and validation performance alongside each other to examine a potential overfit to the training data
    plot_single_performance(plots_dir=plots_dir,
                            val_accs=val_accs,
                            train_accs=train_accs)
    logger.info(f'\nPlotting losses over time for lambda: {lmbda}')
    #plot both log-likelihood of the data (i.e., cross-entropy loss) and complexity loss (i.e., l1-norm in DSPoSE and KLD in VSPoSE)
    plot_complexities_and_loglikelihoods(
        plots_dir=plots_dir,
        loglikelihoods=loglikelihoods,
        complexity_losses=complexity_losses)

    PATH = os.path.join(results_dir, 'results.json')
    with open(PATH, 'w') as results_file:
        json.dump(results, results_file)
def train():
    """Train the 4-channel AWNet ISP model end-to-end.

    Builds the model/loss on GPU 0 (CPU fallback), optionally resumes from the
    best checkpoint, then for each epoch: trains over the full train loader,
    saves a per-epoch checkpoint, validates, and keeps the best-PSNR weights.

    NOTE(review): reconstructed from a whitespace-mangled source; statement
    grouping inside the epoch loop (e.g. where train_psnr is computed) was
    inferred from context — confirm against the original repository.
    """
    device_ids = [0]
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))
    # Initialize loss and model
    loss = ms_Loss().to(device)
    net = AWNet(4, 3, block=[3, 3, 3, 4, 4]).to(device)
    net = nn.DataParallel(net, device_ids=device_ids)
    new_lr = trainConfig.learning_rate[0]
    # Reload: resume from the previously saved best checkpoint if configured.
    if trainConfig.pretrain == True:
        net.load_state_dict(
            torch.load(
                '{}/best_4channel.pkl'.format(trainConfig.save_best),
                map_location=device)["model_state"])
        print('weight loaded.')
    else:
        print('no weight loaded.')
    # Count only trainable parameters for the log line below.
    pytorch_total_params = sum(
        p.numel() for p in net.parameters() if p.requires_grad)
    print("Total_params: {}".format(pytorch_total_params))
    # optimizer and scheduler
    optimizer = torch.optim.Adam(
        net.parameters(), lr=new_lr, betas=(0.9, 0.999))
    # Dataloaders
    train_dataset = LoadData(
        trainConfig.data_dir, TRAIN_SIZE, dslr_scale=1, test=False)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=trainConfig.batch_size,
        shuffle=True,
        num_workers=32,
        pin_memory=True,
        drop_last=True)
    test_dataset = LoadData(
        trainConfig.data_dir, TEST_SIZE, dslr_scale=1, test=True)
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=18,
        pin_memory=True,
        drop_last=False)
    print('Train loader length: {}'.format(len(train_loader)))
    # Baseline score of the (possibly reloaded) weights; new checkpoints must
    # beat this PSNR to replace the saved best model.
    pre_psnr, pre_ssim = validation(net, test_loader, device, save_tag=True)
    print('previous PSNR: {:.4f}, previous ssim: {:.4f}'.format(
        pre_psnr, pre_ssim))
    iteration = 0
    for epoch in range(trainConfig.epoch):
        psnr_list = []
        start_time = time.time()
        if epoch > 0:
            # Step-decay LR schedule from the project's helper.
            new_lr = adjust_learning_rate_step(
                optimizer, epoch, trainConfig.epoch, trainConfig.learning_rate)
        for batch_id, data in enumerate(train_loader):
            x, target, _ = data
            x = x.to(device)
            target = target.to(device)
            pred, _ = net(x)
            optimizer.zero_grad()
            total_loss, losses = loss(pred, target)
            total_loss.backward()
            optimizer.step()
            iteration += 1
            if trainConfig.print_loss:
                print("epoch:{}/{} | Loss: {:.4f} ".format(
                    epoch, trainConfig.epoch, total_loss.item()))
            if not (batch_id % 1000):
                print('Epoch:{0}, Iteration:{1}'.format(epoch, batch_id))
            # pred[0] is presumably the full-resolution output head — confirm.
            psnr_list.extend(to_psnr(pred[0], target))
        # Average train PSNR across all batches of this epoch.
        train_psnr = sum(psnr_list) / len(psnr_list)
        # Per-epoch checkpoint (saved unconditionally every epoch).
        state = {
            "model_state": net.state_dict(),
            "lr": new_lr,
        }
        print('saved checkpoint')
        torch.save(state, '{}/four_channel_epoch_{}.pkl'.format(
            trainConfig.checkpoints, epoch))
        one_epoch_time = time.time() - start_time
        print('time: {}, train psnr: {}'.format(one_epoch_time, train_psnr))
        val_psnr, val_ssim = validation(
            net, test_loader, device, save_tag=True)
        print_log(epoch + 1, trainConfig.epoch, one_epoch_time, train_psnr,
                  val_psnr, val_ssim, 'multi_loss')
        # Keep the best-by-validation-PSNR weights separately.
        if val_psnr >= pre_psnr:
            state = {
                "model_state": net.state_dict(),
                "lr": new_lr,
            }
            print('saved best weight')
            torch.save(state, '{}/best_4channel.pkl'.format(
                trainConfig.save_best))
            pre_psnr = val_psnr
optimizer.zero_grad() output = model(inputs) _, preds = torch.max(output.data, 1) loss = criterion(output, labels) loss.backward() optimizer.step() running_loss += loss.item() ps = torch.exp(output) equality = (labels.data == ps.max(dim=1)[1]) accuracy += equality.type(torch.FloatTensor).mean() if steps % print_every == 0: test_loss, test_accuracy = validation(model, dataloaders['valid'], criterion, device) print("Epoch: {}/{}".format(epoch + 1, args.epochs), "Train Loss: {:.4f}".format(running_loss / print_every), "Train Accuracy : {:.4f}".format(accuracy / print_every), "Validation Loss : {:.4f}".format(test_loss), "Validation Accuracy : {:.4f}".format(test_accuracy)) model.train() accuracy = 0 running_loss = 0 # Do validation on the test set, print results test_loss, test_accuracy = validation(model, dataloaders['test'], criterion, device) print("Test Loss : {:.4f}".format(test_loss), "Test Accuracy : {:.4f}".format(test_accuracy))
with tf.Session() as sess: sess, step = utils.start_or_restore_training( sess, saver, checkpoint_dir=config.CKECKDIR) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) start_time = time.time() print("trainning......") while True: sess.run(train_step, feed_dict={rate: 0.5}) step += 1 if step % 50 == 0: train_loss_v = sess.run(train_loss, feed_dict={rate: 1.}) valid_loss_v = utils.validation(sess, valid_loss, rate, batch_size=config.BATCH_SIZE) duration = time.time() - start_time logger.info( "step %d: trainning loss is %g, validation loss is %g (%0.3f sec)" % (step, train_loss_v, valid_loss_v, duration)) print( "step %d: trainning loss is %g, validation loss is %g (%0.3f sec)" % (step, train_loss_v, valid_loss_v, duration)) start_time = time.time() if step % 1000 == 0: if not os.path.exists(config.CKECKDIR): os.mkdir(config.CKECKDIR) saver.save(sess, config.CKECKFILE, global_step=step) print('writing checkpoint at step %s' % step)
def main():
    """CLI entry point for the VGG patch classifier.

    Modes:
      train             — fit the classifier on training patches.
      valid             — evaluate the best saved model on the validation set.
      predict_valid     — write feature predictions for the validation images.
      predict_all_valid — as above, plus all labeled images not trained on
                          (only meaningful with --limit).
      predict_test      — currently disabled (assert False / FIXME below).
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('out_path', help='path to UNet features', type=Path)
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=160)
    arg('--offset', type=int, default=6)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode',
        choices=[
            'train', 'valid', 'predict_valid', 'predict_test',
            'predict_all_valid'
        ],
        default='train')
    arg('--model-path',
        help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--pred-oddity', type=int,
        help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()
    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args, coords)
    root = Path(args.root)
    model = VGGModel(args.patch_size)
    model = utils.cuda(model)
    criterion = nn.CrossEntropyLoss()
    loader_kwargs = dict(min_scale=args.min_scale,
                         max_scale=args.max_scale,
                         offset=args.offset)
    if args.mode == 'train':
        train_loader, valid_loader = (utils.make_loader(
            ClassificationDataset, args, train_paths, coords, **loader_kwargs),
                                      utils.make_loader(ClassificationDataset,
                                                        args,
                                                        valid_paths,
                                                        coords,
                                                        deterministic=True,
                                                        **loader_kwargs))
        # --clean wipes any previous checkpoints/params under root.
        if root.exists() and args.clean:
            shutil.rmtree(str(root))
        root.mkdir(exist_ok=True)
        # Persist the full CLI configuration next to the checkpoints.
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(
            args,
            model,
            criterion,
            train_loader=train_loader,
            valid_loader=valid_loader,
            save_predictions=save_predictions,
            is_classification=True,
            # SGD with Nesterov momentum; same lr for both parameter groups.
            make_optimizer=lambda lr: SGD([
                {
                    'params': model.features.parameters(),
                    'lr': lr
                },
                {
                    'params': model.classifier.parameters(),
                    'lr': lr
                },
            ],
                                          nesterov=True,
                                          momentum=0.9),
        )
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(ClassificationDataset,
                                         args,
                                         valid_paths,
                                         coords,
                                         deterministic=True,
                                         **loader_kwargs)
        utils.validation(model,
                         criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'),
                         is_classification=True)
    else:
        # All remaining modes are prediction modes and need the best weights.
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths)
                    | (set(utils.labeled_paths()) - set(train_paths)))
            predict(model,
                    valid_paths,
                    out_path=args.out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale,
                    max_scale=args.max_scale)
        elif args.mode == 'predict_test':
            assert False  # FIXME - use out_path too
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            # Skip images that already have saved predictions.
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [
                p for p in utils.DATA_ROOT.joinpath('Test').glob('*.jpg')
                if p.stem not in predicted
            ]
            if args.pred_oddity is not None:
                # Split the test set by filename parity across two workers.
                assert args.pred_oddity in {0, 1}
                test_paths = [
                    p for p in test_paths
                    if int(p.stem) % 2 == args.pred_oddity
                ]
            predict(model,
                    test_paths,
                    out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
def train_model_1(model,
                  n_epoch,
                  labelsdict,
                  criterion,
                  optimizer,
                  device,
                  trainloader,
                  validloader,
                  train_data,
                  model_name,
                  model_path,
                  model_path_best,
                  loss_graph,
                  accuracy_graph,
                  start_epoch=0,
                  valid_loss=1000):
    """
    Commence training of model
    model: model used
    n_epoch: number of epoch used for training
    labelsdict: dictionary containing class names which correspond to their respective indexes
    criterion: loss function used for training
    optimizer: choice of optimizer use for training
    device: 'cuda' or 'cpu' (speed up training)
    trainloader: input training data split in batches
    validloader: input validation data split in batches
    train_data: input training data (currently unused in this function body)
    model_name: name of model indicated
    model_path: path where model checkpoint is saved at every epoch
    model_path_best: path where model yields best training result is saved
        (NOTE(review): "best" here is lowest validation LOSS, not accuracy —
        see the `valid_loss <= val_loss_max` check below)
    loss_graph: path of graph indicating training and validation losses of model is saved
    accuracy_graph: path of graph indicating training and validation accuracies of model is saved
    start_epoch: indicate start epoch (where start epoch != 0 when model is not
        trained from scratch but loaded and retrained)
    valid_loss: best validation loss seen so far, used as the initial
        save-threshold when resuming (default 1000 ~ "no best yet")
    Returns the trained model (with training history attached as attributes).
    """
    print(
        f'Training custom CNN Model to distinguish normal and infected lungs')
    print(f'total epochs: {n_epoch}')
    if start_epoch != 0:
        print(f'Retraining model continuing from epoch {start_epoch+1}')
    # Input width of the final fc layer, stored on the checkpoint for reload.
    n_in = next(model.fc2.modules()).in_features
    model.to(device)
    start = time.time()
    epochs = n_epoch
    steps = 0
    running_loss = 0
    running_acc = 0
    # Validation fires once per pass over trainloader (i.e. once per epoch).
    print_every = len(trainloader)
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    val_loss_max = valid_loss
    Singapore = pytz.timezone('Asia/Singapore')
    for e in range(start_epoch, epochs):
        # Make sure training is on
        model.train()
        for images, labels, path in trainloader:
            # for each batch
            images, labels = images.to(device), labels.to(device)
            steps += 1
            optimizer.zero_grad()
            output = model.forward(images)
            # getting loss
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            # getting accuracy
            ps = torch.exp(output)
            equality = (labels == ps.max(dim=1)[1])
            running_acc += equality.type(torch.FloatTensor).mean()
            running_loss += loss.item()
            # At the end of every epoch...
            if steps % print_every == 0:
                # Eval mode for predictions
                model.eval()
                # Turn off gradients for validation
                with torch.no_grad():
                    test_loss, accuracy = validation(model, validloader,
                                                     criterion, device)
                # log results at every epoch
                print(
                    "Epoch: {}/{} - ".format(e + 1, epochs),
                    "Time: {} ".format(datetime.now(Singapore)),
                    "Training Loss: {:.3f} - ".format(running_loss /
                                                      len(trainloader)),
                    "Validation Loss: {:.3f} - ".format(test_loss /
                                                        len(validloader)),
                    "Training Accuracy: {:.3f} - ".format(running_acc /
                                                          len(trainloader)),
                    "Validation Accuracy: {:.3f}".format(accuracy /
                                                         len(validloader)))
                # saving results into a list for plotting
                train_loss.append(running_loss / print_every)
                val_loss.append(test_loss / len(validloader))
                train_acc.append(running_acc / len(trainloader))
                val_acc.append(accuracy / len(validloader))
                valid_loss = test_loss / len(validloader)
                # saving checkpoint: training metadata is attached directly to
                # the model object so save_checkpoint can persist it all.
                model.n_in = n_in
                model.n_out = len(labelsdict)
                model.labelsdict = labelsdict
                model.optimizer = optimizer
                model.optimizer_state_dict = optimizer.state_dict()
                model.model_name = model_name
                model.loss = criterion
                model.val_loss = valid_loss
                loss_acc = []
                loss_acc.append(train_loss)
                loss_acc.append(val_loss)
                loss_acc.append(train_acc)
                loss_acc.append(val_acc)
                model.loss_acc = loss_acc
                model.start_epoch = start_epoch
                model.epoch = e + 1
                path = model_path
                path_best = model_path_best
                # saving checkpoint model at every epoch
                save_checkpoint(model, path)
                # saving best model during training, best indicated by lowest
                # validation loss obtained
                if valid_loss <= val_loss_max:
                    print(
                        'Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'
                        .format(val_loss_max, valid_loss))
                    # update threshold
                    val_loss_max = valid_loss
                    save_checkpoint(model, path_best)
                # reset training loss and accuracy after validation, which is used again for subsequent training epoch
                running_loss = 0
                running_acc = 0
    print('model:', model_name, '- epochs:', n_epoch)
    print(f"Run time: {(time.time() - start)/60:.3f} min")
    # plotting the graph on training and validation loss for model
    # NOTE(review): loss_acc is only defined once the per-epoch validation
    # branch has run at least once; with an empty trainloader this would raise.
    plot_curves(start_epoch, model.epoch, loss_acc, model_name, loss_graph,
                accuracy_graph)
    return model
loss = classification_loss_func(predictions, label_data) loss.backward() optimizer.step() running_loss += loss.item() print(f'Experiment: {parameters.experiment} -- Epoch: {epoch} -- Batch: {batch_num} -- Loss: {loss.item()}') # Record the actual and predicted labels for the instance true_classes = np.concatenate((true_classes, label_data.detach().cpu().numpy())) _, predictions = torch.max(predictions, 1) predicted_classes = np.concatenate((predicted_classes, predictions.detach().cpu().numpy())) if scheduler is not None: if isinstance(scheduler, torch.optim.lr_scheduler.StepLR): scheduler.step() elif isinstance(scheduler, (torch.optim.lr_scheduler.ReduceLROnPlateau, BoldDriver)): scheduler.step(running_loss) # Get the training accuracy loss = running_loss / (batch_num + 1) accuracy = metrics.accuracy_score(true_classes, predicted_classes) # Check the validation error after each training epoch print(f'Evaluating validation set (epoch {epoch}):') val_loss, val_accuracy = validation(network=network, dataloader=val_dataloader, compute_device=compute_device, experiment=parameters.experiment, results_directory=args['results_dir'], classification_loss_func=classification_loss_func) recorder.record(epoch, loss, val_loss, accuracy, val_accuracy) recorder.update(epoch, val_loss, network.state_dict(), optimizer.state_dict())
def run(
    n_samples: int,
    version: str,
    task: str,
    modality: str,
    results_dir: str,
    triplets_dir: str,
    lmbda: float,
    batch_size: int,
    embed_dim: int,
    rnd_seed: int,
    device: torch.device,
) -> None:
    """Generate `n_samples` synthetic triplet datasets from a pretrained model.

    For each sample, a pretrained (V)SPoSE model is loaded and its output PMFs
    are used (via `validation(..., sampling=True)`) to probabilistically
    re-sample triplet choices over the real training batches; each sampled
    dataset is written to <triplets_dir>/synthetic/sample_XX/train_90.txt.
    """
    #load train triplets
    train_triplets, _ = load_data(device=device,
                                  triplets_dir=os.path.join(
                                      triplets_dir, modality))
    #number of unique items in the data matrix
    n_items = torch.max(train_triplets).item() + 1
    #initialize an identity matrix of size n_items x n_items for one-hot-encoding of triplets
    I = torch.eye(n_items)
    #get mini-batches for training to sample an equally sized synthetic dataset
    train_batches = BatchGenerator(I=I,
                                   dataset=train_triplets,
                                   batch_size=batch_size,
                                   sampling_method=None,
                                   p=None)
    #initialise model (variational vs. deterministic SPoSE variant)
    for i in range(n_samples):
        if version == 'variational':
            model = VSPoSE(in_size=n_items, out_size=embed_dim)
        else:
            model = SPoSE(in_size=n_items, out_size=embed_dim)
        #load weights of pretrained model
        model = load_model(
            model=model,
            results_dir=results_dir,
            modality=modality,
            version=version,
            dim=embed_dim,
            lmbda=lmbda,
            rnd_seed=rnd_seed,
            device=device,
        )
        #move model to current device
        model.to(device)
        #probabilistically sample triplet choices given model ouput PMFs
        sampled_choices = validation(
            model=model,
            val_batches=train_batches,
            version=version,
            task=task,
            device=device,
            embed_dim=embed_dim,
            sampling=True,
            batch_size=batch_size,
        )
        # One output directory per synthetic sample, created on demand.
        PATH = os.path.join(triplets_dir, 'synthetic', f'sample_{i+1:02d}')
        if not os.path.exists(PATH):
            os.makedirs(PATH)
        np.savetxt(os.path.join(PATH, 'train_90.txt'), sampled_choices)
def main():
    """CLI entry point for the UNet sea-lion segmentation model.

    Modes:
      train             — fit the (optionally headed) UNet on patches.
      valid             — evaluate the best saved model on the validation set.
      predict_valid     — write predictions for validation images under root.
      predict_all_valid — as above, plus all labeled images not trained on
                          (only meaningful with --limit).
      predict_test      — predict the Test set, resumable and splittable by
                          filename parity via --pred-oddity.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--bg-weight', type=float, default=1.0, help='background weight')
    arg('--dice-weight', type=float, default=0.0)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode',
        choices=[
            'train', 'valid', 'predict_valid', 'predict_test',
            'predict_all_valid'
        ],
        default='train')
    arg('--model-path',
        help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--oversample',
        type=float,
        default=0.0,
        help='sample near lion with given probability')
    arg('--with-head', action='store_true')
    arg('--pred-oddity', type=int,
        help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()
    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args)
    root = Path(args.root)
    # --with-head swaps in the UNet variant with an extra head and downscaled
    # targets (see `downscale`/`downsampled` kwargs below).
    model = UNetWithHead() if args.with_head else UNet()
    model = utils.cuda(model)
    criterion = Loss(dice_weight=args.dice_weight, bg_weight=args.bg_weight)
    loader_kwargs = dict(
        min_scale=args.min_scale,
        max_scale=args.max_scale,
        downscale=args.with_head,
    )
    if args.mode == 'train':
        train_loader, valid_loader = (utils.make_loader(
            SegmentationDataset,
            args,
            train_paths,
            coords,
            oversample=args.oversample,
            **loader_kwargs),
                                      utils.make_loader(SegmentationDataset,
                                                        args,
                                                        valid_paths,
                                                        coords,
                                                        deterministic=True,
                                                        **loader_kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))  # remove dir tree
        root.mkdir(exist_ok=True)
        # Persist the full CLI configuration next to the checkpoints.
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(args,
                    model,
                    criterion,
                    train_loader=train_loader,
                    valid_loader=valid_loader,
                    save_predictions=save_predictions)
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(SegmentationDataset,
                                         args,
                                         valid_paths,
                                         coords,
                                         deterministic=True,
                                         **loader_kwargs)
        utils.validation(model, criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'))
    else:
        # All remaining modes are prediction modes and need the best weights.
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths)
                    | (set(utils.labeled_paths()) - set(train_paths)))
            predict(model,
                    valid_paths,
                    out_path=root,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale,
                    max_scale=args.max_scale,
                    downsampled=args.with_head)
        elif args.mode == 'predict_test':
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            # Skip images that already have saved predictions (resumable).
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [
                p for p in utils.DATA_ROOT.joinpath('Test').glob('*.png')
                if p.stem not in predicted
            ]
            if args.pred_oddity is not None:
                # Split the test set by filename parity across two workers.
                assert args.pred_oddity in {0, 1}
                test_paths = [
                    p for p in test_paths
                    if int(p.stem) % 2 == args.pred_oddity
                ]
            predict(model,
                    test_paths,
                    out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True,
                    downsampled=args.with_head)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
#args.train_cuts = '((sample["mcChannelNumber"]==361106) | (sample["mcChannelNumber"]==423300)) & (sample["pt"]>=15)' #args.valid_cuts = '((sample["mcChannelNumber"]==361106) | (sample["mcChannelNumber"]==423300)) & (sample["pt"]>=15)' # OBTAINING PERFORMANCE FROM EXISTING VALIDATION RESULTS if os.path.isfile(args.output_dir + '/' + args.results_in) or os.path.islink(args.output_dir + '/' + args.results_in): if args.eta_region in ['0.0-1.3', '1.3-1.6', '1.6-2.5']: eta_1, eta_2 = args.eta_region.split('-') valid_cuts = '(abs(sample["eta"]) >= ' + str( eta_1) + ') & (abs(sample["eta"]) <= ' + str(eta_2) + ')' if args.valid_cuts == '': args.valid_cuts = valid_cuts else: args.valid_cuts = valid_cuts + '& (' + args.valid_cuts + ')' inputs = {'scalars': scalars, 'images': [], 'others': others} validation(args.output_dir, args.results_in, args.plotting, args.n_valid, data_files, inputs, args.valid_cuts, args.sep_bkg, args.runDiffPlots) elif args.results_in != '': print('\nOption --results_in not matching any file --> aborting\n') if args.results_in != '': sys.exit() # MODEL CREATION AND MULTI-GPU DISTRIBUTION n_gpus = min(args.n_gpus, len(tf.config.experimental.list_physical_devices('GPU'))) train_batch_size = max(1, n_gpus) * args.batch_size valid_batch_size = max(1, n_gpus) * max(args.batch_size, int(5e3)) sample = make_sample(data_files[0], [0, 1], input_data, args.n_tracks, args.n_classes)[0] model = create_model(args.n_classes, sample, args.NN_type, args.FCN_neurons, CNN, args.l2, args.dropout, train_data, n_gpus)
import linear_model as lm
import utils

# PM2.5 linear-regression pipeline: load/window the training data, min-max
# rescale, hold out a validation split, fit by adagrad gradient descent,
# then predict the test set and persist scaler/model/answers.
# Paths are resolved relative to this script so it can run from any cwd.
train_path = os.path.join(os.path.dirname(__file__), "./data/train.csv")
test_path = os.path.join(os.path.dirname(__file__), "./data/test.csv")
output_path = os.path.join(os.path.dirname(__file__), "./ans.csv")
model_path = os.path.join(os.path.dirname(__file__), "./model")
scaler_path = os.path.join(os.path.dirname(__file__), "./scaler")

# Selected feature rows and the flattened position of the PM2.5 target.
fea_select, y_pos = (0, 4, 5, 6, 7, 8, 9, 16), 70

# Read all data, extract the selected features, window into 9-day samples.
x, y = utils.load(train_path, mode='train', fea_select=fea_select, y_pos=y_pos)
# Min-max rescale features into [0, 1]; keep the bounds for the test set.
# (Renamed from `max`/`min`, which shadowed the builtins.)
x, x_max, x_min = utils.rescaling(x)
x, y = utils.shuffle(x, y)
# Hold out 10% of the samples for validation.
x_train, y_train, x_val, y_val = utils.validation(x, y, ratio=0.1)
# BUG FIX: train on the training split only. The original trained on the full
# (x, y), which leaked the validation samples into training and left
# x_train/y_train unused, making the x_val/y_val monitoring meaningless.
b, w = lm.LinearRegression(x_train, y_train, lr=100000, epoch=1000000,
                           lr_method='adagrad', x_val=x_val, y_val=y_val)

# Scale the test features with the *training* bounds before predicting.
x_test = utils.load(test_path, mode='test', fea_select=fea_select, y_pos=y_pos)
x_test = utils.scaling(x_test, x_max, x_min)
predicted = lm.predict(x_test, b, w)
print('>>> Predicted Result :\n', predicted)

utils.save_scaler(x_max, x_min, scaler_path)
utils.save_model(b, w, model_path)
utils.save_ans(predicted, output_path)
print('Entropía del conjunto: ', data_set_entropy) # Separamos el data set en dos subconjuntos print() print('Se separa el data set en dos subconjuntos') splitted_data = utils.split_20_80(data_set) # Verificamos la correctitud de los tamaños print('Tamaño del data set original: ', str(len(data_set))) print('Tamaño del subset de validación: ', str(len(splitted_data[0]))) print('Tamaño del subset de entrenamiento: ', str(len(splitted_data[1]))) print() # Parte 1 print('Parte 1') # Se realiza cross-validation de tamaño 10 sobre el 80% del conjunto original. print('Se realiza 10-fold cross-validation') v_cs = utils.cross_validation(splitted_data[1], attributes, 'Class/ASD', 10) print('Promedio de error: ', v_cs) # Parte 2 print('Parte 2') print('Se realiza Hold out validation') # Se entrena con el 80% tree_6 = utils.ID3_algorithm(splitted_data[1], attributes, 'Class/ASD', False, False) # Se valida con el 20% v_ho = utils.validation(tree_6, splitted_data[0], 'Class/ASD') print('Resultado de la validación: ', v_ho)
sample_size = len(h5py.File(args.data_file, 'r')['mcChannelNumber']) args.n_train = [0, min(sample_size, args.n_train)] args.n_valid = [ args.n_train[1], min(args.n_train[1] + args.n_valid, sample_size) ] if args.n_valid[0] == args.n_valid[1]: args.n_valid = args.n_train #args.train_cuts += '(abs(sample["eta"]) > 0.8) & (abs(sample["eta"]) < 1.15)' #args.valid_cuts += '(sample["p_et_calo"] > 4.5) & (sample["p_et_calo"] < 20)' # OBTAINING PERFORMANCE FROM EXISTING VALIDATION RESULTS if os.path.isfile(args.output_dir + '/' + args.results_in) or os.path.islink(args.output_dir + '/' + args.results_in): variables = {'others': others, 'scalars': scalars, 'images': []} validation(args.output_dir, args.results_in, args.plotting, args.n_valid, args.data_file, variables, args.runDiffPlots) elif args.results_in != '': print( "\noption [--results_in] was given but no matching file found in the right path, aborting.." ) print("results_in file =", args.output_dir + '/' + args.results_in, '\n') if args.results_in != '': sys.exit() # MULTI-GPU DISTRIBUTION n_gpus = min(args.n_gpus, len(tf.config.experimental.list_physical_devices('GPU'))) devices = ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3'] tf.debugging.set_log_device_placement(False) strategy = tf.distribute.MirroredStrategy(devices=devices[:n_gpus]) with strategy.scope(): if tf.__version__ >= '2.1.0' and len(variables['images']) >= 1: