def train_neural_network():
    """Train the CNN, periodically writing summaries, output images, and checkpoints."""
    sess = setup_tensorflow()

    train_feature_filenames, train_label_filenames = get_filenames()
    test_feature_filenames, test_label_filenames = get_test_filenames()

    # input pipelines (input summaries could also be written out to TensorBoard here)
    train_features, train_labels = input_pipeline.get_files(
        sess, train_feature_filenames, train_label_filenames)
    test_features, test_labels = input_pipeline.get_files(
        sess, test_feature_filenames, test_label_filenames)

    # get outputs and variable lists
    output, cnn_var_list, test_input, test_label, test_output = model.create_model(
        sess, train_features, train_labels)

    # get loss and minimize operations
    with tf.name_scope("loss"):
        cnn_loss = model.create_cnn_loss(output, train_labels)
        tf.summary.scalar("loss", cnn_loss)  # loss summary for TensorBoard
    (global_step, learning_rate, cnn_minimize) = model.create_optimizer(cnn_loss, cnn_var_list)

    # initialize variables before training
    sess.run(tf.global_variables_initializer())

    # cache one batch of test features and labels so we can monitor progress
    test_feature_batch, test_label_batch = sess.run([test_features, test_labels])

    num_batches = TRAINING_DATASET_SIZE // FLAGS.BATCH_SIZE

    # add the computation graph to the summary writer
    writer = tf.summary.FileWriter(summary_dir)
    writer.add_graph(sess.graph)
    merged_summaries = tf.summary.merge_all()

    for epoch in range(1, EPOCHS + 1):
        for batch in range(1, num_batches + 1):
            # feed dictionary for passing hyperparameters
            feed_dict = {learning_rate: LEARNING_RATE}
            # operations list for the root nodes of the computation graph
            ops = [cnn_minimize, cnn_loss, merged_summaries]
            _, loss, summaries = sess.run(ops, feed_dict=feed_dict)

            print("Epoch : " + str(epoch) + "/" + str(EPOCHS) +
                  " , Batch : " + str(batch) + "/" + str(num_batches) +
                  " completed; Loss " + str(loss))

            if batch % SUMMARY_PERIOD == 0:
                # write summaries to the log directory
                writer.add_summary(summaries)
                print("Summary written to logdir")

            if batch % CHECKPOINT_PERIOD == 0:
                # save model progress and the output images for this batch
                feed_dict = {
                    test_input: test_feature_batch,
                    test_label: test_label_batch
                }
                output_batch = sess.run(test_output, feed_dict=feed_dict)
                _save_image_batch(epoch, batch, output_batch)
                _save_tf_model(sess)
                print("Image batch and model saved!")
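# Both TensorFlow scripts in this file call a setup_tensorflow() helper that is
# not shown here. The sketch below is a hypothetical minimal implementation,
# assuming the helper only needs to create a session and seed the graph; the
# real helper may also parse FLAGS or configure logging.
import tensorflow as tf

def setup_tensorflow(random_seed=42):
    """Create a session with on-demand GPU memory growth and a fixed graph seed."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory as needed
    sess = tf.Session(config=config)
    tf.set_random_seed(random_seed)  # make graph-level randomness repeatable
    return sess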
def evaluate_model():
    """Evaluate the model by computing accuracy over the evaluation set."""
    sess = setup_tensorflow()

    # set up the input pipeline for queued inputs
    with tf.name_scope('evaluate_input'):
        evaluate_features, evaluate_labels = input_pipeline.get_files(evaluate_dir)

    # build the model graph
    output, var_list, is_training1 = model.create_model(
        sess, evaluate_features, evaluate_labels)

    # loss and optimizer (the graph must match training before weights can be restored)
    with tf.name_scope("loss"):
        total_loss, softmax_loss = model.compute_loss(output, evaluate_labels)
    (global_step, learning_rate, minimize) = model.create_optimizer(total_loss, var_list)

    # accuracy setup
    out_eval, eval_input, eval_label, accuracy, is_training2 = model.compute_accuracy(sess)

    sess.run(tf.global_variables_initializer())

    # start the queue runners that feed the input pipeline
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # number of batches needed to cover the evaluation set
    num_batches = EVALUATE_DATASET_SIZE // FLAGS.BATCH_SIZE

    # restore the trained weights from the checkpoint you want to evaluate
    saver = tf.train.Saver()
    saver.restore(sess, "./model/model100.ckpt")

    # accumulate accuracy over the whole evaluation set
    total_accuracy = 0
    for batch in range(1, num_batches + 1):
        # load the next batch from the input pipeline
        input_batch, label_batch = sess.run([evaluate_features, evaluate_labels])
        feed_dict = {
            eval_input: input_batch,
            eval_label: label_batch,
            is_training2: False
        }
        ops = [out_eval, accuracy]
        # accuracy on this evaluation batch
        _, acc = sess.run(ops, feed_dict=feed_dict)
        print("Batch " + str(batch) + "/" + str(num_batches) + " acc: " + str(acc))
        total_accuracy += acc
    # average over the number of batches actually evaluated
    total_accuracy /= num_batches

    # overall accuracy on the evaluation set
    print("ACCURACY : " + str(total_accuracy))
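# evaluate_model() starts queue-runner threads but never stops them, so the
# process can hang on exit. Below is a hedged sketch of the usual TF 1.x
# shutdown sequence, written as a helper that could be called after the
# accuracy loop; the coordinator, threads, and session arguments correspond to
# the names used in the function above.
def shutdown_input_pipeline(sess, coord, threads):
    """Stop queue-runner threads and release the session."""
    coord.request_stop()   # signal the input threads to exit
    coord.join(threads)    # wait for them to terminate
    sess.close()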
def run(PATH_TO_IMAGES, LR, WEIGHT_DECAY, opt):
    """
    Train a torchvision model on the NIH data given high-level hyperparameters.

    Args:
        PATH_TO_IMAGES: path to the NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD
        opt: parsed options (batch size, run path, model and augmentation settings)

    Returns:
        preds: model predictions on the test fold, with ground truth for comparison
        aucs: AUC for each label on the test fold
    """
    use_gpu = torch.cuda.is_available()
    gpu_count = torch.cuda.device_count()
    print("Available GPU count: " + str(gpu_count))

    wandb.init(project=opt.project, name=opt.run_name)
    wandb.config.update(opt, allow_val_change=True)

    NUM_EPOCHS = 60
    BATCH_SIZE = opt.batch_size

    if opt.eval_only:
        # evaluation only: reusing an existing run_path is fine
        os.makedirs(opt.run_path, exist_ok=True)
    else:
        # training from scratch must use a fresh run_path, otherwise previous runs would be overwritten
        try:
            os.makedirs(opt.run_path)
        except FileExistsError:
            print("[ERROR] run_path {} exists. Assign a unique run_path.".format(opt.run_path))
            return None, None
        except Exception as e:
            print("Exception while creating run_path {}".format(opt.run_path))
            print(str(e))
            return None, None

    # use ImageNet mean/std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    N_LABELS = 14  # we are predicting 14 labels

    # define torchvision transforms
    if opt.random_crop:
        data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(size=opt.input_size, scale=(0.8, 1.0)),  # crop then resize
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(int(opt.input_size * 1.05)),
                transforms.CenterCrop(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }
    else:
        data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.Resize(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }

    # create train/val datasets and dataloaders
    transformed_datasets = {}
    transformed_datasets['train'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='train',
        transform=data_transforms['train'])
    transformed_datasets['val'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='val',
        transform=data_transforms['val'])

    worker_init_fn = set_seed(opt)

    dataloaders = {}
    dataloaders['train'] = torch.utils.data.DataLoader(
        transformed_datasets['train'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=30,
        drop_last=True,
        worker_init_fn=worker_init_fn)
    dataloaders['val'] = torch.utils.data.DataLoader(
        transformed_datasets['val'],
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=30,
        drop_last=True,
        worker_init_fn=worker_init_fn)

    # training without a GPU would take excessively long
    if not use_gpu:
        raise ValueError("Error: requires GPU")

    # load model
    model = load_model(N_LABELS, opt)

    # define criterion, optimizer, and LR scheduler for training
    criterion = nn.BCELoss()
    optimizer = create_optimizer(model, LR, WEIGHT_DECAY, opt)
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer, 'max',
        factor=opt.lr_decay_ratio,
        patience=opt.patience,
        verbose=True)

    dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val']}

    if opt.eval_only:
        print("loading best model state dict")
        # load the best checkpoint's weights
        checkpoint_best = torch.load(os.path.join(opt.run_path, 'checkpoint'))
        model = load_model(N_LABELS, opt=opt)
        model.load_state_dict(checkpoint_best['state_dict'])
    else:
        # train model
        model, best_epoch = train_model(
            model, criterion, optimizer, LR,
            scheduler=scheduler,
            num_epochs=NUM_EPOCHS,
            dataloaders=dataloaders,
            dataset_sizes=dataset_sizes,
            PATH_TO_IMAGES=PATH_TO_IMAGES,
            data_transforms=data_transforms,
            opt=opt)

    # get predictions and AUCs on the test fold
    preds, aucs = E.make_pred_multilabel(
        data_transforms, model, PATH_TO_IMAGES,
        fold="test", opt=opt)

    wandb.log({'val_official': np.average(list(aucs.auc))})

    return preds, aucs
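# run() relies on a set_seed(opt) helper that returns the worker_init_fn used by
# both DataLoaders, but its implementation is not shown. The sketch below is a
# hypothetical version, assuming opt carries a `seed` attribute; it seeds Python,
# NumPy, and PyTorch and gives each DataLoader worker a distinct deterministic seed.
import random
import numpy as np
import torch

def set_seed(opt):
    """Seed all RNGs and return a worker_init_fn for DataLoader workers."""
    seed = getattr(opt, 'seed', 0)  # the `seed` attribute is an assumption
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    def worker_init_fn(worker_id):
        # offset by worker_id so workers do not produce identical augmentations
        np.random.seed(seed + worker_id)
        random.seed(seed + worker_id)

    return worker_init_fn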
def train_model():
    """Train the model, reporting training and dev accuracy after each epoch."""
    sess = setup_tensorflow()

    # set up the input pipelines for queued inputs
    with tf.name_scope('train_input'):
        train_features, train_labels = input_pipeline.get_files(train_dir)
    with tf.name_scope('dev_input'):
        dev_features, dev_labels = input_pipeline.get_files(dev_dir)

    # build the model graph
    output, var_list, is_training1 = model.create_model(sess, train_features, train_labels)

    # loss and optimizer
    with tf.name_scope("loss"):
        total_loss, softmax_loss = model.compute_loss(output, train_labels)
        tf.summary.scalar("loss", total_loss)  # loss summary for TensorBoard
    (global_step, learning_rate, minimize) = model.create_optimizer(total_loss, var_list)

    # accuracy setup
    out_eval, eval_input, eval_label, accuracy, is_training2 = model.compute_accuracy(sess)

    sess.run(tf.global_variables_initializer())

    # start the queue runners that feed the input pipelines
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # saver to checkpoint the variables after each epoch
    saver = tf.train.Saver()

    num_batches = TRAINING_DATASET_SIZE // FLAGS.BATCH_SIZE
    num_batches_dev = DEV_DATASET_SIZE // FLAGS.BATCH_SIZE

    # add the computation graph to the summary writer
    writer = tf.summary.FileWriter(summary_dir)
    writer.add_graph(sess.graph)
    merged_summaries = tf.summary.merge_all()

    for epoch in range(1, EPOCHS + 1):
        # train for one epoch, accumulating the losses
        Tsloss = 0
        Tloss = 0
        for batch in range(1, num_batches + 1):
            feed_dict = {learning_rate: LEARNING_RATE, is_training1: True}
            ops = [minimize, softmax_loss, total_loss, merged_summaries]
            _, sloss, loss, summaries = sess.run(ops, feed_dict=feed_dict)
            Tsloss += sloss
            Tloss += loss
        Tsloss /= num_batches
        Tloss /= num_batches
        print("Epoch " + str(epoch) + "/" + str(EPOCHS) +
              " ; Loss " + str(Tloss) + " softmax Loss " + str(Tsloss))

        # training accuracy over the whole training set
        total_accuracy = 0
        for batch in range(1, num_batches + 1):
            input_batch, label_batch = sess.run([train_features, train_labels])
            feed_dict = {eval_input: input_batch, eval_label: label_batch, is_training2: False}
            ops = [out_eval, accuracy]
            _, acc = sess.run(ops, feed_dict=feed_dict)
            total_accuracy += acc
        total_accuracy /= num_batches
        print("TRAINING ACCURACY : " + str(total_accuracy))

        # dev accuracy
        total_accuracy = 0
        for batch in range(1, num_batches_dev + 1):
            input_batch, label_batch = sess.run([dev_features, dev_labels])
            feed_dict = {eval_input: input_batch, eval_label: label_batch, is_training2: False}
            ops = [out_eval, accuracy]
            _, acc = sess.run(ops, feed_dict=feed_dict)
            total_accuracy += acc
        total_accuracy /= num_batches_dev
        print("DEV ACCURACY : " + str(total_accuracy))

        # write the last batch's summaries to the log directory
        writer.add_summary(summaries, epoch)
        print("Summary written to logdir")

        # save a checkpoint for each epoch
        make_dir_if_not_exists(model_dir)
        save_path = saver.save(sess, model_dir + "model" + str(epoch) + ".ckpt")
        print("Model saved in path: %s" % save_path)
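# train_model() checkpoints into model_dir via a make_dir_if_not_exists() helper
# that is not shown here. A minimal, hypothetical implementation:
import os

def make_dir_if_not_exists(path):
    """Create the directory (and any missing parents) if it does not already exist."""
    if not os.path.exists(path):
        os.makedirs(path)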