def main():
    test_args = arglib.TestArgs()
    args, str_args = test_args.args, test_args.str_args
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    Writer.set_writer(args.results_dir)

    id_model_path = args.pretrained_models_path.joinpath('vggface2.h5')
    stylegan_G_synthesis_path = str(
        args.pretrained_models_path.joinpath(
            f'stylegan_G_{args.resolution}x{args.resolution}_synthesis'))
    utils.landmarks_model_path = str(
        args.pretrained_models_path.joinpath(
            'shape_predictor_68_face_landmarks.dat'))

    stylegan_G_synthesis = StyleGAN_G_synthesis(
        resolution=args.resolution, is_const_noise=args.const_noise)
    stylegan_G_synthesis.load_weights(stylegan_G_synthesis_path)

    network = Network(args, id_model_path, stylegan_G_synthesis)
    network.test()

    inference = Inference(args, network)
    test_func = getattr(inference, args.test_func)
    test_func()
def deploy(args, data_loader):
    model = Network(k=args.network_k,
                    att_type=args.network_att_type,
                    kernel3=args.kernel3,
                    width=args.network_width,
                    dropout=args.network_dropout,
                    compensate=True,
                    norm=args.norm,
                    inp_channels=args.input_channels)
    print(model)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    checkpoint_path = os.path.join(args.logdir, 'best_checkpoint.pth')
    if os.path.isfile(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['state_dict'])
    else:
        raise Exception("Couldn't load checkpoint.")
    model.eval()  # inference mode: disable dropout

    df = pd.DataFrame(columns=['img', 'label', 'pred'])
    with tqdm(enumerate(data_loader)) as pbar:
        for i, (images, labels) in pbar:
            raw_label = labels
            raw_images = images
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()
            images.requires_grad = True

            # Forward pass
            outputs, att, localised = model(images, True)
            localised = F.softmax(localised.data, 3)[..., 1]
            predicted = torch.argmax(outputs.data, 1)
            # Saliency: gradient of the positive-class logit w.r.t. the input
            saliency = torch.autograd.grad(outputs[:, 1].sum(), images)[0].data

            localised = localised[0].cpu().numpy()
            saliency = torch.sqrt((saliency[0] ** 2).mean(0)).cpu().numpy()
            raw_img = np.transpose(raw_images.numpy(), (0, 2, 3, 1)).squeeze()

            np.save(os.path.join(args.outpath, 'pred_{}.npy'.format(i)), localised)
            np.save(os.path.join(args.outpath, 'sal_{}.npy'.format(i)), saliency)
            df.loc[len(df)] = [
                i,
                raw_label.numpy().squeeze(),
                predicted.cpu().numpy().squeeze()
            ]

    df.to_csv(os.path.join(args.outpath, 'pred.csv'), index=False)
    print('done - stopping now')
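# The saliency computation in deploy() takes the gradient of the positive-class
# logit with respect to the input and collapses channels by RMS. A minimal,
# self-contained sketch of that pattern (the toy model below is illustrative,
# not the repo's Network class):
import torch
import torch.nn as nn


def input_gradient_saliency(model, images):
    # d(logit for class 1)/d(input), reduced to one value per pixel
    images = images.clone().requires_grad_(True)
    logits = model(images)
    grad = torch.autograd.grad(logits[:, 1].sum(), images)[0]
    return torch.sqrt((grad ** 2).mean(dim=1))  # shape (N, H, W)


toy_model = nn.Sequential(nn.Flatten(), nn.Linear(8 * 8, 2))
saliency = input_gradient_saliency(toy_model, torch.randn(2, 1, 8, 8))
print(saliency.shape)  # torch.Size([2, 8, 8])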
def __init__(self, num_samples, burn_in, population_size, topology, train_data,
             test_data, directory, temperature, swap_sample, parameter_queue,
             problem_type, main_process, event, active_chains, num_accepted,
             swap_interval, max_limit=5, min_limit=-5):
    # Multiprocessing attributes
    multiprocessing.Process.__init__(self)
    self.process_id = temperature
    self.parameter_queue = parameter_queue
    self.signal_main = main_process
    self.event = event
    self.active_chains = active_chains
    self.num_accepted = num_accepted
    self.event.clear()
    self.signal_main.clear()

    # Parallel Tempering attributes
    self.temperature = temperature
    self.swap_sample = swap_sample
    self.swap_interval = swap_interval
    self.burn_in = burn_in

    # MCMC attributes
    self.num_samples = num_samples
    self.topology = topology
    self.train_data = train_data
    self.test_data = test_data
    self.problem_type = problem_type
    self.directory = directory
    # Total parameter count: input-to-hidden and hidden-to-output weights,
    # plus the hidden and output biases
    self.w_size = (topology[0] * topology[1]) + (topology[1] * topology[2]) \
        + topology[1] + topology[2]
    self.neural_network = Network(topology, train_data, test_data)
    self.min_limits = np.repeat(min_limit, self.w_size)
    self.max_limits = np.repeat(max_limit, self.w_size)
    self.initialize_sampling_parameters()
    self.create_directory(directory)
    PSO.__init__(self, pop_size=population_size, num_params=self.w_size,
                 max_limits=self.max_limits, min_limits=self.min_limits)
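# The w_size computed in __init__ counts every trainable parameter of a
# single-hidden-layer network: both weight matrices plus both bias vectors.
# A quick worked check with a hypothetical topology (not from the source):
import numpy as np

topology = [4, 8, 2]  # [input, hidden, output]
w_size = (topology[0] * topology[1]) + (topology[1] * topology[2]) \
    + topology[1] + topology[2]
assert w_size == 32 + 16 + 8 + 2 == 58
# per-parameter bounds, as built for the PSO / MCMC chain above
min_limits = np.repeat(-5, w_size)
max_limits = np.repeat(5, w_size)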
def main():
    train_args = arglib.TrainArgs()
    args, str_args = train_args.args, train_args.str_args
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    init_logger(args)
    logger = logging.getLogger('main')
    cmd_line = ' '.join(sys.argv)
    logger.info(f'cmd line is: \n {cmd_line}')
    logger.info(str_args)
    logger.debug('Copying src to results dir')
    Writer.set_writer(args.results_dir)

    if not args.debug:
        description = input('Please write a short description of this run\n')
        desc_file = args.results_dir.joinpath('description.txt')
        with desc_file.open('w') as f:
            f.write(description)

    id_model_path = args.pretrained_models_path.joinpath('vggface2.h5')
    stylegan_G_synthesis_path = str(
        args.pretrained_models_path.joinpath(
            f'stylegan_G_{args.resolution}x{args.resolution}_synthesis'))
    landmarks_model_path = str(
        args.pretrained_models_path.joinpath('face_utils/keypoints'))
    face_detection_model_path = str(
        args.pretrained_models_path.joinpath('face_utils/detector'))
    arcface_model_path = str(
        args.pretrained_models_path.joinpath('arcface_weights/weights-b'))
    utils.landmarks_model_path = str(
        args.pretrained_models_path.joinpath(
            'shape_predictor_68_face_landmarks.dat'))

    stylegan_G_synthesis = StyleGAN_G_synthesis(
        resolution=args.resolution, is_const_noise=args.const_noise)
    stylegan_G_synthesis.load_weights(stylegan_G_synthesis_path)

    network = Network(args, id_model_path, stylegan_G_synthesis,
                      landmarks_model_path, face_detection_model_path,
                      arcface_model_path)
    data_loader = DataLoader(args)
    trainer = Trainer(args, network, data_loader)
    trainer.train()
def main():
    train_dataset = MNIST(root='./data', train=True, download=True,
                          transform=transforms.ToTensor())
    test_dataset = MNIST(root='./data', train=False, download=True,
                         transform=transforms.ToTensor())
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=2)

    net = Network(1, 128, 10, 10)
    if USE_CUDA:
        net = net.cuda()
    opt = optim.SGD(net.parameters(), lr=LEARNING_RATE,
                    weight_decay=WEIGHT_DECAY, momentum=.9, nesterov=True)

    for epoch in range(1, EPOCHS + 1):
        print('[Epoch %d]' % epoch)

        train_loss = 0
        train_correct, train_total = 0, 0
        start_point = time.time()

        for inputs, labels in train_loader:
            if USE_CUDA:
                inputs, labels = inputs.cuda(), labels.cuda()

            opt.zero_grad()
            # F.cross_entropy expects raw logits: it applies log_softmax itself
            preds = net(inputs)
            loss = F.cross_entropy(preds, labels)
            loss.backward()
            opt.step()

            train_loss += loss.item()
            train_correct += (preds.argmax(dim=1) == labels).sum().item()
            train_total += len(preds)

        print('train-acc : %.4f%% train-loss : %.5f' %
              (100 * train_correct / train_total, train_loss / len(train_loader)))
        print('elapsed time: %ds' % (time.time() - start_point))

        test_loss = 0
        test_correct, test_total = 0, 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                if USE_CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                preds = net(inputs)
                test_loss += F.cross_entropy(preds, labels).item()
                test_correct += (preds.argmax(dim=1) == labels).sum().item()
                test_total += len(preds)

        print('test-acc : %.4f%% test-loss : %.5f' %
              (100 * test_correct / test_total, test_loss / len(test_loader)))

        torch.save(net.state_dict(), './checkpoint/checkpoint-%04d.bin' % epoch)
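# main() above passes raw logits to F.cross_entropy, which applies log_softmax
# internally before the negative log-likelihood; softmaxing first would squash
# the logits twice. A quick sanity check of that identity:
import torch
import torch.nn.functional as F

logits = torch.randn(8, 10)
labels = torch.randint(0, 10, (8,))
assert torch.allclose(
    F.cross_entropy(logits, labels),
    F.nll_loss(F.log_softmax(logits, dim=1), labels))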
def main(params):
    print("Loading dataset ... ")
    with open(params['train_data_pkl'], 'rb') as f:
        train_data = pkl.load(f)
    with open(params['train_anno_pkl'], 'rb') as f:
        train_anno = pkl.load(f)
    """
    with open(params['val_data_pkl'], 'rb') as f:
        val_data = pkl.load(f)
    with open(params['val_anno_pkl'], 'rb') as f:
        val_anno = pkl.load(f)
    """

    # Train dataset and Train dataloader
    train_data = np.transpose(train_data, (0, 3, 1, 2))  # NHWC -> NCHW
    train_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(train_data), torch.LongTensor(train_anno))
    train_loader = dataloader.DataLoader(train_dataset, params['batch_size'],
                                         shuffle=True, collate_fn=collate_fn)
    """
    # Validation dataset and Validation dataloader
    val_data = np.transpose(val_data, (0, 3, 1, 2))
    val_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(val_data), torch.LongTensor(val_anno))
    val_loader = dataloader.DataLoader(
        val_dataset, params['batch_size'], collate_fn=collate_fn)
    """

    # the number of layers in each dense block
    n_layers_list = [4, 5, 7, 10, 12, 15, 12, 10, 7, 5, 4]

    print("Constructing the network ... ")
    # Define the network
    densenet = Network(n_layers_list, 5).to(device)
    if os.path.isfile(params['model_from']):
        print("Starting from the saved model")
        densenet.load_state_dict(torch.load(params['model_from']))
    else:
        print("Couldn't find the saved model")
        print("Starting from scratch")

    print("Training the model ...")
    # hyperparameters, optimizer, criterion
    learning_rate = params['lr']
    optimizer = torch.optim.RMSprop(densenet.parameters(), learning_rate,
                                    weight_decay=params['l2_reg'])
    criterion = nn.CrossEntropyLoss()

    for epoch in range(params['max_epoch']):
        for i, (img, label) in enumerate(train_loader):
            img = img.to(device)
            label = label.to(device)

            # forward propagation
            pred = densenet(img)
            # flatten predictions and labels to per-pixel rows
            pred = pred.view((-1, params['num_answers']))
            label = label.view((-1))

            # compute loss
            loss = criterion(pred, label)

            # back-propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print("Epoch: %d, Steps: [%d/%d], Loss: %.4f" %
                  (epoch, i, len(train_loader), loss.item()))

        # decay the learning rate by re-creating the optimizer
        learning_rate *= 0.995
        optimizer = torch.optim.RMSprop(densenet.parameters(), learning_rate,
                                        weight_decay=params['l2_reg'])

        if (epoch + 1) % 10 == 0:
            print("Saved the model")
            torch.save(densenet.state_dict(), params['model_save'])
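# main() above decays the learning rate by re-building the RMSprop optimizer
# every epoch, which also throws away its running squared-gradient state. A
# sketch of the same 0.995-per-epoch decay that keeps optimizer state, using
# the scheduler API (toy parameter; names are illustrative):
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.RMSprop(params, lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
for epoch in range(3):
    # ... one epoch of optimizer.step() calls would go here ...
    scheduler.step()
    print(optimizer.param_groups[0]['lr'])  # 1e-3 * 0.995 ** (epoch + 1)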
def train(args, train_loader, train_val_loader, val_loader, test_loader):
    seed(args.seed)
    job_id = os.environ.get('SLURM_JOB_ID', 'local')
    print('Starting run {} with:\n{}'.format(job_id, args))
    writer = SummaryWriter(args.logdir)

    columns = ['epoch', 'eval_loss', 'eval_acc', 'eval_prec', 'eval_recall',
               'train_loss', 'train_acc', 'train_prec', 'train_recall',
               'test_loss', 'test_acc', 'test_prec', 'test_recall']
    stats_csv = pd.DataFrame(columns=columns)

    model = Network(k=args.network_k,
                    att_type=args.network_att_type,
                    kernel3=args.kernel3,
                    width=args.network_width,
                    dropout=args.network_dropout,
                    compensate=True,
                    norm=args.norm,
                    inp_channels=args.input_channels)
    print(model)

    epochs = args.num_epochs * args.shrinkage
    milestones = np.array([80, 120, 160])
    milestones *= args.shrinkage
    milestones = list(milestones)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    raw_model = model
    if torch.cuda.device_count() > 1:
        print('using multiple gpus')
        model = torch.nn.DataParallel(model)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    print(criterion)

    if args.opt == 'rmsprop':
        optimizer = torch.optim.RMSprop(raw_model.parameters(), lr=args.lr,
                                        eps=1e-5, weight_decay=args.l2)
    elif args.opt == 'momentum':
        optimizer = torch.optim.SGD(raw_model.parameters(), lr=args.lr,
                                    momentum=0.9, weight_decay=args.l2)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(raw_model.parameters(), lr=args.lr,
                                     eps=1e-5, weight_decay=args.l2)
    lr_schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                       milestones=milestones)

    state = {
        'epoch': 0,
        'step': 0,
        'state_dict': copy.deepcopy(raw_model.state_dict()),
        'optimizer': copy.deepcopy(optimizer.state_dict()),
        'lr_schedule': copy.deepcopy(lr_schedule.state_dict()),
        'best_acc': None,
        'best_epoch': 0,
        'is_best': False,
        'stats_csv': stats_csv,
        'config': vars(args)
    }
    if load_checkpoint(args.logdir, state):
        raw_model.load_state_dict(state['state_dict'])
        optimizer.load_state_dict(state['optimizer'])
        lr_schedule.load_state_dict(state['lr_schedule'])
        stats_csv = state['stats_csv']
    save_checkpoint(args.logdir, state)

    writer.add_text('args/str', str(args), state['epoch'])
    writer.add_text('job_id/str', job_id, state['epoch'])
    writer.add_text('model/str', str(model), state['epoch'])

    # Train the model
    for epoch in range(state['epoch'], epochs):
        lr_schedule.step()
        model.train()

        losses = []
        tps = []
        tns = []
        fps = []
        fns = []
        batch_labels = []
        delayed = 0
        writer.add_scalar('stats/lr', optimizer.param_groups[0]['lr'], epoch + 1)

        with tqdm(train_loader,
                  desc="Epoch [{}/{}]".format(epoch + 1, epochs)) as pbar:
            for images, labels in pbar:
                batch_labels += list(labels)
                if torch.cuda.is_available():
                    if torch.cuda.device_count() == 1:
                        images = images.cuda()
                    labels = labels.cuda()

                # Forward pass
                outputs, att = model(images)
                loss = criterion(outputs, labels)
                predicted = torch.argmax(outputs.data, 1)
                TP, TN, FP, FN = pred_stats(predicted, labels)

                cpu_loss = loss.mean().cpu().item()
                losses += [cpu_loss]
                tps += [TP]
                tns += [TN]
                fps += [FP]
                fns += [FN]

                # Backward and optimize, accumulating gradients over
                # args.delayed_step batches when delayed stepping is enabled
                delayed += 1
                if args.delayed_step > 0:
                    (loss / args.delayed_step).backward()
                else:
                    loss.backward()
                if args.delayed_step == 0 or delayed % args.delayed_step == 0:
                    nn.utils.clip_grad_value_(raw_model.parameters(), 5.)
                    optimizer.step()
                    optimizer.zero_grad()

                    precision, recall, accuracy = precision_recall_accuracy(
                        np.sum(tps), np.sum(tns), np.sum(fps), np.sum(fns))
                    writer.add_scalar('train/loss', np.mean(losses), state['step'])
                    writer.add_scalar('train/precision', precision, state['step'])
                    writer.add_scalar('train/recall', recall, state['step'])
                    writer.add_scalar('train/accuracy', accuracy, state['step'])
                    writer.add_scalar('train/labels', np.mean(batch_labels),
                                      state['step'])
                    state['step'] += 1

                    delayed = 0
                    losses = []
                    tps = []
                    tns = []
                    fps = []
                    fns = []
                    batch_labels = []

                pbar.set_postfix(loss=cpu_loss)

        # step the last backward if the step isn't done yet because of an
        # 'incomplete' delayed / accumulated batch
        if delayed > 0:
            nn.utils.clip_grad_value_(raw_model.parameters(), 5.)
            optimizer.step()
            optimizer.zero_grad()
            precision, recall, accuracy = precision_recall_accuracy(
                np.sum(tps), np.sum(tns), np.sum(fps), np.sum(fns))
            writer.add_scalar('train/loss', np.mean(losses), state['step'])
            writer.add_scalar('train/precision', precision, state['step'])
            writer.add_scalar('train/recall', recall, state['step'])
            writer.add_scalar('train/accuracy', accuracy, state['step'])
            writer.add_scalar('train/labels', np.mean(batch_labels), state['step'])
            state['step'] += 1

        state['epoch'] = epoch + 1
        state['state_dict'] = copy.deepcopy(raw_model.state_dict())
        state['optimizer'] = copy.deepcopy(optimizer.state_dict())
        state['lr_schedule'] = copy.deepcopy(lr_schedule.state_dict())

        if args.opt == 'rmsprop':
            rms_m2 = get_rmsprop_m2(model, optimizer)
            writer.add_scalar('train/rmsprop_m2_min', rms_m2.min(), state['epoch'])
            writer.add_scalar('train/rmsprop_m2_mean', rms_m2.mean(), state['epoch'])
            writer.add_scalar('train/rmsprop_m2_max', rms_m2.max(), state['epoch'])
            writer.add_histogram('train/rmsprop_m2', rms_m2, state['epoch'])

        val_stats = evaluate(model, criterion, val_loader)
        log_evaluation(state['epoch'], val_stats, writer, 'eval')

        if state['best_acc'] is None or state['best_acc'] < val_stats['accuracy']:
            state['is_best'] = True
            state['best_acc'] = val_stats['accuracy']
            state['best_epoch'] = state['epoch']
        else:
            state['is_best'] = False

        if state['is_best'] or state['epoch'] >= epochs or args.test_all:
            train_stats = evaluate(model, criterion, train_val_loader)
            log_evaluation(state['epoch'], train_stats, writer, 'train_eval')
            test_stats = evaluate(model, criterion, test_loader)
            log_evaluation(state['epoch'], test_stats, writer, 'test')
            stats_csv.loc[len(stats_csv)] = [
                state['epoch'],
                val_stats['loss'], val_stats['accuracy'],
                val_stats['precision'], val_stats['recall'],
                train_stats['loss'], train_stats['accuracy'],
                train_stats['precision'], train_stats['recall'],
                test_stats['loss'], test_stats['accuracy'],
                test_stats['precision'], test_stats['recall']]
        else:
            stats_csv.loc[len(stats_csv)] = [
                state['epoch'],
                val_stats['loss'], val_stats['accuracy'],
                val_stats['precision'], val_stats['recall'],
                np.nan, np.nan, np.nan, np.nan,
                np.nan, np.nan, np.nan, np.nan]

        save_checkpoint(args.logdir, state)

    writer.add_text('done/str', 'true', state['epoch'])
    print('done - stopping now')
    writer.close()
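# train() above accumulates gradients over args.delayed_step batches before
# each optimizer step and flushes any incomplete window after the epoch. The
# pattern in isolation (toy model and random data; names are illustrative):
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()
accum_steps, delayed = 4, 0

for step in range(10):
    x, y = torch.randn(8, 4), torch.randint(0, 2, (8,))
    loss = criterion(model(x), y)
    # scale so the accumulated gradient matches one large batch
    (loss / accum_steps).backward()
    delayed += 1
    if delayed % accum_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
        delayed = 0

if delayed > 0:  # flush the incomplete accumulation window
    optimizer.step()
    optimizer.zero_grad()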