class RandomIdSamplerTest(unittest.TestCase):
    """Unit tests for RandomIdSampler on the Market1501 training split.

    `random.shuffle` / `random.sample` are patched to deterministic
    identities so the sampler's output order is predictable.
    """

    def setUp(self):
        # 4 identities x 16 images per identity = one batch of 64.
        self.batch_id = 4
        self.batch_image = 16
        self.data_source = Market1501(root + '/bounding_box_train', transform=ToTensor())
        self.sampler = RandomIdSampler(self.data_source, batch_image=self.batch_image)
        self.data_loader = DataLoader(self.data_source, sampler=self.sampler,
                                      batch_size=self.batch_id * self.batch_image)

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_sampler(self):
        imgs = [img for img in self.sampler]
        # BUG FIX: this was Python 2 code — on Python 3 a `range` object never
        # compares equal to a list and `range + range` raises TypeError, so the
        # expected values must be materialized with list(...).
        self.assertEqual(list(range(16)), imgs[:16])
        # Identity with only 7 images (46..52) is cycled to fill the 16 slots.
        self.assertEqual(list(range(46, 53)) + list(range(46, 53)) + list(range(46, 48)),
                         imgs[16:32])

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_data_loader(self):
        it = self.data_loader.__iter__()
        # First batch: 16 samples each of person ids 0..3.
        _, target = next(it)
        self.assertEqual([0] * 16 + [1] * 16 + [2] * 16 + [3] * 16,
                         target.numpy().tolist())
        # Second batch: person ids 4..7.
        _, target = next(it)
        self.assertEqual([4] * 16 + [5] * 16 + [6] * 16 + [7] * 16,
                         target.numpy().tolist())
def setUp(self):
    """Build the Market1501 data source, id-balanced sampler and loader.

    NOTE(review): this appears to be a duplicate of a test-case setUp
    defined elsewhere in the file; `Market1501`, `RandomIdSampler`,
    `ToTensor` and `root` come from module-level scope — confirm.
    """
    # 4 identities per batch, 16 images per identity -> batch size 64.
    self.batch_id = 4
    self.batch_image = 16
    self.data_source = Market1501(root + '/bounding_box_train', transform=ToTensor())
    self.sampler = RandomIdSampler(self.data_source, batch_image=self.batch_image)
    self.data_loader = DataLoader(self.data_source, sampler=self.sampler,
                                  batch_size=self.batch_id * self.batch_image)
indexes = [] if is_train: for i in range(0,index): indexes.extend(train_folds[i]) for i in range(index+1,5): indexes.extend(train_folds[i]) else: indexes = test_fold self.indexes = indexes def __getitem__(self, index): i = self.indexes[index] drug_i = label_row_inds[i] protein_i = label_col_inds[i] relation = torch.tensor(relationship[drug_i][protein_i]).float().cuda() protein = torch.from_numpy(protein_seq[protein_i]).float().cuda() protein_fp = torch.from_numpy(prot_fp[protein_i]).float().cuda() drug = torch.from_numpy(smiles_actives[drug_i]).float().cuda() drug_phy = torch.from_numpy(drug_fp[drug_i]).float().cuda() return drug, drug_phy, protein, protein_fp, relation def __len__(self): return len(self.indexes) index = np.ones(relationship.shape[0] * relationship.shape[1]) datas = [] datas = [] for i in range(5): datas.append({ 'train': DataLoader(Datas(i, True), batch_size=128, shuffle=True), 'test': DataLoader(Datas(i, False), batch_size=128, shuffle=True) })
def count_acc(logits, label):
    """Return classification accuracy (fraction of argmax hits) as a float.

    NOTE(review): `.type(torch.cuda.FloatTensor)` hard-requires CUDA — this
    will fail on CPU-only machines; confirm GPU is always available here.
    """
    pred = torch.argmax(logits, dim=1)
    return (pred == label).type(torch.cuda.FloatTensor).mean().item()


if __name__ == '__main__':
    # Command-line hyper-parameters for the training run.
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_epoch', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--step_size', type=int, default=5)
    parser.add_argument('--gamma', type=float, default=0.5)
    args = parser.parse_args()

    # Train / val / test loaders; only training shuffles and drops the ragged
    # final batch.
    trainset = MyDataset('train')
    train_loader = DataLoader(dataset=trainset, num_workers=4,
                              batch_size=args.batch_size, shuffle=True,
                              drop_last=True, pin_memory=True)
    valset = MyDataset('val')
    val_loader = DataLoader(dataset=valset, num_workers=4,
                            batch_size=args.batch_size, pin_memory=True)
    testset = MyDataset('test')
    test_loader = DataLoader(dataset=testset, num_workers=4,
                             batch_size=args.batch_size, pin_memory=True)

    model = ConvNet()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # Halve the LR every `step_size` epochs (gamma=0.5).
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=args.step_size,
                                                   gamma=args.gamma)
    model = model.cuda()
    best_acc = 0.0
    for epoch in range(args.max_epoch):
        # NOTE(review): chunk appears truncated here — the rest of the epoch
        # body (training/eval steps) is not visible in this view.
        lr_scheduler.step()
def train(args, train_dataset, model, tokenizer):
    """Train `model` on `train_dataset`; return (global_step, mean loss).

    Handles single-GPU, DataParallel, DistributedDataParallel and apex fp16,
    with gradient accumulation, TensorBoard logging and periodic checkpoints.
    NOTE(review): `loss_graph` / `num_graph` are appended to but not defined
    here — presumably module-level lists; confirm.
    """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    # Either a fixed number of optimizer steps (max_steps) drives the epoch
    # count, or epochs drive the total step count.
    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay); biases and
    # LayerNorm weights are excluded from weight decay.
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank,
                                                          find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info(" Num examples = %d", len(train_dataset))
    logger.info(" Num Epochs = %d", args.num_train_epochs)
    logger.info(" Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info(" Total train batch size (w. parallel, distributed & accumulation) = %d",
                args.train_batch_size * args.gradient_accumulation_steps * (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info(" Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproductibility (even between python 2 and 3)
    for num in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[3]}
            if args.model_type != 'distilbert':
                # XLM, DistilBERT and RoBERTa don't use segment_ids
                inputs['token_type_ids'] = batch[2] if args.model_type in ['bert', 'xlnet'] else None
            outputs = model(**inputs)
            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
            # loss here
            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                # Scale so accumulated gradients average over the virtual batch.
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

            tr_loss += loss.item()
            loss_graph.append(loss.item())
            num_graph.append(num)
            # One optimizer step per `gradient_accumulation_steps` batches.
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if args.local_rank == -1 and args.evaluate_during_training:
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value, global_step)
                    tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args, os.path.join(output_dir, 'training_args.bin'))
                    logger.info("Saving model checkpoint to %s", output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
if __name__ == '__main__': SEED = 0 random.seed(SEED) np.random.seed(SEED) torch.manual_seed(SEED) torch.cuda.manual_seed(SEED) print(args.name) print(args.scale) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model_path = os.path.join(args.name, 'checkpoints', args.model) print('load model: ', model_path) dataset = make_dataset(args) dataloader = DataLoader(dataset=dataset, batch_size=args.batch_size, shuffle=False, drop_last=False, num_workers=args.num_workers) warper = Warper(args) state_dict = torch.load(model_path) warper.load_state_dict(state_dict) warper.to(device) warper.eval() deltas = [] const = make_field(256) for batch, item in tqdm(enumerate(dataloader)): img_p = item['img_p'].to(device) names = item['name'] filenames = item['filename']
def main():
    """Evaluate a multimodal (RGB + IR) segmentation model on the val split,
    print per-class IoU metrics and save a tiled visualization image.
    """
    # (A dead, fully commented-out argparse block was here; configuration now
    #  comes from argparser() below.)
    args = argparser()

    model_file = '/home/ecust/lx/Multimodal/logs/resnet_20190916_093026/model_best.pth.tar'
    root = args.dataset_root
    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset)
    # batch_size=1 so per-image untransform/visualization below is simple.
    val_loader = DataLoader(
        loader(root, split='val', base_size=args.base_size, augmentations=crop),
        batch_size=1, shuffle=False, num_workers=4)
    args.n_classes = loader.NUM_CLASS

    model = Models.model_loader(args.model, args.n_classes,
                                backbone=args.backbone, norm_layer=nn.BatchNorm2d,
                                multi_grid=args.multi_grid,
                                multi_dilation=args.multi_dilation)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__, model_file))
    model_data = torch.load(model_file)
    # Checkpoint may be a bare state_dict or a dict wrapping one.
    try:
        model.load_state_dict(model_data)
    except Exception:
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset))
    visualizations = []
    metrics = runningScore(args.n_classes)
    i = 0
    for rgb, ir, target in tqdm.tqdm(val_loader, total=len(val_loader), ncols=80, leave=False):
        rgb, ir, target = rgb.to(device), ir.to(device), target.to(device)
        score = model(rgb, ir)
        # score = model(ir)
        rgbs = rgb.data.cpu()
        irs = ir.data.cpu()
        lbl_pred = score[0].data.max(1)[1].cpu().numpy()
        lbl_true = target.data.cpu()
        for rgb, ir, lt, lp in zip(rgbs, irs, lbl_true, lbl_pred):
            rgb, ir, lt = val_loader.dataset.untransform(rgb, ir, lt)
            metrics.update(lt, lp)
            i += 1
            # Visualize every 5th image, capped at 9 tiles total.
            if i % 5 == 0:
                if len(visualizations) < 9:
                    viz = visualize_segmentation(
                        lbl_pred=lp, lbl_true=lt, img=rgb, ir=ir,
                        n_classes=args.n_classes, dataloader=val_loader)
                    visualizations.append(viz)

    acc, acc_cls, mean_iu, fwavacc, cls_iu = metrics.get_scores()
    print('''
Accuracy:       {0:.2f}
Accuracy Class: {1:.2f}
Mean IoU:       {2:.2f}
FWAV Accuracy:  {3:.2f}'''.format(acc * 100, acc_cls * 100, mean_iu * 100, fwavacc * 100) + '\n')

    # Per-class IoU table, name-aligned when class names are available.
    class_name = val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')

    viz = get_tile_image(visualizations)
    # img = Image.fromarray(viz)
    # img.save('viz_evaluate.png')
    scipy.misc.imsave('viz_evaluate.png', viz)
# ------------ train data ------------ # # CULane mean, std # mean=(0.3598, 0.3653, 0.3662) # std=(0.2573, 0.2663, 0.2756) # Imagenet mean, std mean = (0.485, 0.456, 0.406) std = (0.229, 0.224, 0.225) transform_train = Compose(Resize(resize_shape), Darkness(5), Rotation(2), ToTensor(), Normalize(mean=mean, std=std)) dataset_name = exp_cfg['dataset'].pop('dataset_name') Dataset_Type = getattr(dataset, dataset_name) train_dataset = Dataset_Type(Dataset_Path[dataset_name], "train", transform_train) train_loader = DataLoader(train_dataset, batch_size=exp_cfg['dataset']['batch_size'], shuffle=True, collate_fn=train_dataset.collate, num_workers=8) # ------------ val data ------------ transform_val = Compose(Resize(resize_shape), ToTensor(), Normalize(mean=mean, std=std)) val_dataset = Dataset_Type(Dataset_Path[dataset_name], "val", transform_val) val_loader = DataLoader(val_dataset, batch_size=8, collate_fn=val_dataset.collate, num_workers=4) # Tests testing = False test_size = 0.1
def main():
    """Train/evaluate an AlignedReID-style person re-identification model.

    Uses a softmax (classification) loss plus an aligned triplet (metric)
    loss; periodically evaluates rank-1 and checkpoints the best model.
    """
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    pin_memory = True if use_gpu else False

    # Tee stdout to a log file (train vs. test log depending on mode).
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root, name=args.dataset, split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
        nuscenes_root=args.nuscenes_root)

    # data augmentation
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Identity-balanced sampling so each batch contains num_instances images
    # per person (needed by the triplet loss); drop_last keeps batches full.
    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids,
                              loss={'softmax', 'metric'}, aligned=True, use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    if args.labelsmooth:
        criterion_class = CrossEntropyLabelSmooth(
            num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    else:
        criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
    criterion_metric = TripletLossAlignedReID(margin=args.margin)
    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, criterion_metric, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0:
            scheduler.step()

        # Precedence: (past start_eval AND on an eval_step boundary) OR final
        # epoch — evaluate and checkpoint.
        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            # NOTE(review): 'epoch' stored here is 0-based while the filename
            # uses epoch + 1 — confirm resume logic expects this.
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
# at index 0 with_unk = {} for word, idx in embeddings._w2idx.items(): with_unk[word] = idx + 1 vocab.update(with_unk) # Import datasets # This will update vocab with words not found in embeddings sfu = SFUDataset(vocab, False, "../data") maintask_train_iter = sfu.get_split("train") maintask_dev_iter = sfu.get_split("dev") maintask_test_iter = sfu.get_split("test") maintask_loader = DataLoader(maintask_train_iter, batch_size=args.BATCH_SIZE, collate_fn=maintask_train_iter.collate_fn, shuffle=True) tag_2_idx = {START_TAG: 3, STOP_TAG: 4, "B": 0, "I": 1, "O": 2} main_train_x = [[vocab.ws2ids(s) for s in doc] for doc, pol, scope, rel in maintask_train_iter] main_train_y = [[sfu.labels[pol]] for doc, pol, rel, scope in maintask_train_iter] aux_train_y = [[[tag_2_idx[w] for w in s] for s in scope] for doc, pol, rel, scope in maintask_train_iter] main_dev_x = [[vocab.ws2ids(s) for s in doc] for doc, pol, scope, rel in maintask_dev_iter] main_dev_y = [[sfu.labels[pol]] for doc, pol, rel, scope in maintask_dev_iter]
logger.info("Loading validation dataset") validationDataset = Dataset(config["path_to_processed_MAPS"], "validation") logger.info("Done") trainDatasetMean = trainDataset.mean() batchCounter = 0 for epoch in range(config["epochs_num"]): logger.info("Epoch %s" % (epoch, )) model = model.train() dataloader = DataLoader(trainDataset, batch_size=config["mini_batch_size"], shuffle=True) epochLossValue = 0 logger.info("Start training") for batchX, batchy in tqdm.tqdm(dataloader): batchX -= trainDatasetMean batchX, batchy = batchX.to(device), batchy.to(device) optimizer.zero_grad() output = model(batchX) loss = criterion(output, batchy.float())
TRAIN_REGRESSOR = False # CHECKPOINT = 'checkpoints/classification 12-03-2021 22-31-17 epoch-2.pth' CHECKPOINT = None if not os.path.exists("checkpoints"): os.mkdir("checkpoints") model = Network(MODEL_NAME, NUM_CLASSES) model = model.to(DEVICE) for param in model.parameters(): param.requires_grad = False train_set = Full_Images_Data(data_dir=DATA_PATH,split="train") test_set = Full_Images_Data(data_dir=DATA_PATH,split="test") train_loader = DataLoader(train_set,batch_size=BATCH_SIZE,shuffle=True,drop_last=False) test_loader = DataLoader(test_set,batch_size=BATCH_SIZE,shuffle=True,drop_last=False) def batch_to_rois(batch, mode='classfication'): imgs, labels = batch imgs = imgs.to(DEVICE) roi_batch, indices = roi_pooling(model.features(imgs), labels[:,:,:4]) roi_batch = roi_batch.to(DEVICE) b,n = labels.shape[:2] if mode=='classfication': roi_labels = labels[:,:,4].reshape(b*n)[indices].long().to(DEVICE) else: roi_labels = labels[:,:,5:].reshape(b*n,4)[indices].type(torch.float32).to(DEVICE) return roi_batch, roi_labels # ------------- ------------- ------------- -----
def train(params):
    """Train a differentially-private autoencoder; return (model, loss frame).

    Only the decoder's parameters are given to the DP optimizer. Returns the
    trained autoencoder and a DataFrame of per-iteration train/validation
    losses.
    """
    dataset = {
        'mimic': mimic_dataset,
    }[params['dataset']]
    _, train_dataset, validation_dataset, _ = dataset.get_datasets()

    # Load the entire validation split as one tensor for fast eval each step.
    x_validation = next(
        iter(DataLoader(validation_dataset, batch_size=len(validation_dataset)))).to(
            params['device'])

    autoencoder = Autoencoder(
        example_dim=np.prod(train_dataset[0].shape),
        compression_dim=params['compress_dim'],
        binary=params['binary'],
        device=params['device'],
    )

    autoencoder_optimizer = dp_optimizer.DPAdam(
        l2_norm_clip=params['l2_norm_clip'],
        noise_multiplier=params['noise_multiplier'],
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        params=autoencoder.get_decoder().parameters(),
        lr=params['lr'],
        betas=(params['b1'], params['b2']),
        weight_decay=params['l2_penalty'],
    )

    # BCE summed per-example then averaged for binary data; MSE otherwise.
    autoencoder_loss = lambda inp, target: nn.BCELoss(reduction='none')(
        inp, target).sum(dim=1).mean(dim=0) if params['binary'] else nn.MSELoss()

    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(len(train_dataset), params['minibatch_size'],
                         params['noise_multiplier'], params['iterations'],
                         params['delta']),
        params['delta'],
    ))

    minibatch_loader, microbatch_loader = sampling.get_data_loaders(
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        iterations=params['iterations'],
    )

    iteration = 0
    train_losses, validation_losses = [], []
    for X_minibatch in minibatch_loader(train_dataset):
        autoencoder_optimizer.zero_grad()
        # Per-microbatch gradient clipping as required for DP-SGD-style training.
        for X_microbatch in microbatch_loader(X_minibatch):
            X_microbatch = X_microbatch.to(params['device'])
            autoencoder_optimizer.zero_microbatch_grad()
            output = autoencoder(X_microbatch)
            loss = autoencoder_loss(output, X_microbatch)
            loss.backward()
            autoencoder_optimizer.microbatch_step()
        autoencoder_optimizer.step()

        validation_loss = autoencoder_loss(
            autoencoder(x_validation).detach(), x_validation)

        # NOTE(review): `loss` here is only the LAST microbatch's loss, not a
        # minibatch average — confirm this is the intended logging behavior.
        train_losses.append(loss.item())
        validation_losses.append(validation_loss.item())

        if iteration % 100 == 0:
            print('[Iteration %d/%d] [Loss: %f] [Validation Loss: %f]' % (
                iteration, params['iterations'], loss.item(),
                validation_loss.item()))
        iteration += 1

    return autoencoder, pd.DataFrame(data={
        'train': train_losses,
        'validation': validation_losses
    })
batch_size = 64 # Create problem object. interruptreverserecall = InterruptionReverseRecall(params) # get a sample sample = interruptreverserecall[0] print(repr(sample)) print('__getitem__ works.') # wrap DataLoader on top from torch.utils.data import DataLoader problem = DataLoader(dataset=interruptreverserecall, batch_size=batch_size, collate_fn=interruptreverserecall.collate_fn, shuffle=False, num_workers=0) # generate a batch import time s = time.time() for i, batch in enumerate(problem): print('Batch # {} - {}'.format(i, type(batch))) print('Number of workers: {}'.format(problem.num_workers)) print( 'time taken to exhaust a dataset of size {}, with a batch size of {}: {}s' .format(interruptreverserecall.__len__(), batch_size, time.time() - s))
def main(args):
    """Run multilabel inference (17 classes) and write a Kaggle submission CSV.

    A pretrained CNN encoder feeds an RNN decoder that emits label sequences;
    label id 17 acts as the end-of-sequence marker.
    """
    # Figure out the datatype we will use; this will determine whether we run on
    # CPU or on GPU. Run on GPU by adding the command-line flag --use_gpu
    dtype = torch.FloatTensor
    if args.use_gpu:
        dtype = torch.cuda.FloatTensor

    # Set up a transform to use for validation data at test-time. For validation
    # images we will simply resize so the smaller edge has 224 pixels, then take
    # a 224 x 224 center crop. We will then construct an ImageFolder Dataset object
    # for the validation data, and a DataLoader for the validation set.
    test_transform = T.Compose([
        T.Scale(224),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])
    test_dset = MultiLabelImageFolderTest(args.test_dir, transform=test_transform)
    test_loader = DataLoader(test_dset,
                             batch_size=args.batch_size,
                             num_workers=args.num_workers)

    def transform_target_to_1_0_vect(target):
        # One-hot (multi-hot) encode a list of label indices into a length-17 vector.
        vect = np.zeros((17,))
        vect[target] = 1
        return vect

    # Now that we have set up the data, it's time to set up the model.
    # For this example we will finetune a densenet-169 model which has been
    # pretrained on ImageNet.
    # model = torchvision.models.densenet169(pretrained=True)
    encoder = EncoderCNN(dtype, model_type='densenet')
    encoder.load_state_dict(torch.load(args.cnn_load_path))
    encoder.type(dtype)
    encoder.eval()

    # decoder = DecoderRNN(args.label_embed_size, args.lstm_hidden_size, encoder.output_size, 17, args.combined_hidden_size)
    decoder = DecoderCaptionRNN(args.label_embed_size, args.lstm_hidden_size,
                                encoder.output_size, 17)
    decoder.load_state_dict(torch.load(args.rnn_load_path))
    decoder.type(dtype)
    decoder.eval()

    num_classes = 17
    classes = find_classes(args.label_list_file)
    y_pred = np.zeros((len(test_dset), 17))
    filenames_list = []
    predictions = []
    count = 0
    for x, filenames in test_loader:
        print_progress(count, len(test_dset), 'Running example')
        # volatile=True: legacy (pre-0.4) PyTorch way to disable autograd at inference.
        x_var = Variable(x.type(dtype), volatile=True)
        preds = decoder.sample(encoder(x_var))
        for i in range(preds.size(0)):
            pred = preds[i].data.cpu().numpy().tolist()
            # Truncate the label sequence at the end-of-sequence token (17).
            if 17 in pred:
                ind = pred.index(17)
                pred = pred[:ind]
            predictions.append(' '.join([classes[j] for j in pred]))
        filenames_list += filenames
        count += x.size(0)

    subm = pd.DataFrame()
    subm['image_name'] = filenames_list
    subm['tags'] = predictions
    subm.to_csv(args.sub_file, index=False)
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from vgg import VGG11
from config import Config

# Test-time preprocessing with ImageNet normalization statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

test_dataset = datasets.ImageFolder('data/test', transform)
test_loader = DataLoader(test_dataset, batch_size=Config.batch_size, shuffle=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loc = 'cuda' if torch.cuda.is_available() else 'cpu'

model = VGG11()
model.load_state_dict(torch.load(Config.model_path, map_location=loc))
model = model.to(device)
model.eval()

loss = 0
correct = 0
with torch.no_grad():
    for batch_idx, (x, y) in enumerate(test_loader):
        # BUG FIX: the original called x.cuda()/y.cuda() unconditionally, which
        # crashes on CPU-only machines even though the checkpoint is loaded
        # with map_location precisely to support them. Move to the selected
        # device instead (a no-op copy when already on that device).
        x, y = x.to(device), y.to(device)
        out = model(x)
def main(opts):
    """Distributed (Horovod) full evaluation for video retrieval (VR).

    Each rank evaluates its shard of the dataset; rank 0 gathers and merges
    all partial results and writes a single JSON file.
    """
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    # Only rank 0 logs, to avoid duplicated output.
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "msrvtt_video_only":
        # Video features plus subtitle text.
        video_db = load_video_sub_dataset(
            opts.vfeat_db, opts.sub_txt_db, model_opts.vfeat_interval,
            model_opts)
    else:
        txt_meta = load_json(
            os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(
            opts.vfeat_db, txt_meta,
            model_opts.vfeat_interval,
            model_opts)
    # Sanity check: the query DB path must match the requested split.
    assert opts.split in opts.query_txt_db
    q_txt_db = MsrvttQueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "msrvtt_video_only":
        inf_dataset = VrFullEvalDataset
    else:
        inf_dataset = VrVideoOnlyFullEvalDataset
    eval_dataset = inf_dataset(
        video_ids, video_db, q_txt_db,
        distributed=model_opts.distributed_eval)

    # Prepare model: opts.checkpoint is either a path or a step number.
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = (
        "v_encoder.f_encoder.img_embeddings" +
        ".position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    # Infer the maximum frame sequence length from the positional embedding table.
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16,
                               opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vr(
        model, eval_dataloader, opts.split, opts, model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    # Gather each rank's partial results, then merge on rank 0.
    all_results_list = all_gather_list(results)
    if hvd.rank() == 0:
        # video2idx is identical across ranks; take it once from rank 0.
        all_results = {"video2idx": all_results_list[0]["video2idx"]}
        for rank_id in range(hvd.size()):
            for key, val in all_results_list[rank_id].items():
                if key == "video2idx":
                    continue
                if key not in all_results:
                    all_results[key] = []
                all_results[key].extend(all_results_list[rank_id][key])
        LOGGER.info('All results joined......')
        save_json(
            all_results,
            f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
def main():
    """Train LSTM-family models for the Jigsaw toxicity task and write a submission.

    Pipeline: preprocess text -> (optional) pseudo-label pass that trains one
    model on all of train and predicts soft labels for test -> stratified K-fold
    training with per-epoch checkpoint-weighted averaging and an EMA shadow
    model -> blend, score, and write ``submission.csv``.

    Fix vs. original: the per-fold "cv model" AUC was computed from
    ``valid_preds`` (last epoch only) instead of the checkpoint-weighted
    ``valid_preds_fold`` that is actually stored into ``train_preds``.
    """
    parser = ArgumentParser()
    parser.add_argument('--lstm_model', type=str, required=True,
                        choices=['lstm_gru', 'lstm_capsule_atten', 'lstm_conv'])
    parser.add_argument('--valid', action='store_true')
    args = parser.parse_args()

    # config with defaults for anything the JSON file omits
    config = load_config('./config/lstm_s.json')
    config.setdefault('max_len', 220)
    config.setdefault('max_features', 100000)
    config.setdefault('batch_size', 512)
    config.setdefault('train_epochs', 6)
    config.setdefault('n_splits', 5)
    config.setdefault('start_lr', 1e-4)
    config.setdefault('max_lr', 5e-3)
    config.setdefault('last_lr', 1e-3)
    config.setdefault('warmup', 0.2)
    config.setdefault('pseudo_label', True)
    config.setdefault('mu', 0.9)
    config.setdefault('updates_per_epoch', 10)
    config.setdefault('lstm_gru', {})
    config.setdefault('lstm_capsule_atten', {})
    config.setdefault('lstm_conv', {})
    config.setdefault('device', 'cuda')
    config.setdefault('seed', 1234)

    device = torch.device(config.device)
    OUT_DIR = Path(f'../output/{args.lstm_model}/')
    MODEL_STATE = OUT_DIR / 'pytorch_model.bin'
    submission_file_name = 'valid_submission.csv' if args.valid else 'submission.csv'
    SUBMISSION_PATH = OUT_DIR / submission_file_name
    OUT_DIR.mkdir(exist_ok=True)

    warnings.filterwarnings('ignore')
    seed_torch(config.seed)

    if args.lstm_model == 'lstm_gru':
        neural_net = LstmGruModel
    elif args.lstm_model == 'lstm_capsule_atten':
        neural_net = LstmCapsuleAttenModel
        config.lstm_capsule_atten['max_len'] = config.max_len
    else:
        neural_net = LstmConvModel

    with timer('preprocess'):
        train = pd.read_csv(TRAIN_DATA, index_col='id')
        if args.valid:
            # hold out the last 200k rows of a shuffled train set as "test"
            # NOTE(review): this held-out frame has no 'id' column (it was the
            # index), so the submission write at the bottom may fail in
            # --valid mode — confirm against RakutenLoader of `test['id']`.
            train = train.sample(frac=1, random_state=1029).reset_index(drop=True)
            test = train.tail(200000)
            train = train.head(len(train) - 200000)
        else:
            test = pd.read_csv(TEST_DATA)

        train['comment_text'] = train['comment_text'].apply(preprocess)
        test['comment_text'] = test['comment_text'].apply(preprocess)

        # replace blank with nan
        train['comment_text'].replace('', np.nan, inplace=True)
        test['comment_text'].replace('', np.nan, inplace=True)

        # constant prediction used for rows whose text is missing
        nan_pred = train['target'][train['comment_text'].isna()].mean()

        # fill up the missing values with a sentinel token
        train_x = train['comment_text'].fillna('_##_').values
        test_x = test['comment_text'].fillna('_##_').values

        # targets: column 0 = toxicity, column 1 = per-sample loss weight
        weights = training_weights_s(train, TOXICITY_COLUMN, IDENTITY_COLUMNS)
        train_y = np.vstack([train[TOXICITY_COLUMN].values, weights]).T
        train_y_identity = train[IDENTITY_COLUMNS].values

        train_nan_mask = train_x == '_##_'
        test_nan_mask = test_x == '_##_'
        y_binary = (train_y[:, 0] >= 0.5).astype(int)
        y_identity_binary = (train_y_identity >= 0.5).astype(int)

        vocab = build_vocab(chain(train_x, test_x), config.max_features)
        embedding_matrix = load_embedding(EMBEDDING_FASTTEXT, vocab['token2id'])
        joblib.dump(vocab, OUT_DIR / 'vocab.pkl')
        # NOTE(review): saves into the current working directory, not OUT_DIR —
        # kept as-is because other scripts may load it from cwd.
        np.save('embedding_matrix', embedding_matrix)

        train_x = np.array(tokenize(train_x, vocab, config.max_len))
        test_x = np.array(tokenize(test_x, vocab, config.max_len))

    models = {}
    train_preds = np.zeros((len(train_x)))
    test_preds = np.zeros((len(test_x)))
    ema_train_preds = np.zeros((len(train_x)))
    ema_test_preds = np.zeros((len(test_x)))

    if config.pseudo_label:
        # Stage 1: train once on all of train, predict soft labels for test.
        with timer('pseudo label'):
            train_dataset = TokenDataset(train_x, targets=train_y, maxlen=config.max_len)
            test_dataset = TokenDataset(test_x, maxlen=config.max_len)
            train_sampler = BucketSampler(train_dataset, train_dataset.get_keys(),
                                          bucket_size=config.batch_size * 20,
                                          batch_size=config.batch_size)
            test_sampler = BucketSampler(test_dataset, test_dataset.get_keys(),
                                         batch_size=config.batch_size,
                                         shuffle_data=False)
            train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                                      shuffle=False, sampler=train_sampler,
                                      num_workers=0, collate_fn=collate_fn)
            test_loader = DataLoader(test_dataset, batch_size=config.batch_size,
                                     sampler=test_sampler, shuffle=False,
                                     num_workers=0, collate_fn=collate_fn)

            model = neural_net(embedding_matrix, **config[args.lstm_model]).to(device)
            # EMA shadow model is updated every `ema_n` batches
            ema_model = copy.deepcopy(model)
            ema_model.eval()
            ema_n = int(len(train_loader.dataset) /
                        (config.updates_per_epoch * config.batch_size))
            ema = EMA(model, config.mu, n=ema_n)

            optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
            scheduler = ParamScheduler(optimizer,
                                       config.train_epochs * len(train_loader),
                                       start_lr=config.start_lr, max_lr=config.max_lr,
                                       last_lr=config.last_lr, warmup=config.warmup)

            all_test_preds = []
            for epoch in range(config.train_epochs):
                start_time = time.time()
                model.train()
                for _, x_batch, y_batch in train_loader:
                    x_batch = x_batch.to(device)
                    y_batch = y_batch.to(device)
                    scheduler.batch_step()
                    y_pred = model(x_batch)
                    # column 1 of the target carries the per-sample weight
                    loss = nn.BCEWithLogitsLoss(weight=y_batch[:, 1])(y_pred[:, 0],
                                                                      y_batch[:, 0])
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    ema.on_batch_end(model)

                elapsed_time = time.time() - start_time
                print('Epoch {}/{} \t time={:.2f}s'.format(
                    epoch + 1, config.train_epochs, elapsed_time))

                all_test_preds.append(eval_model(model, test_loader))
                ema.on_epoch_end(model)

            ema.set_weights(ema_model)
            ema_model.lstm.flatten_parameters()
            ema_model.gru.flatten_parameters()

            # later epochs weigh exponentially more in the blend
            checkpoint_weights = np.array([2 ** epoch
                                           for epoch in range(config.train_epochs)])
            checkpoint_weights = checkpoint_weights / checkpoint_weights.sum()

            ema_test_y = eval_model(ema_model, test_loader)
            test_y = np.average(all_test_preds, weights=checkpoint_weights, axis=0)
            test_y = np.mean([test_y, ema_test_y], axis=0)
            test_y[test_nan_mask] = nan_pred
            # pseudo-labels get weight 1.0 when mixed into fold training data
            weight = np.ones((len(test_y)))
            test_y = np.vstack((test_y, weight)).T

            models['model'] = model.state_dict()
            models['ema_model'] = ema_model.state_dict()

    with timer('train'):
        # Stage 2: stratified K-fold CV; with pseudo-labels each fold also
        # trains on a disjoint slice of the pseudo-labelled test set.
        splits = list(
            StratifiedKFold(n_splits=config.n_splits, shuffle=True,
                            random_state=config.seed).split(train_x, y_binary))
        if config.pseudo_label:
            splits_test = list(KFold(n_splits=config.n_splits, shuffle=True,
                                     random_state=config.seed).split(test_x))
            splits = zip(splits, splits_test)

        for fold, split in enumerate(splits):
            print(f'Fold {fold + 1}')
            if config.pseudo_label:
                (train_idx, valid_idx), (train_idx_test, _) = split
                x_train_fold = np.concatenate((train_x[train_idx],
                                               test_x[train_idx_test]), axis=0)
                y_train_fold = np.concatenate((train_y[train_idx],
                                               test_y[train_idx_test]), axis=0)
            else:
                train_idx, valid_idx = split
                x_train_fold = train_x[train_idx]
                y_train_fold = train_y[train_idx]
            x_valid_fold = train_x[valid_idx]
            y_valid_fold = train_y[valid_idx]
            valid_nan_mask = train_nan_mask[valid_idx]
            y_valid_fold_binary = y_binary[valid_idx]
            y_valid_fold_identity_binary = y_identity_binary[valid_idx]
            evaluator = JigsawEvaluator(y_valid_fold_binary,
                                        y_valid_fold_identity_binary)

            train_dataset = TokenDataset(x_train_fold, targets=y_train_fold,
                                         maxlen=config.max_len)
            valid_dataset = TokenDataset(x_valid_fold, targets=y_valid_fold,
                                         maxlen=config.max_len)
            train_sampler = BucketSampler(train_dataset, train_dataset.get_keys(),
                                          bucket_size=config.batch_size * 20,
                                          batch_size=config.batch_size)
            valid_sampler = BucketSampler(valid_dataset, valid_dataset.get_keys(),
                                          batch_size=config.batch_size,
                                          shuffle_data=False)
            train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                                      shuffle=False, sampler=train_sampler,
                                      num_workers=0, collate_fn=collate_fn)
            valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size,
                                      shuffle=False, sampler=valid_sampler,
                                      collate_fn=collate_fn)

            model = neural_net(embedding_matrix, **config[args.lstm_model]).to(device)
            ema_model = copy.deepcopy(model)
            ema_model.eval()
            ema_n = int(len(train_loader.dataset) /
                        (config.updates_per_epoch * config.batch_size))
            ema = EMA(model, config.mu, n=ema_n)

            optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
            scheduler = ParamScheduler(optimizer,
                                       config.train_epochs * len(train_loader),
                                       start_lr=config.start_lr, max_lr=config.max_lr,
                                       last_lr=config.last_lr, warmup=config.warmup)

            all_valid_preds = []
            all_test_preds = []
            for epoch in range(config.train_epochs):
                start_time = time.time()
                model.train()
                for _, x_batch, y_batch in train_loader:
                    x_batch = x_batch.to(device)
                    y_batch = y_batch.to(device)
                    scheduler.batch_step()
                    y_pred = model(x_batch)
                    loss = nn.BCEWithLogitsLoss(weight=y_batch[:, 1])(y_pred[:, 0],
                                                                      y_batch[:, 0])
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    ema.on_batch_end(model)

                valid_preds = eval_model(model, valid_loader)
                valid_preds[valid_nan_mask] = nan_pred
                all_valid_preds.append(valid_preds)
                auc_score, _ = evaluator.get_final_metric(valid_preds)
                elapsed_time = time.time() - start_time
                print('Epoch {}/{} \t auc={:.5f} \t time={:.2f}s'.format(
                    epoch + 1, config.train_epochs, auc_score, elapsed_time))

                # NOTE(review): `test_loader` only exists when pseudo_label is
                # True — this line would raise NameError otherwise. Confirm.
                all_test_preds.append(eval_model(model, test_loader))
                models[f'model_{fold}{epoch}'] = model.state_dict()
                ema.on_epoch_end(model)

            ema.set_weights(ema_model)
            ema_model.lstm.flatten_parameters()
            ema_model.gru.flatten_parameters()
            models[f'ema_model_{fold}'] = ema_model.state_dict()

            checkpoint_weights = np.array([2 ** epoch
                                           for epoch in range(config.train_epochs)])
            checkpoint_weights = checkpoint_weights / checkpoint_weights.sum()

            valid_preds_fold = np.average(all_valid_preds,
                                          weights=checkpoint_weights, axis=0)
            valid_preds_fold[valid_nan_mask] = nan_pred
            # BUGFIX: score the checkpoint-weighted fold predictions, not the
            # last epoch's `valid_preds`.
            auc_score, _ = evaluator.get_final_metric(valid_preds_fold)
            print(f'cv model \t auc={auc_score:.5f}')

            ema_valid_preds_fold = eval_model(ema_model, valid_loader)
            ema_valid_preds_fold[valid_nan_mask] = nan_pred
            auc_score, _ = evaluator.get_final_metric(ema_valid_preds_fold)
            print(f'EMA model \t auc={auc_score:.5f}')

            train_preds[valid_idx] = valid_preds_fold
            ema_train_preds[valid_idx] = ema_valid_preds_fold
            test_preds_fold = np.average(all_test_preds,
                                         weights=checkpoint_weights, axis=0)
            ema_test_preds_fold = eval_model(ema_model, test_loader)
            test_preds += test_preds_fold / config.n_splits
            ema_test_preds += ema_test_preds_fold / config.n_splits

    with timer('evaluate'):
        torch.save(models, MODEL_STATE)
        test_preds[test_nan_mask] = nan_pred
        ema_test_preds[test_nan_mask] = nan_pred

        evaluator = JigsawEvaluator(y_binary, y_identity_binary)
        auc_score, _ = evaluator.get_final_metric(train_preds)
        ema_auc_score, _ = evaluator.get_final_metric(ema_train_preds)
        print(f'cv score: {auc_score:<8.5f}')
        print(f'EMA cv score: {ema_auc_score:<8.5f}')

        # blend raw and EMA predictions 50/50
        train_preds = np.mean([train_preds, ema_train_preds], axis=0)
        test_preds = np.mean([test_preds, ema_test_preds], axis=0)
        auc_score, _ = evaluator.get_final_metric(train_preds)
        print(f'final prediction score: {auc_score:<8.5f}')

        if config.pseudo_label:
            # mix in 10% of the stage-1 pseudo-label predictions
            test_preds = test_preds * 0.9 + test_y[:, 0] * 0.1

    submission = pd.DataFrame({
        'id': test['id'],
        'prediction': test_preds
    })
    submission.to_csv(SUBMISSION_PATH, index=False)
def main():
    """Train a hero2vec embedding model on a Dota match DataFrame.

    Usage: ``python script.py <data_csv> <hero2ix_json>``.

    Splits the data 80/10/10 into train/dev/test, trains ``CBOHBilayer``,
    reports top-k test accuracy, then saves embeddings, a pickled model,
    a loss plot, and a 2-D embedding plot under ``./output/hero/``.

    Fix vs. original: the pickle file handle was opened without ever being
    closed; it is now managed by a ``with`` block.
    """
    data_dir = sys.argv[1]
    hero2ix_dir = sys.argv[2]

    # import DataFrame and hero2ix dictionary
    heroes_df_dota = pd.read_csv(data_dir, index_col=0)
    heroes_df_dota = heroes_df_dota.dropna().reset_index(drop=True)
    with open(hero2ix_dir, 'r') as fp:
        hero2ix = json.load(fp)
    print(len(heroes_df_dota))

    # train/dev/test split: 80% / 10% / 10%, in original row order
    split_1 = int(len(heroes_df_dota)*0.8)
    split_2 = int(len(heroes_df_dota)*0.9)
    heroes_train_dota = heroes_df_dota.iloc[:split_1]
    heroes_dev_dota = heroes_df_dota.iloc[split_1:split_2]
    heroes_test_dota = heroes_df_dota.iloc[split_2:]

    # build dataset generators
    train_gen = DataFrameIterator(heroes_train_dota, hero2ix)
    dev_gen = DataFrameIterator(heroes_dev_dota, hero2ix)
    test_gen = DataFrameIterator(heroes_test_dota, hero2ix)

    # Use Dataloader class in pytorch to generate batched data;
    # test uses a sequential sampler so results are reproducible
    batch_size = 16
    loader_train = DataLoader(train_gen, batch_size=batch_size,
                              sampler=sampler.RandomSampler(train_gen),
                              num_workers=4)
    loader_dev = DataLoader(dev_gen, batch_size=batch_size,
                            sampler=sampler.RandomSampler(dev_gen),
                            num_workers=4)
    loader_test = DataLoader(test_gen, batch_size=batch_size,
                             sampler=sampler.SequentialSampler(test_gen),
                             num_workers=4)

    # define model (one of three models in hero2vec.py)
    model = CBOHBilayer(embedding_dim=20, heropool_size=len(hero2ix),
                        hidden_dim=20)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # run train
    losses = train(model=model, dataloader=loader_train, devloader=loader_dev,
                   loss_function=loss_function, init_lr=0.1, epochs=20,
                   lr_decay_epoch=8, print_epoch=2, gpu=False)

    # check test accuracy
    print('Top3, Top5 and Top 10 accuracy: ',
          accuracy_in_train(model, dataloader=loader_test,
                            batch_size=batch_size, gpu=False))

    # save embeddings as numpy arrays
    output_dir = './output/hero/hero_embeddings.npy'
    save_embeddings(model, filename=output_dir)

    # pickle model (BUGFIX: close the file handle via a context manager)
    pickle_dir = './output/hero/model.p'
    with open(pickle_dir, 'wb') as pf:
        pickle.dump(obj=model, file=pf)

    # plot loss vs epoch
    plot_loss(losses, './output/hero/loss_hitory.png')

    # project embeddings to 2d plane
    plot_embeddings(model, hero2ix)
def test(model, annotations_file, imagedir, outdir, outname="test", epoch=None,
         record_individual_scores=False, print_batch_metrics=False):
    """Evaluate `model` on a COCO-style test set and write accuracy files.

    Runs single-sample batches (batch_size=1), tracks per-class accuracy via
    AccuracyLogger, optionally records per-annotation scores, and saves the
    accuracy tables under `outdir`.

    Arguments:
        model: classifier taking (context_images, target_images, bbox) and
            returning (batchsize, num_classes) logits.
        annotations_file: COCO annotations path for COCODatasetWithID.
        imagedir: image root directory.
        outdir: output directory (created if missing).
        outname: prefix for the saved files.
        epoch: If specified, it is used to include the epoch in the output
            file name.
        record_individual_scores: also log per-annotation predictions.
        print_batch_metrics: print loss/accuracy for every batch.

    Returns:
        The populated AccuracyLogger instance.
    """
    pathlib.Path(outdir).mkdir(exist_ok=True, parents=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # ImageNet normalization constants
    testset = COCODatasetWithID(annotations_file, imagedir,
                                image_size=(224, 224),
                                normalize_means=[0.485, 0.456, 0.406],
                                normalize_stds=[0.229, 0.224, 0.225])
    dataloader = DataLoader(testset, batch_size=1, num_workers=1,
                            shuffle=False, drop_last=False)

    # loss is only needed for the optional per-batch printout
    if print_batch_metrics:
        criterion = nn.CrossEntropyLoss()

    test_accuracy = AccuracyLogger(testset.idx2label)
    if record_individual_scores:
        individual_scores = IndividualScoreLogger(testset.idx2label)

    model.eval()  # set eval mode
    with torch.no_grad():
        for i, (context_images, target_images, bbox, labels_cpu,
                annotation_ids) in enumerate(
                    tqdm(dataloader, desc="Test Batches", leave=True)):
            context_images = context_images.to(device)
            target_images = target_images.to(device)
            bbox = bbox.to(device)
            labels = labels_cpu.to(
                device
            )  # keep a copy of labels on cpu to avoid unnecessary transfer back to cpu later

            output = model(
                context_images, target_images,
                bbox)  # output is (batchsize, num_classes) tensor of logits
            _, predictions = torch.max(
                output.detach().to("cpu"),
                1)  # choose idx with maximum score as prediction

            test_accuracy.update(predictions, labels_cpu)
            if record_individual_scores:
                individual_scores.update(predictions.to("cpu"), labels_cpu,
                                         annotation_ids)

            # optional per-batch diagnostics
            if print_batch_metrics:
                batch_loss = criterion(output, labels).item()
                batch_corr = sum(
                    predictions == labels_cpu)  # number of correct predictions
                batch_accuracy = batch_corr  # / batch_size # since batchsize is 1

                print("\t Test Batch {}: \t Loss: {} \t Accuracy: {}".format(
                    i, batch_loss, batch_accuracy))

    print("\nTotal Test Accuracy: {}".format(test_accuracy.accuracy()))
    print("{0:20} {1:10}".format("Class", "Accuracy"))  # header
    for name, acc in test_accuracy.named_class_accuarcies().items():
        print("{0:20} {1:10.4f}".format(name, acc))

    # save accuracies; file name includes the epoch when given
    if epoch is not None:
        test_accuracy.save(outdir,
                           name="{}_accuracies_epoch_{}".format(
                               outname, epoch))
    else:
        test_accuracy.save(outdir, name="{}_accuracies".format(outname))

    if record_individual_scores:
        individual_scores.save(outdir,
                               name="{}_individual_scores".format(outname))

    return test_accuracy
import torch import torch.nn as nn import numpy as np from torch.utils.data import DataLoader from torch.autograd import Variable import torch.optim as optim import nyu_dataset_depth import pix2pix_model import tqdm import Deeper_Depth_Prediction.pytorch.model as model import Deeper_Depth_Prediction.pytorch.weights as weights num_classes = 41 segmentation_dataset_train = nyu_dataset_depth.SegmentationDataset(transforms=nyu_dataset_depth.SegmentationTransform(), use_depth=True) data_loader_train = DataLoader(segmentation_dataset_train, batch_size=1, shuffle=True, num_workers=1) segmentation_dataset_validation = nyu_dataset_depth.SegmentationDataset(path_to_datafolder='./datasets/nyu/val/', transforms= nyu_dataset_depth.SegmentationTransform(False), use_depth=True) data_loader_val = DataLoader(segmentation_dataset_validation, batch_size=1, shuffle=False, num_workers=1) generator = pix2pix_model.Generator(num_classes + 1, 3, instance_norm=False).cuda(0) discriminator = pix2pix_model.Discriminator(num_classes + 1 + 3, instance_norm=False).cuda(0) depth_model = model.Model(1).cuda(0) depth_model.load_state_dict(weights.load_weights(depth_model, 'NYU_ResNet-UpProj.npy', torch.cuda.FloatTensor)) for param in depth_model.parameters(): param.requires_grad = False def berhu(generated_depth, ground_truth_depth): y = generated_depth - ground_truth_depth
def val_dataloader(self):
    """Build the validation loader: DummyDataset, batches of 3, fixed order."""
    dataset = DummyDataset()
    return DataLoader(dataset, batch_size=3, shuffle=False)
test = pd.read_pickle("./dataset/test_dataset.tsv.gzip", compression="gzip") # Embedding (these are learnt using only the train dataset) embedding = fasttext.load_model("./dataset/fasttext_embeddings.bin") # Generate categories categories = dict([(s, i) for i, s in enumerate(train["category"].unique())]) # Load test data dataset = RakutenLoader(test["product"], test["category"], embedding, categories) dataloader_test = DataLoader(dataset, batch_size=100, shuffle=False, num_workers=4, pin_memory=True) # Build model model = BidLSTM(len(categories)) model.load_state_dict(torch.load(load_model)) correct_result = [] predicted_result = [] for t_batch, sample_batched_test in enumerate(tqdm(dataloader_test)): inputs, labels = sample_batched_test labels = labels.flatten()
def evaluate(args, model, tokenizer, prefix=""):
    """Run evaluation for a (transformers-style) classification model.

    Handles the MNLI double evaluation (matched / mismatched), accumulates
    loss and logits over the eval dataloader, computes task metrics, writes
    them to ``eval_results.txt``, then runs the project's output-file /
    graph post-processing.

    Fix vs. original: ``import make_output_file_graph.py`` is not a valid way
    to import the module (it tries to import a submodule named ``py`` and
    never binds the ``ifg`` alias used on the next line); replaced with
    ``import make_output_file_graph as ifg``.

    Returns:
        dict mapping metric names to values, merged over all eval tasks.
    """
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
    eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,)

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(args, eval_task, tokenizer, evaluate=True)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'labels': batch[3]}
                if args.model_type != 'distilbert':
                    # XLM, DistilBERT and RoBERTa don't use segment_ids
                    inputs['token_type_ids'] = batch[2] if args.model_type in ['bert', 'xlnet'] else None
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            # accumulate logits / labels across batches
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

        eval_loss = round(eval_loss / nb_eval_steps, 10)
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(eval_task, preds, out_label_ids)
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
        # here is output
        print("------------------------- eval_loss = ", eval_loss)
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

        ## -------------------------------------
        # Make output file and image graph.
        # BUGFIX: was `import make_output_file_graph.py`, which never binds
        # the `ifg` name used below.
        import make_output_file_graph as ifg
        Make_out_graph = ifg.make_output_file_graph(preds)
        one, zero = Make_out_graph.make_output_labels()
        Make_out_graph.make_output_labels_num(one, zero)
        #Make_out_graph.make_graph(one, zero)
        #Make_out_graph.make_graph2(num_, loss_graph)
        Make_out_graph.make_bert_pred_bad_des()
        Make_out_graph.make_bad_long_des_file()
        Make_out_graph.make_bad_shot_des_file()

        # NOTE(review): `bzl` and `TFIDF` are not imported in this file's
        # visible scope — confirm they are module-level imports elsewhere.
        count = 0
        bzl.bring_output_label()
        filename = "sd1_allp.tsv"
        bzl.bring_test_file(filename, count)
        bzl.print_bad_des(filename)
        TFIDF.main__run()
        # -------------------------------------

    return results
def eval_mlc_cam_1(self): net = self.config.Net(num_classes=self.config.mlc_num_classes) # net = nn.DataParallel(net).cuda() # cudnn.benchmark = True net = net.cuda() _, _, dataset_cam = DatasetUtil.get_dataset_by_type( DatasetUtil.dataset_type_mlc, image_size=self.config.mlc_size, scales=self.config.scales, data_root=self.config.data_root_path, return_image_info=True, sampling=self.config.sampling) data_loader_cam = DataLoader(dataset_cam, self.config.mlc_batch_size, shuffle=False, num_workers=16) Tools.print("image num: {}".format(len(dataset_cam))) Tools.print("Load model form {}".format(self.config.model_file_name)) self.load_model(net=net, model_file_name=self.config.model_file_name) net.eval() with torch.no_grad(): for _, (inputs, labels, image_paths) in tqdm(enumerate(data_loader_cam), total=len(data_loader_cam)): all_logits = 0 all_features = [] for input_one in inputs: input_one_cuda = input_one.float().cuda() logits, out_features = net.forward(input_one_cuda, is_vis=True) all_logits += torch.sigmoid(logits).detach().cpu().numpy() all_features.append(out_features) pass logits = all_logits / len(inputs) # 标签选择策略 label_for_cam = self.label_select_strategy( logits=logits, image_labels=labels.numpy(), thr=self.config.top_k_thr) # 生成 CAM cam_list = self.generate_cam(all_features=all_features, indexes=label_for_cam) for input_index, image_path_one in enumerate(image_paths): now_name = image_path_one.split("Data/DET/")[1] result_filename = Tools.new_dir( os.path.join(self.config.mlc_cam_pkl_dir, now_name)) cam_one = cam_list[input_index] label_one = labels[input_index].numpy() label_for_cam_one = label_for_cam[input_index] Tools.write_to_pkl(_path=result_filename.replace( ".JPEG", ".pkl"), _data={ "label": label_one, "image_path": image_path_one, "label_for_cam": label_for_cam_one, "cam": cam_one }) pass pass pass pass
X, y = process_data_dir(dir_pair[0]), process_data_dir(dir_pair[1]) IMG_FILES.extend(X) GT_FILES.extend(y) for dir_pair in test_dirs: X, y = process_data_dir(dir_pair[0]), process_data_dir(dir_pair[1]) IMG_FILES_TEST.extend(X) GT_FILES_TEST.extend(y) IMGS_train, GT_train = IMG_FILES, GT_FILES train_folder = DataFolder(IMGS_train, GT_train, True) train_data = DataLoader(train_folder, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS, shuffle=True, drop_last=True) test_folder = DataFolder(IMG_FILES_TEST, GT_FILES_TEST, False) test_data = DataLoader(test_folder, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS, shuffle=False) Sal = SalGAN().cuda() Dis = Discriminator(config.IMG_SIZE).cuda() #Sal._modules = torch.load("checkpoint/model.pth")['net'] optimizer = optim.SGD([ { 'params': Sal.encoder.parameters(),
# set training data directory: rootDir = '/home/armandcomas/datasets/DisentanglingMotion/importing_data/moving_symbols/output/MovingSymbols2_same_4px-OF/train' # rootDir = './datasets/UCF-101-Frames' trainFoldeList = getListOfFolders(trainFolderFile)[::10] # if Kitti dataset: use listOfFolders instead of trainFoldeList # listOfFolders = [name for name in os.listdir(rootDir) if os.path.isdir(os.path.join(rootDir, name))] trainingData = videoDataset(folderList=trainFoldeList, rootDir=rootDir, N_FRAME=N_FRAME, N_FRAME_FOLDER=N_FRAME_FOLDER) dataloader = DataLoader(trainingData, batch_size=BATCH_SIZE, shuffle=True, num_workers=1) ## Initializing r, theta P, Pall = gridRing(N) Drr = abs(P) Drr = torch.from_numpy(Drr).float() Dtheta = np.angle(P) Dtheta = torch.from_numpy(Dtheta).float() # What and where is gamma ## Create the model model = OFModel(Drr, Dtheta, T, PRE, gpu_id) model.cuda(gpu_id) optimizer = torch.optim.Adam(model.parameters(), lr=LR) scheduler = lr_scheduler.MultiStepLR(
ip = self.img_path[index] label = self.labels[index] # 读取图片,像素值在0-255之间 img = Image.open(ip).convert("RGB") if self.transform: img = self.transform(img) return img, label def __len__(self): return len(self.img_path) if __name__ == "__main__": # raw_path = r"D:\GitHub\learn_pytorch\data" # generate_datasets_txtfile(raw_path) txt_path = r"D:\GitHub\learn_pytorch\data\train.txt" # 数据预处理设置 norm_mean = [0.4948052, 0.48568845, 0.44682974] norm_std = [0.24580306, 0.24236229, 0.2603115] norm_transform = transforms.Normalize(norm_mean, norm_std) train_transform = transforms.Compose([ transforms.Resize(32), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), norm_transform ]) train_data = MyDataset(txt_path, transform=train_transform) train_loader = DataLoader(train_data, batch_size=32)
def train(
    self,
    train_dataset,
    output_dir,
    show_running_loss=True,
    eval_data=None,
    verbose=True,
    **kwargs,
):
    """
    Trains the model on train_dataset.

    Utility function to be used by the train_model() method. Not intended to be used directly.

    :param train_dataset: torch Dataset of already-featurized training examples.
    :param output_dir: directory where checkpoints are written.
    :param show_running_loss: if True, the running loss is shown in the tqdm bar.
    :param eval_data: data passed to self.eval_model() when evaluate_during_training is set.
    :param verbose: controls logging of early-stopping progress.
    :returns: (global_step, averaged training loss) — or (global_step,
        training_progress_scores) when args.evaluate_during_training is set.
    """
    model = self.model
    args = self.args
    device = self.device

    tb_writer = SummaryWriter(logdir=args.tensorboard_dir)
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.train_batch_size,
        num_workers=self.args.dataloader_num_workers,
    )

    # Total number of optimizer steps: either fixed by max_steps (epochs derived
    # from it) or derived from epochs * batches-per-step.
    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # Build optimizer parameter groups. Parameters named in custom groups are
    # tracked in custom_parameter_names so they are not added twice below.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = []
    custom_parameter_names = set()
    for group in self.args.custom_parameter_groups:
        params = group.pop("params")
        custom_parameter_names.update(params)
        param_group = {**group}
        param_group["params"] = [
            p for n, p in model.named_parameters() if n in params
        ]
        optimizer_grouped_parameters.append(param_group)

    # Per-layer custom groups: each layer contributes a decayed and a
    # non-decayed group (biases / LayerNorm weights get weight_decay 0).
    for group in self.args.custom_layer_parameters:
        layer_number = group.pop("layer")
        layer = f"layer.{layer_number}."
        group_d = {**group}
        group_nd = {**group}
        group_nd["weight_decay"] = 0.0
        params_d = []
        params_nd = []
        for n, p in model.named_parameters():
            if n not in custom_parameter_names and layer in n:
                if any(nd in n for nd in no_decay):
                    params_nd.append(p)
                else:
                    params_d.append(p)
                custom_parameter_names.add(n)
        group_d["params"] = params_d
        group_nd["params"] = params_nd
        optimizer_grouped_parameters.append(group_d)
        optimizer_grouped_parameters.append(group_nd)

    # Remaining (non-custom) parameters, split into decay / no-decay groups.
    if not self.args.train_custom_parameters_only:
        optimizer_grouped_parameters.extend([
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if n not in custom_parameter_names and not any(
                        nd in n for nd in no_decay)
                ],
                "weight_decay": args.weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if n not in custom_parameter_names and any(
                        nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ])

    # warmup_ratio only applies when warmup_steps was not set explicitly.
    warmup_steps = math.ceil(t_total * args.warmup_ratio)
    args.warmup_steps = warmup_steps if args.warmup_steps == 0 else args.warmup_steps

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Resume optimizer/scheduler state when restarting from a saved checkpoint.
    if (args.model_name and os.path.isfile(
            os.path.join(args.model_name, "optimizer.pt")) and os.path.isfile(
                os.path.join(args.model_name, "scheduler.pt"))):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name, "scheduler.pt")))

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info(" Training started")

    global_step = 0
    training_progress_scores = None
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.silent,
                            mininterval=0)
    epoch_number = 0
    best_eval_metric = None
    early_stopping_counter = 0
    steps_trained_in_current_epoch = 0
    epochs_trained = 0

    # If resuming from a "checkpoint-<step>[-...]" directory, recover the
    # global step from the directory name and skip already-trained work.
    if args.model_name and os.path.exists(args.model_name):
        try:
            # set global_step to gobal_step of last saved checkpoint from model path
            checkpoint_suffix = args.model_name.split("/")[-1].split("-")
            if len(checkpoint_suffix) > 2:
                checkpoint_suffix = checkpoint_suffix[1]
            else:
                checkpoint_suffix = checkpoint_suffix[-1]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (
                len(train_dataloader) // args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (
                len(train_dataloader) // args.gradient_accumulation_steps)
            logger.info(
                " Continuing training from checkpoint, will skip to saved global_step"
            )
            logger.info(" Continuing training from epoch %d", epochs_trained)
            logger.info(" Continuing training from global step %d", global_step)
            logger.info(" Will skip the first %d steps in the current epoch",
                        steps_trained_in_current_epoch)
        except ValueError:
            # Directory name did not encode a step count — start fresh.
            logger.info(" Starting fine-tuning.")

    if args.evaluate_during_training:
        training_progress_scores = self._create_training_progress_scores(
            **kwargs)
    if args.wandb_project:
        wandb.init(project=args.wandb_project,
                   config={**asdict(args)},
                   **args.wandb_kwargs)
        wandb.watch(self.model)
    if args.fp16:
        from torch.cuda import amp
        scaler = amp.GradScaler()

    for current_epoch in train_iterator:
        model.train()
        # Fast-forward through epochs already covered by the checkpoint.
        if epochs_trained > 0:
            epochs_trained -= 1
            continue
        train_iterator.set_description(
            f"Epoch {epoch_number + 1} of {args.num_train_epochs}")
        batch_iterator = tqdm(
            train_dataloader,
            desc=f"Running Epoch {epoch_number} of {args.num_train_epochs}",
            disable=args.silent,
            mininterval=0,
        )
        for step, batch in enumerate(batch_iterator):
            # Fast-forward through batches already covered by the checkpoint.
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            batch = tuple(t.to(device) for t in batch)
            inputs = self._get_inputs_dict(batch)
            if args.fp16:
                with amp.autocast():
                    outputs = model(**inputs)
                    # model outputs are always tuple in pytorch-transformers (see doc)
                    loss = outputs[0]
            else:
                outputs = model(**inputs)
                # model outputs are always tuple in pytorch-transformers (see doc)
                loss = outputs[0]
            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            current_loss = loss.item()
            if show_running_loss:
                batch_iterator.set_description(
                    f"Epochs {epoch_number}/{args.num_train_epochs}. Running Loss: {current_loss:9.4f}"
                )
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            if args.fp16:
                scaler.scale(loss).backward()
            else:
                loss.backward()
            tr_loss += loss.item()
            # An optimizer step happens once per gradient_accumulation_steps batches.
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)
                if args.fp16:
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    tb_writer.add_scalar("lr",
                                         scheduler.get_last_lr()[0],
                                         global_step)
                    tb_writer.add_scalar("loss",
                                         (tr_loss - logging_loss) /
                                         args.logging_steps, global_step)
                    logging_loss = tr_loss
                    if args.wandb_project or self.is_sweeping:
                        wandb.log({
                            "Training loss": current_loss,
                            "lr": scheduler.get_last_lr()[0],
                            "global_step": global_step,
                        })
                if args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir_current = os.path.join(
                        output_dir, "checkpoint-{}".format(global_step))
                    self.save_model(output_dir_current,
                                    optimizer,
                                    scheduler,
                                    model=model)
                if args.evaluate_during_training and (
                        args.evaluate_during_training_steps > 0
                        and global_step %
                        args.evaluate_during_training_steps == 0):
                    # Only evaluate when single GPU otherwise metrics may not average well
                    results = self.eval_model(
                        eval_data,
                        verbose=verbose
                        and args.evaluate_during_training_verbose,
                        silent=args.evaluate_during_training_silent,
                        **kwargs,
                    )
                    for key, value in results.items():
                        tb_writer.add_scalar("eval_{}".format(key), value,
                                             global_step)
                    output_dir_current = os.path.join(
                        output_dir, "checkpoint-{}".format(global_step))
                    if args.save_eval_checkpoints:
                        self.save_model(output_dir_current,
                                        optimizer,
                                        scheduler,
                                        model=model,
                                        results=results)
                    training_progress_scores["global_step"].append(
                        global_step)
                    training_progress_scores["train_loss"].append(
                        current_loss)
                    for key in results:
                        training_progress_scores[key].append(results[key])
                    report = pd.DataFrame(training_progress_scores)
                    report.to_csv(
                        os.path.join(args.output_dir,
                                     "training_progress_scores.csv"),
                        index=False,
                    )
                    if args.wandb_project or self.is_sweeping:
                        wandb.log(
                            self._get_last_metrics(training_progress_scores))
                    # First evaluation initializes the best metric.
                    if not best_eval_metric:
                        best_eval_metric = results[
                            args.early_stopping_metric]
                        self.save_model(args.best_model_dir,
                                        optimizer,
                                        scheduler,
                                        model=model,
                                        results=results)
                    # Early stopping: "minimize" branch treats a drop larger
                    # than early_stopping_delta as an improvement; the else
                    # branch mirrors it for metrics that should increase.
                    if best_eval_metric and args.early_stopping_metric_minimize:
                        if results[
                                args.
                                early_stopping_metric] - best_eval_metric < args.early_stopping_delta:
                            best_eval_metric = results[
                                args.early_stopping_metric]
                            self.save_model(args.best_model_dir,
                                            optimizer,
                                            scheduler,
                                            model=model,
                                            results=results)
                            early_stopping_counter = 0
                        else:
                            if args.use_early_stopping:
                                if early_stopping_counter < args.early_stopping_patience:
                                    early_stopping_counter += 1
                                    if verbose:
                                        logger.info(
                                            f" No improvement in {args.early_stopping_metric}"
                                        )
                                        logger.info(
                                            f" Current step: {early_stopping_counter}"
                                        )
                                        logger.info(
                                            f" Early stopping patience: {args.early_stopping_patience}"
                                        )
                                else:
                                    if verbose:
                                        logger.info(
                                            f" Patience of {args.early_stopping_patience} steps reached"
                                        )
                                        logger.info(" Training terminated.")
                                        train_iterator.close()
                                    return (
                                        global_step,
                                        tr_loss / global_step if not self.
                                        args.evaluate_during_training else
                                        training_progress_scores,
                                    )
                    else:
                        if results[
                                args.
                                early_stopping_metric] - best_eval_metric > args.early_stopping_delta:
                            best_eval_metric = results[
                                args.early_stopping_metric]
                            self.save_model(args.best_model_dir,
                                            optimizer,
                                            scheduler,
                                            model=model,
                                            results=results)
                            early_stopping_counter = 0
                        else:
                            if args.use_early_stopping:
                                if early_stopping_counter < args.early_stopping_patience:
                                    early_stopping_counter += 1
                                    if verbose:
                                        logger.info(
                                            f" No improvement in {args.early_stopping_metric}"
                                        )
                                        logger.info(
                                            f" Current step: {early_stopping_counter}"
                                        )
                                        logger.info(
                                            f" Early stopping patience: {args.early_stopping_patience}"
                                        )
                                else:
                                    if verbose:
                                        logger.info(
                                            f" Patience of {args.early_stopping_patience} steps reached"
                                        )
                                        logger.info(" Training terminated.")
                                        train_iterator.close()
                                    return (
                                        global_step,
                                        tr_loss / global_step if not self.
                                        args.evaluate_during_training else
                                        training_progress_scores,
                                    )

        # --- end of epoch ---
        epoch_number += 1
        output_dir_current = os.path.join(
            output_dir,
            "checkpoint-{}-epoch-{}".format(global_step, epoch_number))

        if args.save_model_every_epoch or args.evaluate_during_training:
            os.makedirs(output_dir_current, exist_ok=True)

        if args.save_model_every_epoch:
            self.save_model(output_dir_current,
                            optimizer,
                            scheduler,
                            model=model)

        if args.evaluate_during_training and args.evaluate_each_epoch:
            results = self.eval_model(
                eval_data,
                verbose=verbose and args.evaluate_during_training_verbose,
                silent=args.evaluate_during_training_silent,
                **kwargs,
            )
            self.save_model(output_dir_current,
                            optimizer,
                            scheduler,
                            results=results)
            training_progress_scores["global_step"].append(global_step)
            training_progress_scores["train_loss"].append(current_loss)
            for key in results:
                training_progress_scores[key].append(results[key])
            report = pd.DataFrame(training_progress_scores)
            report.to_csv(os.path.join(args.output_dir,
                                       "training_progress_scores.csv"),
                          index=False)
            if args.wandb_project or self.is_sweeping:
                wandb.log(self._get_last_metrics(training_progress_scores))
            if not best_eval_metric:
                best_eval_metric = results[args.early_stopping_metric]
                self.save_model(args.best_model_dir,
                                optimizer,
                                scheduler,
                                model=model,
                                results=results)
            # Same early-stopping logic as the in-epoch path, but only taken
            # when early_stopping_consider_epochs is also set.
            if best_eval_metric and args.early_stopping_metric_minimize:
                if results[
                        args.
                        early_stopping_metric] - best_eval_metric < args.early_stopping_delta:
                    best_eval_metric = results[args.early_stopping_metric]
                    self.save_model(args.best_model_dir,
                                    optimizer,
                                    scheduler,
                                    model=model,
                                    results=results)
                    early_stopping_counter = 0
                else:
                    if args.use_early_stopping and args.early_stopping_consider_epochs:
                        if early_stopping_counter < args.early_stopping_patience:
                            early_stopping_counter += 1
                            if verbose:
                                logger.info(
                                    f" No improvement in {args.early_stopping_metric}"
                                )
                                logger.info(
                                    f" Current step: {early_stopping_counter}"
                                )
                                logger.info(
                                    f" Early stopping patience: {args.early_stopping_patience}"
                                )
                        else:
                            if verbose:
                                logger.info(
                                    f" Patience of {args.early_stopping_patience} steps reached"
                                )
                                logger.info(" Training terminated.")
                                train_iterator.close()
                            return (
                                global_step,
                                tr_loss / global_step
                                if not self.args.evaluate_during_training
                                else training_progress_scores,
                            )
            else:
                if results[
                        args.
                        early_stopping_metric] - best_eval_metric > args.early_stopping_delta:
                    best_eval_metric = results[args.early_stopping_metric]
                    self.save_model(args.best_model_dir,
                                    optimizer,
                                    scheduler,
                                    model=model,
                                    results=results)
                    early_stopping_counter = 0
                else:
                    if args.use_early_stopping and args.early_stopping_consider_epochs:
                        if early_stopping_counter < args.early_stopping_patience:
                            early_stopping_counter += 1
                            if verbose:
                                logger.info(
                                    f" No improvement in {args.early_stopping_metric}"
                                )
                                logger.info(
                                    f" Current step: {early_stopping_counter}"
                                )
                                logger.info(
                                    f" Early stopping patience: {args.early_stopping_patience}"
                                )
                        else:
                            if verbose:
                                logger.info(
                                    f" Patience of {args.early_stopping_patience} steps reached"
                                )
                                logger.info(" Training terminated.")
                                train_iterator.close()
                            return (
                                global_step,
                                tr_loss / global_step
                                if not self.args.evaluate_during_training
                                else training_progress_scores,
                            )

    return (
        global_step,
        tr_loss / global_step
        if not self.args.evaluate_during_training else
        training_progress_scores,
    )
def train(self, data_provider_path, store_learning, save_path='', restore_path='',
          epochs=3, dropout=0.2, display_step=100, validation_batch_size=30,
          prediction_path='', dist_net=None, threshold=20, bins=15, iou_step=1,
          reduce_lr_steps=(1, 10, 100, 200), data_aug=None):
    """
    Launches the training process.

    :param data_provider_path: where the DATASET folder is (must contain
        TRAINING/, VALIDATION/ and TEST/ sub-folders)
    :param store_learning: object used to store the metrics during training as .txt files
    :param save_path: path where to store checkpoints
    :param restore_path: path of a saved model to restore (empty = train from scratch)
    :param epochs: number of epochs
    :param dropout: dropout probability
    :param display_step: how often (in batches) training stats are printed
    :param validation_batch_size: batch size of the validation set
    :param prediction_path: where to store output of training (patches, losses, models)
    :param dist_net: distance module or not
    :param threshold: threshold of the distance module
    :param bins: number of bins for the distance module
    :param iou_step: how often IoU measures are computed over the validation set
    :param reduce_lr_steps: epochs at which the learning rate is halved
    :param data_aug: 'yes' or 'no' — whether the training set is augmented
    :returns: path of the last checkpoint written
    :raises FileNotFoundError: if a dataset sub-folder is missing
    :raises ValueError: if epochs == 0 or a required output path is empty
    """
    # --- Validate paths and arguments up front.
    # (Fix: the original used bare `raise` with no active exception, which
    # only produces "RuntimeError: No active exception to re-raise"; raise
    # explicit, descriptive exceptions instead.)
    PATH_TRAINING = data_provider_path + 'TRAINING/'
    if not os.path.exists(PATH_TRAINING):
        raise FileNotFoundError(
            'Training dataset path not valid. Should be path_to_dataset/TRAINING/ '
            'and this folder should contain INTPUT/ and OUTPUT/')
    PATH_VALIDATION = data_provider_path + 'VALIDATION/'
    if not os.path.exists(PATH_VALIDATION):
        raise FileNotFoundError(
            'Validation dataset path not valid. Should be path_to_dataset/VALIDATION/ '
            'and this folder should contain INTPUT/ and OUTPUT/')
    PATH_TEST = data_provider_path + 'TEST/'
    if not os.path.exists(PATH_TEST):
        raise FileNotFoundError(
            'Test dataset path not valid. Should be path_to_dataset/TEST/ '
            'and this folder should contain INTPUT/ and OUTPUT/')

    TMP_IOU = prediction_path + 'TMP_IOU/'
    if not os.path.exists(TMP_IOU):
        os.makedirs(TMP_IOU)

    loss_train = []
    if epochs == 0:
        raise ValueError('Epoch set 0, model won\'t be trained')
    if save_path == '':
        raise ValueError('Specify a path where to store the Model')
    if prediction_path == '':
        raise ValueError('Specify where to stored visualization of training')

    # --- Restore a previous model, or start from scratch.
    if restore_path == '':
        store_learning.initialize('w')
        print('Model trained from scratch')
    else:
        store_learning.initialize('a')
        self.net.load_state_dict(torch.load(restore_path))
        print('Model loaded from {}'.format(restore_path))

    self._initialize(prediction_path, store_learning, iou_step, dist_net,
                     threshold, bins)

    # Validation loader; RBD picks one random batch used for visualization.
    val_generator = Dataset_sat.from_root_folder(PATH_VALIDATION, self.nb_classes)
    val_loader = DataLoader(val_generator, batch_size=validation_batch_size,
                            shuffle=False, num_workers=1)
    RBD = randint(0, len(val_loader) - 1)
    self.info_validation(val_loader, -1, RBD, "_init", TMP_IOU)

    # Training loader
    train_generator = Dataset_sat.from_root_folder(
        PATH_TRAINING, self.nb_classes, transform=data_aug)  # max_data_size=4958

    logging.info("Start optimization")
    counter = 0
    for epoch in range(epochs):
        # Halve the learning rate at the configured epochs (a new Adam
        # optimizer is created, so moment estimates are reset as before).
        if epoch in reduce_lr_steps:
            self.lr = self.lr * 0.5
            self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.lr)

        total_loss = 0
        error_tot = 0
        train_loader = DataLoader(train_generator, batch_size=self.batch_size,
                                  shuffle=True, num_workers=1)
        for i_batch, sample_batch in enumerate(train_loader):
            self.optimizer.zero_grad()
            predict_net = Train_or_Predict(sample_batch, self.dist_net,
                                           self.loss_fn, self.threshold,
                                           self.bins, self.net)
            loss, _, probs_seg = predict_net.forward_pass()
            loss, self.optimizer, self.net = predict_net.backward_prog(
                loss, self.optimizer)
            # Fix: `loss.data[0]` fails on 0-dim loss tensors in PyTorch >= 0.5;
            # `item()` is the supported way to read a scalar loss.
            total_loss += loss.item()
            loss_train.append(loss.item())
            counter += 1
            if i_batch % display_step == 0:
                self.output_training_stats(i_batch, loss, predict_net.batch_y,
                                           probs_seg)

        # Epoch summary: average loss, metric storage and validation pass.
        avg_loss_train_value = total_loss / len(train_loader)
        self.store_learning.avg_loss_train.append(avg_loss_train_value)
        self.store_learning.write_file(self.store_learning.file_train,
                                       avg_loss_train_value)
        logging.info(" Training {:}, Minibatch Loss= {:.4f}".format(
            "epoch_%s" % epoch, avg_loss_train_value))
        self.info_validation(val_loader, epoch, RBD, "epoch_%s" % epoch, TMP_IOU)

        torch.save(self.net.state_dict(), save_path + 'CP{}.pth'.format(epoch))
        print('Checkpoint {} saved !'.format(epoch))

    self.info_validation(val_loader, -2, RBD, '_last_', TMP_IOU)
    return save_path + 'CP{}.pth'.format(epoch)
def evaluate(self, eval_dataset, output_dir, verbose=True, silent=False, **kwargs):
    """
    Evaluates the model on eval_dataset.

    Utility function to be used by the eval_model() method. Not intended to be used directly.

    :param eval_dataset: torch Dataset of already-featurized evaluation examples.
    :param output_dir: directory where eval_results.txt is written.
    :param verbose: kept for interface compatibility (not used here).
    :param silent: if True (or args.silent), the tqdm progress bar is disabled.
    :returns: dict of results; currently only "eval_loss", the mean loss over
        all evaluation batches.
    """
    model = self.model
    args = self.args
    eval_output_dir = output_dir
    device = self.device

    results = {}

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Fix: the original wrapped the model in DataParallel twice (the same
    # guard appeared both before and after model.eval()), producing a nested
    # DataParallel. Wrap exactly once.
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    if self.args.fp16:
        from torch.cuda import amp

    for batch in tqdm(eval_dataloader,
                      disable=args.silent or silent,
                      desc="Running Evaluation"):
        batch = tuple(t.to(device) for t in batch)
        inputs = self._get_inputs_dict(batch)
        with torch.no_grad():
            if self.args.fp16:
                with amp.autocast():
                    outputs = model(**inputs)
                    loss = outputs[0]
            else:
                outputs = model(**inputs)
                loss = outputs[0]
            if self.args.n_gpu > 1:
                # mean() to average the per-replica losses from DataParallel.
                loss = loss.mean()
            eval_loss += loss.item()
        nb_eval_steps += 1

    # Guard against an empty dataloader (original raised ZeroDivisionError).
    eval_loss = eval_loss / nb_eval_steps if nb_eval_steps > 0 else 0.0

    results["eval_loss"] = eval_loss

    output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        for key in sorted(results.keys()):
            writer.write("{} = {}\n".format(key, str(results[key])))

    return results
def main():
    """Train and evaluate a zero-shot-learning embedding model on AwA2.

    Learns a visual->semantic decoder (v_to_s) and semantic->visual encoder
    (s_to_v) with a triplet loss over class distances, a "surjection"
    reconstruction loss, and an L2 regularizer; evaluates top-1 accuracy on
    the unseen test classes every 5 epochs.
    """
    options = parse_args()

    # Load Data — 'leonhard' switches to the cluster's data layout.
    if options.leonhard:
        train_path = 'ZSL_Data/AwA2_train'
        test_path = 'ZSL_Data/AwA2_test'
    else:
        train_path = 'Data/AwA2/train_set'
        test_path = 'Data/AwA2/test_set'
    trainset = ZSLDataset(train_path,
                          use_predicates=True,
                          use_irevnet=options.use_irevnet)
    testset = ZSLDataset(test_path,
                         use_predicates=True,
                         use_irevnet=options.use_irevnet)

    num_classes = trainset.classes.shape[0]
    # Dimensions are read off the first sample of the dataset.
    dim_semantic = trainset[0]['class_embedding'].shape[0]
    dim_visual = trainset[0]['image_embedding'].shape[0]
    dim_attributes = trainset[0]['class_predicates'].shape[0]

    # Per-class embedding / predicate matrices, kept on the GPU for the
    # distance computations below.
    all_class_embeddings = torch.tensor(np.array(
        trainset.class_embeddings)).float().cuda()
    all_class_predicates = torch.tensor(np.array(
        trainset.class_predicates)).float().cuda()
    classes_enum = torch.tensor(np.array(range(num_classes),
                                         dtype=np.int64)).cuda()

    # query_mask marks the classes that actually occur in the test set
    # (class_ids appear to be 1-based — hence the i - 1 shift; confirm
    # against ZSLDataset).
    query_ids = set([testset[i]['class_id'] for i in range(len(testset))])
    ids = list(i - 1 for i in query_ids)
    query_mask = np.zeros(num_classes)
    query_mask[ids] = 1
    query_mask = torch.tensor(query_mask, dtype=torch.int64).cuda()

    v_to_s = DecoderAttributes(dim_source=dim_visual,
                               dim_target1=dim_attributes,
                               dim_target2=dim_semantic,
                               width=512).cuda()
    s_to_v = EncoderAttributes(dim_source1=dim_semantic,
                               dim_source2=dim_attributes,
                               dim_target=dim_visual,
                               width=512).cuda()

    # Both networks share a single optimizer; note the different weight
    # decay per optimizer choice (3e-3 for Adam, 5e-3 for SGD).
    if options.optimizer == 'adam':
        optimizer = torch.optim.Adam(list(v_to_s.parameters()) +
                                     list(s_to_v.parameters()),
                                     lr=options.learning_rate,
                                     betas=(0.9, 0.999),
                                     weight_decay=3e-3)
    else:
        optimizer = torch.optim.SGD(list(v_to_s.parameters()) +
                                    list(s_to_v.parameters()),
                                    lr=options.learning_rate,
                                    momentum=options.momentum,
                                    weight_decay=5e-3,
                                    nesterov=True)

    positive_part = torch.nn.ReLU().cuda()  # hinge for the triplet loss

    trainloader = DataLoader(trainset,
                             batch_size=options.batch_size,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=True)
    testloader = DataLoader(testset,
                            batch_size=options.batch_size,
                            shuffle=True,
                            num_workers=4,
                            pin_memory=True,
                            drop_last=True)

    # gamma blends the attribute (predicate) and word-embedding spaces in
    # every distance / loss term below.
    gamma = options.gamma
    alpha1 = options.alphas[0]  # triplet
    alpha2 = options.alphas[1]  # surjection
    alpha3 = options.alphas[2]  # l2 regularization
    margin = options.margin

    validation_accuracy = []
    for e in range(options.n_epochs):
        v_to_s = v_to_s.train()
        s_to_v = s_to_v.train()
        running_loss = 0
        for i, sample in enumerate(trainloader):
            optimizer.zero_grad()
            batch_visual = sample['image_embedding'].float().cuda()
            batch_classes = sample['class_id'].cuda() - 1  # to 0-based ids

            # Surjection loss: round-trip semantic -> visual -> semantic
            # should reconstruct both the word embeddings (e_hat[1]) and the
            # predicates (e_hat[0]).
            e_hat = v_to_s(s_to_v(all_class_embeddings, all_class_predicates))
            delta = (e_hat[1] - all_class_embeddings)
            surjection_loss = (delta * delta).sum(dim=-1).mean()
            delta = (e_hat[0] - all_class_predicates)
            surjection_loss = (1 - gamma) * surjection_loss + gamma * (
                delta * delta).sum(dim=-1).mean()

            s_out = v_to_s(batch_visual)
            s_attr, s_word = s_out

            # same_class[b, c] is True when sample b belongs to class c.
            same_class = classes_enum.unsqueeze(0) == batch_classes.unsqueeze(
                1)
            same_class = same_class.detach()
            d_matrix = (1 - gamma) * dist_matrix(
                s_word, all_class_embeddings) + gamma * dist_matrix(
                    s_attr, all_class_predicates)
            # Hardest negative / positive per sample; +1e6 masks out the
            # sample's own class when searching the closest negative.
            closest_negative, _ = (d_matrix +
                                   same_class.float() * 1e6).min(dim=-1)
            furthest_positive, _ = (d_matrix * same_class.float()).max(dim=-1)

            l2_loss = (1 - gamma) * (s_word * s_word).sum(
                dim=-1).mean() + gamma * (s_attr * s_attr).sum(dim=-1).mean()
            loss = positive_part(furthest_positive - closest_negative + margin)
            loss = alpha1 * loss.mean(
            ) + alpha2 * surjection_loss + alpha3 * l2_loss
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        else:
            # for/else: runs after the loop completes without break.
            print('Training Loss epoch {0}: {1}'.format(
                e + 1, running_loss / len(trainloader)))

        # Halve the learning rate every 30 epochs.
        if (e + 1) % 30 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * 0.5

        # Evaluate on the unseen classes every 5 epochs.
        if (e + 1) % 5 == 0:
            print('\n\n- Evaluation on epoch {}'.format(e + 1))
            avg_accuracy = 0.
            avg_loss = 0.
            n = 0
            v_to_s = v_to_s.eval()
            s_to_v = s_to_v.eval()
            with torch.no_grad():
                for i, sample in enumerate(testloader):
                    n += 1
                    batch_visual = sample['image_embedding'].float().cuda()
                    batch_classes = sample['class_id'].cuda() - 1
                    s_out = v_to_s(batch_visual)
                    s_attr, s_word = s_out
                    same_class = classes_enum.unsqueeze(
                        0) == batch_classes.unsqueeze(1)
                    same_class = same_class.detach()
                    d_matrix = (1 - gamma) * dist_matrix(
                        s_word, all_class_embeddings) + gamma * dist_matrix(
                            s_attr, all_class_predicates)
                    # Predict the nearest class among test (query) classes
                    # only — non-query classes are pushed away with +1e9.
                    c_hat = (d_matrix +
                             (1 - query_mask).float() * 1e9).argmin(dim=-1)
                    closest_negative, _ = (d_matrix +
                                           same_class.float() * 1e6).min(
                                               dim=-1)
                    furthest_positive, _ = (d_matrix *
                                            same_class.float()).max(dim=-1)
                    loss = alpha1 * furthest_positive.mean()
                    avg_loss += loss.item()
                    avg_accuracy += (
                        c_hat == batch_classes).float().mean().item()
            avg_accuracy /= n
            avg_loss /= n
            # Only accuracies after epoch 50 count toward the final mean.
            if e > 50:
                validation_accuracy.append(avg_accuracy)
            print('Average acc.: {}, Average loss:{}\n\n'.format(
                avg_accuracy, avg_loss))

    print('Mean Accuracy: {0}'.format(np.mean(validation_accuracy)))
# Set up model model = Darknet(opt.model_def, img_size=opt.img_size).to(device) if opt.weights_path.endswith(".weights"): # Load darknet weights model.load_darknet_weights(opt.weights_path) else: # Load checkpoint weights model.load_state_dict( torch.load(opt.weights_path, map_location=torch.device('cpu'))) model.eval() # Set in evaluation mode dataloader = DataLoader( ImageFolder(opt.image_folder, img_size=opt.img_size), batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu, ) classes = load_classes(opt.class_path) # Extracts class labels from file Tensor = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.FloatTensor imgs = [] # Stores image paths img_detections = [] # Stores detections for each image index print("\nPerforming object detection:") prev_time = time.time() for batch_i, (img_paths, input_imgs) in enumerate(dataloader): # Configure input