def __init__(self, net, device, query, gallery, batch_size=64, top_k=100):
    """Set up query/gallery dataloaders for retrieval evaluation.

    `query` and `gallery` are dataset configs resolved via the project's
    getter; `top_k` bounds the ranking depth used later.
    """
    self.top_k = top_k
    self.net = net
    self.device = device
    # Build each split directly from its config — no shared temp needed.
    self.query_dataloader = DataLoader(
        getter.get_instance(query), batch_size=batch_size)
    self.gallery_dataloader = DataLoader(
        getter.get_instance(gallery), batch_size=batch_size)
    self.reset()
def train_dataloader(self):
    """Instantiate the training DataLoader described by the config."""
    return get_instance(
        self.config['dataset']['train']['loader'],
        dataset=self.train_dataset,
        num_workers=4,
    )
def __init__(self, dataset):
    """Wrap a labeled dataset for triplet sampling.

    When the underlying dataset is an evaluation split (``train`` is
    False), a fixed list of (anchor, positive, negative) index triplets
    is pre-generated so validation triplets stay constant across epochs.
    """
    self.dataset = getter.get_instance(dataset)
    self.train = self.dataset.train
    self.transform = self.dataset.transform
    self.labels = self.dataset.labels
    # NOTE(review): stores the dataset object itself, not raw samples —
    # len(self.data) therefore equals len(self.dataset).
    self.data = self.dataset
    self.labels_set = set(self.labels.numpy())
    # Map each label to the array of sample indices carrying that label.
    self.label_to_indices = {label: np.where(self.labels.numpy() == label)[0]
                             for label in self.labels_set}
    if not self.train:
        # Pre-draw one triplet per sample: [anchor, positive, negative].
        # NOTE(review): the positive is drawn from every index of the
        # anchor's label, so it may coincide with the anchor itself.
        triplets = [[i,
                     np.random.choice(
                         self.label_to_indices[self.labels[i].item()]),
                     # Negative: pick a different label, then an index of it.
                     np.random.choice(self.label_to_indices[
                         np.random.choice(
                             list(
                                 self.labels_set - set([self.labels[i].item()]))
                         )
                     ])
                     ]
                    for i in range(len(self.data))]
        self.triplets = triplets
def __init__(self, dataset):
    """Wrap a labeled dataset for siamese pair sampling.

    For evaluation splits a fixed pair list is pre-generated: anchors at
    even indices become positive pairs (target 1), anchors at odd
    indices become negative pairs (target 0).
    """
    self.dataset = getter.get_instance(dataset)
    self.train = self.dataset.train
    self.transform = self.dataset.transform
    self.labels = self.dataset.labels
    # NOTE(review): stores the dataset object itself, not raw samples —
    # len(self.data) therefore equals len(self.dataset).
    self.data = self.dataset
    self.labels_set = set(self.labels.numpy())
    # Map each label to the array of sample indices carrying that label.
    self.label_to_indices = {label: np.where(self.labels.numpy() == label)[0]
                             for label in self.labels_set}
    if not self.train:
        # [anchor, partner, 1] — partner shares the anchor's label.
        # NOTE(review): the partner may coincide with the anchor itself.
        positive_pairs = [[i,
                           np.random.choice(
                               self.label_to_indices[self.labels[i].item()]),
                           1]
                          for i in range(0, len(self.data), 2)]
        # [anchor, partner, 0] — partner drawn from a different label.
        negative_pairs = [[i,
                           np.random.choice(self.label_to_indices[
                               np.random.choice(
                                   list(
                                       self.labels_set - set([self.labels[i].item()]))
                               )
                           ]),
                           0]
                          for i in range(1, len(self.data), 2)]
        self.pairs = positive_pairs + negative_pairs
def __init__(self, dataset, niters, nimgs):
    """Wrap a dataset with an iteration/image-count budget.

    `niters` and `nimgs` configure how many items this wrapper exposes;
    their exact use lives in the (unseen) __len__/__getitem__.
    """
    self.dataset = getter.get_instance(dataset)
    self.niters = niters
    self.nimgs = nimgs
    # Announce whether item access will be random or exhaustive.
    covers_all = len(self.dataset) == len(self)
    print("Get all items" if covers_all else "Random getitem")
def evaluate(mode, gpus, weight_path, query_dir, query_label, gallery_dir,
             gallery_label, top_k, cmc_rank):
    """Extract query/gallery embeddings from a checkpointed model, then
    either tune re-ranking hyper-parameters (mode 'finetune') or report
    retrieval metrics (mode 'eval')."""
    # Resolve the compute device from the requested GPU index.
    dev_id = 'cuda:{}'.format(gpus) \
        if torch.cuda.is_available() and gpus is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    assert os.path.exists(weight_path)
    pretrained = torch.load(weight_path, map_location=dev_id)

    # 1: Define network
    net = get_instance(pretrained['config']['model']).to(device)
    net.load_state_dict(pretrained['model_state_dict'])

    # 2: Load datasets
    print('Load queries...')
    dataset = AIC2020Track2(query_dir, query_label, train=True)
    dataloader = DataLoader(dataset, batch_size=64)
    print('Extract queries...')
    q_embs, q_labels = extract_embeddings(dataloader, net, device)

    print('Load gallery...')
    dataset = AIC2020Track2(root=gallery_dir, path=gallery_label, train=True)
    dataloader = DataLoader(dataset, batch_size=64)
    print('Extract gallery...')
    g_embs, g_labels = extract_embeddings(dataloader, net, device)

    if mode == 'finetune':
        # Hyper-parameter search over re-ranking parameters with optuna.
        import optuna

        def objective(trial):
            k1 = trial.suggest_int('k1', 10, 60)
            k2 = trial.suggest_int('k2', 1, 6)
            l = trial.suggest_uniform('lambda', 0.1, 0.9)
            mAP, _ = reid_evaluate(q_embs, g_embs, q_labels, g_labels,
                                   cmc_rank, top_k, k1, k2, l)
            # optuna minimizes by default, so negate mAP to maximize it.
            return -mAP

        study = optuna.create_study()
        study.optimize(objective, n_trials=100)
        print(study.best_params)
    elif mode == 'eval':
        print('Evaluate...')
        # NOTE(review): the cmc_rank/top_k *parameters* are overridden
        # here by hard-coded values (as are the re-ranking constants
        # 55, 6, 0.1) — confirm this is intentional.
        cmc_rank = 1
        top_k = 100
        mAP, cmc = reid_evaluate(q_embs, g_embs, q_labels, g_labels,
                                 cmc_rank, top_k, 55, 6, 0.1)
        print(f'mAP@{top_k}={mAP}, cmc@{cmc_rank}={cmc}')
def __init__(self, losses, weights=None):
    """Combine several configured losses with normalized weights.

    `weights` defaults to uniform; list inputs are converted to a float
    tensor. Weights are normalized to sum to 1 and registered as a
    buffer so they follow the module across devices.
    """
    super().__init__()
    self.loss_fns = nn.ModuleList(
        [getter.get_instance(cfg) for cfg in losses])
    if weights is None:
        weights = torch.ones(len(losses))
    elif isinstance(weights, list):
        weights = torch.FloatTensor(weights)
    weights /= weights.sum()
    self.register_buffer('weights', weights)
def generate_submission(gpus, weight_path, save_dir, query_dir, gallery_dir,
                        top_k):
    """Produce a top-k ranking submission file from a trained checkpoint."""
    # Resolve the compute device from the requested GPU index.
    dev_id = 'cuda:{}'.format(gpus) \
        if torch.cuda.is_available() and gpus is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    assert os.path.exists(weight_path)
    pretrained = torch.load(weight_path, map_location=dev_id)
    # Tag the submission with the run id plus a timestamp.
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M_%S')
    model_id = pretrained['config']['id'] + '-' + stamp

    # 1: Define network
    net = get_instance(pretrained['config']['model']).to(device)
    net.load_state_dict(pretrained['model_state_dict'])

    # 2: Load datasets (ImageNet mean/std normalization)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print('Load queries...')
    query_loader = DataLoader(ImageFolderDataset(query_dir, transform),
                              batch_size=64)
    print('Extract queries...')
    q_embs, q_ids = extract_embeddings(query_loader, net, device)

    print('Load gallery...')
    gallery_loader = DataLoader(ImageFolderDataset(gallery_dir, transform),
                                batch_size=64)
    print('Extract gallery...')
    g_embs, g_ids = extract_embeddings(gallery_loader, net, device)

    print('Generate...')
    submission = ranking(q_embs, q_ids, g_embs, g_ids, top_k)
    create_submission_file(model_id, save_dir, submission)
def predict(config):
    """Run inference on the cleaned test split and write a rating CSV."""
    device = xm.xla_device()

    # Repoint the train-dataset config at the test data, inference mode.
    config['dataset']['train']['args']['data_root_dir'] = './data/clean/'
    config['dataset']['train']['args']['infer'] = True
    test_dataset = get_instance(config['dataset']['train'])
    test_dataloader = DataLoader(test_dataset, batch_size=64,
                                 shuffle=False, drop_last=False)

    model = pipeline(config)
    checkpoint_path = config['trainer']['cp_dir']
    print('Loading model ...')
    # Load onto CPU first; the model is moved to the XLA device below.
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()
    model.freeze()

    print('generating submission ... ')
    # Collect per-batch argmax predictions, then concatenate once.
    preds = [torch.Tensor().long().cpu()]
    for batch in tqdm(test_dataloader):
        inps = batch['input_ids'].to(device)
        mask = batch['attention_mask'].to(device)
        preds.append(model(inps, mask).argmax(-1).cpu())
    res = torch.cat(preds, 0)

    length = res.shape[0]
    # Shift class indices back to the original rating scale.
    offset = 3 if config['model']['args']['nclasses'] == 3 else 1
    res = res + torch.Tensor([offset] * length).long()

    res_csv = res.tolist()
    idx = list(range(1, length + 1))
    df = pd.DataFrame(zip(idx, res_csv), columns=['review_id', 'rating'])
    print(length)
    df.to_csv(config['out'], index=False)
def configure_optimizers(self):
    """Instantiate the configured optimizer over the model's parameters."""
    return get_instance(self.config['optimizer'],
                        params=self.model.parameters())
def val_dataloader(self):
    """Instantiate the validation DataLoader described by the config."""
    loader_cfg = self.config['dataset']['val']['loader']
    return get_instance(loader_cfg, dataset=self.val_dataset, num_workers=4)
from utils.getter import get_instance

if __name__ == "__main__":
    # config_path = '/content/shopee-contest2/configs/train/baseline_colab.yaml'
    cp_model_dir = './cp/bert_multi3/best_metric_Accuracy.pth'
    # NOTE(review): csv_test_dir is unused in this visible fragment —
    # presumably consumed further down (the script is truncated below).
    csv_test_dir = './data/clean/test.csv'
    # NOTE(review): torch.load returns a plain dict here, and dicts have
    # no .device() method — this line likely raises AttributeError. The
    # intent appears to be torch.load(cp_model_dir, map_location='cpu');
    # confirm before changing.
    config = torch.load(cp_model_dir).device('cpu')['config']
    dev_id = 'cuda:{}'.format(config['gpus']) \
        if torch.cuda.is_available() and config.get('gpus', None) is not None \
        else 'cpu'
    device = torch.device(dev_id)
    model = get_instance(config['model']).to(device)
    # NOTE(review): the checkpoint is loaded a second time here without
    # map_location — may fail on CPU-only hosts for GPU checkpoints.
    model.load_state_dict(torch.load(cp_model_dir)['model_state_dict'])
    print('load weights-----------------------')

    # Classify
    print('generate submission----------------')
    dataset = get_instance(config['dataset']['infer'])
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=5, shuffle=False)
    # tbar.set_description_str(f'{iter_id}: {loss.item()}')
    model.eval()
    # Inference loop continues beyond this fragment (truncated).
    with torch.no_grad():
def prepare_data(self):
    """Instantiate the train and validation datasets from the config."""
    dataset_cfg = self.config['dataset']
    self.train_dataset = get_instance(dataset_cfg['train'])
    self.val_dataset = get_instance(dataset_cfg['val'])
def __init__(self, config):
    """Build the model and loss declared in `config`."""
    super().__init__()
    self.config = config
    self.model = get_instance(config['model'])
    self.loss = get_instance(config['loss'])
def train(config):
    """Train a model end-to-end from a config dict.

    Optionally restores model/optimizer state from a pretrained
    checkpoint (also reusing its model config), builds the datasets,
    network, loss, optimizer, scheduler and metrics, then delegates
    to Trainer.
    """
    assert config is not None, "Do not have config file!"
    pprint.PrettyPrinter(indent=2).pprint(config)

    dev_id = 'cuda:{}'.format(config['gpus']) \
        if torch.cuda.is_available() and config.get('gpus', None) is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    pretrained_path = config["pretrained"]
    pretrained = None
    if pretrained_path is not None:  # identity check instead of `!= None`
        pretrained = torch.load(pretrained_path, map_location=dev_id)
        # Reuse the architecture the checkpoint was trained with.
        for item in ["model"]:
            config[item] = pretrained["config"][item]

    # 1: Load datasets
    set_seed()
    train_dataset = get_instance(config['dataset']['train'])
    train_dataloader = get_instance(config['dataset']['train']['loader'],
                                    dataset=train_dataset)

    val_dataset = get_instance(config['dataset']['val'])
    val_dataloader = get_instance(config['dataset']['val']['loader'],
                                  dataset=val_dataset)

    # 2: Define network
    set_seed()
    model = get_instance(config['model']).to(device)
    # Train from pretrained if it is not None
    if pretrained is not None:
        model.load_state_dict(pretrained['model_state_dict'])

    # 3: Define loss
    criterion = get_instance(config['loss']).to(device)

    # 4: Define Optimizer
    optimizer = get_instance(config['optimizer'],
                             params=model.parameters())
    if pretrained is not None:
        optimizer.load_state_dict(pretrained['optimizer_state_dict'])

    # 5: Define Scheduler
    set_seed()
    scheduler = get_instance(config['scheduler'], optimizer=optimizer)

    # 6: Define metrics
    set_seed()
    metric = {mcfg['name']: get_instance(mcfg, net=model, device=device)
              for mcfg in config['metric']}

    # 7: Create trainer
    # NOTE(review): `optimier` looks like a typo for `optimizer`, but the
    # Trainer signature is not visible here — confirm against the Trainer
    # class before renaming the keyword.
    trainer = Trainer(device=device,
                      config=config,
                      model=model,
                      criterion=criterion,
                      optimier=optimizer,
                      scheduler=scheduler,
                      metric=metric)

    # 8: Start to train
    set_seed()
    trainer.train(train_dataloader=train_dataloader,
                  val_dataloader=val_dataloader)
def __init__(self, extractor):
    """Instantiate the configured embedding network and expose its
    feature dimension."""
    super().__init__()
    net = getter.get_instance(extractor)
    self.embedding_net = net
    self.feature_dim = net.feature_dim
def train(config):
    """Train a model end-to-end from a config dict (seeded variant).

    Each stage is preceded by set_seed(config['seed']) so runs stay
    reproducible regardless of which stages consume randomness. An
    optional pretrained checkpoint may be either a full training
    checkpoint (with 'model_state_dict') or a bare state dict.
    """
    assert config is not None, "Do not have config file!"
    pprint.PrettyPrinter(indent=2).pprint(config)

    # Get device
    dev_id = 'cuda:{}'.format(config['gpus']) \
        if torch.cuda.is_available() and config.get('gpus', None) is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model. The str() comparison also catches the YAML
    # string 'None', not only the Python None object.
    pretrained_path = config["pretrained"]
    pretrained = None
    if str(pretrained_path) != 'None':
        # Load once, mapped onto the target device, and reuse below.
        # (Fixed: previously the checkpoint was re-loaded a second time
        # without map_location, which can fail on CPU-only hosts for
        # GPU-saved checkpoints.)
        pretrained = torch.load(pretrained_path, map_location=dev_id)
        # for item in ["model"]:
        #     config[item] = pretrained["config"][item]

    # 1: Load datasets
    train_dataloader, val_dataloader = \
        get_data(config['dataset'], config['seed'])

    # 2: Define network
    set_seed(config['seed'])
    model = get_instance(config['model']).to(device)
    # if config['parallel']:
    #     print("Load parallel model")
    #     model = nn.DataParallel(model)

    # Train from pretrained if it is not None
    if pretrained is not None:
        if 'model_state_dict' in pretrained:
            # Full training checkpoint.
            model.load_state_dict(pretrained['model_state_dict'])
        else:
            # Bare state dict (e.g. backbone weights only); load loosely.
            print("Load model case 2")
            try:
                model.load_state_dict(pretrained, strict=False)
            except RuntimeError as e:
                print(f'[Warning] Ignoring {e}')
                print(
                    '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
                )

    # 3: Define loss
    set_seed(config['seed'])
    criterion = get_instance(config['loss']).to(device)
    criterion.device = device

    # 4: Define Optimizer
    set_seed(config['seed'])
    optimizer = get_instance(config['optimizer'],
                             params=model.parameters())

    # 5: Define Scheduler
    set_seed(config['seed'])
    scheduler = get_instance(config['scheduler'], optimizer=optimizer)

    # 6: Define metrics
    set_seed(config['seed'])
    metric = {mcfg['name']: get_instance(mcfg) for mcfg in config['metric']}

    # 7: Create trainer
    set_seed(config['seed'])
    trainer = Trainer(device=device,
                      config=config,
                      model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      metric=metric)

    # 8: Start to train
    set_seed(config['seed'])
    trainer.train(train_dataloader=train_dataloader,
                  val_dataloader=val_dataloader)
def __init__(self, extractor_cfg, nclasses):
    """Configured feature extractor followed by a linear classifier head."""
    super().__init__()
    self.nclasses = nclasses
    self.extractor = getter.get_instance(extractor_cfg)
    dim = self.extractor.feature_dim
    self.feature_dim = dim
    self.classifier = nn.Linear(dim, nclasses)
def __init__(self, margin, pair_selector):
    """Store the margin and instantiate the configured pair-mining
    strategy."""
    super().__init__()
    self.pair_selector = getter.get_instance(pair_selector)
    self.margin = margin
def __init__(self, margin, selector):
    """Triplet loss with a configured mining strategy plus an auxiliary
    linear classifier."""
    super().__init__()
    self.triplet_selector = getter.get_instance(selector)
    self.margin = margin
    # NOTE(review): hard-coded 1280 input features / 333 classes —
    # confirm these match the extractor output and dataset label count.
    self.cls = nn.Linear(1280, 333)
def __init__(self, margin, selector):
    """Online triplet loss: store the margin and instantiate the
    configured triplet-mining strategy."""
    # Zero-argument super() (Python 3) — consistent with the sibling
    # loss classes in this codebase; behavior is identical.
    super().__init__()
    self.margin = margin
    self.triplet_selector = getter.get_instance(selector)
                    help='(single) GPU to use (default: None)')
parser.add_argument('-b', type=int, default=64,
                    help='batch size (default: 64)')
args = parser.parse_args()

# Device
dev_id = 'cuda:{}'.format(args.g) \
    if torch.cuda.is_available() and args.g is not None \
    else 'cpu'
device = torch.device(dev_id)

# Load model
# NOTE(review): `config` here is the whole checkpoint dict, not just a
# config section — the model config lives under config['config']['model'].
config = torch.load(args.w, map_location=dev_id)
model = get_instance(config['config']['model']).to(device)
model.load_state_dict(config['model_state_dict'])

# Load data
dataset = ShopeeDataset(img_dir=args.d, csv_path=args.c, is_train=False)
dataloader = DataLoader(dataset, batch_size=args.b)

# Metrics
metrics = {
    'Accuracy': Accuracy(),
    # NOTE(review): nclasses hard-coded to 42 — confirm against dataset.
    'ConfusionMatrix': ConfusionMatrix(nclasses=42),
}

# Evaluation loop continues beyond this fragment (truncated).
with torch.no_grad():
    # Reset running metric state before accumulating over the loader.
    for m in metrics.values():
        m.reset()
def __init__(self, extractor_cfg):
    """Thin wrapper: instantiate the configured extractor and expose
    its feature dimension."""
    super().__init__()
    extractor = getter.get_instance(extractor_cfg)
    self.extractor = extractor
    self.feature_dim = extractor.feature_dim