def train_single_model(self, train_dir, train_csv, val_dir, val_csv, epochs):
    # Load the train/val splits as numpy arrays of (image_name, label) rows.
    train_part = pd.read_csv(train_csv).values
    val_part = pd.read_csv(val_csv).values
    train_dataset = utils.DYDataSet(
        train_dir,
        train_part,
        utils.get_transforms(
            mode='train',
            input_size=self.input_size,
            resize_size=self.input_size + 42)
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=self.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        sampler=None)
    val_dataset = utils.DYDataSet(
        val_dir,
        val_part,
        utils.get_transforms(
            mode='valid',
            input_size=self.input_size,
            resize_size=self.input_size + 42))
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=self.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True)
    print('[+] training with total %d images' % len(train_dataset))
    self.model = get_model(self.model_name, pretrained=True)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    utils.train(self.model, train_loader, val_loader, criterion,
                checkpoint_file=self.checkpoint_file, epochs=epochs)
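# utils.get_transforms is project code whose body is not shown here; the
# sketch below is a plausible reconstruction under common torchvision
# conventions, matching how it is called above: resize to a slightly larger
# resize_size, then crop back down to input_size. The 'test' branch must
# return a stacked (ncrops, c, h, w) tensor per image, since the ten-crop
# code paths below unpack (bs, ncrops, c, h, w) batches. Everything in this
# helper is an assumption, not the project's actual implementation.
import torchvision.transforms as T

def _get_transforms_sketch(mode, input_size, resize_size):
    # Hypothetical reconstruction for illustration only.
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])  # standard ImageNet stats
    if mode == 'train':
        # Random crop + flip for train-time augmentation.
        return T.Compose([T.Resize(resize_size), T.RandomCrop(input_size),
                          T.RandomHorizontalFlip(), T.ToTensor(), normalize])
    if mode == 'valid':
        # Deterministic center crop for validation.
        return T.Compose([T.Resize(resize_size), T.CenterCrop(input_size),
                          T.ToTensor(), normalize])
    # mode == 'test': ten crops per image, stacked into one 4-D tensor.
    return T.Compose([T.Resize(resize_size), T.TenCrop(input_size),
                      T.Lambda(lambda crops: torch.stack(
                          [normalize(T.ToTensor()(crop)) for crop in crops]))])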
def extract_features(feature_extractor, data_dir, data_csv, prediction_file_path):
    print('[+] Using Ten-Crop Extracting strategy')
    transform = utils.get_transforms(
        mode='test',
        input_size=args.input_size,
        resize_size=args.input_size + args.add_size)
    data_array = pd.read_csv(data_csv).values
    dataset = utils.DYDataSet(data_dir, data_array, transform)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True)
    feature_extractor = torch.nn.DataParallel(feature_extractor).cuda()
    feature_extractor.eval()
    all_labels = []
    all_fts = []
    with torch.no_grad():
        print('extracting total %d images' % len(dataset))
        for i, (input, labels) in enumerate(data_loader):
            print('extracting batch: %d/%d' % (i, len(data_loader)))
            # Fold the ten crops into the batch dimension, run the extractor,
            # then average the per-crop features back to one vector per image.
            bs, ncrops, c, h, w = input.size()
            input = input.view(-1, c, h, w).cuda()
            output = feature_extractor(input)
            output = output.view(bs, ncrops, -1).mean(1).view(bs, -1)
            all_labels.append(labels)
            all_fts.append(output.data.cpu())
            # Flush features to disk every 800 batches to bound memory usage.
            if (i + 1) % 800 == 0:
                all_labels = torch.cat(all_labels, dim=0).numpy().reshape(-1, 1)
                all_fts = torch.cat(all_fts, dim=0).numpy()
                print(f'[+] features shape: {all_fts.shape}')
                res = np.concatenate((all_fts, all_labels), axis=1)
                print(f'[+] save npy shape: {res.shape}')
                part = (i + 1) // 800  # integer part index, not float division
                fts_file_name = prediction_file_path + '.' + str(part)
                print('[+] writing fts file: %s, part %d ...' % (fts_file_name, part))
                np.save(fts_file_name, res)
                all_labels = []
                all_fts = []
    # Write out whatever remains after the last full flush; skip if the batch
    # count was an exact multiple of 800 (torch.cat on an empty list raises).
    if all_fts:
        all_labels = torch.cat(all_labels, dim=0).numpy().reshape(-1, 1)
        all_fts = torch.cat(all_fts, dim=0).numpy()
        print(f'[+] features shape: {all_fts.shape}')
        res = np.concatenate((all_fts, all_labels), axis=1)
        print(f'[+] save npy shape: {res.shape}')
        part = len(data_loader) // 800 + 1
        fts_file_name = prediction_file_path + '.' + str(part)
        print('[+] writing fts file: %s, part %d ...' % (fts_file_name, part))
        np.save(fts_file_name, res)
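# A minimal, self-contained sanity check of the ten-crop fold/average used in
# extract_features above. Shapes only: the random tensor stands in for real
# extractor output, so this snippet is purely illustrative.
def _ten_crop_shape_demo():
    bs, ncrops, d = 4, 10, 2048
    crop_features = torch.randn(bs * ncrops, d)  # one feature row per crop
    # Regroup crops by image, then average them into one feature per image.
    fused = crop_features.view(bs, ncrops, -1).mean(1).view(bs, -1)
    assert fused.shape == (bs, d)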
def test_single_model(self, checkpoint_file, test_dir, test_csv,
                      prediction_file_path='test_prediction.npy',
                      ten_crop=False, prob=False):
    print('[+] checkpoint file:{0:s}'.format(checkpoint_file))
    transform = utils.get_transforms(
        mode='valid',
        input_size=self.input_size,
        resize_size=self.input_size + self.add_size)
    if ten_crop:
        print('[+] Using Ten-Crop Testing strategy')
        transform = utils.get_transforms(
            mode='test',
            input_size=self.input_size,
            resize_size=self.input_size + self.add_size)
    # Get the underlying numpy array of the pd.DataFrame object.
    test_array = pd.read_csv(test_csv).values
    test_dataset = utils.DYDataSet(test_dir, test_array, transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=self.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True)
    self.model = get_model(self.model_name, pretrained=False)
    load_model_multiGPU(self.model, checkpoint_file)
    # load_model(self.model, checkpoint_file)
    self.model = torch.nn.DataParallel(self.model).cuda()
    self.model.eval()
    all_idxs = []
    all_labels = []
    with torch.no_grad():
        print('testing total %d images' % len(test_dataset))
        for i, (input, labels) in enumerate(test_loader):
            print('testing batch: %d/%d' % (i, len(test_loader)))
            input = input.cuda()
            if ten_crop:
                # Average the logits over the ten crops of each image.
                bs, ncrops, c, h, w = input.size()
                input = input.view(-1, c, h, w)
                output = self.model(input).view(bs, ncrops, -1).mean(1).view(bs, -1)
            else:
                output = self.model(input)  # 2-D tensor (bs, num_classes)
            if not prob:
                pred = output.topk(1)[-1]  # class indices of the top-1 logits
            else:
                pred = output  # keep the full score vector per image
            all_idxs.append(labels)
            all_labels.append(pred.data.cpu())
    all_labels = torch.cat(all_labels, dim=0).numpy()
    all_idxs = torch.cat(all_idxs, dim=0).numpy().reshape(-1, 1)
    res = np.concatenate((all_idxs, all_labels), axis=1)
    print('writing pred file %s ...' % prediction_file_path)
    np.save(prediction_file_path, res)
    print('done.')
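# A small sketch of how the saved prediction array might be read back
# downstream. The column layout follows test_single_model above (index column
# first, then the top-1 class or full score row); the snippet itself is an
# assumption, not project code.
def _load_predictions(prediction_file_path='test_prediction.npy'):
    res = np.load(prediction_file_path)
    idxs = res[:, 0].astype(int)  # labels/ids stored alongside each image
    preds = res[:, 1:]            # top-1 class index, or full scores when prob=True
    return idxs, preds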