# NOTE(review): chunk begins mid-class (a LeNet-style classifier head) and is
# truncated at the end — the final MNIST call is incomplete in this view.
self.classifier = nn.Sequential(
    nn.Linear(16 * 5 * 5, 120),
    nn.Dropout(p=0.5),
    nn.Linear(120, 84),
    nn.Linear(84, 10),
)

def forward(self, x):
    # Run the conv feature extractor, flatten per-sample, then classify.
    x = self.features(x)
    x = x.view(x.size(0), -1)
    x = self.classifier(x)
    return x

# NOTE(review): rebinding the name `transforms` shadows the torchvision.transforms
# module — any later `transforms.X` in this module resolves to this Compose
# object instead of the module. Consider renaming this variable.
transforms = transforms.Compose([
    transforms.Resize(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
dataset = torchvision.datasets.MNIST(root=data_root, transform=transforms, download=True, train=True)
train_data = utils.data.DataLoader(dataset, shuffle=True, batch_size=100, num_workers=2)
# Truncated: the keyword arguments of this call continue beyond this chunk.
test_dataset = torchvision.datasets.MNIST(root=data_root, transform=transforms,
# NOTE(review): chunk is truncated at both ends — it opens inside a
# transforms.Compose([...]) for the training pipeline and ends inside a
# DataLoader call whose remaining arguments are outside this view.
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# DarkNet-19 backbone configured for 100 output classes (CIFAR-100).
net = DarkNet19(100)
train_set = CIFAR100(config.dataset, train=True, transform=transform, download=True)
# Evaluation pipeline: deterministic resize + ImageNet normalization, no augmentation.
test_set = CIFAR100(config.dataset, train=False, transform=transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]), download=True)
# 90/10 train/validation split; the second size is computed as the remainder so
# the two parts always sum to len(train_set) even when 0.9*len is not integral.
train_set, val_set = random_split(train_set, [
    int(len(train_set) * 0.9),
    len(train_set) - int(len(train_set) * 0.9)
])
# train_set = ImageFolder(config.train_dir, transform=transform)
# val_set = ImageFolder(config.val_dir, transform=transform)
# Truncated below in this view:
train_loader = torch.utils.data.DataLoader(train_set, config.batch_size,
# NOTE(review): chunk is truncated at both ends — it opens on the tail of a
# training ImageFolder/Compose call and ends inside Identity.forward.
normalize, ]))
train_sampler = None
# shuffle is only enabled when no sampler is supplied (DataLoader forbids both).
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=(train_sampler is None),
                                           num_workers=args.workers,
                                           pin_memory=True,
                                           sampler=train_sampler)
# Validation: deterministic resize/center-crop pipeline, no shuffling.
val_loader = torch.utils.data.DataLoader(ImageFolder_iid(
    valdir,
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.workers,
    pin_memory=True)

class Identity(nn.Module):
    """Pass-through module, typically used to stub out a layer."""
    def __init__(self, ):
        super(Identity, self).__init__()

    # Truncated: forward body continues beyond this chunk.
    def forward(self, input):
class Config: data_root = args.root_dir #data_root = '/data1/xuyilun/LUNA16/data' training_size = 12500 test_size = 12500 #training_size = 6484 #test_size = 1622 #as_expertise = np.array([[0.6, 0.8, 0.7, 0.6, 0.7],[0.6,0.6,0.7,0.9,0.6]]) #----------------------------- lexpert = [[0.6, 0.4], [0.4, 0.6]] hexpert = [[.8, .2], [.2, .8]] if args.expertise == 0: as_expertise = np.array([lexpert, lexpert, lexpert, lexpert, lexpert]) senior_num = 5 elif args.expertise == 1: ''' as_expertise = np.array( [[[0.9,0.1],[0.1,0.9]], [[0.8,0.2],[0.2,0.8]], [[0.6,0.4],[0.4,0.6]], [[0.7,0.3],[0.3,0.7]], [[0.7,0.3],[0.3,0.7]]]) ''' as_expertise = np.array([[[0.6, 0.4], [0.2, 0.8]], [[0.7, 0.3], [0.4, 0.6]], [[0.6, 0.4], [0.4, 0.6]], [[0.7, 0.3], [0.3, 0.7]], [[0.7, 0.3], [0.4, 0.6]]]) as_expertise_lambda = np.zeros((as_expertise.shape[0], 2)) for i in range(as_expertise.shape[0]): as_expertise_lambda[i][0] = 4 * np.log(as_expertise[i][0][0] / (1 - as_expertise[i][0][0])) as_expertise_lambda[i][1] = 4 * np.log(as_expertise[i][1][1] / (1 - as_expertise[i][1][1])) senior_num = 5 save = args.save missing_label = np.array([0, 0, 0, 0, 0]) missing = False num_classes = 2 left_input_size = 28 * 28 batch_size = 16 left_learning_rate = 1e-4 right_learning_rate = 1e-4 epoch_num = 30 ######################### expert_num = args.expert_num device_id = args.device experiment_case = args.case ######################### train_transform = transforms.Compose([ transforms.Resize((150, 150), interpolation=2), transforms.RandomHorizontalFlip(), transforms.RandomRotation(45), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) test_transform = transforms.Compose([ transforms.Resize((150, 150), interpolation=2), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ])
# NOTE(review): chunk opens mid-__getitem__ of a dataset returning
# (img, label, pos) and ends inside MyModel.__init__.
if torch.is_tensor(idx):
    idx = idx.tolist()
print(idx)  # NOTE(review): debug print left in the per-item hot path.
path, label, pos = self.samples[idx]
img = Image.open(path)
# NOTE(review): the normalized array below is computed but never used — the
# transform is applied to the raw PIL image, not to img_arr. Dead code?
img_arr = np.asarray(img)
img_arr = (img_arr - self.mean) / self.var
if self.transform:
    img = self.transform(img)
return img, label, pos

train_dataset = AngleData(split=path_train,
                          transform=transforms.Compose([transforms.Resize((640, 480), 2),
                                                        transforms.ToTensor()]))
test_dataset = AngleData(split=path_test,
                         transform=transforms.Compose([transforms.Resize((640, 480), 2),
                                                       transforms.ToTensor()]))
#train_loader = DataLoader(dataset=train_dataset, batch_size=4, shuffle=True, num_workers=4)
#test_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=False, num_workers=4)

def set_parameters_require_grad(model, feature_extracting):
    """Freeze every parameter when feature-extracting (train only a new head)."""
    if feature_extracting:
        # `name` is unused; the loop only flips requires_grad.
        for name, param in model.named_parameters():
            param.requires_grad = False

class MyModel(nn.Module):
    # Truncated: __init__ body continues beyond this chunk.
    def __init__(self):
def main():
    """Train/evaluate a resnet50 classifier over an ImageFolder dataset.

    Reads the global argparse `parser`; supports checkpoint resume and an
    evaluate-only mode (args.evaluate). Mutates the globals `args`/`best_prec1`.
    """
    global args, best_prec1
    args = parser.parse_args()
    print(args)
    # args.lr is a flat [lr1, epochs1, lr2, epochs2, ...] list; expand it into
    # one learning rate per epoch and derive the total epoch count.
    lrs = []
    args.epochs = 0
    for lr, epoch in zip(args.lr[0::2], args.lr[1::2]):
        lrs.extend([lr] * int(epoch))
        args.epochs += int(epoch)
    args.lr = lrs
    # replicable results from fixed seed.
    random.seed(args.manualSeed)
    torch.cuda.manual_seed_all(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    model = resnet50(pretrained=args.pretrained, num_classes=args.nclasses, pool_name=args.pool_name)
    print(model)
    # define loss function (criterion) and optimizer
    # NOTE: criterion is a (CrossEntropyLoss, EntropyLoss) pair; train/validate
    # are expected to unpack it.
    criterion = (nn.CrossEntropyLoss().cuda(), EntropyLoss().cuda())
    # lr starts at 0 here; adjust_learning_rate() installs the real per-epoch value.
    optimizer = torch.optim.SGD(model.parameters(), 0, momentum=args.momentum, weight_decay=args.weight_decay)
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # strict=False tolerates missing/extra keys (e.g. a changed head).
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    model = model.cuda()
    cudnn.benchmark = True
    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.Resize(512),
            transforms.RandomResizedCrop(448, scale=(0.1, 1)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(512),
            transforms.CenterCrop(448),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)
    if args.evaluate:
        # Evaluation-only mode: score one split and exit.
        if args.train_val == "train":
            validate(train_loader, model, criterion, args.epochs - 1)
        elif args.train_val == "val":
            validate(val_loader, model, criterion, args.epochs - 1)
        else:
            print("wrong train_val flag")
            return
        return
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)
        best_prec1 = max(prec1, best_prec1)
        # Checkpoint every epoch with the running best precision.
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        })
from torchvision.transforms import transforms

# NOTE(review): chunk is truncated — the final Compose list is cut off mid-definition.
# All pipelines below share the same CIFAR-10 normalization statistics.
def_train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Random brightness factor in [0.5, 1.5], then resize.
brightness_jitter_transform = transforms.Compose([
    transforms.ColorJitter(brightness=[0.5, 1.5]),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# p=1.0 makes the horizontal flip deterministic (always applied).
# NOTE(review): unlike the others, this pipeline has no Resize — confirm intended.
hflip_data_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Darken only: brightness factor in [0.5, 0.9].
darkness_jitter_transform = transforms.Compose([
    transforms.ColorJitter(brightness=[0.5, 0.9]),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Lighten only: brightness factor in [1.1, 1.5]; truncated below in this view.
lightness_jitter_transform = transforms.Compose([
    transforms.ColorJitter(brightness=[1.1, 1.5]),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
class CUBDataset(MNIST):
    """CUB-200-2011 birds dataset reusing torchvision's MNIST base class
    (its raw/processed folder layout and cached .pt tensor files)."""
    urls = [
        'http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz'
    ]
    # Same deterministic pipeline for both splits.
    train_transform = test_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(), normalize])

    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        if self._check_exists():
            return
        try:
            os.makedirs(self.raw_folder)
            os.makedirs(self.processed_folder)
        except:
            # NOTE(review): bare except silently covers more than "already exists".
            print("yeeted")
        folder = self.raw_folder
        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            tar = tarfile.open(file_path, "r:gz")
            tar.extractall(folder)
            tar.close()
        folder += '/CUB_200_2011'
        # process and save as torch files
        print('Processing...')
        # train_test_split.txt: "<image_id> <1|0>" — '1' marks a training image.
        training_set = []
        testing_set = []
        with open(folder + '/train_test_split.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                image_id, cat = line.split(' ')
                cat = cat[0]
                if cat == '1':
                    training_set.append(image_id)
                else:
                    testing_set.append(image_id)
        # classes.txt: class id -> name ([:-1] strips the trailing newline).
        class_dict = {}
        with open(folder + '/classes.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                class_id, class_name = line.split(' ')
                class_dict[class_id] = class_name[:-1]
        # images.txt: image id -> relative path.
        images_dict = {}
        with open(folder + '/images.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, img_name = line.split(' ')
                images_dict[img_id] = img_name[:-1]
        # image_class_labels.txt: image id -> class id (string, 1-based).
        image_class_dict = {}
        with open(folder + '/image_class_labels.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, class_id = line.split(' ')
                image_class_dict[img_id] = class_id[:-1]
        # bounding_boxes.txt: image id -> (x, y, w, h).
        bbox_dict = {}
        with open(folder + '/bounding_boxes.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, x, y, w, h = line.split(' ')
                bbox_dict[img_id] = (float(x), float(y), float(w), float(h[:-1]))
        train_data, train_label = [], []
        test_data, test_label = [], []
        for i, img_id in enumerate(training_set):
            # print(i, img_id)
            x = np.array(Image.open(folder + '/images/' + images_dict[img_id]))
            bbox = tuple(int(x) for x in bbox_dict[img_id])
            # Promote single-channel / 2-D grayscale arrays to 3 channels.
            if x.shape[-1] == 1:
                x = x.repeat(3, -1)
            elif len(x.shape) == 2:
                x = x.reshape(x.shape[0], x.shape[1], 1)
                x = x.repeat(3, -1)
            # Crop to the annotated bounding box: rows y:y+h, cols x:x+w.
            x = x[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            train_data.append(torch.from_numpy(x))
            train_label.append(
                torch.LongTensor([int(image_class_dict[img_id])]))
        for i, img_id in enumerate(testing_set):
            # print(i, img_id)
            x = np.array(Image.open(folder + '/images/' + images_dict[img_id]))
            bbox = tuple(int(x) for x in bbox_dict[img_id])
            if x.shape[-1] == 1:
                x = x.repeat(3, -1)
            elif len(x.shape) == 2:
                x = x.reshape(x.shape[0], x.shape[1], 1)
                x = x.repeat(3, -1)
            x = x[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            test_data.append(torch.from_numpy(x))
            test_label.append(torch.LongTensor([int(image_class_dict[img_id])
                                                ]))
        training_set = (train_data, train_label)
        test_set = (test_data, test_label)
        with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
            torch.save(test_set, f)
        print('Done!')

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        if self.train:
            img, target = self.train_data[index], self.train_labels[index]
        else:
            img, target = self.test_data[index], self.test_labels[index]
        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        if img.shape[-1] != 3:
            # Debug trace for unexpected non-RGB tensors.
            print(img.shape)
            print(index)
        img = Image.fromarray(img.numpy())
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        # Metadata labels are 1-based; shift to 0-based for training.
        target = target - 1
        if img.size(0) == 1:
            img = img.expand(3, 224, 224)
        return img, target
class ImagenetPartDataset(MNIST):
    """Animal-part dataset cloned via git, reusing torchvision's MNIST base
    class folder layout and cached .pt files."""
    urls = ['https://github.com/zqs1022/detanimalpart.git']
    train_transform = test_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(), normalize])

    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        if self._check_exists():
            return
        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise
        folder = os.path.join(self.root, self.raw_folder)
        # The "download" here is a git clone into the raw folder.
        for url in self.urls:
            print('Downloading ' + url)
            current = os.getcwd()
            os.chdir(folder)
            call(['git', 'clone', url])
            os.chdir(current)
        # process and save as torch files
        print('Processing...')
        folder1 = folder
        folder = os.path.join(folder, 'detanimalpart')
        training_set = []
        testing_set = []
        # NOTE(review): this loop only builds paths; the .mat parsing is disabled.
        for f in os.listdir(folder):
            img_folder = folder + '/' + f + '/img/img'
            data_file = folder + '/' + f + '/img/data.mat'
            # with h5py.File(data_file, 'r') as f:
            #     files = list(f)
            #pass
        # NOTE(review): the metadata files below (train_test_split.txt, classes.txt,
        # images.txt, ...) follow the CUB-200 layout — this section looks
        # copy-pasted from the CUB loader; confirm these files exist in this repo.
        with open(folder + '/train_test_split.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                image_id, cat = line.split(' ')
                cat = cat[0]
                if cat == '1':
                    training_set.append(image_id)
                else:
                    testing_set.append(image_id)
        class_dict = {}
        with open(folder + '/classes.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                class_id, class_name = line.split(' ')
                class_dict[class_id] = class_name[:-1]
        images_dict = {}
        with open(folder + '/images.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, img_name = line.split(' ')
                images_dict[img_id] = img_name[:-1]
        image_class_dict = {}
        with open(folder + '/image_class_labels.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, class_id = line.split(' ')
                image_class_dict[img_id] = class_id[:-1]
        bbox_dict = {}
        with open(folder + '/bounding_boxes.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, x, y, w, h = line.split(' ')
                bbox_dict[img_id] = (float(x), float(y), float(w), float(h[:-1]))
        train_data, train_label = [], []
        test_data, test_label = [], []
        for i, img_id in enumerate(training_set):
            # print(i, img_id)
            x = np.array(Image.open(folder + '/images/' + images_dict[img_id]))
            bbox = tuple(int(x) for x in bbox_dict[img_id])
            # Promote single-channel arrays to 3 channels before cropping.
            if x.shape[-1] == 1:
                x = x.repeat(3, -1)
            # Crop to the bounding box: rows y:y+h, cols x:x+w.
            x = x[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            train_data.append(torch.from_numpy(x))
            train_label.append(
                torch.LongTensor([int(image_class_dict[img_id])]))
        for i, img_id in enumerate(testing_set):
            # print(i, img_id)
            x = np.array(Image.open(folder + '/images/' + images_dict[img_id]))
            bbox = tuple(int(x) for x in bbox_dict[img_id])
            # NOTE(review): no grayscale promotion here, unlike the train loop.
            x = x[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            test_data.append(torch.from_numpy(x))
            test_label.append(torch.LongTensor([int(image_class_dict[img_id])
                                                ]))
        training_set = (train_data, train_label)
        test_set = (test_data, test_label)
        with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
            torch.save(test_set, f)
        print('Done!')

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        if self.train:
            img, target = self.train_data[index], self.train_labels[index]
        else:
            img, target = self.test_data[index], self.test_labels[index]
        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img.numpy())
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        # Metadata labels are 1-based; shift to 0-based for training.
        target = target - 1
        if img.size(0) == 1:
            img = img.expand(3, 224, 224)
        return img, target
def __init__(self, path_root, n_way, k_shot, k_query, x_dim, split, augment='0', test=None, shuffle=True, fetch_global=False):
    """Few-shot episode dataset backed by an LMDB file (generated on demand).

    :param path_root: dataset root; expects images/ and lmdb_data/<split>.lmdb
    :param n_way: classes per episode
    :param k_shot: support samples per class
    :param k_query: query samples per class
    :param x_dim: comma-separated dimension string, e.g. '84,84,3'
    :param split: 'train' / 'val' / 'test'
    :param augment: '0' plain resize; '1' crop/flip/jitter augmentation for train
    """
    self.n_way = n_way
    self.k_shot = k_shot
    self.k_query = k_query
    self.x_dim = list(map(int, x_dim.split(',')))
    self.split = split
    self.shuffle = shuffle
    self.path_root = path_root
    self.fet_global = fetch_global
    if augment == '0':
        # Plain resize + ImageNet normalization; f1 is the image-loading lambda
        # defined elsewhere in this file.
        self.transform = transforms.Compose([
            transforms.Lambda(f1),
            transforms.Resize(self.x_dim[:2]),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
    elif augment == '1':
        if self.split == 'train':
            # Over-resize by 20px, then random crop back + flip + color jitter.
            self.transform = transforms.Compose([
                # lambda x: Image.open(x).convert('RGB'),
                transforms.Lambda(f1),
                transforms.Resize(
                    (self.x_dim[0] + 20, self.x_dim[1] + 20)),
                transforms.RandomCrop(self.x_dim[:2]),
                transforms.RandomHorizontalFlip(),
                transforms.ColorJitter(brightness=.1, contrast=.1, saturation=.1, hue=.1),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
            ])
        else:
            # NOTE(review): eval path still uses RandomCrop — non-deterministic;
            # confirm this is intended.
            self.transform = transforms.Compose([
                # lambda x: Image.open(x).convert('RGB'),
                transforms.Lambda(f1),
                transforms.Resize(
                    (self.x_dim[0] + 20, self.x_dim[1] + 20)),
                transforms.RandomCrop(self.x_dim[:2]),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
            ])
    # NOTE(review): for any other `augment` value self.transform is never set.
    self.path = os.path.join(path_root, 'images')
    self.lmdb_file = os.path.join(path_root, "lmdb_data", "%s.lmdb" % self.split)
    if not os.path.exists(self.lmdb_file):
        print("lmdb_file is not found, start to generate %s" % self.lmdb_file)
        self._generate_lmdb()
    # read lmdb_file
    self.env = lmdb.open(self.lmdb_file, subdir=False, readonly=True, lock=False,
                         readahead=False, meminit=False)
    with self.env.begin(write=False) as txn:
        # NOTE(review): pyarrow.deserialize was removed in pyarrow >= 2.0;
        # this code requires an old pyarrow pin.
        self.total_sample = pyarrow.deserialize(txn.get(b'__len__'))
        self.keys = pyarrow.deserialize(txn.get(b'__keys__'))
        self.label2num = pyarrow.deserialize(txn.get(b'__label2num__'))
        self.num2label = pyarrow.deserialize(txn.get(b'__num2label__'))
        self.image_labels = [i.decode() for i in self.keys]
    self.total_cls = len(self.num2label)
    # Group keys by their first 9 characters (the class identifier prefix).
    self.dic_img_label = defaultdict(list)
    for i in self.image_labels:
        self.dic_img_label[i[:9]].append(i)
    self.support_set_size = self.n_way * self.k_shot  # num of samples per support set
    self.query_set_size = self.n_way * self.k_query
    self.episode = self.total_sample // (
        self.support_set_size + self.query_set_size)  # how many episode
    # NOTE(review): on Windows the open lmdb env is dropped — presumably so
    # DataLoader workers can re-open it per process; confirm __getitem__ reopens.
    if platform.system().lower() == 'windows':
        self.platform = "win"
        del self.env
    elif platform.system().lower() == 'linux':
        self.platform = "linux"
class VOCPartDataset(MNIST):
    """PASCAL-Part / VOC2010 dataset reusing torchvision's MNIST base class
    folder layout and cached .pt files."""
    urls = [
        'http://www.stat.ucla.edu/~xianjie.chen/pascal_part_dataset/trainval.tar.gz',
        'http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar'
    ]
    train_transform = test_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(), normalize])

    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        if self._check_exists():
            return
        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise
        folder = os.path.join(self.root, self.raw_folder)
        # Download/untar is currently disabled; the raw data is assumed present.
        # for url in self.urls:
        #     print('Downloading ' + url)
        #     data = urllib.request.urlopen(url)
        #     filename = url.rpartition('/')[2]
        #     file_path = os.path.join(self.root, self.raw_folder, filename)
        #     with open(file_path, 'wb') as f:
        #         f.write(data.read())
        #
        #     if file_path.endswith('tar.gz'):
        #         tar = tarfile.open(file_path, "r:gz")
        #     else:
        #         tar = tarfile.open(file_path, 'r:')
        #     tar.extractall(folder)
        #     tar.close()
        # process and save as torch files
        print('Processing...')
        image_folder = folder + '/VOCdevkit/VOC2010/JPEGImages'
        annotation_folder = folder + '/Annotations_Part'
        training_set = []
        testing_set = []
        # NOTE(review): the .mat annotations are loaded but discarded (pass).
        for f in os.listdir(annotation_folder):
            file_name = annotation_folder + '/' + f
            file = sio.loadmat(file_name, squeeze_me=True)
            pass
        # NOTE(review): the CUB-style metadata files below are unlikely to exist
        # in a VOC layout — this section looks copy-pasted from the CUB loader.
        with open(folder + '/train_test_split.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                image_id, cat = line.split(' ')
                cat = cat[0]
                if cat == '1':
                    training_set.append(image_id)
                else:
                    testing_set.append(image_id)
        class_dict = {}
        with open(folder + '/classes.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                class_id, class_name = line.split(' ')
                class_dict[class_id] = class_name[:-1]
        images_dict = {}
        with open(folder + '/images.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, img_name = line.split(' ')
                images_dict[img_id] = img_name[:-1]
        image_class_dict = {}
        with open(folder + '/image_class_labels.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, class_id = line.split(' ')
                image_class_dict[img_id] = class_id[:-1]
        bbox_dict = {}
        with open(folder + '/bounding_boxes.txt', 'r') as f:
            lines = f.readlines()
            for line in lines:
                img_id, x, y, w, h = line.split(' ')
                bbox_dict[img_id] = (float(x), float(y), float(w), float(h[:-1]))
        train_data, train_label = [], []
        test_data, test_label = [], []
        for i, img_id in enumerate(training_set):
            # print(i, img_id)
            x = np.array(Image.open(folder + '/images/' + images_dict[img_id]))
            bbox = tuple(int(x) for x in bbox_dict[img_id])
            # Promote single-channel arrays to 3 channels before cropping.
            if x.shape[-1] == 1:
                x = x.repeat(3, -1)
            # Crop to the bounding box: rows y:y+h, cols x:x+w.
            x = x[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            train_data.append(torch.from_numpy(x))
            train_label.append(
                torch.LongTensor([int(image_class_dict[img_id])]))
        for i, img_id in enumerate(testing_set):
            # print(i, img_id)
            x = np.array(Image.open(folder + '/images/' + images_dict[img_id]))
            bbox = tuple(int(x) for x in bbox_dict[img_id])
            # NOTE(review): no grayscale promotion here, unlike the train loop.
            x = x[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            test_data.append(torch.from_numpy(x))
            test_label.append(torch.LongTensor([int(image_class_dict[img_id])
                                                ]))
        training_set = (train_data, train_label)
        test_set = (test_data, test_label)
        with open(
                os.path.join(self.root, self.processed_folder, self.training_file),
                'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)
        print('Done!')

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        if self.train:
            img, target = self.train_data[index], self.train_labels[index]
        else:
            img, target = self.test_data[index], self.test_labels[index]
        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img.numpy())
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        # Metadata labels are 1-based; shift to 0-based for training.
        target = target - 1
        if img.size(0) == 1:
            img = img.expand(3, 224, 224)
        return img, target
def __init__(self, root, mode, resize=84, startidx=0, memorize=False,
             elem_per_class=600, test=False, classes=None, seed=10):
    """
    :param root: root path of mini-imagenet
    :param mode: train, val or test
    :param resize: edge length images are resized to
    :param startidx: start to index label from startidx
    :param memorize: flag stored for the caller's caching logic
    :param elem_per_class: images kept per class
    :param test: select the held-out slice [elem_per_class, 2*elem_per_class)
        instead of the first elem_per_class images of each class
    :param classes: class indices to keep; None (default) keeps range(400)
    :param seed: seed for the module-level random generator
    """
    # FIX: the default used to be `classes=list(range(400))` — a mutable default
    # argument shared between all calls. Build the default per call instead;
    # behavior for all existing callers is unchanged.
    if classes is None:
        classes = range(400)
    self.resize = resize  # resize to
    self.startidx = startidx  # index label not from 0, but from startidx
    self.mode = mode
    self.memorize = memorize
    # if test:
    #     self.elem_per_class = 600 - elem_per_class
    # else:
    self.elem_per_class = elem_per_class
    if mode == 'train':
        self.transform = transforms.Compose([
            lambda x: Image.open(x).convert('RGB'),
            transforms.Resize((self.resize, self.resize)),
            # transforms.RandomHorizontalFlip(),
            # transforms.RandomRotation(5),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
    else:
        self.transform = transforms.Compose([
            lambda x: Image.open(x).convert('RGB'),
            transforms.Resize((self.resize, self.resize)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
    self.path = os.path.join(root, 'images')  # image path
    csvdata = self.loadCSV(os.path.join(root, mode + '.csv'))  # csv path
    self.data = []
    self.labels = []
    self.img2label = {}
    cur_class = 0
    for i, (k, v) in enumerate(csvdata.items()):
        # print(classes)
        if i in classes:
            # Keep either the first elem_per_class images of this class or the
            # following elem_per_class (held-out) images when test=True.
            if test:
                v = v[elem_per_class:2 * elem_per_class]
            else:
                v = v[0:elem_per_class]
            self.data.append(v)  # [[img1, img2, ...], [img111, ...]]
            self.img2label[k] = i + self.startidx  # {"img_name[:9]":label}
            self.labels.append(i)
            cur_class += 1
    # from random import shuffle
    import random
    random.seed(seed)
    # random.shuffle(self.data)
    self.cls_num = len(self.data)
    print("Total classes = ", self.cls_num)
def infer_from_img_url(self, img_url):
    """Download an image from *img_url* and classify it with the wrapped model.

    Returns a status dict: status 2 for an unreachable URL, otherwise status 0
    with either the top-K predictions or an "Unknown" placeholder, depending on
    whether the best probability clears the configured recognition threshold.
    """
    started_at = time.time()
    resp = requests.get(img_url, timeout=20)
    if resp.status_code in [403, 404, 500]:
        # Treat the common HTTP failures as an invalid URL.
        return {
            'status': 2,
            'message': 'Invalid URL',
            'elapse': time.time() - started_at,
            'results': None
        }

    import io
    from PIL import Image

    # Decode the downloaded bytes into a PIL image (via a numpy round-trip).
    decoded = np.array(Image.open(io.BytesIO(resp.content)))
    pil_img = Image.fromarray(decoded.astype(np.uint8))

    pipeline = transforms.Compose([
        transforms.Resize(227),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    batch = pipeline(pil_img)
    batch.unsqueeze_(0)  # add the batch dimension
    batch = batch.to(self.device)

    scores = self.model.forward(batch)
    scores = F.softmax(scores, dim=1)

    # get TOP-K output labels and corresponding probabilities
    topK_prob, topK_label = torch.topk(scores, self.topK)
    prob = topK_prob.to("cpu").detach().numpy().tolist()
    _, predicted = torch.max(scores.data, 1)

    finished_at = time.time()
    if prob[0][0] < cfg['thresholds']['plant_recognition']:
        # Best probability below threshold: report success with an unknown class.
        return {
            'status': 0,
            'message': 'success',
            'elapse': finished_at - started_at,
            'results': [{
                'name': "Unknown",
                'category_id': -1,
                'prob': round(prob[0][0], 4)
            }]
        }
    return {
        'status': 0,
        'message': 'success',
        'elapse': finished_at - started_at,
        'results': [{
            'name': self.key_type[int(topK_label[0][i].to("cpu"))],
            'category_id': int(topK_label[0][i].data.to("cpu").numpy()) + 1,
            'prob': round(prob[0][i], 4)
        } for i in range(self.topK)]
    }
def viz_feature_map(image_file, hmtnet_model_file='../main/model/hmt-net-fbp.pth'):
    """Visualize the activation heat map of HMT-Net.

    Forwards the image through every child module before 'conv2', averages the
    resulting feature maps into one 224x224 map, shows it with matplotlib, and
    writes a JET-colormapped overlay to ./heatmap.jpg.

    :param image_file: path of the input image
    :param hmtnet_model_file: path of the trained HMT-Net weights
    :return: None (side effects: plots + ./heatmap.jpg)
    """
    net = HMTNet()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.device_count() > 1:
        print("We are running on", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
        net.load_state_dict(torch.load(hmtnet_model_file))
    else:
        # Weights were saved from a DataParallel model; strip the 'module.' prefix.
        raw_state = torch.load(hmtnet_model_file)
        from collections import OrderedDict
        stripped_state = OrderedDict()
        for key, value in raw_state.items():
            stripped_state[key[7:]] = value  # remove `module.`
        # load params
        net.load_state_dict(stripped_state)

    pil_img = Image.fromarray(io.imread(image_file).astype(np.uint8))
    preprocess = transforms.Compose([
        transforms.Resize(227),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    activations = preprocess(pil_img)
    activations.unsqueeze_(0)
    activations = activations.to(device)
    net = net.to(device)

    # Forward through children up to (but not including) 'conv2'.
    # (Renamed the accumulator — the original shadowed the builtin `input`.)
    for child_name, child in net.named_children():
        if child_name == 'conv2':
            break
        activations = child.forward(activations)

    # Collapse the captured activations into a single 224x224 map.
    fmap = np.transpose(activations[0, :, :, :].data.cpu().numpy(), [1, 2, 0])
    fmap = cv2.resize(fmap, (224, 224))
    fmap = np.average(fmap, axis=2)
    # mat = 0.9 * mat + 0.1 * np.average(np.transpose(image, [1, 2, 0]), axis=2)
    fig, ax = plt.subplots()
    im = ax.imshow(fmap)
    plt.show()

    # newly added
    heatmap = np.maximum(fmap, 0)
    heatmap /= np.max(heatmap)
    plt.matshow(heatmap)
    plt.show()

    bgr_img = cv2.imread(image_file)
    heatmap = cv2.resize(heatmap, (bgr_img.shape[1], bgr_img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    blended = heatmap * 0.7 + bgr_img * 0.3
    cv2.imwrite('./heatmap.jpg', blended)
# NOTE(review): chunk is truncated — the get_transforms(...) call is cut off
# below; MODEL_TYPE / PRETRAINED / TORCHVISION_TRANSFORM are defined outside
# this view.
FASTAI_TRANSFORM = True
TRAIN_TEST_SPLIT = 0.99
BATCH_SIZE = 32
BATCH_ACCUM = 4       # gradient-accumulation steps per optimizer update (presumably)
NUM_EPOCHS = 30
IMAGE_RESIZE = 256
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
EXPERIMENT_NAME = f"{MODEL_TYPE}_PT={PRETRAINED}_BS={BATCH_SIZE}_FAT={FASTAI_TRANSFORM}_IRS={IMAGE_RESIZE}"
MODEL_SAVE_DIR = Path(f"saved_models/{EXPERIMENT_NAME}")
# NOTE(review): mkdir(exist_ok=True) still fails if "saved_models" itself is
# missing — parents=True may be intended.
MODEL_SAVE_DIR.mkdir(exist_ok=True)
tb_writer = SummaryWriter(log_dir=f"saved_models/tensorboard/{EXPERIMENT_NAME}")
if TORCHVISION_TRANSFORM:
    INPUT_DIM = 256
    tv_t = transforms.Compose([
        transforms.Resize(INPUT_DIM),
        transforms.ToTensor()])
else:
    tv_t = None
if FASTAI_TRANSFORM:
    # Fastai augmentation: jitter/warp/squish/skew/tilt, each applied with p=0.2.
    fa_t_train, fa_t_eval = get_transforms(
        do_flip=False,
        max_rotate=25,
        xtra_tfms= [
            RandTransform(jitter, kwargs={'magnitude':(-0.02, 0.02)}, p=0.2, do_run=True, is_random=True),
            RandTransform(perspective_warp, kwargs={'magnitude': (-0.5, 0.5)}, p=0.2, do_run=True, is_random=True),
            RandTransform(squish, kwargs={}, p=0.2, do_run=True, is_random=True),
            RandTransform(skew, kwargs={'magnitude': (-0.5, 0.5), 'direction': (0,7)}, p=0.2, do_run=True, is_random=True),
            RandTransform(tilt, kwargs={'magnitude': (-0.5, 0.5), 'direction': (0,3)}, p=0.2, do_run=True, is_random=True),
        ]
# NOTE(review): chunk starts mid-script; Gx/Gz/args are defined earlier in the file.
manual_morph = False
output_path = init_experiment_output_dir(
    "celeba64" if not args.frgc else "frgc64", "model_evaluation", args)
# Toggle eval/train mode on both generators as requested by the CLI flags.
if args.eval:
    Gx.eval()
    Gz.eval()
if args.train:
    Gx.train()
    Gz.train()
trans = []
# Only resize when the requested resolution differs from the native 64px.
if args.res != 64:
    trans.append(transforms.Resize(args.res))
trans.append(transforms.ToTensor())
if args.tanh:
    # Map [0, 1] tensors to [-1, 1] for tanh-output generators.
    trans.append(transforms.Lambda(lambda img: img * 2.0 - 1.0))
split = "valid"
if args.test:
    split = "test"
if args.frgc:
    dataset = FRGCPairsLookAlike(transform=transforms.Compose(trans))
else:
    dataset = CelebaCroppedPairsLookAlike(split=split, transform=transforms.Compose(trans))
def __init__(self, image_path, mat_file, n_way, k_shot, target_class, num_episodes=100):
    """Pre-sample N-way/K-shot episodes from a flat image folder + .mat labels.

    :param image_path: directory containing the images
    :param mat_file: .mat file whose 'labels' row labels images in listing order
    :param n_way: number of classes per episode
    :param k_shot: support (and query) samples per class
    :param target_class: iterable of class labels episodes are drawn from
    :param num_episodes: number of episodes to pre-sample (default 100 — the
        previously hard-coded value, so existing callers are unaffected)
    """
    self.image_path = image_path
    self.mat_file = mat_file
    self.images = listdir(self.image_path)
    self.labels = loadmat(mat_file)['labels'][0]
    # label -> list of image paths, restricted to the target classes.
    self.label2img = {}
    for i, j in enumerate(self.images):
        if self.labels[i] in target_class:
            if self.labels[i] in self.label2img:
                self.label2img[self.labels[i]].append(
                    join(self.image_path, j))
            else:
                self.label2img[self.labels[i]] = [join(self.image_path, j)]
    self.support_set_x = []
    self.query_set_x = []
    self.support_set_y = []
    self.query_set_y = []
    # GENERALIZED: episode count is now a parameter instead of a hard-coded 100.
    for _ in range(num_episodes):
        # Sample n_way distinct classes for this episode.
        cls = np.random.choice(target_class, n_way, False)
        sup_tmp_x = []
        q_tmp_x = []
        sup_tmp_y = []
        q_tmp_y = []
        for k, j in enumerate(cls):
            # Draw 2*k_shot distinct images: first half support, second half query.
            imgs = np.random.choice(len(self.label2img[j]), 2 * k_shot, False)
            sup_tmp_x.append(
                np.array(self.label2img[j])[np.array(
                    imgs[:k_shot])].tolist())
            q_tmp_x.append(
                np.array(self.label2img[j])[np.array(
                    imgs[k_shot:])].tolist())
            # One-hot label row for class slot k, repeated once per shot.
            ch = torch.zeros(n_way)
            ch[k] = 1
            ch = ch.unsqueeze(0).repeat(k_shot, 1)
            sup_tmp_y.append(ch.numpy())
            q_tmp_y.append(ch.numpy())
        self.support_set_x.append(sup_tmp_x)
        self.query_set_x.append(q_tmp_x)
        self.support_set_y.append(sup_tmp_y)
        self.query_set_y.append(q_tmp_y)
    self.transform = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
def __getitem__(self, idx):
    """Return {'image', 'classes', 'species'} for row *idx*; None if the file is gone."""
    image_path = self.file_info['path'][idx]
    if not os.path.isfile(image_path):
        print(image_path + ' does not exist!')
        # NOTE(review): returning None makes the default DataLoader collate_fn
        # crash; consider raising or skipping the index instead.
        return None
    image = Image.open(image_path).convert('RGB')
    label_class = int(self.file_info.iloc[idx]['classes'])
    label_species = int(self.file_info.iloc[idx]['species'])
    sample = {'image': image, 'classes': label_class, 'species': label_species}
    if self.transform:
        sample['image'] = self.transform(image)
    return sample

train_transforms = transforms.Compose([transforms.Resize((500, 500)),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       ])
# Validation: same resize without augmentation.
val_transforms = transforms.Compose([transforms.Resize((500, 500)),
                                     transforms.ToTensor()
                                     ])
train_dataset = MyDataset(root_dir= ROOT_DIR + TRAIN_DIR,
                          annotations_file= TRAIN_ANNO,
                          transform=train_transforms)
test_dataset = MyDataset(root_dir= ROOT_DIR + VAL_DIR,
                         annotations_file= VAL_ANNO,
                         transform=val_transforms)
def ext_face_feats(sphere_face, img_path,
                   pretrained_model=os.path.join(cfg['model_zoo_base'],
                                                 'sphere20a.pth')):
    """
    extract face features
    :param sphere_face: a SphereFaceNet instance, or None to build one here
    :param img_path: path of the image to process
    :param pretrained_model: path to the sphere20a weight file
    :return: dict with 'status', 'message' and 'feature' (unit-norm vector,
        or None when no face was detected)
    """
    assert os.path.exists(pretrained_model)
    if sphere_face is None:
        sphere_face = SphereFaceNet()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    sphere_face.load_state_dict(torch.load(pretrained_model))
    sphere_face = sphere_face.to(device)
    img = cv2.imread(img_path)
    mtcnn_result = detect_face(img_path)
    print(mtcnn_result)
    if len(mtcnn_result) > 0:
        # Only the first detected face is used.
        bbox = mtcnn_result[0]['box']
        margin_pixel = 10
        # NOTE(review): MTCNN 'box' is conventionally [x, y, w, h]; slicing
        # image rows with bbox[0] (x) looks like swapped axes -- confirm
        # against detect_face's actual contract before changing.
        face_region = img[bbox[0] - margin_pixel:bbox[0] + bbox[2] +
                          margin_pixel, bbox[1] - margin_pixel:bbox[1] +
                          bbox[3] + margin_pixel]
        # Aspect ratio of the crop (long side / short side, >= 1).
        ratio = max(face_region.shape[0], face_region.shape[1]) / min(
            face_region.shape[0], face_region.shape[1])
        # Scale the short side to 64 pixels, then center-crop the long side.
        if face_region.shape[0] < face_region.shape[1]:
            face_region = cv2.resize(face_region, (int(ratio * 64), 64))
            # NOTE(review): the crop offset reads shape[0] (height) to trim
            # the width axis after the resize -- verify height/width mixing.
            face_region = face_region[:,
                                      int((face_region.shape[0] - 64) /
                                          2):int((face_region.shape[0] - 64)
                                                 / 2) + 64]
        else:
            face_region = cv2.resize(face_region, (64, int(ratio * 64)))
            face_region = face_region[int((face_region.shape[1] - 64) /
                                          2):int((face_region.shape[1] - 64)
                                                 / 2) + 64, :]
        face_region = Image.fromarray(face_region.astype(np.uint8))
        # SphereFace input: 96x112, ImageNet normalization.
        preprocess = transforms.Compose([
            transforms.Resize((96, 112)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
        face_region = preprocess(face_region)
        face_region.unsqueeze_(0)  # add batch dimension
        face_region = face_region.to(device)
        x = face_region.to(device)
        x = sphere_face.forward(x)
        x = x.to("cpu").detach().numpy().flatten()
        return {
            'status': 0,
            'message': 'extracted feature',
            'feature': x / np.linalg.norm(x)  # L2-normalized embedding
        }
    else:
        # NOTE(review): 'status' is 0 on both the success and the no-face
        # path, so callers cannot branch on it -- confirm intended.
        return {'status': 0, 'message': 'No face detected!', 'feature': None}
# (body of a Dataset __getitem__ whose `def` line lies above this excerpt)
    image_path = self.file_info['data'][idx]
    if not os.path.isfile(image_path):
        # NOTE(review): runtime message reads "...does no exist" and lacks
        # a space; left unchanged because it is program output.
        print(image_path + 'does no exist')
        return None
    image = Image.open(image_path).convert('RGB')
    label_species = int(self.file_info.iloc[idx]['label'])
    sample = {'image': image, 'species': label_species}
    if self.transform:
        # The transform replaces the raw PIL image in the sample.
        sample['image'] = self.transform(image)
    return sample


# Module-level pipeline: augmentation for training, plain resize for val.
train_transforms = transforms.Compose([
    transforms.Resize((500, 500)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
val_transforms = transforms.Compose(
    [transforms.Resize((500, 500)),
     transforms.ToTensor()])
train_dataset = MyDataset(root_dir=ROOT_DIR,
                          annotations_file=TRAIN_ANNO,
                          transform=train_transforms)
test_dataset = MyDataset(root_dir=ROOT_DIR,
                         annotations_file=VAL_ANNO,
                         transform=val_transforms)
train_loader = DataLoader(dataset=train_dataset, batch_size=24, shuffle=True)
def main():
    """Run the ViolenceDetector demo on one video or a folder of videos.

    Command-line arguments select the input path, model weight files,
    pose-series parameters and the output path.  YOLOv5, HRNet and the
    LSTM classifier are loaded onto CUDA when available, otherwise CPU.

    Raises:
        ValueError: if --mode is neither 'video' nor 'folder'.
    """
    parser = argparse.ArgumentParser(
        description='A demo of the ViolenceDetector')
    parser.add_argument(
        '--video',
        type=str,
        help='The path to the video or the folder containing videos',
        required=True)
    parser.add_argument('--lstm-weight',
                        type=str,
                        help='lstm weight file',
                        required=True)
    parser.add_argument('--hrnet-weight',
                        type=str,
                        help='hrnet weight file',
                        default='weight/pose_hrnet_w48_384x288.pth')
    parser.add_argument('--series-length',
                        type=int,
                        help='the length of a pose series',
                        default=10)
    parser.add_argument(
        '--min-poses',
        type=int,
        help=
        'minimum number of poses detected for a series to be considered valid',
        default=7)
    parser.add_argument('--out',
                        type=str,
                        help='output file path',
                        required=True)
    parser.add_argument('--mode',
                        type=str,
                        help="'video' or 'folder'",
                        default='video')
    args = parser.parse_args()
    if args.mode not in ['video', 'folder']:
        raise ValueError('mode must be video or folder')

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # Load the YOLOv5 person detector from the hub.
    yolov5x = torch.hub.load('ultralytics/yolov5', 'yolov5x', pretrained=True)
    yolov5x.to(device)

    # HRNet preprocessing: 384x288 input, ImageNet statistics.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((384, 288)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Load the HRNet pose estimator.
    hrnet = HRNet(48)
    hrnet.to(device)
    hrnet.eval()
    checkpoint = torch.load(args.hrnet_weight, map_location=device)
    hrnet.load_state_dict(checkpoint)

    # Load the pretrained LSTM classifier.
    # FIX: map_location keeps GPU-saved weights loadable on CPU-only
    # machines, consistent with the hrnet checkpoint load above.
    checkpoint = torch.load(args.lstm_weight, map_location=device)
    lstm_model = DatLSTM(39, 64, 2, args.series_length)
    lstm_model.to(device)
    lstm_model.eval()
    lstm_model.load_state_dict(checkpoint)

    # Assemble the end-to-end detector.
    violence_detector = ViolenceDetector(yolov5x, hrnet, lstm_model,
                                         args.series_length, args.min_poses,
                                         device)
    if args.mode == 'video':
        # Single-video inference.
        vid = Video(args.video, yolov5x, transform, device, hrnet, 'video')
        violence_detector.predict_and_save(vid, args.out)
    else:
        # Folder mode: process every file in the directory.
        for video in tqdm(os.listdir(args.video)):
            vid = Video(os.path.join(args.video, video), yolov5x, transform,
                        device, hrnet, 'video')
            violence_detector.predict_and_save(vid,
                                               os.path.join(args.out, video))
import matplotlib.pyplot as plt
import torch
from torchvision.models import resnet18
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from utils import classify, evaluate, generate_datasets, train

if __name__ == '__main__':
    # 224x224 ImageNet-normalized input for the resnet18 classifier.
    transformations = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # NOTE(review): the triple-quoted string below disables the dataset /
    # loader setup; it is not closed within this excerpt, so the remaining
    # lines are string content, reproduced verbatim.
    """
    train_data, val_data, test_data = generate_datasets(
        train_path="P:/face-mask-dataset/face-mask-dataset/train",
        test_path="P:/face-mask-dataset/face-mask-dataset/test",
        train_val_ratio=0.2,
        transformations=transformations
    )
    train_loader = DataLoader(dataset=train_data, batch_size=8, shuffle=True, drop_last=True)
    val_loader = DataLoader(dataset=val_data, batch_size=8, shuffle=True, drop_last=True)
    model = resnet18(pretrained=True)
]  # (tail of a per-channel mean list that starts above this excerpt)
# Four per-channel std values matching the mean list above.
std = [
    0.15201123862047256, 0.14087982537762958, 0.139965362113942,
    0.10123220339551285
]
# Train-time augmentation at 512px; val uses resize + center crop only.
transform['train'] = transforms.Compose([
    transforms.RandomAffine(20, shear=20, resample=PIL.Image.BILINEAR),
    #transforms.RandomRotation(20),
    transforms.RandomResizedCrop(512),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
transform['val'] = transforms.Compose([
    transforms.Resize(570),
    transforms.CenterCrop(512),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


class ProteinDataset(torch.utils.data.Dataset):
    # Dataset over protein images; the remainder of __init__ continues
    # beyond this excerpt.
    def __init__(self,
                 root,
                 phase,
                 image_labels=None,
                 size=None,
                 transform=None):
        self.root = os.path.expanduser(root)
        self.phase = phase
import os
import torch
from torchvision.transforms import transforms
from torch.utils.data import Dataset
from PIL import Image
import numpy as np

# Single-channel normalization (mean 0.2, std 0.58) after a 256x256 resize.
transformer = transforms.Compose([
    #transforms.RandomRotation(90),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.2], std=[0.58])
])
#transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

img_path = './dataset/img/'
label_path = './dataset/label/label.txt'


def read_label(path):
    # Parse space-separated label rows: fields 1..30 of each line become
    # floats (field 0 is skipped -- presumably an id; confirm against the
    # data file).  NOTE: the function body continues beyond this excerpt.
    label = []
    txt_file = open(path, "r")
    xtlines = txt_file.readlines()
    for item in xtlines:
        single_label = []
        item = item.split(" ")
        for i in range(1, 31):
            if i == 30:
                # strip the trailing newline from the last field
                item[i] = item[i][:-1]
            item[i] = float(item[i])
            single_label.append(item[i])
from torch import Tensor
from torch.autograd import Variable, grad
from torchvision import datasets
from torchvision.transforms import transforms

from models import Generator, Discriminator, FeatureExtractor

# SRGAN-style training setup on CIFAR-100.
iterations = 100
use_cuda = True
batch_size = 16

models_path = Path("models/")
models_path.mkdir(exist_ok=True, parents=True)

# Low-resolution view: downscale a tensor image to 16px via PIL and back.
scale = transforms.Compose(
    [transforms.ToPILImage(),
     transforms.Resize(16),
     transforms.ToTensor()])
transform = transforms.Compose([transforms.ToTensor()])
dataset = datasets.CIFAR100(root="data/",
                            train=True,
                            download=True,
                            transform=transform)
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=2)

G = Generator(16, 2)
D = Discriminator()
# NOTE(review): `torch`, `torchvision`, `nn` and `Path` are used here but
# not imported in this excerpt -- presumably imported above it; confirm.
feature_extractor = FeatureExtractor(torchvision.models.vgg19(pretrained=True))
content_criterion = nn.MSELoss()
def __init__(self,
             mode,
             batchsz,
             n_way,
             k_shot,
             k_query,
             imsize,
             data_path,
             startidx=0,
             verbose=True):
    """
    :param mode: train, val or test
    :param batchsz: batch size of sets, not batch of imgs
    :param n_way: number of classes per episode
    :param k_shot: support images per class
    :param k_query: num of query imgs per class
    :param imsize: resize to
    :param data_path: root folder of the Mini-Imagenet data
    :param startidx: start to index label from startidx
    :param verbose: print the episode configuration when True
    """
    self.batchsz = batchsz  # batch of set, not batch of imgs
    self.n_way = n_way  # n-way
    self.k_shot = k_shot  # k-shot
    self.k_query = k_query  # for evaluation
    self.support_size = self.n_way * self.k_shot  # num of samples per set
    self.query_size = self.n_way * self.k_query  # number of samples per set for evaluation
    self.imsize = imsize  # resize to
    self.startidx = startidx  # index label not from 0, but from startidx
    self.data_path = data_path
    if verbose:
        print(
            'shuffle DB :%s, b:%d, %d-way, %d-shot, %d-query, resize:%d' %
            (mode, batchsz, n_way, k_shot, k_query, imsize))

    # Path -> normalized RGB tensor, LANCZOS resize to imsize.
    self.transform = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),
        transforms.Resize((self.imsize, self.imsize), Image.LANCZOS),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # check if images are all in one folder or separated into train/val/test folders
    if os.path.exists(os.path.join(data_path, 'images')):
        self.subfolder_split = False
        self.path_images = os.path.join(data_path, 'images')  # image path
        self.path_preprocessed = os.path.join(
            data_path, 'images_preprocessed')  # preprocessed image path
    elif os.path.exists(os.path.join(data_path, 'train')):
        self.subfolder_split = True
        self.path_images = os.path.join(data_path, mode)
        # Preprocessed cache lives in images_preprocessed/<mode>; both
        # directory levels are created on demand.
        self.path_preprocessed = os.path.join(data_path,
                                              'images_preprocessed')
        if not os.path.exists(self.path_preprocessed):
            os.mkdir(self.path_preprocessed)
        self.path_preprocessed = os.path.join(data_path,
                                              'images_preprocessed', mode)
        if not os.path.exists(self.path_preprocessed):
            os.mkdir(self.path_preprocessed)
    else:
        raise FileNotFoundError(
            'Mini-Imagenet data not found. '
            'Please add images in one of the following folder structures:'
            './data/miniimagenet/images'
            './data/miniimagenet/{train}{test}{val}'
            'or specify --data_path in the arguments.')

    # NOTE(review): csvdata wraps a single CSV in a one-element list, so
    # the loop below runs once -- possibly a leftover of multi-CSV support.
    csvdata = [self.loadCSV(os.path.join(data_path,
                                         mode + '.csv'))]  # csv path

    # check if we have the images
    if not os.listdir(self.path_images):
        raise FileNotFoundError(
            'Mini-Imagenet data not found. '
            'Please add images in one of the following folder structures:'
            './data/miniimagenet/images'
            './data/miniimagenet/{train}{test}{val}'
            'or specify --data_path in the arguments.')

    self.data = []
    self.img2label = {}
    for c in csvdata:
        for i, (k, v) in enumerate(c.items()):
            self.data.append(v)  # [[img1, img2, ...], [img111, ...]]
            self.img2label[k] = i + self.startidx  # {"img_name[:9]":label}
        self.startidx += i + 1
    self.num_classes = len(self.data)
    self.create_batch(self.batchsz)
# (tail of a replay-buffer push_and_pop-style method whose `def` lies above
# this excerpt: with probability 0.5 an incoming element swaps with a stored
# one, otherwise it passes through unchanged)
        if random.uniform(0, 1) > 0.5:
            i = random.randint(0, self.max_size - 1)
            to_return.append(self.data[i].clone())
            self.data[i] = element
        else:
            to_return.append(element)
    return Variable(torch.cat(to_return))


#%%
# NOTE(review): 'L2H_intput' looks like a typo for 'L2H_input'; left as-is
# because it is a runtime path.
save_path = './L2H_intput'
size = 256
batchSize = 10

# Augmentation list handed to ImageDataset (presumably composed there).
transforms_ = [
    transforms.Resize(int(size * 1.12)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size)
]
# Multi-scale tensor conversions: full, half and quarter resolution.
transforms_256 = [transforms.ToTensor()]
transforms_128 = [transforms.Resize(int(size / 2)), transforms.ToTensor()]
transforms_64 = [transforms.Resize(int(size / 4)), transforms.ToTensor()]
dataloader = DataLoader(ImageDataset(save_path,
                                     transforms_=transforms_,
                                     unaligned=True),
                        batch_size=batchSize,
                        shuffle=True,
                        drop_last=True)
#%%
files_d = sorted(glob.glob(os.path.join(save_path) + '/*.*'))
def __init__(self,
             c,
             nof_joints,
             checkpoint_path,
             resolution=(384, 288),
             interpolation=cv2.INTER_CUBIC,
             multiperson=True,
             return_bounding_boxes=False,
             max_batch_size=32,
             yolo_model_def="./models/detectors/yolo/config/yolov3.cfg",
             yolo_class_path="./models/detectors/yolo/data/coco.names",
             yolo_weights_path="./models/detectors/yolo/weights/yolov3.weights",
             device=torch.device("cpu")):
    """
    Initializes a new SimpleHRNet object.
    HRNet (and YOLOv3) are initialized on the given torch.device and its
    (their) pre-trained weights will be loaded from disk.

    Args:
        c (int): number of channels.
        nof_joints (int): number of joints.
        checkpoint_path (str): path to an official hrnet checkpoint or a
            checkpoint obtained with `train_coco.py`.
        resolution (tuple): hrnet input resolution - format: (height, width).
            Default: (384, 288)
        interpolation (int): opencv interpolation algorithm.
            Default: cv2.INTER_CUBIC
        multiperson (bool): if True, multiperson detection will be enabled.
            This requires the use of a people detector (like YOLOv3).
            Default: True
        return_bounding_boxes (bool): if True, bounding boxes will be
            returned along with poses by self.predict.
            Default: False
        max_batch_size (int): maximum batch size used in hrnet inference.
            Useless without multiperson=True.
            Default: 32
        yolo_model_def (str): path to yolo model definition file.
            Default: "./models/detectors/yolo/config/yolov3.cfg"
        yolo_class_path (str): path to yolo class definition file.
            Default: "./models/detectors/yolo/data/coco.names"
        yolo_weights_path (str): path to yolo pretrained weights file.
            Default: "./models/detectors/yolo/weights/yolov3.weights"
        device (:class:`torch.device`): the hrnet (and yolo) inference
            will be run on this device.
            Default: torch.device("cpu")
    """
    self.c = c
    self.nof_joints = nof_joints
    self.checkpoint_path = checkpoint_path
    self.resolution = resolution  # in the form (height, width) as in the original implementation
    self.interpolation = interpolation
    self.multiperson = multiperson
    self.return_bounding_boxes = return_bounding_boxes
    self.max_batch_size = max_batch_size
    self.yolo_model_def = yolo_model_def
    self.yolo_class_path = yolo_class_path
    self.yolo_weights_path = yolo_weights_path
    self.device = device

    self.model = HRNet(c=c, nof_joints=nof_joints).to(device)

    checkpoint = torch.load(checkpoint_path, map_location=self.device)
    # Checkpoints produced by train_coco.py wrap the weights under 'model'.
    if 'model' in checkpoint:
        self.model.load_state_dict(checkpoint['model'])
    else:
        self.model.load_state_dict(checkpoint)
    self.model.eval()

    if not self.multiperson:
        # Single-person mode: the caller supplies an already-cropped input.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    else:
        # Multi-person mode: YOLOv3 detects people; crops are resized to
        # the hrnet input resolution before normalization.
        self.detector = YOLOv3(model_def=yolo_model_def,
                               class_path=yolo_class_path,
                               weights_path=yolo_weights_path,
                               classes=('person',),
                               max_batch_size=self.max_batch_size,
                               device=device)
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(
                (self.resolution[0], self.resolution[1])),  # (height, width)
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
])  # (tail of a training-image transform Compose that starts above this excerpt)
# Label-side training augmentation; NEAREST keeps label values discrete.
TrainLabelTransform = transforms.Compose([
    transforms.RandomAffine(degrees=(-10, 10),
                            translate=(0.1, 0.1),
                            scale=(0.5, 2.),
                            shear=10),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomResizedCrop(ImgResize,
                                 scale=(1., 1.),
                                 interpolation=Image.NEAREST),
    transforms.ToTensor(),
])
# NOTE(review): these label transforms draw randomness independently of the
# image transforms; verify image/label augmentation stays in sync upstream.
ValImgTransform = transforms.Compose([
    transforms.Resize(ImgResize),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.46], std=[0.10]),  # single-channel stats
])
ValLabelTransform = transforms.Compose([
    transforms.Resize(ImgResize, interpolation=Image.NEAREST),
    transforms.ToTensor(),
])


class PipeDataset(Dataset):
    # Paired image/label dataset; the rest of __init__ continues beyond
    # this excerpt.
    def __init__(
            self,
            DatasetFolderPath,
            ImgTransform,
            LabelTransform,
def pil_img_to_tensor_of_with_size(pil_image, img_shape):
    """Resize *pil_image* and convert it to a tensor.

    The target size is taken from *img_shape*, which is indexed as
    (channels, height, width); only indices 1 and 2 are used.
    """
    target_size = (img_shape[1], img_shape[2])
    resized = transforms.Resize(target_size)(pil_image)
    return ToTensor()(resized)