def get_dataset(dataset):
    if dataset == 'imagenet':
        transform_train = Compose([
            RandomResizedCrop(C.get()['target_size'] + 32,
                              scale=(0.9, 1.0),
                              interpolation=PIL.Image.BICUBIC),
        ])
        transform_test = Compose([
            Resize(C.get()['target_size'] + 32, interpolation=PIL.Image.BICUBIC)
        ])

        trainset = ImageNet(root=imagenet_path, split='train', transform=transform_train)
        # testset1 deliberately reuses the train-time transform on the
        # validation split; testset2 uses the deterministic resize.
        testset1 = ImageNet(root=imagenet_path, split='val', transform=transform_train)
        testset2 = ImageNet(root=imagenet_path, split='val', transform=transform_test)
        trainset.num_class = testset1.num_class = testset2.num_class = 1000
        trainset.targets = [lb for _, lb in trainset.samples]
    else:
        raise ValueError(dataset)
    return trainset, testset1, testset2
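# get_dataset relies on module-level configuration: C.get()['target_size']
# and imagenet_path must already be defined. Usage sketch:
#
#   trainset, testset1, testset2 = get_dataset('imagenet')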
def retrieve_image_tensor(image_path):
    # Resize and Normalize are applied to a tensor here (ToTensor runs
    # first), which requires torchvision's tensor-transform support (>= 0.8).
    transform = Compose([Resize((256, 256)), Normalize(mean=[0.5], std=[0.5])])
    pil_img = Image.open(image_path)
    tensor_image = torchvision.transforms.ToTensor()(pil_img)
    return transform(tensor_image)
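# Usage sketch (the file path is hypothetical, not from the original code):
#
#   img = retrieve_image_tensor('sample_molecule.png')
#   img.shape  # e.g. torch.Size([1, 256, 256]) for a single-channel PNG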
def __init__(self, target_length, interpolation_strategy, pad_value):
    if not isinstance(target_length, (int, Sequence)):
        raise TypeError("Size should be int or sequence. Got {}".format(type(target_length)))
    if isinstance(target_length, Sequence) and len(target_length) not in (1, 2):
        raise ValueError("If size is a sequence, it should have 1 or 2 values")

    self.target_length = target_length
    self.interpolation_strategy = interpolation_strategy
    self.pad_value = pad_value
    # Initialize both parent transforms; the size and padding passed here
    # are placeholders that __call__ overwrites per image.
    Resize.__init__(self, size=(512, 512), interpolation=self.interpolation_strategy)
    Pad.__init__(self, padding=(0, 0, 0, 0), fill=self.pad_value, padding_mode="constant")
def __call__(self, img):
    w, h = img.size
    if w > h:
        # Landscape: width becomes target_length, pad top/bottom.
        self.size = (int(np.round(self.target_length * (h / w))), self.target_length)
        img = Resize.__call__(self, img)
        total_pad = self.size[1] - self.size[0]
        half_pad = total_pad // 2
        self.padding = (0, half_pad, 0, total_pad - half_pad)
        return Pad.__call__(self, img)
    else:
        # Portrait or square: height becomes target_length, pad left/right.
        self.size = (self.target_length, int(np.round(self.target_length * (w / h))))
        img = Resize.__call__(self, img)
        total_pad = self.size[0] - self.size[1]
        half_pad = total_pad // 2
        self.padding = (half_pad, 0, total_pad - half_pad, 0)
        return Pad.__call__(self, img)
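# The __init__/__call__ pair above implements a letterboxing transform that
# multiply-inherits from Resize and Pad: it scales the longer image side to
# target_length, then pads the shorter side symmetrically with pad_value.
# Usage sketch (the class name ResizePad and the path are assumptions, not
# from the original code):
#
#   letterbox = ResizePad(target_length=256,
#                         interpolation_strategy=PIL.Image.BICUBIC,
#                         pad_value=255)
#   square = letterbox(Image.open('molecule.png'))  # 256x256 PIL image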
def retrieve_inference_dataloader(dataframe, batch_size=4):
    transform = Compose([Resize((256, 256)), Normalize(mean=[0.5], std=[0.5])])
    dataset = MoleculesDatasetInference(dataframe, transform)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=0,
                            pin_memory=True,
                            shuffle=False)
    return dataloader
def evaluate_on_imagenet(model: NFNet, dataset_dir: Path, batch_size=50, device='cuda:0'):
    transforms = Compose([
        # Pad32CenterCrop(model.test_imsize),
        ToTensor(),
        Resize((model.test_imsize, model.test_imsize), PIL.Image.BICUBIC),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    print(f"Starting evaluation from {dataset_dir}")
    dataset = get_dataset(dataset_dir, transforms=transforms)
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,  # F0: 120, F1: 100, F2: 80
        shuffle=False,
        pin_memory=False,
        num_workers=8)

    print(f"Validation set contains {len(dataset)} images.")

    model.to(device)
    model.eval()

    processed_imgs = 0
    correct_labels = 0
    for step, data in enumerate(dataloader):
        with torch.no_grad():
            inputs = data[0].to(device)
            targets = data[1].to(device)

            output = model(inputs).type(torch.float32)

            processed_imgs += targets.size(0)
            _, predicted = torch.max(output, 1)
            correct_labels += (predicted == targets).sum().item()

        batch_padding = int(math.log10(len(dataloader.dataset)) + 1)
        print(
            f"\rProcessing {processed_imgs:{batch_padding}d}/{len(dataloader.dataset)}."
            f" Accuracy: {100.0*correct_labels/processed_imgs:6.4f}",
            end='', flush=True)

    print(f"\nFinished eval. Accuracy: {100.0*correct_labels/processed_imgs:6.4f}")
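# Usage sketch (the variant and the reliance on NFNet constructor defaults
# are assumptions, not from the original code):
#
#   model = NFNet(num_classes=1000, variant='F0')
#   evaluate_on_imagenet(model, Path('/data/imagenet'), batch_size=120)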
def retrieve_evaluate_dataloader(dataframe, vocab: Vocabulary, batch_size=8,
                                 shuffle=False, sequence_length=None):
    pad_idx = vocab.stoi['<PAD>']
    transform = Compose([
        Resize((256, 256)),
        Normalize(mean=[0.5], std=[0.5])
    ])
    dataset = MoleculesDataset(dataframe, vocab, transform)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=True,
        collate_fn=CapsCollate(pad_idx=pad_idx, batch_first=True,
                               sequence_length=sequence_length)
    )
    return dataloader
def retrieve_train_dataloader(dataframe, vocab: Vocabulary, batch_size=8,
                              shuffle=True, sequence_length=None):
    pad_idx = vocab.stoi['<PAD>']
    transform = Compose([
        # RandomVerticalFlip(),
        # RandomHorizontalFlip(),
        # RandomRotation(180),
        Resize((256, 256)),
        Normalize(mean=[0.5], std=[0.5]),
    ])
    dataset = MoleculesDataset(dataframe, vocab, transform)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=True,
        collate_fn=CapsCollate(pad_idx=pad_idx, batch_first=True,
                               sequence_length=sequence_length)
    )
    return dataloader
import PIL
from PIL import Image
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torchvision.transforms.transforms import Compose, Normalize, Resize, ToTensor, RandomHorizontalFlip, RandomCrop

# IO
transform = Compose([
    # RandomHorizontalFlip(),
    Resize((256, 256), PIL.Image.BICUBIC),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def get_train_file_path(image_ids):
    # print(image_id)
    # return "../input/bms-molecular-translation/train/{}/{}/{}/{}.png".format(
    #     image_id[0], image_id[1], image_id[2], image_id
    # )
    return [
        "./input/train/{}/{}/{}/{}.png".format(
            image_id[0], image_id[1], image_id[2], image_id)
        for image_id in image_ids
    ]


def get_test_file_path(image_ids):
    return [
        "./input/test/{}/{}/{}/{}.png".format(
            image_id[0], image_id[1], image_id[2], image_id)
        for image_id in image_ids
    ]
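# The helpers above shard images by the first three characters of their id:
# ./input/train/<c0>/<c1>/<c2>/<image_id>.png. Quick check with a
# hypothetical id of the shape this layout expects:
#
#   get_train_file_path(['000011a64c74'])
#   # -> ['./input/train/0/0/0/000011a64c74.png']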
def train(config: dict) -> None:
    if config['device'].startswith('cuda'):
        if torch.cuda.is_available():
            print(f"Using CUDA{torch.version.cuda} with cuDNN{torch.backends.cudnn.version()}")
        else:
            raise ValueError("You specified to use cuda device, but cuda is not available.")

    if config['pretrained'] is not None:
        model = pretrained_nfnet(path=config['pretrained'],
                                 stochdepth_rate=config['stochdepth_rate'],
                                 alpha=config['alpha'],
                                 activation=config['activation'])
    else:
        model = NFNet(num_classes=config['num_classes'],
                      variant=config['variant'],
                      stochdepth_rate=config['stochdepth_rate'],
                      alpha=config['alpha'],
                      se_ratio=config['se_ratio'],
                      activation=config['activation'])

    transforms = Compose([
        RandomHorizontalFlip(),
        Resize((model.train_imsize, model.train_imsize), PIL.Image.BICUBIC),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    device = config['device']
    dataset = get_dataset(path=config['dataset'], transforms=transforms)

    if config['overfit']:
        dataset = Subset(dataset, [i * 50 for i in range(0, 1000)])

    dataloader = DataLoader(dataset=dataset,
                            batch_size=config['batch_size'],
                            shuffle=True,
                            num_workers=config['num_workers'],
                            pin_memory=config['pin_memory'])

    if config['scale_lr']:
        learning_rate = config['learning_rate'] * config['batch_size'] / 256
    else:
        learning_rate = config['learning_rate']

    if not config['do_clip']:
        config['clipping'] = None

    if config['use_fp16']:
        model.half()

    model.to(device)  # "memory_format=torch.channels_last" TBD

    optimizer = SGD_AGC(
        # The optimizer needs all parameter names
        # to filter them by hand later
        named_params=model.named_parameters(),
        lr=learning_rate,
        momentum=config['momentum'],
        clipping=config['clipping'],
        weight_decay=config['weight_decay'],
        nesterov=config['nesterov'])

    # Find desired parameters and exclude them
    # from weight decay and clipping
    for group in optimizer.param_groups:
        name = group['name']
        if model.exclude_from_weight_decay(name):
            group['weight_decay'] = 0
        if model.exclude_from_clipping(name):
            group['clipping'] = None

    criterion = nn.CrossEntropyLoss()

    runs_dir = Path('runs')
    run_index = 0
    while (runs_dir / ('run' + str(run_index))).exists():
        run_index += 1
    runs_dir = runs_dir / ('run' + str(run_index))
    runs_dir.mkdir(exist_ok=False, parents=True)
    checkpoints_dir = runs_dir / 'checkpoints'
    checkpoints_dir.mkdir()

    writer = SummaryWriter(str(runs_dir))
    scaler = amp.GradScaler()

    for epoch in range(config['epochs']):
        model.train()
        running_loss = 0.0
        processed_imgs = 0
        correct_labels = 0
        epoch_time = time.time()

        for step, data in enumerate(dataloader):
            inputs = data[0].half().to(device) if config['use_fp16'] else data[0].to(device)
            targets = data[1].to(device)

            optimizer.zero_grad()

            with amp.autocast(enabled=config['amp']):
                output = model(inputs)
                loss = criterion(output, targets)

            # Gradient scaling
            # https://www.youtube.com/watch?v=OqCrNkjN_PM
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            processed_imgs += targets.size(0)
            _, predicted = torch.max(output, 1)
            correct_labels += (predicted == targets).sum().item()

            epoch_padding = int(math.log10(config['epochs']) + 1)
            batch_padding = int(math.log10(len(dataloader.dataset)) + 1)
            print(
                f"\rEpoch {epoch+1:0{epoch_padding}d}/{config['epochs']}"
                f"\tImg {processed_imgs:{batch_padding}d}/{len(dataloader.dataset)}"
                f"\tLoss {running_loss / (step+1):6.4f}"
                f"\tAcc {100.0*correct_labels/processed_imgs:5.3f}%\t",
                end='', flush=True)

        elapsed = time.time() - epoch_time
        print(f"({elapsed:.3f}s, {elapsed/len(dataloader):.3}s/step, {elapsed/len(dataset):.3}s/img)")

        global_step = epoch * len(dataloader) + step
        writer.add_scalar('training/loss', running_loss / (step + 1), global_step)
        writer.add_scalar('training/accuracy',
                          100.0 * correct_labels / processed_imgs, global_step)

        # if not config['overfit']:
        if epoch % 10 == 0 and epoch != 0:
            cp_path = checkpoints_dir / ("checkpoint_epoch" + str(epoch + 1) + ".pth")
            torch.save({
                'epoch': epoch,
                'model': model.state_dict(),
                'optim': optimizer.state_dict(),
                'loss': loss
            }, str(cp_path))
            print(f"Saved checkpoint to {str(cp_path)}")
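# A sketch of the config dict train() expects, inferred from the keys it
# reads above; the values are illustrative assumptions, not recommendations:
#
#   config = {
#       'device': 'cuda:0', 'pretrained': None, 'num_classes': 1000,
#       'variant': 'F0', 'stochdepth_rate': 0.25, 'alpha': 0.2,
#       'se_ratio': 0.5, 'activation': 'gelu', 'dataset': '/data/imagenet',
#       'overfit': False, 'batch_size': 256, 'num_workers': 8,
#       'pin_memory': True, 'scale_lr': True, 'learning_rate': 0.1,
#       'do_clip': True, 'clipping': 0.01, 'use_fp16': False,
#       'momentum': 0.9, 'weight_decay': 2e-5, 'nesterov': True,
#       'amp': False, 'epochs': 100,
#   }
#   train(config)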
# Read annotations (video_name, label); despite the original "csv" note,
# they come from a JSON records file.
df = pd.read_json(args.json_file, orient='records')
total_videos = int(df['video_name'].count())

# Add video index column (to be utilized by json)
df['video_idx'] = range(total_videos)

# Compute the sequence length (no. of frames) for each video (row)
df['video_length'] = df['video_name'].apply(lambda x: _count_frames(x, args.frames_dir))

# Image Mean & Std-Dev for Normalization
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

dataset = VideoFramesDataset(args.frames_dir, df,
                             Compose([Resize((224, 224)), ToTensor(), Normalize(mean, std)]))
# dataset = VideoFramesDataset(args.frames_dir, df, Compose([Resize((224, 224)), ToTensor()]))  # for sanity check

# Compute the max sequence length, needed for embedding array - [N, F, D]
max_video_len = compute_max_frames_len(args.frames_dir)
total_frames = len(dataset)
print('Total Videos: {} | Total Frames: {} | Max Video length: {}'.format(
    total_videos, total_frames, max_video_len))

dataloader = DataLoader(dataset, args.batch_size, num_workers=args.num_workers)

# Load model
model, emb_dim = load_cnn(args.model)
model.to(device)
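# A minimal sketch of the step this sets up: running the CNN over every
# frame and scattering embeddings into the [N, F, D] array. The batch layout
# (frame tensor, video_idx, frame_idx) is an assumption about
# VideoFramesDataset, not confirmed by the original code:
#
#   embeddings = torch.zeros(total_videos, max_video_len, emb_dim)
#   model.eval()
#   with torch.no_grad():
#       for frames, video_idx, frame_idx in dataloader:
#           emb = model(frames.to(device))          # [B, D]
#           embeddings[video_idx, frame_idx] = emb.cpu()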
# print("caption_vec SOS: ", (e-s)) # s = time() caption_vec += self.vocab.numericalize(row["InChI"]) # e = time() # print("caption_vec InChI: ", (e-s)) # s = time() caption_vec += [self.vocab.stoi["<EOS>"]] # e = time() # print("caption_vec EOS: ", (e-s)) return (self.transform(tensorImage), torch.as_tensor(caption_vec)) transform = Compose([ #RandomHorizontalFlip(), Resize((256, 256)), #ToTensor(), Normalize(mean=[0.5], std=[0.5]), ]) dataset = MoleculesDataset("data.csv", transform) pad_idx = dataset.vocab.stoi["<PAD>"] class CapsCollate: """ Collate to apply the padding to the captions with dataloader """ def __init__(self, pad_idx, batch_first=False): self.pad_idx = pad_idx
import torch
from PIL import ImageFile
from utils.CBAMresnet import resnet18
from utils.load_data import XRayDataSet
from torch.utils.data import DataLoader
from collections.abc import Iterable
from torchvision import datasets, models, transforms
from torchvision.transforms.transforms import Compose, Resize, RandomRotation, RandomHorizontalFlip, RandomVerticalFlip, ToTensor
from tensorboardX import SummaryWriter

data_path = './COVID19_dataset'
metadata = './metadata.csv'
PATH = 'X:/covid19/CBAM_model_pkls'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

train_aug = Compose([Resize((224, 224)),
                     RandomRotation(180),
                     RandomHorizontalFlip(),
                     RandomVerticalFlip(),
                     ToTensor()])
val_aug = Compose([Resize((224, 224)),
                   ToTensor()])

BS = 24
pre_epoch = 0
EPOCH = 50
writer = SummaryWriter(comment='Linear')
    # Load darknet weights
    model.load_darknet_weights(opt.weights_path)
else:
    # Load checkpoint weights
    model.load_state_dict(torch.load(opt.weights_path))

model.eval()  # Set in evaluation mode

img_num = 1100
root = '/Users/youzunzhi/Downloads/nyuv2-seg'
origin_img_path = f'{root}/test/nyu_rgb_{img_num:04d}.png'
color_img_path = f'{root}/test_color_0/nyu_rgb_{img_num:04d}.png'
origin_img = Image.open(origin_img_path).convert('RGB')
color_img = Image.open(color_img_path).convert('RGB')

from torchvision.transforms.transforms import Resize, CenterCrop
origin_img = Resize(288)(origin_img)
origin_img = CenterCrop((256, 256))(origin_img)
origin_img = Resize(128)(origin_img)
color_img = Resize(128)(color_img)
origin_img = transforms.ToTensor()(origin_img)
color_img = transforms.ToTensor()(color_img)
origin_img = origin_img.unsqueeze(0)
color_img = color_img.unsqueeze(0)

classes = load_classes(opt.class_path)  # Extracts class labels from file

imgs = []  # Stores image paths
img_detections = []  # Stores detections for each image index

print("\nPerforming object detection:")
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

duration = 2000
freq = 440

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = './COVID19_dataset'
metadata = './metadata.csv'
PATH = './'

model = resnet18(pretrained=True).to(device)
model.load_state_dict(
    torch.load('./CBAM_model_pkls/CBAM(freezemore)_model_epoch_48.pkl'))

train_aug = Compose([Resize((224, 224)), ToTensor()])
val_aug = Compose([Resize((224, 224)), ToTensor()])

BS = 1
Train_features = []
Train_classes = []
Test_features = []
Test_classes = []

trainset = XRayDataSet(os.path.join(data_path, 'train'), metadata, transforms=train_aug)
testset = XRayDataSet(os.path.join(data_path, 'test'), metadata, transforms=val_aug)
trainloader = DataLoader(trainset, batch_size=BS, shuffle=True, num_workers=0)
testloader = DataLoader(testset, batch_size=BS, shuffle=True, num_workers=0)
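# A minimal sketch of the step the empty lists above prepare for: extracting
# per-image features for PCA / classification_report. Treating the model's
# output as the feature vector is an assumption; the original may instead
# hook an intermediate CBAM layer:
#
#   model.eval()
#   with torch.no_grad():
#       for img, label in trainloader:
#           feat = model(img.to(device))               # BS = 1
#           Train_features.append(feat.squeeze(0).cpu().numpy())
#           Train_classes.append(int(label))
#   pca = PCA(n_components=2)
#   reduced = pca.fit_transform(Train_features)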