import torch from tqdm import tqdm import numpy as np from torch.autograd import Variable from torchvision.models import squeezenet1_1, resnet18, alexnet, vgg11, densenet121 from torchvision.transforms import Normalize, Compose, RandomSizedCrop, RandomHorizontalFlip, ToTensor print(torch.__version__) model = squeezenet1_1() model.eval() transform = Compose([ RandomSizedCrop(224), RandomHorizontalFlip(), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) pil_image = Image.open('cat.jpg') inferences = [] predictions = [] for i in tqdm(range(1, 10)): prediction_meter = time.time() image = transform(pil_image) image.unsqueeze_(0) image_tensor = Variable(image)
images[vid] = {frame_nr: path} print(vid, len(images[vid]), list(images[vid].keys())[-1]) assert list(images[vid].keys()) == sorted(list(images[vid].keys())) assert list(images[vid].keys())[-1] == len(images[vid]) - 1 #%% from torchvision.datasets.folder import default_loader import os from torchvision.transforms import ToTensor, Compose, Normalize from tqdm import tqdm loaded_images = {x: [] for x in vids} transform = Compose((ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))) for vid in tqdm(vids): for image_path in sorted(list(images[vid].values())): img = default_loader(os.path.join(f.root, image_path)) loaded_images[vid].append(transform(img)) # %% from importlib import reload import forgery_detection.models.audio.similarity_stuff as s reload(s) from forgery_detection.models.audio.utils import ContrastiveLoss import torch from collections import OrderedDict
def high_res_transform(crop_size):
    """Build the high-resolution input pipeline: random crop to ``crop_size``,
    then convert the PIL image to a tensor."""
    steps = [RandomCrop(crop_size), ToTensor()]
    return Compose(steps)
def get_trainval_multiinput_batches(train_aug_str, test_aug_str, fold_index, n_splits, batch_size, num_workers, seed=None, limit_n_samples=None):
    """Build stratified train/validation loaders over a three-view dataset.

    Each sample is presented as three parallel inputs (see the three
    ``TransformedDataset`` chains below):
      1. the two augmented, normalized bands, transposed to CHW,
      2. the bands mapped back to linear scale and range-normalized,
      3. the FFT of the bands, range-normalized.

    The stratification labels combine the class label with an
    ``is_small`` object-size hint (4 combined classes).

    :param train_aug_str: augmentation spec string for the training transform
    :param test_aug_str: augmentation spec string for the validation transform
    :param fold_index: which of the ``n_splits`` folds to use for validation
    :param n_splits: number of stratified K-fold splits
    :param batch_size: batch size for both loaders
    :param num_workers: dataloader worker count
    :param seed: random_state passed to StratifiedKFold
    :param limit_n_samples: optional cap on dataset size (debugging)
    :return: ``(train_batches, val_batches)`` OnGPUDataLoader pair
    """
    trainval_ds = IcebergDataset('Train', normalized_inc_angle=True, smart_crop_size=None, limit_n_samples=limit_n_samples)
    train_aug = get_data_transforms(train_aug_str)
    val_aug = get_data_transforms(test_aug_str)
    train_aug_ds1 = TransformedDataset(trainval_ds, x_transforms=partial(x_transform, aug_fn=train_aug), y_transforms=to_tensor)
    val_aug_ds1 = TransformedDataset(trainval_ds, x_transforms=partial(x_transform, aug_fn=val_aug), y_transforms=to_tensor)
    # Elements of train_aug_ds1, val_aug_ds1 should be still numpy arrays:
    # train_aug_ds1[0] = ( (x, a), y )
    assert isinstance(train_aug_ds1[0][0][0], np.ndarray) and isinstance(val_aug_ds1[0][0][0], np.ndarray), \
        "type(train_aug_ds1[0][0][0]): {} and type(val_aug_ds1[0][0][0]): {}" \
        .format(type(train_aug_ds1[0][0][0]), type(val_aug_ds1[0][0][0]))
    # View 2: linear-scale bands, percentile range-normalized, HWC -> CHW.
    train_aug_ds2 = TransformedDataset(train_aug_ds1, x_transforms=Compose([x_to_linear, partial(x_range_normalize, q_min=0.5, q_max=99.5), x_transpose]))
    val_aug_ds2 = TransformedDataset(val_aug_ds1, x_transforms=Compose([x_to_linear, partial(x_range_normalize, q_min=0.5, q_max=99.5), x_transpose]))
    # View 3: FFT of the bands with a tighter normalization range.
    train_aug_ds3 = TransformedDataset(train_aug_ds1, x_transforms=Compose([x_to_fft, partial(x_range_normalize, q_min=2.5, q_max=97.5), x_transpose]))
    val_aug_ds3 = TransformedDataset(val_aug_ds1, x_transforms=Compose([x_to_fft, partial(x_range_normalize, q_min=2.5, q_max=97.5), x_transpose]))
    # View 1: the augmented bands themselves, only transposed.
    # NOTE: rebinds *_aug_ds1, so views 2/3 wrap the pre-transpose dataset.
    train_aug_ds1 = TransformedDataset(train_aug_ds1, x_transforms=x_transpose)
    val_aug_ds1 = TransformedDataset(val_aug_ds1, x_transforms=x_transpose)
    train_aug_mds = MultipleInputsDataset([train_aug_ds1, train_aug_ds2, train_aug_ds3], target_reduce_fn=lambda targets: targets[0])
    val_aug_mds = MultipleInputsDataset([val_aug_ds1, val_aug_ds2, val_aug_ds3], target_reduce_fn=lambda targets: targets[0])
    # Integrate size to Kfold stratified split
    _trainval_ds = IcebergDataset('Train', limit_n_samples=limit_n_samples, return_object_size_hint=True)
    x_array = []
    y_array = []
    # (label, is_small) pairs mapped to 4 combined stratification classes.
    new_classes = {
        (0, 0): 0,
        (0, 1): 1,
        (1, 0): 2,
        (1, 1): 3,
    }
    for i, ((_, _, is_small), y) in enumerate(_trainval_ds):
        x_array.append(i)
        y = (int(y), int(is_small))
        y_array.append(new_classes[y])
    # Stratified split:
    train_indices = None
    val_indices = None
    # NOTE(review): recent scikit-learn raises if random_state is set without
    # shuffle=True — confirm the sklearn version this was written against.
    skf = StratifiedKFold(n_splits=n_splits, random_state=seed)
    # Advance the split generator until the requested fold is reached.
    for i, (train_indices, val_indices) in enumerate(skf.split(x_array, y_array)):
        if i == fold_index:
            break
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
    train_batches = OnGPUDataLoader(train_aug_mds, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers, drop_last=True, pin_memory=True)
    val_batches = OnGPUDataLoader(val_aug_mds, batch_size=batch_size, sampler=val_sampler, num_workers=num_workers, drop_last=True, pin_memory=True)
    return train_batches, val_batches
import torch import torch.nn as nn import matplotlib.pyplot as plt from torch.autograd import Variable from mpl_toolkits.axes_grid1 import ImageGrid from torchvision.transforms import Compose, ToTensor # compose a transform configuration transform_config = Compose([ToTensor()]) def accumulate_group_evidence(class_mu, class_logvar, labels_batch, is_cuda=True): """ :param class_mu: mu values for class latent embeddings of each sample in the mini-batch :param class_logvar: logvar values for class latent embeddings for each sample in the mini-batch :param labels_batch: class labels of each sample (the operation of accumulating class evidence can also be performed using group labels instead of actual class labels) :param is_cuda: :return: """ var_dict = {} mu_dict = {} # convert logvar to variance for calculations class_var = class_logvar.exp() # calculate var inverse for each group using group vars for i in range(len(labels_batch)):
def main():
    """Train a CNN on an image folder, validate each epoch, predict on a
    held-out test folder, write the predictions to CSV, and log metrics.

    NOTE(review): folder names ("Data_Test", "Test", "Test/sub/") and the
    CSV paths are hard-coded, as in the original script.
    """
    args = arguments()
    # Pick the compute device.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")  # Can continue going on here, like cuda:1 cuda:2....etc.
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")
    transforms = Compose([Resize((50, 50)), ToTensor()])
    dataset = ImageFolder("Data_Test", transform=transforms)
    testset = ImageFolder("Test", transform=transforms)
    INPUT_SIZE = dataset[0][0].shape
    # 80/20 train/validation split.
    train_len = int(0.8 * len(dataset))
    val_len = int(len(dataset) - train_len)
    train, val = random_split(dataset, lengths=(train_len, val_len))
    train_loader = DataLoader(train, batch_size=args.train_batch_size, shuffle=True)
    val_loader = DataLoader(val, batch_size=args.val_batch_size, shuffle=False)
    prediction_loader = DataLoader(testset, batch_size=args.pred_batch_size)
    net = Net(INPUT_SIZE).to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    loss_function = nn.CrossEntropyLoss()
    # with open("CNN_model.log", "a") as f:
    for epoch in range(args.epochs):
        # ---- training ----
        net.train()
        sum_acc = 0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            acc, loss = step(x, y, net=net, optimizer=optimizer, loss_function=loss_function, train=True)
            sum_acc += acc
        train_avg_acc = sum_acc / len(train_loader)
        print(f"Training accuracy: {train_avg_acc:.2f}")
        # ---- validation ----
        net.eval()
        sum_acc = 0
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            # BUG FIX: the original passed train=True here, so the network
            # kept being optimized on validation data. Validation must only
            # evaluate, never update the weights.
            val_acc, val_loss = step(x, y, net=net, optimizer=optimizer, loss_function=loss_function, train=False)
            sum_acc += val_acc
        val_avg_acc = sum_acc / len(val_loader)
        print(f"Validation accuracy: {val_avg_acc:.2f}")
        train_steps = len(train_loader) * (epoch + 1)
        wandb.log(
            {
                "Train Accuracy": train_avg_acc,
                "Validation Accuracy": val_avg_acc
            }, step=train_steps)
    # ---- prediction on the test folder ----
    train_preds = get_all_preds(net, loader=prediction_loader, device=device)
    print(f"Train predictions shape: {train_preds.shape}")
    print(f"The label the network predicts strongly: {train_preds.argmax(dim=1)}")
    predictions = train_preds.argmax(dim=1)
    # Scans folder for files, and then writes the filenames to CSV format
    with open('Test_data.csv', 'w', newline='') as f:  # creates csv
        writer_obj = csv.writer(f)
        writer_obj.writerow(['filename'])
        with os.scandir('Test/sub/') as folder:  # scans folder to read file names
            for file in folder:
                print(f"Entry: {file.name}")
                writer_obj.writerow([file.name])
    # Read csv file and create a data frame containing the filenames and predictions.
    # Then this data frame is written to csv.
    df = pd.read_csv('Test_data.csv')
    filename = df['filename']
    signal_dict = {
        # 'filename': file.name,
        'filename': filename,
        'prediction': predictions.tolist(),
    }
    df = pd.DataFrame(signal_dict)
    df.to_csv('new_df.csv')
    # ---- final metrics ----
    plt.figure(figsize=(10, 10))
    wandb.sklearn.plot_confusion_matrix(testset.targets, train_preds.argmax(dim=1), LABELS)
    precision, recall, f1_score, support = score(testset.targets, train_preds.argmax(dim=1))
    test_acc = accuracy_score(testset.targets, train_preds.argmax(dim=1))
    print(f"Test Accuracy: {test_acc}")
    print('precision: {}'.format(precision))
    print('recall: {}'.format(recall))
    print('f1_score: {}'.format(f1_score))
    print('support: {}'.format(support))
def target_transform(crop_size):
    """Ground-truth pipeline: center-crop to ``crop_size`` and tensorize."""
    steps = [
        CenterCrop(crop_size),
        ToTensor(),
    ]
    return Compose(steps)
def LR_transform(crop_size):
    """Low-resolution pipeline: downscale by 8x, then tensorize.

    NOTE(review): ``Scale`` is the legacy torchvision name for ``Resize`` —
    kept here for compatibility with the file's imports.
    """
    steps = [
        Scale(crop_size // 8),
        ToTensor(),
    ]
    return Compose(steps)
def HR_4_transform(crop_size):
    """Half-resolution pipeline: downscale by 2x, then tensorize."""
    steps = [
        Scale(crop_size // 2),
        ToTensor(),
    ]
    return Compose(steps)
def go(arg):
    """Train an autoregressive pixel model (PixelCNN-style) on the task given
    in ``arg.task``, periodically evaluate on the test set, and save image
    samples every epoch.

    NOTE(review): indentation reconstructed from a flattened source — the
    nesting of the pixel-by-pixel check and the sampling section is the most
    plausible reading, to be confirmed against the original file.
    """
    tbw = SummaryWriter(log_dir=arg.tb_dir)
    ## Load the data
    if arg.task == 'mnist':
        trainset = torchvision.datasets.MNIST(root=arg.data_dir, train=True, download=True, transform=ToTensor())
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=arg.batch_size, shuffle=True, num_workers=2)
        testset = torchvision.datasets.MNIST(root=arg.data_dir, train=False, download=True, transform=ToTensor())
        testloader = torch.utils.data.DataLoader(testset, batch_size=arg.batch_size, shuffle=False, num_workers=2)
        C, H, W = 1, 28, 28
    elif arg.task == 'cifar10':
        trainset = torchvision.datasets.CIFAR10(root=arg.data_dir, train=True, download=True, transform=ToTensor())
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=arg.batch_size, shuffle=True, num_workers=2)
        testset = torchvision.datasets.CIFAR10(root=arg.data_dir, train=False, download=True, transform=ToTensor())
        testloader = torch.utils.data.DataLoader(testset, batch_size=arg.batch_size, shuffle=False, num_workers=2)
        C, H, W = 3, 32, 32
    elif arg.task == 'cifar-gs':
        # Grayscale CIFAR variant.
        transform = Compose([Grayscale(), ToTensor()])
        trainset = torchvision.datasets.CIFAR10(root=arg.data_dir, train=True, download=True, transform=transform)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=arg.batch_size, shuffle=True, num_workers=2)
        testset = torchvision.datasets.CIFAR10(root=arg.data_dir, train=False, download=True, transform=transform)
        testloader = torch.utils.data.DataLoader(testset, batch_size=arg.batch_size, shuffle=False, num_workers=2)
        C, H, W = 1, 32, 32
    elif arg.task == 'imagenet64':
        transform = Compose([ToTensor()])
        trainset = torchvision.datasets.ImageFolder(root=arg.data_dir + os.sep + 'train', transform=transform)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=arg.batch_size, shuffle=True, num_workers=2)
        testset = torchvision.datasets.ImageFolder(root=arg.data_dir + os.sep + 'valid', transform=transform)
        testloader = torch.utils.data.DataLoader(testset, batch_size=arg.batch_size, shuffle=False, num_workers=2)
        C, H, W = 3, 64, 64
    else:
        raise Exception('Task {} not recognized.'.format(arg.task))
    ## Set up the model
    fm = arg.channels
    krn = arg.kernel_size
    pad = krn // 2
    if arg.model == 'simple':
        # Stack of plain masked convolutions, ReLU between layers.
        modules = []
        for i in range(arg.num_layers):
            modules.append(
                PlainMaskedConv2d(i > 0, fm if i > 0 else C, fm, krn, 1, pad, bias=False))
            modules.append(ReLU(True))
        # 256-way logits per channel per pixel.
        modules.extend([Conv2d(fm, 256 * C, 1), util.Reshape((256, C, W, H))])
        model = Sequential(*modules)
    elif arg.model == 'gated-old':
        modules = [
            Conv2d(
                C, fm, 1, groups=C
            ),  # the groups allow us to block out certain colors in the first layer
            util.Lambda(lambda x: (x, x))
        ]
        for i in range(arg.num_layers):
            modules.append(
                MaskedConv2d(
                    fm,
                    colors=C,
                    self_connection=i > 0,
                    res_connection=(not arg.no_res) if i > 0 else False,
                    gates=not arg.no_gates,
                    hv_connection=not arg.no_hv,
                    k=krn,
                    padding=pad))
        modules.extend([
            util.Lambda(lambda xs: xs[1]),
            Conv2d(fm, 256 * C, 1, groups=C),
            util.Reshape((C, 256, W, H)),
            util.Lambda(
                lambda x: x.transpose(1, 2))  # index for batched tensor
        ])
        model = Sequential(*modules)
    elif arg.model == 'gated':
        model = models.Gated((C, H, W), arg.channels, num_layers=arg.num_layers, k=arg.kernel_size, padding=arg.kernel_size // 2)
    else:
        raise Exception('model "{}" not recognized'.format(arg.model))
    print('Constructed network', model)
    # A sample of 144 square images with 3 channels, of the chosen resolution
    # (144 so we can arrange them in a 12 by 12 grid)
    sample_init_zeros = torch.zeros(72, C, H, W)
    sample_init_seeds = torch.zeros(72, C, H, W)
    sh, sw = H // SEEDFRAC, W // SEEDFRAC
    # Init second half of sample with patches from test set, to seed the sampling
    testbatch = util.readn(testloader, n=12)
    testbatch = testbatch.unsqueeze(1).expand(12, 6, C, H, W).contiguous().view(
        72, 1, C, H, W).squeeze(1)
    sample_init_seeds[:, :, :sh, :] = testbatch[:, :, :sh, :]
    optimizer = Adam(model.parameters(), lr=arg.lr)
    if torch.cuda.is_available():
        model.cuda()
    instances_seen = 0
    for epoch in range(arg.epochs):
        # Train
        err_tr = []
        model.train(True)
        for i, (input, _) in enumerate(tqdm.tqdm(trainloader)):
            if arg.limit is not None and i * arg.batch_size > arg.limit:
                break
            # Prepare the input
            b, c, w, h = input.size()
            if torch.cuda.is_available():
                input = input.cuda()
            # Targets are the 8-bit pixel values the model must predict.
            target = (input.data * 255).long()
            input, target = Variable(input), Variable(target)
            # Forward pass
            result = model(input)
            loss = cross_entropy(result, target)
            loss = loss * util.LOG2E  # Convert from nats to bits
            instances_seen += input.size(0)
            tbw.add_scalar('pixel-models/training-loss', float(loss.data.item()), instances_seen)
            err_tr.append(float(loss.data.item()))
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            del loss, result
        # Evaluate
        # - we evaluate on the test set, since this is only a simple reproduction experiment
        #   make sure to split off a validation set if you want to tune hyperparameters for something important
        if epoch % arg.eval_every == 0 and epoch != 0:
            with torch.no_grad():
                err_test = 0.0
                err_total = 0
                model.train(False)
                for i, (input, _) in enumerate(tqdm.tqdm(testloader)):
                    if arg.limit is not None and i * arg.batch_size > arg.limit:
                        break
                    if torch.cuda.is_available():
                        input = input.cuda()
                    target = (input.data * 255).long()
                    input, target = Variable(input), Variable(target)
                    result = model(input)
                    loss = cross_entropy(result, target, reduction='none')
                    loss = loss * util.LOG2E  # Convert from nats to bits
                    err_test += float(loss.data.sum())
                    err_total += util.prod(input.size())
                    del loss, result
                # Average bits-per-dimension over the test set.
                testloss = err_test / err_total
                tbw.add_scalar('pixel-models/test-loss', testloss, epoch)
                print('epoch={:02}; training loss: {:.3f}; test loss: {:.3f}'.
                      format(epoch, sum(err_tr) / len(err_tr), testloss))
                # Compute loss pixel by pixel, color by color, to make sure we're not leaking
                if arg.pbp:
                    sum_bits = 0
                    total = 0
                    for i, (input, _) in enumerate(tqdm.tqdm(testloader)):
                        mask = torch.zeros(*input.size())
                        if torch.cuda.is_available():
                            input, mask = input.cuda(), mask.cuda()
                        target = (input.data * 255).long()
                        input = Variable(input)
                        # Reveal pixels one at a time and score only the next one.
                        for h in range(H):
                            for w in range(W):
                                for c in range(C):
                                    result = model(input * mask)
                                    result = F.log_softmax(result, dim=1)
                                    for b in range(input.size(0)):
                                        t = target[b, c, h, w]
                                        sum_bits += -float(result[b, t, c, h, w].sum())
                                        total += 1
                                    mask[:, c, h, w] += 1
                    print(
                        'epoch={:02}; pixel-by-pixel test loss: {:.3f}'.format(
                            epoch, sum_bits / total))
        # Draw samples: one batch from a blank canvas, one seeded with the top
        # rows of test images.
        model.train(False)
        sample_zeros = draw_sample(sample_init_zeros, model, seedsize=(0, 0), batch_size=arg.batch_size)
        sample_seeds = draw_sample(sample_init_seeds, model, seedsize=(sh, W), batch_size=arg.batch_size)
        sample = torch.cat([sample_zeros, sample_seeds], dim=0)
        utils.save_image(sample, 'sample_{:02d}.png'.format(epoch), nrow=12, padding=0)
# Configure and launch a handwritten-text-recognition (HTR) training run with
# a TPS spatial-transformer front end.
opt.adadelta = True
opt.lr = 1
# opt.STN_type = 'Affine'
# opt.tps_inputsize = [32, 64]
opt.STN_type = 'TPS'
opt.tps_inputsize = [48, 128]
opt.tps_outputsize = [96, 256]
htr = BaseHTR(opt, dataset_name)
htr.nheads = 1
l1 = ['pn', 'bn', 'od', 'gu']  # indo-aryan languages, smaller width
l2 = ['kn', 'ma', 'ta']  # NOTE(review): l2 is defined but never used below
# Stronger elastic distortion for the indo-aryan set.
if lang in l1:
    elastic_alpha = 0.3
else:
    elastic_alpha = 0.2
# Training augmentation: degrade, rescale, elastic + affine jitter, colour
# jitter (50% chance), then tensorize.
htr.train_transforms = Compose([
    GD(0.5),
    IRescale(max_width=htr.opt.imgW, height=htr.opt.imgH),
    ElasticTransformation(0.5, alpha=elastic_alpha),
    AffineTransformation(0.5, rotate=5, shear=0.5),
    RandomApply([ColorJitter(brightness=0.5, contrast=0.5)], p=0.5),
    ToTensor()])
# Test-time: rescale only.
htr.test_transforms = Compose([IRescale(max_width=htr.opt.imgW, height=htr.opt.imgH),
                               ToTensor()])
htr.run()
nn.Linear(64, 64), nn.ELU(), nn.Linear(64, num_classes)) model = sets.DeepSetSSL(obs_encoder=obs_encoder, # loc_encoder=loc_encoder, locs=locs, learnable_locs=False, obs_loc_encoder=obs_loc_encoder, pooling_function=pooling_function, classifier=classifier, p_subsample=1., subsample_same_locs=True).to(device) set_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,)), sets.image_to_set]) model.p_subsample = 0.8 quick_experiment(model=model, dataset='mnist', data_dir='/Users/jsb/datasets/', p_splits={'train': 1.}, task='classify', transform=set_transform, epochs_per_eval=1, print_training_loss_every=2, training_kwargs={'lr': 1e-4, 'batch_size': 32, 'n_epoch': 100})
def run(img, model_type="large", optimize=True):
    """Estimate a depth map for a BGR image with a MiDaS network.

    :param img: input image as a BGR uint8 array (OpenCV convention)
    :param model_type: "large" (MidasNet, 384px) or "small"
        (MidasNet_small, 256px)
    :param optimize: trace the model with TorchScript and, on CUDA, run in
        half precision / channels-last for speed
    :return: the depth prediction passed through ``denorm``
    :raises ValueError: if ``model_type`` is not "large" or "small"
    """
    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)
    # load network
    if model_type == "large":
        model = MidasNet(os.path.dirname(__file__) + '/saved_model/model-f6b98070.pt', non_negative=True)
        net_w, net_h = 384, 384
    elif model_type == "small":
        model = MidasNet_small(os.path.dirname(__file__) + '/saved_model/model-small-70d6b9c8.pt', features=64,
                               backbone="efficientnet_lite3", exportable=True, non_negative=True,
                               blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        # BUG FIX: previously an unknown model_type fell through and crashed
        # later with UnboundLocalError on net_w/model. Fail fast instead.
        raise ValueError("model_type must be 'large' or 'small', got {!r}".format(model_type))
    # Resize to the network input size, normalize with ImageNet statistics,
    # and convert to the layout the network expects.
    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])
    model.eval()
    if optimize:
        print('optimizing')
        # Trace the model with a dummy input so inference runs as TorchScript.
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module
        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()
    model.to(device)
    print("start processing")
    # OpenCV gives BGR uint8; the network wants RGB floats in [0, 1].
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
    img_input = transform({"image": img})["image"]
    # compute
    with torch.no_grad():
        sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
        if optimize and device == torch.device("cuda"):
            sample = sample.to(memory_format=torch.channels_last)
            sample = sample.half()
        prediction = model.forward(sample)
        # Upsample the prediction back to the original image resolution.
        prediction = (torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze().cpu().numpy())
    depth_img = denorm(prediction)
    return depth_img
losses, accuracies = trainer.fit(net, train_dl, valid_dl, epoch_end_callback=epoch_end_callback) return accuracies[-1] if __name__ == '__main__': DEVICE = 'cuda:0' ROOT = next(p for p in [Path('C:/datasets'), Path('/home/ubuntu/datasets')] if p.is_dir()) BATCH_SIZE = 64 N_EPOCHS = 20 INPUT_SHAPE = (32, 128) tfms = Compose([Resize(INPUT_SHAPE[1], INPUT_SHAPE[0]), ToTensor()]) train_ds = IAMWords(ROOT, split='train', transform=tfms) train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) valid_ds = IAMWords(ROOT, split='valid', transform=tfms) valid_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2) test_ds = IAMWords(ROOT, split='test', transform=tfms) test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2) n_classes = len(CHARACTERS) + 1 study = optuna.create_study(study_name='hparam_search', load_if_exists=True, storage='sqlite:///hparam_search.db', direction='maximize') study.optimize(lambda trial: experiment(trial, train_dl, valid_dl, device=DEVICE, n_classes=n_classes, n_epochs=20, input_shape=INPUT_SHAPE),
def hr_transform(crop_size: int):
    """High-resolution pipeline: random crop to ``crop_size``, then tensorize."""
    steps = [RandomCrop(crop_size), ToTensor()]
    return Compose(steps)
def main() -> None:
    """Train an image classifier from a config file: build loaders from .npz
    arrays, train/validate per epoch, checkpoint, log to CSV (and optionally
    wandb), and save the final model.
    """
    args = get_arguments()
    # configuration
    config = get_config(args.config)
    # save log files in the directory which contains config file.
    result_path = os.path.dirname(args.config)
    experiment_name = os.path.basename(result_path)
    # if a experiment has already done, train won't start.
    if os.path.exists(os.path.join(result_path, "final_model.prm")):
        print("Already done.")
        return
    # cpu or cuda
    device = get_device(allow_only_gpu=True)
    # Dataloader
    train_transform = Compose([
        Resize(config.size),
        RandomAffine(degrees=config.degrees,
                     translate=(config.translate, config.translate)),
        ToTensor(),
        Normalize(mean=get_mean(), std=get_std()),
    ])
    val_transform = Compose([
        Resize(config.size),
        ToTensor(),
        Normalize(mean=get_mean(), std=get_std())
    ])
    # Images stored flat in an .npz; reshape to 28x28 (MNIST-like data).
    imgs = np.load(config.train_imgs)["arr_0"]
    imgs = imgs.reshape(-1, 28, 28)
    ids = np.load(config.train_ids)["arr_0"]
    # 90/10 stratified train/val split.
    train_imgs, val_imgs, train_ids, val_ids = train_test_split(
        imgs, ids, test_size=0.1, random_state=random_seed, stratify=ids)
    train_loader = get_dataloader(
        imgs=train_imgs,
        ids=train_ids,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True,
        drop_last=True,
        transform=train_transform,
    )
    val_loader = get_dataloader(
        imgs=val_imgs,
        ids=val_ids,
        batch_size=1,
        shuffle=False,
        num_workers=config.num_workers,
        pin_memory=True,
        transform=val_transform,
    )
    # the number of classes
    n_classes = len(get_cls2id_map())
    # define a model
    model = get_model(config.model, n_classes, pretrained=config.pretrained)
    # send the model to cuda/cpu
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    # keep training and validation log
    begin_epoch = 0
    best_loss = float("inf")
    log = pd.DataFrame(columns=[
        "epoch",
        "lr",
        "train_time[sec]",
        "train_loss",
        "train_acc@1",
        "train_f1s",
        "val_time[sec]",
        "val_loss",
        "val_acc@1",
        "val_f1s",
    ])
    # resume if you want
    if args.resume:
        resume_path = os.path.join(result_path, "checkpoint.pth")
        begin_epoch, model, optimizer, best_loss = resume(
            resume_path, model, optimizer)
        log_path = os.path.join(result_path, "log.csv")
        assert os.path.exists(
            log_path), "there is no checkpoint at the result folder"
        log = pd.read_csv(log_path)
    # criterion for loss
    criterion = get_criterion(config.use_class_weight, train_ids, device)
    # Weights and biases
    if not args.no_wandb:
        wandb.init(
            name=experiment_name,
            config=config,
            project="image_classification_template",
            job_type="training",
            dirs="./wandb_result/",
        )
        # Magic
        wandb.watch(model, log="all")
    # train and validate model
    print("---------- Start training ----------")
    for epoch in range(begin_epoch, config.max_epoch):
        # training
        start = time.time()
        train_loss, train_acc1, train_f1s = train(train_loader, model,
                                                  criterion, optimizer, epoch,
                                                  device)
        train_time = int(time.time() - start)
        # validation
        start = time.time()
        val_loss, val_acc1, val_f1s, c_matrix = evaluate(
            val_loader, model, criterion, device)
        val_time = int(time.time() - start)
        # save a model if top1 acc is higher than ever
        # (actually keyed on the lowest validation loss)
        if best_loss > val_loss:
            best_loss = val_loss
            torch.save(
                model.state_dict(),
                os.path.join(result_path, "best_model.prm"),
            )
        # save checkpoint every epoch
        save_checkpoint(result_path, epoch, model, optimizer, best_loss)
        # write logs to dataframe and csv file
        tmp = pd.Series(
            [
                epoch,
                optimizer.param_groups[0]["lr"],
                train_time,
                train_loss,
                train_acc1,
                train_f1s,
                val_time,
                val_loss,
                val_acc1,
                val_f1s,
            ],
            index=log.columns,
        )
        # NOTE(review): DataFrame.append is deprecated (removed in pandas 2.0);
        # this file presumably targets an older pandas — verify before upgrading.
        log = log.append(tmp, ignore_index=True)
        log.to_csv(os.path.join(result_path, "log.csv"), index=False)
        make_graphs(os.path.join(result_path, "log.csv"))
        # save logs to wandb
        if not args.no_wandb:
            wandb.log(
                {
                    "lr": optimizer.param_groups[0]["lr"],
                    "train_time[sec]": train_time,
                    "train_loss": train_loss,
                    "train_acc@1": train_acc1,
                    "train_f1s": train_f1s,
                    "val_time[sec]": val_time,
                    "val_loss": val_loss,
                    "val_acc@1": val_acc1,
                    "val_f1s": val_f1s,
                },
                step=epoch,
            )
        print("""epoch: {}\tepoch time[sec]: {}\tlr: {}\ttrain loss: {:.4f}\t\
val loss: {:.4f}
val_acc1: {:.5f}\tval_f1s: {:.5f}
""".format(
            epoch,
            train_time + val_time,
            optimizer.param_groups[0]["lr"],
            train_loss,
            val_loss,
            val_acc1,
            val_f1s,
        ))
    # save models
    torch.save(model.state_dict(), os.path.join(result_path,
                                                "final_model.prm"))
    # delete checkpoint
    os.remove(os.path.join(result_path, "checkpoint.pth"))
    print("Done")
def lr_transform(crop_size: int, scale: int):
    """Low-resolution pipeline: tensor -> PIL, bicubic downscale by ``scale``,
    back to tensor."""
    steps = [
        ToPILImage(),
        Resize(crop_size // scale, interpolation=Image.BICUBIC),
        ToTensor(),
    ]
    return Compose(steps)
def main():
    """Set up and launch domain-adaptation training of a 3D Generic_UNet on
    source/target NIfTI datasets, redirecting stdout to a per-run log file.
    """
    opt = parsing_data()
    print("[INFO]Reading data")
    # Dictionary with data parameters for NiftyNet Reader
    if torch.cuda.is_available():
        print('[INFO] GPU available.')
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        raise Exception(
            "[INFO] No GPU found or Wrong gpu id, please run without --cuda")
    # FOLDERS
    fold_dir = opt.model_dir
    fold_dir_model = os.path.join(fold_dir, 'models')
    if not os.path.exists(fold_dir_model):
        os.makedirs(fold_dir_model)
    # Template paths; '{}' is filled in later (epoch / subject id).
    save_path = os.path.join(fold_dir_model, './CP_{}.pth')
    output_path = os.path.join(fold_dir, 'output')
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    output_path = os.path.join(output_path, 'output_{}.nii.gz')
    # LOGGING
    # Redirect stdout to out.txt, or out_N.txt if previous logs exist.
    orig_stdout = sys.stdout
    if os.path.exists(os.path.join(fold_dir, 'out.txt')):
        compt = 0
        while os.path.exists(
                os.path.join(fold_dir, 'out_' + str(compt) + '.txt')):
            compt += 1
        f = open(os.path.join(fold_dir, 'out_' + str(compt) + '.txt'), 'w')
    else:
        f = open(os.path.join(fold_dir, 'out.txt'), 'w')
    sys.stdout = f
    # SPLITS
    split_path_source = opt.dataset_split_source
    assert os.path.isfile(split_path_source), 'source file not found'
    split_path_target = opt.dataset_split_target
    assert os.path.isfile(split_path_target), 'target file not found'
    split_path = dict()
    split_path['source'] = split_path_source
    split_path['target'] = split_path_target
    path_file = dict()
    path_file['source'] = opt.path_source
    path_file['target'] = opt.path_target
    list_split = [
        'training',
        'validation',
    ]
    # Build, per domain and per split, the list of torchio Subjects
    # (one Image per modality, plus the label map).
    paths_dict = dict()
    for domain in ['source', 'target']:
        df_split = pd.read_csv(split_path[domain], header=None)
        list_file = dict()
        for split in list_split:
            list_file[split] = df_split[df_split[1].isin([split])][0].tolist()
        paths_dict_domain = {split: [] for split in list_split}
        for split in list_split:
            for subject in list_file[split]:
                subject_data = []
                for modality in MODALITIES[domain]:
                    subject_data.append(
                        Image(
                            modality,
                            path_file[domain] + subject + modality + '.nii.gz',
                            torchio.INTENSITY))
                if split in ['training', 'validation']:
                    subject_data.append(
                        Image('label',
                              path_file[domain] + subject + 'Label.nii.gz',
                              torchio.LABEL))
                #subject_data[] =
                paths_dict_domain[split].append(Subject(*subject_data))
            print(domain, split, len(paths_dict_domain[split]))
        paths_dict[domain] = paths_dict_domain
    # PREPROCESSING
    # Training: canonical orientation, z-normalize, crop/pad, then random
    # affine/noise/flip augmentation. Validation: no augmentation.
    transform_training = dict()
    transform_validation = dict()
    for domain in ['source', 'target']:
        transform_training[domain] = (
            ToCanonical(),
            ZNormalization(),
            CenterCropOrPad((144, 192, 48)),
            RandomAffine(scales=(0.9, 1.1), degrees=10),
            RandomNoise(std_range=(0, 0.10)),
            RandomFlip(axes=(0, )),
        )
        transform_training[domain] = Compose(transform_training[domain])
        transform_validation[domain] = (
            ToCanonical(),
            ZNormalization(),
            CenterCropOrPad((144, 192, 48)),
        )
        transform_validation[domain] = Compose(transform_validation[domain])
    transform = {
        'training': transform_training,
        'validation': transform_validation
    }
    # MODEL
    norm_op_kwargs = {'eps': 1e-5, 'affine': True}
    # NOTE(review): dropout_op_kwargs is built but not passed to Generic_UNet.
    dropout_op_kwargs = {'p': 0, 'inplace': True}
    net_nonlin = nn.LeakyReLU
    net_nonlin_kwargs = {'negative_slope': 1e-2, 'inplace': True}
    print("[INFO] Building model")
    model = Generic_UNet(input_modalities=MODALITIES_TARGET,
                         base_num_features=32,
                         num_classes=nb_classes,
                         num_pool=4,
                         num_conv_per_stage=2,
                         feat_map_mul_on_downscale=2,
                         conv_op=torch.nn.Conv3d,
                         norm_op=torch.nn.InstanceNorm3d,
                         norm_op_kwargs=norm_op_kwargs,
                         nonlin=net_nonlin,
                         nonlin_kwargs=net_nonlin_kwargs,
                         convolutional_pooling=False,
                         convolutional_upsampling=False,
                         final_nonlin=torch.nn.Softmax(1))
    print("[INFO] Training")
    train(paths_dict, model, transform, device, save_path, opt)
    # Restore stdout and close the log file.
    sys.stdout = orig_stdout
    f.close()
def input_transform(crop_size, upscale_factor):
    """Input pipeline: center-crop to ``crop_size`` and tensorize.

    NOTE(review): ``upscale_factor`` is accepted but unused here — kept for
    signature compatibility with existing callers.
    """
    steps = [
        CenterCrop(crop_size),
        ToTensor(),
    ]
    return Compose(steps)
from torchvision.transforms import Compose, CenterCrop, Normalize, Resize from torchvision.transforms import ToTensor, ToPILImage from eval.dataset import cityscapes from eval.erfnet import ERFNet from eval.transform import Relabel, ToLabel, Colorize import visdom NUM_CHANNELS = 3 NUM_CLASSES = 20 image_transform = ToPILImage() input_transform_cityscapes = Compose([ Resize((160, 384), Image.BILINEAR), ToTensor(), #Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform_cityscapes = Compose([ Resize((160, 384), Image.NEAREST), ToLabel(), Relabel(255, 19), #ignore label to 19 ]) cityscapes_trainIds2labelIds = Compose([ Relabel(19, 255), Relabel(18, 33), Relabel(17, 32), Relabel(16, 31), Relabel(15, 28), Relabel(14, 27),
def tagVideo(modelpath, videopath, outputPath=None):
    """Detect whether persons in a video are wearing masks or not.

    Loads a MaskDetector checkpoint, runs an OpenCV-DNN face detector on each
    frame, classifies every face crop, draws the box plus a Chinese and an
    English label on the frame, shows it in a window ('q' quits), and
    optionally writes the annotated video to ``outputPath``.
    """
    model = MaskDetector()
    model.load_state_dict(torch.load(modelpath)['state_dict'], strict=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    faceDetector = FaceDetector(
        prototype='./models/deploy.prototxt.txt',
        model='./models/res10_300x300_ssd_iter_140000.caffemodel',
    )
    # Face crops are resized to the classifier's 100x100 input.
    transformations = Compose([
        ToPILImage(),
        Resize((100, 100)),
        ToTensor(),
    ])
    if outputPath:
        writer = FFmpegWriter(str(outputPath))
    # fontC = 'simsun.ttc'
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.FONT_HERSHEY_SIMPLEX  # NOTE(review): no-op statement, kept as-is
    cv2.namedWindow('main', cv2.WINDOW_NORMAL)
    labels = ['No mask', 'Mask']
    labelColor = [
        (255, 255, 255), (10, 255, 0)
    ]  # Can have a different color for predicted with mask or without
    for frame in vreader(str(videopath)):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = faceDetector.detect(frame)
        for face in faces:
            xStart, yStart, width, height = face
            # clamp coordinates that are outside of the image
            xStart, yStart = max(xStart, 0), max(yStart, 0)
            # predict mask label on extracted face
            faceImg = frame[yStart:yStart + height, xStart:xStart + width]
            output = model(transformations(faceImg).unsqueeze(0).to(device))
            _, predicted = torch.max(output.data, 1)
            # draw face frame
            cv2.rectangle(frame, (xStart, yStart),
                          (xStart + width, yStart + height), (255, 255, 255),
                          thickness=2)
            # draw the prediction label in CHINESE
            # (white background patches sized for 2 vs 1 characters)
            imgNoMask = np.zeros([20, 40, 3], dtype=np.uint8)
            imgMask = np.zeros([20, 20, 3], dtype=np.uint8)
            imgNoMask.fill(255)
            imgMask.fill(255)
            b, g, r, a = 0, 0, 0, 0
            if predicted == 0:
                img = cv2ImgAddText(imgNoMask, "没有", 3, 3, (b, g, r), 15)
            else:
                img = cv2ImgAddText(imgMask, "有", 3, 3, (b, g, r), 15)
            img_height, img_width, _ = img.shape
            frame[
                yStart:yStart + img_height, xStart:xStart + img_width] = img  # Replace the top corner left with the image of Chinese words
            # Add the Prediction Label in ENGLISH according to the face frame
            # center text according to the face frame
            textSize = cv2.getTextSize(labels[predicted], font, 1, 2)[0]
            textX = xStart + width // 2 - textSize[0] // 2
            # draw prediction label
            cv2.putText(frame, labels[predicted], (textX + 40, yStart + 20),
                        font, 0.5, labelColor[predicted], 1)
        if outputPath:
            # Writer expects BGR frames; convert back before writing.
            writer.writeFrame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        cv2.imshow('main', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    if outputPath:
        writer.close()
    cv2.destroyAllWindows()
x = torch.stack((x,x,x),1).float() inputs = (input.squeeze(0)*x.squeeze(0)) if len(inputs) == 3 : inputs = inputs.unsqueeze(0) x = self.fine_grained_model(inputs) return x, road_mask NUM_CHANNELS = 3 NUM_CLASSES = 10 color_transform = Colorize(NUM_CLASSES) image_transform = ToPILImage() input_transform = Compose([ CenterCrop(256), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ CenterCrop(256), ToLabel(), Relabel(255, 21), ]) # The dataset has 0-18 labels as mentioned in cityscapes dataset and correspondingly labeled with pothole as 19, waterlog as 20, muddyroad as 21, obstruction as 22, cementroad as 23, rough road patch as 24, wet road patch as 25, side road as 26, bumps as 27. # Also the folder structure of dataset is same as cityscapes. # As mentioned in the paper various hierachies in the dataset can be obtained by relabeling and combining multiple labels to a single label. # So here we relabel all the 0-18 labels of cityscapes as 255 and change road defect labels from 1 - 9 respectively.
def main(dataset_root, relevance_format, composite_name, model_name, parameters,
         input_format, batch_size, max_samples, n_outputs, cpu, shuffle, cmap,
         level, seed):
    '''Generate heatmaps of an image folder at DATASET_ROOT to files RELEVANCE_FORMAT.
    RELEVANCE_FORMAT is a format string, for which {sample} is replaced with the
    sample index.  Attribution is computed via a zennit composite applied to a
    torchvision model; one heatmap image (and optionally the input image) is
    written per sample.
    '''
    # set a manual seed for the RNG
    torch.manual_seed(seed)

    # use the gpu if requested and available, else use the cpu
    device = torch.device(
        'cuda:0' if torch.cuda.is_available() and not cpu else 'cpu')

    # mean and std of ILSVRC2012 as computed for the torchvision models
    norm_fn = BatchNormalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225),
                             device=device)

    # transforms as used for torchvision model evaluation
    transform = Compose([
        Resize(256),
        CenterCrop(224),
        ToTensor(),
    ])

    # the dataset is a folder containing folders with samples, where each folder corresponds to one label
    dataset = ImageFolder(dataset_root, transform=transform)

    # limit the number of output samples, if requested, by creating a subset
    if max_samples is not None:
        if shuffle:
            # sorted so the subset preserves dataset order even when sampled
            indices = sorted(
                np.random.choice(len(dataset), min(len(dataset), max_samples),
                                 replace=False))
        else:
            indices = range(min(len(dataset), max_samples))
        dataset = Subset(dataset, indices)

    loader = DataLoader(dataset, shuffle=shuffle, batch_size=batch_size)

    model = MODELS[model_name][0]()

    # load model parameters if requested; the parameter file may need to be downloaded separately
    if parameters is not None:
        state_dict = torch.load(parameters)
        model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # convenience identity matrix to produce one-hot encodings
    eye = torch.eye(n_outputs, device=device)

    composite_kwargs = {}
    if composite_name == 'epsilon_gamma_box':
        # the maximal input shape, needed for the ZBox rule
        shape = (batch_size, 3, 224, 224)

        # the highest and lowest pixel values for the ZBox rule
        composite_kwargs['low'] = norm_fn(torch.zeros(*shape, device=device))
        composite_kwargs['high'] = norm_fn(torch.ones(*shape, device=device))

    # use torchvision specific canonizers, as supplied in the MODELS dict
    composite_kwargs['canonizers'] = [MODELS[model_name][1]()]

    # create a composite specified by a name; the COMPOSITES dict includes all preset composites provided by zennit.
    composite = COMPOSITES[composite_name](**composite_kwargs)

    # the current sample index for creating file names
    sample = 0

    # create the composite context outside the main loop, such that it canonizers and hooks do not need to be
    # registered and removed for each step.
    with composite.context(model) as modified:
        for data, target in loader:
            # we use data without the normalization applied for visualization, and with the normalization applied as
            # the model input
            data_norm = norm_fn(data.to(device))
            data_norm.requires_grad_()

            # one-hot encoding of the target labels of size (len(target), 1000)
            output_relevance = eye[target]

            out = modified(data_norm)
            # a simple backward pass will accumulate the relevance in data_norm.grad
            torch.autograd.backward((out, ), (output_relevance, ))

            # sum over the color channel for visualization
            relevance = np.array(data_norm.grad.sum(1).detach().cpu())

            # normalize symmetrically around 0: maps [-amax, amax] -> [0, 1]
            # NOTE(review): amax is the signed per-sample maximum; if a
            # sample's relevance max is 0 this divides by zero — confirm
            # inputs always produce nonzero relevance.
            amax = relevance.max((1, 2), keepdims=True)
            relevance = (relevance + amax) / 2 / amax

            for n in range(len(data)):
                fname = relevance_format.format(sample=sample + n)
                # zennit.image.imsave will create an appropriate heatmap given a cmap specification
                imsave(fname, relevance[n], vmin=0., vmax=1., level=level, cmap=cmap)
                if input_format is not None:
                    fname = input_format.format(sample=sample + n)
                    # if there are 3 color channels, imsave will not create a heatmap, but instead save the image with
                    # its appropriate colors
                    imsave(fname, data[n])
            sample += len(data)
cv2.destroyAllWindows() # 参数 # dataset_root = r'E:\datasets\CVC-912\test' # val_csv_path = r'E:\code\polyp_seg\data\fixed-csv\test.csv' # savedir = r'E:\code\polyp_seg\plot\results' # checkpoint = r'E:\code\polyp_seg\unet_baseline\checkpoint\0unet_params.pkl' dataset_root = os.path.join(sys.path[0], '../data/CVC-912/test') val_csv_path = os.path.join(sys.path[0], '../data/fixed-csv/test.csv') checkpoint = os.path.join(sys.path[0], '../unet_baseline/checkpoint/deeplabV3+/0run0.pkl') savedir = os.path.join(sys.path[0], 'results') test_index = 100 img_size_to_net = 256 test_img_aug = Compose([Resize(size=(288, 384)), ToTensor()]) test_mask_aug = Compose([Resize(size=(288, 384)), ToTensor()]) # 加载参数 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") checkpoint = torch.load(checkpoint) # model = UNet(colordim=3, n_classes=2) model = DeepLab(num_classes=2, backbone='resnet', output_stride=16, sync_bn=True, freeze_bn=False) model.load_state_dict(checkpoint['net']) model.to(device) dataframe = pd.read_csv(val_csv_path)
from data.basicFunction import CheckImageFile parser = argparse.ArgumentParser() parser.add_argument('--input', type=str, default='', help='input damaged image') parser.add_argument('--mask', type=str, default='', help='input mask') parser.add_argument('--output', type=str, default='output', help='output file name') parser.add_argument('--pretrained', type=str, default='', help='load pretrained model') parser.add_argument('--loadSize', type=int, default=350, help='image loading size') parser.add_argument('--cropSize', type=int, default=256, help='image training size') args = parser.parse_args() ImageTransform = Compose([ Resize(size=args.cropSize, interpolation=Image.NEAREST), ToTensor(), ]) MaskTransform = Compose([ Resize(size=args.cropSize, interpolation=Image.NEAREST), ToTensor(), ]) if not CheckImageFile(args.input): print('Input file is not image file!') elif not CheckImageFile(args.mask): print('Input mask is not image file!') elif args.pretrained == '': print('Provide pretrained model!') else:
# coding: utf-8 import random import cv2 import numpy as np from torch.utils.data import Dataset from torchvision.transforms import ToTensor, Normalize, Compose import supervisely_lib as sly input_image_normalizer = Compose([ ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) class PytorchSlyDataset(Dataset): catcher_retries = 100 def __init__(self, project_meta, samples, out_size_wh, class_mapping, bkg_color, allow_corrupted_cnt): self.project_meta = project_meta self.samples = samples self.out_size_wh = tuple(out_size_wh) self.class_mapping = class_mapping self.bkg_color = bkg_color self.sample_catcher = sly.CorruptedSampleCatcher(allow_corrupted_cnt) def __len__(self): return len(self.samples)
if self.transform is not None: sel_images = self.transform(sel_images) sel_sils = self.transform(sel_sils) return sel_images, sel_images, torch.FloatTensor( sel_params) # return all parameter in tensor form def __len__(self): return len(self.images) # return the length of the dataset # ------------------------------------------------------------------ normalize = Normalize(mean=[0.5], std=[0.5]) transforms = Compose([ToTensor(), normalize]) test_dataset = CubeDataset(test_im, test_sil, test_param, transforms) test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2) # ------------------------------------------------------------------ # for image, sil, param in test_dataloader: # # # print(image[2]) # print(image.size(), param.size()) #torch.Size([batch, 3, 512, 512]) torch.Size([batch, 6]) # im =2
def get_transform(): transforms = [] transforms.append(ToTensor()) return Compose(transforms)
def main(args):
    """Predict per-tile segmentation probabilities for a slippy-map directory.

    Loads model/dataset configs, restores a binary UNet checkpoint, runs
    batched inference over buffered tiles, and writes one single-channel
    paletted ``.png`` probability image per tile under ``args.probs/z/x/y.png``.
    """
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    cuda = model["common"]["cuda"]
    device = torch.device("cuda" if cuda else "cpu")

    def map_location(storage, _):
        # remap checkpoint tensors onto whichever device is actually in use
        return storage.cuda() if cuda else storage.cpu()

    if cuda and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")

    num_classes = len(dataset["common"]["classes"])

    # https://github.com/pytorch/pytorch/issues/7178
    chkpt = torch.load(args.checkpoint, map_location=map_location)

    net = UNet(num_classes).to(device)
    net = nn.DataParallel(net)

    if cuda:
        torch.backends.cudnn.benchmark = True

    net.load_state_dict(chkpt["state_dict"])
    net.eval()

    # channel statistics for normalization — presumably computed on the
    # training tiles; confirm they match the deployed dataset.
    mean, std = [0.392, 0.364, 0.369], [0.301, 0.310, 0.319]

    transform = Compose([
        ConvertImageMode(mode="RGB"),
        ImageToTensor(),
        Normalize(mean=mean, std=std)
    ])

    directory = BufferedSlippyMapDirectory(args.tiles, transform=transform,
                                           size=args.tile_size, overlap=args.overlap)
    loader = DataLoader(directory, batch_size=args.batch_size, num_workers=args.workers)

    # don't track tensors with autograd during prediction
    with torch.no_grad():
        for images, tiles in tqdm(loader, desc="Eval", unit="batch", ascii=True):
            images = images.to(device)
            outputs = net(images)

            # manually compute segmentation mask class probabilities per pixel
            probs = nn.functional.softmax(outputs, dim=1).data.cpu().numpy()

            for tile, prob in zip(tiles, probs):
                x, y, z = list(map(int, tile))

                # we predicted on buffered tiles; now get back probs for original image
                prob = directory.unbuffer(prob)

                # Quantize the floating point probabilities in [0,1] to [0,255] and store
                # a single-channel `.png` file with a continuous color palette attached.

                assert prob.shape[0] == 2, "single channel requires binary model"
                assert np.allclose(np.sum(prob, axis=0), 1.), "single channel requires probabilities to sum up to one"

                # keep only the foreground-class probability plane
                foreground = prob[1:, :, :]

                anchors = np.linspace(0, 1, 256)
                quantized = np.digitize(foreground, anchors).astype(np.uint8)

                palette = continuous_palette_for_color("pink", 256)
                out = Image.fromarray(quantized.squeeze(), mode="P")
                out.putpalette(palette)

                os.makedirs(os.path.join(args.probs, str(z), str(x)), exist_ok=True)
                path = os.path.join(args.probs, str(z), str(x), str(y) + ".png")
                out.save(path, optimize=True)
from datasets import VOCTestSet from PIL import Image import numpy as np import config import cv2 import os import shutil shutil.rmtree("./test") os.makedirs("./test") from evaluation import get_iou_list image_transform = Compose([ Scale((256, 256), Image.BILINEAR), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ Scale((256, 256), Image.NEAREST), ToLabel(), CocoLabel(), ]) batch_size = 1 dst = VOCTestSet("/root/group-incubation-bj", img_transform=image_transform, label_transform=target_transform) testloader = data.DataLoader(dst, batch_size=batch_size, num_workers=8)