def train(args):
    # Joint spatial augmentation applied to image and label together
    # (project-local transforms module: these RandomScale/Mirror/RandomCrop
    # take no required constructor args, unlike torchvision's).
    joint_transform = transforms.Compose([
        transforms.RandomScale(),
        transforms.Mirror(),
        transforms.RandomCrop()
    ])
    # Dataset and model classes are looked up from registry dicts keyed by
    # CLI arguments (args.dataset / args.g).
    trainset = datasets[args.dataset](mode=args.mode, root=args.dataset_root)
    net = models[args.g]
    # NOTE(review): joint_transform is built but not passed to the dataset in
    # the visible code, and no optimizer/loop follows — this function appears
    # truncated in this chunk; confirm against the full file.
def __init__(self, train=True):
    """Build the ESC-10 audio dataset split.

    Args:
        train: if True, keep clips whose fold is in ``config.train_folds``
            and attach augmenting transforms; otherwise keep clips from
            ``config.test_fold`` with deterministic (pad/crop-only)
            transforms.
    """
    self.root = './data/ESC50/ESC-50-master/audio/'
    self.train = train

    # File names look like '<fold>-<clip>-...': the leading integer is the
    # cross-validation fold used to assign each clip to train or test.
    # `with` closes the handle (the original leaked it via bare open()).
    with open('./data/ESC50/ESC10_file_names.txt', 'r') as f:
        names = f.read().split('\n')
    names.sort()

    wanted_folds = config.train_folds if train else config.test_fold
    # Skip blank entries — a trailing newline in the list file would
    # otherwise crash the int() parse with ValueError.
    self.file_names = [name for name in names
                       if name and int(name.split('-')[0]) in wanted_folds]

    if self.train:
        # Waveform-level augmentation: random time-stretch, then pad/crop
        # to a fixed 220500 samples (5 s at 44.1 kHz — TODO confirm rate).
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomScale(max_scale=1.25),
            transforms.RandomPadding(out_len=220500),
            transforms.RandomCrop(out_len=220500)
        ])
        # SpecAugment-style frequency/time masking on the spectrogram.
        self.spec_transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            transforms.FrequencyMask(max_width=config.freq_masks_width,
                                     numbers=config.freq_masks),
            transforms.TimeMask(max_width=config.time_masks_width,
                                numbers=config.time_masks)
        ])
    else:  # for test: deterministic length normalisation only, no masking
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomPadding(out_len=220500),
            transforms.RandomCrop(out_len=220500)
        ])
        self.spec_transforms = torchvision.transforms.Compose(
            [torchvision.transforms.ToTensor()])
def get_dataloader():
    """Create the distributed-training dataloader for Cityscapes.

    Returns:
        (train_loader, train_sampler) — the sampler is returned so the
        caller can invoke ``set_epoch`` on it every epoch.
    """
    # TODO(xwd): Adaptive normalization by some large image.
    # E.g. In medical image processing, WSI image is very large and different to ordinary images.
    value_scale = 255
    # ImageNet channel statistics rescaled from [0, 1] to the [0, 255] range.
    mean = [value_scale * m for m in [0.485, 0.456, 0.406]]
    std = [value_scale * s for s in [0.229, 0.224, 0.225]]

    augmentations = [
        transform.RandomScale([cfg['scale_min'], cfg['scale_max']]),
        transform.RandomRotate([cfg['rotate_min'], cfg['rotate_max']],
                               padding=mean,
                               ignore_label=cfg['ignore_label']),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontallyFlip(),
        transform.RandomCrop([cfg['train_h'], cfg['train_w']],
                             crop_type='rand',
                             padding=mean,
                             ignore_label=cfg['ignore_label']),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std),
    ]
    train_data = cityscapes.Cityscapes(cfg['data_path'],
                                       split='train',
                                       transform=transform.Compose(augmentations))

    # A DistributedSampler shards the dataset so each GPU reads a disjoint
    # subset (no redundant data loading across ranks).
    train_sampler = DistributedSampler(train_data)
    train_loader = DataLoader(train_data,
                              batch_size=cfg['batch_size'] // cfg['world_size'],
                              shuffle=(train_sampler is None),
                              num_workers=4,
                              pin_memory=True,
                              sampler=train_sampler,
                              drop_last=True)
    return train_loader, train_sampler
def __init__(self, train=True):
    """Build the UrbanSound8K audio dataset split.

    Args:
        train: if True, gather clips from every fold in
            ``config.train_folds`` and attach augmenting transforms;
            otherwise use the single test fold ``config.test_fold[0]``
            with deterministic (pad/crop-only) transforms.
    """
    self.root = './data/US8K/audio/'
    self.train = train

    # Entries only include the fold dir and file name, like:
    # 'fold2/4201-3-0-0.wav'. Train and test share one listing loop
    # (the original duplicated it per branch).
    folds = config.train_folds if train else [config.test_fold[0]]
    self.file_paths = []
    for f in folds:
        fold_dir = 'fold' + str(f) + '/'
        # sorted(): os.listdir order is filesystem-dependent, so without it
        # the dataset ordering is nondeterministic across machines.
        for name in sorted(os.listdir(self.root + fold_dir)):
            if name.split('.')[-1] == 'wav':
                self.file_paths.append(fold_dir + name)

    if self.train:
        # Waveform augmentation: random time-stretch then pad/crop to a
        # fixed 176400 samples (4 s at 44.1 kHz — TODO confirm rate).
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomScale(max_scale=1.25),
            transforms.RandomPadding(out_len=176400),
            transforms.RandomCrop(out_len=176400)])
        # SpecAugment-style frequency/time masking on the spectrogram.
        self.spec_transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            transforms.FrequencyMask(max_width=config.freq_masks_width,
                                     numbers=config.freq_masks),
            transforms.TimeMask(max_width=config.time_masks_width,
                                numbers=config.time_masks)])
    else:  # for test: deterministic length normalisation only, no masking
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomPadding(out_len=176400),
            transforms.RandomCrop(out_len=176400)])
        self.spec_transforms = torchvision.transforms.Compose(
            [torchvision.transforms.ToTensor()])
from experiment_builder import ExperimentBuilder from utils.arg_extractor import get_args import utils.transforms as trans from model.deeplab import DeepLab import matplotlib.pyplot as plt from tools import prediction from utils.metrics import Evaluator args = get_args() rng = np.random.RandomState(seed=args.seed) torch.manual_seed(seed=args.seed) transform_train = trans.Compose([ trans.RandomHorizontalFlip(), #trans.FixScale((args.crop_size,args.crop_size)), trans.RandomScale((0.5, 2.0)), #trans.FixScale(args.crop_size), trans.RandomCrop(args.crop_size), trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), trans.ToTensor(), ]) transform_val = trans.Compose([ #trans.FixScale((args.crop_size,args.crop_size)), trans.FixScale(args.crop_size), trans.CenterCrop(args.crop_size), trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), trans.ToTensor(), ]) if (args.aug == True): voc_train = VOCSegmentation(root='./data',