Example #1
0
def train(args):

    joint_transform = transforms.Compose([
        transforms.RandomScale(),
        transforms.Mirror(),
        transforms.RandomCrop()
    ])
    trainset = datasets[args.dataset](mode=args.mode, root=args.dataset_root)

    net = models[args.g]
Example #2
0
    def __init__(self, train=True):
        """Index ESC-10 audio files for the requested split.

        Args:
            train: when True keep files whose fold is in ``config.train_folds``;
                otherwise keep files whose fold is in ``config.test_fold``.
        """
        self.root = './data/ESC50/ESC-50-master/audio/'
        self.train = train

        # Read the names of all ESC-10 files; each name starts with its fold
        # index (e.g. '1-100032-A-0.wav').  Use a context manager so the file
        # handle is closed (the original open(...).read() leaked it).
        with open('./data/ESC50/ESC10_file_names.txt', 'r') as f:
            temp = f.read().split('\n')
        temp.sort()

        # NOTE(review): assumes config.train_folds / config.test_fold are
        # containers of ints -- confirm against the config module.
        folds = config.train_folds if train else config.test_fold
        # Guard `if name` skips a possible empty string from a trailing
        # newline, which would crash int('').
        self.file_names = [
            name for name in temp
            if name and int(name.split('-')[0]) in folds
        ]

        if self.train:
            # Training: augment the raw waveform (random scale, pad, crop to a
            # fixed 220500 samples) ...
            self.wave_transforms = torchvision.transforms.Compose([
                transforms.ToTensor1D(),
                transforms.RandomScale(max_scale=1.25),
                transforms.RandomPadding(out_len=220500),
                transforms.RandomCrop(out_len=220500)
            ])

            # ... and the spectrogram (frequency/time masking, widths and
            # counts taken from config).
            self.spec_transforms = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                transforms.FrequencyMask(max_width=config.freq_masks_width,
                                         numbers=config.freq_masks),
                transforms.TimeMask(max_width=config.time_masks_width,
                                    numbers=config.time_masks)
            ])

        else:  # for test: fixed-length waveform, no spectrogram masking
            self.wave_transforms = torchvision.transforms.Compose([
                transforms.ToTensor1D(),
                transforms.RandomPadding(out_len=220500),
                transforms.RandomCrop(out_len=220500)
            ])

            self.spec_transforms = torchvision.transforms.Compose(
                [torchvision.transforms.ToTensor()])
Example #3
0
def get_dataloader():
    """Build the Cityscapes training DataLoader plus its distributed sampler.

    Returns:
        (train_loader, train_sampler) -- the sampler is returned so callers
        can call ``set_epoch`` on it each epoch.
    """
    # TODO(xwd): Adaptive normalization by some large image.
    # E.g. In medical image processing, WSI image is very large and different to ordinary images.

    # ImageNet channel statistics, rescaled from [0, 1] to the [0, 255]
    # pixel range expected by the transforms.
    value_scale = 255
    mean = [m * value_scale for m in [0.485, 0.456, 0.406]]
    std = [s * value_scale for s in [0.229, 0.224, 0.225]]

    # Joint image/label augmentation: random scale -> rotate -> blur ->
    # horizontal flip -> random crop, then tensor conversion + normalization.
    augmentation = transform.Compose([
        transform.RandomScale([cfg['scale_min'], cfg['scale_max']]),
        transform.RandomRotate([cfg['rotate_min'], cfg['rotate_max']],
                               padding=mean,
                               ignore_label=cfg['ignore_label']),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontallyFlip(),
        transform.RandomCrop([cfg['train_h'], cfg['train_w']],
                             crop_type='rand',
                             padding=mean,
                             ignore_label=cfg['ignore_label']),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])

    dataset = cityscapes.Cityscapes(cfg['data_path'],
                                    split='train',
                                    transform=augmentation)

    # Use data sampler to make sure each GPU loads specific parts of dataset to avoid data reduntant.
    sampler = DistributedSampler(dataset)

    # Global batch size is split evenly across the world; shuffling is left
    # to the sampler (shuffle is False whenever a sampler is supplied).
    per_gpu_batch = cfg['batch_size'] // cfg['world_size']
    loader = DataLoader(dataset,
                        batch_size=per_gpu_batch,
                        shuffle=(sampler is None),
                        num_workers=4,
                        pin_memory=True,
                        sampler=sampler,
                        drop_last=True)

    return loader, sampler
Example #4
0
	def __init__(self, train=True):
		"""Index UrbanSound8K .wav files for the requested split.

		Args:
			train: when True scan every fold in ``config.train_folds``;
				otherwise scan only ``config.test_fold[0]``.
		"""
		self.root = './data/US8K/audio/'
		self.train = train

		# Each entry is the fold dir plus the file name, e.g. 'fold2/4201-3-0-0.wav'.
		self.file_paths = []

		# The train and test branches differ only in which folds are scanned,
		# so pick the fold list once and share the scanning loop.
		folds = config.train_folds if train else [config.test_fold[0]]
		for fold in folds:
			fold_dir = 'fold' + str(fold)
			for name in os.listdir(self.root + fold_dir + '/'):
				# Keep only .wav files (folders also contain metadata files).
				if name.split('.')[-1] == 'wav':
					self.file_paths.append(fold_dir + '/' + name)

		if self.train:
			# Training: augment the raw waveform (random scale, pad, crop to a
			# fixed 176400 samples) ...
			self.wave_transforms = torchvision.transforms.Compose([
				transforms.ToTensor1D(),
				transforms.RandomScale(max_scale=1.25),
				transforms.RandomPadding(out_len=176400),
				transforms.RandomCrop(out_len=176400)
			])

			# ... and the spectrogram (frequency/time masking configured in config).
			self.spec_transforms = torchvision.transforms.Compose([
				torchvision.transforms.ToTensor(),
				transforms.FrequencyMask(max_width=config.freq_masks_width,
				                         numbers=config.freq_masks),
				transforms.TimeMask(max_width=config.time_masks_width,
				                    numbers=config.time_masks)
			])

		else:  # for test: fixed-length waveform, no spectrogram masking
			self.wave_transforms = torchvision.transforms.Compose([
				transforms.ToTensor1D(),
				transforms.RandomPadding(out_len=176400),
				transforms.RandomCrop(out_len=176400)
			])

			self.spec_transforms = torchvision.transforms.Compose([
				torchvision.transforms.ToTensor()
			])
Example #5
0
from experiment_builder import ExperimentBuilder
from utils.arg_extractor import get_args
import utils.transforms as trans
from model.deeplab import DeepLab
import matplotlib.pyplot as plt
from tools import prediction
from utils.metrics import Evaluator
# Parse CLI arguments, then seed both the NumPy and the torch RNGs with the
# same user-supplied seed for reproducibility.
args = get_args()
rng = np.random.RandomState(seed=args.seed)

torch.manual_seed(seed=args.seed)

# Training augmentation pipeline: flip -> random scale -> random crop,
# then normalization and tensor conversion.
# NOTE(review): Normalize runs BEFORE ToTensor here -- this only works if the
# custom utils.transforms implementations accept non-tensor input; confirm
# against the trans module.
transform_train = trans.Compose([
    trans.RandomHorizontalFlip(),
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.RandomScale((0.5, 2.0)),
    #trans.FixScale(args.crop_size),
    trans.RandomCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])

# Validation pipeline: deterministic scale + center crop with the same
# normalization statistics (ImageNet mean/std) as training.
transform_val = trans.Compose([
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.FixScale(args.crop_size),
    trans.CenterCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])
if (args.aug == True):
    voc_train = VOCSegmentation(root='./data',