示例#1
0
 def build(self):
     """Create a fresh shuffling DataLoader over the dataset and wrap it
     in a DataLoaderIter so next() can pull batches from it."""
     loader = DataLoader(
         self.dataset,
         batch_size=self.batch_size,
         shuffle=True,
         num_workers=0,
         drop_last=True,
     )
     self.dataiter = DataLoaderIter(loader)
示例#2
0
class CustomIterator:
    """Endless batch provider over a CustomDataset.

    next() transparently restarts the underlying loader when an epoch is
    exhausted, tracking `iteration` (batches in the current epoch) and
    `epoch` (number of restarts).
    """

    def __init__(self, batch_size, is_cuda):
        self.batch_size = batch_size
        # ImageNet-style normalization after resizing to 224x224.
        pipeline = transforms.Compose([
            transforms.Scale([224, 224]),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        self.dataset = CustomDataset(self.batch_size, transform_=pipeline)
        self.is_cuda = is_cuda
        self.dataiter = None  # built lazily on first next()
        self.iteration = 0
        self.epoch = 0

    def build(self):
        """(Re)create the underlying DataLoader iterator."""
        loader = DataLoader(self.dataset, batch_size=1, shuffle=True,
                            num_workers=1, collate_fn=collate_fn,
                            drop_last=True)
        self.dataiter = DataLoaderIter(loader)

    def _to_gpu(self, batch):
        # Only the first two entries are tensors; the third stays on CPU.
        return [batch[0].cuda(), batch[1].cuda(), batch[2]]

    def next(self):
        """Return the next batch, rolling over into a new epoch on exhaustion."""
        if self.dataiter is None:
            self.build()
        try:
            batch = self.dataiter.next()
            self.iteration += 1
        except StopIteration:
            # Epoch finished: rebuild the loader and serve its first batch.
            self.epoch += 1
            self.build()
            self.iteration = 1
            batch = self.dataiter.next()
        if self.is_cuda:
            batch = self._to_gpu(batch)
        return batch
    def main():
        """Train an `aet` network on TIMIT mixtures with RMSprop.

        NOTE(review): this snippet was truncated/garbled in the original
        source (mis-indented, body-less `except`; several undefined names);
        the BUGFIX reconstructions below should be confirmed against the
        original script.
        """
        net = aet(sz=1024, dnn_size=[512], hp=16,
                  dropout=0.2, avg_len=5, ortho=False, transform='dft')

        # Start GPUs
        net = torch.nn.DataParallel(net, device_ids=[0, 1])
        net = net.cuda()

        random.seed(25)
        print('Select data')
        M = Mix_Dataset('/usr/local/timit/timit-wav/train/dr1/f*',
                        '/usr/local/timit/timit-wav/train/dr1/m*',
                        val=8)
        MI = DataLoaderIter(DataLoader(M, batch_size=args.batchsize,
                                       num_workers=0, pin_memory=True))

        # Setup optimizer (only trainable parameters)
        opt = torch.optim.RMSprop(filter(lambda p: p.requires_grad,
                                         net.parameters()), lr=0.01)

        # Get validation data
        xv, yv = M.getvals()

        # Initialize these for training
        e = []
        try:
            it = 0
            # BUGFIX: bare `iterations` was undefined (NameError); `args` is
            # already in scope (args.batchsize is read above).
            while it <= args.iterations:

                # Get data and move to GPU
                x, y = next(MI)
                inps = Variable(x).type_as(next(net.parameters()))
                target = Variable(y).type_as(next(net.parameters()))

                # Get loss
                net.train()
                out, h1, mag, phase = net(inps)
                # BUGFIX: the loss was computed from undefined `z`; the
                # network output here is `out`.
                loss = myloss(out, target, par1=h1, par2=mag, mse=False)

                # Update
                opt.zero_grad()
                loss.backward()
                opt.step()

                # Report
                e.append(abs(loss.data[0]))
                # BUGFIX: bare `batchsize` was undefined; use args.batchsize.
                it += args.batchsize
                be = [list(evaluate(net, xv, yv))]

                if it % 10 == 0:
                    print(it, abs(loss.data[0]), be)
            net.eval()
        # BUGFIX: the original `except` clause was mis-indented relative to
        # its `try` and had no body (SyntaxError).
        except KeyboardInterrupt:
            pass
示例#4
0
class DataProvider:
    """Restartable batch provider over a Dataset_triple.

    Tracks `iteration` (batches served in the current epoch) and `epoch`
    (number of loader restarts); next() never raises StopIteration.
    """

    def __init__(self, batch_size, is_cuda):
        self.batch_size = batch_size
        self.dataset = Dataset_triple(self.batch_size,
                                      transform_=default_transform)
        self.is_cuda = is_cuda
        self.dataiter = None  # built lazily on first next()
        self.iteration = 0
        self.epoch = 0

    def build(self):
        """(Re)create the underlying DataLoader iterator."""
        self.dataiter = DataLoaderIter(
            DataLoader(self.dataset,
                       batch_size=self.batch_size,
                       shuffle=True,
                       num_workers=0,
                       drop_last=True))

    def next(self):
        """Return the next batch, starting a new epoch when exhausted."""
        if self.dataiter is None:
            self.build()
        try:
            batch = self.dataiter.next()
            self.iteration += 1
        except StopIteration:
            # Epoch finished: rebuild the loader and serve its first batch.
            self.epoch += 1
            self.build()
            self.iteration = 1
            batch = self.dataiter.next()
        if self.is_cuda:
            # All three batch components are tensors; move each to the GPU.
            batch = [batch[0].cuda(), batch[1].cuda(), batch[2].cuda()]
        return batch
 def get_stream(self):
     """Yield batches forever, building a fresh iterator each epoch."""
     while True:
         yield from DataLoaderIter(self)
示例#6
0
        target = torch.FloatTensor(target)

        return features, target

    def __len__(self):
        """Return the number of audio examples (one per listed path)."""
        return len(self.audio_paths)

    def fit_stats(self, k_samples=100):
        """Estimate the mean and std of the features from the training set.

        Params:
            k_samples (int): Use this number of samples for estimation
        """
        k_samples = min(k_samples, len(self.audio_paths))
        # Fixed seed so the estimate is reproducible across runs.
        rng = random.Random(42)
        chosen = rng.sample(range(len(self.audio_paths)), k_samples)
        stacked = np.vstack([self.featurize(self.audio_paths[idx])
                             for idx in chosen])
        self.feats_mean = stacked.mean(axis=0)
        self.feats_std = stacked.std(axis=0)


if __name__ == '__main__':
    # Smoke-test the dataset pipeline: load metadata, fit normalization
    # stats, then pull a single batch through a DataLoader.
    # NOTE(review): DataLoaderIter is only importable from very old PyTorch
    # (0.3.x-era); confirm the installed torch version before running.
    from torch.utils.data.dataloader import DataLoaderIter
    from torch.utils.data import DataLoader
    d = STFTDataset(pad=4, normalize_targets=True)
    d.load_metadata_from_desc_file('valid.json')
    d.fit_stats()
    loader = DataLoader(d, 4)
    itr = DataLoaderIter(loader)
    # A batch unpacks into three components (features, targets, extra).
    x, y, z = next(itr)
示例#7
0
 def build(self):
     """Build the DataLoader (single-sample batches, custom collate) and
     wrap it in a DataLoaderIter for next()-style consumption."""
     loader = DataLoader(self.dataset, batch_size=1, shuffle=True,
                         num_workers=1, collate_fn=collate_fn,
                         drop_last=True)
     self.dataiter = DataLoaderIter(loader)
示例#8
0
def train():
    """Parse CLI arguments, build the chosen dataset (gw/iam/maps), construct
    a PHOCNet and train it with SGD or Adam, periodically evaluating on the
    test split and checkpointing to 'PHOCNet.pt'.

    Side effects: reads dataset files from disk, logs progress, writes model
    checkpoints.
    """
    logger = logging.getLogger('PHOCNet-Experiment::train')
    logger.info('--- Running PHOCNet Training ---')
    # argument parsing
    parser = argparse.ArgumentParser()
    # - train arguments
    parser.add_argument('--learning_rate_step', '-lrs', type=learning_rate_step_parser, default='6000:1e-4,10000:1e-5',
                        help='A dictionary-like string indicating the learning rate for up to the number of iterations. ' +
                             'E.g. the default \'70000:1e-4,80000:1e-5\' means learning rate 1e-4 up to step 70000 and 1e-5 till 80000.')
    parser.add_argument('--momentum', '-mom', action='store', type=float, default=0.9,
                        help='The momentum for SGD training (or beta1 for Adam). Default: 0.9')
    parser.add_argument('--momentum2', '-mom2', action='store', type=float, default=0.999,
                        help='Beta2 if solver is Adam. Default: 0.999')
    parser.add_argument('--delta', action='store', type=float, default=1e-8,
                        help='Epsilon if solver is Adam. Default: 1e-8')
    parser.add_argument('--solver_type', '-st', choices=['SGD', 'Adam'], default='Adam',
                        help='Which solver type to use. Possible: SGD, Adam. Default: Adam')
    parser.add_argument('--display', action='store', type=int, default=500,
                        help='The number of iterations after which to display the loss values. Default: 100')
    parser.add_argument('--test_interval', action='store', type=int, default=500,
                        help='The number of iterations after which to periodically evaluate the PHOCNet. Default: 500')
    parser.add_argument('--iter_size', '-is', action='store', type=int, default=10,
                        help='The batch size after which the gradient is computed. Default: 10')
    parser.add_argument('--batch_size', '-bs', action='store', type=int, default=1,
                        help='The batch size after which the gradient is computed. Default: 1')
    parser.add_argument('--weight_decay', '-wd', action='store', type=float, default=0.00005,
                        help='The weight decay for SGD training. Default: 0.00005')
    # NOTE: argparse applies `type` to string defaults too, so the default
    # '0, 1' below is converted to the list [0, 1] (int() tolerates spaces).
    parser.add_argument('--gpu_id', '-gpu', action='store',
                        type=lambda str_list: [int(elem) for elem in str_list.split(',')],
                        default='0, 1',
                        help='The ID of the GPU to use. If not specified, training is run in CPU mode.')
    # - experiment arguments
    parser.add_argument('--min_image_width_height', '-miwh', action='store', type=int, default=26,
                        help='The minimum width or height of the images that are being fed to the AttributeCNN. Default: 26')
    parser.add_argument('--phoc_unigram_levels', '-pul',
                        action='store',
                        type=lambda str_list: [int(elem) for elem in str_list.split(',')],
                        default='1,2,4,8',
                        help='The comma seperated list of PHOC unigram levels. Default: 1,2,4,8')
    parser.add_argument('--embedding_type', '-et', action='store',
                        choices=['phoc', 'spoc', 'dctow', 'phoc-ppmi', 'phoc-pruned'],
                        default='phoc',
                        help='The label embedding type to be used. Possible: phoc, spoc, phoc-ppmi, phoc-pruned. Default: phoc')
    parser.add_argument('--fixed_image_size', '-fim', action='store',
                        type=lambda str_tuple: tuple([int(elem) for elem in str_tuple.split(',')]),
                        default=None,
                        help='Specifies the images to be resized to a fixed size when presented to the CNN. Argument must be two comma seperated numbers.')
    parser.add_argument('--dataset', '-ds', required=True, choices=['gw', 'iam', 'maps'], default='gw',
                        help='The dataset to be trained on')
    args = parser.parse_args()

    # sanity checks
    if not torch.cuda.is_available():
        logger.warning('Could not find CUDA environment, using CPU mode')
        args.gpu_id = None

    # print out the used arguments
    logger.info('###########################################')
    logger.info('Experiment Parameters:')
    # BUGFIX: dict.iteritems() is Python-2 only and raises AttributeError on
    # Python 3; items() behaves equivalently for this loop on both versions.
    for key, value in vars(args).items():
        logger.info('%s: %s', str(key), str(value))
    logger.info('###########################################')

    # prepare datset loader
    #TODO: add augmentation
    logger.info('Loading dataset %s...', args.dataset)

    if args.dataset == 'gw':
        train_set = GWDataset(gw_root_dir='../../pytorch-phocnet-master/data/gw',
                              cv_split_method='almazan',
                              cv_split_idx=1,
                              image_extension='.tif',
                              embedding=args.embedding_type,
                              phoc_unigram_levels=args.phoc_unigram_levels,
                              fixed_image_size=args.fixed_image_size,
                              min_image_width_height=args.min_image_width_height)

    if args.dataset == 'iam':
        train_set = IAMDataset(gw_root_dir='../../pytorch-phocnet-master/data/IAM',
                               image_extension='.png',
                               embedding=args.embedding_type,
                               phoc_unigram_levels=args.phoc_unigram_levels,
                               fixed_image_size=args.fixed_image_size,
                               min_image_width_height=args.min_image_width_height)

    if args.dataset == 'maps':
        # BUGFIX: the file was opened in binary mode ('rb') while the lines
        # were stripped with a str argument, which raises TypeError on
        # Python 3. Open in text mode and close the handle via `with`.
        with open('../splits/train_files.txt', 'r') as f:
            all_files = [x.strip('\n') for x in f.readlines()]

        dataset_dir = "../make_dataset/"

        train_set = MAPSDataset(map_root_dir1=dataset_dir,
                                map_root_dir2=dataset_dir,
                                all_files=all_files,
                                embedding=args.embedding_type,
                                phoc_unigram_levels=args.phoc_unigram_levels,
                                fixed_image_size=args.fixed_image_size,
                                min_image_width_height=args.min_image_width_height)

    # Shallow copy: test_set shares the loaded data but selects its own
    # partition below.
    test_set = copy.copy(train_set)

    train_set.mainLoader(partition='train', transforms=None)
    test_set.mainLoader(partition='test', transforms=None)

    # augmentation using data sampler
    n_train_images = 50000
    augmentation = True

    if augmentation:
        train_loader = DataLoader(train_set,
                                  sampler=WeightedRandomSampler(train_set.weights, n_train_images),
                                  batch_size=args.batch_size,
                                  num_workers=8)
    else:
        train_loader = DataLoader(train_set,
                                  batch_size=args.batch_size, shuffle=True,
                                  num_workers=8)

    train_loader_iter = DataLoaderIter(loader=train_loader)
    test_loader = DataLoader(test_set,
                             batch_size=1,
                             shuffle=False,
                             num_workers=8)

    # load CNN
    logger.info('Preparing PHOCNet...')

    # Output size = length of the label embedding of the first sample.
    cnn = PHOCNet(n_out=list(train_set[0][1].size())[0],
                  input_channels=3,
                  gpp_type='gpp',
                  pooling_levels=([1], [5]))

    cnn.init_weights()

    ## pre-trained!!!!
    load_pretrained = False
    if load_pretrained:
        my_torch_load(cnn, 'PHOCNet.pt')

    loss_selection = 'BCE'  # or 'cosine'
    if loss_selection == 'BCE':
        loss = nn.BCEWithLogitsLoss(size_average=True)
    elif loss_selection == 'cosine':
        loss = CosineLoss(size_average=False, use_sigmoid=True)
    else:
        raise ValueError('not supported loss function')

    # move CNN to GPU
    if args.gpu_id is not None:
        if len(args.gpu_id) > 1:
            cnn = nn.DataParallel(cnn, device_ids=args.gpu_id)
            cnn.cuda()
        else:
            cnn.cuda(args.gpu_id[0])

    # run training
    lr_cnt = 0
    max_iters = args.learning_rate_step[-1][0]
    if args.solver_type == 'SGD':
        optimizer = torch.optim.SGD(cnn.parameters(), args.learning_rate_step[0][1],
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    if args.solver_type == 'Adam':
        optimizer = torch.optim.Adam(cnn.parameters(), args.learning_rate_step[0][1],
                                     weight_decay=args.weight_decay)

    optimizer.zero_grad()
    logger.info('Training:')
    for iter_idx in range(max_iters):
        if iter_idx % args.test_interval == 0:  # and iter_idx > 0:
            logger.info('Evaluating net after %d iterations', iter_idx)
            evaluate_cnn(cnn=cnn,
                         dataset_loader=test_loader,
                         args=args)
        # Accumulate gradients over iter_size batches before stepping.
        for _ in range(args.iter_size):
            if train_loader_iter.batches_outstanding == 0:
                train_loader_iter = DataLoaderIter(loader=train_loader)
                logger.info('Resetting data loader')
            word_img, embedding, _, _ = train_loader_iter.next()
            # Collapse an extra leading dimension if the batch arrives 5-D.
            if len(word_img.shape) > 4:
                word_img = torch.squeeze(word_img, dim=1)
            if args.gpu_id is not None:
                if len(args.gpu_id) > 1:
                    word_img = word_img.cuda()
                    embedding = embedding.cuda()
                else:
                    word_img = word_img.cuda(args.gpu_id[0])
                    embedding = embedding.cuda(args.gpu_id[0])

            word_img = torch.autograd.Variable(word_img)
            embedding = torch.autograd.Variable(embedding)
            output = cnn(word_img)
            loss_val = loss(output, embedding) * args.batch_size
            loss_val.backward()
        optimizer.step()
        optimizer.zero_grad()

        # mean runing errors??
        if (iter_idx + 1) % args.display == 0:
            logger.info('Iteration %*d: %f', len(str(max_iters)), iter_idx + 1, loss_val.data[0])

        # change lr
        if (iter_idx + 1) == args.learning_rate_step[lr_cnt][0] and (iter_idx + 1) != max_iters:
            lr_cnt += 1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.learning_rate_step[lr_cnt][1]

        # Periodically checkpoint the full model.
        if (iter_idx + 1) % 100 == 0:
            torch.save(cnn, 'PHOCNet.pt')

    print('saving the model')
    my_torch_save(cnn, 'PHOCNet.pt')
示例#9
0
def train():
    """Parse CLI arguments, build the GW or IAM dataset, construct a PHOCNet
    (warm-started from 'PHOCNet.pt') and train it with SGD or Adam,
    periodically evaluating on the test split and saving the final model.

    Side effects: reads dataset files from disk, logs progress, writes the
    trained model via my_torch_save.
    """
    logger = logging.getLogger('PHOCNet-Experiment::train')
    logger.info('--- Running PHOCNet Training ---')
    # argument parsing
    parser = argparse.ArgumentParser()
    # - train arguments
    parser.add_argument('--learning_rate_step', '-lrs', type=learning_rate_step_parser, default='60000:1e-4,100000:1e-5',
                        help='A dictionary-like string indicating the learning rate for up to the number of iterations. ' +
                             'E.g. the default \'70000:1e-4,80000:1e-5\' means learning rate 1e-4 up to step 70000 and 1e-5 till 80000.')
    parser.add_argument('--momentum', '-mom', action='store', type=float, default=0.9,
                        help='The momentum for SGD training (or beta1 for Adam). Default: 0.9')
    parser.add_argument('--momentum2', '-mom2', action='store', type=float, default=0.999,
                        help='Beta2 if solver is Adam. Default: 0.999')
    parser.add_argument('--delta', action='store', type=float, default=1e-8,
                        help='Epsilon if solver is Adam. Default: 1e-8')
    parser.add_argument('--solver_type', '-st', choices=['SGD', 'Adam'], default='Adam',
                        help='Which solver type to use. Possible: SGD, Adam. Default: Adam')
    parser.add_argument('--display', action='store', type=int, default=500,
                        help='The number of iterations after which to display the loss values. Default: 100')
    parser.add_argument('--test_interval', action='store', type=int, default=2000,
                        help='The number of iterations after which to periodically evaluate the PHOCNet. Default: 500')
    parser.add_argument('--iter_size', '-is', action='store', type=int, default=10,
                        help='The batch size after which the gradient is computed. Default: 10')
    parser.add_argument('--batch_size', '-bs', action='store', type=int, default=1,
                        help='The batch size after which the gradient is computed. Default: 1')
    parser.add_argument('--weight_decay', '-wd', action='store', type=float, default=0.00005,
                        help='The weight decay for SGD training. Default: 0.00005')
    # NOTE: argparse applies `type` to string defaults too, so the default
    # '0' below is converted to the list [0].
    parser.add_argument('--gpu_id', '-gpu', action='store',
                        type=lambda str_list: [int(elem) for elem in str_list.split(',')],
                        default='0',
                        help='The ID of the GPU to use. If not specified, training is run in CPU mode.')
    # - experiment arguments
    parser.add_argument('--min_image_width_height', '-miwh', action='store', type=int, default=26,
                        help='The minimum width or height of the images that are being fed to the AttributeCNN. Default: 26')
    parser.add_argument('--phoc_unigram_levels', '-pul',
                        action='store',
                        type=lambda str_list: [int(elem) for elem in str_list.split(',')],
                        default='1,2,4,8',
                        help='The comma seperated list of PHOC unigram levels. Default: 1,2,4,8')
    parser.add_argument('--embedding_type', '-et', action='store',
                        choices=['phoc', 'spoc', 'dctow', 'phoc-ppmi', 'phoc-pruned'],
                        default='phoc',
                        help='The label embedding type to be used. Possible: phoc, spoc, phoc-ppmi, phoc-pruned. Default: phoc')
    parser.add_argument('--fixed_image_size', '-fim', action='store',
                        type=lambda str_tuple: tuple([int(elem) for elem in str_tuple.split(',')]),
                        default=None,
                        help='Specifies the images to be resized to a fixed size when presented to the CNN. Argument must be two comma seperated numbers.')
    parser.add_argument('--dataset', '-ds', required=True, choices=['gw', 'iam'], default='gw',
                        help='The dataset to be trained on')
    args = parser.parse_args()

    # sanity checks
    if not torch.cuda.is_available():
        logger.warning('Could not find CUDA environment, using CPU mode')
        args.gpu_id = None

    # print out the used arguments
    logger.info('###########################################')
    logger.info('Experiment Parameters:')
    # BUGFIX: dict.iteritems() is Python-2 only and raises AttributeError on
    # Python 3; items() behaves equivalently for this loop on both versions.
    for key, value in vars(args).items():
        logger.info('%s: %s', str(key), str(value))
    logger.info('###########################################')

    # prepare datset loader
    #TODO: add augmentation
    logger.info('Loading dataset %s...', args.dataset)
    if args.dataset == 'gw':
        train_set = GWDataset(gw_root_dir='../../../phocnet-pytorch-master/data/gw',
                              cv_split_method='almazan',
                              cv_split_idx=1,
                              image_extension='.tif',
                              embedding=args.embedding_type,
                              phoc_unigram_levels=args.phoc_unigram_levels,
                              fixed_image_size=args.fixed_image_size,
                              min_image_width_height=args.min_image_width_height)

    if args.dataset == 'iam':
        train_set = IAMDataset(gw_root_dir='../../../phocnet-pytorch-master/data/IAM',
                               image_extension='.png',
                               embedding=args.embedding_type,
                               phoc_unigram_levels=args.phoc_unigram_levels,
                               fixed_image_size=args.fixed_image_size,
                               min_image_width_height=args.min_image_width_height)

    # Shallow copy: test_set shares the loaded data but selects its own
    # partition below.
    test_set = copy.copy(train_set)

    train_set.mainLoader(partition='train')
    test_set.mainLoader(partition='test', transforms=None)

    # augmentation using data sampler
    n_train_images = 500000
    augmentation = True

    if augmentation:
        train_loader = DataLoader(train_set,
                                  sampler=WeightedRandomSampler(train_set.weights, n_train_images),
                                  batch_size=args.batch_size,
                                  num_workers=8)
    else:
        train_loader = DataLoader(train_set,
                                  batch_size=args.batch_size, shuffle=True,
                                  num_workers=8)

    train_loader_iter = DataLoaderIter(loader=train_loader)
    test_loader = DataLoader(test_set,
                             batch_size=1,
                             shuffle=False,
                             num_workers=8)
    # load CNN
    logger.info('Preparing PHOCNet...')

    # Output size = length of the label embedding of the first sample.
    cnn = PHOCNet(n_out=train_set[0][1].shape[0],
                  input_channels=1,
                  gpp_type='gpp',
                  pooling_levels=([1], [5]))

    cnn.init_weights()

    ## pre-trained!!!!
    load_pretrained = True
    if load_pretrained:
        my_torch_load(cnn, 'PHOCNet.pt')

    loss_selection = 'BCE'  # or 'cosine'
    if loss_selection == 'BCE':
        loss = nn.BCEWithLogitsLoss(size_average=True)
    elif loss_selection == 'cosine':
        loss = CosineLoss(size_average=False, use_sigmoid=True)
    else:
        raise ValueError('not supported loss function')

    # move CNN to GPU
    if args.gpu_id is not None:
        if len(args.gpu_id) > 1:
            cnn = nn.DataParallel(cnn, device_ids=args.gpu_id)
            cnn.cuda()
        else:
            cnn.cuda(args.gpu_id[0])

    # run training
    lr_cnt = 0
    max_iters = args.learning_rate_step[-1][0]
    if args.solver_type == 'SGD':
        optimizer = torch.optim.SGD(cnn.parameters(), args.learning_rate_step[0][1],
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    if args.solver_type == 'Adam':
        optimizer = torch.optim.Adam(cnn.parameters(), args.learning_rate_step[0][1],
                                     weight_decay=args.weight_decay)

    optimizer.zero_grad()
    logger.info('Training:')
    for iter_idx in range(max_iters):
        if iter_idx % args.test_interval == 0:  # and iter_idx > 0:
            logger.info('Evaluating net after %d iterations', iter_idx)
            evaluate_cnn(cnn=cnn,
                         dataset_loader=test_loader,
                         args=args)
        # Accumulate gradients over iter_size batches before stepping.
        for _ in range(args.iter_size):
            if train_loader_iter.batches_outstanding == 0:
                train_loader_iter = DataLoaderIter(loader=train_loader)
                logger.info('Resetting data loader')
            word_img, embedding, _, _ = train_loader_iter.next()
            if args.gpu_id is not None:
                if len(args.gpu_id) > 1:
                    word_img = word_img.cuda()
                    embedding = embedding.cuda()
                else:
                    word_img = word_img.cuda(args.gpu_id[0])
                    embedding = embedding.cuda(args.gpu_id[0])

            word_img = torch.autograd.Variable(word_img)
            embedding = torch.autograd.Variable(embedding)
            output = cnn(word_img)
            loss_val = loss(output, embedding) * args.batch_size
            loss_val.backward()
        optimizer.step()
        optimizer.zero_grad()

        # mean runing errors??
        if (iter_idx + 1) % args.display == 0:
            logger.info('Iteration %*d: %f', len(str(max_iters)), iter_idx + 1, loss_val.data[0])

        # change lr
        if (iter_idx + 1) == args.learning_rate_step[lr_cnt][0] and (iter_idx + 1) != max_iters:
            lr_cnt += 1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.learning_rate_step[lr_cnt][1]

    my_torch_save(cnn, 'PHOCNet.pt')
示例#10
0
def main():
    """CLI entry point: build a tsep separation network, train it on TIMIT
    mixtures, and periodically validate/plot via visdom.

    Side effects: optionally writes '<name>-args.txt' and a .npz of metrics,
    drives a visdom server on port 5800, and runs on CUDA devices 0 and 1.
    """
    import argparse
    parser = argparse.ArgumentParser(description='tsep network')

    # Transform options
    parser.add_argument('--filters', '-f', type=int, default=1024,
                        help='Number of filters in front end')
    parser.add_argument('--fourier', '-dft', action='store_true', default=False,
                        help='Use a Fourier transform')
    parser.add_argument('--mask', '-mask', action='store_true', default=False,
                        help='Apply a mask')
    parser.add_argument('--orthogonal', '-or', action='store_true', default=False,
                        help='Use an "orthogonal" transform')
    parser.add_argument('--hop', '-hp', type=int, default=16,
                        help='Transform hop/stride')

    # Network options
    parser.add_argument('--smoother', '-sm', type=int, default=5,
                        help='Smoothing layer length')
    parser.add_argument('--denoisersize', '-ds', type=int, default=[1024],
                        nargs='*', help='Denoiser layer sizes')

    # Training options
    parser.add_argument('--learningrate', '-lr', type=float, default=.001,
                        help='Learning rate')
    parser.add_argument('--batchsize', '-b', type=int, default=16,
                        help='Batch size')
    parser.add_argument('--dropout', '-d', type=float, default=0.1,
                        help='Dropout rate')
    parser.add_argument('--iterations', '-it', type=int, default=2,
                        help='Number of training samples to learn from')
    parser.add_argument('--name', '-n', default=None,
                        help='Model name')
    parser.add_argument('--mse', '-mse', action='store_true', default=False,
                        help='Use MSE instead of SDR')

    # Get the arguments
    args = parser.parse_args()

    # Optionally dump arguments to a log file
    if args.name is not None:
        # BUGFIX: use a context manager so the file is closed even if a
        # write fails (the original left the handle open on error).
        with open(args.name + '-args.txt', 'w') as f:
            for keys, values in vars(args).items():
                f.write('%13s: %s\n' % ((keys), str(values)))

    # Instantiate network
    net = fd_snn_t(ft_size=args.filters, hop=args.hop, smoother=args.smoother,
                   sep_sizes=args.denoisersize, dropout=args.dropout,
                   adapt_fe=not args.fourier, ortho=args.orthogonal,
                   masking=args.mask)
    net = torch.nn.DataParallel(net, device_ids=[0, 1])
    net = net.cuda()

    # Select data
    random.seed(25)
    M = Mix_Dataset('/usr/local/timit/timit-wav/train/dr1/f*',
                    '/usr/local/timit/timit-wav/train/dr1/m*',
                    val=16)
    MI = DataLoaderIter(DataLoader(M, batch_size=args.batchsize,
                                   num_workers=0, pin_memory=True))

    # Setup optimizer (only trainable parameters)
    opt = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                  net.parameters()), lr=args.learningrate)

    # Initialize these for training
    vis = visdom.Visdom(port=5800)

    # Get validation data
    xv, yv = M.getvals()

    # Clear performance metrics
    e, be = [], []

    # Training loop
    pb = trange(args.iterations, unit='s', unit_scale=True,
                mininterval=.5, smoothing=.9)
    pli = 30  # Plotting/validation interval in seconds

    # Return bases ordered in mag freq domain (kept for inspection/plotting;
    # note it is not called inside the loop below)
    def frbases(w):
        fw = abs(rfft(w, axis=1))**2
        fw /= (fw.max(axis=1)[:, None] + 2e-7)
        fw[:, 0] = 0  # ignore DC
        i = argsort(argmax(fw, axis=1))
        return fw[i, :].T

    # Back-date the timer so the first iteration triggers a validation pass.
    lt = time.time() - 2*pli
    try:
        it = 0
        while it <= args.iterations:

            # Get data and move to GPU
            x, y = next(MI)
            inps = Variable(x).type_as(next(net.parameters()))
            target = Variable(y).type_as(next(net.parameters()))

            # Get loss
            net.train()
            z, h = net(inps)
            # BUGFIX: the original line was truncated ("loss ="), a syntax
            # error. Reconstructed from the sibling training script in this
            # file -- NOTE(review): confirm the intended loss arguments.
            loss = myloss(z, target, mse=args.mse)

            # Update
            opt.zero_grad()
            loss.backward()
            opt.step()

            # Report
            net.eval()
            e.append(abs(loss.data[0]))

            # Test on validation data
            if time.time() - lt > pli or it == args.iterations:
                be += [list(evaluate(net, xv, yv, args.name))]
                trainplot(e, [], be, vis, win=['loss', 'bss'], eid=args.name)

                # Save training metrics for posterity
                if args.name is not None:
                    savez(args.name, loss=e, bss=be)
                lt = time.time()

            # Update the progress bar
            pb.set_description('L:%.3f P:%.1f/%.1f/%.1f' %
                               (e[-1], be[-1][0], be[-1][1], be[-1][2]))
            pb.update(args.batchsize)
            it += args.batchsize

    except KeyboardInterrupt:
        pass