Exemplo n.º 1
0
def adapt_f0(s, t):
    """Adapt the source F0 contour ``s`` towards the reference contour ``t``.

    When the module-level flag ``use_predicted_pitch`` is truthy, both
    contours are turned into ``(1, N, 1)`` float tensors, fed to the
    module-level ``pitch_model`` and the first batch entry of its output is
    returned as a NumPy array.

    Otherwise the positive entries of ``s`` are rescaled **in place** by
    ``mean(t[t > 0]) / mean(s[s > 0])``; non-positive entries are left
    untouched.

    Args:
        s: 1-D NumPy array holding the source contour (modified in place in
            the mean-scaling branch).
        t: 1-D array-like holding the reference contour.

    Returns:
        The adapted contour (``s`` itself in the mean-scaling branch).
    """
    if use_predicted_pitch:
        s = utils.to_gpu(torch.from_numpy(s)).view(1, -1, 1).float()
        t = utils.to_gpu(torch.from_numpy(t)).view(1, -1, 1).float()
        return pitch_model(s, t)[0, :].cpu().numpy()

    t = np.asarray(t)
    voiced_s = s > 0
    voiced_t = t > 0
    # With no positive frame on either side the ratio is undefined (the mean
    # of an empty selection is NaN); previously that NaN was written into
    # every positive frame of ``s``.  Leave the contour untouched instead.
    if not voiced_s.any() or not voiced_t.any():
        return s

    mean_s = s[voiced_s].mean()
    mean_t = t[voiced_t].mean()
    # Same operation order as the element-wise loop: (value * mean_t) / mean_s.
    s[voiced_s] = s[voiced_s] * mean_t / mean_s
    return s
Exemplo n.º 2
0
def main(model_filename, pitch_model_filename, output_dir, batch_size):
    """Run the encoder/generator over the test set and write 16-bit WAV files.

    Args:
        model_filename: Checkpoint passed to ``load_checkpoint`` for the
            encoder + generator container module.
        pitch_model_filename: Optional pitch-model checkpoint.  When the file
            exists, the module-level ``pitch_model`` is loaded and
            ``use_predicted_pitch`` is set to ``True``.
        output_dir: Directory the ``<name>.wav`` files are written to.
        batch_size: Number of test items handed to ``chunker`` per batch.
    """
    global pitch_model, use_predicted_pitch

    model = torch.nn.Module()
    model.add_module('encoder', Encoder(**encoder_config))
    model.add_module('generator',
                     Generator(sum(encoder_config['n_out_channels'])))
    model = load_checkpoint(model_filename, model).cuda()
    model.eval()

    if os.path.isfile(pitch_model_filename):
        use_predicted_pitch = True
        pitch_model = PitchModel(**pitch_config)
        pitch_model = load_checkpoint(pitch_model_filename, pitch_model).cuda()
        pitch_model.eval()

    testset = TestSet(**(data_config))
    for files in chunker(testset, batch_size):
        # Transpose [(cond..., name), ...] into per-field tuples; the last
        # field is the output name, everything before it is conditioning.
        files = list(zip(*files))
        cond_input, file_paths = files[:-1], files[-1]
        cond_input = [
            utils.to_gpu(torch.from_numpy(np.stack(x))).float()
            for x in cond_input
        ]

        # Pure inference: do not record an autograd graph.
        with torch.no_grad():
            cond_input = model.encoder(cond_input[0])
            audio = model.generator(cond_input)

        for i, file_path in enumerate(file_paths):
            print("writing {}".format(file_path))
            wav = audio[i].cpu().squeeze().detach().numpy() * 32768.0
            # Clip before the cast: a full-scale sample (+1.0 -> 32768) would
            # otherwise wrap around in int16.
            wav = np.clip(wav, -32768.0, 32767.0)
            write("{}/{}.wav".format(output_dir, file_path),
                  data_config['sampling_rate'], wav.astype(np.int16))
Exemplo n.º 3
0
 def get_noise(self, num_samples=None):
     """Return a ``(num_samples, cfg.z_size)`` batch of N(0, 1) noise.

     ``num_samples`` defaults to ``cfg.batch_size``.  The buffer is moved
     with ``to_gpu`` first and then filled in place, so sampling happens on
     whatever device ``to_gpu`` selected.
     """
     count = self.cfg.batch_size if num_samples is None else num_samples
     z = to_gpu(self.cfg.cuda, Variable(torch.ones(count, self.cfg.z_size)))
     z.data.normal_(0, 1)
     return z
Exemplo n.º 4
0
    def _decode_free_run(self, code_w, max_len):
        """Unroll the decoder for ``max_len`` steps, feeding back its own output.

        At every step the previous token embedding is concatenated with
        ``code_w`` along dim 2 and passed through ``self.decoder``.  The
        arg-max token id is re-embedded and used as the next input.

        When ``cfg.dec_embed`` is set, token scores are cosine similarities
        between the projected decoder output and ``self.embed_w.embed``
        (scaled by ``cfg.embed_temp`` before the log-softmax) and the
        projected outputs are returned as ``embeds`` as well.

        Returns:
            ``self.packer_w.new(probs=..., ids=...[, embeds=...])`` with the
            per-step results concatenated along dim 1.
        """
        code_w = code_w.unsqueeze(1)
        n_batch = code_w.size(0)

        # Every sequence starts from the <sos> token.
        # assumes the embedding is [batch_size, 1, embedding_size] — TODO confirm
        step_input = self.embed_w(self._get_sos_batch(n_batch, self.vocab_w))
        hidden = self._init_hidden(n_batch, self.cfg.hidden_size_w)

        emit_embeds = self.cfg.dec_embed
        step_embeds = []  # differentiable input for the discriminator
        step_probs = []  # for grad norm scaling
        step_ids = []

        # Per-sequence flag, handed to the pad-after-<eos> helpers each step.
        done = to_gpu(
            self.cfg.cuda,
            Variable(torch.ByteTensor(n_batch, 1).zero_(),
                     requires_grad=False))

        for _ in range(max_len):
            dec_out, hidden = self.decoder(
                torch.cat([step_input, code_w], 2), hidden)

            if emit_embeds:
                out_embed = self.linear_w(dec_out)
                similarity = self._compute_cosine_sim(out_embed,
                                                      self.embed_w.embed)
                log_prob = F.log_softmax(similarity * self.cfg.embed_temp, 2)
                ids = torch.max(similarity, 2)[1]
                # once <eos> has appeared, the helper pads what follows
                ids, out_embed, done = self._pads_after_eos(
                    ids, out_embed, done)
                step_embeds.append(out_embed)
            else:
                log_prob = F.log_softmax(self.linear_w(dec_out), 2)
                ids = torch.max(log_prob, 2)[1]
                ids, done = self._pad_ids_after_eos(ids, done)

            # The next step consumes the embedding of the chosen token.
            step_input = self.embed_w(ids)
            step_probs.append(log_prob)
            step_ids.append(ids)

        # Stitch the per-step results together along the time axis.
        embeds = torch.cat(step_embeds, 1) if emit_embeds else None
        probs = torch.cat(step_probs, 1)
        all_ids = torch.cat(step_ids, 1)

        if emit_embeds:
            return self.packer_w.new(probs=probs, ids=all_ids, embeds=embeds)
        return self.packer_w.new(probs=probs, ids=all_ids)
Exemplo n.º 5
0
 def _get_interpolated_z(self, num_samples):
     """Return ``num_samples`` latent vectors on a line between two random points.

     Two ``(1, cfg.z_size)`` points are drawn from N(0, 1); row ``k`` of the
     result is ``start + k * (end - start) / num_samples``, so the first row
     is the start point and the end point itself is not included.
     """
     dims = (1, self.cfg.z_size)
     start = np.random.normal(0, 1, dims)
     end = np.random.normal(0, 1, dims)

     step = (end - start) / num_samples
     rows = []
     for k in range(num_samples):
         rows.append(start + step * k)

     grid = torch.FloatTensor(np.vstack(rows))
     return to_gpu(self.cfg.cuda, Variable(grid))
Exemplo n.º 6
0
 def _decode_from_z(self, z):
     """Map latent vectors ``z`` through the generator and free-run decoder.

     Calls ``net.set_modules_train_mode(True)`` first, then converts ``z`` to
     a float tensor, generates a code with ``net.gen`` and decodes it for up
     to ``cfg.max_len`` steps with ``net.dec.free_running``.
     """
     self.net.set_modules_train_mode(True)

     latent = to_gpu(self.cfg.cuda, Variable(torch.FloatTensor(z)))
     code_fake = self.net.gen(latent)
     return self.net.dec.free_running(code_fake, self.cfg.max_len)
Exemplo n.º 7
0
    def __init__(self, alpha=0, class_weights=None, num_classes=1):
        """Set up the NLL term and store ``alpha`` / ``num_classes``.

        Params:
            alpha         : stored as ``self.alpha``
            class_weights : optional NumPy array; cast to float32 and passed
                            through ``to_gpu`` as the ``NLLLoss`` weight
            num_classes   : stored as ``self.num_classes``
        """
        weight = None
        if class_weights is not None:
            weights_f32 = class_weights.astype(dtype=np.float32)
            weight = to_gpu(torch.from_numpy(weights_f32))

        self.nll_loss = nn.NLLLoss(weight=weight)
        self.alpha = alpha
        self.num_classes = num_classes
Exemplo n.º 8
0
def to_one_hot(cfg, indices, num_class):
    """Return a float one-hot encoding of ``indices`` with a new last axis.

    Args:
        cfg: Kept for interface compatibility; no longer consulted.  The
            result is allocated directly on the device of ``indices``, which
            is the only placement for which the in-place scatter can succeed.
        indices: Integer tensor of class ids in ``[0, num_class)``.
        num_class: Size of the appended one-hot axis.

    Returns:
        A float32 tensor of shape ``(*indices.shape, num_class)`` that does
        not require grad.
    """
    size = indices.size()
    dim = len(size)
    # scatter_ needs int64 indices carrying one extra trailing axis.
    index = indices.detach().unsqueeze(dim).long()
    one_hot = torch.zeros(*size, num_class,
                          dtype=torch.float32, device=index.device)
    one_hot.scatter_(dim, index, 1.)
    return one_hot
Exemplo n.º 9
0
    def forward(self, noise):
        """Run ``noise`` through ``self.layers`` in order and return the result.

        ``noise`` must have ``cfg.z_size`` entries along dim 1.  When
        ``self._with_noise`` is truthy, zero-mean Gaussian noise with a
        standard deviation of 0.1 is added to the output.
        """
        assert noise.size(1) == self.cfg.z_size
        x = noise
        for layer in self.layers:
            x = layer(x)

        if self._with_noise:
            # NOTE(review): the previous code ended this branch with a dead
            # local store `with_noise = False`; it presumably meant to clear
            # `self._with_noise` after one use — confirm.  The flag is left
            # untouched here so behaviour is unchanged.
            jitter = torch.normal(mean=torch.zeros(x.size()), std=0.1)
            x = x + to_gpu(self.cfg.cuda, Variable(jitter))
        return x
Exemplo n.º 10
0
    def parse_batch(self, batch):
        """Move a collated batch through ``to_gpu`` and split it into (x, y).

        ``batch`` is a 9-tuple: current text and its lengths, preceding text
        and its lengths, following text and its lengths, mel, gate and output
        lengths.  Text and length tensors are cast to long, mel and gate to
        float.

        Returns:
            ``(x, y)`` where ``x`` holds the text tensors, the mel, the
            maximum input length (a Python number) and the output lengths,
            and ``y`` is ``(mel, gate)``.
        """
        (text, text_len, prev_text, prev_len, next_text, next_len,
         mel, gate, out_len) = batch

        text = to_gpu(text).long()
        text_len = to_gpu(text_len).long()

        prev_text = to_gpu(prev_text).long()
        prev_len = to_gpu(prev_len).long()

        next_text = to_gpu(next_text).long()
        next_len = to_gpu(next_len).long()

        longest_input = torch.max(text_len.data).item()
        mel = to_gpu(mel).float()
        gate = to_gpu(gate).float()
        out_len = to_gpu(out_len).long()

        model_inputs = (text, text_len, prev_text, prev_len, next_text,
                        next_len, mel, longest_input, out_len)
        targets = (mel, gate)
        return (model_inputs, targets)
Exemplo n.º 11
0
    def parse_batch(self, batch):
        """Move a ``(inputs, alignments, inputs_ctc)`` batch through ``utl.to_gpu``.

        ``inputs`` is rebuilt as a ``utl.Inputs`` with long text / length
        fields and float mel / gate fields.  ``alignments`` and
        ``inputs_ctc`` are optional and passed through as ``None`` when
        absent.

        Returns:
            The converted ``(inputs, alignments, inputs_ctc)`` triple.
        """
        inputs, alignments, inputs_ctc = batch

        inputs = utl.Inputs(text=utl.to_gpu(inputs.text).long(),
                            mels=utl.to_gpu(inputs.mels).float(),
                            gate=utl.to_gpu(inputs.gate).float(),
                            text_len=utl.to_gpu(inputs.text_len).long(),
                            mel_len=utl.to_gpu(inputs.mel_len).long())

        if alignments is not None:
            # Convert the batch's own `alignments` element.  The previous
            # code read `inputs.alignments`, but `inputs` has just been
            # rebuilt above without any `alignments` field.
            alignments = utl.to_gpu(alignments).float()

        if inputs_ctc is not None:
            inputs_ctc = utl.InputsCTC(text=utl.to_gpu(inputs_ctc.text).long(),
                                       length=utl.to_gpu(
                                           inputs_ctc.length).long())

        return inputs, alignments, inputs_ctc
Exemplo n.º 12
0
    def __init__(self, net):
        """Wire up the training helpers and run training until the supervisor stops.

        Note that the constructor does not return until
        ``self.sv.is_end_of_training()`` reports true.
        """
        log.info("Training start!")

        cfg = net.cfg
        self.net = net
        self.cfg = cfg

        self.test_sents = load_test_data(cfg)

        # Constant +1 / -1 tensors kept on the instance.
        one = to_gpu(cfg.cuda, torch.FloatTensor([1]))
        self.pos_one = one
        self.neg_one = one * (-1)

        self.result = ResultWriter(cfg)
        self.sv = TrainingSupervisor(net, self.result)

        while True:
            if self.sv.is_end_of_training():
                break
            self.train_loop(self.cfg, self.net, self.sv)
Exemplo n.º 13
0
    def __init__(self, net):
        """Wire up the testing helpers and run ``test_loop`` until it returns true.

        Loads the spaCy ``'en'`` pipeline for tokenisation.  The constructor
        does not return until ``test_loop`` signals completion.
        """
        log.info("Testing start!")

        cfg = net.cfg
        self.net = net
        self.cfg = cfg

        self.test_sents = load_test_data(cfg)

        # Constant +1 / -1 tensors kept on the instance.
        one = to_gpu(cfg.cuda, torch.FloatTensor([1]))
        self.pos_one = one
        self.neg_one = one * (-1)

        self.result = ResultWriter(cfg)
        self.sv = TestingSupervisor(net, self.result)

        self.num_sample = 10
        self.max_sample = 64

        spacy_en = spacy.load('en')
        self.tokenizer = lambda s: [tok.text for tok in spacy_en.tokenizer(s)]

        while True:
            if self.test_loop(self.cfg, self.net, self.sv):
                break
Exemplo n.º 14
0
    def forward(self, enc_h):
        """Turn encoder features into a (optionally sampled, optionally normalised) code.

        ``mu_layers`` always produces the mean, stored as ``self._mu``.

        * ``self._with_var`` truthy: ``sigma_layers`` yields a log-variance
          (stored as ``self._logvar``), ``sigma = exp(0.5 * logvar)`` is
          stored as ``self._sigma`` and the code is ``mu + sigma * eps`` with
          ``eps`` drawn from N(0, 1) via NumPy.
        * otherwise: the code is ``mu``, ``self._sigma`` is cleared and
          ``self._with_var`` is switched back on for the next call.

        If ``cfg.code_norm`` is set the code goes through ``self._normalize``.
        """
        mu = self.mu_layers(enc_h)
        self._mu = mu

        if not self._with_var:
            # Deterministic pass; sampling is re-enabled for later calls.
            code = mu
            self._sigma = None
            self._with_var = True
        else:
            logvar = self.sigma_layers(enc_h)
            self._logvar = logvar
            sigma = torch.exp(logvar * 0.5)
            self._sigma = sigma

            eps = np.random.normal(0, 1, size=sigma.size())
            eps = Variable(torch.from_numpy(eps).float(), requires_grad=False)
            eps = to_gpu(self.cfg.cuda, eps)
            code = mu + sigma * eps

        if self.cfg.code_norm:
            code = self._normalize(code)

        return code
Exemplo n.º 15
0
def predict_batch(models: nn.ModuleList, path2images, path2save, thresh=0.5):
    """
        Perform prediction for every image in a directory.

        Each image is transformed, normalised and passed through every model;
        the sigmoid outputs are averaged (a single model is the trivial
        average), thresholded into a binary mask and the overlay produced by
        ``alpha_overlay`` is written to ``path2save`` under the original
        file name.

        Params:
            models          : NN models (at least one)
            path2images     : path to a directory with input images
            path2save       : existing directory the overlays are written to
            thresh          : prediction threshold

        Raises:
            RuntimeError    : ``path2images`` or ``path2save`` is not a dir
            ValueError      : ``models`` is empty
    """

    path2images = Path(path2images)
    path2save = Path(path2save)

    if not path2images.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2images)))

    if not path2save.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2save)))

    if len(models) == 0:
        # Averaging over zero models used to yield NaN probabilities and
        # silently empty masks.
        raise ValueError("At least one model is required for prediction.")

    # Loop-invariant setup: evaluation mode and the test-time transform.
    for model in models:
        model.eval()
    transform = test_trasformations()

    count_processed = 0
    for ip in sorted(path2images.glob("*")):
        src_img = cv2.imread(str(ip))
        if src_img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of aborting the whole batch.
            print("Skipping '{}': not a readable image.".format(str(ip)))
            continue

        src_img = transform(image=src_img)["image"]

        img2predict = cv2.cvtColor(src_img.copy(),
                                   cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
        img2predict = normalize(img2predict)
        img2predict = utils.to_gpu(
            numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

        # Average the per-model probabilities.
        with torch.set_grad_enabled(False):
            prob_sum = sum(
                torch.sigmoid(model(img2predict)) for model in models)
        probs = (prob_sum / len(models)).squeeze(0).squeeze(0).float()

        mask = (probs > thresh).cpu().numpy().astype(dtype=np.uint8)
        overlayed_img = alpha_overlay(src_img, mask)

        #save
        cv2.imwrite(str(path2save / ip.name), overlayed_img)

        print("Image '{}' was processed successfully.".format(str(ip)))
        count_processed += 1

    print("{} images were processed.".format(count_processed))
Exemplo n.º 16
0
 def _init_hidden(self, bsz, nhidden):
     """Return a pair of zero tensors shaped ``(cfg.nlayers, bsz, nhidden)``.

     Both tensors are independent and passed through ``to_gpu``.
     """
     shape = (self.cfg.nlayers, bsz, nhidden)
     first = Variable(torch.zeros(*shape))
     second = Variable(torch.zeros(*shape))
     return (to_gpu(self.cfg.cuda, first), to_gpu(self.cfg.cuda, second))
Exemplo n.º 17
0
 def _init_state(self, bsz):
     """Return a zero tensor shaped ``(cfg.nlayers, bsz, cfg.nhidden)``."""
     shape = (self.cfg.nlayers, bsz, self.cfg.nhidden)
     blank = Variable(torch.zeros(*shape))
     return to_gpu(self.cfg.cuda, blank)
Exemplo n.º 18
0
 def make_noise_size_of(self, *size):
     """Return N(0, 1) noise of the given ``size``.

     The buffer is passed through ``to_gpu`` before being filled in place.
     """
     sample = to_gpu(self.cfg.cuda, Variable(torch.ones(*size)))
     sample.data.normal_(0, 1)
     return sample
Exemplo n.º 19
0
def predict(models: nn.ModuleList, img_path, path2save, thresh=0.5):
    """
        Perform prediction for a single image, save the overlay and show it.

        The image is transformed, normalised and passed through every model;
        the sigmoid outputs are averaged (a single model is the trivial
        average), thresholded into a binary mask and combined with the image
        by ``alpha_overlay``.  The window shown at the end blocks until a key
        is pressed.

        Params:
            models     : NN models (at least one)
            img_path   : path to an image
            path2save  : file path the overlay is written to
            thresh     : prediction threshold

        Raises:
            FileNotFoundError : ``img_path`` does not exist
            ValueError        : ``models`` is empty
            RuntimeError      : the file cannot be decoded as an image
    """

    img_path = Path(img_path)

    if not img_path.exists():
        raise FileNotFoundError("File '{}' not found.".format(str(img_path)))

    if len(models) == 0:
        # Averaging over zero models used to yield NaN probabilities and a
        # silently empty mask.
        raise ValueError("At least one model is required for prediction.")

    src_img = cv2.imread(str(img_path))
    if src_img is None:
        # cv2.imread signals an undecodable file by returning None.
        raise RuntimeError(
            "File '{}' could not be read as an image.".format(str(img_path)))

    transform = test_trasformations()
    src_img = transform(image=src_img)["image"]

    img2predict = cv2.cvtColor(src_img.copy(),
                               cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
    img2predict = normalize(img2predict)

    img2predict = utils.to_gpu(
        numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

    #evaluate mode
    for model in models:
        model.eval()

    # Average the per-model probabilities.
    with torch.set_grad_enabled(False):
        prob_sum = sum(torch.sigmoid(model(img2predict)) for model in models)
    probs = (prob_sum / len(models)).squeeze(0).squeeze(0).float()

    mask = (probs > thresh).cpu().numpy().astype(dtype=np.uint8)
    overlayed_img = alpha_overlay(src_img, mask)

    #save (str() so that a pathlib.Path is accepted as well)
    cv2.imwrite(str(path2save), overlayed_img)

    #show
    cv2.imshow("Predicted", overlayed_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    print("Image '{}' was processed successfully.".format(str(img_path)))
Exemplo n.º 20
0
 def _get_sos_batch(self, bsz, vocab):
     """Return a ``(bsz, 1)`` long tensor filled with ``vocab.SOS_ID``."""
     placeholder = Variable(torch.ones(bsz, 1).long())
     batch = to_gpu(self.cfg.cuda, placeholder)
     batch.fill_(vocab.SOS_ID)
     return batch
Exemplo n.º 21
0
 def _add_noise_to(self, code, std):
     """Return ``code`` plus zero-mean Gaussian noise of standard deviation ``std``.

     ``code`` is returned unchanged when ``std`` is not positive.
     """
     if not std > 0:
         return code

     zero_mean = torch.zeros(code.size())
     jitter = torch.normal(mean=zero_mean, std=std)
     return code + to_gpu(self.cfg.cuda, Variable(jitter))
Exemplo n.º 22
0
def _collect_overlap_metrics(dataset, predict_probs, threshold):
    """Return per-sample ``(jaccards, dices)`` using ``predict_probs(img, mask)``."""
    jaccards = []
    dices = []
    for img, mask in dataset:
        img = to_gpu(img.unsqueeze(0).contiguous())
        mask = to_gpu(mask.unsqueeze(0).contiguous())

        with torch.set_grad_enabled(False):
            probs = predict_probs(img, mask)

        binary = (probs > threshold).float()
        jaccards.append(jaccard(mask, binary))
        dices.append(dice(mask, binary))
    return jaccards, dices


def evaluate(path2models, model: nn.Module, threshold: float, holdout_dataset: str, evaluate_one=True):
    """
        Perform evaluation of a model or list of models.

        Params:
            path2models     : path to a model dir (or, in the cross-validation case, a dir whose subdirs each hold a 'model.pt')
            model           : A model instance the saved weights are loaded into
            threshold       : probability threshold in [0, 1) used to binarise predictions
            holdout_dataset : path to a hold-out dataset (must contain 'imgs' and 'masks' subdirs)
            evaluate_one    : if True then perform evaluation of one model. Otherwise evaluate the
                              average prediction of multiple models (in case of cross-val)

        Returns:
            ({"eval_jacc": ..., "eval_dice": ...}, uu_metrics, um_metrics, umm_metrics)

        Raises:
            AssertionError  : ``threshold`` is outside [0, 1)
            RuntimeError    : an expected 'model.pt' file does not exist
    """
    import copy  # local import: used to snapshot per-fold weights below

    assert threshold >= 0 and threshold < 1.0, "Error. Invalid threshold: {}".format(threshold)

    path2models = Path(path2models)
    holdout_dataset = Path(holdout_dataset)

    img_paths = list(map(str, (holdout_dataset / 'imgs').glob('*')))
    mask_paths = list(map(str, (holdout_dataset / 'masks').glob('*')))

    test_dataset = RoadDataset2(img_paths=img_paths, mask_paths=mask_paths, transforms=valid_tranformations())
    fmax_test_datset = RoadDataset2(img_paths=img_paths, mask_paths=mask_paths, transforms=valid_tranformations(), fmeasure_eval=True)

    if evaluate_one:
        path2models = path2models / 'model.pt'

        print("Evluation for the single model: {}".format(str(path2models)))

        if not path2models.exists():
            raise RuntimeError("Model {} does not exists.".format(str(path2models)))

        state = torch.load(str(path2models))
        model.load_state_dict(state["model"])

        #eval mode
        model.eval()
        models_list = [model]

        def predict_probs(img, mask):
            return torch.sigmoid(model(img))
    else:
        #Important! path2models dir should contain a few subdirs, each holding a model trained on one fold.
        list_models_paths = sorted(list(path2models.glob('*')))

        print("Evaluation for multiple models: {}".format([str(lmp/'model.pt') for lmp in list_models_paths]))

        models_list = []
        for lmp in list_models_paths:
            model_path = lmp / 'model.pt'

            if not model_path.exists():
                raise RuntimeError("Model {} does not exists.".format(str(model_path)))

            state = torch.load(str(model_path))
            model.load_state_dict(state["model"])

            # Snapshot the model for this fold.  Appending `model` itself
            # stored the same object N times, so every entry ended up with
            # the weights of the last fold and the "ensemble" was one model.
            models_list.append(copy.deepcopy(model).eval())

        def predict_probs(img, mask):
            #Averaging all predictions for one point of test data
            sum_predicts = to_gpu(torch.zeros(mask.shape).float())
            for m in models_list:
                sum_predicts += torch.sigmoid(m(img))
            return (sum_predicts / len(models_list)).float()

    jaccards, dices = _collect_overlap_metrics(test_dataset, predict_probs, threshold)

    evaluation_jaccard = np.mean(jaccards).astype(dtype=np.float64)
    evaluation_dice = np.mean(dices).astype(dtype=np.float64)
    uu_metrics, um_metrics, umm_metrics = fmeasure_evaluation(models_list, valid_dataset=fmax_test_datset)

    return {"eval_jacc" : evaluation_jaccard, "eval_dice" : evaluation_dice}, uu_metrics, um_metrics, umm_metrics
Exemplo n.º 23
0
def train(num_gpus, rank, group_name, output_directory, epochs,
          g_learning_rate, d_learning_rate, adv_ag, adv_fd, lamda_adv,
          lamda_feat, warmup_steps, decay_learning_rate, iters_per_checkpoint,
          batch_size, seed, checkpoint_path):
    """Train the encoder/generator against a discriminator and a disentangler.

    The model container holds four sub-modules: ``encoder``, ``generator``,
    ``discriminator`` and ``disentangler``.  Two RAdam optimizers are used:
    one over encoder + generator parameters, one over disentangler +
    discriminator parameters.  Each batch first updates the discriminator
    side (if any adversarial term is enabled) and then the generator side.

    Args:
        num_gpus: When greater than 1, distributed training is initialised
            and gradients are all-reduced.
        rank: Process rank; only rank 0 logs, writes summaries and saves
            checkpoints.
        group_name: Forwarded to ``init_distributed``.
        output_directory: Directory for checkpoints and the timestamped log
            sub-directory (created by rank 0).
        epochs: Upper bound (exclusive) of the epoch loop.
        g_learning_rate: Learning rate of the generator-side optimizer.
        d_learning_rate: Learning rate of the discriminator-side optimizer.
        adv_ag: Enables the adversarial audio-generation losses.
        adv_fd: Enables the adversarial feature-disentanglement losses.
        lamda_adv: Weight of the adversarial loss in the generator loss.
        lamda_feat: Weight of the feature-matching loss in the generator loss.
        warmup_steps: Forwarded to the ``Optimizer`` wrappers.
        decay_learning_rate: Forwarded to the ``Optimizer`` wrappers.
        iters_per_checkpoint: A checkpoint is saved whenever the global
            iteration is a multiple of this value.
        batch_size: Batch size of the training ``DataLoader``.
        seed: Seed for the torch CPU and CUDA random generators.
        checkpoint_path: Checkpoint to resume from; ``""`` starts fresh.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    # A bare nn.Module is used purely as a container for the sub-networks.
    model = torch.nn.Module()
    model.add_module('encoder', Encoder(**encoder_config))
    model.add_module('generator',
                     Generator(sum(encoder_config['n_out_channels'])))
    model.add_module('discriminator',
                     MultiScaleDiscriminator(**discriminator_config))
    # The disentangler maps the first encoder channel group to the combined
    # width of the remaining groups.
    model.add_module(
        'disentangler',
        Disentangler(encoder_config['n_out_channels'][0],
                     sum(encoder_config['n_out_channels'][1:])))
    model = model.cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    # Using RAdam as optimizer
    # Lookahead has resume training issues:
    # lr schedule doesn't affect nested RAdam of Lookahead
    g_parameters = list(model.generator.parameters())
    g_parameters = list(model.encoder.parameters()) + g_parameters
    g_optimizer = RAdam(g_parameters, lr=g_learning_rate)

    d_parameters = list(model.discriminator.parameters())
    d_parameters = list(model.disentangler.parameters()) + d_parameters
    d_optimizer = RAdam(d_parameters, lr=d_learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, g_optimizer, d_optimizer, iteration = load_checkpoint(
            checkpoint_path, model, g_optimizer, d_optimizer)
        iteration += 1  # next iteration is iteration + 1

    # Wrappers that step the optimizer and update its learning rate; they
    # receive the (possibly resumed) iteration counter.
    customer_g_optimizer = Optimizer(g_optimizer, g_learning_rate, iteration,
                                     warmup_steps, decay_learning_rate)
    customer_d_optimizer = Optimizer(d_optimizer, d_learning_rate, iteration,
                                     warmup_steps, decay_learning_rate)

    criterion = nn.MSELoss()
    l1_loss = nn.L1Loss()
    stft_criterion = MultiResolutionSTFTLoss()

    trainset = Dataset(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    # Shuffling is delegated to the sampler when one is used.
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=(train_sampler is None),
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    # NOTE: `writer` and `meters` exist only on rank 0; they are used further
    # down exclusively under the same `rank == 0` guard.
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)
        logdir = os.path.join(
            output_directory,
            time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()))
        os.makedirs(logdir, exist_ok=True)
        writer = SummaryWriter(logdir=logdir)
        # One meter per loss key that can appear in `losses` below.
        anchors = [
            'loss_g', 'loss_g_sc', 'loss_g_mag', 'loss_g_adv', 'loss_g_feat',
            'loss_g_fd', 'loss_d', 'loss_d_real', 'loss_d_fake', 'loss_d_fd'
        ]
        meters = {
            x: LossMeter(x, writer, 100, iteration, True)
            for x in anchors
        }

    model.train()
    # When resuming, skip the epochs already covered by `iteration`.
    epoch_offset = max(0, int(iteration / len(train_loader)))

    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        # Let the distributed sampler know the epoch (no-op without sampler).
        train_sampler.set_epoch(epoch) if train_sampler is not None else None
        # Progress bar only on rank 0.
        tbar = tqdm(
            enumerate(train_loader)) if rank == 0 else enumerate(train_loader)
        for i, batch in tbar:
            model.zero_grad()

            # `cond` is the encoder input, `a` the reference signal the
            # generator output is compared against.
            cond, a = [to_gpu(x) for x in batch]
            # Get generator outputs
            x = model.encoder(cond)
            g_outputs = model.generator(x)

            losses = {}

            # Get Discriminator loss
            customer_d_optimizer.zero_grad()
            d_loss = []
            # Adversarial training for audio generation
            if adv_ag == True:
                # Generator output is detached so this step only produces
                # gradients for the discriminator.
                real_scores, _ = model.discriminator(a.unsqueeze(1))
                fake_scores, _ = model.discriminator(g_outputs.detach())

                # MSE against all-ones for real and all-zeros for fake,
                # averaged over the returned score tensors.
                d_loss_fake_list, d_loss_real_list = [], []
                for (real_score, fake_score) in zip(real_scores, fake_scores):
                    d_loss_real_list.append(
                        criterion(real_score, torch.ones_like(real_score)))
                    d_loss_fake_list.append(
                        criterion(fake_score, torch.zeros_like(fake_score)))

                d_loss_real = sum(d_loss_real_list) / len(d_loss_real_list)
                d_loss_fake = sum(d_loss_fake_list) / len(d_loss_fake_list)
                d_loss = d_loss + [d_loss_real, d_loss_fake]
                losses.update({
                    'loss_d_real': d_loss_real,
                    'loss_d_fake': d_loss_fake
                })
            # Adversarial training for feature disentanglement
            if adv_fd == True:
                # The disentangler predicts the remaining channel groups from
                # the first one; encoder output is detached for this step.
                split_x = torch.split(x.detach(),
                                      encoder_config['n_out_channels'],
                                      dim=1)
                pred = model.disentangler(split_x[0])
                d_loss_fd = F.l1_loss(pred, torch.cat((split_x[1:]), dim=1))
                d_loss = d_loss + [d_loss_fd]
                losses.update({'loss_d_fd': d_loss_fd})
            # Skip the discriminator-side step when no adversarial term is on.
            if len(d_loss) > 0:
                d_loss = sum(d_loss)
                d_loss.backward()
                nn.utils.clip_grad_norm_(d_parameters, max_norm=10)
                customer_d_optimizer.step_and_update_lr()
                losses.update({'loss_d': d_loss})

            # Get generator loss
            customer_g_optimizer.zero_grad()
            g_clip_norm_scale = 10
            # STFT Loss
            sc_loss, mag_loss = stft_criterion(g_outputs.squeeze(1), a)
            g_loss = sc_loss + mag_loss
            losses.update({'loss_g_sc': sc_loss, 'loss_g_mag': mag_loss})
            # Adversarial training for audio generation
            if adv_ag == True:
                # Not detached here: gradients flow back into the generator.
                fake_scores, fake_feats = model.discriminator(g_outputs)
                real_scores, real_feats = model.discriminator(a.unsqueeze(1))

                adv_loss_list, feat_loss_list = [], []
                # NOTE(review): this loop and the next rebind `i`, shadowing
                # the batch index of the enclosing loop (which is not read
                # afterwards).
                for i, fake_score in enumerate(fake_scores):
                    adv_loss_list.append(
                        criterion(fake_score, torch.ones_like(fake_score)))
                adv_loss = sum(adv_loss_list) / len(adv_loss_list)

                # Feature matching: L1 between fake and (detached) real
                # discriminator features.
                for i in range(len(fake_feats)):
                    for j in range(len(fake_feats[i])):
                        feat_loss_list.append(
                            l1_loss(fake_feats[i][j],
                                    real_feats[i][j].detach()))
                feat_loss = sum(feat_loss_list) / len(feat_loss_list)

                g_loss = g_loss + adv_loss * lamda_adv + feat_loss * lamda_feat
                losses.update({'loss_g_adv': adv_loss})
                losses.update({'loss_g_feat': feat_loss})
                # Tighter gradient clipping once adversarial terms are active.
                g_clip_norm_scale = 0.5
            # Adversarial training for feature disentanglement
            if adv_fd == True:
                split_x = torch.split(x,
                                      encoder_config['n_out_channels'],
                                      dim=1)
                pred = model.disentangler(split_x[0])
                g_loss_fd = F.l1_loss(pred,
                                      torch.cat((split_x[1:]), dim=1).detach())
                # Negative sign: the disentangler's L1 error is subtracted
                # from the generator-side objective.
                g_loss = g_loss + (-1.0) * g_loss_fd
                losses.update({'loss_g_fd': g_loss_fd})
            g_loss.backward()
            nn.utils.clip_grad_norm_(g_parameters, max_norm=g_clip_norm_scale)
            customer_g_optimizer.step_and_update_lr()
            losses.update({'loss_g': g_loss})

            # only output log of 0-th GPU
            if rank == 0:
                # `x[5:]` strips the leading "loss_" from each key.
                tbar.set_description("{:>7}:  ".format(iteration) + ', '.join([
                    "{}: {:.1e}".format(x[5:], losses[x].item())
                    for x in losses.keys()
                ]))
                for x in losses:
                    meters[x].add(losses[x].item())
                # NOTE: this rebinds the `checkpoint_path` argument; a fresh
                # run also saves at iteration 0.
                if (iteration % iters_per_checkpoint == 0):
                    checkpoint_path = "{}/model_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, g_optimizer, d_optimizer, iteration,
                                    checkpoint_path)

            iteration += 1
Exemplo n.º 24
0
 def _get_tag_batch(self, size, num):
     """Return a tensor of shape ``(*size, 1)`` whose entries all equal ``num``."""
     ones = Variable(torch.ones(*size, 1))
     return to_gpu(self.cfg.cuda, ones) * num
Exemplo n.º 25
0
 def _add_gaussian_noise_to(self, code):
     """Return ``code`` plus zero-mean Gaussian noise.

     The standard deviation is ``self.noise_radius``; the noise is sampled
     on the CPU and then passed through ``to_gpu``.
     """
     zero_mean = torch.zeros(code.size())
     jitter = torch.normal(mean=zero_mean, std=self.noise_radius)
     return code + to_gpu(self.cfg.cuda, Variable(jitter))
Exemplo n.º 26
0
def main():
    """Run the GAN pipeline described by the parsed YAML configuration.

    The configuration dictionary comes from ``parseyaml()``.  The value of
    ``params['arch']`` selects one of two modes:

    * ``'Generator'`` -- build a generator for ``params['image_size']``,
      load its weights from ``params['path']`` and write
      ``params['quantity']`` batches of 64 generated images as PNG files.
    * ``'DCGAN'`` / ``'SNGAN'`` -- build a generator/discriminator pair,
      train it with ``training_loop`` and then write the loss plot, the
      image grid and the metrics.

    Every output path is prefixed with
    ``params['out'] + params['run'] + '_'``.

    Raises:
        ValueError: If ``params['image_size']`` is not 64, 128 or 256, or
            if ``params['arch']`` is not ``'Generator'``, ``'DCGAN'`` or
            ``'SNGAN'``.  Raised before any dataset or network is built.
    """
    params = parseyaml()

    arch = params['arch']
    image_size = params['image_size']
    out_prefix = params['out'] + params['run'] + '_'

    # The generator class depends only on the image size, in every mode.
    # An if/elif chain (rather than a dict literal) keeps the class names
    # resolved lazily, exactly as before.
    if image_size == 64:
        generator_cls = Generator
    elif image_size == 128:
        generator_cls = Generator_128
    elif image_size == 256:
        generator_cls = Generator_256
    else:
        # Previously this fell through and failed later with an unbound
        # ``netG``; fail early with an explicit message instead.
        raise ValueError(
            "unsupported image_size {!r}; expected 64, 128 or 256".format(
                image_size))

    if arch == 'Generator':

        device = to_gpu(ngpu=params['n_gpu'])

        # Inference uses fixed hyper-parameters rather than the config.
        netG = generator_cls(ngpu=0, nz=256, ngf=64, nc=64).to(device)

        # NOTE(review): the checkpoint load below overwrites the state
        # dict, so this init looks redundant; kept to leave behavior
        # (including any RNG consumption) unchanged.
        netG.apply(weights_init)
        # map_location lets a checkpoint saved on a GPU be loaded on
        # whatever device was selected above (e.g. a CPU-only host).
        netG.load_state_dict(torch.load(params['path'], map_location=device))

        # No gradients are needed for sampling; skip building the graph.
        # NOTE(review): the generator is left in training mode, as before.
        with torch.no_grad():
            for i in range(params['quantity']):

                fixed_noise = torch.randn(64, 256, 1, 1, device=device)
                fakes = netG(fixed_noise)

                for j, fake in enumerate(fakes):
                    save_image(fake, out_prefix +
                               str(i) + '_' + str(j) + '_img.png')

    else:

        # Pick the discriminator class first so that an unsupported
        # architecture is rejected before the dataset loader is created.
        if arch == 'DCGAN':
            if image_size == 64:
                discriminator_cls = Discriminator
            elif image_size == 128:
                discriminator_cls = Discriminator_128
            else:  # 256, validated above
                discriminator_cls = Discriminator_256
        elif arch == 'SNGAN':
            if image_size == 64:
                discriminator_cls = Discriminator_SN
            elif image_size == 128:
                discriminator_cls = Discriminator_SN_128
            else:  # 256, validated above
                discriminator_cls = Discriminator_SN_256
        else:
            raise ValueError(
                "unsupported arch {!r}; expected 'Generator', 'DCGAN' "
                "or 'SNGAN'".format(arch))

        dataloader = dataLoader(
            path=params['path'], image_size=image_size,
            batch_size=params['batch_size'],
            workers=params['loader_workers'])

        device = to_gpu(ngpu=params['n_gpu'])

        # Generator is built before the discriminator, as in the original.
        netG = generator_cls(ngpu=params['n_gpu'],
                             nz=params['latent_vector'],
                             ngf=params['gen_feature_maps'],
                             nc=params['number_channels']).to(device)

        netD = discriminator_cls(params['n_gpu'],
                                 nc=params['number_channels'],
                                 ndf=params['dis_feature_maps']).to(device)

        # Wrap both networks for multi-GPU execution when requested.
        if (device.type == 'cuda') and (params['n_gpu'] > 1):
            gpu_ids = range(params['n_gpu'])
            netG = nn.DataParallel(netG, list(gpu_ids))
            netD = nn.DataParallel(netD, list(gpu_ids))

        netG.apply(weights_init)
        netD.apply(weights_init)

        print(netG)
        print(netD)

        criterion = nn.BCELoss()

        # NOTE(review): the number of noise vectors equals image_size
        # (64/128/256), not a batch size -- confirm this is intended.
        fixed_noise = torch.randn(image_size,
                                  params['latent_vector'], 1, 1, device=device)

        betas = (params['beta_adam'], 0.999)

        # A learning_rate >= 1 is treated as a multiplier applied to the
        # 0.0002 base rate of the discriminator only; a value below 1 is
        # used directly as the rate of both optimizers.
        if params['learning_rate'] >= 1:
            lr_d = 0.0002 * params['learning_rate']
            lr_g = 0.0002
        else:
            lr_d = params['learning_rate']
            lr_g = params['learning_rate']

        optimizerD = optim.Adam(netD.parameters(), lr=lr_d, betas=betas)
        optimizerG = optim.Adam(netG.parameters(), lr=lr_g, betas=betas)

        G_losses, D_losses, img_list, img_list_only = training_loop(
            num_epochs=params['num_epochs'], dataloader=dataloader,
            netG=netG, netD=netD, device=device, criterion=criterion,
            nz=params['latent_vector'],
            optimizerG=optimizerG, optimizerD=optimizerD,
            fixed_noise=fixed_noise, out=out_prefix)

        loss_plot(G_losses=G_losses, D_losses=D_losses, out=out_prefix)

        image_grid(dataloader=dataloader, img_list=img_list,
                   device=device, out=out_prefix)

        compute_metrics(real=next(iter(dataloader)), fakes=img_list_only,
                        size=image_size, out=out_prefix)