Exemplo n.º 1
0
    def train(self, epoch):
        """Run one training epoch of the DPCL model.

        Args:
            epoch: 1-based epoch index, used only for logging.

        Returns:
            Mean loss over all batches of the epoch.
        """
        self.logger.info('Start training from epoch: {:d}, iter: {:d}'.format(epoch, 1))
        self.dpcl.train()
        num_batchs = len(self.train_dataloader)
        total_loss = 0.0
        num_index = 1
        start_time = time.time()
        # Wrap once before the loop; the original re-created the
        # DataParallel wrapper on every batch, which is pure overhead.
        model = torch.nn.DataParallel(self.dpcl)
        for mix_wave, target_waves, non_slient in self.train_dataloader:
            mix_wave = mix_wave.to(self.device)
            target_waves = target_waves.to(self.device)
            non_slient = non_slient.to(self.device)
            mix_embs = model(mix_wave)
            l = Loss(mix_embs, target_waves, non_slient, self.num_spks)
            epoch_loss = l.loss()
            total_loss += epoch_loss.item()
            self.optimizer.zero_grad()
            epoch_loss.backward()

            if self.clip_norm:
                torch.nn.utils.clip_grad_norm_(self.dpcl.parameters(), self.clip_norm)

            self.optimizer.step()
            if num_index % self.print_freq == 0:
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}>, loss:{:.3f}'.format(
                    epoch, num_index, self.optimizer.param_groups[0]['lr'], total_loss/num_index)
                self.logger.info(message)
            # Bug fix: the counter was never advanced, so the periodic log
            # above was stuck at iter 1 (firing every batch or never,
            # depending on print_freq).
            num_index += 1
        end_time = time.time()
        total_loss = total_loss/num_batchs
        message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_batchs, self.optimizer.param_groups[0]['lr'], total_loss, (end_time-start_time)/60)
        self.logger.info(message)
        return total_loss
Exemplo n.º 2
0
 def validation(self, epoch):
     """Evaluate the DPCL model over the validation set for one epoch.

     No gradients are computed; returns the mean loss per batch.
     """
     self.logger.info(
         'Start Validation from epoch: {:d}, iter: {:d}'.format(epoch, 1))
     self.dpcl.eval()
     batch_count = len(self.val_dataloader)
     loss_sum = 0.0
     step = 1
     start_time = time.time()
     with torch.no_grad():
         for mix_wave, target_waves, non_slient in self.val_dataloader:
             mix_wave = mix_wave.to(self.device)
             target_waves = target_waves.to(self.device)
             non_slient = non_slient.to(self.device)
             embeddings = self.dpcl(mix_wave)
             criterion = Loss(embeddings, target_waves, non_slient, self.num_spks)
             batch_loss = criterion.loss()
             loss_sum += batch_loss.item()
             if step % self.print_freq == 0:
                 message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                     epoch, step, self.optimizer.param_groups[0]['lr'],
                     loss_sum / step)
                 self.logger.info(message)
             step += 1
     end_time = time.time()
     loss_sum = loss_sum / batch_count
     message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
         epoch, batch_count, self.optimizer.param_groups[0]['lr'],
         loss_sum, (end_time - start_time) / 60)
     self.logger.info(message)
     return loss_sum
Exemplo n.º 3
0
    def train(self, epoch):
        """Run one DANet training epoch and return the mean batch loss."""
        self.logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
            epoch, 1))
        self.danet.train()
        num_batchs = len(self.train_dataloader)
        total_loss = 0.0
        num_index = 1
        start_time = time.time()
        for mix_samp, wf, ibm, non_silent in self.train_dataloader:
            mix_samp = Variable(mix_samp).contiguous().to(self.device)
            wf = Variable(wf).contiguous().to(self.device)
            ibm = Variable(ibm).contiguous().to(self.device)
            non_silent = Variable(non_silent).contiguous().to(self.device)

            hidden = self.danet.init_hidden(mix_samp.size(0))

            input_list = [mix_samp, ibm, non_silent, hidden]
            self.optimizer.zero_grad()

            # On GPU the model consumes the packed input list; on CPU it
            # takes the tensors directly.
            if self.gpuid:
                mask, hidden = self.danet(input_list)
            else:
                mask, hidden = self.danet(mix_samp, ibm, non_silent)

            l = Loss(mix_samp, wf, mask)
            epoch_loss = l.loss()
            total_loss += epoch_loss.item()
            epoch_loss.backward()

            self.optimizer.step()
            if num_index % self.print_freq == 0:
                message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                    epoch, num_index, self.optimizer.param_groups[0]['lr'],
                    total_loss / num_index)
                self.logger.info(message)
            num_index += 1
        end_time = time.time()
        # Bug fix: average over the number of batches; num_index has been
        # advanced past the last batch (N + 1), which under-reported the loss.
        total_loss = total_loss / num_batchs
        message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_batchs, self.optimizer.param_groups[0]['lr'], total_loss,
            (end_time - start_time) / 60)
        self.logger.info(message)
        return total_loss
    def validation(self, epoch):
        """Evaluate the dual-path RNN on the validation set for one epoch.

        Returns:
            Mean loss over all validation batches.
        """
        self.logger.info(
            'Start Validation from epoch: {:d}, iter: {:d}'.format(epoch, 0))
        self.dualrnn.eval()
        num_batchs = len(self.val_dataloader)
        num_index = 1
        total_loss = 0.0
        start_time = time.time()
        with torch.no_grad():
            for mix, ref in self.val_dataloader:
                mix = mix.to(self.device)
                ref = [ref[i].to(self.device) for i in range(self.num_spks)]
                out = self.dualrnn(mix)

                # Loss here returns the loss tensor directly (no .loss() call).
                epoch_loss = Loss(out, ref)
                total_loss += epoch_loss.item()
                if num_index % self.print_freq == 0:
                    message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                        epoch, num_index, self.optimizer.param_groups[0]['lr'], total_loss/num_index)
                    self.logger.info(message)
                num_index += 1
        end_time = time.time()
        # Bug fix: divide by the batch count; the counter ends at
        # num_batchs + 1, which under-reported the epoch loss.
        total_loss = total_loss/num_batchs
        message = 'Finished *** <epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_batchs, self.optimizer.param_groups[0]['lr'], total_loss, (end_time-start_time)/60)
        self.logger.info(message)
        return total_loss
Exemplo n.º 5
0
    def validation(self, epoch):
        """Validate the audio-visual dual-path RNN for one epoch.

        Runs the model over the validation loader without gradients, logs a
        running loss per batch, and writes one randomly-chosen audio sample
        through a logger handler that exposes ``write_audio``.

        Args:
            epoch: epoch index, used for logging and the audio writer.

        Returns:
            Accumulated loss divided by the post-loop counter.
            NOTE(review): that divisor is num_batchs + 1, not num_batchs —
            confirm the off-by-one is intended.
        """
        self.logger.info(
            'Start Validation from epoch: {:d}, iter: {:d}'.format(epoch, 0))
        self.dualrnn.eval()
        num_batchs = len(self.val_dataloader)
        num_index = 1
        total_loss = 0.0
        start_time = time.time()
        with torch.no_grad():
            pbar = tqdm(self.val_dataloader, desc='Val loop', leave=False)
            for batch in pbar:
                # mix = mix.to(self.device)
                # ref = [ref[i].to(self.device) for i in range(self.num_spks)]
                # Keep only the first 50 video frames of each clip.
                batch["first_videos_features"] = batch[
                    "first_videos_features"][:, :50, :].detach()
                batch["second_videos_features"] = batch[
                    "second_videos_features"][:, :50, :].detach()
                batch = {k: v.to(self.device) for k, v in batch.items()}

                # Reference (clean) audio for each of the two speakers.
                ref = [batch[f"{i}_audios"] for i in ["first", "second"]]
                # self.optimizer.zero_grad()

                # if self.gpuid:
                #     #model = torch.nn.DataParallel(self.dualrnn)
                #     #out = model(mix)
                #     out = torch.nn.parallel.data_parallel(self.dualrnn,mix,device_ids=self.gpuid)
                # else:
                out = self.dualrnn(batch)  #, batch["audios_lens"])

                l = Loss(out, ref)  #, batch["audios_lens"])
                pbar.set_description(f"Loss: {round(l.item(), 4)}")
                epoch_loss = l
                total_loss += epoch_loss.item()
                # NOTE(review): num_index starts at 1, so this condition is
                # always truthy and logs every batch — was this meant to be
                # `num_index % self.print_freq == 0`? Confirm.
                if num_index:
                    message = 'Val: <epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                        epoch, num_index, self.optimizer.param_groups[0]['lr'],
                        total_loss / num_index)
                    self.logger.info(message)
                num_index += 1
        end_time = time.time()
        total_loss = total_loss / num_index
        message = 'Finished *** <epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_index, self.optimizer.param_groups[0]['lr'], total_loss,
            (end_time - start_time) / 60)
        self.logger.info(message)

        # First logger handler that can write audio; raises IndexError if
        # no such handler is configured.
        audio_writer = [
            handler for handler in self.logger.handlers
            if hasattr(handler, 'write_audio')
        ][0]

        # `out`, `ref` and `batch` are leftovers from the last loop
        # iteration — assumes the loader yielded at least one batch.
        audio_idx = random.randrange(0, out[0].size(0))

        audio_writer.write_audio((ref[0][audio_idx], ref[1][audio_idx]),
                                 (out[0][audio_idx], out[1][audio_idx]),
                                 batch['mix_noised_audios'][audio_idx],
                                 batch["audios_lens"][audio_idx],
                                 batch["noise_audios"][audio_idx], epoch)

        return total_loss
Exemplo n.º 6
0
    def train(self, epoch):
        """Train the audio-visual dual-path RNN for one epoch with apex AMP.

        Returns:
            Mean loss over all training batches.
        """
        self.logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
            epoch, 0))
        self.dualrnn.train()
        num_batchs = len(self.train_dataloader)
        total_loss = 0.0
        num_index = 1
        start_time = time.time()
        pbar = tqdm(self.train_dataloader, leave=False)
        for batch in pbar:
            # Keep only the first 50 video frames of each clip.
            batch["first_videos_features"] = batch[
                "first_videos_features"][:, :50, :].detach()
            batch["second_videos_features"] = batch[
                "second_videos_features"][:, :50, :].detach()
            batch = {k: v.to(self.device) for k, v in batch.items()}
            # Reference (clean) audio for each of the two speakers.
            ref = [batch[f"{i}_audios"] for i in ["first", "second"]]
            self.optimizer.zero_grad()

            # (Removed two dead `st_time = perf_counter()` assignments that
            # were never read.)
            out = self.dualrnn(batch)  #, batch["audios_lens"])
            epoch_loss = Loss(out, ref)  #, batch["audios_lens"])

            pbar.set_description(f"Loss: {round(epoch_loss.item(), 4)}")
            total_loss += epoch_loss.item()
            # Backprop through apex AMP's loss scaling.
            with amp.scale_loss(epoch_loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
            if self.clip_norm:
                torch.nn.utils.clip_grad_norm_(self.dualrnn.parameters(),
                                               self.clip_norm)

            self.optimizer.step()
            if num_index % self.print_freq == 0:
                message = 'Train: <epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                    epoch, num_index, self.optimizer.param_groups[0]['lr'],
                    total_loss / num_index)
                self.logger.info(message)
            num_index += 1
        end_time = time.time()
        # Bug fix: average over num_batchs; the counter overshoots by one
        # after the loop, which under-reported the epoch loss.
        total_loss = total_loss / num_batchs
        message = 'Finished *** <epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_batchs, self.optimizer.param_groups[0]['lr'], total_loss,
            (end_time - start_time) / 60)
        self.logger.info(message)
        return total_loss
Exemplo n.º 7
0
    def validation(self, epoch):
        """Run one DANet validation epoch and return the mean batch loss."""
        self.logger.info(
            'Start Validation from epoch: {:d}, iter: {:d}'.format(epoch, 1))
        self.danet.eval()
        num_batchs = len(self.val_dataloader)
        num_index = 1
        total_loss = 0.0
        start_time = time.time()
        with torch.no_grad():
            for mix_samp, wf, ibm, non_silent in self.val_dataloader:
                mix_samp = Variable(mix_samp).contiguous().to(self.device)
                wf = Variable(wf).contiguous().to(self.device)
                ibm = Variable(ibm).contiguous().to(self.device)
                non_silent = Variable(non_silent).contiguous().to(self.device)

                hidden = self.danet.init_hidden(mix_samp.size(0))
                input_list = [mix_samp, ibm, non_silent, hidden]

                # On GPU the model consumes the packed input list; on CPU it
                # takes the tensors directly.
                if self.gpuid:
                    mask, hidden = self.danet(input_list)
                else:
                    mask, hidden = self.danet(mix_samp, ibm, non_silent)

                l = Loss(mix_samp, wf, mask)
                epoch_loss = l.loss()
                total_loss += epoch_loss.item()
                if num_index % self.print_freq == 0:
                    message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                        epoch, num_index, self.optimizer.param_groups[0]['lr'],
                        total_loss / num_index)
                    self.logger.info(message)
                num_index += 1
        end_time = time.time()
        # Bug fix: average over the batch count; num_index overshoots by one
        # after the loop, which under-reported the epoch loss.
        total_loss = total_loss / num_batchs
        message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_batchs, self.optimizer.param_groups[0]['lr'], total_loss,
            (end_time - start_time) / 60)
        self.logger.info(message)
        return total_loss
Exemplo n.º 8
0
def run(load_last_checkpoint=False):
    """Train and validate the Luna model end to end.

    Args:
        load_last_checkpoint: when True, resume from the newest ``*.ckpt``
            file (highest epoch number) in the save directory.
    """
    save_dir = f'{OUTPUT_PATH}/models/'
    os.makedirs(save_dir, exist_ok=True)
    neural_net = Net()
    loss_fn = Loss()
    optim = torch.optim.SGD(neural_net.parameters(), DEFAULT_LR, momentum=0.9, weight_decay=1e-4)
    starting_epoch = 0
    initial_loss = None
    if load_last_checkpoint:
        model_paths = glob(f'''{save_dir}*.ckpt''')
        # Bug fix: parse the epoch number with os.path instead of assuming
        # '/' separators and a fixed-length extension (broke on Windows
        # paths and on any non-".ckpt" suffix length).
        model_names = [int(os.path.splitext(os.path.basename(i))[0]) for i in model_paths]
        latest_model_path = f'''{save_dir}{max(model_names)}.ckpt'''
        print('loading latest model from:', latest_model_path)
        checkpoint = torch.load(latest_model_path)
        neural_net.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        starting_epoch = checkpoint['epoch']
        initial_loss = checkpoint['loss']
    if torch.cuda.is_available():
        neural_net = neural_net.cuda()
        loss_fn = loss_fn.cuda()
    print(f'''Training from epoch: {starting_epoch} towards: {TOTAL_EPOCHS},
with learning rate starting from: {get_lr(starting_epoch)}, and loss: {initial_loss}''')
    # Split by series so no seriesuid spans both train and val; the
    # shuffle of the groups is seeded for reproducibility.
    # NOTE(review): .sample(frac=1) has no random_state, so the row order
    # itself is not reproducible — confirm whether that matters here.
    meta = pd.read_csv(f'{OUTPUT_PATH}/augmented_meta.csv', index_col=0).sample(frac=1).reset_index(drop=True)
    meta_group_by_series = meta.groupby(['seriesuid']).indices
    list_of_groups = [{i: list(meta_group_by_series[i])} for i in meta_group_by_series.keys()]
    random.Random(0).shuffle(list_of_groups)
    val_split = int(VAL_PCT * len(list_of_groups))
    val_indices = list(itertools.chain(*[list(i.values())[0] for i in list_of_groups[:val_split]]))
    train_indices = list(itertools.chain(*[list(i.values())[0] for i in list_of_groups[val_split:]]))
    ltd = LunaDataSet(train_indices, meta)
    lvd = LunaDataSet(val_indices, meta)
    train_loader = DataLoader(ltd, batch_size=1, shuffle=False)
    val_loader = DataLoader(lvd, batch_size=1, shuffle=False)

    for ep in range(starting_epoch, TOTAL_EPOCHS):
        train(train_loader, neural_net, loss_fn, ep, optim, get_lr, save_dir=save_dir)
        validate(val_loader, neural_net, loss_fn)
Exemplo n.º 9
0
def main():
    """Train YOLOv2 on VOC trainval for 160 epochs with stepwise LR decay."""
    par = ArgumentParser()
    par.add_argument('--batch_size', type=int, default=16)
    arg = par.parse_args()
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device('cpu')
    dataset = VOCDataset('VOCdevkit', split='trainval')
    dataloader = DataLoader(
        dataset,
        batch_size=arg.batch_size,
        shuffle=True,
        num_workers=8,
        collate_fn=detection_collate
    )
    # Bug fixes: removed a stray `dataset[0]` no-op and the bogus
    # `torch.backends.cuda.enabled/benchmark` assignments (the cudnn flags
    # above are the real switches); move the criterion with .to(device)
    # so CPU-only machines don't crash on .cuda().
    model = YOLOv2().to(device)
    criterion = Loss().to(device)
    lr = 1e-4
    opt = SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    for epoch in range(160):
        i = 0
        # Step the learning rate down at epochs 60 and 90.
        if epoch == 60:
            adjust_learning_rate(opt, lr/10)
        if epoch == 90:
            adjust_learning_rate(opt, lr/100)
        for batch in dataloader:
            i += 1
            img, boxes, label, num_obj = batch
            img = Variable(img).to(device)
            boxes = Variable(boxes).to(device)
            output = model(img)
            target = boxes, label, num_obj
            box_loss, iou_loss, class_loss = criterion(output, target)
            opt.zero_grad()
            loss = box_loss.mean() + iou_loss.mean() \
                + class_loss.mean()
            if i % 10 == 0:
                print(
                    f"batch {epoch} {i}/{len(dataloader)} loss:{round(loss.item(),3)} box: {round(box_loss.mean().item(),3)} iou: {round(iou_loss.mean().item(),3)} class: {round(class_loss.mean().item(),3)}")
            loss.backward()
            opt.step()
        if epoch % 5 == 0:
            print(f"epoch {epoch} save model")
            torch.save(model, f'weights/yolov2_{epoch}.pth')
    def __init__(self, dataset, batch_size, device=torch.device('cpu')):
        """Set up a YOLOv3 trainer: data loader, model, optimizer, loss.

        Args:
            dataset: dataset exposing per-scale anchors (``s_anchors``,
                ``m_anchors``, ``l_anchors``) and ``input_size``.
            batch_size: mini-batch size for the training loader.
            device: target device; defaults to CPU.
        """
        # Small / medium / large anchor boxes, taken from the dataset.
        self.anchors = dataset.s_anchors, dataset.m_anchors, dataset.l_anchors
        self.device = device

        self.data_loader = DataLoader(dataset, batch_size, shuffle=True)

        self.model = Yolov3Net(self.anchors)
        self.model.train()

        # NOTE(review): wraps in DataParallel whenever CUDA is available,
        # even with a single GPU — confirm this is intended.
        if torch.cuda.is_available():
            self.model = torch.nn.DataParallel(
                self.model).to(device=self.device)

        self.optimizer = Adam(self.model.parameters(), weight_decay=0.0005)
        self.criterion = Loss(self.anchors, input_size=dataset.input_size)
Exemplo n.º 11
0
    def train(self, epoch):
        """Train the dual-path RNN for one epoch and return the mean loss."""
        self.logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
            epoch, 0))
        self.dualrnn.train()
        num_batchs = len(self.train_dataloader)
        total_loss = 0.0
        num_index = 1
        start_time = time.time()
        for mix, ref in self.train_dataloader:
            mix = mix.to(self.device)
            ref = [ref[i].to(self.device) for i in range(self.num_spks)]
            self.optimizer.zero_grad()

            # Scatter the batch across the configured GPUs when available.
            if self.gpuid:
                out = torch.nn.parallel.data_parallel(self.dualrnn,
                                                      mix,
                                                      device_ids=self.gpuid)
            else:
                out = self.dualrnn(mix)

            # Loss returns the loss tensor directly (no .loss() call).
            epoch_loss = Loss(out, ref)
            total_loss += epoch_loss.item()
            epoch_loss.backward()

            if self.clip_norm:
                torch.nn.utils.clip_grad_norm_(self.dualrnn.parameters(),
                                               self.clip_norm)

            self.optimizer.step()
            if num_index % self.print_freq == 0:
                message = '<epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}>'.format(
                    epoch, num_index, self.optimizer.param_groups[0]['lr'],
                    total_loss / num_index)
                self.logger.info(message)
            num_index += 1
        end_time = time.time()
        # Bug fix: average over num_batchs; the counter ends at N + 1,
        # which under-reported the epoch loss.
        total_loss = total_loss / num_batchs
        message = 'Finished *** <epoch:{:d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, num_batchs, self.optimizer.param_groups[0]['lr'], total_loss,
            (end_time - start_time) / 60)
        self.logger.info(message)
        return total_loss
Exemplo n.º 12
0
 def __init__(self, args):
     """Set up the segmentation model wrapper: network, loss, metric, logger.

     Args:
         args: experiment config; ``args.type == "pre"`` selects the
             2-class localization U-Net, anything else the damage net.
     """
     super(Model, self).__init__()
     self.save_hyperparameters()
     self.args = args
     self.f1_score = F1(args)
     # Localization net for "pre" images, damage segmentation otherwise.
     self.model = UNetLoc(args) if args.type == "pre" else get_dmg_unet(
         args)
     self.loss = Loss(args)
     # Best validation F1 so far and the epoch it occurred at.
     self.best_f1 = torch.tensor(0)
     self.best_epoch = 0
     # Axes flipped for test-time augmentation: H, W, and both.
     self.tta_flips = [[2], [3], [2, 3]]
     self.lr = args.lr
     self.n_class = 2 if self.args.type == "pre" else 5
     self.softmax = nn.Softmax(dim=1)
     self.test_idx = 0
     # DLLogger: JSON stream into the results dir plus epoch-prefixed stdout.
     self.dllogger = Logger(backends=[
         JSONStreamBackend(
             Verbosity.VERBOSE,
             os.path.join(args.results, f"{args.logname}.json")),
         StdOutBackend(Verbosity.VERBOSE,
                       step_format=lambda step: f"Epoch: {step} "),
     ])
Exemplo n.º 13
0
def train(args):
    """Iteration-based training loop for the vocoder model.

    Builds the model, optimizer and criterion, optionally resumes from
    ``args.ckpt_pth``, then iterates batches until ``hps.max_iter``,
    periodically printing, logging, checkpointing, and sampling audio.

    Args:
        args: namespace with ``ckpt_pth``, ``data_dir``, ``log_dir`` and
            ``ckpt_dir`` attributes; an empty string disables the
            corresponding feature (resume / logging / checkpointing).
    """
    # build model
    model = Model()
    #print(sum(p.numel() for p in model.parameters() if p.requires_grad))
    # `mode` presumably moves tensors/models to the configured device and
    # sets train mode — TODO confirm against its definition.
    mode(model, True)
    optimizer = torch.optim.AdamW(model.parameters(), lr=hps.lr)
    criterion = Loss()

    # load checkpoint
    iteration = 1
    if args.ckpt_pth != '':
        model, optimizer, iteration = load_checkpoint(args.ckpt_pth, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration+1

    # get scheduler (resuming passes last_epoch so the LR picks up where it left off)
    if hps.sch:
        if args.ckpt_pth != '':
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                        hps.sch_step,
                                                        hps.sch_g,
                                                        last_epoch=iteration)
        else:
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer, hps.sch_step, hps.sch_g)

    # make dataset
    train_loader = prepare_dataloaders(args.data_dir)

    # get logger ready
    if args.log_dir != '':
        if not os.path.isdir(args.log_dir):
            os.makedirs(args.log_dir)
            os.chmod(args.log_dir, 0o775)
        logger = Logger(args.log_dir)

    # get ckpt_dir ready
    if args.ckpt_dir != '' and not os.path.isdir(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
        os.chmod(args.ckpt_dir, 0o775)

    model.train()
    # ================ MAIN TRAINING LOOP ===================
    while iteration <= hps.max_iter:
        for batch in train_loader:
            if iteration > hps.max_iter:
                break
            start = time.perf_counter()
            wavs, mels = batch
            wavs = mode(wavs)
            mels = mode(mels)

            # forward; inference pass only every hps.n iterations
            outputs = model(wavs, mels)
            p_wavs = model.infer(mels) if iteration % hps.n == 0 else None

            # loss (a tuple: [0] total for backward, [1]/[2] components for logging)
            loss = criterion(outputs, p_wavs, wavs)

            # zero grad and backward
            model.zero_grad()
            loss[0].backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       hps.gn)

            # update
            optimizer.step()
            if hps.sch:
                scheduler.step(min(iteration, hps.sch_stop))

            # info
            dur = time.perf_counter() - start
            print('Iter: {} Loss(z/s): {:.2e}/{:.2e} GN: {:.2e} {:.1f}s/it'.
                  format(iteration, loss[1].item(), loss[2].item(), grad_norm,
                         dur))
            # log
            if args.log_dir != '' and (iteration % hps.iters_per_log == 0):
                learning_rate = optimizer.param_groups[0]['lr']
                logger.log_training(loss[1].item(), loss[2].item(),
                                    learning_rate, iteration)

            # save ckpt
            if args.ckpt_dir != '' and (iteration % hps.iters_per_ckpt == 0):
                ckpt_pth = os.path.join(args.ckpt_dir,
                                        'ckpt_{}'.format(iteration))
                save_checkpoint(model, optimizer, iteration, ckpt_pth)

            # sample: synthesize one waveform and log it for listening
            if args.log_dir != '' and (iteration % hps.iters_per_sample == 0):
                model.eval()
                with torch.no_grad():
                    pred = model.infer(mels[:1])
                    logger.sample_training(wavs[0], pred[0], iteration)
                model.train()

            iteration += 1

    if args.log_dir != '':
        logger.close()
Exemplo n.º 14
0
# Build the VOC2007 test split and its loader.
test_set = VOCLoader(root='./datasets_raid1/voc/VOC2007',
                     image_set='test',
                     transform=transform)
#import pdb; pdb.set_trace()
# NOTE(review): collate_fn references `train_set`, which is not defined in
# this fragment — confirm it exists earlier in the file.
test_loader = DataLoader(test_set,
                         batch_size=32,
                         shuffle=False,
                         collate_fn=train_set.collate_fn)

# SSD with 21 classes (20 VOC classes + background).
model = SSD(21).to(device)
optimizer = torch.optim.SGD(model.parameters(),
                            lr=1e-3,
                            momentum=0.9,
                            weight_decay=0.0005)
criterion = Loss().to(device)

#import pdb; pdb.set_trace()

epochs = 1
for epoch in range(epochs):
    print("%d/%d" % (epoch, epochs))
    model.train()
    # NOTE(review): this "training" loop iterates the *test* loader and only
    # runs the forward pass — the loss/backward steps appear truncated here.
    for i, (images, categories, boxes) in enumerate(test_loader):

        #import pdb; pdb.set_trace()
        images = images.to(device)
        boxes = [box.to(device) for box in boxes]
        categories = [category.to(device) for category in categories]

        predicted_loc, predicted_cls = model(images)
0
def main(opt):
    """Train an SSD detector on Open Images with checkpointed resume.

    Builds the train/test loaders, loss, SGD optimizer and multistep LR
    schedule, resumes from ``{opt.save_folder}/SSD.pth`` when present, then
    alternates train/evaluate per epoch, saving a checkpoint each time.

    Args:
        opt: parsed CLI options (batch_size, num_workers, lr, momentum,
            weight_decay, multistep, nms_threshold, epochs, log_path,
            save_folder).
    """
    if torch.cuda.is_available():
        print('Will compute using CUDA')
        # torch.distributed.init_process_group(backend='nccl', init_method='env://')
        # num_gpus = torch.distributed.get_world_size()
        # Distributed init is commented out, so a single process is assumed.
        num_gpus = 1
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
        num_gpus = 1

    train_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": False,
        "num_workers": opt.num_workers,
        "collate_fn": collate_fn
    }

    # NOTE(review): shuffle=True on the test loader is unusual for
    # evaluation — confirm it is intended.
    test_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": False,
        "num_workers": opt.num_workers,
        "collate_fn": collate_fn
    }

    dboxes = generate_dboxes()
    model = SSD()
    train_set = OIDataset(SimpleTransformer(dboxes))
    train_loader = DataLoader(train_set, **train_params)
    test_set = OIDataset(SimpleTransformer(dboxes, eval=True), train=False)
    test_loader = DataLoader(test_set, **test_params)

    encoder = Encoder(dboxes)

    # Linear LR scaling with effective batch size (reference batch = 32).
    opt.lr = opt.lr * num_gpus * (opt.batch_size / 32)
    criterion = Loss(dboxes)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=opt.lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay,
                                nesterov=True)
    scheduler = MultiStepLR(optimizer=optimizer,
                            milestones=opt.multistep,
                            gamma=0.1)

    if torch.cuda.is_available():
        model.cuda()
        criterion.cuda()

    model = torch.nn.DataParallel(model)

    # Wipe any previous TensorBoard logs before writing new ones.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)
    checkpoint_path = os.path.join(opt.save_folder, "SSD.pth")

    writer = SummaryWriter(opt.log_path)

    # Resume from the single rolling checkpoint when it exists.
    if os.path.isfile(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        first_epoch = checkpoint["epoch"] + 1
        model.module.load_state_dict(checkpoint["model_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        # evaluate(model, test_loader, encoder, opt.nms_threshold)
    else:
        first_epoch = 0

    for epoch in range(first_epoch, opt.epochs):
        train(model, train_loader, epoch, writer, criterion, optimizer,
              scheduler)
        evaluate(model, test_loader, encoder, opt.nms_threshold)

        # Overwrite the rolling checkpoint after every epoch.
        checkpoint = {
            "epoch": epoch,
            "model_state_dict": model.module.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict()
        }
        torch.save(checkpoint, checkpoint_path)
Exemplo n.º 16
0
                          topology_vis[-1], loss_obj.x_grids, loss_obj.y_grids,
                          loss_obj.z_grids, itest, args, 'val')

    print('')
    return loss_eval


if __name__ == '__main__':

    # parse args
    args = parse_args()

    # load data
    args, data_val = load_data(args, dtype, 'val')

    # setup loss object
    loss_obj = Loss(args)

    # initialize the model from the snapshot given on the command line
    assert (os.path.isfile(args.model))
    # Bug fix: the original used Python 2 `print` statements, which are a
    # SyntaxError under Python 3; converted to print() calls (single-arg
    # form, so still valid under Python 2 as well).
    print("Validating with snapshotted model %s ..." % args.model)
    deep_marching_cubes = torch.load(args.model)
    if torch.cuda.is_available():
        deep_marching_cubes.cuda()

    # validation
    loss = run_val(deep_marching_cubes, loss_obj, data_val, args, 'val')
    print('============== average loss:%f' % (loss / args.num_val))

    print('Done!')
Exemplo n.º 17
0
def train() -> None:
    """Train a DPRNN-based model (separation / extraction / suppression).

    Reads a YAML option file from ``--opt``, builds the network, optimizer,
    ``ReduceLROnPlateau`` scheduler and dataloaders, optionally resumes from a
    checkpoint, then alternates training and validation epochs with
    best-validation-loss checkpointing and early stopping.
    """
    parser = argparse.ArgumentParser(
        description='Parameters for training Model')
    # configuration file (YAML) describing model, data, optimizer and logging
    parser.add_argument('--opt', type=str, help='Path to option YAML file.')
    args = parser.parse_args()
    opt = option.parse(args.opt)

    # logger writes to screen and/or file per the YAML 'logger' section
    set_logger.setup_logger(opt['logger']['name'],
                            opt['logger']['path'],
                            screen=opt['logger']['screen'],
                            tofile=opt['logger']['tofile'])
    logger = logging.getLogger(opt['logger']['name'])
    # date stamp (YYMMDD) used when naming saved checkpoints
    day_time = datetime.date.today().strftime('%y%m%d')

    # build model: variant is selected by the MODEL string in the config
    model = opt['model']['MODEL']
    logger.info("Building the model of {}".format(model))
    # Extraction and Suppression model
    if opt['model']['MODEL'] == 'DPRNN_Speaker_Extraction' or opt['model'][
            'MODEL'] == 'DPRNN_Speaker_Suppression':
        net = model_function.Extractin_Suppression_Model(
            **opt['Dual_Path_Aux_Speaker'])
    # Separation model
    if opt['model']['MODEL'] == 'DPRNN_Speech_Separation':
        net = model_function.Speech_Serapation_Model(
            **opt['Dual_Path_Aux_Speaker'])
    # NOTE(review): if MODEL matches none of the three names above, `net` is
    # never bound and the code below raises NameError.
    if opt['train']['gpuid']:
        # NOTE(review): both branches log the identical message.
        if len(opt['train']['gpuid']) > 1:
            logger.info('We use GPUs : {}'.format(opt['train']['gpuid']))
        else:
            logger.info('We use GPUs : {}'.format(opt['train']['gpuid']))

        device = torch.device('cuda:{}'.format(opt['train']['gpuid'][0]))
        gpuids = opt['train']['gpuid']
        if len(gpuids) > 1:
            net = torch.nn.DataParallel(net, device_ids=gpuids)
        net = net.to(device)
    # NOTE(review): `device` is only defined when opt['train']['gpuid'] is
    # truthy; the `.to(device)` calls in the loops below assume a GPU config.
    logger.info('Loading {} parameters: {:.3f} Mb'.format(
        model, check_parameters(net)))

    # build optimizer
    logger.info("Building the optimizer of {}".format(model))
    Optimizer = make_optimizer(net.parameters(), opt)

    # learning rate is reduced when the validation loss plateaus
    Scheduler = ReduceLROnPlateau(Optimizer,
                                  mode='min',
                                  factor=opt['scheduler']['factor'],
                                  patience=opt['scheduler']['patience'],
                                  verbose=True,
                                  min_lr=opt['scheduler']['min_lr'])

    # build dataloader
    logger.info('Building the dataloader of {}'.format(model))
    train_dataloader, val_dataloader = make_dataloader(opt)
    logger.info('Train Datasets Length: {}, Val Datasets Length: {}'.format(
        len(train_dataloader), len(val_dataloader)))

    # build trainer
    logger.info('............. Training ................')

    total_epoch = opt['train']['epoch']
    num_spks = opt['num_spks']
    print_freq = opt['logger']['print_freq']
    checkpoint_path = opt['train']['path']
    early_stop = opt['train']['early_stop']
    max_norm = opt['optim']['clip_norm']  # gradient clipping threshold
    best_loss = np.inf
    no_improve = 0  # epochs since the last validation improvement
    ce_loss = torch.nn.CrossEntropyLoss()  # speaker CE (extraction mode only)
    weight = 0.1  # weight of CE loss relative to SI-SDR loss

    epoch = 0
    # Resume training settings
    if opt['resume']['state']:
        # NOTE(review): the checkpoint filename hard-codes the '200722' date
        # prefix, so resuming only finds checkpoints saved under that stamp.
        opt['resume']['path'] = opt['resume'][
            'path'] + '/' + '200722_epoch{}.pth.tar'.format(
                opt['resume']['epoch'])
        ckp = torch.load(opt['resume']['path'], map_location='cpu')
        epoch = ckp['epoch']
        logger.info("Resume from checkpoint {}: epoch {:.3f}".format(
            opt['resume']['path'], epoch))
        net.load_state_dict(ckp['model_state_dict'])
        net.to(device)
        Optimizer.load_state_dict(ckp['optim_state_dict'])

    while epoch < total_epoch:

        epoch += 1
        logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
            epoch, 0))
        num_steps = len(train_dataloader)

        # training process
        total_SNRloss = 0.0
        total_CEloss = 0.0
        num_index = 1
        start_time = time.time()
        for inputs, targets in train_dataloader:
            # Separation train: SI-SNR loss over num_spks reference signals
            if opt['model']['MODEL'] == 'DPRNN_Speech_Separation':
                mix = inputs
                ref = targets
                net.train()

                mix = mix.to(device)
                ref = [ref[i].to(device) for i in range(num_spks)]

                net.zero_grad()
                train_out = net(mix)
                SNR_loss = Loss(train_out, ref)
                loss = SNR_loss

            # Extraction train: joint SI-SDR + weighted speaker-CE loss
            if opt['model']['MODEL'] == 'DPRNN_Speaker_Extraction':
                mix, aux = inputs
                ref, aux_len, sp_label = targets
                net.train()

                mix = mix.to(device)
                aux = aux.to(device)
                ref = ref.to(device)
                aux_len = aux_len.to(device)
                sp_label = sp_label.to(device)

                net.zero_grad()
                train_out = net([mix, aux, aux_len])
                SNR_loss = Loss_SI_SDR(train_out[0], ref)
                CE_loss = torch.mean(ce_loss(train_out[1], sp_label))
                loss = SNR_loss + weight * CE_loss
                total_CEloss += CE_loss.item()

            # Suppression train: SI-SDR loss only (total_CEloss stays 0, so
            # the 'CE loss' field logged below reads 0 for this mode)
            if opt['model']['MODEL'] == 'DPRNN_Speaker_Suppression':
                mix, aux = inputs
                ref, aux_len = targets
                net.train()

                mix = mix.to(device)
                aux = aux.to(device)
                ref = ref.to(device)
                aux_len = aux_len.to(device)

                net.zero_grad()
                train_out = net([mix, aux, aux_len])
                SNR_loss = Loss_SI_SDR(train_out[0], ref)
                loss = SNR_loss

            # Backpropagation: backward, clip gradient norm, then update
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm)
            Optimizer.step()

            total_SNRloss += SNR_loss.item()

            if num_index % print_freq == 0:
                message = '<Training epoch:{:d} / {:d} , iter:{:d} / {:d}, lr:{:.3e}, SI-SNR_loss:{:.3f}, CE loss:{:.3f}>'.format(
                    epoch, total_epoch, num_index, num_steps,
                    Optimizer.param_groups[0]['lr'], total_SNRloss / num_index,
                    total_CEloss / num_index)
                logger.info(message)

            num_index += 1

        end_time = time.time()
        # NOTE(review): num_index ends at (num_batches + 1) because it starts
        # at 1 and is incremented after every batch, so these epoch means
        # divide by one more than the batch count (slightly underestimated).
        mean_SNRLoss = total_SNRloss / num_index
        mean_CELoss = total_CEloss / num_index

        message = 'Finished Training *** <epoch:{:d} / {:d}, iter:{:d}, lr:{:.3e}, ' \
                  'SNR loss:{:.3f}, CE loss:{:.3f}, Total time:{:.3f} min> '.format(
            epoch, total_epoch, num_index, Optimizer.param_groups[0]['lr'], mean_SNRLoss, mean_CELoss, (end_time - start_time) / 60)
        logger.info(message)

        # development (validation) process
        val_num_index = 1
        val_total_loss = 0.0
        val_CE_loss = 0.0
        val_acc_total = 0.0
        val_acc = 0.0
        val_start_time = time.time()
        val_num_steps = len(val_dataloader)
        for inputs, targets in val_dataloader:
            net.eval()
            with torch.no_grad():
                # Separation development
                if opt['model']['MODEL'] == 'DPRNN_Speech_Separation':
                    mix = inputs
                    ref = targets
                    mix = mix.to(device)
                    ref = [ref[i].to(device) for i in range(num_spks)]
                    # NOTE(review): zero_grad() has no effect here — there is
                    # no backward pass under torch.no_grad().
                    Optimizer.zero_grad()
                    val_out = net(mix)
                    val_loss = Loss(val_out, ref)
                    val_total_loss += val_loss.item()

                # Extraction development: also tracks CE loss and speaker acc.
                if opt['model']['MODEL'] == 'DPRNN_Speaker_Extraction':
                    mix, aux = inputs
                    ref, aux_len, label = targets
                    mix = mix.to(device)
                    aux = aux.to(device)
                    ref = ref.to(device)
                    aux_len = aux_len.to(device)
                    label = label.to(device)
                    Optimizer.zero_grad()
                    val_out = net([mix, aux, aux_len])
                    val_loss = Loss_SI_SDR(val_out[0], ref)
                    val_ce = torch.mean(ce_loss(val_out[1], label))
                    val_acc = accuracy_speaker(val_out[1], label)
                    val_acc_total += val_acc
                    val_total_loss += val_loss.item()
                    val_CE_loss += val_ce.item()

                # suppression development
                if opt['model']['MODEL'] == 'DPRNN_Speaker_Suppression':
                    mix, aux = inputs
                    ref, aux_len = targets
                    mix = mix.to(device)
                    aux = aux.to(device)
                    ref = ref.to(device)
                    aux_len = aux_len.to(device)
                    Optimizer.zero_grad()
                    val_out = net([mix, aux, aux_len])
                    val_loss = Loss_SI_SDR(val_out[0], ref)
                    val_total_loss += val_loss.item()

                if val_num_index % print_freq == 0:
                    message = '<Valid-Epoch:{:d} / {:d}, iter:{:d} / {:d}, lr:{:.3e}, ' \
                              'val_SISNR_loss:{:.3f}, val_CE_loss:{:.3f}, val_acc :{:.3f}>' .format(
                        epoch, total_epoch, val_num_index, val_num_steps, Optimizer.param_groups[0]['lr'],
                        val_total_loss / val_num_index,
                        val_CE_loss / val_num_index,
                        val_acc_total / val_num_index)
                    logger.info(message)
            val_num_index += 1

        val_end_time = time.time()
        # NOTE(review): same off-by-one as the training means — val_num_index
        # ends at (num_val_batches + 1), so these divide by one extra.
        mean_val_total_loss = val_total_loss / val_num_index
        mean_val_CE_loss = val_CE_loss / val_num_index
        mean_acc = val_acc_total / val_num_index
        message = 'Finished *** <epoch:{:d}, iter:{:d}, lr:{:.3e}, val SI-SNR loss:{:.3f}, val_CE_loss:{:.3f}, val_acc:{:.3f}' \
                  ' Total time:{:.3f} min> '.format(epoch, val_num_index, Optimizer.param_groups[0]['lr'],
                                                    mean_val_total_loss, mean_val_CE_loss, mean_acc,
                                                    (val_end_time - val_start_time) / 60)
        logger.info(message)

        # LR scheduling keyed on the mean validation SI-SNR loss
        Scheduler.step(mean_val_total_loss)

        if mean_val_total_loss >= best_loss:
            no_improve += 1
            logger.info(
                'No improvement, Best SI-SNR Loss: {:.4f}'.format(best_loss))

        # On improvement: reset the early-stop counter and save a checkpoint
        if mean_val_total_loss < best_loss:
            best_loss = mean_val_total_loss
            no_improve = 0
            save_checkpoint(epoch, checkpoint_path, net, Optimizer, day_time)
            logger.info(
                'Epoch: {:d}, Now Best SI-SNR Loss Change: {:.4f}'.format(
                    epoch, best_loss))

        # Early stopping after `early_stop` consecutive non-improving epochs
        if no_improve == early_stop:
            save_checkpoint(epoch, checkpoint_path, net, Optimizer, day_time)
            logger.info("Stop training cause no impr for {:d} epochs".format(
                no_improve))
            break