Example #1
def train_and_test():
    # pick a test sample
    dataset = Data(config.train_size, config.batch_size, config.test_size)
    for test_batch in dataset.test_dataset:
        test_sample = test_batch[:config.num_examples_to_generate, :, :, :]

    random_vector_for_generation = tf.random.normal(
        shape=[config.num_examples_to_generate, config.latent_dim])
    model = VAE(config.latent_dim)

    #utils.generate_and_save_images(model, 0, test_sample)

    optimizer = tf.keras.optimizers.Adam(1e-4)
    for epoch in range(1, config.epochs + 1):
        start_time = time.time()
        for train_x in dataset.train_dataset:
            utils.train_step(model, train_x, optimizer=optimizer)
        end_time = time.time()

        loss = tf.keras.metrics.Mean()
        for test_x in dataset.test_dataset:
            loss(utils.compute_loss(model, test_x))

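        # the mean test loss is the negative ELBO, so flip the sign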
        elbo = -loss.result()
        print(
            'Epoch: {}, Test set ELBO: {}, time elapsed for current epoch: {}'.
            format(epoch, elbo, end_time - start_time))
        utils.generate_and_save_images(model, epoch, test_sample)
Example #2
def loss_func(model, predicted, boxes, classes):
    # if not model.training:
    #     predicted = predicted[1]
    targets = []
    bs = classes.shape[0]
    max_detections = classes.shape[1]
    for img_idx in range(bs):
        for detect_idx in range(max_detections):
            clazz = classes[img_idx, detect_idx]
            if clazz == 0: continue
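            # boxes come in [-1, 1]; map them to normalized [0, 1] coordinates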
            t, l, b, r = boxes[img_idx, detect_idx] * 0.5 + 0.5
            w = r - l
            h = b - t
            cx = (l + r) / 2.0
            cy = (t + b) / 2.0
            targets.append([
                img_idx,
                float(clazz - 1),
                float(cx),
                float(cy),
                float(w),
                float(h)
            ])
    ft = torch.cuda.FloatTensor if predicted[0].is_cuda else torch.Tensor
    targets = ft(targets)
    loss, _ = compute_loss(predicted, targets, model)
    loss *= bs / 64
    return loss[0]
Example #3
def test(model, fetcher):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    total_size = 0
    # true positive / intersection
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)
    pbar = tqdm(fetcher)
    for idx, (inputs, targets) in enumerate(pbar):
        batch_idx = idx + 1
        outputs = model(inputs)
        loss = compute_loss(outputs, targets, model)
        val_loss += loss.item()
        predicted = outputs.max(1)[1]
        if idx == 0:
            show_batch(inputs, predicted)
        predicted = predicted.view(-1)
        targets = targets.view(-1)
        eq = predicted.eq(targets)
        total_size += predicted.size(0)
        for c_i, c in enumerate(classes):
            indices = targets.eq(c_i)
            positive = indices.sum().item()
            tpi = eq[indices].sum().item()
            fni = positive - tpi
            fpi = predicted.eq(c_i).sum().item() - tpi
            tp[c_i] += tpi
            fn[c_i] += fni
            fp[c_i] += fpi
        T, P, R, miou, F1 = compute_metrics(tp, fn, fp)
        pbar.set_description(
            'loss: %8g, mAP: %8g, F1: %8g, miou: %8g' %
            (val_loss / batch_idx, P.mean(), F1.mean(), miou.mean()))
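    # sum the per-class counts across workers before recomputing final metrics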
    if dist.is_initialized():
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        T, P, R, miou, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    if len(classes) < 10:
        for c_i, c in enumerate(classes):
            print(
                'cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g'
                % (c, T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    else:
        print('top 5 error classes (lowest IoU):')
        copy_miou = miou.clone()
        for i in range(5):
            c_i = copy_miou.min(0)[1]
            copy_miou[c_i] = 1
            print(
                'cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g'
                % (classes[c_i], T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    return miou.mean().item()
Example #4
def test(model, fetcher):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    total_size = torch.Tensor([0])
    true_size = torch.Tensor([0])
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (inputs, targets) in pbar:
        batch_idx = idx + 1
        outputs = model(inputs)
        loss = compute_loss(outputs, targets, model)
        val_loss += loss.item()
        predicted = outputs.max(1)[1]
        if idx == 0:
            show_batch(inputs.cpu(), predicted.cpu(), classes)
        eq = predicted.eq(targets)
        total_size += predicted.size(0)
        true_size += eq.sum()
        for c_i, c in enumerate(classes):
            indices = targets.eq(c_i)
            positive = indices.sum().item()
            tpi = eq[indices].sum().item()
            fni = positive - tpi
            fpi = predicted.eq(c_i).sum().item() - tpi
            tp[c_i] += tpi
            fn[c_i] += fni
            fp[c_i] += fpi
        pbar.set_description('loss: %8g, acc: %8g' %
                             (val_loss / batch_idx, true_size / total_size))
    if dist.is_initialized():
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        total_size = total_size.to(device)
        true_size = true_size.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        dist.all_reduce(total_size, op=dist.ReduceOp.SUM)
        dist.all_reduce(true_size, op=dist.ReduceOp.SUM)
    T, P, R, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    if len(classes) < 10:
        for c_i, c in enumerate(classes):
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, F1: %8g' %
                  (c, T[c_i], P[c_i], R[c_i], F1[c_i]))
    else:
        print('top 5 error classes (lowest precision):')
        copy_P = P.clone()
        for i in range(5):
            c_i = copy_P.min(0)[1]
            copy_P[c_i] = 1
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, F1: %8g' %
                  (classes[c_i], T[c_i], P[c_i], R[c_i], F1[c_i]))
    return true_size.item() / total_size.item()
Example #5
def test(model, fetcher):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    # per-class landmark counts and accumulated L2 (NME) errors
    n = torch.zeros(num_classes)
    l2_sum = torch.zeros(num_classes)
    pbar = tqdm(fetcher)
    for idx, (inputs, targets) in enumerate(pbar):
        batch_idx = idx + 1
        outputs = model(inputs)
        if idx == 0:
            show_batch(inputs, outputs)
        loss = compute_loss(outputs, targets, model)
        val_loss += loss.item()
        normalize_size = (64, 64)
        targets = F.interpolate(targets,
                                normalize_size,
                                mode='bilinear',
                                align_corners=False).view(
                                    targets.size(0), targets.size(1),
                                    normalize_size[0] *
                                    normalize_size[1]).argmax(2)
        outputs = F.interpolate(outputs,
                                normalize_size,
                                mode='bilinear',
                                align_corners=False).view(
                                    outputs.size(0), outputs.size(1),
                                    normalize_size[0] *
                                    normalize_size[1]).argmax(2)
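        # recover grid coordinates from the flattened argmax index:
        # row = idx // width, col = idx % width, then normalize to [0, 1)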
        y_dis = (targets // normalize_size[0] -
                 outputs // normalize_size[0]) / float(normalize_size[1])
        x_dis = (targets % normalize_size[0] -
                 outputs % normalize_size[0]) / float(normalize_size[0])
        l2 = y_dis**2 + x_dis**2
        l2 = torch.sqrt(l2)
        n += len(l2)
        l2_sum += l2.sum(0).cpu()
        pbar.set_description(
            'loss: %8g, NME: %8g' %
            (val_loss / batch_idx, l2_sum.sum() / max(1, n.sum())))
    if dist.is_initialized():
        n = n.to(device)
        l2_sum = l2_sum.to(device)
        dist.all_reduce(n, op=dist.ReduceOp.SUM)
        dist.all_reduce(l2_sum, op=dist.ReduceOp.SUM)

    for c_i, c in enumerate(classes):
        print('cls: %8s, NME: %8g' % (c, l2_sum[c_i] / max(1, n[c_i])))
    return (l2_sum.sum() / max(1, n.sum())).item()
Example #6
def test(model, fetcher, distributed=False):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    total_size = 0
    # true positive / intersection
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)
    with torch.no_grad():
        pbar = tqdm(enumerate(fetcher), total=len(fetcher))
        for idx, (inputs, targets) in pbar:
            batch_idx = idx + 1
            outputs = model(inputs)
            loss = compute_loss(outputs, targets)
            val_loss += loss.item()
            predicted = outputs
            if idx == 0:
                show_batch('test_batch.png', inputs.cpu(), predicted.cpu())
            predicted = predicted.max(1)[1].view(-1)
            targets = targets.max(1)[1].view(-1)
            eq = predicted.eq(targets)
            total_size += predicted.size(0)
            for c_i, c in enumerate(classes):
                indices = targets.eq(c_i)
                positive = indices.sum().item()
                tpi = eq[indices].sum().item()
                fni = positive - tpi
                fpi = predicted.eq(c_i).sum().item() - tpi
                tp[c_i] += tpi
                fn[c_i] += fni
                fp[c_i] += fpi
            T, P, R, miou, F1 = compute_metrics(tp, fn, fp)
            pbar.set_description(
                'loss: %8g, mAP: %8g, F1: %8g, miou: %8g' %
                (val_loss / batch_idx, P.mean(), F1.mean(), miou.mean()))
    if distributed:
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        T, P, R, miou, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    for c_i, c in enumerate(classes):
        print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g' %
              (c, T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    return miou.mean().item()
Example #7
    def post_train_step(self, outputs, batch, batch_idx, epoch):
        _, targets, _, _, _ = batch
       
        # Loss
        loss, loss_items = compute_loss(outputs, targets, self.model)
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss_items)
            exit(-1)

        loss *= self.batch_size / 64  # scale loss

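        # update the EMA weights only on optimizer steps, i.e. every self.accumulate batches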
        if self.calc_ni(batch_idx, epoch) % self.accumulate == 0:
            self.ema.update(self.model)

        return loss, loss_items
Example #8
    def train_batch(self, batch: TorchData, model: nn.Module, epoch_idx: int,
                    batch_idx: int) -> Dict[str, torch.Tensor]:

        # opt = get_cli_args(
        #     batch_size=pedl_batch_size, prebias=pedl_prebias, accumulate=pedl_accumulate
        # )  # This seems to impact performance => replacing it with just the values
        (imgs, targets, paths, _) = batch

        imgs = imgs.float() / 255.0

        pred = model(imgs)
        loss, loss_items = compute_loss(pred, targets, model, not pedl_prebias)

        loss *= opt.batch_size / (pedl_batch_size * pedl_accumulate)

        if not torch.isfinite(loss):
            print("WARNING: non-finite loss, ending training ", loss_items)

        return {"loss": loss}
Example #9
def forward(self, x, targets=None):
    img_dim = x.shape[2]
    loss = 0
    layer_outputs, yolo_outputs = [], []
    for i, (module_def,
            module) in enumerate(zip(self.module_defs, self.module_list)):
        if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
            x = module(x)
        elif module_def["type"] == "route":
            x = torch.cat([
                layer_outputs[int(layer_i)]
                for layer_i in module_def["layers"].split(",")
            ], 1)
        elif module_def["type"] == "shortcut":
            layer_i = int(module_def["from"])
            x = layer_outputs[-1] + layer_outputs[layer_i]
        elif module_def["type"] == "yolo":
            x, predictions = module[0](x, img_dim)
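            # accumulate the detection loss from each YOLO head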
            layer_loss = compute_loss(predictions, targets, module[0])
            loss += layer_loss
            yolo_outputs.append(x)
        layer_outputs.append(x)
    yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
    return yolo_outputs if targets is None else (loss, yolo_outputs)
Example #10
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    seen = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP',
                                 'F1')
    p, r, f1, mp, mr, mAP, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (imgs, targets) in pbar:
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        val_loss += compute_loss(train_out, targets,
                                 model).item()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        # Plot images with bounding boxes
        if idx == 0:
            show_batch(imgs, output)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > 0.5 and m[bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu().numpy(),
                          pred[:, 6].cpu().numpy(), tcls))
        pbar.set_description('loss: %8g' % (val_loss / (idx + 1)))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]

    # sync stats
    if dist.is_initialized():
        for i in range(len(stats)):
            stat = torch.FloatTensor(stats[i]).to(device)
            ls = torch.IntTensor([len(stat)]).to(device)
            ls_list = [
                torch.IntTensor([0]).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(ls_list, ls)
            ls_list = [ls_item.item() for ls_item in ls_list]
            max_ls = max(ls_list)
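            # dist.all_gather needs equal-length tensors, so pad each rank's
            # stats to the longest length and trim after gathering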
            if len(stat) < max_ls:
                stat = torch.cat(
                    [stat, torch.zeros(max_ls - len(stat)).to(device)])
            stat_list = [
                torch.zeros(max_ls).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(stat_list, stat)
            stat_list = [
                stat_list[si][:ls_list[si]]
                for si in range(dist.get_world_size()) if ls_list[si] > 0
            ]
            stat = torch.cat(stat_list)
            stats[i] = stat.cpu().numpy()

    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=num_classes)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1))

    # Print results per class
    for i, c in enumerate(ap_class):
        print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
    # Return results
    mAPs = np.zeros(num_classes) + mAP
    for i, c in enumerate(ap_class):
        mAPs[c] = ap[i]
    # return (mp, mr, mAP, mf1, *(loss / len(dataloader)).tolist()), mAPs
    return mAP
Example #11
    def validation_step(self, opt, outputs, batch, batch_idx, epoch):
        imgs, targets, paths, shapes, pad = batch
        _, _, height, width = imgs.shape
        
        inf_out, train_out = outputs
        whwh = torch.Tensor([width, height, width, height]).to(imgs.device)

        losses = compute_loss(train_out, targets, self.model)[1][:3]  # GIoU, obj, cls
        output = non_max_suppression(inf_out, conf_thres=opt.conf_thres, iou_thres=opt.iou_thres,
                                     multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            self.seen += 1

            if pred is None:
                if nl:
                    self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], self.niou, dtype=torch.bool, device=imgs.device)

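            # greedily match predictions to targets class by class; each target
            # can be claimed by at most one prediction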
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > self.iouv[0].to(ious.device)).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > self.iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            self.stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
        return losses
Example #12
            img_size = img_tensor.size()[2]  # TODO: only square inputs supported for now, e.g. 416x416
            ######

            ### 训练过程主要包括以下几个步骤:
            # (1) 前传
            #print('img_tensor:', img_tensor[0][1][208][208])
            p, p_box = model(
                img_tensor
            )  # tuple, have 3 tensors; tensor[0]: (64, 3, 13, 13, 4)

            # (2) compute the loss
            ######  clw add: for debugging, break in build_target() first to get the target size, then catch the same target size here
            # if target_tensor.size()[0] == 679:
            #     print('aaa')
            ######
            loss, loss_items = compute_loss(p, p_box, target_tensor, model,
                                            img_size)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ',
                                loss_items)

            # (3) loss: backpropagate to compute gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update parameters and zero the gradients
            ni = i + nb * epoch  # number integrated batches (since train start)
            if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
                optimizer.step()
Example #13
def train():

    # 0. Initialize parameters (set random seed, read cfg info)
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0. Print config file info, write logs, etc.
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1. Load the model
    model = Darknet(cfg).to(device)

    if weights.endswith('.pt'):

        ### model.load_state_dict(torch.load(weights)['model'])
        # Fails because it ignores the layer whose class count may not match,
        # i.e. the layer right before each yolo_layer; it raises "size mismatch
        # for module_list.81.Conv2d.weight: copying a param with shape
        # torch.Size([255, 1024, 1, 1]) from checkpoint, the shape in current
        # model is torch.Size([75, 1024, 1, 1])."
        # TODO: map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif weights.endswith('.pth'):    # e.g. 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        #try:
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i+block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i+block_cnt+2]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i+block_cnt-2]] = chkpt[chkpt_keys[i]]
                if i % 5 == 4:
                    block_cnt += 1
                    state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]


        #chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(state_dict, strict=False)

        # model.load_state_dict(chkpt['model'])

        # except KeyError as e:
        #     s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
        #     raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' +  repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' +  repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    # else:
    #     raise Exception("pretrained model's path can't be NULL!")

    # 2. Set up the optimizer and learning rate
    start_epoch = 0
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=momentum, weight_decay=weight_decay, nesterov=True)  # TODO:nesterov ?  weight_decay=0.0005 ?

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0

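    # weight decay is applied only to conv weights (pg1); biases (pg2) and the
    # remaining params (pg0) are left undecayed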
    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2


    ###### required by apex ######
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)  # clw note: multi-GPU; call DistributedDataParallel after amp.initialize(), otherwise it errors
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level


    ######
    model.nc = nc

    #### Step learning-rate schedule
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(total_epochs * x) for x in [0.8, 0.9]], gamma=0.1)
    ### Cosine learning-rate schedule
    #lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2
    #scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # 3. Load the dataset
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8, # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)


    # 4. Train
    print('')   # newline
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)

    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()    # tensorboard --logdir=runs, view at http://localhost:6006/

    prebias = start_epoch == 0

    for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
        model.train()  # set here because test.test(), called after each epoch, switches the model to eval()

        # # Prebias
        # if prebias:
        #     if epoch < 3:  # prebias
        #         ps = 0.1, 0.9  # prebias settings (lr=0.1, momentum=0.9)
        #     else:  # normal training
        #         ps = lr0, momentum  # normal training settings
        #         print_model_biases(model)
        #         prebias = False
        #
        #     # Bias optimizer settings
        #     optimizer.param_groups[2]['lr'] = ps[0]
        #     if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
        #         optimizer.param_groups[2]['momentum'] = ps[1]

        start = time.time()
        title = ('\n' + '%10s' * 11 ) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size', 'lr', 'time_use')
        print(title)
        #pbar = tqdm(dataloader, ncols=20)  # ncols sets the bar width; pick it large enough to avoid line wrapping while keeping the output tidy.
        #for i, (img_tensor, target_tensor, img_path, _) in enumerate(pbar):

        # # Freeze darknet53.conv.74 for first epoch
        # freeze_backbone = False
        # if freeze_backbone and (epoch < 3):
        #     for i, (name, p) in enumerate(model.named_parameters()):
        #         if int(name.split('.')[2]) < 75:  # if layer < 75  # index [2] for multi-GPU, [1] for single GPU
        #             p.requires_grad = False if (epoch < 3) else True

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):

            # # SGD burn-in
            # ni = epoch * nb + i
            # if ni <= 1000:  # n_burnin = 1000
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr

            batch_start = time.time()
            #print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)
            ### The training loop consists of the following steps:
            # (1) forward pass
            #print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) compute the loss
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) loss: backpropagate to compute gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update parameters and zero the gradients
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            #s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)
            #s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  optimizer.state_dict()['param_groups'][0]['lr'], time.time()-batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)

            if i % 10 == 0:
                print(s)
                
            # Plot
            if epoch == start_epoch and i == 0:
                fname = 'train_batch.jpg' # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))

        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path = log_file_path)

        # Tensorboard
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # save model
        chkpt = {'epoch': epoch,
                 'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),  # clw note: multi-GPU
                 'optimizer': optimizer.state_dict()}

        torch.save(chkpt, last_model_path)

    print('end')
Example #14
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr
            scheduler.step(len(dataloader) * epoch + i)

            batch_start = time.time()
            #print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)
            ### The training loop consists of the following steps:
            # (1) forward pass
            #print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) compute the loss
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) loss: backpropagate to compute gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update parameters and zero the gradients
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()
Example #15
def channels_select(prune_cfg, data, origin_model, aux_util, device,
                    data_loader, select_layer, pruned_rate):
    with open(progress_result, 'a') as f:
        f.write(('\n' + '%10s' * 9 + '\n') %
                ('Stage', 'Change', 'MSELoss', 'AuxLoss', 'Total', 'P', 'R',
                 '[email protected]', 'F1'))
    logger.info(('%10s' * 6) %
                ('Stage', 'Channels', 'Batch', 'MSELoss', 'AuxLoss', 'Total'))

    batch_size = data_loader.batch_size
    img_size = data_loader.dataset.img_size
    accumulate = 64 // batch_size
    hook_util = HookUtils()
    handles = []
    n_iter = math.floor(500 / batch_size)

    pruning_model = Darknet(prune_cfg,
                            img_size=(img_size, img_size)).to(device)
    chkpt = torch.load(progress_chkpt, map_location=device)
    pruning_model.load_state_dict(chkpt['model'], strict=True)

    aux_in_layer = aux_util.conv_layer_dict[select_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)

    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))

    del chkpt

    solve_sub_problem_optimizer = optim.SGD(
        pruning_model.module_list[int(aux_in_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])

    for name, child in origin_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))

    for name, child in pruning_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        origin_model = torch.nn.parallel.DistributedDataParallel(
            origin_model, find_unused_parameters=True)
        origin_model.yolo_layers = origin_model.module.yolo_layers
        pruning_model = torch.nn.parallel.DistributedDataParallel(
            pruning_model, find_unused_parameters=True)
        pruning_model.yolo_layers = pruning_model.module.yolo_layers

    retain_channels_num = math.floor(
        aux_util.layer_info[select_layer]["in_channels"] * (1 - pruned_rate))
    pruning_model.nc = 80
    pruning_model.hyp = hyp
    pruning_model.arc = 'default'
    pruning_model.eval()
    aux_model.eval()
    MSE = nn.MSELoss(reduction='mean')
    mloss = torch.zeros(3).to(device)

    for i_k in range(retain_channels_num):

        data_iter = iter(data_loader)
        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:

            imgs, targets, _, _ = next(data_iter)

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss + 0 * pruning_loss

            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'Prune ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, retain_channels_num), hyp['joint_loss'] * mse_loss,
                aux_loss, loss)
            pbar.set_description(s)

            # if (i + 1) % 10 == 0:
            #     logger.info(('%10s' * 3 + '%10.3g' * 3) %
            #                 ('Prune' + select_layer, str(i_k), '%g/%g' % (i, n_iter), hyp['joint_loss'] * mse_loss,
            #                  aux_loss, loss))

            hook_util.clean_hook_out()

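        # channel importance score: squared gradient of the MaskConv2d weights,
        # L2-normed over the spatial dims and summed over output filters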
        grad = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.weight.grad.detach()**2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            pruning_model.module.module_list[int(
                select_layer)].MaskConv2d.selected_channels_mask[:] = 1e-5
            if select_layer in aux_util.sync_guide.keys():
                sync_layer = aux_util.sync_guide[select_layer]
                pruning_model.module.module_list[int(
                    sync_layer)].MaskConv2d.selected_channels_mask[(
                        -1 * aux_util.layer_info[select_layer]["in_channels"]
                    ):] = 1e-5

        selected_channels_mask = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)

        pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask[indices] = 1
        if select_layer in aux_util.sync_guide.keys():
            pruning_model.module.module_list[int(
                sync_layer)].MaskConv2d.selected_channels_mask[-(
                    aux_util.layer_info[select_layer]["in_channels"] -
                    indices)] = 1

        pruning_model.zero_grad()

        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:

            imgs, targets, _, _ = next(data_iter)

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss_scalar * aux_loss + 0 * pruning_loss

            loss.backward()

            if i % accumulate == 0:
                solve_sub_problem_optimizer.step()
                solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i +
                     torch.cat([hyp['joint_loss'] * mse_loss, aux_loss, loss
                                ]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'SubProm ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, retain_channels_num), *mloss)
            pbar.set_description(s)

            if (i + 1) % n_iter == 0:
                logger.info(('%10s' * 3 + '%10.3g' * 3) %
                            ('SubPro' + select_layer, str(i_k), '%g/%g' %
                             (i, n_iter), *mloss))

            hook_util.clean_hook_out()

    for handle in handles:
        handle.remove()

    greedy_indices = pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask < 1
    pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask[greedy_indices] = 0

    res, _ = test.test(prune_cfg,
                       data,
                       batch_size=batch_size * 2,
                       img_size=416,
                       model=pruning_model,
                       conf_thres=0.1,
                       iou_thres=0.5,
                       save_json=False,
                       dataloader=None)

    chkpt = torch.load(progress_chkpt, map_location=device)
    chkpt['current_layer'] = aux_util.next_prune_layer(select_layer)
    chkpt['epoch'] = -1
    chkpt['model'] = pruning_model.module.state_dict() if type(
        pruning_model
    ) is nn.parallel.DistributedDataParallel else pruning_model.state_dict()
    chkpt['optimizer'] = None

    torch.save(chkpt, progress_chkpt)

    torch.save(chkpt, last)
    del chkpt

    with open(progress_result, 'a') as f:
        f.write(('%10s' * 2 + '%10.3g' * 7) %
                ('Pruning ' + select_layer,
                 str(aux_util.layer_info[select_layer]['in_channels']) + '->' +
                 str(retain_channels_num), *mloss, *res[:4]) + '\n')

    torch.cuda.empty_cache()
Example #16
def YOLO_Gradcam(model, dataloader, device, args):
    l = len(dataloader)
    j = 0
    model.eval()
    for i, (imgs, labels, paths, _) in enumerate(dataloader):
        imgs = imgs.to(device).float() / 255.0
        labels = labels.to(device)

        for img, path in tqdm(zip(imgs, paths)):
            # One (image, bboxes) per time #
            img = torch.stack([img])
            id = labels[:, 0] == j
            _, y_hat, fts = model(imgs, [model.yolo_layers[args['head']] - 1])
            j += 1

            # Saving features
            fts[0].register_hook(save_)
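            # save_ (defined at module level) is assumed to stash the backward
            # gradients into the global `gradients` list read below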

            # Computing loss and backward
            loss, _ = compute_loss(y_hat, labels[id], model)
            model.zero_grad()
            loss.backward(retain_graph=True)

            ###########
            # Gradcam #
            ###########
            # getting gradients and features
            grads_val = gradients[0]
            target = fts[0]
            target = target[0, :]
            # weighting gradients in cam
            weights = torch.mean(grads_val, axis=(2, 3))[0, :]
            cam = torch.zeros(target.shape[1:],
                              device=device,
                              dtype=torch.float32)
            for i, w in enumerate(weights):
                cam += w * target[i, :, :]
            # creating the mask
            cam = torch.where(cam > 0, cam,
                              torch.tensor(0., device=device))
            resize = Transforms.Compose([
                Transforms.ToPILImage(),
                Transforms.Resize(img.shape[2:]),
                Transforms.ToTensor()
            ])
            cam = resize(torch.stack([
                cam.cpu()
            ]))[0]  # torchvision resizes only 3D+ tensors, not 2D
            cam = cam - torch.min(cam)
            mask = cam / torch.max(cam)

            # creating a name to grad image
            ext = path.split('.')[-1]
            name = path.split(os.sep)[-1].split('.')[0]
            grad_name = f"{args['output']}{os.sep}{name}_{args['head']}_{'all'}.{ext}"
            orig_name = f"{args['output']}{os.sep}{name}.{ext}"
            # Saving results
            img = cv2.cvtColor(img[0].cpu().numpy().transpose(1, 2, 0),
                               cv2.COLOR_RGB2BGR)
            show_cam_on_image(img, mask, grad_name)
            cv2.imwrite(orig_name, np.uint8(255 * img))
Example #17
def smooth_bbox_losses(p, targets, model):
    utils.compute_loss(p, targets, model)
    return 0
Example #18
File: infer_PEC.py  Project: 562225807/SEDS
def main(args):
    # loading configurations
    with open(args.config) as f:
        config = yaml.safe_load(f)["configuration"]

    name = config["Name"]

    # Construct or load embeddings
    print("Initializing embeddings ...")
    vocab_size = config["embeddings"]["vocab_size"]
    embed_size = config["embeddings"]["embed_size"]
    per_num = config["embeddings"]["person_num"]
    per_embed_size = config["embeddings"]["person_embed_size"]
    embeddings = init_embeddings(vocab_size, embed_size, name=name)

    print("\tDone.")

    # Build the model and compute losses
    source_ids = tf.placeholder(tf.int32, [None, 40], name="source")
    target_ids = tf.placeholder(tf.int32, [None, 40], name="target")
    person_ids = tf.placeholder(tf.int32, [None], name="person_ids")
    lexicons_ids = tf.placeholder(tf.int32, [per_num, 1000],
                                  name="lexicons_ids")
    spectrogram = tf.placeholder(tf.float32, [None, 400, 200], name="audio")
    sequence_mask = tf.placeholder(tf.bool, [None, 40], name="mask")
    choice_qs = tf.placeholder(tf.float32, [None, 40], name="choice")
    emo_cat = tf.placeholder(tf.int32, [None], name="emotion_category")
    is_train = tf.placeholder(tf.bool)

    (enc_num_layers, enc_num_units, enc_cell_type, enc_bidir, dec_num_layers,
     dec_num_units, dec_cell_type, state_pass, num_emo, emo_cat_units,
     emo_int_units, infer_batch_size, beam_size, max_iter, attn_num_units,
     l2_regularize, word_config, spectrogram_config, lstm_int_num, batch_size,
     loss_weight) = get_PEC_config(config)

    print("Building model architecture ...")
    CE, loss, cla_loss, train_outs, infer_outputs, score = compute_loss(
        source_ids, target_ids, sequence_mask, choice_qs, embeddings,
        enc_num_layers, enc_num_units, enc_cell_type, enc_bidir,
        dec_num_layers, dec_num_units, dec_cell_type, state_pass, num_emo,
        emo_cat, emo_cat_units, emo_int_units, infer_batch_size, spectrogram,
        word_config, per_num, person_ids, per_embed_size, spectrogram_config,
        loss_weight, lstm_int_num, is_train, False, lexicons_ids, beam_size,
        max_iter, attn_num_units, l2_regularize, name)
    print("\tDone.")

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    (logdir, restore_from, learning_rate, gpu_fraction, max_checkpoints,
     train_steps, batch_size, print_every, checkpoint_every, s_filename,
     t_filename, q_filename, s_max_leng, t_max_leng, dev_s_filename,
     dev_t_filename, dev_q_filename, loss_fig, perp_fig, sp_filename,
     sp_max_leng, test_s_filename, test_t_filename, test_q_filename,
     test_output) = get_training_config(config, "training")

    # Set up session
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                            gpu_options=gpu_options))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars

    saver = tf.train.Saver(var_list=var_list,
                           max_to_keep=max_checkpoints)

    try:
        saved_global_step = load(saver, sess, logdir)
        if saved_global_step is None:
            raise ValueError("Cannot find the checkpoint to restore from.")

    except Exception:
        print("Something went wrong while restoring the checkpoint.")
        raise

    # ##### Inference #####
    # Load data
    print("Loading inference data ...")

    # id_0, id_1, id_2 preserved for SOS, EOS, constant zero padding
    embed_shift = 3

    lexicons = load_lexicons() + embed_shift

    test_source_sentences_ids, test_source_person, test_source_data = loadfile(
        test_s_filename, is_dialog=True, is_source=True, max_length=s_max_leng)
    test_source_data += embed_shift
    test_target_sentences_ids, test_target_person, test_target_data, test_category_data = loadfile(
        test_t_filename,
        is_dialog=True,
        is_source=False,
        max_length=t_max_leng)
    test_target_data += embed_shift
    test_spectrogram_data = load_spectrogram(sp_filename,
                                             test_source_sentences_ids)
    test_choice_data = loadfile(test_q_filename,
                                is_dialog=False,
                                is_source=False,
                                max_length=t_max_leng)
    test_choice_data[test_choice_data < 0] = 0
    test_choice_data = test_choice_data.astype(np.float32)

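    # mask real tokens (ids >= embed_shift), prepend True for the SOS step, and
    # drop the last step so the mask lines up with the shifted targets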
    test_masks = (test_target_data >= embed_shift)
    test_masks = np.append(np.ones([len(test_masks), 1], dtype=bool),
                           test_masks,
                           axis=1)
    test_masks = test_masks[:, :-1]
    print("\tDone.")

    # test
    print("testing")
    if test_source_data is not None:
        CE_words = N_words = 0.0
        for start in range(0, len(test_source_data), batch_size):
            test_feed_dict = {
                source_ids: test_source_data[start:start + batch_size],
                target_ids: test_target_data[start:start + batch_size],
                person_ids: test_target_person[start:start + batch_size],
                spectrogram: test_spectrogram_data[start:start + batch_size],
                choice_qs: test_choice_data[start:start + batch_size],
                emo_cat: test_category_data[start:start + batch_size],
                sequence_mask: test_masks[start:start + batch_size],
                lexicons_ids: lexicons,
                is_train: False,
            }
            CE_word, N_word = compute_test_perplexity(
                sess, CE, test_masks[start:start + batch_size], test_feed_dict)
            CE_words += CE_word
            N_words += N_word

        print("test_perp: {:.3f}".format(np.exp(CE_words / N_words)))

        infer_results = []
        for start in range(0, len(test_source_data), infer_batch_size):
            # infer_result = sess.run(infer_outputs,
            #                         feed_dict={source_ids: test_source_data[start:start + infer_batch_size],
            #                                    spectrogram: test_spectrogram_data[start:start + infer_batch_size],
            #                                    person_ids: test_target_person[start:start + infer_batch_size],
            #                                    emo_cat: test_category_data[start:start + infer_batch_size],
            #                                    lexicons_ids: lexicons,
            #                                    is_train: False,
            #                                    })
            #
            # infer_result = infer_result.ids[:, :, 0]
            # if infer_result.shape[1] < max_iter:
            #     l_pad = max_iter - infer_result.shape[1]
            #     infer_result = np.concatenate((infer_result, np.ones((infer_batch_size, l_pad))), axis=1)
            # else:
            #     infer_result = infer_result[:, :max_iter]
            tmp_result = []
            scores = []
            for i in range(num_emo):
                cat = i * np.ones(
                    [len(test_target_person[start:start + infer_batch_size])])
                infer_result, sco = sess.run(
                    [infer_outputs, score],
                    feed_dict={
                        source_ids:
                        test_source_data[start:start + infer_batch_size],
                        spectrogram:
                        test_spectrogram_data[start:start + infer_batch_size],
                        #spectrogram: np.zeros([len(test_source_data[start:start + infer_batch_size]), 400, 200]),
                        person_ids:
                        test_target_person[start:start + infer_batch_size],
                        emo_cat:
                        cat,
                        lexicons_ids:
                        lexicons,
                        is_train:
                        False,
                    })

                infer_result = infer_result.ids[:, :, 0]
                if infer_result.shape[1] < max_iter:
                    l_pad = max_iter - infer_result.shape[1]
                    infer_result = np.concatenate(
                        (infer_result, np.ones((infer_batch_size, l_pad))),
                        axis=1)
                else:
                    infer_result = infer_result[:, :max_iter]
                tmp_result.append(infer_result)
                scores.append(sco)
            tmp_result = np.transpose(np.array(tmp_result), [1, 0, 2])
            scores = np.array(scores)
            scores = np.exp(scores) / np.sum(np.exp(scores), axis=0)
            scores = np.transpose(np.array(scores), [1, 0])
            scores[range(infer_batch_size),
                   test_category_data[start:start + infer_batch_size]] += 1
            ind = np.argmax(scores, axis=-1)
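            # `scores` was softmax-normalized across the num_emo candidate emotions
            # and then biased by +1 at the ground-truth category, so `ind` (the
            # rescored choice) almost always matches it; the selection below indexes
            # tmp_result by the ground-truth category directly. This block assumes
            # len(test_source_data) is divisible by infer_batch_size.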
            infer_results.extend(
                tmp_result[range(tmp_result.shape[0]),
                           test_category_data[start:start + infer_batch_size]])
            #infer_results.extend(infer_result)

        final_result = np.array(infer_results) - embed_shift
        final_result[final_result >= vocab_size] -= (vocab_size + embed_shift)

        final_result = id2_word(final_result.astype(int).tolist())
        with open(os.path.join(test_output, "PEC_out_emo.tsv"), "w") as f:
            f.writelines('\n'.join([
                "0\t0\t" + str(emo) + "\t" + ' '.join(sen)
                for emo, sen in zip(test_category_data, final_result)
            ]) + '\n')
        with open(os.path.join(test_output, "PEC_out_per.tsv"), "w") as f:
            f.writelines('\n'.join([
                "0\t0\t" + str(per) + "\t" + ' '.join(sen)
                for per, sen in zip(test_target_person, final_result)
            ]) + '\n')
Example #19
0
def fine_tune(prune_cfg,
              data,
              aux_util,
              device,
              train_loader,
              test_loader,
              epochs=10):
    with open(progress_result, 'a') as f:
        f.write(('\n' + '%10s' * 10 + '\n') %
                ('Stage', 'Epoch', 'DIoU', 'obj', 'cls', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))

    batch_size = train_loader.batch_size
    img_size = train_loader.dataset.img_size
    accumulate = 64 // batch_size
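    # gradient accumulation: the effective batch is batch_size * accumulate ~= 64,
    # and the batch_size / 64 loss scaling below keeps gradient magnitudes
    # comparable to a true batch of 64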
    hook_util = HookUtils()

    pruned_model = Darknet(prune_cfg, img_size=(img_size, img_size)).to(device)

    chkpt = torch.load(progress_chkpt, map_location=device)
    pruned_model.load_state_dict(chkpt['model'], strict=True)

    current_layer = chkpt['current_layer']
    aux_in_layer = aux_util.conv_layer_dict[current_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)

    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))
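    # deeper aux layers get a larger loss weight, e.g. aux_in_layer '37' gives
    # max(0.01, ((37 + 1) / 75) ** 2) ~= 0.257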

    start_epoch = chkpt['epoch'] + 1

    if start_epoch == epochs:
        return current_layer  # fine-tuning finished; return the name of the layer to prune

    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in dict(pruned_model.named_parameters()).items():
        if 'MaskConv2d.weight' in k:
            pg1 += [v]  # parameter group 1 (apply weight_decay)
        else:
            pg0 += [v]  # parameter group 0

    for v in aux_model.parameters():
        pg0 += [v]  # parameter group 0

    optimizer = optim.SGD(pg0,
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    del pg0, pg1

    if chkpt['optimizer'] is not None:
        optimizer.load_state_dict(chkpt['optimizer'])

    del chkpt

    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[epochs // 3, 2 * (epochs // 3)], gamma=0.1)
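    # e.g. with epochs=10 the lr is cut 10x at epochs 3 and 6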
    scheduler.last_epoch = start_epoch - 1

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        pruned_model = nn.parallel.DistributedDataParallel(
            pruned_model, find_unused_parameters=True)
        pruned_model.yolo_layers = pruned_model.module.yolo_layers

    # -------------start train-------------
    nb = len(train_loader)
    pruned_model.nc = 80
    pruned_model.hyp = hyp
    pruned_model.arc = 'default'
    for epoch in range(start_epoch, epochs):

        # -------------register hook for model-------------
        # unwrap DDP if present so this also works on a single GPU
        bare_model = pruned_model.module if isinstance(
            pruned_model, nn.parallel.DistributedDataParallel) else pruned_model
        for name, child in bare_model.module_list.named_children():
            if name == aux_in_layer:
                handle = child.register_forward_hook(
                    hook_util.hook_prune_output)

        # -------------register hook for model-------------

        pruned_model.train()
        aux_model.train()

        print(('\n' + '%10s' * 7) %
              ('Stage', 'Epoch', 'gpu_mem', 'DIoU', 'obj', 'cls', 'total'))

        # -------------start batch-------------
        mloss = torch.zeros(4).to(device)
        pbar = tqdm(enumerate(train_loader), total=nb)
        for i, (img, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            ni = nb * epoch + i
            img = img.to(device).float() / 255.0
            targets = targets.to(device)

            pruned_pred = pruned_model(img)
            pruned_loss, pruned_loss_items = compute_loss(
                pruned_pred, targets, pruned_model)
            pruned_loss *= batch_size / 64

            hook_util.cat_to_gpu0()

            aux_pred = aux_model(hook_util.prune_features['gpu0'][0], targets)

            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            aux_loss *= aux_loss_scalar * batch_size / 64

            loss = pruned_loss + aux_loss
            loss.backward()

            hook_util.clean_hook_out()
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            pruned_loss_items[2] += aux_loss.item()
            mloss = (mloss * i + pruned_loss_items) / (i + 1)
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 4) % (
                'FiTune ' + current_layer, '%g/%g' % (epoch, epochs - 1),
                '%.3gG' % mem, *mloss)
            pbar.set_description(s)
        # -------------end batch-------------

        scheduler.step()
        handle.remove()

        results, _ = test.test(prune_cfg,
                               data,
                               batch_size=batch_size * 2,
                               img_size=416,
                               model=pruned_model,
                               conf_thres=0.1,
                               iou_thres=0.5,
                               save_json=False,
                               dataloader=test_loader)
        """
        chkpt = {'current_layer':
                 'epoch':
                 'model': 
                 'optimizer': 
                 'aux_in12': 
                 'aux_in37':
                 'aux_in62':
                 'aux_in75':
                 'prune_guide':}
        """
        chkpt = torch.load(progress_chkpt, map_location=device)
        chkpt['current_layer'] = current_layer
        chkpt['epoch'] = epoch
        chkpt['model'] = pruned_model.module.state_dict() if isinstance(
            pruned_model,
            nn.parallel.DistributedDataParallel) else pruned_model.state_dict()
        chkpt['optimizer'] = None if epoch == epochs - 1 else optimizer.state_dict()
        chkpt['aux_in{}'.format(aux_in_layer)] = aux_model.state_dict()

        torch.save(chkpt, progress_chkpt)

        torch.save(chkpt, last)

        if epoch == epochs - 1:
            torch.save(chkpt,
                       '../weights/DCP/backup{}.pt'.format(current_layer))

        del chkpt

        with open(progress_result, 'a') as f:
            f.write(('%10s' * 2 + '%10.3g' * 8) %
                    ('FiTune ' + current_layer, '%g/%g' %
                     (epoch, epochs - 1), *mloss, *results[:4]) + '\n')
    # -------------end train-------------
    torch.cuda.empty_cache()
    return current_layer
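
The forward-hook pattern fine_tune relies on can be reduced to the following minimal, runnable sketch (HookUtils is assumed to wrap something similar): a hook captures a layer's output during the forward pass so an auxiliary consumer can use it afterwards.

import torch
import torch.nn as nn

features = []

def capture(module, inputs, output):
    features.append(output.detach())  # stash the intermediate activation

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
handle = model[0].register_forward_hook(capture)
_ = model(torch.randn(1, 3, 32, 32))
handle.remove()  # always remove the hook once the features are harvested
print(features[0].shape)  # torch.Size([1, 8, 30, 30])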
Example #20
0
def train():
    img_size, img_size_test = opt.img_size if len(
        opt.img_size) == 2 else opt.img_size * 2  # train, test sizes
    epochs = opt.epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights

    # remove previous results
    for f in glob.glob('*_batch*.png') + glob.glob(results_file):
        os.remove(f)

    # init model
    model = UltraNet().to(device)
    model.apply(weights_init_normal)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters())

    # cosine lr
    lf = lambda x: (1 + math.cos(x * math.pi / epochs)) / 2 * 0.99 + 0.01  # cosine decay: 1.0 -> 0.01
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(epochs * x) for x in [0.8, 0.9]], gamma=0.1)
    scheduler.last_epoch = 0

    root = "/share/DAC2020/dataset/"
    dataset = DACDataset(root, "train", BaseTransform(320, 160))

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=not opt.rect,  # shuffle unless rectangular training is used
        pin_memory=True,
        #collate_fn=dataset.collate_fn
    )

    # Testloader
    testloader = torch.utils.data.DataLoader(
        DACDataset(root, "test", BaseTransform(320, 160)),
        batch_size=batch_size * 2,
        num_workers=nw,
        pin_memory=True,
        #collate_fn=dataset.collate_fn
    )

    nc = 13
    model.nc = nc  # attach number of classes to model
    model.arc = opt.arc  # attach yolo architecture
    model.hyp = hyp  # attach hyperparameters to model
    #model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model.class_weights = torch.ones(13) / 13
    maps = np.zeros(nc)  # mAP per class
    results = None  # guards the early return on non-finite loss below

    for epoch in range(opt.epochs):
        model.train()
        start_time = time.time()
        train_loss = 0
        pbar = tqdm(enumerate(dataloader),
                    total=len(dataloader))  # progress bar
        for batch_i, (_, imgs, targets) in pbar:
            batches_done = len(dataloader) * epoch + batch_i

            imgs = Variable(imgs.to(device))
            targets = Variable(targets.to(device), requires_grad=False)

            # multi-scale is not used here

            # forward
            pred = model(imgs)

            # compute loss
            loss, loss_items = compute_loss(pred, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results
            loss = loss * batch_size / 64  # scale before backward, not after
            loss.backward()

            train_loss = (train_loss * batch_i + loss.item()) / (batch_i + 1)

            # optimize every accumulate
            if batches_done % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            s = ('%10s' + '%10.3g' * 3) % ('%g/%g' % (epoch, epochs - 1),
                                           train_loss, len(targets), img_size)
            pbar.set_description(s)

        # end one epoch
        scheduler.step()

        # process data of current epoch
        final_epoch = (epoch + 1 == epochs)
        results = test.test(
            batch_size=batch_size * 2,
            img_size=img_size_test,
            model=model,
            conf_thres=0.001,  # 0.001 if opt.evolve or (final_epoch and is_coco) else 0.01
            iou_thres=0.6,
            save_json=final_epoch and is_coco,
            single_cls=opt.single_cls,
            dataloader=testloader)

        # Write epoch results
        with open(results_file, 'a') as f:
            f.write(s + '%10.3g' * len(results) % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

        x = [train_loss] + list(results)
        titles = ['Train loss', 'iou', 'Test_loss', 'Giou loss', 'obj loss']
        for xi, title in zip(x, titles):
            tb_writer.add_scalar(title, xi, epoch)

        # Save training results
        save = (not opt.nosave) or (final_epoch)
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint
                chkpt = {
                    'epoch':
                    epoch,
                    # 'best_fitness': best_fitness,
                    'training_results':
                    f.read(),
                    'model':
                    model.module.state_dict()
                    if type(model) is nn.parallel.DistributedDataParallel else
                    model.state_dict(),
                    'optimizer':
                    None if final_epoch else optimizer.state_dict()
                }

            # Save last checkpoint
            torch.save(chkpt, last)

            # Delete checkpoint
            del chkpt

    # end training
    torch.cuda.empty_cache()
    return results
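
The optimize-every-`accumulate` pattern above is analogous to this minimal sketch (hypothetical sizes), which simulates an effective batch of batch_size * accumulate samples; note the loss is scaled before backward, matching the fix applied above.

import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulate = 4  # back-propagate 4 mini-batches per optimizer step
for step in range(1, 9):
    x, y = torch.randn(16, 10), torch.randn(16, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    (loss / accumulate).backward()  # scale before backward so grads average out
    if step % accumulate == 0:
        optimizer.step()
        optimizer.zero_grad()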
Example #21
0
def greedy_channel_select(origin_model, prune_cfg, origin_weights,
                          select_layer, device, aux_util, data_loader,
                          pruned_rate):
    init_state_dict = mask_converted(prune_cfg, origin_weights, target=None)

    prune_model = Darknet(prune_cfg).to(device)
    prune_model.load_state_dict(init_state_dict, strict=True)
    del init_state_dict
    solve_sub_problem_optimizer = optim.SGD(
        prune_model.module_list[int(select_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])
    hook_util = HookUtils()
    handles = []

    info = aux_util.layer_info[int(select_layer)]
    in_channels = info['in_channels']
    remove_k = math.floor(in_channels * pruned_rate)
    k = in_channels - remove_k
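    # e.g. 64 input channels at pruned_rate 0.5 -> remove_k = 32, keep k = 32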

    for name, child in origin_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_origin_input))

    aux_idx = aux_util.conv_layer_dict[select_layer]
    hook_layer_aux = aux_util.down_sample_layer[aux_idx]
    for name, child in prune_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_prune_input))
        elif name == hook_layer_aux:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_input))

    aux_net = aux_util.creat_aux_list(416,
                                      device,
                                      conv_layer_name=select_layer)
    chkpt_aux = torch.load(aux_weight, map_location=device)
    aux_net.load_state_dict(chkpt_aux['aux{}'.format(aux_idx)])
    del chkpt_aux

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        prune_model = torch.nn.parallel.DistributedDataParallel(
            prune_model, find_unused_parameters=True)
        prune_model.yolo_layers = prune_model.module.yolo_layers
        aux_net = torch.nn.parallel.DistributedDataParallel(
            aux_net, find_unused_parameters=True)

    nb = len(data_loader)
    prune_model.nc = 80
    prune_model.hyp = hyp
    prune_model.arc = 'default'
    prune_model.eval()
    aux_net.eval()
    MSE = nn.MSELoss(reduction='mean')

    greedy = torch.zeros(k)
    for i_k in range(k):
        pbar = tqdm(enumerate(data_loader), total=nb)
        print(('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                     'PdLoss', 'AuxLoss', 'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)
            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(
                aux_pred, aux_net, targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'Pruning ' + select_layer, '%.3gG' % mem, '%g/%g' % (i_k, k),
                mse_loss, pruning_loss, aux_loss, loss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

        # unwrap DDP if present so MaskConv2d is reachable on a single GPU as well
        bare_model = prune_model.module if isinstance(
            prune_model, nn.parallel.DistributedDataParallel) else prune_model
        mask_conv = bare_model.module_list[int(select_layer)].MaskConv2d

        # channel importance: L2 norm of the weight gradient per input channel
        grad = mask_conv.weight.grad.detach().clone() ** 2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            mask_conv.selected_channels_mask[:] = 1e-5
            _, non_greedy_indices = torch.topk(grad, k)
            logger.info('non greedy layer{}: selected==>{}'.format(
                select_layer, str(non_greedy_indices)))

        # greedily unmask the most important channel not yet selected
        _, indices = torch.topk(
            grad * (1 - mask_conv.selected_channels_mask), 1)
        mask_conv.selected_channels_mask[indices] = 1
        greedy[i_k] = indices
        logger.info('greedy layer{} iter{}: indices==>{}'.format(
            select_layer, str(i_k), str(indices)))

        prune_model.zero_grad()

        pbar = tqdm(enumerate(data_loader), total=nb)
        mloss = torch.zeros(4).to(device)
        print(('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                     'PdLoss', 'AuxLoss', 'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)
            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(
                aux_pred, aux_net, targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            solve_sub_problem_optimizer.step()
            solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i + torch.cat(
                [mse_loss, pruning_loss, aux_loss, loss]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'SubProm ' + select_layer, '%.3gG' % mem, '%g/%g' % (i_k, k),
                *mloss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

    for handle in handles:
        handle.remove()

    logger.info(
        ("greedy layer{}: selected==>{}".format(select_layer, str(greedy))))
Example #22
0
def main(args):
    # loading configurations
    with open(args.config) as f:
        config = yaml.safe_load(f)["configuration"]

    name = config["Name"]

    # Construct or load embeddings
    print("Initializing embeddings ...")
    vocab_size = config["embeddings"]["vocab_size"]
    embed_size = config["embeddings"]["embed_size"]
    per_num = config["embeddings"]["person_num"]
    per_embed_size = config["embeddings"]["person_embed_size"]
    ori_emb, ori_p_emb = load_embedding("model/emb.tsv")
    embeddings = init_embeddings(vocab_size,
                                 embed_size,
                                 initial_values=ori_emb,
                                 name=name)

    print("\tDone.")

    # Build the model and compute losses
    source_ids = tf.placeholder(tf.int32, [None, 40], name="source")
    target_ids = tf.placeholder(tf.int32, [None, 40], name="target")
    person_ids = tf.placeholder(tf.int32, [None], name="person_ids")
    lexicons_ids = tf.placeholder(tf.int32, [per_num, 1000],
                                  name="lexicons_ids")
    spectrogram = tf.placeholder(tf.float32, [None, 400, 200], name="audio")
    sequence_mask = tf.placeholder(tf.bool, [None, 40], name="mask")
    choice_qs = tf.placeholder(tf.float32, [None, 40], name="choice")
    emo_cat = tf.placeholder(tf.int32, [None], name="emotion_category")
    is_train = tf.placeholder(tf.bool)

    (enc_num_layers, enc_num_units, enc_cell_type, enc_bidir, dec_num_layers,
     dec_num_units, dec_cell_type, state_pass, num_emo, emo_cat_units,
     emo_int_units, infer_batch_size, beam_size, max_iter, attn_num_units,
     l2_regularize, word_config, spectrogram_config, lstm_int_num, batch_size,
     loss_weight) = get_PEC_config(config)

    print("Building model architecture ...")
    CE, loss, cla_loss, train_outs, infer_outputs, score = compute_loss(
        source_ids, target_ids, sequence_mask, choice_qs, embeddings,
        enc_num_layers, enc_num_units, enc_cell_type, enc_bidir,
        dec_num_layers, dec_num_units, dec_cell_type, state_pass, num_emo,
        emo_cat, emo_cat_units, emo_int_units, infer_batch_size, spectrogram,
        word_config, per_num, person_ids, per_embed_size, spectrogram_config,
        loss_weight, lstm_int_num, is_train, False, lexicons_ids, beam_size,
        max_iter, attn_num_units, l2_regularize, name)
    print("\tDone.")

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    (logdir, restore_from, learning_rate, gpu_fraction, max_checkpoints,
     train_steps, batch_size, print_every, checkpoint_every, s_filename,
     t_filename, q_filename, s_max_leng, t_max_leng, dev_s_filename,
     dev_t_filename, dev_q_filename, loss_fig, perp_fig, sp_filename,
     sp_max_leng, test_s_filename, test_t_filename, test_q_filename,
     test_output) = get_training_config(config, "training")

    is_overwritten_training = logdir != restore_from

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=1e-4)
    trainable = tf.trainable_variables()

    gradients = tf.gradients(loss, trainable)
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(gradients, 5.0)
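    # clip_by_global_norm rescales every gradient by 5.0 / max(global_norm, 5.0),
    # i.e. gradients are left untouched unless their joint norm exceeds 5.0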
    optim = optimizer.apply_gradients(zip(clipped_gradients, trainable))

    # optim = optimizer.minimize(loss, var_list=trainable)

    # Set up session
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                            gpu_options=gpu_options))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars

    saver = tf.train.Saver(var_list=var_list,  # include the BN moving stats gathered above
                           max_to_keep=max_checkpoints)

    # BN
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "Training is terminated to avoid the overwriting.")
        raise

    # ##### Training #####
    # Load data
    print("Loading data ...")

    # id_0, id_1, id_2 preserved for SOS, EOS, constant zero padding
    embed_shift = 3

    lexicons = load_lexicons() + embed_shift

    source_sentences_ids, source_person, source_data = loadfile(
        s_filename, is_dialog=True, is_source=True, max_length=s_max_leng)
    source_data += embed_shift
    target_sentences_ids, target_person, target_data, category_data = loadfile(
        t_filename, is_dialog=True, is_source=False, max_length=t_max_leng)
    target_data += embed_shift

    spectrogram_data = load_spectrogram(sp_filename, source_sentences_ids)
    choice_data = loadfile(q_filename,
                           is_dialog=False,
                           is_source=False,
                           max_length=t_max_leng)
    choice_data = choice_data.astype(np.float32)

    masks = (target_data >= embed_shift)
    masks = np.append(np.ones([len(masks), 1], dtype=bool), masks, axis=1)
    masks = masks[:, :-1]
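    # the mask marks real (non-pad) target tokens; prepending a True column and
    # dropping the last shifts it one step right, aligning it with the decoder
    # inputs that begin with SOS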
    n_data = len(source_data)

    dev_source_data = None
    if dev_s_filename is not None:
        dev_source_sentences_ids, dev_source_person, dev_source_data = loadfile(
            dev_s_filename,
            is_dialog=True,
            is_source=True,
            max_length=s_max_leng)
        dev_source_data += embed_shift
        dev_target_sentences_ids, dev_target_person, dev_target_data, dev_category_data = loadfile(
            dev_t_filename,
            is_dialog=True,
            is_source=False,
            max_length=t_max_leng)
        dev_target_data += embed_shift
        dev_spectrogram_data = load_spectrogram(sp_filename,
                                                dev_source_sentences_ids)
        dev_choice_data = loadfile(dev_q_filename,
                                   is_dialog=False,
                                   is_source=False,
                                   max_length=t_max_leng)
        dev_choice_data[dev_choice_data < 0] = 0
        dev_choice_data = dev_choice_data.astype(np.float32)

        dev_masks = (dev_target_data >= embed_shift)
        dev_masks = np.append(np.ones([len(dev_masks), 1], dtype=bool),
                              dev_masks,
                              axis=1)
        dev_masks = dev_masks[:, :-1]
    print("\tDone.")

    # Training
    last_saved_step = saved_global_step
    num_steps = saved_global_step + train_steps
    losses = []
    cla_losses = []
    steps = []
    perps = []
    dev_perps = []

    print("Start training ...")
    try:
        step = last_saved_step
        for step in range(saved_global_step + 1, num_steps):
            start_time = time.time()
            rand_indexes = np.random.choice(n_data, batch_size)
            source_batch = source_data[rand_indexes]
            target_batch = target_data[rand_indexes]
            person_batch = target_person[rand_indexes]
            spectrogram_batch = spectrogram_data[rand_indexes]
            mask_batch = masks[rand_indexes]
            choice_batch = choice_data[rand_indexes]
            emotions = category_data[rand_indexes]

            feed_dict = {
                source_ids: source_batch,
                target_ids: target_batch,
                person_ids: person_batch,
                spectrogram: spectrogram_batch,
                sequence_mask: mask_batch,
                choice_qs: choice_batch,
                emo_cat: emotions,
                lexicons_ids: lexicons,
                is_train: True,
            }
            loss_value, cla_value, _, __ = sess.run(
                [loss, cla_loss, optim, extra_update_ops], feed_dict=feed_dict)
            losses.append(loss_value)
            cla_losses.append(cla_value)

            duration = time.time() - start_time

            if step % print_every == 0:
                # train perplexity
                t_perp = compute_perplexity(sess, CE, mask_batch, feed_dict)
                perps.append(t_perp)

                # dev perplexity
                dev_str = ""
                if dev_source_data is not None:
                    CE_words = N_words = 0.0
                    for start in range(0, len(dev_source_data), batch_size):
                        dev_feed_dict = {
                            source_ids: dev_source_data[start:start + batch_size],
                            target_ids: dev_target_data[start:start + batch_size],
                            person_ids: dev_target_person[start:start + batch_size],
                            spectrogram: dev_spectrogram_data[start:start + batch_size],
                            choice_qs: dev_choice_data[start:start + batch_size],
                            emo_cat: dev_category_data[start:start + batch_size],
                            sequence_mask: dev_masks[start:start + batch_size],
                            lexicons_ids: lexicons,
                            is_train: False,
                        }
                        CE_word, N_word = compute_test_perplexity(
                            sess, CE, dev_masks[start:start + batch_size],
                            dev_feed_dict)
                        CE_words += CE_word
                        N_words += N_word

                    dev_str = "dev_prep: {:.3f}, ".format(
                        np.exp(CE_words / N_words))
                    dev_perps.append(np.exp(CE_words / N_words))

                steps.append(step)
                info = 'step {:d}, loss = {:.6f}, cla_loss = {:.6f} '
                info += 'perp: {:.3f}, {}({:.3f} sec/step)'
                print(
                    info.format(step, loss_value, cla_value, t_perp, dev_str,
                                duration))

            if step % checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C so save message is on its own line.
        print()

    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)