Example #1
    def train(self):
        device = self.device
        model = self.model.to(device)
        ceLoss = self.smloss.to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        train_data = OneData(config=self.config, data_dir=self.train_dir, transform=train_transform(self.config))
        valid_data = OneData(config=self.config, data_dir=self.valid_dir, transform=val_transform(self.config))
        train_loader = DataLoader(dataset=train_data, batch_size=self.batch_size, shuffle=True,
                                  num_workers=self.config.num_workers)
        valid_loader = DataLoader(dataset=valid_data, batch_size=self.batch_size, num_workers=self.config.num_workers)
        # note: this file handle is opened but never written to in this snippet
        with open('{}/dfcn_gt.txt'.format(self.model_dir), 'w') as f:
            for step in trange(self.epoch):
                model.train()
                for idx, (inputs, labels) in enumerate(train_loader):
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    # the model returns logits plus two intermediate feature tensors
                    output, _, _ = model(inputs)
                    loss = ceLoss(output, labels)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()


                # evaluate on the validation set after each training epoch
                with torch.no_grad():
                    model.eval()
                    _prec1, _prec3, _batch1, _batch3 = 0, 0, 0, 0
                    for inputs, targets in valid_loader:
                        inputs = inputs.to(device)
                        targets = targets.to(device)
                        output, _, _ = model(inputs)
                        (prec1, batch1), (prec3, batch3) = accuracy(output.data, targets.data, topk=(1, 3))
                        _prec1 += prec1
                        _prec3 += prec3
                        _batch1 += batch1
                        _batch3 += batch3
                    print(' * Prec@1 {prec1}/{batch1} Prec@3 {prec3}/{batch3}'.format(prec1=_prec1, batch1=_batch1, prec3=_prec3, batch3=_batch3))
        torch.save(model, self.config.model_path)
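The accuracy helper used in the validation loop above is not shown. A minimal sketch consistent with its call site, returning one (num_correct, batch_size) pair per requested k, might look like this (the exact return convention is an assumption):

def accuracy(output, target, topk=(1,)):
    """Hypothetical top-k accuracy helper matching the call site above."""
    maxk = max(topk)
    batch_size = target.size(0)
    # indices of the top-k logits per sample, shape (batch, maxk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    results = []
    for k in topk:
        # count hits anywhere in the top-k predictions
        correct_k = correct[:k].reshape(-1).float().sum().item()
        results.append((correct_k, batch_size))
    return results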
Example #2
def run(config_file):
    config = get_config(config_file)

    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)

    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    train_dev = pd.read_csv(config['train_dev'], sep=',')
    n_class = config['n_class']

    train_set, dev_set = utils.train_dev_split(train_dev, outdir)
    test_set = pd.read_csv(config['test'], sep=',').values
    num = 10 if args.debug else None

    train_label = utils.one_hot(train_set, n_class, num)
    dev_label = utils.one_hot(dev_set, n_class, num)
    test_label = utils.one_hot(test_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))
    logger.info("dev set: {} samples".format(len(dev_label)))
    logger.info("test set: {} samples".format(len(test_label)))

    # the backbone network is loaded whole from disk
    Net = torch.load(config['Net'])

    model = getattr(M, config['model'])(Net,
                                        n_class=n_class,
                                        **config['model_param'])
    if config['pretrain']:
        # map_location keeps loaded tensors on the CPU
        obj = torch.load(config['pretrain_model'], map_location=lambda storage, loc: storage)
        model.load_param(obj['param'])
    logger.info("model: {}".format(str(model)))
    origin_model = model

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))

    model = model.to(device)

    # a single learning rate for all parameters
    optimizer = getattr(optim, config['optim'])(origin_model.parameters(),
                                                lr=config['other_lr'])

    lr_scheduler = getattr(optim.lr_scheduler,
                           config['lr_scheduler'])(optimizer,
                                                   **config['scheduler_param'])

    criterion = getattr(losses, config['Loss'])(**config['Loss_param'])

    train_transform = utils.train_transform()
    test_transform = utils.simple_transform()

    train_dataloader = oversample_dataloader(
        config['data_h5'], train_label, train_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    dev_dataloader = dataloader_single(
        config['data_h5'], dev_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    test_dataloader = dataloader_single(
        config['data_h5'], test_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )


    dev_loss, f1_macro, f1_micro, acc, auc = utils.evaluate(
        model, dev_dataloader, device, criterion, config['threshold'])
    best_f1 = f1_macro + f1_micro
    logger.info("dev_loss: {:.4f}\tf1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"\
        .format(dev_loss, f1_macro, f1_micro, acc, auc))

    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))

        train_loss = one_epoch(model, optimizer, criterion, train_dataloader,
                               True, config['grad_clip'])

        dev_loss, f1_macro, f1_micro, acc, auc = utils.evaluate(
            model, dev_dataloader, device, criterion, config['threshold'])
        logger.info("train_loss: {:.4f}\tdev_loss: {:.4f}".format(
            train_loss, dev_loss))

        logger.info(
            "DEV: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"
            .format(f1_macro, f1_micro, acc, auc))

        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save(
                {
                    "param": origin_model.get_param(),
                    "train_label": train_label,
                    "dev_label": dev_label,
                    "test_label": test_label,
                    "config": config
                }, model_path)

        if best_f1 < f1_macro + f1_micro:
            model_path = os.path.join(outdir, 'model_acc.th')
            torch.save(
                {
                    "param": origin_model.get_param(),
                    "train_label": train_label,
                    "dev_label": dev_label,
                    "test_label": test_label,
                    "config": config
                }, model_path)
            best_f1 = f1_macro + f1_micro

        _, f1_macro, f1_micro, acc, auc = utils.evaluate(
            model, test_dataloader, device, None, config['threshold'])

        logger.info(
            "TEST: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"
            .format(f1_macro, f1_micro, acc, auc))

        # ReduceLROnPlateau expects the monitored metric; other schedulers take no argument
        schedarg = dev_loss if lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        lr_scheduler.step(schedarg)

    _, f1_macro, f1_micro, acc, auc = utils.evaluate(model, test_dataloader,
                                                     device, None,
                                                     config['threshold'])

    logger.info("<---- test evaluation: ---->")
    logger.info(
        "f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(
            f1_macro, f1_micro, acc, auc))
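one_epoch is assumed from the surrounding module and is shared by the run variants above and below. A minimal sketch matching its call sites (a train flag, optional gradient clipping, mean loss returned; device is the module-level device used elsewhere in these scripts) could be:

def one_epoch(model, optimizer, criterion, dataloader, train, grad_clip=None):
    """Hypothetical single-epoch loop matching the call sites above."""
    model.train() if train else model.eval()
    total_loss, n_batches = 0.0, 0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        # gradients are only needed for the training pass
        with torch.set_grad_enabled(train):
            loss = criterion(model(inputs), targets)
        if train:
            optimizer.zero_grad()
            loss.backward()
            if grad_clip is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()
        total_loss += loss.item()
        n_batches += 1
    return total_loss / max(n_batches, 1)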
Example #3
def run(config_file):
    config = get_config(config_file)

    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)

    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    n_class = config['n_class']

    train_set = pd.read_csv(config['train_dev'], sep=',').values
    num = 10 if args.debug else None

    train_label = utils.one_hot(train_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))

    Net = torch.load(config['Net'])

    model = getattr(M, config['model'])(Net,
                                        n_class=n_class,
                                        **config['model_param'])
    if config['pretrain']:
        obj = torch.load(config['pretrain_model'], map_location=lambda storage, loc: storage)
        model.load_param(obj['param'])
        logger.info('load from {}'.format(config['pretrain_model']))
    logger.info("model: {}".format(str(model)))
    origin_model = model

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))

    model = model.to(device)

    optimizer = getattr(optim, config['optim'])(origin_model.parameters(),
                                                lr=config['other_lr'])

    criterion = getattr(losses, config['Loss'])(**config['Loss_param'])

    train_transform = utils.train_transform()

    train_dataloader = oversample_dataloader(
        config['data_h5'], train_label, train_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))

        train_loss = one_epoch(model, optimizer, criterion, train_dataloader,
                               True, config['grad_clip'])

        logger.info("train_loss: {:.4f}".format(train_loss))

        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save(
                {
                    "param": origin_model.get_param(),
                    "config": config
                },
                model_path)

        # always keep the latest checkpoint as model.th
        model_path = os.path.join(outdir, 'model.th')
        torch.save(
            {
                "param": origin_model.get_param(),
                "config": config
            },
            model_path)
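utils.evaluate is also assumed. A sketch consistent with the five-value call sites in Examples #2 and #3 (loss, macro/micro F1, subset accuracy, and AUC for thresholded multi-label outputs), using scikit-learn metrics:

import numpy as np
import torch
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

def evaluate(model, dataloader, device, criterion, threshold):
    """Hypothetical multi-label evaluation matching the call sites above."""
    model.eval()
    losses, scores, labels = [], [], []
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            output = model(inputs)
            if criterion is not None:
                losses.append(criterion(output, targets).item())
            # treat raw outputs as logits for per-class probabilities
            scores.append(torch.sigmoid(output).cpu().numpy())
            labels.append(targets.cpu().numpy())
    scores, labels = np.concatenate(scores), np.concatenate(labels).astype(int)
    preds = (scores > threshold).astype(int)
    loss = float(np.mean(losses)) if losses else 0.0
    return (loss,
            f1_score(labels, preds, average='macro'),
            f1_score(labels, preds, average='micro'),
            accuracy_score(labels, preds),
            roc_auc_score(labels, scores, average='macro'))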
Example #4
def run(config_file):
    config = get_config(config_file)

    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)

    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    train_dev = pd.read_csv(config['train_dev'], sep=',')
    n_class = config['n_class']


    train_set, dev_set = utils.train_dev_split(train_dev, outdir)
    test_set = pd.read_csv(config['test'], sep=',').values
    num = 5 if args.debug else None

    train_label = utils.one_hot(train_set, n_class, num)
    dev_label = utils.one_hot(dev_set, n_class, num)
    test_label = utils.one_hot(test_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))
    logger.info("dev set: {} samples".format(len(dev_label)))
    logger.info("test set: {} samples".format(len(test_label)))

    # densenet201 backbone initialized from a saved full model's weights
    Net = torchvision.models.densenet201(pretrained=False)
    Net.load_state_dict(torch.load(config['Net']).state_dict())

    model = getattr(M, config['model'])(
        Net, n_class=n_class, **config['model_param']
    )
    logger.info("model: {}".format(str(model.other)))
    origin_model = model

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))

    model = model.to(device)
    if config['model_param']['Net_grad']:
        # fine-tune the backbone with its own learning rate
        optimizer = getattr(optim, config['optim'])([
            {'params': origin_model.get_Net_param(), 'lr': config['Net_lr']},
            {'params': origin_model.get_other_param()}
            ], lr=config['other_lr']
        )
    else:
        optimizer = getattr(optim, config['optim'])(
            origin_model.get_other_param(),
            lr=config['other_lr']
        )

    lr_scheduler = getattr(optim.lr_scheduler, config['lr_scheduler'])(
        optimizer, **config['scheduler_param']
    )

    criterion = getattr(torch.nn, config['Loss'])()

    train_transform = utils.train_transform()
    test_transform = utils.test_transform()
    train_dataloader = dataloader_multiple(
        config['data_h5'], train_label, train_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    dev_dataloader = dataloader_multiple(
        config['data_h5'], dev_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    test_dataloader = dataloader_multiple(
        config['data_h5'], test_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )

    best_dev_loss = np.inf

    # initial pass over the dev set before training starts
    dev_loss = one_epoch(
        model, optimizer, criterion, dev_dataloader, False)
    f1_macro, f1_micro, acc = utils.evaluate(
        model, dev_dataloader, device, config['threshold'])
    best_f1 = f1_macro + f1_micro
    logger.info("dev_loss: {:.4f}\tf1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}"
                .format(dev_loss, f1_macro, f1_micro, acc))

    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))
        train_loss = one_epoch(
            model, optimizer, criterion, train_dataloader, True, config['grad_clip']
        )
        dev_loss = one_epoch(
            model, optimizer, criterion, dev_dataloader, False
        )
        logger.info("train_loss: {:.4f}\tdev_loss: {:.4f}".format(train_loss, dev_loss))

        f1_macro, f1_micro, acc = utils.evaluate(
            model, dev_dataloader, device, config['threshold'])

        logger.info("f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}".format(f1_macro, f1_micro, acc))


        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
        if best_dev_loss > dev_loss:
            model_path = os.path.join(outdir, 'model.th')
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
            best_dev_loss = dev_loss
        if best_f1 < f1_macro + f1_micro:
            model_path = os.path.join(outdir, 'model_acc.th')
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
            best_f1 = f1_macro + f1_micro

        # ReduceLROnPlateau expects the monitored metric; other schedulers take no argument
        schedarg = dev_loss if lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        lr_scheduler.step(schedarg)

    f1_macro, f1_micro, acc = utils.evaluate(
        model, test_dataloader, device, config['threshold'])

    logger.info("TEST: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}".format(f1_macro, f1_micro, acc))
Example #5
    def load_dir(self, data_path):
        dir2label = os.path.join(data_path, 'subdir_to_label.json')
        with open(dir2label, 'r') as f:
            self.dir2label = json.load(f)

    def get_img(self, data_dir):
        # walk data_dir and pair every image path with its subdirectory's label
        self.data_map = list()
        for root, dirs, files in os.walk(data_dir):
            for subdir in dirs:
                for image_name in os.listdir(os.path.join(root, subdir)):
                    image_path = os.path.join(root, subdir, image_name)
                    self.data_map.append((image_path, self.dir2label[subdir]))


if __name__ == '__main__':
    config, unparsed = get_config()
    dataset = OneData(config, data_dir=config.train_dir, transform=train_transform(config))
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=config.batch_size, shuffle=True,
                                               num_workers=config.num_workers)

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        print(labels)
        print(inputs[0])  # first sample of the first batch
        break
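Only load_dir and get_img of OneData are shown. A sketch of the rest of the Dataset protocol implied by data_map and the DataLoader usage above (everything beyond data_map and dir2label is an assumption):

from PIL import Image
from torch.utils.data import Dataset

class OneData(Dataset):
    """Hypothetical completion of the dataset methods shown above."""

    def __init__(self, config, data_dir, transform=None):
        self.transform = transform
        self.load_dir(data_dir)   # fills self.dir2label from subdir_to_label.json
        self.get_img(data_dir)    # fills self.data_map with (path, label) pairs

    def __len__(self):
        return len(self.data_map)

    def __getitem__(self, index):
        image_path, label = self.data_map[index]
        image = Image.open(image_path).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label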

Example #6
parser.add_argument('--feature-weight', type=float, default=0.1)

# Verbosity
parser.add_argument('--eval-loss-every', type=int, default=500)
parser.add_argument('--print-every', type=int, default=500)
parser.add_argument('--save-every', type=int, default=1000)
parser.add_argument('--test-every', type=int, default=1000)

args = parser.parse_args()

# args.gpu == -1 selects the CPU; otherwise use the requested CUDA device
device = torch.device('cuda:%s' % args.gpu if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

encoder = VGG19().to(device)
decoder = Decoder().to(device)

train_transform = train_transform(args.image_size, args.final_size)
test_transform = eval_transform(args.image_size)

dataset = Dataset(args.dataset_dir, train_transform)

data_loader = iter(data.DataLoader(
    dataset,
    batch_size=args.batch_size,
    num_workers=args.nThreads,
    sampler=InfiniteSampler(dataset)
))

optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)

criterion = nn.MSELoss()
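InfiniteSampler is what lets the script draw batches with next() forever instead of looping over epochs; it is not shown here. A minimal sketch of the idea (the real class may differ):

import numpy as np
from torch.utils.data.sampler import Sampler

class InfiniteSampler(Sampler):
    """Hypothetical sampler yielding an endless stream of shuffled indices."""

    def __init__(self, data_source):
        self.num_samples = len(data_source)

    def __iter__(self):
        while True:
            # reshuffle every time the dataset has been exhausted
            for idx in np.random.permutation(self.num_samples):
                yield int(idx)

    def __len__(self):
        return 2 ** 31  # effectively unbounded

With this sampler, batch = next(data_loader) always yields a fresh batch, which is why the DataLoader above is wrapped in iter(...).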
Example #7
    def eval_model(self):
        device = self.device
        model = self.model.to(device)
        model.eval()  # feature extraction should run in eval mode

        dfcn_feats, dcnn_feats = [], []
        names = list()
        h5f1 = h5py.File('featsDCNN.h5', 'w')
        h5f2 = h5py.File('featsDFCN.h5', 'w')
        imgs = get_imgs('net/train')

        train_data = OneData(config=self.config,
                             data_dir=self.train_dir,
                             transform=train_transform(self.config))
        train_loader = DataLoader(dataset=train_data,
                                  batch_size=self.batch_size,
                                  shuffle=True,
                                  num_workers=self.config.num_workers)
        for idx, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            with torch.no_grad():
                output, dfcn, dcnn = model(inputs)
            # each row of dfcn/dcnn is one sample's feature vector
            dfcn_feats.extend(dfcn.detach().cpu().tolist())
            dcnn_feats.extend(dcnn.detach().cpu().tolist())
            print("extracting feature from image No. %d , %d images in total" %
                  ((idx + 1), len(train_loader)))
            for name in labels:
                names.append(name)
            # names.append(name)
            # if idx == 9:
            #     break
        # print(dcnn_feats)
        # print(np.array(dcnn_feats).shape)

        feats = np.array(dcnn_feats)
        print(feats.shape)
        h5f1.create_dataset('dataset_1', data=feats)
        feats = np.array(dfcn_feats)
        h5f2.create_dataset('dataset_1', data=feats)
        # store names as bytes so h5py writes a fixed-length string dataset
        names = [name.encode() for name in names]
        h5f1.create_dataset('dataset_2', data=names)
        h5f2.create_dataset('dataset_2', data=names)
        h5f1.close()
        h5f2.close()
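To read the extracted features back later, for example (dataset names as written above):

import h5py

with h5py.File('featsDCNN.h5', 'r') as h5f:
    feats = h5f['dataset_1'][:]                        # (n_samples, feat_dim) array
    names = [n.decode() for n in h5f['dataset_2'][:]]  # original label strings
print(feats.shape, names[:3])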