def Val_Dataset(self, root_dir, coco_dir, img_dir, set_dir):
        """Register the validation dataset and build its DataLoader.

        Records the four path components under
        ``system_dict["dataset"]["val"]`` (and marks the split as enabled),
        then creates the loader options, the ``CocoDataset`` and the
        ``DataLoader`` under ``system_dict["local"]``.

        Batch size and worker count are read from ``system_dict["params"]``,
        so they must already be populated when this is called (presumably by
        ``Train_Dataset`` -- confirm against callers).

        Args:
            root_dir: Base directory; joined with ``coco_dir`` using "/".
            coco_dir: Dataset directory name below ``root_dir``.
            img_dir: Forwarded to ``CocoDataset`` as ``img_dir``.
            set_dir: Forwarded to ``CocoDataset`` as ``set_dir``.
        """
        val_cfg = self.system_dict["dataset"]["val"]
        val_cfg["status"] = True
        val_cfg["root_dir"] = root_dir
        val_cfg["coco_dir"] = coco_dir
        val_cfg["img_dir"] = img_dir
        val_cfg["set_dir"] = set_dir

        hyper = self.system_dict["params"]
        local = self.system_dict["local"]

        # Validation keeps dataset order and never drops the last batch.
        local["val_params"] = dict(
            batch_size=hyper["batch_size"],
            shuffle=False,
            drop_last=False,
            collate_fn=collater,
            num_workers=hyper["num_workers"],
        )

        # No Augmenter here: validation only normalizes and resizes.
        eval_pipeline = transforms.Compose([Normalizer(), Resizer()])
        dataset_root = val_cfg["root_dir"] + "/" + val_cfg["coco_dir"]
        local["val_set"] = CocoDataset(root_dir=dataset_root,
                                       img_dir=val_cfg["img_dir"],
                                       set_dir=val_cfg["set_dir"],
                                       transform=eval_pipeline)

        # The validation loader is stored under the "test_generator" key.
        local["test_generator"] = DataLoader(local["val_set"],
                                             **local["val_params"])
    def Train_Dataset(self,
                      root_dir,
                      coco_dir,
                      img_dir,
                      set_dir,
                      batch_size=8,
                      image_size=512,
                      use_gpu=True,
                      num_workers=3):
        """Register the training dataset and build its DataLoader.

        Stores the path components under ``system_dict["dataset"]["train"]``
        and the hyper-parameters under ``system_dict["params"]``, then creates
        the loader options, the ``CocoDataset`` and the ``DataLoader`` under
        ``system_dict["local"]``.

        Args:
            root_dir: Base directory; joined with ``coco_dir`` using "/".
            coco_dir: Dataset directory name below ``root_dir``.
            img_dir: Forwarded to ``CocoDataset`` as ``img_dir``.
            set_dir: Forwarded to ``CocoDataset`` as ``set_dir``.
            batch_size: Per-GPU batch size; multiplied by the stored
                ``num_gpus`` to obtain the loader batch size.
            image_size: Only recorded in ``system_dict["params"]`` here.
            use_gpu: When truthy, the RNG is seeded and, if CUDA is
                available, ``num_gpus`` is refreshed.
            num_workers: Worker count for the ``DataLoader``.
        """
        train_cfg = self.system_dict["dataset"]["train"]
        train_cfg["root_dir"] = root_dir
        train_cfg["coco_dir"] = coco_dir
        train_cfg["img_dir"] = img_dir
        train_cfg["set_dir"] = set_dir

        hyper = self.system_dict["params"]
        hyper["batch_size"] = batch_size
        hyper["image_size"] = image_size
        hyper["use_gpu"] = use_gpu
        hyper["num_workers"] = num_workers

        local = self.system_dict["local"]

        # NOTE(review): when use_gpu is falsy nothing is seeded and
        # local["num_gpus"] keeps whatever value it already had -- confirm it
        # is initialised elsewhere before this method runs.
        wants_gpu = hyper["use_gpu"]
        if wants_gpu and torch.cuda.is_available():
            local["num_gpus"] = torch.cuda.device_count()
            torch.cuda.manual_seed(123)
        elif wants_gpu:
            torch.manual_seed(123)

        # Loader batch size scales with the number of GPUs.
        local["training_params"] = dict(
            batch_size=hyper["batch_size"] * local["num_gpus"],
            shuffle=True,
            drop_last=True,
            collate_fn=collater,
            num_workers=hyper["num_workers"],
        )

        train_pipeline = transforms.Compose(
            [Normalizer(), Augmenter(), Resizer()])
        dataset_root = train_cfg["root_dir"] + "/" + train_cfg["coco_dir"]
        local["training_set"] = CocoDataset(root_dir=dataset_root,
                                            img_dir=train_cfg["img_dir"],
                                            set_dir=train_cfg["set_dir"],
                                            transform=train_pipeline)

        local["training_generator"] = DataLoader(local["training_set"],
                                                 **local["training_params"])
示例#3
0
def main(args):
    """Generate and print a caption for the image at ``args.image``.

    Builds the training ``CocoDataset`` (needed for its vocabulary and
    ``id_to_word`` mapping), restores the encoder/decoder weights from
    ``args.encoder_path`` / ``args.decoder_path``, samples word ids for the
    image and prints the decoded sentence up to and including ``<end>``.

    Args:
        args: Namespace providing ``embed_size``, ``hidden_size``,
            ``num_layers``, ``encoder_path``, ``decoder_path`` and ``image``.

    Relies on a module-level ``device`` defined outside this function.
    """
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    image_dir = "data/"
    json_path = image_dir + "annotations/captions_train2014.json"
    root_dir = image_dir + "train2014"

    dataset = CocoDataset(json_path=json_path,
                          root_dir=root_dir,
                          transform=transform)

    data_loader = get_data_loader(dataset, batch_size=32)

    # Build models
    encoder = FeatureExtractor(args.embed_size).eval(
    )  # eval mode (batchnorm uses moving mean/variance)
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters. map_location lets a checkpoint that
    # was saved on a GPU be restored on whatever `device` is in use now.
    encoder.load_state_dict(
        torch.load(args.encoder_path, map_location=device))
    decoder.load_state_dict(
        torch.load(args.decoder_path, map_location=device))
    # Inference only: disable training-time behaviour in the decoder as well.
    decoder.eval()

    # Prepare an image
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption from the image; no gradients are needed.
    with torch.no_grad():
        feature = encoder(image_tensor)
        sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids[0].cpu().numpy(
    )  # (1, max_seq_length) -> (max_seq_length)

    # Convert word_ids to words, stopping after the end-of-sentence token.
    sampled_caption = []
    for word_id in sampled_ids:
        word = data_loader.dataset.id_to_word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption
    print(sentence)
    # np.asarray forces the pixel data to load before the file is closed.
    with Image.open(args.image) as image:
        plt.imshow(np.asarray(image))
def get_loader(root, json, transform, batch_size, shuffle, num_workers):
    """Build a ``torch.utils.data.DataLoader`` over a ``CocoDataset``.

    ``root``, ``json`` and ``transform`` are forwarded to ``CocoDataset``;
    ``batch_size``, ``shuffle`` and ``num_workers`` go to the loader.
    """
    caption_dataset = CocoDataset(root=root, json=json, transform=transform)
    loader_options = {
        "dataset": caption_dataset,
        "batch_size": batch_size,
        "shuffle": shuffle,
        "num_workers": num_workers,
    }
    return torch.utils.data.DataLoader(**loader_options)
示例#5
0
def main():
    """Evaluate a checkpointed SPVSE model on the validation split.

    All settings come from ``parse_args()``. Builds the validation
    ``CocoDataset`` and ``DataLoader``, loads the vocabulary from
    ``args.vocab_path``, restores ``ckpt["model_state"]`` from
    ``args.checkpoint`` and calls ``validate`` once.

    Raises:
        ValueError: if ``args.dataset`` is not ``"coco"``.
        AssertionError: if ``args.checkpoint`` is ``None``.
    """
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize(args.imsize_pre),
        transforms.CenterCrop(args.imsize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # Fail explicitly instead of hitting an unbound `val_dset` further down.
    if args.dataset != "coco":
        raise ValueError("unsupported dataset: {!r}".format(args.dataset))
    val_dset = CocoDataset(
        root=args.root_path,
        split="val",
        transform=transform,
    )
    val_loader = DataLoader(
        val_dset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.n_cpu,
        collate_fn=collater,
    )

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    model = SPVSE(
        len(vocab),
        args.emb_size,
        args.out_size,
        args.max_len,
        args.cnn_type,
        args.rnn_type,
        pad_idx=vocab.padidx,
        bos_idx=vocab.bosidx,
    )

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    model = model.to(device)

    assert args.checkpoint is not None, "a checkpoint path is required"
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint),
          flush=True)
    ckpt = torch.load(args.checkpoint, map_location=device)
    model.load_state_dict(ckpt["model_state"])
    # The first argument is passed where the training script passes the
    # epoch number; 1000 is presumably just a label -- confirm in validate().
    _ = validate(1000, val_loader, model, vocab, args)
示例#6
0
文件: eval.py 项目: yiskw713/VSE
def main():
    """Embed the validation set and run both retrieval directions.

    All settings come from ``parse_args()``. Restores the image and caption
    encoders from ``args.checkpoint``, builds an ``EmbedDataset`` over the
    validation loader, then calls ``retrieve_i2c`` and ``retrieve_c2i``.

    Raises:
        ValueError: if ``args.dataset`` is not ``'coco'``.
        AssertionError: if ``args.checkpoint`` is ``None``.
    """
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # Fail explicitly instead of hitting an unbound `val_dset` further down.
    if args.dataset != 'coco':
        raise ValueError("unsupported dataset: {!r}".format(args.dataset))
    val_dset = CocoDataset(root=args.root_path,
                           imgdir='val2017',
                           jsonfile='annotations/captions_val2017.json',
                           transform=transform,
                           mode='all')
    val_loader = DataLoader(val_dset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_cpu,
                            collate_fn=collater_eval)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    assert args.checkpoint is not None, "a checkpoint path is required"
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint),
          flush=True)
    # map_location makes a GPU-saved checkpoint loadable when running on CPU
    # (e.g. with --no_cuda).
    ckpt = torch.load(args.checkpoint, map_location=device)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    begin = time.time()
    dset = EmbedDataset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)

    retrieve_i2c(dset, val_dset, imenc, vocab, args)
    retrieve_c2i(dset, val_dset, capenc, vocab, args)
示例#7
0
def get_dataset(image_dir, json_path, resize_dim):
    """Create a resized ``CocoDataset`` and report its class count.

    Returns:
        A ``(dataset, dataset.num_classes)`` tuple.
    """
    resize_pipeline = trsf.Compose([Resizer(resize_dim)])
    dataset = CocoDataset(image_dir, json_path, transforms=resize_pipeline)
    class_count = dataset.num_classes
    return dataset, class_count
示例#8
0
文件: train.py 项目: skasai5296/VSE
def _total_recall(stats):
    """Return the int-truncated sum of i2c and c2i recall@{1, 5, 10, 20}."""
    total = sum(stats["i2c_recall@{}".format(rank)] +
                stats["c2i_recall@{}".format(rank)]
                for rank in (1, 5, 10, 20))
    return int(total)


def main():
    """Train an SPVSE model, validating after every epoch.

    All settings come from ``parse_args()``. After each epoch the summed
    recall score from ``validate`` drives the LR scheduler, checkpointing
    (a checkpoint is written only when the score improves on the best so
    far) and early stopping (after ``args.es_cnt`` epochs in a row without
    improvement). Per-epoch metrics are passed to ``visualize`` at the end.

    Raises:
        ValueError: if ``args.dataset``, ``args.optimizer`` or
            ``args.scheduler`` holds an unsupported value.
        AssertionError: if the resume epoch is not below ``args.max_epochs``.
    """
    args = parse_args()
    print(args)

    train_transform = transforms.Compose([
        transforms.Resize(args.imsize_pre),
        transforms.RandomCrop(args.imsize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    val_transform = transforms.Compose([
        transforms.Resize(args.imsize_pre),
        transforms.CenterCrop(args.imsize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # Fail explicitly instead of hitting unbound datasets further down.
    if args.dataset != "coco":
        raise ValueError("unsupported dataset: {!r}".format(args.dataset))
    train_dset = CocoDataset(root=args.root_path,
                             split="train",
                             transform=train_transform)
    val_dset = CocoDataset(
        root=args.root_path,
        split="val",
        transform=val_transform,
    )
    train_loader = DataLoader(
        train_dset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.n_cpu,
        collate_fn=collater,
    )
    val_loader = DataLoader(
        val_dset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.n_cpu,
        collate_fn=collater,
    )

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    model = SPVSE(
        len(vocab),
        args.emb_size,
        args.out_size,
        args.max_len,
        args.cnn_type,
        args.rnn_type,
        pad_idx=vocab.padidx,
        bos_idx=vocab.bosidx,
    )

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    model = model.to(device)
    if args.freeze_ep > 0:
        model.freeze()
        print("freezing model")

    # One parameter group per sub-module so each gets its own learning rate.
    cfgs = [
        {
            "params": model.im_enc.parameters(),
            "lr": args.lr_cnn
        },
        {
            "params": model.cap_enc.parameters(),
            "lr": args.lr_rnn
        },
        {
            "params": model.cap_gen.parameters(),
            "lr": args.lr_gen
        },
        {
            "params": model.cap_rec.parameters(),
            "lr": args.lr_rec
        },
    ]
    if args.optimizer == "SGD":
        optimizer = optim.SGD(cfgs,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer == "Adam":
        optimizer = optim.Adam(cfgs,
                               betas=(args.beta1, args.beta2),
                               weight_decay=args.weight_decay)
    elif args.optimizer == "RMSprop":
        optimizer = optim.RMSprop(cfgs,
                                  alpha=args.alpha,
                                  weight_decay=args.weight_decay)
    else:
        raise ValueError("unsupported optimizer: {!r}".format(args.optimizer))

    if args.scheduler == "Plateau":
        # mode="max": the monitored value is a recall score (higher = better).
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="max",
            factor=args.dampen_factor,
            patience=args.patience,
            verbose=True)
    elif args.scheduler == "Step":
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.patience,
                                              gamma=args.dampen_factor)
    elif args.scheduler == "None":
        scheduler = None
    else:
        # Previously this only surfaced as a NameError after a full epoch.
        raise ValueError("unsupported scheduler: {!r}".format(args.scheduler))

    lossfunc = SPVSELoss(
        vocab.padidx,
        weight_rank=args.weight_rank,
        weight_gen=args.weight_gen,
        weight_rec=args.weight_rec,
    )

    if args.checkpoint is not None:
        print("loading model and optimizer checkpoint from {} ...".format(
            args.checkpoint),
              flush=True)
        # map_location makes a GPU-saved checkpoint loadable on CPU.
        ckpt = torch.load(args.checkpoint, map_location=device)
        model.load_state_dict(ckpt["model_state"])
        optimizer.load_state_dict(ckpt["optimizer_state"])
        if scheduler is not None:
            scheduler.load_state_dict(ckpt["scheduler_state"])
        offset = ckpt["epoch"]
        bestscore = _total_recall(ckpt["stats"])
    else:
        offset = 0
        bestscore = -1
    model = nn.DataParallel(model)

    metrics = {}
    es_cnt = 0

    assert offset < args.max_epochs
    savedir = os.path.join("models", args.config_name)
    os.makedirs(savedir, exist_ok=True)

    for ep in range(offset, args.max_epochs):
        # NOTE(review): when resuming with offset > args.freeze_ep the model
        # is frozen above but this branch never fires -- confirm intent.
        if ep == args.freeze_ep:
            model.module.unfreeze()
            print("unfreezing model")
        train(ep + 1, train_loader, model, optimizer, lossfunc, vocab, args)
        data = validate(ep + 1, val_loader, model, vocab, args)
        totalscore = _total_recall(data)
        if args.scheduler == "Plateau":
            scheduler.step(totalscore)
        elif args.scheduler == "Step":
            scheduler.step()

        # Build the checkpoint; model.module strips the DataParallel wrapper.
        ckpt = {
            "stats": data,
            "epoch": ep + 1,
            "model_state": model.module.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }
        if scheduler is not None:
            ckpt["scheduler_state"] = scheduler.state_dict()

        for k, v in data.items():
            metrics.setdefault(k, []).append(v)

        savepath = os.path.join(
            savedir,
            "epoch_{:04d}_score_{:03d}.ckpt".format(ep + 1, totalscore))
        if totalscore > bestscore:
            print(
                "score: {:03d}, saving model and optimizer checkpoint to {} ..."
                .format(totalscore, savepath),
                flush=True,
            )
            bestscore = totalscore
            torch.save(ckpt, savepath)
            es_cnt = 0
        else:
            print(
                "score: {:03d}, no improvement from best score of {:03d}, not saving"
                .format(totalscore, bestscore),
                flush=True,
            )
            es_cnt += 1
            if es_cnt == args.es_cnt:
                print(
                    "early stopping at epoch {} because of no improvement for {} epochs"
                    .format(ep + 1, args.es_cnt))
                break
        print("done for epoch {:04d}".format(ep + 1), flush=True)

    visualize(metrics, args)
    print("complete training")
        n_train,
        "\nNumbers of validation images: ",
        n_valid,
    )
    print(
        "This training model:"
        "\nBiFPN:",
        bifpn_mode,
        "\nEvaluate traning model:",
        eval_train_mode,
    )
    print()
    print()
    print()

    train_dataset = CocoDataset(cfg.train_path, train_id, Is_Train=True)
    valid_dataset = CocoDataset(cfg.train_path, valid_id, Is_Train=False)

    batch_size = cfg.batch_size
    accumulation_steps = cfg.accumulation_steps
    workers = cfg.num_workers

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        collate_fn=train_dataset.collate_fn,
    )
    valid_loader = DataLoader(
        valid_dataset,
示例#10
0
        "ids": "../data/coco_bodypose/ids.pkl",
        "file_info": "../data/coco_bodypose/file_infos.pkl",
        "annotations": "../data/coco_bodypose/annotation_ids.pkl"
    }
    val_file_info = {
        "ids": "../data/coco_bodypose/val_ids.pkl",
        "file_info": "../data/coco_bodypose/val_file_infos.pkl",
        "annotations": "../data/coco_bodypose/val_annotation_ids.pkl"
    }

    if parser.state == "train":


        # data augumentation
        data_transforms = COCOTransformation(height=FIX_HEIGHT, width=FIX_WIDTH)
        trainSet = CocoDataset(parser.train, train_file_info, transform=data_transforms)
        # trainSet = CocoDataset(parser.val, val_file_info, transform=data_transforms)
        valSet = CocoDataset(parser.val, val_file_info, transform=data_transforms)
        trainLoader = DataLoader(trainSet, batch_size=20, shuffle=True, num_workers=10)
        valLoader = DataLoader(valSet, batch_size=10, shuffle=False, num_workers=5)

        model = OpenPoseLightning()
        loss = compute_loss
        optimizer = torch.optim.Adam(model.parameters(), lr=parser.lr)

        train_frame = TrainingProcessOpenPose(trainLoader,
                                               valLoader,
                                               optimizer,
                                               loss,
                                               model,
                                               num_epoch=10,
示例#11
0
# Detection thresholds and network input resolution handed to Detector below.
confidence_threshold = 0.5
nms_threshold = 0.4
input_width = 416
input_height = 416

# Image folders and annotation files for the train and validation splits.
train_path = '/home/user/Data/coco2014/train2014'
train_ann_file = '/home/user/Data/coco2014/annotations/instances_train2014.json'

val_path = '/home/user/Data/coco2014/val2014'
val_ann_file = '/home/user/Data/coco2014/annotations/instances_val2014.json'

# cls_file, cfg_file and weight_file are defined outside this excerpt.
detector = Detector(cls_file, cfg_file, weight_file, confidence_threshold,
                    nms_threshold, input_width, input_height)

# num_classes is likewise defined outside this excerpt.
train_dataset = CocoDataset(train_path, train_ann_file, num_classes)
val_dataset = CocoDataset(val_path, val_ann_file, num_classes)
# Both loaders yield one sample at a time, in dataset order.
train_loader = DataLoader(train_dataset,
                          batch_size=1,
                          shuffle=False,
                          num_workers=1)
val_loader = DataLoader(val_dataset,
                        batch_size=1,
                        shuffle=False,
                        num_workers=1)

# Empty result containers; presumably filled by code after this excerpt.
train_detections = {}
val_detections = {}

print('Running...')
示例#12
0
import matplotlib.pyplot as plt
from pycocotools.coco import COCO

# Image folders and annotation files for the train and validation splits.
train_path = '/home/user/Data/coco2014/train2014'
train_ann_file = '/home/user/Data/coco2014/annotations/instances_train2014.json'
val_path = '/home/user/Data/coco2014/val2014'
val_ann_file = '/home/user/Data/coco2014/annotations/instances_val2014.json'
# NOTE(review): `coco` is not used again in this excerpt; presumably a helper
# defined elsewhere reads it as a global -- confirm before removing.
coco = COCO(train_ann_file)

num_labels = 80

# Resize to 224x224, convert to a tensor, then normalize per channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


train_dataset = CocoDataset(train_path, train_ann_file, transform, num_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True)
# Pull a single batch to sanity-check what the loader yields.
images, labels = next(iter(train_loader))

print(f'images.shape: {images.shape}')
print(labels.shape)
# Inspect the first sample of the batch: image plus its class names.
img = tensor_to_image(images[0])
class_names = get_classes_from_labels(labels[0])
print(class_names)

plt.imshow(img)
plt.show()
示例#13
0
from dataset import CocoDataset
from model import RetinaNet

if __name__ == '__main__':
    # Smoke test: push the first dataset sample through RetinaNet and print
    # whatever the network returns for it.
    coco = CocoDataset()
    # Use the subscript protocol instead of calling __getitem__ directly.
    item = coco[0]
    net = RetinaNet()
    # unsqueeze(0) adds a leading dimension to the image before the call.
    losses = net(item['img'].data.unsqueeze(0), item['img_meta'].data,
                 item['gt_bboxes'].data, item['gt_labels'].data)
    print(losses)
示例#14
0
def main():
    """Train the image and caption encoders with a pairwise ranking loss.

    All settings come from ``parse_args()``. After every epoch the model is
    validated, the summed recall score is fed to the plateau scheduler, and a
    checkpoint is written to ``args.model_save_path``. Per-epoch metrics are
    passed to ``visualize`` at the end.

    Raises:
        ValueError: if ``args.dataset`` is not ``'coco'``.
        AssertionError: if the resume epoch is not below ``args.max_epochs``.
    """
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # Fail explicitly instead of hitting unbound datasets further down.
    if args.dataset != 'coco':
        raise ValueError("unsupported dataset: {!r}".format(args.dataset))
    train_dset = CocoDataset(root=args.root_path,
                             transform=transform,
                             mode='one')
    val_dset = CocoDataset(root=args.root_path,
                           imgdir='val2017',
                           jsonfile='annotations/captions_val2017.json',
                           transform=transform,
                           mode='all')
    train_loader = DataLoader(train_dset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.n_cpu,
                              collate_fn=collater_train)
    val_loader = DataLoader(val_dset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_cpu,
                            collate_fn=collater_eval)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    # Separate parameter groups: each encoder has its own lr and momentum.
    optimizer = optim.SGD([{
        'params': imenc.parameters(),
        'lr': args.lr_cnn,
        'momentum': args.mom_cnn
    }, {
        'params': capenc.parameters(),
        'lr': args.lr_rnn,
        'momentum': args.mom_rnn
    }])
    # mode='max': the monitored value is a recall score (higher = better).
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='max',
                                                     factor=0.1,
                                                     patience=args.patience,
                                                     verbose=True)
    lossfunc = PairwiseRankingLoss(margin=args.margin,
                                   method=args.method,
                                   improved=args.improved,
                                   intra=args.intra)

    if args.checkpoint is not None:
        print("loading model and optimizer checkpoint from {} ...".format(
            args.checkpoint),
              flush=True)
        # map_location makes a GPU-saved checkpoint loadable on CPU.
        ckpt = torch.load(args.checkpoint, map_location=device)
        imenc.load_state_dict(ckpt["encoder_state"])
        capenc.load_state_dict(ckpt["decoder_state"])
        optimizer.load_state_dict(ckpt["optimizer_state"])
        scheduler.load_state_dict(ckpt["scheduler_state"])
        offset = ckpt["epoch"]
    else:
        offset = 0
    imenc = nn.DataParallel(imenc)
    capenc = nn.DataParallel(capenc)

    metrics = {}

    assert offset < args.max_epochs
    # Create the output directory once, without a check-then-create race.
    os.makedirs(args.model_save_path, exist_ok=True)

    for ep in range(offset, args.max_epochs):
        imenc, capenc, optimizer = train(ep + 1, train_loader, imenc, capenc,
                                         optimizer, lossfunc, vocab, args)
        data = validate(ep + 1, val_loader, imenc, capenc, vocab, args)
        totalscore = 0
        for rank in [1, 5, 10, 20]:
            totalscore += data["i2c_recall@{}".format(rank)] + data[
                "c2i_recall@{}".format(rank)]
        scheduler.step(totalscore)

        # Save a checkpoint; .module strips the DataParallel wrapper so the
        # weights load back into the bare encoders.
        ckpt = {
            "stats": data,
            "epoch": ep + 1,
            "encoder_state": imenc.module.state_dict(),
            "decoder_state": capenc.module.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "scheduler_state": scheduler.state_dict()
        }
        savepath = os.path.join(
            args.model_save_path,
            "epoch_{:04d}_score_{:05d}.ckpt".format(ep + 1,
                                                    int(100 * totalscore)))
        print(
            "saving model and optimizer checkpoint to {} ...".format(savepath),
            flush=True)
        torch.save(ckpt, savepath)
        print("done for epoch {}".format(ep + 1), flush=True)

        # Accumulate each metric's per-epoch history for visualize().
        for k, v in data.items():
            metrics.setdefault(k, []).append(v)

    visualize(metrics, args)
示例#15
0
    }
    # DataLoader options for validation: fixed order, shared collate function.
    # NOTE(review): drop_last=True discards a trailing partial batch, so some
    # validation samples may never be evaluated -- confirm this is intended.
    val_params = {
        'batch_size': batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': num_workers
    }

    # Resizer target size, looked up by compound_coef below.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    # Training pipeline: normalize, augment, then resize.
    training_set = CocoDataset(root_dir=os.path.join(data_path,
                                                     params.project_name),
                               set=params.train_set,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Augmenter(),
                                   Resizer(input_sizes[compound_coef])
                               ]))
    # training_params is defined before this excerpt.
    training_generator = DataLoader(training_set, **training_params)

    # Validation pipeline: same as training but without the Augmenter.
    val_set = CocoDataset(root_dir=os.path.join(data_path,
                                                params.project_name),
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)
示例#16
0
def main(num_epochs=10, embedding_dim=256, data_dir="data/",
         max_valid_batches=80000):
    """ Function to train the model.

    Each epoch trains on the full training loader (saving encoder/decoder
    checkpoints under ``models/`` every 1000 steps), then scores up to
    ``max_valid_batches`` validation batches and prints the average training
    loss, validation loss and BLEU.

    Args:
        num_epochs: int
            Number of full dataset iterations to train the model.
        embedding_dim: int
            Output of the CNN model and input of the LSTM embedding size.
        data_dir: str
            Path to the folder of the data.
        max_valid_batches: int
            Upper bound on the number of validation batches scored per epoch.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"WORKING WITH: {device}")

    # Define the paths for train and validation
    train_json_path = data_dir + "annotations/captions_train2014.json"
    train_root_dir = data_dir + "train2014"
    valid_json_path = data_dir + "annotations/captions_val2014.json"
    valid_root_dir = data_dir + "val2014"

    transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = CocoDataset(json_path=train_json_path,
                                root_dir=train_root_dir,
                                transform=transform)

    train_coco_dataset = get_data_loader(train_dataset, batch_size=128)

    valid_dataset = CocoDataset(json_path=valid_json_path,
                                root_dir=valid_root_dir,
                                transform=transform)

    valid_coco_dataset = get_data_loader(valid_dataset, batch_size=1)

    vocab_size = len(train_dataset.vocabulary)
    encoder = FeatureExtractor(embedding_dim).to(device)
    decoder = CaptionGenerator(embedding_dim, 512, vocab_size, 1).to(device)

    criterion = nn.CrossEntropyLoss()
    # Only the decoder and the encoder's linear/bn layers are optimized.
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = optim.Adam(params, lr=0.01)

    print(f"TRAIN DATASET: {len(train_coco_dataset)}")
    print(f"VALID DATASET: {len(valid_coco_dataset)}")

    # torch.save does not create missing directories.
    os.makedirs("models", exist_ok=True)

    for epoch in range(num_epochs):
        encoder.train()
        decoder.train()
        train_loss = 0.0
        for i, (images, captions,
                descriptions) in enumerate(train_coco_dataset):
            images = images.to(device)
            captions = captions.to(device)

            features = encoder(images)
            outputs = decoder(features, captions)

            loss = criterion(outputs.view(-1, vocab_size), captions.view(-1))

            encoder.zero_grad()
            decoder.zero_grad()

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            # Save the model checkpoints
            if (i + 1) % 1000 == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join("models",
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join("models",
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        encoder.eval()
        decoder.eval()
        valid_loss = 0.0
        bleu = 0.0
        n_valid = 0
        # Validation needs no gradients; no_grad avoids building the graph.
        with torch.no_grad():
            for i, (images, captions,
                    descriptions) in enumerate(valid_coco_dataset):
                if i >= max_valid_batches:
                    break
                images = images.to(device)
                captions = captions.to(device)
                features = encoder(images)
                outputs = decoder(features, captions)
                loss = criterion(outputs.view(-1, vocab_size),
                                 captions.view(-1))
                valid_loss += loss.item()
                bleu += calculate_bleu(decoder, features, descriptions,
                                       train_coco_dataset)
                n_valid += 1

        # Average over the batches actually processed (not a fixed 80000);
        # max(..., 1) guards against an empty loader.
        n_train = max(len(train_coco_dataset), 1)
        n_valid = max(n_valid, 1)
        print(
            "Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, BLEU: {:.4f}".
            format(epoch, train_loss / n_train, valid_loss / n_valid,
                   bleu / n_valid))
示例#17
0
    logging.debug("Initializing tokenizer and loading vocabulary from {} ...".format(os.path.join(CONFIG.data_path, CONFIG.caption_file_path)))
    tokenizer = BasicTokenizer(min_freq=CONFIG.min_freq, max_len=CONFIG.max_len)
    tokenizer.from_textfile(os.path.join(CONFIG.data_path, CONFIG.caption_file_path))
    logging.debug("done!")

    logging.debug("Initializing Dataset...")
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_dset = CocoDataset(CONFIG.data_path, mode="train", tokenizer=tokenizer, transform=transform)
    train_loader = DataLoader(train_dset, batch_size=CONFIG.batch_size, shuffle=True, num_workers=CONFIG.num_worker, pin_memory=True, collate_fn=train_collater)
    val_dset = CocoDataset(CONFIG.data_path, mode="val", tokenizer=tokenizer, transform=transform)
    val_loader = DataLoader(val_dset, batch_size=CONFIG.batch_size, shuffle=True, num_workers=CONFIG.num_worker, pin_memory=True, collate_fn=val_collater)
    logging.debug("done!")

    logging.debug("loading model...")
    # torch.cuda.is_available is a function and must be *called*: the bare
    # attribute is always truthy, which selected "cuda" even on CPU-only hosts.
    if torch.cuda.is_available():
        device = torch.device("cuda")
        logging.debug("using {} GPU(s)".format(torch.cuda.device_count()))
    else:
        device = torch.device("cpu")
        logging.debug("using CPU")
    if CONFIG.attention:
        model = Captioning_Attention(cnn_type=CONFIG.cnn_arch, pretrained=True, spatial_size=CONFIG.spatial_size, emb_dim=CONFIG.emb_dim, memory_dim=CONFIG.memory_dim,
            vocab_size=len(tokenizer), max_seqlen=CONFIG.max_len, dropout_p=CONFIG.dropout_p, ss_prob=CONFIG.ss_prob, bos_idx=tokenizer.bosidx)
示例#18
0
def main():
    """Embed the validation set, save the embeddings and plot a 2-D view.

    All settings come from ``parse_args()``. Restores the image and caption
    encoders from ``args.checkpoint``, builds an ``EmbedDataset``, stacks the
    image embeddings on top of the caption embeddings, saves them under
    ``out/<config_name>/`` and runs ``dimension_reduction`` followed by
    ``plot_embeddings`` with ``args.method``.

    Raises:
        ValueError: if ``args.dataset`` is not ``"coco"``.
        AssertionError: if ``args.checkpoint`` is ``None``.
    """
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # Fail explicitly instead of hitting an unbound `val_dset` further down.
    if args.dataset != "coco":
        raise ValueError("unsupported dataset: {!r}".format(args.dataset))
    val_dset = CocoDataset(
        root=args.root_path,
        split="val",
        transform=transform,
    )
    val_loader = DataLoader(
        val_dset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.n_cpu,
        collate_fn=collater,
    )

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    assert args.checkpoint is not None, "a checkpoint path is required"
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint),
          flush=True)
    ckpt = torch.load(args.checkpoint, map_location=device)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    begin = time.time()
    dset = EmbedDataset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)

    savedir = os.path.join("out", args.config_name)
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(savedir, 0o777, exist_ok=True)

    image = dset.embedded["image"]
    caption = dset.embedded["caption"]
    n_i = image.shape[0]
    # Image rows come first, so row n_i is where the captions start.
    embeddings = np.concatenate([image, caption], axis=0)

    emb_file = os.path.join(savedir, "embedding_{}.npy".format(n_i))
    save_file = os.path.join(savedir, "{}.npy".format(args.method))
    vis_file = os.path.join(savedir, "{}.png".format(args.method))
    np.save(emb_file, embeddings)
    print("saved embeddings to {}".format(emb_file), flush=True)
    dimension_reduction(emb_file, save_file, method=args.method)
    plot_embeddings(save_file, n_i, vis_file, method=args.method)