def Val_Dataset(self, root_dir, coco_dir, img_dir, set_dir):
    """Register the COCO-format validation dataset and build its DataLoader.

    Args:
        root_dir: Root directory that contains ``coco_dir``.
        coco_dir: COCO dataset folder name under ``root_dir``.
        img_dir: Image folder name inside the COCO folder.
        set_dir: Split/annotation-set folder name.
    """
    val_cfg = self.system_dict["dataset"]["val"]
    val_cfg["status"] = True
    val_cfg["root_dir"] = root_dir
    val_cfg["coco_dir"] = coco_dir
    val_cfg["img_dir"] = img_dir
    val_cfg["set_dir"] = set_dir

    params = self.system_dict["params"]
    # Validation loader: fixed order, keep the final partial batch.
    self.system_dict["local"]["val_params"] = {
        "batch_size": params["batch_size"],
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": params["num_workers"],
    }

    # No augmentation at validation time: normalize and resize only.
    self.system_dict["local"]["val_set"] = CocoDataset(
        root_dir=val_cfg["root_dir"] + "/" + val_cfg["coco_dir"],
        img_dir=val_cfg["img_dir"],
        set_dir=val_cfg["set_dir"],
        transform=transforms.Compose([Normalizer(), Resizer()]),
    )
    self.system_dict["local"]["test_generator"] = DataLoader(
        self.system_dict["local"]["val_set"],
        **self.system_dict["local"]["val_params"])
def Train_Dataset(self, root_dir, coco_dir, img_dir, set_dir,
                  batch_size=8, image_size=512, use_gpu=True, num_workers=3):
    """Register the COCO-format training dataset and build its DataLoader.

    Args:
        root_dir: Root directory that contains ``coco_dir``.
        coco_dir: COCO dataset folder name under ``root_dir``.
        img_dir: Image folder name inside the COCO folder.
        set_dir: Split/annotation-set folder name.
        batch_size: Per-GPU batch size (scaled by the GPU count below).
        image_size: Target image size stored in the params dict.
        use_gpu: Train on CUDA devices when available.
        num_workers: Number of DataLoader worker processes.
    """
    train_cfg = self.system_dict["dataset"]["train"]
    train_cfg["root_dir"] = root_dir
    train_cfg["coco_dir"] = coco_dir
    train_cfg["img_dir"] = img_dir
    train_cfg["set_dir"] = set_dir

    params = self.system_dict["params"]
    params["batch_size"] = batch_size
    params["image_size"] = image_size
    params["use_gpu"] = use_gpu
    params["num_workers"] = num_workers

    # BUGFIX: "num_gpus" used to be assigned only on the CUDA path, yet it is
    # read unconditionally when computing the effective batch size below, so
    # CPU-only runs (use_gpu=False or no CUDA device) raised a KeyError.
    # Default to 1 and only override it when CUDA is actually used.
    self.system_dict["local"]["num_gpus"] = 1
    if params["use_gpu"] and torch.cuda.is_available():
        self.system_dict["local"]["num_gpus"] = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    self.system_dict["local"]["training_params"] = {
        # Effective batch size scales with the number of GPUs (DataParallel).
        "batch_size":
        params["batch_size"] * self.system_dict["local"]["num_gpus"],
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": params["num_workers"],
    }

    self.system_dict["local"]["training_set"] = CocoDataset(
        root_dir=train_cfg["root_dir"] + "/" + train_cfg["coco_dir"],
        img_dir=train_cfg["img_dir"],
        set_dir=train_cfg["set_dir"],
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]),
    )
    self.system_dict["local"]["training_generator"] = DataLoader(
        self.system_dict["local"]["training_set"],
        **self.system_dict["local"]["training_params"])
def main(args):
    """Generate and print a caption for ``args.image`` using trained models."""
    # Same normalization statistics the encoder was trained with.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    data_root = "data/"
    json_path = data_root + "annotations/captions_train2014.json"
    root_dir = data_root + "train2014"
    dataset = CocoDataset(json_path=json_path, root_dir=root_dir,
                          transform=transform)
    data_loader = get_data_loader(dataset, batch_size=32)

    # Build the models in eval mode (batchnorm uses moving mean/variance).
    encoder = FeatureExtractor(args.embed_size).eval()
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Restore the trained weights.
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Encode the query image and sample a sequence of token ids.
    image = load_image(args.image, transform)
    image_tensor = image.to(device)
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length,)

    # Map token ids back to words, stopping at the end-of-sentence marker.
    words = []
    for word_id in sampled_ids:
        token = data_loader.dataset.id_to_word[word_id]
        words.append(token)
        if token == '<end>':
            break
    sentence = ' '.join(words)

    # Show the generated caption alongside the input image.
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
def get_loader(root, json, transform, batch_size, shuffle, num_workers):
    """Return a ``torch.utils.data.DataLoader`` for the custom COCO caption dataset."""
    dataset = CocoDataset(root=root, json=json, transform=transform)
    return torch.utils.data.DataLoader(dataset=dataset,
                                       batch_size=batch_size,
                                       shuffle=shuffle,
                                       num_workers=num_workers)
def main():
    """Evaluate an SPVSE checkpoint on the COCO validation split."""
    args = parse_args()

    # Deterministic resize + center-crop preprocessing for evaluation.
    transform = transforms.Compose([
        transforms.Resize(args.imsize_pre),
        transforms.CenterCrop(args.imsize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    if args.dataset == "coco":
        val_dset = CocoDataset(root=args.root_path, split="val",
                               transform=transform)
        val_loader = DataLoader(val_dset, batch_size=args.batch_size,
                                shuffle=False, num_workers=args.n_cpu,
                                collate_fn=collater)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    model = SPVSE(len(vocab), args.emb_size, args.out_size, args.max_len,
                  args.cnn_type, args.rnn_type,
                  pad_idx=vocab.padidx, bos_idx=vocab.bosidx)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    model = model.to(device)

    # Evaluation requires a trained checkpoint.
    assert args.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint), flush=True)
    ckpt = torch.load(args.checkpoint, map_location=device)
    model.load_state_dict(ckpt["model_state"])

    _ = validate(1000, val_loader, model, vocab, args)
def main():
    """Run image<->caption retrieval evaluation for a trained encoder pair."""
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    if args.dataset == 'coco':
        val_dset = CocoDataset(root=args.root_path, imgdir='val2017',
                               jsonfile='annotations/captions_val2017.json',
                               transform=transform, mode='all')
        val_loader = DataLoader(val_dset, batch_size=args.batch_size,
                                shuffle=False, num_workers=args.n_cpu,
                                collate_fn=collater_eval)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    imenc = imenc.to(device)
    capenc = capenc.to(device)

    # Evaluation requires a trained checkpoint.
    assert args.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint), flush=True)
    ckpt = torch.load(args.checkpoint)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    # Precompute the embedding database, then run both retrieval directions.
    begin = time.time()
    dset = EmbedDataset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)
    retrieve_i2c(dset, val_dset, imenc, vocab, args)
    retrieve_c2i(dset, val_dset, capenc, vocab, args)
def get_dataset(image_dir, json_path, resize_dim):
    """Build a COCO dataset with a resize transform.

    Returns:
        A ``(dataset, num_classes)`` tuple.
    """
    resize_transform = trsf.Compose([Resizer(resize_dim)])
    dataset = CocoDataset(image_dir, json_path, transforms=resize_transform)
    return dataset, dataset.num_classes
def main():
    """Train SPVSE on COCO with checkpointing, LR scheduling and early stopping."""
    args = parse_args()
    print(args)

    # Random crop/flip for training; deterministic center crop for validation.
    train_transform = transforms.Compose([
        transforms.Resize(args.imsize_pre),
        transforms.RandomCrop(args.imsize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    val_transform = transforms.Compose([
        transforms.Resize(args.imsize_pre),
        transforms.CenterCrop(args.imsize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    if args.dataset == "coco":
        train_dset = CocoDataset(root=args.root_path, split="train",
                                 transform=train_transform)
        val_dset = CocoDataset(root=args.root_path, split="val",
                               transform=val_transform)
        train_loader = DataLoader(train_dset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.n_cpu,
                                  collate_fn=collater)
        val_loader = DataLoader(val_dset, batch_size=args.batch_size,
                                shuffle=False, num_workers=args.n_cpu,
                                collate_fn=collater)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    model = SPVSE(len(vocab), args.emb_size, args.out_size, args.max_len,
                  args.cnn_type, args.rnn_type,
                  pad_idx=vocab.padidx, bos_idx=vocab.bosidx)
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    model = model.to(device)

    # Optionally start with the backbone frozen for the first freeze_ep epochs.
    if args.freeze_ep > 0:
        model.freeze()
        print("freezing model")

    # One parameter group per submodule so each gets its own learning rate.
    param_groups = [
        {"params": model.im_enc.parameters(), "lr": args.lr_cnn},
        {"params": model.cap_enc.parameters(), "lr": args.lr_rnn},
        {"params": model.cap_gen.parameters(), "lr": args.lr_gen},
        {"params": model.cap_rec.parameters(), "lr": args.lr_rec},
    ]
    if args.optimizer == "SGD":
        optimizer = optim.SGD(param_groups, momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer == "Adam":
        optimizer = optim.Adam(param_groups, betas=(args.beta1, args.beta2),
                               weight_decay=args.weight_decay)
    elif args.optimizer == "RMSprop":
        optimizer = optim.RMSprop(param_groups, alpha=args.alpha,
                                  weight_decay=args.weight_decay)

    if args.scheduler == "Plateau":
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="max", factor=args.dampen_factor,
            patience=args.patience, verbose=True)
    elif args.scheduler == "Step":
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.patience,
                                              gamma=args.dampen_factor)

    lossfunc = SPVSELoss(vocab.padidx, weight_rank=args.weight_rank,
                         weight_gen=args.weight_gen,
                         weight_rec=args.weight_rec)

    # Optionally resume model/optimizer/scheduler state and the best score.
    if args.checkpoint is not None:
        print("loading model and optimizer checkpoint from {} ...".format(
            args.checkpoint), flush=True)
        ckpt = torch.load(args.checkpoint)
        model.load_state_dict(ckpt["model_state"])
        optimizer.load_state_dict(ckpt["optimizer_state"])
        if args.scheduler != "None":
            scheduler.load_state_dict(ckpt["scheduler_state"])
        offset = ckpt["epoch"]
        stats = ckpt["stats"]
        bestscore = 0
        for rank in [1, 5, 10, 20]:
            bestscore += stats["i2c_recall@{}".format(rank)] + stats[
                "c2i_recall@{}".format(rank)]
        bestscore = int(bestscore)
    else:
        offset = 0
        bestscore = -1

    model = nn.DataParallel(model)

    metrics = {}
    es_cnt = 0  # epochs without improvement, for early stopping
    assert offset < args.max_epochs
    for epoch in range(offset, args.max_epochs):
        if epoch == args.freeze_ep:
            model.module.unfreeze()
            print("unfreezing model")

        train(epoch + 1, train_loader, model, optimizer, lossfunc, vocab, args)
        stats = validate(epoch + 1, val_loader, model, vocab, args)

        # Aggregate recall@{1,5,10,20} in both directions into one score.
        totalscore = 0
        for rank in [1, 5, 10, 20]:
            totalscore += stats["i2c_recall@{}".format(rank)] + stats[
                "c2i_recall@{}".format(rank)]
        totalscore = int(totalscore)

        if args.scheduler == "Plateau":
            scheduler.step(totalscore)
        if args.scheduler == "Step":
            scheduler.step()

        # Assemble this epoch's checkpoint.
        ckpt = {
            "stats": stats,
            "epoch": epoch + 1,
            "model_state": model.module.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }
        if args.scheduler != "None":
            ckpt["scheduler_state"] = scheduler.state_dict()

        savedir = os.path.join("models", args.config_name)
        if not os.path.exists(savedir):
            os.makedirs(savedir)

        # Accumulate per-metric history for later visualization.
        for k, v in stats.items():
            if k not in metrics.keys():
                metrics[k] = [v]
            else:
                metrics[k].append(v)

        savepath = os.path.join(
            savedir,
            "epoch_{:04d}_score_{:03d}.ckpt".format(epoch + 1, totalscore))
        if int(totalscore) > int(bestscore):
            print(
                "score: {:03d}, saving model and optimizer checkpoint to {} ..."
                .format(totalscore, savepath),
                flush=True,
            )
            bestscore = totalscore
            torch.save(ckpt, savepath)
            es_cnt = 0
        else:
            print(
                "score: {:03d}, no improvement from best score of {:03d}, not saving"
                .format(totalscore, bestscore),
                flush=True,
            )
            es_cnt += 1
            if es_cnt == args.es_cnt:
                print(
                    "early stopping at epoch {} because of no improvement for {} epochs"
                    .format(epoch + 1, args.es_cnt))
                break

        print("done for epoch {:04d}".format(epoch + 1), flush=True)

    visualize(metrics, args)
    print("complete training")
n_train, "\nNumbers of validation images: ", n_valid, ) print( "This training model:" "\nBiFPN:", bifpn_mode, "\nEvaluate traning model:", eval_train_mode, ) print() print() print() train_dataset = CocoDataset(cfg.train_path, train_id, Is_Train=True) valid_dataset = CocoDataset(cfg.train_path, valid_id, Is_Train=False) batch_size = cfg.batch_size accumulation_steps = cfg.accumulation_steps workers = cfg.num_workers train_loader = DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=workers, collate_fn=train_dataset.collate_fn, ) valid_loader = DataLoader( valid_dataset,
"ids": "../data/coco_bodypose/ids.pkl", "file_info": "../data/coco_bodypose/file_infos.pkl", "annotations": "../data/coco_bodypose/annotation_ids.pkl" } val_file_info = { "ids": "../data/coco_bodypose/val_ids.pkl", "file_info": "../data/coco_bodypose/val_file_infos.pkl", "annotations": "../data/coco_bodypose/val_annotation_ids.pkl" } if parser.state == "train": # data augumentation data_transforms = COCOTransformation(height=FIX_HEIGHT, width=FIX_WIDTH) trainSet = CocoDataset(parser.train, train_file_info, transform=data_transforms) # trainSet = CocoDataset(parser.val, val_file_info, transform=data_transforms) valSet = CocoDataset(parser.val, val_file_info, transform=data_transforms) trainLoader = DataLoader(trainSet, batch_size=20, shuffle=True, num_workers=10) valLoader = DataLoader(valSet, batch_size=10, shuffle=False, num_workers=5) model = OpenPoseLightning() loss = compute_loss optimizer = torch.optim.Adam(model.parameters(), lr=parser.lr) train_frame = TrainingProcessOpenPose(trainLoader, valLoader, optimizer, loss, model, num_epoch=10,
# Detection / NMS hyperparameters and the fixed network input resolution.
confidence_threshold = 0.5
nms_threshold = 0.4
input_width = 416
input_height = 416

# COCO 2014 image folders and instance annotation files.
train_path = '/home/user/Data/coco2014/train2014'
train_ann_file = '/home/user/Data/coco2014/annotations/instances_train2014.json'
val_path = '/home/user/Data/coco2014/val2014'
val_ann_file = '/home/user/Data/coco2014/annotations/instances_val2014.json'

detector = Detector(cls_file, cfg_file, weight_file, confidence_threshold,
                    nms_threshold, input_width, input_height)

# One image per batch, in dataset order, so detections map back to image ids.
train_dataset = CocoDataset(train_path, train_ann_file, num_classes)
val_dataset = CocoDataset(val_path, val_ann_file, num_classes)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False,
                          num_workers=1)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                        num_workers=1)

# Detections accumulated per split.
train_detections = {}
val_detections = {}
print('Running...')
import matplotlib.pyplot as plt
from pycocotools.coco import COCO

# COCO 2014 image folders and instance annotation files.
train_path = '/home/user/Data/coco2014/train2014'
train_ann_file = '/home/user/Data/coco2014/annotations/instances_train2014.json'
val_path = '/home/user/Data/coco2014/val2014'
val_ann_file = '/home/user/Data/coco2014/annotations/instances_val2014.json'

coco = COCO(train_ann_file)
num_labels = 80  # number of COCO object categories

# ImageNet normalization on 224x224 inputs.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_dataset = CocoDataset(train_path, train_ann_file, transform, num_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2,
                                           shuffle=True)

# Visual sanity check on a single batch.
images, labels = next(iter(train_loader))
print(f'images.shape: {images.shape}')
print(labels.shape)
img = tensor_to_image(images[0])
class_names = get_classes_from_labels(labels[0])
print(class_names)
plt.imshow(img)
plt.show()
from dataset import CocoDataset
from model import RetinaNet

if __name__ == '__main__':
    # Smoke test: push one COCO sample through RetinaNet and print the losses.
    coco = CocoDataset()
    # Idiom fix: use indexing instead of calling __getitem__ directly.
    item = coco[0]
    net = RetinaNet()
    losses = net(item['img'].data.unsqueeze(0), item['img_meta'].data,
                 item['gt_bboxes'].data, item['gt_labels'].data)
    print(losses)
def main():
    """Train an image/caption encoder pair with a pairwise ranking loss."""
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    if args.dataset == 'coco':
        # One caption per image for training, all captions for evaluation.
        train_dset = CocoDataset(root=args.root_path, transform=transform,
                                 mode='one')
        val_dset = CocoDataset(root=args.root_path, imgdir='val2017',
                               jsonfile='annotations/captions_val2017.json',
                               transform=transform, mode='all')
        train_loader = DataLoader(train_dset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.n_cpu,
                                  collate_fn=collater_train)
        val_loader = DataLoader(val_dset, batch_size=args.batch_size,
                                shuffle=False, num_workers=args.n_cpu,
                                collate_fn=collater_eval)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    imenc = imenc.to(device)
    capenc = capenc.to(device)

    # Separate learning rates / momenta for the CNN and RNN branches.
    optimizer = optim.SGD([
        {'params': imenc.parameters(), 'lr': args.lr_cnn,
         'momentum': args.mom_cnn},
        {'params': capenc.parameters(), 'lr': args.lr_rnn,
         'momentum': args.mom_rnn},
    ])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
                                                     factor=0.1,
                                                     patience=args.patience,
                                                     verbose=True)
    lossfunc = PairwiseRankingLoss(margin=args.margin, method=args.method,
                                   improved=args.improved, intra=args.intra)

    # Optionally resume from a checkpoint.
    if args.checkpoint is not None:
        print("loading model and optimizer checkpoint from {} ...".format(
            args.checkpoint), flush=True)
        ckpt = torch.load(args.checkpoint)
        imenc.load_state_dict(ckpt["encoder_state"])
        capenc.load_state_dict(ckpt["decoder_state"])
        optimizer.load_state_dict(ckpt["optimizer_state"])
        scheduler.load_state_dict(ckpt["scheduler_state"])
        offset = ckpt["epoch"]
    else:
        offset = 0

    imenc = nn.DataParallel(imenc)
    capenc = nn.DataParallel(capenc)

    metrics = {}
    assert offset < args.max_epochs
    for epoch in range(offset, args.max_epochs):
        imenc, capenc, optimizer = train(epoch + 1, train_loader, imenc,
                                         capenc, optimizer, lossfunc, vocab,
                                         args)
        stats = validate(epoch + 1, val_loader, imenc, capenc, vocab, args)

        # Sum recall@{1,5,10,20} over both retrieval directions.
        totalscore = 0
        for rank in [1, 5, 10, 20]:
            totalscore += stats["i2c_recall@{}".format(rank)] + stats[
                "c2i_recall@{}".format(rank)]
        scheduler.step(totalscore)

        # Save a checkpoint every epoch.
        ckpt = {
            "stats": stats,
            "epoch": epoch + 1,
            "encoder_state": imenc.module.state_dict(),
            "decoder_state": capenc.module.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "scheduler_state": scheduler.state_dict()
        }
        if not os.path.exists(args.model_save_path):
            os.makedirs(args.model_save_path)
        savepath = os.path.join(
            args.model_save_path,
            "epoch_{:04d}_score_{:05d}.ckpt".format(epoch + 1,
                                                    int(100 * totalscore)))
        print(
            "saving model and optimizer checkpoint to {} ...".format(savepath),
            flush=True)
        torch.save(ckpt, savepath)
        print("done for epoch {}".format(epoch + 1), flush=True)

        # Accumulate per-metric history for later visualization.
        for k, v in stats.items():
            if k not in metrics.keys():
                metrics[k] = [v]
            else:
                metrics[k].append(v)

    visualize(metrics, args)
} val_params = { 'batch_size': batch_size, 'shuffle': False, 'drop_last': True, 'collate_fn': collater, 'num_workers': num_workers } input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536] training_set = CocoDataset(root_dir=os.path.join(data_path, params.project_name), set=params.train_set, transform=transforms.Compose([ Normalizer(mean=params.mean, std=params.std), Augmenter(), Resizer(input_sizes[compound_coef]) ])) training_generator = DataLoader(training_set, **training_params) val_set = CocoDataset(root_dir=os.path.join(data_path, params.project_name), set=params.val_set, transform=transforms.Compose([ Normalizer(mean=params.mean, std=params.std), Resizer(input_sizes[compound_coef]) ])) val_generator = DataLoader(val_set, **val_params)
def main(num_epochs=10, embedding_dim=256, data_dir="data/"):
    """Train the CNN encoder + LSTM decoder captioning model.

    Args:
        num_epochs: int
            Number of full dataset iterations to train the model.
        embedding_dim: int
            Output of the CNN model and input of the LSTM embedding size.
        data_dir: str
            Path to the folder of the data.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"WORKING WITH: {device}")

    # Paths for the train and validation splits.
    train_json_path = data_dir + "annotations/captions_train2014.json"
    train_root_dir = data_dir + "train2014"
    valid_json_path = data_dir + "annotations/captions_val2014.json"
    valid_root_dir = data_dir + "val2014"

    transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = CocoDataset(json_path=train_json_path,
                                root_dir=train_root_dir, transform=transform)
    train_coco_dataset = get_data_loader(train_dataset, batch_size=128)
    valid_dataset = CocoDataset(json_path=valid_json_path,
                                root_dir=valid_root_dir, transform=transform)
    valid_coco_dataset = get_data_loader(valid_dataset, batch_size=1)

    encoder = FeatureExtractor(embedding_dim).to(device)
    decoder = CaptionGenerator(embedding_dim, 512,
                               len(train_dataset.vocabulary), 1).to(device)

    criterion = nn.CrossEntropyLoss()
    # Only the decoder and the encoder's projection/batch-norm layers are
    # optimized; the CNN backbone itself stays frozen.
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = optim.Adam(params, lr=0.01)

    print(f"TRAIN DATASET: {len(train_coco_dataset)}")
    print(f"VALID DATASET: {len(valid_coco_dataset)}")
    total_step = len(train_coco_dataset)

    for epoch in range(num_epochs):
        # ----- training pass -----
        encoder.train()
        decoder.train()
        train_loss = 0.0
        valid_loss = 0.0
        for i, (images, captions,
                descriptions) in enumerate(train_coco_dataset):
            images = images.to(device)
            captions = captions.to(device)

            features = encoder(images)
            outputs = decoder(features, captions)
            loss = criterion(outputs.view(-1, len(train_dataset.vocabulary)),
                             captions.view(-1))

            encoder.zero_grad()
            decoder.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            # Save intermediate checkpoints every 1000 steps.
            if (i + 1) % 1000 == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(
                        "models",
                        'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(
                        "models",
                        'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))

        # ----- validation pass (capped at 80000 samples) -----
        encoder.eval()
        decoder.eval()
        bleu = 0.0
        for i, (images, captions,
                descriptions) in enumerate(valid_coco_dataset):
            if (i > 80000):
                break
            images = images.to(device)
            captions = captions.to(device)
            features = encoder(images)
            outputs = decoder(features, captions)
            loss = criterion(outputs.view(-1, len(train_dataset.vocabulary)),
                             captions.view(-1))
            valid_loss += loss.item()
            bleu += calculate_bleu(decoder, features, descriptions,
                                   train_coco_dataset)

        print(
            "Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, BLEU: {:.4f}".
            format(epoch, train_loss / len(train_coco_dataset),
                   valid_loss / 80000, bleu / 80000))
logging.debug("Initializing tokenizer and loading vocabulary from {} ...".format(
    os.path.join(CONFIG.data_path, CONFIG.caption_file_path)))
tokenizer = BasicTokenizer(min_freq=CONFIG.min_freq, max_len=CONFIG.max_len)
tokenizer.from_textfile(os.path.join(CONFIG.data_path, CONFIG.caption_file_path))
logging.debug("done!")

logging.debug("Initializing Dataset...")
# Random crop/flip augmentation with ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
train_dset = CocoDataset(CONFIG.data_path, mode="train", tokenizer=tokenizer,
                         transform=transform)
train_loader = DataLoader(train_dset, batch_size=CONFIG.batch_size,
                          shuffle=True, num_workers=CONFIG.num_worker,
                          pin_memory=True, collate_fn=train_collater)
val_dset = CocoDataset(CONFIG.data_path, mode="val", tokenizer=tokenizer,
                       transform=transform)
val_loader = DataLoader(val_dset, batch_size=CONFIG.batch_size, shuffle=True,
                        num_workers=CONFIG.num_worker, pin_memory=True,
                        collate_fn=val_collater)
logging.debug("done!")

logging.debug("loading model...")
# BUGFIX: torch.cuda.is_available is a function; the original tested the bare
# function object (always truthy), making the CPU branch unreachable and
# crashing on machines without CUDA. It must be called.
if torch.cuda.is_available():
    device = torch.device("cuda")
    logging.debug("using {} GPU(s)".format(torch.cuda.device_count()))
else:
    device = torch.device("cpu")
    logging.debug("using CPU")

if CONFIG.attention:
    model = Captioning_Attention(cnn_type=CONFIG.cnn_arch, pretrained=True,
                                 spatial_size=CONFIG.spatial_size,
                                 emb_dim=CONFIG.emb_dim,
                                 memory_dim=CONFIG.memory_dim,
                                 vocab_size=len(tokenizer),
                                 max_seqlen=CONFIG.max_len,
                                 dropout_p=CONFIG.dropout_p,
                                 ss_prob=CONFIG.ss_prob,
                                 bos_idx=tokenizer.bosidx)
def main():
    """Embed COCO val images/captions with a trained model and visualize them."""
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    if args.dataset == "coco":
        val_dset = CocoDataset(root=args.root_path, split="val",
                               transform=transform)
        val_loader = DataLoader(val_dset, batch_size=args.batch_size,
                                shuffle=False, num_workers=args.n_cpu,
                                collate_fn=collater)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    imenc = imenc.to(device)
    capenc = capenc.to(device)

    # Evaluation requires a trained checkpoint.
    assert args.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint), flush=True)
    ckpt = torch.load(args.checkpoint, map_location=device)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    # Build the joint embedding database for the whole validation split.
    begin = time.time()
    dset = EmbedDataset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)

    savedir = os.path.join("out", args.config_name)
    if not os.path.exists(savedir):
        os.makedirs(savedir, 0o777)

    image = dset.embedded["image"]
    caption = dset.embedded["caption"]
    n_i = image.shape[0]
    # FIX: renamed local from `all` (which shadowed the builtin) and dropped
    # the unused caption-count variable.
    all_embeddings = np.concatenate([image, caption], axis=0)
    emb_file = os.path.join(savedir, "embedding_{}.npy".format(n_i))
    save_file = os.path.join(savedir, "{}.npy".format(args.method))
    vis_file = os.path.join(savedir, "{}.png".format(args.method))
    np.save(emb_file, all_embeddings)
    print("saved embeddings to {}".format(emb_file), flush=True)
    dimension_reduction(emb_file, save_file, method=args.method)
    plot_embeddings(save_file, n_i, vis_file, method=args.method)