Example #1
def evaluate_model(epoch_size, num_epochs, device):
    print("Starting evaluate_model(epoch_size=%d, num_epochs=%d, device=%s)" %
          (epoch_size, num_epochs, device))
    start = time.time()
    vgg16 = load_face_model(
        "../../caffemodel2pytorch/gender.caffemodel.pt").to(device)
    img_folder = "../../imdb_crop"
    mat = scipy.io.loadmat("../../imdb/imdb.mat")
    genders = mat['imdb'][0][0][3][0]
    full_paths = mat['imdb'][0][0][2][0]
    print("There are in total %d" % (len(genders)))

    path_idx = 0
    num_correct = 0
    total = 0
    for epoch in range(num_epochs):
        if (path_idx >= len(genders)):
            print("Stopping at epoch %d since no more data points" % path_idx)
            break

        epoch_start = time.time()
        print("Starting epoch %d" % epoch)
        img_batch = []
        genders_batch = []
        while (len(img_batch) < epoch_size):
            if (path_idx >= len(genders)):
                break

            path = os.path.join(img_folder, full_paths[path_idx][0])
            img = Image.open(path)

            # skip grayscale images and images whose gender label is NaN
            if (img.mode == "L" or math.isnan(genders[path_idx])):
                path_idx += 1
                continue

            tensor = preprocess(img)
            img_batch.append(tensor.unsqueeze(0))
            genders_batch.append(int(round(genders[path_idx])))
            path_idx += 1

        img_batch = torch.cat(img_batch, axis=0).to(device) * 255.
        genders_batch = torch.tensor(genders_batch,
                                     device=device,
                                     dtype=torch.int64)

        probs = torch.nn.functional.softmax(vgg16(img_batch), dim=1)
        preds = probs.argmax(axis=1)
        num_correct += torch.sum(torch.eq(preds, genders_batch))
        total += preds.size(0)

        epoch_end = time.time()
        print("Epoch %d took %s" %
              (epoch, utils.sec2str(int(epoch_end - epoch_start))))

    print("acc: ", float(num_correct) / float(total))
    end = time.time()
    print(utils.sec2str(int(end - start)))
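Most of the examples on this page call a sec2str helper (sometimes as utils.sec2str) to turn an elapsed number of seconds into a human-readable string. The helper itself is never shown here; below is a minimal sketch of what such a utility might look like, assuming an H:MM:SS output format.

def sec2str(seconds):
    # Assumed behavior: format an elapsed time in seconds as "H:MM:SS".
    seconds = int(seconds)
    h, rest = divmod(seconds, 3600)
    m, s = divmod(rest, 60)
    return "{:d}:{:02d}:{:02d}".format(h, m, s)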
Example #2
def get_features(epoch_size, num_epochs, vgg16, device):
    print("Starting evaluate_model(epoch_size=%d, num_epochs=%d, device=%s)" %
          (epoch_size, num_epochs, device))
    start = time.time()

    img_folder = "../../imdb_crop"
    mat = scipy.io.loadmat("../../imdb/imdb.mat")
    genders = mat['imdb'][0][0][3][0]
    full_paths = mat['imdb'][0][0][2][0]
    print("There are in total %d" % (len(genders)))

    data = []
    labels = []
    path_idx = 0
    for epoch in range(num_epochs):
        if (path_idx >= len(genders)):
            print("Stopping at epoch %d since no more data points" % path_idx)
            break

        epoch_start = time.time()
        print("Starting epoch %d" % epoch)
        img_batch = []
        genders_batch = []
        while (len(img_batch) < epoch_size):
            if (path_idx >= len(genders)):
                break

            path = os.path.join(img_folder, full_paths[path_idx][0])
            img = Image.open(path)

            # skip grayscale images and images whose gender label is NaN
            if (img.mode == "L" or math.isnan(genders[path_idx])):
                path_idx += 1
                continue

            tensor = preprocess(img)
            img_batch.append(tensor.unsqueeze(0))
            genders_batch.append(int(round(genders[path_idx])))
            path_idx += 1

        img_batch = torch.cat(img_batch, axis=0).to(device) * 255.
        genders_batch = torch.tensor(genders_batch,
                                     device=device,
                                     dtype=torch.int64)

        data.append(vgg16(img_batch))
        labels.append(genders_batch)

        epoch_end = time.time()
        print("Epoch %d took %s" %
              (epoch, utils.sec2str(int(epoch_end - epoch_start))))

    end = time.time()
    print(utils.sec2str(int(end - start)))
    return torch.cat(data, axis=0), torch.cat(labels, axis=0)
Example #3
def fetchAP(cursor, table_name, bssid, essid=None):
    '''Function returns AP records from local database'''
    bssid = mac2dec(bssid)
    query = 'SELECT bssid, essid, sec, key, wps \
             FROM {} \
             WHERE bssid = ?'.format(table_name)
    if essid:
        query += ' AND essid = ?'
        cursor.execute(query, (bssid, essid))
    else:
        cursor.execute(query, (bssid,))
    r = cursor.fetchall()
    entries = []
    for k in r:
        entry = {
            'time': strftime("%Y-%m-%d %H:%M:%S", gmtime()),
            'bssid': dec2mac(k[0]),
            'essid': k[1],
            'sec': sec2str(k[2]),
            'key': k[3] if k[2] else '<empty>',
            'wps': pin2str(k[4])
        }
        entries.append(entry)

    return entries
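Note that sec2str in this example is not the time formatter used elsewhere on this page: given the columns it is applied to, it maps a stored numeric security type to a display name, analogous to pin2str for WPS pins. A hypothetical sketch of such a mapping is shown below; the specific codes and labels are assumptions, not taken from the source.

def sec2str(sec):
    # Hypothetical mapping from a stored security-type code to a label.
    labels = {0: 'Open', 1: 'WEP', 2: 'WPA', 3: 'WPA2'}
    return labels.get(sec, 'Unknown')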
Example #4
def retrieve_c2i(dset, v_dset, capenc, vocab, args):
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    begin = time.time()
    print("-" * 50)
    print("retrieving nearest image to: '{}'".format(args.caption), flush=True)
    cap = vocab.return_idx([args.caption])
    length = [torch.sum(torch.ne(cap, vocab.padidx)).item()]
    with torch.no_grad():
        cap = cap.to(device)
        cap = capenc(cap, length)
    cap = cap.cpu().numpy()
    im = dset.embedded["image"]
    nd = im.shape[0]
    d = im.shape[1]
    cpu_index = faiss.IndexFlatIP(d)
    print("# images: {}, dimension: {}".format(nd, d), flush=True)

    # cap2im
    cpu_index.add(im)
    D, I = cpu_index.search(cap, 5)
    nnidx = I[0, 0]
    nnim_id = dset.embedded["img_id"][nnidx]
    img = v_dset.coco.loadImgs(nnim_id)[0]
    nnim = io.imread(img['coco_url'])
    plt.title("nearest neighbor of '{}'".format(args.caption))
    plt.axis('off')
    plt.imshow(nnim)
    plt.show(block=False)
    print("retrieval time {}".format(sec2str(time.time() - begin)), flush=True)
    print("-" * 50)
    plt.show()
    return
Example #5
def retrieve_i2c(dset, v_dset, imenc, vocab, args):
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    begin = time.time()
    print("-" * 50)
    print("retrieving nearest caption to: '{}'".format(args.image_path),
          flush=True)
    im = Image.open(args.image_path)
    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    im = transform(im).unsqueeze(0)
    with torch.no_grad():
        im = im.to(device)
        im = imenc(im)
    im = im.cpu().numpy()
    cap = dset.embedded["caption"]
    nd = cap.shape[0]
    d = cap.shape[1]
    cpu_index = faiss.IndexFlatIP(d)
    print("# captions: {}, dimension: {}".format(nd, d), flush=True)

    # im2cap
    cpu_index.add(cap)
    D, I = cpu_index.search(im, 5)
    nnidx = I[0, 0]
    nnann_id = dset.embedded["ann_id"][nnidx]
    anns = v_dset.coco.loadAnns(nnann_id)
    print("retrieval time {}".format(sec2str(time.time() - begin)), flush=True)
    v_dset.coco.showAnns(anns)
    print("-" * 50)
    return
Example #6
def train(epoch, loader, imenc, capenc, optimizer, lossfunc, vocab, args):
    begin = time.time()
    maxit = int(len(loader.dataset) / args.batch_size)
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    cumloss = 0
    for it, data in enumerate(loader):
        """image, target, index, img_id"""
        image = data["image"]
        caption = data["caption"]
        img_id = data["img_id"]
        target = vocab.return_idx(caption)
        lengths = target.ne(vocab.padidx).sum(dim=1)

        optimizer.zero_grad()

        image = image.to(device)
        target = target.to(device)

        im_emb = imenc(image)
        cap_emb = capenc(target, lengths)
        lossval = lossfunc(im_emb, cap_emb)
        lossval.backward()
        optimizer.step()
        cumloss += lossval.item()
        if it % args.log_every == args.log_every - 1:
            print("epoch {} | {} | {:06d}/{:06d} iterations | loss: {:.08f}".
                  format(epoch, sec2str(time.time() - begin), it + 1, maxit,
                         cumloss / args.log_every),
                  flush=True)
            cumloss = 0
    return imenc, capenc, optimizer
Example #7
    def dumpScriptNames(self, os):
            """
            write list of scripts to logger
            """
            store = Store.of(self)
            pks = store.find(buildset_script, buildset_script.buildset_id == self.id)
            scripts = pks.order_by(Asc(buildset_script.idx))
            counter_len = counter_length(scripts.count())
            arr = [ ['idx ', 'name', ' duration'] ]
            idx = 0

            def scriptPath(_pack):
                """
                Return the chain of parent scripts of _pack.script, outermost first.
                """
                res = []
                cur = _pack.script
                while cur.parent:
                    res.insert(0, cur.parent)
                    cur = cur.parent
                return res

            def new(depth, pack_name, pack_idx, dur = None):
                if dur is None:
                    dur = "  -"
                arr2 = []
                arr2.append(pack_idx)
                arr2.append(" " * (depth * 3) + pack_name)
                arr2.append(" %s" % (str(dur).split('.')[0],))
                return arr2

            last_path = []
            for pack in scripts:

                path = scriptPath(pack)
                depth = len(path)
                if last_path != path and depth:
                    arr.append(new(depth - 1,
                                   path[-1].name,
                                   ' '.rjust(counter_len,' ')))
                last_path = path

                # Look up the duration of the last successful run of this script
                prevrun = store.find(build_script_status,
                                     build_script_status.buildset_script_id == pack.id,
                                     build_script_status.exit_code == 0
                                     ).order_by(Desc(build_script_status.id)).first()

                dur = "  -"
                if prevrun and prevrun.end_time and prevrun.start_time:
                    dur = prevrun.end_time - prevrun.start_time
                    dur = utils.sec2str(dur.seconds)
                arr.append(new(depth,
                               pack.script.name, 
                               str(pack.idx).rjust(counter_len,'0'),
                               dur))
                idx += 1
            os(table_layout(arr, True, "    ", False))
Example #8
def main():
    args = get_arguments()
    SETTING = Dict(
        yaml.safe_load(
            open(os.path.join('arguments', args.arg + '.yaml'),
                 encoding='utf8')))
    print(args)
    args.device = list(map(str, args.device))
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(args.device)

    # image transformer
    transform = transforms.Compose([
        transforms.Resize((SETTING.imsize, SETTING.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
        ])

    if args.dataset == 'coco':
        val_dset = CocoDset(root=SETTING.root_path, img_dir='val2017', ann_dir='annotations/captions_val2017.json', transform=transform)
    val_loader = DataLoader(val_dset, batch_size=SETTING.batch_size, shuffle=False, num_workers=SETTING.n_cpu, collate_fn=collater)

    vocab = Vocabulary(max_len=SETTING.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(SETTING.out_size, SETTING.cnn_type)
    capenc = CaptionEncoder(len(vocab), SETTING.emb_size, SETTING.out_size, SETTING.rnn_type)

    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    assert args.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(args.checkpoint), flush=True)
    ckpt = torch.load(args.checkpoint, map_location=device)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    begin = time.time()
    dset = EmbedDset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time()-begin)), flush=True)

    savedir = os.path.join("out", args.config_name)
    if not os.path.exists(savedir):
        os.makedirs(savedir, 0o777)

    image = dset.embedded["image"]
    caption = dset.embedded["caption"]
    n_i = image.shape[0]
    n_c = caption.shape[0]
    all = np.concatenate([image, caption], axis=0)

    emb_file = os.path.join(savedir, "embedding_{}.npy".format(n_i))
    save_file = os.path.join(savedir, "{}.npy".format(SETTING.method))
    vis_file = os.path.join(savedir, "{}.png".format(SETTING.method))
    np.save(emb_file, all)
    print("saved embeddings to {}".format(emb_file), flush=True)
    dimension_reduction(emb_file, save_file, method=SETTING.method)
    plot_embeddings(save_file, n_i, vis_file, method=SETTING.method)
Example #9
def validate(epoch, loader, imenc, capenc, vocab, args, SETTING):
    begin = time.time()
    print("begin validation for epoch {}".format(epoch), flush=True)
    dset = EmbedDset(loader, imenc, capenc, vocab, args)
    print("val dataset created | {} ".format(sec2str(time.time()-begin)), flush=True)
    im = dset.embedded["image"]
    cap = dset.embedded["caption"]

    nd = im.shape[0]
    nq = cap.shape[0]
    d = im.shape[1]
    cpu_index = faiss.IndexFlatIP(d)

    print("# images: {}, # captions: {}, dimension: {}".format(nd, nq, d), flush=True)

    # im2cap
    cpu_index.add(cap)
    # compute similarities for every image-caption pair and sort
    # D = similarity scores, I = indices of the retrieved captions
    D, I = cpu_index.search(im, nq)
    data = {}
    allrank = []
    # TODO: Make more efficient, do not hardcode 5
    cap_per_image = 5
    # find the rank of each ground-truth caption (there are 5 per image)
    for i in range(cap_per_image):
        gt = (np.arange(nd) * cap_per_image).reshape(-1, 1) + i
        rank = np.where(I == gt)[1]
        allrank.append(rank)
    allrank = np.stack(allrank)
    # keep the minimum rank over the 5 ground-truth captions per image
    allrank = np.amin(allrank, 0)
    # recall@K: fraction of images whose correct caption is ranked within the top K
    for rank in [1, 5, 10, 20]:
        data["i2c_recall@{}".format(rank)] = 100 * np.sum(allrank < rank) / len(allrank)
    data["i2c_median@r"] = np.median(allrank) + 1
    data["i2c_mean@r"] = np.mean(allrank)

    # cap2im
    cpu_index.reset()
    cpu_index.add(im)
    D, I = cpu_index.search(cap, nd)
    # TODO: Make more efficient, do not hardcode 5
    gt = np.arange(nq).reshape(-1, 1) // cap_per_image
    allrank = np.where(I == gt)[1]
    for rank in [1, 5, 10, 20]:
        data["c2i_recall@{}".format(rank)] = 100 * np.sum(allrank < rank) / len(allrank)
    data["c2i_median@r"] = np.median(allrank) + 1
    data["c2i_mean@r"] = np.mean(allrank)

    print("-"*50)
    print("results of cross-modal retrieval")
    for key, val in data.items():
        print("{}: {}".format(key, val), flush=True)
    print("-"*50)
    return data
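The recall computation above relies on a broadcasting trick: the result matrix I (one row of ranked indices per query) is compared against a column vector of ground-truth indices, and the column coordinate of the match is the rank of the correct answer. A small self-contained sketch of the same idea on toy data:

import numpy as np

# Toy retrieval result: each row lists item indices sorted by similarity.
I = np.array([[2, 0, 1],
              [1, 2, 0]])
# Ground-truth item index for each query row.
gt = np.array([[0],
               [1]])
rank = np.where(I == gt)[1]  # column where the correct item appears
print(rank)                  # [1 0] -> recall@1 = 50%, recall@2 = 100%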
Example #10
def main():

    args = get_arguments()
    SETTING = Dict(
        yaml.safe_load(
            open(os.path.join('arguments', args.arg + '.yaml'),
                 encoding='utf8')))
    print(args)
    args.device = list(map(str, args.device))
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(args.device)

    transform = transforms.Compose([
        transforms.Resize((SETTING.imsize, SETTING.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    if args.dataset == 'coco':
        val_dset = CocoDset(root=SETTING.root_path,
                            img_dir='val2017',
                            ann_dir='annotations/captions_val2017.json',
                            transform=transform)
    val_loader = DataLoader(val_dset,
                            batch_size=SETTING.batch_size,
                            shuffle=False,
                            num_workers=SETTING.n_cpu,
                            collate_fn=collater)

    vocab = Vocabulary(max_len=SETTING.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(SETTING.out_size, SETTING.cnn_type)
    capenc = CaptionEncoder(len(vocab), SETTING.emb_size, SETTING.out_size,
                            SETTING.rnn_type)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    assert SETTING.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(
        SETTING.checkpoint),
          flush=True)
    ckpt = torch.load(SETTING.checkpoint)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    begin = time.time()
    dset = EmbedDset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)

    retrieve_i2c(dset, val_dset, args.image_path, imenc, transform)
    retrieve_c2i(dset, val_dset, args.output_dir, args.caption, capenc, vocab)
Example #11
def validate(epoch, loader, imenc, capenc, vocab, args):
    begin = time.time()
    print("begin validation for epoch {}".format(epoch), flush=True)
    dset = EmbedDataset(loader, imenc, capenc, vocab, args)
    print("val dataset created | {} ".format(sec2str(time.time() - begin)),
          flush=True)
    im = dset.embedded["image"]
    cap = dset.embedded["caption"]
    img_ids = dset.embedded["img_id"]
    ann_ids = dset.embedded["ann_id"]
    #print(len(img_ids)) # 5000
    #print(len(ann_ids)) # 25000

    nd = im.shape[0]
    nq = cap.shape[0]
    d = im.shape[1]
    cpu_index = faiss.IndexFlatIP(d)

    print("# images: {}, # captions: {}, dimension: {}".format(nd, nq, d),
          flush=True)
    # im2cap
    cpu_index.add(cap)
    D, I = cpu_index.search(im, nq)
    data = {}
    allrank = []

    for i in range(5):
        gt = (np.arange(nd) * 5).reshape(-1, 1) + i
        rank = np.where(I == gt)[1]
        allrank.append(rank)
    allrank = np.stack(allrank)
    allrank = np.amin(allrank, 0)
    for rank in [1, 5, 10, 20]:
        data["i2c_recall@{}".format(rank)] = 100 * np.sum(
            allrank < rank) / len(allrank)
    data["i2c_median@r"] = np.median(allrank) + 1

    # cap2im
    cpu_index.reset()
    cpu_index.add(im)
    D, I = cpu_index.search(cap, nd)
    gt = np.arange(nq).reshape(-1, 1) // 5
    allrank = np.where(I == gt)[1]
    for rank in [1, 5, 10, 20]:
        data["c2i_recall@{}".format(rank)] = 100 * np.sum(
            allrank < rank) / len(allrank)
    data["c2i_median@r"] = np.median(allrank) + 1

    print("-" * 50)
    print("results of cross-modal retrieval")
    for key, val in data.items():
        print("{}: {}".format(key, val), flush=True)
    print("-" * 50)
    return data
Example #12
def dimension_reduction(numpyfile, dstfile, method="PCA"):
    all = np.load(numpyfile)
    begin = time.time()
    print("conducting {} on data...".format(method), flush=True)
    if method == "T-SNE":
        all = TSNE(n_components=2).fit_transform(all)
    elif method == "PCA":
        all = PCA(n_components=2).fit_transform(all)
    else:
        raise NotImplementedError()
    print("done | {} ".format(sec2str(time.time()-begin)), flush=True)
    np.save(dstfile, all)
    print("saved {} embeddings to {}".format(method, dstfile), flush=True)
Example #13
def retrieve_c2i(dset, v_dset, savedir, caption, capenc, vocab, k=1):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    begin = time.time()
    print("-" * 50)
    print("source caption: '{}'".format(caption), flush=True)
    cap = vocab.return_idx([caption])
    length = torch.tensor([torch.sum(torch.ne(cap, vocab.padidx)).item()
                           ]).to(device, dtype=torch.long)
    with torch.no_grad():
        cap = cap.to(device)
        cap = capenc(cap, length)
    cap = cap.cpu().numpy()
    im = dset.embedded["image"]
    nd = im.shape[0]
    d = im.shape[1]
    cpu_index = faiss.IndexFlatIP(d)
    print("# images: {}, dimension: {}".format(nd, d), flush=True)

    # cap2im
    cpu_index.add(im)
    D, I = cpu_index.search(cap, k)
    print("retrieval time {}".format(sec2str(time.time() - begin)), flush=True)
    nnimid = []
    for i in range(k):
        nnidx = I[0, i]
        nnim_id = dset.embedded["img_id"][nnidx]
        nnimid.append(nnim_id)
    img = v_dset.coco.loadImgs(nnimid)
    print("-" * 50)
    print("{} nearest neighbors of '{}'".format(k, caption))
    if k == 1:
        plt.figure(figsize=(8, 10))
        nnim = io.imread(img[0]['coco_url'])
        plt.imshow(nnim)
        plt.axis('off')
    elif k > 1:
        fig, axs = plt.subplots(1, k, figsize=(8 * k, 10))
        fig.suptitle("retrieved {} nearest neighbors of '{}'".format(
            k, caption))
        for i in range(k):
            nnim = io.imread(img[i]['coco_url'])
            axs[i].imshow(nnim)
            axs[i].axis('off')
    else:
        raise ValueError("k must be a positive integer")
    #plt.show(block=False)
    #plt.show()
    if not os.path.exists(savedir):
        os.makedirs(savedir)
    plt.savefig(os.path.join(savedir, "output.png"))
    print("-" * 50)
Example #14
def main():
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    if args.dataset == 'coco':
        val_dset = CocoDataset(root=args.root_path,
                               imgdir='val2017',
                               jsonfile='annotations/captions_val2017.json',
                               transform=transform,
                               mode='all')
    val_loader = DataLoader(val_dset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_cpu,
                            collate_fn=collater_eval)

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    assert args.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint),
          flush=True)
    ckpt = torch.load(args.checkpoint)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    begin = time.time()
    dset = EmbedDataset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)

    retrieve_i2c(dset, val_dset, imenc, vocab, args)
    retrieve_c2i(dset, val_dset, capenc, vocab, args)
Example #15
    def load_vocab(self, textfile):
        before = time.time()
        print("building vocabulary...", flush=True)
        # read each line of the text file into a list
        with open(textfile, 'r') as f:
            sentences = f.readlines()
        # split into words and punctuation
        sent_proc = list(map(self.text_proc.preprocess, sentences))
        # build the vocabulary dictionary
        self.text_proc.build_vocab(sent_proc, min_freq=self.min_freq)
        self.len = len(self.text_proc.vocab)
        # padding index
        self.padidx = self.text_proc.vocab.stoi["<pad>"]
        print("done building vocabulary, minimum frequency is {} times".format(
            self.min_freq),
              flush=True)
        print("# of words in vocab: {} | {}".format(
            self.len, sec2str(time.time() - before)),
              flush=True)
Example #16
    def load_vocab(self):
        time_start = time.time()
        print('building vocabulary...', flush=True)

        self.text_json_df = []
        for i in range(len(self.annotation_file)):
            sentences = self.load_json_text(
                os.path.join(self.annotation_path, self.annotation_file[i],
                             self.caption_all_json[i]))
        # print(sentences[:3])

        sent_proc = list(map(self.text_proc.preprocess, sentences))
        # print(sent_proc[:3])
        print('number of sentences:', len(sent_proc))

        # self.text_proc.build_vocab(sent_proc, min_freq=self.min_freq)
        self.text_proc.build_vocab(sent_proc,
                                   min_freq=self.min_freq,
                                   vectors=torchtext.vocab.GloVe(name='840B',
                                                                 dim=300))
        vocab_proc = self.text_proc.vocab
        # print('top-10 most frequent words:', self.text_proc.vocab.freqs.most_common(10))

        word_embeddings = self.text_proc.vocab.vectors
        # print('self.text_proc.vocab.vectors.size():',
        #       self.text_proc.vocab.vectors.size())

        self.len = len(self.text_proc.vocab)
        self.padidx = self.text_proc.vocab.stoi['<pad>']
        print("done building vocabulary, minimum frequency is {} times".format(
            self.min_freq),
              flush=True)
        print("# of words in vocab: {} | {}".format(
            self.len, sec2str(time.time() - time_start)),
              flush=True)
        print(
            '================================================================================'
        )
        return vocab_proc, word_embeddings
Example #17
    def load_vocab(self, textfile):
        """
        Build vocabulary from textfile.
        """
        before = time.time()
        print("building vocabulary...", flush=True)
        with open(textfile, "r") as f:
            sentences = f.readlines()
        sent_proc = list(map(self.text_proc.preprocess, sentences))
        self.text_proc.build_vocab(sent_proc, min_freq=self.min_freq)
        self.len = len(self.text_proc.vocab)
        self.padidx = self.text_proc.vocab.stoi["<pad>"]
        self.bosidx = self.text_proc.vocab.stoi["<bos>"]
        print(
            "done building vocabulary, minimum frequency is {} times".format(
                self.min_freq),
            flush=True,
        )
        print(
            "# of words in vocab: {} | {}".format(
                self.len, sec2str(time.time() - before)),
            flush=True,
        )
Example #18
def retrieve_i2c(dset, v_dset, impath, imenc, transform, k=1):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    im = Image.open(impath)
    print("-" * 50)
    plt.title("source image")
    plt.imshow(np.asarray(im))
    plt.axis('off')
    plt.show(block=False)
    plt.show()

    im = transform(im).unsqueeze(0)
    begin = time.time()
    with torch.no_grad():
        im = im.to(device)
        im = imenc(im)
    im = im.cpu().numpy()
    cap = dset.embedded["caption"]
    nd = cap.shape[0]
    d = cap.shape[1]
    cpu_index = faiss.IndexFlatIP(d)
    print("# captions: {}, dimension: {}".format(nd, d), flush=True)

    # im2cap
    cpu_index.add(cap)
    D, I = cpu_index.search(im, k)
    nnann = []
    for i in range(k):
        nnidx = I[0, i]
        ann_ids = [a for ids in dset.embedded["ann_id"] for a in ids]
        nnann_id = ann_ids[nnidx]
        nnann.append(nnann_id)
    anns = v_dset.coco.loadAnns(nnann)
    print("retrieval time {}".format(sec2str(time.time() - begin)), flush=True)
    print("-" * 50)
    print("{} nearest neighbors of image:".format(k))
    v_dset.coco.showAnns(anns)
    print("-" * 50)
Example #19
spacy = spacy.load('en_core_web_sm')


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


start_now = datetime.datetime.now()
print(start_now.strftime('%Y/%m/%d %H:%M:%S'))
### Test ###
print('----- Test -----')
start1 = time.time()

print('Loading test dataset...')
start2 = time.time()
my_test_dataset = MyDataset_BMN(mode='test')
end2 = sec2str(time.time() - start2)
print('Finished loading test dataset. | {}'.format(end2))


test_batch_size = 1
# ['LSTM', 'GRU', 'Transformer', 'TF']
rnn_model = 'TF'
attn_mode = 'multihead'  # ['simple', 'multihead']
cap_hidden_size = vid_hidden_size = 512
cap_num_layers = vid_num_layers = 2
cap_bidirectional = vid_bidirectional = True
common_size = 256
grd_mode = 'simple'  # ['simple', 'multi']
num_workers = 4

test_data_loader = torch.utils.data.DataLoader(
Example #20
def BMN_train(train_dataloader, val_dataloader, bmn, criterion, optimizer,
              lr_scheduler, CONFIG, args, device, date_path):
    train_start = time.time()

    # make result directory
    result_path = os.path.join(CONFIG.BMN_result_dir, date_path)
    if os.path.exists(result_path):
        shutil.rmtree(result_path)
    os.makedirs(result_path, exist_ok=True)

    # make checkpoint directory
    checkpoint_path = CONFIG.BMN_checkpoint_dir
    if os.path.exists(checkpoint_path):
        shutil.rmtree(checkpoint_path)
    os.makedirs(checkpoint_path, exist_ok=True)

    CONFIG_df = pd.DataFrame.from_dict(CONFIG, orient='index')
    CONFIG_df.to_csv(os.path.join(result_path, 'config.csv'), header=False)

    best_loss = 1e10
    train_loss_list = []
    train_pem_reg_loss_list = []
    train_pem_cls_loss_list = []
    train_tem_loss_list = []
    val_loss_list = []
    val_pem_reg_loss_list = []
    val_pem_cls_loss_list = []
    val_tem_loss_list = []
    lr_list = []
    for epoch in range(CONFIG.BMN_epoch_num):
        epoch_start = time.time()
        # train
        print('-' * 5, 'train', '-' * 5)
        bmn.train()
        train_loss = 0
        train_pem_reg_loss = 0
        train_pem_cls_loss = 0
        train_tem_loss = 0
        for i, train_data in enumerate(train_dataloader):
            input_data = train_data['video']
            gt_confidence_map = train_data['confidence_map']
            gt_start = train_data['start']
            gt_end = train_data['end']
            input_data = input_data.to(device)
            gt_confidence_map = gt_confidence_map.to(device)
            gt_start = gt_start.to(device)
            gt_end = gt_end.to(device)

            optimizer.zero_grad()

            confidence_map, start, end = bmn(input_data)
            loss = criterion(confidence_map, start, end, gt_confidence_map,
                             gt_start, gt_end)
            loss[0].backward()
            train_loss += loss[0].cpu().detach().numpy()
            train_pem_reg_loss += loss[1].cpu().detach().numpy()
            train_pem_cls_loss += loss[2].cpu().detach().numpy()
            train_tem_loss += loss[3].cpu().detach().numpy()
            optimizer.step()

            if i % 200 == 0:
                print(epoch, i, loss[0])
            # break
        train_loss /= len(train_dataloader)
        train_pem_reg_loss /= len(train_dataloader)
        train_pem_cls_loss /= len(train_dataloader)
        train_tem_loss /= len(train_dataloader)
        train_loss_list.append(train_loss)
        train_pem_reg_loss_list.append(train_pem_reg_loss)
        train_pem_cls_loss_list.append(train_pem_cls_loss)
        train_tem_loss_list.append(train_tem_loss)
        print(sec2str(time.time() - epoch_start))

        # validation
        print('-' * 5, 'validation', '-' * 5)
        bmn.eval()
        val_loss = 0
        val_pem_reg_loss = 0
        val_pem_cls_loss = 0
        val_tem_loss = 0
        with torch.no_grad():
            for i, val_data in enumerate(val_dataloader):
                input_data = val_data['video']
                gt_confidence_map = val_data['confidence_map']
                gt_start = val_data['start']
                gt_end = val_data['end']
                input_data = input_data.to(device)
                gt_confidence_map = gt_confidence_map.to(device)
                gt_start = gt_start.to(device)
                gt_end = gt_end.to(device)

                confidence_map, start, end = bmn(input_data)
                loss = criterion(confidence_map, start, end, gt_confidence_map,
                                 gt_start, gt_end)

                val_loss += loss[0].cpu().detach().numpy()
                val_pem_reg_loss += loss[1].cpu().detach().numpy()
                val_pem_cls_loss += loss[2].cpu().detach().numpy()
                val_tem_loss += loss[3].cpu().detach().numpy()
        val_loss /= len(val_dataloader)
        val_pem_reg_loss /= len(val_dataloader)
        val_pem_cls_loss /= len(val_dataloader)
        val_tem_loss /= len(val_dataloader)
        val_loss_list.append(val_loss)
        val_pem_reg_loss_list.append(val_pem_reg_loss)
        val_pem_cls_loss_list.append(val_pem_cls_loss)
        val_tem_loss_list.append(val_tem_loss)

        save_checkpoint(checkpoint_path, epoch, bmn, optimizer, val_loss,
                        lr_scheduler)
        if val_loss <= best_loss:
            best_loss = val_loss
            save_checkpoint(result_path, epoch, bmn, optimizer, val_loss,
                            lr_scheduler)

        lr_list.append(optimizer.param_groups[0]['lr'])
        lr_scheduler.step(train_loss)

        epoch_end = time.time() - epoch_start
        print(
            'Epoch: [{}/{}], Time: {}, train_loss: {loss:.4f}, val_loss: {val_loss:.4f}'
            .format(epoch + 1,
                    CONFIG.BMN_epoch_num,
                    sec2str(epoch_end),
                    loss=train_loss,
                    val_loss=val_loss))
        print(
            'train: [pem_reg_loss: {}, pem_cls_loss: {}, tem_loss: {}]'.format(
                train_pem_reg_loss, train_pem_cls_loss, train_tem_loss))

        # save BMN log
        log_dict = {
            'epoch': list(range(epoch + 1)),
            'learning_rate': lr_list,
            'train_loss': train_loss_list,
            'train_pem_reg_loss': train_pem_reg_loss_list,
            'train_pem_cls_loss': train_pem_cls_loss_list,
            'train_tem_loss': train_tem_loss_list,
            'val_loss': val_loss_list,
            'val_pem_reg_loss': val_pem_reg_loss_list,
            'val_pem_cls_loss': val_pem_cls_loss_list,
            'val_tem_loss': val_tem_loss_list,
        }
        log_df = pd.DataFrame.from_dict(log_dict).set_index('epoch')
        log_df.to_csv(os.path.join(result_path, 'log.csv'), mode='w')
        plt.figure()
        plt.plot(train_loss_list, label='train')
        plt.plot(val_loss_list, label='val')
        plt.yscale('log')
        plt.legend()
        plt.savefig(os.path.join(result_path, 'loss.png'))
        plt.close()

    train_end = time.time() - train_start
    print('finished train: {}'.format(sec2str(train_end)))
Example #21
    # weights and biases
    if not args.no_wandb:
        wandb.init(
            config=CONFIG,
            project='two-stage-Temporal Moment Retrieval',
            job_type='training',
        )

    # date path
    date_path = date
    # config_name = str(args.config)[14:-5]
    # date_path = os.path.join(date, config_name)

    BMN_main(CONFIG, args, device, date)


if __name__ == '__main__':
    start_main = time.time()
    start_now = datetime.datetime.now()
    # date = start_now.strftime('%Y-%m-%d/%H')
    # date = start_now.strftime('%Y-%m-%d')
    date = start_now.strftime('%Y-%m')
    print(start_now.strftime('%Y/%m/%d %H:%M:%S'))

    main(date)

    end_main = sec2str(time.time() - start_main)
    end_now = datetime.datetime.now()
    print('Finished main.py! | {} | {}'.format(
        end_main, end_now.strftime('%Y/%m/%d %H:%M:%S')))
    print('=' * 70)
Example #22
def main():
    args = parse_args()

    transform = transforms.Compose([
        transforms.Resize((args.imsize, args.imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    if args.dataset == "coco":
        val_dset = CocoDataset(
            root=args.root_path,
            split="val",
            transform=transform,
        )
    val_loader = DataLoader(
        val_dset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.n_cpu,
        collate_fn=collater,
    )

    vocab = Vocabulary(max_len=args.max_len)
    vocab.load_vocab(args.vocab_path)

    imenc = ImageEncoder(args.out_size, args.cnn_type)
    capenc = CaptionEncoder(len(vocab), args.emb_size, args.out_size,
                            args.rnn_type)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    imenc = imenc.to(device)
    capenc = capenc.to(device)

    assert args.checkpoint is not None
    print("loading model and optimizer checkpoint from {} ...".format(
        args.checkpoint),
          flush=True)
    ckpt = torch.load(args.checkpoint, map_location=device)
    imenc.load_state_dict(ckpt["encoder_state"])
    capenc.load_state_dict(ckpt["decoder_state"])

    begin = time.time()
    dset = EmbedDataset(val_loader, imenc, capenc, vocab, args)
    print("database created | {} ".format(sec2str(time.time() - begin)),
          flush=True)

    savedir = os.path.join("out", args.config_name)
    if not os.path.exists(savedir):
        os.makedirs(savedir, 0o777)

    image = dset.embedded["image"]
    caption = dset.embedded["caption"]
    n_i = image.shape[0]
    n_c = caption.shape[0]
    all = np.concatenate([image, caption], axis=0)

    emb_file = os.path.join(savedir, "embedding_{}.npy".format(n_i))
    save_file = os.path.join(savedir, "{}.npy".format(args.method))
    vis_file = os.path.join(savedir, "{}.png".format(args.method))
    np.save(emb_file, all)
    print("saved embeddings to {}".format(emb_file), flush=True)
    dimension_reduction(emb_file, save_file, method=args.method)
    plot_embeddings(save_file, n_i, vis_file, method=args.method)
Example #23
    def __len__(self):
        return len(self.video_list)


if __name__ == '__main__':
    start = time.time()
    my_dataset = ActivityNet_Captions_BMN_Dataset(mode='test')
    print(len(my_dataset))
    my_dataset = Charades_STA_BMN_Dataset(mode='test')
    print(len(my_dataset))
    my_dataset = TACoS_BMN_Dataset(mode='test')
    print(len(my_dataset))
    my_dataset = BMN_Dataset(mode='test')
    print(len(my_dataset))
    print(sec2str(time.time() - start))
    data_loader = torch.utils.data.DataLoader(my_dataset,
                                              batch_size=4,
                                              shuffle=False,
                                              drop_last=True,
                                              num_workers=0,
                                              collate_fn=BMN_collate_fn)
    print('=' * 70)
    print(len(data_loader))
    for i, data in enumerate(data_loader):
        if i == 1:
            print(data['video_id'])
            print(data['video'].size())
            print(data['video_length'].size())
            print(data['start'].size())
            # print(data['start'])
Example #24
from utils import sec2str, model_state_dict, collate_fn

import spacy

spacy = spacy.load('en_core_web_sm')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

start_now = datetime.datetime.now()
print(start_now.strftime('%Y/%m/%d %H:%M:%S'))
start0 = time.time()
print('Setting dataset...')
MyFile()
MyTestFile()
MakeDataset()
end0 = sec2str(time.time() - start0)
print('Finished setting dataset. | {}'.format(end0))
print('================================================================================')

print('----- Train & Validation -----')
start1 = time.time()

print('Loading train dataset...')
start2 = time.time()
train_dataset = MyDataset_BMN(mode='train')
end2 = sec2str(time.time() - start2)
print('Finished loading train dataset. | {}'.format(end2))

print('Loading validation dataset...')
start3 = time.time()
val_dataset = MyDataset_BMN(mode='val')
Example #25
def train(epoch, loader, imenc, capenc, optimizer, lossfunc, vocab, args, SETTING):
    begin = time.time()
    # max iteration_num
    maxit = int(len(loader.dataset) / SETTING.batch_size)
    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    cumloss = 0
    for it, data in enumerate(loader):
        """image, target, index, img_id"""
        # batch_size * img_size
        image = data["image"]
        # batch_size x 5 candidate captions per image
        caption = data["caption"]
        # choose 1 caption at random out of the 5
        caption = [i[np.random.randint(0, len(i))] for i in caption]
        img_id = data["img_id"]
        # convert the caption sentences to token indices
        target = vocab.return_idx(caption)
        # lengths of each sentence
        lengths = target.ne(vocab.padidx).sum(dim=1)

        optimizer.zero_grad()

        image = image.to(device)
        target = target.to(device)
        lengths = lengths.to(device)

        im_emb = imenc(image)
        cap_emb = capenc(target, lengths)
        lossval = lossfunc(im_emb, cap_emb)
        lossval.backward()

        # clip gradient norm
        if SETTING.grad_clip > 0:
            clip_grad_norm_(imenc.parameters(), SETTING.grad_clip)
            clip_grad_norm_(capenc.parameters(), SETTING.grad_clip)
        optimizer.step()
        cumloss += lossval.item()


        if it % SETTING.log_every == SETTING.log_every - 1:
            print("epoch {} | {} | {:06d}/{:06d} iterations | loss: {:.08f}".
                  format(epoch, sec2str(time.time() - begin), it + 1, maxit,
                         cumloss / SETTING.log_every),
                  flush=True)
            cumloss = 0