def test_read_write_json(self):
    """Round-trip a dict through write_json/read_json, plain and gzip."""
    from src.utils.io_utils import read_json, write_json

    payload = {'one': ['two', 3]}

    # Plain JSON file: what we write is what we read back.
    plain_path = os.path.join(self.test_dir, 'tmp_dict.json')
    write_json(payload, plain_path)
    self.assertTrue(payload == read_json(plain_path))

    # Gzip-compressed JSON (selected by the .gzip extension).
    gzip_path = os.path.join(self.test_dir, 'tmp_dict.json.gzip')
    write_json(payload, gzip_path)
    self.assertTrue(payload == read_json(gzip_path))
Пример #2
0
    def save_results(self, prefix, mode="Train"):
        """Dump the current predictions to JSON and update metric counters."""
        # Write predictions under <result_dir>/predictions/<mode>/<prefix>.json
        out_dir = os.path.join(self.config["misc"]["result_dir"],
                               "predictions", mode)
        io_utils.check_and_create_dir(out_dir)
        io_utils.write_json(os.path.join(out_dir, prefix + ".json"),
                            self.results)

        # Evaluate rank@1 / rank@5 / mean IoU over all ground truths.
        num_gts = float(len(self.results["gts"]))
        self.evaluator.set_duration(self.results["durations"])
        rank1, rank5, miou = self.evaluator.eval(
            self.results["predictions"], self.results["gts"])

        # Accumulate averages normalized by the number of ground truths.
        for name, value in rank1.items():
            self.counters[name].add(value / num_gts, 1)
        self.counters["mIoU"].add(miou / num_gts, 1)
Пример #3
0
    def save_assignments(self, prefix, mode="train"):
        """Persist accumulated assignments (HDF5) plus question-id files (JSON)."""
        assignments = np.vstack(self.assignments_list)
        qst_ids = []
        for qid in self.qst_ids_list:
            qst_ids.extend(qid)
        print("shape of assignments: ", assignments.shape)

        # Reorder rows to match the canonical question-id order of the split.
        origin_qst_ids = (self.origin_train_qst_ids if mode == "train"
                          else self.origin_test_qst_ids)
        assignments, qst_ids = cmf.reorder_assignments_using_qst_ids(
            origin_qst_ids, qst_ids, assignments, is_subset=True)

        # Output directory: <result_dir>/assignments/<mode>
        save_dir = os.path.join(self.config["misc"]["result_dir"],
                                "assignments", mode)
        io_utils.check_and_create_dir(save_dir)

        # 1) raw assignment matrix -> HDF5
        # NOTE(review): the HDF5 handle is not explicitly closed here
        # (mirrors original behavior).
        save_hdf5_path = os.path.join(save_dir, prefix + "_assignment.h5")
        hdf5_file = io_utils.open_hdf5(save_hdf5_path, "w")
        hdf5_file.create_dataset("assignments",
                                 dtype="int32",
                                 data=assignments)
        print("Assignments are saved in {}".format(save_hdf5_path))

        # 2) answer->question pairs -> JSON; convert each value to a plain
        # list first so it is JSON-serializable.
        save_json_path = os.path.join(save_dir, prefix + "_assignment.json")
        for qsp in self.assignment_qst_ans_pairs:
            for key in qsp:
                qsp[key] = list(qsp[key])
        io_utils.write_json(save_json_path, self.assignment_qst_ans_pairs)
        print("Assignments (ans-qst) are saved in {}".format(save_json_path))

        # 3) question ids actually used -> JSON
        save_json_path = os.path.join(save_dir, prefix + "_qst_ids.json")
        io_utils.write_json(save_json_path, {"question_ids": qst_ids})
        print("Saving is done: {}".format(save_json_path))
Пример #4
0
def ensemble(config):

    """ Build data loader """
    dset = dataset.DataSet(config["test_loader"])
    L = data.DataLoader( \
            dset, batch_size=config["test_loader"]["batch_size"], \
            num_workers=config["num_workers"], \
            shuffle=False, collate_fn=dataset.collate_fn)

    """ Load assignments if exists """
    with_assignment = False
    if config["assignment_path"] != "None":
        with_assignment = True
        assignment_file = io_utils.load_hdf5(config["assignment_path"], verbose=False)
        assignments = assignment_file["assignments"][:]
        cnt_mapping = np.zeros((3,3))

    """ Build network """
    nets = []
    net_configs = []
    for i in range(len(config["checkpoint_paths"])):
        net_configs.append(io_utils.load_yaml(config["config_paths"][i]))
        net_configs[i] = M.override_config_from_loader(net_configs[i], dset)
        nets.append(M(net_configs[i]))
        nets[i].bring_loader_info(dset)
        apply_cc_after = utils.get_value_from_dict(
                net_configs[i]["model"], "apply_curriculum_learning_after", -1)
        # load checkpoint if exists
        nets[i].load_checkpoint(config["checkpoint_paths"][i])
        start_epoch = int(utils.get_filename_from_path(
                config["checkpoint_paths"][i]).split("_")[-1])
        # If checkpoint use curriculum learning
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            nets[i].apply_curriculum_learning()

    # ship network to use gpu
    if config["use_gpu"]:
        for i in range(len(nets)):
            nets[i].gpu_mode()
    for i in range(len(nets)):
        nets[i].eval_mode()

    # initialize counters for different tau
    metrics = ["top1-avg", "top1-max", "oracle"]
    for i in range(len(nets)):
        modelname = "M{}".format(i)
        metrics.append(modelname)
    tau = [1.0, 1.2, 1.5, 2.0, 5.0, 10.0, 50.0, 100.0]
    counters = OrderedDict()
    for T in tau:
        tau_name = "tau-"+str(T)
        counters[tau_name] = OrderedDict()
        for mt in metrics:
            counters[tau_name][mt] = accumulator.Accumulator(mt)

    """ Run training network """
    ii = 0
    itoa = dset.get_itoa()
    predictions = []
    for batch in tqdm(L):
        # Forward networks
        probs = 0
        B = batch[0][0].size(0)
        if type(batch[0][-1]) == type(list()):
            gt = batch[0][-1][0]
        else:
            gt = batch[0][-1]

        correct = 0
        probs = {}
        for T in tau:
            tau_name = "tau-"+str(T)
            probs[tau_name] = 0

        prob_list = []
        for i in range(len(nets)):
            outputs = nets[i].evaluate(batch)
            prob_list.append(outputs[1]) # m*[B,A]

        if config["save_logits"]:
            TODO = True

        for T in tau:
            tau_name = "tau-"+str(T)
            probs = [net_utils.get_data(F.softmax(logits/T, dim=1)) \
                     for logits in prob_list] # m*[B,A]

            # count correct numbers for each model
            for i in range(len(nets)):
                val, idx = probs[i].max(dim=1)
                correct = torch.eq(idx, gt)
                num_correct = torch.sum(correct)
                modelname = "M{}".format(i)
                counters[tau_name][modelname].add(num_correct, B)

                # add prob of each model
                if i == 0:
                    oracle_correct = correct
                else:
                    oracle_correct = oracle_correct + correct


            # top1-max accuracy for ensemble
            ens_probs, ens_idx = torch.stack(probs,0).max(0) # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-max"].add(num_correct, B)

            # top1-avg accuracy for ensemble
            ens_probs = sum(probs) # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-avg"].add(num_correct, B)

            # oracle accuracy for ensemble
            num_oracle_correct = torch.sum(torch.ge(oracle_correct, 1))
            counters[tau_name]["oracle"].add(num_oracle_correct, B)

            # attach predictions
            for i in range(len(batch[1])):
                qid = batch[1][i]
                predictions.append({
                    "question_id": qid,
                    "answer": utils.label2string(itoa, max_idx[i])
                })

        # epoch done

    # print accuracy
    for cnt_k,cnt_v in counters.items():
        txt = cnt_k + " "
        for k,v in cnt_v.items():
            txt += ", {} = {:.5f}".format(v.get_name(), v.get_average())
        print(txt)

    save_dir = os.path.join("results", "ensemble_predictions")
    io_utils.check_and_create_dir(save_dir)
    io_utils.write_json(os.path.join(save_dir, config["out"]+".json"), predictions)
Пример #5
0
    def generate_labels(self, config):
        """Generate and save labels for temporal language grounding.

        Produces (each file is skipped when it already exists):
          1) query_info (.json): wtoi / itow vocabularies and query lengths
          2) query_labels (.h5): qid -> encoded query label
          3) grounding_labels (.h5): qid -> start/end position and attention mask
        """
        # --- Query information -------------------------------------------
        if not os.path.exists(self.paths["query_labels"]):
            # The vocabulary is always built from the *training* annotations.
            train_ann_path = "data/charades/annotations/charades_sta_train.txt"
            train_aux_path = "data/charades/annotations/Charades_v1_train.csv"
            train_anns, _, _ = self._load_annotation(train_ann_path,
                                                     train_aux_path)
            wtoi = self._build_vocab(train_anns)
            itow = {index: word for word, index in wtoi.items()}

            # Encode queries up to max_length tokens and save the labels.
            max_len = config.get("max_length", 20)
            encoded = self._encode_query(self.anns, wtoi, max_len)
            query_labels = io_utils.open_hdf5(self.paths["query_labels"], "w")
            for qid in tqdm(encoded["query_lengths"].keys(),
                            desc="Saving query"):
                query_labels.create_dataset(
                    str(qid), data=encoded["query_labels"][qid])
            query_labels.close()

            # Persist vocabulary and per-query lengths alongside the labels.
            io_utils.write_json(self.paths["query_info"], {
                "wtoi": wtoi,
                "itow": itow,
                "query_lengths": encoded["query_lengths"],
            })

        # --- Grounding information ---------------------------------------
        if not os.path.exists(self.paths["grounding_info"]):
            grd_dataset = io_utils.open_hdf5(self.paths["grounding_info"], "w")
            start_pos = grd_dataset.create_group("start_pos")
            end_pos = grd_dataset.create_group("end_pos")
            att_masks = grd_dataset.create_group("att_mask")

            for qid, ann in tqdm(self.anns.items(), desc="Gen. Grd. Labels"):
                # Start/end positions normalized by video duration.
                ts = ann["timestamps"]
                duration = ann["duration"]
                start = ts[0] / duration
                end = ts[1] / duration

                # Attention calibration mask over at most self.S features;
                # only I3D features are supported.
                vid = ann["video_id"]
                if self.feature_type != "I3D":
                    raise NotImplementedError()
                nfeats = min(np.load(self.feat_path.format(vid)).shape[0],
                             self.S)

                fs = utils.timestamp_to_featstamp(ts, nfeats, duration)
                att_mask = np.zeros((self.S))
                att_mask[fs[0]:fs[1] + 1] = 1

                start_pos.create_dataset(qid, data=start, dtype="float")
                end_pos.create_dataset(qid, data=end, dtype="float")
                att_masks.create_dataset(qid, data=att_mask, dtype="float")

            # flush the encoded grounding labels to disk
            grd_dataset.close()