Example #1
File: main.py  Project: ducminhkhoi/VRED
    def __init__(self, json_file, type_dataset):
        self.type_dataset = type_dataset
        self.db_file = '/scratch/datasets/vrd/relations_' + type_dataset + '.data'

        with open(json_file, 'r') as file:
            contents = json.load(file)
            contents = convert_to_string(contents).items()

            self.data = [(key, rel) for key, value in contents
                         for rel in value]

        print("number of relations:", len(self.data))

        self.list_relation = []

        if os.path.isfile(self.db_file):
            with open(self.db_file, 'rb') as f:
                self.list_relation = pkl.load(f)
        else:
            for i, (key, rel) in enumerate(self.data):
                print(i, len(self.data))
                subject_box = rel['subject'][
                    'bbox']  # [ymin, ymax, xmin, xmax]
                object_box = rel['object']['bbox']

                minbbox = [
                    min(subject_box[0], object_box[0]),
                    max(subject_box[1], object_box[1]),
                    min(subject_box[2], object_box[2]),
                    max(subject_box[3], object_box[3])
                ]

                image = imread('/scratch/datasets/vrd/sg_dataset/sg_' +
                               type_dataset + '_images/' + key)
                bboxes = [subject_box, object_box, minbbox]

                list_image = [
                    image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
                ]

                list_binary_image = [
                    np.zeros_like(image) for _ in range(len(bboxes))
                ]
                for (binary_image, bbox) in zip(list_binary_image, bboxes):
                    binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1

                relation = [transform(x) for x in list_image] + \
                           [spatial_transform(x)[0, :, :].view(1, 32, 32) for x in list_binary_image] + \
                           [torch.LongTensor([rel['subject']['category'], rel['object']['category'], rel['predicate']])]

                self.list_relation.append(relation)

            with open(self.db_file, 'wb') as f:
                pkl.dump(self.list_relation, f, protocol=pkl.HIGHEST_PROTOCOL)

        print("complete_loading", type_dataset)
Example #2
File: main.py  Project: ducminhkhoi/VRED
    def __getitem__(self, index):
        """Return a (transformed) vrd_input and target sample from an integer index"""
        key, rel = self.data[index]
        subject_box = rel['subject']['bbox']  # [ymin, ymax, xmin, xmax]
        object_box = rel['object']['bbox']

        minbbox = [
            min(subject_box[0], object_box[0]),
            max(subject_box[1], object_box[1]),
            min(subject_box[2], object_box[2]),
            max(subject_box[3], object_box[3])
        ]

        image = imread('/scratch/datasets/sg_dataset/sg_' + self.type_dataset +
                       '_images/' + key)
        bboxes = [subject_box, object_box, minbbox]

        list_image = [
            image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
        ]

        subject_visual_input, object_visual_input, union_visual_input = tuple(
            transform(x) for x in list_image)

        list_binary_image = [np.zeros_like(image) for _ in range(len(bboxes))]
        for (binary_image, bbox) in zip(list_binary_image, bboxes):
            binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1

        subject_spatial_input, object_spatial_input, union_spatial_input = \
            tuple(spatial_transform(x)[0, :, :].view(1, 32, 32) for x in list_binary_image)

        predicate_spatial_feature = torch.cat(
            [subject_spatial_input, object_spatial_input], 0)

        object_word_feature = torch.FloatTensor(
            index_to_emb_dict[rel['object']['category']])
        subject_word_feature = torch.FloatTensor(
            index_to_emb_dict[rel['subject']['category']])

        if use_model == 1:
            input_sample = union_visual_input, predicate_spatial_feature
            target_sample = rel['subject']['category'], rel['object'][
                'category'], rel['predicate']
        elif use_model == 2:
            input_sample = torch.FloatTensor(to_categorical(rel['subject']['category'], object_size)), \
                           torch.FloatTensor(to_categorical(rel['object']['category'], object_size)), \
                           union_visual_input, predicate_spatial_feature
            target_sample = rel['predicate']
        elif use_model == 3:
            input_sample = (subject_word_feature, object_word_feature,
                            union_visual_input, predicate_spatial_feature)
            target_sample = rel['predicate']

        return input_sample, target_sample
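A minimal usage sketch for the dataset class above (the class name VRDDataset, the annotation file name, and the batch settings are assumptions): __getitem__ returns (input_sample, target_sample), so the default DataLoader collation turns each tuple element into a batched tensor.

from torch.utils.data import DataLoader

# hypothetical names; the real class and annotation file are not shown in the excerpt
train_set = VRDDataset('annotations_train.json', 'train')
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

for inputs, targets in train_loader:
    # inputs and targets mirror the use_model-dependent tuples built in __getitem__
    pass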
Example #3
                                removeMask[index_elem_to_elim] = 0
                                removed.extend([
                                    good[i] for i in range(len(good))
                                    if not removeMask[i]
                                ])
                                good = [
                                    good[i] for i in range(len(good))
                                    if removeMask[i]
                                ]
                        else:
                            print("Not possible to find another homography")
                            matchesMask = None
                    else:

                        polygon = Polygon([
                            transform(M, pts[0]),
                            transform(M, pts[1]),
                            transform(M, pts[2]),
                            transform(M, pts[3])
                        ])

                        print("POLYGON")
                        for i in range(4):
                            print("Vertex #" + str(i))
                            print("src->(" + str(pts[i][0][0]) + "," +
                                  str(pts[i][0][1]) + ") dst->" +
                                  str(transform(M, pts[i])) + "\n")

                        print("---------------------------------------\n")

                        for i in range(len(index_inliers)):
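The truncated excerpt above maps the corner points pts[i] through the homography M with a transform(M, pt) helper that is not shown. A minimal sketch of such a helper with OpenCV, assuming each pt is an [[x, y]] corner as in cv2.perspectiveTransform conventions (this is an assumption, not code from the project):

import cv2
import numpy as np

def transform(M, pt):
    """Apply the 3x3 homography M to a single point shaped like [[x, y]]."""
    dst = cv2.perspectiveTransform(np.array([pt], dtype=np.float32), M)
    return float(dst[0][0][0]), float(dst[0][0][1])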
Example #4
File: main.py  Project: ducminhkhoi/VRED
def run_evaluate(model, json_file, type_dataset):

    output_file = 'data/temp_output_3.pkl'

    if os.path.isfile(output_file):
        with open(output_file, 'rb') as f:
            list_relations, list_gt_relations = pickle.load(f)
    else:
        model.load(
            '/scratch/datasets/vrd/weights.07-train_loss:0.10-train_acc:0.52-val_loss:0.12-val_acc:0.51.pkl'
        )

        with open(json_file, 'r') as file:
            contents = json.load(file)
            contents = convert_to_string(contents).items()

        list_relations = []
        list_gt_relations = []

        for i, (key, value) in enumerate(contents):
            print(i, len(contents))
            list_relation = []
            list_gt_relation = []

            # get list of object
            list_objects = []
            for rel in value:
                list_objects.append(
                    Object(rel['subject']['category'],
                           *tuple(rel['subject']['bbox'])))
                list_objects.append(
                    Object(rel['object']['category'],
                           *tuple(rel['object']['bbox'])))
                list_gt_relation.append(rel['predicate'])

            list_objects = list(set(list_objects))

            for subject, object_ in combinations(list_objects, 2):
                subject_box = subject.ymin, subject.ymax, subject.xmin, subject.xmax  # [ymin, ymax, xmin, xmax]
                object_box = object_.ymin, object_.ymax, object_.xmin, object_.xmax

                minbbox = [
                    min(subject_box[0], object_box[0]),
                    max(subject_box[1], object_box[1]),
                    min(subject_box[2], object_box[2]),
                    max(subject_box[3], object_box[3])
                ]

                image = imread('/scratch/datasets/sg_dataset/sg_' +
                               type_dataset + '_images/' + key)
                bboxes = [subject_box, object_box, minbbox]

                list_image = [
                    image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
                ]

                list_binary_image = [
                    np.zeros_like(image) for _ in range(len(bboxes))
                ]
                for (binary_image, bbox) in zip(list_binary_image, bboxes):
                    binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1

                subject_visual_input, object_visual_input, union_visual_input = tuple(
                    transform(x) for x in list_image)
                subject_spatial_input, object_spatial_input, union_spatial_input = \
                    tuple(spatial_transform(x)[0, :, :].view(1, 32, 32) for x in list_binary_image)

                predicate_spatial_feature = torch.cat(
                    [subject_spatial_input, object_spatial_input], 0)

                inputs = (torch.FloatTensor(
                    to_categorical(subject.category, object_size)),
                          torch.FloatTensor(
                              to_categorical(object_.category, object_size)),
                          union_visual_input, predicate_spatial_feature)

                # wrap them in Variable
                if isGPU:
                    inputs = [
                        Variable(x.cuda(), volatile=True) for x in inputs
                    ]
                else:
                    inputs = [Variable(x, volatile=True) for x in inputs]

                # forward
                outputs = model.net(inputs)

                if isGPU:
                    list_relation.append(outputs.data.cpu().numpy())
                else:
                    list_relation.append(outputs.data.numpy())

            list_relations.append(np.array(list_relation))
            list_gt_relations.append(list_gt_relation)

        with open(output_file, 'wb') as f:
            pickle.dump((list_relations, list_gt_relations), f,
                        pickle.HIGHEST_PROTOCOL)

    score = eval_recall(list_relations, list_gt_relations)
    return score
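run_evaluate builds Object records from each annotated box and de-duplicates them with set(), so Object must be hashable with fields (category, ymin, ymax, xmin, xmax) in that order. A minimal sketch of such a type (assumed, not shown in the excerpt):

from collections import namedtuple

# namedtuples are hashable, so list(set(list_objects)) drops duplicate boxes
Object = namedtuple('Object', ['category', 'ymin', 'ymax', 'xmin', 'xmax'])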
Example #5
File: main.py  Project: ducminhkhoi/VRED
    def build_db(self):
        count_relation = self.num_records

        for i, (key, rel) in enumerate(self.data):
            if i < self.num_records:
                continue

            print(i, len(self.data))
            subject_box = rel['subject']['bbox']  # [ymin, ymax, xmin, xmax]
            object_box = rel['object']['bbox']

            minbbox = [
                min(subject_box[0], object_box[0]),
                max(subject_box[1], object_box[1]),
                min(subject_box[2], object_box[2]),
                max(subject_box[3], object_box[3])
            ]

            image = imread('/scratch/datasets/vrd/sg_dataset/sg_' +
                           self.type_dataset + '_images/' + key)
            bboxes = [subject_box, object_box, minbbox]

            list_image = [
                image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
            ]

            list_binary_image = [
                np.zeros_like(image) for _ in range(len(bboxes))
            ]
            for (binary_image, bbox) in zip(list_binary_image, bboxes):
                binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1

            subject_visual_input, object_visual_input, union_visual_input = tuple(
                transform(x) for x in list_image)
            subject_spatial_input, object_spatial_input, union_spatial_input = \
                tuple(spatial_transform(x)[0, :, :].view(1, 32, 32) for x in list_binary_image)

            predicate_spatial_feature = torch.cat(
                [subject_spatial_input, object_spatial_input], 0)

            # subject_word_feature = np.array(emb.emb(index_to_object_dict[rel['subject']['category']]), dtype=np.float32)
            # object_word_feature = np.array(emb.emb(index_to_object_dict[rel['object']['category']]), dtype=np.float32)

            relation = {
                'image_id':
                key,
                'subject_visual_feature':
                self.save_numpy_array(subject_visual_input.numpy()),
                # 'subject_word_feature': self.save_numpy_array(subject_word_feature.numpy()),
                'object_visual_feature':
                self.save_numpy_array(object_visual_input.numpy()),
                # 'object_word_feature': self.save_numpy_array(object_word_feature.numpy()),
                'predicate_visual_feature':
                self.save_numpy_array(union_visual_input.numpy()),
                'predicate_spatial_feature':
                self.save_numpy_array(predicate_spatial_feature.numpy()),
                'subject_id':
                rel['subject']['category'],
                'object_id':
                rel['object']['category'],
                'predicate_id':
                rel['predicate'],
            }

            self.db.hmset(count_relation, relation)

            count_relation += 1
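build_db writes each relation to Redis with hmset, so the numpy feature arrays have to be serialized to bytes first via save_numpy_array. The method name comes from the code above; the implementation below is an assumed sketch, shown as module-level helpers:

import io
import numpy as np

def save_numpy_array(array):
    """Serialize a numpy array (dtype and shape included) to bytes for a Redis hash field."""
    buffer = io.BytesIO()
    np.save(buffer, array)
    return buffer.getvalue()

def load_numpy_array(blob):
    """Inverse of save_numpy_array: rebuild the array from the stored bytes."""
    return np.load(io.BytesIO(blob))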
Example #6
File: main.py  Project: ducminhkhoi/VRED
    def __init__(self, json_file, type_dataset):
        self.type_dataset = type_dataset
        self.db_file = '/scratch/datasets/vrd/relations_' + type_dataset + '.db'
        self.table_name = 'relations'
        self.batch_database_size = 1000

        with open(json_file, 'r') as file:
            contents = json.load(file)
            contents = convert_to_string(contents).items()

            data = [(key, rel) for key, value in contents for rel in value]

        print("number of relations:", len(data))

        create_sql = (
            'create table if not exists relations(' +
            'id integer primary key,' + 'image_id text,' +
            'subject_visual_feature array,' + 'subject_word_feature array,' +
            'object_visual_feature array,' + 'object_word_feature array,' +
            'predicate_visual_feature array,' +
            'predicate_spatial_feature array,' + 'subject_id integer,' +
            'object_id integer,' + 'predicate_id integer' + ')')
        self.db = SQLiteDatabase(self.db_file, self.table_name, create_sql)

        num_records = len(self.db)

        if num_records < len(data):
            list_relation = []
            count_relation = num_records

            for i, (key, rel) in enumerate(data):
                if i < num_records:
                    continue

                print(i, len(data))
                subject_box = rel['subject'][
                    'bbox']  # [ymin, ymax, xmin, xmax]
                object_box = rel['object']['bbox']

                minbbox = [
                    min(subject_box[0], object_box[0]),
                    max(subject_box[1], object_box[1]),
                    min(subject_box[2], object_box[2]),
                    max(subject_box[3], object_box[3])
                ]

                image = imread('/scratch/datasets/vrd/sg_dataset/sg_' +
                               type_dataset + '_images/' + key)
                bboxes = [subject_box, object_box, minbbox]

                list_image = [
                    image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
                ]

                list_binary_image = [
                    np.zeros_like(image) for _ in range(len(bboxes))
                ]
                for (binary_image, bbox) in zip(list_binary_image, bboxes):
                    binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1

                subject_visual_input, object_visual_input, union_visual_input = tuple(
                    transform(x) for x in list_image)
                subject_spatial_input, object_spatial_input, union_spatial_input = \
                    tuple(spatial_transform(x)[0, :, :].view(1, 32, 32) for x in list_binary_image)

                predicate_spatial_feature = torch.cat(
                    [subject_spatial_input, object_spatial_input], 0)

                # subject_word_feature = np.array(emb.emb(index_to_object_dict[rel['subject']['category']]), dtype=np.float32)
                # object_word_feature = np.array(emb.emb(index_to_object_dict[rel['object']['category']]), dtype=np.float32)

                relation = {
                    'id': count_relation,
                    'image_id': key,
                    'subject_visual_feature': subject_visual_input.numpy(),
                    # 'subject_word_feature': subject_word_feature,
                    'object_visual_feature': object_visual_input.numpy(),
                    # 'object_word_feature': object_word_feature,
                    'predicate_visual_feature': union_visual_input.numpy(),
                    'predicate_spatial_feature':
                    predicate_spatial_feature.numpy(),
                    'subject_id': rel['subject']['category'],
                    'object_id': rel['object']['category'],
                    'predicate_id': rel['predicate'],
                }
                list_relation.append(relation)

                count_relation += 1

                if len(list_relation) == self.batch_database_size:
                    print("start inserting")
                    self.db.insert_batch(list_relation)
                    list_relation.clear()

            if list_relation:
                self.db.insert_batch(list_relation)
                list_relation.clear()
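The create table statement above declares "array" columns, which plain sqlite3 does not understand; SQLiteDatabase presumably registers a numpy adapter/converter pair for that declared type. A minimal sketch of that standard sqlite3 pattern (the registration code is an assumption; only the "array" column type comes from the excerpt, and the file name below is illustrative):

import io
import sqlite3
import numpy as np

def adapt_array(array):
    """numpy array -> BLOB, keeping dtype and shape via np.save."""
    buffer = io.BytesIO()
    np.save(buffer, array)
    return sqlite3.Binary(buffer.getvalue())

def convert_array(blob):
    """BLOB written by adapt_array -> numpy array."""
    return np.load(io.BytesIO(blob))

sqlite3.register_adapter(np.ndarray, adapt_array)
sqlite3.register_converter('array', convert_array)

# detect_types is required so columns declared as "array" go through convert_array
conn = sqlite3.connect('relations_train.db', detect_types=sqlite3.PARSE_DECLTYPES)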