Exemplo n.º 1
0
def vr_make_meta_gt_visual_phrase():
    m = h5py.File('data/sg_vrd_meta.h5','r',driver='core')
    h5f  = h5py.File('data/sg_vrd_vp_meta.h5')

    triplets = {}
    cnt = 0
    zl.tick()
    for k in m['gt/train'].keys():
        if cnt %1000==0:

            print cnt,zl.tock()
            zl.tick()
        cnt+=1
        gt_boxes = []
        gt_labels = []
        sub_boxes = m['gt/train/%s/sub_boxes'%k][...]
        obj_boxes = m['gt/train/%s/obj_boxes'%k][...]
        rlp_labels = m['gt/train/%s/rlp_labels'%k][...]
        for i in xrange(rlp_labels.shape[0]):
            sub_box = sub_boxes[i]
            obj_box = obj_boxes[i]
            rlp_label = rlp_labels[i]
            joint_box = [min(sub_box[0],obj_box[0]), min(sub_box[1],obj_box[1]),max(sub_box[2],obj_box[2]),max(sub_box[3],obj_box[3])]
            s_lbl = zl.idx2name_cls(m,rlp_label[0])
            o_lbl = zl.idx2name_cls(m,rlp_label[2])
            p_lbl = zl.idx2name_pre(m,rlp_label[1])
            spo = '%s_%s_%s'%(s_lbl,p_lbl,o_lbl)
            lbl = zl.name2idx_tri(h5f,spo)
            gt_boxes.append(joint_box)
            gt_labels.append(lbl)
        h5f.create_dataset('gt/train/%s/labels'%k,data = np.array(gt_labels).astype(np.int16))
        h5f.create_dataset('gt/train/%s/boxes'%k,data = np.array(gt_boxes).astype(np.int16))
Exemplo n.º 2
0
def vr_vphrase_make_voc_format(split_type):
    """Export visual-phrase ground truth as one VOC-style XML file per image.

    For every image under ``gt/<split_type>`` in the visual-phrase meta file,
    an annotation dict (filename, image size, one object per ground-truth
    box with its triplet name) is serialized with ``dict2xml`` and written
    to ``<root>/Annotations/<split_type>/<image>.xml``.

    Args:
        split_type: ``'train'`` or ``'test'``. Any other value prints
            ``error`` and terminates the process via ``exit(0)``.
    """
    if split_type != 'train' and split_type != 'test':
        print 'error'
        # NOTE(review): exits with status 0 although this is an error path.
        exit(0)
    root = '/home/zawlin/data/data_vrd/vrd/sg_vp/'
    anno_root = root + 'Annotations/' + split_type + '/'
    data_root = root + 'Data/' + split_type + '/'
    # Both meta files are only read here; open them read-only and let the
    # context managers close them even if an image fails.
    with h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r') as m, \
            h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_vphrase_meta.h5',
                      'r') as m_vp:
        zl.make_dirs(anno_root)
        zl.make_dirs(data_root)
        cnt = 0
        zl.tick()
        for k in m_vp['gt/%s' % split_type].keys():
            if cnt % 1000 == 0:
                print cnt, zl.tock()
                zl.tick()
            cnt += 1
            # TODO: for VG, also copy the source image into data_root.
            voc_datum = {
                "folder": '',
                "source": {
                    "database": "sg vrd visual phrase"
                },
                "filename": k + '.jpg'
            }
            # NOTE(review): the size is always looked up under 'train/',
            # even when split_type is 'test' -- confirm against the meta
            # file layout.
            w = int(m['train/%s/w' % k][...])
            h = int(m['train/%s/h' % k][...])
            voc_datum['size'] = {'width': w, 'height': h}

            objs = []
            gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...]
            gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...]
            for i in xrange(gt_boxes.shape[0]):
                gt_box = gt_boxes[i]
                # Box columns are read as (xmin, ymin, xmax, ymax).
                bbox = {'ymin': gt_box[1], 'ymax': gt_box[3],
                        'xmin': gt_box[0], 'xmax': gt_box[2]}
                name = zl.idx2name_tri(m_vp, gt_labels[i])
                objs.append({'name': name, 'bndbox': bbox})
            voc_datum['object'] = objs

            # Write the annotation next to the others, named after the image.
            dst_path = os.path.join(anno_root, voc_datum["folder"],
                                    k + '.xml')
            with open(dst_path, 'w') as f:
                f.write(dict2xml({'annotation': voc_datum}) + '\n')
    print 'images with annotation=%d\n' % cnt
Exemplo n.º 3
0
    def setup(self, bottom, top):
        """Prepare the RoIDataLayer: ground truth, image order and top shapes.

        Ground truth for every image under ``gt/train`` is loaded from the
        pickle cache when it exists; otherwise it is read from the meta HDF5
        file, a zero column is prepended to the subject/object boxes, and the
        result is cached. The image ids are then shuffled and the four top
        blobs (data, sub_boxes, obj_boxes, labels) are reshaped.
        """
        cache_path = 'output/cache/sg_vrd_gt.pkl'

        self._cur_idx = 0
        self.gt_labels = {}
        self.meta = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r')
        if not os.path.exists(cache_path):
            glog.info( 'Preloading gt')
            zl.tick()
            for imid in self.meta['gt/train'].keys():
                labels = self.meta['gt/train/%s/rlp_labels' % imid][...]
                subj = self.meta['gt/train/%s/sub_boxes' % imid][...].astype(float)
                obj = self.meta['gt/train/%s/obj_boxes' % imid][...].astype(float)
                n_rel = subj.shape[0]
                if n_rel > 0:
                    # first index is always zero since we do one image by
                    # one image
                    batch_col = np.zeros((n_rel, 1), dtype=float)
                    subj = np.concatenate((batch_col, subj), axis=1)
                    obj = np.concatenate((batch_col, obj), axis=1)
                self.gt_labels[imid] = {
                    'rlp_labels': labels,
                    'sub_boxes': subj,
                    'obj_boxes': obj,
                }
            glog.info('done preloading gt %f'%zl.tock())
            zl.save(cache_path, self.gt_labels)
        else:
            self.gt_labels = zl.load(cache_path)
            glog.info('loaded gt data from cache')

        self.imids = list(self.gt_labels.keys())
        self.imidx = 0
        random.shuffle(self.imids)

        # parse the layer parameter string, which must be valid YAML
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects; only safe while param_str is trusted.
        layer_params = yaml.load(self.param_str)
        self._num_classes = layer_params['num_classes']
        self._name_to_top_map = {}

        top_shapes = [
            # data blob: holds a batch of N images, each with 3 channels
            ('data', (cfg.TRAIN.IMS_PER_BATCH, 3,
                      max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)),
            ('sub_boxes', (1, 5, 1, 1)),
            ('obj_boxes', (1, 5, 1, 1)),
            # labels blob: R categorical labels in [0, ..., K] for K
            # foreground classes plus background
            ('labels', (1, 1, 1, 1)),
        ]
        for slot, (blob_name, shape) in enumerate(top_shapes):
            top[slot].reshape(*shape)
            self._name_to_top_map[blob_name] = slot
Exemplo n.º 4
0
def vg_cannonicalize():
    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_results = db.relationships.find(no_cursor_timeout=True)
    cnt = 0
    mappings = make_mappings()
    mappings_p = make_p_mappings()
    wnl = WordNetLemmatizer()
    spl = SpellingReplacer()
    zl.tick()
    for doc in db_results:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        rcnt = 0
        for r in doc['relationships']:
            pre = r['predicate']

            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            #if pre == '
            if sub_name in mappings:
                sub_name = mappings[sub_name]
            if obj_name in mappings:
                obj_name = mappings[obj_name]
            r['predicate_orig'] = pre
            r['object']['name_orig'] = obj_name
            r['subject']['name_orig'] = sub_name

            pre_canon = cannonicalize_relationship(pre, wnl, spl)
            obj_canon = cannonicalize_so(obj_name, wnl, spl)
            sub_canon = cannonicalize_so(sub_name, wnl, spl)

            if pre_canon in mappings_p:
                pre_canon = mappings_p[pre_canon]
            if pre_canon == 'short than':
                pre_canon = 'tall than'
                sub_doc = r['subject']
                obj_doc = r['object']
                r['subject'], r['object'] = obj_doc, sub_doc
                r['subject_orig'], r['object_orig'] = sub_doc, obj_doc
                sub_canon, obj_canon = obj_canon, sub_canon
            if pre_canon == 'large than':
                pre_canon = 'small than'
                sub_doc = r['subject']
                obj_doc = r['object']
                r['subject'], r['object'] = obj_doc, sub_doc
                r['subject_orig'], r['object_orig'] = sub_doc, obj_doc
                sub_canon, obj_canon = obj_canon, sub_canon
            r['predicate'] = pre_canon
            r['object']['name'] = obj_canon
            r['subject']['name'] = sub_canon
        db.relationships_cannon.insert(doc)
Exemplo n.º 5
0
def merge_pickled_files():
    import os
    h5f = h5py.File(C.coco_eb_h5_path, 'w')
    cnt = 0
    zl.tick()
    for path, subdirs, files in os.walk(C.coco_eb_dir):
        for name in files:
            cnt += 1
            if cnt % 1000 == 0:
                print cnt, zl.tock()
                zl.tick()
            fpath = os.path.join(path, name)
            fid = name.replace('.eb', '')
            bbs = np.array(zl.load(fpath)).astype(np.float16)
            h5f[fid] = bbs
Exemplo n.º 6
0
def run_test_remove_invalid_samples():
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5','r','core')
    h5f = h5py.File('output/precalc/vg1_2_2016_test.hdf5')
    imids ={}
    for k in m['gt/test'].keys():
        imids[k]=0

    cnt = 0
    zl.tick()
    for k in h5f.keys():
        if cnt%1000==0:
            print cnt,zl.tock()
            zl.tick()
        cnt+=1
        if k not in imids:
            del h5f[k]
Exemplo n.º 7
0
def vr_vphrase_make_voc_format(split_type):
    """Export visual-phrase ground truth as one VOC-style XML file per image.

    For every image under ``gt/<split_type>`` in
    ``data/sg_vrd_vphrase_meta.h5`` an annotation dict (filename, image
    size, one object per ground-truth box with its triplet name) is
    serialized with ``dict2xml`` and written to
    ``data/sg_vrd_2016_vp/Annotations/<split_type>/<image>.xml``.

    Args:
        split_type: ``'train'`` or ``'test'``. Any other value prints
            ``error`` and terminates the process via ``exit(0)``.
    """
    if split_type != 'train' and split_type != 'test':
        print 'error'
        # NOTE(review): exits with status 0 although this is an error path.
        exit(0)
    root = 'data/sg_vrd_2016_vp/'
    anno_root = root + 'Annotations/' + split_type + '/'
    data_root = root + 'Data/' + split_type + '/'
    # Both meta files are only read here; open them read-only and let the
    # context managers close them even if an image fails.
    with h5py.File('data/sg_vrd_meta.h5', 'r') as m, \
            h5py.File('data/sg_vrd_vphrase_meta.h5', 'r') as m_vp:
        zl.make_dirs(anno_root)
        zl.make_dirs(data_root)
        cnt = 0
        zl.tick()
        for k in m_vp['gt/%s' % split_type].keys():
            if cnt % 1000 == 0:
                print cnt, zl.tock()
                zl.tick()
            cnt += 1
            voc_datum = {"folder": '',
                         "source": {"database": "sg vrd visual phrase"},
                         "filename": k + '.jpg'
                         }
            # NOTE(review): the size is always looked up under 'train/',
            # even when split_type is 'test' -- confirm against the meta
            # file layout.
            w = int(m['train/%s/w' % k][...])
            h = int(m['train/%s/h' % k][...])
            voc_datum['size'] = {'width': w, 'height': h}

            objs = []
            gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...]
            gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...]
            for i in xrange(gt_boxes.shape[0]):
                gt_box = gt_boxes[i]
                # Box columns are read as (xmin, ymin, xmax, ymax).
                bbox = {'ymin': gt_box[1], 'ymax': gt_box[3],
                        'xmin': gt_box[0], 'xmax': gt_box[2]}
                name = zl.idx2name_tri(m_vp, gt_labels[i])
                objs.append({'name': name,
                             'bndbox': bbox})
            voc_datum['object'] = objs

            # Write the annotation next to the others, named after the image.
            dst_path = os.path.join(anno_root, voc_datum["folder"],
                                    k + '.xml')
            with open(dst_path, 'w') as f:
                f.write(dict2xml({'annotation': voc_datum}) + '\n')
    print 'images with annotation=%d\n' % cnt
Exemplo n.º 8
0
def vg_stats_predicate():
    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_results = db.relationships_cannon.find(no_cursor_timeout=True)
    cnt = 0
    mappings = make_mappings()
    mappings_p = make_p_mappings()
    wnl = WordNetLemmatizer()
    spl = SpellingReplacer()
    sub_obj_info = {}
    zl.tick()
    for doc in db_results:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        rcnt = 0
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            so_pair = sub_name + '_' + obj_name
            if so_pair not in sub_obj_info:
                so_info = {'total': 0, 'predicates': []}

            else:
                so_info = sub_obj_info[so_pair]
            so_info['total'] += 1
            if pre not in so_info['predicates']:
                so_info['predicates'].append(pre)
    zl.save('output/sub_obj_info.pkl', sub_obj_info)
    #total_pairs = len(sub_obj_info.keys())+0.0
    total_pairs = 0.0
    total_of_averages = 0.0
    for k in sub_obj_info.keys():
        so_info = sub_obj_info[k]
        total_predicates = len(so_info['predicates']) + 0.0
        if so_info['total'] < 2: continue
        total_pairs += 1
        total_annotated_pairs = so_info['total'] + 0.0
        avg_predicates_for_this_pair = total_predicates / total_annotated_pairs
        total_of_averages += avg_predicates_for_this_pair
    total_of_averages /= total_pairs
    print 'total_pairs = %d' % total_pairs
    print 'total_of_averages = %d' % total_of_averages
Exemplo n.º 9
0
def vg_make_meta_visual_phrase():
    m = h5py.File('data/vg1_2_meta.h5', 'r', driver='core')

    h5f = h5py.File('data/vg1_2_vp_meta.h5')

    triplets = {}
    cnt = 0
    zl.tick()
    for k in m['gt/train'].keys():
        if cnt % 1000 == 0:

            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        # sub_boxes = m['gt/train/%s/sub_boxes'%k][...]
        # obj_boxes = m['gt/train/%s/obj_boxes'%k][...]
        rlp_labels = m['gt/train/%s/rlp_labels' % k][...]
        for i in xrange(rlp_labels.shape[0]):
            # sub_box = sub_boxes[i]
            # obj_box = obj_boxes[i]
            rlp_label = rlp_labels[i]
            # joint_bbox = [min(sub_bbox[0],obj_bbox[0]), min(sub_bbox[1],obj_bbox[1]),max(sub_bbox[2],obj_bbox[2]),max(sub_bbox[3],obj_bbox[3])]

            s_lbl = zl.idx2name_cls(m, rlp_label[0])
            o_lbl = zl.idx2name_cls(m, rlp_label[2])
            p_lbl = zl.idx2name_pre(m, rlp_label[1])

            spo = '%s_%s_%s' % (s_lbl, p_lbl, o_lbl)
            # spo = '%d_%d_%d'%(rlp_label[0],rlp_label[1],rlp_label[2])
            if spo not in triplets:
                triplets[spo] = 0
            triplets[spo] += 1
    zl.save('output/pkl/triplets_train_vp.pkl', triplets)
    triplets_sorted = zl.sort_dict_by_val(triplets)

    triplets_ok = []

    for k, v in triplets_sorted:
        triplets_ok.append(k)
        print k, v
    triplets_ok = sorted(triplets_ok)
    triplets_ok = ['__background__'] + triplets_ok
    for i in xrange(len(triplets_ok)):
        h5f['meta/tri/idx2name/%d' % i] = triplets_ok[i]
        h5f['meta/tri/name2idx/%s' % triplets_ok[i]] = i
    print len(triplets_ok)
Exemplo n.º 10
0
def gen_meta_for_retrieval():
    out_pkl = 'output/pkl/vg_retr_meta.pkl'
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r')

    rlp_labels = []
    files = []
    counts = []
    cnt = 0
    zl.tick()
    for k in m['gt/test']:
        if cnt % 100 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        gt_rlp_labels = m['gt/test'][k]['rlp_labels'][...]
        for i in xrange(gt_rlp_labels.shape[0]):
            gt_rlp_label = gt_rlp_labels[i]
            if len(rlp_labels) == 0:
                rlp_labels.append(gt_rlp_label.tolist())
                files.append([k])
                counts.append(1)
                continue
            bInd = np.all(gt_rlp_label == rlp_labels, axis=1)
            ind = np.arange(len(rlp_labels))[bInd]
            if len(ind) == 0:
                rlp_labels.append(gt_rlp_label.tolist())
                files.append([k])
                counts.append(1)
            else:
                files[ind].append(k)
                counts[ind] = counts[ind] + 1
                # rlp_labels.append(gt_rlp_label.tolist())
                # files.append([k])
                # counts.append(1)
    rlp_labels = np.array(rlp_labels)
    files = np.array(files)
    counts = np.array(counts)

    ind = np.argsort(counts)[::-1]

    counts = counts[ind]
    files = files[ind]
    rlp_labels = rlp_labels[ind]

    retr_meta = {'counts': counts, 'files': files, 'rlp_labels': rlp_labels}
    zl.save(out_pkl, retr_meta)
Exemplo n.º 11
0
    def setup(self, bottom, top):
        self._cur_idx = 0
        self.vgg_data = {}
        vgg_h5 = h5py.File(
            "output/precalc/vg1_2_2016_predicate_exp_train.hdf5", 'r')
        layer_params = yaml.load(self.param_str_)

        self.imids = []
        for k in vgg_h5.keys():
            self.imids.append(k)
        self.imidx = 0

        if os.path.exists('output/cache/vg1_2_2016_pre_train_concat.pkl'):
            self.vgg_data = zl.load(
                'output/cache/vg1_2_2016_pre_train_concat.pkl')
            print 'loaded train data from cache'
        else:
            print 'Preloading training data'
            zl.tick()
            for k in vgg_h5.keys():
                sub_visual = vgg_h5[k]['sub_visual'][...]
                obj_visual = vgg_h5[k]['obj_visual'][...]
                pre_label = vgg_h5[k]['pre_label'][...]
                self.vgg_data[k] = {}
                self.vgg_data[k]['sub_visual'] = sub_visual
                self.vgg_data[k]['obj_visual'] = obj_visual
                self.vgg_data[k]['pre_label'] = pre_label
            print 'done preloading training data %f' % zl.tock()
            zl.save('output/cache/vg1_2_2016_pre_train_concat.pkl',
                    self.vgg_data)
            vgg_h5.close()

        self._batch_size = layer_params['batch_size']
        self.train_data = []
        self._name_to_top_map = {}

        # data blob: holds a batch of N images, each with 3 channels
        # top[0].reshape(self._batch_size, 4096 * 2 )

        top[0].reshape(self._batch_size, 2 * 4096)
        top[1].reshape(self._batch_size)
        self._name_to_top_map['visual'] = 0
        self._name_to_top_map['label'] = 1
Exemplo n.º 12
0
def run_test_save_result():
    caffe.set_mode_gpu()
    caffe.set_device(0)

    m_vp = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_vp_meta.h5', 'r', 'core')
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')
    net = caffe.Net('models/sg_vrd_vp/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/sg_vrd_vp_2016_train/sg_vrd_vp_vgg16_faster_rcnn_finetune_iter_15000.caffemodel',caffe.TEST)
    h5path = 'output/precalc/sg_vrd_2016_test_new.hdf5'
    h5f = h5py.File(h5path)
    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .15
    img_set_file = 'data/sg_vrd_2016/ImageSets/test.txt'
    imlist = {line.strip().split(' ')[1]:line.strip().split(' ')[0] for line in open(img_set_file)}

    # cv2.namedWindow('ctrl')
    # cv2.createTrackbar('thresh','ctrl',10,100,nothing)
    results = {}
    for imid in imlist.keys():
            cnt += 1
            if imid in h5f:continue
            impath = imlist[imid] +'.jpg'
            impath = root+impath
            im = cv2.imread(impath)
            if im == None:
                print impath
            box_proposals = None
            _t['im_detect'].tic()
            score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals)
            _t['im_detect'].toc()
            boxes_tosort = []
            zl.tick()
            h5f.create_dataset(imid + '/scores', dtype='float16', data=scores.astype(np.float16))
            h5f.create_dataset(imid + '/boxes', dtype='short', data=boxes[:,:4].astype(np.short))
            t_misc = zl.tock()
            print 'im_detect: {:d} {:.3f}s {:.3f}s' \
                .format(cnt, _t['im_detect'].average_time,
                        t_misc)
Exemplo n.º 13
0
def vg_db_stats():
    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_rel_train_all = db.relationships_all_train.find(no_cursor_timeout=True)
    db_rel_test_all = db.relationships_all_test.find(no_cursor_timeout=True)
    train_stats = {}
    test_stats = {}
    cnt = 0
    zl.tick()
    for db_rel in db_rel_train_all:
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        for r in db_rel['relationships']:
            name = r['predicate']
            if name not in train_stats:
                train_stats[name] = 0
            train_stats[name] += 1

    for db_rel in db_rel_test_all:
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        for r in db_rel['relationships']:
            name = r['predicate']
            if name not in test_stats:
                test_stats[name] = 0
            test_stats[name] += 1
Exemplo n.º 14
0
def convert_vp_result_for_matlab_eval2():
    _t = {'im_detect': Timer(), 'misc': Timer()}
    m_vp = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_vp_meta.h5', 'r', 'core')
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r', 'core')
    root = 'data/vg1_2_2016/Data/test/'
    h5path = 'output/precalc/vg1_2_vp2016_test_new.hdf5'
    h5f = h5py.File(h5path, 'r')
    h5path = 'output/precalc/vg1_2_vp2016_test_nms2_.4.hdf5'
    h5f_nms = h5py.File(h5path)
    img_set_file = 'data/vg1_2_2016/ImageSets/test.txt'
    imlist = {
        line.strip().split(' ')[1]: line.strip().split(' ')[0]
        for line in open(img_set_file)
    }
    cnt = 1
    results = {}
    thresh = 0
    zl.tick()
    for imid in imlist.keys():
        cnt += 1
        if cnt % 100 == 0:
            print cnt, zl.tock()
            zl.tick()
        imid_orig = imlist[imid].split('/')[1]
        impath = imlist[imid] + '.jpg'
        impath = root + impath
        im = cv2.imread(impath)
        if im == None:
            print impath
        box_proposals = None

        _t['im_detect'].tic()
        scores, boxes = h5f[imid]['scores'][...], h5f[imid]['boxes'][...]
        _t['im_detect'].toc()
        _t['misc'].tic()
        boxes_tosort = []
        # print scores.shape
        # print boxes.shape
        zl.tick()
        im_disp = im.copy()
        h5_boxes = []
        h5_labels = []
        h5_confs = []
        ind = np.argmax(scores[:, 1:], axis=1) + 1
        scores = scores[np.arange(scores.shape[0]), ind]
        dets = np.hstack((boxes, scores[:, np.newaxis]))
        keep = nms(dets, .4, force_cpu=True)  # nms threshold
        # print len(keep)
        # exit(0)

        dets = dets[keep, :]
        ind = ind[keep]
        h5f_nms.create_dataset(imid_orig + '/boxes', data=dets[:, :4])
        h5f_nms.create_dataset(imid_orig + '/confs', data=dets[:, 4])
        h5f_nms.create_dataset(imid_orig + '/labels', data=ind)
        # if c ==27:
        # exit(0)
        # if c ==ord( ' '):
        # break
        _t['misc'].toc()
Exemplo n.º 15
0
def gen_obj_detection_results_from_hdf5(h5_path, out_path):
    h5f = h5py.File(h5_path, 'r')
    outfile = open(out_path, 'w')
    thresh = 0.01
    cnt = 0
    zl.tick()
    for k in h5f.keys():
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        scores = h5f['%s/scores' % k][...]
        boxes = h5f['%s/boxes' % k][...]
        boxes_tosort = []
        for j in xrange(1, 201):
            inds = np.where(scores[:, j] > 0.001)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, .2, force_cpu=False)  # nms threshold
            cls_dets = cls_dets[keep, :]
            boxes_tosort.append(cls_dets)
        for j in xrange(len(boxes_tosort)):
            cls_dets = boxes_tosort[j]
            for di in xrange(cls_dets.shape[0]):
                #    print 'here'
                di = cls_dets[di]
                score = di[-1]
                cls_idx = j + 1
                if score > 1:
                    score = 1
                if score < thresh:
                    continue
                res_line = '%s %d %f %d %d %d %d' % (k, cls_idx, score, di[0],
                                                     di[1], di[2], di[3])
                outfile.write(res_line + '\n')
    outfile.close()
Exemplo n.º 16
0
def vg_count_only_one_triplet():
    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_results = db.relationships_all_train.find(no_cursor_timeout=True)
    cnt = 0
    spo_info = {}
    spo_list = []
    zl.tick()
    for doc in db_results:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        rcnt = 0
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            spo = sub_name + '_' + pre + '_' + obj_name
            if spo not in spo_info:
                spo_info[spo] = 0
            spo_info[spo] += 1

    db_results_2 = db.relationships_all_test.find(no_cursor_timeout=True)

    for doc in db_results_2:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        rcnt = 0
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            spo = sub_name + '_' + pre + '_' + obj_name
            if spo not in spo_info:
                spo_info[spo] = 0
            spo_info[spo] += 1
    zl.save('output/spo_info_vg.pkl', spo_info)
    #total_pairs = len(sub_obj_info.keys())+0.0
    total_spo = len(spo_info.keys()) + 0.0
    one_count = 0
    for k in spo_info.keys():
        if spo_info[k] >= 5:
            spo_list.append(k)
            one_count += 1
    #print total_spo,one_count
    vg_total_annotation_count(spo_list)
Exemplo n.º 17
0
def vg_total_annotation_count(spo_list):

    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_results = db.relationships_all_train.find(no_cursor_timeout=True)
    cnt = 0
    rcnt = 0
    zl.tick()
    total_train_cnt = 0
    for doc in db_results:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        ok = False
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            spo = sub_name + '_' + pre + '_' + obj_name
            if spo in spo_list:
                rcnt += 1
                ok = True
        if ok:
            total_train_cnt += 1

    db_results_2 = db.relationships_all_test.find(no_cursor_timeout=True)
    total_test_cnt = 0
    for doc in db_results_2:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        ok = False
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            spo = sub_name + '_' + pre + '_' + obj_name
            if spo in spo_list:
                rcnt += 1
                ok = True
        if ok:
            total_test_cnt += 1
    print rcnt, total_train_cnt, total_test_cnt
Exemplo n.º 18
0
def vg_check_obj_stats():
    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_obj_train_all = db.relationships_objects_train.find(
        no_cursor_timeout=True)
    db_obj_test_all = db.relationships_objects_test.find(
        no_cursor_timeout=True)
    train_stats = {}
    test_stats = {}
    cnt = 0
    zl.tick()
    for db_obj in db_obj_train_all:
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        for o in db_obj['objects']:
            name = o['name']
            if name not in train_stats:
                train_stats[name] = 0
            train_stats[name] += 1

    for db_obj in db_obj_test_all:
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        for o in db_obj['objects']:
            name = o['name']
            if name not in test_stats:
                test_stats[name] = 0
            test_stats[name] += 1
    zl.save('output/train_stats.pkl', train_stats)
    zl.save('output/test_stats.pkl', test_stats)
    print zl.sort_dict_by_val(train_stats)
    print zl.sort_dict_by_val(test_stats)
Exemplo n.º 19
0
def vg_count_predicate_per_object():
    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    db_results = db.relationships_all_train.find(no_cursor_timeout=True)
    cnt = 0
    spo_infos = {}
    zl.tick()
    for doc in db_results:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        rcnt = 0
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            if obj_name not in spo_infos:
                spo_info = {'predicates': []}
                spo_infos[obj_name] = spo_info

            if sub_name not in spo_infos:
                spo_info = {'predicates': []}
                spo_infos[sub_name] = spo_info
            sub_spo_info = spo_infos[sub_name]
            obj_spo_info = spo_infos[obj_name]
            if pre not in sub_spo_info['predicates']:
                sub_spo_info['predicates'].append(pre)
            if pre not in obj_spo_info['predicates']:
                obj_spo_info['predicates'].append(pre)

    db_results_2 = db.relationships_all_test.find(no_cursor_timeout=True)

    for doc in db_results_2:
        id = doc['image_id']
        cnt += 1
        if cnt % 1000 == 0:
            print cnt, zl.tock()
            zl.tick()
        rcnt = 0
        for r in doc['relationships']:
            pre = r['predicate']
            sub_name = r['subject']['name']
            obj_name = r['object']['name']
            if obj_name not in spo_infos:
                spo_info = {'predicates': []}
                spo_infos[obj_name] = spo_info
            if sub_name not in spo_infos:
                spo_info = {'predicates': []}
                spo_infos[sub_name] = spo_info
            sub_spo_info = spo_infos[sub_name]
            obj_spo_info = spo_infos[obj_name]
            if pre not in sub_spo_info['predicates']:
                sub_spo_info['predicates'].append(pre)
            if pre not in obj_spo_info['predicates']:
                obj_spo_info['predicates'].append(pre)
    total_predicates = 0
    for k in spo_infos.keys():
        spo_info = spo_infos[k]
        print len(spo_info['predicates'])
        total_predicates += len(spo_info['predicates'])
    print total_predicates / 200.
Exemplo n.º 20
0
    def setup(self, bottom, top):
        self._cur_idx = 0
        self.vgg_data = {}
        self.gt_labels = {}
        vgg_h5 = h5py.File("output/precalc/vg1_2_2016_train.hdf5", 'r')
        if os.path.exists('output/cache/vg1_2_2016_train.pkl'):
            self.vgg_data = zl.load('output/cache/vg1_2_2016_train.pkl')
            print 'loaded train data from cache'
        else:
            print 'Preloading training data'
            zl.tick()
            for k in vgg_h5.keys():
                classemes = vgg_h5[k]['classemes'][...]
                visuals = vgg_h5[k]['visuals'][...]
                locations = vgg_h5[k]['locations'][...]
                cls_confs = vgg_h5[k]['cls_confs'][...]
                self.vgg_data[k] = {}
                self.vgg_data[k]['classemes'] = classemes
                self.vgg_data[k]['visuals'] = visuals
                self.vgg_data[k]['cls_confs'] = cls_confs
                self.vgg_data[k]['locations'] = locations
            print 'done preloading training data %f' % zl.tock()
            zl.save('output/cache/vg1_2_2016_train.pkl', self.vgg_data)
            vgg_h5.close()

        self.meta = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r')
        if os.path.exists('output/cache/vg1_2_2016_gt.pkl'):
            self.gt_labels = zl.load('output/cache/vg1_2_2016_gt.pkl')
            print 'loaded gt data from cache'
        else:
            print 'Preloading gt'
            zl.tick()
            for k in self.meta['gt/train'].keys():
                rlp_labels = self.meta['gt/train/%s/rlp_labels' % k][...]
                sub_boxes = self.meta['gt/train/%s/sub_boxes' % k][...].astype(
                    np.float)
                obj_boxes = self.meta['gt/train/%s/obj_boxes' % k][...].astype(
                    np.float)
                self.gt_labels[k] = {}
                self.gt_labels[k]['rlp_labels'] = rlp_labels
                self.gt_labels[k]['sub_boxes'] = sub_boxes
                self.gt_labels[k]['obj_boxes'] = obj_boxes
            print 'done preloading gt %f' % zl.tock()
            zl.save('output/cache/vg1_2_2016_gt.pkl', self.gt_labels)

        self.imids = []
        for k in self.vgg_data.keys():
            self.imids.append(k)
        self.imidx = 0
        random.shuffle(self.imids)
        layer_params = yaml.load(self.param_str_)

        self._batch_size = layer_params['batch_size']
        self.train_data = []
        self._name_to_top_map = {}

        # top[0].reshape(self._batch_size, 201*2)
        top[0].reshape(self._batch_size, 4096 * 2)
        # top[0].reshape(self._batch_size, 4*2)
        top[1].reshape(self._batch_size)

        # self._name_to_top_map['classeme'] = 0
        self._name_to_top_map['visual'] = 0
        # self._name_to_top_map['location'] = 0
        self._name_to_top_map['label'] = 1
Exemplo n.º 21
0
def run_test_visualize():
    caffe.set_mode_gpu()
    caffe.set_device(0)

    m_vp = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_vp_meta.h5', 'r', 'core')
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r', 'core')
    net = caffe.Net('models/vg1_2_vp/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/vg1_2_vp2016_train/vg1_2_vp_vgg16_faster_rcnn_finetune_no_bbox_reg_iter_110000.caffemodel',caffe.TEST)
    root = 'data/vg1_2_2016/Data/test/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .15
    img_set_file = 'data/vg1_2_2016/ImageSets/test.txt'
    imlist = {line.strip().split(' ')[1]:line.strip().split(' ')[0] for line in open(img_set_file)}

    rel_types = {}
    rel_types['p']=[]
    rel_types['s']=[]
    rel_types['v']=[]
    rel_types['c']=[]

    for k in m_vp['meta/tri/name2idx'].keys():
        if k !='__background__':
            idx = int(str(m_vp['meta/tri/name2idx/'+k][...]))
            r_type = m_vp['meta/tri/name2idx/'+k].attrs['type']
            rel_types[r_type].append(idx)

    cv2.namedWindow('ctrl')
    cv2.createTrackbar('thresh','ctrl',10,100,nothing)
    results = {}
    for imid in imlist.keys():
            cnt += 1
            impath = imlist[imid] +'.jpg'
            if  '1059' not in impath and '107901' not in impath:continue
            impath = root+impath
            im = cv2.imread(impath)
            if im == None:
                print impath
            box_proposals = None
            _t['im_detect'].tic()
            score_raw, scores, fc7, raw_boxes = im_detect(net, im, box_proposals)
            _t['im_detect'].toc()
            boxes_tosort = []
            zl.tick()
            # boxes =np.array([])
            # labels =np.array([])
            boxes = None
            labels = None
            print 'generating boxes'
            for j in xrange(1, 19237):
                inds = np.where(scores[:, j] > 0.00)[0]
                cls_scores = scores[inds, j]
                cls_boxes = raw_boxes[inds, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                boxes_tosort.append(cls_dets)
                keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold

                cls_dets = cls_dets[keep, :]
                # sorted_ind = np.argsort(cls_dets[:,-1])[::-1]
                # cls_dets=cls_dets[sorted_ind]
                if cls_dets.shape[0]>0:
                    if boxes == None:
                        boxes = cls_dets
                    else:
                        boxes = np.vstack((boxes,cls_dets))
                    if labels == None:
                        labels = np.tile(j,cls_dets.shape[0])
                    else:
                        labels = np.hstack((labels,np.tile(j,cls_dets.shape[0])))
            # print boxes[:5]
            # print labels[:5]
            # exit(0)
            # sort the results
            print 'sorting'
            sorted_ind = np.argsort(boxes[:,-1])[::-1]
            boxes = boxes[sorted_ind]
            labels = labels[sorted_ind]

            ours_indices = {}
            ours_indices['p']=[]
            ours_indices['s']=[]
            ours_indices['v']=[]
            ours_indices['c']=[]
            indexor = np.arange(labels.shape[0])

            c_ind = np.in1d(labels,np.array(rel_types['c']))
            ours_indices['c'] = indexor[c_ind]
            p_ind = np.in1d(labels,np.array(rel_types['p']))
            ours_indices['p'] = indexor[p_ind]
            v_ind = np.in1d(labels,np.array(rel_types['v']))
            ours_indices['v'] = indexor[v_ind]
            s_ind = np.in1d(labels,np.array(rel_types['s']))
            ours_indices['s'] = indexor[s_ind]

            # exit(0)
            # for i in xrange(labels.shape[0]):

                # lbl =a labels[i]
                # if lbl in rel_types['p']: ours_indices['p'].append(i)
                # if lbl in rel_types['s']: ours_indices['s'].append(i)
                # if lbl in rel_types['v']: ours_indices['v'].append(i)
                # if lbl in rel_types['c']: ours_indices['c'].append(i)
            # print labels.shape[0]
            # print len(ours_indices['p'])
            # print len(ours_indices['s'])
            # print len(ours_indices['v'])
            # print len(ours_indices['c'])
            # print rel_types['c']
            # exit(0)
            _t['misc'].toc()
            t_misc = zl.tock()
            cv2.namedWindow('ctrl')
            cv2.destroyWindow('ctrl')
            cv2.namedWindow('ctrl')

            ours_p_len = ours_indices['p'].shape[0]-1
            ours_c_len = ours_indices['c'].shape[0]-1
            ours_v_len = ours_indices['v'].shape[0]-1
            ours_s_len = ours_indices['s'].shape[0]-1
            #ours_len = len(rlp_labels_ours)-1
            ours_len = labels.shape[0]-1

            if ours_len>0 :cv2.createTrackbar('idx_ours','ctrl',0,ours_len,nothing)
            if ours_p_len>0 :cv2.createTrackbar('idx_ours_p','ctrl',0,ours_p_len,nothing)
            if ours_c_len>0: cv2.createTrackbar('idx_ours_c','ctrl',0,ours_c_len,nothing)
            if ours_v_len>0:cv2.createTrackbar('idx_ours_v','ctrl',0, ours_v_len,nothing)
            if ours_s_len>0:cv2.createTrackbar('idx_ours_s','ctrl',0, ours_s_len,nothing)
            im_orig = im.copy()
            while True:

                if ours_len>=0:
                    idx_ours = cv2.getTrackbarPos('idx_ours','ctrl')
                    im_ours = im_orig.copy()
                    box = boxes[idx_ours]
                    lbl = zl.idx2name_tri(m_vp,labels[idx_ours])
                    cv2.putText(im_ours,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
                    cv2.rectangle(im_ours,(box[0],box[1]),(box[2],box[3]),(0,200,0),2)
                    cv2.imshow('im_ours',im_ours)

                if ours_c_len>=0:
                    idx_ours_c = cv2.getTrackbarPos('idx_ours_c','ctrl')
                    idx_ours = ours_indices['c'][idx_ours_c]
                    im_ours_c = im_orig.copy()
                    box = boxes[idx_ours]
                    lbl = zl.idx2name_tri(m_vp,labels[idx_ours])
                    cv2.putText(im_ours_c,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
                    cv2.rectangle(im_ours_c,(box[0],box[1]),(box[2],box[3]),(0,0,200),2)
                    cv2.imshow('im_ours_c',im_ours_c)
                if ours_s_len>=0:
                    idx_ours_s = cv2.getTrackbarPos('idx_ours_s','ctrl')
                    idx_ours = ours_indices['s'][idx_ours_s]
                    im_ours_s = im_orig.copy()
                    box = boxes[idx_ours]
                    lbl = zl.idx2name_tri(m_vp,labels[idx_ours])
                    cv2.putText(im_ours_s,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
                    cv2.rectangle(im_ours_s,(box[0],box[1]),(box[2],box[3]),(0,0,200),2)
                    cv2.imshow('im_ours_s',im_ours_s)
                if ours_v_len>=0:
                    idx_ours_v = cv2.getTrackbarPos('idx_ours_v','ctrl')
                    idx_ours = ours_indices['v'][idx_ours_v]
                    im_ours_v = im_orig.copy()
                    box = boxes[idx_ours]
                    lbl = zl.idx2name_tri(m_vp,labels[idx_ours])
                    cv2.putText(im_ours_v,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
                    cv2.rectangle(im_ours_v,(box[0],box[1]),(box[2],box[3]),(0,0,200),2)
                    cv2.imshow('im_ours_v',im_ours_v)
                if ours_p_len>=0:
                    idx_ours_p = cv2.getTrackbarPos('idx_ours_p','ctrl')
                    idx_ours = ours_indices['p'][idx_ours_p]
                    im_ours_p = im_orig.copy()
                    box = boxes[idx_ours]
                    lbl = zl.idx2name_tri(m_vp,labels[idx_ours])
                    cv2.putText(im_ours_p,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
                    cv2.rectangle(im_ours_p,(box[0],box[1]),(box[2],box[3]),(0,0,200),2)
                    cv2.imshow('im_ours_p',im_ours_p)

                c = cv2.waitKey(1)&0xFF
                if c == ord(' '):
                    break
                if c == ord('s'):
                    im_folder = 'output/results/examples/'+imid
                    if not os.path.exists('output/results/examples/'+imid):
                        os.makedirs('output/results/examples/'+imid)
                    if not os.path.exists('output/results/examples/'+imid+'/orig_'+imid+'.jpg'):
                        cv2.imwrite('output/results/examples/'+imid+'/orig_'+imid+'.jpg',im_orig)


                    if ours_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_'+imid+str(idx_ours)+'.jpg',im_ours)
                    if ours_v_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_v_'+imid+str(idx_ours)+'.jpg',im_ours_v)
                    if ours_p_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_p_'+imid+str(idx_ours)+'.jpg',im_ours_p)
                    if ours_c_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_c_'+imid+str(idx_ours)+'.jpg',im_ours_c)
                    if ours_s_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_s_'+imid+str(idx_ours)+'.jpg',im_ours_s)

            print 'im_detect: {:d} {:.3f}s {:.3f}s' \
                .format(cnt, _t['im_detect'].average_time,
                        t_misc)
Exemplo n.º 22
0
def run_relation(model_type, iteration):
    vgg_data = h5py.File('output/precalc/vg1_2_2016_test.hdf5')
    result = h5py.File('output/vg_results/vg1_2_2016_result_' + model_type +
                       '_' + iteration + '.hdf5')
    m = h5py.File('data/vg1_2_meta.h5')
    data_root = 'data/vg1_2_2016/Data/test/'
    keep = 100
    thresh = 0.0001
    net = caffe.Net(
        'models/vg1_2/relation/test_' + model_type + '.prototxt',
        'output/relation/vg/relation_vgg16_' + model_type + '_iter_' +
        iteration + '.caffemodel', caffe.TEST)
    #net = caffe.Net('models/sg_vrd/relation/test.prototxt','output/models/sg_vrd_relation_vgg16_iter_264000.caffemodel',caffe.TEST)
    cnt = 1
    zl.tick()
    for imid in vgg_data.keys():
        if cnt % 100 == 0:
            print cnt, zl.tock()
            zl.tick()
            exit(0)
        cnt += 1
        # if cnt%100==0:
        # print cnt

        classemes = vgg_data[imid]['classemes']
        visuals = vgg_data[imid]['visuals']
        locations = vgg_data[imid]['locations']
        cls_confs = vgg_data[imid]['cls_confs']

        # im = cv2.imread(data_root+imid+'.jpg')
        # #print cls_confs
        # # for box in locations:
        # # b=box[:4].astype(np.int32)
        # # cv2.rectangle(im,(b[0],b[1]),(b[2],b[3]),(255,0,0))
        # w,h = im.shape[2],im.shape[1]

        rlp_labels = []
        rlp_confs = []
        sub_boxes = []
        obj_boxes = []
        relation_vectors = []

        classemes_in = []
        visuals_in = []
        locations_in = []
        cls_confs_in = []
        for s in xrange(len(locations)):
            for o in xrange(len(locations)):
                if s == o: continue
                sub = locations[s]
                obj = locations[o]
                sub_visual = visuals[s]
                obj_visual = visuals[o]
                sub_cls = cls_confs[s, 0]
                obj_cls = cls_confs[o, 0]
                sub_score = cls_confs[s, 1]
                obj_score = cls_confs[o, 1]
                sub_classme = classemes[s]
                obj_classme = classemes[o]
                if sub_score < 0.01 or obj_score < 0.01: continue
                sub_loc_encoded = bbox_transform(np.array([obj[:4]]),
                                                 np.array([sub[:4]]))[0]
                obj_loc_encoded = bbox_transform(np.array([sub[:4]]),
                                                 np.array([obj[:4]]))[0]
                #sub_loc_encoded = bbox_transform(np.array([[0, 0, w, h]]), np.array([sub[:4]]))[0]
                #obj_loc_encoded = bbox_transform(np.array([[0, 0, w, h]]), np.array([obj[:4]]))[0]

                visual = np.hstack((sub_visual, obj_visual)).reshape(1, 8192)
                classeme = np.hstack(
                    (sub_classme, obj_classme)).reshape(1, 402)
                loc = np.hstack(
                    (sub_loc_encoded, obj_loc_encoded)).reshape(1, 8)
                if 'all' in model_type:
                    blob = {
                        'classeme': classeme,
                        'visual': visual,
                        'location': loc
                    }
                elif 'visual' in model_type:
                    blob = {
                        'visual': visual,
                    }
                elif 'classeme' in model_type:
                    blob = {
                        'classeme': classeme,
                    }
                elif 'location' in model_type:
                    blob = {'location': loc}
                #batch this
                net.forward_all(**blob)

                relation_score = net.blobs['relation_prob'].data[0].copy()
                #l2_norm = relation_score/LA.norm(relation_score)
                #relation_score=softmax(relation_score)
                #relation_score/=LA.norm(relation_score)
                #relation_score=softmax(relation_score)
                argmax = np.argmax(relation_score)
                rs = relation_score[argmax]
                predicate = argmax
                rlp_label = np.array([sub_cls, predicate,
                                      obj_cls]).astype(np.int32)
                rlp_conf = rs + sub_score + obj_score  #relation_score[predicate]

                rlp_confs.append(rlp_conf)
                rlp_labels.append(rlp_label)
                sub_boxes.append(sub[:4])
                obj_boxes.append(obj[:4])
                relation_vectors.append(relation_score)
                # for i in xrange(70):
                # rs = relation_score[i]
                # if rs>0.0:
                # predicate =i
                # rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]

                # rlp_confs.append(rlp_conf)
                # rlp_labels.append(rlp_label)
                # sub_boxes.append(sub[:4])
                # obj_boxes.append(obj[:4])

        result.create_dataset(imid + '/rlp_confs',
                              dtype='float16',
                              data=np.array(rlp_confs).astype(np.float16))
        result.create_dataset(imid + '/sub_boxes',
                              dtype='float16',
                              data=np.array(sub_boxes).astype(np.float16))
        result.create_dataset(imid + '/obj_boxes',
                              dtype='float16',
                              data=np.array(obj_boxes).astype(np.float16))
        result.create_dataset(imid + '/rlp_labels',
                              dtype='float16',
                              data=np.array(rlp_labels).astype(np.float16))
Exemplo n.º 23
0
def vg_make_voc_format(split_type):
    """Export one Visual Genome split as VOC-style images plus XML annotations.

    For every document of the ``relationships_objects_<split_type>`` Mongo
    collection the image is copied from ``data/vg_1.2/`` into
    ``data/vg_1.2/voc_format/Data/<split_type>/`` and an XML annotation
    (folder, filename, image size and one ``object`` entry with name and
    bounding box per object) is written to
    ``data/vg_1.2/voc_format/Annotations/<split_type>/``.

    Args:
        split_type: 'train' or 'test'; anything else prints 'error' and exits
            the process.
    """
    from contextlib import closing

    if split_type != 'train' and split_type != 'test':
        print 'error'
        exit(0)
    vg_root = 'data/vg_1.2/'
    anno_root = 'data/vg_1.2/voc_format/Annotations/' + split_type + '/'
    data_root = 'data/vg_1.2/voc_format/Data/' + split_type + '/'

    for out_root in (anno_root, data_root):
        for folder in ('VG_100K_2', 'VG_100K'):
            zl.make_dirs(out_root + folder)

    cnt = 0
    with closing(MongoClient("mongodb://localhost:27017")) as client:
        db = client.visual_genome_1_2
        if split_type == 'train':
            collection = db.relationships_objects_train
        else:
            collection = db.relationships_objects_test

        zl.tick()
        # a no_cursor_timeout cursor is never reaped by the server, so it has
        # to be closed explicitly even when the loop is aborted
        with closing(collection.find(no_cursor_timeout=True)) as db_objs:
            for db_obj in db_objs:
                if cnt % 1000 == 0:
                    print cnt, zl.tock()
                    zl.tick()
                cnt += 1
                imid = db_obj['image_id']
                im_data = db.image_data.find_one({'image_id': imid})
                # the url minus its host prefix is "<folder>/<file>"
                path_parts = im_data['url'].replace(
                    'https://cs.stanford.edu/people/rak248/', '').split('/')
                im_path_folder = path_parts[0]
                im_path_file = path_parts[1]
                im_src_path = vg_root + '%s/%s' % (im_path_folder,
                                                   im_path_file)
                im_dst_path = data_root + '%s/%s' % (im_path_folder,
                                                     im_path_file)
                zl.copy_file(im_src_path, im_dst_path)

                objs = []
                for o in db_obj['objects']:
                    bbox = {
                        'ymin': o['y'],
                        'ymax': o['y'] + o['h'],
                        'xmin': o['x'],
                        'xmax': o['x'] + o['w'],
                    }
                    objs.append({'name': o['name'], 'bndbox': bbox})

                voc_datum = {
                    "folder": im_path_folder,
                    "source": {
                        "database": "visual genome 1.2"
                    },
                    "filename": im_path_file,
                    "size": {
                        'width': im_data['width'],
                        'height': im_data['height']
                    },
                    "object": objs,
                }
                #write to xml
                dst_path = os.path.join(
                    anno_root, im_path_folder,
                    os.path.splitext(im_path_file)[0] + '.xml')
                with open(dst_path, 'w') as f:
                    f.write(dict2xml({'annotation': voc_datum}) + '\n')
    print 'images with annotation=%d\n' % cnt
Exemplo n.º 24
0
def vg_make_voc_imageset(split_type):
    """Write the VOC-style image-set list for one Visual Genome split.

    Every document of the ``relationships_objects_<split_type>`` collection
    that has at least one object and whose image is at least 100x100 pixels
    gets one ``"<folder>/<file stem> <running index>"`` line in
    ``data/vg_1.2/voc_format/ImageSets/<split_type>.txt`` (indices start at 1).

    For the 'train' split an additional ``mini.txt`` is written that holds up
    to three images per object name.

    Args:
        split_type: 'train' or 'test'; anything else prints 'error' and exits
            the process.  The process also exits when an image file name does
            not contain '.jpg'.
    """
    from contextlib import closing

    if split_type != 'train' and split_type != 'test':
        print 'error'
        exit(0)
    blacklist = []
    imageset_root = 'data/vg_1.2/voc_format/ImageSets/' + split_type + '.txt'
    # object name -> up to three image indices containing that object
    mini_selection = {}

    with closing(MongoClient("mongodb://localhost:27017")) as client:
        db = client.visual_genome_1_2
        # preload image data
        with closing(db.image_data.find(no_cursor_timeout=True)) as images:
            imdatas = dict((imdata['image_id'], imdata) for imdata in images)

        if split_type == 'train':
            collection = db.relationships_objects_train
        else:
            collection = db.relationships_objects_test

        cnt = 1
        # start the timer so the first progress line has something to report
        zl.tick()
        with closing(collection.find(no_cursor_timeout=True)) as db_objs, \
                open(imageset_root, 'w') as output:
            for db_obj in db_objs:
                if len(db_obj['objects']) <= 0:
                    continue
                im_data = imdatas[db_obj['image_id']]
                if im_data['width'] < 100 or im_data['height'] < 100: continue
                # the url minus its host prefix is "<folder>/<file>"
                path_parts = im_data['url'].replace(
                    'https://cs.stanford.edu/people/rak248/', '').split('/')
                im_path_folder = path_parts[0]
                im_path_file = path_parts[1]
                if '.jpg' not in im_path_file:
                    print 'not a jpg image %s\n' % im_path_file
                    exit(0)
                im_index = im_path_folder + '/' + im_path_file.replace(
                    '.jpg', '')

                if im_index in blacklist: continue

                if split_type == 'train':
                    for o in db_obj['objects']:
                        chosen = mini_selection.setdefault(o['name'], [])
                        if len(chosen) < 3 and im_index not in chosen:
                            chosen.append(im_index)

                output.write('%s %d\n' % (im_index, cnt))
                # report once per 1000 written entries
                if cnt % 1000 == 0:
                    print cnt, zl.tock()
                    zl.tick()
                cnt += 1

    if split_type == 'train':
        imageset_root = 'data/vg_1.2/voc_format/ImageSets/mini.txt'
        cnt = 1
        mini_lines = []
        for k in mini_selection.keys():
            for f in mini_selection[k]:
                mini_lines.append('%s %d\n' % (f, cnt))
                cnt += 1
        with open(imageset_root, 'w') as output:
            output.write(''.join(mini_lines))
Exemplo n.º 25
0
def run_test_save_result():
    caffe.set_mode_gpu()
    caffe.set_device(0)

    m_vp = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_vp_meta.h5', 'r', 'core')
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r', 'core')
    net = caffe.Net(
        'models/vg1_2_vp/vgg16/faster_rcnn_end2end/test.prototxt',
        'output/faster_rcnn_end2end/vg1_2_vp2016_train/vg1_2_vp_vgg16_faster_rcnn_finetune_no_bbox_reg_iter_110000.caffemodel',
        caffe.TEST)
    h5path = 'output/precalc/vg1_2_vp2016_test_new.hdf5'
    h5f = h5py.File(h5path)
    root = 'data/vg1_2_2016/Data/test/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .15
    img_set_file = 'data/vg1_2_2016/ImageSets/test.txt'
    imlist = {
        line.strip().split(' ')[1]: line.strip().split(' ')[0]
        for line in open(img_set_file)
    }

    # cv2.namedWindow('ctrl')
    # cv2.createTrackbar('thresh','ctrl',10,100,nothing)
    results = {}
    for imid in imlist.keys():
        cnt += 1

        if imid in h5f: continue
        impath = imlist[imid] + '.jpg'
        impath = root + impath
        im = cv2.imread(impath)
        if im == None:
            print impath
        box_proposals = None
        _t['im_detect'].tic()
        score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()
        boxes_tosort = []
        zl.tick()
        # results[imid] = {'scores':scores.astype(np.float16),'boxes':boxes[:,:4].astype(np.float16)}
        # if len(results.keys())>10:
        # for k in results.keys():
        # h5f.create_dataset(k+ '/scores', dtype='float16', data=results[k]['scores'])
        # h5f.create_dataset(k+ '/boxes', dtype='float16', data=results[k]['boxes'])
        # results = {}
        h5f.create_dataset(imid + '/scores',
                           dtype='float16',
                           data=scores.astype(np.float16))
        h5f.create_dataset(imid + '/boxes',
                           dtype='short',
                           data=boxes[:, :4].astype(np.short))
        h5_boxes = None
        h5_labels = None
        h5_confs = None
        for j in xrange(1, 19237):
            inds = np.where(scores[:, j] > 0.0001)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, .2)  # nms threshold
            cls_dets = cls_dets[keep, :]
            boxes_tosort.append(cls_dets)

            if cls_dets.shape[0] > 0:
                pass

        #_t['misc'].toc()
        t_misc = zl.tock()
        # while True:
        # thresh = cv2.getTrackbarPos('thresh','ctrl')/100.
        # im_disp = im.copy()
        # for j in xrange(len(boxes_tosort)):
        # cls_dets = boxes_tosort[j]
        # for di in xrange(cls_dets.shape[0]):
        # #    print 'here'
        # di = cls_dets[di]
        # score = di[-1]
        # if score<thresh:
        # continue
        # cls_idx = j + 1
        # cls_name = zl.idx2name_tri(m_vp,cls_idx)
        # # cls_name = str(m['meta/cls/idx2name/' + str(cls_idx)][...])
        # if score > 1:
        # score = 1
        # x, y = int(di[0]), int(di[1])
        # if x < 10:
        # x = 15
        # if y < 10:
        # y = 15
        # cv2.putText(im_disp, cls_name, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
        # cv2.rectangle(im_disp, (di[0], di[1]), (di[2], di[3]), (255, 0, 0), 2)
        # #print '%s %d %f %d %d %d %f\n' % (im_idx, j + 1, score, di[0], di[1], di[2], di[3])
        # cv2.imshow('im', im_disp)
        # c = cv2.waitKey(1) & 0xFF
        # if c ==27:
        # exit(0)
        # if c ==ord( ' '):
        # break

        print 'im_detect: {:d} {:.3f}s {:.3f}s' \
            .format(cnt, _t['im_detect'].average_time,
                    t_misc)
Exemplo n.º 26
0
def run_relation_diff(model_type,iteration):
    #vgg_data = h5py.File('output/sg_vrd_2016_test.hdf5')
    vgg_data = h5py.File('output/sg_vrd_2016_test_more.hdf5')
    result = h5py.File('output/sg_vrd_2016_result_'+model_type+'_'+iteration+'.hdf5')
    #if os.path.exists('output/sg_vrd_2016_result.hdf5'):
    #    os.remove('output/sg_vrd_2016_result.hdf5')
    m = h5py.File('data/sg_vrd_meta.h5')
    data_root='data/sg_vrd_2016/Data/sg_test_images/'
    keep = 100
    thresh = 0.0001
    net = caffe.Net('models/sg_vrd/relation/test_'+model_type+'.prototxt','output/relation/sg_vrd_relation_vgg16_'+model_type+'_iter_'+iteration+'.caffemodel',caffe.TEST)
    #net = caffe.Net('models/sg_vrd/relation/test.prototxt','output/models/sg_vrd_relation_vgg16_iter_264000.caffemodel',caffe.TEST)
    cnt =0
    zl.tick()
    for imid in vgg_data.keys():
        cnt+=1
        print cnt,zl.tock()
        zl.tick()

        classemes = vgg_data[imid]['classemes']
        visuals = vgg_data[imid]['visuals']
        locations = vgg_data[imid]['locations']
        cls_confs = vgg_data[imid]['cls_confs']

        #im = cv2.imread(data_root+imid+'.jpg')
        #print cls_confs
        # for box in locations:
            # b=box[:4].astype(np.int32)
            # cv2.rectangle(im,(b[0],b[1]),(b[2],b[3]),(255,0,0))

        rlp_labels = []
        rlp_confs = []
        sub_boxes=[]
        obj_boxes=[]
        for s in xrange(len(locations)):
            for o in xrange(len(locations)):
                if s==o:continue
                sub = locations[s]
                obj = locations[o]
                sub_visual = visuals[s]
                obj_visual = visuals[o]
                sub_cls = cls_confs[s,0]
                obj_cls = cls_confs[o,0]
                sub_score = cls_confs[s,1]
                obj_score = cls_confs[o,1]
                if sub_score<0.1 or obj_score<0.1:continue
                sub_classme = classemes[s]
                obj_classme = classemes[o]
                sub_loc_encoded = bbox_transform( np.array([obj[:4]]), np.array([sub[:4]]))[0]
                obj_loc_encoded = bbox_transform( np.array([sub[:4]]), np.array([obj[:4]]))[0]
                visual = np.hstack((sub_visual, obj_visual)).reshape(1,8192)
                classeme = np.hstack((sub_classme, obj_classme)).reshape(1,202)
                loc = sub_loc_encoded.reshape(1,4)#np.hstack((sub_loc_encoded, obj_loc_encoded)).reshape(1,4)
                if 'all' in model_type:
                    blob = {
                        'classeme_s':np.array(sub_classme).reshape(1,101),
                        'classeme_o':np.array(obj_classme).reshape(1,101),
                        'visual_s':np.array(sub_visual).reshape(1,4096),
                        'visual_o':np.array(obj_visual).reshape(1,4096),
                        'location_s':np.array(sub_loc_encoded).reshape(1,4),
                        'location_o':np.array(obj_loc_encoded).reshape(1,4),
                        }
                elif 'visual' in model_type:
                    blob = {
                        'visual_s':np.array(sub_visual).reshape(1,4096),
                        'visual_o':np.array(obj_visual).reshape(1,4096),
                        }
                elif 'classeme' in model_type:
                    blob = {
                        'classeme_s':np.array(sub_classme).reshape(1,101),
                        'classeme_o':np.array(obj_classme).reshape(1,101),
                        }
                elif 'location' in model_type:
                    blob = {
                        'location_s':np.array(sub_loc_encoded).reshape(1,4),
                        'location_o':np.array(obj_loc_encoded).reshape(1,4),
                        }
                #print visual.shape
                net.forward_all(**blob)
                relation_score =net.blobs['relation'].data[0]
                #l2_norm = relation_score/LA.norm(relation_score)
                relation_score=softmax(relation_score)
                argmax = np.argmax(relation_score)
                rs = relation_score[argmax]

                predicate = argmax
                rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                #print '%s %s %s %f'%(m['meta/cls/idx2name/'+str(rlp_label[0])][...],m['meta/pre/idx2name/'+str(rlp_label[1])][...],m['meta/cls/idx2name/'+str(rlp_label[2])][...],rs)
                rlp_conf = rs+sub_score+obj_score#relation_score[predicate]

                rlp_confs.append(rlp_conf)
                rlp_labels.append(rlp_label)
                sub_boxes.append(sub[:4])
                obj_boxes.append(obj[:4])
                #relation_score/=LA.norm(relation_score)
                # for i in xrange(70):
                    # rs = relation_score[i]
                    # if rs>0.0:
                        # predicate =i
                        # #print relation_score[predicate]
                        # rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                        # #print '%s %s %s %f'%(m['meta/cls/idx2name/'+str(rlp_label[0])][...],m['meta/pre/idx2name/'+str(rlp_label[1])][...],m['meta/cls/idx2name/'+str(rlp_label[2])][...],rs)
                        # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]

                        # rlp_confs.append(rlp_conf)
                        # rlp_labels.append(rlp_label)
                        # sub_boxes.append(sub[:4])
                        # obj_boxes.append(obj[:4])

        result.create_dataset(imid+'/rlp_confs',dtype='float16', data=np.array(rlp_confs).astype(np.float16))
        result.create_dataset(imid+'/sub_boxes',dtype='float16', data=np.array(sub_boxes).astype(np.float16))
        result.create_dataset(imid+'/obj_boxes',dtype='float16', data=np.array(obj_boxes).astype(np.float16))
        result.create_dataset(imid+'/rlp_labels',dtype='float16', data=np.array(rlp_labels).astype(np.float16))
Exemplo n.º 27
0
def run_relation_batch(model_type, iteration):
    vgg_h5 = h5py.File('output/precalc/vg1_2_2016_test.hdf5')
    vgg_data = {}
    if os.path.exists('output/cache/vg1_2_2016_test.pkl'):
        vgg_data = zl.load('output/cache/vg1_2_2016_test.pkl')
        print 'loaded test data from cache'
    else:
        print 'Preloading testing data'
        zl.tick()
        for k in vgg_h5.keys():
            classemes = vgg_h5[k]['classemes'][...]
            visuals = vgg_h5[k]['visuals'][...]
            locations = vgg_h5[k]['locations'][...]
            cls_confs = vgg_h5[k]['cls_confs'][...]
            vgg_data[k] = {}
            vgg_data[k]['classemes'] = classemes
            vgg_data[k]['visuals'] = visuals
            vgg_data[k]['cls_confs'] = cls_confs
            vgg_data[k]['locations'] = locations
        print 'done preloading testing data %f' % zl.tock()
        zl.save('output/cache/vg1_2_2016_test.pkl', vgg_data)
        vgg_h5.close()
    result = h5py.File('output/vg_results/vg1_2_2016_result_' + model_type +
                       '_' + iteration + '.hdf5')
    m = h5py.File('data/vg1_2_meta.h5')
    data_root = 'data/vg1_2_2016/Data/test/'
    keep = 100
    thresh = 0.0001
    net = caffe.Net(
        'models/vg1_2/relation/test_' + model_type + '.prototxt',
        'output/relation/vg/relation_vgg16_' + model_type + '_iter_' +
        iteration + '.caffemodel', caffe.TEST)
    #net = caffe.Net('models/sg_vrd/relation/test.prototxt','output/models/sg_vrd_relation_vgg16_iter_264000.caffemodel',caffe.TEST)
    cnt = 1
    zl.tick()
    imids = sorted(vgg_data.keys())
    for imid in imids:
        if cnt % 100 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        if imid in result: continue
        classemes = vgg_data[imid]['classemes']
        visuals = vgg_data[imid]['visuals']
        locations = vgg_data[imid]['locations']
        cls_confs = vgg_data[imid]['cls_confs']

        # im = cv2.imread(data_root+imid+'.jpg')
        # #print cls_confs
        # # for box in locations:
        # # b=box[:4].astype(np.int32)
        # # cv2.rectangle(im,(b[0],b[1]),(b[2],b[3]),(255,0,0))
        # w,h = im.shape[2],im.shape[1]

        rlp_labels = []
        rlp_confs = []
        sub_boxes = []
        obj_boxes = []
        relation_vectors = []

        classemes_in = []
        visuals_in = []
        locations_in = []
        cls_confs_in = []
        sub_cls_in = []
        obj_cls_in = []
        sub_score_in = []
        obj_score_in = []
        sub_boxes = []
        obj_boxes = []
        for s in xrange(len(locations)):
            for o in xrange(len(locations)):
                if s == o: continue
                sub = locations[s]
                obj = locations[o]
                sub_visual = visuals[s]
                obj_visual = visuals[o]
                sub_cls = cls_confs[s, 0]
                obj_cls = cls_confs[o, 0]
                sub_score = cls_confs[s, 1]
                obj_score = cls_confs[o, 1]
                sub_classme = classemes[s]
                obj_classme = classemes[o]
                sub_loc_encoded = bbox_transform(np.array([obj[:4]]),
                                                 np.array([sub[:4]]))[0]
                obj_loc_encoded = bbox_transform(np.array([sub[:4]]),
                                                 np.array([obj[:4]]))[0]

                visual = np.hstack((sub_visual, obj_visual)).reshape(8192)
                classeme = np.hstack((sub_classme, obj_classme)).reshape(402)
                loc = np.hstack((sub_loc_encoded, obj_loc_encoded)).reshape(8)

                classemes_in.append(classeme)
                visuals_in.append(visual)
                locations_in.append(loc)
                sub_cls_in.append(sub_cls)
                obj_cls_in.append(obj_cls)
                sub_score_in.append(sub_score)
                obj_score_in.append(obj_score)
                sub_boxes.append(sub[:4])
                obj_boxes.append(obj[:4])

        if 'all' in model_type:
            blob = {
                'classeme': np.array(classemes_in),
                'visual': np.array(visuals_in),
                'location': np.array(locations_in)
            }
            net.blobs['classeme'].reshape(*blob['classeme'].shape)
            net.blobs['visual'].reshape(*blob['visual'].shape)
            net.blobs['location'].reshape(*blob['location'].shape)
        elif 'visual' in model_type:
            blob = {
                'visual': np.array(visuals_in),
            }
            net.blobs['visual'].reshape(*blob['visual'].shape)
        elif 'classeme' in model_type:
            blob = {
                'classeme': np.array(classemes_in),
            }

            net.blobs['classeme'].reshape(*blob['classeme'].shape)
        elif 'location' in model_type:
            blob = {'location': np.array(locations_in)}
            #batch this
            net.blobs['location'].reshape(*blob['location'].shape)
        if len(locations_in) == 0:
            rlp_confs = []
            sub_boxes = []
            obj_boxes = []
            rlp_labels = []
        else:
            net.forward_all(**blob)
            relation_score = net.blobs['relation_prob'].data.copy()
            argmax = np.argmax(relation_score, axis=1)
            rs = relation_score[np.arange(relation_score.shape[0]), argmax]
            rlp_labels = np.vstack((sub_cls_in, argmax, obj_cls_in)).T
            rlp_confs = np.array(sub_score_in) + np.array(rs) + np.array(
                obj_score_in)
        result.create_dataset(imid + '/rlp_confs',
                              dtype='float16',
                              data=np.array(rlp_confs).astype(np.float16))
        result.create_dataset(imid + '/sub_boxes',
                              dtype='float16',
                              data=np.array(sub_boxes).astype(np.float16))
        result.create_dataset(imid + '/obj_boxes',
                              dtype='float16',
                              data=np.array(obj_boxes).astype(np.float16))
        result.create_dataset(imid + '/rlp_labels',
                              dtype='float16',
                              data=np.array(rlp_labels).astype(np.float16))
Exemplo n.º 28
0
def run_relation(model_type, iteration):
    vgg_data = h5py.File('output/precalc/vg1_2_2016_predicate_exp_test.hdf5')
    result = h5py.File('output/vg1_2_2016_result_' + model_type + '_' +
                       iteration + '.hdf5')
    #if os.path.exists('output/sg_vrd_2016_result.hdf5'):
    #    os.remove('output/sg_vrd_2016_result.hdf5')
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5')
    data_root = '/home/zawlin/g/py-faster-rcnn/data/vg1_2_2016/Data/test/'
    keep = 100
    thresh = 0.0001
    net = caffe.Net(
        'models/vg1_2/relation/test_' + model_type + '.prototxt',
        'output/relation/vg/relation_vgg16_' + model_type + '_iter_' +
        iteration + '.caffemodel', caffe.TEST)

    cnt = 0
    zl.tick()
    for imid in vgg_data.keys():
        if cnt % 100 == 0:
            print cnt, zl.tock()
            zl.tick()
        cnt += 1
        obj_boxes_gt = vgg_data[imid]['obj_boxes']
        sub_boxes_gt = vgg_data[imid]['sub_boxes']
        sub_visual = vgg_data[imid]['sub_visual']
        obj_visual = vgg_data[imid]['obj_visual']
        joint_visual = vgg_data[imid]['joint_visual']
        sub_cls = vgg_data[imid]['sub_cls']
        obj_cls = vgg_data[imid]['obj_cls']

        rlp_labels = []
        rlp_confs = []
        sub_boxes = []
        obj_boxes = []
        for s in xrange(sub_boxes_gt.shape[0]):
            if model_type == 'pre_diff':
                blob = {
                    'visual_s': np.array(sub_visual[s]).reshape(1, 4096),
                    'visual_o': np.array(obj_visual[s]).reshape(1, 4096),
                }
            elif model_type == 'pre_jointbox':
                blob = {
                    'visual': np.array(joint_visual[s]).reshape(1, 4096),
                }
                pass
            elif model_type == 'pre_concat':
                visual = np.hstack(
                    (sub_visual[s], obj_visual[s])).reshape(1, 8192)
                blob = {'visual': visual}
            #print visual.shape
            net.forward_all(**blob)
            relation_score = net.blobs['relation'].data[0]
            #l2_norm = relation_score/LA.norm(relation_score)
            relation_score = softmax(relation_score)
            #relation_score/=LA.norm(relation_score)
            argmax = np.argmax(relation_score)
            rs = relation_score[argmax]

            predicate = argmax
            #rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
            rlp_label = np.array([sub_cls[s], predicate,
                                  obj_cls[s]]).astype(np.int32)
            #print '%s %s %s %f'%(m['meta/cls/idx2name/'+str(rlp_label[0])][...],m['meta/pre/idx2name/'+str(rlp_label[1])][...],m['meta/cls/idx2name/'+str(rlp_label[2])][...],rs)
            rlp_conf = rs  #+sub_score+obj_score#relation_score[predicate]

            rlp_confs.append(rlp_conf)
            rlp_labels.append(rlp_label)
            sub_boxes.append(sub_boxes_gt[s])
            obj_boxes.append(obj_boxes_gt[s])
            # for i in xrange(70):
            # rs = relation_score[i]

            # if rs>0.0:
            # predicate =i
            # #print relation_score[predicate]
            # rlp_label = np.array([sub_cls[s],predicate,obj_cls[s]]).astype(np.int32)
            # #print '%s %s %s %f'%(m['meta/cls/idx2name/'+str(rlp_label[0])][...],m['meta/pre/idx2name/'+str(rlp_label[1])][...],m['meta/cls/idx2name/'+str(rlp_label[2])][...],rs)
            # rlp_conf = rs#+sub_score+obj_score#relation_score[predicate]

            # rlp_confs.append(rlp_conf)
            # rlp_labels.append(rlp_label)
            # sub_boxes.append(sub_boxes_gt[s])
            # obj_boxes.append(obj_boxes_gt[s])

        result.create_dataset(imid + '/rlp_confs',
                              dtype='float16',
                              data=np.array(rlp_confs).astype(np.float16))
        result.create_dataset(imid + '/sub_boxes',
                              dtype='float16',
                              data=np.array(sub_boxes).astype(np.float16))
        result.create_dataset(imid + '/obj_boxes',
                              dtype='float16',
                              data=np.array(obj_boxes).astype(np.float16))
        result.create_dataset(imid + '/rlp_labels',
                              dtype='float16',
                              data=np.array(rlp_labels).astype(np.float16))
Exemplo n.º 29
0
def run_relation(model_type,iteration):
    vgg_data = h5py.File('output/sg_vrd_2016_test.hdf5')
    result = h5py.File('output/sg_vrd_2016_result_'+model_type+'_'+iteration+'.hdf5')
    #if os.path.exists('output/sg_vrd_2016_result.hdf5'):
    #    os.remove('output/sg_vrd_2016_result.hdf5')
    m = h5py.File('data/sg_vrd_meta.h5')
    data_root='data/sg_vrd_2016/Data/sg_test_images/'
    keep = 100
    thresh = 0.0001
    net = caffe.Net('models/sg_vrd/relation/test_'+model_type+'.prototxt','output/relation/vr/sg_vrd_relation_vgg16_'+model_type+'_iter_'+iteration+'.caffemodel',caffe.TEST)
    #net = caffe.Net('models/sg_vrd/relation/test.prototxt','output/models/sg_vrd_relation_vgg16_iter_264000.caffemodel',caffe.TEST)
    cnt =0
    zl.tick()
    rel_types = {}
    rel_types['p']=[]
    rel_types['s']=[]
    rel_types['v']=[]
    rel_types['c']=[]
    for k in m['meta/pre/name2idx'].keys():
        idx = int(str(m['meta/pre/name2idx/'+k][...]))
        r_type = m['meta/pre/name2idx/'+k].attrs['type']
        rel_types[r_type].append(idx)

    for imid in vgg_data.keys():
        cnt+=1
        print cnt,zl.tock()
        zl.tick()
        # if cnt%100==0:
            # print cnt

        classemes = vgg_data[imid]['classemes']
        visuals = vgg_data[imid]['visuals']
        locations = vgg_data[imid]['locations']
        cls_confs = vgg_data[imid]['cls_confs']

        # im = cv2.imread(data_root+imid+'.jpg')
        # #print cls_confs
        # # for box in locations:
            # # b=box[:4].astype(np.int32)
            # # cv2.rectangle(im,(b[0],b[1]),(b[2],b[3]),(255,0,0))
        # w,h = im.shape[2],im.shape[1]

        rlp_labels = []
        rlp_confs = []
        sub_boxes=[]
        obj_boxes=[]
        relation_vectors = []
        for s in xrange(len(locations)):
            for o in xrange(len(locations)):
                if s==o:continue
                sub = locations[s]
                obj = locations[o]
                sub_visual = visuals[s]
                obj_visual = visuals[o]
                sub_cls = cls_confs[s,0]
                obj_cls = cls_confs[o,0]
                sub_score = cls_confs[s,1]
                obj_score = cls_confs[o,1]
                sub_classme = classemes[s]
                obj_classme = classemes[o]
                if sub_score<0.01 or obj_score<0.01:continue
                sub_loc_encoded = bbox_transform( np.array([obj[:4]]), np.array([sub[:4]]))[0]
                obj_loc_encoded = bbox_transform( np.array([sub[:4]]), np.array([obj[:4]]))[0]
                #sub_loc_encoded = bbox_transform(np.array([[0, 0, w, h]]), np.array([sub[:4]]))[0]
                #obj_loc_encoded = bbox_transform(np.array([[0, 0, w, h]]), np.array([obj[:4]]))[0]

                visual = np.hstack((sub_visual, obj_visual)).reshape(1,8192)
                classeme = np.hstack((sub_classme, obj_classme)).reshape(1,202)
                loc = np.hstack((sub_loc_encoded, obj_loc_encoded)).reshape(1,8)
                if 'all' in model_type:
                    blob = {
                            'classeme':classeme,
                            'visual':visual,
                            'location':loc
                            }
                elif 'visual' in model_type:
                    blob = {
                            'visual':visual,
                            }
                elif 'classeme' in model_type:
                    blob = {
                            'classeme':classeme,
                            }
                elif 'location' in model_type:
                    blob = {
                            'location':loc
                            }
                #batch this
                net.forward_all(**blob)

                relation_score =net.blobs['relation_prob'].data[0].copy()
                #l2_norm = relation_score/LA.norm(relation_score)
                #relation_score=softmax(relation_score)
                #relation_score/=LA.norm(relation_score)
                #relation_score=softmax(relation_score)
                # argmax = np.argmax(relation_score)
                # rs = relation_score[argmax]
                # predicate = argmax
                # rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                # # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                # rlp_conf = rs+sub_score+obj_score#*sub_score*obj_score
                # rlp_confs.append(rlp_conf)
                # rlp_labels.append(rlp_label)
                # sub_boxes.append(sub[:4])
                # obj_boxes.append(obj[:4])
                # relation_vectors.append(relation_score)

                # for i in xrange(70):
                    # rs = relation_score[i]
                    # if rs>0.0:
                        # predicate =i
                        # rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                        # # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                        # rlp_conf = rs
                        # rlp_confs.append(rlp_conf)
                        # rlp_labels.append(rlp_label)
                        # sub_boxes.append(sub[:4])
                        # obj_boxes.append(obj[:4])
                r_scores = {'s':{},'v':{},'c':{},'p':{}}
                for i in xrange(70):
                    rs = relation_score[i]
                    if i in rel_types['s']:r_scores['s'][i] = rs
                    if i in rel_types['v']:r_scores['v'][i] = rs
                    if i in rel_types['c']:r_scores['c'][i] = rs
                    if i in rel_types['p']:r_scores['p'][i] = rs
                r_scores['s'] = zl.sort_dict_by_val(r_scores['s'])
                r_scores['v'] = zl.sort_dict_by_val(r_scores['v'])
                r_scores['c'] = zl.sort_dict_by_val(r_scores['c'])
                r_scores['p'] = zl.sort_dict_by_val(r_scores['p'])
                for i,rs in r_scores['s'][:4]:
                    predicate =i
                    rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                    # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                    rlp_conf = rs
                    rlp_confs.append(rlp_conf)
                    rlp_labels.append(rlp_label)
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])
                for i,rs in r_scores['v'][:4]:
                    predicate =i
                    rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                    # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                    rlp_conf = rs
                    rlp_confs.append(rlp_conf)
                    rlp_labels.append(rlp_label)
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])
                for i,rs in r_scores['p'][:4]:
                    predicate =i
                    rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                    # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                    rlp_conf = rs
                    rlp_confs.append(rlp_conf)
                    rlp_labels.append(rlp_label)
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])
                for i,rs in r_scores['c']:
                    predicate =i
                    rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                    # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                    rlp_conf = rs
                    rlp_confs.append(rlp_conf)
                    rlp_labels.append(rlp_label)
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])
                    # if rs>0.0:
                        # predicate =i
                        # rlp_label = np.array([sub_cls,predicate,obj_cls]).astype(np.int32)
                        # # rlp_conf = rs+sub_score+obj_score#relation_score[predicate]
                        # rlp_conf = rs
                        # rlp_confs.append(rlp_conf)
                        # rlp_labels.append(rlp_label)
                        # sub_boxes.append(sub[:4])
                        # obj_boxes.append(obj[:4])

        result.create_dataset(imid+'/rlp_confs',dtype='float16', data=np.array(rlp_confs).astype(np.float16))
        result.create_dataset(imid+'/sub_boxes',dtype='float16', data=np.array(sub_boxes).astype(np.float16))
        result.create_dataset(imid+'/obj_boxes',dtype='float16', data=np.array(obj_boxes).astype(np.float16))
        result.create_dataset(imid+'/rlp_labels',dtype='float16', data=np.array(rlp_labels).astype(np.float16))
Exemplo n.º 30
0
def vg_vphrase_make_voc_format(split_type):
    """Export the visual-phrase ground truth of one split in VOC layout.

    For every image id under 'gt/<split_type>' of 'data/vg1_2_vp_meta.h5'
    the image is copied from 'data/vg_1.2/' into
    'data/vg_1.2/voc_format_vp/Data/<split_type>/' and an XML annotation
    with the image size and one object per ground-truth box is written to
    'data/vg_1.2/voc_format_vp/Annotations/<split_type>/'.  Image sizes come
    from the 'image_data' collection of the local 'visual_genome_1_2'
    MongoDB database, relative image paths from 'meta/imid2path' of
    'data/vg1_2_meta.h5'.

    Args:
        split_type: 'train' or 'test'.  Any other value prints 'error' and
            exits the process.
    """
    if split_type not in ('train', 'test'):
        print 'error'
        # NOTE(review): the process exits with status 0 although the
        # argument is invalid - confirm callers do not rely on the status.
        exit(0)
    vg_root = 'data/vg_1.2/'
    root = 'data/vg_1.2/voc_format_vp/'
    anno_root = root + 'Annotations/' + split_type + '/'
    data_root = root + 'Data/' + split_type + '/'
    for sub_dir in ('VG_100K_2', 'VG_100K'):
        zl.make_dirs(anno_root + sub_dir)
    for sub_dir in ('VG_100K_2', 'VG_100K'):
        zl.make_dirs(data_root + sub_dir)

    # todo: remove mongodb from this processing stage
    client = MongoClient("mongodb://localhost:27017")
    try:
        db = client.visual_genome_1_2
        imdatas = {}
        for imdata in db.image_data.find(no_cursor_timeout=True):
            imdatas[str(imdata['image_id'])] = imdata
    finally:
        # Everything needed is in memory now; release the connection.
        client.close()

    # The meta file is only needed for the image id -> relative path table.
    with h5py.File('data/vg1_2_meta.h5') as m:
        imid2path = {}
        for k in m['meta/imid2path'].keys():
            imid2path[k] = str(m['meta/imid2path/%s' % k][...])

    cnt = 0
    zl.tick()
    with h5py.File('data/vg1_2_vp_meta.h5') as m_vp:
        for k in m_vp['gt/%s' % split_type].keys():
            if cnt % 1000 == 0:
                print cnt, zl.tock()
                zl.tick()
            cnt += 1
            im_path = imid2path[k]
            zl.copy_file(vg_root + im_path, data_root + im_path)
            # im_path is split as '<folder>/<file name>'.
            path_parts = im_path.split('/')
            voc_datum = {
                "folder": path_parts[0],
                "source": {
                    "database": "sg vrd visual phrase"
                },
                "filename": path_parts[1]
            }
            imdata = imdatas[k]
            w, h = imdata['width'], imdata['height']
            voc_datum['size'] = {'width': w, 'height': h}

            objs = []
            gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...]
            gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...]
            for i in xrange(gt_boxes.shape[0]):
                gt_box = gt_boxes[i]
                # Boxes are read as [xmin, ymin, xmax, ymax].
                bbox = {'ymin': gt_box[1], 'ymax': gt_box[3],
                        'xmin': gt_box[0], 'xmax': gt_box[2]}
                name = zl.idx2name_tri(m_vp, gt_labels[i])
                objs.append({'name': name, 'bndbox': bbox})
            voc_datum['object'] = objs

            # Annotation file: same folder and base name as the image, with
            # an '.xml' extension.
            dst_path = os.path.join(
                anno_root, voc_datum["folder"],
                os.path.splitext(voc_datum["filename"])[0] + '.xml')
            xml_text = dict2xml({'annotation': voc_datum}) + '\n'
            with open(dst_path, 'w') as f:
                f.write(xml_text)
    print 'images with annotation=%d\n' % cnt