Exemplo n.º 1
0
class Handler:
  """Predicts 68 facial landmarks per detected face with an MXNet heatmap model.

  Faces are located with an ESSH detector, re-cropped through ``img_helper``
  and pushed through the ``heatmap_output`` layer of the loaded checkpoint.
  """

  def __init__(self, prefix, epoch, ctx_id=0):
    """Load the landmark checkpoint and create the face detector.

    Args:
      prefix: checkpoint prefix handed to ``mx.model.load_checkpoint``.
      epoch: checkpoint epoch handed to ``mx.model.load_checkpoint``.
      ctx_id: GPU index; any negative value selects the CPU.
    """
    print('loading',prefix, epoch)
    ctx = mx.gpu(ctx_id) if ctx_id>=0 else mx.cpu()
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Use the heatmap layer as the network output.
    sym = sym.get_internals()['heatmap_output']
    image_size = (128, 128)
    self.image_size = image_size
    model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
    model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    self.model = model
    # Forward the requested device: without it the detector was always built
    # on its default device, even when the caller asked for the CPU.
    self.detector = ESSHDetector('./essh-model/essh-r50', 0, ctx_id=ctx_id)

  def trans_dot(self, trans1, trans2):
    """Compose two 2x3 affine transforms.

    Returns the 2x3 matrix equivalent to applying ``trans1`` first and
    ``trans2`` second.
    """
    trans1 = np.vstack((trans1, [0,0,1]))
    trans2 = np.vstack((trans2, [0,0,1]))
    return np.dot(trans2, trans1)[0:2]

  def get_landmark(self, img):
    """Return landmarks for every face the detector finds in ``img``.

    Args:
      img: image passed to the detector and to ``img_helper.preprocess``;
        the crop is converted with ``COLOR_BGR2RGB`` before inference.

    Returns:
      ``None`` when no face is detected, otherwise a float32 array of shape
      ``(num_faces, 68, 2)`` holding (x, y) positions mapped back through the
      inverse crop transform. A face that cannot be re-detected inside its
      aligned crop keeps an all-zero row.
    """
    ret = self.detector.detect(img, threshold=0.4)
    if ret is None or ret.shape[0]==0:
      return None
    num_faces = ret.shape[0]
    points = ret[:, 5:15].reshape(-1,5,2)
    landmark_list = np.zeros((num_faces, 68, 2), dtype=np.float32)
    blob_shape = (1, 3, self.image_size[1], self.image_size[0])
    heat_size = (self.image_size[1], self.image_size[0])
    for i in range(num_faces):
      rimg, _, trans1 = img_helper.preprocess(img, points[i], 384)
      ret2 = self.detector.detect(rimg, threshold=0.4)
      if ret2 is None or ret2.shape[0]==0:
        # The second pass lost the face; indexing ret2[0] used to crash here.
        continue
      rimg, trans2 = img_helper.transform2(rimg, None, self.image_size[0], ret2[0,0:4])
      trans = self.trans_dot(trans1, trans2)
      img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
      img2 = np.transpose(img2, (2,0,1)) # channels first: 3*128*128, RGB
      input_blob = np.zeros(blob_shape, dtype=np.uint8)
      input_blob[0] = img2
      ta = datetime.datetime.now()
      db = mx.io.DataBatch(data=(mx.nd.array(input_blob),))
      self.model.forward(db, is_train=False)
      alabel = self.model.get_outputs()[-1].asnumpy()[0]
      tb = datetime.datetime.now()
      print('module time cost', (tb-ta).total_seconds())
      # Inverse of the combined crop transform maps crop pixels to img pixels.
      IM = cv2.invertAffineTransform(trans)
      for j in range(alabel.shape[0]):
        a = cv2.resize(alabel[j], heat_size)
        # Peak of the j-th heatmap; unravel_index yields (row, col) = (h, w).
        ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
        point = np.dot(IM, (ind[1], ind[0], 1.0))
        landmark_list[i,j] = point[0:2]
    return landmark_list
Exemplo n.º 2
0
    def __init__(self, args):
        """Set up the optional embedding model and the face detector.

        Reads ``args.gpu`` (negative selects the CPU), ``args.image_size``
        (two comma-separated integers), ``args.model`` (empty string means no
        model is loaded) and ``args.det`` (0 selects MTCNN, anything else
        selects ESSH).
        """
        self.args = args
        device = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()

        size_fields = args.image_size.split(',')
        assert len(size_fields) == 2
        image_size = (int(size_fields[0]), int(size_fields[1]))

        # The model is optional: it is only loaded when a model string is given.
        self.model = None
        if len(args.model) > 0:
            self.model = get_model(device, image_size, args.model, 'fc1')

        self.det_minsize = 50
        self.det_threshold = [0.6, 0.7, 0.8]
        self.image_size = image_size

        # MTCNN weights live in a folder next to this source file.
        here = os.path.dirname(__file__)
        mtcnn_path = os.path.join(here, 'mtcnn-model')
        if args.det != 0:
            self.detector = ESSHDetector(prefix='./ssh-model/essh',
                                         epoch=0,
                                         ctx_id=args.gpu,
                                         test_mode=False)
        else:
            self.detector = MtcnnDetector(model_folder=mtcnn_path,
                                          ctx=device,
                                          num_worker=1,
                                          accurate_landmark=True,
                                          threshold=self.det_threshold)
Exemplo n.º 3
0
 def __init__(self, prefix, epoch, ctx_id=0):
   """Load the heatmap landmark checkpoint and create the ESSH face detector.

   Args:
     prefix: checkpoint prefix handed to ``mx.model.load_checkpoint``.
     epoch: checkpoint epoch handed to ``mx.model.load_checkpoint``.
     ctx_id: GPU index; any negative value selects the CPU.
   """
   print('loading',prefix, epoch)
   if ctx_id>=0:
     ctx = mx.gpu(ctx_id)
   else:
     ctx = mx.cpu()
   sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
   # Use the heatmap layer as the network output.
   all_layers = sym.get_internals()
   sym = all_layers['heatmap_output']
   image_size = (128, 128)
   self.image_size = image_size
   model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
   model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
   model.set_params(arg_params, aux_params)
   self.model = model
   # Forward the requested device: without it the detector was always built
   # on its default device, even when the caller asked for the CPU.
   self.detector = ESSHDetector('./essh-model/essh-r50', 0, ctx_id=ctx_id)
Exemplo n.º 4
0
 def __init__(self, prefix, epoch, ctx_id=0):
     """Load the heatmap landmark checkpoint and create the ESSH face detector.

     Args:
         prefix: checkpoint prefix handed to ``mx.model.load_checkpoint``.
         epoch: checkpoint epoch handed to ``mx.model.load_checkpoint``.
         ctx_id: GPU index; any negative value selects the CPU.
     """
     print('loading', prefix, epoch)
     if ctx_id >= 0:
         ctx = mx.gpu(ctx_id)
     else:
         ctx = mx.cpu()
     sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
     # Use the heatmap layer as the network output.
     all_layers = sym.get_internals()
     sym = all_layers['heatmap_output']
     image_size = (128, 128)
     self.image_size = image_size
     model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
     model.bind(for_training=False,
                data_shapes=[('data', (1, 3, image_size[0], image_size[1]))
                             ])
     model.set_params(arg_params, aux_params)
     self.model = model
     # Forward the requested device: without it the detector was always built
     # on its default device, even when the caller asked for the CPU.
     self.detector = ESSHDetector('./essh-model/essh', 0, ctx_id=ctx_id)
Exemplo n.º 5
0
class ESSHModel:
    """Wraps an ESSH detector for frame-by-frame video face detection.

    Accumulates the time spent in detection in ``total_time`` (seconds).
    """

    def __init__(self):
        self.total_time = 0
        # [target size for the shorter side, cap for the longer side].
        self.scales = [1200, 1600]
        # NOTE(review): not read anywhere in this class; looks like a
        # warm-up iteration count left from the original script - confirm.
        self.t = 10
        self.esshDetector = ESSHDetector('./model/essh-r50', 0, ctx_id=-1)

    def detect_faces(self, video, output):
        """Process ``video`` into ``output`` and return the detection time.

        Resets ``total_time``, delegates to the module-level ``detect_faces``
        function with ``detect_faces_on_img`` as the per-frame callback, and
        returns the accumulated detection time in seconds.
        """
        self.total_time = 0
        detect_faces(video, output, 640, 360, self.detect_faces_on_img)
        return self.total_time

    def detect_faces_on_img(self, img):
        """Detect faces in ``img`` and draw boxes and 5-point landmarks on it.

        Frames exceeding the configured scales are shrunk first. Returns the
        (possibly resized) image with the annotations drawn in place; when the
        detector reports no face the image is returned without annotations.
        """
        im_shape = img.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if im_size_min > target_size or im_size_max > max_size:
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
            print('resize to', img.shape)

        timea = datetime.datetime.now()
        faces = self.esshDetector.detect(img, threshold=0.5)
        # Guard against "no detection": slicing None (or drawing from an
        # empty result) must not abort the whole video run.
        has_faces = faces is not None and faces.shape[0] > 0
        if has_faces:
            bbox = np.round(faces[:, 0:5])
            landmark = faces[:, 5:15].reshape(-1, 5, 2)
        timeb = datetime.datetime.now()
        self.total_time += (timeb - timea).total_seconds()

        if not has_faces:
            return img

        for b in bbox:
            cv2.rectangle(img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          (0, 255, 0), 2)
        for p in landmark:
            for i in range(5):
                # OpenCV drawing functions need integer pixel coordinates.
                cv2.circle(img, (int(p[i][0]), int(p[i][1])), 1, (0, 0, 255),
                           2)
        return img
def test(args):
    """Evaluate the ESSH detector on a dataset and export WIDER-style results.

    For every roidb entry the boxes returned by ``get_boxes`` are matched
    against the ground truth, per-image and running recall are logged to
    stderr, and a ``<parent dir>/<image name>.txt`` result file is written
    below ``args.output``. Afterwards the MATLAB ``wider_eval`` script is
    launched from the ``wider_eval_tools`` directory next to this file.

    Args:
        args: parsed options; this function reads ``output``, ``prefix``,
            ``epoch``, ``gpu``, ``dataset``, ``image_set``, ``root_path``,
            ``dataset_path``, ``pyramid`` and ``method_name``.
    """
    print('test with', args)
    # Published as a module global (presumably read by get_boxes - confirm).
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    detector = ESSHDetector(args.prefix, args.epoch, args.gpu, test_mode=True)
    # NOTE(review): eval() executes whatever string is passed as the dataset
    # option; only run this with trusted arguments (a name->class mapping
    # would be the safe replacement).
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [ground-truth boxes recalled, ground-truth boxes seen]

    for i, roi in enumerate(roidb):
        if i % 10 == 0:
            print('processing', i, file=sys.stderr)
        boxes = get_boxes(roi, args.pyramid)
        gt_boxes = roi['boxes'].copy()
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] -
                                                            gt_boxes[:, 1] + 1)
        num_gt = gt_boxes.shape[0]

        # np.float was removed from NumPy; float64 is what it aliased.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        # NOTE(review): images without any detection are not counted in the
        # running recall below, exactly as before - confirm this is intended.
        if boxes.shape[0] > 0:
            _gt_overlaps = overlaps.max(axis=0)
            for j, best in enumerate(_gt_overlaps):
                if best > config.TEST.IOU_THRESH:
                    continue
                print(j,
                      'failed',
                      gt_boxes[j],
                      'max_overlap:',
                      best,
                      file=sys.stderr)

            found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
            # An image may carry no ground truth; avoid dividing by zero.
            _recall = found / float(num_gt) if num_gt > 0 else float('nan')
            print('recall',
                  _recall,
                  num_gt,
                  boxes.shape[0],
                  gt_areas,
                  file=sys.stderr)
            overall[0] += found
            overall[1] += num_gt
            if overall[1] > 0:
                print('recall_all',
                      float(overall[0]) / overall[1],
                      file=sys.stderr)

        _vec = roi['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        # Swap only the extension; str.replace('jpg', 'txt') also rewrote any
        # "jpg" occurring inside the file name itself.
        out_file = os.path.join(out_dir,
                                os.path.splitext(_vec[-1])[0] + '.txt')
        with open(out_file, 'w') as f:
            f.write("%s\n" % ('/'.join(_vec[-2:])))
            f.write("%d\n" % (boxes.shape[0]))
            for box in boxes:
                # x, y, width, height, score
                f.write(
                    "%d %d %d %d %g \n" %
                    (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))

    print('Evaluating detections using official WIDER toolbox...')
    path = os.path.join(os.path.dirname(__file__), 'wider_eval_tools')
    eval_output_path = os.path.join(path, 'wider_plots')
    if not os.path.isdir(eval_output_path):
        os.mkdir(eval_output_path)
    matlab_script = 'dbstop if error; '
    matlab_script += 'wider_eval(\'{:s}\',\'{:s}\',\'{:s}\'); quit;' \
        .format(args.output, args.method_name, eval_output_path)
    # Argument list + cwd instead of a "cd ... && matlab ..." shell string, so
    # paths containing spaces or shell metacharacters cannot break the call.
    cmd = ['matlab', '-nodisplay', '-nodesktop', '-r', matlab_script]
    print('Running (in {}):\n{}'.format(path, ' '.join(cmd)))
    subprocess.call(cmd, cwd=path)
Exemplo n.º 7
0
class Handler:
    """Predicts 68 facial landmarks per detected face with an MXNet heatmap model.

    Faces come from an ESSH detector; each bounding box is warped to the
    network input size and the peak of every output heatmap is reported.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: checkpoint prefix handed to ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch handed to ``mx.model.load_checkpoint``.
            ctx_id: GPU index; any negative value selects the CPU.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Use the heatmap layer as the network output.
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))
                                ])
        model.set_params(arg_params, aux_params)
        self.model = model
        # Forward the requested device: without it the detector was always
        # built on its default device, even when the caller asked for the CPU.
        self.detector = ESSHDetector('./essh-model/essh', 0, ctx_id=ctx_id)

    def get(self, img):
        """Detect faces in ``img`` and locate their landmarks.

        Args:
            img: image passed to the detector and to ``cv2.warpAffine``; the
                crop is converted with ``COLOR_BGR2RGB`` before inference.

        Returns:
            ``None`` when the detector returns ``None``. Otherwise a tuple
            ``(landmarks, M)``: ``landmarks`` is a float32 array of shape
            ``(num_faces, 68, 2)`` with (x, y) heatmap peak positions in the
            warped crop, and ``M`` is a float32 array of shape
            ``(num_faces, 2, 3)`` holding the affine transform used to
            produce each crop.
        """
        ret = self.detector.detect(img, threshold=0.5)
        if ret is None:
            return None
        bbox = ret[:, 0:4]
        num_faces = bbox.shape[0]

        M = np.zeros((num_faces, 2, 3), dtype=np.float32)
        ret = np.zeros((num_faces, 68, 2), dtype=np.float32)
        blob_shape = (1, 3, self.image_size[1], self.image_size[0])
        heat_size = (self.image_size[1], self.image_size[0])
        for i in range(num_faces):
            M[i] = img_helper.estimate_trans_bbox(bbox[i, :],
                                                  self.image_size[0],
                                                  s=1.2)
            rimg = cv2.warpAffine(img, M[i], self.image_size, borderValue=0.0)
            img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
            img2 = np.transpose(img2, (2, 0, 1))  # channels first: 3*128*128, RGB
            input_blob = np.zeros(blob_shape, dtype=np.uint8)
            input_blob[0] = img2
            ta = datetime.datetime.now()
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data, ))
            self.model.forward(db, is_train=False)
            alabel = self.model.get_outputs()[-1].asnumpy()[0]
            tb = datetime.datetime.now()
            print('module time cost', (tb - ta).total_seconds())
            for j in range(alabel.shape[0]):
                a = cv2.resize(alabel[j], heat_size)
                # Peak of the j-th heatmap; unravel_index yields (h, w).
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                ret[i, j] = (ind[1], ind[0])  # stored as (w, h)
        return ret, M
Exemplo n.º 8
0
# Script-level constants, echoed once together with the parsed arguments.
MAX_LABEL = 99999
PARTS = [1, 2, 3]

print(args, MAX_LABEL, PARTS)

# args.split is either empty (keep the default pair) or two comma-separated
# integers that replace both entries.
SPLIT = [0, 1]
if len(args.split) != 0:
    _v = args.split.split(',')
    SPLIT[0], SPLIT[1] = int(_v[0]), int(_v[1])

print('SPLIT:', SPLIT)

# Shared ESSH face detector, placed on the device selected by args.gpu.
detector = ESSHDetector('./model/ssh-model/essh', 0,
                        ctx_id=args.gpu, test_mode=False)


def get_faces(video, is_train=True):
    R = []
    sampling = args.sampling
    while True:
        cap = cv2.VideoCapture(video)
        frame_num = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if frame is None:
                break
            frame_num += 1
            if frame_num % sampling != 0:
Exemplo n.º 9
0
import cv2
import sys
import numpy as np
import datetime
#sys.path.append('.')
from essh_detector import ESSHDetector

# [target size for the shorter image side, cap for the longer side], in pixels.
scales = [1200, 1600]
#scales = [600, 1200]
# Iteration count used by the warm-up loop below.
t = 10
detector = ESSHDetector('./model/essh', 0)

# Image path: first command-line argument, or the bundled sample.
f = './sample-images/t1.jpg'
if len(sys.argv) > 1:
    f = sys.argv[1]
img = cv2.imread(f)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None; fail
    # with a clear message instead of an AttributeError on img.shape.
    raise IOError('cannot read image: %s' % f)
im_shape = img.shape
print(im_shape)
target_size = scales[0]
max_size = scales[1]
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
# Only ever shrink: images already within both limits are left untouched.
if im_size_min > target_size or im_size_max > max_size:
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
    print('resize to', img.shape)

for i in xrange(t - 1):  #warmup
Exemplo n.º 10
0
class Handler:
    """Predicts 68 landmarks for one face and maps them back to image space.

    The face crop comes either from an ESSH detection (the detection closest
    to the image centre) or from the supplied ground-truth label.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: checkpoint prefix handed to ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch handed to ``mx.model.load_checkpoint``.
            ctx_id: GPU index; any negative value selects the CPU.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Use the heatmap layer as the network output.
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))
                                ])
        model.set_params(arg_params, aux_params)
        self.model = model
        # Forward the requested device: without it the detector was always
        # built on its default device, even when the caller asked for the CPU.
        self.detector = ESSHDetector('./essh-model/essh', 0, ctx_id=ctx_id)

    def trans_dot(self, trans1, trans2):
        """Compose two 2x3 affine transforms.

        Returns the 2x3 matrix equivalent to applying ``trans1`` first and
        ``trans2`` second.
        """
        trans1 = np.vstack((trans1, [0, 0, 1]))
        trans2 = np.vstack((trans2, [0, 0, 1]))
        return np.dot(trans2, trans1)[0:2]

    def get_maxpos(self, img, det):
        """Return the row index of the detection closest to the image centre.

        Args:
            img: image array; only ``img.shape[0:2]`` (height, width) is used.
            det: detections whose first four columns are x1, y1, x2, y2.
        """
        img_size = np.asarray(img.shape)[0:2]
        img_center = img_size / 2
        # Row 0: x offsets of the box centres, row 1: y offsets.
        offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                             (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        # Box size is deliberately ignored: the nearest centre wins.
        return np.argmin(offset_dist_squared)

    def get_landmark(self, img, label, dataset, use_essh):
        """Predict landmarks for the central face and re-project the label.

        Args:
            img: input image.
            label: ground-truth landmarks, indexable per landmark. It is read
                for every predicted landmark and OVERWRITTEN IN PLACE with
                its quantised, re-projected position.
            dataset: forwarded unchanged to ``img_helper.transform2``.
            use_essh: when true the crop is derived from ESSH detections,
                otherwise from ``label``.

        Returns:
            ``(landmark, label)`` where ``landmark`` is a float32 ``(68, 2)``
            array of predicted (x, y) positions in ``img`` coordinates, or
            ``(None, None)`` when ``use_essh`` is set and either detection
            pass finds no face.
        """
        if use_essh:
            ret = self.detector.detect(img, threshold=0.4)
            if ret is None or ret.shape[0] == 0:
                return None, None
            face = ret[self.get_maxpos(img, ret)]
            points = face[5:15].reshape(5, 2)
            rimg, label2, trans1 = img_helper.preprocess(
                img, points, img.shape[0])
            # Detect again inside the pre-processed image to get the crop box.
            ret2 = self.detector.detect(rimg, threshold=0.4)
            if ret2 is None or ret2.shape[0] == 0:
                return None, None
            bindex2 = self.get_maxpos(rimg, ret2)
            rimg, trans2 = img_helper.transform2(rimg, None,
                                                 self.image_size[0],
                                                 ret2[bindex2, 0:4], dataset)
        else:
            rimg, label2, trans1 = img_helper.preprocess(
                img, label, img.shape[0])
            rimg, trans2 = img_helper.transform2(rimg, label2,
                                                 self.image_size[0], None,
                                                 dataset)
        trans = self.trans_dot(trans1, trans2)
        # NOTE(review): no colour conversion happens here, so the channel
        # order is whatever img_helper returned - confirm the model expects it.
        img2 = np.transpose(rimg, (2, 0, 1))  # channels first: 3*128*128
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]),
                              dtype=np.uint8)
        input_blob[0] = img2
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
        alabel = self.model.get_outputs()[-1].asnumpy()[0]
        # Inverse of the combined crop transform maps crop pixels to img pixels.
        IM = cv2.invertAffineTransform(trans)
        landmark = np.zeros((68, 2), dtype=np.float32)
        heat_size = (self.image_size[1], self.image_size[0])
        for i in range(alabel.shape[0]):
            a = cv2.resize(alabel[i], heat_size)
            # Peak of the i-th heatmap; unravel_index yields (h, w).
            ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
            point = (ind[1], ind[0], 1.0)  # w, h
            point = np.dot(IM, point)
            landmark[i] = point[0:2]
            # Push the label through the crop transform, snap it to a pixel
            # and map it back, so it carries the same quantisation as the
            # prediction it is compared with.
            npt = img_helper.transform_pt(label[i], trans)
            if config.landmark_type == '2d':
                npt = np.floor(npt)
            else:
                npt = np.round(npt)
            point = (npt[0], npt[1], 1.0)
            point = np.dot(IM, point)
            label[i] = point[0:2]
        return landmark, label
Exemplo n.º 11
0
import cv2
import sys
import numpy as np
import datetime
#sys.path.append('.')
from essh_detector import ESSHDetector

# Resize limits read by main(): scales[0] is the target size for the shorter
# image side, scales[1] the cap for the longer side (both in pixels).
scales = [1200, 1600]
#scales = [600, 1200]
# NOTE(review): not used in the visible part of main(); presumably an
# iteration count for warm-up/timing runs - confirm against the full script.
t = 10
# Module-level ESSH detector, created at import time with ctx_id=0.
detector = ESSHDetector('./model/essh-r50', 0, ctx_id=0)


def main():
    f = './sample-images/t1.jpg'
    if len(sys.argv) > 1:
        f = sys.argv[1]
    img = cv2.imread(f)
    im_shape = img.shape
    print(im_shape)
    target_size = scales[0]
    max_size = scales[1]
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    if im_size_min > target_size or im_size_max > max_size:
        im_scale = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
        print('resize to', img.shape)
Exemplo n.º 12
0
 def __init__(self):
     """Initialise the timing counter, resize limits and the ESSH detector."""
     # Detector is created with ctx_id=-1 (presumably the CPU - confirm
     # against ESSHDetector).
     self.total_time, self.t = 0, 10
     self.scales = [1200, 1600]
     self.esshDetector = ESSHDetector(
         './model/essh-r50',
         0,
         ctx_id=-1,
     )