Example #1
 def __init__(self, model, input_shape, name_classes):
     print("カメラやビデオを処理する!")
     self.model = model
     self.num_classes = len(name_classes)
     self.name_classes = name_classes
     self.width, self.height = input_shape[0], input_shape[1]
     self.window_pos_x, self.window_pos_y = (60, 40)
     self.bbox_util = BBoxUtility(num_classes=self.num_classes)
Example #2
 def __init__(self, class_names, model, input_shape):
     self.class_names = class_names
     self.num_classes = len(class_names)
     self.model = model
     self.input_shape = input_shape
     self.bbox_util = BBoxUtility(self.num_classes)
     
     # Create unique and somewhat visually distinguishable bright
     # colors for the different classes.
     self.class_colors = []
     for i in range(0, self.num_classes):
         # This can probably be written in a more elegant manner
         hue = 255*i/self.num_classes
         col = np.zeros((1,1,3)).astype("uint8")
         col[0][0][0] = hue
         col[0][0][1] = 128 # Saturation
         col[0][0][2] = 255 # Value
         cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
         col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
         self.class_colors.append(col)
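
The loop above sweeps the HSV hue to produce one bright BGR color per class; as its own comment notes, it can be written more compactly. A possible vectorized sketch (not from the original snippet; the helper name make_class_colors is illustrative):

import cv2
import numpy as np

def make_class_colors(num_classes):
    # Sweep hue over [0, 255) with fixed saturation and value, then convert
    # every class color to BGR in a single cvtColor call.
    hues = (255 * np.arange(num_classes) / num_classes).astype("uint8")
    hsv = np.stack([hues,
                    np.full(num_classes, 128, dtype="uint8"),   # saturation
                    np.full(num_classes, 255, dtype="uint8")],  # value
                   axis=-1).reshape(1, num_classes, 3)
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0]
    return [tuple(int(c) for c in color) for color in bgr]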
Example #3
 def get_detection_data(self):
     gt = pickle.load(open(self.gt_path, "rb"))
     name_keys = sorted(gt.keys())
     number = int(round(0.8 * len(name_keys)))
     train_keys = name_keys[:number]
     val_keys = name_keys[number:]
     bbox_util_ = BBoxUtility(self.num_classes, gt)
     gen = Generator(bbox_util_,
                     self.image_path,
                     self.batch_size,
                     train_keys,
                     val_keys, (self.input_shape[0], self.input_shape[1]),
                     num_classes=self.num_classes)
     return gen
Example #4
    def __init__(self, num_classes=21, input_shape=(300, 300, 3), epochs=12):
        self.num_classes = num_classes
        self.batch_size = 4
        self.input_shape = input_shape
        self.epochs = epochs

        # data_path, image_path, start_data and weight_path are module-level
        # names defined elsewhere in the original project.
        self.gt_path = data_path
        self.image_path = image_path

        prior = pickle.load(open(start_data, "rb"))

        self.bbox_util = BBoxUtility(self.num_classes, prior)

        self.pre_trained = weight_path
        self.model = SSD300(self.input_shape, num_classes=self.num_classes)
Example #5
from ssd import SSD300
from utils.prior_box_creator import PriorBoxCreator
from image_generator import ImageGenerator
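# XMLParser, flatten_prior_boxes, add_variances, split_data and BBoxUtility are
# imported from elsewhere in the same project; those imports are not shown here.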

num_classes = 21
model = SSD300()
image_shape = model.input_shape[1:]
box_creator = PriorBoxCreator(model)
prior_boxes = box_creator.create_boxes()

root_prefix = '../datasets/VOCdevkit/VOC2007/'
ground_data_prefix = root_prefix + 'Annotations/'
image_prefix = root_prefix + 'JPEGImages/'

ground_truth_manager = XMLParser(ground_data_prefix, background_id=None)
ground_truth_data = ground_truth_manager.get_data()

prior_boxes = flatten_prior_boxes(prior_boxes)
prior_boxes = add_variances(prior_boxes)
print('WTF')
bbox_util = BBoxUtility(num_classes, prior_boxes)

result = bbox_util.assign_boxes(ground_truth_data['000007.jpg'])
train_keys, val_keys = split_data(ground_truth_data, training_ratio=.8)
image_generator = ImageGenerator(ground_truth_data, bbox_util, 10, (300, 300),
                                 train_keys, val_keys, image_prefix)
data = next(image_generator.flow(mode='train'))

# test the differences here between your bbox_util
# why can't you train with this?
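Example #6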
import time
from PyQt5 import QtCore, QtGui, QtWidgets
from models.model_c3d import *
from models.model_2d import *
from models.ssd import SSD300 as SSD
from utils.clip_detector import process_image
from utils.pose_detector import detect_image
from utils.ssd_utils import BBoxUtility
from utils.processing import preprocessing
from config import *
import cv2
import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
bbox_util = BBoxUtility(21)
ssd_model = SSD(ssd_input_shape, num_classes=21)
ssd_model.load_weights('weights_SSD300.hdf5')
c3d = c3d_model(c3d_input_shape, nb_classes=len(action_classes))
c3d.load_weights('results/weights_c3d_mask.h5')
cnn = cnn_2d(cnn_2d_input_shape, nb_classes=len(pose_classes))
cnn.load_weights('results/cnn_2d_{0}.h5'.format(mode))


class Ui_MainWindow(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)

        self.timer_camera = QtCore.QTimer()
        self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0
Example #7
class Video_tracker:
    def __init__(self, model, input_shape, name_classes):
        print("カメラやビデオを処理する!")
        self.model = model
        self.num_classes = len(name_classes)
        self.name_classes = name_classes
        self.width, self.height = input_shape[0], input_shape[1]
        self.window_pos_x, self.window_pos_y = (60, 40)
        self.bbox_util = BBoxUtility(num_classes=self.num_classes)

    def run(self, filepath, conf_thresh=0.4):
        """
		"""
        frame = cv.imread(filepath)
        src = np.copy(frame)
        resized = cv.resize(frame, (self.width, self.height))
        rgb = cv.cvtColor(resized, cv.COLOR_BGR2RGB)
        src_shape = src.shape
        inputs = [img_to_array(rgb)]
        x = preprocess_input(np.array(inputs))
        y = self.model.predict(x)
        results = self.bbox_util.detection_out(y)
        to_draw = cv.resize(resized, (int(src_shape[1]), int(src_shape[0])))

        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= conf_thresh
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            for i in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))
                class_num = int(top_label_indices[i])
                cv.rectangle(src, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
                text = self.name_classes[class_num] + " " + ('%.2f' %
                                                             top_conf[i])
                text_top = (xmin, ymin - 10)
                text_bot = (xmin + 80, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv.rectangle(src, text_top, text_bot, (0, 255, 0), -1)
                cv.putText(src, text, text_pos, cv.FONT_HERSHEY_SIMPLEX, 0.35,
                           (0, 0, 0), 1)

        cv.imshow("pic", src)
        cv.waitKey(0)
        cv.destroyAllWindows()

        cv.imwrite("output.png", src)
Example #8
class VideoTest(object):
    
    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)
        
        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255*i/self.num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)
            

        
    def run(self, video_path=0, start_frame=0, conf_thresh=0):

        # Prepare the video file
        print('------------------------------------------------')
        print("input filename.mov or 0")
        print("input the name of video : ", end='')
        videoName = input()
        if videoName == '0':
            videoName = 'WebCam.mov'
            save(0)
            videoPass = '******'
            vid = cv2.VideoCapture(videoPass)
            total_frames = vid.get(cv2.CAP_PROP_FRAME_COUNT)    
            tmp_key = 1
            print('Total frames : ', total_frames)
        else:
            videoPass = '******' + videoName
            vid = cv2.VideoCapture(videoPass)
            total_frames = vid.get(cv2.CAP_PROP_FRAME_COUNT)    
            tmp_key = 1
            print('Total frames : ', total_frames)
        print('------------------------------------------------')


        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
            "trying to open a webcam, make sure you video_path is an integer!"))
        
        # Compute aspect ratio of video     
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh
        
        # Skip frames until reaching start_frame
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
            
        # Set up the output video writer
        frame_rate = 24
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        f_v = '../processed/' + videoName
        video = cv2.VideoWriter(f_v, fourcc, frame_rate, (int(vidw), int(vidh)))

        # Show a progress bar
        if tmp_key != 0:
            pbar = tqdm(total=total_frames)
        else:
            print('processing...')

        while vid.isOpened():
            # Advance the progress bar
            if tmp_key != 0:
                pbar.update(1)

            # Stop once every frame has been read
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                break
                
            im_size = (self.input_shape[0], self.input_shape[1])    
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            
            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(resized, (int(self.input_shape[0]*vidar), self.input_shape[1]))
            
            # Use model to predict 
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)
            
            y = self.model.predict(x)
            
            # This line creates a new TensorFlow device every time. Is there a 
            # way to avoid that?
            results = self.bbox_util.detection_out(y)
            
            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used 
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    class_num = int(top_label_indices[i])
                    # Only process detections labeled 'person' with confidence above 30%
                    if (top_conf[i] > 0.3 and (self.class_names[class_num] == 'person')):
                        xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                        ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                        xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                        ymax = int(round(top_ymax[i] * to_draw.shape[0]))
                        
                        # Apply a mosaic (pixelation) to the detected region
                        to_draw[ymin:ymax, xmin:xmax] = mosaic_area(to_draw, xmin, ymin, xmax, ymax)

            # Resize back to the original frame size after the resize used for detection
            to_draw = cv2.resize(to_draw,(int(vidw),int(vidh)))
            
            cv2.startWindowThread()
            # Display the frame
            cv2.imshow("SSD result", to_draw)
            # Write the frame to the output video
            video.write(to_draw)

            k = cv2.waitKey(1)
            if k == ord('q'):
                break

        cv2.destroyAllWindows()
        print('finish')
        pbar.close()
        vid.release()
        video.release()
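
Example #8 calls a mosaic_area() helper that is not defined in the snippet. A minimal sketch of what such a helper could look like, assuming the usual pixelation trick of shrinking the region and scaling it back up with nearest-neighbour interpolation (this implementation is an assumption, not the original code):

def mosaic_area(img, xmin, ymin, xmax, ymax, ratio=0.05):
    # Hypothetical helper: pixelate img[ymin:ymax, xmin:xmax] and return the
    # blurred patch, matching how the run() method above assigns the result.
    region = img[ymin:ymax, xmin:xmax]
    h, w = region.shape[:2]
    small = cv2.resize(region, (max(1, int(w * ratio)), max(1, int(h * ratio))),
                       interpolation=cv2.INTER_NEAREST)
    return cv2.resize(small, (w, h), interpolation=cv2.INTER_NEAREST)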
Example #9
def run_camera(input_shape, ssd_model, action_class, clip_length, c3d):
    num_classes = 21
    conf_thresh = 0.5
    bbox_util = BBoxUtility(num_classes)

    class_colors = []
    for i in range(0, num_classes):
        hue = 255 * i / num_classes
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # Saturation
        col[0][0][2] = 255  # Value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        class_colors.append(col)

    vid = cv2.VideoCapture(0)

    # Compute aspect ratio of video
    vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
    # vidar = vidw / vidh
    empty_count = 0
    origin_stack = []
    while True:
        retval, orig_image = vid.read()
        if not retval:
            print("Done!")
            return None

        im_size = (input_shape[0], input_shape[1])
        resized = cv2.resize(orig_image, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)

        y = ssd_model.predict(x)

        results = bbox_util.detection_out(y)
        if len(results) > 0 and len(results[0]) > 0:
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= conf_thresh
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

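            # In the 21-class VOC ordering assumed here (background = 0), label 15 is 'person'.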
            if 15 not in top_label_indices:
                empty_count += 1
                if empty_count == 4:
                    origin_stack = []
                    empty_count = 0
            else:
                for i in range(top_conf.shape[0]):
                    # Grow the detection box by roughly 10% in every direction; if the
                    # expanded xmax/ymax would leave the frame, keep the unexpanded value.
                    xmin = int(round((top_xmin[i] * vidw) * 0.9))
                    ymin = int(round((top_ymin[i] * vidh) * 0.9))
                    xmax = int(round((top_xmax[i] * vidw) * 1.1))
                    if xmax > vidw:
                        xmax = int(round(top_xmax[i] * vidw))
                    ymax = int(round((top_ymax[i] * vidh) * 1.1))
                    if ymax > vidh:
                        ymax = int(round(top_ymax[i] * vidh))

                    # save frames
                    class_num = int(top_label_indices[i])
                    if class_num == 15:
                        cv2.rectangle(orig_image, (xmin, ymin), (xmax, ymax),
                                      class_colors[class_num], 2)
                        frame = orig_image
                        curl = np.zeros_like(frame, dtype='uint8')
                        curl[ymin:ymax, xmin:xmax, :] = frame[ymin:ymax,
                                                              xmin:xmax, :]
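                        # The 171x128 resize plus the 112x112 crop below match the input size expected by C3D.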
                        curl = cv2.resize(curl, (171, 128))
                        if len(origin_stack) < clip_length:
                            origin_stack.append(curl[8:120, 30:142, :])
                        if len(origin_stack) == clip_length:
                            origin_stack.pop(0)
                            origin_stack.append(curl[8:120, 30:142, :])
                            clip = np.array(origin_stack)
                            clip = np.expand_dims(clip, axis=0)
                            clip = preprocessing(clip)
                            c3d_result = c3d.predict(clip)
                            if max(c3d_result[0]) >= conf_thresh:
                                label = np.argmax(c3d_result[0])
                                action_name = action_class[label]
                                cv2.putText(
                                    orig_image,
                                    action_name + '%.2f' % max(c3d_result[0]),
                                    (xmin + 10, ymin + 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255),
                                    1)
        cv2.imshow("SSD result", orig_image)
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break
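    # Release the capture and close the window once the loop exits
    # (cleanup added here; not part of the original snippet).
    vid.release()
    cv2.destroyAllWindows()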