def get_input_sequence(self, train_instance, idx):
    sequence_length = self.config['SEQUENCE_LENGTH']
    image_path = train_instance['filename']
    video_path = train_instance['video_path']
    num_frames_before_after = [-sequence_length, sequence_length]
    video_proc = VideoProcessor(num_frames_before_after, enhance_clahe=False)

    # The frame number is encoded as the last "_"-separated token of the
    # image filename (e.g. "clip_000123.png" -> 123).
    frame_number = int(image_path.split("_")[-1].split(".")[0])
    cap = cv2.VideoCapture(video_path)
    frames, frame_names = video_proc.get_video_frames(cap, frame_number)

    frame_corrector = FrameMatchCorrector(frames, image_path)
    frames = frame_corrector.correct_frame_match()
    if frames is None:
        print("Non-matched")
        return None
    print("Matched!")

    # Integer division is required here; a float slice index raises a
    # TypeError in Python 3.
    frames = frames[:, :frames.shape[1] // 2 + 1, ...]
    frames = frames[0, -sequence_length:]
    # TODO:
    # frames = self.aug_images()
    for i in range(frames.shape[0]):
        frames[i] = self.norm(frames[i])
    return frames
def main():
    parser = argparse.ArgumentParser(
        description='arguments passed to VideoProcessor')
    parser.add_argument("-i", "--input", type=str,
                        help="data root for source video and images")
    parser.add_argument("-o", "--output", type=str,
                        help="folder to store the generated video and images")
    parser.add_argument("--file_path", type=str,
                        help="file path which records composition fg and bg")
    parser.add_argument("--phase", type=str, help="train or test")
    parser.add_argument("--image_only", action="store_true",
                        help="whether to generate image datapoints only")
    parser.add_argument("--video_only", action="store_true",
                        help="whether to generate video datapoints only")
    parser.add_argument("--bg_blur", action="store_true",
                        help="whether to blur the background video")
    parser.add_argument("--n_proc", type=int, default=10,
                        help="number of processes for multiprocessing")
    parser.add_argument("--classes", nargs="+", type=str,
                        help="classes of foreground images/videos to be used "
                             "for generation (easy, medium, hard)")
    parser.add_argument("--max_size_from_image", type=int, default=4000,
                        help="max number of datapoints generated using the foreground images")
    parser.add_argument("--max_size_from_video", type=int, default=1000,
                        help="max number of datapoints generated using the foreground videos")
    args = parser.parse_args()

    from video_processor import VideoProcessor, VIDEO, IMAGE
    vp = VideoProcessor(data_root=args.input, save_root=args.output,
                        file_list_path=args.file_path, phase=args.phase,
                        classes_used=args.classes, save_mode=IMAGE,
                        bg_blur=args.bg_blur)
    if args.image_only:
        vp.batch_image(n_proc=args.n_proc, max_n=args.max_size_from_image)
    elif args.video_only:
        vp.batch_video(n_proc=args.n_proc, max_n=args.max_size_from_video)
    else:
        vp.batch_image(n_proc=args.n_proc, max_n=args.max_size_from_image)
        vp.batch_video(n_proc=args.n_proc, max_n=args.max_size_from_video)
def main(argv):
    LoggingConfig.setup()
    input_files_path = "/Users/allarviinamae/EduWorkspace/master-thesis-training-videos/backflips"
    op_models_path = "/Users/allarviinamae/EduWorkspace/openpose/models"
    show_video = False
    try:
        # "showVideo" takes no argument, so the long option must not end with "=".
        opts, args = getopt.getopt(argv, "hi:m:s",
                                   ["inputFilesPath=", "opModelsPath=", "showVideo"])
    except getopt.GetoptError:
        logging.info('main.py -i <inputFilesPath> -m <opModelsPath> -s')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            logging.info('main.py -i <inputFilesPath> -m <opModelsPath> -s')
            sys.exit()
        elif opt in ("-i", "--inputFilesPath"):
            input_files_path = arg
        elif opt in ("-m", "--opModelsPath"):
            op_models_path = arg
        elif opt in ("-s", "--showVideo"):
            show_video = True

    logging.info(f'Input files path is {input_files_path}')
    logging.info(f'OpenPose models path is {op_models_path}')
    logging.info(f'Show video is {show_video}')

    try:
        # If you run `make install` (default path is `/usr/local/python` for
        # Ubuntu), you can access the OpenPose/python module from there. Ensure
        # that this directory is on your Python path in order to use it.
        sys.path.append('/usr/local/python')
        from openpose import pyopenpose as op
    except ImportError as e:
        logging.warning(
            'Error: OpenPose library could not be found. Did you enable '
            '`BUILD_PYTHON` in CMake and have this Python script in the right folder?')
        raise e

    # Initializing the Python OpenPose wrapper. Constructing the OpenPose
    # object allocates GPU memory.
    logging.info("Starting OpenPose Python Wrapper...")
    op_wrapper = op.WrapperPython()
    openpose_params = get_openpose_params(op_models_path)
    op_wrapper.configure(openpose_params)
    op_wrapper.start()
    logging.info("OpenPose Python Wrapper started")

    video_processor = VideoProcessor(op_wrapper, show_video)
    input_files = InputFileService.get_input_files(input_files_path)
    input_files.sort()
    for video_to_process in input_files:
        video_processor.process(video_to_process)
def main():
    parser = VideoProcessor.make_parser(
        "Apply mask to every frame of a video and save as a new video.")
    parser.add_argument("mask_file")
    args = parser.parse_args()
    app = VideoMasker(args)
    return app.run()
def open_file_name_dialog(self):
    options = QFileDialog.Options()
    options |= QFileDialog.DontUseNativeDialog
    file_name, _ = QFileDialog.getOpenFileName(self, "Select a video file", "",
                                               "MP4 Files (*.mp4)", options=options)
    # Identity comparison against a string literal ("is not ''") is unreliable;
    # a truthiness check covers both None and the empty string.
    if file_name:
        print('Opening ' + file_name)
        self.original_video_dialog.open_file_for_playing(file_name)
        self.original_video_dialog.show()
        video_processor = VideoProcessor(file_name)
        output_file = video_processor.run_model()
        self.processed_video_dialog.open_file_for_playing(output_file)
        self.processed_video_dialog.show()
def initialize(self, verbose=True):
    start = self.sampling_interval_start_frame
    if start < 0:
        return
    end = self.sampling_interval_end_frame
    last_frame = self.get_last_frame()
    # Note: .format() must apply to the whole concatenated message; in the
    # original it was applied to the second fragment only, leaving the first
    # placeholder unfilled.
    if end <= start:
        raise ValueError(("Sampling interval end frame (currently set to {:d}) should be greater "
                          "than the sampling interval start frame (currently set to {:d}).")
                         .format(end, start))
    if start > last_frame or end > last_frame:
        raise ValueError(("The sampling interval start & end frame (currently set to {:d} and {:d}, "
                          "respectively) should be within [0,{:d}] as dictated by the length of "
                          "video {:s} (and global offset, if present).")
                         .format(start, end, last_frame, self.in_video))

    max_sampling_duration_frames = int(self.sampling_interval * (self.num_samples - 1) / 1000 * self.fps) + 1
    max_end = start + max_sampling_duration_frames - 1
    if end > max_end:
        print(("Notice: sampling_interval_end_frame is set to {0:d}, which is beyond the limit "
               "imposed by sampling interval ({1:f}), fps {2:.2f}, and number of samples ({3:d}). "
               "Changing it to {4:d} to save time.")
              .format(self.sampling_interval_end_frame, self.sampling_interval,
                      self.fps, self.num_samples, max_end))
        end = max_end
    if verbose:
        print("Initializing from frame {:d} to frame {:d}...".format(start, end))

    self.go_to_frame(start)
    start_time = time.time()
    total_frames = end - start + 1
    fc = 1
    for i_frame in range(start, end + 1):
        frame = self.cap.read()[1]
        # apply preliminary mask if at all present
        # build up the background model
        self.background_subtractor.pretrain(frame)
        if not self.no_progress_bar:
            VideoProcessor.update_progress(fc / total_frames, start_time)
        fc += 1
    sys.stdout.write("\n")  # terminate progress bar
    self.reload_video()
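# initialize() above calls VideoProcessor.update_progress(fraction, start_time),
# which is not shown in this section. Below is a minimal sketch of such a
# console progress bar, written here as a plain function (in the source it
# appears to be a static method on VideoProcessor; this body is an assumption):
import sys
import time

def update_progress(fraction, start_time):
    """Draw a one-line console progress bar with an elapsed-time readout."""
    bar_width = 40
    filled = int(bar_width * fraction)
    elapsed = time.time() - start_time
    sys.stdout.write("\r[{}{}] {:3.0f}% ({:.1f}s elapsed)".format(
        "#" * filled, "-" * (bar_width - filled), fraction * 100, elapsed))
    sys.stdout.flush()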
def main(args):
    video_capture = VideoProcessor(
        'rtsp://{username}:{password}@{ip}/cam/realmonitor?channel=1&subtype=0'
        .format(username=settings.USERNAME, password=settings.PASSWORD, ip=settings.IP),
        output_queue=frame_queue)
    detection = Detection()
    num = 0
    count = 0

    path = 'faces'
    if not os.path.exists(path):
        os.mkdir(path)
    path = path + '/' + args.id
    if not os.path.exists(path):
        os.mkdir(path)
    print('capture faces for : ', str(args.id))

    video_capture.start_processing()
    while True:
        frame = video_capture.get_latest_frame()
        if frame is None:
            continue
        faces = detection.find_faces(frame)
        for face in faces:
            filename = path + '/' + str(num) + '.jpg'
            # save every other detected face to disk
            if num % 2 == 0:
                cv2.imwrite(filename, face.face_image_raw)
                count = count + 1
            add_overlays(frame, faces, count)
            num = num + 1
        cv2.imshow('face capture', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print('capture faces num: ', str(num))
def video_process(uuid):
    media = get_media_collection()
    query = {'uuid': uuid}
    doc = media.find_one(query)
    if not doc:
        return uuid + ' not found and not processed', 404

    data = request.get_json()
    rate, option = data['samplingRate'], data['samplingOption']
    funcs = [getattr(VideoFunctions, funcname)
             for funcname in data['selectedVideoFunctions']]
    new_processed_file = '_'.join([str(rate), str(option)]) + '.processed.mp4'
    if new_processed_file not in doc['processed']:
        media.update_one({'_id': doc['_id']},
                         {'$push': {'processed': new_processed_file}},
                         upsert=False)
    input_filename = get_media_folder() + uuid + '/original.mp4'
    output_filename = get_media_folder() + uuid + '/' + new_processed_file
    vp = VideoProcessor(funcs)
    # Has a standard output fps of 10 and no composition
    vp.process_video(input_filename, output_filename, rate, option)
    return 'Processed ' + uuid, 200
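# A hypothetical client call for video_process() above. The route registration
# is not shown in the snippet, so '/video/<uuid>' is an assumed URL rule; the
# JSON keys mirror exactly what the handler reads from request.get_json().
import requests

payload = {
    'samplingRate': 10,                       # sampling rate passed to process_video()
    'samplingOption': 'uniform',              # assumed option value
    'selectedVideoFunctions': ['grayscale'],  # names of VideoFunctions attributes (assumed)
}
resp = requests.post('http://localhost:5000/video/some-uuid', json=payload)
print(resp.status_code, resp.text)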
def main():
    args = parse_arguments()
    if args.mode == 'r':
        if args.video:
            video_processor = VideoProcessor()
            sort_option = {'prefix': 'VIDEO', 'date_format': 0}
            video_processor.scan_files(args.input_path, args.output_path,
                                       sort_option, 'r')
        else:
            rename_image_prefix_date = RenameImagePrefixDate()
            print(args.prefix)
            print(args.output_path)
            sort_option = {'prefix': args.prefix, 'date_format': 0}
            rename_image_prefix_date.walk_images(args.input_path, args.output_path,
                                                 sort_option, 'r')
    elif args.mode == 's':
        folder = input("Give a folder containing images..\n")
        destination = input("Give a destination for the ordered folders..\n")
        answer = ''
        result = 0
        result, answer = get_sorting_option("Order by year? y/n \n", 2)
        result, answer = get_sorting_option(
            "Order by year and month? y/n \n", 1, answer, result)
        result, answer = get_sorting_option(
            "Order by year, month and day? y/n \n", 0, answer, result)
        mode, answer = get_sorting_option("Recursively? y/n \n", 'r')
        sort_images_to_folder = SortImagesToFolder()
        sort_images_to_folder.walk_images(folder, destination, result, mode)
def handler(event, context):
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']
        try:
            obj = s3_client.get_object(Bucket=bucket, Key=key)
            if obj["Metadata"].get("process", 0):
                # Temporary path where we'll save the original object
                original_obj_path = '/tmp/{}{}'.format(uuid.uuid4(), key.replace("/", "-"))
                s3_client.download_file(bucket, key, original_obj_path)
                print('Processing object {}...'.format(key))
                # Videos are all stored in the 'vid/' folder in S3, so if this
                # part is in the key (pathname) then it is a video; otherwise we
                # consider it an image. Documents are not processed as they
                # don't have the 'process' metadata (yet?).
                if "vid/" in key:
                    processor = VideoProcessor(s3_client, bucket)
                else:
                    processor = ImageProcessor(s3_client, bucket)
                processor.process(original_obj_path, obj["Metadata"], key,
                                  obj["Metadata"].get("dest_ext", None))
            # Note: returning inside the loop means only the first record of
            # the event is ever processed.
            return {
                'statusCode': 200,
                'body': "Process executed successfully"
            }
        except Exception as e:
            print(e)
            print('Error getting object {} from bucket {}. Make sure they exist and '
                  'your bucket is in the same region as this function.'.format(key, bucket))
            raise e
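# A minimal local driver for handler() above. The event follows the standard
# S3 "ObjectCreated" notification shape that the handler indexes into; the
# bucket and key values below are placeholders.
example_event = {
    'Records': [{
        's3': {
            'bucket': {'name': 'my-media-bucket'},
            'object': {'key': 'vid/example.mp4'},
        }
    }]
}
# handler(example_event, None)  # needs a configured s3_client and AWS credentials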
def run(self, args):
    if args.use_svm:
        settings.USE_SVM = True
        print('use svm')
    else:
        settings.USE_SVM = False
        print('use distance')

    plc = PLCControl()
    face_recognition = Recognition()
    video_capture = VideoProcessor(self.camera.get_url(), output_queue=frame_queue)
    video_capture.start_processing()

    while True:
        # Capture frame-by-frame
        frame = video_capture.get_latest_frame()
        if frame is None:
            continue
        faces = face_recognition.identify(frame)
        legal = False
        for f in faces:
            recognition_result_processor.put_result(f.result)
            # recognition_result_processor.send_email(f.result)
            if f.result.result == settings.LEGAL:
                legal = True
        if len(faces) > 0:
            recognition_result_processor.push_result()
        if legal:
            plc.open_door()
        self.add_overlays(frame, faces)
        if args.debug:
            cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    video_capture.stop_processing()
    video_capture.cleanup()
    cv2.destroyAllWindows()
def make_parser(help_string):
    parser = VideoProcessor.make_parser(help_string, with_output=False)
    BackgroundSubtractor.prep_parser(parser)
    parser.add_argument("-cpu", "--caffe_cpu", action="store_true", default=False,
                        help="Use Caffe in CPU mode.")
    parser.add_argument("-od", "--output_datafile", default=None)
    parser.add_argument("-bc", "--boundary_check", action="store_true",
                        help="Whether to mark frame as 'subject out-of-view' for frames when the "
                             "subject's bounding box intersects with the frame's bounding box.")
    parser.add_argument("-v", "--vgg_model_path", type=str, default=None,
                        help="Path to the directory containing the VGG model files.")
    parser.add_argument("-vm", "--vgg_model_filename", type=str,
                        default="VGG_ILSVRC_16_layers_deploy.prototxt",
                        help="Filename of the VGG deploy prototxt.")
    parser.add_argument("-vp", "--vgg_pretrained_filename", type=str,
                        default="VGG_ILSVRC_16_layers.caffemodel",
                        help="Filename of the pretrained VGG weights (caffemodel).")
    parser.add_argument("-aug", "--augment_file", action="store_true", default=False,
                        help="Augment existing file instead of overwriting "
                             "(useful when not all features are collected)")
    parser.add_argument("-nv", "--no_vgg", action="store_true", default=False,
                        help="skip actual vgg feature extraction")
    return parser
def dynamic_selection(self, video_path):
    selected_frames = [1]
    estimations = [1.0]
    time_start = time.time()
    with VideoProcessor(video_path) as video:
        prev_frame = next(video)
        prev_feat = self.get_frame_feature(prev_frame)
        for frame in video:
            feat = self.get_frame_feature(frame)
            dis = self.cal_frame_diff(feat, prev_feat)
            if dis > self.thresh:
                selected_frames.append(video.index)
                prev_feat = feat
                estimations.append(1.0)
            else:
                estimations.append((self.thresh - dis) / self.thresh)
        total_frames = video.index
    complete_time = time.time() - time_start
    return self._format_selection(selected_frames, total_frames,
                                  complete_time, estimations)
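# dynamic_selection() relies on self.get_frame_feature() and
# self.cal_frame_diff(), which are not shown in this section. One plausible
# pair, sketched here as plain functions (a normalized color histogram with an
# L1 distance -- an assumption, not necessarily the repository's implementation):
import cv2
import numpy as np

def get_frame_feature(frame):
    """Describe a frame by its normalized 8x8x8 color histogram."""
    hist = cv2.calcHist([frame], [0, 1, 2], None, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    return cv2.normalize(hist, hist).flatten()

def cal_frame_diff(feat, prev_feat):
    """L1 distance between two histogram features; larger means more change."""
    return float(np.abs(feat - prev_feat).sum())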
def feedback_kmeans(self, video, ty, threshold=0.8):
    # input video may have None label
    vp = VideoProcessor(video)
    if vp.get_video_view() == "front":
        # you need 6-dimensional data in the front case
        # (string comparisons use ==; "is" only checks object identity)
        if ty == "elbow":
            angle_result = vp.compute_left_elbow_angle(threshold)
        if ty == "shoulder":
            angle_result = vp.compute_left_shoulder_angle(threshold)
        if ty == "arm":
            angle_result = vp.compute_left_arm_angle_with_floor(threshold)
        min_max = self.min_max(angle_result, "front", ty)
        if self.div_zero == 1:
            # sys.exit("training data is erroneous, div-zero happened")
            print("div-zero happened")
            return [], self.div_zero
        training_data = [t[0] for t in self.front_data]
        # cluster the training data and assign the new sample to its nearest cluster
        km = KMeans(n_clusters=8, random_state=0).fit(np.array(training_data))
        cluster = km.predict(np.array([np.array(min_max)]))[0]
        # label format: [partial range or not, elbow flare or not, wide or not]
        # majority voting within the assigned cluster
        labels = km.labels_
        training_labels = [t[1] for t in self.front_data]
        positive_labels_num = 0
        negative_labels_num = 0
        if ty == "elbow":
            label_index = 0
        if ty == "shoulder":
            label_index = 2
        if ty == "arm":
            label_index = 1
        for i in range(len(labels)):
            if labels[i] == cluster:
                if training_labels[i][label_index] == 1:
                    positive_labels_num += 1
                else:
                    negative_labels_num += 1
        result = positive_labels_num > negative_labels_num
        return result, self.div_zero
def feedback_kmeans(self, video, threshold=0.8):
    # input video may have None label
    vp = VideoProcessor(video)
    if vp.get_video_view() == "front":
        # you need 6-dimensional data in the front case
        elbow_angle_result = vp.compute_left_elbow_angle(threshold)
        shoulder_angle_result = vp.compute_left_shoulder_angle(threshold)
        arm_angle_result = vp.compute_left_arm_angle_with_floor(threshold)
        min_max_1 = self.min_max(elbow_angle_result, "front", "elbow")
        min_max_2 = self.min_max(shoulder_angle_result, "front", "shoulder")
        min_max_3 = self.min_max(arm_angle_result, "front", "arm")
        if self.div_zero == 1:
            sys.exit("training data is erroneous, div-zero happened")
        min_max = min_max_1 + [min_max_2[0]] + min_max_3
        training_data = [t[0] for t in self.front_data]
        # cluster the training data and assign the new sample to its nearest cluster
        km = KMeans(n_clusters=8, random_state=0).fit(np.array(training_data))
        cluster = km.predict(np.array([np.array(min_max)]))[0]
        # label format: [partial range or not, elbow flare or not, wide or not]
        # majority voting within the assigned cluster
        labels = km.labels_
        training_labels = [t[1] for t in self.front_data]
        positive_labels_num = [0, 0, 0]
        negative_labels_num = [0, 0, 0]
        for i in range(len(labels)):
            if labels[i] == cluster:
                print(str(i))
                print(training_labels[i])
                for j in range(len(positive_labels_num)):
                    if training_labels[i][j] == 1:
                        positive_labels_num[j] += 1
                    else:
                        negative_labels_num[j] += 1
        result = []
        for i in range(len(positive_labels_num)):
            result.append(positive_labels_num[i] > negative_labels_num[i])
        return result
def analyze_video(self, video_path):
    diff_values = []
    total_t = 0
    with VideoProcessor(video_path) as video:
        prev_frame = next(video)
        t1 = time.time()
        prev_frame = self.get_frame_feature(prev_frame)
        t2 = time.time()
        total_t += t2 - t1
        for frame in video:
            t3 = time.time()
            frame = self.get_frame_feature(frame)
            diff_value = self.cal_frame_diff(frame, prev_frame)
            t4 = time.time()
            total_t += t4 - t3
            diff_values.append(diff_value)
            prev_frame = frame
    return diff_values, total_t
def learn(self, video, ty, threshold=0.8):
    vp = VideoProcessor(video)
    if vp.get_video_view() == "front":
        # you need 5-dimensional data in the front case
        if ty == "elbow":
            angle_result = vp.compute_left_elbow_angle(threshold)
        if ty == "shoulder":
            angle_result = vp.compute_left_shoulder_angle(threshold)
        if ty == "arm":
            angle_result = vp.compute_left_arm_angle_with_floor(threshold)
        min_max = self.min_max(angle_result, "front", ty)
        if self.div_zero == 1:
            print("training data is erroneous, div-zero happened. This data is not added")
            self.div_zero = 0
            return
        self.front_data.append((min_max, vp.get_video_label()))
    elif vp.get_video_view() == "left":
        angle_result = vp.compute_left_elbow_angle(threshold)
        min_max = self.min_max(angle_result, "left")
        self.left_data.append((min_max, vp.get_video_label()))
    elif vp.get_video_view() == "squat":
        angle_result = vp.compute_left_knee_angle(threshold)
        min_max = self.min_max(angle_result, "squat")
        self.left_data.append((min_max, vp.get_video_label()))
    else:
        sys.exit("Error: Wrong view type is given")
    print(video.get_name() + " has been successfully learned.")
import sys
import os
import json

from flask import Flask
from flask_restful import Resource, Api
from flask_cors import CORS

from video_processor import VideoProcessor, API_HOST, API_PORT

app = Flask(__name__)
CORS(app)
api = Api(app)
video_processor = VideoProcessor()


class EmotionsDetection(Resource):
    def get(self):
        if not video_processor.data:
            return app.response_class(
                response=json.dumps({"message": "no faces found"}),
                status=200, mimetype='application/json')
        emotions_response = {
            'image': video_processor.image.decode('utf8'),
            'emotions_data': video_processor.data,
        }
        return app.response_class(response=json.dumps(emotions_response),
                                  status=200, mimetype='application/json')
def main(): """Main function for the program. Everything starts here. :return: None """ global resize_output, resize_output_width, resize_output_height, \ resize_output, resize_output_width, resize_output_height, \ device_count if (not handle_args()): print_usage() return 1 # get list of all the .mp4 files in the image directory input_video_filename_list = os.listdir(input_video_path) input_video_filename_list = [i for i in input_video_filename_list if i.endswith('.mp4')] if (len(input_video_filename_list) < 1): # no images to show print('No video (.mp4) files found') return 1 resting_image = cv2.imread("resting_image.png") if (resting_image is None): resting_image = numpy.zeros((800, 600, 3), numpy.uint8) if (resize_output): resting_image = cv2.resize(resting_image, (resize_output_width, resize_output_height), cv2.INTER_LINEAR) # Set logging level to only log errors mvnc.global_set_option(mvnc.GlobalOption.RW_LOG_LEVEL, 3) devices = mvnc.enumerate_devices() if len(devices) < 1: print('No NCS device detected.') print('Insert device and try again!') return 1 if (device_count < 1) or (device_count > len(devices)): device_count = len(devices) # Create an object detector processor for each device that opens # and store it in our list of processors obj_detect_list = list() idle_obj_detect_list = list() device_number = 0 for one_device in devices: try: obj_detect_dev = mvnc.Device(one_device) obj_detect_dev.open() print("opened device " + str(device_number)) obj_detector_proc = Yolov2_tiny_Processor(NETWORK_GRAPH_FILENAME, obj_detect_dev, inital_box_prob_thresh=min_score_percent / 100.0, classification_mask=object_classifications_mask, name="object detector " + str(device_number)) if (device_number < device_count): obj_detect_list.append(obj_detector_proc) else: idle_obj_detect_list.append(obj_detector_proc) device_number += 1 except: print("Could not open device " + str(device_number) + ", trying next device") pass if len(obj_detect_list) < 1: print('Could not open any NCS devices.') print('Reinsert devices and try again!') return 1 print("Using " + str(len(obj_detect_list)) + " devices for object detection") print_hot_keys() cv2.namedWindow(cv_window_name) cv2.moveWindow(cv_window_name, 10, 10) cv2.waitKey(1) exit_app = False while (True): for input_video_file in input_video_filename_list : for one_obj_detect_proc in obj_detect_list: print("using object detector: " + one_obj_detect_proc.get_name()) one_obj_detect_proc.drain_queues() # video processor that will put video frames images on the object detector's input FIFO queue video_proc = VideoProcessor(input_video_path + '/' + input_video_file, network_processor_list = obj_detect_list) video_proc.start_processing() frame_count = 0 start_time = time.time() last_throttle_time = start_time end_time = start_time uptime=time.time() while(True): done = False for one_obj_detect_proc in obj_detect_list: try: (filtered_objs, display_image) = one_obj_detect_proc.get_async_inference_result() print("resive result:",time.time()-uptime) uptime=time.time() except : print("exception caught in main") raise # check if the window is visible, this means the user hasn't closed # the window via the X button prop_val = cv2.getWindowProperty(cv_window_name, cv2.WND_PROP_ASPECT_RATIO) if (prop_val < 0.0): end_time = time.time() video_proc.stop_processing() video_proc.cleanup() exit_app = True break running_fps = frame_count / (time.time() - start_time) overlay_on_image(display_image, filtered_objs, running_fps) print("show time:",time.time()-uptime) if 
(resize_output): display_image = cv2.resize(display_image, (resize_output_width, resize_output_height), cv2.INTER_LINEAR) cv2.imshow(cv_window_name, display_image) raw_key = cv2.waitKey(1) if (raw_key != -1): if (handle_keys(raw_key, obj_detect_list) == False): end_time = time.time() exit_app = True done = True break frame_count += 1 #if (one_obj_detect_proc.is_input_queue_empty()): if (not video_proc.is_processing()): # asssume the video is over. end_time = time.time() done = True print('video processor not processing, assuming video is finished.') break #if (frame_count % 100) == 0: if ((time.time() - last_throttle_time) > throttle_check_seconds): #long movie, check for throttling devices # throttling = one_obj_detect_proc.get_device().get_option(mvnc.DeviceOption.RO_THERMAL_THROTTLING_LEVEL) last_throttle_time = time.time() print("movie not done, but going a long time so adjust for throttling") video_proc.pause() do_throttle_adjustment(obj_detect_list, idle_obj_detect_list) video_proc.unpause() if (done) : break frames_per_second = frame_count / (end_time - start_time) print('Frames per Second: ' + str(frames_per_second)) # check for throttling devices and save in throttling list throttling_list = list() for one_obj_detect_proc in obj_detect_list: throttling = one_obj_detect_proc.get_device().get_option(mvnc.DeviceOption.RO_THERMAL_THROTTLING_LEVEL) if (throttling > 0): print("\nDevice " + one_obj_detect_proc.get_name() + " is throttling, level is: " + str(throttling)) throttling_list.append(one_obj_detect_proc) if (not exit_app): # rest between movies, display an image while resting resting_display_image = cv2.resize(resting_image, (display_image.shape[1], display_image.shape[0]), cv2.INTER_LINEAR) cv2.imshow(cv_window_name, resting_display_image) if ((len(throttling_list) > len(idle_obj_detect_list))): # more devices throttling than we have in the idle list # so do extra rest by applying a multiplier to the rest time print("throttling devices... resting") cv2.waitKey(rest_seconds * 1000 * rest_throttling_multiplier) else: cv2.waitKey(rest_seconds * 1000) # remove the throttling devices from the main list and put them at the end so they will # be moved to the idle list with priority for one_throttling in throttling_list: obj_detect_list.remove(one_throttling) obj_detect_list.append(one_throttling) num_idle = len(idle_obj_detect_list) if (num_idle > len(obj_detect_list)): num_idle = len(obj_detect_list) if (num_idle > 0): # replace one of the devices with an idle device for idle_index in range(0, num_idle): #for one_idle_proc in idle_obj_detect_list: obj_detect_list.insert(0, idle_obj_detect_list.pop(0)) for idle_count in range(0, num_idle): idle_obj_detect_list.append(obj_detect_list.pop()) video_proc.stop_processing() video_proc.cleanup() if (exit_app): break if (exit_app): break # Clean up the graph and the device for one_obj_detect_proc in obj_detect_list: cv2.waitKey(1) one_obj_detect_proc.cleanup(True) cv2.destroyAllWindows()
def make_parser(help_string):
    parser = VideoProcessor.make_parser(help_string)
    parser.add_argument("-mo", "--mask_output_video", default="")
    BackgroundSubtractor.prep_parser(parser)
    return parser
import glob

import cv2

from settings import load_settings
from video_processor import VideoProcessor

"""
Load a sequence of images and save the predictions
"""
settings = load_settings()
# override setting because the images are not sequential
settings['n_frames'] = 1
processor = VideoProcessor()

for file in glob.glob("./test_images/*.jpg"):
    image = cv2.imread(file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    out_img = processor.process_frame(image)
    out_img = cv2.cvtColor(out_img, cv2.COLOR_RGB2BGR)
    filename = file.split('/')[-1]
    cv2.imwrite("./output_images/" + filename, out_img)
"""Simple script to run vpa""" from video_processor import VideoProcessor VPA = VideoProcessor() VPA.begin_capture(0) VPA.process_and_output_video()
from json_parser import JsonParser
from video_processor import VideoProcessor
from feedback import FeedbackSystem

j = JsonParser()
video = j.parse("flare3", 200, "json/learn", "front", [0, 0])
vp = VideoProcessor(video)
angles = vp.compute_left_elbow_angle(0.4)
fs = FeedbackSystem()
out = fs.min_max(angles)
print(out)
class DeepFacialEmotionInference(object):
    def __init__(
            self,
            # parameters for testing
            model_path,
            batch_size,
            device=None,
            workers=0,
            # parameters for the video processing
            save_size=112,
            nomask=True,
            grey=False,
            quiet=True,
            tracked_vid=False,
            noface_save=False,
            openface_exe='OpenFace/build/bin/FeatureExtraction',
            # parameters for deep feature extraction (ResNet-50)
            benchmark_dir='pytorch-benchmarks',
            model_name='resnet50_ferplus_dag',
            feature_layer='pool5_7x7_s1',
            # parameters for the snippet sampler
            num_phase=12,
            phase_size=48,
            length=64,
            stride=64,
            # parameters for the phase difference extractor
            height=4,
            nbands=2,
            scale_factor=2,
            extract_level=[1, 2]):
        assert os.path.exists(model_path), \
            "Please, download the model checkpoint first."
        self.batch_size = batch_size
        self.workers = workers
        self.num_phase = num_phase
        self.phase_size = phase_size
        self.length = length
        self.stride = stride
        self.device = get_device(device)
        # Face detection and face alignment
        self.video_processor = VideoProcessor(save_size, nomask, grey, quiet,
                                              tracked_vid, noface_save,
                                              openface_exe)
        # From snippets to deep facial features
        self.resnet50_extractor = Resnet50Extractor(benchmark_dir, self.device,
                                                    model_name, feature_layer)
        # Phase and phase differences over time on faces
        self.pd_extractor = Phase_Difference_Extractor(height, nbands,
                                                       scale_factor,
                                                       extract_level,
                                                       self.device, not quiet)
        self.model = Two_Stream_RNN()  # model for FER
        checkpoint = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model = self.model.eval()
        self.model.to(self.device)
        logger.info(f"Loaded checkpoint from {model_path}, "
                    f"Epoch: {checkpoint['epoch']}")
        self.label_name = ['valence', 'arousal']  # model output format

    def run_inference_from_video(self, input_video, keep_tmp=True):
        """
        Perform video facial-emotion recognition on the provided video.

        Args:
            input_video (str): path to the video stream to process.

        Returns:
            emotions_dict: a dict of dataframes containing the emotion
                prediction of each video (keyed by name) per frame.

        Notes:
            - The user can provide a dir for temporary files (snippets, features).
        """
        video_name = os.path.splitext(os.path.basename(input_video))[0]
        tmp_dir = create_dir(
            os.path.join(os.path.dirname(input_video), video_name + "-tmp"))

        # first, the input video is processed using OpenFace
        opface_output_dir = os.path.join(tmp_dir, video_name + "_opface")
        self.video_processor.process(input_video, opface_output_dir)
        logger.info(f"{video_name} processed with OpenFace.")

        # the cropped and aligned faces are then fed to ResNet-50 for deep
        # feature extraction
        feature_dir = os.path.join(tmp_dir, video_name + "_pool5")
        self.resnet50_extractor.run(opface_output_dir, feature_dir,
                                    video_name=video_name)
        logger.info("Deep facial features extracted with pre-trained ResNet.")

        # creating a sequence of inputs for the NN (sampling images)
        dataset = Snippet_Sampler(video_name, opface_output_dir, feature_dir,
                                  annot_dir=None,
                                  label_name='valence_arousal',
                                  test_mode=True,
                                  num_phase=self.num_phase,
                                  phase_size=self.phase_size,
                                  length=self.length,
                                  stride=self.stride)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=self.batch_size,
                                                  num_workers=self.workers,
                                                  pin_memory=False)
        # note: the original call passed self.model as the second positional
        # argument, which the signature below would interpret as train_mean;
        # the method already uses self.model internally
        av_dict = self.run_inference_from_dataloader(data_loader)
        logger.info(f"{len(data_loader)} batches for {video_name}.")

        if not keep_tmp:  # tmp folders need to be removed
            logger.info(f"Removing {tmp_dir} as requested.")
            rmtree(tmp_dir)  # this removes nested folders too

        assert len(av_dict) == 1, "This function processes one video only."
        return list(av_dict.values())[0]  # the first and only item is returned

    def run_inference_from_dataloader(self, dataloader, train_mean=None,
                                      train_std=None):
        """
        Perform inference on a sequence of (already pre-processed) samples,
        provided as a torch dataloader to simplify processing.

        Args:
            dataloader (data.DataLoader): a dataloader containing video features.
            train_mean (list): mean per video, or None
            train_std (list): std per video, or None

        Returns:
            video_dict (dict): valence-arousal predictions per video (the name
                of each video is used as a key in the dictionary), each
                provided as a pandas DataFrame, and w.r.t. each frame.
        """
        sample_names = []
        sample_preds = []
        sample_ranges = []
        for i, data_batch in enumerate(dataloader):
            phase_f, rgb_f, label, ranges, names = data_batch
            with torch.no_grad():
                # instantiating tensors for the current batch
                phase_f = phase_f.type('torch.FloatTensor').to(self.device)
                phase_0, phase_1 = self.phase_diff_output(phase_f, self.pd_extractor)
                rgb_f = Variable(rgb_f.type('torch.FloatTensor').to(self.device))
                phase_0 = Variable(phase_0.type('torch.FloatTensor').to(self.device))
                phase_1 = Variable(phase_1.type('torch.FloatTensor').to(self.device))
                output = self.model([phase_0, phase_1], rgb_f)
            sample_names.append(names)
            sample_ranges.append(ranges)
            sample_preds.append(output.cpu().data.numpy())

        sample_names = np.concatenate([arr for arr in sample_names], axis=0)
        sample_preds = np.concatenate([arr for arr in sample_preds], axis=0)
        n_sample, n_length, n_labels = sample_preds.shape
        if train_mean is not None and train_std is not None:
            # standardise output features if required (mean and std provided)
            trans_sample_preds = sample_preds.reshape(-1, n_labels)
            trans_sample_preds = np.array([
                correct(trans_sample_preds[:, i], train_mean[i], train_std[i])
                for i in range(n_labels)
            ])  # scaling of predictions
            sample_preds = trans_sample_preds.reshape(n_sample, n_length, n_labels)
        sample_ranges = np.concatenate([arr for arr in sample_ranges], axis=0)

        video_dict = {}  # one entry per video, based on the dataloader provided
        for video in sample_names:
            mask = sample_names == video
            video_ranges = sample_ranges[mask]
            if video not in video_dict.keys():
                max_len = max([ranges[-1] for ranges in video_ranges])
                video_dict[video] = np.zeros((max_len, n_labels))
            video_preds = sample_preds[mask]
            min_f, max_f = 0, 0  # make sure to return the full range of video frames
            for rg, pred in zip(video_ranges, video_preds):
                start, end = rg
                video_dict[video][start:end, :] = pred
                min_f = min(min_f, start)
                max_f = max(max_f, end)
            assert (min_f == 0) and (max_f == max_len)

        for video in video_dict.keys():  # creating a dataframe per video
            video_dict[video] = pd.DataFrame(data=video_dict[video],
                                             columns=self.label_name)
        return video_dict

    def phase_diff_output(self, phase_batch, steerable_pyramid):
        """
        Extract the first-level and second-level phase difference images.
        """
        sp = steerable_pyramid
        bs, num_frames, num_phases, W, H = phase_batch.size()
        coeff_batch = sp.build_pyramid(
            phase_batch.view(bs * num_frames, num_phases, W, H))
        assert isinstance(coeff_batch, list)
        phase_batch_0 = sp.extract(coeff_batch[0])
        N, n_ch, n_ph, W, H = phase_batch_0.size()
        phase_batch_0 = phase_batch_0.view(N, -1, W, H)
        phase_batch_0 = phase_batch_0.view(bs, num_frames, -1, W, H)
        phase_batch_1 = sp.extract(coeff_batch[1])
        N, n_ch, n_ph, W, H = phase_batch_1.size()
        phase_batch_1 = phase_batch_1.view(N, -1, W, H)
        phase_batch_1 = phase_batch_1.view(bs, num_frames, -1, W, H)
        return phase_batch_0, phase_batch_1
from moviepy.editor import VideoFileClip

from video_processor import VideoProcessor
from settings import load_settings

"""
Process a video file and save the results
"""
processor = VideoProcessor()
settings = load_settings()

clip1 = VideoFileClip("./project_video.mp4")
# clip1 = VideoFileClip("./project_video.mp4").subclip(t_start=6, t_end=10)
# clip1 = VideoFileClip("./project_video.mp4").subclip(t_start=24, t_end=28)
# clip1 = VideoFileClip("./test_video.mp4")

proc_clip = clip1.fl_image(processor.process_frame)
proc_clip.write_videofile("output_video/project_video" +
                          ("_debug" if settings['DEBUG'] else "") + ".mp4",
                          audio=False)
# proc_clip.write_videofile("output_video/test_video.mp4", audio=False)
"""
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os

from video_processor import VideoProcessor

SUMMARY_BUCKET = os.getenv('SUMMARY_BUCKET', '')
assert SUMMARY_BUCKET, 'Undefined SUMMARY_BUCKET environment variable'
ANIMATED = os.getenv('ANIMATED', '0') == '1'


def gcf_generate_summary(data, context):
    """Cloud Function triggered by a new Cloud Storage object."""
    annotation_bucket = data['bucket']
    path_to_annotation = data['name']
    annot_uri = f'gs://{annotation_bucket}/{path_to_annotation}'
    VideoProcessor.generate_summary(annot_uri, SUMMARY_BUCKET, ANIMATED)


if __name__ == '__main__':
    # Only for local tests
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('annot_uri', type=str,
                        help='gs://annotation_bucket/path/to/video.ext.json')
    args = parser.parse_args()
    VideoProcessor.generate_summary(args.annot_uri, SUMMARY_BUCKET, ANIMATED)
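# A minimal local driver for gcf_generate_summary(). The event dict mirrors
# the two fields the function reads from a Cloud Storage "object finalized"
# event; the bucket and object names below are placeholders.
example_event = {
    'bucket': 'my-annotation-bucket',
    'name': 'annotations/video.mp4.json',
}
# gcf_generate_summary(example_event, None)  # needs SUMMARY_BUCKET set and GCP credentials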
""" This code demonstrates simple learning and feedback process for wrong push-up posture. For the intermediate presentations use only. """ from json_parser import JsonParser from video_processor import VideoProcessor from feedback import FeedbackSystem from pathlib import Path import subprocess import os, re files = ["output"] for filename in files: json_dir = "../json/" + filename j = JsonParser() count = len(os.listdir(json_dir)) print(count) video = j.parse(None, count, json_dir, "pushup", None) vp = VideoProcessor(video) vp.compute_left_elbow_angle(0.5) vp.dump_csv()
def main():
    video_processor = VideoProcessor()
    video_processor.run()