def process(self, response):
    """Process the message; calls process_images(batch, tstamps, contours=None),
    which is implemented by the child module.

    Loads the media referenced by the request, iterates frames in batches, and
    delegates each batch to ``process_images``. Failure modes are recorded on
    ``self.code`` before returning.

    Returns:
        Response: response object
    """
    log.debug("Processing message")
    super().process(response)
    try:
        log.info(f"Loading media from url: {self.response.request.url}")
        self.media = MediaRetriever(self.response.request.url)
        self.frames_iterator = self.media.get_frames_iterator(
            self.response.request.sample_rate)
    except Exception as e:
        log.error(e)
        # FIX: traceback.print_exc() writes to stderr and returns None, so the
        # original line logged the string "None". format_exc() returns the
        # traceback text so it actually lands in the log.
        log.error(traceback.format_exc())
        self.code = Codes.ERROR_LOADING_MEDIA
        return self.update_and_return_response()
    self._update_w_h_in_response()
    if self.prev_pois and not self.response.has_frame_anns():
        log.warning("NO_PREV_REGIONS_OF_INTEREST, returning...")
        self.code = Codes.NO_PREV_REGIONS_OF_INTEREST
        return self.update_and_return_response()
    num_problematic_frames = 0
    for image_batch, tstamp_batch, prev_region_batch in batch_generator(
            self.preprocess_input(), self.batch_size):
        if image_batch is None or tstamp_batch is None:
            continue
        try:
            self.process_images(image_batch, tstamp_batch, prev_region_batch)
        except ValueError as e:
            # Tolerate a few bad batches; abort once the threshold is reached.
            num_problematic_frames += 1
            log.warning("Problem processing frames")
            if num_problematic_frames >= MAX_PROBLEMATIC_FRAMES:
                log.error(e)
                self.code = Codes.ERROR_PROCESSING
                return self.update_and_return_response()
    log.debug("Finished processing.")
    if self.prev_pois and self.prev_regions_of_interest_count == 0:
        log.warning("NO_PREV_REGIONS_OF_INTEREST, returning...")
        self.code = Codes.NO_PREV_REGIONS_OF_INTEREST
        return self.update_and_return_response()
    # FIX: the success path previously fell through and returned None even
    # though the docstring promises a Response.
    return self.update_and_return_response()
def __init__(self, response=None, med_ret=None, server_name='FrameDrawer',
             version='1.0.0', module_id_map=None,
             pushing_folder=DEFAULT_DUMP_FOLDER, s3_bucket=None, s3_key=None):
    """Set up the drawer: normalize *response*, derive a media retriever,
    and validate that a dump destination (local folder and/or S3 pair) exists.

    Raises:
        ValueError: if only one of s3_bucket/s3_key is given, or if neither
            a pushing folder nor an s3 key is configured.
    """
    super().__init__(server_name=server_name, version=version,
                     module_id_map=module_id_map)
    # Accept a ready Response, its dict form, or nothing at all.
    if isinstance(response, Response):
        normalized = response
    elif isinstance(response, dict):
        normalized = Response(response)
    else:
        normalized = None
        log.debug("No response")
    self.response = normalized
    # Only build a retriever from the response when the caller supplied none.
    self.med_ret = med_ret
    if med_ret is None and self.response is not None:
        self.med_ret = MediaRetriever(self.response.url)
    # S3 settings come as a pair; at least one dump target must be usable.
    if s3_bucket and not s3_key:
        raise ValueError("s3 bucket defined but s3 key not defined")
    if s3_key and not s3_bucket:
        raise ValueError("s3 key defined but s3 bucket not defined")
    if not pushing_folder and not s3_key:
        raise ValueError(
            "pushing_folder and s3 key not defined, we cannot set where to dump."
        )
    self.pushing_folder = pushing_folder if pushing_folder else DEFAULT_DUMP_FOLDER
    self.s3_bucket = s3_bucket
    self.s3_key = s3_key
def __init__(self, response=None, med_ret=None, server_name='FrameDrawer',
             version='1.0.0', module_id_map=None,
             pushing_folder=DEFAULT_DUMP_FOLDER, s3_bucket=None, s3_key=None):
    """Set up the drawer: normalize *response*, derive a media retriever,
    and validate that a dump destination (local folder and/or S3 pair) exists.

    Raises:
        ValueError: if only one of s3_bucket/s3_key is given, or if neither
            a pushing folder nor an s3 key is configured.
    """
    super().__init__(server_name=server_name, version=version,
                     module_id_map=module_id_map)
    if isinstance(response, dict):
        response = Response(response)
    self.response = response
    if not isinstance(self.response, Response):
        log.debug("No response set")
        self.response = None
    self.med_ret = med_ret
    if med_ret is None and self.response is not None:
        self.med_ret = MediaRetriever(self.response.url)
    if s3_bucket and not s3_key:
        raise ValueError("s3 bucket defined but s3_key not defined")
    # FIX: the original tested `s3_key and not s3_key`, which is always False,
    # so an s3_key supplied without a bucket silently passed validation.
    if s3_key and not s3_bucket:
        raise ValueError("s3_key defined but s3 bucket not defined")
    if not pushing_folder and not s3_key:
        raise ValueError("pushing_folder and s3_key not defined, "
                         "we cannot set where to dump.")
    self.local_dir = pushing_folder
    self.s3_bucket = s3_bucket
    self.s3_key_prefix = s3_key
class FrameExtractor(PropertiesModule):
    """Extracts frames from a video and writes them to a local directory
    and/or an S3 bucket, plus a JSON "contents" manifest mapping timestamps
    to frame paths/URLs.
    """

    def __init__(
        self,
        server_name,
        version,
        sample_rate=1.0,
        s3_bucket=None,
        local_dir=None,
        module_id_map=None,
        n_threads=100,
    ):
        """Configure destinations, naming formats, and the S3 client.

        Args:
            sample_rate: frames per second to extract.
            s3_bucket: optional S3 bucket to upload frames to.
            local_dir: optional local root directory for frame dumps.
            n_threads: worker-thread count for the write/upload managers.
        """
        super().__init__(server_name, version, module_id_map=module_id_map)
        self.n_threads = n_threads
        self._sample_rate = sample_rate
        self._local_dir = local_dir
        self._s3_bucket = s3_bucket
        self._list_file = "contents"
        # Zero-padded millisecond timestamp, e.g. "0000016016".
        self._img_name_format = "{tstamp:010d}"
        # NOTE(review): "(unknown)" below looks like a redacted "{filename}"
        # placeholder — the format calls pass a filename= kwarg that the
        # current format strings never consume. Confirm against VCS history.
        self._rel_path_format = "{video_id}/(unknown).{ext}"
        self._image_rel_path_format = "{video_id}/frames/(unknown).{ext}"
        self._s3_url_format = "https://s3.amazonaws.com/{bucket}/{s3_key}"
        self._sql_db = ""
        self._s3_client = boto3.client("s3")
        self._encoding = "JPEG"
        self._content_type = "image/jpeg"
        self._s3_upload_args = {"ContentType": self._content_type}

    @staticmethod
    def _mklocaldirs(directory):
        """Create *directory* (and parents) if it does not already exist."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    def _write_frame_helper(self, data):
        """Worker-queue adapter: unpack *data* into _write_frame, swallow errors."""
        try:
            self._write_frame(**data)
        # NOTE(review): bare except hides all failures (including KeyboardInterrupt);
        # consider `except Exception` and logging the traceback.
        except:
            log.error("Local Write Failed")

    def _write_frame(self, frame, tstamp, video_id):
        """Encode one frame and write it under the local frames directory."""
        filename = self._img_name_format.format(tstamp=tstamp)
        relative_path = self._image_rel_path_format.format(video_id=video_id,
                                                           filename=filename,
                                                           ext=self._encoding)
        full_path = "{}/{}".format(self._local_dir, relative_path)
        # Sanitize: keep only alphanumerics, "/" and "." in the path.
        full_path = "".join(
            [e for e in full_path if e.isalnum() or e in ["/", "."]])
        im_filelike = self._convert_frame_to_filelike(frame)
        with open(full_path, "wb") as f:
            f.write(im_filelike.read())

    def _add_contents_to_local(self, contents):
        """Write the JSON manifest of (tstamp, local path) pairs to disk.

        Args:
            contents: iterable of (video_id, tstamp) tuples.
        """
        filelike = BytesIO()
        contents_json = {"original_url": self.video_url, "frames": []}
        for video_id, tstamp in contents:
            filename = self._img_name_format.format(tstamp=tstamp)
            relative_path = self._image_rel_path_format.format(
                video_id=video_id, filename=filename, ext=self._encoding)
            full_path = "{}/{}".format(self._local_dir, relative_path)
            full_path = "".join(
                [e for e in full_path if e.isalnum() or e in ["/", "."]])
            # line = "{}\t{}\n".format(tstamp, full_path)
            # filelike.write(line.encode())
            contents_json["frames"].append((tstamp, full_path))
        filelike.write(json.dumps(contents_json, indent=2).encode())
        filelike.seek(0)
        with open("{}/{}".format(self._local_dir, self.contents_file_key),
                  "wb") as f:
            f.write(filelike.read())

    def _convert_frame_to_filelike(self, frame):
        """Encode an array frame into an in-memory JPEG file-like object."""
        im = Image.fromarray(frame)
        im_filelike = BytesIO()
        im.save(im_filelike, format=self._encoding)
        im_filelike.seek(0)
        return im_filelike

    def _upload_frame_helper(self, data):
        """Worker-queue adapter: unpack *data* into _upload_frame, swallow errors."""
        try:
            self._upload_frame(**data)
        # NOTE(review): bare except — same concern as _write_frame_helper.
        except:
            log.error("Failed to write Frame to S3")

    def _upload_frame(self, frame, tstamp, video_id):
        """Encode one frame and upload it to S3, then record it via _append_to_sql."""
        filename = self._img_name_format.format(tstamp=tstamp)
        im_filelike = self._convert_frame_to_filelike(frame)
        s3_key = self._image_rel_path_format.format(video_id=video_id,
                                                    filename=filename,
                                                    ext=self._encoding)
        s3_key = "".join([e for e in s3_key if e.isalnum() or e in ["/", "."]])
        result = self._s3_client.upload_fileobj(im_filelike, self._s3_bucket,
                                                s3_key,
                                                ExtraArgs=self._s3_upload_args)
        self._append_to_sql(video_id, tstamp, s3_key, result)

    def _append_to_sql(self, video_id, tstamp, s3_key, result):
        # Intentional no-op hook; subclasses may persist upload records.
        pass

    def _add_contents_to_s3(self, contents):
        """Upload the JSON manifest of (tstamp, S3 URL) pairs to S3.

        Args:
            contents: iterable of (video_id, tstamp) tuples.

        Returns:
            The S3 upload_fileobj result.
        """
        filelike = BytesIO()
        contents_json = {"original_url": self.video_url, "frames": []}
        for video_id, tstamp in contents:
            filename = self._img_name_format.format(tstamp=tstamp)
            s3_key = self._image_rel_path_format.format(video_id=video_id,
                                                        filename=filename,
                                                        ext=self._encoding)
            s3_key = "".join(
                [e for e in s3_key if e.isalnum() or e in ["/", "."]])
            im_url = self._s3_url_format.format(bucket=self._s3_bucket,
                                                s3_key=s3_key)
            # line = "{}\t{}\n".format(tstamp, im_url)
            # filelike.write(line.encode())
            contents_json["frames"].append((tstamp, im_url))
        filelike.write(json.dumps(contents_json, indent=2).encode())
        filelike.seek(0)
        result = self._s3_client.upload_fileobj(
            filelike,
            self._s3_bucket,
            self.contents_file_key,
            ExtraArgs={"ContentType": "application/json"},
        )
        return result

    def process_properties(self):
        """Extract all frames for the request URL, fanning writes out to
        local-disk and S3 worker queues, and append an "extraction" track.

        Short-circuits (appending the track immediately) when the contents
        manifest already exists in S3.
        """
        self._s3_write_manager = WorkerManager(
            func=self._upload_frame_helper,
            n=self.n_threads,
            max_queue_size=100,
            parallelization="thread",
        )
        self._local_write_manager = WorkerManager(
            func=self._write_frame_helper,
            n=self.n_threads,
            max_queue_size=100,
            parallelization="thread",
        )
        self.last_tstamp = 0.0
        log.info("Processing")
        # filelike = self.media_api.download(return_filelike=True)
        # if filelike.getbuffer().nbytes == 0:
        #     self.code = "ERROR_NO_IMAGES_LOADED"
        # log.info('Getting hash')
        # video_hash = hashfileobject(filelike, hexdigest=True)
        self.video_url = self.response.request.url
        self.med_ret = MediaRetriever(self.video_url)
        self.contents_file_key = get_contents_file_s3_key(
            self.video_url, self._sample_rate)
        video_id = self.contents_file_key.split("/")[0]
        if self._local_dir is not None:
            self._mklocaldirs("{}/{}".format(self._local_dir, video_id))
            self._mklocaldirs("{}/{}/frames".format(self._local_dir, video_id))
            if os.path.exists("{}/{}".format(self._local_dir,
                                             self.contents_file_key)):
                log.info("Local Video already exists")
        # If the manifest is already in S3, record the track and stop early.
        try:
            self._s3_client.head_object(Bucket=self._s3_bucket,
                                        Key=self.contents_file_key)
            new_url = self._s3_url_format.format(bucket=self._s3_bucket,
                                                 s3_key=self.contents_file_key)
            log.info("Video already exists")
            p = Property(
                server=self.name,
                ver=self.version,
                value=new_url,
                property_type="extraction",
                property_id=1,
            )
            track = VideoAnn(t1=0.0, t2=float(self.last_tstamp), props=[p])
            self.response.append_track(track)
            self._s3_write_manager.kill_workers_on_completion()
            self._local_write_manager.kill_workers_on_completion()
            return
        # NOTE(review): bare except treats *any* failure (including missing
        # credentials) as "object not found" and proceeds with extraction.
        except:
            pass
        contents = []
        log.info("Getting frames")
        for i, (frame, tstamp_secs) in enumerate(
                self.med_ret.get_frames_iterator(
                    sample_rate=self._sample_rate)):
            # Millisecond timestamps are used everywhere downstream.
            tstamp = int(tstamp_secs * 1000)
            # self._upload_frame(frame, tstamp, video_hash)
            if i % 100 == 0:
                log.info("...tstamp: " + str(tstamp))
            log.debug("tstamp: " + str(tstamp))
            if frame is None:
                continue
            frame = np.ascontiguousarray(frame[:, :, ::-1])  # RGB to BGR
            self.last_tstamp = tstamp
            data = {"frame": frame, "tstamp": tstamp, "video_id": video_id}
            contents.append((video_id, tstamp))
            if self._local_dir is not None:
                self._local_write_manager.queue.put(data)
            if self._s3_bucket is not None:
                self._s3_write_manager.queue.put(data)
        # self._s3_write_manager.kill_workers_on_completion()
        # self._local_write_manager.kill_workers_on_completion()
        if self._s3_bucket is not None:
            result = self._add_contents_to_s3(contents)
        if self._local_dir is not None:
            self._add_contents_to_local(contents)
        # Point the response at the S3 manifest URL, keeping the original.
        self.response.url_original = self.video_url
        new_url = self._s3_url_format.format(bucket=self._s3_bucket,
                                             s3_key=self.contents_file_key)
        self.response.url = new_url
        p = Property(
            server=self.name,
            ver=self.version,
            value=new_url,
            property_type="extraction",
            property_id=1,
        )
        track = VideoAnn(t1=0.0, t2=float(self.last_tstamp), props=[p])
        self.response.append_track(track)
        self._s3_write_manager.kill_workers_on_completion()
        self._local_write_manager.kill_workers_on_completion()
def process_properties(self):
    """Extract all frames for the request URL, fanning writes out to
    local-disk and S3 worker queues, and append an "extraction" track.

    Short-circuits (appending the track immediately) when the contents
    manifest already exists in S3.
    """
    self._s3_write_manager = WorkerManager(
        func=self._upload_frame_helper,
        n=self.n_threads,
        max_queue_size=100,
        parallelization="thread",
    )
    self._local_write_manager = WorkerManager(
        func=self._write_frame_helper,
        n=self.n_threads,
        max_queue_size=100,
        parallelization="thread",
    )
    self.last_tstamp = 0.0
    log.info("Processing")
    # filelike = self.media_api.download(return_filelike=True)
    # if filelike.getbuffer().nbytes == 0:
    #     self.code = "ERROR_NO_IMAGES_LOADED"
    # log.info('Getting hash')
    # video_hash = hashfileobject(filelike, hexdigest=True)
    self.video_url = self.response.request.url
    self.med_ret = MediaRetriever(self.video_url)
    self.contents_file_key = get_contents_file_s3_key(
        self.video_url, self._sample_rate)
    video_id = self.contents_file_key.split("/")[0]
    if self._local_dir is not None:
        self._mklocaldirs("{}/{}".format(self._local_dir, video_id))
        self._mklocaldirs("{}/{}/frames".format(self._local_dir, video_id))
        if os.path.exists("{}/{}".format(self._local_dir,
                                         self.contents_file_key)):
            log.info("Local Video already exists")
    # If the manifest is already in S3, record the track and stop early.
    try:
        self._s3_client.head_object(Bucket=self._s3_bucket,
                                    Key=self.contents_file_key)
        new_url = self._s3_url_format.format(bucket=self._s3_bucket,
                                             s3_key=self.contents_file_key)
        log.info("Video already exists")
        p = Property(
            server=self.name,
            ver=self.version,
            value=new_url,
            property_type="extraction",
            property_id=1,
        )
        track = VideoAnn(t1=0.0, t2=float(self.last_tstamp), props=[p])
        self.response.append_track(track)
        self._s3_write_manager.kill_workers_on_completion()
        self._local_write_manager.kill_workers_on_completion()
        return
    # NOTE(review): bare except treats *any* failure (including missing
    # credentials) as "object not found" and proceeds with extraction.
    except:
        pass
    contents = []
    log.info("Getting frames")
    for i, (frame, tstamp_secs) in enumerate(
            self.med_ret.get_frames_iterator(
                sample_rate=self._sample_rate)):
        # Millisecond timestamps are used everywhere downstream.
        tstamp = int(tstamp_secs * 1000)
        # self._upload_frame(frame, tstamp, video_hash)
        if i % 100 == 0:
            log.info("...tstamp: " + str(tstamp))
        log.debug("tstamp: " + str(tstamp))
        if frame is None:
            continue
        frame = np.ascontiguousarray(frame[:, :, ::-1])  # RGB to BGR
        self.last_tstamp = tstamp
        data = {"frame": frame, "tstamp": tstamp, "video_id": video_id}
        contents.append((video_id, tstamp))
        if self._local_dir is not None:
            self._local_write_manager.queue.put(data)
        if self._s3_bucket is not None:
            self._s3_write_manager.queue.put(data)
    # self._s3_write_manager.kill_workers_on_completion()
    # self._local_write_manager.kill_workers_on_completion()
    if self._s3_bucket is not None:
        result = self._add_contents_to_s3(contents)
    if self._local_dir is not None:
        self._add_contents_to_local(contents)
    # Point the response at the S3 manifest URL, keeping the original.
    self.response.url_original = self.video_url
    new_url = self._s3_url_format.format(bucket=self._s3_bucket,
                                         s3_key=self.contents_file_key)
    self.response.url = new_url
    p = Property(
        server=self.name,
        ver=self.version,
        value=new_url,
        property_type="extraction",
        property_id=1,
    )
    track = VideoAnn(t1=0.0, t2=float(self.last_tstamp), props=[p])
    self.response.append_track(track)
    self._s3_write_manager.kill_workers_on_completion()
    self._local_write_manager.kill_workers_on_completion()
def create_media_retrievers(url):
    """Build two MediaRetriever instances for *url*.

    Returns:
        tuple: (efficient_mr, fast_mr) where fast_mr is constructed with
        limitation="cpu".
    """
    # FIX: the original ignored the `url` parameter and always used the
    # module-level VIDEO_URL constant, so callers could never retrieve a
    # different URL.
    efficient_mr = MediaRetriever(url)
    fast_mr = MediaRetriever(url, limitation="cpu")
    return efficient_mr, fast_mr
def process_properties(self, dump_video=True, dump_images=False,
                       tstamps_Of_Interest=None):
    """Draw annotations onto the media's frames and dump them as a video
    and/or individual JPEGs, then append a media summary to the response.

    Frames with frame annotations get boxes/labels; frames that only appear
    in the response timestamps are grayscaled; all dumped frames get a
    timestamp stamp. Request flags "dump_video"/"dump_images" override the
    keyword defaults when present.

    Args:
        dump_video: default for writing an annotated .mp4.
        dump_images: default for writing per-frame .jpg files.
        tstamps_Of_Interest: optional list of timestamps to draw; when None
            the whole media is iterated at 1.0 fps.
    """
    self.last_tstamp = 0.0
    # NOTE(review): assert is stripped under `python -O`; a raise would be safer.
    assert (self.response)
    self.med_ret = MediaRetriever(self.response.url)
    self.w, self.h = self.med_ret.get_w_h()
    # Media id comes from the URL basename, sanitized to alnum + "/" + ".".
    media_id = os.path.basename(self.response.url).rsplit(".", 1)[0]
    self.media_id = "".join(
        [e for e in media_id if e.isalnum() or e in ["/", "."]])
    self.content_type_map = {}
    # if there is no flag in the request of not request_api we'll get None.
    self.dump_video = None
    self.dump_images = None
    try:
        self.dump_video = self.request.get("dump_video")
        self.dump_images = self.request.get("dump_images")
    except Exception:
        log.error(
            "Unable to get flags from request dump_video or dump_images")
        pass
    # Fall back to the keyword defaults when the request gave no flags.
    if self.dump_video is None:
        self.dump_video = dump_video
    if self.dump_images is None:
        self.dump_images = dump_images
    if self.dump_video is False and self.dump_images is False:
        log.warning(
            "Not dumping anything--you might want to dump something.")
        return
    dump_folder = self.pushing_folder + '/' + self.media_id + '/'
    self.dumping_folder_url = dump_folder
    if dump_folder:
        if not os.path.exists(dump_folder):
            os.makedirs(dump_folder)
    if self.dump_video:
        filename = dump_folder + '/video.mp4'
        fps = 1
        frameSize = self.med_ret.shape
        frameSize = (self.w, self.h)
        fourcc = cv2.VideoWriter_fourcc(*'H264')
        log.info("filename: " + filename)
        log.info("fourcc: " + str(fourcc))
        log.info("type(fourcc): " + str(type(fourcc)))
        log.info("fps: " + str(fps))
        log.info("type(fps): " + str(type(fps)))
        log.info("frameSize: " + str(frameSize))
        log.info("type(frameSize): " + str(type(frameSize)))
        vid = cv2.VideoWriter(filename, fourcc, fps, frameSize)
        self.content_type_map[os.path.basename(filename)] = 'video/mp4'
    # Text-drawing parameters shared by all frames.
    face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.65
    thickness = 2
    # we get the image_annotation tstamps
    tstamps = self.response.get_timestamps()
    tstamp_frame_anns = self.response.get_timestamps_from_frames_ann()
    log.debug('tstamps: ' + str(tstamps))
    log.debug('tstamps_dets: ' + str(tstamp_frame_anns))
    # we get the frame iterator
    frames_iterator = []
    if tstamps_Of_Interest:
        if type(tstamps_Of_Interest) is list:
            for t in tstamps_Of_Interest:
                frame = self.med_ret.get_frame(tstamp=t)
                frames_iterator.append((frame, t))
    elif tstamps_Of_Interest is None:
        try:
            frames_iterator = self.med_ret.get_frames_iterator(
                sample_rate=1.0)
        except Exception:
            log.error(traceback.format_exc())
            raise Exception("Error loading media")
    for i, (img, tstamp) in enumerate(frames_iterator):
        self.last_tstamp = tstamp
        if img is None:
            log.warning("Invalid frame")
            continue
        if tstamp is None:
            log.warning("Invalid tstamp")
            continue
        # log.info('tstamp: ' + str(tstamp))
        if tstamp in tstamp_frame_anns:
            log.debug("drawing frame for tstamp: " + str(tstamp))
            # we get image_ann for that time_stamps
            regions = self.response.get_regions_from_tstamp(tstamp)
            # log.info(json.dumps(image_ann, indent=2))
            for region in regions:
                rand_color = get_rand_bgr()
                p0, p1 = p0p1_from_bbox_contour(region['contour'], self.w,
                                                self.h)
                # Label anchor sits just inside the box; move it up when the
                # box bottom is within 30px of the frame bottom.
                anchor_point = [p0[0] + 3, p1[1] - 3]
                if abs(p1[1] - self.h) < 30:
                    anchor_point = [p0[0] + 3, int(p1[1] / 2) - 3]
                img = cv2.rectangle(img, p0, p1, rand_color, thickness)
                prop_strs = get_props_from_region(region)
                for i, prop in enumerate(prop_strs):
                    img = cv2.putText(
                        img, prop,
                        (anchor_point[0], anchor_point[1] + i * 25), face,
                        1.0, rand_color, thickness)
        elif tstamp in tstamps:
            log.debug("Making frame gray")
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        else:
            log.debug("No processed frame")
            continue
        # Include the timestamp
        img = cv2.putText(img, str(tstamp), (20, 20), face, scale,
                          [255, 255, 255], thickness)
        if self.dump_video:
            # we add the frame
            log.debug("Adding frame")
            vid.write(img)
        if self.dump_images:
            # we dump the frame
            outfn = "{}/{}.jpg".format(dump_folder, tstamp)
            log.debug("Writing to file: {}".format(outfn))
            cv2.imwrite(outfn, img)
            self.content_type_map[os.path.basename(outfn)] = 'image/jpeg'
    if self.dump_video:
        vid.release()
    if self.s3_bucket:
        try:
            self.upload_files(dump_folder)
        except Exception:
            log.error(traceback.format_exc())
        # Only clean up when dumping to the default scratch folder.
        if self.pushing_folder == DEFAULT_DUMP_FOLDER:
            log.info('Removing files in ' + dump_folder)
            shutil.rmtree(dump_folder)
    props = []
    if self.dump_images:
        props.append(
            Property(
                server=self.name,
                ver=self.version,
                value=self.dumping_folder_url,
                property_type="dumped_images",
                property_id=1,
            ))
    if self.dump_video:
        dumped_video_url = self.dumping_folder_url + '/video.mp4'
        dumped_video_url = dumped_video_url.replace('//', '/')
        dumped_video_url = dumped_video_url.replace('https:/', 'https://')
        props.append(
            Property(
                server=self.name,
                ver=self.version,
                value=dumped_video_url,
                property_type="dumped_video",
                property_id=2,
            ))
    media_summary = VideoAnn(t1=0.0, t2=self.last_tstamp, props=props)
    self.response.append_media_summary(media_summary)
class FrameDrawer(PropertiesModule):
    """Draws response annotations onto media frames and dumps the result
    as an annotated video and/or per-frame JPEGs, locally and/or to S3.
    """

    def __init__(self, response=None, med_ret=None, server_name='FrameDrawer',
                 version='1.0.0', module_id_map=None,
                 pushing_folder=DEFAULT_DUMP_FOLDER, s3_bucket=None, s3_key=None):
        """Normalize *response*, derive a media retriever, and validate that
        a dump destination (local folder and/or S3 pair) exists.

        Raises:
            ValueError: if only one of s3_bucket/s3_key is given, or if
                neither a pushing folder nor an s3 key is configured.
        """
        super().__init__(server_name=server_name, version=version,
                         module_id_map=module_id_map)
        #log.setLevel('DEBUG')
        self.response = None
        if isinstance(response, Response):
            self.response = response
        elif isinstance(response, dict):
            self.response = Response(response)
        else:
            log.debug("No response")
        self.med_ret = med_ret
        if med_ret is None:
            if self.response is not None:
                self.med_ret = MediaRetriever(self.response.url)
        if s3_bucket and not s3_key:
            raise ValueError("s3 bucket defined but s3 key not defined")
        if s3_key and not s3_bucket:
            raise ValueError("s3 key defined but s3 bucket not defined")
        if not pushing_folder and not s3_key:
            raise ValueError(
                "pushing_folder and s3 key not defined, we cannot set where to dump."
            )
        if pushing_folder:
            self.pushing_folder = pushing_folder
        else:
            self.pushing_folder = DEFAULT_DUMP_FOLDER
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key

    def process_properties(self, dump_video=True, dump_images=False,
                           tstamps_Of_Interest=None):
        """Draw annotations onto the media's frames and dump them as a video
        and/or individual JPEGs, then append a media summary to the response.

        Request flags "dump_video"/"dump_images" override the keyword
        defaults when present.

        Args:
            dump_video: default for writing an annotated .mp4.
            dump_images: default for writing per-frame .jpg files.
            tstamps_Of_Interest: optional list of timestamps to draw; when
                None the whole media is iterated at 1.0 fps.
        """
        self.last_tstamp = 0.0
        # NOTE(review): assert is stripped under `python -O`.
        assert (self.response)
        self.med_ret = MediaRetriever(self.response.url)
        self.w, self.h = self.med_ret.get_w_h()
        # Media id comes from the URL basename, sanitized.
        media_id = os.path.basename(self.response.url).rsplit(".", 1)[0]
        self.media_id = "".join(
            [e for e in media_id if e.isalnum() or e in ["/", "."]])
        self.content_type_map = {}
        # if there is no flag in the request of not request_api we'll get None.
        self.dump_video = None
        self.dump_images = None
        try:
            self.dump_video = self.request.get("dump_video")
            self.dump_images = self.request.get("dump_images")
        except Exception:
            log.error(
                "Unable to get flags from request dump_video or dump_images")
            pass
        # Fall back to the keyword defaults when the request gave no flags.
        if self.dump_video is None:
            self.dump_video = dump_video
        if self.dump_images is None:
            self.dump_images = dump_images
        if self.dump_video is False and self.dump_images is False:
            log.warning(
                "Not dumping anything--you might want to dump something.")
            return
        dump_folder = self.pushing_folder + '/' + self.media_id + '/'
        self.dumping_folder_url = dump_folder
        if dump_folder:
            if not os.path.exists(dump_folder):
                os.makedirs(dump_folder)
        if self.dump_video:
            filename = dump_folder + '/video.mp4'
            fps = 1
            frameSize = self.med_ret.shape
            frameSize = (self.w, self.h)
            fourcc = cv2.VideoWriter_fourcc(*'H264')
            log.info("filename: " + filename)
            log.info("fourcc: " + str(fourcc))
            log.info("type(fourcc): " + str(type(fourcc)))
            log.info("fps: " + str(fps))
            log.info("type(fps): " + str(type(fps)))
            log.info("frameSize: " + str(frameSize))
            log.info("type(frameSize): " + str(type(frameSize)))
            vid = cv2.VideoWriter(filename, fourcc, fps, frameSize)
            self.content_type_map[os.path.basename(filename)] = 'video/mp4'
        # Text-drawing parameters shared by all frames.
        face = cv2.FONT_HERSHEY_SIMPLEX
        scale = 0.65
        thickness = 2
        # we get the image_annotation tstamps
        tstamps = self.response.get_timestamps()
        tstamp_frame_anns = self.response.get_timestamps_from_frames_ann()
        log.debug('tstamps: ' + str(tstamps))
        log.debug('tstamps_dets: ' + str(tstamp_frame_anns))
        # we get the frame iterator
        frames_iterator = []
        if tstamps_Of_Interest:
            if type(tstamps_Of_Interest) is list:
                for t in tstamps_Of_Interest:
                    frame = self.med_ret.get_frame(tstamp=t)
                    frames_iterator.append((frame, t))
        elif tstamps_Of_Interest is None:
            try:
                frames_iterator = self.med_ret.get_frames_iterator(
                    sample_rate=1.0)
            except Exception:
                log.error(traceback.format_exc())
                raise Exception("Error loading media")
        for i, (img, tstamp) in enumerate(frames_iterator):
            self.last_tstamp = tstamp
            if img is None:
                log.warning("Invalid frame")
                continue
            if tstamp is None:
                log.warning("Invalid tstamp")
                continue
            # log.info('tstamp: ' + str(tstamp))
            if tstamp in tstamp_frame_anns:
                log.debug("drawing frame for tstamp: " + str(tstamp))
                # we get image_ann for that time_stamps
                regions = self.response.get_regions_from_tstamp(tstamp)
                # log.info(json.dumps(image_ann, indent=2))
                for region in regions:
                    rand_color = get_rand_bgr()
                    p0, p1 = p0p1_from_bbox_contour(region['contour'], self.w,
                                                    self.h)
                    # Label anchor sits inside the box; moved up when the box
                    # bottom is within 30px of the frame bottom.
                    anchor_point = [p0[0] + 3, p1[1] - 3]
                    if abs(p1[1] - self.h) < 30:
                        anchor_point = [p0[0] + 3, int(p1[1] / 2) - 3]
                    img = cv2.rectangle(img, p0, p1, rand_color, thickness)
                    prop_strs = get_props_from_region(region)
                    for i, prop in enumerate(prop_strs):
                        img = cv2.putText(
                            img, prop,
                            (anchor_point[0], anchor_point[1] + i * 25), face,
                            1.0, rand_color, thickness)
            elif tstamp in tstamps:
                log.debug("Making frame gray")
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
            else:
                log.debug("No processed frame")
                continue
            # Include the timestamp
            img = cv2.putText(img, str(tstamp), (20, 20), face, scale,
                              [255, 255, 255], thickness)
            if self.dump_video:
                # we add the frame
                log.debug("Adding frame")
                vid.write(img)
            if self.dump_images:
                # we dump the frame
                outfn = "{}/{}.jpg".format(dump_folder, tstamp)
                log.debug("Writing to file: {}".format(outfn))
                cv2.imwrite(outfn, img)
                self.content_type_map[os.path.basename(outfn)] = 'image/jpeg'
        if self.dump_video:
            vid.release()
        if self.s3_bucket:
            try:
                self.upload_files(dump_folder)
            except Exception:
                log.error(traceback.format_exc())
            # Only clean up when dumping to the default scratch folder.
            if self.pushing_folder == DEFAULT_DUMP_FOLDER:
                log.info('Removing files in ' + dump_folder)
                shutil.rmtree(dump_folder)
        props = []
        if self.dump_images:
            props.append(
                Property(
                    server=self.name,
                    ver=self.version,
                    value=self.dumping_folder_url,
                    property_type="dumped_images",
                    property_id=1,
                ))
        if self.dump_video:
            dumped_video_url = self.dumping_folder_url + '/video.mp4'
            dumped_video_url = dumped_video_url.replace('//', '/')
            dumped_video_url = dumped_video_url.replace('https:/', 'https://')
            props.append(
                Property(
                    server=self.name,
                    ver=self.version,
                    value=dumped_video_url,
                    property_type="dumped_video",
                    property_id=2,
                ))
        media_summary = VideoAnn(t1=0.0, t2=self.last_tstamp, props=props)
        self.response.append_media_summary(media_summary)

    def upload_files(self, path):
        """Upload every file under *path* to the configured S3 bucket under
        "<s3_key>/<media_id>/", skipping files not in content_type_map
        (i.e. not generated by this run). Updates dumping_folder_url.
        """
        log.info("Uploading files")
        session = boto3.Session()
        s3 = session.resource('s3')
        bucket = s3.Bucket(self.s3_bucket)
        key_root = self.s3_key + '/' + self.media_id + '/'
        # https://<bucket-name>.s3.amazonaws.com/<key>
        self.dumping_folder_url = 'https://s3.amazonaws.com/' + self.s3_bucket + '/' + key_root
        for subdir, dirs, files in os.walk(path):
            for file in files:
                full_path = os.path.join(subdir, file)
                with open(full_path, 'rb') as data:
                    rel_path = os.path.basename(full_path)
                    key = key_root + rel_path
                    log.info('Pushing ' + full_path + ' to ' +
                             self.dumping_folder_url)
                    try:
                        content_type = self.content_type_map[os.path.basename(
                            full_path)]
                    except Exception:
                        content_type = None  #file is not intended to be uploaded, it was not generated in this execution.
                    if content_type:
                        bucket.put_object(Key=key,
                                          Body=data,
                                          ContentType=content_type)
def process_properties(self):
    """Extract all frames for the request URL, writing them to local disk
    and/or S3 via a thread pool, and append an "extraction" track.

    Short-circuits (appending the track immediately) when the contents
    manifest already exists in S3; records ERROR_LOADING_MEDIA on self.code
    when the media cannot be opened.
    """
    self.last_tstamp = 0.0
    log.info("Processing")
    # filelike = self.media_api.download(return_filelike=True)
    # if filelike.getbuffer().nbytes == 0:
    #     self.code = "ERROR_NO_IMAGES_LOADED"
    # log.info('Getting hash')
    # video_hash = hashfileobject(filelike, hexdigest=True)
    self.video_url = self.response.request.url
    try:
        log.info(f"Loading media from url: {self.response.request.url}")
        self.med_ret = MediaRetriever(self.video_url)
    except Exception as e:
        log.error(e)
        # FIX: traceback.print_exc() returns None (it prints to stderr), so
        # the original logged "None"; format_exc() returns the traceback text.
        log.error(traceback.format_exc())
        self.code = Codes.ERROR_LOADING_MEDIA
        return
    self.contents_file_key = get_contents_file_s3_key(self.video_url,
                                                      self._sample_rate)
    video_id = self.contents_file_key.split("/")[0]
    if self._local_dir is not None:
        self._mklocaldirs("{}/{}".format(self._local_dir, video_id))
        self._mklocaldirs("{}/{}/frames".format(self._local_dir, video_id))
        if os.path.exists("{}/{}".format(self._local_dir,
                                         self.contents_file_key)):
            log.info("Local Video already exists")
    # If the manifest is already in S3, record the track and stop early.
    try:
        self._s3_client.head_object(
            Bucket=self._s3_bucket, Key=self.contents_file_key
        )
        new_url = self._s3_url_format.format(
            bucket=self._s3_bucket, s3_key=self.contents_file_key
        )
        log.info("Video already exists")
        p = Property(
            server=self.name,
            ver=self.version,
            value=new_url,
            property_type="extraction",
            property_id=1,
        )
        track = VideoAnn(t1=0.0, t2=float(self.last_tstamp), props=[p])
        self.response.append_track(track)
        return
    except Exception:
        # head_object raising is treated as "not yet extracted".
        pass
    contents = []
    log.info("Getting frames")
    with ThreadPoolExecutor(max_workers=self.n_threads) as writer:
        for i, (frame, tstamp_secs) in enumerate(
            self.med_ret.get_frames_iterator(sample_rate=self._sample_rate)
        ):
            # int(16.016 * 1000) == 16015, but round(16.016) == 16016
            tstamp = round(tstamp_secs * 1000)
            # self._upload_frame(frame, tstamp, video_hash)
            if i % 100 == 0:
                log.info("...tstamp: " + str(tstamp))
            log.debug("tstamp: " + str(tstamp))
            if frame is None:
                continue
            frame = np.ascontiguousarray(frame[:, :, ::-1])  # RGB to BGR
            self.last_tstamp = tstamp
            data = {"frame": frame, "tstamp": tstamp, "video_id": video_id}
            contents.append((video_id, tstamp))
            if self._local_dir is not None:
                writer.submit(self._write_frame_helper, data)
            if self._s3_bucket is not None:
                writer.submit(self._upload_frame_helper, data)
    # The executor has drained here, so every frame write has completed
    # before the manifest is published.
    if self._s3_bucket is not None:
        _ = self._add_contents_to_s3(contents)
    if self._local_dir is not None:
        self._add_contents_to_local(contents)
    new_url = self._s3_url_format.format(
        bucket=self._s3_bucket, s3_key=self.contents_file_key
    )
    p = Property(
        server=self.name,
        ver=self.version,
        value=new_url,
        property_type="extraction",
        property_id=1,
    )
    track = VideoAnn(t1=0.0, t2=float(self.last_tstamp), props=[p])
    self.response.append_track(track)
def process_properties(self, dump_video=True, dump_images=False,
                       tstamps_of_interest=None):
    """Dump the media's frames (annotated elsewhere via dump_data) as a
    video and/or images, copy/upload them to the configured destinations,
    and append a media summary listing the resulting paths/URLs.

    Args:
        dump_video: default for writing a video; forced off for non-video
            media, and overridable by the "dump_video" request flag.
        dump_images: default for writing per-frame images; forced on for
            still-image media, overridable by the "dump_images" flag.
        tstamps_of_interest: optional list of timestamps to dump; when None
            the whole media is iterated at 1.0 fps.
    """
    # NOTE(review): assert is stripped under `python -O`.
    assert (isinstance(self.response, Response))
    self.med_ret = MediaRetriever(self.response.url)
    dump_video = (self.request.get("dump_video") or dump_video) and \
        self.med_ret.is_video
    dump_images = (self.request.get("dump_images") or dump_images) or \
        self.med_ret.is_image
    # NOTE(review): this guard is redundant — the `or self.med_ret.is_image`
    # above already forces dump_images True for image media.
    if dump_images is False and self.med_ret.is_image:
        dump_images = True
    if dump_video is False and dump_images is False:
        log.warning("`dump_video` and `dump_images` are both false."
                    " Unable to proceed.")
        return
    log.debug(f"Dumping Video: {dump_video}")
    log.debug(f"Dumping Frames: {dump_images}")
    # we get the frame iterator
    frames_iterator = []
    if tstamps_of_interest is not None:
        if type(tstamps_of_interest) is list:
            for t in tstamps_of_interest:
                frame = self.med_ret.get_frame(tstamp=t)
                frames_iterator.append((frame, t))
    else:
        try:
            frames_iterator = self.med_ret.get_frames_iterator(
                sample_rate=1.0)
        except Exception:
            log.error(traceback.format_exc())
            raise Exception("Error loading media")
    # dump_data renders to a temp video file and temp images dir.
    vid_file, images_dir, max_tstamp = self.dump_data(
        frames_iterator, dump_video=dump_video, dump_images=dump_images)
    props = []
    if self.local_dir is not None and dump_video:
        local_vid_path = self.copy_video(vid_file.name)
        p = Property(
            server=self.name,
            ver=self.version,
            value=local_vid_path,
            property_type="dumped_video",
            property_id=4,
        )
        props.append(p)
    if self.local_dir is not None and dump_images:
        local_frames_paths = self.copy_frames(images_dir.name)
        ps = [
            Property(
                server=self.name,
                ver=self.version,
                value=path,
                property_type="dumped_image",
                property_id=3,
            ) for path in local_frames_paths
        ]
        props.extend(ps)
    if self.s3_bucket is not None and dump_video:
        s3_vid_url = self.upload_video(vid_file.name)
        p = Property(
            server=self.name,
            ver=self.version,
            value=s3_vid_url,
            property_type="dumped_video",
            property_id=2,
        )
        props.append(p)
    if self.s3_bucket is not None and dump_images:
        s3_frames_urls = self.upload_frames(images_dir.name)
        ps = [
            Property(
                server=self.name,
                ver=self.version,
                value=url,
                property_type="dumped_image",
                property_id=1,
            ) for url in s3_frames_urls
        ]
        props.extend(ps)
    # Temp artifacts are no longer needed once copied/uploaded.
    images_dir.cleanup()
    vid_file.close()
    media_summary = VideoAnn(t1=0.0, t2=max_tstamp, props=props)
    self.response.append_media_summary(media_summary)
class FrameDrawer(PropertiesModule):
    """Draws response annotations onto media frames and dumps the result.

    The annotated output (a video and/or individual frames) is written to a
    local folder and/or uploaded to S3; the dump locations are appended to
    the response as a media summary.
    """

    def __init__(self, response=None, med_ret=None, server_name='FrameDrawer',
                 version='1.0.0', module_id_map=None,
                 pushing_folder=DEFAULT_DUMP_FOLDER, s3_bucket=None,
                 s3_key=None):
        """Initialize the drawer and validate the dump destinations.

        Args:
            response (Response | dict | None): response to draw; dicts are
                wrapped in a ``Response``.
            med_ret (MediaRetriever | None): pre-built retriever; when None
                and a response is available, one is built from its URL.
            server_name (str): module server name.
            version (str): module version.
            module_id_map (dict | None): forwarded to the parent module.
            pushing_folder (str): local directory to dump into.
            s3_bucket (str | None): S3 bucket for uploads.
            s3_key (str | None): S3 key prefix for uploads.

        Raises:
            ValueError: if only one of s3_bucket/s3_key is given, or if
                neither a pushing folder nor an S3 destination is set.
        """
        super().__init__(server_name=server_name,
                         version=version,
                         module_id_map=module_id_map)
        if isinstance(response, dict):
            response = Response(response)
        self.response = response
        if not isinstance(self.response, Response):
            log.debug("No response set")
            self.response = None
        self.med_ret = med_ret
        if med_ret is None and self.response is not None:
            self.med_ret = MediaRetriever(self.response.url)
        if s3_bucket and not s3_key:
            raise ValueError("s3 bucket defined but s3_key not defined")
        # BUG FIX: the original condition was `s3_key and not s3_key`,
        # which is always False and silently allowed a key with no bucket.
        if s3_key and not s3_bucket:
            raise ValueError("s3_key defined but s3 bucket not defined")
        if not pushing_folder and not s3_key:
            raise ValueError("pushing_folder and s3_key not defined, "
                             "we cannot set where to dump.")
        self.local_dir = pushing_folder
        self.s3_bucket = s3_bucket
        self.s3_key_prefix = s3_key

    def process_properties(self, dump_video=True, dump_images=False,
                           tstamps_of_interest=None):
        """Render annotated media and attach the dump locations.

        Args:
            dump_video (bool): request an annotated video (videos only).
            dump_images (bool): request annotated frames (forced for images).
            tstamps_of_interest (list | None): if a list, only these
                timestamps are drawn; otherwise the full 1.0 fps iterator.

        Raises:
            AssertionError: if ``self.response`` is not a ``Response``.
            Exception: if the media's frame iterator cannot be created.
        """
        assert (isinstance(self.response, Response))
        # NOTE(review): rebuilds med_ret from the response URL, overriding
        # any retriever injected via the constructor — TODO confirm intent.
        self.med_ret = MediaRetriever(self.response.url)
        # Request flags override keyword defaults; video dumps only make
        # sense for videos, image dumps are forced for still images.
        dump_video = (self.request.get("dump_video") or dump_video) and \
            self.med_ret.is_video
        dump_images = (self.request.get("dump_images") or dump_images) or \
            self.med_ret.is_image
        # (Removed dead branch: `if dump_images is False and
        # self.med_ret.is_image` could never fire, because dump_images
        # already ORs in `is_image` on the line above.)
        if dump_video is False and dump_images is False:
            log.warning("`dump_video` and `dump_images` are both false."
                        " Unable to proceed.")
            return
        log.debug(f"Dumping Video: {dump_video}")
        log.debug(f"Dumping Frames: {dump_images}")
        # we get the frame iterator
        frames_iterator = []
        if tstamps_of_interest is not None:
            if type(tstamps_of_interest) is list:
                for t in tstamps_of_interest:
                    frame = self.med_ret.get_frame(tstamp=t)
                    frames_iterator.append((frame, t))
        else:
            try:
                frames_iterator = self.med_ret.get_frames_iterator(
                    sample_rate=1.0)
            except Exception:
                log.error(traceback.format_exc())
                raise Exception("Error loading media")
        vid_file, images_dir, max_tstamp = self.dump_data(
            frames_iterator, dump_video=dump_video, dump_images=dump_images)
        props = []
        # Local copies: property_id 4 = video path, 3 = image paths.
        if self.local_dir is not None and dump_video:
            local_vid_path = self.copy_video(vid_file.name)
            p = Property(
                server=self.name,
                ver=self.version,
                value=local_vid_path,
                property_type="dumped_video",
                property_id=4,
            )
            props.append(p)
        if self.local_dir is not None and dump_images:
            local_frames_paths = self.copy_frames(images_dir.name)
            ps = [
                Property(
                    server=self.name,
                    ver=self.version,
                    value=path,
                    property_type="dumped_image",
                    property_id=3,
                ) for path in local_frames_paths
            ]
            props.extend(ps)
        # S3 uploads: property_id 2 = video URL, 1 = image URLs.
        if self.s3_bucket is not None and dump_video:
            s3_vid_url = self.upload_video(vid_file.name)
            p = Property(
                server=self.name,
                ver=self.version,
                value=s3_vid_url,
                property_type="dumped_video",
                property_id=2,
            )
            props.append(p)
        if self.s3_bucket is not None and dump_images:
            s3_frames_urls = self.upload_frames(images_dir.name)
            ps = [
                Property(
                    server=self.name,
                    ver=self.version,
                    value=url,
                    property_type="dumped_image",
                    property_id=1,
                ) for url in s3_frames_urls
            ]
            props.extend(ps)
        # Release the temporary artifacts now that they are copied/uploaded.
        images_dir.cleanup()
        vid_file.close()
        media_summary = VideoAnn(t1=0.0, t2=max_tstamp, props=props)
        self.response.append_media_summary(media_summary)

    def copy_video(self, video_file_path):
        """Copy the temp video into ``<local_dir>/<media_id>/<media_id>.mp4``.

        Returns:
            str: the destination file path.
        """
        assert (os.path.isfile(video_file_path))
        media_id = self.create_media_id()
        filename = f"{media_id}.mp4"
        target_dir = os.path.join(self.local_dir, media_id)
        target_filepath = os.path.join(target_dir, filename)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        shutil.copyfile(video_file_path, target_filepath)
        return target_filepath

    def copy_frames(self, image_directory):
        """Copy dumped frames into ``<local_dir>/<media_id>/frames``.

        Returns:
            str: the destination directory path.
        """
        assert (os.path.isdir(image_directory))
        media_id = self.create_media_id()
        target_dir = os.path.join(self.local_dir, media_id, "frames")
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        for image_path in iglob(os.path.join(image_directory, "*")):
            image_name = os.path.basename(image_path)
            target_filepath = os.path.join(target_dir, image_name)
            shutil.copyfile(image_path, target_filepath)
        return target_dir

    def upload_video(self, video_file_path):
        """Upload the video to S3 and return its unsigned public URL."""
        assert (os.path.isfile(video_file_path))
        media_id = self.create_media_id()
        s3_key = f"{self.s3_key_prefix}/{media_id}/{media_id}.mp4"
        client = boto3.client("s3")
        client.upload_file(video_file_path,
                           self.s3_bucket,
                           s3_key,
                           ExtraArgs={'ContentType': 'video/mp4'})
        s3_url = client.generate_presigned_url('get_object',
                                               Params={
                                                   'Bucket': self.s3_bucket,
                                                   'Key': s3_key
                                               },
                                               ExpiresIn=3600)
        # Strip the signing query string: only the plain object URL is kept.
        return s3_url.split("?")[0]

    def upload_frames(self, image_directory):
        """Upload every frame in the directory to S3.

        Returns:
            list[str]: unsigned object URLs, one per uploaded frame.
        """
        assert (os.path.isdir(image_directory))
        media_id = self.create_media_id()
        s3_key_prefix = f"{self.s3_key_prefix}/{media_id}/frames"
        client = boto3.client("s3")
        s3_urls = []
        for image_path in iglob(os.path.join(image_directory, "*")):
            image_name = os.path.basename(image_path)
            s3_key = f"{s3_key_prefix}/{image_name}"
            client.upload_file(image_path,
                               self.s3_bucket,
                               s3_key,
                               ExtraArgs={'ContentType': 'image/jpeg'})
            s3_url = client.generate_presigned_url('get_object',
                                                   Params={
                                                       'Bucket':
                                                       self.s3_bucket,
                                                       'Key': s3_key
                                                   },
                                                   ExpiresIn=3600)
            s3_urls.append(s3_url.split("?")[0])
        return s3_urls

    def create_media_id(self):
        """Build a stable media id: sanitized basename + md5 of the URL."""
        media_id = os.path.basename(self.response.url).rsplit(".", 1)[0]
        media_id = "".join(
            [e for e in media_id if e.isalnum() or e in ["/", "."]])
        hash = hashlib.md5(self.med_ret.url.encode()).hexdigest()
        return f"{media_id}_{hash}"

    def dump_data(self, frames_iterator, dump_video=False, dump_images=False):
        """Draw annotations over each frame and write video/images.

        Frames with frame annotations get bounding boxes and property
        labels; frames with only timestamps are grayed out; all drawn
        frames are stamped with their timestamp.

        Args:
            frames_iterator: iterable of (image, timestamp) pairs.
            dump_video (bool): write drawn frames into a temp mp4.
            dump_images (bool): write drawn frames as temp jpegs.

        Returns:
            tuple: (NamedTemporaryFile video, TemporaryDirectory images,
            float last processed timestamp — 0.0 if no frames).
        """
        face = cv2.FONT_HERSHEY_SIMPLEX
        scale = 0.65
        thickness = 2
        fps = 1
        vid = None
        image_dir = TemporaryDirectory()
        vid_file = NamedTemporaryFile(suffix=".mp4")
        tstamps = set(self.response.get_timestamps())
        tstamp_frame_anns = set(self.response.get_timestamps_from_frames_ann())
        video_width, video_height = self.med_ret.get_w_h()
        # BUG FIX: initialize so an empty iterator no longer raises
        # NameError on the return statement below.
        tstamp = 0.0
        for img, tstamp in frames_iterator:
            if vid is None:
                # Lazily create the writer once the first frame arrives.
                fourcc = cv2.VideoWriter_fourcc(*'H264')
                vid = cv2.VideoWriter(vid_file.name, fourcc, fps,
                                      (video_width, video_height))
            if tstamp in tstamp_frame_anns:
                log.debug(f"drawing frame for tstamp: {tstamp}")
                regions = self.response.get_regions_from_tstamp(tstamp)
                for region in regions:
                    rand_color = random_bgr()
                    p0, p1 = p0p1_from_bbox_contour(region['contour'],
                                                    video_width, video_height)
                    # Anchor labels just inside the box; if the box bottom
                    # hugs the frame edge, move the anchor to mid-height.
                    anchor_point = [p0[0] + 3, p1[1] - 3]
                    if abs(p1[1] - video_height) < 30:
                        anchor_point = [p0[0] + 3, int(p1[1] / 2) - 3]
                    img = cv2.rectangle(img, p0, p1, rand_color, thickness)
                    prop_strs = get_props_from_region(region)
                    # BUG FIX: use a distinct index `j` — the original
                    # reused `i` and shadowed the outer loop variable.
                    for j, prop in enumerate(prop_strs):
                        img = cv2.putText(
                            img, prop,
                            (anchor_point[0], anchor_point[1] + j * 25),
                            face, 1.0, rand_color, thickness)
            elif tstamp in tstamps:
                log.debug(f"Making frame at {tstamp} gray")
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
            else:
                log.debug(f"No frame at {tstamp}")
                continue
            img = cv2.putText(img, str(tstamp), (20, 20), face, scale,
                              [255, 255, 255], thickness)
            if dump_video:
                vid.write(img)
            if dump_images:
                cv2.imwrite(f"{image_dir.name}/{tstamp}.jpeg", img)
        # BUG FIX: guard — `vid` stays None when the iterator is empty.
        if vid is not None:
            vid.release()
        return vid_file, image_dir, tstamp
class ImagesModule(Module):
    """Base module that iterates media frames and processes them in batches.

    Child classes implement :meth:`process_images`; this class handles media
    loading, batching, previous-region filtering, and error bookkeeping.
    """

    def __init__(
            self,
            server_name,
            version,
            prop_type=None,
            prop_id_map=None,
            module_id_map=None,
            batch_size=BATCH_SIZE,
    ):
        """Store batching config and forward identity to the parent Module.

        Args:
            server_name (str): module server name.
            version (str): module version.
            prop_type: forwarded to the parent module.
            prop_id_map: forwarded to the parent module.
            module_id_map: forwarded to the parent module.
            batch_size (int): number of frames per process_images call.
        """
        super().__init__(
            server_name=server_name,
            version=version,
            prop_type=prop_type,
            prop_id_map=prop_id_map,
            module_id_map=module_id_map,
        )
        self.batch_size = batch_size
        log.debug(f"Creating ImagesModule with batch_size: {batch_size}")

    def process(self, response):
        """Process the message, calls process_images(batch, tstamps,
        contours=None) which is implemented by the child module

        Returns:
            Response: response object
        """
        log.debug("Processing message")
        super().process(response)
        try:
            log.info(f"Loading media from url: {self.response.request.url}")
            self.media = MediaRetriever(self.response.request.url)
            self.frames_iterator = self.media.get_frames_iterator(
                self.response.request.sample_rate)
        except Exception as e:
            log.error(e)
            # BUG FIX: traceback.print_exc() returns None (it prints to
            # stderr), so the original logged "None"; format_exc() returns
            # the traceback string for the log record.
            log.error(traceback.format_exc())
            self.code = Codes.ERROR_LOADING_MEDIA
            return self.update_and_return_response()
        self._update_w_h_in_response()

        # With previous-properties-of-interest set but no frame annotations
        # to match against, there is nothing to process.
        if self.prev_pois and not self.response.has_frame_anns():
            log.warning("NO_PREV_REGIONS_OF_INTEREST, returning...")
            self.code = Codes.NO_PREV_REGIONS_OF_INTEREST
            return self.update_and_return_response()

        num_problematic_frames = 0
        for image_batch, tstamp_batch, prev_region_batch in batch_generator(
                self.preprocess_input(), self.batch_size):
            if image_batch is None or tstamp_batch is None:
                continue
            try:
                self.process_images(image_batch, tstamp_batch,
                                    prev_region_batch)
            except ValueError as e:
                # Tolerate a bounded number of bad batches before failing.
                num_problematic_frames += 1
                log.warning("Problem processing frames")
                if num_problematic_frames >= MAX_PROBLEMATIC_FRAMES:
                    log.error(e)
                    self.code = Codes.ERROR_PROCESSING
                    return self.update_and_return_response()
        log.debug("Finished processing.")

        # prev_pois was set but no region ever matched the query.
        if self.prev_pois and self.prev_regions_of_interest_count == 0:
            log.warning("NO_PREV_REGIONS_OF_INTEREST, returning...")
            self.code = Codes.NO_PREV_REGIONS_OF_INTEREST
            return self.update_and_return_response()

    def preprocess_input(self):
        """Parses request for data

        Yields:
            frame: An image a time tstamp of a video or image
            tstamp: The timestamp associated with the frame
            region: The matching region dict
        """
        for i, (frame, tstamp) in enumerate(self.frames_iterator):
            if frame is None:
                log.warning("Invalid frame")
                continue
            if tstamp is None:
                log.warning("Invalid tstamp")
                continue
            self.tstamps_processed.append(tstamp)
            log.debug(f"tstamp: {tstamp}")
            # Periodic progress line at info level.
            if i % 100 == 0:
                log.info(f"tstamp: {tstamp}")
            if not self.prev_pois:
                yield frame, tstamp, None
            else:
                # Only yield regions whose props match the previous
                # properties-of-interest query.
                log.debug("Processing with previous response")
                log.debug(f"Querying on self.prev_pois: {self.prev_pois}")
                regions_that_match_props = []
                regions_at_tstamp = self.response.get_regions_from_tstamp(
                    tstamp)
                log.debug(f"Finding regions at tstamp: {tstamp}")
                if regions_at_tstamp is not None:
                    log.debug(
                        f"len(regions_at_tstamp): {len(regions_at_tstamp)}")
                    for i_region in regions_at_tstamp:
                        if self._region_contains_props(i_region):
                            log.debug(
                                f"region: {i_region} contains props of interest"
                            )
                            regions_that_match_props.append(i_region)
                            self.prev_regions_of_interest_count += 1
                for region in regions_that_match_props:
                    yield frame, tstamp, region

    @abstractmethod
    def process_images(self, image_batch, tstamp_batch,
                       prev_region_batch=None):
        """Abstract method to be implemented by child module"""
        pass

    def _update_w_h_in_response(self):
        """Copy the media's width/height onto the response."""
        (width, height) = self.media.get_w_h()
        log.debug(f"Setting in response w: {width} h: {height}")
        self.response.width = width
        self.response.height = height

    def _region_contains_props(self, region):
        """ Boolean to check if a region's props matches the defined
            previous properties of interest

        Args:
            props (list): list of properties for a region

        Returns:
            bool: if props match prev_pois query
        """
        props = region.get("props")
        if props is None:
            return False
        return pandas_query_matches_props(self.prev_pois_bool_exp,
                                          pd.DataFrame(props))