def test_annotate_video(self):
    """Verify annotate_video sends the expected request and unpacks the LRO result."""
    # Build the operation the stubbed channel will hand back.
    expected_response = video_intelligence_pb2.AnnotateVideoResponse()
    operation = operations_pb2.Operation(
        name="operations/test_annotate_video", done=True)
    operation.response.Pack(expected_response)

    # Mock the API response
    channel = ChannelStub(responses=[operation])
    patch = mock.patch("google.api_core.grpc_helpers.create_channel")
    with patch as create_channel:
        create_channel.return_value = channel
        client = videointelligence_v1p2beta1.VideoIntelligenceServiceClient()

    # Setup Request
    uri = "gs://demomaker/cat.mp4"
    requested_features = [enums.Feature.LABEL_DETECTION]

    response = client.annotate_video(
        input_uri=uri, features=requested_features)
    result = response.result()
    assert expected_response == result

    # Exactly one request crossed the stubbed channel, matching our inputs.
    assert len(channel.requests) == 1
    expected_request = video_intelligence_pb2.AnnotateVideoRequest(
        input_uri=uri, features=requested_features)
    actual_request = channel.requests[0][1]
    assert expected_request == actual_request
def recognize_by_google_ocr(video):
    """Run Cloud Video Intelligence TEXT_DETECTION (OCR) on a GCS-hosted video.

    Args:
        video: object whose ``name`` attribute is the blob name inside
            ``settings.GS_BUCKET_NAME``.

    Returns:
        The annotation response, once the long-running operation
        completes (this call blocks until then).
    """
    gcs_uri = f'gs://{settings.GS_BUCKET_NAME}/{video.name}'
    client = videointelligence.VideoIntelligenceServiceClient()
    operation = client.annotate_video(
        input_uri=gcs_uri,
        features=[videointelligence.enums.Feature.TEXT_DETECTION])
    return operation.result()
def textDetection(path):
    """Detect on-screen text in a local video file.

    Args:
        path: filesystem path of the video to analyze.

    Returns:
        A list of ``Data`` items (utf-8 text, start time in seconds,
        confidence), sorted by start time; detections with confidence
        below 0.5 are discarded.
    """
    with io.open(path, 'rb') as video_file:
        video_bytes = video_file.read()

    client = videointelligence.VideoIntelligenceServiceClient()
    requested_features = [videointelligence.enums.Feature.TEXT_DETECTION]
    context = videointelligence.types.VideoContext()

    operation = client.annotate_video(
        input_content=video_bytes,  # the bytes of the video file
        features=requested_features,
        video_context=context)
    print('\nProcessing video for text detection. timeout is 300s')
    result = operation.result(timeout=300)

    # TODO: batch process is possible
    # Only one video was submitted, so only the first result matters.
    annotation_result = result.annotation_results[0]

    detections = []
    for text_annotation in annotation_result.text_annotations:
        segment = text_annotation.segments[0]
        offset = segment.segment.start_time_offset
        # Drop detections whose confidence is below 0.5.
        if segment.confidence < 0.5:
            continue
        # Data item: [Text, Time, Confidence]
        # TODO: checking all utf-8 text
        detections.append(
            Data(text_annotation.text.encode('utf-8'),
                 offset.seconds + offset.nanos * 1e-9,
                 segment.confidence))

    # NOTE(review): assumes Data supports indexing so item[1] is the time
    # field — confirm against Data's definition.
    detections.sort(key=lambda item: item[1])
    return detections
def test_annotate_video_exception(self):
    """Verify annotate_video surfaces an operation error via the LRO's exception()."""
    # Setup Response
    error = status_pb2.Status()
    operation = operations_pb2.Operation(
        name="operations/test_annotate_video_exception", done=True)
    operation.error.CopyFrom(error)

    # Mock the API response
    channel = ChannelStub(responses=[operation])
    patch = mock.patch("google.api_core.grpc_helpers.create_channel")
    with patch as create_channel:
        create_channel.return_value = channel
        client = videointelligence_v1p2beta1.VideoIntelligenceServiceClient()

    # Setup Request
    features_element = enums.Feature.LABEL_DETECTION
    features = [features_element]
    input_uri = "gs://cloud-samples-data/video/cat.mp4"

    # BUG FIX: `features` was previously passed positionally, which bound
    # it to the client's first parameter (input_uri) and clashed with the
    # explicit input_uri= keyword. Pass both by keyword, matching the
    # sibling tests in this file.
    response = client.annotate_video(input_uri=input_uri, features=features)
    exception = response.exception()
    assert exception.errors[0] == error
def test_annotate_video_exception(self):
    """An errored operation must be exposed through the LRO's exception()."""
    # Build a done operation carrying an (empty) error status.
    error = status_pb2.Status()
    operation = operations_pb2.Operation(
        name='operations/test_annotate_video_exception', done=True)
    operation.error.CopyFrom(error)

    # Mock the API response
    channel = ChannelStub(responses=[operation])
    patch = mock.patch('google.api_core.grpc_helpers.create_channel')
    with patch as create_channel:
        create_channel.return_value = channel
        client = videointelligence_v1p2beta1.VideoIntelligenceServiceClient()

    # Setup Request
    uri = 'gs://demomaker/cat.mp4'
    requested_features = [enums.Feature.LABEL_DETECTION]

    response = client.annotate_video(
        input_uri=uri, features=requested_features)
    exception = response.exception()
    assert exception.errors[0] == error
"""
Recognize text. Detection performs Optical Character Recognition (OCR).
It detects and extracts text within an input video. Text detection is
available for all of the languages supported by the Cloud Vision API.
"""
"""Detect text in a video stored on GCS."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]

# Kick off the long-running annotation against the GCS-hosted video.
operation = video_client.annotate_video(
    input_uri='gs://deeplens-videos/my_desk.mp4', features=features)

print('\nProcessing video for text detection.')
# Block until the operation finishes (or 300 s elapse).
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
def track_objects_gcs(gcs_uri):
    # [START video_object_tracking_gcs_beta]
    """Track objects in a GCS-hosted video and dump ball frames to JSON.

    Args:
        gcs_uri: ``gs://`` URI of the video to analyze.

    Returns:
        The full list of object annotations for the video. Side effect:
        writes frameData.json with the chosen annotation's bounding boxes.
    """
    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
    operation = video_client.annotate_video(
        input_uri=gcs_uri, features=features, location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    # BUG FIX: `object_annotations` was read below without ever being
    # assigned (NameError at runtime). Extract it from the first (only)
    # result, exactly as track_objects_gcs_all() does.
    object_annotations = result.annotation_results[0].object_annotations

    # Array of JSON data by frame
    frameData = {"frames": []}

    # Default to the first annotation, then prefer a ball/basketball one.
    object_annotation = object_annotations[0]
    for o in object_annotations:
        print('Entity id: {}'.format(o.entity.description))
        print(o.entity.description == 'ball'
              or o.entity.description == 'basketball')
        if o.entity.description == 'ball' or o.entity.description == 'basketball':
            object_annotation = o

    # description is in Unicode
    print('Entity description: {}'.format(
        object_annotation.entity.description))
    if object_annotation.entity.entity_id:
        print('Entity id: {}'.format(object_annotation.entity.entity_id))

    print('Segment: {}s to {}s'.format(
        object_annotation.segment.start_time_offset.seconds +
        object_annotation.segment.start_time_offset.nanos / 1e9,
        object_annotation.segment.end_time_offset.seconds +
        object_annotation.segment.end_time_offset.nanos / 1e9))
    print('Confidence: {}'.format(object_annotation.confidence))

    # Record every tracked frame's bounding box for the chosen annotation.
    for frame in object_annotation.frames:
        box = frame.normalized_bounding_box
        time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
        print('Time offset of the first frame: {}s'.format(time))
        frameData["frames"].append({
            "time": time,
            "left": box.left,
            "top": box.top,
            "right": box.right,
            "bottom": box.bottom
        })
    # [END video_object_tracking_gcs_beta]

    with open("frameData.json", "w") as write_file:
        json.dump(frameData, write_file)
    return object_annotations
def track_objects_gcs_all(gcs_uri):
    # [START video_object_tracking_gcs_beta]
    """Track every object in a GCS-hosted video.

    Writes objectsData.json mapping each entity description to its
    time-sorted list of bounding-box frames, and returns the raw
    annotations.
    """
    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    client = videointelligence.VideoIntelligenceServiceClient()
    operation = client.annotate_video(
        input_uri=gcs_uri,
        features=[videointelligence.enums.Feature.OBJECT_TRACKING],
        location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    # A single video was processed, so only the first result matters.
    object_annotations = result.annotation_results[0].object_annotations

    # Map: entity description -> list of per-frame bounding-box dicts.
    tracked = {}
    for annotation in object_annotations:
        print('Entity id: {}'.format(annotation.entity.description))
        # description is in Unicode
        print('Entity description: {}'.format(annotation.entity.description))
        if annotation.entity.entity_id:
            print('Entity id: {}'.format(annotation.entity.entity_id))
        print('Segment: {}s to {}s'.format(
            annotation.segment.start_time_offset.seconds +
            annotation.segment.start_time_offset.nanos / 1e9,
            annotation.segment.end_time_offset.seconds +
            annotation.segment.end_time_offset.nanos / 1e9))
        print('Confidence: {}'.format(annotation.confidence))

        frames = tracked.setdefault(annotation.entity.description, [])
        for frame in annotation.frames:
            box = frame.normalized_bounding_box
            time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
            print('Time offset of the first frame: {}s'.format(time))
            frames.append({
                "time": time,
                "left": box.left,
                "top": box.top,
                "right": box.right,
                "bottom": box.bottom
            })
    # [END video_object_tracking_beta]

    # Sort each object's frames chronologically.
    for description in tracked.keys():
        tracked[description] = sorted(tracked[description],
                                      key=lambda entry: entry['time'])

    print(tracked.keys())  # Descriptions for all objects
    with open("objectsData.json", "w") as write_file:
        json.dump(tracked, write_file)
    return object_annotations
def track_objects_all(path):
    # [START video_object_tracking_beta]
    """Track every object in a local video file.

    Creates objectsData.json with keys being the object descriptions and
    values being a time-sorted array of bounding-box frames; returns the
    raw annotations.
    """
    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    client = videointelligence.VideoIntelligenceServiceClient()
    requested_features = [videointelligence.enums.Feature.OBJECT_TRACKING]

    with io.open(path, 'rb') as file:
        input_content = file.read()

    try:
        # if not os.path.exists('data'):
        os.makedirs('data')
    except OSError:
        print('Error: Creating directory of data')

    # When everything done, release the capture
    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    operation = client.annotate_video(
        input_content=input_content,
        features=requested_features,
        location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Map: entity description -> list of per-frame bounding-box dicts.
    objects_by_description = {}
    for annotation in object_annotations:
        print('Entity id: {}'.format(annotation.entity.description))
        # description is in Unicode
        print('Entity description: {}'.format(annotation.entity.description))
        if annotation.entity.entity_id:
            print('Entity id: {}'.format(annotation.entity.entity_id))
        print('Segment: {}s to {}s'.format(
            annotation.segment.start_time_offset.seconds +
            annotation.segment.start_time_offset.nanos / 1e9,
            annotation.segment.end_time_offset.seconds +
            annotation.segment.end_time_offset.nanos / 1e9))
        print('Confidence: {}'.format(annotation.confidence))

        frames = objects_by_description.setdefault(
            annotation.entity.description, [])
        for frame in annotation.frames:
            box = frame.normalized_bounding_box
            time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
            print('Time offset of the first frame: {}s'.format(time))
            frames.append({
                "time": time,
                "left": box.left,
                "top": box.top,
                "right": box.right,
                "bottom": box.bottom
            })
    # [END video_object_tracking_beta]

    # Sort frames chronologically per object.
    for description in objects_by_description.keys():
        objects_by_description[description] = sorted(
            objects_by_description[description],
            key=lambda entry: entry['time'])

    print(objects_by_description.keys())
    with open("objectsData.json", "w") as write_file:
        json.dump(objects_by_description, write_file)
    return object_annotations
def track_objects(path):
    # [START video_object_tracking_beta]
    """Track a ball-like object in a local video and render its bounding box.

    The input is re-encoded with each frame tripled ("slowed") before being
    sent to the API; every tracked bounding box is then drawn onto
    'tracked_object.mp4' and appended to frameData.json.

    Args:
        path: filesystem path of the video to analyze.

    Returns:
        The full list of object annotations for the video.
    """
    from google.cloud import videointelligence_v1p2beta1 as videointelligence
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]

    # Re-encode the video writing every frame three times.
    vid = cv2.VideoCapture(path)
    success, image = vid.read()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    v = cv2.VideoWriter('slowed_video.mp4', fourcc, vid.get(cv2.CAP_PROP_FPS),
                        (len(image[0]), len(image)), True)
    while success:
        # BUG FIX: the original loop advanced to the *next* read before
        # writing, so the first frame was never written and the final
        # failed read's None image was written three times. Write the
        # frame in hand, then advance.
        for _ in range(3):
            v.write(image)
        success, image = vid.read()
        print('Read a new frame: ', success)
    v.release()

    with io.open('slowed_video.mp4', 'rb') as file:
        input_content = file.read()

    cap = cv2.VideoCapture('slowed_video.mp4')
    numFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # count_frames(cap)
    cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
    length = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
    fps = cap.get(cv2.CAP_PROP_FPS)  # numFrames/length
    print(fps)
    print(cap.get(cv2.CAP_PROP_FPS))
    print(length)

    try:
        # if not os.path.exists('data'):
        os.makedirs('data')
    except OSError:
        print('Error: Creating directory of data')

    # When everything done, release the capture
    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    operation = video_client.annotate_video(
        input_content=input_content, features=features,
        location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    cap.set(1, 1)
    ret1, img1 = cap.read()
    # print(img1)
    y, x = len(img1), len(img1[0])
    print(str(x) + "x" + str(y))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # cv2.VideoWriter_fourcc(*'avc1')
    video = cv2.VideoWriter(
        'tracked_object.mp4', fourcc, 10, (x, y), True
    )  # MUST CHANGE HARD-CODED 10 INTO WHATEVER GOOGLE'S FRAME PROCESSING EXACT RATE IS

    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Array of JSON data by frame
    frameData = {"frames": []}

    # Default to the first annotation, then prefer a ball-like entity.
    object_annotation = object_annotations[0]
    for o in object_annotations:
        print('Entity id: {}'.format(o.entity.description))
        print(o.entity.description == 'ball'
              or o.entity.description == 'basketball')
        if o.entity.description in [
                'ball', 'basketball', 'orange', 'fruit', 'lemon', 'food'
        ]:
            object_annotation = o

    # description is in Unicode
    print('Entity description: {}'.format(
        object_annotation.entity.description))
    if object_annotation.entity.entity_id:
        print('Entity id: {}'.format(object_annotation.entity.entity_id))

    print('Segment: {}s to {}s'.format(
        object_annotation.segment.start_time_offset.seconds +
        object_annotation.segment.start_time_offset.nanos / 1e9,
        object_annotation.segment.end_time_offset.seconds +
        object_annotation.segment.end_time_offset.nanos / 1e9))
    print('Confidence: {}'.format(object_annotation.confidence))

    # Draw each tracked bounding box onto the matching frame and record it.
    for frame in object_annotation.frames:
        box = frame.normalized_bounding_box
        time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
        print('Time offset of the first frame: {}s'.format(time))
        frameData["frames"].append({
            "time": time,
            "left": box.left,
            "top": box.top,
            "right": box.right,
            "bottom": box.bottom
        })
        frame_no = round(fps * time)
        print(str(fps) + " " + str(time) + " " + str(frame_no))
        total_frames = cap.get(7)
        cap.set(1, frame_no)
        ret, img = cap.read()
        name = './data/frame' + str(frame_no) + '.jpg'
        print('Creating...' + name)
        print((box.left, box.top))
        # Normalized box coordinates scaled to pixel dimensions.
        cv2.rectangle(img, (int(box.left * x), int(box.top * y)),
                      (int(box.right * x), int(box.bottom * y)),
                      (0, 255, 0), 3)
        # print(img)
        # cv2.imwrite(name, img)
        video.write(img)
    # [END video_object_tracking_beta]

    with open("frameData.json", "w") as write_file:
        json.dump(frameData, write_file)
    cap.release()
    cv2.destroyAllWindows()
    video.release()
    vid.release()
    return object_annotations
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence
from google.oauth2 import service_account
import io

# NOTE(review): service-account key filename is hard-coded here — keep the
# JSON key file out of version control.
path = 'Priya-Help-3da0ec6b2c1e.json'
creds = service_account.Credentials.from_service_account_file(path)
video_client = videointelligence.VideoIntelligenceServiceClient(
    credentials=creds)
features = [videointelligence.enums.Feature.OBJECT_TRACKING]

# Read the local video into memory for inline (input_content) upload.
path = "Tikal.mp4"
with io.open(path, 'rb') as file:
    input_content = file.read()

# It is recommended to use location_id as 'us-east1' for the best latency
# due to different types of processors used in this region and others.
operation = video_client.annotate_video(input_content=input_content,
                                        features=features,
                                        location_id='us-east1')
print('\nProcessing video for object annotations.')
# Block until the operation finishes (or 300 s elapse).
result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations
# Get only the first annotation for demo purposes.