Example #1
from google.cloud import videointelligence_v1 as videointelligence


def detect_person(gcs_uri):
    """
    Detects people in a video with Google Video Intelligence
    """

    client = videointelligence.VideoIntelligenceServiceClient()

    # Configure the request
    config = videointelligence.types.PersonDetectionConfig(
        include_bounding_boxes=True,
        include_attributes=True,
        include_pose_landmarks=True,
    )
    context = videointelligence.types.VideoContext(
        person_detection_config=config)

    # Start the asynchronous request
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.PERSON_DETECTION],
            "input_uri": gcs_uri,
            "video_context": context,
        })

    print("\nProcessing video for person detection annotations.")
    result = operation.result(timeout=300)

    print("\nFinished processing.\n")
    return result
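A brief hypothetical caller for detect_person above (the bucket and object name are placeholders); it unpacks the returned operation result the same way the later person-detection example does:

# Submit the video and wait for the annotations.
result = detect_person("gs://YOUR_BUCKET_ID/path/to/your/video.mp4")

# A single video was processed, so there is exactly one annotation result.
annotation_result = result.annotation_results[0]
for annotation in annotation_result.person_detection_annotations:
    for track in annotation.tracks:
        segment = track.segment
        print("Person from {}s to {}s".format(
            segment.start_time_offset.seconds
            + segment.start_time_offset.microseconds / 1e6,
            segment.end_time_offset.seconds
            + segment.end_time_offset.microseconds / 1e6,
        ))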
Example #2
    def __init__(
            self,
            credential_path,
            codec="flac",
            sample_rate=44100,
            require_api_confirmation=True,
            api="speech",  # speech, video
            **kwargs):
        self.swears = utils.parse_swears()
        self.codec = codec
        self.sample_rate = sample_rate
        self.api = api

        self.speech_config = {
            "enable_word_time_offsets": True,
            "language_code": "en-US",
            "max_alternatives": 2,
            "profanity_filter": False
        }

        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(credential_path)

        if api == "speech":
            self.speech_client = self.client = speech_v1.SpeechClient()
            self.speech_config[
                "model"] = "video"  # default OR video, video-style recognition is more expensive
        elif api == "video":
            self.video_client = self.client = video_v1.VideoIntelligenceServiceClient(
            )

        self.require_api_confirmation = require_api_confirmation
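A minimal standalone sketch of the same credential and client setup shown in this constructor; the service-account path and the exact speech_v1/video_v1 import aliases are assumptions, not taken from the snippet's own imports:

import os

from google.cloud import speech_v1
from google.cloud import videointelligence_v1 as video_v1

# Placeholder path to a service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"

# Build whichever client the chosen API needs.
speech_client = speech_v1.SpeechClient()
video_client = video_v1.VideoIntelligenceServiceClient()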
Example #3
    def test_annotate_video(self):
        # Setup Expected Response
        expected_response = {}
        expected_response = video_intelligence_pb2.AnnotateVideoResponse(
            **expected_response
        )
        operation = operations_pb2.Operation(
            name="operations/test_annotate_video", done=True
        )
        operation.response.Pack(expected_response)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p3beta1.VideoIntelligenceServiceClient()

        # Setup Request
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]
        input_uri = "gs://cloud-samples-data/video/cat.mp4"

        response = client.annotate_video(features, input_uri=input_uri)
        result = response.result()
        assert expected_response == result

        assert len(channel.requests) == 1
        expected_request = video_intelligence_pb2.AnnotateVideoRequest(
            features=features, input_uri=input_uri
        )
        actual_request = channel.requests[0][1]
        assert expected_request == actual_request
Example #4
from google.cloud import videointelligence


def get_video(path):
    """Transcribe speech from a video stored on GCS."""

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.SpeechTranscriptionConfig(
        language_code="en-US", enable_automatic_punctuation=True
    )
    video_context = videointelligence.VideoContext(speech_transcription_config=config)

    operation = video_client.annotate_video(
        request={
            "features": features,
            "input_uri": path,
            "video_context": video_context,
        }
    )

    print("\nProcessing video for speech transcription.")

    result = operation.result(timeout=600)
    print("\nFinished processing.\n")
    # There is only one annotation_result since only
    # one video is processed.
    annotation_results = result.annotation_results[0]

    final_text = ""
    for speech_transcription in annotation_results.speech_transcriptions:

        # The number of alternatives for each transcription is limited by
        # SpeechTranscriptionConfig.max_alternatives.
        # Each alternative is a different possible transcription
        # and has its own confidence score.
        transcript = speech_transcription.alternatives[0].transcript
        final_text += transcript
        print(transcript)
        """
        for alternative in speech_transcription.alternatives:
            print("Alternative level information:")

            print("Transcript: {}".format(alternative.transcript))
            print("Confidence: {}\n".format(alternative.confidence))

            print("Word level information:")
            for word_info in alternative.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                print(
                    "\t{}s - {}s: {}".format(
                        start_time.seconds + start_time.microseconds * 1e-6,
                        end_time.seconds + end_time.microseconds * 1e-6,
                        word,
                    )
                )
                """
    return final_text
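A short hypothetical usage sketch for get_video; the GCS URI and output filename are placeholders:

# Transcribe the video and keep the text on disk.
transcript = get_video("gs://YOUR_BUCKET_ID/path/to/your/video.mp4")
with open("transcript.txt", "w") as out_file:
    out_file.write(transcript)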
Example #5
import io

# Uses the pre-2.0 google-cloud-videointelligence API surface (enums/types modules).
from google.cloud import videointelligence_v1p3beta1 as videointelligence


def detect_faces(local_file_path="path/to/your/video-file.mp4"):
    """Detects faces in a video from a local file."""

    client = videointelligence.VideoIntelligenceServiceClient()

    with io.open(local_file_path, "rb") as f:
        input_content = f.read()

    # Configure the request
    config = videointelligence.types.FaceDetectionConfig(
        include_bounding_boxes=True, include_attributes=True)
    context = videointelligence.types.VideoContext(
        face_detection_config=config)

    # Start the asynchronous request
    operation = client.annotate_video(
        input_content=input_content,
        features=[videointelligence.enums.Feature.FACE_DETECTION],
        video_context=context,
    )

    print("\nProcessing video for face detection annotations.")
    result = operation.result(timeout=300)

    print("\nFinished processing.\n")

    # Retrieve the first result, because a single video was processed.
    annotation_result = result.annotation_results[0]

    for annotation in annotation_result.face_detection_annotations:
        print("Face detected:")
        for track in annotation.tracks:
            print("Segment: {}s to {}s".format(
                track.segment.start_time_offset.seconds +
                track.segment.start_time_offset.nanos / 1e9,
                track.segment.end_time_offset.seconds +
                track.segment.end_time_offset.nanos / 1e9,
            ))

            # Each segment includes timestamped faces that include
            # characteristics of the face detected.
            # Grab the first timestamped face
            timestamped_object = track.timestamped_objects[0]
            box = timestamped_object.normalized_bounding_box
            print("Bounding box:")
            print("\tleft  : {}".format(box.left))
            print("\ttop   : {}".format(box.top))
            print("\tright : {}".format(box.right))
            print("\tbottom: {}".format(box.bottom))

            # Attributes include glasses, headwear, facial hair, smiling,
            # direction of gaze, etc.
            print("Attributes:")
            for attribute in timestamped_object.attributes:
                print("\t{}:{} {}".format(attribute.name, attribute.value,
                                          attribute.confidence))
Example #6
from google.cloud import videointelligence_v1 as videointelligence


def detect_faces(gcs_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4"):
    """Detects faces in a video."""

    client = videointelligence.VideoIntelligenceServiceClient()

    # Configure the request
    config = videointelligence.FaceDetectionConfig(include_bounding_boxes=True,
                                                   include_attributes=True)
    context = videointelligence.VideoContext(face_detection_config=config)

    # Start the asynchronous request
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.FACE_DETECTION],
            "input_uri": gcs_uri,
            "video_context": context,
        })

    print("\nProcessing video for face detection annotations.")
    result = operation.result(timeout=300)

    print("\nFinished processing.\n")

    # Retrieve the first result, because a single video was processed.
    annotation_result = result.annotation_results[0]

    for annotation in annotation_result.face_detection_annotations:
        print("Face detected:")
        for track in annotation.tracks:
            print("Segment: {}s to {}s".format(
                track.segment.start_time_offset.seconds +
                track.segment.start_time_offset.microseconds / 1e6,
                track.segment.end_time_offset.seconds +
                track.segment.end_time_offset.microseconds / 1e6,
            ))

            # Each segment includes timestamped faces that include
            # characteristics of the face detected.
            # Grab the first timestamped face
            timestamped_object = track.timestamped_objects[0]
            box = timestamped_object.normalized_bounding_box
            print("Bounding box:")
            print("\tleft  : {}".format(box.left))
            print("\ttop   : {}".format(box.top))
            print("\tright : {}".format(box.right))
            print("\tbottom: {}".format(box.bottom))

            # Attributes include glasses, headwear, smiling, direction of gaze
            print("Attributes:")
            for attribute in timestamped_object.attributes:
                print("\t{}:{} {}".format(attribute.name, attribute.value,
                                          attribute.confidence))
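If only part of the video needs to be analysed, the VideoContext used above can also carry explicit segments; a hedged sketch (the 30-second window is an arbitrary assumption) restricting face detection to the start of the video:

import datetime

from google.cloud import videointelligence_v1 as videointelligence

# Analyse only the first 30 seconds of the video.
segment = videointelligence.VideoSegment(
    start_time_offset=datetime.timedelta(seconds=0),
    end_time_offset=datetime.timedelta(seconds=30),
)
config = videointelligence.FaceDetectionConfig(
    include_bounding_boxes=True, include_attributes=True
)
context = videointelligence.VideoContext(
    segments=[segment], face_detection_config=config
)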
Example #7
    def test_annotate_video_exception(self):
        # Setup Response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name="operations/test_annotate_video_exception", done=True
        )
        operation.error.CopyFrom(error)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p3beta1.VideoIntelligenceServiceClient()

        # Setup Request
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]
        input_uri = "gs://cloud-samples-data/video/cat.mp4"

        response = client.annotate_video(features, input_uri=input_uri)
        exception = response.exception()
        assert exception.errors[0] == error
Example #8
import io

# Uses the pre-2.0 google-cloud-videointelligence client (enums module).
from google.cloud import videointelligence_v1p3beta1
from google.cloud.videointelligence_v1p3beta1 import enums


def sample_annotate_video(local_file_path="resources/googlework_tiny.mp4"):
    """Performs asynchronous video annotation for logo recognition on a local file."""

    client = videointelligence_v1p3beta1.VideoIntelligenceServiceClient()

    with io.open(local_file_path, "rb") as f:
        input_content = f.read()
    features_element = enums.Feature.LOGO_RECOGNITION
    features = [features_element]

    operation = client.annotate_video(input_content=input_content,
                                      features=features)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    # Get the first response, since we sent only one video.
    annotation_result = response.annotation_results[0]
    # Annotations for list of logos detected, tracked and recognized in video.
    for logo_recognition_annotation in annotation_result.logo_recognition_annotations:
        entity = logo_recognition_annotation.entity
        # Opaque entity ID. Some IDs may be available in [Google Knowledge Graph
        # Search API](https://developers.google.com/knowledge-graph/).
        print(u"Entity Id : {}".format(entity.entity_id))
        # Textual description, e.g. `Google`.
        print(u"Description : {}".format(entity.description))
        # All logo tracks where the recognized logo appears. Each track corresponds
        # to one logo instance appearing in consecutive frames.
        for track in logo_recognition_annotation.tracks:
            # Video segment of a track.
            segment = track.segment
            segment_start_time_offset = segment.start_time_offset
            print(u"\n\tStart Time Offset : {}.{}".format(
                segment_start_time_offset.seconds,
                segment_start_time_offset.nanos))
            segment_end_time_offset = segment.end_time_offset
            print(u"\tEnd Time Offset : {}.{}".format(
                segment_end_time_offset.seconds,
                segment_end_time_offset.nanos))
            print(u"\tConfidence : {}".format(track.confidence))
            # The object with timestamp and attributes per frame in the track.
            for timestamped_object in track.timestamped_objects:
                # Normalized Bounding box in a frame, where the object is located.
                normalized_bounding_box = timestamped_object.normalized_bounding_box
                print(u"\n\t\tLeft : {}".format(normalized_bounding_box.left))
                print(u"\t\tTop : {}".format(normalized_bounding_box.top))
                print(u"\t\tRight : {}".format(normalized_bounding_box.right))
                print(u"\t\tBottom : {}".format(
                    normalized_bounding_box.bottom))
                # Optional. The attributes of the object in the bounding box.
                for attribute in timestamped_object.attributes:
                    print(u"\n\t\t\tName : {}".format(attribute.name))
                    print(u"\t\t\tConfidence : {}".format(
                        attribute.confidence))
                    print(u"\t\t\tValue : {}".format(attribute.value))
            # Optional. Attributes in the track level.
            for track_attribute in track.attributes:
                print(u"\n\t\tName : {}".format(track_attribute.name))
                print(u"\t\tConfidence : {}".format(
                    track_attribute.confidence))
                print(u"\t\tValue : {}".format(track_attribute.value))
        # All video segments where the recognized logo appears. There might be
        # multiple instances of the same logo class appearing in one VideoSegment.
        for logo_recognition_annotation_segment in logo_recognition_annotation.segments:
            logo_recognition_annotation_segment_start_time_offset = (
                logo_recognition_annotation_segment.start_time_offset)
            print(u"\n\tStart Time Offset : {}.{}".format(
                logo_recognition_annotation_segment_start_time_offset.seconds,
                logo_recognition_annotation_segment_start_time_offset.nanos,
            ))
            logo_recognition_annotation_segment_end_time_offset = (
                logo_recognition_annotation_segment.end_time_offset)
            print(u"\tEnd Time Offset : {}.{}".format(
                logo_recognition_annotation_segment_end_time_offset.seconds,
                logo_recognition_annotation_segment_end_time_offset.nanos,
            ))
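A hedged companion sketch for a video that is already in Cloud Storage (same pre-2.0 beta client and enums module as above; the URI is a placeholder), which avoids sending raw bytes in the request:

def sample_annotate_video_gcs(input_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4"):
    """Performs logo recognition on a video stored on GCS."""
    client = videointelligence_v1p3beta1.VideoIntelligenceServiceClient()
    features = [enums.Feature.LOGO_RECOGNITION]

    # Pass a gs:// URI instead of input_content; response handling is
    # identical to sample_annotate_video above.
    operation = client.annotate_video(input_uri=input_uri, features=features)

    print(u"Waiting for operation to complete...")
    return operation.result()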
Example #9
# Uses the pre-2.0 google-cloud-videointelligence API surface (enums/types modules).
from google.cloud import videointelligence_v1p3beta1 as videointelligence


def detect_person(gcs_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4"):
    """Detects people in a video."""

    client = videointelligence.VideoIntelligenceServiceClient()

    # Configure the request
    config = videointelligence.types.PersonDetectionConfig(
        include_bounding_boxes=True,
        include_attributes=True,
        include_pose_landmarks=True,
    )
    context = videointelligence.types.VideoContext(
        person_detection_config=config
    )

    # Start the asynchronous request
    operation = client.annotate_video(
        input_uri=gcs_uri,
        features=[videointelligence.enums.Feature.PERSON_DETECTION],
        video_context=context,
    )

    print("\nProcessing video for person detection annotations.")
    result = operation.result(timeout=300)

    print("\nFinished processing.\n")

    # Retrieve the first result, because a single video was processed.
    annotation_result = result.annotation_results[0]

    for annotation in annotation_result.person_detection_annotations:
        print("Person detected:")
        for track in annotation.tracks:
            print(
                "Segment: {}s to {}s".format(
                    track.segment.start_time_offset.seconds
                    + track.segment.start_time_offset.nanos / 1e9,
                    track.segment.end_time_offset.seconds
                    + track.segment.end_time_offset.nanos / 1e9,
                )
            )

            # Each segment includes timestamped objects that include
            # characteristics, e.g. the clothes and posture of the person detected.
            # Grab the first timestamped object
            timestamped_object = track.timestamped_objects[0]
            box = timestamped_object.normalized_bounding_box
            print("Bounding box:")
            print("\tleft  : {}".format(box.left))
            print("\ttop   : {}".format(box.top))
            print("\tright : {}".format(box.right))
            print("\tbottom: {}".format(box.bottom))

            # Attributes include unique pieces of clothing,
            # poses, or hair color.
            print("Attributes:")
            for attribute in timestamped_object.attributes:
                print(
                    "\t{}:{} {}".format(
                        attribute.name, attribute.value, attribute.confidence
                    )
                )

            # Landmarks in person detection include body parts such as
            # left_shoulder, right_ear, and right_ankle
            print("Landmarks:")
            for landmark in timestamped_object.landmarks:
                print(
                    "\t{}: {} (x={}, y={})".format(
                        landmark.name,
                        landmark.confidence,
                        landmark.point.x,  # Normalized vertex
                        landmark.point.y,  # Normalized vertex
                    )
                )
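For a local file, the same pre-2.0 beta client also accepts raw bytes through input_content; a hedged sketch (the file path is a placeholder) that mirrors the local-file face-detection example earlier:

import io


def detect_person_local(local_file_path="path/to/your/video-file.mp4"):
    """Detects people in a video read from a local file."""
    client = videointelligence.VideoIntelligenceServiceClient()

    with io.open(local_file_path, "rb") as f:
        input_content = f.read()

    config = videointelligence.types.PersonDetectionConfig(
        include_bounding_boxes=True,
        include_attributes=True,
        include_pose_landmarks=True,
    )
    context = videointelligence.types.VideoContext(person_detection_config=config)

    # Send the video bytes instead of a GCS URI.
    operation = client.annotate_video(
        input_content=input_content,
        features=[videointelligence.enums.Feature.PERSON_DETECTION],
        video_context=context,
    )
    return operation.result(timeout=300)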
Example #10
from google.cloud import videointelligence_v1 as videointelligence


def face_detect_video(gcs_uri="gs://prac_interview/testvideo.mp4"):
    """Detects faces in a video."""

    client = videointelligence.VideoIntelligenceServiceClient()

    # Configure the request
    config = videointelligence.FaceDetectionConfig(
        include_bounding_boxes=True, include_attributes=True
    )
    context = videointelligence.VideoContext(face_detection_config=config)

    # Start the asynchronous request
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.FACE_DETECTION],
            "input_uri": gcs_uri,
            "video_context": context,
        }
    )

    print("\nProcessing video for face detection annotations.")
    result = operation.result(timeout=300)
    print("\nFinished processing.\n")

    # Retrieve the first result, because a single video was processed.
    annotation_result = result.annotation_results[0]
    # Running confidence totals: [smiling, looking_at_camera, eyes_visible]
    tracking = [0, 0, 0]
    for annotation in annotation_result.face_detection_annotations:
        print("Face detected:")
        for track in annotation.tracks:
            print(
                "Segment: {}s to {}s".format(
                    track.segment.start_time_offset.seconds
                    + track.segment.start_time_offset.microseconds / 1e6,
                    track.segment.end_time_offset.seconds
                    + track.segment.end_time_offset.microseconds / 1e6,
                )
            )

            # Each segment includes timestamped faces that include
            # characteristics of the face detected.
            # Grab the first timestamped face
            # Sample every `divisor`-th timestamped face in the track.
            divisor = 1
            track_length = len(track.timestamped_objects)
            print(track_length)
            part_track = int(track_length / divisor)
            for i in range(part_track):
                timestamped_object = track.timestamped_objects[divisor*i]

                """box = timestamped_object.normalized_bounding_box
                print("Bounding box:")
                print("\tleft  : {}".format(box.left))
                print("\ttop   : {}".format(box.top))
                print("\tright : {}".format(box.right))
                print("\tbottom: {}".format(box.bottom))"""

                # Attributes include glasses, headwear, smiling, direction of gaze
                #print("Attributes:")
                for attribute in timestamped_object.attributes:
                    if(attribute.name == "smiling"):
                        tracking[0] += attribute.confidence
                    elif(attribute.name == "looking_at_camera"):
                        tracking[1] += attribute.confidence
                    elif(attribute.name == "eyes_visible"):
                        tracking[2] += attribute.confidence
                    """print(
                        "\t{}:{} {}".format(
                            attribute.name, attribute.value, attribute.confidence
                        )
                    )"""
            print(tracking)
            print(part_track)
            tracking = [round(num / part_track, 2) for num in tracking]
            print(tracking)
    return tracking
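A hypothetical caller for face_detect_video (the URI is a placeholder); the returned list holds the averaged confidences for smiling, looking_at_camera and eyes_visible:

smiling, looking_at_camera, eyes_visible = face_detect_video(
    "gs://YOUR_BUCKET_ID/path/to/your/video.mp4")
print("smiling: {}, looking_at_camera: {}, eyes_visible: {}".format(
    smiling, looking_at_camera, eyes_visible))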
Example #11
from google.cloud import videointelligence_v1p3beta1 as videointelligence

OUTPUT_BUCKET = 'gs://YOUR_OUTPUT_BUCKET/'

client = videointelligence.VideoIntelligenceServiceClient()

# Optional: define extra settings for label detection
config = videointelligence.types.LabelDetectionConfig(
    label_detection_mode=videointelligence.LabelDetectionMode.SHOT_MODE,
    video_confidence_threshold=0.5)

video_context = videointelligence.types.VideoContext(
    label_detection_config=config)


def analyse(event, context):

    print(event)

    gcs_uri = 'gs://' + event['bucket'] + '/' + event['name']
    just_file_name = event['name'].split('.')[0]

    operation = client.annotate_video(
        request={
            'input_uri': gcs_uri,
            'features': [videointelligence.Feature.LABEL_DETECTION],
            'output_uri': OUTPUT_BUCKET + just_file_name + '.json',
            'video_context': video_context,
        })