def get_document_bounds(videoIO, feature, folderPath, youtube_id):
    """OCR frames sampled from a video and persist keywords/screenshots.

    Samples one frame every 5 seconds (fps=0.2), saves each frame as a PNG
    under ``folderPath``, runs Google Cloud Vision document text detection on
    it, writes one ``timestamps`` document per detected word, and — when the
    first detected word changes between consecutive frames — records the
    previous frame as a "new slide" screenshot in the ``screenshots``
    collection.

    Args:
        videoIO: path to a video file readable by moviepy's VideoFileClip.
        feature: a FeatureType member; only FeatureType.WORD bounding boxes
            are collected (and only the first word's box per frame).
        folderPath: directory, relative to this source file, where frame
            images are written.
        youtube_id: currently unused — inserted documents hard-code "tasty".
            TODO(review): thread this through to the DB writes.

    Side effects: mutates the module globals listed below, writes PNG files,
    and inserts documents into MongoDB.
    """
    # SECURITY: credentials are hard-coded in the connection URI. Move the
    # username/password into environment variables or a secrets store.
    client = MongoClient(
        'mongodb+srv://qhacks:[email protected]/test?retryWrites=true'
    )
    db = client['qhacks']

    # Cross-frame OCR state kept at module level (presumably initialized by
    # the caller/module — verify: secs, length, prev_time, prev_file must be
    # set before the first call).
    global notes_screenshots
    global glob_data
    global length
    global prev_file
    global prev_time
    global secs
    global temp

    # One frame every 5 seconds of video.
    myclip = VideoFileClip(videoIO)
    frames = list(myclip.iter_frames(fps=0.2))

    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Loop-invariant: construct the Vision client once, not per frame.
    # (Kept in a separate name so it no longer clobbers the Mongo client.)
    vision_client = vision.ImageAnnotatorClient()
    collection_texts = db['timestamps']

    for count, single_frame in enumerate(frames, start=1):
        img = Image.fromarray(single_frame, 'RGB')
        # os.path.join avoids the doubled slashes the old string
        # concatenation produced ("dir//folder//file1.png").
        filename = os.path.join(dir_path, folderPath, "file%d.png" % count)
        img.save(filename)
        temp = filename  # remembered globally as the most recent frame file

        with io.open(filename, 'rb') as image_file:
            content = image_file.read()
        image = types.Image(content=content)
        response = vision_client.document_text_detection(image=image)
        document = response.full_text_annotation

        # Walk page -> block -> paragraph -> word, keeping the first word's
        # bounding box and rebuilding each word's text from its symbols.
        bounds = []
        words = []
        first = True
        for page in document.pages:
            for block in page.blocks:
                for paragraph in block.paragraphs:
                    for word in paragraph.words:
                        if first and feature == FeatureType.WORD:
                            bounds.append(word.bounding_box)
                            first = False
                        # BUG FIX: the original tested
                        # `symbol.property.detected_break.type is 0` — an
                        # identity comparison on an int — but both branches
                        # appended the same text, so the test was dead code.
                        build_word = "".join(
                            symbol.text
                            for symbol in word.symbols
                            if hasattr(symbol, "text")
                        )
                        words.append(build_word)

        # One timestamp document per detected word.
        for keyword in words:
            collection_texts.insert_one(
                {"secs": secs, "keyword": keyword, "youtube_id": "tasty"}
            )

        # Placeholder entry when nothing was read, so the slide-change
        # comparison below always has a first element to look at.
        data = {secs: words if words else "a"}
        glob_data.append(data)
        length += 1

        if length > 1:
            current_first = glob_data[length - 1][secs][0]
            previous_first = glob_data[length - 2][prev_time][0]
            if current_first and previous_first:
                if current_first != previous_first:
                    # First word changed: the previous frame ended a slide.
                    notes_screenshots.append({
                        "secs": secs,
                        "file": prev_file,
                        "youtube_id": "tasty",
                    })
                prev_file = temp
                prev_time = secs

        secs += 5

    # BUG FIX: insert_many raises InvalidOperation on an empty list.
    if notes_screenshots:
        db['screenshots'].insert_many(notes_screenshots)