Python update_doc примеры использования

Язык программирования: Python

Пространство имен/Пакет: docproc.pymgutil

Метод/Функция: update_doc

Примеров на hotexamples.com: 7

Python update_doc - 7 примеров найдено. Это лучшие примеры Python кода для docproc.pymgutil.update_doc, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

def insert_html_images(d):
    """Render a stored HTML document to a JPEG and record it as a page image.

    Looks up the document by ``d['_id']`` in the ``aug_meta`` collection of
    the ``docs`` database, writes its raw file to a temporary ``.html`` file,
    renders that file with imgkit to ``pageimg.jpg`` and appends the value
    returned by the GridFS import to the document's ``page_images`` list.

    input
        d: mapping with an ``_id`` key (e.g. a document returned by a
           pymongo find)

    output
        The value returned by ``pu.update_doc`` (described by the original
        author as a Boolean success indicator)
    """
    import os

    client = py.MongoClient('mongo')
    db = client['docs']
    col = db['aug_meta']
    doc_id = d['_id']
    doc = col.find_one({"_id": doc_id})
    html_file = pu.get_from_gridfs(db, doc['raw_file'])

    # delete=False keeps the file on disk after it is closed so the renderer
    # can reopen it by name; it is removed explicitly in the finally clause.
    f = tempfile.NamedTemporaryFile(mode='wb', delete=False, suffix='.html')
    try:
        with f:
            f.write(html_file)
        display = Display(visible=0, size=(800, 600))
        display.start()
        # NOTE(review): the virtual display is intentionally left running, as
        # in the original code, where the stop call was disabled - confirm
        # whether it can safely be stopped here.
        imgkit.from_file(f.name, 'pageimg.jpg')
    finally:
        # The original `f.delete` was a bare attribute access and removed
        # nothing, leaving one stray temp file behind per call.
        os.unlink(f.name)

    # NOTE(review): the original called `py.import_to_gridfs`, but `py` is the
    # alias used for MongoClient; the GridFS helpers used in this function
    # live on `pu` - confirm `pu.import_to_gridfs` exists with this signature.
    b = pu.import_to_gridfs(db, 'pageimg.jpg', 'image')
    doc.setdefault('page_images', []).append(b)

    success = pu.update_doc(col, doc_id, doc)

    return success

Пример #2

Показать файл

Файл: mgtika.py Проект: ren-hoek/daex-meta

def insert_content_type(d):
    """Store a standardized content type at the top level of a document.

    The raw ``metadata['Content-Type']`` value of the document is passed
    through ``standardize_content_type``. The first element of the result is
    saved under ``Content-Type.Content``; when the result has exactly two
    elements the second one is saved under ``Content-Type.Charset``.

    Inputs:
        d: Mapping carrying the ``_id`` of the document to update
    Output:
        The value returned by ``pu.update_doc`` (success indicator)
    """
    collection = py.MongoClient('mongo')['docs']['aug_meta']

    key = d['_id']
    record = collection.find_one({"_id": key})

    parts = standardize_content_type(record['metadata']['Content-Type'])
    entry = {'Content': parts[0]}
    if len(parts) == 2:
        entry['Charset'] = parts[1]
    record['Content-Type'] = entry

    return pu.update_doc(collection, key, record)

Пример #3

Показать файл

Файл: docglove.py Проект: ren-hoek/daex-meta

def insert_glove(d):
    """Attach a glove-based document vector to a document.

    Reads the document's ``content`` field, passes it to ``generate_glove``
    together with the module-level ``model`` and the size 300, and stores the
    resulting vector as a list under ``ml-features.glove``. ``model`` must
    already be bound in this module's namespace before the call.

    Inputs:
        d: Mapping carrying the ``_id`` of the document to update
    Output:
        The value returned by ``pu.update_doc`` (success indicator)
    """
    collection = py.MongoClient('mongo')['docs']['aug_meta']

    key = d['_id']
    record = collection.find_one({"_id": key})

    embedding = generate_glove(record['content'], model, 300)

    # Create the feature container only when the document lacks one.
    features = record.setdefault('ml-features', dict())
    features['glove'] = embedding.tolist()

    return pu.update_doc(collection, key, record)

Пример #4

Показать файл

def insert_doc2vec(d):
    """Attach a doc2vec document vector to a document.

    Reads the document's ``content`` field, passes it to ``generate_doc2vec``
    together with the module-level ``model`` and the size 300, and stores the
    resulting vector as a list under ``ml-features.doc2vec``. ``model`` must
    already be bound in this module's namespace so it is not reloaded on
    every call.

    Inputs:
        d: Mapping carrying the ``_id`` of the document to update
    Output:
        The value returned by ``pu.update_doc`` (success indicator)
    """
    mongo = py.MongoClient('mongo')
    aug_meta = mongo['docs']['aug_meta']

    target_id = d['_id']
    stored = aug_meta.find_one({"_id": target_id})

    vector = generate_doc2vec(stored['content'], model, 300)

    # Reuse an existing feature mapping, or start an empty one.
    stored.setdefault('ml-features', dict())['doc2vec'] = vector.tolist()

    return pu.update_doc(aug_meta, target_id, stored)

Пример #5

Показать файл

def insert_pdf_images(d):
    """Generate page images for a stored file and save them on the document.

    Looks up the document by ``d['_id']`` in the ``aug_meta`` collection of
    the ``docs`` database, fetches its raw file from GridFS, hands it to
    ``import_page_images`` with a scratch directory and replaces the
    document's ``page_images`` field with the result.

    Inputs:
        d: Mapping carrying the ``_id`` of the document to update
    Output:
        The value returned by ``pu.update_doc`` (success indicator)
    """
    client = py.MongoClient('mongo')
    db = client['docs']
    col = db['aug_meta']

    temp_dir = tempfile.mkdtemp()
    try:
        doc_id = d['_id']
        doc = col.find_one({"_id": doc_id})
        pdf_file = pu.get_from_gridfs(db, doc['raw_file'])

        # NOTE(review): the final flag is False here and True in
        # insert_office_images; its meaning is defined by import_page_images.
        images = import_page_images(db, pdf_file, temp_dir, False)
        doc['page_images'] = images
        success = pu.update_doc(col, doc_id, doc)
    finally:
        # Clean up even when a step above raises, so failed documents do not
        # leave scratch directories behind.
        fl.clean_temp_files(temp_dir)

    return success

Пример #6

Показать файл

def insert_office_images(d):
    """Generate page images for a stored file via a temporary on-disk copy.

    Looks up the document by ``d['_id']`` in the ``aug_meta`` collection of
    the ``docs`` database, writes its raw file from GridFS to a temporary
    file, hands that file's path to ``import_page_images`` with a scratch
    directory and replaces the document's ``page_images`` field with the
    result.

    Inputs:
        d: Mapping carrying the ``_id`` of the document to update
    Output:
        The value returned by ``pu.update_doc`` (success indicator)
    """
    client = py.MongoClient('mongo')
    db = client['docs']
    col = db['aug_meta']

    temp_dir = tempfile.mkdtemp()
    temp_path = None
    try:
        doc_id = d['_id']
        doc = col.find_one({"_id": doc_id})
        pdf_file = pu.get_from_gridfs(db, doc['raw_file'])

        # Close the file before handing its path on: the original left it
        # open and unflushed, so a reader could see a truncated or empty file.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as f:
            temp_path = f.name
            f.write(pdf_file)

        images = import_page_images(db, temp_path, temp_dir, True)
        doc['page_images'] = images
        success = pu.update_doc(col, doc_id, doc)
    finally:
        # Clean up even when a step above raises; the temp file is only
        # passed along once it has actually been created.
        if temp_path is None:
            fl.clean_temp_files(temp_dir)
        else:
            fl.clean_temp_files(temp_dir, temp_path)

    return success

Пример #7

Показать файл

Файл: docsrch.py Проект: ren-hoek/daex-meta

def extract_postcode(d, b):
    """Find postcodes in a document's text and persist them.

    Runs ``find_ukpc`` over the ``text`` field of the ``greenbook`` document
    identified by ``d['_id']``. When the result is truthy it is stored on the
    document as ``postcode``, serialized with ``aws.write_dict_json`` and
    uploaded via ``aws.put_s3_object`` under
    ``testing/<doc key>/postcodes.json``, and the document is updated.

    Inputs:
        d: Mapping carrying the ``_id`` of the document to process
        b: Passed to ``aws.put_s3_object`` as the upload target
           (presumably an S3 bucket - confirm against the aws helper)
    Output:
        The value returned by ``pu.update_doc``, or None when no postcodes
        were found
    """
    collection = py.MongoClient('mongo')['docs']['greenbook']

    # The session is created up front, before it is known whether an upload
    # will be needed.
    session = aws.create_session()

    key = d['_id']
    record = collection.find_one({"_id": key})
    found = find_ukpc(record['text'])
    if not found:
        return None

    record['postcode'] = found
    pc_key = "testing/" + record['key'] + "/postcodes.json"
    payload = aws.write_dict_json(found)
    aws.put_s3_object(session, b, pc_key, payload)

    return pu.update_doc(collection, key, record)