예제 #1
0
def update_document(host, port, url, message_dict, timeout=None):
    """
    Save document to core api

    Builds a JSON payload from ``message_dict`` and sends it with an HTTP
    PUT to ``http://<host>:<port>/<url>/<document_uuid>/``.

    :param host: str, API host
    :param port: int, API port (formatted with ``%d``)
    :param url: str, path segment placed between the port and the uuid
    :param message_dict: dict, must contain 'document_uuid', 'cover_image',
        'preview_image_width', 'preview_image_height', 'original_document'
        and a 'metadata' mapping ('number_of_pages' and 'toc' are optional
        inside it and default to ``0`` and ``[]``)
    :param timeout: optional timeout forwarded to ``requests.put``; the
        default ``None`` keeps the previous behaviour (no timeout)
    :return: None
    :raises KeyError: if 'document_uuid' is missing (raised before the
        reporting ``try`` block, so it is not sent to ``sentry_client``)
    :raises Exception: anything raised while building the payload or
        sending the request is passed to ``sentry_client`` and re-raised
    """

    document_uuid = message_dict['document_uuid']
    endpoint = 'http://%s:%d/%s/%s/' % (host, port, url, document_uuid)
    headers = {'content-type': 'application/json; charset=utf-8'}

    try:
        message = {
            'is_document_processed': True,
            'coverImage': message_dict['cover_image'],
            'previewImageWidth': message_dict['preview_image_width'],
            'previewImageHeight': message_dict['preview_image_height'],
            'fileType': 'pdf',
            'pageCount': message_dict['metadata'].get('number_of_pages', 0),
            'toc': message_dict['metadata'].get('toc', []),
            'document_file_original': message_dict['original_document'],
        }
        response = requests.put(
            endpoint,
            json=message,
            headers=headers,
            timeout=timeout,
        )
        # A non-200 answer is only reported; it does not raise.
        if response.status_code != 200:
            sentry_client('API Error: CODE %d, CONTENT: %s' %
                          (response.status_code, str(response.content)))
    except Exception as e:
        sentry_client(e)
        # Bare ``raise`` re-raises the active exception with its original
        # traceback instead of restarting it from this line.
        raise
예제 #2
0
def save_to_seaweedfs(message):
    """
    Upload the preview images and the original document, then update
    ``message`` with the results.

    The original document is renamed on disk to ``<document_name><ext>``
    (keeping its own extension) before being handed to
    ``save_file_to_filler``. Afterwards the directory
    ``/tmp/<document_uuid>`` is removed and 'preview_images' is dropped
    from the message.

    :param message: dict, must contain 'preview_images' (non-empty
        sequence of image paths), 'document_uuid', 'original_document'
        (path to the file) and 'document_name'
    :return: the same ``message`` dict with 'cover_image',
        'preview_image_width', 'preview_image_height' set and
        'original_document' replaced by the new file name, or ``None``
        when any step fails (the error is passed to ``sentry_client``)
    """
    host = CONFIG['seaweed_filler_host']
    port = CONFIG['seaweed_filler_port']

    try:
        images_list = message['preview_images']
        _dir = message['document_uuid']
        original_document = message['original_document']
        document_name = message['document_name']
        result_list = save_files_filler_batch(host, port, _dir, images_list)

        # splitext returns the part after the LAST dot (including the dot),
        # so "report.v2.pdf" keeps ".pdf" and a name without any dot yields
        # an empty extension instead of raising IndexError.
        parent_dir, file_name = os.path.split(original_document)
        extension = os.path.splitext(file_name)[1]
        document_file = document_name + extension
        new_original_document = os.path.join(parent_dir, document_file)

        os.rename(original_document, new_original_document)
        save_file_to_filler(host, port, _dir, new_original_document)

        message['cover_image'] = result_list[0]
        width, height = get_image_size(images_list[0])
        message['preview_image_width'] = width
        message['preview_image_height'] = height
        # Only the file name (not the full path) is stored from here on.
        message['original_document'] = document_file

        # TODO: add /tmp to conf
        shutil.rmtree(os.path.join('/tmp', _dir))
        del message['preview_images']

        return message
    except Exception as e:
        # Best-effort step: report the failure and signal it with None.
        sentry_client(e)
        return None
예제 #3
0
def read_metadata(pdf_path, document_uuid, document_name):
    """
    Read the metadata of the document at ``pdf_path`` and wrap it,
    together with the identifying fields, in a new dict.

    :param pdf_path: path handed to ``get_metadata``; also stored under
        'original_document'
    :param document_uuid: value stored under 'document_uuid'
    :param document_name: value stored under 'document_name'
    :return: dict with the keys 'original_document', 'metadata',
        'document_uuid' and 'document_name', or ``None`` when
        ``get_metadata`` raises (the error is passed to ``sentry_client``)
    """
    try:
        pdf_metadata = get_metadata(pdf_path)
    except Exception as error:
        sentry_client(error)
        return None

    return {
        'original_document': pdf_path,
        'metadata': pdf_metadata,
        'document_uuid': document_uuid,
        'document_name': document_name,
    }
예제 #4
0
def save_file(host, port, file_path):
    """
    Save file to Seaweed FS

    The file is posted as multipart field 'upload_file' to
    ``http://<host>:<port>/submit``.

    :param host: str, master host
    :param port: int, master port
    :param file_path: str, path to file
    :return: str, file fid (the 'fid' entry of the JSON response)
    :raises IOError: if the file cannot be opened (raised before the
        reporting ``try`` block, so it is not sent to ``sentry_client``)
    :raises Exception: anything raised while posting or decoding the
        response is passed to ``sentry_client`` and re-raised
    """
    url = 'http://%s:%d/submit' % (host, port)
    # The context manager closes the handle on every exit path; previously
    # the opened file was never closed.
    with open(file_path, 'rb') as upload_file:
        try:
            response = requests.post(url, files={'upload_file': upload_file})
            return response.json()['fid']
        except Exception as e:
            sentry_client(e)
            # Bare ``raise`` keeps the original traceback intact.
            raise
예제 #5
0
def split_document(message):
    """
    Run ``split_pdf`` on the document referenced by ``message`` and store
    its result under 'preview_images'.

    Sizes and image format are read from ``CONFIG`` ('preview_large',
    'preview_normal', 'preview_small', 'preview_format').

    :param message: dict, must contain 'original_document'
    :return: the same ``message`` with 'preview_images' added, or ``None``
        when splitting fails (the error is passed to ``sentry_client``)
    """
    source_path = message['original_document']
    split_path = 'splited'
    preview_sizes = {
        'large': CONFIG['preview_large'],
        'normal': CONFIG['preview_normal'],
        'small': CONFIG['preview_small'],
    }
    image_format = CONFIG['preview_format']

    try:
        # TODO: Add document_name to preview images
        message.update(
            preview_images=split_pdf(
                source_path, split_path, preview_sizes, image_format
            )
        )
        return message
    except Exception as error:
        sentry_client(error)
        return None