def update_document(host, port, url, message_dict):
    """
    Save document to core api

    Sends a PUT request with a JSON payload built from ``message_dict`` to
    ``http://<host>:<port>/<url>/<document_uuid>/``.

    A response whose status code is not 200 is reported through
    ``sentry_client`` but does NOT raise; the function returns normally.

    :param host: str, core api host
    :param port: int, core api port (formatted with ``%d``)
    :param url: str, path segment placed between the port and the uuid
    :param message_dict: dict providing ``document_uuid``, ``cover_image``,
        ``preview_image_width``, ``preview_image_height``, ``metadata``
        (a dict) and ``original_document``
    :return: None
    :raises KeyError: if ``document_uuid`` is missing; this lookup happens
        before the reporting ``try`` block, so it is not sent to
        ``sentry_client``
    :raises Exception: any error raised while building or sending the
        payload is passed to ``sentry_client`` and then re-raised
    """
    document_uuid = message_dict['document_uuid']
    url = 'http://%s:%d/%s/%s/' % (host, port, url, document_uuid)
    headers = {'content-type': 'application/json; charset=utf-8'}
    try:
        metadata = message_dict['metadata']
        message = {
            'is_document_processed': True,
            'coverImage': message_dict['cover_image'],
            'previewImageWidth': message_dict['preview_image_width'],
            'previewImageHeight': message_dict['preview_image_height'],
            'fileType': 'pdf',
            'pageCount': metadata.get('number_of_pages', 0),
            'toc': metadata.get('toc', []),
            'document_file_original': message_dict['original_document'],
        }
        response = requests.put(
            url,
            json=message,
            headers=headers,
        )
        if response.status_code != 200:
            # Report only: a non-200 answer is not turned into an exception.
            sentry_client('API Error: CODE %d, CONTENT: %s' %
                          (response.status_code, str(response.content)))
    except Exception as e:
        sentry_client(e)
        # Bare raise re-raises the active exception with its original
        # traceback intact.
        raise
def save_to_seaweedfs(message):
    """
    Upload the preview images and the original document, then update message

    Steps, in order:

    1. Upload ``message['preview_images']`` with ``save_files_filler_batch``,
       passing ``message['document_uuid']`` as the directory argument.
    2. Rename the original document on disk to ``<document_name><ext>``
       (same directory, extension of the original file kept) and upload it
       with ``save_file_to_filler``.
    3. Store the first upload result as ``cover_image`` and the size of the
       first preview image as ``preview_image_width`` /
       ``preview_image_height``.
    4. Replace ``original_document`` with the new file name (no directory).
    5. Remove the ``/tmp/<document_uuid>`` directory tree and drop
       ``preview_images`` from the message.

    :param message: dict with ``preview_images``, ``document_uuid``,
        ``original_document`` and ``document_name``; mutated in place
    :return: the same ``message`` dict on success; ``None`` when any step
        raises (the exception is passed to ``sentry_client`` and swallowed)
    """
    host = CONFIG['seaweed_filler_host']
    port = CONFIG['seaweed_filler_port']
    try:
        images_list = message['preview_images']
        _dir = message['document_uuid']
        original_document = message['original_document']
        document_name = message['document_name']

        result_list = save_files_filler_batch(host, port, _dir, images_list)

        parent_dir, original_name = os.path.split(original_document)
        # splitext returns the final suffix including its leading dot
        # ('' when there is none), so 'a.b.pdf' yields '.pdf' and a name
        # without any dot no longer raises IndexError.
        extension = os.path.splitext(original_name)[1]
        document_file = '%s%s' % (document_name, extension)
        new_original_document = os.path.join(parent_dir, document_file)
        os.rename(original_document, new_original_document)
        save_file_to_filler(host, port, _dir, new_original_document)

        message['cover_image'] = result_list[0]
        width, height = get_image_size(images_list[0])
        message['preview_image_width'] = width
        message['preview_image_height'] = height
        message['original_document'] = document_file

        # TODO: add /tmp to conf
        shutil.rmtree(os.path.join('/tmp', _dir))
        del message['preview_images']
        return message
    except Exception as e:
        # Best-effort: report and fall through, returning None to the caller.
        sentry_client(e)
def read_metadata(pdf_path, document_uuid, document_name):
    """
    Build the initial message dict for a document

    Calls ``get_metadata(pdf_path)`` and bundles the result with the
    arguments it was given.

    :param pdf_path: path passed to ``get_metadata`` and stored as
        ``original_document``
    :param document_uuid: stored unchanged as ``document_uuid``
    :param document_name: stored unchanged as ``document_name``
    :return: dict with ``original_document``, ``metadata``,
        ``document_uuid`` and ``document_name``; ``None`` when an exception
        is raised (it is passed to ``sentry_client`` and swallowed)
    """
    try:
        extracted = get_metadata(pdf_path)
        result = {
            'original_document': pdf_path,
            'metadata': extracted,
            'document_uuid': document_uuid,
            'document_name': document_name,
        }
    except Exception as error:
        sentry_client(error)
        return None
    else:
        return result
def save_file(host, port, file_path):
    """
    Save file to Seaweed FS

    POSTs the file as the multipart field ``upload_file`` to
    ``http://<host>:<port>/submit`` and returns the ``fid`` entry of the
    JSON response.

    :param host: str, master host
    :param port: int, master port
    :param file_path: str, path to file
    :return: str, file fid
    :raises Exception: any error raised while posting or decoding the
        response is passed to ``sentry_client`` and then re-raised; errors
        from opening the file propagate without being reported
    """
    # The context manager closes the handle once the upload finishes or
    # fails; it was previously left open.
    with open(file_path, 'rb') as upload:
        url = 'http://%s:%d/submit' % (host, port)
        try:
            response = requests.post(url, files={'upload_file': upload})
            return response.json()['fid']
        except Exception as e:
            sentry_client(e)
            # Bare raise keeps the original traceback.
            raise
def split_document(message):
    """
    Generate preview images for the document and attach them to message

    Reads the preview sizes and format from ``CONFIG``, calls ``split_pdf``
    on ``message['original_document']`` with the output path ``'splited'``,
    and stores the result under ``message['preview_images']``.

    :param message: dict with ``original_document``; mutated in place
    :return: the same ``message`` dict on success; ``None`` when
        ``split_pdf`` (or the update) raises (the exception is passed to
        ``sentry_client`` and swallowed)
    """
    source_pdf = message['original_document']
    output_path = 'splited'
    preview_sizes = {
        'large': CONFIG['preview_large'],
        'normal': CONFIG['preview_normal'],
        'small': CONFIG['preview_small'],
    }
    image_format = CONFIG['preview_format']
    try:
        # TODO: Add document_name to preview images
        message['preview_images'] = split_pdf(
            source_pdf, output_path, preview_sizes, image_format
        )
    except Exception as error:
        sentry_client(error)
        return None
    return message