Example #1
 def validate_tool_response(self, tool_response, tool_details, wf_input):
     if not tool_response:
         log_error("Error from the tool: " + str(tool_details["name"]),
                   wf_input, None)
         error = post_error(
             "ERROR_FROM_TOOL",
             "Error from the tool: " + str(tool_details["name"]), None)
         client_output = self.get_wf_details_sync(wf_input, None, True,
                                                  error)
         self.update_job_details(client_output, False)
         log_info("Job FAILED, jobID: " + str(wf_input["jobID"]), wf_input)
         return client_output
     else:
         fail_msg = None
         if 'error' in tool_response.keys():
             if tool_response["error"]:
                 fail_msg = "Error from the tool: " + str(
                     tool_details["name"]) + " | Cause: " + str(
                         tool_response["error"])
         elif 'http' in tool_response.keys():
             if 'status' in tool_response["http"]:
                 if tool_response["http"]["status"] != 200:
                     fail_msg = "Error from the tool: " + str(
                         tool_details["name"]) + " | Cause: " + str(
                             tool_response["why"])
         if fail_msg:
             log_error(fail_msg, wf_input, None)
             error = post_error("ERROR_FROM_TOOL", fail_msg, None)
             client_output = self.get_wf_details_sync(
                 wf_input, None, True, error)
             self.update_job_details(client_output, False)
             log_info("Job FAILED, jobID: " + str(wf_input["jobID"]),
                      wf_input)
             return client_output
Example #2
 def page_processor(self, page, record_id, file, tmx_present, nonmt_user,
                    tmx_file_cache, translate_wf_input):
     batches, pw_dict, bw_data = self.fetch_batches_of_sentences(
         file, record_id, page, tmx_present, tmx_file_cache, False,
         translate_wf_input)
     batches_count, sentences_count, tmx_count = 0, 0, 0
     if not batches:
         log_error("No batches obtained for page: " + str(page["page_no"]),
                   translate_wf_input, None)
         return batches_count, sentences_count, tmx_count
     batches_count, tmx_count = len(batches), pw_dict["tmx_count"]
     # Pick one partition for the whole page so that all batches of a page go to the same consumer
     partition = random.choice(list(range(0, total_no_of_partitions)))
     topic = self.get_nmt_in_topic(translate_wf_input, file)
     for batch_id in batches.keys():
         batch = batches[batch_id]
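         # The record id is suffixed with the batch size (len(batch)) for downstream bookkeeping.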
         record_id_enhanced = record_id + "|" + str(len(batch))
         nmt_in = {
             "record_id": record_id_enhanced,
             "id": file["model"]["model_id"],
             "message": batch
         }
         if nonmt_user:
             producer.produce(nmt_in, anu_translator_nonmt_topic, partition)
         else:
             producer.produce(nmt_in, topic, partition)
         log_info(
             "B_ID: " + batch_id + " | SENTENCES: " + str(len(batch)) +
             " | COMPUTED: " + str(bw_data[batch_id]["computed"]) +
             " | TMX: " + str(bw_data[batch_id]["tmx_count"]),
             translate_wf_input)
         sentences_count += len(batch)
     return batches_count, sentences_count, tmx_count
Example #3
def tesseract_ocr(pdf_image_paths, desired_width, desired_height, dfs, lang,
                  jobid):

    log_info("Service ocr_text_utilities", "tesseract ocr started  ===>",
             jobid)

    try:
        start_time = time.time()
        ocr_dfs = []
        for i, df in enumerate(dfs):
            filepath = pdf_image_paths[i]
            df_updated = extract_text_from_image(filepath, desired_width,
                                                 desired_height, df, lang)
            ocr_dfs.append(df_updated)
            log_info("Service ocr_text_utilities", "OCR on page: " + str(i), jobid)

        end_time = time.time()
        extraction_time = end_time - start_time
    except Exception as e:
        log_error("Service ocr_text_utilities", "Error in tesseract ocr",
                  jobid, e)
        return None

    log_info("Service ocr_text_utilities",
             "tesseract ocr successfully completed", jobid)

    return ocr_dfs
Example #4
def get_pdfs(page_dfs,lang):
    start_time          = time.time()
    try:
        p_dfs    = []
        pages    = len(page_dfs)
        block_configs = config.BLOCK_CONFIGS
        for page_index in range(pages):
            page_df     = page_dfs[page_index]
            cols        = page_df.columns.values.tolist()
            df          = pd.DataFrame(columns=cols)
            for index, row in page_df.iterrows():
                if row['children'] is None:
                    d_tmp = page_df.iloc[index]
                    d_tmp['avg_line_height'] = int(d_tmp['text_height'])
                    df = df.append(d_tmp)
                else:
                    dfs = process_block(page_df.iloc[index], block_configs,lang)
                    df  = df.append(dfs)
            p_dfs.append(df)

    except Exception as e:
        log_error('Error in creating p_dfs', app_context.application_context, e)
        return None

    end_time         = time.time()
    elapsed_time     = end_time - start_time
    log_info('Processing of get_pdfs completed in {}/{}, average per page {}'.format(elapsed_time, len(p_dfs), (elapsed_time/len(p_dfs))), app_context.application_context)
    return p_dfs
Example #5
 def post(self):
     json_data = request.get_json(force=True)
     app_context.init()
     app_context.application_context = json_data
     log_info(
         "Resource Layout_Detector_WF  Layout_Detector service started",
         app_context.application_context)
     task_id = str("LD-" + str(time.time()).replace('.', '')[0:13])
     task_starttime = int(str(time.time()).replace('.', '')[0:13])  # parse the 13-digit epoch timestamp directly instead of eval()
     try:
         error_validator = ValidationResponse(DOWNLOAD_FOLDER)
         if error_validator.format_error(json_data) is True:
             response_gen = Response(json_data, DOWNLOAD_FOLDER)
             response = response_gen.workflow_response(
                 task_id, task_starttime)
             log_info(
                 "Resource Layout_Detector_WF Layout_Detector api response completed",
                 app_context.application_context)
             return jsonify(response)
     except FormatError as e:
         log_error(
             "Resource Layout_Detector_WF Input json format is not correct or dict_key is missing",
             app_context.application_context, e)
         return Status.ERR_request_input_format.value
Example #6
def extract_pdf_metadata(filename, working_dir, base_dir):
    start_time          = time.time()
    pdf_filepath        = os.path.join(base_dir, filename)

    log_info('filepath {}, working_dir {}'.format(pdf_filepath, working_dir), app_context.application_context)
    try:
        pdf_image_paths         = extract_image_paths_from_pdf(pdf_filepath, working_dir)
        pdf_xml_filepath        = extract_xml_path_from_digital_pdf(pdf_filepath, working_dir)
    except Exception as e:
        log_error('error extracting xml information of {}'.format(pdf_filepath), app_context.application_context, e)
        return None, None, None
    log_info('Extracted xml of {}'.format(pdf_filepath), app_context.application_context)

    try:
        pdf_bg_img_filepaths    = extract_html_bg_image_paths_from_digital_pdf(pdf_filepath, working_dir)
    except Exception as e:
        log_error('unable to extract background images of {}'.format(pdf_filepath), app_context.application_context, e)
        return None, None, None

    log_info('Extracted background images of {}'.format(pdf_filepath), app_context.application_context)

    end_time            = time.time()
    extraction_time     = end_time - start_time
    log_info('Extraction of {} completed in {}'.format(pdf_filepath, extraction_time), app_context.application_context)

    return pdf_xml_filepath, pdf_image_paths, pdf_bg_img_filepaths
Example #7
def extract_pdf_metadata(filename, working_dir, base_dir, jobid):
    start_time = time.time()
    pdf_filepath = Path(os.path.join(base_dir, filename))
    try:
        pdf_image_paths = extract_image_paths_from_pdf(pdf_filepath,
                                                       working_dir)
        pdf_xml_dir = extract_xml_from_digital_pdf(pdf_filepath, working_dir)
    except Exception as e:
        log_error("Service xml_utils", "Error in extracting xml", jobid, e)
        return None, None, None
    try:
        os.system('pdftohtml -c ' + str(pdf_filepath) + ' ' +
                  str(working_dir) + '/')
    except Exception as e:
        log_error("Service get_xml", "Error in extracting html", jobid, e)

    end_time = time.time()
    extraction_time = end_time - start_time

    xml_files = read_directory_files(pdf_xml_dir, pattern='*.xml')
    bg_files = None  # background-image extraction is disabled in this version

    log_info('Service get_xml',
             'Successfully extracted xml and images in {}s'.format(extraction_time), jobid)

    return xml_files, bg_files, pdf_image_paths
Example #8
def extract_images(app_context, base_dir):

    files = get_files(app_context.application_context)
    file_images = []
    try:
        for file in files:
            file_properties = File(file)
            file_format = file_properties.get_format()

            if file_format in ['PDF', 'pdf']:
                filename = file_properties.get_name()
                image_paths = extract_pdf_images(filename, base_dir)
                file_images.append(image_paths)
            elif file_format in [
                    'PNG', 'JPEG', 'BMP', 'jpg', 'png', 'bmp', 'jpeg'
            ]:
                filename = file_properties.get_name()
                image_paths = [os.path.join(base_dir, filename)]
                file_images.append(image_paths)
            else:
                log_info(
                    "currently we do not support {} files.".format(
                        file_format), app_context.application_context)
                return None
    except Exception as e:
        log_error('error extracting images: ' + str(e),
                  app_context.application_context, e)
        return None

    return file_images
Example #9
def prepocess_pdf_regions(pdf_data, flags, config=preprocess_config):
    xml_dfs = pdf_data['in_dfs']
    page_height = pdf_data['page_height']
    try:
        start_time = time.time()
        header_region = find_header(xml_dfs, page_height, config)
        footer_region = find_footer(xml_dfs, page_height, config)
        end_time = time.time() - start_time
        log_info(
            'Header Footer detection completed successfully  in time {} '.
            format(end_time), app_context.application_context)
        log_info('Footers found {} '.format(len(footer_region)),
                 app_context.application_context)
        log_info('Headers found {}'.format(len(header_region)),
                 app_context.application_context)

        pdf_data['header_region'], pdf_data[
            'footer_region'] = header_region, footer_region
        return pdf_data
    except Exception as e:
        log_error('Error in finding header/footer: ' + str(e),
                  app_context.application_context, e)
        pdf_data['header_region'], pdf_data['footer_region'] = pd.DataFrame(
        ), pd.DataFrame()
        return pdf_data

Example #10
def extract_pdf_images(filename, base_dir):
    start_time = time.time()

    working_dir, ret = create_pdf_processing_paths(filename, base_dir)
    if not ret:  # working_dir is None when directory creation failed
        return None
    pdf_filepath = os.path.join(base_dir, filename)
    log_info('filepath {}, working_dir {}'.format(pdf_filepath, working_dir),
             app_context.application_context)

    try:
        pdf_image_paths = extract_image_paths_from_pdf(pdf_filepath,
                                                       working_dir)
        log_info('Extracting images of {}'.format(pdf_filepath),
                 app_context.application_context)
    except Exception as e:
        log_error('error extracting images of {}'.format(pdf_filepath),
                  app_context.application_context, e)
        return None
    end_time = time.time()
    extraction_time = end_time - start_time
    log_info(
        'Extraction of {} completed in {}'.format(pdf_filepath,
                                                  extraction_time),
        app_context.application_context)

    return pdf_image_paths
Example #11
 def call_api(self, uri, method, api_input, params, user_id):
     try:
         log_info("URI: " + uri, None)
         response = None
         if method == "POST":
             api_headers = {
                 'userid': user_id,
                 'x-user-id': user_id,
                 'Content-Type': 'application/json'
             }
             response = requests.post(url=uri,
                                      json=api_input,
                                      headers=api_headers)
         elif method == "GET":
             api_headers = {'userid': user_id}
             response = requests.get(url=uri,
                                     params=params,
                                     headers=api_headers)
         if response is not None:
             if response.text is not None:
                 log_info(response.text, None)
                 return json.loads(response.text)
             else:
                 log_error("API response was None, URI: " + str(uri),
                           api_input, None)
                 return None
         else:
             log_error("API call failed! URI: " + str(uri), api_input, None)
             return None
     except Exception as e:
         log_exception("Exception while making the api call: " + str(e),
                       api_input, e)
         return None
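
A minimal usage sketch for the helper above (the URI, payload, and user id are hypothetical):

    result = self.call_api("https://host/api/v1/translate", "POST",
                           {"text": "hello"}, None, "user-123")
    if result is None:
        log_error("Translation API call failed", {"text": "hello"}, None)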
Example #12
def process_tokenization_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.file_download(config.download_folder)
    # instantiation of consumer for the respective topic
    try:
        consumer_class = Consumer(config.input_topic, config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log_info("process_tokenization_kf : trying to receive value from consumer ", None)
        for msg in consumer:
            data = msg.value
            log_info("process_tokenization_kf : received input json from input topic consumer ", data)
            task_id = str("TOK-" + str(time.time()).replace('.', '')[0:13])
            task_starttime = int(str(time.time()).replace('.', '')[0:13])
            input_files, workflow_id, jobid, tool_name, step_order, user_id = file_ops.json_input_format(data)
            response_gen = Response(data, DOWNLOAD_FOLDER)
            file_value_response = response_gen.workflow_response(task_id, task_starttime)
            if "errorID" not in file_value_response.keys():
                producer = Producer()
                producer.push_data_to_queue(config.output_topic, file_value_response, data, task_id)
            else:
                log_error("process_tokenization_kf : error send to error handler", data, None)
    except KafkaConsumerError as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, None, None)
        response_custom.status_code['message'] = str(e)
        file_ops.error_handler(response_custom.status_code, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_tokenization_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = e.code
        response_custom['message'] = e.message      
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_tokenization_kf : response send to topic %s"%(config.output_topic), data, e)
Example #13
def start_kafka():
    try:
        t1 = threading.Thread(target=process_fc_kf, name='keep_on_running')
        t1.start()
        log_info("multithread : Kafka running on multithread", None)
    except Exception as e:
        log_error("multithread : Error while running custom threads", None, e)
Example #14
File: main.py Project: AgHarsh/anuvaad
def doc_pre_processing(filename, base_dir, jobid):
    '''
        Preprocessing on input pdf to get:
            - xml files
            - images 
            - background images 
            - header and footer regions

    '''
    log_info("Service main", "document preprocessing started  ===>", jobid)

    img_dfs, xml_dfs, page_width, page_height, working_dir, pdf_image_paths = get_xml.process_input_pdf(
        filename, base_dir, jobid)
    multiple_pages = False
    pages = len(xml_dfs)
    if pages > 1:
        multiple_pages = True
    header_region, footer_region = None, None
    try:
        header_region, footer_region = prepocess_pdf_regions(
            xml_dfs, page_height)
    except Exception as e:
        log_error("Service prepocess",
                  "Error in finding footer and header region", jobid, e)

    log_info("Service main", "document preprocessing successfully completed",
             jobid)

    return img_dfs, xml_dfs, pages, working_dir, header_region, footer_region, multiple_pages, page_width, page_height, pdf_image_paths
Example #15
def df_to_json(p_df):
    page_data = []
    try:
        p_df      = p_df.where(p_df.notnull(), None)
        if len(p_df) > 0:
            p_df = get_xml.drop_cols(p_df)
            for index, row in p_df.iterrows():
                block = row.to_dict()
                for key in block.keys():
                    if key not in ['text', 'children']:
                        try:
                            block[key] = int(block[key])
                        except (TypeError, ValueError):
                            pass

                if block['attrib'] != "TABLE":
                    if 'children' in block and block['children'] is not None:
                        block['children'] = df_to_json(pd.read_json(row['children']))
                page_data.append(block)
            
        return page_data
    except Exception as e:
        log_error("Service get_response", "Error in converting dataframe to json", None, e)
        return None
Example #16
def get_hdfs(in_dfs, header_region, footer_region):

    start_time          = time.time()
    try:
        pages = len(in_dfs)
        multiple_pages = False
        if pages > 1:
            multiple_pages = True
        h_dfs = []
        document_configs = config.DOCUMENT_CONFIGS
        for page_index in range(pages):
            page_df   = in_dfs[page_index]
            if multiple_pages:
                page_df = tag_heaader_footer_attrib(header_region, footer_region, page_df)

            h_df    = merge_horizontal_blocks(page_df, document_configs, debug=False)
            h_dfs.append(h_df)
    except Exception as e:
        log_error('Error in creating h_dfs: ' + str(e), app_context.application_context, e)
        return None

    end_time         = time.time()
    elapsed_time     = end_time - start_time
    log_info('Processing of get_hdfs completed in {}/{}, average per page {}'.format(elapsed_time, len(in_dfs), (elapsed_time/len(in_dfs))), app_context.application_context)

    return h_dfs
Example #17
def resize_image(image_paths):
    '''
    Google ocr will not process an image if it has more than 65M pixels
    '''
    max_res = 65_000_000
    try:
        if image_paths is not None and len(image_paths) > 0:
            for path in image_paths:
                img = cv2.imread(path)
                img_res = img.shape[0] * img.shape[1]

                if img_res >= max_res:
                    log_info(
                        "Resolution of pdf too high scaling down to enable OCR",
                        app_context.application_context)
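                    # Scale both axes by sqrt(max_res / img_res) so the scaled pixel area lands just under max_res.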
                    scaling_factor = math.sqrt(max_res / img_res)
                    img = cv2.resize(img,
                                     None,
                                     fx=scaling_factor,
                                     fy=scaling_factor,
                                     interpolation=cv2.INTER_AREA)
                    cv2.imwrite(path, img)
    except Exception as e:
        log_error('error in resizing images ' + str(e),
                  app_context.application_context, e)
Example #18
File: app.py Project: eagle-sb/anuvaad
def start_kafka():
    try:
        t1 = threading.Thread(target=process_pdf_kf, name='keep_on_running')
        t1.start()
        log_info("multithread", "Kafka running on multithread", None)
    except Exception as e:
        log_error("threading", "ERROR WHILE RUNNING CUSTOM THREADS ", None, e)
Example #19
def children_condition(children):
    try:
        return merge_children(children)

    except Exception as e:
        log_error("Error breaking regions type1 " + str(e),
                  app_context.application_context, e)
        return None
Example #20
 def fetch_batches_of_blocks(self, record_id, page_no, text_blocks, file,
                             tmx_present, tmx_file_cache, third_party,
                             translate_wf_input):
     batch_id, tmx_count, computed = str(uuid.uuid4()), 0, 0
     sentences_for_trans, batch_wise_tmx, bw_tmx_count, bw_computed_count = {}, {}, 0, 0
     for block in text_blocks:
         block_id = block["block_id"]
         if 'tokenized_sentences' in block.keys():
             for sentence in block["tokenized_sentences"]:
                 tmx_phrases = []
                 if tmx_present:
                     tmx_phrases, res_dict = self.fetch_tmx(
                         sentence["src"], file, tmx_present, tmx_file_cache,
                         translate_wf_input)
                     bw_tmx_count += len(tmx_phrases)
                     bw_computed_count += res_dict["computed"]
                 node_id = str(record_id) + "|" + str(page_no) + "|" + str(
                     block_id)
                 if not third_party:
                     sent_nmt_in = {
                         "src": sentence["src"],
                         "s_id": sentence["s_id"],
                         "n_id": node_id,
                         "batch_id": batch_id,
                         "tmx_phrases": tmx_phrases
                     }
                 else:
                     s_id = node_id + "xxx" + batch_id + "xxx" + sentence[
                         "s_id"]
                     sent_nmt_in = {
                         "src": sentence["src"],
                         "s_id": s_id,
                         "tmx_phrases": tmx_phrases
                     }
                 if batch_id in sentences_for_trans.keys():
                     sentence_list = sentences_for_trans[batch_id]
                     sentence_list.append(sent_nmt_in)
                     sentences_for_trans[batch_id] = sentence_list
                 else:
                     sentence_list = [sent_nmt_in]
                     sentences_for_trans[batch_id] = sentence_list
                 batch_wise_tmx[batch_id] = {
                     "tmx_count": bw_tmx_count,
                     "computed": bw_computed_count
                 }
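                 # Start a fresh batch once the current one reaches the NMT batch-size limit.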
                 if len(sentences_for_trans[batch_id]) == nmt_max_batch_size:
                     batch_id = str(uuid.uuid4())
                     bw_tmx_count, bw_computed_count = 0, 0
         else:
             log_error(
                 "There are no tokenised sentences in block: " +
                 str(block_id), translate_wf_input, None)
             continue
     for batch in batch_wise_tmx.keys():
         tmx_count += batch_wise_tmx[batch]["tmx_count"]
     return sentences_for_trans, {"tmx_count": tmx_count}, batch_wise_tmx
Example #21
File: app.py Project: eagle-sb/anuvaad
def start_kafka():
    try:
        t1 = threading.Thread(target=kf_service.process_kf_request_payload,
                              name='sentence-grader-kafka-worker-thread')
        t1.start()
        log_info("starting kafka consumer thread", LOG_WITHOUT_CONTEXT)

    except Exception as e:
        log_error("threading ERROR WHILE RUNNING CUSTOM THREADS ",
                  LOG_WITHOUT_CONTEXT, e)
Example #22
def process_bg_image(bg_img):
    bg_image_data = []
    try:
        with open(bg_img, "rb") as img_file:
            img_base64 = base64.b64encode(img_file.read())
            img_base64 = img_base64.decode('ascii')
            bg_image_data.append(img_base64)
            return bg_image_data
    except Exception as e:
        log_error("Service get_response", "Error in processing bg_image", None, e)
        return None
Example #23
def break_block(v_block):
    try:
        block_configs = config.BLOCK_CONFIGS
        if v_block['children'] is not None and len(v_block['children']) < 2:
            log_info('Block has fewer than 2 children, returning as-is: {}'.format(v_block), app_context.application_context)
            return [v_block]
        else:
            return break_paragraph(v_block, block_configs)
    except Exception as e:
        log_error('Error in breaking blocks: ' + str(e), app_context.application_context, e)
        return None
Example #24
def get_vdfs(pages, h_dfs, document_configs, debug=False):
    v_dfs = []
    try:
        for page_index in range(pages):
            h_df = h_dfs[page_index]
            v_df = merge_vertical_blocks(h_df, document_configs, debug=debug)
            v_dfs.append(v_df)
    except Exception as e:
        log_error("Service get_xml", "Error in creating v_dfs", None, e)

    return v_dfs
Example #25
def mask_image(image, regions, image_width, image_height, input_json, margin=0, fill=255):
    for table in regions:
        try:
            row_top, row_bottom,row_left,row_right = end_point_correction(table, 2,image_height,image_width)
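            # Grayscale images have 2 dimensions, colour images 3; fill the masked region either way.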
            if len(image.shape) == 2 :
                image[row_top - margin : row_bottom + margin , row_left - margin: row_right + margin] = fill
            if len(image.shape) == 3 :
                image[row_top - margin: row_bottom + margin, row_left - margin: row_right + margin,:] = fill
        except Exception as e:
            log_error("Service TableExtractor: Error in masking bg image " + str(e), input_json, e)
            return image
    return image
Example #26
def block_translate():
    service = BlockTranslationService()
    validator = TranslatorValidator()
    data = request.get_json()
    error = validator.validate_block_translate(data)
    if error is not None:
        log_error("Error in Block Translate: " + str(error), data, None)
        log_error("Input: " + str(data), data, None)
        data["state"], data["status"], data["error"] = "TRANSLATED", "FAILED", error
        return data, 400
    response = service.block_translate(data)
    return jsonify(response), 200
Example #27
 def post(self):
     log_info("Resource Pdf2HtmlConversion", "pdf2html service started", None)
     json_data = request.get_json(force=True)
     try:
         error_validator = ValidationResponse(DOWNLOAD_FOLDER)
         if error_validator.format_error(json_data) is True:
             response_gen = Response(json_data, DOWNLOAD_FOLDER)
             response = response_gen.nonwf_response()
             log_info("Resource Pdf2HtmlConversion", "pdf2html api response completed", None)
             return jsonify(response)
     except FormatError as e:
         log_error("Resource Pdf2HtmlConversion", "Input json format is not correct or dict_key is missing", None, e)
         return Status.ERR_request_input_format.value
Example #28
def create_pdf_processing_paths(filepath, base_dir):

    filename    = os.path.basename(filepath)
    working_dir = os.path.join(base_dir, os.path.splitext(filename)[0] + '_' + str(uuid.uuid1()))
    ret         = create_directory(working_dir)

    if not ret:
        log_error('unable to create working directory {}'.format(working_dir), app_context.application_context, None)
        return None, False

    log_info('created processing directories successfully {}'.format(working_dir), app_context.application_context)

    return working_dir, True
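
Callers are expected to check the returned flag before using the directory; a minimal sketch (extract_image_paths_from_pdf as used elsewhere in this listing):

    working_dir, ok = create_pdf_processing_paths(filename, base_dir)
    if not ok:
        return None  # directory creation failed; working_dir is None
    image_paths = extract_image_paths_from_pdf(os.path.join(base_dir, filename), working_dir)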
Example #29
File: app.py Project: eagle-sb/anuvaad
def start_kafka():
    try:
        t1 = threading.Thread(target=process_vision_ocr_kf, name='vision_ocr-consumer-thread')
        t1.start()
        log_info("multithread Kafka running on multithread", LOG_WITHOUT_CONTEXT)

        t2 = threading.Thread(target=vision_ocr_request_worker, name='vision_ocr-worker-thread')
        t2.start()
        log_info("Starting vision_ocr_request_worker", LOG_WITHOUT_CONTEXT)


    except Exception as e:
        log_error("threading ERROR WHILE RUNNING CUSTOM THREADS ", LOG_WITHOUT_CONTEXT, e)
Example #30
def start_kafka():
    try:
        t1 = threading.Thread(target=process_block_merger_kf,
                              name='BM-consumer-thread')
        t1.start()
        log_info("multithread Kafka running on multithread", None)

        t2 = threading.Thread(target=block_merger_request_worker,
                              name='BM-worker-thread')
        t2.start()
        log_info("Starting block_merger_request_worker", None)

    except Exception as e:
        log_error("threading ERROR WHILE RUNNING CUSTOM THREADS ", None, e)