def process_HTML_kf():
    """Consume HTML-to-JSON workflow requests from Kafka and publish results.

    A consumer is instantiated on ``config.html_input_topic``. Every message
    received is passed to ``CheckingResponse.main_response_wf`` and the value
    it returns is sent to ``config.html_output_topic``.

    Failure handling:

    * If the consumer cannot be instantiated, ``Status.ERR_Consumer.value`` is
      sent to the output topic and the function returns, because there is no
      consumer to iterate over.
    * If sending a single result fails, the failure is logged (with traceback)
      and the loop moves on to the next message.
    * If any other exception escapes the consume loop, it is logged and the
      remaining messages are drained through ``file_ops.error_handler`` with a
      ``Status.ERR_Producer`` custom response.

    Returns:
        None.
    """
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)

    # Instantiation of the consumer for the HTML input topic.
    try:
        consumer_class = Consumer(config.html_input_topic, config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log.info("--- consumer running -----")
    except Exception:
        response = Status.ERR_Consumer.value
        producer_html2json = Producer(config.bootstrap_server)
        producer = producer_html2json.producer_fn()
        producer.send(config.html_output_topic, value=response)
        producer.flush()
        log.error(
            "error in kafka opertation producer flushed value on topic %s"
            % (config.html_input_topic))
        # `consumer` was never bound; continuing would raise UnboundLocalError
        # in the loops below, so stop here after reporting the failure.
        return

    try:
        log.info("trying to receive value from consumer ")
        for msg in consumer:
            log.info("value received from consumer")
            data = msg.value
            # Fresh identifiers per message: epoch time with the dot removed.
            task_id = "HTML2JSON-" + str(time.time()).replace('.', '')
            task_starttime = str(time.time()).replace('.', '')
            checking_response = CheckingResponse(
                data, task_id, task_starttime, DOWNLOAD_FOLDER)
            file_value_response = checking_response.main_response_wf()
            try:
                producer_html2json = Producer(config.bootstrap_server)
                producer = producer_html2json.producer_fn()
                producer.send(config.html_output_topic, value=file_value_response)
                producer.flush()
                log.info("producer flushed value on topic %s"
                         % (config.html_output_topic))
            except Exception:
                # Best effort per message: record the failure with its
                # traceback and keep consuming.
                log.exception(
                    "error occured in file operation of workflow and it is pushed to error queue"
                )
    except Exception as e:
        log.error(
            "error occured while listening message from consumer or flushing data to another queue %s"
            % e)
        # Drain the remaining messages, reporting each one to the error handler.
        for msg in consumer:
            log.info("value received from consumer")
            data = msg.value
            # Only the job id is needed from the parsed input.
            _, _, jobid, _, _ = file_ops.input_format(data)
            task_id = "HTML2JSON-" + str(time.time()).replace('.', '')
            response_custom = CustomResponse(Status.ERR_Producer.value, jobid, task_id)
            file_ops.error_handler(response_custom.status_code, True)
            log.info(
                "error in kafka opertation producer flushed value on error topic"
            )
def process_pdf_kf():
    """Run the Kafka loop for the PDF-to-HTML workflow.

    A consumer is instantiated on ``config.input_topic``. For each message a
    ``Response`` is built from the message value and the download folder, and
    ``workflow_response`` is called with a fresh task id and start time. When
    the result has no ``"errorID"`` key it is pushed to ``config.output_topic``
    through ``Producer.push_data_to_queue``; otherwise only a log entry is
    written.

    ``KafkaConsumerError`` and ``KafkaProducerError`` raised anywhere in that
    flow are forwarded to ``file_ops.error_handler`` and logged with
    ``log_exception``.

    Returns:
        None.
    """

    def _epoch_token():
        # Current epoch time as a string with the decimal point removed.
        return str(time.time()).replace('.', '')

    file_helper = FileOperation()
    download_dir = file_helper.create_file_download_dir(config.download_folder)
    # Placeholder identifiers; they are regenerated for every message below.
    task_id = str("PDF2HTML-" + _epoch_token())
    task_starttime = _epoch_token()

    # Instantiation of the consumer for the respective topic.
    try:
        kafka_consumer = Consumer(
            config.input_topic, config.bootstrap_server
        ).consumer_instantiate()
        log_info("process_pdf_kf", "trying to receive value from consumer ", None)

        for record in kafka_consumer:
            payload = record.value
            task_id = str("PDF2HTML-" + _epoch_token())
            task_starttime = _epoch_token()
            # The parsed input is a 5-item sequence; only the job id is used.
            _, _, jobid, _, _ = file_helper.json_input_format(payload)
            outcome = Response(payload, download_dir).workflow_response(
                task_id, task_starttime)

            if "errorID" in outcome.keys():
                log_info("process_pdf_kf", "error send to error handler", jobid)
                continue

            Producer().push_data_to_queue(
                config.output_topic, outcome, jobid, task_id)

    except KafkaConsumerError as e:
        failure = CustomResponse(Status.ERR_STATUS.value, None, None)
        failure.status_code['message'] = str(e)
        file_helper.error_handler(failure.status_code, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_pdf_kf", "Consumer didn't instantiate", None, e)

    except KafkaProducerError as e:
        # The producer error carries its own response payload in `code`.
        failure = e.code
        failure['message'] = e.message
        file_helper.error_handler(failure, "KAFKA_PRODUCER_ERROR", True)
        log_exception(
            "process_pdf_kf",
            "response send to topic %s" % (config.output_topic),
            failure['jobID'],
            e,
        )
import os
import json
from flask_restful import Resource
from flask.json import jsonify
from flask import request
from utilities.utils import FileOperation
from utilities.model_response import CheckingResponse
import werkzeug
from werkzeug.utils import secure_filename
import config
import logging
import time

# Module-level setup: a file-operation helper, the download directory it
# creates from the configured folder, and the 'file' logger.
file_ops = FileOperation()
DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
log = logging.getLogger('file')


class Html2JsonConversionWF(Resource):
    """REST resource for the html2json workflow service."""

    def post(self):
        """Read the JSON request body and return the workflow response.

        The body is parsed with ``request.get_json(force=True)`` and handed,
        together with a generated task id and start time, to
        ``CheckingResponse``; the value returned by
        ``main_response_wf(rest_request=True)`` is returned unchanged.
        """
        log.info("HTML to JSON conversion started")

        # Epoch timestamps with the decimal point stripped; the id and the
        # start time are taken from two separate clock reads.
        id_stamp = str(time.time()).replace('.', '')
        task_id = f"Html2Json-{id_stamp}"
        task_starttime = str(time.time()).replace('.', '')

        payload = request.get_json(force=True)
        workflow_result = CheckingResponse(
            payload, task_id, task_starttime, DOWNLOAD_FOLDER
        ).main_response_wf(rest_request=True)

        log.info("Conversion completed")
        return workflow_result