Пример #1
0
def consume_nmt():
    """Kafka consumer loop for NMT output topics; delegates each message to TranslatorService."""
    try:
        nmt_utils = TranslatorUtils()
        topic_list = nmt_utils.get_topics_from_models()
        topic_list.append(anu_nmt_output_topic)
        kafka_consumer = instantiate(topic_list)
        translator = TranslatorService()
        tag = ''.join(random.choice(string.ascii_letters) for _ in range(4))
        log_prefix = "Translator-NMT-" + "(" + tag + ")"
        log_info(log_prefix + " Running..........", None)
        while True:
            for record in kafka_consumer:
                payload = {}
                try:
                    payload = record.value
                    if not payload:
                        # Empty message: restart iteration on the consumer.
                        break
                    log_info(log_prefix + " | Received on Topic: " + record.topic + " | Partition: " + str(record.partition), payload)
                    translator.process_nmt_output(payload)
                except Exception as e:
                    log_exception(log_prefix + " Exception in translator nmt while consuming: " + str(e), payload, e)
                    post_error("TRANSLATOR_CONSUMER_ERROR", "Exception in translator while consuming: " + str(e), None)
    except Exception as e:
        log_exception("Exception while starting the translator nmt consumer: " + str(e), None, e)
        post_error("TRANSLATOR_CONSUMER_EXC", "Exception while starting translator consumer: " + str(e), None)
Пример #2
0
def core_consume():
    """Kafka consumer loop for the WFM core topic; hands each message to WFMService."""
    try:
        wfm_service = WFMService()
        topic_list = [anu_etl_wfm_core_topic]
        kafka_consumer = instantiate(topic_list)
        tag = ''.join(random.choice(string.ascii_letters) for _ in range(4))
        log_prefix = "WFM-Core-" + "(" + tag + ")"
        log_info(log_prefix + " | Running..........", None)
        log_info(log_prefix + " | Topics: " + str(topic_list), None)
        while True:
            for record in kafka_consumer:
                payload = {}
                try:
                    if record:
                        payload = record.value
                        log_info(log_prefix + " | Received on Topic: " + record.topic + " | Partition: " + str(record.partition), payload)
                        wfm_service.initiate_wf(payload)
                except Exception as e:
                    log_exception(log_prefix + " | Exception while consuming: " + str(e), payload, e)
                    post_error("WFM_CORE_CONSUMER_ERROR", "Exception while consuming: " + str(e), None)
    except Exception as e:
        log_exception("Exception while starting the wfm core consumer: " + str(e), None, e)
        post_error("WFM_CONSUMER_ERROR", "Exception while starting wfm core consumer: " + str(e), None)
Пример #3
0
 def validate_tool_response(self, tool_response, tool_details, wf_input):
     """Validate a tool's response for a workflow step.

     On a missing or failed response, builds the error client output,
     marks the job FAILED via update_job_details and returns that
     client output. Returns None implicitly when the response is healthy.
     """
     if not tool_response:
         # No response at all from the tool: fail the whole job.
         log_error("Error from the tool: " + str(tool_details["name"]),
                   wf_input, None)
         error = post_error(
             "ERROR_FROM_TOOL",
             "Error from the tool: " + str(tool_details["name"]), None)
         client_output = self.get_wf_details_sync(wf_input, None, True,
                                                  error)
         self.update_job_details(client_output, False)
         log_info("Job FAILED, jobID: " + str(wf_input["jobID"]), wf_input)
         return client_output
     else:
         fail_msg = None
         # NOTE(review): because of the elif, a present-but-falsy 'error'
         # key skips the 'http' status check entirely — confirm intended.
         if 'error' in tool_response.keys():
             if tool_response["error"]:
                 fail_msg = "Error from the tool: " + str(
                     tool_details["name"]) + " | Cause: " + str(
                         tool_response["error"])
         elif 'http' in tool_response.keys():
             if 'status' in tool_response["http"]:
                 if tool_response["http"]["status"] != 200:
                     # NOTE(review): reads a top-level "why" key, not
                     # tool_response["http"]["why"] — verify the schema.
                     fail_msg = "Error from the tool: " + str(
                         tool_details["name"]) + " | Cause: " + str(
                             tool_response["why"])
         if fail_msg:
             # Tool answered but reported failure: fail the job.
             log_error(fail_msg, wf_input, None)
             error = post_error("ERROR_FROM_TOOL", fail_msg, None)
             client_output = self.get_wf_details_sync(
                 wf_input, None, True, error)
             self.update_job_details(client_output, False)
             log_info("Job FAILED, jobID: " + str(wf_input["jobID"]),
                      wf_input)
             return client_output
Пример #4
0
    def post(self):
        """Update words of a digitised document for the requesting user.

        Reads the user id from the 'userID' header (falling back to
        'x-user-id'), expects a JSON body with a non-empty 'words' list and
        delegates to digitalRepo.update_words. Returns a CustomResponse on
        success, or a post_error payload with HTTP 400 on failure.
        """
        userID = request.headers.get('userID')
        if userID is None:
            userID = request.headers.get('x-user-id')
        body = request.get_json()

        # Bug fix: the original used 'and', so a body without a 'words' key
        # evaluated body['words'] and raised KeyError instead of returning 400.
        if 'words' not in body or not body['words']:
            return post_error("Data Missing", "words are required", None), 400

        words = body['words']
        AppContext.adduserID(userID)
        log_info(
            "DigitalDocumentUpdateWordResource for user {}, number words to update {} request {}"
            .format(userID, len(words), body), AppContext.getContext())

        try:
            result = digitalRepo.update_words(userID, words)
            if result is True:
                res = CustomResponse(Status.SUCCESS.value, words)
                return res.getres()
            # update_words returned an error payload; surface it as-is.
            return result, 400

        except Exception as e:
            log_exception(
                "Exception in DigitalDocumentUpdateWordResource |{}".format(
                    str(e)), AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to update word since data is missing",
                              None), 400
Пример #5
0
    def post(self):
        """Store digitised-document files against a record.

        Validates that 'files', 'recordID' and metadata userID are present in
        the JSON body, then persists them via digitalRepo.store. Responds with
        a CustomResponse on success or a post_error payload with HTTP 400.
        """
        body = request.get_json()

        if 'files' not in body or not body['files']:
            return post_error("Data Missing", "files is required", None), 400

        if 'recordID' not in body or not body['recordID']:
            return post_error("Data Missing", "recordID is required",
                              None), 400

        files = body['files']
        userID = body['metadata']['userID']
        recordID = body['recordID']

        # Bug fix: the original had an unreachable logging block directly
        # after this return statement; it has been removed.
        if not userID:
            return post_error("Data Missing", "userID is required", None), 400

        try:
            AppContext.addRecordID(recordID)
            log_info(
                'DigitalDocumentSaveResource request received, user_id:{}, record_id:{}'
                .format(userID, recordID), AppContext.getContext())

            result = digitalRepo.store(userID, recordID, files)
            if result is False:
                log_info(
                    'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                    .format(body, userID, recordID), AppContext.getContext())
                return post_error("Data Missing",
                                  "Failed to store doc since data is missing",
                                  None), 400
            elif result is None:
                # NOTE(review): store() signalling success with None looks
                # inverted but matches the original behaviour — confirm.
                AppContext.addRecordID(recordID)
                log_info(
                    'DigitalDocumentSaveResource request completed, user_id:{}, record_id:{}'
                    .format(userID, recordID), AppContext.getContext())
                res = CustomResponse(Status.SUCCESS.value, None)
                return res.getres()
            else:
                # Any other value is treated as an error payload from store().
                log_info(
                    'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                    .format(body, userID, recordID), AppContext.getContext())
                return result, 400
        except Exception as e:
            AppContext.addRecordID(recordID)
            log_exception(
                "Exception on save document | DigitalDocumentSaveResource :{}".
                format(str(e)), AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to store doc since data is missing",
                              None), 400
Пример #6
0
 def text_translate(self, text_translate_input):
     """Translate a list of sentences, reusing stored hypotheses where possible.

     Looks up previously translated hypotheses first, sends only the
     remaining sentences to NMT, then merges and de-duplicates both result
     sets. Returns the (mutated) input object with jobID, status, output,
     timestamps and, on failure, an error payload set.
     """
     text_translate_input["jobID"] = utils.generate_task_id()
     # Millisecond epoch timestamp. The original eval(str(time.time())...)
     # digit-slicing trick silently produced wrong values whenever the float
     # printed with fewer than three decimals; plain arithmetic is correct.
     text_translate_input["startTime"] = int(time.time() * 1000)
     log_info("Text Translation started....", text_translate_input)
     output = text_translate_input
     output["status"], output["output"] = "FAILED", None
     try:
         text_for_nmt, ch_res = self.get_stored_hypothesis_ch(
             text_translate_input["input"]["textList"],
             text_translate_input)
         if text_for_nmt:
             url, body = self.get_nmt_url_body(text_translate_input,
                                               text_for_nmt)
             log_info("NMT IT URI - " + str(url), text_translate_input)
             nmt_response = utils.call_api(
                 url, "POST", body, None,
                 text_translate_input["metadata"]["userID"])
             if nmt_response:
                 if 'status' in nmt_response.keys():
                     if 'statusCode' in nmt_response["status"].keys():
                         if nmt_response["status"]["statusCode"] != 200:
                             output["error"] = post_error(
                                 "TRANSLATION_FAILED",
                                 "Error while translating: " +
                                 str(nmt_response["status"]["message"]),
                                 None)
                             return output
                 # Merge stored hypotheses with fresh NMT output, dedup.
                 ch_res.extend(nmt_response["data"])
                 nmt_predictions = self.dedup_hypothesis(ch_res)
                 output["input"], output["status"] = None, "SUCCESS"
                 output["taskEndTime"] = int(time.time() * 1000)
                 output["output"] = {"predictions": nmt_predictions}
             else:
                 output["taskEndTime"] = int(time.time() * 1000)
                 output["error"] = post_error("TRANSLATION_FAILED",
                                              "Error while translating",
                                              None)
         else:
             # Everything was served from the stored-hypothesis cache.
             ch_predictions = self.dedup_hypothesis(ch_res)
             output["input"], output["status"] = None, "SUCCESS"
             output["taskEndTime"] = int(time.time() * 1000)
             output["output"] = {"predictions": ch_predictions}
         log_info("Text Translation completed!", text_translate_input)
         return output
     except Exception as e:
         # Consistency fix: pass the exception object to log_exception —
         # the original passed None here, unlike every sibling handler.
         log_exception("Exception while translating: " + str(e),
                       text_translate_input, e)
         output["error"] = post_error(
             "TRANSLATION_FAILED", "Exception while translating: " + str(e),
             None)
         output["taskEndTime"] = int(time.time() * 1000)
         return output
Пример #7
0
    def get(self):
        """Fetch pages [start_page, end_page] of the record given by recordID."""
        parser = reqparse.RequestParser()
        # All three query args are mandatory; declare them table-driven.
        for arg_name, arg_type, arg_help in (
                ('start_page', int,
                 'start_page can be 0, set start_page & end_page as 0 to get entire document'),
                ('end_page', int,
                 'end_page can be 0, set start_page & end_page as 0 to get entire document'),
                ('recordID', str, 'record_id is required')):
            parser.add_argument(arg_name,
                                type=arg_type,
                                location='args',
                                help=arg_help,
                                required=True)

        args = parser.parse_args()
        record_id = args['recordID']
        AppContext.addRecordID(record_id)
        log_info(
            "DigitalDocumentGetResource record_id {} ".format(record_id),
            AppContext.getContext())

        try:
            result = digitalRepo.get_pages(record_id, args['start_page'],
                                           args['end_page'])
            if result == False:
                return post_error("Data Missing",
                                  "Failed to get pages since data is missing",
                                  None), 400

            AppContext.addRecordID(record_id)
            log_info(
                "DigitalDocumentGetResource record_id {} has {} pages".format(
                    record_id, result['total']), AppContext.getContext())
            return CustomResponse(Status.SUCCESS.value, result['pages'],
                                  result['total']).getres()

        except Exception as e:
            AppContext.addRecordID(record_id)
            log_exception(
                "Exception in DigitalDocumentGetResource |{}".format(str(e)),
                AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to get pages since data is missing",
                              None), 400
Пример #8
0
 def common_validate(self, data):
     """Validate that the request names a workflowCode known to the configs.

     Returns an error payload on failure, None (implicitly) when valid.
     """
     if data is None:
         return post_error("INPUT_NOT_FOUND", "Input is empty", None)
     if 'workflowCode' not in data:
         return post_error("WOFKLOWCODE_NOT_FOUND",
                           "workflowCode is mandatory", None)
     configs = wfmutils.get_configs()
     if data["workflowCode"] not in configs:
         return post_error(
             "WORKFLOW_NOT_FOUND",
             "There's no workflow configured against this workflowCode",
             None)
Пример #9
0
 def glossary_create(self, object_in):
     """Validate and persist a batch of glossary translations.

     Each entry of object_in["translations"] must carry src, tgt and
     locale; entries are stamped with an id, org, uploader and creation
     time before being written through repo.glossary_create.
     Returns a success dict, or a post_error payload on any failure.
     """
     try:
         # Guard clauses replace the original deeply nested if/else ladder.
         if 'org' not in object_in.keys():
             return post_error("ORG_NOT_FOUND", "org is mandatory", None)
         if 'context' not in object_in.keys():
             return post_error("CONTEXT_NOT_FOUND", "context is mandatory", None)
         if 'translations' not in object_in.keys():
             return post_error("TRANSLATIONS_NOT_FOUND", "Translations are mandatory", None)
         if not object_in["translations"]:
             return post_error("TRANSLATIONS_EMPTY", "Translations cannot be empty", None)
         for translation in object_in["translations"]:
             if 'src' not in translation.keys():
                 return post_error("SRC_NOT_FOUND", "src is mandatory for every translation", None)
             if 'tgt' not in translation.keys():
                 return post_error("TGT_NOT_FOUND", "tgt is mandatory for every translation", None)
             if 'locale' not in translation.keys():
                 return post_error("LOCALE_NOT_FOUND", "locale is mandatory for every translation", None)
         for translation in object_in["translations"]:
             translation["id"] = uuid.uuid4()
             translation["org"] = object_in["org"]
             translation["uploaded_by"] = object_in["userID"]
             # Millisecond epoch; int(time*1000) replaces the fragile
             # eval(str(time.time())...) digit-slicing trick, which broke
             # whenever the float printed with fewer than three decimals.
             translation["created_on"] = int(time.time() * 1000)
             repo.glossary_create(translation)
         return {"message": "Glossary created successfully", "status": "SUCCESS"}
     except Exception as e:
         return post_error("GLOSSARY_CREATION_FAILED",
                           "Glossary creation failed due to exception: {}".format(str(e)), None)
Пример #10
0
def consume():
    """Main translator consumer: validates workflow messages and starts file translation."""
    try:
        topic_list = [anu_translator_input_topic, anu_translator_nonmt_topic]
        kafka_consumer = instantiate(topic_list)
        translator = TranslatorService()
        wf_validator = TranslatorValidator()
        tag = ''.join(random.choice(string.ascii_letters) for _ in range(4))
        log_prefix = "Translator-Core-" + "(" + tag + ")"
        log_info(log_prefix + " Running..........", None)
        while True:
            for record in kafka_consumer:
                payload = {}
                try:
                    payload = record.value
                    if not payload:
                        # Empty message: restart iteration on the consumer.
                        break
                    if record.topic == anu_translator_nonmt_topic:
                        translator.process_no_nmt_jobs(payload)
                        continue
                    log_info(
                        log_prefix + " | Received on Topic: " + record.topic +
                        " | Partition: " + str(record.partition), payload)
                    validation_error = wf_validator.validate_wf(payload, False)
                    if validation_error is not None:
                        log_error(log_prefix + " | Error: " + str(validation_error),
                                  payload, validation_error)
                        log_info(log_prefix + " | Input: " + str(payload),
                                 payload)
                        post_error_wf(validation_error["code"],
                                      validation_error["message"],
                                      payload, None)
                        break
                    translator.start_file_translation(payload)
                except Exception as e:
                    log_exception(
                        log_prefix + " Exception in translator while consuming: " +
                        str(e), payload, e)
                    post_error(
                        "TRANSLATOR_CONSUMER_ERROR",
                        "Exception in translator while consuming: " + str(e),
                        None)
    except Exception as e:
        log_exception(
            "Exception while starting the translator consumer: " + str(e),
            None, e)
        post_error("TRANSLATOR_CONSUMER_EXC",
                   "Exception while starting translator consumer: " + str(e),
                   None)
Пример #11
0
 def validate_wf(self, data, is_api):
     """Validate a workflow request: jobID must be present, then input files.

     Returns an error payload on failure, None when the request is valid.
     """
     if 'jobID' not in data:
         return post_error("JOBID_NOT_FOUND", "jobID is mandatory", None)
     # validate_input_files already yields None on success, so its result
     # can be returned directly.
     return self.validate_input_files(data, is_api)
Пример #12
0
 def produce(self, object_in, topic, partition):
     """Publish object_in to a Kafka topic, choosing a random partition when none is given."""
     kafka_producer = self.instantiate()
     try:
         if object_in:
             if partition is None:
                 # range() is already a sequence, so choice() works on it.
                 partition = random.choice(range(total_no_of_partitions))
             kafka_producer.send(topic, value=object_in, partition=partition)
             log_info("Pushing to topic: " + topic, object_in)
         kafka_producer.flush()
     except Exception as e:
         log_exception("Exception in translator while producing: " + str(e),
                       object_in, e)
         post_error("TRANSLATOR_PRODUCER_EXC",
                    "Exception in translator while producing: " + str(e),
                    None)
Пример #13
0
    def update_words(self, user_id, words):
        """Apply user corrections to OCR words of a digitised document.

        For every entry in words (validated first), locates the containing
        region, swaps in the updated text (preserving the OCR original in
        'ocr_text') and persists the region. Returns True on success or a
        post_error payload on the first failure.
        """
        for word in words:
            validation = validator.update_word_validation(word)
            if validation is not None:
                return validation

            page = word['page_no']
            region_id = word['region_id']
            word_id = word['word_id']
            record_id = word['record_id']
            user_word = word['updated_word']

            AppContext.addRecordID(record_id)
            log_info("DigitalDocumentRepo update word request",
                     AppContext.getContext())
            region_to_update = self.docModel.get_word_region(
                user_id, record_id, region_id, page)
            if region_to_update:
                if region_to_update['identifier'] == region_id:
                    region_to_update['updated'] = True
                    for data in region_to_update['regions']:
                        # Bug fix: this inner loop variable used to shadow the
                        # outer 'word' loop variable; renamed to word_region.
                        for word_region in data['regions']:
                            if word_region['identifier'] == word_id:
                                word_region['ocr_text'] = word_region['text']
                                word_region['text'] = user_word
                                break
            else:
                return post_error(
                    "Data Missing",
                    "No record with the given user_id,record_id and region_id",
                    None)

            AppContext.addRecordID(record_id)
            log_info(
                "DigitalDocumentRepo update word region :{}".format(
                    str(region_to_update)), AppContext.getContext())
            # Removed a leftover debug print(region_to_update).
            if self.docModel.update_word(user_id, record_id, region_id,
                                         region_to_update, page) == False:
                return post_error(
                    "Data Missing",
                    "Failed to update word since data is missing", None)
        return True
Пример #14
0
 def error_handler(self, code, message, object_in, iswf):
     """Build an error payload; workflow errors also mark the input FAILED."""
     if not iswf:
         return post_error(code, message, None)
     object_in["state"] = "SENTENCES-ALIGNED"
     object_in["status"] = "FAILED"
     return post_error_wf(code, message, object_in, None)
Пример #15
0
    def update_word_validation(word):
        """Check that a word-update request carries all mandatory, non-empty fields.

        Returns a post_error payload when any field is missing or empty,
        None (implicitly) when the request is valid.
        """
        obj_keys = {
            'record_id', 'region_id', 'word_id', 'updated_word', 'page_no'
        }
        # A key that is absent or holds a falsy value is equally invalid, so
        # the original's two separate checks (same message) are merged.
        # Bug fix: the message now lists page_no, which was always required
        # by obj_keys but omitted from the error text.
        if any(key not in word or not word[key] for key in obj_keys):
            return post_error(
                "Data Missing",
                "record_id,region_id,word_id,updated_word,page_no are mandatory for updating the word",
                None)
Пример #16
0
 def run(self):
     """Cron loop that force-fails WFM jobs left idle beyond the configured interval.

     Every js_cron_interval_sec seconds, fetches STARTED/INPROGRESS jobs
     (skipping Aligner "AL" workflows) and marks those older than
     js_job_failure_interval_sec as FAILED with an ORPHAN_JOB error.
     """
     obj = {"metadata": {"module": module_name}}
     rand_str = ''.join(
         random.choice(string.ascii_letters) for _ in range(4))
     prefix = "WFMJobsManager(" + rand_str + ")"
     log_info(prefix + " -- AJM Deployed, WFMJobsManager running......",
              obj)
     wfm_utils = WFMJMCronUtils()
     run = 0
     # NOTE(review): eval() is kept for the interval config values because
     # they may be arithmetic expressions (e.g. "60*5"); they must never
     # come from untrusted input.
     while not self.stopped.wait(eval(str(js_cron_interval_sec))):
         try:
             criteria = {"status": {"$in": ["STARTED", "INPROGRESS"]}}
             exclude = {'_id': False}
             jobs = wfm_utils.search_job(criteria, exclude, None, None)
             no_of_jobs = 0
             if jobs:
                 log_info(
                     prefix + " -- Run: " + str(run) + " | Jobs Fetched: " +
                     str(len(jobs)), obj)
                 for job in jobs:
                     if "AL" in job["workflowCode"]:
                         continue  # Ignore Aligner jobs
                     # Millisecond epoch; int(time*1000) replaces the fragile
                     # eval(str(time.time())...) digit-slicing trick.
                     diff = int(time.time() * 1000) - job["startTime"]
                     if (diff / 1000) > eval(
                             str(js_job_failure_interval_sec)):
                         job["status"] = "FAILED"
                         job["error"] = post_error(
                             "ORPHAN_JOB",
                             "The job was failed by the system, since it was idle",
                             None)
                         job["endTime"] = int(time.time() * 1000)
                         wfm_utils.update_job(job, job["jobID"])
                         log_info(
                             prefix +
                             " -- JOB FAILED: Idle job, force failed. jobID: "
                             + job["jobID"], job)
                         no_of_jobs += 1
             run += 1
             # Bug fix: len(jobs) raised TypeError when search_job returned
             # None (only the 'if jobs:' block above was guarded).
             log_info(
                 prefix + " -- Run: " + str(run) + " | Jobs Fetched: " +
                 str(len(jobs) if jobs else 0) + " | Jobs Processed: " +
                 str(no_of_jobs), obj)
         except Exception as e:
             run += 1
             log_exception(
                 prefix + " -- Run: " + str(run) +
                 " | Exception in JobSweeper: " + str(e), obj, e)
Пример #17
0
 def validate_text_translate(self, data):
     """Validate a text-translate request: input, textList entries and model details.

     Returns an error payload for the first violation found, None when valid.
     """
     # Guard clauses instead of the original nested if/else ladder.
     if 'input' not in data.keys():
         return post_error("INPUT_NOT_FOUND", "Input key is mandatory",
                           None)
     api_input = data["input"]
     if 'textList' not in api_input.keys():
         return post_error("TEXT_LIST_NOT_FOUND",
                           "Text List is mandatory", None)
     if not api_input["textList"]:
         return post_error("TEXT_LIST_EMPTY",
                           "Text list cannot be empty", None)
     for entry in api_input["textList"]:
         if 's_id' not in entry.keys():
             return post_error("SENTENCE_ID_NOT_FOUND",
                               "s_id is mandatory", None)
         if 'src' not in entry.keys():
             return post_error("TEXT_NOT_FOUND",
                               "src is mandatory", None)
         if 'taggedPrefix' not in entry.keys():
             return post_error("TAGGED_PREFIX_NOT_FOUND",
                               "taggedPrefix is mandatory",
                               None)
     if 'model' not in api_input.keys():
         return post_error("MODEL_NOT_FOUND",
                           "Model details are mandatory for this wf.",
                           None)
     model_info = api_input["model"]
     if 'model_id' not in model_info.keys():
         return post_error("MODEL_ID_NOT_FOUND",
                           "Model Id is mandatory.", None)
     if 'source_language_code' not in model_info.keys():
         return post_error("SRC_LANG_NOT_FOUND",
                           "Source language code is mandatory.",
                           None)
     if 'target_language_code' not in model_info.keys():
         return post_error("TGT_LANG_NOT_FOUND",
                           "Target language code is mandatory.",
                           None)
Пример #18
0
 def error_handler(self, object_in, code, iswf):
     """Create an error payload; workflow errors also flag the input as FAILED."""
     if not iswf:
         # Non-workflow errors carry the code only, with an empty message.
         # (The original also had a no-op 'code = code' here, dropped.)
         return post_error(code, "", None)
     object_in['status'] = "FAILED"
     object_in['state'] = config.TASK_STAT
     return post_error_wf(code, object_in['message'], object_in, None)
Пример #19
0
 def error_handler(self, object_in, code, iswf):
     """Create an error payload; workflow errors also flag the input as FAILED."""
     if not iswf:
         # Non-workflow errors carry the code only, with an empty message.
         # (The original also had a no-op 'code = code' here, dropped.)
         return post_error(code, "", None)
     object_in['status'] = "FAILED"
     object_in['state'] = "SENTENCE-TOKENISED"
     return post_error_wf(code, object_in['message'], object_in, None)
Пример #20
0
 def validate_for_annotator(self, data):
     """Ensure every file in the request carries the fields the annotator wf needs.

     Returns an error payload for the first missing field, None when valid.
     """
     required_fields = (
         ('annotationType', "ANNOTATION_TYPE_NOT_FOUND",
          "annotationType is mandatory for all files for this wf"),
         ('sourceLanguage', "SRC_LANG_NOT_FOUND",
          "sourceLanguage is mandatory for all files for this wf"),
         ('targetLanguage', "TGT_LANG_NOT_FOUND",
          "targetLanguage is mandatory for all files for this wf"),
         ('fileInfo', "FILES_INFO_NOT_FOUND",
          "fileInfo is mandatory for all files for this wf"),
         ('users', "USERS_NOT_FOUND",
          "users is mandatory for all files for this wf"),
         ('description', "DESC_NOT_FOUND",
          "description is mandatory for all files for this wf"),
     )
     for file in data["files"]:
         for field, error_code, error_msg in required_fields:
             if field not in file:
                 return post_error(error_code, error_msg, None)
Пример #21
0
 def start_file_translation(self, translate_wf_input):
     """Kick off translation for every input file of a workflow job.

     Skips duplicate jobIDs, dumps each file's sentences to the DB and
     spawns a Process that pushes them to NMT. If any dump fails, the job
     is marked FAILED and published to the translator output topic.
     Returns a status dict, or None for duplicate jobs.
     """
     duplicate_jobs = repo.search({"jobID": translate_wf_input["jobID"]},
                                  {'_id': False})
     if duplicate_jobs:
         log_info(
             "Duplicate Job, jobID: " + str(translate_wf_input["jobID"]),
             translate_wf_input)
         return None
     translate_wf_input["taskID"] = utils.generate_task_id()
     # Millisecond epoch; int(time*1000) replaces the fragile
     # eval(str(time.time())...) digit-slicing trick, which broke whenever
     # the float printed with fewer than three decimals.
     translate_wf_input["taskStartTime"] = int(time.time() * 1000)
     translate_wf_input["state"] = "TRANSLATED"
     log_info(
         "Translator process initiated... jobID: " +
         str(translate_wf_input["jobID"]), translate_wf_input)
     error, error_list = None, []
     for file in translate_wf_input["input"]["files"]:
         try:
             dumped = self.dump_file_to_db(file["path"], translate_wf_input)
             if not dumped:
                 error_list.append({
                     "inputFile": str(file["path"]),
                     "outputFile": "FAILED",
                     "error": "File is either empty or  couldn't be downloaded!"
                 })
                 error = post_error(
                     "FILE_DUMP_FAILED",
                     "File is either empty or  couldn't be downloaded!",
                     None)
             else:
                 # Translate each file in its own process so one slow file
                 # does not block the rest of the job.
                 translation_process = Process(
                     target=self.push_sentences_to_nmt,
                     args=(file, translate_wf_input))
                 translation_process.start()
         except Exception as e:
             log_exception(
                 "Exception while posting sentences to NMT: " + str(e),
                 translate_wf_input, e)
             continue
     if error_list and error is not None:
         translate_wf_input["output"] = error_list
         translate_wf_input["status"] = "FAILED"
         translate_wf_input["error"] = error
         translate_wf_input["taskEndTime"] = int(time.time() * 1000)
         producer.produce(translate_wf_input, anu_translator_output_topic,
                          None)
         return {"status": "failed", "message": "Some/All files failed"}
     return {"status": "success", "message": "Sentences sent to NMT"}
Пример #22
0
def consume_tmx():
    """Kafka consumer loop for the Translator TMX flow.

    Subscribes to the TMX input topic and pushes each received message to the
    TMX store via TMXService. Runs forever; per-message failures are logged
    and reported via post_error without stopping the loop.
    """
    try:
        topics = [anu_translator_tmx_in_topic]
        consumer = instantiate(topics)
        service = TMXService()
        # Random suffix distinguishes this consumer instance in the logs.
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "Translator-TMX-" + "(" + rand_str + ")"
        log_info(prefix + " Running..........", None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    data = msg.value
                    if data:
                        log_info(prefix + " | Received on Topic: " + msg.topic + " | Partition: " + str(msg.partition), data)
                        service.push_to_tmx_store(data)
                except Exception as e:
                    log_exception(prefix + " Exception in translator tmx while consuming: " + str(e), data, e)
                    post_error("TRANSLATOR_CONSUMER_ERROR", "Exception in translator while consuming: " + str(e), None)
    except Exception as e:
        # BUG FIX: message previously said "nmt consumer" (copy-paste from
        # consume_nmt); this is the TMX consumer.
        log_exception("Exception while starting the translator tmx consumer: " + str(e), None, e)
        post_error("TRANSLATOR_CONSUMER_EXC", "Exception while starting translator consumer: " + str(e), None)
Пример #23
0
def consume():
    """Consumer loop that routes tool output messages into the WFM.

    Reads all workflow configs, subscribes to every configured output topic,
    enriches each message with job metadata when available, and hands it to
    WFMService.manage_wf. Per-message failures are logged and reported
    without stopping the loop.
    """
    try:
        wfmutils = WFMUtils()
        wfmservice = WFMService()
        wfmutils.read_all_configs()
        all_configs = wfmutils.get_configs()
        topics = wfmutils.fetch_output_topics(all_configs)
        consumer = instantiate(topics)
        # Random suffix distinguishes this consumer instance in the logs.
        suffix = ''.join(random.choice(string.ascii_letters) for _ in range(4))
        prefix = f"WFM--({suffix})"
        log_info(f"{prefix} | Running..........", None)
        log_info(f"{prefix} | Topics: {topics}", None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    if not msg:
                        continue
                    data = msg.value
                    # Attach stored job metadata so downstream handlers see
                    # the full job context, not just the tool's payload.
                    if 'jobID' in data.keys():
                        job_details = wfmutils.get_job_details(data["jobID"])
                        if job_details:
                            data["metadata"] = job_details[0]["metadata"]
                    log_info(
                        f"{prefix} | Received on Topic: {msg.topic}"
                        f" | Partition: {msg.partition}", data)
                    wfmservice.manage_wf(data)
                except Exception as e:
                    log_exception(
                        f"{prefix} | Exception while consuming: {e}", data, e)
                    post_error("WFM_CONSUMER_ERROR",
                               "Exception while consuming: " + str(e), None)
    except Exception as e:
        log_exception("Exception while starting the wfm consumer: " + str(e),
                      None, e)
        post_error("WFM_CONSUMER_ERROR",
                   "Exception while starting wfm consumer: " + str(e), None)
Пример #24
0
 def process_sync(self, wf_input):
     """Execute a SYNC workflow end to end, invoking each tool in order.

     Walks the workflow's configured order of execution, feeding each tool
     either the raw workflow input (first tool) or the previous tool's
     output. On any tool error or exception, persists and returns a FAILED
     client output; on success, persists and returns the final output.

     :param wf_input: workflow request dict; must carry "workflowCode",
         "jobID" and "metadata" (with "userID") — assumed from usage below.
     :return: client output dict (success or failure shape) built by
         get_wf_details_sync.
     """
     try:
         # NOTE: ctx aliases wf_input — mutations below (e.g. metadata
         # "module") are visible through both names.
         ctx = wf_input
         order_of_execution = wfmutils.get_order_of_exc(
             wf_input["workflowCode"])
         tool_output = None
         previous_tool = None
         for tool_order in order_of_execution.keys():
             step_details = order_of_execution[tool_order]
             tool_details = step_details["tool"][0]
             log_info(
                 tool_details["name"] + log_msg_start + " jobID: " +
                 ctx["jobID"], ctx)
             # First tool consumes the raw workflow input; subsequent tools
             # consume the previous tool's output.
             if not tool_output:
                 tool_input = wfmutils.get_tool_input_sync(
                     tool_details["name"], None, None, wf_input)
             else:
                 tool_input = wfmutils.get_tool_input_sync(
                     tool_details["name"], previous_tool, tool_output, None)
             response = wfmutils.call_api(
                 tool_details["api-details"][0]["uri"], tool_input,
                 wf_input["metadata"]["userID"])
             error = self.validate_tool_response(response, tool_details,
                                                 wf_input)
             if error:
                 # Tool failed; validate_tool_response already produced the
                 # client-facing error payload.
                 return error
             tool_output = response
             previous_tool = tool_details["name"]
             # Stamp the WFM module name so logs attribute entries correctly.
             ctx["metadata"]["module"] = module_wfm_name
             tool_output["metadata"] = ctx["metadata"]
             log_info(
                 tool_details["name"] + log_msg_end + " jobID: " +
                 ctx["jobID"], ctx)
         client_output = self.get_wf_details_sync(None, tool_output, True,
                                                  None)
         self.update_job_details(client_output, False)
         log_info("Job COMPLETED, jobID: " + str(wf_input["jobID"]), ctx)
         return client_output
     except Exception as e:
         log_exception(
             "Exception while processing SYNC workflow: " + str(e),
             wf_input, e)
         error = post_error(
             "SYNC_WFLOW_ERROR",
             "Exception while processing the sync workflow: " + str(e), e)
         # Persist and return a FAILED client output so the caller always
         # gets a well-formed response, even on unexpected errors.
         client_output = self.get_wf_details_sync(wf_input, None, True,
                                                  error)
         self.update_job_details(client_output, False)
         log_info("Job FAILED, jobID: " + str(wf_input["jobID"]), wf_input)
         return client_output
Пример #25
0
 def error_handler(self, object_in, code, iswf):
     """Build and post an error object for a failure.

     :param object_in: for workflow errors, the workflow payload carrying
         'message'; otherwise a dict with an 'error' sub-dict providing
         'code' and 'message'.
     :param code: error code to report; used only when iswf is True.
     :param iswf: True for workflow-level errors (posted via post_error_wf),
         False for plain errors (posted via post_error).
     :return: the error object produced by the post_error* helper.
     """
     # Removed dead locals (job_id/task_id/state/status and `code = code`)
     # that were assigned but never used.
     if iswf:
         # Workflow errors use the caller-supplied code and the message
         # carried on the workflow object itself.
         message = object_in['message']
         return post_error_wf(code, message, object_in, None)
     else:
         # Non-workflow errors carry their own code/message.
         code = object_in['error']['code']
         message = object_in['error']['message']
         return post_error(code, message, None)
Пример #26
0
 def push_to_queue(self, object_in, topic):
     """Publish object_in to the given Kafka topic on a rotated partition.

     Picks a random partition but never the one used for this topic on the
     previous call, spreading consecutive messages across partitions.

     :param object_in: message payload dict; falsy payloads are not sent.
     :param topic: destination Kafka topic name.
     :return: None on success, or a post_error object on producer failure.
     """
     global topic_partition_map
     producer = self.instantiate()
     partition = random.choice(list(range(0, total_no_of_partitions)))
     if topic in topic_partition_map.keys():
         # Re-roll until we get a partition different from last time.
         while partition == topic_partition_map[topic]:
             partition = random.choice(list(range(0, total_no_of_partitions)))
     topic_partition_map[topic] = partition
     try:
         if object_in:
             producer.send(topic, partition=partition, value=object_in)
             object_in["metadata"]["module"] = module_wfm_name # FOR LOGGING ONLY.
             log_info("Pushing to TOPIC: " + topic + " | PARTITION: " + str(partition), object_in)
         # BUG FIX: flush() was placed after `return None`, making it
         # unreachable on the normal path; flush now runs after send so
         # buffered messages are actually delivered before returning.
         producer.flush()
         return None
     except Exception as e:
         log_exception("Exception while producing: " + str(e), object_in, e)
         return post_error("WFLOW_PRODUCER_ERROR", "Exception while producing: " + str(e), None)
Пример #27
0
    def store(self, userID, recordID, files):
        """Persist page-region blocks for every file of a record.

        Builds one block per page via create_regions_from_page, accumulating
        blocks across all files, then bulk-inserts them.

        BUG FIX: the `for page in pages` loop was dedented outside the
        `for file in files` loop, so only the LAST file's pages were ever
        processed (using that file's leftover loop variables). The page loop
        now runs for every file, and `blocks` accumulates across files.

        :param userID: owner of the document.
        :param recordID: record identifier; jobID is its prefix before '|'.
        :param files: list of file dicts carrying 'file', 'config', 'pages'.
        :return: None on success; an error block dict if page-block creation
            failed; False if the bulk insert failed; or a post_error object
            on exception.
        """
        try:
            blocks = []
            for file in files:
                jobID = recordID.split('|')[0]
                fileID = file['file']['identifier']
                file_name = file['file']['name']
                locale = file['config']['language']
                file_type = file['file']['type']
                pages = file['pages']
                log_info(
                    "DigitalDocumentRepo save document for user: {}| record: {}| count of pages received: {}"
                    .format(userID, recordID, str(len(pages))),
                    AppContext.getContext())
                for page in pages:
                    block = self.create_regions_from_page(userID, jobID, recordID,
                                                          fileID, file_name,
                                                          locale, file_type, page)
                    # A well-formed block has many keys; a small dict is the
                    # error payload from create_regions_from_page.
                    if len(block.keys()) > 5:
                        blocks.append(block)
                    else:
                        return block
            log_info(
                'DigitalDocumentRepo page blocks created for insert, user_id:{}, record_id:{}, block length:{}'
                .format(userID, recordID, str(len(blocks))),
                AppContext.getContext())
            result = self.docModel.store_bulk_blocks(blocks)
            if result == False:
                return False
        except Exception as e:
            AppContext.addRecordID(recordID)
            log_exception(
                'Exception on save document | DigitalDocumentRepo :{}'.format(
                    str(e)), AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to store doc since :{}".format(str(e)),
                              None)
Пример #28
0
    def create_regions_from_page(self, userID, jobID, recordID, fileID,
                                 file_name, locale, file_type, page):
        """Assemble one storable block dict for a single page of a document.

        :return: the block dict on success, or a post_error object when the
            page data is missing expected fields.
        """
        try:
            AppContext.addRecordID(recordID)
            log_info(
                'DigitalDocumentRepo page blocks creation started for record_id:{}, page_number:{}'
                .format(recordID,
                        str(page['page_no'])), AppContext.getContext())
            # Page metadata; page numbers are stored 1-based.
            page_details = {
                'page_no': page['page_no'] + 1,
                'page_identifier': page['identifier'],
                'page_boundingBox': page['boundingBox'],
                'page_img_path': page['path'],
            }
            if 'resolution' in page.keys():
                page_details['page_resolution'] = page['resolution']
            return {
                'userID': userID,
                'jobID': jobID,
                'recordID': recordID,
                'file_identifier': fileID,
                'file_name': file_name,
                'file_locale': locale,
                'file_type': file_type,
                'created_on': datetime.utcnow(),
                'page_info': page_details,
                'regions': page['regions'],
            }
        except Exception as e:
            AppContext.addRecordID(recordID)
            log_exception(
                'Exception on save document | DigitalDocumentRepo :{}'.format(
                    str(e)), AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to store doc since data is missing",
                              None)
Пример #29
0
 def validate_async(self, data, workflowCode):
     """Validate an ASYNC workflow request.

     Checks the workflow type, the presence of input files, and per-file
     mandatory fields (path/type/locale), plus tool-specific requirements:
     model details for translator workflows, OCR config for OCR-family
     workflows.

     :param data: request payload dict; expected to carry "files".
     :param workflowCode: workflow identifier used to look up config/tools.
     :return: a post_error object on validation failure; None (implicitly)
         when the request is valid.
     """
     if is_async_flow_enabled:
         configs = wfmutils.get_configs()
         if configs[workflowCode]["type"] != "ASYNC":
             return post_error("UNSUPPORTED_WF_CODE",
                               "This workflow is NOT of the ASYNC type.",
                               None)
         if 'files' not in data.keys():
             return post_error("FILES_NOT_FOUND", "files are mandatory",
                               None)
         else:
             if len(data["files"]) == 0:
                 return post_error("FILES_NOT_FOUND",
                                   "Input files are mandatory", None)
             else:
                 tools = wfmutils.get_tools_of_wf(workflowCode)
                 if tool_annotator in tools:
                     # NOTE(review): the result of validate_for_annotator is
                     # discarded, so any error it returns is lost and the
                     # request is treated as valid — confirm this is intended.
                     self.validate_for_annotator(data)
                     return
                 for file in data["files"]:
                     if 'path' not in file.keys():
                         return post_error(
                             "FILES_PATH_NOT_FOUND",
                             "Path is mandatory for all files in the input",
                             None)
                     if 'type' not in file.keys():
                         return post_error(
                             "FILES_TYPE_NOT_FOUND",
                             "Type is mandatory for all files in the input",
                             None)
                     if 'locale' not in file.keys():
                         return post_error(
                             "FILES_LOCALE_NOT_FOUND",
                             "Locale is mandatory for all files in the input",
                             None)
                     # Translator workflows additionally require full model
                     # details on every file.
                     if tool_translator in tools:
                         if 'model' not in file.keys():
                             return post_error(
                                 "MODEL_NOT_FOUND",
                                 "Model details are mandatory for this wf.",
                                 None)
                         else:
                             model = file["model"]
                             if 'model_id' not in model.keys():
                                 return post_error(
                                     "MODEL_ID_NOT_FOUND",
                                     "Model Id is mandatory.", None)
                             if 'source_language_code' not in model.keys():
                                 return post_error(
                                     "SRC_LANG_NOT_FOUND",
                                     "Source language code is mandatory.",
                                     None)
                             if 'target_language_code' not in model.keys():
                                 return post_error(
                                     "TGT_LANG_NOT_FOUND",
                                     "Target language code is mandatory.",
                                     None)
                     # OCR-family workflows require an OCR config per file.
                     if tool_worddetector in tools or tool_layoutdetector in tools or tool_ocrgooglevision in tools \
                             or tool_ocrtesseract in tools or tool_blocksegmenter in tools or tool_ocrdd10googlevision in tools\
                             or tool_ocrdd15googlevision in tools:
                         if 'config' not in file.keys():
                             return post_error(
                                 "CONFIG_NOT_FOUND",
                                 "OCR Config details are mandatory for this wf.",
                                 None)
                         else:
                             config = file["config"]
                             if 'OCR' not in config.keys():
                                 return post_error(
                                     "CONFIG_NOT_FOUND",
                                     "OCR Config details are mandatory for this wf.",
                                     None)
     else:
         return post_error(
             "WORKFLOW_TYPE_DISABLED",
             "This workflow belongs to ASYNC type, which is currently disabled.",
             None)
Пример #30
0
    def validate_sync(self, data, workflowCode):
        """Validate a SYNC workflow request.

        Checks workflow type and mandatory fields (recordID, locale,
        textBlocks), plus translator-specific model details and — for
        single-tool translator workflows — modified sentence ids.

        :return: a post_error object on validation failure; None (implicitly)
            when the request is valid.
        """
        if not is_sync_flow_enabled:
            return post_error(
                "WORKFLOW_TYPE_DISABLED",
                "This workflow belongs to SYNC type, which is currently disabled.",
                None)
        wf_configs = wfmutils.get_configs()
        if wf_configs[workflowCode]["type"] != "SYNC":
            return post_error("UNSUPPORTED_WF_CODE",
                              "This workflow is NOT of the SYNC type.", None)
        if 'recordID' not in data:
            return post_error("RECORD_ID_NOT_FOUND",
                              "Record id is mandatory.", None)
        if 'locale' not in data:
            return post_error("LOCALE_NOT_FOUND", "Locale is mandatory.",
                              None)
        if 'textBlocks' not in data or not data["textBlocks"]:
            return post_error("TEXT_BLOCKS_NOT_FOUND",
                              "text blocks are mandatory.", None)
        wf_tools = wfmutils.get_tools_of_wf(workflowCode)
        if tool_translator not in wf_tools:
            return
        # Translator workflows require complete model details.
        if 'model' not in data:
            return post_error(
                "MODEL_NOT_FOUND",
                "Model details are mandatory for this wf.", None)
        model_info = dict(data["model"])
        if 'model_id' not in model_info:
            return post_error("MODEL_ID_NOT_FOUND",
                              "Model Id is mandatory.", None)
        if 'source_language_code' not in model_info:
            return post_error(
                "SRC_LANG_NOT_FOUND",
                "Source language code is mandatory.", None)
        if 'target_language_code' not in model_info:
            return post_error(
                "TGT_LANG_NOT_FOUND",
                "Target language code is mandatory.", None)
        # A translator-only workflow must say which sentences changed.
        if len(wf_tools) == 1:
            if 'modifiedSentences' not in data or not data["modifiedSentences"]:
                return post_error(
                    "MODIFIED_SENT_NOT_FOUND",
                    "Ids of modified sentences is mandatory", None)