Example No. 1
    def model_conversion(inputs):
        out = {}
        if any(v not in inputs for v in ['inp_model_path','out_dir']):
            out = CustomResponse(Status.INCOMPLETE_API_REQUEST.value, [])
            log_info("Missing either inp_model_path,out_dir in model conversion request",MODULE_CONTEXT)
            return out
        with open(config.ICONFG_FILE) as f:
            confs = json.load(f)
            model_root = confs['models_root']
        final_dir = os.path.join(model_root, inputs['out_dir'])
        try:
            log_info("Inside model_conversion-interactive_translate function",MODULE_CONTEXT)
            converter = ctranslate2.converters.OpenNMTPyConverter(inputs['inp_model_path'])       # inp_model_path: the model which has to be converted
            output_dir = converter.convert(
                        final_dir,                                          # Path to the output directory.
                        "TransformerBase",                                  # A model specification instance from ctranslate2.specs.
                        vmap=None,                                          # Path to a vocabulary mapping file.
                        quantization=None,                                  # Weights quantization: "int8" or "int16".
                        force=False)
            log_info("Interactive model converted and saved at: {}".format(output_dir),MODULE_CONTEXT)   
            out = CustomResponse(Status.SUCCESS.value, None)   
        except Exception as e:
            log_exception("Error in model_conversion interactive translate: {} and {}".format(sys.exc_info()[0],e),MODULE_CONTEXT,e)
            status = Status.SYSTEM_ERR.value
            status['why'] = str(e)
            out = CustomResponse(status, None)  

        return out
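
A minimal, standalone sketch of the same conversion step, assuming ctranslate2 1.x (the OpenNMTPyConverter class and the "TransformerBase" spec name are taken from the code above; the paths are placeholders):

    # Hedged sketch: convert an OpenNMT-py checkpoint for CTranslate2 inference.
    # Paths are illustrative placeholders, not values from the source.
    import ctranslate2

    converter = ctranslate2.converters.OpenNMTPyConverter("/path/to/opennmt_model.pt")
    output_dir = converter.convert("/path/to/converted_model",   # output directory
                                   "TransformerBase",            # model spec name, as above
                                   vmap=None, quantization=None, force=False)
    print("converted model written to", output_dir)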
Example No. 2
    def post(self):
        userID = request.headers.get('userID')
        if userID is None:
            userID = request.headers.get('x-user-id')
        body = request.get_json()

        if 'words' not in body or not body['words']:
            return post_error("Data Missing", "words are required", None), 400

        words = body['words']
        AppContext.adduserID(userID)
        log_info(
            "DigitalDocumentUpdateWordResource for user {}, number words to update {} request {}"
            .format(userID, len(words), body), AppContext.getContext())

        try:
            result = digitalRepo.update_words(userID, words)
            if result is True:
                res = CustomResponse(Status.SUCCESS.value, words)
                return res.getres()
            # return post_error("Data Missing","Failed to update word since data is missing",None), 400
            return result, 400

        except Exception as e:
            log_exception(
                "Exception in DigitalDocumentUpdateWordResource |{}".format(
                    str(e)), AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to update word since data is missing",
                              None), 400
Example No. 3
    def post(self):
        body = request.get_json()

        if 'files' not in body or not body['files']:
            return post_error("Data Missing", "files is required", None), 400

        if 'recordID' not in body or not body['recordID']:
            return post_error("Data Missing", "recordID is required",
                              None), 400

        # if 'jobID' not in body or not body['jobID']:
        #     return post_error("Data Missing","jobID is required",None), 400

        files = body['files']
        userID = body.get('metadata', {}).get('userID')
        recordID = body['recordID']

        if not userID:
            AppContext.addRecordID(recordID)
            log_info(
                'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                .format(body, userID, recordID), AppContext.getContext())
            return post_error("Data Missing", "userID is required", None), 400

        try:
            AppContext.addRecordID(recordID)
            log_info(
                'DigitalDocumentSaveResource request received, user_id:{}, record_id:{}'
                .format(userID, recordID), AppContext.getContext())

            result = digitalRepo.store(userID, recordID, files)
            if result is False:
                log_info(
                    'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                    .format(body, userID, recordID), AppContext.getContext())
                return post_error("Data Missing",
                                  "Failed to store doc since data is missing",
                                  None), 400
            elif result is None:
                AppContext.addRecordID(recordID)
                log_info(
                    'DigitalDocumentSaveResource request completed, user_id:{}, record_id:{}'
                    .format(userID, recordID), AppContext.getContext())
                res = CustomResponse(Status.SUCCESS.value, None)
                return res.getres()
            else:
                log_info(
                    'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                    .format(body, userID, recordID), AppContext.getContext())
                return result, 400
        except Exception as e:
            AppContext.addRecordID(recordID)
            log_exception(
                "Exception on save document | DigitalDocumentSaveResource :{}".
                format(str(e)), AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to store doc since data is missing",
                              None), 400
Example No. 4
    def get(self):

        parser = reqparse.RequestParser()
        parser.add_argument(
            'start_page',
            type=int,
            location='args',
            help=
            'start_page can be 0, set start_page & end_page as 0 to get entire document',
            required=True)
        parser.add_argument(
            'end_page',
            type=int,
            location='args',
            help=
            'end_page can be 0, set start_page & end_page as 0 to get entire document',
            required=True)
        parser.add_argument('recordID',
                            type=str,
                            location='args',
                            help='record_id is required',
                            required=True)

        args = parser.parse_args()
        AppContext.addRecordID(args['recordID'])
        log_info(
            "DigitalDocumentGetResource record_id {} ".format(
                args['recordID']), AppContext.getContext())

        try:
            result = digitalRepo.get_pages(args['recordID'],
                                           args['start_page'],
                                           args['end_page'])
            if result is False:
                return post_error("Data Missing",
                                  "Failed to get pages since data is missing",
                                  None), 400

            AppContext.addRecordID(args['recordID'])
            log_info(
                "DigitalDocumentGetResource record_id {} has {} pages".format(
                    args['recordID'], result['total']),
                AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, result['pages'],
                                 result['total'])
            return res.getres()

        except Exception as e:
            AppContext.addRecordID(args['recordID'])
            log_exception(
                "Exception in DigitalDocumentGetResource |{}".format(str(e)),
                AppContext.getContext(), e)
            return post_error("Data Missing",
                              "Failed to get pages since data is missing",
                              None), 400
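
For illustration, a client-side call against this resource might look like the sketch below; the base URL and path are placeholders (they are not shown in the source), while the query parameters mirror the parser definition above:

    # Hypothetical client call; the endpoint URL is a placeholder.
    import requests

    resp = requests.get(
        "http://localhost:5001/digital-document",        # placeholder host and path
        params={"start_page": 0,                          # 0 and 0 together fetch the entire document
                "end_page": 0,
                "recordID": "REC-123"})                    # illustrative record id
    print(resp.status_code, resp.json())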
Example No. 5
    def get(self):

        parser = reqparse.RequestParser()
        parser.add_argument(
            'start_page',
            type=int,
            location='args',
            help=
            'start_page can be 0, set start_page & end_page as 0 to get entire document',
            required=True)
        parser.add_argument(
            'end_page',
            type=int,
            location='args',
            help=
            'end_page can be 0, set start_page & end_page as 0 to get entire document',
            required=True)
        parser.add_argument('ad-userid',
                            location='headers',
                            type=str,
                            help='userid cannot be empty',
                            required=True)
        parser.add_argument('job_id',
                            type=str,
                            location='args',
                            help='Job Id is required',
                            required=False)
        parser.add_argument('record_id',
                            type=str,
                            location='args',
                            help='record_id is required',
                            required=True)

        args = parser.parse_args()
        AppContext.addRecordID(args['record_id'])
        log_info(
            "FileContentGetResource record_id {} for user {}".format(
                args['record_id'], args['ad-userid']), AppContext.getContext())

        try:
            result = fileContentRepo.get(args['ad-userid'], args['record_id'],
                                         args['start_page'], args['end_page'])
         if result is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400
            log_info(
                "FileContentGetResource record_id {} for user {} has {} pages".
                format(args['record_id'], args['ad-userid'], result['total']),
                AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, result['pages'],
                                 result['total'])
            return res.getres()
        except Exception as e:
            log_exception("FileContentGetResource ", AppContext.getContext(),
                          e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
Example No. 6
 def post(self):
     inputs = request.get_json(force=True)
     if len(inputs) > 0:
         log_info("Making interactive-model-convert API call",
                  MODULE_CONTEXT)
         out = ModelConvertService.model_conversion(inputs)
         return out.getres()
     else:
         log_info("null inputs in request in interactive-translation API",
                  MODULE_CONTEXT)
         out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
         return out.getres()
Example No. 7
 def post(self):
     inputs = request.get_json(force=True)
     if len(inputs)>0:
         log_info("Making v3/translate-anuvaad API call",MODULE_CONTEXT)
         log_info("inputs---{}".format(inputs),MODULE_CONTEXT)
         out = OpenNMTTranslateService.translate_func(inputs)
         log_info("Final output from v3/translate-anuvaad API: {}".format(out.getresjson()),MODULE_CONTEXT)
         return out.getres()
     else:
         log_info("null inputs in request in translate-anuvaad API",MODULE_CONTEXT)
         out = CustomResponse(Status.INVALID_API_REQUEST.value,None)
         return out.getres()             
Example No. 8
 def post(self):
     inputs = request.get_json(force=True)
     if len(inputs)>0:
         log_info("Making v2/interactive-translation API call",MODULE_CONTEXT)
         log_info("inputs---{}".format(inputs),MODULE_CONTEXT)
         # log_info(entry_exit_log(LOG_TAGS["input"],inputs))
         out = TranslateService.interactive_translation(inputs)
         log_info("out from v2/interactive-translation done: {}".format(out.getresjson()),MODULE_CONTEXT)
         # log_info(entry_exit_log(LOG_TAGS["output"],out))
         return out.getres()
     else:
         log_info("null inputs in request in v2/interactive-translation API",MODULE_CONTEXT)
         out = CustomResponse(Status.INVALID_API_REQUEST.value,None)
         return out.getres()        
Example No. 9
 def post(self):
     translation_batch = {}
     src_list, response_body = list(), list()
     inputs = request.get_json(force=True)
     if len(inputs) > 0 and all(v in inputs
                                for v in ['src_list', 'model_id']):
         try:
             log_info("Making v1/translate API call", MODULE_CONTEXT)
             log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
             input_src_list = inputs.get('src_list')
             src_list = [i.get('src') for i in input_src_list]
             if len(src_list) > translation_batch_limit:
                 raise Exception(
                     f"Number of sentences per request exceeded the limit of:{translation_batch_limit} sentences per batch"
                 )
             translation_batch = {
                 'id': inputs.get('model_id'),
                 'src_list': src_list
             }
             output_batch = FairseqDocumentTranslateService.batch_translator(
                 translation_batch)
             output_batch_dict_list = [{
                 'tgt':
                 output_batch['tgt_list'][i],
                 'tagged_tgt':
                 output_batch['tagged_tgt_list'][i],
                 'tagged_src':
                 output_batch['tagged_src_list'][i]
             } for i in range(len(input_src_list))]
             for j, k in enumerate(input_src_list):
                 k.update(output_batch_dict_list[j])
                 response_body.append(k)
             out = CustomResponse(Status.SUCCESS.value, response_body)
             log_info(
                 "Final output from v1/translate API: {}".format(
                     out.get_res_json()), MODULE_CONTEXT)
         except Exception as e:
             status = Status.SYSTEM_ERR.value
             status['message'] = str(e)
             log_exception(
                 "Exception caught in batch_translator child block: {}".
                 format(e), MODULE_CONTEXT, e)
             out = CustomResponse(status, inputs)
         return out.jsonify_res()
     else:
         log_info(
             "API input missing mandatory data ('src_list','model_id')",
             MODULE_CONTEXT)
         status = Status.INVALID_API_REQUEST.value
         status[
             'message'] = "Missing mandatory data ('src_list','model_id')"
         out = CustomResponse(status, inputs)
         return out.jsonify_res()
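
A minimal request payload for this endpoint, based only on the keys the handler reads ('model_id', and 'src_list' items carrying 'src'); the values are illustrative:

    # Illustrative v1/translate payload; the model id and sentences are placeholders.
    payload = {
        "model_id": 56,                                    # id of the translation model to use
        "src_list": [
            {"src": "The agreement was signed today."},    # each item must provide a 'src' sentence
            {"src": "The hearing was adjourned."}
        ]
    }
    # On success, each src_list item is returned with added 'tgt',
    # 'tagged_tgt' and 'tagged_src' fields.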
Example No. 10
 def post(self):
     inputs = request.get_json(force=True)
     if len(inputs) > 0:
         log_info("Making v0/interactive-translation API call",
                  MODULE_CONTEXT)
         log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
         out = FairseqAutoCompleteTranslateService.constrained_translation(
             inputs)
         log_info(
             "out from v0/interactive-translation done: {}".format(
                 out.getresjson()), MODULE_CONTEXT)
         return out.jsonify_res()
     else:
         log_info(
             "null inputs in request in v0/interactive-translation API",
             MODULE_CONTEXT)
         out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
         return out.jsonify_res()
Example No. 11
 def post(self):
     inputs = request.get_json(force=True)
     if len(inputs)>0:
         log_info("Making interactive-translation API call",MODULE_CONTEXT)
         log_info("inputs---{}".format(inputs),MODULE_CONTEXT)
         # log_info(entry_exit_log(LOG_TAGS["input"],inputs))
         out = TranslateService.interactive_translation(inputs)
         out = out.getresjson()
         complete_response = out['response_body']
         out['response_body'] = [{"tgt": complete_response[i]['tgt'][0],"tagged_tgt":complete_response[i]['tagged_tgt'][0],
                                 "tagged_src":complete_response[i]['tagged_src'],"s_id":complete_response[i]['s_id'],
                                 "src":complete_response[i]["src"]}
                 for i in range(len(complete_response))]
         log_info("out from interactive-translation done: {}".format(out),MODULE_CONTEXT)
         # log_info(entry_exit_log(LOG_TAGS["output"],out))
         return CustomResponse.jsonify(out)
     else:
         log_info("null inputs in request in interactive-translation API",MODULE_CONTEXT)
         out = CustomResponse(Status.INVALID_API_REQUEST.value,None)
         return out.getres()
Example No. 12
    def find_performance_pipeline(input_text_file,model_id,batch_size,max_batch_size,batch_type):
        '''
        Given an input English text file (one sentence per line),
        returns the average time taken per word for each step in the
        translation pipeline.
        '''
        try:
            with open(input_text_file,'r') as f:
                input_text_array = f.readlines()
                input_text_array = [sent.rstrip('\n') for sent in input_text_array]
            # input_text_array = input_text_array[:500]
            word_count = 0
            for sentence in input_text_array:
                word_count += len(sentence.split())

            batch_input_array = []
            num_of_batch = (len(input_text_array) + batch_size - 1) // batch_size  # include the trailing partial batch
            for i in range(num_of_batch):
                prev_index = i * batch_size
                if (prev_index + batch_size) < len(input_text_array):
                    input_batch = {'id': model_id, 'src_list': input_text_array[prev_index: prev_index + batch_size]}
                else:
                    input_batch = {'id': model_id, 'src_list': input_text_array[prev_index: ]}
                batch_input_array.append(input_batch)

            time_model_loading_array, time_preprocessing_array, time_tokenizing_array, time_encoding_array, \
            time_translating_array, time_decoding_array, time_detokenizing_array, time_postprocessing_array = [],[],[],[],[],[],[],[]
            for batch_input in batch_input_array:
                time_taken_dict = NMTTranslatePerformanceService.batch_translator(batch_input,max_batch_size,batch_type)
                time_model_loading_array.append(time_taken_dict["time_model_loading"])
                time_preprocessing_array.append(time_taken_dict["time_preprocessing"])
                time_tokenizing_array.append(time_taken_dict["time_tokenizing"])
                time_encoding_array.append(time_taken_dict["time_encoding"])
                time_translating_array.append(time_taken_dict["time_translating"])
                time_decoding_array.append(time_taken_dict["time_decoding"])
                time_detokenizing_array.append(time_taken_dict["time_detokenizing"])
                time_postprocessing_array.append(time_taken_dict["time_postprocessing"])          

            return sum(time_model_loading_array) /word_count, sum(time_preprocessing_array) /word_count,\
                sum(time_tokenizing_array) /word_count, sum(time_encoding_array) /word_count,\
                    sum(time_translating_array) /word_count, sum(time_decoding_array) /word_count,\
                        sum(time_detokenizing_array) /word_count, sum(time_postprocessing_array) /word_count
        
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            log_exception("Exception caught in performance check: {} ".format(e),MODULE_CONTEXT,e)
            out = CustomResponse(status, [])  

        return out
Example No. 13
    def find_performance(input_text_file,model_id,batch_size):
        '''
        Given an input English text file (one sentence per line),
        returns the average number of words translated per second by the
        document translator.
        '''
        try:
            with open(input_text_file,'r') as f:
                input_text_array = f.readlines()
                input_text_array = [sent.rstrip('\n') for sent in input_text_array]

            word_count = 0
            for sentence in input_text_array:
                word_count += len(sentence.split())

            batch_input_array = []
            num_of_batch = (len(input_text_array) + batch_size - 1) // batch_size  # include the trailing partial batch
            for i in range(num_of_batch):
                prev_index = i * batch_size
                if (prev_index + batch_size) < len(input_text_array):
                    input_batch = {'id': model_id, 'src_list': input_text_array[prev_index: prev_index + batch_size]}
                else:
                    input_batch = {'id': model_id, 'src_list': input_text_array[prev_index: ]}
                batch_input_array.append(input_batch)

            time_taken_array = []
            out_tgt_array = []
            for batch_input in batch_input_array:
                start = time.time()
                out_batch = NMTTranslateService.batch_translator(batch_input)
                time_taken = time.time() - start
                time_taken_array.append(time_taken)
                out_tgt_array.append(out_batch['tgt_list'])           
            avg_words_per_sec = word_count / sum(time_taken_array)
            out_tgt_array = [sentence for out_batch in out_tgt_array for sentence in out_batch]

            return avg_words_per_sec, out_tgt_array
        
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            log_exception("Exception caught in performance check: {} ".format(e),MODULE_CONTEXT,e)
            out = CustomResponse(status, [])  

        return out
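
A sketch of how this benchmark might be driven, assuming the BatchNMTPerformanceService wrapper used in the performance resource of Example No. 26; the file path, model id and batch size are placeholders:

    # Hypothetical driver for find_performance; all argument values are placeholders.
    words_per_sec, translations = BatchNMTPerformanceService.find_performance(
        input_text_file="/home/user/benchmark_en.txt",     # one English sentence per line
        model_id=56,                                        # model to benchmark
        batch_size=32)                                      # sentences per request batch
    print("average words translated per second:", words_per_sec)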
Example No. 14
    def get(self, user_id, s_id):
        AppContext.addRecordID(None)
        log_info(
            "SentenceBlockGetResource {} for user {}".format(s_id, user_id),
            AppContext.getContext())

        try:
            result = SentenceRepositories.get_sentence_block(user_id, s_id)
            if result is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400
            res = CustomResponse(Status.SUCCESS.value, result)
            return res.getres()
        except Exception as e:
            log_exception("SentenceBlockGetResource ", AppContext.getContext(),
                          e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
Example No. 15
 def get(self):
     log_info("FetchModelsResource api called", MODULE_CONTEXT)
     try:
         with open(config.FETCH_MODEL_CONFG) as f:
             confs = json.load(f)
             models = confs['data']
             out = CustomResponse(Status.SUCCESS.value, models)
         return out.getres()
     except Exception as e:
         log_exception("Error in FetchModelsResource: {}".format(e),
                       MODULE_CONTEXT, e)
         status = Status.SYSTEM_ERR.value
         status['why'] = str(e)
         out = CustomResponse(status, None)
         return out.getres()
Example No. 16
 def get(self):
     log_info("NMT Health api called", MODULE_CONTEXT)
     out = CustomResponse(Status.SUCCESS.value, [])
     return out.getres()
Example No. 17
    def post(self):
        '''
        Endpoint used when only source and target language information is available.
        '''
        translation_batch = {}
        src_list, response_body = list(), list()
        content_type = 'application/json'
        inputs = request.get_json(force=True)
        if request.content_type != content_type:
            status = Status.INVALID_CONTENT_TYPE.value
            log_exception("v1.1 translate API | Invalid content type",
                          MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 406, {
                'Content-Type': content_type,
                'X-Content-Type-Options': 'nosniff'
            }

        if len(inputs) > 0 and all(
                v in inputs for v in
            ['src_list', 'source_language_code', 'target_language_code']):
            if (inputs.get('source_language_code') not in supported_languages
                ) or (inputs.get('target_language_code')
                      not in supported_languages):
                status = Status.UNSUPPORTED_LANGUAGE.value
                log_exception(
                    "v1.1 translate API | Unsupported input language code",
                    MODULE_CONTEXT, status['message'])
                out = CustomResponse(status, html_encode(inputs))
                return out.get_res_json(), 400, {
                    'Content-Type': content_type,
                    'X-Content-Type-Options': 'nosniff'
                }
            elif inputs.get('source_language_code') == inputs.get(
                    'target_language_code'):
                status = Status.SAME_LANGUAGE_VALUE.value
                log_exception(
                    "v1.1 translate API | src and tgt code can't be same",
                    MODULE_CONTEXT, status['message'])
                out = CustomResponse(status, html_encode(inputs))
                return out.get_res_json(), 400, {
                    'Content-Type': content_type,
                    'X-Content-Type-Options': 'nosniff'
                }

            try:
                log_info("Making translate v1.1 API call", MODULE_CONTEXT)
                log_info("v1.1 translate API | input--- {}".format(inputs),
                         MODULE_CONTEXT)
                input_src_list = inputs.get('src_list')
                src_list = [i.get('src') for i in input_src_list]
                m_id = get_model_id(inputs.get('source_language_code'),
                                    inputs.get('target_language_code'))
                translation_batch = {
                    'id': m_id,
                    'src_lang': inputs.get('source_language_code'),
                    'tgt_lang': inputs.get('target_language_code'),
                    'src_list': src_list
                }
                output_batch = FairseqDocumentTranslateService.indic_to_indic_translator(
                    translation_batch)
                output_batch_dict_list = [{
                    'tgt': output_batch['tgt_list'][i]
                } for i in range(len(input_src_list))]
                for j, k in enumerate(input_src_list):
                    k.update(output_batch_dict_list[j])
                    response_body.append(k)
                out = CustomResponse(Status.SUCCESS.value, response_body)
                log_info(
                    "Final output v1.1 API | {}".format(out.get_res_json()),
                    MODULE_CONTEXT)
                return out.get_res_json(), 200, {
                    'Content-Type': content_type,
                    'X-Content-Type-Options': 'nosniff'
                }
            except Exception as e:
                status = Status.SYSTEM_ERR.value
                status['message'] = str(e)
                log_exception(
                    "Exception caught in v1.1 translate API resource child block: {}"
                    .format(e), MODULE_CONTEXT, e)
                out = CustomResponse(status, html_encode(inputs))
                return out.get_res_json(), 500, {
                    'Content-Type': content_type,
                    'X-Content-Type-Options': 'nosniff'
                }
        else:
            status = Status.INVALID_API_REQUEST.value
            status[
                'message'] = "Missing mandatory data ('src_list','source_language_code','target_language_code')"
            log_exception(
                "v1.1 translate API | input missing mandatory data ('src_list','source_language_code','target_language_code')",
                MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 401, {
                'Content-Type': content_type,
                'X-Content-Type-Options': 'nosniff'
            }
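
A minimal, illustrative payload for this v1.1 endpoint; only the three mandatory keys checked above are included, and the language codes and sentence are placeholders:

    # Illustrative v1.1 translate payload (values are placeholders).
    payload = {
        "source_language_code": "en",                      # must be in supported_languages
        "target_language_code": "hi",                      # must differ from the source code
        "src_list": [{"src": "The petition was dismissed."}]
    }
    # On success, each src_list item is returned with an added 'tgt' field.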
Example No. 18
    def post(self):
        body = request.get_json()
        user_id = request.headers.get('userid')
        if user_id is None:
            user_id = request.headers.get('ad-userid')

        workflowCode = None

        if 'blocks' not in body or user_id is None:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        if 'workflowCode' in body:
            workflowCode = body['workflowCode']

        blocks = body['blocks']
        AppContext.addRecordID(None)
        log_info(
            "FileContentUpdateResource for user ({}), to update ({}) blocks".
            format(user_id, len(blocks)), AppContext.getContext())

        try:
            result, updated_blocks = fileContentRepo.update(
                user_id, blocks, workflowCode)

            if result is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400

            log_info(
                "FileContentUpdateResource for user ({}) updated".format(
                    user_id), AppContext.getContext())
            response = {'blocks': updated_blocks, 'workflowCode': workflowCode}
            res = CustomResponse(Status.SUCCESS.value, response,
                                 len(updated_blocks))
            return res.getres()
        except Exception as e:
            log_exception("FileContentUpdateResource ",
                          AppContext.getContext(), e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
Example No. 19
    def post(self):
        body = request.get_json()
        user_id = request.headers.get('userid')
        if user_id is None:
            user_id = request.headers.get('ad-userid')

        pages = body.get('pages')
        file_locale = ''

        if 'file_locale' in body:
            file_locale = body['file_locale']

        job_id = ''
        if 'job_id' in body:
            job_id = body['job_id']

        record_id = None
        if 'record_id' in body:
            record_id = body['record_id']

        src_lang = None
        if 'src_lang' in body:
            src_lang = body['src_lang']
        tgt_lang = None
        if 'tgt_lang' in body:
            tgt_lang = body['tgt_lang']

        if 'pages' not in body or user_id is None or record_id is None or src_lang is None or tgt_lang is None:
            log_info(
                'Missing params in FileContentSaveResource {}, user_id:{}'.
                format(body, user_id), AppContext.getContext())
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        AppContext.addRecordID(record_id)
        log_info(
            "FileContentSaveResource record_id ({}) for user ({})".format(
                record_id, user_id), AppContext.getContext())

        try:
            if fileContentRepo.store(user_id, file_locale, record_id, pages,
                                     src_lang, tgt_lang) is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400

            log_info(
                "FileContentSaveResource record_id ({}) for user ({}) saved".
                format(record_id, user_id), AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, None)
            return res.getres()
        except Exception as e:
            log_exception("FileContentSaveResource ", AppContext.getContext(),
                          e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
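
For reference, a request that passes the validation in this handler might look like the following sketch; the 'userid' header name and the required keys come from the code above, and all values are placeholders:

    # Hypothetical FileContentSaveResource request (placeholder values).
    headers = {"userid": "user-001"}                       # or 'ad-userid', per the fallback above
    body = {
        "record_id": "REC-123",
        "src_lang": "en",
        "tgt_lang": "hi",
        "file_locale": "en",                               # optional, defaults to ''
        "job_id": "JOB-9",                                 # optional, defaults to ''
        "pages": []                                        # page objects; their structure is not shown in the source
    }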
Example No. 20
    def post(self):
        body = request.json

        log_info('received request for WordSearch', AppContext.getContext())
        if 'word' not in body or 'word_locale' not in body or 'target_locale' not in body:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        if (body['word_locale'] == 'en') or (body['target_locale'] == 'en'):
            result = None
            if body['word_locale'] == 'en':
                body['word'] = body['word'].lower()
                result = wordRepo.search_english(body['word'],
                                                 body['target_locale'])
            else:
                result = wordRepo.search_vernacular(body['word'],
                                                    body['word_locale'])

            if result is None:
                '''
                    - call google apis to get the translation
                    - save the translation
                    - return the response
                '''
                log_info(
                    'checking google for the searched word ({})'.format(
                        body['word']), AppContext.getContext())

                input_word, translated_word, input_locale = translate.translate_text(
                    body['target_locale'], body['word'])
                log_info(
                    'google returned input ({}), translated ({})'.format(
                        input_word, translated_word), AppContext.getContext())
                if translated_word is None:
                    res = CustomResponse(Status.SUCCESS.value, None)
                    return res.getres()
                else:
                    if body['word_locale'] == 'en':
                        result = wordRepo.update(body['word'], 'en',
                                                 translated_word,
                                                 body['target_locale'])
                    else:
                        result = wordRepo.update(translated_word,
                                                 body['target_locale'],
                                                 body['word'],
                                                 body['word_locale'])

                    if result is None:
                        res = CustomResponse(Status.SUCCESS.value, None)
                        return res.getres()
                    else:
                        res = CustomResponse(Status.SUCCESS.value, result)
                        return res.getres()
            else:
                log_info('returning word search from local database',
                         AppContext.getContext())
                res = CustomResponse(Status.SUCCESS.value, result)
                return res.getres()
        else:
            res = CustomResponse(Status.ERR_ENGLISH_MANDATORY.value, None)
            return res.getresjson(), 400
Example No. 21
    def interactive_translation(inputs):
        out = {}
        i_src, tgt = list(), list()
        tagged_tgt = list()
        tagged_src = list()
        sentence_id = list()
        tp_tokenizer = None

        try:
            for i in inputs:
                sentence_id.append(i.get("s_id") or "NA")
                if any(v not in i for v in ['src', 'id']):
                    log_info("either id or src missing in some input",
                             MODULE_CONTEXT)
                    out = CustomResponse(Status.ID_OR_SRC_MISSING.value,
                                         inputs)
                    return out

                log_info("input sentence:{}".format(i['src']), MODULE_CONTEXT)
                i_src.append(i['src'])
                i['src'] = i['src'].strip()

                i['src_lang'], i['tgt_lang'] = misc.get_src_tgt_langauge(
                    i['id'])
                i['src'] = misc.convert_digits_preprocess(
                    i['src_lang'], i['src'])

                if special_case_handler.special_case_fits(i['src']):
                    log_info(
                        "sentence fits in special case, returning accordingly and not going to model",
                        MODULE_CONTEXT)
                    translation = special_case_handler.handle_special_cases(
                        i['src'], i['id'])
                    translation = [translation]
                    tag_tgt, tag_src = translation, i['src']

                else:
                    log_info(
                        "Performing interactive translation on:{}".format(
                            i['id']), MODULE_CONTEXT)
                    i['src'], date_original, url_original, num_array, num_map = tagger_util.tag_number_date_url(
                        i['src'])
                    tag_src = i['src']

                    if i['id'] == 56:
                        "english-hindi"
                        if i['src'].isupper():
                            log_info(
                                "src all Upper case hence Tital casing it",
                                MODULE_CONTEXT)
                            i['src'] = i['src'].title()
                        tp_tokenizer = sentence_processor.indic_tokenizer
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.indic_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 7:
                        "english-tamil"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 10:
                        "english-gujarati"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 15:
                        "english-kannada"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 16:
                        "english-telugu"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 17:
                        "english-malayalam"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 18:
                        "english-punjabi"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 42:
                        "english-marathi"
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                    elif i['id'] == 50:
                        "telugu-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 6:
                        "hindi-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 62:
                        "marathi-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 8:
                        "tamil-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 55:
                        "punjabi-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 48:
                        "kannada-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 60:
                        "malayalam-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 52:
                        "gujarati-english"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 65:
                        "english-bengali 4th"
                        tp_tokenizer = sentence_processor.indic_tokenizer
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.indic_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 66:
                        "bengali-english 3rd"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 67:
                        "ta-en 3rd"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 68:
                        "en-ta 5th"
                        tp_tokenizer = sentence_processor.indic_tokenizer
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.indic_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 69:
                        "hi-en 3rd"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 70:
                        "en-hi 15th"
                        tp_tokenizer = sentence_processor.indic_tokenizer
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.indic_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 71:
                        "te-en 2nd"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 72:
                        "en-te 3rd"
                        tp_tokenizer = sentence_processor.indic_tokenizer
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.indic_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 73:
                        "ml-en 2nd"
                        tp_tokenizer = sentence_processor.moses_tokenizer
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.moses_detokenizer(i)
                            for i in translation
                        ]
                    elif i['id'] == 74:
                        "en-ml 3rd"
                        tp_tokenizer = sentence_processor.indic_tokenizer
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation = encode_itranslate_decode_v2(
                            i, num_map, tp_tokenizer)
                        translation = [
                            sentence_processor.indic_detokenizer(i)
                            for i in translation
                        ]

                    else:
                        log_info(
                            "unsupported model id: {} for given input".format(
                                i['id']), MODULE_CONTEXT)
                        raise Exception(
                            "Unsupported Model ID - id: {} for given input".
                            format(i['id']))

                    translation = [i.replace("▁", " ") for i in translation]
                    translation = [
                        misc.regex_pass(i, [
                            patterns['p8'], patterns['p9'], patterns['p4'],
                            patterns['p5'], patterns['p6'], patterns['p7']
                        ]) for i in translation
                    ]
                    tag_tgt = translation
                    translation = [
                        tagger_util.replace_tags_with_original(
                            i, date_original, url_original, num_array, num_map)
                        for i in translation
                    ]
                    translation = [
                        misc.convert_digits_postprocess(i['tgt_lang'], item)
                        for item in translation
                    ]
                log_info(
                    "interactive translation-experiment-{} output: {}".format(
                        i['id'], translation), MODULE_CONTEXT)
                tgt.append(translation)
                tagged_tgt.append(tag_tgt)
                tagged_src.append(tag_src)

            out['response_body'] = [{
                "tgt": tgt[i],
                "tagged_tgt": tagged_tgt[i],
                "tagged_src": tagged_src[i],
                "s_id": sentence_id[i],
                "src": i_src[i]
            } for i in range(len(tgt))]
            out = CustomResponse(Status.SUCCESS.value, out['response_body'])
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            status['why'] = str(e)
            log_exception(
                "Unexpected error:%s and %s" % (e, sys.exc_info()[0]),
                MODULE_CONTEXT, e)
            out = CustomResponse(status, inputs)

        return out
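
A minimal input list for this service, based only on the fields the loop reads ('id', 'src' and the optional 's_id'); model id 56 corresponds to the english-hindi branch above, and the sentence is illustrative:

    # Illustrative interactive_translation input (values are placeholders).
    inputs = [
        {"id": 56,                                         # model id; 56 is the english-hindi branch above
         "src": "The order was passed by the court.",      # sentence to translate
         "s_id": "sent-1"}                                  # optional sentence id, defaults to "NA"
    ]
    out = TranslateService.interactive_translation(inputs)  # wrapper used in Examples No. 8 and No. 11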
Example No. 22
    def post(self):
        body = request.get_json()
        user_id = request.headers.get('userid')
        if user_id is None:
            user_id = request.headers.get('x-user-id')

        if 'sentences' not in body or user_id is None or 'workflowCode' not in body:
            log_info(
                'Missing params in SaveSentenceResource {}, user_id:{}'.format(
                    body, user_id), AppContext.getContext())
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        sentences = body['sentences']
        workflowCode = body['workflowCode']

        AppContext.addRecordID(None)
        log_info(
            "SaveSentenceResource for user {}, number sentences to update {} request {}"
            .format(user_id, len(sentences), body), AppContext.getContext())

        try:
            result = sentenceRepo.update_sentences(user_id, sentences,
                                                   workflowCode)
            if result is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400

            if USER_TRANSLATION_ENABLED:
                try:
                    result = sentenceRepo.save_sentences(user_id, sentences)
                except Exception as e:
                    log_exception("SaveSentenceResource",
                                  AppContext.getContext(), e)

            # sentence_ids = []
            # for sentence in sentences:
            #     sentence_ids.append(sentence['s_id'])

            # result  = sentenceRepo.get_sentence(user_id, sentence_ids)
            # if result == False:
            #     res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            #     return res.getresjson(), 400
            # else:
            res = CustomResponse(Status.SUCCESS.value, sentences)
            return res.getres()

        except Exception as e:
            log_exception("SaveSentenceResource ", AppContext.getContext(), e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
Example No. 23
    def post(self):
        body = request.get_json()

        if "keys" not in body or not body["keys"]:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        keys = body["keys"]

        log_info("Fetching sentences from redis store",
                 AppContext.getContext())

        try:
            result = sentenceRepo.get_sentences_from_store(keys)
            if result is None:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400

            res = CustomResponse(Status.SUCCESS.value, result)
            return res.getres()
        except Exception as e:
            log_exception(
                "Exception while fetching sentences from redis store ",
                AppContext.getContext(), e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
Example No. 24
    def post(self):
        body = request.get_json()
        user_id = request.headers.get('userid')
        if user_id is None:
            user_id = request.headers.get('x-user-id')

        s_ids = None
        if 'sentences' in body:
            s_ids = body['sentences']

        if user_id is None or s_ids is None:
            log_info(
                'Missing params in FetchSentenceResource {}, user_id:{}'.
                format(body, user_id), AppContext.getContext())
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        AppContext.addRecordID(None)
        log_info(
            "FetchSentenceResource s_ids {} for user {}".format(
                len(s_ids), user_id), AppContext.getContext())

        try:
            result = sentenceRepo.get_sentence(user_id, s_ids)
            if result is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400

            res = CustomResponse(Status.SUCCESS.value, result)
            return res.getres()
        except Exception as e:
            log_exception("FetchSentenceResource ", AppContext.getContext(), e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
Example No. 25
    def post(self):
        body = request.get_json()
        user_id = request.headers.get('userid')
        if user_id is None:
            user_id = request.headers.get('x-user-id')

        if 'record_ids' not in body or user_id is None:
            log_info(
                'Missing params in SentenceStatisticsCount {}, user_id:{}'.
                format(body, user_id), AppContext.getContext())
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        record_ids = body['record_ids']

        bleu_return = body.get('bleu_score', False)
        AppContext.addRecordID(None)
        log_info(
            "SentenceStatisticsCount for user {}, sentence count for record_ids {}"
            .format(user_id, record_ids), AppContext.getContext())

        try:
            result = sentenceRepo.get_sentences_counts(record_ids, bleu_return)
            if result is False:
                res = CustomResponse(
                    Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
                return res.getresjson(), 400

            res = CustomResponse(Status.SUCCESS.value, result)
            return res.getres()
        except Exception as e:
            log_exception("SentenceStatisticsCount ", AppContext.getContext(),
                          e)
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400
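
For reference, a hypothetical request matching the checks above: "record_ids" is mandatory and "bleu_score" is optional (defaulting to False); the URL and id values are placeholders.

    import requests

    headers = {"userid": "user-123"}
    payload = {"record_ids": ["record-1", "record-2"], "bleu_score": True}
    resp = requests.post("http://localhost:5000/sentence-statistics", json=payload, headers=headers)   # URL assumed
    print(resp.status_code, resp.json())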
Exemplo n.º 26
0
    def post(self):
        inputs = request.get_json(force=True)
        if len(inputs) > 0:
            log_info("Making performance check API call", MODULE_CONTEXT)
            try:
                for i in inputs:
                    if i['mode'] == 0:
                        # mode 0: overall throughput (average words per second); the translated
                        # sentences are also written to an output file in the home directory
                        avg_words_per_sec, target_array = BatchNMTPerformanceService.find_performance(
                            i['input_txt_file'], i['model_id'], i['batch_size'])

                        output_file_name = os.path.basename(i['input_txt_file']).split(".")[0] + "_" + str(i['model_id']) + \
                            "_" + str(i['batch_size']) + "_" + "output" +".txt"
                        with open(
                                os.path.join(str(Path.home()),
                                             output_file_name), 'w') as f:
                            for sentence in target_array:
                                f.write("%s\n" % sentence)

                        out = {}
                        out['response_body'] = {
                            "words_per_sec": avg_words_per_sec
                        }
                        out = CustomResponse(Status.SUCCESS.value,
                                             out['response_body'])
                        log_info(
                            "out from performance check done: {}".format(
                                out.getresjson()), MODULE_CONTEXT)

                        return out.getres()

                    elif i['mode'] == 1:
                        # mode 1: average time per word spent in each stage of the pipeline (loading,
                        # preprocessing, tokenizing, encoding, translating, decoding, detokenizing, postprocessing)
                        time_taken_array = BatchNMTPerformanceService.find_performance_pipeline(
                            i['input_txt_file'], i['model_id'], i['batch_size'], i['max_batch_size'], i['batch_type'])

                        out = {}
                        out['response_body'] = {"avg_time_loading_per_word": time_taken_array[0] ,\
                                                "avg_time_preprocessing_per_word": time_taken_array[1] ,\
                                                "avg_time_tokenizing_per_word": time_taken_array[2] ,\
                                                "avg_time_encoding_per_word": time_taken_array[3] ,\
                                                "avg_time_translating_per_word": time_taken_array[4] ,\
                                                "avg_time_decoding_per_word": time_taken_array[5] ,\
                                                "avg_time_detokenizing_per_word": time_taken_array[6] ,\
                                                "avg_time_postprocessing_per_word": time_taken_array[7] }
                        out = CustomResponse(Status.SUCCESS.value,
                                             out['response_body'])
                        log_info(
                            "out from performance check done: {}".format(
                                out.getresjson()), MODULE_CONTEXT)

                        return out.getres()

            except Exception as e:
                status = Status.SYSTEM_ERR.value
                status['why'] = str(e)
                out = CustomResponse(status, [])

                return out.getres()
        else:
            log_info("null inputs in request in /v0/performance API",
                     MODULE_CONTEXT)
            out = CustomResponse(Status.INVALID_API_REQUEST.value, None)

            return out.getres()
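
Hypothetical payloads for the two performance modes handled above; the /v0/performance path is taken from the log message, while the host, file path, model id, and batch settings are placeholders.

    import requests

    mode0 = [{"mode": 0, "input_txt_file": "/path/to/input.txt", "model_id": 56, "batch_size": 32}]
    mode1 = [{"mode": 1, "input_txt_file": "/path/to/input.txt", "model_id": 56,
              "batch_size": 32, "max_batch_size": 64, "batch_type": "examples"}]   # batch_type value is a guess
    for payload in (mode0, mode1):
        resp = requests.post("http://localhost:5000/v0/performance", json=payload)   # host assumed
        print(resp.json())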
Exemplo n.º 27
0
    def post(self):
        inputs = request.get_json(force=True)
        response_list = list()
        if len(inputs) > 0:
            log_info("Making labse-aligner(Resource) API call", MODULE_CONTEXT)
            log_info("Complete request input: {}".format(inputs),
                     MODULE_CONTEXT)
            try:
                for i in inputs:
                    if all(v in i for v in ["src_phrases", "tgt"]):
                        log_info("Making labse-aligner service call",
                                 MODULE_CONTEXT)
                        res = LabseAlignerService.phrase_aligner(i)
                        response_list.append(res)
                        out = CustomResponse(Status.SUCCESS.value,
                                             response_list)
                    else:
                        log_info(
                            "Missing mandatory Parameters for labse-aligner:src_phrases or tgt",
                            MODULE_CONTEXT)
                        out = CustomResponse(
                            Status.MANDATORY_PARAM_MISSING.value, [])
                        return out.getres()
            except Exception as e:
                status = Status.SYSTEM_ERR.value
                status['why'] = str(e)
                out = CustomResponse(status, [])

            return out.getres()
        else:
            log_info("null inputs in request in labse-aligner API",
                     MODULE_CONTEXT)
            out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
            return out.getres()
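
A minimal request sketch for the labse-aligner handler: each item needs "src_phrases" and "tgt", as checked above; the URL and the text values are placeholders.

    import requests

    payload = [{"src_phrases": ["first phrase", "second phrase"],
                "tgt": "<target-language sentence to align against>"}]
    resp = requests.post("http://localhost:5000/labse-aligner", json=payload)   # URL assumed
    print(resp.status_code, resp.json())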
Exemplo n.º 28
0
    def post(self):
        body = request.json

        log_info('received request for WordSaveResource',
                 AppContext.getContext())
        if body is None:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        if 'words' not in body:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
            return res.getresjson(), 400

        for word in body['words']:
            if word['locale'] != 'en':
                res = CustomResponse(
                    Status.ERR_ENGLISH_MANDATORY_WHILE_SAVING.value, None)
                return res.getresjson(), 400

        result = wordRepo.store(body['words'])

        if result == False:
            res = CustomResponse(Status.ERR_SCHEMA_VALIDATION.value, None)
            return res.getresjson(), 400

        res = CustomResponse(Status.SUCCESS.value, None)
        return res.getres()
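
A hypothetical payload for WordSaveResource; the handler only validates that "words" is present and that every entry has locale "en", so any other fields shown here are assumptions about the word schema.

    import requests

    payload = {"words": [{"word": "agreement", "locale": "en"}]}   # "word" key is an assumed field name
    resp = requests.post("http://localhost:5000/word/save", json=payload)   # URL assumed
    print(resp.status_code, resp.json())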
Exemplo n.º 29
0
    def post(self):
        body = request.json

        parser = reqparse.RequestParser()
        parser.add_argument('dict_fallback', type=int, location='args',
                            help='set 1 to invoke the Google Translate fallback and 0 to disable it',
                            required=False, default=1)
        args = parser.parse_args()
        dict_fallback = args["dict_fallback"]
        
        log_info('received request for WordSearch', AppContext.getContext())
        if 'word' not in body or 'word_locale' not in body or 'target_locale' not in body:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return res.getresjson(), 400

        if (body['word_locale'] == 'en') or (body['target_locale'] == 'en'):
            result = None
            if body['word_locale'] == 'en':
                body['word'] = body['word'].lower()
                result = wordRepo.search_english(body['word'], body['target_locale'])
            else:
                result = wordRepo.search_vernacular(body['word'], body['word_locale'])
            if result is None and DICTIONARY_FALLBACK:
                translate   = GoogleTranslate()
                '''
                    - call google apis to get the translation
                    - save the translation
                    - return the response
                '''
                log_info('checking google for the searched word ({})'.format(body['word']), AppContext.getContext())

                input_word, translated_word, input_locale = translate.translate_text(body['target_locale'], body['word'])
                log_info('google returned input ({}), translated ({})'.format(input_word, translated_word), AppContext.getContext())
                if translated_word is None:
                    res = CustomResponse(Status.SUCCESS.value, None)
                    return res.getres()
                else:
                    if body['word_locale'] == 'en':
                        result = wordRepo.update(body['word'], 'en', translated_word, body['target_locale'])
                    else:
                        result = wordRepo.update(translated_word, body['target_locale'], body['word'], body['word_locale'])

                    if result is None:
                        res = CustomResponse(Status.SUCCESS.value, None)
                        return res.getres()
                    else:
                        res = CustomResponse(Status.SUCCESS.value, result)
                        return res.getres()
            else:
                log_info('returning word search from local database', AppContext.getContext())
                res = CustomResponse(Status.SUCCESS.value, result)
                return res.getres()
        else:
            res = CustomResponse(Status.ERR_ENGLISH_MANDATORY.value, None)
            return res.getresjson(), 400
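
A sketch of a WordSearch request consistent with the checks above: "word", "word_locale", and "target_locale" are required, at least one locale must be "en", and dict_fallback is passed as a query parameter; the URL and locale values are placeholders.

    import requests

    payload = {"word": "agreement", "word_locale": "en", "target_locale": "hi"}
    resp = requests.post("http://localhost:5000/word/search",
                         params={"dict_fallback": 1}, json=payload)   # URL assumed
    print(resp.status_code, resp.json())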
Exemplo n.º 30
0
    def translate_func(inputs):

        out = {}
        pred_score = list()
        sentence_id, node_id = list(), list()
        input_subwords, output_subwords = list(), list()
        i_src, tgt = list(), list()
        tagged_tgt, tagged_src = list(), list()
        s_id, n_id = [0000], [0000]
        i_s0_src, i_s0_tgt, i_save = list(), list(), list()
        i_tmx_phrases = list()

        try:
            for i in inputs:
                s0_src, s0_tgt, save = "NA", "NA", False
                if all(v in i for v in ['s_id', 'n_id']):
                    s_id = [i['s_id']]
                    n_id = [i['n_id']]

                if any(v not in i for v in ['src', 'id']):
                    log_info("either id or src missing in some input",
                             MODULE_CONTEXT)
                    out = CustomResponse(Status.ID_OR_SRC_MISSING.value,
                                         inputs)
                    return out

                if any(v in i for v in ['s0_src', 's0_tgt', 'save']):
                    s0_src, s0_tgt, save = handle_custome_input(
                        i, s0_src, s0_tgt, save)

                i_s0_src.append(s0_src)
                i_s0_tgt.append(s0_tgt)
                i_save.append(save)

                log_info("input sentences:{}".format(i['src']), MODULE_CONTEXT)
                i_src.append(i['src'])
                i['src'] = i['src'].strip()

                src_language, tgt_language = misc.get_src_tgt_langauge(i['id'])
                if src_language == 'English' and i['src'].isupper():
                    i['src'] = i['src'].title()
                i['src'] = misc.convert_digits_preprocess(
                    src_language, i['src'])

                if special_case_handler.special_case_fits(i['src']):
                    log_info(
                        "sentence fits in special case, returning accordingly and not going to model",
                        MODULE_CONTEXT)
                    translation = special_case_handler.handle_special_cases(
                        i['src'], i['id'])
                    scores = [1]
                    input_sw, output_sw, tag_tgt, tag_src = "", "", translation, i[
                        'src']

                else:
                    log_info("translating using NMT-model:{}".format(i['id']),
                             MODULE_CONTEXT)
                    prefix, i['src'] = special_case_handler.prefix_handler(
                        i['src'])
                    i['src'], date_original, url_original, num_array, num_map = tagger_util.tag_number_date_url(
                        i['src'])
                    tag_src = (prefix + " " + i['src']).lstrip()

                    i['src'], is_missing_stop_punc = special_case_handler.handle_a_sentence_wo_stop(
                        src_language, i['src'])

                    if i['id'] == 6:
                        "hi-en_exp-2 05-05-20"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)

                    elif i['id'] == 7:
                        "english-tamil"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 10:
                        "english-gujrati"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 15:
                        "english-kannada"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 16:
                        "english-telgu"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 17:
                        "english-malayalam"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 18:
                        "english-punjabi"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 42:
                        "english-marathi exp-2"
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                    elif i['id'] == 56:
                        "09/12/19-Exp-5.6:"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 8:
                        "ta-en 1st"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 44:
                        "eng-mr-3rd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 47:
                        "en-kn 2nd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 48:
                        "kn-en 1st"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 49:
                        "en-tel 2nd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 50:
                        "tel-en 1st"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 51:
                        "en-guj 2nd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 52:
                        "guj-en 1st"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 53:
                        "en-punjabi 2nd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 55:
                        "punjabi-en 1st"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 57:
                        "en-bengali 3rd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 58:
                        "bengali-en 2nd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 59:
                        "en-malay 2nd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 60:
                        "malay-en 1st"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 62:
                        "mr-to-en 2nd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 65:
                        "en-bengali 4th"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 66:
                        "bengali-en 3rd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 67:
                        "ta-en 3rd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 68:
                        "en-ta 5th"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 69:
                        "hi-en 3rd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 70:
                        "en-hi 15th"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 71:
                        "te-en 2nd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 72:
                        "en-te 3rd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    elif i['id'] == 73:
                        "ml-en 2nd"
                        i['src'] = sentence_processor.indic_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.moses_detokenizer(
                            translation)
                    elif i['id'] == 74:
                        "en-ml 3rd"
                        i['src'] = sentence_processor.moses_tokenizer(i['src'])
                        translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                            i)
                        translation = sentence_processor.indic_detokenizer(
                            translation)
                    else:
                        log_info(
                            "Unsupported model id: {} for given input".format(
                                i['id']), MODULE_CONTEXT)
                        raise Exception(
                            "Unsupported Model ID - id: {} for given input".
                            format(i['id']))

                    translation = oc.postprocess_a_sentence_wo_stop(
                        tgt_language, translation, is_missing_stop_punc)
                    translation = (prefix + " " + translation).lstrip()
                    translation = translation.replace("▁", " ")
                    translation = misc.regex_pass(translation, [
                        patterns['p8'], patterns['p9'], patterns['p4'],
                        patterns['p5'], patterns['p6'], patterns['p7']
                    ])
                    tag_tgt = translation
                    translation = tagger_util.replace_tags_with_original(
                        translation, date_original, url_original, num_array,
                        num_map)
                    translation = oc.cleaner(tag_src, translation, i['id'])
                    translation = misc.convert_digits_postprocess(
                        tgt_language, translation)
                log_info(
                    "translate_function-experiment-{} output: {}".format(
                        i['id'], translation), MODULE_CONTEXT)
                tgt.append(translation)
                pred_score.append(scores)
                sentence_id.append(s_id[0])
                node_id.append(n_id[0])
                input_subwords.append(input_sw)
                output_subwords.append(output_sw)
                tagged_tgt.append(tag_tgt)
                tagged_src.append(tag_src)
                i_tmx_phrases.append(i.get("tmx_phrases", []))

            out['response_body'] = [{
                "tgt": tgt[i],
                "pred_score": pred_score[i],
                "s_id": sentence_id[i],
                "input_subwords": input_subwords[i],
                "output_subwords": output_subwords[i],
                "n_id": node_id[i],
                "src": i_src[i],
                "tagged_tgt": tagged_tgt[i],
                "tagged_src": tagged_src[i],
                "save": i_save[i],
                "s0_src": i_s0_src[i],
                "s0_tgt": i_s0_tgt[i],
                "tmx_phrases": i_tmx_phrases[i]
            } for i in range(len(tgt))]
            out = CustomResponse(Status.SUCCESS.value, out['response_body'])
        except ServerModelError as e:
            status = Status.SEVER_MODEL_ERR.value
            status['why'] = str(e)
            log_exception(
                "ServerModelError error in TRANSLATE_UTIL-translate_func: {} and {}"
                .format(e,
                        sys.exc_info()[0]), MODULE_CONTEXT, e)
            out = CustomResponse(status, inputs)
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            status['why'] = str(e)
            log_exception(
                "Unexpected error:%s and %s" % (e, sys.exc_info()[0]),
                MODULE_CONTEXT, e)
            out = CustomResponse(status, inputs)

        return out
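
A minimal sketch of the input translate_func consumes, assuming the module's models and configuration are already loaded; each item needs "src" and a supported model "id", while "s_id", "n_id", and "tmx_phrases" are optional. The example sentence and id values are placeholders.

    batch = [{
        "src": "The committee will meet on Monday.",
        "id": 56,                       # one of the model ids handled in the if/elif chain above
        "s_id": 1, "n_id": 1,           # optional sentence / node identifiers
        "tmx_phrases": []               # optional translation-memory phrases
    }]
    response = translate_func(batch)    # returns a CustomResponse
    print(response.getres())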