def get(self):
    """Return the stored pages of a record for the requesting user.

    Query args: ``start_page`` and ``end_page`` (ints, both required),
    ``record_id`` (required) and ``job_id`` (optional; parsed but not used
    in this handler). The caller id is read from the ``ad-userid`` header.

    Returns:
        The success response built from ``result['pages']`` and
        ``result['total']``; otherwise an ``ERR_GLOBAL_MISSING_PARAMETERS``
        JSON body with HTTP 400 when the repository returns ``False`` or
        anything inside the ``try`` raises.
    """
    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument(
        'start_page',
        type=int,
        location='args',
        required=True,
        help='start_page can be 0, set start_page & end_page as 0 to get entire document',
    )
    arg_parser.add_argument(
        'end_page',
        type=int,
        location='args',
        required=True,
        help='end_page can be 0, set start_page & end_page as 0 to get entire document',
    )
    arg_parser.add_argument('ad-userid', location='headers', type=str,
                            required=True, help='userid cannot be empty')
    arg_parser.add_argument('job_id', type=str, location='args',
                            required=False, help='Job Id is required')
    arg_parser.add_argument('record_id', type=str, location='args',
                            required=True, help='record_id is required')
    args = arg_parser.parse_args()

    record_id = args['record_id']
    user_id = args['ad-userid']

    AppContext.addRecordID(record_id)
    log_info(
        "FileContentGetResource record_id {} for user {}".format(record_id, user_id),
        AppContext.getContext())

    try:
        result = fileContentRepo.get(user_id, record_id,
                                     args['start_page'], args['end_page'])
        # The repository signals failure with a False return value.
        if result == False:
            failure = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return failure.getresjson(), 400

        log_info(
            "FileContentGetResource record_id {} for user {} has {} pages".format(
                record_id, user_id, result['total']),
            AppContext.getContext())
        success = CustomResponse(Status.SUCCESS.value, result['pages'], result['total'])
        return success.getres()
    except Exception as e:
        log_exception("FileContentGetResource ", AppContext.getContext(), e)
        failure = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return failure.getresjson(), 400
def post(self):
    """Persist the parsed pages of a file for a record.

    Reads the JSON body and the caller id from the ``userid`` header,
    falling back to ``ad-userid``. ``pages``, ``record_id``, ``src_lang``
    and ``tgt_lang`` are required; ``file_locale`` is optional and defaults
    to ``''``.

    Returns:
        The success response when ``fileContentRepo.store`` succeeds;
        otherwise an ``ERR_GLOBAL_MISSING_PARAMETERS`` JSON body with HTTP
        400 (required parameter missing, ``False`` from the repository, or
        an exception raised while storing).
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('ad-userid')

    # Look fields up defensively: indexing body['pages'] before validating
    # raised KeyError (or TypeError for a non-object body) instead of
    # producing the intended 400 response.
    payload = body if isinstance(body, dict) else {}
    pages = payload.get('pages')
    file_locale = payload.get('file_locale', '')
    record_id = payload.get('record_id')
    src_lang = payload.get('src_lang')
    tgt_lang = payload.get('tgt_lang')

    if ('pages' not in payload or user_id is None or record_id is None
            or src_lang is None or tgt_lang is None):
        log_info(
            'Missing params in FileContentSaveResource {}, user_id:{}'.format(
                body, user_id),
            AppContext.getContext())
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getresjson(), 400

    AppContext.addRecordID(record_id)
    log_info(
        "FileContentSaveResource record_id ({}) for user ({})".format(
            record_id, user_id),
        AppContext.getContext())

    try:
        # The repository signals failure with a False return value.
        if fileContentRepo.store(user_id, file_locale, record_id, pages,
                                 src_lang, tgt_lang) == False:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return res.getresjson(), 400

        log_info(
            "FileContentSaveResource record_id ({}) for user ({}) saved".format(
                record_id, user_id),
            AppContext.getContext())
        res = CustomResponse(Status.SUCCESS.value, None)
        return res.getres()
    except Exception as e:
        log_exception("FileContentSaveResource ", AppContext.getContext(), e)
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getresjson(), 400
def post(self):
    """Update a batch of sentences on behalf of the calling user.

    The JSON body must contain ``sentences`` and ``workflowCode``; the caller
    id comes from the ``userid`` header, falling back to ``x-user-id``.

    Returns:
        The success response echoing the submitted ``sentences``; otherwise
        an ``ERR_GLOBAL_MISSING_PARAMETERS`` JSON body with HTTP 400 when a
        required parameter is missing, ``update_sentences`` returns ``False``
        or an exception is raised.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')

    has_required = ('sentences' in body and user_id is not None
                    and 'workflowCode' in body)
    if not has_required:
        log_info(
            'Missing params in SaveSentenceResource {}, user_id:{}'.format(
                body, user_id),
            AppContext.getContext())
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400

    sentences = body['sentences']
    workflowCode = body['workflowCode']

    AppContext.addRecordID(None)
    log_info(
        "SaveSentenceResource for user {}, number sentences to update {} request {}"
        .format(user_id, len(sentences), body),
        AppContext.getContext())

    try:
        if sentenceRepo.update_sentences(user_id, sentences, workflowCode) == False:
            error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return error.getresjson(), 400

        if USER_TRANSLATION_ENABLED:
            # Best effort: a failure here is logged and does not fail the request.
            try:
                sentenceRepo.save_sentences(user_id, sentences)
            except Exception as e:
                log_exception("SaveSentenceResource", AppContext.getContext(), e)

        return CustomResponse(Status.SUCCESS.value, sentences).getres()
    except Exception as e:
        log_exception("SaveSentenceResource ", AppContext.getContext(), e)
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400
def get_record_by_page(self, record_id, page_number):
    """Fetch the first stored document matching a record id and page number.

    Runs an aggregation that matches on ``recordID`` and
    ``page_info.page_no`` and projects away ``_id``.

    Returns:
        The first matching document, ``None`` when nothing matches, or
        ``False`` when an exception is raised (the exception is logged).
    """
    try:
        collection = get_db()[DB_SCHEMA_NAME]
        pipeline = [
            {'$match': {'recordID': record_id, 'page_info.page_no': page_number}},
            {'$project': {'_id': 0}},
        ]
        # Only the first result is of interest; None means "no such page".
        return next(iter(collection.aggregate(pipeline)), None)
    except Exception as e:
        AppContext.addRecordID(record_id)
        log_exception(
            "Exception on fetching record by page | DigitalDocumentModel :{}".format(str(e)),
            AppContext.getContext(), e)
        return False
def post(self):
    """Update content blocks of a record and return the saved blocks.

    The JSON body must contain ``blocks``; ``modifiedSentences``,
    ``workflowCode`` and ``record_id`` are optional and default to ``None``.
    The caller id comes from the ``userid`` header, falling back to
    ``x-user-id``.

    Returns:
        The success response carrying ``{'blocks': ..., 'workflowCode': ...}``
        and the number of updated blocks; otherwise an
        ``ERR_GLOBAL_MISSING_PARAMETERS`` JSON body with HTTP 400.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')

    modifiedSentences = body['modifiedSentences'] if 'modifiedSentences' in body else None

    if 'blocks' not in body or user_id is None:
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400

    workflowCode = body['workflowCode'] if 'workflowCode' in body else None
    record_id = body['record_id'] if 'record_id' in body else None
    blocks = body['blocks']

    AppContext.addRecordID(record_id)
    log_info(
        "FileContentUpdateResource for user ({}), to update ({}) blocks".format(
            user_id, len(blocks)),
        AppContext.getContext())
    log_info(str(body), AppContext.getContext())

    try:
        result, updated_blocks = fileContentRepo.update(
            record_id, user_id, blocks, workflowCode, modifiedSentences)
        if result == False:
            error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return error.getresjson(), 400

        log_info(
            "FileContentUpdateResource for user ({}) updated".format(user_id),
            AppContext.getContext())
        payload = {'blocks': updated_blocks, 'workflowCode': workflowCode}
        return CustomResponse(Status.SUCCESS.value, payload, len(updated_blocks)).getres()
    except Exception as e:
        log_exception("FileContentUpdateResource ", AppContext.getContext(), e)
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400
def post(self):
    """Store a digitised document (its files and pages) for a record.

    The JSON body must contain non-empty ``files`` and ``recordID`` and a
    non-empty ``metadata.userID``.

    Returns:
        The success response when ``digitalRepo.store`` returns ``None``.
        Otherwise HTTP 400 with a ``post_error`` payload: a required field is
        missing, the repository returned ``False``, the repository returned
        its own error object (passed through unchanged), or an exception was
        raised.
    """
    body = request.get_json()

    # A missing or non-object body is reported like a body with no fields,
    # instead of raising TypeError on the membership tests below.
    if not isinstance(body, dict) or not body.get('files'):
        return post_error("Data Missing", "files is required", None), 400
    if not body.get('recordID'):
        return post_error("Data Missing", "recordID is required", None), 400

    files = body['files']
    recordID = body['recordID']

    # 'metadata' may be absent or not an object; indexing it directly raised
    # KeyError/TypeError outside the try block instead of returning a 400.
    metadata = body.get('metadata')
    userID = metadata.get('userID') if isinstance(metadata, dict) else None

    AppContext.addRecordID(recordID)
    if not userID:
        # This message used to be logged on every valid request; it belongs
        # on the path where a parameter really is missing.
        log_info(
            'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
            .format(body, userID, recordID),
            AppContext.getContext())
        return post_error("Data Missing", "userID is required", None), 400

    try:
        AppContext.addRecordID(recordID)
        log_info(
            'DigitalDocumentSaveResource request received, user_id:{}, record_id:{}'
            .format(userID, recordID),
            AppContext.getContext())

        result = digitalRepo.store(userID, recordID, files)

        if result is None:
            # The repository returns nothing on success.
            AppContext.addRecordID(recordID)
            log_info(
                'DigitalDocumentSaveResource request completed, user_id:{}, record_id:{}'
                .format(userID, recordID),
                AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, None)
            return res.getres()

        log_info(
            'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
            .format(body, userID, recordID),
            AppContext.getContext())
        if result == False:
            return post_error("Data Missing",
                              "Failed to store doc since data is missing",
                              None), 400
        # Any other value is an error object produced by the repository.
        return result, 400
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            "Exception on save document | DigitalDocumentSaveResource :{}".format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to store doc since data is missing",
                          None), 400
def update_words(self, user_id, words):
    """Apply user corrections to individual words of stored regions.

    Each entry of ``words`` is validated with
    ``validator.update_word_validation`` and must provide ``page_no``,
    ``region_id``, ``word_id``, ``record_id`` and ``updated_word``. For a
    matching word the previous ``text`` is kept in ``ocr_text`` before
    ``text`` is overwritten, and the region is flagged ``updated``.

    Returns:
        ``True`` when every word was processed; otherwise the first
        validation result that is not ``None`` or a ``post_error`` payload
        (region not found, or ``docModel.update_word`` returned ``False``).
    """
    for word in words:
        validation_error = validator.update_word_validation(word)
        if validation_error is not None:
            return validation_error

        page = word['page_no']
        region_id = word['region_id']
        word_id = word['word_id']
        record_id = word['record_id']
        user_word = word['updated_word']

        AppContext.addRecordID(record_id)
        log_info("DigitalDocumentRepo update word request", AppContext.getContext())

        region_to_update = self.docModel.get_word_region(
            user_id, record_id, region_id, page)
        if not region_to_update:
            return post_error(
                "Data Missing",
                "No record with the given user_id,record_id and region_id",
                None)

        if region_to_update['identifier'] == region_id:
            region_to_update['updated'] = True
            for line in region_to_update['regions']:
                for candidate in line['regions']:
                    if candidate['identifier'] == word_id:
                        # Keep the previous text before overwriting it.
                        candidate['ocr_text'] = candidate['text']
                        candidate['text'] = user_word
                        # Stops scanning this line only; later lines are still visited.
                        break

        AppContext.addRecordID(record_id)
        log_info(
            "DigitalDocumentRepo update word region :{}".format(str(region_to_update)),
            AppContext.getContext())
        print(region_to_update)

        if self.docModel.update_word(user_id, record_id, region_id,
                                     region_to_update, page) == False:
            return post_error(
                "Data Missing",
                "Failed to update word since data is missing",
                None)
    return True
def update(self, record_id, user_id, blocks, workflowCode, modifiedSentences=None):
    """Update the given blocks and read back what was saved.

    Each block is first passed through ``update_block_info``; the result is
    written with ``blockModel.update_block`` and then re-read with
    ``blockModel.get_block_by_block_identifier``.

    Returns:
        ``(True, saved_blocks)`` on success, or ``(False, saved_blocks)`` as
        soon as one block fails to update; ``saved_blocks`` then holds the
        blocks read back so far.
    """
    # workflowCode:
    # - WF_S_TR and WF_S_TKTR change the sentence structure, hence the s0
    #   pair needs to be updated
    # - DP_WFLOW_S_C doesn't change the sentence structure, hence no need to
    #   update the s0 pair
    update_s0 = workflowCode in ('WF_S_TR', 'WF_S_TKTR')

    log_info(
        "FileContentUpdateRepo -workflowcode : {} | update_S0 : {}".format(
            workflowCode, update_s0),
        AppContext.getContext())

    updated_blocks = [
        self.update_block_info(block, update_s0, modifiedSentences)
        for block in blocks
    ]

    saved_blocks = []
    for updated_block in updated_blocks:
        block_identifier = updated_block['data']['block_identifier']

        AppContext.addRecordID(record_id)
        log_info("FileContentUpdateRepo -updating blocks", AppContext.getContext())
        if self.blockModel.update_block(record_id, user_id, block_identifier,
                                        updated_block) == False:
            return False, saved_blocks

        AppContext.addRecordID(record_id)
        log_info("FileContentUpdateRepo -fetching back updated blocks",
                 AppContext.getContext())
        stored = self.blockModel.get_block_by_block_identifier(
            record_id, user_id, block_identifier)
        saved_blocks.extend(entry['data'][0] for entry in stored)

        log_info(
            "FileContentUpdateRepo -updated blocks : {}".format(str(saved_blocks)),
            AppContext.getContext())

    return True, saved_blocks
def get(self, user_id, s_id):
    """Return the sentence block identified by ``s_id`` for ``user_id``.

    Returns:
        ``(result, 200)`` with the raw value from
        ``SentenceRepositories.get_sentence_block``; otherwise an
        ``ERR_GLOBAL_MISSING_PARAMETERS`` JSON body with HTTP 400 when the
        lookup returns ``False`` or raises.
    """
    AppContext.addRecordID(None)
    log_info(
        "SentenceBlockGetResource {} for user {}".format(s_id, user_id),
        AppContext.getContext())

    try:
        result = SentenceRepositories.get_sentence_block(user_id, s_id)
        if result == False:
            error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return error.getresjson(), 400

        # NOTE(review): a success CustomResponse is built but the raw result
        # is what gets returned - confirm whether clients rely on this shape
        # before switching to res.getres().
        res = CustomResponse(Status.SUCCESS.value, result)
        return result, 200
    except Exception as e:
        log_exception("SentenceBlockGetResource ", AppContext.getContext(), e)
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400
def post(self):
    """Return sentence counts for the requested records.

    The JSON body must contain ``record_ids``; ``bleu_score`` is optional and
    defaults to ``False``. The caller id comes from the ``userid`` header,
    falling back to ``x-user-id``.

    Returns:
        The success response wrapping the value from
        ``sentenceRepo.get_sentences_counts``; otherwise an
        ``ERR_GLOBAL_MISSING_PARAMETERS`` JSON body with HTTP 400.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')

    if not ('record_ids' in body and user_id is not None):
        log_info(
            'Missing params in SentenceStatisticsCount {}, user_id:{}'.format(
                body, user_id),
            AppContext.getContext())
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400

    record_ids = body['record_ids']
    bleu_return = body['bleu_score'] if 'bleu_score' in body else False

    AppContext.addRecordID(None)
    log_info(
        "SentenceStatisticsCount for user {}, sentence count for record_ids {}"
        .format(user_id, record_ids),
        AppContext.getContext())

    try:
        result = sentenceRepo.get_sentences_counts(record_ids, bleu_return)
        if result == False:
            error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return error.getresjson(), 400
        return CustomResponse(Status.SUCCESS.value, result).getres()
    except Exception as e:
        log_exception("SentenceStatisticsCount ", AppContext.getContext(), e)
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400
def store(self, userID, recordID, files):
    """Create one block per page of every file and bulk-store them.

    The job id is the part of ``recordID`` before the first ``'|'``.

    Returns:
        ``None`` when everything was stored; ``False`` when
        ``docModel.store_bulk_blocks`` returns ``False``; the value returned
        by ``create_regions_from_page`` when it has five keys or fewer
        (treated as an error payload); or a ``post_error`` payload when an
        exception is raised.
    """
    try:
        for file in files:
            jobID = recordID.split('|')[0]
            file_meta = file['file']
            fileID = file_meta['identifier']
            file_name = file_meta['name']
            locale = file['config']['language']
            file_type = file_meta['type']
            pages = file['pages']

            log_info(
                "DigitalDocumentRepo save document for user: {}| record: {}| count of pages received: {}"
                .format(userID, recordID, str(len(pages))),
                AppContext.getContext())

            blocks = []
            for page in pages:
                block = self.create_regions_from_page(
                    userID, jobID, recordID, fileID, file_name, locale,
                    file_type, page)
                # A small dict is not a page block; hand it straight back.
                if len(block.keys()) <= 5:
                    return block
                blocks.append(block)

            log_info(
                'DigitalDocumentRepo page blocks created for insert, user_id:{}, record_id:{}, block length:{}'
                .format(userID, recordID, str(len(blocks))),
                AppContext.getContext())

            if self.docModel.store_bulk_blocks(blocks) == False:
                return False
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            'Exception on save document | DigitalDocumentRepo :{}'.format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to store doc since :{}".format(str(e)),
                          None)
def get_blocks_by_page(self, record_id, page_number):
    """Return the blocks of one page of a record, grouped by ``data_type``.

    Runs an aggregation that matches on ``page_no`` and ``record_id`` and
    groups by ``data_type``, pushing each document's ``data`` into the
    group's ``data`` list.

    Returns:
        The aggregation result as returned by ``aggregate``, or ``False``
        when an exception is raised (the exception is logged).
    """
    match_stage = {'$match': {'page_no': page_number, 'record_id': record_id}}
    group_stage = {'$group': {'_id': '$data_type', 'data': {'$push': "$data"}}}
    try:
        collection = get_db()[DB_SCHEMA_NAME]
        return collection.aggregate([match_stage, group_stage])
    except Exception as e:
        AppContext.addRecordID(record_id)
        log_exception("db connection exception ", AppContext.getContext(), e)
        return False
def update_sentences(self, user_id, sentences, workflowCode):
    """Fill in defaults on each sentence and persist it by its ``s_id``.

    Each sentence dict is modified in place: missing ``save``,
    ``bleu_score``, ``time_spent_ms`` and ``rating_score`` keys get defaults,
    and for the ``WF_S_TR`` / ``WF_S_TKTR`` workflows ``s0_tgt`` / ``s0_src``
    are overwritten from ``tgt`` / ``src``. The record id is the first two
    ``'|'``-separated parts of the sentence's ``n_id``.

    Returns:
        ``True`` when every sentence was stored, ``False`` as soon as
        ``sentenceModel.update_sentence_by_s_id`` returns ``False``.
    """
    # workflowCode:
    # - WF_S_TR and WF_S_TKTR change the sentence structure, hence the s0
    #   pair needs to be updated
    # - DP_WFLOW_S_C doesn't change the sentence structure, hence no need to
    #   update the s0 pair
    update_s0 = workflowCode in ('WF_S_TR', 'WF_S_TKTR')

    defaults = (
        ('save', False),
        ('bleu_score', 0),
        ('time_spent_ms', 0),
        ('rating_score', None),
    )

    for sentence in sentences:
        if update_s0:
            sentence['s0_tgt'] = sentence['tgt']
            sentence['s0_src'] = sentence['src']

        for key, default in defaults:
            sentence.setdefault(key, default)

        n_id_parts = sentence['n_id'].split('|')
        record_id = '{}|{}'.format(n_id_parts[0], n_id_parts[1])

        AppContext.addRecordID(record_id)
        log_info("SaveSentenceRepo -saving sentence blocks", AppContext.getContext())

        if self.sentenceModel.update_sentence_by_s_id(
                record_id, user_id, sentence) == False:
            return False
    return True
def create_regions_from_page(self, userID, jobID, recordID, fileID, file_name,
                             locale, file_type, page):
    """Build the storable block for one page of a file.

    The block carries the file/job/user metadata, a ``created_on``
    timestamp, a ``page_info`` dict and the page's ``regions``. The stored
    ``page_no`` is the incoming ``page['page_no']`` plus one;
    ``page_resolution`` is included only when the page has ``resolution``.

    Returns:
        The block dict, or a ``post_error`` payload when an exception is
        raised (for example a required page key is missing).
    """
    try:
        AppContext.addRecordID(recordID)
        log_info(
            'DigitalDocumentRepo page blocks creation started for record_id:{}, page_number:{}'
            .format(recordID, str(page['page_no'])),
            AppContext.getContext())

        block_info = {
            'userID': userID,
            'jobID': jobID,
            'recordID': recordID,
            'file_identifier': fileID,
            'file_name': file_name,
            'file_locale': locale,
            'file_type': file_type,
            'created_on': datetime.utcnow(),
        }

        page_info = {
            'page_no': page['page_no'] + 1,
            'page_identifier': page['identifier'],
            'page_boundingBox': page['boundingBox'],
            'page_img_path': page['path'],
        }
        if 'resolution' in page.keys():
            page_info['page_resolution'] = page['resolution']

        block_info['page_info'] = page_info
        block_info['regions'] = page['regions']
        return block_info
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            'Exception on save document | DigitalDocumentRepo :{}'.format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to store doc since data is missing",
                          None)
def get_pages(self, record_id, start_page=1, end_page=5):
    """Return the formatted pages of a record for a page range.

    Args:
        record_id: Identifier of the record whose pages are fetched.
        start_page: First page to return; ``0`` means page 1.
        end_page: Last page to return; ``0`` means page 5, unless
            ``start_page`` is also ``0``, in which case the whole document
            is returned.

    Returns:
        A dict with ``pages`` (output of ``format_page_data``),
        ``start_page``, ``end_page`` (the range actually served) and
        ``total`` (page count of the document); or ``False`` when the range
        is invalid, starts beyond the last page, or a page lookup returns
        ``False``.
    """
    total_page_count = self.docModel.get_document_total_page_count(record_id)

    if start_page == 0 and end_page == 0:
        start_page, end_page = 1, total_page_count
    if start_page == 0:
        start_page = 1
    if end_page == 0:
        end_page = 5

    if start_page > end_page or start_page > total_page_count:
        return False

    # Never ask for pages past the end of the document: the lookup yields no
    # document for them and the missing entries would be handed to
    # format_page_data (e.g. the default end_page=5 on a 3-page document).
    end_page = min(end_page, total_page_count)

    AppContext.addRecordID(record_id)
    log_info(
        "DigitalDocumentRepo fetching doc by pages for record_id:{}".format(
            str(record_id)),
        AppContext.getContext())

    data_page = []
    for page_number in range(start_page, end_page + 1):
        page_block = self.docModel.get_record_by_page(record_id, page_number)
        # False is the lookup's failure marker; abort the whole request.
        if page_block is False:
            return False
        data_page.append(page_block)

    return {
        'pages': self.format_page_data(data_page),
        'start_page': start_page,
        'end_page': end_page,
        'total': total_page_count,
    }
def post(self):
    """Fetch sentences by id for the calling user.

    The JSON body must contain ``sentences`` (the ids to fetch); the caller
    id comes from the ``userid`` header, falling back to ``x-user-id``.

    Returns:
        The success response wrapping the value from
        ``sentenceRepo.get_sentence``; otherwise an
        ``ERR_GLOBAL_MISSING_PARAMETERS`` JSON body with HTTP 400.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')

    s_ids = body['sentences'] if 'sentences' in body else None

    if user_id is None or s_ids is None:
        log_info(
            'Missing params in FetchSentenceResource {}, user_id:{}'.format(
                body, user_id),
            AppContext.getContext())
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400

    AppContext.addRecordID(None)
    log_info(
        "FetchSentenceResource s_ids {} for user {}".format(len(s_ids), user_id),
        AppContext.getContext())

    try:
        result = sentenceRepo.get_sentence(user_id, s_ids)
        if result == False:
            error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return error.getresjson(), 400
        return CustomResponse(Status.SUCCESS.value, result).getres()
    except Exception as e:
        log_exception("FetchSentenceResource ", AppContext.getContext(), e)
        error = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return error.getresjson(), 400
def get(self):
    """Return the pages of a digitised document for a page range.

    Query args: ``start_page`` and ``end_page`` (ints, both required) and
    ``recordID`` (required).

    Returns:
        The success response built from ``result['pages']`` and
        ``result['total']``; otherwise a ``post_error`` payload with HTTP 400
        when ``digitalRepo.get_pages`` returns ``False`` or an exception is
        raised.
    """
    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument(
        'start_page',
        type=int,
        location='args',
        required=True,
        help='start_page can be 0, set start_page & end_page as 0 to get entire document',
    )
    arg_parser.add_argument(
        'end_page',
        type=int,
        location='args',
        required=True,
        help='end_page can be 0, set start_page & end_page as 0 to get entire document',
    )
    arg_parser.add_argument('recordID', type=str, location='args',
                            required=True, help='record_id is required')
    args = arg_parser.parse_args()
    record_id = args['recordID']

    AppContext.addRecordID(record_id)
    log_info(
        "DigitalDocumentGetResource record_id {} ".format(record_id),
        AppContext.getContext())

    try:
        result = digitalRepo.get_pages(record_id, args['start_page'], args['end_page'])
        if result == False:
            return post_error("Data Missing",
                              "Failed to get pages since data is missing",
                              None), 400

        AppContext.addRecordID(record_id)
        log_info(
            "DigitalDocumentGetResource record_id {} has {} pages".format(
                record_id, result['total']),
            AppContext.getContext())
        return CustomResponse(Status.SUCCESS.value, result['pages'], result['total']).getres()
    except Exception as e:
        AppContext.addRecordID(record_id)
        log_exception(
            "Exception in DigitalDocumentGetResource |{}".format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to get pages since data is missing",
                          None), 400
def store(self, user_id, file_locale, record_id, pages, src_lang, tgt_lang):
    """Turn the images, lines and text blocks of each page into stored blocks.

    For every page the ``images``, ``lines`` and ``text_blocks`` entries
    (when present and not ``None``) are converted with ``create_block_info``.
    A failure in one section is logged and the remaining sections and pages
    are still processed; blocks created before the failure are kept.
    ``file_locale`` is accepted but not used here.

    Returns:
        ``False`` when ``blockModel.store_bulk_blocks`` returns ``False``,
        otherwise ``True``.
    """
    # (page key, log message template, log with log_exception instead of log_info)
    sections = (
        ('images', 'images key not present, thats strange:{}', True),
        ('lines', 'lines key is not present, ignorning further:{}', False),
        ('text_blocks', 'text_blocks key not present, thats strange:{}', True),
    )

    blocks = []
    for page in pages:
        log_info(page, AppContext.getContext())
        page_info = {
            'page_no': page['page_no'],
            'page_width': page['page_width'],
            'page_height': page['page_height'],
        }

        for data_type, message, as_exception in sections:
            try:
                if data_type in page and page[data_type] is not None:
                    for item in page[data_type]:
                        blocks.append(
                            self.create_block_info(item, record_id, page_info,
                                                   data_type, user_id,
                                                   src_lang, tgt_lang))
            except Exception as e:
                AppContext.addRecordID(record_id)
                if as_exception:
                    log_exception(message.format(str(e)),
                                  AppContext.getContext(), e)
                else:
                    log_info(message.format(str(e)), AppContext.getContext())

    if self.blockModel.store_bulk_blocks(blocks) == False:
        return False
    return True
# Application entry point: creates the Flask app, optionally enables CORS and
# registers every Blueprint exposed by the `routes` module.
from flask import Flask
from flask.blueprints import Blueprint
from flask_cors import CORS
from anuvaad_auditor.loghandler import log_info
import routes
import config
from utilities import AppContext

server = Flask(__name__)

# CORS is switched on through config; when enabled, any origin may call /api/*.
if config.ENABLE_CORS:
    cors = CORS(server, resources={r"/api/*": {"origins": "*"}})

# Register every Blueprint found among the attributes of `routes` under the
# configured URL prefix; non-Blueprint attributes are ignored.
for blueprint in vars(routes).values():
    if isinstance(blueprint, Blueprint):
        server.register_blueprint(blueprint, url_prefix=config.API_URL_PREFIX)

# Only start the built-in server when this file is run as a script.
if __name__ == "__main__":
    AppContext.addRecordID(None)
    log_info('starting server at {} at port {}'.format(config.HOST, config.PORT), AppContext.getContext())
    server.run(host=config.HOST, port=config.PORT, debug=config.DEBUG)