def get_category(list_product_names, job_id):
    """Build one output entry per product record in a batch.

    Each record is checked with ``validate_product_args``. Valid records are
    handed to ``process_product`` and its result is collected. Invalid records
    yield the validator's error response, extended with the record's own
    key/value pairs. A record whose handling raises is logged and reported to
    Sentry and contributes nothing to the output.

    Args:
        list_product_names: iterable of product records; a falsy value
            produces a single ``ERROR_CODE['MissingProductList']`` entry.
        job_id: identifier of the originating job, used only in error reports.

    Returns:
        list: the collected results and error responses.
    """
    logger.info("Request received {}".format(list_product_names))
    output_list = []

    if not list_product_names:
        # Nothing to categorise: report the missing list as the only entry.
        output_list.append(ERROR_CODE['MissingProductList'])
    else:
        for product_name_dict in list_product_names:
            try:
                valid_record, error_response = validate_product_args(product_name_dict)
                if not valid_record:
                    # Echo the offending record's fields next to the error.
                    error_response.update(product_name_dict)
                    output_list.append(error_response)
                    continue
                output_list.append(
                    process_product(product_name_dict, cat_model, dang_model, logger))
            except Exception as err:
                # Best effort: a failing record must not abort the whole batch.
                logger.error(
                    'get_category:Exception {} occurred against input: {} for job_id {}'.format(
                        err, list_product_names, job_id))
                sentry_client.captureException(
                    message="Exception occurred against input in get_category",
                    extra={
                        "error": err,
                        "job_id": job_id,
                        "product_name_dict": product_name_dict,
                    })

    logger.info("Result produced {}".format(output_list))
    return output_list
def get_category():
    """Categorise every product in the JSON request body.

    The body is read with ``request.get_json()`` and treated as a list of
    records. For each record with a non-empty ``product_name`` the prediction
    is looked up in the cache ``r`` and computed with ``predict_category`` on
    a miss. Records without a product name are flagged with
    ``invalid_product_name``. Every result carries the record's ``wbn`` under
    ``waybill``.

    Returns:
        A ``Response`` with the JSON-encoded list of results, or ``None`` when
        an exception was caught (it is logged and reported to Sentry).
    """
    # Bound before the try block: if reading or listing the request body is
    # the step that fails, the handler below still needs this name. It used to
    # raise UnboundLocalError there and hide the original error.
    list_product_names = None
    try:
        list_product_names = list(request.get_json())
        output_list = []
        for product_name_dict in list_product_names:
            app.logger.info("Request received {}".format(product_name_dict))
            results = {}
            product_name = product_name_dict.get('product_name', "")
            if product_name:
                product_name_clean = re.sub(ALPHA_NUM_REGEX, '', product_name).lower()
                # Key layout (including the doubled colon) is kept as-is so
                # entries already in the cache remain reachable.
                product_name_key = 'catfight:' + ':' + product_name_clean
                results_cache = r.get(product_name_key)
                if not results_cache:
                    results = predict_category(
                        product_name.encode('ascii', 'ignore'),
                        cat_model, dang_model, app.logger)
                    if results:
                        # NOTE(review): (name, value, time) is the legacy
                        # redis-py argument order - confirm the client version.
                        r.setex(product_name_key, json.dumps(results), CACHE_EXPIRY)
                    results['cached'] = False
                else:
                    results = json.loads(results_cache)
                    results['cached'] = True
            else:
                results['invalid_product_name'] = True
            results['waybill'] = product_name_dict.get('wbn', None)
            app.logger.info("Result produced {}".format(results))
            output_list.append(results)
        return Response(json.dumps(output_list), mimetype='application/json')
    except Exception as err:
        app.logger.error('Exception {} occurred against payload: {}'.format(
            err, list_product_names))
        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "payload": list_product_names})
def get_category():
    """Return category predictions for the products in the JSON request body.

    Each record of the body (read through ``request.get_json()``) is handled
    independently: a non-empty ``product_name`` is normalised into a cache
    key, served from the cache ``r`` when present and otherwise predicted with
    ``predict_category``; a missing name is marked ``invalid_product_name``.
    The record's ``wbn`` is copied to ``waybill`` on every result.

    Returns:
        A ``Response`` holding the JSON-encoded result list, or ``None`` when
        an exception was caught (after logging it and notifying Sentry).
    """
    # Defined ahead of the try block so the except handler can always report
    # the payload; previously a failure while reading the body left the name
    # unbound and the handler itself raised UnboundLocalError.
    list_product_names = None
    try:
        list_product_names = list(request.get_json())
        output_list = []
        for product_name_dict in list_product_names:
            app.logger.info("Request received {}".format(product_name_dict))
            results = {}
            product_name = product_name_dict.get('product_name', "")
            if product_name:
                product_name_clean = re.sub(ALPHA_NUM_REGEX, '', product_name).lower()
                # The key format is left untouched (doubled colon included) so
                # existing cache entries stay valid.
                product_name_key = 'catfight:' + ':' + product_name_clean
                results_cache = r.get(product_name_key)
                if not results_cache:
                    results = predict_category(
                        product_name.encode('ascii', 'ignore'),
                        cat_model, dang_model, app.logger)
                    if results:
                        # NOTE(review): argument order (name, value, time)
                        # matches legacy redis-py - confirm the client version.
                        r.setex(product_name_key, json.dumps(results), CACHE_EXPIRY)
                    results['cached'] = False
                else:
                    results = json.loads(results_cache)
                    results['cached'] = True
            else:
                results['invalid_product_name'] = True
            results['waybill'] = product_name_dict.get('wbn', None)
            app.logger.info("Result produced {}".format(results))
            output_list.append(results)
        return Response(json.dumps(output_list), mimetype='application/json')
    except Exception as err:
        app.logger.error(
            'Exception {} occurred against payload: {}'.format(
                err, list_product_names))
        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "payload": list_product_names})
def get_products():
    """
    Worker loop that consumes jobs from the Disque queue ``catfight_input``.

    Each job is JSON with a ``vendor`` and a JSON-encoded ``payload`` of
    products. The products are passed to ``get_category``; when it returns
    results they are published together with the vendor on the
    ``catfight_output`` queue and the input job is acknowledged. Jobs that
    produce no results are only logged (they are not acknowledged).

    The loop never returns. Any exception is logged and reported to Sentry,
    then the loop continues with the next fetch.
    """
    while True:
        # Reset per iteration so the except handler never sees an unbound
        # name (fetch failing on the first pass) or values left over from a
        # previously processed job.
        job_id = None
        job = None
        try:
            jobs = client.get_job([catfight_input])
            for queue_name, job_id, job in jobs:
                job_data = json.loads(job)
                vendor = job_data['vendor']
                products = json.loads(job_data['payload'])
                results = get_category(products, job_id)
                if results:
                    results_dict = {
                        'vendor': vendor,
                        'catfight_results': results,
                    }
                    second_job_id = client.add_job(
                        catfight_output, json.dumps(results_dict), retry=5)
                    # Acknowledge only after the output job has been queued.
                    client.ack_job(job_id)
                    logger.info(
                        "Successfully fetched from Disque queue catfight_input GET Job ID {} with job {}".format(
                            job_id, job))
                    logger.info(
                        "Successfully added to Disque queue catfight_output with Job ID {} and job {}".format(
                            second_job_id, job))
                else:
                    logger.info(
                        "No results found for Job ID {} with job {}".format(
                            job_id, job))
        except Exception as e:
            # Failures are errors, not informational events.
            logger.error(
                "Function get_products failed for Job ID {} with job {} with error {}".format(
                    job_id, job, e))
            sentry_client.captureException(
                message="get_products failed",
                extra={"error": e})
# Module-level logger that writes to a size-rotated file at
# CATFIGHT_LOGGING_PATH (maxBytes=200000000 per file, 10 backups kept).
logger = logging.getLogger('Catfight App')
handler = RotatingFileHandler(CATFIGHT_LOGGING_PATH, maxBytes=200000000, backupCount=10)
# Record layout: timestamp;level;message
formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Instantiate both models once at import time.
logger.info("Loading Process Started")
try:
    dang_model = dangerousModel()
    cat_model = categoryModel()
except Exception as err:
    # NOTE(review): a load failure is only logged and reported; one or both
    # of dang_model / cat_model are then left undefined for later code.
    logger.error("Error {} while loading models".format(err))
    sentry_client.captureException( message = "service_category_disque : Failed to load models", extra = {"error" : err})
logger.info("Loading Process Complete")

# Error identifiers placed in responses. Note that the 'MissingProductName'
# key maps to the value 'MissingProduct'.
ERROR_CODE = {
    'MissingProductName' : 'MissingProduct',
    'MissingProductList' : 'MissingProductList',
    'MissingWBN' : 'MissingWBN'
}


# Checks a single product record. Only the 'prd' check is visible here.
# NOTE(review): this definition is cut off at the end of the visible source;
# the remaining checks and the return statement are not shown. The caller
# unpacks the result as (valid_record, error_response) - confirm against the
# full file.
def validate_product_args(record):
    value = True
    error_response = {}
    # A missing or empty 'prd' field is reported as a missing product name.
    if not record.get('prd', None):
        error_response = {'error': ERROR_CODE['MissingProductName']}
def predict_category(product_name, wbn, cat_model, dang_model, logger):
    """Predict the category, sub-category and dangerous flag of a product.

    First level: three classifiers from ``cat_model`` vote on the lower-cased
    product name. ``clf_rf`` wins when it has a preference; otherwise
    ``clf_bayes`` and ``clf_chi`` are reconciled, with "Delhivery_Others"
    treated as "no opinion". A probability vector whose entries are all equal
    counts as "Delhivery_Others".

    Second level: only for first-level categories listed in
    ``second_level_cat_names_set_nb`` or ``second_level_cat_names_set_rf``;
    otherwise the sub-category is the empty string.

    Args:
        product_name: raw product name.
        wbn: waybill number, forwarded to ``predict_dangerous``.
        cat_model: object exposing the vectorizers and classifiers used here.
        dang_model: object exposing ``dg_keywords``.
        logger: logger used for error reporting.

    Returns:
        dict with keys ``cat``, ``scat`` and ``dg``, or ``None`` when any
        exception occurred (it is logged and reported to Sentry).
    """
    try:
        l_product_name = product_name.lower()
        product_words = re.findall(CLEAN_PRODUCT_NAME_REGEX, l_product_name)
        clean_product_name = " ".join(product_words)

        vectorizer = cat_model.vectorizer
        clf_bayes = cat_model.clf_bayes
        clf_chi = cat_model.clf_chi
        clf_rf = cat_model.clf_rf
        second_level_vectorizer = cat_model.second_level_vectorizer
        second_level_clf_bayes = cat_model.second_level_clf_bayes
        second_level_clf_fpr = cat_model.second_level_clf_fpr
        second_level_clf_rf = cat_model.second_level_clf_rf

        # Vectorise once and share the features between the three
        # first-level classifiers instead of transforming three times.
        features = vectorizer.transform([l_product_name])
        class1 = clf_bayes.predict(features)[0]
        class2_prob_vector = clf_chi.predict_proba(features)[0]
        class3_prob_vector = clf_rf.predict_proba(features)[0]

        # NOTE(review): labels for clf_chi / clf_rf probabilities are read
        # from clf_bayes.classes_ - this presumably relies on all first-level
        # classifiers sharing one class ordering; confirm.
        if len(np.unique(class2_prob_vector)) == 1:
            class2 = "Delhivery_Others"
        else:
            class2 = clf_bayes.classes_[np.argmax(class2_prob_vector)]

        if len(np.unique(class3_prob_vector)) == 1:
            class3 = "Delhivery_Others"
        else:
            class3 = clf_bayes.classes_[np.argmax(class3_prob_vector)]

        if class3 == "Delhivery_Others":
            # clf_rf has no preference: reconcile bayes (class1) and chi (class2).
            if class1 == class2:
                first_level = class1
            elif class1 == "Delhivery_Others":
                first_level = class2
            elif class2 == "Delhivery_Others":
                first_level = class1
            else:
                first_level = class2
        else:
            first_level = class3

        second_level = ""
        use_fpr = first_level in cat_model.second_level_cat_names_set_nb
        if use_fpr or first_level in cat_model.second_level_cat_names_set_rf:
            # Both branches differ only in which model supplies probabilities.
            prob_models = second_level_clf_fpr if use_fpr else second_level_clf_rf
            prob_clf = prob_models[first_level]
            second_level_features = second_level_vectorizer[first_level].transform(
                [l_product_name])
            prob_vector = prob_clf.predict_proba(second_level_features)[0]
            bayes_clf = second_level_clf_bayes[first_level]
            if len(np.unique(prob_vector)) == 1:
                # No preference from the probability model: use bayes directly.
                second_level = bayes_clf.predict(second_level_features)[0]
            else:
                second_level = bayes_clf.classes_[np.argmax(prob_vector)]

        dg_report = predict_dangerous(clean_product_name, wbn, first_level,
                                      dang_model.dg_keywords, logger)
        return {
            'cat': first_level,
            'scat': second_level,
            'dg': dg_report['dangerous'],
        }
    except Exception as err:
        logger.error('Exception {} occurred against product: {}'.format(
            err, product_name))
        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "product_name": product_name})
        # Callers receive None when prediction fails.
        return None
def predict_category(product_name, wbn, cat_model, dang_model, logger):
    """Classify a product into first/second-level category plus dangerous flag.

    The lower-cased name is vectorised and scored by ``clf_bayes``,
    ``clf_chi`` and ``clf_rf`` from ``cat_model``. A probability vector with
    all-equal entries is read as "Delhivery_Others". ``clf_rf`` decides the
    first level unless it yields "Delhivery_Others", in which case the bayes
    and chi answers are reconciled. A second level is predicted only for
    categories present in ``second_level_cat_names_set_nb`` or
    ``second_level_cat_names_set_rf``; otherwise it stays ``""``.

    Args:
        product_name: raw product name.
        wbn: waybill number, passed through to ``predict_dangerous``.
        cat_model: holder of the vectorizers and classifiers used here.
        dang_model: holder of ``dg_keywords``.
        logger: logger used for error reporting.

    Returns:
        dict with keys ``cat``, ``scat`` and ``dg``; ``None`` if an exception
        was raised (it is logged and sent to Sentry).
    """
    try:
        l_product_name = product_name.lower()
        product_words = re.findall(CLEAN_PRODUCT_NAME_REGEX, l_product_name)
        clean_product_name = " ".join(product_words)

        vectorizer = cat_model.vectorizer
        clf_bayes = cat_model.clf_bayes
        clf_chi = cat_model.clf_chi
        clf_rf = cat_model.clf_rf
        second_level_vectorizer = cat_model.second_level_vectorizer
        second_level_clf_bayes = cat_model.second_level_clf_bayes
        second_level_clf_fpr = cat_model.second_level_clf_fpr
        second_level_clf_rf = cat_model.second_level_clf_rf

        # One transform shared by all three first-level classifiers; the
        # input is identical for each, so there is no need to repeat it.
        features = vectorizer.transform([l_product_name])
        class1 = clf_bayes.predict(features)[0]
        class2_prob_vector = clf_chi.predict_proba(features)[0]
        class3_prob_vector = clf_rf.predict_proba(features)[0]

        # NOTE(review): clf_bayes.classes_ is used to label clf_chi / clf_rf
        # probabilities - presumably all three share a class ordering; verify.
        if len(np.unique(class2_prob_vector)) == 1:
            class2 = "Delhivery_Others"
        else:
            class2 = clf_bayes.classes_[np.argmax(class2_prob_vector)]

        if len(np.unique(class3_prob_vector)) == 1:
            class3 = "Delhivery_Others"
        else:
            class3 = clf_bayes.classes_[np.argmax(class3_prob_vector)]

        if class3 == "Delhivery_Others":
            # Random forest abstained: fall back to bayes (class1) / chi (class2).
            if class1 == class2:
                first_level = class1
            elif class1 == "Delhivery_Others":
                first_level = class2
            elif class2 == "Delhivery_Others":
                first_level = class1
            else:
                first_level = class2
        else:
            first_level = class3

        second_level = ""
        use_fpr = first_level in cat_model.second_level_cat_names_set_nb
        if use_fpr or first_level in cat_model.second_level_cat_names_set_rf:
            # The nb and rf paths are identical apart from the probability model.
            prob_models = second_level_clf_fpr if use_fpr else second_level_clf_rf
            prob_clf = prob_models[first_level]
            second_level_features = second_level_vectorizer[first_level].transform(
                [l_product_name])
            prob_vector = prob_clf.predict_proba(second_level_features)[0]
            bayes_clf = second_level_clf_bayes[first_level]
            if len(np.unique(prob_vector)) == 1:
                # Flat probabilities carry no signal: ask bayes directly.
                second_level = bayes_clf.predict(second_level_features)[0]
            else:
                second_level = bayes_clf.classes_[np.argmax(prob_vector)]

        dg_report = predict_dangerous(clean_product_name, wbn, first_level,
                                      dang_model.dg_keywords, logger)
        return {
            'cat': first_level,
            'scat': second_level,
            'dg': dg_report['dangerous'],
        }
    except Exception as err:
        logger.error(
            'Exception {} occurred against product: {}'.format(
                err, product_name))
        sentry_client.captureException(
            message="predict.py: Exception occured",
            extra={"error": err, "product_name": product_name})
        # Prediction failed: signal it to the caller with None.
        return None