def doregister():
    """Register a customer organization from the request's query parameters.

    Reads ``cust_id``, ``lang_type``, ``email_id`` and ``password`` from
    ``request.args``.

    * Missing or empty ``cust_id``: logs an error and re-renders
      ``register.html`` with an error message.
    * ``cust_id`` already known to the customer model: redirects to the
      ``.list`` view.
    * Unknown ``cust_id``: creates the customer, imports its response and
      training data, trains the intent model, then redirects to ``.list``.
      If creation yields a falsy result, the error page is rendered instead.

    Returns:
        The Flask response produced by ``redirect`` or ``render_template``.
    """
    cust_id = request.args.get('cust_id', None)
    lang_type = request.args.get('lang_type', None)
    email_id = request.args.get('email_id', None)
    # NOTE(review): the password is taken from the query string (request.args).
    password_ = request.args.get('password', None)

    if cust_id is None or cust_id == '':
        # Lazy %-formatting: concatenating cust_id into the message raised
        # TypeError when the parameter was absent (None).
        logging.error("'%s' is invalid customer subdomain. ", cust_id)
        return render_template("register.html", error_msg='Error: Invalid User Organization')

    cust_id = cust_id.strip().lower()
    intenteng = IntentExtractor()
    cust = getCustomerModel().authenticate(cust_id)

    if cust is not None:
        # Customer already exists: nothing to create.
        return redirect(url_for('.list', cust_id=cust_id))

    cust = getCustomerModel().create(
        cust_id,
        language=lang_type,
        email_id=email_id,
        password=password_,
        newflag=True,
        retrainflag=True,
        ticketflag=True,
        done=True)
    if cust:
        tickets_learner().import_responsedata(cust['cust_name'], cust['language'])
        tickets_learner().import_trainingdata(cust['cust_name'], cust['language'])
        intenteng.prepareTrainingData_nospacy(cust['cust_name'])
        intenteng.startTrainingProcess(cust['cust_name'])
        return redirect(url_for('.list', cust_id=cust_id))

    # Creation did not produce a customer record.
    logging.error("'%s' is invalid customer subdomain. ", cust_id)
    return render_template("register.html", error_msg='Error: Invalid User Organization')
def prepareTrainingData(self, cust_id):
    """Build the tokenised training set for ``cust_id`` in ``self.X`` / ``self.y``.

    Every training row's ``tags`` and ``query`` are joined with ``' . '``,
    passed through ``UtilityClass_spacy.preprocessText`` and then
    ``self.utilclass.cleanData``. Rows whose cleaned text is the empty string
    are skipped. ``self.X`` receives the whitespace-split tokens of each kept
    row and ``self.y`` the stripped ``resp_category``; both end up as object
    arrays.
    """
    customer_label = str(cust_id)
    logging.info("prepareTrainingData : Started " + customer_label)
    self.X, self.y = [], []

    learner = tickets_learner()
    pages = learner.getTrainingData(cust_id=cust_id)
    # Imported here rather than at module level, as in the original code.
    from agentapp.UtilityClass_spacy import UtilityClass_spacy
    spacy_util = UtilityClass_spacy()
    language = getCustomerModel().getLanguage(cust_id)

    samples, labels = [], []
    for page in pages:
        for row in page:
            text = str(row['tags'] + ' . ' + row['query']).strip()
            text = spacy_util.preprocessText(text, lang=language, ner=True)
            text = str(
                self.utilclass.cleanData(
                    text,
                    lang=language,
                    lowercase=True,
                    remove_stops=True,
                    tag_remove=True))
            if text == '':
                continue
            samples.append(text.strip().split())
            labels.append(str(row['resp_category']).strip())

    self.X, self.y = samples, labels
    self.X, self.y = (np.array(self.X, dtype=object),
                      np.array(self.y, dtype=object))

    logging.info("Total Training Examples : %s" % len(self.y))
    logging.info("prepareTrainingData : Completed " + customer_label)
    return
def removeRespDuplicate(self, cust_id):
    """Delete responses whose tags are all contained in another response's tags.

    Within each page returned by ``getResponseData``, a response is deleted
    when every one of its whitespace-separated tags also appears in the tags
    of a different response from the same page. Responses whose
    ``modifiedflag`` or ``defaultflag`` equals True are never deleted.

    Responses that have already been deleted in this run are not used as the
    "covering" response. Without that, two responses with identical tags
    deleted each other and no copy survived. Each response is deleted at most
    once.

    Args:
        cust_id: customer identifier passed to the learner and response model.
    """
    logging.info('removeRespDuplicate : Started ' + str(cust_id))
    tickets_learn = tickets_learner()
    resp_model = getResponseModel()
    resp_data = tickets_learn.getResponseData(cust_id=cust_id)
    deleted_ids = set()

    for resp_logs in resp_data:
        # Materialise the page so the nested pass below can re-iterate it.
        responses = list(resp_logs)
        for resp_logx in responses:
            if resp_logx['modifiedflag'] == True or resp_logx['defaultflag'] == True:
                continue
            respx_tags = resp_logx['tags'].split()
            for resp_logy in responses:
                if resp_logy['id'] == resp_logx['id']:
                    continue
                if resp_logy['id'] in deleted_ids:
                    # A deleted response cannot justify deleting another one.
                    continue
                respy_tags = set(resp_logy['tags'].split())
                # As before, a response with no tags counts as covered by
                # any other response.
                if all(tag in respy_tags for tag in respx_tags):
                    resp_model.delete(resp_logx['id'], cust_id)
                    deleted_ids.add(resp_logx['id'])
                    logging.info('Deleting Duplicate Response id : ' +
                                 str(resp_logx['id']) + ' Over ' +
                                 str(resp_logy['id']))
                    break

    logging.info('removeRespDuplicate : Completed ' + str(cust_id))
def extractIntentData_cust(self, cust_id):
    """Turn the 'intent' training logs of ``cust_id`` into training tickets.

    Logs are fetched with ``log_type='intent', done=True``. Each log's
    ``json_data`` is parsed; a log without a ``description`` key is logged
    and skipped. When a ticket has more than one comment, the comments are
    scanned from last to first and the first one whose author e-mail differs
    from the requester's is cleaned with ``cleanhtml`` and used as the
    response. The scan stops if an e-mail key is missing.

    The collected tickets and their source logs (re-emitted with ``done`` set
    to False) are de-duplicated on ``id`` (last one wins) and written through
    ``batchUpdate``.

    Args:
        cust_id: customer identifier used for the fetch and both batch updates.
    """
    logging.info('extractIntentData_cust : Started : ' + str(cust_id))
    trainlog = get_model()
    traindata = getTrainingModel()
    tickets_learn = tickets_learner()
    ticket_struct = []
    trainlog_struct = []
    intent_data = tickets_learn.getTrainingLog(
        cust_id=cust_id, log_type='intent', done=True)
    len_traindata = 0

    for intent_logs in intent_data:
        len_traindata += len(intent_logs)
        for intent_log in intent_logs:
            ticket = json.loads(intent_log["json_data"])
            try:
                description = ticket['description']
            except KeyError as err:
                logging.error(err)
                continue
            subject = ticket['subject']
            tags = ', '.join(ticket['requester']['tags'])
            response = ''
            ticket_id = ticket['id']
            comments = ticket['comments']

            if len(comments) > 1:
                # Newest comment first: pick the latest one not written by
                # the requester.
                for comment in reversed(comments):
                    try:
                        requester_email = ticket['requester']['email']
                        commentor_email = comment['author']['email']
                    except KeyError as err:
                        logging.error(err)
                        break
                    if requester_email != commentor_email:
                        response = cleanhtml(comment['value'])
                        break

            ticket_struct.append({
                'id': ticket_id,
                'query': str(subject + ' . ' + description),
                'query_category': '',
                'feedback_flag': False,
                'feedback_prob': 0,
                'done': False,
                'response': response,
                'resp_category': '',
                'tags': tags,
            })
            trainlog_struct.append({
                'id': intent_log['id'],
                'type': intent_log['type'],
                'created': intent_log['created'],
                'json_data': intent_log['json_data'],
                'done': False,
            })

    logging.info('No of Training Data Processing : ' + str(len_traindata))
    # "> 0" rather than "> 1": a single collected ticket must be written too.
    if len(ticket_struct) > 0:
        ticket_pd = pd.DataFrame(ticket_struct)
        trainlog_pd = pd.DataFrame(trainlog_struct)
        ticket_pd = ticket_pd.drop_duplicates(subset=['id'], keep='last')
        trainlog_pd = trainlog_pd.drop_duplicates(subset=['id'], keep='last')
        traindata.batchUpdate(ticket_pd, cust_id)
        trainlog.batchUpdate(trainlog_pd, cust_id)
    logging.info('extractIntentData_cust : Completed : ' + str(cust_id))
def prepareTrainingData(self, cust_id):
    """Load ticket and response rows of ``cust_id`` into analytics data frames.

    Fills ``self.ticket_pd`` with every training row whose stripped
    ``response`` is non-empty and ``self.response_pd`` with the id and flag
    fields of every response row. ``self.analytics_pd`` receives the
    ``cust_id`` plus ``ticket_total_count`` and ``response_total_count``,
    which are the summed page lengths (all rows, including tickets that were
    filtered out).
    """
    logging.info("prepareTrainingData : Started : " + str(cust_id))
    learner = tickets_learner()

    ticket_pages = learner.getTrainingData(cust_id=cust_id)
    self.analytics_pd['cust_id'] = cust_id
    ticket_rows = []
    self.analytics_pd['ticket_total_count'] = 0
    for page in ticket_pages:
        self.analytics_pd['ticket_total_count'] += len(page)
        for row in page:
            if row['response'].strip() == '':
                continue
            # Optional fields fall back to a neutral default when absent.
            feedback_resp = row['feedback_resp'] if 'feedback_resp' in row else ''
            predict_prob = row['predict_prob'] if 'predict_prob' in row else 0
            ticket_rows.append({
                'id': row['id'],
                'query': row['query'],
                'response': row['response'].strip(),
                'tags': row['tags'],
                'resp_category': row['resp_category'],
                'feedback_resp': feedback_resp,
                'feedback_prob': row['feedback_prob'],
                'predict_prob': predict_prob,
                'feedback_flag': row['feedback_flag'],
                'created': row['created'],
            })
    self.ticket_pd = pd.DataFrame(ticket_rows)

    response_rows = []
    response_pages = learner.getResponseData(cust_id=cust_id)
    self.analytics_pd['response_total_count'] = 0
    for page in response_pages:
        self.analytics_pd['response_total_count'] += len(page)
        for row in page:
            response_rows.append({
                'id': row['id'],
                'modifiedflag': row['modifiedflag'],
                'defaultflag': row['defaultflag'],
            })
    self.response_pd = pd.DataFrame(response_rows)

    logging.info("Total Training Examples : %s" % len(self.ticket_pd))
    logging.info("prepareTrainingData : Completed : " + str(cust_id))
    return
def extractFeedbackData_cust(self, cust_id):
    """Apply the 'feedback' training logs of ``cust_id`` to its training tickets.

    Logs are fetched with ``log_type='feedback', done=None``. For each log the
    selected response id and probability (divided by 100, or 0 when absent)
    are read from ``json_data``. The referenced training ticket and response
    are then looked up. When both exist, an updated ticket record (with
    ``feedback_flag`` True, the response text and the feedback fields) and the
    source log (with ``done`` set to False) are collected.

    Collected records are de-duplicated on ``id`` (last one wins) and written
    through ``batchUpdate``.

    Args:
        cust_id: customer identifier used to fetch the logs.
    """
    logging.info('extractFeedbackData_cust : Started : ' + str(cust_id))
    trainlog = get_model()
    traindata = getTrainingModel()
    respdata = getResponseModel()
    tickets_learn = tickets_learner()
    ticket_struct = []
    trainlog_struct = []
    intent_data = tickets_learn.getTrainingLog(
        cust_id=cust_id, log_type='feedback', done=None)
    len_traindata = 0

    for intent_logs in intent_data:
        len_traindata += len(intent_logs)
        for intent_log in intent_logs:
            feedback = json.loads(intent_log["json_data"])
            selected_response_id = feedback["selected_response_id"]
            if 'selected_response_prob' in feedback:
                selected_response_prob = feedback["selected_response_prob"] / 100
            else:
                selected_response_prob = 0
            # NOTE(review): this rebinds the cust_id parameter, so the batch
            # updates and the completion log below use the subdomain of the
            # last log processed. Kept as-is; confirm this is intended.
            cust_id = feedback["ticket_data"]['currentAccount']['subdomain']
            ticket_id = feedback["ticket_data"]['id']
            train_data = traindata.read(ticket_id, cust_id=cust_id)
            response_data = respdata.read(selected_response_id, cust_id=cust_id)

            if train_data is None or response_data is None:
                continue

            ticket_struct.append({
                'id': train_data['id'],
                'query': train_data['query'],
                'query_category': train_data['query_category'],
                'response': response_data["response_text"],
                'resp_category': train_data['resp_category'],
                'feedback_resp': response_data['res_category'],
                'feedback_flag': True,
                'feedback_prob': selected_response_prob,
                'predict_prob': train_data['predict_prob'] if 'predict_prob' in train_data else 0,
                'done': train_data['done'],
                'tags': train_data['tags'],
            })
            trainlog_struct.append({
                'id': intent_log['id'],
                'type': intent_log['type'],
                'created': intent_log['created'],
                'json_data': intent_log['json_data'],
                'done': False,
            })
            print('Updating Feedback : ', ticket_id, cust_id)

    logging.info('No of Training Data Processing : ' + str(len_traindata))
    # "> 0" rather than "> 1": a single feedback record must be written too.
    if len(ticket_struct) > 0:
        ticket_pd = pd.DataFrame(ticket_struct)
        trainlog_pd = pd.DataFrame(trainlog_struct)
        ticket_pd = ticket_pd.drop_duplicates(subset=['id'], keep='last')
        trainlog_pd = trainlog_pd.drop_duplicates(subset=['id'], keep='last')
        traindata.batchUpdate(ticket_pd, cust_id)
        trainlog.batchUpdate(trainlog_pd, cust_id)
    logging.info('extractFeedbackData_cust : Completed : ' + str(cust_id))
def populateResponseData(self, cust_id):
    """Rebuild the generated responses of ``cust_id`` from ``self.ticket_pd``.

    Returns early (after logging) when ``self.ticket_pd`` has no
    ``response_cluster`` entry. Otherwise it deletes every response fetched
    with ``modifiedflag=False, defaultflag=False``. It then creates one
    response per frame row that has ``select_response == 'true'`` and
    non-empty ``select_tags``, ``response_summary`` and ``response_tags``.
    New ids start one past the id of the last deleted response (or past 9999
    when nothing was deleted). Finally it runs ``removeRespDuplicate`` and
    uploads the frame as CSV via ``self.storage.put_bucket``.
    """
    logging.info('populateResponseData : Started ' + str(cust_id))
    try:
        # Only probing for the column; the value itself is not needed.
        self.ticket_pd['response_cluster']
    except KeyError as err:
        logging.error("populateResponseData : " + str(err))
        return

    response_model = getResponseModel()
    previous_id = 9999
    learner = tickets_learner()
    stale_pages = learner.getResponseData(
        cust_id=cust_id, modifiedflag=False, defaultflag=False)
    for page in stale_pages:
        for stale in page:
            response_model.delete(stale['id'], cust_id)
            previous_id = stale['id']

    next_id = int(previous_id) + 1
    for _, row in self.ticket_pd.iterrows():
        if row['select_response'] != 'true':
            continue
        if row['select_tags'].strip() == '':
            continue
        if row['response_summary'].strip() == '':
            continue
        if row['response_tags'] == '':
            continue

        if row['response_title'] != '':
            title = row['response_title']
        else:
            title = cust_id + '_Response_' + str(next_id)
        response_model.create(
            title,
            str(cust_id + '_Response_' + str(next_id)),
            row['response_summary'],
            row['select_tags'],
            row['response_tags'],
            done=True,
            id=next_id,
            cust_id=cust_id)
        next_id += 1

    self.removeRespDuplicate(cust_id)
    csv_text = self.ticket_pd.to_csv()
    bucket_name = str("SmartReply_DataFrame_" + str(cust_id))
    self.storage.put_bucket(csv_text, bucket_name, filetype='csv')
    logging.info('populateResponseData : Completed ' + str(cust_id))
    return
def dashboard():
    """Render ``dashboard.html`` with ticket and response counters.

    The counters come from the first entry of each page returned by
    ``getAnalyticsData`` for the ``cust_id`` query parameter. Values from a
    later page overwrite earlier ones, and an empty page yields zeros. All
    counters default to 0, so the page still renders when no analytics data
    is returned. Previously that case raised ``UnboundLocalError``.

    Returns:
        The rendered dashboard template.
    """
    cust_id = request.args.get('cust_id', None)
    ticket_learn = tickets_learner()
    analytic_data = ticket_learn.getAnalyticsData(cust_id)
    error_msg = ''

    # Defaults used when analytic_data yields nothing at all.
    ticket_served = ticket_applied = ticket_not_applied = 0
    response_total = response_modified = response_default = response_new = 0

    for linestms in analytic_data:
        if len(linestms) > 0:
            stats = linestms[0]
            ticket_served = stats['ticket_total_count']
            ticket_applied = stats['Feedback_tickets_count']
            ticket_not_applied = int(ticket_served - ticket_applied)
            response_total = stats['response_total_count']
            response_modified = stats['response_modified_count']
            response_default = stats['response_default_count']
            response_new = int(
                response_total - (response_modified + response_default))
        else:
            ticket_served = ticket_applied = ticket_not_applied = 0
            response_total = response_modified = 0
            response_default = response_new = 0

    return render_template(
        "dashboard.html",
        cust_id=cust_id,
        error_msg=error_msg,
        ticket_served=ticket_served,
        ticket_applied=ticket_applied,
        response_total=response_total,
        response_modified=response_modified,
        response_default=response_default,
        response_new=response_new,
        ticket_not_applied=ticket_not_applied)
def startTrainLogPrediction(self, cust_id):
    """Predict a response category for every training row of ``cust_id``.

    Does nothing except log when ``self.model`` is None. Otherwise the
    training data frame is loaded. If it has rows, ``resp_category`` and
    ``predict_prob`` are filled from ``getPredictedIntent_prob`` applied to
    the ``query`` column, and the frame is written back with ``batchUpdate``.
    """
    logging.info("startTrainLogPrediction : Started " + str(cust_id))
    if self.model == None:
        logging.info('Cant process as no Training ')
        return

    training_model = getTrainingModel()
    learner = tickets_learner()
    frame = learner.getTrainingData_DataFrame(cust_id)
    if len(frame) > 0:
        categories, probabilities = self.getPredictedIntent_prob(
            frame['query'], cust_id)
        frame['resp_category'] = categories
        frame['predict_prob'] = probabilities
        training_model.batchUpdate(frame, cust_id=cust_id)

    logging.info("startTrainLogPrediction : Completed " + str(cust_id))
    return
def prepareTestingData(self, cust_id):
    """Build the tokenised test set for ``cust_id`` in ``self.test_X`` / ``self.test_y``.

    Every training row's ``tags`` and ``query`` are joined with ``', '``,
    cleaned by ``self.utilclass.cleanData`` and split on whitespace into
    ``self.test_X``. The stripped ``resp_category`` goes into ``self.test_y``.
    Both are stored as object arrays.
    """
    customer_label = str(cust_id)
    logging.info("prepareTestingData : Started " + customer_label)
    self.test_X, self.test_y = [], []

    learner = tickets_learner()
    pages = learner.getTrainingData(cust_id=cust_id)
    language = getCustomerModel().getLanguage(cust_id)

    samples, labels = [], []
    for page in pages:
        for row in page:
            logging.debug(
                str(row['tags'] + ', ' + row['query']) + " => " + row['response'])
            raw_text = str(row['tags'] + ', ' + row['query'])
            cleaned = self.utilclass.cleanData(
                raw_text,
                lang=language,
                lowercase=True,
                remove_stops=True,
                tag_remove=True)
            samples.append(cleaned.strip().split())
            labels.append(row['resp_category'].strip())

    self.test_X, self.test_y = samples, labels
    self.test_X, self.test_y = (np.array(self.test_X, dtype=object),
                                np.array(self.test_y, dtype=object))

    logging.info("Total Testing Examples : %s" % len(self.test_y))
    logging.info("prepareTestingData : Completed " + customer_label)
    return
def predictIntent():
    """Predict the intent for the ticket posted as JSON and return it as JSON.

    The customer is resolved from ``currentAccount.subdomain`` of the request
    body. A missing key or a failed ``authenticate`` falls back to
    ``'default'``. The raw request is stored as an 'intent' log. The text to
    classify is the first comment when it was written by the requester,
    otherwise ``description + '. ' + subject``. Either is passed through
    ``cleanhtml`` first.

    Returns:
        The JSON string of the formatted prediction.
    """
    logging.info('predictIntent : ')
    intent_engine = IntentExtractor()
    learner = tickets_learner()
    html_cleaner = UtilityClass()
    payload = request.json

    try:
        cust_id = payload['currentAccount']['subdomain']
    except KeyError as err:
        logging.error(err)
        cust_id = 'default'

    customer = getCustomerModel().authenticate(
        cust_id.strip().lower(), newflag=False)
    cust_id = 'default' if customer == None else customer['cust_name']
    logging.info('Customer Id : ' + str(cust_id))

    get_model().create(
        'intent', json.dumps(request.json), done=True, cust_id=cust_id)

    has_comments = len(payload['comments']) > 0
    if has_comments and (payload['requester']['email'] ==
                         payload['comments'][0]['author']['email']):
        raw_text = payload['comments'][0]['value']
    else:
        raw_text = payload['description'] + '. ' + payload['subject']
    intent_input = html_cleaner.cleanhtml(raw_text)

    predicted = intent_engine.getIntentForText(intent_input, cust_id)
    formatted = learner.formatOutput(predicted, cust_id)
    logging.info('\'' + str(intent_input) + '\' >> ' + str(formatted))
    return json.dumps(formatted)
def prepareTrainingData(self, cust_id):
    """Load the answered training tickets of ``cust_id`` into ``self.ticket_pd``.

    Training data is fetched with ``done=None``. Only rows whose stripped
    ``response`` is non-empty are kept, reduced to ``id``, ``query``,
    ``response`` (stripped) and ``tags``.
    """
    logging.info("prepareTrainingData : Started : " + str(cust_id))
    learner = tickets_learner()
    pages = learner.getTrainingData(cust_id=cust_id, done=None)

    answered = [
        {
            'id': row['id'],
            'query': row['query'],
            'response': row['response'].strip(),
            'tags': row['tags'],
        }
        for page in pages
        for row in page
        if row['response'].strip() != ''
    ]
    self.ticket_pd = pd.DataFrame(answered)

    logging.info("Total Training Examples : %s" % len(self.ticket_pd))
    logging.info("prepareTrainingData : Completed : " + str(cust_id))
    return
def startEvaluation(self, cust_id):
    """Score both intent models against ``self.ticket_pd`` and export the frame.

    When the frame has rows, the response-based extractor is trained and both
    extractors predict an intent for every ``query``. The predictions are
    mapped to response tags and texts via ``get_response_map``. Queries and
    responses are lower-cased, split and stemmed, then compared with the
    mapped tags (``matchword`` / ``percentmatchword``). Finally each ticket's
    response is scored against both predicted response texts with
    ``getBleuScore``.

    The frame is then uploaded as CSV through ``self.storage.put_bucket``.
    """
    logging.info('startEvaluation : Started : ' + str(cust_id))
    tickets_learn = tickets_learner()
    frame = self.ticket_pd

    if len(frame) > 0:
        train_engine = IntentExtractor()
        resp_engine = IntentExtractor_resp()
        resp_engine.prepareTrainingData(cust_id)
        resp_engine.startTrainingProcess(cust_id)

        frame['TrainingModel_intent'] = train_engine.getPredictedIntent_list(
            frame['query'], cust_id)
        frame['ResponseModel_intent'] = resp_engine.getPredictedIntent_list(
            frame['query'], cust_id)

        tag_map = tickets_learn.get_response_map(cust_id, 'tags')
        resp_tag_map = tickets_learn.get_response_map(cust_id, 'resp_tags')
        # Both tag columns are derived from the training-model prediction.
        frame['intent_tags'] = frame['TrainingModel_intent'].apply(
            lambda intent: self.getMatch(tag_map, intent).lower().split())
        frame['response_tags'] = frame['TrainingModel_intent'].apply(
            lambda intent: self.getMatch(resp_tag_map, intent).lower().split())

        # Tokenise first, then stem, for query and response alike.
        frame['query_list'] = frame['query'].apply(
            lambda text: text.lower().split())
        frame['response_list'] = frame['response'].apply(
            lambda text: text.lower().split())
        frame['query_list'] = frame['query_list'].apply(
            lambda words: self.applystem(words))
        frame['response_list'] = frame['response_list'].apply(
            lambda words: self.applystem(words))

        frame['intent_tags_in_query_list'] = frame.apply(
            lambda row: self.matchword(row['query_list'], row['intent_tags']),
            axis=1)
        frame['resp_tags_in_response_list'] = frame.apply(
            lambda row: self.matchword(row['response_list'],
                                       row['response_tags']),
            axis=1)
        frame['percentage_match_in_query_list'] = frame.apply(
            lambda row: self.percentmatchword(row['query_list'],
                                              row['intent_tags']),
            axis=1)
        frame['percentage_match_in_response_list'] = frame.apply(
            lambda row: self.percentmatchword(row['response_list'],
                                              row['response_tags']),
            axis=1)

        text_map = tickets_learn.get_response_map(cust_id, 'response_text')
        frame['intent_response_text'] = frame['TrainingModel_intent'].apply(
            lambda intent: self.getMatch(text_map, intent))
        frame['response_response_text'] = frame['ResponseModel_intent'].apply(
            lambda intent: self.getMatch(text_map, intent))

        frame['bleu_score_intent'] = frame.apply(
            lambda row: self.getBleuScore(row['response'],
                                          row['intent_response_text']),
            axis=1)
        frame['bleu_score_response'] = frame.apply(
            lambda row: self.getBleuScore(row['response'],
                                          row['response_response_text']),
            axis=1)

    csv_text = self.ticket_pd.to_csv()
    bucket_name = str("TrainingModel_Evaluate_" + str(cust_id))
    self.storage.put_bucket(csv_text, bucket_name, filetype='csv')
    logging.info("startEvaluation : Completed : " + str(cust_id))
    return