示例#1
0
def doregister():
    """Register a customer organisation and start its initial training run.

    Reads ``cust_id``, ``lang_type``, ``email_id`` and ``password`` from the
    request query string.  A customer that already authenticates is
    redirected to the ``.list`` view.  An unknown customer is created, its
    response and training data are imported through ``tickets_learner`` and
    an ``IntentExtractor`` training run is started before the redirect.

    A missing, empty or whitespace-only ``cust_id`` is rejected with the
    registration error page.

    Returns:
        A redirect to ``.list`` on success, otherwise the rendered
        ``register.html`` template carrying an error message.
    """
    raw_cust_id = request.args.get('cust_id', None)
    lang_type = request.args.get('lang_type', None)
    email_id = request.args.get('email_id', None)
    # NOTE(review): the password is taken from the URL query string, where it
    # can end up in access logs and browser history - confirm this is intended.
    password_ = request.args.get('password', None)

    # Normalise before validating so a whitespace-only value is rejected too.
    cust_id = (raw_cust_id or '').strip().lower()
    if not cust_id:
        # str() keeps this log call from raising TypeError when the
        # parameter is absent (None).
        logging.error('\'' + str(raw_cust_id) + '\' is invalid customer subdomain. ')
        return render_template("register.html", error_msg='Error: Invalid User Organization')

    intenteng = IntentExtractor()
    cust = getCustomerModel().authenticate(cust_id)
    if cust is not None:
        # Already registered: nothing to create or train.
        return redirect(url_for('.list', cust_id=cust_id))

    cust = getCustomerModel().create(
        cust_id,
        language=lang_type,
        email_id=email_id,
        password=password_,
        newflag=True,
        retrainflag=True,
        ticketflag=True,
        done=True,
    )
    if cust:
        learner_name = cust['cust_name']
        tickets_learner().import_responsedata(learner_name, cust['language'])
        tickets_learner().import_trainingdata(learner_name, cust['language'])
        intenteng.prepareTrainingData_nospacy(learner_name)
        intenteng.startTrainingProcess(learner_name)
        return redirect(url_for('.list', cust_id=cust_id))

    # Creation returned a falsy value: report it like an invalid organisation.
    logging.error('\'' + cust_id + '\' is invalid customer subdomain. ')
    return render_template("register.html", error_msg='Error: Invalid User Organization')
示例#2
0
    def prepareTrainingData(self, cust_id):
        """Build the training token lists and labels for ``cust_id``.

        Each ticket contributes the text ``tags + ' . ' + query``, which is
        run through ``UtilityClass_spacy.preprocessText`` (``ner=True``) and
        then ``self.utilclass.cleanData``.  Tickets whose cleaned text is
        empty are skipped.  The whitespace-split tokens are stored in
        ``self.X`` and the stripped ``resp_category`` values in ``self.y``,
        both as object-dtype numpy arrays.
        """
        logging.info("prepareTrainingData : Started " + str(cust_id))
        self.X, self.y = [], []
        learner = tickets_learner()
        ticket_pages = learner.getTrainingData(cust_id=cust_id)

        from agentapp.UtilityClass_spacy import UtilityClass_spacy
        spacy_util = UtilityClass_spacy()
        lang = getCustomerModel().getLanguage(cust_id)

        token_rows, labels = [], []
        for page in ticket_pages:
            for ticket in page:
                text = str(ticket['tags'] + ' . ' + ticket['query']).strip()
                text = spacy_util.preprocessText(text, lang=lang, ner=True)
                text = str(
                    self.utilclass.cleanData(text,
                                             lang=lang,
                                             lowercase=True,
                                             remove_stops=True,
                                             tag_remove=True))
                if text == '':
                    continue
                token_rows.append(text.strip().split())
                labels.append(str(ticket['resp_category']).strip())

        self.X, self.y = token_rows, labels
        features = np.array(self.X, dtype=object)
        targets = np.array(self.y, dtype=object)
        self.X, self.y = features, targets

        logging.info("Total Training Examples : %s" % len(self.y))
        logging.info("prepareTrainingData : Completed " + str(cust_id))
        return
示例#3
0
 def removeRespDuplicate(self, cust_id):
     """Delete responses whose tag words all occur in another response's tags.

     Responses are compared pairwise within each page returned by
     ``getResponseData``.  A response is a deletion candidate only when
     neither its ``modifiedflag`` nor its ``defaultflag`` equals ``True``.
     A candidate is deleted as soon as one surviving response on the page
     contains every word of its ``tags``.

     Responses already deleted in this run are never used as the surviving
     counterpart, so two responses with identical tag sets no longer delete
     each other; exactly one of them is kept.

     NOTE(review): a candidate with empty ``tags`` is trivially contained in
     any other response and is therefore deleted when another response
     exists on the page - confirm this is intended.
     """
     logging.info('removeRespDuplicate : Started ' + str(cust_id))
     tickets_learn = tickets_learner()
     resp_model = getResponseModel()
     resp_data = tickets_learn.getResponseData(cust_id=cust_id)
     deleted_ids = set()
     for resp_logs in resp_data:
         for resp_logx in resp_logs:
             # Kept as explicit comparisons so only a stored True exempts
             # the response, exactly as before.
             if (resp_logx['modifiedflag'] == True
                     or resp_logx['defaultflag'] == True
                     or resp_logx['id'] in deleted_ids):
                 continue
             respx_list = resp_logx['tags'].split()
             for resp_logy in resp_logs:
                 # Skip the response itself and anything already removed.
                 if (resp_logx['id'] == resp_logy['id']
                         or resp_logy['id'] in deleted_ids):
                     continue
                 respy_words = set(resp_logy['tags'].split())
                 if all(word in respy_words for word in respx_list):
                     resp_model.delete(resp_logx['id'], cust_id)
                     deleted_ids.add(resp_logx['id'])
                     logging.info('Deleting Duplicate Response id : ' +
                                  str(resp_logx['id']) + ' Over ' +
                                  str(resp_logy['id']))
                     # One delete per response is enough.
                     break
     logging.info('removeRespDuplicate : Completed ' + str(cust_id))
示例#4
0
    def extractIntentData_cust(self, cust_id):
        """Turn 'intent' training-log entries into training-ticket records.

        Every log entry returned by ``getTrainingLog(log_type='intent',
        done=True)`` is JSON-decoded.  Entries without a ``description`` are
        logged and skipped.  For the others a ticket record is built from the
        subject, description and requester tags.  When the ticket has more
        than one comment, the newest comment whose author e-mail differs from
        the requester's is cleaned with ``cleanhtml`` and used as the
        response.

        The collected tickets and log rows (written back with ``done=False``)
        are de-duplicated on ``id`` (last occurrence wins) and passed to the
        two models' ``batchUpdate``.  A batch holding a single record is now
        written as well; previously it was silently dropped.
        """
        logging.info('extractIntentData_cust : Started : ' + str(cust_id))
        trainlog = get_model()
        traindata = getTrainingModel()
        tickets_learn = tickets_learner()

        ticket_struct = []
        trainlog_struct = []
        intent_data = tickets_learn.getTrainingLog(cust_id=cust_id, log_type='intent', done=True)
        len_traindata = 0
        for intent_logs in intent_data:
            len_traindata += len(intent_logs)
            for intent_log in intent_logs:
                intents_data_json = json.loads(intent_log["json_data"])
                try:
                    description = intents_data_json['description']
                except KeyError as err:
                    # A ticket without a description cannot be used for training.
                    logging.error(err)
                    continue
                subject = intents_data_json['subject']
                tags = ', '.join(intents_data_json['requester']['tags'])
                ticket_id = intents_data_json['id']
                comments = intents_data_json['comments']

                response = ''
                if len(comments) > 1:
                    # Walk from the newest comment back to the oldest.
                    for comment in reversed(comments):
                        try:
                            requester_email = intents_data_json['requester']['email']
                            commentor_email = comment['author']['email']
                        except KeyError as err:
                            logging.error(err)
                            break
                        if requester_email != commentor_email:
                            response = cleanhtml(comment['value'])
                            break

                ticket_struct.append({
                    'id': ticket_id,
                    'query': str(subject + ' . ' + description),
                    'query_category': '',
                    'feedback_flag': False,
                    'feedback_prob': 0,
                    'done': False,
                    'response': response,
                    'resp_category': '',
                    'tags': tags,
                })
                trainlog_struct.append({
                    'id': intent_log['id'],
                    'type': intent_log['type'],
                    'created': intent_log['created'],
                    'json_data': intent_log['json_data'],
                    'done': False,
                })

        logging.info('No of Training Data Processing : ' + str(len_traindata))
        # "> 0" rather than "> 1": one pending record is still a batch.
        if len(ticket_struct) > 0:
            ticket_pd = pd.DataFrame(ticket_struct)
            trainlog_pd = pd.DataFrame(trainlog_struct)
            ticket_pd = ticket_pd.drop_duplicates(subset=['id'], keep='last')
            trainlog_pd = trainlog_pd.drop_duplicates(subset=['id'], keep='last')
            traindata.batchUpdate(ticket_pd, cust_id)
            trainlog.batchUpdate(trainlog_pd, cust_id)
        logging.info('extractIntentData_cust : Completed : ' + str(cust_id))
示例#5
0
    def prepareTrainingData(self, cust_id):
        """Load the customer's tickets and responses into DataFrames.

        Tickets with a non-blank ``response`` are collected into
        ``self.ticket_pd``; ``feedback_resp`` and ``predict_prob`` fall back
        to ``''`` and ``0`` when a ticket lacks them.  The id and the
        modified/default flags of every response go into
        ``self.response_pd``.  The number of rows seen (before filtering) is
        accumulated in ``self.analytics_pd`` under ``ticket_total_count`` and
        ``response_total_count``.
        """
        logging.info("prepareTrainingData : Started : " + str(cust_id))
        learner = tickets_learner()
        ticket_pages = learner.getTrainingData(cust_id=cust_id)
        self.analytics_pd['cust_id'] = cust_id
        ticket_rows = []
        self.analytics_pd['ticket_total_count'] = 0
        for page in ticket_pages:
            self.analytics_pd['ticket_total_count'] += len(page)
            for ticket in page:
                # Tickets without an answer carry no training signal here.
                if ticket['response'].strip() == '':
                    continue
                ticket_rows.append({
                    'id': ticket['id'],
                    'query': ticket['query'],
                    'response': ticket['response'].strip(),
                    'tags': ticket['tags'],
                    'resp_category': ticket['resp_category'],
                    'feedback_resp': (ticket['feedback_resp']
                                      if 'feedback_resp' in ticket else ''),
                    'feedback_prob': ticket['feedback_prob'],
                    'predict_prob': (ticket['predict_prob']
                                     if 'predict_prob' in ticket else 0),
                    'feedback_flag': ticket['feedback_flag'],
                    'created': ticket['created'],
                })
        self.ticket_pd = pd.DataFrame(ticket_rows)

        response_rows = []
        response_pages = learner.getResponseData(cust_id=cust_id)
        self.analytics_pd['response_total_count'] = 0
        for page in response_pages:
            self.analytics_pd['response_total_count'] += len(page)
            for response in page:
                response_rows.append({
                    'id': response['id'],
                    'modifiedflag': response['modifiedflag'],
                    'defaultflag': response['defaultflag'],
                })
        self.response_pd = pd.DataFrame(response_rows)

        logging.info("Total Training Examples : %s" % len(self.ticket_pd))
        logging.info("prepareTrainingData : Completed : " + str(cust_id))
        return
示例#6
0
    def extractFeedbackData_cust(self, cust_id):
        """Apply agent feedback log entries to the stored training tickets.

        Every log entry returned by ``getTrainingLog(log_type='feedback',
        done=None)`` is JSON-decoded.  The referenced training ticket and the
        selected response are read; when both exist, a ticket record carrying
        the selected response text, its ``res_category`` as ``feedback_resp``
        and the selection probability (divided by 100, or 0 when absent) is
        queued, together with the log row marked ``done=False``.

        Queued rows are de-duplicated on ``id`` (last occurrence wins) and
        passed to the two models' ``batchUpdate``.  A batch holding a single
        record is now written as well; previously it was silently dropped.
        """
        logging.info('extractFeedbackData_cust : Started : ' + str(cust_id))
        trainlog = get_model()
        traindata = getTrainingModel()
        respdata = getResponseModel()
        tickets_learn = tickets_learner()

        ticket_struct = []
        trainlog_struct = []
        intent_data = tickets_learn.getTrainingLog(cust_id=cust_id, log_type='feedback', done=None)
        len_traindata = 0
        for intent_logs in intent_data:
            len_traindata += len(intent_logs)
            for intent_log in intent_logs:
                intents_data_json = json.loads(intent_log["json_data"])
                selected_response_id = intents_data_json["selected_response_id"]
                if 'selected_response_prob' in intents_data_json:
                    selected_response_prob = intents_data_json["selected_response_prob"] / 100
                else:
                    selected_response_prob = 0
                # NOTE(review): this overwrites the cust_id argument with the
                # subdomain found in the payload, so the batchUpdate calls and
                # the final log line below use the last entry's subdomain.
                # Behaviour kept as-is - confirm which namespace is intended.
                cust_id = intents_data_json["ticket_data"]['currentAccount']['subdomain']
                ticket_id = intents_data_json["ticket_data"]['id']
                train_data = traindata.read(ticket_id, cust_id=cust_id)
                response_data = respdata.read(selected_response_id, cust_id=cust_id)
                if train_data is None or response_data is None:
                    # Feedback for an unknown ticket or response is ignored.
                    continue
                ticket_struct.append({
                    'id': train_data['id'],
                    'query': train_data['query'],
                    'query_category': train_data['query_category'],
                    'response': response_data["response_text"],
                    'resp_category': train_data['resp_category'],
                    'feedback_resp': response_data['res_category'],
                    'feedback_flag': True,
                    'feedback_prob': selected_response_prob,
                    'predict_prob': train_data['predict_prob'] if 'predict_prob' in train_data else 0,
                    'done': train_data['done'],
                    'tags': train_data['tags'],
                })
                trainlog_struct.append({
                    'id': intent_log['id'],
                    'type': intent_log['type'],
                    'created': intent_log['created'],
                    'json_data': intent_log['json_data'],
                    'done': False,
                })
                logging.info('Updating Feedback : %s %s', ticket_id, cust_id)

        logging.info('No of Training Data Processing : ' + str(len_traindata))
        # "> 0" rather than "> 1": one pending record is still a batch.
        if len(ticket_struct) > 0:
            ticket_pd = pd.DataFrame(ticket_struct)
            trainlog_pd = pd.DataFrame(trainlog_struct)
            ticket_pd = ticket_pd.drop_duplicates(subset=['id'], keep='last')
            trainlog_pd = trainlog_pd.drop_duplicates(subset=['id'], keep='last')
            traindata.batchUpdate(ticket_pd, cust_id)
            trainlog.batchUpdate(trainlog_pd, cust_id)
        logging.info('extractFeedbackData_cust : Completed : ' + str(cust_id))
示例#7
0
    def populateResponseData(self, cust_id):
        """Replace the customer's generated responses with the selected rows.

        Does nothing (beyond logging) when ``self.ticket_pd`` has no
        ``response_cluster`` column.  Otherwise every response fetched with
        ``modifiedflag=False, defaultflag=False`` is deleted, and a new
        response is created for each ticket row that is marked
        ``select_response == 'true'`` and has non-blank select tags, summary
        and response tags.  New ids continue from the id of the last deleted
        response (or from 9999 when none was deleted).  Afterwards duplicates
        are removed and the frame is uploaded as CSV through
        ``self.storage.put_bucket``.
        """
        logging.info('populateResponseData : Started ' + str(cust_id))
        try:
            # Only probing for the column; the value itself is not needed.
            self.ticket_pd['response_cluster']
        except KeyError as err:
            logging.error("populateResponseData : " + str(err))
            return

        resp_model = getResponseModel()

        last_id = 9999
        learner = tickets_learner()
        stale_pages = learner.getResponseData(cust_id=cust_id,
                                              modifiedflag=False,
                                              defaultflag=False)
        for page in stale_pages:
            for stale in page:
                resp_model.delete(stale['id'], cust_id)
                last_id = stale['id']

        next_id = int(last_id) + 1
        for _, row in self.ticket_pd.iterrows():
            selected = (row['select_response'] == 'true'
                        and row['select_tags'].strip() != ''
                        and row['response_summary'].strip() != ''
                        and row['response_tags'] != '')
            if not selected:
                continue

            title = row['response_title']
            generated_name = str(cust_id + '_Response_' + str(next_id))
            if title == '':
                # Untitled rows fall back to the generated response name.
                title = cust_id + '_Response_' + str(next_id)
            resp_model.create(title,
                              generated_name,
                              row['response_summary'],
                              row['select_tags'],
                              row['response_tags'],
                              done=True,
                              id=next_id,
                              cust_id=cust_id)
            next_id += 1

        self.removeRespDuplicate(cust_id)

        csv_text = self.ticket_pd.to_csv()
        bucket_name = str("SmartReply_DataFrame_" + str(cust_id))
        self.storage.put_bucket(csv_text, bucket_name, filetype='csv')
        logging.info('populateResponseData : Completed ' + str(cust_id))
        return
示例#8
0
def dashboard():
    """Render the analytics dashboard for the customer in the query string.

    The counters come from the first row of each page yielded by
    ``getAnalyticsData``; when several pages are yielded the last one wins,
    and an empty page yields zeros.  All counters default to zero, so the
    template also renders when no page is yielded at all (this used to fail
    with ``UnboundLocalError``).

    Returns:
        The rendered ``dashboard.html`` template.
    """
    cust_id = request.args.get('cust_id', None)
    ticket_learn = tickets_learner()
    analytic_data = ticket_learn.getAnalyticsData(cust_id)
    error_msg = ''

    # Defaults used when the analytics source yields nothing.
    ticket_served = ticket_applied = ticket_not_applied = 0
    response_total = response_modified = response_default = response_new = 0

    for linestms in analytic_data:
        if len(linestms) > 0:
            summary = linestms[0]
            ticket_served = summary['ticket_total_count']
            ticket_applied = summary['Feedback_tickets_count']
            ticket_not_applied = int(ticket_served - ticket_applied)
            response_total = summary['response_total_count']
            response_modified = summary['response_modified_count']
            response_default = summary['response_default_count']
            # Whatever is neither modified nor default counts as new.
            response_new = int(response_total - (response_modified + response_default))
        else:
            # An empty page resets every counter, as it did before.
            ticket_served = ticket_applied = ticket_not_applied = 0
            response_total = response_modified = response_default = response_new = 0

    return render_template(
        "dashboard.html", cust_id=cust_id, error_msg=error_msg,
        ticket_served=ticket_served, ticket_applied=ticket_applied, response_total=response_total,
        response_modified=response_modified, response_default=response_default, response_new=response_new,
        ticket_not_applied=ticket_not_applied)
示例#9
0
    def startTrainLogPrediction(self, cust_id):
        """Predict a response category for every stored training ticket.

        Returns early when ``self.model`` is unset.  Otherwise the customer's
        training data frame is loaded, ``getPredictedIntent_prob`` is run on
        its ``query`` column, the two results are stored in the
        ``resp_category`` and ``predict_prob`` columns, and the frame is
        written back with ``batchUpdate``.  An empty frame is left untouched.
        """
        logging.info("startTrainLogPrediction : Started " + str(cust_id))

        if self.model is None:
            logging.info('Cant process as no Training ')
            return

        training_model = getTrainingModel()
        learner = tickets_learner()
        ticket_frame = learner.getTrainingData_DataFrame(cust_id)
        if len(ticket_frame) > 0:
            categories, probabilities = self.getPredictedIntent_prob(
                ticket_frame['query'], cust_id)
            ticket_frame['resp_category'] = categories
            ticket_frame['predict_prob'] = probabilities
            training_model.batchUpdate(ticket_frame, cust_id=cust_id)

        logging.info("startTrainLogPrediction : Completed " + str(cust_id))
        return
示例#10
0
 def prepareTestingData(self, cust_id):
     """Build the test token lists and labels for ``cust_id``.

     Each ticket contributes the text ``tags + ', ' + query``, cleaned with
     ``self.utilclass.cleanData`` and split on whitespace.  The token lists
     are stored in ``self.test_X`` and the stripped ``resp_category`` values
     in ``self.test_y``, both as object-dtype numpy arrays.
     """
     logging.info("prepareTestingData : Started " + str(cust_id))
     self.test_X, self.test_y = [], []

     learner = tickets_learner()
     ticket_pages = learner.getTrainingData(cust_id=cust_id)
     lang = getCustomerModel().getLanguage(cust_id)

     samples, labels = [], []
     for page in ticket_pages:
         for ticket in page:
             raw_text = str(ticket['tags'] + ', ' + ticket['query'])
             logging.debug(raw_text + " =>  " + ticket['response'])
             cleaned = self.utilclass.cleanData(raw_text,
                                                lang=lang,
                                                lowercase=True,
                                                remove_stops=True,
                                                tag_remove=True)
             samples.append(cleaned.strip().split())
             labels.append(ticket['resp_category'].strip())

     self.test_X, self.test_y = samples, labels
     features = np.array(self.test_X, dtype=object)
     targets = np.array(self.test_y, dtype=object)
     self.test_X, self.test_y = features, targets

     logging.info("Total Testing Examples : %s" % len(self.test_y))
     logging.info("prepareTestingData : Completed " + str(cust_id))
     return
    def predictIntent():
        """Predict the intent of the ticket posted as JSON and return it.

        The customer is taken from ``currentAccount.subdomain`` in the JSON
        body and falls back to ``'default'`` when that key is missing or the
        customer does not authenticate.  The raw request is stored as an
        'intent' log entry.  The text to classify is the first comment when
        the requester wrote it, otherwise ``description + '. ' + subject``;
        HTML is stripped with ``cleanhtml`` first.

        Returns:
            The formatted prediction serialised as a JSON string.
        """
        logging.info('predictIntent : ')
        intent_engine = IntentExtractor()
        learner = tickets_learner()
        text_util = UtilityClass()
        received_data = request.json
        try:
            cust_id = received_data['currentAccount']['subdomain']
        except KeyError as err:
            logging.error(err)
            cust_id = 'default'

        cust = getCustomerModel().authenticate(cust_id.strip().lower(), newflag=False)
        # Unknown customers are served by the shared default model.
        cust_id = 'default' if cust is None else cust['cust_name']

        logging.info('Customer Id : ' + str(cust_id))

        get_model().create('intent', json.dumps(request.json), done=True, cust_id=cust_id)

        comments = received_data['comments']
        requester_wrote_first = (len(comments) > 0) and (
            received_data['requester']['email'] == comments[0]['author']['email'])
        if requester_wrote_first:
            raw_text = comments[0]['value']
        else:
            raw_text = received_data['description'] + '. ' + received_data['subject']
        intent_input = text_util.cleanhtml(raw_text)

        predicted_intent = intent_engine.getIntentForText(intent_input, cust_id)
        formatted_resp = learner.formatOutput(predicted_intent, cust_id)
        logging.info('\'' + str(intent_input) + '\' >> ' + str(formatted_resp))
        return json.dumps(formatted_resp)
示例#12
0
    def prepareTrainingData(self, cust_id):
        """Load the customer's answered tickets into ``self.ticket_pd``.

        Training data is fetched with ``done=None``; tickets whose
        ``response`` is blank are left out.  Each remaining ticket
        contributes its ``id``, ``query``, stripped ``response`` and
        ``tags``.
        """
        logging.info("prepareTrainingData : Started : " + str(cust_id))
        learner = tickets_learner()
        ticket_pages = learner.getTrainingData(cust_id=cust_id, done=None)

        answered = []
        for page in ticket_pages:
            for ticket in page:
                if ticket['response'].strip() == '':
                    continue
                answered.append({
                    'id': ticket['id'],
                    'query': ticket['query'],
                    'response': ticket['response'].strip(),
                    'tags': ticket['tags'],
                })
        self.ticket_pd = pd.DataFrame(answered)

        logging.info("Total Training Examples : %s" % len(self.ticket_pd))
        logging.info("prepareTrainingData : Completed : " + str(cust_id))
        return
示例#13
0
    def startEvaluation(self, cust_id):
        """Score the training and response models against stored tickets.

        Skipped when ``self.ticket_pd`` is empty.  Otherwise an
        ``IntentExtractor_resp`` model is trained for the customer, both
        models predict an intent for every ticket ``query``, and columns are
        added to ``self.ticket_pd`` for the mapped tags, stemmed word lists,
        tag-match results, match percentages, mapped response texts and the
        values returned by ``getBleuScore``.  The resulting frame is uploaded
        as CSV through ``self.storage.put_bucket``.
        """
        logging.info('startEvaluation : Started : ' + str(cust_id))
        learner = tickets_learner()

        if len(self.ticket_pd) > 0:
            frame = self.ticket_pd
            intent_engine = IntentExtractor()
            response_engine = IntentExtractor_resp()
            response_engine.prepareTrainingData(cust_id)
            response_engine.startTrainingProcess(cust_id)

            # Predictions of both models for every ticket query.
            frame['TrainingModel_intent'] = intent_engine.getPredictedIntent_list(
                frame['query'], cust_id)
            frame['ResponseModel_intent'] = response_engine.getPredictedIntent_list(
                frame['query'], cust_id)

            tag_map = learner.get_response_map(cust_id, 'tags')
            resp_tag_map = learner.get_response_map(cust_id, 'resp_tags')

            frame['intent_tags'] = frame['TrainingModel_intent'].apply(
                lambda intent: self.getMatch(tag_map, intent).lower().split())
            frame['response_tags'] = frame['TrainingModel_intent'].apply(
                lambda intent: self.getMatch(resp_tag_map, intent).lower().split())

            # Lower-cased word lists first, stemming in a second pass.
            frame['query_list'] = frame['query'].apply(
                lambda text: text.lower().split())
            frame['response_list'] = frame['response'].apply(
                lambda text: text.lower().split())

            frame['query_list'] = frame['query_list'].apply(
                lambda words: self.applystem(words))
            frame['response_list'] = frame['response_list'].apply(
                lambda words: self.applystem(words))

            frame['intent_tags_in_query_list'] = frame.apply(
                lambda row: self.matchword(row['query_list'],
                                           row['intent_tags']),
                axis=1)
            frame['resp_tags_in_response_list'] = frame.apply(
                lambda row: self.matchword(row['response_list'],
                                           row['response_tags']),
                axis=1)

            frame['percentage_match_in_query_list'] = frame.apply(
                lambda row: self.percentmatchword(row['query_list'],
                                                  row['intent_tags']),
                axis=1)
            frame['percentage_match_in_response_list'] = frame.apply(
                lambda row: self.percentmatchword(row['response_list'],
                                                  row['response_tags']),
                axis=1)

            text_map = learner.get_response_map(cust_id, 'response_text')
            frame['intent_response_text'] = frame['TrainingModel_intent'].apply(
                lambda intent: self.getMatch(text_map, intent))
            frame['response_response_text'] = frame['ResponseModel_intent'].apply(
                lambda intent: self.getMatch(text_map, intent))

            frame['bleu_score_intent'] = frame.apply(
                lambda row: self.getBleuScore(row['response'],
                                              row['intent_response_text']),
                axis=1)
            frame['bleu_score_response'] = frame.apply(
                lambda row: self.getBleuScore(row['response'],
                                              row['response_response_text']),
                axis=1)

            csv_text = self.ticket_pd.to_csv()
            bucket_name = str("TrainingModel_Evaluate_" + str(cust_id))
            self.storage.put_bucket(csv_text, bucket_name, filetype='csv')
        logging.info("startEvaluation : Completed : " + str(cust_id))
        return