예제 #1
0
    def load_train_data(self):
        """Build training pairs for the "accept which" entity output module.

        Stories are read from the JSON file named by
        ``util.get_data_fname(task=5, trn=True)``.  Only stories for which
        ``knowledge.is_terminate`` is true contribute a sample.  Within such a
        story every second utterance (the bot turns) is inspected:

        * until a bot sentence satisfying ``knowledge.is_accepted`` is seen,
          the first ``<R_name>`` value of each bot sentence is remembered as
          the last mentioned restaurant;
        * from that sentence on, the first ``<R_phone>`` (or otherwise
          ``<R_address>``) value, minus a fixed-length suffix, is recorded as
          the accepted restaurant.

        Returns:
            tuple: ``(train_x, train_y)``.  ``train_x`` holds the extracted
            form of the utterance preceding the accepting bot sentence;
            ``train_y`` holds ``1`` when the accepted restaurant equals the
            last mentioned one and ``0`` otherwise.
        """
        print(
            'Start to load training data for entity output module; accept which'
        )

        fname = util.get_data_fname(task=5, trn=True)
        # The context manager closes the file even if json.load raises.
        with open(fname, 'r') as fd:
            json_data = json.load(fd)

        train_x = []
        train_y = []

        for i, story in enumerate(json_data):
            if i % 100 == 0:
                print(i)  # coarse progress indicator

            _, utterances = util.split_knowledge(
                story['utterances'] + [story['answer']['utterance']])

            if not knowledge.is_terminate(utterances):
                continue

            last_mentioned_rest = None
            accept_rest = None
            accept_utter = None

            for idx, sent in enumerate(utterances):
                # Turns alternate starting with the user, so even positions
                # are user turns and carry no label information here.
                if idx % 2 == 0:
                    continue

                ext_values, _ = util.extract_sent(sent)

                if knowledge.is_accepted(sent):
                    _, accept_utter = util.extract_sent(utterances[idx - 1])

                if accept_utter is None:
                    if '<R_name>' in ext_values:
                        last_mentioned_rest = ext_values['<R_name>'][0]
                elif '<R_phone>' in ext_values:
                    # presumably strips a 6-char "_phone" suffix to recover
                    # the restaurant name -- TODO confirm against the data
                    accept_rest = ext_values['<R_phone>'][0][:-6]
                elif '<R_address>' in ext_values:
                    # presumably strips an 8-char "_address" suffix -- confirm
                    accept_rest = ext_values['<R_address>'][0][:-8]

            if accept_rest is not None:
                train_x.append(accept_utter)
                # 1: last mentioned, 0: last recommended
                train_y.append(1 if accept_rest == last_mentioned_rest else 0)

        return train_x, train_y
예제 #2
0
    def load_train_data(self):
        """Build training pairs for the "next mention" entity output module.

        Stories are read from the JSON file named by
        ``util.get_data_fname(task=3, trn=True)``.  Every utterance whose
        extracted values contain ``<R_name>`` is examined:

        * if ``knowledge.is_recommend`` holds, the preceding non-silence
          request utterance (looking one turn back, or three turns back when
          the immediately preceding one is ``'<silence>'``) becomes a sample
          labelled at ``self.next_idx``;
        * otherwise the preceding utterance becomes a sample labelled at
          ``self.fst_idx`` and/or ``self.prev_idx`` depending on the position
          of the mentioned restaurant in the sorted API results.

        Returns:
            tuple: ``(train_x, train_y)`` where ``train_x`` is a list of
            extracted sentences and ``train_y`` a list of 3-element 0/1
            label vectors.

        Raises:
            ValueError: if a mentioned restaurant is not in the sorted results.
            AssertionError: if a mention sample receives no label.
        """
        print('Start to load training data for entity output module; next mention')

        fname = util.get_data_fname(task=3, trn=True)
        # The context manager closes the file even if json.load raises.
        with open(fname, 'r') as fd:
            json_data = json.load(fd)

        train_x = []
        train_y = []

        for i, story in enumerate(json_data):
            if i % 100 == 0:
                print(i)  # coarse progress indicator

            api_res, utterances = util.split_knowledge(
                story['utterances'] + [story['answer']['utterance']])
            sorted_rest = util.sort_knowledge(api_res)
            # Index (into sorted_rest) of the most recent recommendation.
            recommend_idx = -1

            for idx, sent in enumerate(utterances):
                ext_value, _ = util.extract_sent(sent)
                if '<R_name>' not in ext_value:
                    continue

                y = [0, 0, 0]

                if knowledge.is_recommend(sent):
                    # recommendation
                    recommend_idx += 1

                    # NOTE(review): negative indices wrap around to the end of
                    # the list; assumes the data never starts with a
                    # recommendation -- confirm.
                    request_utter_idx = idx - 1
                    if utterances[request_utter_idx] == '<silence>':
                        request_utter_idx -= 2

                    if utterances[request_utter_idx] != '<silence>':
                        _, ext_sent = util.extract_sent(
                            utterances[request_utter_idx])
                        train_x.append(ext_sent)
                        y[self.next_idx] = 1
                        train_y.append(y)
                else:
                    # only mention
                    _, ext_sent = util.extract_sent(utterances[idx - 1])
                    train_x.append(ext_sent)
                    rest_name = sent.split()[-1]
                    rest_idx = sorted_rest.index(rest_name)

                    if rest_idx == 0:
                        y[self.fst_idx] = 1
                    if recommend_idx - rest_idx == 1:
                        y[self.prev_idx] = 1
                    # Sanity check on the data: a mention must be either the
                    # first or the previously recommended restaurant.
                    assert y != [0, 0, 0]
                    train_y.append(y)

        return train_x, train_y
예제 #3
0
    def load_train_data(self):
        """Collect (utterance, label) pairs for the "accept which" module.

        The JSON stories come from the file returned by
        ``util.get_data_fname(task=5, trn=True)``.  A story yields at most one
        sample, and only when ``knowledge.is_terminate`` accepts its
        utterances.  Bot turns (every second utterance, starting with the
        second) are scanned in order: before an accepting bot sentence
        (``knowledge.is_accepted``) the latest ``<R_name>`` value is tracked;
        from the accepting sentence onward the ``<R_phone>`` or
        ``<R_address>`` value, with its trailing characters cut off,
        identifies the accepted restaurant.

        Returns:
            tuple: ``(train_x, train_y)``; ``train_x`` contains the extracted
            utterance that preceded the accepting bot sentence and
            ``train_y`` contains ``1`` (accepted == last mentioned) or ``0``.
        """
        print('Start to load training data for entity output module; accept which')

        fname = util.get_data_fname(task=5, trn=True)
        # `with` guarantees the handle is released even when parsing fails.
        with open(fname, 'r') as fd:
            json_data = json.load(fd)

        train_x = []
        train_y = []

        for i, story in enumerate(json_data):
            if i % 100 == 0:
                print(i)  # progress output every 100 stories

            _, utterances = util.split_knowledge(
                story['utterances'] + [story['answer']['utterance']])

            if not knowledge.is_terminate(utterances):
                continue

            last_mentioned_rest = None
            accept_rest = None
            accept_utter = None

            for idx, sent in enumerate(utterances):
                is_bot_turn = idx % 2 == 1  # the user speaks first
                if not is_bot_turn:
                    continue

                ext_values, _ = util.extract_sent(sent)

                if knowledge.is_accepted(sent):
                    _, accept_utter = util.extract_sent(utterances[idx - 1])

                if accept_utter is None:
                    if '<R_name>' in ext_values:
                        last_mentioned_rest = ext_values['<R_name>'][0]
                elif '<R_phone>' in ext_values:
                    # assumes the value ends in a 6-character suffix such as
                    # "_phone" -- TODO confirm
                    accept_rest = ext_values['<R_phone>'][0][:-6]
                elif '<R_address>' in ext_values:
                    # assumes an 8-character suffix such as "_address"
                    accept_rest = ext_values['<R_address>'][0][:-8]

            if accept_rest is not None:
                train_x.append(accept_utter)
                if accept_rest == last_mentioned_rest:
                    train_y.append(1)  # last mentioned
                else:
                    train_y.append(0)  # last recommended

        return train_x, train_y
예제 #4
0
    def load_train_data(self, task):
        """Load per-story training sequences for the action selector module.

        Stories are read from the JSON file named by
        ``util.get_data_fname(task)``.  Each story's utterances (plus the
        answer utterance) are walked as alternating user/bot turns, starting
        with the user.  On a user turn the entity tracker updates the
        slot-value state and the resulting context is recorded; on a bot turn
        the sentence is reduced to its action template.

        Args:
            task: task identifier forwarded to ``util.get_data_fname``.

        Returns:
            tuple: ``(train_user_utter, train_context, train_bot_utter,
            train_y)``.  The first three are lists with one inner list per
            story, holding for every user turn the extracted user sentence,
            the context, and the action template of the preceding bot turn
            (``None`` before the first bot turn).  ``train_y`` holds, per
            story, the action template of the last bot sentence (``None``
            when the story has no bot turn).
        """
        print('Start to load training data for action selector module')

        train_user_utter = []
        train_context = []
        train_bot_utter = []

        train_y = []

        fname = util.get_data_fname(task)
        # The context manager closes the file even if json.load raises.
        with open(fname, 'r') as fd:
            json_data = json.load(fd)

        api_order = knowledge.get_api_order(unseen_slot=False)

        for i, story in enumerate(json_data):
            if i % 100 == 0:
                print(i)  # coarse progress indicator

            _, utterances = util.split_knowledge(
                story['utterances'] + [story['answer']['utterance']])

            story_user_utter = []
            story_context = []
            story_bot_utter = []

            user_turn = True
            bot_sent = None

            # Slot-value state carried across the turns of one story.
            sv_pair = {}

            for sent in utterances:
                if user_turn:
                    ext_values, ext_sent = util.extract_sent(sent)

                    sv_pair = self.entity_tracking.predict(
                        sv_pair, ext_values, ext_sent)
                    context = self.entity_tracking.get_context(
                        api_order, sv_pair)

                    story_user_utter.append(ext_sent)
                    story_context.append(context)
                    # bot_sent is still the template of the previous bot turn.
                    story_bot_utter.append(bot_sent)
                else:
                    bot_sent = util.get_action_template(sent)

                user_turn = not user_turn

            train_user_utter.append(story_user_utter)
            train_context.append(story_context)
            train_bot_utter.append(story_bot_utter)

            # The template of the final bot sentence is the story's target.
            train_y.append(bot_sent)

        return train_user_utter, train_context, train_bot_utter, train_y
예제 #5
0
    def predict(self, sent):
        """Classify which restaurant an accepting utterance refers to.

        The sentence is passed through ``util.extract_sent``, vectorised with
        ``util.get_multiple_sent_vector`` as a one-element batch, and scored
        by ``self.model``.

        Args:
            sent: the utterance to classify.

        Returns:
            str: ``'last_mentioned'`` when the first model output exceeds
            0.5, otherwise ``'last_recommended'``.
        """
        _, delexicalized = util.extract_sent(sent)
        batch = util.get_multiple_sent_vector([delexicalized])
        # Single sample, single output unit: take element [0][0].
        score = self.model.predict(batch)[0][0]
        return 'last_mentioned' if score > 0.5 else 'last_recommended'
예제 #6
0
    def predict(self, sent):
        """Decide between the last mentioned and last recommended restaurant.

        Args:
            sent: the utterance to classify; it is first reduced by
                ``util.extract_sent`` and then vectorised as a batch of one.

        Returns:
            str: ``'last_mentioned'`` if the model's first output for the
            sentence is greater than 0.5, else ``'last_recommended'``.
        """
        _, template_sent = util.extract_sent(sent)
        features = util.get_multiple_sent_vector([template_sent])
        prediction = self.model.predict(features)
        first_output = prediction[0][0]

        if first_output > 0.5:
            return 'last_mentioned'
        return 'last_recommended'
예제 #7
0
    def predict_story(self, api_order, clr_order, story):
        """Predict the bot response for a story.

        The story's utterances are replayed as alternating user/bot turns
        (user first) to rebuild the slot-value state and the per-turn inputs
        of the action selector.  The selector's probabilities are multiplied
        element-wise by the action mask derived from the latest context, and
        the highest-scoring entry of ``knowledge.SYS_RES_TEMP_LST`` is handed
        to the entity output module.

        Args:
            api_order: slot order passed to the entity tracker and output.
            clr_order: forwarded unchanged to ``self.entity_output``.
            story: mapping with an ``'utterances'`` entry.

        Returns:
            Whatever ``self.entity_output.predict_story`` returns.
        """
        api_result, utterances = util.split_knowledge(story['utterances'])
        sorted_api_result = util.sort_knowledge(api_result)

        user_history, context_history, bot_history = [], [], []
        last_bot_template = None
        sv_pair = {}
        context = [0]  # used for the mask if there is no user turn at all

        for position, sent in enumerate(utterances):
            if position % 2 == 1:
                # Bot turn: remember its template for the next user turn.
                last_bot_template = util.get_action_template(sent)
                continue

            ext_values, ext_sent = util.extract_sent(sent)
            sv_pair = self.entity_tracking.predict(sv_pair, ext_values,
                                                   ext_sent)
            context = self.entity_tracking.get_context(api_order, sv_pair)

            user_history.append(ext_sent)
            context_history.append(context)
            bot_history.append(last_bot_template)

        prob = self.action_selector.predict_story(user_history,
                                                  context_history,
                                                  bot_history)
        action_mask = knowledge.get_action_mask(context)
        best_action = np.argmax(np.multiply(prob, action_mask))
        act_template = knowledge.SYS_RES_TEMP_LST[best_action]

        return self.entity_output.predict_story(api_order, clr_order, sv_pair,
                                                sorted_api_result, utterances,
                                                act_template)
예제 #8
0
    def predict_story(self, api_order, clr_order, story):
        """Run the full pipeline on one story and return the bot's response.

        Steps: split knowledge from utterances, replay the dialogue to track
        slot values and collect action-selector inputs, pick the best masked
        action template, then let the entity output module fill it in.

        Args:
            api_order: slot order used by the entity tracker and output.
            clr_order: passed through to ``self.entity_output.predict_story``.
            story: mapping providing ``story['utterances']``.

        Returns:
            The result of ``self.entity_output.predict_story``.
        """
        api_result, utterances = util.split_knowledge(story['utterances'])
        sorted_api_result = util.sort_knowledge(api_result)

        collected_user = []
        collected_context = []
        collected_bot = []

        sv_pair = {}
        context = [0]
        prev_bot_action = None
        expecting_user = True  # dialogues open with a user turn

        for sent in utterances:
            if not expecting_user:
                prev_bot_action = util.get_action_template(sent)
            else:
                ext_values, ext_sent = util.extract_sent(sent)
                sv_pair = self.entity_tracking.predict(sv_pair, ext_values, ext_sent)
                context = self.entity_tracking.get_context(api_order, sv_pair)

                collected_user.append(ext_sent)
                collected_context.append(context)
                collected_bot.append(prev_bot_action)
            expecting_user = not expecting_user

        prob = self.action_selector.predict_story(
            collected_user, collected_context, collected_bot)
        # Zero out actions that the mask for the latest context disallows.
        action_mask = knowledge.get_action_mask(context)
        masked_prob = np.multiply(prob, action_mask)
        act_template = knowledge.SYS_RES_TEMP_LST[np.argmax(masked_prob)]

        return self.entity_output.predict_story(
            api_order, clr_order, sv_pair, sorted_api_result, utterances,
            act_template)
예제 #9
0
    def get_sent_label_pair(self, api_order, api_sent, utterances):
        """Pair each slot-bearing sentence with a 3-way label vector.

        For every slot found in an utterance, the slot token in the extracted
        sentence is replaced by ``'<R_value>'`` and a one-hot label is built:
        the position of the API call's value among the values extracted for
        that slot, or ``self.neither_idx`` when it is not among them.  After
        all utterances are processed, every pair except the last one for each
        slot is relabelled ``[0, 0, 1]``.

        Args:
            api_order: slots of the API call; also the keys of the result.
            api_sent: API call sentence passed to ``util.get_api_sv``.
            utterances: iterable of utterance strings.

        Returns:
            dict: slot -> list of ``[sentence, label_vector]`` pairs.
        """
        api_sv = util.get_api_sv(api_order, api_sent)
        sent_label_pair = dict((slot, []) for slot in api_order)

        for utterance in utterances:
            found_values, delex_sent = util.extract_sent(utterance)

            for slot, candidates in found_values.items():
                target = api_sv[slot]
                masked_sent = delex_sent.replace(slot, '<R_value>')

                label_pos = (candidates.index(target)
                             if target in candidates else self.neither_idx)
                one_hot = [0] * 3
                one_hot[label_pos] = 1
                sent_label_pair[slot].append([masked_sent, one_hot])

        # Only the final pair of each slot keeps its computed label.
        # NOTE(review): [0, 0, 1] presumably equals the "neither" class
        # (neither_idx == 2) -- confirm.
        for slot in api_order:
            for pair in sent_label_pair[slot][:-1]:
                pair[1] = [0, 0, 1]

        return sent_label_pair
예제 #10
0
    def load_train_data(self, task):
        """Assemble action-selector training data from a task's story file.

        The file named by ``util.get_data_fname(task)`` is parsed as JSON.
        For each story the utterances and the answer utterance are replayed
        in order; even positions are treated as user turns and odd positions
        as bot turns.

        Args:
            task: task identifier forwarded to ``util.get_data_fname``.

        Returns:
            tuple: ``(train_user_utter, train_context, train_bot_utter,
            train_y)``.  The first three contain one list per story with one
            entry per user turn: the extracted user sentence, the tracker
            context after that turn, and the action template of the bot turn
            before it (``None`` for the first user turn).  ``train_y``
            contains each story's final bot action template, or ``None`` if
            the story has no bot turn.
        """
        print('Start to load training data for action selector module')

        train_user_utter = []
        train_context = []
        train_bot_utter = []

        train_y = []

        fname = util.get_data_fname(task)
        # `with` releases the handle even when json.load raises.
        with open(fname, 'r') as fd:
            json_data = json.load(fd)

        api_order = knowledge.get_api_order(unseen_slot=False)

        for i, story in enumerate(json_data):
            if i % 100 == 0:
                print(i)  # progress output every 100 stories

            _, utterances = util.split_knowledge(
                story['utterances'] + [story['answer']['utterance']])

            story_user_utter = []
            story_context = []
            story_bot_utter = []

            bot_sent = None
            sv_pair = {}  # slot-value state accumulated over the story

            for turn, sent in enumerate(utterances):
                if turn % 2 == 1:
                    # Bot turn: keep only its action template.
                    bot_sent = util.get_action_template(sent)
                    continue

                ext_values, ext_sent = util.extract_sent(sent)

                sv_pair = self.entity_tracking.predict(
                    sv_pair, ext_values, ext_sent)
                context = self.entity_tracking.get_context(
                    api_order, sv_pair)

                story_user_utter.append(ext_sent)
                story_context.append(context)
                # Template of the bot turn that preceded this user turn.
                story_bot_utter.append(bot_sent)

            train_user_utter.append(story_user_utter)
            train_context.append(story_context)
            train_bot_utter.append(story_bot_utter)

            # The last bot template seen is the story's prediction target.
            train_y.append(bot_sent)

        return train_user_utter, train_context, train_bot_utter, train_y
예제 #11
0
 def predict(self, sent):
     """Return the entry of ``self.answer_lst`` the model scores highest.

     The sentence is reduced with ``util.extract_sent``, vectorised as a
     one-element batch, and fed to ``self.model``; the flat argmax of the
     model output indexes ``self.answer_lst``.
     """
     _, delexicalized = util.extract_sent(sent)
     batch = util.get_multiple_sent_vector([delexicalized])
     scores = self.model.predict(batch)
     return self.answer_lst[np.argmax(scores)]
예제 #12
0
    def load_train_data(self):
        """Collect (sentence, label) samples for the "next mention" module.

        The JSON stories come from the file returned by
        ``util.get_data_fname(task=3, trn=True)``.  Only utterances whose
        extracted values include ``<R_name>`` produce samples:

        * a recommendation (``knowledge.is_recommend``) uses the preceding
          request utterance -- skipping back two more turns when that one is
          ``'<silence>'`` -- and is labelled at ``self.next_idx``; no sample
          is produced when the resolved utterance is still ``'<silence>'``;
        * any other mention uses the directly preceding utterance and is
          labelled at ``self.fst_idx`` when the restaurant is first in the
          sorted results and/or ``self.prev_idx`` when it sits one position
          before the latest recommendation.

        Returns:
            tuple: ``(train_x, train_y)``; extracted sentences and their
            3-element 0/1 label vectors.

        Raises:
            ValueError: if a mentioned restaurant is missing from the sorted
                results.
            AssertionError: if a mention ends up with an all-zero label.
        """
        print(
            'Start to load training data for entity output module; next mention'
        )

        fname = util.get_data_fname(task=3, trn=True)
        # `with` releases the handle even when json.load raises.
        with open(fname, 'r') as fd:
            json_data = json.load(fd)

        train_x = []
        train_y = []

        for i, story in enumerate(json_data):
            if i % 100 == 0:
                print(i)  # progress output every 100 stories

            api_res, utterances = util.split_knowledge(
                story['utterances'] + [story['answer']['utterance']])
            sorted_rest = util.sort_knowledge(api_res)
            # Position in sorted_rest of the latest recommendation so far.
            recommend_idx = -1

            for idx, sent in enumerate(utterances):
                y = [0, 0, 0]

                ext_value, _ = util.extract_sent(sent)

                if '<R_name>' not in ext_value:
                    continue

                # recommendation
                if knowledge.is_recommend(sent):
                    recommend_idx += 1

                    # NOTE(review): idx - 1 / idx - 3 can go negative and wrap
                    # to the end of the list; presumably the data rules this
                    # out -- confirm.
                    request_utter_idx = idx - 1
                    if utterances[request_utter_idx] == '<silence>':
                        request_utter_idx -= 2

                    if utterances[request_utter_idx] != '<silence>':
                        _, ext_sent = util.extract_sent(
                            utterances[request_utter_idx])
                        train_x.append(ext_sent)
                        y[self.next_idx] = 1
                        train_y.append(y)

                # only mention
                else:
                    _, ext_sent = util.extract_sent(utterances[idx - 1])
                    train_x.append(ext_sent)
                    rest_name = sent.split()[-1]
                    rest_idx = sorted_rest.index(rest_name)

                    if rest_idx == 0:
                        y[self.fst_idx] = 1
                    if recommend_idx - rest_idx == 1:
                        y[self.prev_idx] = 1
                    # Data invariant: every mention must match one category.
                    assert y != [0, 0, 0]
                    train_y.append(y)

        return train_x, train_y
예제 #13
0
 def predict(self, sent):
     """Map a sentence to the best-scoring answer in ``self.answer_lst``.

     Args:
         sent: the utterance to classify.

     Returns:
         The element of ``self.answer_lst`` at the flat argmax of the model
         output for the extracted, vectorised sentence.
     """
     extracted_pair = util.extract_sent(sent)
     _, template_sent = extracted_pair
     features = util.get_multiple_sent_vector([template_sent])
     best = np.argmax(self.model.predict(features))
     return self.answer_lst[best]