Пример #1
0
    def consult_skill(self, skill_name):
        """Load a skill, consult its dependencies, then its Prolog sources.

        The skill module is obtained from ``self.load_skill``.  Every name in
        the module's ``DEPENDS`` attribute is consulted first (recursively);
        afterwards each file listed in the optional ``PL_SOURCES`` attribute
        is consulted through ``pyxsb_command``, using the skill's entry in
        ``self.skill_paths`` as the directory.

        Args:
            skill_name: name of the skill to consult.

        Returns:
            The skill module, or ``None`` if the skill was already consulted.

        Any ordinary exception raised while handling dependencies or Prolog
        sources (including a missing ``DEPENDS`` attribute) is logged and the
        process exits with status 1.  ``KeyboardInterrupt`` and ``SystemExit``
        are deliberately not intercepted, so an interrupt or the exit
        requested by a nested call propagates unchanged.
        """

        if skill_name in self.consulted_skills:
            return None

        logging.debug("consulting skill '%s'" % skill_name)

        m = self.load_skill(skill_name)
        # registered before recursing, so a dependency cycle hits the early
        # return above instead of recursing forever
        self.consulted_skills.add(skill_name)
        skill_dir = self.skill_paths[skill_name]

        try:

            for dep_name in getattr(m, 'DEPENDS'):
                self.consult_skill(dep_name)

            # PL_SOURCES is optional
            for inputfn in getattr(m, 'PL_SOURCES', ()):
                pl_path = "%s/%s" % (skill_dir, inputfn)
                pyxsb_command("consult('%s')." % pl_path)

        except Exception:
            logging.error('failed to load skill "%s"' % skill_name)
            logging.error(traceback.format_exc())
            sys.exit(1)

        return m
Пример #2
0
    def consult_skill(self, skill_name):
        """Consult a skill: load it, pull in its dependencies and Prolog files.

        Steps, in order:

        * return immediately if ``skill_name`` is in ``self.consulted_skills``
        * load the module via ``self.load_skill`` and mark it as consulted
        * recursively consult each entry of the module's ``DEPENDS``
        * if the module has ``PL_SOURCES``, consult each listed file (relative
          to ``self.skill_paths[skill_name]``) via ``pyxsb_command``

        Args:
            skill_name: name of the skill to consult.

        Returns:
            The loaded skill module; ``None`` when the skill had been
            consulted before.

        Failures in the dependency / Prolog phase are logged with a traceback
        and end the process via ``sys.exit(1)``.  Only ``Exception``
        subclasses are handled that way: ``KeyboardInterrupt`` and the
        ``SystemExit`` of a nested call pass through untouched instead of
        being logged again as a load failure at every recursion level.
        """

        if skill_name in self.consulted_skills:
            return None

        logging.debug("consulting skill '%s'" % skill_name)

        m = self.load_skill(skill_name)
        # mark first, recurse later: circular DEPENDS then terminate at the
        # membership test above
        self.consulted_skills.add(skill_name)
        skill_dir = self.skill_paths[skill_name]

        try:

            # DEPENDS is mandatory; a module without it is a load failure
            for dependency in m.DEPENDS:
                self.consult_skill(dependency)

            if hasattr(m, 'PL_SOURCES'):
                for inputfn in m.PL_SOURCES:
                    pl_path = "%s/%s" % (skill_dir, inputfn)
                    pyxsb_command("consult('%s')." % pl_path)

        except Exception:
            logging.error('failed to load skill "%s"' % skill_name)
            logging.error(traceback.format_exc())
            sys.exit(1)

        return m
Пример #3
0
    def process_input(self, ctx, inp_raw, run_trace=False, do_eliza=True):
        """Process one user utterance and return the chosen response.

        Responses are collected in ``ctx`` from up to three sources:

        1. the code attached to every exact match of the input in the
           training data,
        2. code sequences predicted by ``self.nlp_model`` -- only if step 1
           left ``ctx`` without responses and a model is set,
        3. ``psychology.do_eliza`` -- only if there still are no responses
           and ``do_eliza`` is true.

        One of the responses returned by ``ctx.get_resps()`` is then picked
        with ``random.randrange`` and committed via ``ctx.commit_resp``.

        Args:
            ctx: conversation context; this method reads ``ctx.lang``,
                ``ctx.realm`` and ``ctx.user`` and calls ``set_inp``,
                ``get_resps`` and ``commit_resp`` on it.
            inp_raw: raw input text; it is tokenized with ``tokenize`` and
                re-joined with single spaces.
            run_trace: if true, send ``trace.`` through ``pyxsb_command``
                before processing (``notrace.`` is always sent before a
                response is picked).
            do_eliza: enable the ELIZA-style fallback.

        Returns:
            Tuple ``(out, score, action)``: text and score of the picked
            response (``u''`` and ``0.0`` when there is none) and the value
            stored under ``'action'`` in the memory of ``ctx.realm``.
        """

        if run_trace:
            pyxsb_command("trace.")
        else:
            pyxsb_command("notrace.")

        # normalize the input: tokenize, drop 'nspc' tokens, re-join
        tokens = [t for t in tokenize(inp_raw, ctx.lang) if t != u'nspc']
        inp = u" ".join(tokens)

        ctx.set_inp(inp)
        self.mem_set(ctx.realm, 'action', None)

        logging.debug(
            '==============================================================================='
        )
        logging.debug('process_input: %s' % repr(inp))

        #
        # do we have an exact match in our training data for this input?
        #

        found_resp = False
        for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(
                inp, ctx.lang):

            afn, acode = self.dte.lookup_code(md5s)
            # source of the stored function followed by a call: afn(ctx,<args>)
            ecode = '%s\n%s(ctx' % (acode, afn)
            if args:
                for arg in args:
                    ecode += ',%s' % repr(arg)
            ecode += ')\n'

            logging.debug('exact training data match found: %s:%s' %
                          (src_fn, src_line))
            logging.debug(ecode)

            # NOTE: exec runs code stored in the data engine; it must never be
            # fed from untrusted sources.
            try:
                exec(ecode, globals(), locals())
                found_resp = True
            except Exception:
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
                logging.error(ecode)

        if not found_resp:
            logging.debug('no exact training data match for this input found.')

        #
        # ask neural net if we did not find an answer
        #

        resps = ctx.get_resps()
        if not resps and self.nlp_model:

            from nlp_model import _STOP, _OR

            logging.debug('trying neural net on: %s' % repr(inp))

            try:
                # exact matching has not yielded any results -> use the neural
                # network to generate response(s)
                predicted_ids = self.nlp_model.predict(inp)

                # the prediction is a flat token stream: commands are separated
                # by _OR and the stream ends at _STOP; the first token of each
                # command is a code key, the rest are JSON-encoded arguments.
                # All commands are run so every one can contribute responses.
                cmd = []
                for decoded in predicted_ids:

                    if decoded == _STOP or decoded == _OR:

                        try:
                            logging.debug('trying cmd: %s' % repr(cmd))
                            afn, acode = self.dte.lookup_code(cmd[0])
                            ecode = '%s\n%s(ctx' % (acode, afn)
                            for arg in cmd[1:]:
                                ecode += ',%s' % repr(json.loads(arg))
                            ecode += ')\n'

                            logging.debug(ecode)

                            exec(ecode, globals(), locals())
                        except Exception:
                            # generated code is expected to fail now and then
                            logging.debug('EXCEPTION CAUGHT %s' %
                                          traceback.format_exc())

                        cmd = []
                        if decoded == _STOP:
                            break
                    else:
                        cmd.append(decoded)

            except Exception:
                # probably ok (code generated by neural network might not always work)
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        pyxsb_command("notrace.")

        #
        # collect responses, fall back to ELIZA if there are none
        #

        resps = ctx.get_resps()

        if not resps and do_eliza:
            logging.debug('producing ELIZA-style response for input %s' % inp)

            from psychology import psychology
            psychology.do_eliza(ctx)
            resps = ctx.get_resps()

        #
        # pick random response
        #

        if resps:
            i = random.randrange(0, len(resps))
            out, score, _resp_action, _resp_action_arg = resps[i]
            ctx.commit_resp(i)

            logging.debug(u'picked resp #%d (score: %f): %s' % (i, score, out))

            # dump memory for debugging; the loop variable must not be called
            # 'score', otherwise it overwrites the score returned below
            for mem_owner in (ctx.realm, ctx.user):
                logging.debug(u'MEM: %s' % mem_owner)
                for k, v, mem_score in self.mem_dump(mem_owner):
                    logging.debug(u'MEM:    %-20s: %s (%f)' % (k, v, mem_score))

        else:
            out = u''
            score = 0.0
            logging.debug(u'No response found.')

        # the action is whatever the executed code left in the realm's memory
        action = self.mem_get(ctx.realm, 'action')
        return out, score, action
Пример #4
0
    def test_skill(self, skill_name, run_trace=False, test_name=None):
        """Run the test cases stored for a skill.

        For every test case returned by ``self.dte.lookup_tests`` a fresh
        test context is created, the test realm/user memories are cleared,
        optional prep code is executed and the rounds are played in order.
        A round passes when one of the responses produced by the training
        code for ``test_inp`` matches ``test_out`` (compared after
        re-tokenizing the actual output); an empty ``test_out`` matches the
        first response.  The first failing round ends its test case.

        Args:
            skill_name: name of the skill; must be a key of ``self.skills``.
            run_trace: if true, send ``trace.`` through ``pyxsb_command``,
                otherwise ``notrace.``.
            test_name: if given, only the test case with this name is run.

        Returns:
            Tuple ``(num_tests, num_fails)``.

        Raises:
            KeyError: if ``skill_name`` is not in ``self.skills``.
        """

        if run_trace:
            pyxsb_command("trace.")
        else:
            pyxsb_command("notrace.")

        if skill_name not in self.skills:
            raise KeyError(skill_name)

        logging.info('running tests of skill %s ...' % (skill_name))

        num_tests = 0
        num_fails = 0
        for tc in self.dte.lookup_tests(skill_name):
            # NOTE(review): this unpacking overwrites self.lang and
            # self.src_line on the engine itself -- confirm that is intended.
            t_name, self.lang, prep_code, prep_fn, rounds, src_fn, self.src_line = tc

            if test_name and t_name != test_name:
                logging.info('skipping test %s' % t_name)
                continue

            ctx = self.create_context(user=TEST_USER,
                                      realm=TEST_REALM,
                                      test_mode=True)
            num_tests += 1

            self.mem_clear(TEST_REALM)
            self.mem_clear(TEST_USER)

            # prep

            if prep_code:
                pcode = '%s\n%s(ctx)\n' % (prep_code, prep_fn)
                try:
                    exec(pcode, globals(), locals())
                except Exception:
                    # a failing prep step is logged; the rounds still run
                    logging.error('EXCEPTION CAUGHT %s' %
                                  traceback.format_exc())

            for round_num, (test_inp, test_out, test_action,
                            test_action_arg) in enumerate(rounds):

                logging.info("test_skill: %s round %d test_inp    : %s" %
                             (t_name, round_num, repr(test_inp)))
                logging.info("test_skill: %s round %d test_out    : %s" %
                             (t_name, round_num, repr(test_out)))

                # look up code in data engine

                matching_resp = False
                acode = None  # stays None if no training data matched

                ctx.set_inp(test_inp)
                self.mem_set(ctx.realm, 'action', None)

                for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(
                        test_inp, self.lang):

                    afn, acode = self.dte.lookup_code(md5s)
                    ecode = '%s\n%s(ctx' % (acode, afn)
                    if args:
                        for arg in args:
                            ecode += ',%s' % repr(arg)
                    ecode += ')\n'
                    try:
                        exec(ecode, globals(), locals())
                    except Exception:
                        logging.error(
                            'test_skill: %s round %d EXCEPTION CAUGHT %s' %
                            (t_name, round_num, traceback.format_exc()))
                        logging.error(ecode)

                if acode is None:
                    logging.error(
                        u'Error: %s: no training data for test_in "%s" found in DB!'
                        % (t_name, test_inp))
                    num_fails += 1
                    break

                resps = ctx.get_resps()

                for i, resp in enumerate(resps):
                    actual_out, score, actual_action, actual_action_arg = resp

                    if len(test_out) > 0:
                        # compare in tokenized form
                        if len(actual_out) > 0:
                            actual_out = u' '.join(
                                tokenize(actual_out, self.lang))
                        logging.info(
                            "test_skill: %s round %d actual_out  : %s (score: %f)"
                            % (t_name, round_num, actual_out, score))
                        if actual_out != test_out:
                            logging.info(
                                "test_skill: %s round %d UTTERANCE MISMATCH." %
                                (t_name, round_num))
                            continue  # no match

                    logging.info("test_skill: %s round %d UTTERANCE MATCHED!" %
                                 (t_name, round_num))
                    matching_resp = True
                    ctx.commit_resp(i)

                    # check action: run the stored action code; this call is
                    # not guarded, so an exception it raises propagates to the
                    # caller of test_skill

                    if test_action:
                        afn, acode = self.dte.lookup_code(test_action)
                        ecode = '%s\n%s(ctx' % (acode, afn)
                        if test_action_arg:
                            ecode += ',%s' % repr(test_action_arg)
                        ecode += ')\n'
                        exec(ecode, globals(), locals())

                    break

                if not matching_resp:
                    logging.error(
                        u'test_skill: %s round %d no matching response found.'
                        % (t_name, round_num))
                    num_fails += 1
                    break

        return num_tests, num_fails
Пример #5
0
    def __init__(self,
                 db_url=DEFAULT_DB_URL,
                 xsb_arch_dir=DEFAULT_XSB_ARCH_DIR,
                 toplevel=DEFAULT_TOPLEVEL,
                 skill_paths=DEFAULT_SKILL_PATHS,
                 lang=DEFAULT_LANG,
                 nlp_model_args=DEFAULT_NLP_MODEL_ARGS,
                 skill_args=DEFAULT_SKILL_ARGS,
                 uttclass_model_args=DEFAULT_UTTCLASS_MODEL_ARGS):
        """Set up database access, skill search paths, XSB and stored memory.

        Args:
            db_url: passed to ``model.data_engine_setup``.
            xsb_arch_dir: passed to ``pyxsb_start_session``.
            toplevel: name of the skill loaded via ``self.load_skill``.
            skill_paths: directories put at the front of ``sys.path`` (in the
                given order); if empty/None the search path is auto-configured
                as ``.``, ``./skills`` and ``skills`` next to this file.
            lang: stored as ``self.lang``.
            nlp_model_args: stored as ``self.nlp_model_args``.
            skill_args: stored as ``self.skill_args``.
            uttclass_model_args: stored as ``self.uttclass_model_args``.
        """

        self.lang = lang
        self.nlp_model_args = nlp_model_args
        self.skill_args = skill_args
        self.uttclass_model_args = uttclass_model_args

        # --- database connection ---------------------------------------

        self.engine = model.data_engine_setup(db_url, echo=False)
        self.Session = sessionmaker(bind=self.engine)
        self.session = self.Session()

        # --- TensorFlow: deferred, as tf can take quite a while to set up

        self.tf_session = None
        self.nlp_model = None
        self.uttclass_model = None

        # --- skill management ------------------------------------------

        self.skills = {}  # skill_name -> module obj
        self.skill_paths = {}  # skill_name -> pathname
        self.consulted_skills = set()
        self.toplevel = toplevel
        self.all_skills = []

        if skill_paths:
            # prepend all paths at once, keeping their given order
            sys.path[:0] = list(skill_paths)
        else:
            # auto-config; resulting order at the front of sys.path:
            #   .  /  ./skills  /  <directory of this file>/skills
            here = os.path.dirname(os.path.abspath(__file__))
            cwd = os.getcwd()
            sys.path[:0] = [cwd, cwd + '/skills', here + '/skills']

        for search_dir in sys.path:
            logging.debug("Module search path: %s" % search_dir)

        self.load_skill(toplevel)

        # --- Prolog engine, data engine --------------------------------

        pyxsb_start_session(xsb_arch_dir)
        self.dte = DataEngine(self.session)

        pyxsb_command('import default_sys_error_handler/1 from error_handler.')
        pyxsb_command(
            'assertz((default_user_error_handler(Ball):-default_sys_error_handler(Ball))).'
        )

        # --- restore memory --------------------------------------------
        # every stored Mem row becomes one assertz(memory(...)) goal; all
        # goals are sent to XSB as a single comma-separated query

        goals = []
        for mem in self.session.query(model.Mem):
            value = json_to_xsb(mem.v)
            goals.append(u"assertz(memory('%s', '%s', %s, %f))" %
                         (mem.realm, mem.k, unicode(value), mem.score))

        if goals:
            query = u', '.join(goals)
        else:
            # nothing stored: assert a placeholder fact instead
            query = u'assertz(memory(self, self, self, 1.0))'

        pyxsb_command(query + u'.')
Пример #6
0
    def process_input(self, ctx, inp_raw, run_trace=False, do_eliza=True):
        """Process user input and return ``(out, score, action)``.

        The input is tokenized, stripped of ``nspc`` tokens and stored in the
        context.  Responses are then gathered in ``ctx`` by

        * executing the code of every exact training-data match,
        * executing the code sequences predicted by ``self.nlp_model`` if no
          response exists yet and a model is available,
        * calling ``psychology.do_eliza`` if there is still no response and
          ``do_eliza`` is true.

        Finally one entry of ``ctx.get_resps()`` is chosen with
        ``random.randrange`` and committed with ``ctx.commit_resp``.

        Args:
            ctx: conversation context (uses ``lang``, ``realm``, ``user``,
                ``set_inp``, ``get_resps`` and ``commit_resp``).
            inp_raw: raw user input.
            run_trace: send ``trace.`` instead of ``notrace.`` through
                ``pyxsb_command`` before processing.
            do_eliza: allow the ELIZA-style fallback response.

        Returns:
            ``(out, score, action)`` -- text and score of the chosen response
            (``u''`` / ``0.0`` if none was found) plus the ``'action'`` entry
            read from the memory of ``ctx.realm``.
        """

        pyxsb_command("trace." if run_trace else "notrace.")

        tokens = []
        for t in tokenize(inp_raw, ctx.lang):
            if t != u'nspc':
                tokens.append(t)
        inp = u" ".join(tokens)

        ctx.set_inp(inp)
        self.mem_set(ctx.realm, 'action', None)

        logging.debug('===============================================================================')
        logging.debug('process_input: %s' % repr(inp))

        #
        # do we have an exact match in our training data for this input?
        #

        found_resp = False
        for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(inp, ctx.lang):

            afn, acode = self.dte.lookup_code(md5s)
            # stored function source + a call of it with ctx and the arguments
            call_args = ['ctx'] + [repr(arg) for arg in (args or [])]
            ecode = '%s\n%s(%s)\n' % (acode, afn, ','.join(call_args))

            logging.debug('exact training data match found: %s:%s' % (src_fn, src_line))
            logging.debug(ecode)

            # NOTE: exec of code from the data engine -- trusted input only.
            try:
                exec(ecode, globals(), locals())
                found_resp = True
            except Exception:
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
                logging.error(ecode)

        if not found_resp:
            logging.debug('no exact training data match for this input found.')

        #
        # ask neural net if we did not find an answer
        #

        resps = ctx.get_resps()
        if not resps and self.nlp_model:

            from nlp_model import _STOP, _OR

            logging.debug('trying neural net on: %s' % repr(inp))

            try:
                predicted_ids = self.nlp_model.predict(inp)

                # Split the predicted token stream into commands: _OR ends a
                # command, _STOP ends the last command and the stream.  Each
                # command is [code key, json arg, json arg, ...]; all of them
                # are executed so each can add responses to ctx.
                cmd = []
                for decoded in predicted_ids:

                    if decoded != _STOP and decoded != _OR:
                        cmd.append(decoded)
                        continue

                    try:
                        logging.debug('trying cmd: %s' % repr(cmd))
                        afn, acode = self.dte.lookup_code(cmd[0])
                        call_args = ['ctx'] + [repr(json.loads(arg)) for arg in cmd[1:]]
                        ecode = '%s\n%s(%s)\n' % (acode, afn, ','.join(call_args))

                        logging.debug(ecode)

                        exec(ecode, globals(), locals())
                    except Exception:
                        # predicted code is allowed to fail, hence debug level
                        logging.debug('EXCEPTION CAUGHT %s' % traceback.format_exc())

                    cmd = []
                    if decoded == _STOP:
                        break

            except Exception:
                # probably ok (code generated by neural network might not always work)
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        pyxsb_command("notrace.")

        #
        # collect responses, ELIZA fallback
        #

        resps = ctx.get_resps()

        if not resps and do_eliza:
            logging.debug('producing ELIZA-style response for input %s' % inp)

            from psychology import psychology
            psychology.do_eliza(ctx)
            resps = ctx.get_resps()

        #
        # pick random response
        #

        if not resps:
            logging.debug(u'No response found.')
            return u'', 0.0, self.mem_get(ctx.realm, 'action')

        i = random.randrange(0, len(resps))
        out, score, _unused_action, _unused_action_arg = resps[i]
        ctx.commit_resp(i)

        logging.debug(u'picked resp #%d (score: %f): %s' % (i, score, out))

        # Debug dump of realm and user memory.  The per-entry score gets its
        # own name so that it cannot overwrite the response score returned
        # to the caller.
        logging.debug(u'MEM: %s' % ctx.realm)
        for k, v, entry_score in self.mem_dump(ctx.realm):
            logging.debug(u'MEM:    %-20s: %s (%f)' % (k, v, entry_score))
        logging.debug(u'MEM: %s' % ctx.user)
        for k, v, entry_score in self.mem_dump(ctx.user):
            logging.debug(u'MEM:    %-20s: %s (%f)' % (k, v, entry_score))

        action = self.mem_get(ctx.realm, 'action')
        return out, score, action
Пример #7
0
    def test_skill(self, skill_name, run_trace=False, test_name=None):
        """Execute the stored tests of one skill and count failures.

        Each test case from ``self.dte.lookup_tests(skill_name)`` runs in a
        new test context with cleared test memories.  Its rounds are played
        in sequence: the training code matching ``test_inp`` is executed and
        the resulting responses are compared with ``test_out`` (after
        re-tokenizing the actual output; an empty ``test_out`` accepts the
        first response).  On a match the response is committed and, if the
        round names a ``test_action``, that action code is executed.  A test
        case is counted as failed, and abandoned, at its first round without
        training data or without a matching response.

        Args:
            skill_name: name of the skill; must be a key of ``self.skills``.
            run_trace: send ``trace.`` instead of ``notrace.`` through
                ``pyxsb_command``.
            test_name: restrict the run to the test case with this name.

        Returns:
            ``(num_tests, num_fails)``.

        Raises:
            KeyError: ``skill_name`` is not a key of ``self.skills``.
        """

        pyxsb_command("trace." if run_trace else "notrace.")

        if skill_name not in self.skills:
            raise KeyError(skill_name)

        logging.info('running tests of skill %s ...' % (skill_name))

        num_tests = 0
        num_fails = 0
        for tc in self.dte.lookup_tests(skill_name):
            # NOTE(review): the test's language and source line are written
            # to self.lang / self.src_line and stay set after the run --
            # confirm callers expect this.
            t_name, self.lang, prep_code, prep_fn, rounds, src_fn, self.src_line = tc

            if test_name and t_name != test_name:
                logging.info('skipping test %s' % t_name)
                continue

            ctx = self.create_context(user=TEST_USER, realm=TEST_REALM, test_mode=True)
            num_tests += 1

            self.mem_clear(TEST_REALM)
            self.mem_clear(TEST_USER)

            # prep

            if prep_code:
                pcode = '%s\n%s(ctx)\n' % (prep_code, prep_fn)
                try:
                    exec(pcode, globals(), locals())
                except Exception:
                    # logged only; the rounds below are still attempted
                    logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

            for round_num, test_round in enumerate(rounds):
                test_inp, test_out, test_action, test_action_arg = test_round

                logging.info("test_skill: %s round %d test_inp    : %s" % (t_name, round_num, repr(test_inp)))
                logging.info("test_skill: %s round %d test_out    : %s" % (t_name, round_num, repr(test_out)))

                # look up code in data engine

                matching_resp = False
                acode = None  # remains None when the lookup yields nothing

                ctx.set_inp(test_inp)
                self.mem_set(ctx.realm, 'action', None)

                for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(test_inp, self.lang):

                    afn, acode = self.dte.lookup_code(md5s)
                    ecode = '%s\n%s(ctx' % (acode, afn)
                    if args:
                        for arg in args:
                            ecode += ',%s' % repr(arg)
                    ecode += ')\n'
                    try:
                        exec(ecode, globals(), locals())
                    except Exception:
                        logging.error('test_skill: %s round %d EXCEPTION CAUGHT %s'
                                      % (t_name, round_num, traceback.format_exc()))
                        logging.error(ecode)

                if acode is None:
                    logging.error(u'Error: %s: no training data for test_in "%s" found in DB!'
                                  % (t_name, test_inp))
                    num_fails += 1
                    break

                resps = ctx.get_resps()

                for i, resp in enumerate(resps):
                    actual_out, score, actual_action, actual_action_arg = resp

                    if len(test_out) > 0:
                        # expected output given: compare tokenized forms
                        if len(actual_out) > 0:
                            actual_out = u' '.join(tokenize(actual_out, self.lang))
                        logging.info("test_skill: %s round %d actual_out  : %s (score: %f)"
                                     % (t_name, round_num, actual_out, score))
                        if actual_out != test_out:
                            logging.info("test_skill: %s round %d UTTERANCE MISMATCH."
                                         % (t_name, round_num))
                            continue  # no match

                    logging.info("test_skill: %s round %d UTTERANCE MATCHED!" % (t_name, round_num))
                    matching_resp = True
                    ctx.commit_resp(i)

                    # check action -- executed without a try block, so any
                    # exception raised by the action code reaches the caller

                    if test_action:
                        afn, acode = self.dte.lookup_code(test_action)
                        ecode = '%s\n%s(ctx' % (acode, afn)
                        if test_action_arg:
                            ecode += ',%s' % repr(test_action_arg)
                        ecode += ')\n'
                        exec(ecode, globals(), locals())

                    break

                if not matching_resp:
                    logging.error(u'test_skill: %s round %d no matching response found.'
                                  % (t_name, round_num))
                    num_fails += 1
                    break

        return num_tests, num_fails
Пример #8
0
    def __init__(self,
                 db_url=DEFAULT_DB_URL,
                 xsb_arch_dir=DEFAULT_XSB_ARCH_DIR,
                 toplevel=DEFAULT_TOPLEVEL,
                 skill_paths=DEFAULT_SKILL_PATHS,
                 lang=DEFAULT_LANG,
                 nlp_model_args=DEFAULT_NLP_MODEL_ARGS,
                 skill_args=DEFAULT_SKILL_ARGS,
                 uttclass_model_args=DEFAULT_UTTCLASS_MODEL_ARGS):
        """Create the engine: DB session, skill paths, XSB session, memory.

        Args:
            db_url: database URL handed to ``model.data_engine_setup``.
            xsb_arch_dir: argument for ``pyxsb_start_session``.
            toplevel: skill name handed to ``self.load_skill``; also kept as
                ``self.toplevel``.
            skill_paths: module search directories inserted at the front of
                ``sys.path``; when empty/None, ``.``, ``./skills`` and the
                ``skills`` directory beside this file are used instead.
            lang: kept as ``self.lang``.
            nlp_model_args: kept as ``self.nlp_model_args``.
            skill_args: kept as ``self.skill_args``.
            uttclass_model_args: kept as ``self.uttclass_model_args``.
        """

        # plain configuration values
        self.lang = lang
        self.nlp_model_args = nlp_model_args
        self.skill_args = skill_args
        self.uttclass_model_args = uttclass_model_args

        # database connection
        self.engine = model.data_engine_setup(db_url, echo=False)
        self.Session = sessionmaker(bind=self.engine)
        self.session = self.Session()

        # TensorFlow objects are created later (tf setup is slow)
        self.tf_session = None
        self.nlp_model = None
        self.uttclass_model = None

        # skill bookkeeping
        self.skills = {}                # skill_name -> module obj
        self.skill_paths = {}           # skill_name -> pathname
        self.consulted_skills = set()
        self.toplevel = toplevel
        self.all_skills = []

        # module search path
        if not skill_paths:
            # auto-config: each insert goes to index 0, so the last inserted
            # entry ('.') ends up first
            package_skills = os.path.dirname(os.path.abspath(__file__)) + '/skills'
            sys.path.insert(0, package_skills)

            working_dir = os.getcwd()
            sys.path.insert(0, working_dir + '/skills')
            sys.path.insert(0, working_dir)
        else:
            # insert back to front so the given order is kept in sys.path
            for path_entry in reversed(skill_paths):
                sys.path.insert(0, path_entry)

        for path_entry in sys.path:
            logging.debug("Module search path: %s" % path_entry)

        self.load_skill(toplevel)

        # Prolog engine, data engine
        pyxsb_start_session(xsb_arch_dir)
        self.dte = DataEngine(self.session)

        for setup_goal in (
                'import default_sys_error_handler/1 from error_handler.',
                'assertz((default_user_error_handler(Ball):-default_sys_error_handler(Ball))).'):
            pyxsb_command(setup_goal)

        # restore memory: one assertz goal per stored Mem row, joined into a
        # single query; a placeholder fact is asserted when nothing is stored
        restore_goals = [
            u"assertz(memory('%s', '%s', %s, %f))" % (
                row.realm, row.k, unicode(json_to_xsb(row.v)), row.score)
            for row in self.session.query(model.Mem)
        ]
        if not restore_goals:
            restore_goals = [u'assertz(memory(self, self, self, 1.0))']

        pyxsb_command(u', '.join(restore_goals) + u'.')