def consult_skill(self, skill_name):
    """
    Load a skill and consult its Prolog sources into the XSB engine.

    Recursively consults all skills listed in the skill module's DEPENDS
    attribute first, then consults each Prolog source file listed in the
    skill's (optional) PL_SOURCES attribute.

    Idempotent: a skill that has already been consulted is skipped.

    :param skill_name: name of the skill module to consult
    :return: the skill's module object, or None if the skill had been
             consulted before
    """

    if skill_name in self.consulted_skills:
        return

    logging.debug("consulting skill '%s'" % skill_name)

    m = self.load_skill(skill_name)
    self.consulted_skills.add(skill_name)
    skill_dir = self.skill_paths[skill_name]

    try:
        # consult dependencies first so their predicates are available
        for m2 in getattr(m, 'DEPENDS'):
            self.consult_skill(m2)

        if hasattr(m, 'PL_SOURCES'):
            for inputfn in m.PL_SOURCES:
                pl_path = "%s/%s" % (skill_dir, inputfn)
                pyxsb_command("consult('%s')." % pl_path)
    except Exception:
        # was a bare except: which also caught SystemExit (including the
        # sys.exit(1) raised by a failing recursive consult) and
        # KeyboardInterrupt; narrowed so those propagate cleanly
        logging.error('failed to load skill "%s"' % skill_name)
        logging.error(traceback.format_exc())
        sys.exit(1)

    return m
def consult_skill(self, skill_name):
    """
    Consult a skill (and, recursively, its dependencies) into the
    Prolog engine.

    :param skill_name: name of the skill module to consult
    :return: the skill's module object, or None if it was already
             consulted earlier; exits the process on any failure
    """

    # consulting is idempotent - skip skills we have seen before
    if skill_name in self.consulted_skills:
        return

    logging.debug("consulting skill '%s'" % skill_name)

    module = self.load_skill(skill_name)
    self.consulted_skills.add(skill_name)
    base_dir = self.skill_paths[skill_name]

    try:
        # dependencies first, so their predicates exist when we consult
        # this skill's own sources
        for dep_name in getattr(module, 'DEPENDS'):
            self.consult_skill(dep_name)

        # then this skill's Prolog source files, if it declares any
        for src_name in getattr(module, 'PL_SOURCES', []):
            src_path = "%s/%s" % (base_dir, src_name)
            pyxsb_command("consult('%s')." % src_path)
    except:
        logging.error('failed to load skill "%s"' % skill_name)
        logging.error(traceback.format_exc())
        sys.exit(1)

    return module
def process_input(self, ctx, inp_raw, run_trace=False, do_eliza=True):
    """
    Process one turn of user input.

    Pipeline: tokenize the raw input, try an exact match against the
    training data, fall back to the (optional) neural net model, and
    finally to an ELIZA-style response if do_eliza is enabled. One of
    the collected candidate responses is then picked at random.

    :param ctx:       conversation context (holds input, language, realm,
                      user and collected responses)
    :param inp_raw:   raw user utterance (unicode string)
    :param run_trace: if True, enable Prolog tracing while processing
    :param do_eliza:  if True, produce an ELIZA-style fallback response
                      when no other response was found
    :return: tuple (out, score, action) - picked response utterance
             (empty string if none was found), its score, and the
             'action' value read back from realm memory (may be None)
    """

    if run_trace:
        pyxsb_command("trace.")
    else:
        pyxsb_command("notrace.")

    # tokenize input, dropping u'nspc' pseudo-tokens
    tokens_raw = tokenize(inp_raw, ctx.lang)
    tokens = []
    for t in tokens_raw:
        if t == u'nspc':
            continue
        tokens.append(t)
    inp = u" ".join(tokens)

    ctx.set_inp(inp)

    # clear any action left over from a previous turn
    self.mem_set(ctx.realm, 'action', None)

    logging.debug(
        '==============================================================================='
    )
    logging.debug('process_input: %s' % repr(inp))

    #
    # do we have an exact match in our training data for this input?
    #

    found_resp = False
    for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(
            inp, ctx.lang):
        # acode defines the answer function afn; build a snippet that
        # calls it with ctx plus the recorded arguments and execute it
        afn, acode = self.dte.lookup_code(md5s)
        ecode = '%s\n%s(ctx' % (acode, afn)
        if args:
            for arg in args:
                ecode += ',%s' % repr(arg)
        ecode += ')\n'
        logging.debug('exact training data match found: %s:%s' % (src_fn, src_line))
        logging.debug(ecode)
        try:
            exec(ecode, globals(), locals())
            found_resp = True
        except:
            # a broken answer snippet must not kill the whole turn
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
            logging.error(ecode)

    if not found_resp:
        logging.debug('no exact training data match for this input found.')

    #
    # ask neural net if we did not find an answer
    #

    resps = ctx.get_resps()
    if not resps and self.nlp_model:
        from nlp_model import _START, _STOP, _OR

        logging.debug('trying neural net on: %s' % repr(inp))

        try:
            # exact matching has not yielded any results -> use the neural
            # network to generate response code token sequences
            predicted_ids = self.nlp_model.predict(inp)

            # the prediction is a flat token stream: commands are separated
            # by _OR and the stream is terminated by _STOP. Run each
            # command to see which ones yield the highest scoring responses.
            cmd = []
            for decoded in predicted_ids:

                if decoded == _STOP or decoded == _OR:
                    try:
                        logging.debug('trying cmd: %s' % repr(cmd))
                        # cmd[0] identifies the answer code, remaining
                        # entries are JSON-encoded arguments
                        afn, acode = self.dte.lookup_code(cmd[0])
                        ecode = '%s\n%s(ctx' % (acode, afn)
                        if len(cmd) > 1:
                            for arg in cmd[1:]:
                                ecode += ',%s' % repr(json.loads(arg))
                        ecode += ')\n'
                        logging.debug(ecode)
                        exec(ecode, globals(), locals())
                    except:
                        # predicted code may simply be invalid - skip it
                        logging.debug('EXCEPTION CAUGHT %s' % traceback.format_exc())
                    cmd = []
                    if decoded == _STOP:
                        break
                else:
                    cmd.append(decoded)

        except:
            # probably ok (prolog code generated by neural network might not always work)
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    pyxsb_command("notrace.")

    #
    # extract highest-scoring responses
    #

    resps = ctx.get_resps()

    if not resps and do_eliza:
        logging.debug('producing ELIZA-style response for input %s' % inp)
        from psychology import psychology
        psychology.do_eliza(ctx)
        resps = ctx.get_resps()

    #
    # pick random response
    #

    if len(resps) > 0:
        i = random.randrange(0, len(resps))
        out, score, action, action_arg = resps[i]
        ctx.commit_resp(i)

        logging.debug(u'picked resp #%d (score: %f): %s' % (i, score, out))

        # dump realm and user memory for debugging
        logging.debug(u'MEM: %s' % ctx.realm)
        memd = self.mem_dump(ctx.realm)
        for k, v, score in memd:
            logging.debug(u'MEM: %-20s: %s (%f)' % (k, v, score))

        logging.debug(u'MEM: %s' % ctx.user)
        memd = self.mem_dump(ctx.user)
        for k, v, score in memd:
            logging.debug(u'MEM: %-20s: %s (%f)' % (k, v, score))
    else:
        out = u''
        score = 0.0
        logging.debug(u'No response found.')

    # the executed answer code may have asserted an action in realm memory
    action = self.mem_get(ctx.realm, 'action')

    return out, score, action
def test_skill(self, skill_name, run_trace=False, test_name=None):
    """
    Run the test cases registered in the data engine for a skill.

    For each test case: create a fresh test context, clear test memory,
    run the optional prep code, then for every round feed the test input
    through the matching training-data code and check that one of the
    produced responses matches the expected utterance (and, if given,
    execute the expected action check code).

    NOTE(review): unpacking each test case overwrites self.lang and
    self.src_line as a side effect - confirm this is intended.

    :param skill_name: name of the skill whose tests to run
    :param run_trace:  if True, enable Prolog tracing during the run
    :param test_name:  if set, run only the test case with this name
    :return: tuple (num_tests, num_fails)
    """

    if run_trace:
        pyxsb_command("trace.")
    else:
        pyxsb_command("notrace.")

    # raises KeyError if the skill is not loaded; m itself is unused below
    m = self.skills[skill_name]

    logging.info('running tests of skill %s ...' % (skill_name))

    num_tests = 0
    num_fails = 0
    for tc in self.dte.lookup_tests(skill_name):
        t_name, self.lang, prep_code, prep_fn, rounds, src_fn, self.src_line = tc

        if test_name:
            if t_name != test_name:
                logging.info('skipping test %s' % t_name)
                continue

        ctx = self.create_context(user=TEST_USER, realm=TEST_REALM, test_mode=True)
        round_num = 0
        num_tests += 1

        # start from a clean memory state for every test case
        self.mem_clear(TEST_REALM)
        self.mem_clear(TEST_USER)

        # prep
        if prep_code:
            pcode = '%s\n%s(ctx)\n' % (prep_code, prep_fn)
            try:
                exec(pcode, globals(), locals())
            except:
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        for test_inp, test_out, test_action, test_action_arg in rounds:
            logging.info("test_skill: %s round %d test_inp : %s" %
                         (t_name, round_num, repr(test_inp)))
            logging.info("test_skill: %s round %d test_out : %s" %
                         (t_name, round_num, repr(test_out)))

            # look up code in data engine
            matching_resp = False
            acode = None

            ctx.set_inp(test_inp)
            self.mem_set(ctx.realm, 'action', None)

            # execute every piece of answer code matching this input
            for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(
                    test_inp, self.lang):
                afn, acode = self.dte.lookup_code(md5s)
                ecode = '%s\n%s(ctx' % (acode, afn)
                if args:
                    for arg in args:
                        ecode += ',%s' % repr(arg)
                ecode += ')\n'
                try:
                    exec(ecode, globals(), locals())
                except:
                    logging.error(
                        'test_skill: %s round %d EXCEPTION CAUGHT %s' %
                        (t_name, round_num, traceback.format_exc()))
                    logging.error(ecode)

            if acode is None:
                logging.error(
                    u'Error: %s: no training data for test_in "%s" found in DB!'
                    % (t_name, test_inp))
                num_fails += 1
                break

            resps = ctx.get_resps()

            # look for a response whose utterance matches the expectation
            for i, resp in enumerate(resps):
                actual_out, score, actual_action, actual_action_arg = resp

                if len(test_out) > 0:
                    if len(actual_out) > 0:
                        # normalize by tokenizing before comparison
                        actual_out = u' '.join(
                            tokenize(actual_out, self.lang))
                    logging.info(
                        "test_skill: %s round %d actual_out : %s (score: %f)"
                        % (t_name, round_num, actual_out, score))
                    if actual_out != test_out:
                        logging.info(
                            "test_skill: %s round %d UTTERANCE MISMATCH." %
                            (t_name, round_num))
                        continue  # no match

                logging.info("test_skill: %s round %d UTTERANCE MATCHED!" %
                             (t_name, round_num))
                matching_resp = True
                ctx.commit_resp(i)

                # check action
                if test_action:
                    afn, acode = self.dte.lookup_code(test_action)
                    ecode = '%s\n%s(ctx' % (acode, afn)
                    if test_action_arg:
                        ecode += ',%s' % repr(test_action_arg)
                    ecode += ')\n'
                    exec(ecode, globals(), locals())

                break

            if not matching_resp:
                logging.error(
                    u'test_skill: %s round %d no matching response found.' %
                    (t_name, round_num))
                num_fails += 1
                break

            round_num += 1

    return num_tests, num_fails
def __init__(self,
             db_url=DEFAULT_DB_URL,
             xsb_arch_dir=DEFAULT_XSB_ARCH_DIR,
             toplevel=DEFAULT_TOPLEVEL,
             skill_paths=DEFAULT_SKILL_PATHS,
             lang=DEFAULT_LANG,
             nlp_model_args=DEFAULT_NLP_MODEL_ARGS,
             skill_args=DEFAULT_SKILL_ARGS,
             uttclass_model_args=DEFAULT_UTTCLASS_MODEL_ARGS):
    """
    Set up the engine: database session, skill module search path,
    toplevel skill, XSB Prolog session, data engine, and restore any
    persisted memory facts into the Prolog knowledge base.

    :param db_url:       SQLAlchemy database URL for the model database
    :param xsb_arch_dir: XSB Prolog architecture directory
    :param toplevel:     name of the toplevel skill to load
    :param skill_paths:  directories to prepend to sys.path for skill
                         lookup; if empty, paths are auto-configured from
                         this file's location and the current directory
    :param lang:         default language code
    :param nlp_model_args / skill_args / uttclass_model_args:
                         stored for later use by the respective
                         models/skills (models are set up lazily)
    """

    self.lang = lang
    self.nlp_model_args = nlp_model_args
    self.skill_args = skill_args
    self.uttclass_model_args = uttclass_model_args

    #
    # database connection
    #

    self.engine = model.data_engine_setup(db_url, echo=False)
    self.Session = sessionmaker(bind=self.engine)
    self.session = self.Session()

    #
    # TensorFlow (deferred, as tf can take quite a bit of time to set up)
    #

    self.tf_session = None
    self.nlp_model = None
    self.uttclass_model = None

    #
    # skill management, setup
    #

    self.skills = {}  # skill_name -> module obj
    self.skill_paths = {}  # skill_name -> pathname
    self.consulted_skills = set()
    self.toplevel = toplevel
    self.all_skills = []

    if skill_paths:
        # insert in reverse so the caller's priority order is preserved
        for sp in skill_paths[::-1]:
            sys.path.insert(0, sp)
    else:
        # auto-config

        # __file__ -> ../skills
        mp = os.path.dirname(os.path.abspath(__file__)) + '/skills'
        sys.path.insert(0, mp)

        # ./skills
        cwd = os.getcwd()
        sys.path.insert(0, cwd + '/skills')

        # .
        sys.path.insert(0, cwd)

    for mp in sys.path:
        logging.debug("Module search path: %s" % mp)

    self.load_skill(toplevel)

    #
    # Prolog engine, data engine
    #

    pyxsb_start_session(xsb_arch_dir)
    self.dte = DataEngine(self.session)

    # route XSB user errors through our error handler
    pyxsb_command('import default_sys_error_handler/1 from error_handler.')
    pyxsb_command(
        'assertz((default_user_error_handler(Ball):-default_sys_error_handler(Ball))).'
    )

    #
    # restore memory
    #

    # build one comma-separated Prolog command asserting every persisted
    # memory fact; assert a dummy self-fact if the database is empty
    q = u''
    for m in self.session.query(model.Mem):
        v = json_to_xsb(m.v)
        if q:
            q += ', '
        q += u"assertz(memory('%s', '%s', %s, %f))" % (m.realm, m.k, unicode(v), m.score)
    if not q:
        q = u'assertz(memory(self, self, self, 1.0))'
    q += u'.'
    pyxsb_command(q)
def process_input(self, ctx, inp_raw, run_trace=False, do_eliza=True):
    """
    Process one turn of user input.

    Pipeline: tokenize the raw input, try an exact match against the
    training data, fall back to the (optional) neural net model, and
    finally to an ELIZA-style response if do_eliza is enabled. One of
    the collected candidate responses is then picked at random.

    :param ctx:       conversation context (holds input, language, realm,
                      user and collected responses)
    :param inp_raw:   raw user utterance (unicode string)
    :param run_trace: if True, enable Prolog tracing while processing
    :param do_eliza:  if True, produce an ELIZA-style fallback response
                      when no other response was found
    :return: tuple (out, score, action) - picked response utterance
             (empty string if none was found), its score, and the
             'action' value read back from realm memory (may be None)
    """

    if run_trace:
        pyxsb_command("trace.")
    else:
        pyxsb_command("notrace.")

    # tokenize input, dropping u'nspc' pseudo-tokens
    tokens_raw = tokenize(inp_raw, ctx.lang)
    tokens = []
    for t in tokens_raw:
        if t == u'nspc':
            continue
        tokens.append(t)
    inp = u" ".join(tokens)

    ctx.set_inp(inp)

    # clear any action left over from a previous turn
    self.mem_set(ctx.realm, 'action', None)

    logging.debug('===============================================================================')
    logging.debug('process_input: %s' % repr(inp))

    #
    # do we have an exact match in our training data for this input?
    #

    found_resp = False
    for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(inp, ctx.lang):
        # acode defines the answer function afn; build a snippet that
        # calls it with ctx plus the recorded arguments and execute it
        afn, acode = self.dte.lookup_code(md5s)
        ecode = '%s\n%s(ctx' % (acode, afn)
        if args:
            for arg in args:
                ecode += ',%s' % repr(arg)
        ecode += ')\n'
        logging.debug('exact training data match found: %s:%s' % (src_fn, src_line))
        logging.debug(ecode)
        try:
            exec(ecode, globals(), locals())
            found_resp = True
        except:
            # a broken answer snippet must not kill the whole turn
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
            logging.error(ecode)

    if not found_resp:
        logging.debug('no exact training data match for this input found.')

    #
    # ask neural net if we did not find an answer
    #

    resps = ctx.get_resps()
    if not resps and self.nlp_model:
        from nlp_model import _START, _STOP, _OR

        logging.debug('trying neural net on: %s' % repr(inp))

        try:
            # exact matching has not yielded any results -> use the neural
            # network to generate response code token sequences
            predicted_ids = self.nlp_model.predict(inp)

            # the prediction is a flat token stream: commands are separated
            # by _OR and the stream is terminated by _STOP. Run each
            # command to see which ones yield the highest scoring responses.
            cmd = []
            for decoded in predicted_ids:

                if decoded == _STOP or decoded == _OR:
                    try:
                        logging.debug('trying cmd: %s' % repr(cmd))
                        # cmd[0] identifies the answer code, remaining
                        # entries are JSON-encoded arguments
                        afn, acode = self.dte.lookup_code(cmd[0])
                        ecode = '%s\n%s(ctx' % (acode, afn)
                        if len(cmd)>1:
                            for arg in cmd[1:]:
                                ecode += ',%s' % repr(json.loads(arg))
                        ecode += ')\n'
                        logging.debug(ecode)
                        exec(ecode, globals(), locals())
                    except:
                        # predicted code may simply be invalid - skip it
                        logging.debug('EXCEPTION CAUGHT %s' % traceback.format_exc())
                    cmd = []
                    if decoded == _STOP:
                        break
                else:
                    cmd.append(decoded)

        except:
            # probably ok (prolog code generated by neural network might not always work)
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    pyxsb_command("notrace.")

    #
    # extract highest-scoring responses
    #

    resps = ctx.get_resps()

    if not resps and do_eliza:
        logging.debug('producing ELIZA-style response for input %s' % inp)
        from psychology import psychology
        psychology.do_eliza(ctx)
        resps = ctx.get_resps()

    #
    # pick random response
    #

    if len(resps)>0:
        i = random.randrange(0, len(resps))
        out, score, action, action_arg = resps[i]
        ctx.commit_resp(i)

        logging.debug(u'picked resp #%d (score: %f): %s' % (i, score, out))

        # dump realm and user memory for debugging
        logging.debug(u'MEM: %s' % ctx.realm)
        memd = self.mem_dump(ctx.realm)
        for k, v, score in memd:
            logging.debug(u'MEM: %-20s: %s (%f)' % (k, v, score))

        logging.debug(u'MEM: %s' % ctx.user)
        memd = self.mem_dump(ctx.user)
        for k, v, score in memd:
            logging.debug(u'MEM: %-20s: %s (%f)' % (k, v, score))
    else:
        out = u''
        score = 0.0
        logging.debug(u'No response found.')

    # the executed answer code may have asserted an action in realm memory
    action = self.mem_get(ctx.realm, 'action')

    return out, score, action
def test_skill(self, skill_name, run_trace=False, test_name=None):
    """
    Run the test cases registered in the data engine for a skill.

    For each test case: create a fresh test context, clear test memory,
    run the optional prep code, then for every round feed the test input
    through the matching training-data code and check that one of the
    produced responses matches the expected utterance (and, if given,
    execute the expected action check code).

    NOTE(review): unpacking each test case overwrites self.lang and
    self.src_line as a side effect - confirm this is intended.

    :param skill_name: name of the skill whose tests to run
    :param run_trace:  if True, enable Prolog tracing during the run
    :param test_name:  if set, run only the test case with this name
    :return: tuple (num_tests, num_fails)
    """

    if run_trace:
        pyxsb_command("trace.")
    else:
        pyxsb_command("notrace.")

    # raises KeyError if the skill is not loaded; m itself is unused below
    m = self.skills[skill_name]

    logging.info('running tests of skill %s ...' % (skill_name))

    num_tests = 0
    num_fails = 0
    for tc in self.dte.lookup_tests(skill_name):
        t_name, self.lang, prep_code, prep_fn, rounds, src_fn, self.src_line = tc

        if test_name:
            if t_name != test_name:
                logging.info('skipping test %s' % t_name)
                continue

        ctx = self.create_context(user=TEST_USER, realm=TEST_REALM, test_mode=True)
        round_num = 0
        num_tests += 1

        # start from a clean memory state for every test case
        self.mem_clear(TEST_REALM)
        self.mem_clear(TEST_USER)

        # prep
        if prep_code:
            pcode = '%s\n%s(ctx)\n' % (prep_code, prep_fn)
            try:
                exec(pcode, globals(), locals())
            except:
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        for test_inp, test_out, test_action, test_action_arg in rounds:
            logging.info("test_skill: %s round %d test_inp : %s" % (t_name, round_num, repr(test_inp)))
            logging.info("test_skill: %s round %d test_out : %s" % (t_name, round_num, repr(test_out)))

            # look up code in data engine
            matching_resp = False
            acode = None

            ctx.set_inp(test_inp)
            self.mem_set(ctx.realm, 'action', None)

            # execute every piece of answer code matching this input
            for lang, d, md5s, args, src_fn, src_line in self.dte.lookup_data_train(test_inp, self.lang):
                afn, acode = self.dte.lookup_code(md5s)
                ecode = '%s\n%s(ctx' % (acode, afn)
                if args:
                    for arg in args:
                        ecode += ',%s' % repr(arg)
                ecode += ')\n'
                try:
                    exec(ecode, globals(), locals())
                except:
                    logging.error('test_skill: %s round %d EXCEPTION CAUGHT %s' % (t_name, round_num, traceback.format_exc()))
                    logging.error(ecode)

            if acode is None:
                logging.error(u'Error: %s: no training data for test_in "%s" found in DB!' % (t_name, test_inp))
                num_fails += 1
                break

            resps = ctx.get_resps()

            # look for a response whose utterance matches the expectation
            for i, resp in enumerate(resps):
                actual_out, score, actual_action, actual_action_arg = resp

                if len(test_out) > 0:
                    if len(actual_out)>0:
                        # normalize by tokenizing before comparison
                        actual_out = u' '.join(tokenize(actual_out, self.lang))
                    logging.info("test_skill: %s round %d actual_out : %s (score: %f)" % (t_name, round_num, actual_out, score))
                    if actual_out != test_out:
                        logging.info("test_skill: %s round %d UTTERANCE MISMATCH." % (t_name, round_num))
                        continue  # no match

                logging.info("test_skill: %s round %d UTTERANCE MATCHED!" % (t_name, round_num))
                matching_resp = True
                ctx.commit_resp(i)

                # check action
                if test_action:
                    afn, acode = self.dte.lookup_code(test_action)
                    ecode = '%s\n%s(ctx' % (acode, afn)
                    if test_action_arg:
                        ecode += ',%s' % repr(test_action_arg)
                    ecode += ')\n'
                    exec(ecode, globals(), locals())

                break

            if not matching_resp:
                logging.error(u'test_skill: %s round %d no matching response found.' % (t_name, round_num))
                num_fails += 1
                break

            round_num += 1

    return num_tests, num_fails
def __init__(self,
             db_url=DEFAULT_DB_URL,
             xsb_arch_dir=DEFAULT_XSB_ARCH_DIR,
             toplevel=DEFAULT_TOPLEVEL,
             skill_paths=DEFAULT_SKILL_PATHS,
             lang=DEFAULT_LANG,
             nlp_model_args=DEFAULT_NLP_MODEL_ARGS,
             skill_args=DEFAULT_SKILL_ARGS,
             uttclass_model_args=DEFAULT_UTTCLASS_MODEL_ARGS):
    """
    Set up the engine: database session, skill module search path,
    toplevel skill, XSB Prolog session, data engine, and restore any
    persisted memory facts into the Prolog knowledge base.

    :param db_url:       SQLAlchemy database URL for the model database
    :param xsb_arch_dir: XSB Prolog architecture directory
    :param toplevel:     name of the toplevel skill to load
    :param skill_paths:  directories to prepend to sys.path for skill
                         lookup; if empty, paths are auto-configured from
                         this file's location and the current directory
    :param lang:         default language code
    :param nlp_model_args / skill_args / uttclass_model_args:
                         stored for later use by the respective
                         models/skills (models are set up lazily)
    """

    self.lang = lang
    self.nlp_model_args = nlp_model_args
    self.skill_args = skill_args
    self.uttclass_model_args = uttclass_model_args

    #
    # database connection
    #

    self.engine = model.data_engine_setup(db_url, echo=False)
    self.Session = sessionmaker(bind=self.engine)
    self.session = self.Session()

    #
    # TensorFlow (deferred, as tf can take quite a bit of time to set up)
    #

    self.tf_session = None
    self.nlp_model = None
    self.uttclass_model = None

    #
    # skill management, setup
    #

    self.skills = {}  # skill_name -> module obj
    self.skill_paths = {}  # skill_name -> pathname
    self.consulted_skills = set()
    self.toplevel = toplevel
    self.all_skills = []

    if skill_paths:
        # insert in reverse so the caller's priority order is preserved
        for sp in skill_paths[::-1]:
            sys.path.insert(0, sp)
    else:
        # auto-config

        # __file__ -> ../skills
        mp = os.path.dirname(os.path.abspath(__file__)) + '/skills'
        sys.path.insert(0, mp)

        # ./skills
        cwd = os.getcwd()
        sys.path.insert(0, cwd + '/skills')

        # .
        sys.path.insert(0, cwd)

    for mp in sys.path:
        logging.debug("Module search path: %s" % mp)

    self.load_skill(toplevel)

    #
    # Prolog engine, data engine
    #

    pyxsb_start_session(xsb_arch_dir)
    self.dte = DataEngine(self.session)

    # route XSB user errors through our error handler
    pyxsb_command('import default_sys_error_handler/1 from error_handler.')
    pyxsb_command('assertz((default_user_error_handler(Ball):-default_sys_error_handler(Ball))).')

    #
    # restore memory
    #

    # build one comma-separated Prolog command asserting every persisted
    # memory fact; assert a dummy self-fact if the database is empty
    q = u''
    for m in self.session.query(model.Mem):
        v = json_to_xsb(m.v)
        if q:
            q += ', '
        q += u"assertz(memory('%s', '%s', %s, %f))" % (m.realm, m.k, unicode(v), m.score)
    if not q:
        q = u'assertz(memory(self, self, self, 1.0))'
    q += u'.'
    pyxsb_command(q)