def convert_answerz(c):

    pred = c.head
    lang = pred.args[1].name
    n    = pred.args[2].name

    pred = c.body
    s    = pred.args[2].s

    head = Predicate(name='nlp_%s_r' % MODULE_NAME,
                     args=[Predicate(name=lang), Predicate(name=n), Variable(name='R')])
    body = Predicate(name='says',
                     args=[Predicate(name=lang), Variable(name='R'), StringLiteral(s)])

    clause = Clause(head=head, body=body)

    print unicode(clause)
def convert_nlp_test(pred):

    global test_cnt

    lang    = pred.args[0].name
    ivr_in  = pred.args[1].args[0].args[0]
    ivr_out = pred.args[1].args[1].args[0]

    head = Predicate(name='nlp_test', args=[
        StringLiteral(MODULE_NAME),
        Predicate(name=lang),
        StringLiteral('t%04d' % test_cnt),
        Predicate(name='FIXME'),
        ListLiteral([ivr_in, ivr_out, ListLiteral([])])
    ])

    test_cnt += 1

    clause = Clause(head=head)

    print unicode(clause)
def _process_input_nnet(self, inp, res):

    solutions = []

    logging.debug('_process_input_nnet: %s' % repr(inp))

    try:
        # exact matching has not yielded any results -> use the neural
        # network to generate response(s)

        x = self.nlp_model.compute_x(inp)

        source, source_len, dest, dest_len = self.nlp_model._prepare_batch([[x, []]], offset=0)

        # predicted_ids: GreedyDecoder:     [batch_size, max_time_step, 1]
        #                BeamSearchDecoder: [batch_size, max_time_step, beam_width]
        predicted_ids = self.tf_model.predict(self.tf_session,
                                              encoder_inputs=source,
                                              encoder_inputs_length=source_len)

        # extract best codes only (beam 0), splitting alternatives at __OR__
        acodes = [[]]
        for p in predicted_ids[0][:, 0]:
            if p == -1:
                break
            decoded = self.inv_output_dict[p]
            if decoded == u'_EOS':
                break
            if decoded == u'__OR__':
                acodes.append([])
            acodes[len(acodes) - 1].append(decoded)

        # FIXME: for now, we try the first solution only
        acode = acodes[0]

        pcode = self._reconstruct_prolog_code(acode)
        logging.debug('_process_input_nnet: %s' % pcode)

        clause = Clause(None, pcode, location=self.dummyloc)
        solutions = self.rt.search(clause, env=res)

    except Exception:
        # probably ok (prolog code generated by the neural network
        # might not always work)
        logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    return solutions
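# Decoding sketch for _process_input_nnet above: the decoder emits one flat
# stream of code tokens; u'__OR__' markers split it into alternative token
# lists (acodes), and _reconstruct_prolog_code turns one such list back into
# executable Prolog goals. Token names here are hypothetical:
#
#   [u'say', u'(', ...] + [u'__OR__', ...]  ->  acodes = [[...], [u'__OR__', ...]]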
def _setup_context(self, user, lang, inp, prev_context, prev_res):

    cur_context = Predicate(do_gensym(self.rt, 'context'))

    res = {}
    if ASSERT_OVERLAY_VAR_NAME in prev_res:
        res[ASSERT_OVERLAY_VAR_NAME] = prev_res[ASSERT_OVERLAY_VAR_NAME].clone()

    res = do_assertz({}, Clause(Predicate('user', [cur_context, Predicate(user)]),
                                location=self.dummyloc), res=res)
    res = do_assertz({}, Clause(Predicate('lang', [cur_context, Predicate(lang)]),
                                location=self.dummyloc), res=res)

    token_literal = ListLiteral(list(map(lambda x: StringLiteral(x), inp)))
    res = do_assertz({}, Clause(Predicate('tokens', [cur_context, token_literal]),
                                location=self.dummyloc), res=res)

    currentTime = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC).isoformat()
    res = do_assertz({}, Clause(Predicate('time', [cur_context, StringLiteral(currentTime)]),
                                location=self.dummyloc), res=res)

    if prev_context:

        res = do_assertz({}, Clause(Predicate('prev', [cur_context, prev_context]),
                                    location=self.dummyloc), res=res)

        # copy over all previous context statements to the new one
        s1s = self.rt.search_predicate('context', [prev_context, '_1', '_2'], env=res)
        for s1 in s1s:
            res = do_assertz({}, Clause(Predicate('context', [cur_context, s1['_1'], s1['_2']]),
                                        location=self.dummyloc), res=res)

        # copy over all previous mem statements to the new one
        s1s = self.rt.search_predicate('mem', [prev_context, '_1', '_2'], env=res)
        for s1 in s1s:
            res = do_assertz({}, Clause(Predicate('mem', [cur_context, s1['_1'], s1['_2']]),
                                        location=self.dummyloc), res=res)

    res['C'] = cur_context

    return res, cur_context
def do_process_input():

    global stdscr, prompt, cur_context, next_context, lang, inp, responses, kernal
    global match_module, match_loc_fn, match_loc_line

    inp_t = []
    for t in tokenize(prompt, lang=lang):
        if t == u'nspc':
            continue
        inp_t.append(t)

    next_res, next_context = kernal._setup_context(user=USER_URI,
                                                   lang=lang,
                                                   inp=inp_t,
                                                   prev_context=cur_context,
                                                   prev_res={})
    inp = kernal._compute_net_input(next_res, next_context)

    #
    # see if this input sequence is already covered by our training data
    #

    responses      = []
    match_loc_fn   = None
    match_loc_line = None
    highscore      = 0.0

    for tdr in kernal.session.query(model.TrainingData).filter(
            model.TrainingData.lang == lang,
            model.TrainingData.inp == json.dumps(inp)):

        acode = json.loads(tdr.resp)
        pcode = kernal._reconstruct_prolog_code(acode)
        clause = Clause(None, pcode, location=kernal.dummyloc)
        solutions = kernal.rt.search(clause, env=next_res)

        for solution in solutions:
            actual_out, actual_actions, score = kernal._extract_response(next_context, solution)
            if score > highscore:
                responses = []
                highscore = score
            if score < highscore:
                continue
            responses.append((pcode, actual_out, actual_actions, score, solution))

        match_module   = tdr.module
        match_loc_fn   = tdr.loc_fn
        match_loc_line = tdr.loc_line
def builtin_r_sayv(g, pe):

    """ r_sayv (+Context, +Var, +Fmt) """

    pe._trace('CALLED BUILTIN r_sayv', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) != 3:
        raise PrologRuntimeError('r_sayv: 3 args (+Context, +Var, +Fmt) expected.', g.location)

    arg_context = pe.prolog_eval(args[0], g.env, g.location)
    arg_var     = pe.prolog_eval(args[1], g.env, g.location)
    arg_fmt     = pe.prolog_get_constant(args[2], g.env, g.location)

    if not isinstance(arg_var, Literal):
        raise PrologRuntimeError(u'r_sayv: failed to eval "%s"' % unicode(args[1]), g.location)

    res = {}

    if isinstance(arg_var, StringLiteral):
        v = arg_var.s
    else:
        v = unicode(arg_var)

    # apply numeric formatting, if requested
    if arg_fmt == 'd':
        v = unicode(int(float(v)))
    elif arg_fmt == 'f':
        v = unicode(float(v))

    res = do_assertz(g.env, Clause(Predicate('c_say', [arg_context, StringLiteral(v)]),
                                   location=g.location), res=res)

    return [res]
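# Usage sketch (hypothetical Prolog-side call; assumes this builtin is
# registered with the runtime under the name "r_sayv"): format the number
# bound to Temp as an integer and queue it for output in context C:
#
#   r_sayv(C, Temp, d)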
def convert_nlp_gen(pred):

    print "% ", unicode(pred)

    lang = pred.args[0].name
    ms   = pred.args[1].s

    res = convert_macro_string(ms)

    head = Predicate(name='nlp_%s_s' % MODULE_NAME, args=[
        Predicate(name=lang),
        Predicate(name='fixme'),
        Variable(name='S')
    ])
    body = Predicate(name='hears', args=[Predicate(name=lang), Variable(name='S'), res])

    clause = Clause(head=head, body=body)

    print unicode(clause)
def builtin_r_say(g, pe):

    """ r_say (+Context, +Token) """

    pe._trace('CALLED BUILTIN r_say', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) != 2:
        raise PrologRuntimeError('r_say: 2 args (+Context, +Token) expected.', g.location)

    arg_context = pe.prolog_eval(args[0], g.env, g.location)
    arg_token   = pe.prolog_eval(args[1], g.env, g.location)

    res = {}
    res = do_assertz(g.env, Clause(Predicate('c_say', [arg_context, arg_token]),
                                   location=g.location), res=res)

    return [res]
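# Usage sketch (hypothetical Prolog-side call): queue a single token for
# output in context C, which ends up asserted as c_say(C, hello):
#
#   r_say(C, hello)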
def process_input(self, utterance, utt_lang, user_uri,
                  run_trace=False, do_eliza=True, prev_ctx=None):

    """ process user input, return score, responses, actions, solutions, context """

    prev_context = prev_ctx
    res = {}

    tokens = tokenize(utterance, utt_lang)

    res, cur_context = self._setup_context(user=user_uri,
                                           lang=utt_lang,
                                           inp=tokens,
                                           prev_context=prev_context,
                                           prev_res=res)

    inp = self._compute_net_input(res, cur_context)

    logging.debug('process_input: %s' % repr(inp))

    #
    # do we have an exact match in our training data for this input?
    #

    solutions = []

    self.rt.set_trace(run_trace)

    for tdr in self.session.query(model.TrainingData).filter(
            model.TrainingData.lang == utt_lang,
            model.TrainingData.inp == json.dumps(inp)):

        acode = json.loads(tdr.resp)
        pcode = self._reconstruct_prolog_code(acode)
        clause = Clause(None, pcode, location=self.dummyloc)
        sols = self.rt.search(clause, env=res)
        if sols:
            solutions.extend(sols)

    if not solutions:
        solutions = self._process_input_nnet(inp, res)

    #
    # try dropping the context if we haven't managed to produce a result yet
    #

    if not solutions:
        res, cur_context = self._setup_context(user=user_uri,
                                               lang=utt_lang,
                                               inp=tokens,
                                               prev_context=None,
                                               prev_res={})
        inp = self._compute_net_input(res, cur_context)
        solutions = self._process_input_nnet(inp, res)

    if not solutions and do_eliza:
        logging.info('producing ELIZA-style response for input %s' % utterance)
        clause = self.aip_parser.parse_line_clause_body('do_eliza(C, %s)' % utt_lang)
        solutions = self.rt.search(clause, env=res)

    self.rt.set_trace(False)

    #
    # extract highest-scoring responses only:
    #

    best_score     = 0
    best_resps     = []
    best_actions   = []
    best_solutions = []

    for solution in solutions:

        actual_resp, actual_actions, score = self._extract_response(cur_context, solution)

        if score > best_score:
            best_score     = score
            best_resps     = []
            best_actions   = []
            best_solutions = []

        if score < best_score:
            continue

        best_resps.append(actual_resp)
        best_actions.append(actual_actions)
        best_solutions.append(solution)

    return best_score, best_resps, best_actions, best_solutions, cur_context
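# A minimal driver sketch (hypothetical user URI and utterance; assumes a
# fully initialized kernal instance):
#
#   score, resps, actions, sols, ctx = kernal.process_input(
#       u'hello computer', 'en', 'http://example.org/user/alice')
#   for resp in resps:
#       print u' '.join(resp)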
def test_module(self, module_name, run_trace=False, test_name=None):

    self.rt.set_trace(run_trace)

    m = self.modules[module_name]

    logging.info('running tests of module %s ...' % (module_name))

    num_tests = 0
    num_fails = 0

    for tc in self.session.query(model.TestCase).filter(model.TestCase.module == module_name):

        if test_name:
            if tc.name != test_name:
                logging.info('skipping test %s' % tc.name)
                continue

        num_tests += 1

        rounds = json.loads(tc.rounds)
        prep   = json_to_prolog(tc.prep)

        round_num    = 0
        prev_context = None
        res          = {}

        for t_in, t_out, test_actions in rounds:

            test_in  = u' '.join(t_in)
            test_out = u' '.join(t_out)

            logging.info("nlp_test: %s round %d test_in     : %s" % (tc.name, round_num, repr(test_in)))
            logging.info("nlp_test: %s round %d test_out    : %s" % (tc.name, round_num, repr(test_out)))
            logging.info("nlp_test: %s round %d test_actions: %s" % (tc.name, round_num, repr(test_actions)))

            res, cur_context = self._setup_context(user=TEST_USER,
                                                   lang=tc.lang,
                                                   inp=t_in,
                                                   prev_context=prev_context,
                                                   prev_res=res)

            # prep

            if prep:
                for p in prep:
                    solutions = self.rt.search(Clause(None, p, location=self.dummyloc), env=res)
                    if len(solutions) != 1:
                        raise PrologRuntimeError('Expected exactly one solution from preparation code for test "%s", got %d.'
                                                 % (tc.name, len(solutions)))
                    res = solutions[0]

            # inp / resp

            inp = self._compute_net_input(res, cur_context)

            # look up code in DB

            acode         = None
            matching_resp = False

            for tdr in self.session.query(model.TrainingData).filter(
                    model.TrainingData.lang == tc.lang,
                    model.TrainingData.inp == json.dumps(inp)):

                if acode:
                    logging.warn(u'%s: more than one acode for test_in "%s" found in DB!' % (tc.name, test_in))

                acode = json.loads(tdr.resp)
                pcode = self._reconstruct_prolog_code(acode)
                clause = Clause(None, pcode, location=self.dummyloc)
                solutions = self.rt.search(clause, env=res)

                for solution in solutions:

                    actual_out, actual_actions, score = self._extract_response(cur_context, solution)

                    if len(test_out) > 0:
                        if len(actual_out) > 0:
                            actual_out = u' '.join(tokenize(u' '.join(actual_out), tc.lang))
                        logging.info("nlp_test: %s round %d actual_out  : %s (score: %f)" % (tc.name, round_num, actual_out, score))
                        if actual_out != test_out:
                            logging.info("nlp_test: %s round %d UTTERANCE MISMATCH." % (tc.name, round_num))
                            continue  # no match

                    logging.info("nlp_test: %s round %d UTTERANCE MATCHED!" % (tc.name, round_num))

                    # check actions

                    if len(test_actions) > 0:

                        logging.info("nlp_test: %s round %d actual acts : %s" % (tc.name, round_num, repr(actual_actions)))

                        actions_matched = True
                        act = None
                        for action in test_actions:
                            for act in actual_actions:
                                if action == act:
                                    break
                            if action != act:
                                actions_matched = False
                                break

                        if not actions_matched:
                            logging.info("nlp_test: %s round %d ACTIONS MISMATCH." % (tc.name, round_num))
                            continue

                        logging.info("nlp_test: %s round %d ACTIONS MATCHED!" % (tc.name, round_num))

                    matching_resp = True
                    res = solution
                    break

                if matching_resp:
                    break

            if acode is None:
                logging.error('failed to find db entry for %s' % json.dumps(inp))
                logging.error(u'Error: %s: no training data for test_in "%s" found in DB!' % (tc.name, test_in))
                num_fails += 1
                break

            if not matching_resp:
                logging.error(u'nlp_test: %s round %d no matching response found.' % (tc.name, round_num))
                num_fails += 1
                break

            prev_context = cur_context
            round_num += 1

    self.rt.set_trace(False)

    return num_tests, num_fails
def fetch_weather_forecast(kernal):

    api_key = kernal.config.get("weather", "api_key")

    logging.debug('fetch_weather_forecast cron job, api key: %s' % api_key)

    sl = SourceLocation(fn='__internet__', col=0, line=0)

    #
    # resolve city ids, timezones
    #

    locations = {}

    # owmCityId(wdeLosAngeles, 5368361).
    solutions = kernal.rt.search_predicate('owmCityId', ['_1', '_2'])
    for s in solutions:

        location = s['_1'].name
        city_id  = int(s['_2'].f)

        # aiTimezone(wdeNewYorkCity, "America/New_York").
        solutions2 = kernal.rt.search_predicate('aiTimezone', [location, '_1'])
        if len(solutions2) < 1:
            continue
        timezone = solutions2[0]['_1'].s

        solutions2 = kernal.rt.search_predicate('rdfsLabel', [location, 'en', '_1'])
        if len(solutions2) < 1:
            continue
        label = solutions2[0]['_1'].s

        # wdpdCoordinateLocation(wdeBerlin, "Point(13.383333333 52.516666666)").
        solutions2 = kernal.rt.search_predicate('wdpdCoordinateLocation', [location, '_1'])
        if len(solutions2) < 1:
            continue
        m = coord_matcher.match(solutions2[0]['_1'].s)
        if not m:
            continue
        geo_lat  = float(m.group(2))
        geo_long = float(m.group(1))

        if location not in locations:
            locations[location] = {}

        locations[location]['city_id']  = city_id
        locations[location]['timezone'] = timezone
        locations[location]['label']    = label
        locations[location]['long']     = geo_long
        locations[location]['lat']      = geo_lat

    def mangle_label(label):
        return ''.join(map(lambda c: c if c.isalnum() else '', label))

    #
    # generate triples of weather and astronomical data
    #

    env = {}

    for location in locations:

        city_id   = locations[location]['city_id']
        timezone  = locations[location]['timezone']
        loc_label = mangle_label(locations[location]['label'])
        geo_lat   = locations[location]['lat']
        geo_long  = locations[location]['long']

        tz = pytz.timezone(timezone)
        ref_dt = datetime.now(tz).replace(hour=0, minute=0, second=0, microsecond=0)

        logging.debug("%s %s" % (location, ref_dt))

        #
        # sunrise / sunset
        #

        l = astral.Location()
        l.name      = 'name'
        l.region    = 'region'
        l.latitude  = geo_lat
        l.longitude = geo_long
        l.timezone  = timezone
        l.elevation = 0

        for day_offset in range(7):

            cur_date  = (ref_dt + timedelta(days=day_offset)).date()
            sun       = l.sun(date=cur_date, local=True)
            sun_const = u'aiUnlabeledSun%s%s' % (loc_label, cur_date.strftime('%Y%m%d'))

            env = do_retract(env, build_predicate('aiLocation', [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDate',     [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDawn',     [sun_const, '_']))
            env = do_retract(env, build_predicate('aiSunrise',  [sun_const, '_']))
            env = do_retract(env, build_predicate('aiNoon',     [sun_const, '_']))
            env = do_retract(env, build_predicate('aiSunset',   [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDusk',     [sun_const, '_']))

            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiLocation', [sun_const, location])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDate',     [sun_const, StringLiteral(cur_date.isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDawn',     [sun_const, StringLiteral(sun['dawn'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiSunrise',  [sun_const, StringLiteral(sun['sunrise'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiNoon',     [sun_const, StringLiteral(sun['noon'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiSunset',   [sun_const, StringLiteral(sun['sunset'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDusk',     [sun_const, StringLiteral(sun['dusk'].isoformat())])))

            logging.debug("%s %s %s -> %s" % (sun_const, cur_date, sun['sunrise'], sun['sunset']))

        #
        # fetch json forecast data from OpenWeatherMap
        #

        url = 'http://api.openweathermap.org/data/2.5/forecast?id=%s&APPID=%s' % (city_id, api_key)

        data = json.load(urllib2.urlopen(url))

        if 'list' not in data:
            logging.error('failed to fetch weather data for %s, got: %s' % (location, repr(data)))
            continue

        for fc in data['list']:

            dt_to   = datetime.strptime(fc['dt_txt'], '%Y-%m-%d %H:%M:%S')
            dt_to   = dt_to.replace(tzinfo=pytz.utc)
            dt_from = dt_to - timedelta(hours=3)

            temp_min      = fc['main']['temp_min'] - KELVIN
            temp_max      = fc['main']['temp_max'] - KELVIN
            code          = fc['weather'][0]['id']
            precipitation = float(fc['rain']['3h']) if 'rain' in fc and '3h' in fc['rain'] else 0.0
            icon          = fc['weather'][0]['icon']
            description   = fc['weather'][0]['description']
            clouds        = float(fc['clouds']['all'])

            fc_const = 'aiUnlabeledFc%s%s' % (loc_label, dt_from.strftime('%Y%m%d%H%M%S'))

            logging.debug("%s on %s-%s city_id=%s" % (fc_const, dt_from, dt_to, city_id))

            # sample triples generated per forecast slot:
            # aiDescription(aiUnlabeledFcFreudental20161205180000, "clear sky").
            # aiDtEnd(aiUnlabeledFcFreudental20161205180000, "2016-12-05T21:00:00+00:00").
            # aiTempMin(aiUnlabeledFcFreudental20161205180000, -6.666).
            # aiIcon(aiUnlabeledFcFreudental20161205180000, "01n").
            # aiLocation(aiUnlabeledFcFreudental20161205180000, wdeFreudental).
            # aiDtStart(aiUnlabeledFcFreudental20161205180000, "2016-12-05T18:00:00+00:00").
            # aiClouds(aiUnlabeledFcFreudental20161205180000, 0.0).
            # aiPrecipitation(aiUnlabeledFcFreudental20161205180000, 0.0).
            # aiTempMax(aiUnlabeledFcFreudental20161205180000, -6.45).

            env = do_retract(env, build_predicate('aiDescription',   [fc_const, '_']))
            env = do_retract(env, build_predicate('aiDtEnd',         [fc_const, '_']))
            env = do_retract(env, build_predicate('aiTempMin',       [fc_const, '_']))
            env = do_retract(env, build_predicate('aiIcon',          [fc_const, '_']))
            env = do_retract(env, build_predicate('aiLocation',      [fc_const, '_']))
            env = do_retract(env, build_predicate('aiDtStart',       [fc_const, '_']))
            env = do_retract(env, build_predicate('aiClouds',        [fc_const, '_']))
            env = do_retract(env, build_predicate('aiPrecipitation', [fc_const, '_']))
            env = do_retract(env, build_predicate('aiTempMax',       [fc_const, '_']))

            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiLocation',      [fc_const, location])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiTempMin',       [fc_const, temp_min])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiTempMax',       [fc_const, temp_max])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiPrecipitation', [fc_const, precipitation])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiClouds',        [fc_const, clouds])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiIcon',          [fc_const, StringLiteral(icon)])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDescription',   [fc_const, StringLiteral(description)])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDtStart',       [fc_const, StringLiteral(dt_from.isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDtEnd',         [fc_const, StringLiteral(dt_to.isoformat())])))

    kernal.rt.apply_overlay(WEATHER_DATA_MODULE, env)
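# Usage sketch (hypothetical scheduling; assumes a configured kernal with an
# OpenWeatherMap api_key in its "weather" config section). Intended to run
# periodically, e.g. from a cron-style job:
#
#   fetch_weather_forecast(kernal)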