Example #1
def convert_nlp_test(pred):
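    # Build an nlp_test clause from a legacy test predicate: pred.args[0] is the
    # language constant, pred.args[1] carries the test input/output terms
    # (ivr_in / ivr_out); the head produced is
    # nlp_test(MODULE_NAME, lang, 't<NNNN>', FIXME, [ivr_in, ivr_out, []]).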

    global test_cnt

    # print "% ", unicode(pred)

    lang = pred.args[0].name
    ivr_in = pred.args[1].args[0].args[0]
    ivr_out = pred.args[1].args[1].args[0]

    head = Predicate(name='nlp_test',
                     args=[
                         StringLiteral(MODULE_NAME),
                         Predicate(name=lang),
                         StringLiteral('t%04d' % test_cnt),
                         Predicate(name='FIXME'),
                         ListLiteral([ivr_in, ivr_out,
                                      ListLiteral([])])
                     ])

    test_cnt += 1

    clause = Clause(head=head)

    print unicode(clause)
Example #2
    def push_context(self, key, value):
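        # prepend value to the context list stored under key, creating the list
        # if necessary, and trim it to at most MAX_CONTEXT_LEN entries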

        l = self.read_context(key)

        # logging.debug ('context %s before push: %s' % (key, l))

        if not l:
            l = ListLiteral([])
        l.l.insert(0, value)

        l.l = l.l[:MAX_CONTEXT_LEN]

        # logging.debug ('context %s after push: %s' % (key, l))

        self.write_context(key, l)
Example #3
    def _setup_context (self, user, lang, inp, prev_context, prev_res):
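        # create a fresh context constant via gensym and assert user/lang/tokens/time
        # facts for it; if a previous context exists, link it via prev(...) and copy
        # its context/mem statements over; returns the resulting env (with 'C' bound
        # to the new context) plus the context constant itself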

        cur_context = Predicate(do_gensym (self.rt, 'context'))
        res = { }
        if ASSERT_OVERLAY_VAR_NAME in prev_res:
            res[ASSERT_OVERLAY_VAR_NAME] = prev_res[ASSERT_OVERLAY_VAR_NAME].clone()

        res = do_assertz ({}, Clause ( Predicate('user',   [cur_context, Predicate(user)])  , location=self.dummyloc), res=res)
        res = do_assertz ({}, Clause ( Predicate('lang',   [cur_context, Predicate(lang)])  , location=self.dummyloc), res=res)

        token_literal = ListLiteral (list(map(lambda x: StringLiteral(x), inp)))
        res = do_assertz ({}, Clause ( Predicate('tokens', [cur_context, token_literal])    , location=self.dummyloc), res=res)

        currentTime = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC).isoformat()
        res = do_assertz ({}, Clause ( Predicate('time',   [cur_context, StringLiteral(currentTime)]) , location=self.dummyloc), res=res)

        if prev_context:

            res = do_assertz ({}, Clause ( Predicate('prev', [cur_context, prev_context]) , location=self.dummyloc), res=res)

            # copy over all previous context statements to the new one
            s1s = self.rt.search_predicate ('context', [prev_context, '_1', '_2'], env=res)
            for s1 in s1s:
                res = do_assertz ({}, Clause ( Predicate('context', [cur_context, s1['_1'], s1['_2']]) , location=self.dummyloc), res=res)
            # copy over all previous mem statements to the new one
            s1s = self.rt.search_predicate ('mem', [prev_context, '_1', '_2'], env=res)
            for s1 in s1s:
                res = do_assertz ({}, Clause ( Predicate('mem', [cur_context, s1['_1'], s1['_2']]) , location=self.dummyloc), res=res)
            # import pdb; pdb.set_trace()

        res['C'] = cur_context

        return res, cur_context
Example #4
def builtin_tokenize(g, pe):
    """ tokenize (+Lang, +Str, -Tokens) """

    pe._trace('CALLED BUILTIN tokenize', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) != 3:
        raise PrologRuntimeError('tokenize: 3 args expected.', g.location)

    arg_lang = pe.prolog_eval(args[0], g.env, g.location)
    if not isinstance(arg_lang, Predicate) or len(arg_lang.args) > 0:
        raise PrologRuntimeError(
            'tokenize: first argument: constant expected, %s found instead.' %
            repr(args[0]), g.location)

    arg_str = pe.prolog_get_string(args[1], g.env, g.location)
    arg_tokens = pe.prolog_get_variable(args[2], g.env, g.location)

    tokens = list(
        map(lambda s: StringLiteral(s), tokenize(arg_str, lang=arg_lang.name)))

    g.env[arg_tokens] = ListLiteral(tokens)

    return True
Example #5
def convert_macro_string(ms):
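    # small character-level state machine: plain text becomes StringLiterals,
    # '@macro' tokens are skipped up to the next space, and '(a|b|c)' groups
    # become nested ListLiterals of choices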

    pos   = 0
    state = STATE_NORMAL
    curs  = u''
    res   = []
    while pos<len(ms):

        c = ms[pos]

        if state == STATE_NORMAL:
            
            if c=='@':
                if len(curs.strip())>0:
                    res.append(StringLiteral(curs.strip()))
                curs = u''
                state = STATE_MACRO
            elif c=='(':
                if len(curs.strip())>0:
                    res.append(StringLiteral(curs.strip()))
                curs = u''
                choices = []
                state = STATE_CHOICE
            else:
                curs += c

        elif state == STATE_MACRO:
            if c==' ':
                state = STATE_NORMAL
        elif state == STATE_CHOICE:
            if c==')':
                state = STATE_NORMAL
                choices.append(StringLiteral(curs))
                curs = u''
                res.append(ListLiteral(choices))
            elif c=='|':
                choices.append(StringLiteral(curs))
                curs = u''
            else:
                curs += c
        pos += 1
       
    if state == STATE_NORMAL:
        if len(curs.strip())>0:
            res.append(StringLiteral(curs.strip()))

    return ListLiteral(res)
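
A hedged usage sketch for the parser above (assuming convert_macro_string, StringLiteral, ListLiteral and the STATE_* constants from the surrounding module are in scope; the resulting structure is annotated by hand):

# illustration only -- relies on convert_macro_string() and the literal classes shown above
ml = convert_macro_string(u'hello (world|there) @NAME nice to meet you')
# plain text becomes StringLiterals, '(world|there)' becomes a nested ListLiteral
# of choices, and the '@NAME' macro token itself is dropped, i.e. roughly:
#   [ "hello", [ "world", "there" ], "nice to meet you" ]
print unicode(ml)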
Example #6
def _prolog_from_json(o):

    if o['pt'] == 'Constant':
        return Predicate(o['name'])
    if o['pt'] == 'StringLiteral':
        return StringLiteral(o['s'])
    if o['pt'] == 'NumberLiteral':
        return NumberLiteral(o['f'])
    if o['pt'] == 'ListLiteral':
        return ListLiteral(o['l'])

    raise PrologRuntimeError('cannot convert from json: %s .' % repr(o))
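
For orientation, a hedged sketch of how this object_hook is wired into a JSON decoder (mirroring the json.JSONDecoder call used in the rdf_to_pl example further down; the exact JSON layout is inferred from the 'pt' keys handled above):

import json

# illustration only -- assumes _prolog_from_json() and the literal classes above are in scope
raw = u'{"pt": "ListLiteral", "l": [{"pt": "NumberLiteral", "f": 1.0}, {"pt": "StringLiteral", "s": "foo"}]}'
lit = json.JSONDecoder(object_hook=_prolog_from_json).decode(raw)
# object_hook converts nested objects bottom-up, so lit ends up as a ListLiteral
# holding a NumberLiteral(1.0) and a StringLiteral(u'foo')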
Example #7
def builtin_context_get_fn(pred, env, rt):
    """ context_get(+Name) """

    rt._trace_fn('CALLED FUNCTION context_get', env)

    args = pred.args
    if len(args) != 1:
        raise PrologRuntimeError('context_get: 1 arg expected.')

    key = args[0].name

    v = rt.read_context(key)   # the context store lives on the runtime (rt)
    if not v:
        return ListLiteral([])

    return v
Example #8
    def process_input (self, utterance, utt_lang, user_uri, test_mode=False, trace=False):

        """ process user input, return action(s) """

        gn = rdflib.Graph(identifier=CONTEXT_GRAPH_NAME)

        tokens = tokenize(utterance, utt_lang)

        self.kb.remove((CURIN, None, None, gn))

        quads = [ ( CURIN, KB_PREFIX+u'user',      user_uri,                                        gn),
                  ( CURIN, KB_PREFIX+u'utterance', utterance,                                       gn),
                  ( CURIN, KB_PREFIX+u'uttLang',   utt_lang,                                        gn),
                  ( CURIN, KB_PREFIX+u'tokens',    pl_literal_to_rdf(ListLiteral(tokens), self.kb), gn)
                  ]

        if test_mode:
            quads.append( ( CURIN, KB_PREFIX+u'currentTime', pl_literal_to_rdf(NumberLiteral(TEST_TIME), self.kb), gn ) )
        else:
            quads.append( ( CURIN, KB_PREFIX+u'currentTime', pl_literal_to_rdf(NumberLiteral(time.time()), self.kb), gn ) )
   
        self.kb.addN_resolve(quads)

        self.prolog_rt.reset_actions()

        if test_mode:

            for dr in self.db.session.query(model.DiscourseRound).filter(model.DiscourseRound.inp==utterance, 
                                                                         model.DiscourseRound.lang==utt_lang):
            
                prolog_s = ','.join(dr.resp.split(';'))

                logging.info("test tokens=%s prolog_s=%s" % (repr(tokens), prolog_s) )
                
                c = self.parser.parse_line_clause_body(prolog_s)
                # logging.debug( "Parse result: %s" % c)

                # logging.debug( "Searching for c: %s" % c )

                solutions = self.prolog_rt.search(c)

                # if len(solutions) == 0:
                #     raise PrologError ('nlp_test: %s no solution found.' % clause.location)
            
                # print "round %d utterances: %s" % (round_num, repr(prolog_rt.get_utterances())) 

        return self.prolog_rt.get_actions()
Example #9
def rdf_to_pl(l):
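    # map an rdflib term to the corresponding prolog literal: numeric and date
    # XSD datatypes become NumberLiterals, DT_LIST payloads are JSON-decoded via
    # _prolog_from_json, DT_CONSTANT becomes a Predicate, anything else a StringLiteral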

    value = unicode(l)

    if isinstance(l, rdflib.Literal):
        if l.datatype:

            datatype = str(l.datatype)

            if datatype == 'http://www.w3.org/2001/XMLSchema#decimal':
                value = NumberLiteral(float(value))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#float':
                value = NumberLiteral(float(value))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#integer':
                value = NumberLiteral(float(value))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#dateTime':
                dt = dateutil.parser.parse(value)
                value = NumberLiteral(time.mktime(dt.timetuple()))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#date':
                dt = dateutil.parser.parse(value)
                value = NumberLiteral(time.mktime(dt.timetuple()))
            elif datatype == DT_LIST:
                value = json.JSONDecoder(
                    object_hook=_prolog_from_json).decode(value)
            elif datatype == DT_CONSTANT:
                value = Predicate(value)
            else:
                raise PrologRuntimeError(
                    'sparql_query: unknown datatype %s .' % datatype)
        else:
            if l.value is None:
                value = ListLiteral([])
            else:
                value = StringLiteral(value)

    else:
        value = StringLiteral(value)

    return value
Example #10
    def process_input(self,
                      utterance,
                      utt_lang,
                      user_uri,
                      test_mode=False,
                      trace=False):
        """ process user input, return action(s) """

        gn = rdflib.Graph(identifier=CONTEXT_GRAPH_NAME)

        tokens = tokenize(utterance, utt_lang)

        self.kb.remove((CURIN, None, None, gn))

        quads = [(CURIN, KB_PREFIX + u'user', user_uri, gn),
                 (CURIN, KB_PREFIX + u'utterance', utterance, gn),
                 (CURIN, KB_PREFIX + u'uttLang', utt_lang, gn),
                 (CURIN, KB_PREFIX + u'tokens',
                  pl_literal_to_rdf(ListLiteral(tokens), self.kb), gn)]

        if test_mode:
            quads.append((CURIN, KB_PREFIX + u'currentTime',
                          pl_literal_to_rdf(NumberLiteral(TEST_TIME),
                                            self.kb), gn))
        else:
            quads.append((CURIN, KB_PREFIX + u'currentTime',
                          pl_literal_to_rdf(NumberLiteral(time.time()),
                                            self.kb), gn))

        self.kb.addN_resolve(quads)

        self.prolog_rt.reset_actions()
        self.prolog_rt.set_trace(trace)

        if test_mode:

            prolog_s = None

            for dr in self.db.session.query(model.DiscourseRound).filter(
                    model.DiscourseRound.inp == utterance,
                    model.DiscourseRound.lang == utt_lang):

                prolog_s = ','.join(dr.resp.split(';'))

                logging.info("test tokens=%s prolog_s=%s" %
                             (repr(tokens), prolog_s))

            if not prolog_s:
                logging.error('test utterance %s not found!' % utterance)
                return []

        else:

            x = self.nlp_model.compute_x(utterance)

            logging.debug("x: %s -> %s" % (utterance, x))

            # which bucket does it belong to?
            bucket_id = min([
                b for b in xrange(len(self.nlp_model.buckets))
                if self.nlp_model.buckets[b][0] > len(x)
            ])

            # get a 1-element batch to feed the sentence to the model
            encoder_inputs, decoder_inputs, target_weights = self.tf_model.get_batch(
                {bucket_id: [(x, [])]}, bucket_id)

            # print "encoder_inputs, decoder_inputs, target_weights", encoder_inputs, decoder_inputs, target_weights

            # get output logits for the sentence
            _, _, output_logits = self.tf_model.step(self.tf_session,
                                                     encoder_inputs,
                                                     decoder_inputs,
                                                     target_weights, bucket_id,
                                                     True)

            logging.debug("output_logits: %s" % repr(output_logits))

            # this is a greedy decoder - outputs are just argmaxes of output_logits.
            outputs = [
                int(np.argmax(logit, axis=1)) for logit in output_logits
            ]

            # print "outputs", outputs

            preds = map(lambda o: self.inv_output_dict[o], outputs)
            logging.debug("preds: %s" % repr(preds))

            prolog_s = ''

            do_and = True

            # import pdb; pdb.set_trace()

            for p in preds:

                if p[0] == '_':
                    continue  # skip _EOS

                if p == u'or':
                    do_and = False
                    continue

                if len(prolog_s) > 0:
                    if do_and:
                        prolog_s += ', '
                    else:
                        prolog_s += '; '
                prolog_s += p

                do_and = True

            logging.debug('?- %s' % prolog_s)

        abufs = []

        c = self.parser.parse_line_clause_body(prolog_s)
        # logging.debug( "Parse result: %s" % c)

        # logging.debug( "Searching for c: %s" % c )

        solutions = self.prolog_rt.search(c)

        # if len(solutions) == 0:
        #     raise PrologError ('nlp_test: %s no solution found.' % clause.location)

        # print "round %d utterances: %s" % (round_num, repr(prolog_rt.get_utterances()))

        abufs = self.prolog_rt.get_actions()

        return abufs
Example #11
def builtin_sparql_query(g, pe):
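    # run a SPARQL query against the knowledge base and bind each remaining
    # argument (a prolog variable) to the ListLiteral of values its corresponding
    # result variable takes across all result rows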

    pe._trace('CALLED BUILTIN sparql_query', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) < 1:
        raise PrologRuntimeError('sparql_query: at least 1 argument expected.')

    query = pe.prolog_get_string(args[0], g.env)

    # logging.debug("builtin_sparql_query called, query: '%s'" % query)

    # run query

    result = pe.kb.query(query)

    # logging.debug("builtin_sparql_query result: '%s'" % repr(result))

    if len(result) == 0:
        return False

    # turn result into lists of literals we can then bind to prolog variables

    res_map = {}
    res_vars = {}  # variable idx -> variable name

    for binding in result:

        for v in binding.labels:

            l = binding[v]

            value = rdf_to_pl(l)

            if not v in res_map:
                res_map[v] = []
                res_vars[binding.labels[v]] = v

            res_map[v].append(value)

    # logging.debug("builtin_sparql_query res_map : '%s'" % repr(res_map))
    # logging.debug("builtin_sparql_query res_vars: '%s'" % repr(res_vars))

    # apply bindings to environment vars

    v_idx = 0

    for arg in args[1:]:

        sparql_var = res_vars[v_idx]
        prolog_var = pe.prolog_get_variable(arg, g.env)
        value = res_map[sparql_var]

        # logging.debug("builtin_sparql_query mapping %s -> %s: '%s'" % (sparql_var, prolog_var, value))

        g.env[prolog_var] = ListLiteral(value)

        v_idx += 1

    return True
Example #12
def _rdf_exec(g, pe, generate_lists=False):
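    # translate the prolog argument list (triples plus optional/filter/distinct/
    # limit/offset structures) into an rdflib SPARQL algebra expression, run it
    # against the knowledge base and return either per-variable value lists
    # (generate_lists=True) or a list of per-row bindings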

    # rdflib.plugins.sparql.parserutils.CompValue
    #
    # class CompValue(OrderedDict):
    #     def __init__(self, name, **values):
    #
    # SelectQuery(
    #   p =
    #     Project(
    #       p =
    #         LeftJoin(
    #           p2 =
    #             BGP(
    #               triples = [(rdflib.term.Variable(u'leaderobj'), rdflib.term.URIRef(u'http://dbpedia.org/ontology/leader'), rdflib.term.Variable(u'leader'))]
    #               _vars = set([rdflib.term.Variable(u'leaderobj'), rdflib.term.Variable(u'leader')])
    #             )
    #           expr =
    #             TrueFilter(
    #               _vars = set([])
    #             )
    #           p1 =
    #             BGP(
    #               triples = [(rdflib.term.Variable(u'leader'), rdflib.term.URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef(u'http://schema.org/Person')), (rdflib.term.Variable(u'leader'), rdflib.term.URIRef(u'http://www.w3.org/2000/01/rdf-schema#label'), rdflib.term.Variable(u'label'))]
    #               _vars = set([rdflib.term.Variable(u'label'), rdflib.term.Variable(u'leader')])
    #             )
    #           _vars = set([rdflib.term.Variable(u'leaderobj'), rdflib.term.Variable(u'label'), rdflib.term.Variable(u'leader')])
    #         )
    #       PV = [rdflib.term.Variable(u'leader'), rdflib.term.Variable(u'label'), rdflib.term.Variable(u'leaderobj')]
    #       _vars = set([rdflib.term.Variable(u'leaderobj'), rdflib.term.Variable(u'label'), rdflib.term.Variable(u'leader')])
    #     )
    #   datasetClause = None
    #   PV = [rdflib.term.Variable(u'leader'), rdflib.term.Variable(u'label'), rdflib.term.Variable(u'leaderobj')]
    #   _vars = set([rdflib.term.Variable(u'leaderobj'), rdflib.term.Variable(u'label'), rdflib.term.Variable(u'leader')])
    # )

    pred = g.terms[g.inx]
    args = pred.args
    # if len(args) == 0 or len(args) % 3 != 0:
    #     raise PrologRuntimeError('rdf: one or more argument triple(s) expected, got %d args' % len(args))

    distinct = False
    triples = []
    optional_triples = []
    filters = []
    limit = 0
    offset = 0

    arg_idx = 0
    var_map = {}  # string -> rdflib.term.Variable

    while arg_idx < len(args):

        arg_s = args[arg_idx]

        # check for optional structure
        if isinstance(arg_s, Predicate) and arg_s.name == 'optional':

            s_args = arg_s.args

            if len(s_args) != 3:
                raise PrologRuntimeError('rdf: optional: triple arg expected')

            arg_s = s_args[0]
            arg_p = s_args[1]
            arg_o = s_args[2]

            logging.debug('rdf: optional arg triple: %s' % repr(
                (arg_s, arg_p, arg_o)))

            optional_triples.append(
                (pl_to_rdf(arg_s, g.env, pe, var_map,
                           pe.kb), pl_to_rdf(arg_p, g.env, pe, var_map, pe.kb),
                 pl_to_rdf(arg_o, g.env, pe, var_map, pe.kb)))

            arg_idx += 1

        # check for filter structure
        elif isinstance(arg_s, Predicate) and arg_s.name == 'filter':

            logging.debug('rdf: filter structure detected: %s' %
                          repr(arg_s.args))

            s_args = arg_s.args

            # transform multiple arguments into explicit and-tree

            pl_expr = s_args[0]
            for a in s_args[1:]:
                pl_expr = Predicate('and', [pl_expr, a])

            filters.append(
                prolog_to_filter_expression(pl_expr, g.env, pe, var_map,
                                            pe.kb))

            arg_idx += 1

        # check for distinct
        elif isinstance(arg_s, Predicate) and arg_s.name == 'distinct':

            s_args = arg_s.args
            if len(s_args) != 0:
                raise PrologRuntimeError(
                    'rdf: distinct: unexpected arguments.')

            distinct = True
            arg_idx += 1

        # check for limit/offset
        elif isinstance(arg_s, Predicate) and arg_s.name == 'limit':

            s_args = arg_s.args
            if len(s_args) != 1:
                raise PrologRuntimeError('rdf: limit: one argument expected.')

            limit = pe.prolog_get_int(s_args[0], g.env)
            arg_idx += 1

        elif isinstance(arg_s, Predicate) and arg_s.name == 'offset':

            s_args = arg_s.args
            if len(s_args) != 1:
                raise PrologRuntimeError('rdf: offset: one argument expected.')

            offset = pe.prolog_get_int(s_args[0], g.env)
            arg_idx += 1

        else:

            if arg_idx > len(args) - 3:
                raise PrologRuntimeError(
                    'rdf: not enough arguments for triple')

            arg_p = args[arg_idx + 1]
            arg_o = args[arg_idx + 2]

            logging.debug('rdf: arg triple: %s' % repr((arg_s, arg_p, arg_o)))

            triples.append(
                (pl_to_rdf(arg_s, g.env, pe, var_map,
                           pe.kb), pl_to_rdf(arg_p, g.env, pe, var_map, pe.kb),
                 pl_to_rdf(arg_o, g.env, pe, var_map, pe.kb)))

            arg_idx += 3

    logging.debug('rdf: triples: %s' % repr(triples))
    logging.debug('rdf: optional_triples: %s' % repr(optional_triples))
    logging.debug('rdf: filters: %s' % repr(filters))

    if len(triples) == 0:
        raise PrologRuntimeError(
            'rdf: at least one non-optional triple expected')

    var_list = var_map.values()
    var_set = set(var_list)

    p = CompValue('BGP', triples=triples, _vars=var_set)

    for t in optional_triples:
        p = CompValue('LeftJoin',
                      p1=p,
                      p2=CompValue('BGP', triples=[t], _vars=var_set),
                      expr=CompValue('TrueFilter', _vars=set([])))

    for f in filters:
        p = CompValue('Filter', p=p, expr=f, _vars=var_set)

    if limit > 0:
        p = CompValue('Slice', start=offset, length=limit, p=p, _vars=var_set)

    if distinct:
        p = CompValue('Distinct', p=p, _vars=var_set)

    algebra = CompValue('SelectQuery',
                        p=p,
                        datasetClause=None,
                        PV=var_list,
                        _vars=var_set)

    result = pe.kb.query_algebra(algebra)

    logging.debug('rdf: result (len: %d): %s' % (len(result), repr(result)))

    if len(result) == 0:
        return False

    if generate_lists:

        # bind each variable to list of values

        for binding in result:

            for v in binding.labels:

                l = binding[v]

                value = rdf_to_pl(l)

                if not v in g.env:
                    g.env[v] = ListLiteral([])

                g.env[v].l.append(value)

        return True

    else:

        # turn result into list of bindings

        res_bindings = []
        for binding in result:

            res_binding = {}

            for v in binding.labels:

                l = binding[v]

                value = rdf_to_pl(l)

                res_binding[v] = value

            res_bindings.append(res_binding)

        if len(res_bindings) == 0 and len(result) > 0:
            res_bindings.append({})  # signal success

        logging.debug('rdf: res_bindings: %s' % repr(res_bindings))

        return res_bindings
Example #13
def builtin_context_score(g, pe):
    """ context_score(+Name, ?Value, +Points, ?Score [, +MinPoints]) """

    pe._trace('CALLED BUILTIN context_score', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) < 4:
        raise PrologRuntimeError('context_score: at least 4 args expected.')
    if len(args) > 5:
        raise PrologRuntimeError('context_score: max 5 args expected.')

    key = args[0].name
    value = pe.prolog_eval(args[1], g.env)
    points = pe.prolog_get_float(args[2], g.env)
    scorev = pe.prolog_get_variable(args[3], g.env)

    if len(args) == 5:
        min_score = pe.prolog_get_float(args[4], g.env)
    else:
        min_score = 0.0

    score = g.env[scorev].f if scorev in g.env else 0.0

    if value:

        stack = pe.read_context(key)

        if stack:
            i = 1
            for v in stack.l:
                if v == value:
                    score += points / float(i)
                    break
                i += 1

        if score < min_score:
            return False
        g.env[scorev] = NumberLiteral(score)
        return True

    if not isinstance(args[1], Variable):
        raise PrologRuntimeError(
            u'context_score: arg 2 literal or variable expected, %s found instead.'
            % unicode(args[1]))

    res = []

    stack = pe.read_context(key)
    if stack:
        i = 1
        for v in stack.l:
            s = score + points / float(i)
            if s >= min_score:
                res.append({
                    args[1].name: v,
                    scorev: NumberLiteral(s)
                })
            i += 1
    else:
        if score >= min_score:
            res.append({
                args[1].name: ListLiteral([]),
                scorev: NumberLiteral(score)
            })

    return res