示例#1
0
def main():
    """Build the knowledge base and facts from text sources, then resolve a
    sample query with a GraphWalker in training mode.

    NOTE(review): relies on OntoBuilder, OntoContainer, Algorithm,
    AlgoContainer, Brain, Estimator, AlgoComposer and GraphWalker being
    importable at module level.
    """
    builder = OntoBuilder()
    builder.build_knowledge_base('data/knowledge_base.txt')
    builder.build_facts('data/fact_base.txt')
    # builder.store('data/knowledge_base.json')

    onto_container = OntoContainer()
    onto_container.load("data/knowledge_base.json")
    onto_container.build_secondary_connections()

    algo1 = Algorithm(onto_container=onto_container,
                      filename='algo/patterns/simple_connection.json')

    algo_container = AlgoContainer()
    algo_container.add_algorithm(algo1)

    brain = Brain(onto_container=onto_container, algo_container=algo_container)
    estimator = Estimator(brain)
    algo_composer = AlgoComposer(brain=brain, estimator=estimator)

    # Renamed from `input` to avoid shadowing the builtin of the same name.
    query = 'do people in Russia speak english?'
    # query = 'does USA have people?'

    graph_walker = GraphWalker(brain=brain)
    graph_walker.train_mode = True
    result = graph_walker.resolve(query)
    print(result)
    exit()

    # NOTE(review): unreachable — exit() above terminates the process.
    # Looks like temporarily disabled composer scaffolding; kept as-is.
    algorithm = algo_composer.compose(query, 'right')
    if algorithm:
        algorithm.save('algo/patterns/composed.json')
def main():
    """Rebuild the fact base with OntoBuilder2, generate algorithm patterns
    from 'data/algo_base.txt', then resolve a sample query with a
    GraphWalker in training mode.

    NOTE(review): relies on OntoBuilder2, OntoContainer, Algorithm,
    AlgoContainer, Brain, Estimator, AlgoBuilder and GraphWalker being
    importable at module level.
    """
    builder = OntoBuilder2()
    # builder.build_knowledge_base('data/knowledge_base.txt')
    builder.build_facts('data/fact_base.txt')
    builder.store('data/knowledge_base.json')

    onto_container = OntoContainer()
    onto_container.load("data/knowledge_base.json")
    onto_container.build_secondary_connections()

    algo1 = Algorithm(onto_container=onto_container,
                      filename='algo/patterns/simple_connection.json')

    algo_container = AlgoContainer()
    algo_container.add_algorithm(algo1)

    brain = Brain(onto_container=onto_container, algo_container=algo_container)
    # NOTE(review): `estimator` is unused below; kept in case Estimator()
    # has constructor side effects.
    estimator = Estimator(brain)

    algo_builder = AlgoBuilder(brain)
    algo_builder.build_from('data/algo_base.txt', './algo/patterns')

    # Renamed from `input` to avoid shadowing the builtin of the same name.
    # query = 'do people in a slavic speaking country speak english?'
    query = 'do people in a USA speak english?'
    # query = 'does USA have people?'

    graph_walker = GraphWalker(brain=brain)
    graph_walker.train_mode = True
    result = graph_walker.resolve(query)
    print(result)
    exit()
def main():
    """Rebuild the fact base, generate algorithm patterns, register the
    reply/context algorithms with the brain, and run a sample query
    through AlgoRunner.

    NOTE(review): relies on OntoBuilder2, OntoContainer, Algorithm,
    AlgoContainer, Brain, Estimator, AlgoBuilder and AlgoRunner being
    importable at module level.
    """
    builder = OntoBuilder2()
    builder.build_facts('data/fact_base.txt')
    builder.store('data/knowledge_base.json')

    onto_container = OntoContainer()
    onto_container.load("data/knowledge_base.json")
    onto_container.build_secondary_connections()

    algo_container = AlgoContainer()

    brain = Brain(onto_container=onto_container, algo_container=algo_container)
    # NOTE(review): `estimator` is unused below; kept in case Estimator()
    # has constructor side effects.
    estimator = Estimator(brain)

    algo_builder = AlgoBuilder(brain)
    algo_builder.build_from('data/algo_base.txt', './algo/patterns')

    # Register the pattern algorithms produced by the build step above.
    pattern_files = [
        'algo/patterns/closed_q_reply.json',
        'algo/patterns/what_question_reply.json',
        'algo/patterns/switch_context.json',
        'algo/patterns/get_closest.json',
    ]
    for pattern_file in pattern_files:
        algo_container.add_algorithm(
            Algorithm(onto_container=onto_container, filename=pattern_file))
    algo_container.attach_to_brain(brain)

    # Renamed from `input` to avoid shadowing the builtin of the same name.
    # query = 'do people in a slavic speaking country speak english?'
    query = 'do people in a USA speak english?'
    # query = 'does USA have people?'

    algo_runner = AlgoRunner(brain=brain)
    result = algo_runner.run(query)
    print(result)
示例#4
0
from nlp import Clause
from onto.onto_container import OntoContainer
from onto_resolver import OntoResolver

# Load the MoneyCare ontology from disk.
container = OntoContainer()
container.load("onto/moneycare.json")

# Load a pre-serialized sample query clause.
clause = Clause()
clause.load("test/sample_query3.json")

# Resolve the clause against the ontology and print the reply.
resolver = OntoResolver(container)
reply = resolver.get_reply(clause)

print(reply)
示例#5
0
 def __init__(self):
     """Initialize an empty builder with a fresh OntoContainer."""
     # All three counters start at zero; ints are immutable, so the
     # chained assignment binds three independent names.
     self.id_counter = self.fact_counter = self.direction_counter = 0
     # Nodes collected while building (deduplicated by the set).
     self.nodes = set()
     # Target container that receives built nodes and connections.
     self.container = OntoContainer()
示例#6
0
class OntoBuilder:
    """Builds an ontology graph from plain-text knowledge-base and fact
    files.

    Nodes are collected in ``self.nodes`` while parsing and published to
    ``self.container`` (an OntoContainer); connections are always created
    in both directions. The result can be serialized to JSON via
    ``store()``.
    """

    def __init__(self):
        # Unique nodes collected while parsing (deduplicated by the set).
        self.nodes = set()
        # Monotonic source for new string node ids.
        self.id_counter = 0
        # Counters used to label generated "fact N" / "direction N" nodes.
        self.fact_counter = 0
        self.direction_counter = 0
        self.container = OntoContainer()


    @staticmethod
    def load_list_from_file(filename):
        """Return the stripped lines of *filename* (UTF-8) as a list.

        Fix: was declared without @staticmethod although it takes no
        ``self`` and is always called on the class; the decorator makes
        the intent explicit and allows instance calls too.
        """
        lines = []
        with open(filename, 'r', encoding='utf-8') as file:
            for line in file:
                lines.append(line.strip())
        return lines


    def build_knowledge_base(self, filename):
        """Parse each line of *filename* into nodes/connections, then
        publish the accumulated node set to the container sorted by id."""
        lines = OntoBuilder.load_list_from_file(filename)
        for line in lines:
            self._build_kb_item(line)

        self.container.nodes = list(self.nodes)
        self.container.sort_nodes_by_id()


    def build_facts(self, filename):
        """Create fact/direction nodes from *filename*.

        Lines starting with '#' are skipped. A line starting with 'to'
        (case-insensitive) yields a "direction N" node, any other line a
        "fact N" node. When more than one direction node exists, an
        abstract 'direction' hub node is added and connected to each.
        """
        lines = OntoBuilder.load_list_from_file(filename)
        direction_nodes = []
        fact_nodes = []
        for line in lines:
            if line.startswith('#'):
                continue
            node = self._build_fact(line)
            if node:
                if line[:2].lower() == 'to':
                    self.direction_counter += 1
                    node.pattern = 'direction {}'.format(self.direction_counter)
                    direction_nodes.append(node)
                else:
                    self.fact_counter += 1
                    node.pattern = 'fact {}'.format(self.fact_counter)
                    fact_nodes.append(node)

        if len(direction_nodes) > 1:
            self.id_counter += 1
            abstract_direction_node = Node(id=str(self.id_counter),
                                           pattern='direction',
                                           container=self.container,
                                           abstract=True)
            self.container.nodes.append(abstract_direction_node)
            for node in direction_nodes:
                self._add_bidirect_connections(abstract_direction_node, node)


    def store(self, filename):
        """Serialize the container's nodes and connections to *filename*
        as JSON (encoded with the project's OntoEncoder)."""
        out_val = {'nodes': self.container.nodes,
                   'connections': self.container.connections}
        with open(filename, mode='wt', encoding='utf-8') as output_file:
            print(self._serialize(out_val), file=output_file)


    @staticmethod
    def _serialize(value):
        """JSON-encode *value* with the project's OntoEncoder."""
        return json.dumps(value, cls=OntoEncoder)


    def _find_node_by_pattern(self, pattern):
        """Return the first collected node whose pattern equals *pattern*,
        or None when no node matches."""
        for node in self.nodes:
            if node.pattern == pattern:
                return node
        return None


    def _build_kb_item(self, line):
        """Parse one knowledge-base line.

        Patterns appear in square brackets, e.g. ``[some pattern]``.
        A '*' anywhere in the line connects the first two patterns; a '+'
        creates an abstract node joined to every pattern on the line.
        """
        # Fix: raw string — "\[" / "\w" are invalid escape sequences in a
        # plain string literal (SyntaxWarning on modern Python).
        matches = re.findall(r"(\[[\w\d\s-]+\])", line)
        make_abstract_node = '+' in line
        make_connection = '*' in line
        nodes = []
        for m in matches:
            pattern = m.strip('[').strip(']')
            node = self._find_node_by_pattern(pattern)
            if not node:
                self.id_counter += 1
                node = Node(id=str(self.id_counter), pattern=pattern,
                            container=self.container, abstract=False)
            nodes.append(node)

        if make_connection:
            self._add_bidirect_connections(nodes[0], nodes[1])

        if make_abstract_node:
            self.id_counter += 1
            pattern = ' '.join([node.pattern for node in nodes])
            abstract_node = Node(id=str(self.id_counter), pattern=pattern,
                                 container=self.container, abstract=True)
            for node in nodes:
                self._add_bidirect_connections(node, abstract_node)

            nodes.append(abstract_node)

        self.nodes.update(nodes)


    def _add_bidirect_connections(self, node1, node2):
        """Append a connection in each direction between the two nodes.

        Raises:
            Exception: if both arguments are the same node.
        """
        if node1 == node2:
            # Fix: raise Exception instead of BaseException so ordinary
            # `except Exception` handlers can catch it (still caught by
            # any existing `except BaseException`).
            raise Exception('cannot connect node to itself')
        connection = Connection(source=node1, target=node2, container=self.container)
        self.container.connections.append(connection)
        connection = Connection(source=node2, target=node1, container=self.container)
        self.container.connections.append(connection)


    def _build_fact(self, line):
        """Build a fact from one text line.

        Tokenizes the line (punctuation stripped, '-' kept), maps tokens
        to existing container nodes, lifts each node to its most abstract
        ancestor, then connects the survivors: directly when exactly two
        remain, through a new abstract "knowledge center" node otherwise.

        Returns the knowledge-center node, or None when fewer than three
        nodes needed connecting.
        """
        translator = str.maketrans('', '', string.punctuation.replace('-', '') + '«»')
        terms = line.translate(translator).split()
        nodes_to_connect = set()
        nodes = []
        # Collect the container nodes matching this line's tokens.
        for term in terms:
            node = self.container.get_node_by_pattern(term)
            if node:
                nodes.append(node)

        # Replace each node by its upmost abstract ancestor.
        eliminated = []
        for node in nodes:
            most_abstract_node = self._get_most_abstract_node(node, nodes, eliminated)
            if most_abstract_node:
                nodes_to_connect.add(most_abstract_node)
        nodes_to_connect = [node for node in nodes_to_connect if node not in eliminated]

        # Connect whatever survived the abstraction step.
        if len(nodes_to_connect) < 2:
            return None
        if len(nodes_to_connect) == 2:
            # NOTE(review): intentionally returns None here — only
            # multi-node facts produce a labelled fact node.
            self._add_bidirect_connections(nodes_to_connect[0], nodes_to_connect[1])
            return None
        self.id_counter += 1
        fact_node = Node(id=str(self.id_counter), pattern='',
                         container=self.container, abstract=True)
        fact_node.knowledge_center = True
        self.container.nodes.append(fact_node)
        for node in nodes_to_connect:
            self._add_bidirect_connections(fact_node, node)
        return fact_node


    def _get_upper_abstract_nodes(self, node):
        """Return the abstract nodes directly reachable from *node*."""
        return [conn.target for conn in self.container.connections
                if conn.source == node and conn.target.abstract]


    def _get_most_abstract_node(self, src_node, nodes, eliminated):
        """Climb from *src_node* to its most abstract ancestor.

        Mutates *eliminated* in place, recording every intermediate node
        (and siblings absorbed along the way) so callers skip them.
        """
        current_node = src_node
        while True:
            abstract_node = self._get_most_abstract_node_step(current_node, nodes, eliminated)
            if abstract_node == current_node or abstract_node in eliminated:
                return abstract_node
            eliminated.append(current_node)
            current_node = abstract_node


    def _get_most_abstract_node_step(self, src_node, nodes, eliminated):
        """One climbing step: return an abstract parent of *src_node* that
        is also connected to another node of the fact, or *src_node* itself
        when there is no such parent.

        Raises:
            Exception: when two parallel candidate abstracts are found.
        """
        upper_abstract = self._get_upper_abstract_nodes(src_node)
        upper_abstract = [node for node in upper_abstract if node not in eliminated]
        if len(upper_abstract) == 0:
            return src_node

        candidate_abstracts = set()
        for abstract_node in upper_abstract:
            for node in nodes:
                if node == src_node:
                    continue
                if self.container.are_nodes_connected(node, abstract_node):
                    candidate_abstracts.add(abstract_node)
                    # The absorbed sibling must not be connected again.
                    eliminated.append(node)
        if len(candidate_abstracts) > 1:
            # Fix: Exception instead of BaseException (see
            # _add_bidirect_connections).
            raise Exception('donna what to do with 2 parallel abstracts')
        if len(candidate_abstracts) == 1:
            return candidate_abstracts.pop()
        return src_node
示例#7
0
 def __init__(self):
     """Initialize an empty builder state."""
     # Container receiving all built nodes and connections.
     self.container = OntoContainer()
     # Nodes created while parsing input lines.
     self.nodes = []
     # Maps each input line to the list of nodes built from it.
     self.line_nodes = {}
     # Tokens ignored when building nodes from a line.
     self.stop_words = ['a', 'is', 'in']
示例#8
0
class OntoBuilder2:
    """Second-generation ontology builder.

    Builds nodes per input line, merges frequently co-occurring node
    bigrams into abstract "combined" nodes, and links multi-node lines
    through weighted fact/direction hub nodes.
    """

    def __init__(self):
        # Nodes created while parsing (the container holds the
        # authoritative list).
        self.nodes = []
        # Tokens ignored when building nodes from a line.
        self.stop_words = ['a', 'is', 'in']
        self.container = OntoContainer()
        # Maps each input line to the list of nodes built from it.
        self.line_nodes = {}

    @staticmethod
    def load_list_from_file(filename):
        """Return the stripped lines of *filename* (UTF-8) as a list.

        Fix: was declared without @staticmethod although it takes no
        ``self`` and is always called on the class; the decorator makes
        the intent explicit and allows instance calls too.
        """
        lines = []
        with open(filename, 'r', encoding='utf-8') as file:
            for line in file:
                lines.append(line.strip())
        return lines

    def tokenize_line(self, line):
        """Split *line* into terms, dropping punctuation except '-' (and
        the « » quote characters)."""
        translator = str.maketrans('', '',
                                   string.punctuation.replace('-', '') + '«»')
        terms = line.translate(translator).split()
        return terms

    def build_facts(self, filename):
        """Build the graph from the fact file *filename*.

        First builds nodes and connections for every non-comment line,
        then creates a labelled hub node for every line that qualifies as
        a fact: "direction N" for lines starting with 'to'
        (case-insensitive), "fact N" otherwise.
        """
        lines = OntoBuilder2.load_list_from_file(filename)

        self._make_connections(lines)

        direction_nodes = []
        fact_nodes = []
        fact_counter = 0
        direction_counter = 0
        for line, nodes in self.line_nodes.items():
            if not self._is_fact(nodes):
                continue

            fact_node = self._build_fact(nodes)
            if fact_node:
                if line[:2].lower() == 'to':
                    direction_counter += 1
                    fact_node.pattern = 'direction {}'.format(
                        direction_counter)
                    direction_nodes.append(fact_node)
                else:
                    fact_counter += 1
                    fact_node.pattern = 'fact {}'.format(fact_counter)
                    fact_nodes.append(fact_node)

    def store(self, filename):
        """Serialize the container's nodes and connections to *filename*
        as JSON (encoded with the project's OntoEncoder)."""
        out_val = {
            'nodes': self.container.nodes,
            'connections': self.container.connections
        }
        with open(filename, mode='wt', encoding='utf-8') as output_file:
            print(self._serialize(out_val), file=output_file)

    @staticmethod
    def _serialize(value):
        """JSON-encode *value* with the project's OntoEncoder."""
        return json.dumps(value, cls=OntoEncoder)

    def _make_connections(self, lines):
        """Build per-line node lists for every non-comment line, then the
        simple and knowledge-level connections."""
        for line in lines:
            if not line.startswith('#'):
                self.line_nodes[line] = self._build_nodes(line)

        self._make_simple_connections(lines)
        self._make_knowledge_connections(lines)

    def _make_knowledge_connections(self, lines):
        """Repeatedly merge the most frequent node bigram until no bigram
        occurs at least twice."""
        while self._make_knowledge_connections_1pass(lines):
            pass

    def _make_knowledge_connections_1pass(self, lines):
        """Perform one merge pass: find the most frequent node bigram
        across all fact lines and, if it occurs at least twice, replace it
        everywhere with a new abstract "combined" node.

        Returns True when a merge happened (caller should run another
        pass), False otherwise.
        """
        fact_node_lists = []
        for line in lines:
            if line not in self.line_nodes:
                continue

            nodes = self.line_nodes[line]
            if len(nodes) > 2:
                fact_node_lists.append(nodes)

        bigrams = []
        for f_nodes in fact_node_lists:
            bigrams.extend(self._get_possible_bigrams(f_nodes))

        # Bug fix: with no fact lines there are no bigrams and
        # Counter().most_common(1) is an empty list — previously this
        # raised IndexError on the [0] below.
        if not bigrams:
            return False

        top_frequent = Counter(bigrams).most_common(1)[0]
        num_times = top_frequent[1]
        if num_times < 2:
            return False

        node1_id, node2_id = top_frequent[0].split('-')
        node1 = self.container.get_node_by_id(node1_id)
        node2 = self.container.get_node_by_id(node2_id)

        # Create the combined abstract node.
        combined_node = Node(self.container.next_node_id(),
                             node1.pattern + ' ' + node2.pattern,
                             self.container)
        combined_node.abstract = True
        self.container.nodes.append(combined_node)

        # Replace the two source nodes with the combined node in every
        # fact that contains both.
        for nodes in self.line_nodes.values():
            if node1 in nodes and node2 in nodes:
                nodes[nodes.index(node1)] = combined_node
                nodes.remove(node2)

        # Connect the source nodes to the new abstract node; the weight
        # grows with how often the bigram was seen, capped at 1.0.
        weight = min(1.0, num_times * minimal_weight)

        self._add_bidirect_connections(node1, combined_node, weight)
        self._add_bidirect_connections(node2, combined_node, weight)
        return True

    def _make_simple_connections(self, lines):
        """Directly connect the two nodes of short 'X is Y' style lines
        (fewer than six terms, containing 'is')."""
        for line in lines:
            if line not in self.line_nodes:
                continue
            terms = self.tokenize_line(line)
            if len(terms) < 6 and 'is' in terms:
                self._build_simple_connection(line)

    def _get_possible_bigrams(self, nodes):
        """Return order-independent bigram keys for adjacent and one-apart
        node pairs, preserving first-seen order without duplicates."""
        bigrams = []
        for i in range(len(nodes) - 1):
            node1 = nodes[i]
            node2 = nodes[i + 1]
            bigram = self._get_bigram_repr(node1, node2)
            if bigram not in bigrams:
                bigrams.append(bigram)
            if i < len(nodes) - 2:
                node2 = nodes[i + 2]
                bigram = self._get_bigram_repr(node1, node2)
                if bigram not in bigrams:
                    bigrams.append(bigram)
        return bigrams

    def _get_bigram_repr(self, node1, node2):
        """Return a canonical 'minId-maxId' key for the node pair."""
        id1 = int(node1.node_id)
        id2 = int(node2.node_id)
        return '{}-{}'.format(min(id1, id2), max(id1, id2))

    def _build_nodes(self, line):
        """Return the (possibly newly created) container nodes for the
        non-stop-word terms of *line*."""
        terms = self.tokenize_line(line)
        nodes = []
        for term in terms:
            if term in self.stop_words:
                continue
            node = self.container.get_node_by_pattern(term)
            if node is None:
                node = Node(self.container.next_node_id(), term,
                            self.container)
                self.container.nodes.append(node)
            nodes.append(node)

        return nodes

    def _is_fact(self, nodes):
        """A line is a fact when it contains an abstract node or more than
        two nodes (callers only test the truthiness of the result)."""
        return any(node.abstract for node in nodes) or len(nodes) > 2

    def _build_simple_connection(self, line):
        """Connect the exactly-two nodes of *line* with minimal weight.

        Raises:
            Exception: when the line produced more than two nodes.
        """
        nodes = self.line_nodes[line]
        if len(nodes) > 2:
            raise Exception(
                'cannot handle more than 2 nodes in _build_simple_connection()'
            )

        self._add_bidirect_connections(nodes[0], nodes[1], minimal_weight)

    def _build_fact(self, nodes):
        """Create an abstract "knowledge center" hub node and connect it
        to every node of the fact with minimal weight."""
        fact_node = Node(self.container.next_node_id(),
                         pattern='',
                         container=self.container,
                         abstract=True)
        fact_node.knowledge_center = True
        self.container.nodes.append(fact_node)
        for node in nodes:
            self._add_bidirect_connections(fact_node, node, minimal_weight)
        return fact_node

    def _add_bidirect_connections(self, node1, node2, weight):
        """Append a weighted connection in each direction between the
        two nodes.

        Raises:
            Exception: if both arguments are the same node.
        """
        if node1 == node2:
            # Fix: raise Exception instead of BaseException so ordinary
            # `except Exception` handlers can catch it (still caught by
            # any existing `except BaseException`).
            raise Exception('cannot connect node to itself')
        for source, target in ((node1, node2), (node2, node1)):
            connection = Connection(source=source,
                                    target=target,
                                    container=self.container)
            connection.weight = weight
            self.container.connections.append(connection)
from onto.onto_container import OntoContainer

# Load the light-match ontology from disk into a fresh container.
container = OntoContainer()
container.load("light_match.json")