Exemplo n.º 1
0
 def test_add(self):
     """Tokens are registered once each, whether added singly or by sentence."""
     manager = IdManager()
     assert 'a' not in manager

     manager.add_token('a')
     assert 'a' in manager

     # Adding the same token a second time must not grow the manager.
     manager.add_token('a')
     assert len(manager) == 1

     # A sentence contributes every token it contains.
     manager.add_sent(['b', 'c'])
     assert len(manager) == 3
     assert 'b' in manager
     assert 'c' in manager
Exemplo n.º 2
0
class SimpleIntent(object):
    """General intent used to match sentences or phrases.

    A sentence (a list of token strings) is converted into an input vector
    through ``self.ids`` and scored by the neural network in ``self.net``.
    ``self.net`` is ``None`` until :meth:`train` or :meth:`from_file` has
    run, so :meth:`match` and :meth:`save` must not be called before then.
    """

    # Target output used for training sentences padded with ':null:' tokens.
    LENIENCE = 0.6

    def __init__(self, name=''):
        """Create an untrained intent called *name*."""
        self.name = name
        self.ids = IdManager(Ids)
        self.net = None  # set by configure_net() or from_file()

    def match(self, sent):
        """Return the network's first output for *sent*, floored at 0."""
        activation = self.net.run(self.vectorize(sent))[0]
        return max(0, activation)

    def vectorize(self, sent):
        """Build the network input vector for the token list *sent*.

        Every token known to ``self.ids`` is assigned 1.0.  For a non-empty
        sentence the fraction of unknown tokens and four length features
        (sentence length divided by 1, 2, 3 and 4) are assigned as well.
        """
        vector = self.ids.vector()
        unknown = 0
        for token in sent:
            if token in self.ids:
                self.ids.assign(vector, token, 1.0)
            else:
                unknown += 1
        if len(sent) > 0:
            # Convert once so that every feature, including w_1, is a float
            # regardless of the interpreter's division semantics.
            length = float(len(sent))
            self.ids.assign(vector, Ids.unknown_tokens, unknown / length)
            self.ids.assign(vector, Ids.w_1, length / 1.)
            self.ids.assign(vector, Ids.w_2, length / 2.)
            self.ids.assign(vector, Ids.w_3, length / 3.)
            self.ids.assign(vector, Ids.w_4, length / 4.)
        return vector

    def configure_net(self):
        """Replace ``self.net`` with a fresh, untrained network.

        Layer sizes are ``len(self.ids)`` inputs, 10 hidden units and a
        single output; training stops based on the bit-fail count with a
        bit-fail limit of 0.1.
        """
        self.net = fann.neural_net()
        self.net.create_standard_array([len(self.ids), 10, 1])
        self.net.set_activation_function_hidden(
            fann.SIGMOID_SYMMETRIC_STEPWISE)
        self.net.set_activation_function_output(
            fann.SIGMOID_SYMMETRIC_STEPWISE)
        self.net.set_train_stop_function(fann.STOPFUNC_BIT)
        self.net.set_bit_fail_limit(0.1)

    @staticmethod
    def _word_weights(sent):
        """Return ``(word, weight)`` pairs for the one-word samples of *sent*.

        A word's weight is the cube of its length divided by the summed
        cubes of all words in the sentence.  Words starting with ``'{'``
        count towards that sum but get weight 0.  When the sum is zero
        (every word is empty) all weights are 0.0 rather than dividing by
        zero.
        """
        cubes = [pow(len(word), 3.0) for word in sent]
        total = 0.0
        for cube in cubes:
            total += cube

        pairs = []
        for word, cube in zip(sent, cubes):
            if total == 0.0 or word.startswith('{'):
                pairs.append((word, 0.0))
            else:
                pairs.append((word, cube / total))
        return pairs

    @staticmethod
    def _pollute(sent, p):
        """Return a copy of *sent* with ':null:' tokens inserted at index *p*.

        One token is inserted for every three words, rounded up; *sent*
        itself is left unmodified.
        """
        padded = sent[:]
        padded[p:p] = [':null:'] * ((len(sent) + 2) // 3)
        return padded

    def train(self, train_data):
        """Train ``self.net`` on the sentences provided by *train_data*.

        Samples are generated in this order:

        * for each of this intent's sentences: the sentence itself (target
          1.0), each word on its own (target from :meth:`_word_weights`)
          and, unless a word already starts with ``':'``, two copies padded
          with ':null:' at the front and at the back (target ``LENIENCE``);
        * every sentence of the other intents (target 0.0);
        * the empty sentence (target 0.0).

        Training is attempted up to 10 times from a freshly configured
        network and stops early once no output bit fails.
        """
        # Register the vocabulary first so that vectorize() knows all tokens.
        for sent in train_data.my_sents(self.name):
            self.ids.add_sent(sent)

        inputs = []
        outputs = []

        def add(vec, out):
            inputs.append(self.vectorize(vec))
            outputs.append([out])

        for sent in train_data.my_sents(self.name):
            add(sent, 1.0)
            for word, share in self._word_weights(sent):
                add([word], share)
            # startswith() instead of word[0] so an empty token cannot
            # raise IndexError.
            if not any(word.startswith(':') for word in sent):
                add(self._pollute(sent, 0), self.LENIENCE)
                add(self._pollute(sent, len(sent)), self.LENIENCE)

        for sent in train_data.other_sents(self.name):
            add(sent, 0.0)
        add([], 0.0)

        inputs, outputs = resolve_conflicts(inputs, outputs)

        # Kept in its own name so the train_data argument is not rebound.
        fann_data = fann.training_data()
        fann_data.set_train_data(inputs, outputs)

        for _ in range(10):
            self.configure_net()
            # FANN argument order: max epochs, epochs between reports,
            # desired error.
            self.net.train_on_data(fann_data, 1000, 0, 0)
            self.net.test_data(fann_data)
            if self.net.get_bit_fail() == 0:
                break

    def save(self, prefix):
        """Write the network to ``<prefix>.intent.net`` and save the ids."""
        prefix += '.intent'
        self.net.save(str(prefix + '.net'))  # Must have str()
        self.ids.save(prefix)

    @classmethod
    def from_file(cls, name, prefix):
        """Return an intent called *name* restored from files under *prefix*.

        Reads the network from ``<prefix>.intent.net`` and loads the ids
        using the same ``<prefix>.intent`` prefix that :meth:`save` uses.
        """
        prefix += '.intent'
        intent = cls(name)
        intent.net = fann.neural_net()
        intent.net.create_from_file(str(prefix + '.net'))  # Must have str()
        intent.ids.load(prefix)
        return intent