def test_basic():
    nr_class = 3
    model = AveragedPerceptron(((1,), (2,), (3,), (4,), (5,)))
    instances = [
        (1, {1: 1, 3: -5}),
        (2, {2: 4, 3: 5})
    ]
    for clas, feats in instances:
        eg = Example(nr_class)
        eg.features = feats
        model(eg)
        eg.costs = [i != clas for i in range(nr_class)]
        model.update(eg)
    eg = Example(nr_class)
    eg.features = {1: 2, 2: 1}
    model(eg)
    assert eg.guess == 2
    eg = Example(nr_class)
    eg.features = {0: 2, 2: 1}
    model(eg)
    assert eg.scores[1] == 0
    eg = Example(nr_class)
    eg.features = {1: 2, 2: 1}
    model(eg)
    assert eg.scores[2] > 0
    eg = Example(nr_class)
    eg.features = {1: 2, 1: 1}
    model(eg)
    assert eg.scores[1] > 0
    eg = Example(nr_class)
    eg.features = {0: 3, 3: 1}
    model(eg)
    assert eg.scores[1] < 0
    eg = Example(nr_class)
    eg.features = {0: 3, 3: 1}
    model(eg)
    assert eg.scores[2] > 0
import pytest


@pytest.fixture
def model(instances):
    # Build one feature template per feature key seen in the data.
    templates = []
    for batch in instances:
        for _, feats in batch:
            for key in feats:
                templates.append((key,))
    templates = tuple(set(templates))
    model = AveragedPerceptron(templates)
    # Apply one round of raw weight updates per batch.
    for batch in instances:
        model.time += 1
        for clas, feats in batch:
            for key, value in feats.items():
                model.update_weight(key, clas, value)
    return model
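# The `instances` fixture consumed by model() above is not included in this
# excerpt. A minimal sketch of what it might look like, assuming each batch
# is a list of (class, feature-dict) pairs as the loops in model() expect;
# the particular classes and feature values here are invented.
@pytest.fixture
def instances():
    return [
        [(1, {1: 1, 3: -5}), (2, {2: 4, 3: 5})],
        [(3, {1: 3, 4: 1}), (1, {2: 2, 5: -1})],
    ]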
def test_dump_load(model):
    loc = '/tmp/test_model'
    model.end_training()
    model.dump(loc)
    string = open(loc, 'rb').read()
    assert string
    new_model = AveragedPerceptron([(1,), (2,), (3,), (4,)])
    nr_class = 5
    assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) != \
        get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
    assert get_scores(nr_class, model, {2: 1, 5: 1}) != \
        get_scores(nr_class, new_model, {2: 1, 5: 1})
    assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) != \
        get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
    new_model.load(loc)
    assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) == \
        get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
    assert get_scores(nr_class, model, {2: 1, 5: 1}) == \
        get_scores(nr_class, new_model, {2: 1, 5: 1})
    assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) == \
        get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
import tempfile


def test_dump_load(model):
    loc = tempfile.mkstemp()[1]
    model.end_training()
    model.dump(loc)
    string = open(loc, 'rb').read()
    assert string
    new_model = AveragedPerceptron([(1,), (2,), (3,), (4,)])
    nr_class = 5
    assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) != \
        get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
    assert get_scores(nr_class, model, {2: 1, 5: 1}) != \
        get_scores(nr_class, new_model, {2: 1, 5: 1})
    assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) != \
        get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
    new_model.load(loc)
    assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) == \
        get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
    assert get_scores(nr_class, model, {2: 1, 5: 1}) == \
        get_scores(nr_class, new_model, {2: 1, 5: 1})
    assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) == \
        get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
def test_basic():
    nr_class = 3
    model = AveragedPerceptron(((1,), (2,), (3,), (4,), (5,)))
    instances = [
        (1, {1: 1, 3: -5}),
        (2, {2: 4, 3: 5})
    ]
    for clas, feats in instances:
        eg = Example(nr_class)
        eg.set_features(feats)
        model(eg)
        eg.set_label(clas)
        model.update(eg)
    eg = Example(nr_class)
    eg.set_features({1: 2, 2: 1})
    model(eg)
    assert eg.guess == 2
    eg = Example(nr_class)
    eg.set_features({0: 2, 2: 1})
    model(eg)
    assert eg.scores[1] == 0
    eg = Example(nr_class)
    eg.set_features({1: 2, 2: 1})
    model(eg)
    assert eg.scores[2] > 0
    eg = Example(nr_class)
    eg.set_features({1: 2, 1: 1})
    model(eg)
    assert eg.scores[1] > 0
    eg = Example(nr_class)
    eg.set_features({0: 3, 3: 1})
    model(eg)
    assert eg.scores[1] < 0
    eg = Example(nr_class)
    eg.set_features({0: 3, 3: 1})
    model(eg)
    assert eg.scores[2] > 0
import random


class ThincModel(object):
    def __init__(self, nlp, nr_class):
        self.nlp = nlp
        self.nr_class = nr_class
        self._eg = Example(nr_class=nr_class)
        self._model = AveragedPerceptron([])

    def Eg(self, text, opt=None, label=None):
        """Build an Example with unigram and hashed-bigram features for `text`."""
        eg = self._eg
        eg.reset()
        doc = self.nlp(text)
        features = []
        word_types = set()
        for i, token in enumerate(doc[:-1]):
            next_token = doc[i + 1]
            strings = (token.lower_, next_token.lower_)
            key = hash_string('%s_%s' % strings)
            feat_slot = 0
            feat_value = 1
            # Unigram feature (lowercase lexeme ID) plus hashed bigram feature.
            features.append((0, token.lower, 1))
            features.append((feat_slot, key, feat_value))
        eg.features = features
        if opt is not None:
            # Restrict prediction to the candidate classes in `opt`.
            eg.is_valid = [(clas in opt) for clas in range(self.nr_class)]
        if label is not None:
            # Zero cost for the gold class, cost 1 for every other class.
            eg.costs = [clas != label for clas in range(self.nr_class)]
        return eg

    def predict(self, text, opt):
        return self._model.predict_example(self.Eg(text, opt))

    def train(self, examples, n_iter=5):
        for i in range(n_iter):
            loss = 0
            random.shuffle(examples)
            negation_count = 0
            for text, opt, label in examples:
                eg = self.Eg(text, opt, label)
                self._model.train_example(eg)
                loss += eg.guess != label
            print(loss)
        self._model.end_training()

    def evaluate(self, examples):
        total = 0
        correct = 0
        for i, (text, opt, label) in enumerate(examples):
            eg = self.predict(text, opt)
            correct += eg.guess == label
            total += 1
        return correct / total

    def dump(self, loc):
        self._model.dump(loc)

    def load(self, loc):
        self._model.load(loc)
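# Minimal usage sketch for ThincModel, not part of the original code. It
# assumes `nlp` is a loaded spaCy pipeline and that training/evaluation data
# are (text, candidate_classes, label) triples, as train() and evaluate()
# above expect; the example texts, labels, and dump path are invented.
import spacy

nlp = spacy.load('en')
clf = ThincModel(nlp, nr_class=3)

data = [
    ('this movie was great', [0, 1, 2], 2),
    ('this movie was terrible', [0, 1, 2], 0),
]
clf.train(data, n_iter=5)
print(clf.evaluate(data))
clf.dump('/tmp/thinc_doc_model')  # hypothetical path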
def __init__(self, n_classes, get_bow, *args, **kwargs):
    AveragedPerceptron.__init__(self, tuple())
    self.nr_class = n_classes
    self.get_bow = get_bow