def test_basic(instances, sgd):
    X, y = instances
    nr_class = 3
    model = LinearModel(nr_class)
    yh, backprop = model.begin_update(X)
    loss1 = ((yh - y) ** 2).sum()
    backprop(yh - y, sgd)
    yh, backprop = model.begin_update(X)
    loss2 = ((yh - y) ** 2).sum()
    assert loss2 < loss1
    print(loss2, loss1)
def test_init():
    model = LinearModel(3)
    keys = numpy.ones((5,), dtype="uint64")
    values = numpy.ones((5,), dtype="f")
    lengths = numpy.zeros((2,), dtype=numpy.int_)
    lengths[0] = 3
    lengths[1] = 2
    scores, backprop = model.begin_update((keys, values, lengths))
    assert scores.shape == (2, 3)
    d_feats = backprop(scores)
    assert d_feats is None
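# Sketch of the pytest fixtures the two tests above assume; they are not defined
# in these snippets, so the values (and the thinc 7.x import paths) are assumptions.
# `instances` pairs a (keys, values, lengths) batch, as exercised in test_init,
# with one row of one-hot targets per document; `sgd` is a plain SGD optimizer.
import numpy
import pytest
from thinc.neural.ops import NumpyOps
from thinc.neural.optimizers import SGD


@pytest.fixture
def sgd():
    return SGD(NumpyOps(), 0.001)


@pytest.fixture
def instances():
    # Two documents with 5 and 4 features each, and three classes (as in test_basic).
    keys = numpy.arange(9, dtype="uint64")
    values = numpy.ones((9,), dtype="f")
    lengths = numpy.asarray([5, 4], dtype=numpy.int_)
    y = numpy.asarray([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]], dtype="f")
    return (keys, values, lengths), y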
def main():
    train, dev = datasets.imdb()
    train_X, train_y = zip(*train)
    dev_X, dev_y = zip(*dev)
    model = LinearModel(2)
    train_y = to_categorical(train_y, nb_classes=2)
    dev_y = to_categorical(dev_y, nb_classes=2)
    nlp = spacy.load("en")
    # Encode each text as an array of token hash ids.
    train_X = [
        model.ops.asarray([tok.orth for tok in doc], dtype="uint64")
        for doc in nlp.pipe(train_X)
    ]
    dev_X = [
        model.ops.asarray([tok.orth for tok in doc], dtype="uint64")
        for doc in nlp.pipe(dev_X)
    ]
    dev_X = preprocess(model.ops, dev_X)
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        trainer.dropout = 0.0
        trainer.batch_size = 512
        trainer.nb_epoch = 3
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            keys_vals_lens = preprocess(model.ops, X)
            scores, backprop = model.begin_update(keys_vals_lens, drop=trainer.dropout)
            backprop(scores - y, optimizer)
    # Evaluate with the averaged parameters.
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
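# Minimal sketch of the `preprocess` helper used by main() above; it is not defined
# in these snippets, so this is an assumption based on the (keys, values, lengths)
# format exercised in test_init: concatenate every document's token ids, weight each
# id by 1.0, and record how many ids each document contributed.
def preprocess(ops, docs):
    keys = ops.xp.concatenate(docs)
    values = ops.xp.ones(keys.shape, dtype="f")
    lengths = ops.xp.asarray([len(doc) for doc in docs], dtype="int64")
    return keys, values, lengths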
def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
        if cfg.get("low_data") and pretrained_dims:
            # Small-data architecture: rely on pretrained vectors only.
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        # Hash-embed rows for the LOWER, PREFIX, SUFFIX and SHAPE of each token.
        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)
        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
        ) >> with_flatten(
            uniqued(
                (lower | prefix | suffix | shape)
                >> LN(Maxout(width, width + (width // 2) * 3)),
                column=0,
            )
        )
        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims)
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        cnn_model = (
            vectors
            >> with_flatten(
                LN(Maxout(width, vectors_width))
                >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** 2,
                pad=2,
            )
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )
        linear_model = _preprocess_doc >> LinearModel(nr_class, drop_factor=0.0)
        # Ensemble the linear bag-of-words model with the CNN model.
        model = (
            (linear_model | cnn_model)
            >> zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
            >> logistic
        )
    model.nO = nr_class
    model.lsuv = False
    return model
def build_bow_text_classifier(
    nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
):
    with Model.define_operators({">>": chain}):
        model = with_cpu(
            Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
        )
        if not no_output_layer:
            model = model >> (cpu_softmax if exclusive_classes else logistic)
    model.nO = nr_class
    return model
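# Usage sketch (an assumed call pattern, not taken from the snippets above): the
# bag-of-words model scores a batch of spaCy Docs and returns one row of class
# scores per Doc; `nlp` is assumed to be a loaded spaCy pipeline as in main().
def demo_bow_classifier(nlp, texts):
    model = build_bow_text_classifier(nr_class=2, ngram_size=2, exclusive_classes=True)
    docs = list(nlp.pipe(texts))
    scores = model(docs)  # forward pass only; shape (len(docs), model.nO)
    return scores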
def test_pickle_linear_model():
    # Smoke test: the model should survive a pickle round-trip without raising.
    model = LinearModel(10)
    model2 = pickle.loads(pickle.dumps(model))
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
        if cfg.get("low_data") and pretrained_dims:
            # Small-data architecture: rely on pretrained vectors only.
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        # Hash-embed rows for the LOWER, PREFIX, SUFFIX and SHAPE of each token.
        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)
        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
        ) >> with_flatten(
            uniqued(
                (lower | prefix | suffix | shape)
                >> LN(Maxout(width, width + (width // 2) * 3)),
                column=0,
            )
        )
        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims)
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )
        linear_model = _preprocess_doc >> with_cpu(Model.ops, LinearModel(nr_class))
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (
                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
            )
        # Ensemble the linear bag-of-words model with the CNN model.
        model = (linear_model | cnn_model) >> output_layer
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model
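# Construction sketch (assumed call pattern, mirroring the cfg keys the function
# reads): an exclusive-class ensemble with a three-block-deep CNN and no static
# vectors. The tok2vec attribute set above can be reused as a shared encoder.
def demo_build_ensemble():
    textcat = build_text_classifier(
        nr_class=4, width=64, depth=3, nr_vector=5000,
        pretrained_dims=0, exclusive_classes=True,
    )
    assert textcat.nO == 4
    return textcat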