Example #1
def create_pretraining_model(nlp, tok2vec, objective="basic"):
    """Define a network for the pretraining."""
    output_size = nlp.vocab.vectors.data.shape[1]
    # This is annoying, but the parser etc have the flatten step after
    # the tok2vec. To load the weights in cleanly, we need to match
    # the shape of the models' components exactly. So what we call
    # "tok2vec" has to be the same set of processes as what the components do.
    with Model.define_operators({">>": chain, "|": concatenate}):

        l2r_model = (
            tok2vec.l2r
            >> flatten
            >> LN(Maxout(output_size, tok2vec.l2r.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )
        r2l_model = (
            tok2vec.r2l
            >> flatten
            >> LN(Maxout(output_size, tok2vec.r2l.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )

        model = tok2vec.embed >> (l2r_model | r2l_model)

    model.tok2vec = tok2vec
    model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tok2vec.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tokvecs = tok2vec([nlp.make_doc("hello there"), nlp.make_doc("and hello")])
    print(tokvecs.shape)
    return model
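The ">>" and "|" operators used above are bound by Model.define_operators to combinators such as chain and concatenate. The toy sketch below (plain Python, purely illustrative, not the thinc API) shows the composition semantics: chaining feeds one layer's output into the next, while concatenation runs branches on the same input and joins their outputs.

# Toy stand-ins for the ">>" (chain) and "|" (concatenate) combinators.
def toy_chain(*layers):
    """Compose layers so each one's output becomes the next one's input."""
    def forward(x):
        for layer in layers:
            x = layer(x)
        return x
    return forward

def toy_concatenate(*layers):
    """Run every layer on the same input and join the pieces of output."""
    def forward(x):
        return tuple(part for layer in layers for part in layer(x))
    return forward

double = lambda x: (2 * x,)
add_one = lambda x: tuple(v + 1 for v in x)

model = toy_chain(double, toy_concatenate(add_one, add_one))
print(model(3))   # (7, 7): both branches receive the doubled input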
Example #2
def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators({
            '>>': chain,
            '+': add,
            '|': concatenate,
            '**': clone
    }):
        if cfg.get('low_data') and pretrained_dims:
            model = (SpacyVectors >> flatten_add_lengths >> with_getitem(
                0, Affine(width, pretrained_dims)) >>
                     ParametricAttention(width) >> Pooling(sum_pool) >>
                     Residual(ReLu(width, width))**2 >> zero_init(
                         Affine(nr_class, width, drop_factor=0.0)) >> logistic)
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = (FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]) >> with_flatten(
                uniqued((lower | prefix | suffix | shape) >> LN(
                    Maxout(width, width + (width // 2) * 3)),
                        column=0)))

        if pretrained_dims:
            static_vectors = (
                SpacyVectors >> with_flatten(Affine(width, pretrained_dims)))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        cnn_model = (
            vectors >> with_flatten(
                LN(Maxout(width, vectors_width)) >> Residual(
                    (ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))))**2,
                pad=2) >> flatten_add_lengths >> ParametricAttention(width) >>
            Pooling(sum_pool) >> Residual(zero_init(Maxout(width, width))) >>
            zero_init(Affine(nr_class, width, drop_factor=0.0)))

        linear_model = (
            _preprocess_doc >> LinearModel(nr_class, drop_factor=0.))

        model = ((linear_model | cnn_model) >> zero_init(
            Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic)
    model.nO = nr_class
    model.lsuv = False
    return model
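HashEmbed above embeds lexical attributes by hashing each attribute value into a fixed-size table (nr_vector rows), so unseen values still map to some row instead of requiring one row per vocabulary item. A rough numpy sketch of that hashing-trick embedding; toy_hash_embed is a hypothetical helper, not the thinc layer, which is more careful about hash collisions.

import numpy as np

def toy_hash_embed(ids, width=64, nr_vector=5000, seed=0):
    """Hashing-trick embedding: map arbitrary integer IDs into a fixed-size table."""
    rng = np.random.default_rng(seed)
    table = rng.normal(scale=0.1, size=(nr_vector, width)).astype("float32")
    rows = [(i * 2654435761) % nr_vector for i in ids]   # cheap multiplicative hash
    return table[rows]

vectors = toy_hash_embed([101, 2**40 + 7, 101])   # unseen IDs still get a row
print(np.allclose(vectors[0], vectors[2]))        # True: same ID, same vector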
Example #3
def fine_tune_class_vector(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    softmax them, and then mean-pool them to produce one feature per vector.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    return chain(
        get_pytt_class_tokens, flatten_add_lengths,
        with_getitem(
            0,
            chain(Affine(cfg["token_vector_width"], cfg["token_vector_width"]),
                  tanh)), Pooling(mean_pool),
        Affine(2, cfg["token_vector_width"], drop_factor=0), softmax)
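The flatten_add_lengths >> Pooling(mean_pool) pattern used here (and in several examples below) turns a batch of per-token arrays into one vector per document: the arrays are concatenated into one matrix, the per-document lengths are kept alongside it, and pooling averages each document's slice. A small numpy sketch of the same reduction, illustrative only:

import numpy as np

def mean_pool_docs(token_vectors):
    """Mean-pool a list of (n_tokens, width) arrays into a (n_docs, width) array."""
    flat = np.concatenate(token_vectors, axis=0)         # "flatten"
    lengths = np.array([len(t) for t in token_vectors])  # "add_lengths"
    ends = np.cumsum(lengths)
    starts = ends - lengths
    return np.stack([flat[s:e].mean(axis=0) for s, e in zip(starts, ends)])

docs = [np.ones((3, 4)), 2 * np.ones((5, 4))]
print(mean_pool_docs(docs))   # row 0 is all 1.0, row 1 is all 2.0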
Example #4
def create_pretraining_model(nlp, tok2vec, objective="cosine", nr_char=10):
    """Define a network for the pretraining. We simply add an output layer onto
    the tok2vec input model. The tok2vec input model needs to be a model that
    takes a batch of Doc objects (as a list), and returns a list of arrays.
    Each array in the output needs to have one row per token in the doc.
    """
    if objective == "characters":
        out_sizes = [256] * nr_char
        output_layer = chain(LN(Maxout(300, pieces=3)),
                             MultiSoftmax(out_sizes, 300))
    else:
        output_size = nlp.vocab.vectors.data.shape[1]
        output_layer = chain(LN(Maxout(300, pieces=3)),
                             Affine(output_size, drop_factor=0.0))
    # This is annoying, but the parser etc have the flatten step after
    # the tok2vec. To load the weights in cleanly, we need to match
    # the shape of the models' components exactly. So what we call
    # "tok2vec" has to be the same set of processes as what the components do.
    tok2vec = chain(tok2vec, flatten)
    model = chain(tok2vec, output_layer)
    model = masked_language_model(nlp.vocab, model)
    model.tok2vec = tok2vec
    model.output_layer = output_layer
    model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    return model
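The docstring spells out the contract the tok2vec argument has to satisfy: given a list of Doc objects, return a list of arrays with one row per token. The snippet below is a hypothetical stand-in that only illustrates that shape contract; plain token lists stand in for Doc objects, and a real tok2vec must be a trainable thinc model.

import numpy as np

def dummy_tok2vec(docs, width=300):
    """Return one (n_tokens, width) array per document, as the contract requires."""
    return [np.random.uniform(-1, 1, (len(doc), width)).astype("float32")
            for doc in docs]

docs = ["Give it a doc".split(), ["hello"]]       # stand-ins for Doc objects
print([a.shape for a in dummy_tok2vec(docs)])     # [(4, 300), (1, 300)]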
Example #5
File: _ml.py  Project: cs394-s20/Aqua
def build_nel_encoder(embed_width, hidden_width, ner_types, **cfg):
    if "entity_width" not in cfg:
        raise ValueError(Errors.E144.format(param="entity_width"))

    conv_depth = cfg.get("conv_depth", 2)
    cnn_maxout_pieces = cfg.get("cnn_maxout_pieces", 3)
    pretrained_vectors = cfg.get("pretrained_vectors", None)
    context_width = cfg.get("entity_width")

    with Model.define_operators({">>": chain, "**": clone}):
        # context encoder
        tok2vec = Tok2Vec(
            width=hidden_width,
            embed_size=embed_width,
            pretrained_vectors=pretrained_vectors,
            cnn_maxout_pieces=cnn_maxout_pieces,
            subword_features=True,
            conv_depth=conv_depth,
            bilstm_depth=0,
        )

        model = (
            tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> Residual(
                zero_init(Maxout(hidden_width, hidden_width))) >> zero_init(
                    Affine(context_width, hidden_width, drop_factor=0.0)))

        model.tok2vec = tok2vec
        model.nO = context_width
    return model
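Several of these models wrap a layer in Residual(zero_init(...)). A residual block computes x + f(x), and zero-initialising f's output weights makes the block behave as the identity at the start of training. A toy numpy illustration of that idea, not the thinc Residual class:

import numpy as np

class ToyResidual:
    """y = x + f(x); with zero-initialised weights, f(x) == 0 at the start."""
    def __init__(self, width):
        self.W = np.zeros((width, width))   # "zero_init": block begins as the identity
        self.b = np.zeros(width)

    def __call__(self, x):
        return x + np.maximum(0.0, x @ self.W.T + self.b)   # ReLU inner layer

block = ToyResidual(4)
x = np.arange(4.0)
print(np.allclose(block(x), x))   # True before any training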
Example #6
def test_unwrapped(nN=2, nI=3, nO=4):
    if PyTorchWrapper is None:
        return
    model = Affine(nO, nI)
    X = numpy.zeros((nN, nI), dtype="f")
    X += numpy.random.uniform(size=X.size).reshape(X.shape)
    sgd = SGD(model.ops, 0.001)
    Y = numpy.zeros((nN, nO), dtype="f")
    check_learns_zero_output(model, sgd, X, Y)
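check_learns_zero_output trains the model against an all-zero target with SGD and checks that the outputs shrink; it is a quick sanity check that gradients flow through the (possibly wrapped) layer. A plain-numpy version of the same check for an affine layer, an illustrative sketch rather than the thinc test helper:

import numpy as np

def train_toward_zero(nN=2, nI=3, nO=4, lr=0.1, steps=500, seed=0):
    """Fit Y = X @ W.T + b toward an all-zero target with plain SGD."""
    rng = np.random.default_rng(seed)
    X = rng.uniform(size=(nN, nI)).astype("float32")
    W = rng.normal(scale=0.1, size=(nO, nI)).astype("float32")
    b = np.zeros(nO, dtype="float32")
    for _ in range(steps):
        Y = X @ W.T + b              # forward pass
        dY = Y / nN                  # gradient: push the outputs toward zero
        W -= lr * dY.T @ X           # backward pass / SGD update
        b -= lr * dY.sum(axis=0)
    return float(np.abs(X @ W.T + b).max())

print(train_toward_zero())   # should be very close to 0.0 after training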
Example #7
def sigmoid_last_hidden(nr_class, *, exclusive_classes=False, **cfg):
    width = cfg["token_vector_width"]
    return chain(
        get_last_hidden,
        flatten_add_lengths,
        Pooling(mean_pool),
        zero_init(Affine(nr_class, width, drop_factor=0.0)),
        logistic,
    )
Example #8
def softmax_tanh_class_vector(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    mean-pool them, and softmax to produce one vector per document.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    width = cfg["token_vector_width"]
    return chain(get_pytt_class_tokens, flatten_add_lengths,
                 with_getitem(0, chain(Affine(width, width), tanh)),
                 Pooling(mean_pool), Softmax(2, width))
Example #9
File: ner.py  Project: entn-at/active_ner
    def Model(cls, nr_class, **cfg):
        depth = util.env_opt('parser_hidden_depth', cfg.get('hidden_depth', 1))
        subword_features = util.env_opt('subword_features',
                                        cfg.get('subword_features', True))
        conv_depth = util.env_opt('conv_depth', cfg.get('conv_depth', 4))
        conv_window = util.env_opt('conv_window', cfg.get('conv_window', 1))
        t2v_pieces = util.env_opt('cnn_maxout_pieces',
                                  cfg.get('cnn_maxout_pieces', 3))
        bilstm_depth = util.env_opt('bilstm_depth', cfg.get('bilstm_depth', 0))
        self_attn_depth = util.env_opt('self_attn_depth',
                                       cfg.get('self_attn_depth', 0))
        assert depth == 1
        parser_maxout_pieces = util.env_opt('parser_maxout_pieces',
                                            cfg.get('maxout_pieces', 2))
        token_vector_width = util.env_opt('token_vector_width',
                                          cfg.get('token_vector_width', 96))
        hidden_width = util.env_opt('hidden_width',
                                    cfg.get('hidden_width', 64))
        embed_size = util.env_opt('embed_size', cfg.get('embed_size', 2000))
        tok2vec = get_t2v(token_vector_width,
                          embed_size,
                          conv_depth=conv_depth,
                          conv_window=conv_window,
                          cnn_maxout_pieces=t2v_pieces,
                          subword_features=subword_features,
                          bilstm_depth=bilstm_depth)
        tok2vec = chain(tok2vec, flatten)
        tok2vec.nO = token_vector_width
        lower = PrecomputableAffine(hidden_width,
                                    nF=cls.nr_feature,
                                    nI=token_vector_width,
                                    nP=parser_maxout_pieces)
        lower.nP = parser_maxout_pieces

        with Model.use_device('cpu'):
            upper = Affine(nr_class, hidden_width, drop_factor=0.0)
        upper.W *= 0

        cfg = {
            'nr_class': nr_class,
            'hidden_depth': depth,
            'token_vector_width': token_vector_width,
            'hidden_width': hidden_width,
            'maxout_pieces': parser_maxout_pieces,
            'pretrained_vectors': None,
            'bilstm_depth': bilstm_depth,
            'self_attn_depth': self_attn_depth,
            'conv_depth': conv_depth,
            'conv_window': conv_window,
            'embed_size': embed_size,
            'cnn_maxout_pieces': t2v_pieces
        }
        return ParserModel(tok2vec, lower, upper), cfg
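Every hyper-parameter above is resolved through util.env_opt, which lets an environment variable override the value passed in cfg, which in turn overrides the hard-coded default. A minimal sketch of that lookup order; toy_env_opt is a hypothetical helper, not spaCy's implementation:

import os

def toy_env_opt(name, default):
    """Prefer an environment-variable override, else use the supplied default."""
    raw = os.environ.get(name)
    return default if raw is None else type(default)(raw)

cfg = {"conv_depth": 2}
conv_depth = toy_env_opt("conv_depth", cfg.get("conv_depth", 4))
print(conv_depth)   # 2, unless a conv_depth environment variable overrides it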
Example #10
File: _ml.py  Project: dev-seasia/dev
def build_nel_encoder(embed_width, hidden_width, ner_types, **cfg):
    # TODO proper error
    if "entity_width" not in cfg:
        raise ValueError("entity_width not found")
    if "context_width" not in cfg:
        raise ValueError("context_width not found")

    conv_depth = cfg.get("conv_depth", 2)
    cnn_maxout_pieces = cfg.get("cnn_maxout_pieces", 3)
    pretrained_vectors = cfg.get(
        "pretrained_vectors")  # self.nlp.vocab.vectors.name
    context_width = cfg.get("context_width")
    entity_width = cfg.get("entity_width")

    with Model.define_operators({">>": chain, "**": clone}):
        model = (
            Affine(entity_width, entity_width + context_width + 1 + ner_types)
            >> Affine(1, entity_width, drop_factor=0.0) >> logistic)

        # context encoder
        tok2vec = (Tok2Vec(
            width=hidden_width,
            embed_size=embed_width,
            pretrained_vectors=pretrained_vectors,
            cnn_maxout_pieces=cnn_maxout_pieces,
            subword_features=True,
            conv_depth=conv_depth,
            bilstm_depth=0,
        ) >> flatten_add_lengths >> Pooling(mean_pool) >> Residual(
            zero_init(Maxout(hidden_width, hidden_width))) >> zero_init(
                Affine(context_width, hidden_width)))

        model.tok2vec = tok2vec

    model.tok2vec = tok2vec
    model.tok2vec.nO = context_width
    model.nO = 1
    return model
Example #11
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (HashEmbed(width, 5000, column=1)
                 | StaticVectors(vectors_name, width, column=5)
                 | HashEmbed(width // 2, 750, column=2)
                 | HashEmbed(width // 2, 750, column=3)
                 | HashEmbed(width // 2, 750, column=4)) >> LN(Maxout(width))

        sent2vec = (with_flatten(embed) >> Residual(
            prepare_self_attention(Affine(width * 3, width), nM=width, nH=4) >>
            MultiHeadedAttention() >> with_flatten(
                Maxout(width, width, pieces=3))) >> flatten_add_lengths >>
                    ParametricAttention(width, hard=False) >>
                    Pooling(mean_pool) >> Residual(LN(Maxout(width))))

        model = (foreach(sent2vec, drop_factor=2.0) >> Residual(
            prepare_self_attention(Affine(width * 3, width), nM=width, nH=4) >>
            MultiHeadedAttention() >> with_flatten(LN(Affine(width, width))))
                 >> flatten_add_lengths >> ParametricAttention(
                     width, hard=False) >> Pooling(mean_pool) >> Residual(
                         LN(Maxout(width)))**2 >> Softmax(nr_class))
    model.lsuv = False
    return model
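prepare_self_attention and MultiHeadedAttention wrap the standard attention computation. Below is a generic single-head, scaled dot-product self-attention in numpy, shown only to illustrate the underlying maths, not the thinc layers (which add multiple heads and batching):

import numpy as np

def self_attention(X, Wq, Wk, Wv):
    """Single-head scaled dot-product self-attention over one sentence."""
    Q, K, V = X @ Wq, X @ Wk, X @ Wv
    scores = Q @ K.T / np.sqrt(K.shape[-1])          # (n_tokens, n_tokens)
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)   # softmax over the keys
    return weights @ V                               # (n_tokens, width)

n, width = 5, 8
rng = np.random.default_rng(0)
X = rng.normal(size=(n, width))
out = self_attention(X, *(rng.normal(size=(width, width)) for _ in range(3)))
print(out.shape)   # (5, 8)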
Example #12
def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, **cfg):
    """
    Build a simple CNN text classifier, given a token-to-vector model as input.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].
    """
    with Model.define_operators({">>": chain}):
        if exclusive_classes:
            output_layer = Softmax(nr_class, tok2vec.nO)
        else:
            output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
        model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    return model
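The docstring's distinction between exclusive and non-exclusive classes comes down to the output non-linearity: Softmax makes the class scores compete and sum to 1, while the zero-initialised Affine followed by logistic scores each label independently in (0, 1). A small numpy illustration of the difference:

import numpy as np

scores = np.array([2.0, 0.5, -1.0])               # raw class scores for one document

softmax = np.exp(scores) / np.exp(scores).sum()   # exclusive classes: competes, sums to 1
logistic = 1.0 / (1.0 + np.exp(-scores))          # multi-label: each label scored independently

print(softmax.round(3))    # approximately [0.786 0.175 0.039]
print(logistic.round(3))   # approximately [0.881 0.622 0.269]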
Example #13
File: _ml.py  Project: cs394-s20/Aqua
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({
            ">>": chain,
            "+": add,
            "|": concatenate,
            "**": clone
    }):
        if cfg.get("low_data") and pretrained_dims:
            model = (SpacyVectors >> flatten_add_lengths >> with_getitem(
                0, Affine(width, pretrained_dims)) >>
                     ParametricAttention(width) >> Pooling(sum_pool) >>
                     Residual(ReLu(width, width))**2 >> zero_init(
                         Affine(nr_class, width, drop_factor=0.0)) >> logistic)
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]) >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape) >> LN(
                        Maxout(width, width + (width // 2) * 3)),
                    column=0,
                ))

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width)) >> Residual(
                (ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))))**depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec >> flatten_add_lengths >> ParametricAttention(width) >>
            Pooling(sum_pool) >> Residual(zero_init(Maxout(width, width))) >>
            zero_init(Affine(nr_class, width, drop_factor=0.0)))

        linear_model = build_bow_text_classifier(nr_class,
                                                 ngram_size=cfg.get(
                                                     "ngram_size", 1),
                                                 exclusive_classes=False)
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (zero_init(
                Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic)
        model = (linear_model | cnn_model) >> output_layer
        model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model
Example #14
def create_model(nr_in, nr_out):
    return Affine(nr_in, nr_out)
Example #15
def affine():
    return Affine(5, 3)
Example #16
def get_model(W_values, b_values):
    model = Affine(W_values.shape[0], W_values.shape[1], ops=NumpyOps())
    model.initialize_params()
    model.W[:] = W_values
    model.b[:] = b_values
    return model
def affine_output(nO, nI, drop_factor, **cfg):
    return Affine(nO, nI, drop_factor=drop_factor)
def tensor_affine_tok2vec(output_size, tensor_size, **cfg):
    return chain(get_tensors, flatten, Affine(output_size, tensor_size))
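All of these snippets rely on the same Affine convention: the layer holds a weight matrix of shape (nO, nI) and a bias of length nO, and its forward pass computes Y = X @ W.T + b, which is consistent with get_model in Example #16 reading nO and nI from W_values.shape. A plain numpy equivalent, assuming that convention:

import numpy as np

def affine_forward(X, W, b):
    """Forward pass of an affine layer with W of shape (nO, nI) and b of shape (nO,)."""
    return X @ W.T + b

W = np.zeros((5, 3), dtype="float32")   # nO=5 outputs, nI=3 inputs
b = np.arange(5, dtype="float32")
X = np.ones((2, 3), dtype="float32")    # a batch of two examples
print(affine_forward(X, W, b))          # every row equals b while W is all zeros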