Example #1
def main(width=64,
         depth=2,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         dropout=0.9,
         dropout_decay=1e-3,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        # Columns index into the extracter's attrs: 0=LOWER, 2=PREFIX, 3=SUFFIX.
        lower_case = Embed(width, vector_length, 5000, column=0)
        prefix = Embed(width, vector_length, 5000, column=2)
        suffix = Embed(width, vector_length, 5000, column=3)

        model = (
            layerize(flatten_sequences)
            >> (lower_case + prefix + suffix)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width)) ** depth
            >> Softmax(nr_tag)
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2).sum() / y.shape[0]
            if loss > 0.:
                optimizer.set_loss(loss)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            if epoch_train_acc / n_train >= 0.999:
                break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
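
A note on the schedule in the loop above (the same pattern recurs in Example #14 below): the batch size starts at min_batch_size, is multiplied by 1.001 after every update, and is capped at max_batch_size. A quick check of how long the ramp takes, in plain Python independent of the example:

import math

# Updates needed to grow the batch size from 1 to 32 at 0.1% per step:
steps = math.log(32 / 1) / math.log(1.001)   # ≈ 3467 updates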
Example #2
def create_attn_proxy(attn):
    """Return a proxy to the attention layer which will fetch the attention
    weights on each call, appending them to the list 'output'.
    """
    output = []

    def get_weights(Xs_lengths, drop=0.):
        Xs, lengths = Xs_lengths
        output.append(attn._get_attention(attn.Q, Xs, lengths)[0])
        return attn.begin_update(Xs_lengths, drop=drop)

    return output, layerize(get_weights)
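
A usage sketch for the proxy (hedged: `attn`, `Xs`, and `lengths` are placeholders; as the body above requires, `attn` must be an attention layer exposing a `Q` parameter and a `_get_attention` method):

# weights_log collects one attention-weights array per forward call.
weights_log, proxied = create_attn_proxy(attn)
yh, backprop = proxied.begin_update((Xs, lengths))
latest_weights = weights_log[-1]   # weights from the most recent call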
Example #3
File: _ml.py Project: spacy-io/spaCy
def doc2feats(cols=None):
    if cols is None:
        cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]

    def forward(docs, drop=0.0):
        feats = []
        for doc in docs:
            feats.append(doc.to_array(cols))
        return feats, None  # no backward pass: feature extraction is not differentiable

    model = layerize(forward)
    model.cols = cols
    return model
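
A minimal usage sketch (assuming spaCy is installed and the attribute IDs are imported as in `_ml.py`):

import spacy

nlp = spacy.blank("en")
docs = [nlp("The quick brown fox"), nlp("jumped")]
feats_model = doc2feats()
feats = feats_model(docs)   # one (n_tokens, 6) array per Doc, one column per attr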
Example #4
def FeatureExtracter(lang, attrs=[LOWER, SHAPE, PREFIX, SUFFIX], tokenized=True):
    nlp = spacy.blank(lang)
    nlp.vocab.lex_attr_getters[PREFIX] = lambda string: string[:3]
    nlp.vocab.lex_attr_getters[SUFFIX] = lambda string: string[-3:]
    def forward(texts, drop=0.):
        if tokenized:
            docs = [Doc(nlp.vocab, words) for words in texts]
        else:
            docs = [nlp(text) for text in texts]
        features = [doc.to_array(attrs) for doc in docs]
        def backward(d_features, sgd=None):
            return d_features
        return features, backward
    return layerize(forward)
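
A usage sketch with pre-tokenized input, the default mode (hedged: assumes the spaCy attribute IDs are imported as in the snippet):

extracter = FeatureExtracter("en", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
feats = extracter([["hello", "world"], ["thinc"]])
# feats[0].shape == (2, 4): one row per token, one column per attribute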
Example #5
File: _ml.py Project: spacy-io/spaCy
def get_col(idx):
    if idx < 0:
        raise IndexError(Errors.E066.format(value=idx))

    def forward(X, drop=0.0):
        if isinstance(X, numpy.ndarray):
            ops = NumpyOps()
        else:
            ops = CupyOps()
        output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)

        def backward(y, sgd=None):
            dX = ops.allocate(X.shape)
            dX[:, idx] += y
            return dX

        return output, backward

    return layerize(forward)
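
A standalone sketch of the slice and its scatter-style backward pass:

import numpy

X = numpy.arange(12, dtype="f").reshape(3, 4)
col = get_col(1)
y, backprop = col.begin_update(X)          # y == X[:, 1]
dX = backprop(numpy.ones(3, dtype="f"))    # zeros except column 1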
Example #6
def Siamese(layer, similarity):
    def begin_update(inputs, drop=0.):
        ops = layer.ops
        if drop != 0.:
            dropped = []
            for in1, in2 in inputs:
                if in1.size > in2.size:
                    mask = _get_mask(ops, in1.shape, drop)
                else:
                    mask = _get_mask(ops, in2.shape, drop)
                in1 = in1 * mask[:in1.shape[0]]
                in2 = in2 * mask[:in2.shape[0]]
                dropped.append((in1, in2))
            inputs = dropped

        input1, input2 = list(zip(*inputs))
        vec1, bp_vec1 = layer.begin_update(input1, drop=0.)
        vec2, bp_vec2 = layer.begin_update(input2, drop=0.)
        output, bp_output = similarity.begin_update((vec1, vec2), drop=0.)

        def finish_update(d_output, sgd=None):
            d_vec1, d_vec2 = bp_output(d_output, sgd)
            # Both inputs were encoded by the same layer, so pass the real
            # optimizer only once: bp_vec1 gets a no-op sgd so gradients just
            # accumulate, and bp_vec2 applies the combined update.
            d_input1 = bp_vec1(d_vec1, lambda *args, **kwargs: None)
            d_input2 = bp_vec2(d_vec2, sgd)
            return (d_input1, d_input2)

        return output, finish_update

    model = layerize(begin_update)

    model._layers.append(layer)
    model._layers.append(similarity)

    def on_data(self, X, y):
        input1, input2 = list(zip(*X))
        for hook in layer.on_data_hooks:
            hook(layer, input1, y)

    model.on_data_hooks.append(on_data)
    return model
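
A wiring sketch (hedged: `encoder`, `width`, and the paired inputs are placeholders; `CauchySimilarity` is one similarity layer thinc shipped at the time, and its import path and signature may differ across versions):

from thinc.neural._classes.similarity import CauchySimilarity

model = Siamese(encoder, CauchySimilarity(width))
pairs = list(zip(left_inputs, right_inputs))
scores, backprop = model.begin_update(pairs)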
Example #7
File: _ml.py Project: AvinashGupta/spaCy
def get_col(idx):
    assert idx >= 0, idx

    def forward(X, drop=0.):
        assert idx >= 0, idx
        if isinstance(X, numpy.ndarray):
            ops = NumpyOps()
        else:
            ops = CupyOps()
        output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)

        def backward(y, sgd=None):
            assert idx >= 0, idx
            dX = ops.allocate(X.shape)
            dX[:, idx] += y
            return dX

        return output, backward

    return layerize(forward)
Example #8
def Residual(layer):
    def forward(X, drop=0.0):
        y, bp_y = layer.begin_update(X, drop=drop)
        output = X + y

        def backward(d_output, sgd=None):
            return d_output + bp_y(d_output, sgd)

        return output, backward

    model = layerize(forward)
    model._layers.append(layer)

    def on_data(self, X, y=None):
        for layer in self._layers:
            for hook in layer.on_data_hooks:
                hook(layer, X, y)

    model.on_data_hooks.append(on_data)
    return model
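
For reference, Example #1 composes this as `Residual(ExtractWindow(nW=1) >> Maxout(width)) ** depth`. A standalone sketch without the operator overloads (hedged: thinc v6/v7 import paths; `ExtractWindow(nW=1)` widens each row to `3 * width`, so the Maxout must project back to `width` for `X + y` to line up):

from thinc.api import chain
from thinc.t2t import ExtractWindow
from thinc.v2v import Maxout

width = 64
block = Residual(chain(ExtractWindow(nW=1), Maxout(width, width * 3)))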
Example #9
def Siamese(layer, similarity):
    def begin_update(inputs, drop=0.):
        ops = layer.ops
        if drop not in (None, 0.):
            dropped = []
            for in1, in2 in inputs:
                if in1.size > in2.size:
                    mask = _get_mask(ops, in1.shape, drop)
                else:
                    mask = _get_mask(ops, in2.shape, drop)
                in1 = in1 * mask[:in1.shape[0]]
                in2 = in2 * mask[:in2.shape[0]]
                dropped.append((in1, in2))
            inputs = dropped

        input1, input2 = zip(*inputs)
        vec1, bp_vec1 = layer.begin_update(input1, drop=0.)
        vec2, bp_vec2 = layer.begin_update(input2, drop=0.)
        output, bp_output = similarity.begin_update((vec1, vec2), drop=0.)
        def finish_update(d_output, sgd=None):
            d_vec1, d_vec2 = bp_output(d_output, sgd)
            # Both inputs were encoded by the same layer, so pass the real
            # optimizer only once: bp_vec1 gets a no-op sgd so gradients just
            # accumulate, and bp_vec2 applies the combined update.
            d_input1 = bp_vec1(d_vec1, lambda *args, **kwargs: None)
            d_input2 = bp_vec2(d_vec2, sgd)
            return (d_input1, d_input2)
        return output, finish_update
    model = layerize(begin_update)

    model._layers.append(layer)
    model._layers.append(similarity)
    def on_data(self, X, y):
        input1, input2 = zip(*X)
        for hook in layer.on_data_hooks:
            hook(layer, input1, y)
    model.on_data_hooks.append(on_data)
    return model
Example #10
File: _ml.py Project: spacy-io/spaCy
def getitem(i):
    def getitem_fwd(X, drop=0.0):
        return X[i], None

    return layerize(getitem_fwd)
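
A sketch: the layer is handy for dropping auxiliary outputs such as lengths; since the backward callback is `None`, it is prediction-only (`arrays` and `lengths` here are placeholders):

first = getitem(0)
data = first((arrays, lengths))   # keeps arrays, drops lengths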
Example #11
File: _ml.py Project: spacy-io/spaCy
def print_shape(prefix):
    def forward(X, drop=0.0):
        # Debug passthrough: log the shape under `prefix`, return X unchanged.
        print(prefix, getattr(X, "shape", None))
        return X, lambda dX, **kwargs: dX

    return layerize(forward)
Example #12
File: _ml.py Project: spacy-io/spaCy
def asarray(ops, dtype):
    def forward(X, drop=0.0):
        return ops.asarray(X, dtype=dtype), None

    return layerize(forward)
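
A sketch of casting raw Python lists at a pipeline boundary (hedged on the thinc v6/v7 import path for `NumpyOps`):

from thinc.neural.ops import NumpyOps

cast = asarray(NumpyOps(), "float32")
X = cast([[1, 2], [3, 4]])   # float32 numpy array; no backward pass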
Example #13
File: _ml.py Project: spacy-io/spaCy
def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
    if depth == 0:
        return layerize(noop())
    model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
    return with_square_sequences(PyTorchWrapperRNN(model))
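
A construction sketch: `nO` is split across the two directions (`nO // 2` each), so it should be even, and `depth == 0` degenerates to a pass-through no-op:

lstm = PyTorchBiLSTM(nO=128, nI=96, depth=2)   # 64 hidden units per direction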
Example #14
def main(
    width=300,
    depth=4,
    vector_length=64,
    min_batch_size=1,
    max_batch_size=32,
    dropout=0.9,
    dropout_decay=1e-3,
    nb_epoch=20,
    L2=1e-6,
    device="cpu",
):
    cfg = dict(locals())
    print(cfg, file=sys.stderr)
    if cupy is not None and device != "cpu":
        print("Using GPU", file=sys.stderr)
        Model.ops = CupyOps()
        Model.ops.device = device
    train_data, check_data, tag_map = twitter_ner()
    dev_words, dev_tags = zip(*check_data)
    nr_tag = len(tag_map)

    extracter = FeatureExtracter("en", attrs=[ORTH, LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        glove = StaticVectors("en", width // 2, column=0)  # built but not used in the model below
        lower_case = HashEmbed(width, 500, column=1) + HashEmbed(width, 100, column=1)
        shape = HashEmbed(width // 2, 200, column=2)
        prefix = HashEmbed(width // 2, 100, column=3)
        suffix = HashEmbed(width // 2, 100, column=4)

        model = (
            layerize(flatten_sequences)
            >> (lower_case | shape | prefix | suffix)
            >> BN(Maxout(width, pieces=3), nO=width)
            >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, pieces=3), nO=width))
            ** depth
            >> Softmax(nr_tag)
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            # if epoch_train_acc / n_train >= 0.999:
            #    break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)), file=sys.stderr)
        print_dev_sentences(model, dev_words, dev_tags, dev_X, tag_map)
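
A width check on the model above (plain arithmetic from the definitions: `+` is element-wise `add`, so the two lower-case HashEmbeds keep width 300, while `|` concatenates):

width = 300
lower_w = width                        # two HashEmbed(width, ...) summed element-wise
shape_w = prefix_w = suffix_w = width // 2
concat_w = lower_w + shape_w + prefix_w + suffix_w   # 750 into the first Maxout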