Пример #1
0
def test_extract_spans_forward_backward():
    """Run extract_spans forward and backward and check shapes and lengths."""
    model = extract_spans().initialize()
    ops = model.ops

    # 15 rows of width 4, split into two sequences of 5 and 10 rows.
    token_data = ops.alloc2f(15, 4)
    token_lengths = ops.asarray([5, 10], dtype="i")
    X = Ragged(token_data, token_lengths)

    # Three (start, end) spans: two for the first sequence, one for the second.
    span_bounds = ops.asarray([[0, 3], [2, 3], [5, 7]], dtype="i")
    spans_per_seq = ops.asarray([2, 1], dtype="i")
    spans = Ragged(span_bounds, spans_per_seq)

    Y, backprop = model.begin_update((X, spans))
    # One output sequence per span, each as long as its span (end - start).
    assert list(Y.lengths) == [3, 1, 2]
    assert Y.dataXd.shape == (6, 4)

    dX, spans_back = backprop(Y)
    # The spans object is handed back unchanged.
    assert spans_back is spans
    # The input gradient has the same layout as the input.
    assert dX.dataXd.shape == X.dataXd.shape
    assert list(dX.lengths) == list(X.lengths)
Пример #2
0
def test_extract_spans_span_indices():
    """Check the flat row indices that _get_span_indices computes for spans."""
    model = extract_spans().initialize()
    ops = model.ops

    # Two spans for the first sequence, one for the second.
    span_bounds = ops.asarray([[0, 3], [2, 3], [5, 7]], dtype="i")
    spans_per_seq = ops.asarray([2, 1], dtype="i")
    spans = Ragged(span_bounds, spans_per_seq)

    x_lengths = ops.asarray([5, 10], dtype="i")
    indices = _get_span_indices(ops, spans, x_lengths)

    # The last span's indices are 10 and 11, i.e. its [5, 7) bounds shifted
    # by the first sequence's length of 5.
    expected = [0, 1, 2, 2, 10, 11]
    assert list(indices) == expected
Пример #3
0
def test_spancat_model_forward_backward(nO=5):
    """Build a spancat model, run it forward in training mode, then backprop."""
    tok2vec = build_Tok2Vec_model(**get_tok2vec_kwargs())
    docs = get_docs()

    # Two spans per doc: tokens [0, 2) and tokens [1, 4).
    spans_list = []
    lengths = []
    for doc in docs:
        spans_list.extend((doc[:2], doc[1:4]))
        lengths.append(2)

    ops = tok2vec.ops
    span_bounds = ops.asarray(
        [[span.start, span.end] for span in spans_list], dtype="i"
    )
    spans = Ragged(span_bounds, ops.asarray(lengths, dtype="i"))

    scorer = chain(Relu(nO=nO), Logistic())
    model = build_spancat_model(tok2vec, reduce_mean(), scorer)
    model = model.initialize(X=(docs, spans))

    Y, backprop = model((docs, spans), is_train=True)
    # One row of nO scores per span.
    n_spans = spans.dataXd.shape[0]
    assert Y.shape == (n_spans, nO)
    backprop(Y)
Пример #4
0
def span_maker_forward(model, docs: List[Doc],
                       is_train) -> Tuple[Ragged, Callable]:
    """Build a token window of surrounding sentences for every entity.

    For each entity, the window starts at the first token of the sentence
    ``n_sents`` before the entity's sentence and ends at the end of the
    sentence ``n_sents`` after it, clipped to the sentences in the doc.

    Side effect: if iterating ``doc.sents`` raises ValueError, the doc is
    modified so that only its first token is a sentence start, and the
    whole doc is treated as one sentence.

    model: Supplies ``ops`` and the ``"n_sents"`` attribute.
    docs: Documents whose ``ents`` are converted into windows.
    is_train: Not used by this function.
    RETURNS: A Ragged of (start_token, end_token) rows with one length per
        doc, and a backprop callback that always returns an empty list.
    RAISES: RuntimeError(Errors.E030) if looking up an entity's sentence
        raises AttributeError.
    """
    ops = model.ops
    n_sents = model.attrs["n_sents"]
    candidates = []
    for doc in docs:
        cands = []
        try:
            sentences = list(doc.sents)
        except ValueError:
            # no sentence info, normal in initialization
            for tok in doc:
                tok.is_sent_start = tok.i == 0
            sentences = [doc[:]]
        last_sentence = len(sentences) - 1
        for ent in doc.ents:
            try:
                # find the sentence in the list of sentences.
                sent_index = sentences.index(ent.sent)
            except AttributeError:
                # Catch the exception when ent.sent is None and provide a
                # user-friendly error instead
                raise RuntimeError(Errors.E030) from None
            # get n previous sentences, if there are any
            start_sentence = max(0, sent_index - n_sents)
            # get n posterior sentences, or as many < n as there are
            end_sentence = min(last_sentence, sent_index + n_sents)
            # save token positions for extraction
            cands.append(
                (sentences[start_sentence].start, sentences[end_sentence].end)
            )

        candidates.append(ops.asarray2i(cands))
    candlens = ops.asarray1i([len(cands) for cands in candidates])
    # Use ops.flatten instead of xp.concatenate: concatenate raises when
    # `docs` is empty, and also when a doc without entities contributes an
    # empty 1-d array next to other docs' 2-d arrays. flatten drops empty
    # arrays before concatenating and copes with an empty list.
    outputs = Ragged(ops.flatten(candidates), candlens)
    # because this is just rearranging docs, the backprop does nothing
    return outputs, lambda x: []
Пример #5
0
def ragged_data(ops, list_data):
    """Pack a list of arrays into a Ragged.

    The lengths are the ``len`` of each item in ``list_data``. An empty
    ``list_data`` produces a Ragged holding an empty (0, 0) float array.
    """
    lengths = numpy.array([len(item) for item in list_data], dtype="i")
    if list_data:
        return Ragged(ops.flatten(list_data), lengths)
    # Nothing to flatten: fall back to an empty 2-d float array.
    return Ragged(ops.alloc2f(0, 0), lengths)
Пример #6
0
import pytest
import numpy
from thinc.api import get_width, Ragged, Padded
from thinc.util import get_array_module, is_numpy_array, to_categorical
from thinc.util import convert_recursive
from thinc.types import ArgsKwargs


@pytest.mark.parametrize(
    ("obj", "width"),
    [
        # 4-d array
        (numpy.zeros((1, 2, 3, 4)), 4),
        # 0-d (scalar) array
        (numpy.array(1), 0),
        # 1-d array; expected 3 is presumably max value + 1 — confirm
        # against get_width
        (numpy.array([1, 2]), 3),
        # list of arrays
        ([numpy.zeros((1, 2)), numpy.zeros(1)], 2),
        # Ragged container
        (Ragged(numpy.zeros((1, 2)), numpy.zeros(1)), 2),
        # Padded container
        (
            Padded(
                numpy.zeros((2, 1, 2)),
                numpy.zeros(2),
                numpy.array([1, 0]),
                numpy.array([0, 1]),
            ),
            2,
        ),
        # empty list
        ([], 0),
    ],
)
def test_get_width(obj, width):
    """Check that get_width reports the expected width for each input kind."""
    measured = get_width(obj)
    assert measured == width
Пример #7
0
import pytest
import numpy
from thinc.api import get_width, Ragged, Padded
from thinc.util import get_array_module, is_numpy_array, to_categorical
from thinc.util import convert_recursive
from thinc.types import ArgsKwargs


@pytest.mark.parametrize(
    ("obj", "width"),
    [
        # 4-d array
        (numpy.zeros((1, 2, 3, 4)), 4),
        # 0-d (scalar) array
        (numpy.array(1), 0),
        # 1-d array; expected 3 is presumably max value + 1 — confirm
        # against get_width
        (numpy.array([1, 2]), 3),
        # list of arrays
        ([numpy.zeros((1, 2)), numpy.zeros(1)], 2),
        # Ragged container
        (Ragged(numpy.zeros((1, 2)), numpy.zeros(1)), 2),  # type:ignore
        # Padded container
        (
            Padded(
                numpy.zeros((2, 1, 2)),  # type:ignore
                numpy.zeros(2),  # type:ignore
                numpy.array([1, 0]),  # type:ignore
                numpy.array([0, 1]),  # type:ignore
            ),
            2,
        ),
        # empty list
        ([], 0),
    ],
)
def test_get_width(obj, width):
    """Check that get_width reports the expected width for each input kind."""
    measured = get_width(obj)
    assert measured == width
Пример #8
0
def assert_ragged_data_match(X, Y):
    """Unpack X and Y into Ragged objects and compare them.

    Each argument is splatted into ``Ragged(...)``; the comparison itself is
    delegated to ``assert_raggeds_match``, whose result is returned.
    """
    left = Ragged(*X)
    right = Ragged(*Y)
    return assert_raggeds_match(left, right)
Пример #9
0
 def zero_suggester(docs, *, ops=None):
     """Return a Ragged with no span rows and a zero length for every doc.

     docs: Only ``len(docs)`` is used, to size the lengths array.
     ops: Backend to allocate with; defaults to ``get_current_ops()``.
     """
     if ops is None:
         ops = get_current_ops()
     xp = ops.xp
     # Empty (0, 0) integer data: nothing is suggested.
     no_spans = xp.zeros((0, 0), dtype="i")
     # One zero-valued length entry per doc.
     spans_per_doc = xp.zeros((len(docs), ), dtype="i")
     return Ragged(no_spans, spans_per_doc)