from pathlib import Path

import srsly
import yaml

# Assumed: the extensions treated as plain text; defined elsewhere in the module.
_TEXT_EXT = {'.txt', '.md', '.csv', '.tsv'}


def interpret_file(path, encoding='utf-8', readers: dict = None):
    """Read a file using the proper loader for its extension."""
    path = Path(path).expanduser().resolve()
    s = path.suffix.lower()
    if readers is None:
        readers = {}
    elif not isinstance(readers, dict):
        assert callable(readers)
        readers = {s: readers}
    if s in readers:
        func = readers[s]
        assert callable(func)
        return func(path)
    elif s == '.json':
        return srsly.read_json(path)
    elif s == '.jsonl':
        return srsly.read_jsonl(path)
    elif s in ('.yml', '.yaml'):
        # PyYAML requires an explicit Loader; safe_load avoids executing
        # arbitrary tags from untrusted files.
        return yaml.safe_load(path.read_bytes())
    elif s in ('.pkl', '.bin', '.pickle'):
        # Pickle payloads are binary, so read raw bytes rather than decoded text.
        return srsly.pickle_loads(path.read_bytes())
    elif s not in _TEXT_EXT:
        return path.read_bytes()
    else:
        return path.read_text(encoding=encoding)
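A minimal usage sketch; the file names and the CSV reader below are hypothetical:

# Dispatch is driven by the extension; a bare callable overrides the default
# loader for the given file's suffix.
config = interpret_file('config.yml')    # parsed with yaml.safe_load
records = interpret_file('data.jsonl')   # parsed with srsly.read_jsonl
rows = interpret_file('notes.csv', readers=lambda p: p.read_text().splitlines())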
Example #2
def test_pickle_with_flatten(linear):
    Xs = [linear.ops.alloc2f(2, 3), linear.ops.alloc2f(4, 3)]
    model = with_array(linear).initialize()
    pickled = srsly.pickle_dumps(model)
    loaded = srsly.pickle_loads(pickled)
    Ys = loaded.predict(Xs)
    assert len(Ys) == 2
    assert Ys[0].shape == (Xs[0].shape[0], linear.get_dim("nO"))
    assert Ys[1].shape == (Xs[1].shape[0], linear.get_dim("nO"))
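The bytes from srsly.pickle_dumps can go straight to disk; a minimal persistence
sketch (the "model.pkl" path is hypothetical):

from pathlib import Path
import srsly

Path("model.pkl").write_bytes(srsly.pickle_dumps(model))
restored = srsly.pickle_loads(Path("model.pkl").read_bytes())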
Example #3
def test_pickle_string_store(text1, text2):
    stringstore = StringStore()
    store1 = stringstore[text1]
    store2 = stringstore[text2]
    data = srsly.pickle_dumps(stringstore, protocol=-1)
    unpickled = srsly.pickle_loads(data)
    assert unpickled[text1] == store1
    assert unpickled[text2] == store2
    assert len(stringstore) == len(unpickled)
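protocol=-1 follows the stdlib convention of selecting the highest available
pickle protocol; a small round-trip sketch:

import pickle
import srsly

# srsly wraps cloudpickle, whose output for plain objects is standard pickle data.
data = srsly.pickle_dumps({"a": 1}, protocol=-1)
assert srsly.pickle_loads(data) == {"a": 1}
assert pickle.loads(data) == {"a": 1}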
Example #4
def test_pickle_vocab(text1, text2):
    vocab = Vocab(lex_attr_getters={int(NORM): lambda string: string[:-1]})
    vocab.set_vector("dog", numpy.ones((5, ), dtype="f"))
    lex1 = vocab[text1]
    lex2 = vocab[text2]
    assert lex1.norm_ == text1[:-1]
    assert lex2.norm_ == text2[:-1]
    data = srsly.pickle_dumps(vocab)
    unpickled = srsly.pickle_loads(data)
    assert unpickled[text1].orth == lex1.orth
    assert unpickled[text2].orth == lex2.orth
    assert unpickled[text1].norm == lex1.norm
    assert unpickled[text2].norm == lex2.norm
    assert unpickled[text1].norm != unpickled[text2].norm
    assert unpickled.vectors is not None
    assert list(vocab["dog"].vector) == [1.0, 1.0, 1.0, 1.0, 1.0]
Example #5
def test_pickle_doc(en_vocab):
    words = ["a", "b", "c"]
    deps = ["dep"] * len(words)
    heads = [0] * len(words)
    doc = Doc(
        en_vocab,
        words=words,
        deps=deps,
        heads=heads,
    )
    data = srsly.pickle_dumps(doc)
    unpickled = srsly.pickle_loads(data)
    assert [t.text for t in unpickled] == words
    assert [t.dep_ for t in unpickled] == deps
    assert [t.head.i for t in unpickled] == heads
    assert list(doc.noun_chunks) == []
Example #6
def test_phrase_matcher_pickle(en_vocab):
    matcher = PhraseMatcher(en_vocab)
    mock = Mock()
    matcher.add("TEST", [Doc(en_vocab, words=["test"])])
    matcher.add("TEST2", [Doc(en_vocab, words=["test2"])], on_match=mock)
    doc = Doc(en_vocab, words=["these", "are", "tests", ":", "test", "test2"])
    assert len(matcher) == 2

    b = srsly.pickle_dumps(matcher)
    matcher_unpickled = srsly.pickle_loads(b)

    # call after pickling to avoid recursion error related to mock
    matches = matcher(doc)
    matches_unpickled = matcher_unpickled(doc)

    assert len(matcher) == len(matcher_unpickled)
    assert matches == matches_unpickled

    # clunky way to vaguely check that callback is unpickled
    (vocab, docs, callbacks, attr) = matcher_unpickled.__reduce__()[1]
    assert isinstance(callbacks.get("TEST2"), Mock)
Example #7
def train_epoch(
    model, sgd, hparams, train_X, train_y, dev_X, dev_y, device_id=-1, temperature=0.0
):
    model, sgd, hparams = srsly.pickle_loads(srsly.pickle_dumps((model, sgd, hparams)))
    if device_id >= 0:
        model.to_gpu(device_id)
        sgd.ops = model.ops
        sgd.to_gpu()
        if isinstance(train_y, numpy.ndarray):
            train_y = model.ops.asarray(train_y)
            dev_y = model.ops.asarray(dev_y)
    hparams = resample_hyper_params(hparams, temperature)
    sgd.learn_rate = hparams["learn_rate"]
    sgd.beta1 = hparams["beta1"]
    sgd.beta2 = hparams["beta2"]
    sgd.L2 = hparams["L2"]
    train_acc = 0.0
    train_n = 0
    for X, y in minibatch(
        train_X, train_y, size=hparams["batch_size"], nr_update=hparams["nr_update"]
    ):
        yh, finish_update = model.begin_update(X, drop=hparams["dropout"])
        if hasattr(y, "shape"):
            dy = (yh - y) / y.shape[0]
            train_acc += (y.argmax(axis=1) == yh.argmax(axis=1)).sum()
            train_n += y.shape[0]
        else:
            n_y = sum(len(y_i) for y_i in y)
            dy = [(yh[i] - y[i]) / n_y for i in range(len(yh))]
            for i in range(len(y)):
                train_acc += (y[i].argmax(axis=1) == yh[i].argmax(axis=1)).sum()
            train_n += n_y
        finish_update(dy, sgd=sgd)
    train_acc /= train_n
    with model.use_params(sgd.averages):
        dev_acc = model.evaluate(dev_X, dev_y)
    model.to_cpu()
    sgd.to_cpu()
    return device_id, ((model, sgd, hparams), float(train_acc), float(dev_acc))
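The round-trip on the function's first line is the common deep-copy-via-pickle
idiom: each worker mutates a private copy of the model, optimizer, and
hyperparameters. A minimal sketch of the idiom:

import srsly

state = {"learn_rate": 0.001, "beta1": 0.9}
copy = srsly.pickle_loads(srsly.pickle_dumps(state))
copy["learn_rate"] = 0.01
assert state["learn_rate"] == 0.001  # the original is untouched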
Example #8
def __setstate__(self, state_data):
    self.__dict__ = srsly.pickle_loads(state_data)
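A __setstate__ like this pairs with a __getstate__ that pickles the instance
dict; a hedged sketch of the counterpart (not from the original project):

def __getstate__(self):
    return srsly.pickle_dumps(self.__dict__)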
Example #9
def from_bytes(self, bytes_data, **kwargs):
    pkls = srsly.pickle_loads(bytes_data)
    for field in self.serialization_fields:
        setattr(self, field, pkls[field])
    return self
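The matching to_bytes would pickle the same fields into one payload; a sketch
assuming serialization_fields holds plain attribute names:

def to_bytes(self, **kwargs):
    pkls = {field: getattr(self, field) for field in self.serialization_fields}
    return srsly.pickle_dumps(pkls)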
Example #10
File: util.py  Project: zxlzr/spikex
def pickle_loads(data):
    return srsly.pickle_loads(data)
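The project presumably exposes a matching dumps wrapper; a one-line sketch of
that counterpart (assumed, not verified against the spikex source):

def pickle_dumps(data):
    return srsly.pickle_dumps(data)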