示例#1
0
def dset(ps, kind):
    """Return the record dataset for split `kind`, plus the module's `feats`.

    Args:
        ps: Parameter object; this function reads `ps.dset` (must start with
            'mnist') and `ps.dir_data` (root directory of the data cache).
        kind: Name of the split; used as a sub-directory name and forwarded
            to `reader`.

    Returns:
        A `(dataset, feats)` pair, where `dataset` is whatever `R.dataset`
        yields for the record path `<dir_data>/<dset>/<kind>/<dset>`.
    """
    assert ps.dset.startswith('mnist')
    split_dir = pth.Path(ps.dir_data) / ps.dset / kind
    records = split_dir / ps.dset
    # A missing split directory means nothing has been dumped for it yet.
    if not split_dir.exists():
        # Materialize the reader output up front so the deferred callable
        # handed to R.dump works on a fixed tuple.
        samples = tuple(reader(ps, kind))
        R.dump(records, lambda: recorder(samples))
    return R.dataset(records), feats
示例#2
0
def dset(ps, kind):
    """Return the enwik8 record dataset for split `kind`, plus `feats`.

    Args:
        ps: Parameter object; this function reads `ps.dset` (must equal
            'enwik8'), `ps.dir_data` and `ps.vocab_path`, and passes `ps`
            on to `encoder.tokenizer_for` and `reader`.
        kind: Name of the split; used as a sub-directory name and forwarded
            to `reader`. The value 'train' additionally triggers the
            vocabulary dump when the vocabulary path does not exist.

    Returns:
        A `(dataset, feats)` pair, where `dataset` is whatever `R.dataset`
        yields for the record path `<dir_data>/enwik8/<kind>/enwik8`.
    """
    assert ps.dset == 'enwik8'
    base_dir = pth.Path(ps.dir_data) / ps.dset
    vocab_file = base_dir / ps.vocab_path
    split_dir = base_dir / kind
    records = split_dir / ps.dset
    # A missing split directory means nothing has been dumped for it yet.
    if not split_dir.exists():
        tokenizer = encoder.tokenizer_for(ps)
        topic = F.Topic(ps.dset, tokenizer(reader(ps, kind)))
        R.dump(records, lambda: recorder(topic))
        # The vocabulary is only written while building the 'train' split,
        # and only if it is not already on disk.
        if kind == 'train' and not vocab_file.exists():
            R.dump(vocab_file, lambda: [tokenizer.vocab.record()])
    return R.dataset(records), feats
示例#3
0
def dset(ps, kind):
    """Return the selected SQuAD record dataset for split `kind` and its features.

    Args:
        ps: Parameter object; this function reads `ps.dset` (must equal
            'squad'), `ps.dir_data`, `ps.vocab_path` and `ps.dset_subset`,
            and passes `ps` on to `encoder.tokenizer_for` and `reader`.
        kind: Name of the split; used as a sub-directory name and forwarded
            to `reader`. The value 'train' additionally triggers the
            vocabulary dump when the vocabulary path does not exist.

    Returns:
        A `(dataset, features)` pair: `R.dataset` applied to
        `<dir_data>/squad/<kind>/<dset_subset>`, and `feats[dset_subset]`.
    """
    assert ps.dset == 'squad'
    base_dir = pth.Path(ps.dir_data) / ps.dset
    vocab_file = base_dir / ps.vocab_path
    split_dir = base_dir / kind
    # A missing split directory means nothing has been dumped for it yet.
    if not split_dir.exists():
        tokenizer = encoder.tokenizer_for(ps)
        topics = F.Topics(tokenizer(reader(ps, kind)))
        # One record file per name listed under registry['all'].
        for name in registry['all']:
            # Bind `name` as a default argument: a plain closure would look
            # the loop variable up only when called, so any deferred call
            # would use the last name for every file.
            R.dump(split_dir / name, lambda name=name: registry[name](topics))
        # The vocabulary is only written while building the 'train' split,
        # and only if it is not already on disk.
        if kind == 'train' and not vocab_file.exists():
            R.dump(vocab_file, lambda: [tokenizer.vocab.record()])
    subset = ps.dset_subset
    return R.dataset(split_dir / subset), feats[subset]