Exemplo n.º 1
0
def test_sentence_level_sampler_dependent_sampling():
    """Check that dependent_sampling returns a list of length 3.

    The input is a text with three full-stop-terminated sentences that is
    first passed through ``cut_part(text, psubsent)``; the resulting pieces
    are handed to ``SentenceLevelSampler.dependent_sampling``.
    """
    sls = SentenceLevelSampler()
    text = "写代码。多写代码。写好代码。"
    # presumably splits the text into sub-sentence pieces — confirm against cut_part
    text_list = cut_part(text, psubsent)
    res = sls.dependent_sampling(text_list)
    # isinstance is the idiomatic type check and also accepts list subclasses,
    # unlike an exact ``type(res) == list`` comparison.
    assert isinstance(res, list)
    assert len(res) == 3
Exemplo n.º 2
0
def test_sentence_level_sampler_swap():
    """A sampler built with ``types=["swap"]`` yields a result of length 2.

    The sample text consists of four full-stop-terminated sentences.
    """
    sampler = SentenceLevelSampler(types=["swap"])
    samples = sampler.make_samples("我爱你。你爱我。NLP 很有意思。简洁最重要。")
    assert len(samples) == 2
Exemplo n.º 3
0
def test_sentence_level_sampler_none_text():
    """An empty input string must make ``make_samples`` return an empty dict."""
    sampler = SentenceLevelSampler()
    samples = sampler.make_samples("")
    assert samples == {}
Exemplo n.º 4
0
def test_sentence_level_sampler_single_sent():
    """A single-sentence input still yields a result of length 4 from a default sampler."""
    sampler = SentenceLevelSampler()
    samples = sampler.make_samples("我爱你。")
    assert len(samples) == 4
Exemplo n.º 5
0
def test_sentence_level_sampler_none():
    """A sampler constructed with an empty list returns an empty dict of samples.

    NOTE(review): the empty list is passed positionally; presumably it is the
    ``types`` argument — confirm against the SentenceLevelSampler signature.
    """
    sampler = SentenceLevelSampler([])
    samples = sampler.make_samples("我爱你。你爱我。")
    assert samples == {}
Exemplo n.º 6
0
def test_sentence_level_sampler():
    """Check that a default sampler returns a dict with 4 entries for a two-sentence text."""
    sls = SentenceLevelSampler()
    text = "我爱你。你爱我。"
    res = sls.make_samples(text)
    # isinstance is the idiomatic type check and also accepts dict subclasses,
    # unlike an exact ``type(res) == dict`` comparison.
    assert isinstance(res, dict)
    assert len(res) == 4
Exemplo n.º 7
0
from pnlp.ptxt import Regex, Text, Length
from pnlp.pnorm import NumNorm
from pnlp.penh import TokenLevelSampler, SentenceLevelSampler
from pnlp.pmag import MagicDict
from pnlp.stopwords import StopWords
from pnlp.stopwords import chinese_stopwords, english_stopwords

from pnlp.utils import pstr, concurring, divide2int
from pnlp.utils import generate_batches_by_num, generate_batches_by_size


# Module-level default instances, constructed with no arguments when this
# module is executed.
num_norm = NumNorm()
reg = Regex()
# NOTE(review): Reader is not imported in the visible lines — presumably it is
# imported earlier in the file; confirm.
reader = Reader()
tlsampler = TokenLevelSampler()
slsampler = SentenceLevelSampler()


# Package metadata.
__title__ = 'pnlp'
__version__ = '0.4.0'
__author__ = 'Yam'
__license__ = 'Apache-2.0'
__copyright__ = 'Copyright 2019, 2020 Yam'

# Names exported by ``from <package> import *``.
__all__ = [
    'Reader',
    'Text',
    'Regex',
    'Length',
    'MagicDict',
    'NumNorm',
    'StopWords',
    'TokenLevelSampler',
    'SentenceLevelSampler',
]