Exemplo n.º 1
0
def test_Reader_file():
    """Check that a default ``Reader`` called on ``a.md`` produces three items.

    The first item is expected to expose a ``text`` attribute equal to
    ``'line 1 in a.'``.
    """
    reader = Reader()
    # Materialize the iterable in one step rather than appending item by item.
    res = list(reader(os.path.join(DATA_PATH, 'a.md')))
    assert len(res) == 3
    assert res[0].text == 'line 1 in a.'
Exemplo n.º 2
0
def test_Reader_gen_plines():
    """Check that ``Reader.gen_plines`` on ``b.txt`` returns a generator of three items."""
    lines = Reader.gen_plines(os.path.join(DATA_PATH, 'b.txt'))
    # isinstance already returns a bool; comparing it to True is redundant.
    assert isinstance(lines, types.GeneratorType)
    assert len(list(lines)) == 3
Exemplo n.º 3
0
def test_Reader_gen_flines():
    """Check the ``gen_files`` -> ``gen_articles`` -> ``gen_flines`` pipeline.

    Paths matching ``'*.txt'`` under ``DATA_PATH`` are fed through
    ``Reader.gen_articles`` and then ``Reader.gen_flines``; the result must be
    a generator that produces nine items in total.
    """
    paths = Reader.gen_files(DATA_PATH, '*.txt')
    articles = Reader.gen_articles(paths)
    lines = Reader.gen_flines(articles)
    # isinstance already returns a bool; comparing it to True is redundant.
    assert isinstance(lines, types.GeneratorType)
    assert len(list(lines)) == 9
Exemplo n.º 4
0
def test_Reader_gen_files():
    """Check that ``Reader.gen_files`` with ``'*.md'`` returns a generator of three paths."""
    paths = Reader.gen_files(DATA_PATH, '*.md')
    # isinstance already returns a bool; comparing it to True is redundant.
    assert isinstance(paths, types.GeneratorType)
    assert len(list(paths)) == 3
Exemplo n.º 5
0
def get_Reader_path_match_res(request):
    """Collect everything a ``Reader`` built from ``request.param`` yields for ``DATA_PATH``.

    Args:
        request: Object exposing a ``param`` attribute, which is passed to the
            ``Reader`` constructor. Presumably the pytest fixture request; the
            decorator is not visible in this excerpt, so confirm at the
            definition site.

    Returns:
        list: The items produced by calling the reader on ``DATA_PATH``, in
        iteration order.
    """
    reader = Reader(request.param)
    # Materialize the iterable in one step rather than appending item by item.
    return list(reader(DATA_PATH))
Exemplo n.º 6
0
from pnlp.piop import Reader, Dict
from pnlp.ptxt import Regex, Text, Length
from pnlp.pnorm import NumNorm
from pnlp.penh import TokenLevelSampler, SentenceLevelSampler
from pnlp.pmag import MagicDict
from pnlp.stopwords import StopWords
from pnlp.stopwords import chinese_stopwords, english_stopwords

from pnlp.utils import pstr, concurring, divide2int
from pnlp.utils import generate_batches_by_num, generate_batches_by_size


# Default instances created once at import time, each constructed with no
# arguments.
num_norm = NumNorm()
reg = Regex()
reader = Reader()
tlsampler = TokenLevelSampler()
slsampler = SentenceLevelSampler()


# Package metadata.
__title__ = 'pnlp'
__version__ = '0.4.0'
__author__ = 'Yam'
__license__ = 'Apache-2.0'
__copyright__ = 'Copyright 2019, 2020 Yam'
__all__ = ['Reader',
           'Text', 'Regex', 'Length',
           'MagicDict',
           'NumNorm',
           'StopWords',
           'TokenLevelSampler', 'SentenceLevelSampler'
Exemplo n.º 7
0
def test_Reader_gen_files_with_regex():
    """Check ``Reader.gen_files`` called with the pattern ``"(md)|(txt)"`` and a third argument of ``True``.

    The result must be a generator that produces six paths. The test name
    suggests the third positional argument enables regex matching; confirm
    against the ``gen_files`` signature.
    """
    paths = Reader.gen_files(DATA_PATH, "(md)|(txt)", True)
    # isinstance already returns a bool; comparing it to True is redundant.
    assert isinstance(paths, types.GeneratorType)
    assert len(list(paths)) == 6