Пример #1
0
 def test_ioerror(self):
     """Consuming ``texts()`` for a dataset rooted at ``self.tempdir`` must raise IOError."""
     wiki = Wikipedia(data_dir=self.tempdir)
     with self.assertRaises(IOError):
         # Materialize the result inside the context so the error is caught
         # whether it is raised by the call itself or during iteration.
         texts = wiki.texts()
         list(texts)
Пример #2
0
 def test_download(self):
     """Download into ``self.tempdir`` and check that the reported file exists."""
     wiki = Wikipedia(data_dir=self.tempdir)
     wiki.download()
     downloaded_path = wiki.filename
     self.assertTrue(os.path.exists(downloaded_path))
Пример #3
0
from __future__ import absolute_import, unicode_literals

import os
import shutil
import tempfile
import unittest

from textacy import data_dir
from textacy.compat import unicode_
from textacy.datasets.wikipedia import Wikipedia

# Handle for the lang='en', version='latest' Wikipedia dataset; its `filename`
# attribute is what the `skipUnless` decorator on the test case checks.
DATASET = Wikipedia(lang='en', version='latest')


@unittest.skipUnless(
    DATASET.filename,
    'Wikipedia dataset must be downloaded before running tests')
class WikipediaTestCase(unittest.TestCase):
    """Tests for the ``Wikipedia`` dataset wrapper.

    The whole case is skipped unless ``DATASET.filename`` is truthy.
    """

    def setUp(self):
        """Create a per-test scratch directory beside this test module."""
        self.tempdir = tempfile.mkdtemp(
            prefix='test_datasets_',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Remove the scratch directory once the test finishes so repeated
        # runs do not litter the test package. ``ignore_errors`` keeps the
        # cleanup harmless if the directory has already been removed.
        self.addCleanup(shutil.rmtree, self.tempdir, ignore_errors=True)

    @unittest.skip("No need to download a new dataset every time")
    def test_download(self):
        """After ``download()``, ``dataset.filename`` must exist on disk."""
        dataset = Wikipedia(data_dir=self.tempdir)
        dataset.download()
        self.assertTrue(os.path.exists(dataset.filename))

    def test_ioerror(self):
        """Reading texts from the fresh scratch directory must raise IOError."""
        dataset = Wikipedia(data_dir=self.tempdir)
        # texts() may be lazy, so consume it inside the context manager;
        # without this assertion the test would pass vacuously.
        with self.assertRaises(IOError):
            _ = list(dataset.texts())
def test_ioerror(tmpdir):
    """Consuming ``texts()`` for a dataset rooted at ``tmpdir`` must raise IOError."""
    target_dir = str(tmpdir)
    wiki = Wikipedia(data_dir=target_dir)
    with pytest.raises(IOError):
        # Force full consumption so an error raised during iteration is seen.
        list(wiki.texts())
def test_download(tmpdir):
    """Download into ``tmpdir`` and check that the reported file exists."""
    target_dir = str(tmpdir)
    wiki = Wikipedia(data_dir=target_dir)
    wiki.download()
    downloaded = os.path.exists(wiki.filename)
    assert downloaded
Пример #6
0
from __future__ import absolute_import, unicode_literals

import os

import pytest

from textacy import compat
from textacy.datasets.wikipedia import Wikipedia

# Handle for the lang="en", version="latest" Wikipedia dataset.
DATASET = Wikipedia(lang="en", version="latest")

# Module-level pytest mark: skip when the handle reports no filename
# (presumably meaning nothing has been downloaded yet -- see `reason`).
pytestmark = pytest.mark.skipif(
    DATASET.filename is None,
    reason="Wikipedia dataset must be downloaded before running tests",
)


@pytest.mark.skip("No need to download a new dataset every time")
def test_download(tmpdir):
    """Download into ``tmpdir`` and check that the reported file exists.

    Skipped unconditionally by the decorator above.
    """
    wiki = Wikipedia(data_dir=str(tmpdir))
    wiki.download()
    downloaded_path = wiki.filename
    assert os.path.exists(downloaded_path)


def test_ioerror(tmpdir):
    """Consuming ``texts()`` for a dataset rooted at ``tmpdir`` must raise IOError."""
    wiki = Wikipedia(data_dir=str(tmpdir))
    with pytest.raises(IOError):
        # Keep both the call and its consumption inside the context so the
        # error is caught whichever of the two raises it.
        texts = wiki.texts()
        list(texts)


def test_texts():