Exemplo n.º 1
0
 def test_get_stream(self) -> None:
     """get_stream() returns an ``addinfourl`` for a configured file."""
     from urllib.response import addinfourl
     # arrange
     fetcher = FileFetcher({'test_file': 'test.txt'},
                           base_dir=self._TESTDATA_DIR)
     # act
     stream, _ = fetcher.get_stream('test_file')
     # assert
     self.assertIsInstance(stream, addinfourl)
     # The response object wraps an open handle; close it after the test
     # so the handle is not leaked (ResourceWarning).
     self.addCleanup(stream.close)
Exemplo n.º 2
0
 def test__get_file_uri__absolute_basedir(self) -> None:
     """_build_uri() turns a full path under _TESTDATA_DIR into a file URI."""
     # arrange
     full_path = path.join(self._TESTDATA_DIR, 'test.txt')
     # act
     uri = FileFetcher._build_uri(full_path)
     # assert
     self.assertTrue(uri.startswith('file://'), uri)
     expected_uri = Path(full_path).as_uri()
     self.assertEqual(expected_uri, uri)
Exemplo n.º 3
0
 def test__get_file_uri__with_basedir(self) -> None:
     """_build_uri() joins a bare file name with the given base dir."""
     # arrange
     name = 'test.txt'
     base_dir = self._TESTDATA_DIR
     # act
     uri = FileFetcher._build_uri(name, base_dir)
     # assert
     self.assertTrue(uri.startswith('file://'), uri)
     expected_uri = Path(path.join(base_dir, name)).as_uri()
     self.assertEqual(expected_uri, uri)
Exemplo n.º 4
0
 def test__get_file_uri__unknown_basedir(self) -> None:
     """_get_file_uri() on a fetcher with no entries raises an error."""
     # arrange
     empty_fetcher = FileFetcher({})
     # act & assert
     self.assertRaises(
         FileNotFoundError, empty_fetcher._get_file_uri, 'unknown_file')
Exemplo n.º 5
0
 def test__get_file_uri__unknown_file(self) -> None:
     """_build_uri() reports 'Not a file' for 'unknown.file'."""
     # arrange
     missing_path = path.join(self._TESTDATA_DIR, 'unknown.file')
     # act & assert
     self.assertRaisesRegex(
         FileNotFoundError, 'Not a file',
         FileFetcher._build_uri, missing_path)
Exemplo n.º 6
0
 def test__get_file_uri__without_basedir(self) -> None:
     """_build_uri() reports 'Path not absolute' for a bare file name."""
     # arrange
     bare_name = 'test.txt'
     # act & assert
     self.assertRaisesRegex(
         FileNotFoundError, 'Path not absolute',
         FileFetcher._build_uri, bare_name)
Exemplo n.º 7
0
 def test__uri_sort_query__unsorted(self) -> None:
     """Out-of-order query parameters are reordered (bar before foo)."""
     # act
     sorted_uri = FileFetcher._uri_sort_query('https://asdf?foo=1&bar=2')
     # assert
     self.assertEqual('https://asdf?bar=2&foo=1', sorted_uri)
Exemplo n.º 8
0
 def test__uri_sort_query__query(self) -> None:
     """A URI with a single query parameter is returned unchanged."""
     # arrange
     uri = 'https://asdf?foo=1'
     # act
     sorted_uri = FileFetcher._uri_sort_query(uri)
     # assert
     self.assertEqual('https://asdf?foo=1', sorted_uri)
Exemplo n.º 9
0
 def test__valid_url(self) -> None:
     """_valid_uri() accepts the file:// URI and rejects http(s) URIs."""
     # accepted
     self.assertTrue(FileFetcher._valid_uri('file:///foo_bar/baz'))
     # rejected
     for remote_uri in ('http://foo.bar/baz', 'https://foo.bar/baz'):
         self.assertFalse(FileFetcher._valid_uri(remote_uri))
Exemplo n.º 10
0
 def test__uri_sort_query__fragment(self) -> None:
     """A URI carrying only a fragment is returned unchanged."""
     # arrange
     uri = 'https://asdf#foo'
     # act
     sorted_uri = FileFetcher._uri_sort_query(uri)
     # assert
     self.assertEqual('https://asdf#foo', sorted_uri)
Exemplo n.º 11
0
 def test__uri_sort_query__path(self) -> None:
     """A URI with a path but no query is returned unchanged."""
     # arrange
     uri = 'https://as/df'
     # act
     sorted_uri = FileFetcher._uri_sort_query(uri)
     # assert
     self.assertEqual('https://as/df', sorted_uri)
Exemplo n.º 12
0
            uri, 'https://www.reddit.com/r/test.json?after=foo%2Fbar+bazz')


class RedditResetTest(unittest.TestCase):
    """Checks the effect of ``Reddit._reset()`` on the ``_after`` attribute."""

    def test_reset_done(self) -> None:
        """_reset() sets a previously assigned ``_after`` back to None."""
        # arrange
        reddit = Reddit(subreddit='test')
        reddit._after = 'foo'
        # act
        reddit._reset()
        # assert
        after = reddit._after
        self.assertIsNone(after)


# File names below are relative to "./testdata_reddit" (the base_dir).
_FILE_FETCHER = FileFetcher(
    {'/r/aww.json?after=': 'aww.json'},
    base_dir=path_join(dirname(__file__), 'testdata_reddit'),
)


class RedditCrawlTest(unittest.TestCase):
    def setUp(self) -> None:
        """Create an 'aww' crawler that uses the module's file fetcher."""
        crawler = Reddit(subreddit='aww')
        # swap in the fixture-backed fetcher defined at module level
        crawler._remote_fetcher = _FILE_FETCHER
        self.crawler = crawler

    def tearDown(self) -> None:
        """Remove the crawler attribute that setUp() assigned."""
        del self.crawler

    def test_crawl(self) -> None:
        # arrange
        expected_after = 't3_dqx42l'
        expected_images = ImageCollection()
Exemplo n.º 13
0
    @ddt_data((None, None), (23, '23'))  # type: ignore
    @ddt_unpack  # type: ignore
    def test_older(
            self, older: Optional[int], older_qs: Optional[str]) -> None:
        """Check how ``older`` appears in the query string of the API URI.

        :param older: value handed to ``Pr0gramm._get_api_uri``.
        :param older_qs: expected value of the ``older`` query parameter;
            ``None`` means the parameter must be absent from the query.
        """
        # act
        api_uri = Pr0gramm._get_api_uri(flags=0, promoted=False, older=older)
        query = parse_qs(urlsplit(api_uri).query)
        # assert
        # A missing 'older' key falls back to [None], matching older_qs=None.
        self.assertEqual([older_qs], query.get('older', [None]))


# Fetcher built from a mapping of API request strings (path plus query) to
# JSON file names; the files are located via base_dir below.
_FILE_FETCHER = FileFetcher({  # relative to "./testdata_pr0gramm"
    '/api/items/get?flags=1&promoted=1&tags=%21%28s%3A15000%29+-%22video%22':
        'get-flags_1-promoted_1-tags_s15000-video.json',
    '/api/items/get?flags=1&promoted=1&tags=%21%28s%3A1000%29+-%22video%22':
        'get-flags_1-promoted_1-tags_s1000-video.json',
    '/api/items/get?flags=1&promoted=0&tags=%21%28s%3A1000%29+-%22video%22':
        'get-flags_1-promoted_0-tags_s1000-video.json',
}, base_dir=path_join(dirname(__file__), 'testdata_pr0gramm'))


class Pr0grammResetTest(unittest.TestCase):
    """Checks the effect of ``Pr0gramm._reset()`` on ``_older``."""

    def test_reset_done(self) -> None:
        """_reset() sets a previously assigned ``_older`` back to None."""
        # arrange
        pr0gramm = Pr0gramm()
        pr0gramm._older = 1337
        # act
        pr0gramm._reset()
        # assert
        older = pr0gramm._older
        self.assertIsNone(older)