示例#1
0
    def test_performance(self):
        """Check that bulk-adding 1000 URL records takes at most 0.1 seconds.

        Only the single ``add_many`` call is timed.  The elapsed wall-clock
        time is printed and then asserted to be no greater than 0.1.
        """
        table = SQLiteURLTable(':memory:')
        template = 'http://example.com/{}'
        records = [{'url': template.format(index)} for index in range(1000)]

        # Building the table and the records is deliberately left outside
        # the timed region.
        started = time.time()
        table.add_many(records, level=0, status=Status.todo)
        elapsed = time.time() - started

        print(elapsed)
        self.assertGreaterEqual(0.1, elapsed)
示例#2
0
    def test_performance(self):
        """Time one ``add_many`` call of 1000 records against a 0.1 s budget.

        The measured duration is printed before the assertion so it is
        visible in the test output.
        """
        budget_seconds = 0.1
        table = SQLiteURLTable(':memory:')
        batch = [
            {'url': 'http://example.com/{}'.format(number)}
            for number in range(1000)
        ]

        before = time.time()
        table.add_many(batch, level=0, status=Status.todo)
        after = time.time()

        duration = after - before
        print(duration)

        # Passes when the budget is greater than or equal to the duration.
        self.assertGreaterEqual(budget_seconds, duration)
示例#3
0
    def test_engine_bad_url_record(self):
        """Run the engine over a table seeded with malformed URL strings.

        There is no explicit assertion: the test is considered successful
        when the engine run completes without raising.
        """
        malformed_urls = (
            "http://example.........com/invalidurl",
            "http://www.example.comáb©:ðéf",
            "correct horse battery staple",
        )

        table = SQLiteURLTable(":memory:")
        mock_processor = MockProcessor()
        stats = Statistics()

        table.add_many([{"url": bad_url} for bad_url in malformed_urls])

        engine = Engine(table, mock_processor, stats)

        # A ValueError raised while parsing these URLs must not escape the
        # engine run.
        engine_run = engine()
        yield From(engine_run)
示例#4
0
    def test_large_table(self):
        """Fill a table in 1000 batches of 100 records, then drain it.

        Prints the total time spent creating and filling the table, then
        the time of every check-out/check-in round trip.  The drain loop
        ends when ``check_out`` raises ``NotFound``.  Nothing is asserted.
        """
        fill_started = time.time()
        table = SQLiteURLTable(':memory:')

        for outer in range(1000):
            batch = [
                {'url': 'http://example.com/{}{}'.format(outer, inner)}
                for inner in range(100)
            ]
            table.add_many(
                batch,
                referrer='http://example.com',
                level=0,
                top_url='http://example.net'
            )

        print(time.time() - fill_started)

        while True:
            step_started = time.time()

            try:
                record = table.check_out(Status.todo)
            except NotFound:
                # No record left in the todo state.
                break

            table.check_in(record.url, Status.done)

            print(time.time() - step_started)
示例#5
0
    def test_engine_bad_url_record(self):
        """Feed three unparsable URL records to the engine and run it.

        The test passes as long as running the engine does not raise; it
        makes no assertion about the outcome for each record.
        """
        table = SQLiteURLTable(':memory:')
        mock_processor = MockProcessor()
        stats = Statistics()

        bad_records = [
            {'url': 'http://example.........com/invalidurl'},
            {'url': 'http://www.example.comáb©:ðéf'},
            {'url': 'correct horse battery staple'},
        ]
        table.add_many(bad_records)

        crawler = Engine(table, mock_processor, stats)

        # URL parsing must not let a ValueError crash the run.
        yield From(crawler())
示例#6
0
    def test_large_table(self):
        """Load 1000 batches of 100 URL records and check each one back in.

        This is a timing probe rather than an assertion-based test: it
        prints the duration of the load phase, then the duration of each
        check-out/check-in cycle until ``check_out`` raises ``NotFound``.
        """
        load_begin = time.time()
        table = SQLiteURLTable(':memory:')

        for major in range(1000):
            links = [
                'http://example.com/{}{}'.format(major, minor)
                for minor in range(100)
            ]
            entries = [{'url': link} for link in links]

            table.add_many(
                entries,
                referrer='http://example.com',
                level=0,
                top_url='http://example.net'
            )

        load_end = time.time()
        print(load_end - load_begin)

        while True:
            cycle_begin = time.time()

            try:
                checked_out = table.check_out(Status.todo)
            except NotFound:
                # Nothing left to check out with the todo status.
                break

            table.check_in(checked_out.url, Status.done)

            cycle_end = time.time()
            print(cycle_end - cycle_begin)
示例#7
0
 def get_url_table(self):
     """Return a new SQLiteURLTable constructed with ':memory:'."""
     table = SQLiteURLTable(':memory:')
     return table
示例#8
0
 def test_sqlite_url_table(self):
     """Run the shared URL-table tester against a SQLiteURLTable."""
     table = SQLiteURLTable(':memory:')
     run_checks = self._generic_url_table_tester
     run_checks(table)