def test_queue_with_delay(self):
    """Check that a request with a future ``crawl_at`` is held back.

    A copy of ``r3`` is scheduled with ``crawl_at`` set 1000 seconds
    ahead of the current time. The name ``time`` inside
    ``frontera.contrib.backends.hbase`` is patched so the test decides
    which timestamp that module sees: at "now" the queue must return
    nothing, and one second past ``crawl_at`` it must return the request.
    """
    hbase_conn = Connection(host='hbase-docker', port=9090)
    queue = HBaseQueue(hbase_conn, 1, b'queue', use_snappy=False, drop=True)

    delayed = r3.copy()
    crawl_at = int(time()) + 1000
    delayed.meta[b'crawl_at'] = crawl_at
    queue.schedule([(delayed.meta[b'fingerprint'], 0.5, delayed, True)])

    # Same limits for both fetches; only the patched clock differs.
    limits = dict(min_requests=3, min_hosts=1, max_requests_per_host=10)

    with mock.patch('frontera.contrib.backends.hbase.time') as mocked_time:
        # Clock at the real current time: the request is not due yet.
        mocked_time.return_value = time()
        assert queue.get_next_requests(10, 0, **limits) == []

        # Clock just past crawl_at: the request must now be handed out.
        mocked_time.return_value = crawl_at + 1
        fetched_urls = {
            req.url for req in queue.get_next_requests(10, 0, **limits)
        }
        assert fetched_urls == {delayed.url}
def test_queue(self):
    """Schedule three requests and check what each of two fetches returns.

    ``r1``, ``r2`` and ``r3`` are scheduled in one batch on a queue
    created with ``2`` as its second argument. Fetching with ``0`` as the
    second argument of ``get_next_requests`` must yield only ``r3``;
    fetching with ``1`` must yield ``r1`` and ``r2``.
    """
    hbase_conn = Connection(host='hbase-docker', port=9090)
    # NOTE(review): the positional ``True`` is presumably the ``drop``
    # flag -- confirm against the HBaseQueue signature.
    queue = HBaseQueue(hbase_conn, 2, b'queue', True)

    queue.schedule([
        ('10', 0.5, r1, True),
        ('11', 0.6, r2, True),
        ('12', 0.7, r3, True),
    ])

    limits = dict(min_requests=3, min_hosts=1, max_requests_per_host=10)

    urls_from_0 = {req.url for req in queue.get_next_requests(10, 0, **limits)}
    assert urls_from_0 == {r3.url}

    urls_from_1 = {req.url for req in queue.get_next_requests(10, 1, **limits)}
    assert urls_from_1 == {r1.url, r2.url}
def test_queue_with_delay(self):
    """Check that a request with a future ``crawl_at`` is held back.

    A copy of ``r3`` is scheduled with ``crawl_at`` far in the future,
    and the name ``time`` inside ``frontera.contrib.backends.hbase`` is
    patched so the test controls the timestamp that module sees.

    This replaces a real-clock version (``crawl_at = now + 1`` followed
    by ``sleep(1.5)``). That version could fail spuriously: when the
    wall clock was just below a whole second, ``int(time()) + 1`` was
    only milliseconds away and the request could already be due by the
    time of the first fetch. It also made every run wait 1.5 seconds.
    """
    connection = Connection(host='hbase-docker', port=9090)
    # NOTE(review): the positional ``True`` is presumably the ``drop``
    # flag -- confirm against the HBaseQueue signature.
    queue = HBaseQueue(connection, 1, b'queue', True)

    r5 = r3.copy()
    # Far enough ahead that the real clock can never reach it mid-test.
    crawl_at = int(time()) + 1000
    r5.meta[b'crawl_at'] = crawl_at
    batch = [(r5.meta[b'fingerprint'], 0.5, r5, True)]
    queue.schedule(batch)

    with mock.patch('frontera.contrib.backends.hbase.time') as mocked_time:
        # Clock at the real current time: the request is not due yet.
        mocked_time.return_value = time()
        assert queue.get_next_requests(
            10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10
        ) == []

        # Clock just past crawl_at: the request must now be handed out.
        mocked_time.return_value = crawl_at + 1
        urls = {
            r.url
            for r in queue.get_next_requests(
                10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10
            )
        }
        assert urls == {r5.url}
def test_queue_with_delay(self):
    """Verify delayed requests are only returned once ``crawl_at`` passes.

    The request is scheduled 1000 seconds into the future. ``time`` in
    ``frontera.contrib.backends.hbase`` is replaced with a mock whose
    return value is first the real current time (nothing may be
    returned) and then ``crawl_at + 1`` (the request must be returned).
    """
    conn = Connection(host='hbase-docker', port=9090)
    queue = HBaseQueue(conn, 1, b'queue', use_snappy=False, drop=True)

    def fetch():
        # Both checks pull from the queue with exactly the same arguments.
        return queue.get_next_requests(
            10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10
        )

    postponed = r3.copy()
    crawl_at = int(time()) + 1000
    postponed.meta[b'crawl_at'] = crawl_at
    fingerprint = postponed.meta[b'fingerprint']
    queue.schedule([(fingerprint, 0.5, postponed, True)])

    with mock.patch('frontera.contrib.backends.hbase.time') as mocked_time:
        mocked_time.return_value = time()
        assert fetch() == []

        mocked_time.return_value = crawl_at + 1
        assert {request.url for request in fetch()} == {postponed.url}
def test_queue_with_delay(self):
    """Verify delayed requests are only returned once ``crawl_at`` passes.

    The clock seen by ``frontera.contrib.backends.hbase`` is patched
    rather than waiting on the real one. The earlier approach set
    ``crawl_at`` to ``int(time()) + 1`` and slept 1.5 seconds. That was
    racy: near a second boundary the request could already be due at the
    first fetch, failing the "nothing yet" assertion. It was also slow.
    With a mocked clock the outcome does not depend on wall-clock timing.
    """
    connection = Connection(host='hbase-docker', port=9090)
    # NOTE(review): the positional ``True`` is presumably the ``drop``
    # flag -- confirm against the HBaseQueue signature.
    queue = HBaseQueue(connection, 1, b'queue', True)

    def fetch():
        # Both checks pull from the queue with exactly the same arguments.
        return queue.get_next_requests(
            10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10
        )

    r5 = r3.copy()
    # Well beyond anything the real clock can reach while the test runs.
    crawl_at = int(time()) + 1000
    r5.meta[b'crawl_at'] = crawl_at
    queue.schedule([(r5.meta[b'fingerprint'], 0.5, r5, True)])

    with mock.patch('frontera.contrib.backends.hbase.time') as mocked_time:
        # Before crawl_at: the queue must withhold the request.
        mocked_time.return_value = time()
        assert fetch() == []

        # After crawl_at: the queue must return it.
        mocked_time.return_value = crawl_at + 1
        assert {r.url for r in fetch()} == {r5.url}
def test_queue_with_post_request(self):
    """Check that method and body are intact after a queue round-trip.

    ``r1`` is scheduled and fetched back; the first returned request
    must have method ``b'POST'`` and a body equal to the module-level
    ``data``.
    """
    hbase_conn = Connection(host='hbase-docker', port=9090)
    queue = HBaseQueue(hbase_conn, 1, b'queue', drop=True, use_snappy=False)

    queue.schedule([('10', 0.5, r1, True)])

    fetched = queue.get_next_requests(
        10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10
    )
    first = fetched[0]
    self.assertEqual(b'POST', first.method)
    self.assertEqual(data, first.body)