def test_random_read_write(self):
    """Randomly interleave reads and writes for 1000 rounds.

    A local counter tracks how many items have been put but not yet
    fetched. When that counter is zero, a non-blocking ``get`` is
    expected to raise ``Empty``.
    """
    queue = SQLiteQueue(self.path, auto_commit=self.auto_commit)
    pending = 0

    for _ in range(1000):
        wants_read = random.random() < 0.5

        if not wants_read:
            queue.put('var%d' % random.getrandbits(16))
            pending += 1
        elif pending > 0:
            queue.get()
            pending -= 1
        else:
            # Nothing outstanding: the queue must report itself empty.
            self.assertRaises(Empty, queue.get, block=False)
def test_random_read_write(self):
    """Randomly interleave reads and writes for 1000 rounds.

    A local counter tracks how many items have been put but not yet
    fetched. When that counter is zero, ``get`` is expected to return
    ``None``.
    """
    queue = SQLiteQueue(self.path)
    outstanding = 0

    for _ in range(1000):
        if random.random() >= 0.5:
            # Write path: enqueue a random payload.
            queue.put('var%d' % random.getrandbits(16))
            outstanding += 1
            continue

        # Read path.
        if outstanding > 0:
            queue.get()
            outstanding -= 1
        else:
            self.assertEqual(None, queue.get())
def test_random_read_write(self):
    """Randomly interleave reads and writes for 1000 rounds.

    A local counter tracks how many items have been put but not yet
    fetched. When that counter is zero, ``get`` is expected to return
    ``None``. Every put is followed by ``task_done_if_required``.
    """
    queue = SQLiteQueue(self.path, auto_commit=self.auto_commit)
    queued = 0

    for _ in range(1000):
        reading = random.random() < 0.5

        if reading and queued > 0:
            queue.get()
            queued -= 1
        elif reading:
            # Nothing outstanding: the queue should hand back None.
            self.assertEqual(None, queue.get())
        else:
            queue.put('var%d' % random.getrandbits(16))
            task_done_if_required(queue)
            queued += 1
def test_open_close_1000(self):
    """Write 1000 items, drop the queue, reopen it and read them back.

    The reopened queue must report the same size and return the items
    in insertion order. A further put/get round trip is then checked,
    with ``shrink_disk_usage`` called before the final assertion.
    """
    writer = self.queue_class(self.path, auto_commit=self.auto_commit)
    for idx in range(1000):
        writer.put('var%d' % idx)
    self.assertEqual(1000, writer.qsize())
    del writer

    # NOTE(review): the queue is reopened with SQLiteQueue rather than
    # self.queue_class -- confirm this is intentional.
    reader = SQLiteQueue(self.path)
    self.assertEqual(1000, reader.qsize())
    for idx in range(1000):
        fetched = reader.get()
        self.assertEqual('var%d' % idx, fetched)

    # assert adding another one still works
    reader.put('foobar')
    fetched = reader.get()
    reader.shrink_disk_usage()
    self.assertEqual('foobar', fetched)
def test_multiple_consumers(self):
    """Test sqlqueue can be used by multiple consumers.

    One producer thread puts 1000 items while five consumer threads
    each fetch 200 of them. Afterwards the queue must be empty, every
    result slot must be filled, and no item may have been delivered
    twice.
    """
    queue = SQLiteQueue(path=self.path, multithreading=True,
                        auto_commit=self.auto_commit)

    # One slot per expected item; 0 means "not received yet".
    received = [0] * 1000

    def producer():
        for number in range(1000):
            queue.put('var%d' % number)

    def consumer(index):
        # Each consumer owns a contiguous window of 200 slots.
        offset = index * 200
        for step in range(200):
            item = queue.get(block=True)
            self.assertTrue('var' in item)
            received[offset + step] = item

    producer_thread = Thread(target=producer)
    producer_thread.start()

    consumer_threads = []
    for index in range(5):
        worker = Thread(target=consumer, args=(index, ))
        worker.start()
        consumer_threads.append(worker)

    producer_thread.join()
    for worker in consumer_threads:
        worker.join()

    self.assertEqual(0, queue.qsize())
    for slot, value in enumerate(received):
        self.assertNotEqual(0, value,
                            "not 0 for counter's index %s" % slot)

    # Every delivered item must be distinct.
    self.assertEqual(len(set(received)), len(received))
def test_protocol_2(self):
    """Check the queue's ``protocol``: 2 on Python 2, otherwise 4."""
    if sys.version_info[0] == 2:
        expected_protocol = 2
    else:
        expected_protocol = 4

    queue = SQLiteQueue(path=self.path)
    self.assertEqual(queue.protocol, expected_protocol)
def test_protocol_1(self):
    """Check ``protocol`` on a queue created after wiping ``self.path``.

    The expected value is 2 on Python 2 and 4 otherwise.
    """
    # Remove any existing directory first; a missing one is not an error.
    shutil.rmtree(self.path, ignore_errors=True)

    running_py2 = sys.version_info[0] == 2
    expected_protocol = 2 if running_py2 else 4

    queue = SQLiteQueue(path=self.path)
    self.assertEqual(queue.protocol, expected_protocol)
def __init__(self, spec=None, spider=None, spiders=None, start_jobs=None,
             queue_path=None, threads=25,
             buffer_size=DEFAULT_GROUP_BUFFER_SIZE,
             throttle=DEFAULT_THROTTLE):
    """Set up the crawler's spiders, job queue, HTTP pool and state.

    Exactly one spider source is used, checked in this order:
    `spec`, then `spider`, then `spiders`.

    Args:
        spec: Spider definition. If it has a 'spiders' key, each entry
            becomes a named `DefinitionSpider`; otherwise the whole
            spec becomes a single `DefinitionSpider` stored under
            'default'.
        spider: A single spider, stored under 'default'.
        spiders: Mapping of name to spider. It is not modified.
        start_jobs: Stored as-is on `self.start_jobs`.
        queue_path: When not None, jobs are kept in a `SQLiteQueue` at
            this path; otherwise an in-memory `Queue` is used.
        threads: Passed to `create_pool` and stored on `self.threads`.
        buffer_size: Stored as-is on `self.buffer_size`.
        throttle: Stored as-is on `self.throttle`.

    Raises:
        TypeError: If none of `spec`, `spider` or `spiders` is given.
    """

    # NOTE: crawling could work depth-first but:
    # buffer_size should be 0 (requires to fix quenouille issue #1)

    # Reject invalid arguments before allocating the HTTP pool or
    # opening the persistent queue, so a bad call leaves nothing behind.
    if spec is None and spider is None and spiders is None:
        raise TypeError(
            'minet.Crawler: expecting either `spec`, `spider` or `spiders`.'
        )

    # Params
    self.start_jobs = start_jobs
    self.queue_path = queue_path
    self.threads = threads
    self.buffer_size = buffer_size
    self.throttle = throttle

    self.using_persistent_queue = queue_path is not None
    self.http = create_pool(threads=threads)
    self.state = CrawlerState()
    self.started = False

    # Memory queue
    if not self.using_persistent_queue:
        queue = Queue()

    # Persistent queue
    else:
        queue = SQLiteQueue(queue_path, multithreading=True, auto_commit=False)

    # Creating spiders
    # `single_spider` is assigned on every path so the attribute always
    # exists, whichever way the crawler was constructed.
    if spec is not None:
        if 'spiders' in spec:
            declared = {
                name: DefinitionSpider(s, name=name)
                for name, s in spec['spiders'].items()
            }
            self.single_spider = False
        else:
            declared = {'default': DefinitionSpider(spec)}
            self.single_spider = True

    elif spider is not None:
        declared = {'default': spider}
        self.single_spider = True

    else:
        declared = spiders
        self.single_spider = False

    # Solving function spiders: plain callables that are not Spider
    # instances get wrapped. A new dict is built so the mapping passed
    # in by the caller is left untouched.
    resolved = {}
    for name, s in declared.items():
        if callable(s) and not isinstance(s, Spider):
            s = FunctionSpider(s, name)
        resolved[name] = s

    self.queue = queue
    self.spiders = resolved
def test_put_0(self):
    """Put the integer 0 and check a non-blocking get does not return None."""
    queue = SQLiteQueue(path=self.path)
    queue.put(0)

    fetched = queue.get(block=False)

    self.assertIsNotNone(fetched)
def test_json_serializer(self):
    """Round-trip a nested dict through a queue using the JSON serializer.

    The item fetched from the queue must compare equal to the item
    that was put.
    """
    q = SQLiteQueue(path=self.path, serializer=serializers.json)
    x = dict(a=1, b=2, c=dict(d=list(range(5)), e=[1]))
    q.put(x)
    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported spelling.
    self.assertEqual(q.get(), x)
def test_protocol_2(self):
    """Check that a queue built with default arguments has ``protocol`` None."""
    queue = SQLiteQueue(path=self.path)
    actual_protocol = queue.protocol
    self.assertEqual(actual_protocol, None)
def test_update(self):
    """Update a stored item by id and check ``get`` returns the new value."""
    queue = SQLiteQueue(path=self.path)

    # put() returns the identifier used to address the item afterwards.
    item_id = queue.put("val1")
    queue.update(item="val2", id=item_id)

    fetched = queue.get(id=item_id)
    self.assertEqual(fetched, "val2")