示例#1
0
    def test_random_read_write(self):
        """Interleave 1000 randomly chosen puts and gets on one queue."""

        queue = SQLiteQueue(self.path, auto_commit=self.auto_commit)
        # Number of items the test believes are currently stored.
        pending = 0
        for _ in range(1000):
            if random.random() >= 0.5:
                # Write path: enqueue a value with a random 16-bit suffix.
                queue.put('var%d' % random.getrandbits(16))
                pending += 1
                continue

            # Read path.
            if pending > 0:
                queue.get()
                pending -= 1
            else:
                # Nothing stored: a non-blocking get must raise Empty.
                self.assertRaises(Empty, queue.get, block=False)
示例#2
0
    def test_random_read_write(self):
        """Randomly mix 1000 reads and writes against a single queue."""

        queue = SQLiteQueue(self.path)
        expected_size = 0
        for _ in range(1000):
            wants_read = random.random() < 0.5
            if not wants_read:
                queue.put('var%d' % random.getrandbits(16))
                expected_size += 1
            elif expected_size > 0:
                queue.get()
                expected_size -= 1
            else:
                # With nothing stored, the test expects get() to equal None.
                self.assertEqual(None, queue.get())
示例#3
0
    def test_random_read_write(self):
        """Run 1000 random put/get steps, tracking how many items remain."""

        sql_queue = SQLiteQueue(self.path, auto_commit=self.auto_commit)
        stored = 0
        for _ in range(1000):
            reading = random.random() < 0.5

            if reading and stored > 0:
                sql_queue.get()
                stored -= 1
            elif reading:
                # With nothing stored, the test expects get() to equal None.
                self.assertEqual(None, sql_queue.get())
            else:
                sql_queue.put('var%d' % random.getrandbits(16))
                # Helper defined outside this block; called after every put.
                task_done_if_required(sql_queue)
                stored += 1
示例#4
0
    def test_open_close_1000(self):
        """Store 1000 items, drop the queue, reopen it and read them in order."""

        total = 1000
        writer = self.queue_class(self.path, auto_commit=self.auto_commit)
        for index in range(total):
            writer.put('var%d' % index)
        self.assertEqual(total, writer.qsize())
        del writer

        # NOTE(review): the reopen goes through SQLiteQueue directly (and
        # without auto_commit) instead of self.queue_class -- confirm that
        # this is intended.
        reader = SQLiteQueue(self.path)
        self.assertEqual(total, reader.qsize())
        for index in range(total):
            item = reader.get()
            self.assertEqual('var%d' % index, item)

        # The reopened queue must still accept and hand back a new item.
        reader.put('foobar')
        last = reader.get()
        reader.shrink_disk_usage()
        self.assertEqual('foobar', last)
示例#5
0
    def test_multiple_consumers(self):
        """Drain one producer's 1000 items with five consumer threads."""

        shared = SQLiteQueue(path=self.path,
                             multithreading=True,
                             auto_commit=self.auto_commit)

        # One slot per expected item; 0 marks a slot no consumer has filled.
        counter = [0] * 1000

        def producer():
            for number in range(1000):
                shared.put('var%d' % number)

        def consumer(index):
            # Each consumer owns a contiguous run of 200 slots.
            base = index * 200
            for offset in range(200):
                item = shared.get(block=True)
                self.assertTrue('var' in item)
                counter[base + offset] = item

        producer_thread = Thread(target=producer)
        producer_thread.start()

        consumer_threads = []
        for index in range(5):
            worker = Thread(target=consumer, args=(index, ))
            worker.start()
            consumer_threads.append(worker)

        # Wait for the producer first, then for every consumer.
        for thread in [producer_thread] + consumer_threads:
            thread.join()

        self.assertEqual(0, shared.qsize())
        for position, value in enumerate(counter):
            self.assertNotEqual(0, value,
                                "not 0 for counter's index %s" % position)

        # No value may appear in more than one slot.
        self.assertEqual(len(set(counter)), len(counter))
示例#6
0
 def test_protocol_2(self):
     """Check the protocol of a queue built with default arguments."""
     queue = SQLiteQueue(path=self.path)
     # Expected value depends on the major version of the interpreter.
     if sys.version_info[0] == 2:
         expected = 2
     else:
         expected = 4
     self.assertEqual(queue.protocol, expected)
示例#7
0
 def test_protocol_1(self):
     """Check the protocol of a queue created after wiping self.path."""
     # Remove whatever tree exists at the path; a missing path is ignored.
     shutil.rmtree(self.path, ignore_errors=True)
     running_python_2 = sys.version_info[0] == 2
     expected_protocol = 2 if running_python_2 else 4
     fresh_queue = SQLiteQueue(path=self.path)
     self.assertEqual(fresh_queue.protocol, expected_protocol)
示例#8
0
    def __init__(self,
                 spec=None,
                 spider=None,
                 spiders=None,
                 start_jobs=None,
                 queue_path=None,
                 threads=25,
                 buffer_size=DEFAULT_GROUP_BUFFER_SIZE,
                 throttle=DEFAULT_THROTTLE):
        """Store the crawl parameters and build the pool, queue and spiders.

        The spider source is picked in this order: `spec`, then `spider`,
        then `spiders`.

        Args:
            spec: Crawl definition. When it contains a 'spiders' key, every
                entry becomes a named DefinitionSpider; otherwise the whole
                spec becomes the single 'default' DefinitionSpider.
            spider: A single spider, registered under the name 'default'.
            spiders: Mapping of spider name to spider. It is read but not
                modified.
            start_jobs: Stored on the instance as given.
            queue_path: When not None, jobs are kept in a SQLiteQueue opened
                at this path; otherwise a Queue is used.
            threads: Stored on the instance and passed to create_pool.
            buffer_size: Stored on the instance as given.
            throttle: Stored on the instance as given.

        Raises:
            TypeError: If none of `spec`, `spider` and `spiders` is given.
        """

        # Validating arguments first so that neither the pool nor the
        # persistent queue is created for a crawler that cannot exist
        if spec is None and spider is None and spiders is None:
            raise TypeError(
                'minet.Crawler: expecting either `spec`, `spider` or `spiders`.'
            )

        # NOTE: crawling could work depth-first but:
        # buffer_size should be 0 (requires to fix quenouille issue #1)

        # Params
        self.start_jobs = start_jobs
        self.queue_path = queue_path
        self.threads = threads
        self.buffer_size = buffer_size
        self.throttle = throttle

        self.using_persistent_queue = queue_path is not None
        self.http = create_pool(threads=threads)
        self.state = CrawlerState()
        self.started = False

        # Memory queue
        if not self.using_persistent_queue:
            queue = Queue()

        # Persistent queue
        else:
            queue = SQLiteQueue(queue_path,
                                multithreading=True,
                                auto_commit=False)

        # Creating spiders
        # NOTE: `single_spider` is set on every path, following the spec
        # branches: True for a lone 'default' spider, False for a mapping
        if spec is not None:
            if 'spiders' in spec:
                spiders = {
                    name: DefinitionSpider(s, name=name)
                    for name, s in spec['spiders'].items()
                }
                self.single_spider = False
            else:
                spiders = {'default': DefinitionSpider(spec)}
                self.single_spider = True

        elif spider is not None:
            spiders = {'default': spider}
            self.single_spider = True

        else:
            self.single_spider = False

        # Solving function spiders
        # A new dict is built so a mapping given by the caller stays untouched
        solved_spiders = {}
        for name, s in spiders.items():
            if callable(s) and not isinstance(s, Spider):
                s = FunctionSpider(s, name)
            solved_spiders[name] = s

        self.queue = queue
        self.spiders = solved_spiders
示例#9
0
 def test_put_0(self):
     """Store the integer 0 and check a non-blocking get is not None."""
     queue = SQLiteQueue(path=self.path)
     queue.put(0)
     self.assertIsNotNone(queue.get(block=False))
示例#10
0
 def test_json_serializer(self):
     """Round-trip a nested dict through a queue using serializers.json."""
     q = SQLiteQueue(path=self.path, serializer=serializers.json)
     x = dict(a=1, b=2, c=dict(d=list(range(5)), e=[1]))
     q.put(x)
     # assertEqual rather than the assertEquals alias: the alias is
     # deprecated and was removed from TestCase in Python 3.12.
     self.assertEqual(q.get(), x)
示例#11
0
 def test_protocol_2(self):
     """A queue built with default arguments has protocol equal to None."""
     default_queue = SQLiteQueue(path=self.path)
     protocol = default_queue.protocol
     self.assertEqual(protocol, None)
示例#12
0
 def test_update(self):
     """Replace a stored item through its id and read the new value back."""
     queue = SQLiteQueue(path=self.path)
     # put() hands back the id that update() and get() are keyed on.
     item_id = queue.put("val1")
     queue.update(item="val2", id=item_id)
     self.assertEqual(queue.get(id=item_id), "val2")