Пример #1
0
    def test_can_get_worker_by_uuid(self):
        # Two workers are created; only the first one is looked up below.
        target = WorkerFactory.create()
        WorkerFactory.create()

        # A known uuid must resolve to the worker that owns it.
        found = Worker.by_uuid(target.uuid, self.db)
        expect(found.id).to_equal(target.id)

        # A freshly generated uuid is expected to resolve to nothing.
        missing = Worker.by_uuid(uuid4(), self.db)
        expect(missing).to_be_null()
Пример #2
0
    def _ping_api(self):
        """Record a heartbeat for this worker and publish its status.

        Calls ``self._remove_zombie_workers()``, then either refreshes the
        ``last_ping`` / ``current_url`` of the ``Worker`` row matching
        ``self.uuid`` or adds a new row when none is found. An
        ``OperationalError`` raised while writing is rolled back and
        otherwise ignored; the status message is published either way.

        :returns: always ``True``.
        """
        self._remove_zombie_workers()

        record = Worker.by_uuid(self.uuid, self.db)

        self.db.begin(subtransactions=True)

        try:
            if not record:
                # First ping seen for this uuid: register the worker.
                record = Worker(uuid=self.uuid, current_url=self.working_url)
                self.db.add(record)
            else:
                record.last_ping = datetime.now()
                record.current_url = self.working_url
            self.db.flush()
            self.db.commit()
        except OperationalError:
            # Best effort: a failed write must not prevent the status publish.
            self.db.rollback()

        status = {
            'type': 'worker-status',
            'workerId': str(record.uuid)
        }
        self.publish(dumps(status))

        return True
Пример #3
0
    def _start_job(self, url):
        """Mark this worker as busy with ``url``.

        Calls ``self.update_otto_limiter()``, stores ``url`` in
        ``self.working_url`` and, when a ``Worker`` row exists for
        ``self.uuid``, writes ``url`` and a fresh ``last_ping`` to it.

        :param url: the URL this worker is about to process.
        :returns: always ``True``.
        :raises Exception: any failure while writing is re-raised after the
            transaction has been rolled back.
        """
        self.update_otto_limiter()

        self.working_url = url

        self.db.begin(subtransactions=True)

        try:
            worker = Worker.by_uuid(self.uuid, self.db)

            # by_uuid may find no row for this uuid; in that case there is
            # nothing to update and touching attributes would raise
            # AttributeError on None.
            if worker:
                worker.current_url = url
                worker.last_ping = datetime.now()

            self.db.flush()
            self.db.commit()
        except Exception:
            # Do not leave the transaction opened above dangling on failure.
            self.db.rollback()
            raise

        return True
Пример #4
0
    def test_worker_alive_can_add_new_worker(self):
        # The uuid is generated here and no worker is created for it first.
        new_uuid = uuid4()
        alive_url = self.get_url('/worker/%s/alive' % str(new_uuid))

        response = yield self.http_client.fetch(
            alive_url,
            method='POST',
            body=''
        )

        # The POST above is expected to have created the worker row.
        created = Worker.by_uuid(new_uuid, self.db)

        expect(created).not_to_be_null()
        expect(response.code).to_equal(200)
        expect(response.body).to_be_like(str(created.uuid))
Пример #5
0
    def test_worker_complete_work(self):
        # Start from a worker that is currently assigned a URL.
        busy_worker = WorkerFactory.create(current_url="http://www.globo.com/")
        self.db.flush()

        complete_url = self.get_url('/worker/%s/complete' % str(busy_worker.uuid))

        response = yield self.http_client.fetch(
            complete_url,
            method='POST',
            body=''
        )

        # Look the worker up again to observe the state after the request.
        reloaded = Worker.by_uuid(busy_worker.uuid, self.db)

        expect(reloaded).not_to_be_null()
        expect(response.code).to_equal(200)
        expect(response.body).to_be_like('OK')
        expect(reloaded.current_url).to_be_null()
Пример #6
0
    def has_limit_to_work(cls, db, active_domains, url, avg_links_per_page=10):
        """Tell whether another worker may start working on ``url``.

        The limiter matching ``url`` is picked from the limiters of
        ``active_domains``. When one is found, the number of workers already
        counted against that limiter's url is compared with
        ``ceil(limiter.value / avg_links_per_page)``.

        :param db: database session handed to the lookup helpers.
        :param active_domains: domains whose limiters are considered.
        :param url: the URL a worker wants to process.
        :param avg_links_per_page: divisor for the limiter value; values
            below 1 are treated as 1.
        :returns: ``False`` when the worker count has reached the computed
            ceiling, ``True`` otherwise (including when no limiter matches).
        """
        from holmes.models import Worker  # Avoid circular dependency

        # Clamp the divisor so the ceiling never exceeds the limiter value.
        links_per_page = 1 if avg_links_per_page < 1 else avg_links_per_page

        domain_limiters = cls.get_limiters_for_domains(db, active_domains)
        limiter = cls.__get_limiter_for_url(domain_limiters, url)

        if not limiter:
            return True

        active_workers = Worker.number_of_workers_in_same_limiter_url(db, limiter.url)
        allowed_workers = math.ceil(float(limiter.value) / float(links_per_page))

        if active_workers >= allowed_workers:
            return False

        return True
Пример #7
0
    def test_worker_alive_can_ping_existing_worker(self):
        # Remember the ping time the worker starts with.
        initial_ping = datetime.now()

        existing = WorkerFactory.create(last_ping=initial_ping)
        self.db.flush()

        alive_url = self.get_url('/worker/%s/alive' % str(existing.uuid))

        response = yield self.http_client.fetch(
            alive_url,
            method='POST',
            body=''
        )

        # Look the worker up again to observe the state after the request.
        reloaded = Worker.by_uuid(existing.uuid, self.db)

        expect(reloaded).not_to_be_null()
        expect(response.code).to_equal(200)
        expect(response.body).to_be_like(str(reloaded.uuid))
        # The request must have moved last_ping forward.
        expect(reloaded.last_ping).to_be_greater_than(initial_ping)
Пример #8
0
    def _complete_job(self, lock, error=None):
        """Clear this worker's current job and release the job lock.

        Resets ``self.working_url`` and, when a ``Worker`` row exists for
        ``self.uuid``, releases ``lock`` through ``self.cache``, clears the
        row's ``current_url`` and refreshes ``last_ping``. The write is
        attempted up to three times when the failure message contains
        ``'Deadlock found'``.

        :param lock: handed to ``self.cache.release_next_job``.
        :param error: not used by this method.
        :returns: always ``True``, including when every attempt deadlocked.
        :raises Exception: any non-deadlock failure is re-raised after the
            transaction has been rolled back.
        """
        self.working_url = None
        worker = Worker.by_uuid(self.uuid, self.db)

        if not worker:
            # NOTE(review): the lock is not released on this path - confirm
            # that this is intended.
            return True

        for attempt in range(3):
            self.db.begin(subtransactions=True)

            try:
                self.cache.release_next_job(lock)
                worker.current_url = None
                # NOTE(review): UTC timestamp - confirm other writers of
                # last_ping use the same clock.
                worker.last_ping = datetime.utcnow()
                self.db.flush()
                self.db.commit()
                break
            except Exception as err:
                # Roll back on every failure: the session cannot be reused
                # for the next attempt while the failed transaction is open.
                self.db.rollback()

                if 'Deadlock found' not in str(err):
                    raise

                self.error('Deadlock happened! Trying again (try number %d)! (Details: %s)' % (attempt, str(err)))

        return True