def test_can_get_worker_by_uuid(self):
    worker = WorkerFactory.create()
    WorkerFactory.create()

    loaded_worker = Worker.by_uuid(worker.uuid, self.db)
    expect(loaded_worker.id).to_equal(worker.id)

    invalid_worker = Worker.by_uuid(uuid4(), self.db)
    expect(invalid_worker).to_be_null()
def _ping_api(self):
    self._remove_zombie_workers()

    worker = Worker.by_uuid(self.uuid, self.db)

    self.db.begin(subtransactions=True)
    try:
        if worker:
            # Known worker: refresh its heartbeat and current URL.
            worker.last_ping = datetime.now()
            worker.current_url = self.working_url
        else:
            # First ping from this worker: register it.
            worker = Worker(uuid=self.uuid, current_url=self.working_url)
            self.db.add(worker)

        self.db.flush()
        self.db.commit()
    except OperationalError:
        self.db.rollback()

    self.publish(dumps({
        'type': 'worker-status',
        'workerId': str(worker.uuid)
    }))

    return True
def _start_job(self, url):
    self.update_otto_limiter()

    self.working_url = url

    self.db.begin(subtransactions=True)
    worker = Worker.by_uuid(self.uuid, self.db)
    worker.current_url = url
    worker.last_ping = datetime.now()
    self.db.flush()
    self.db.commit()

    return True
def test_worker_alive_can_add_new_worker(self):
    worker_uuid = uuid4()
    response = yield self.http_client.fetch(
        self.get_url('/worker/%s/alive' % str(worker_uuid)),
        method='POST',
        body=''
    )

    worker = Worker.by_uuid(worker_uuid, self.db)

    expect(worker).not_to_be_null()
    expect(response.code).to_equal(200)
    expect(response.body).to_be_like(str(worker.uuid))
def test_worker_complete_work(self):
    worker = WorkerFactory.create(current_url="http://www.globo.com/")
    self.db.flush()

    response = yield self.http_client.fetch(
        self.get_url('/worker/%s/complete' % str(worker.uuid)),
        method='POST',
        body=''
    )

    worker = Worker.by_uuid(worker.uuid, self.db)

    expect(worker).not_to_be_null()
    expect(response.code).to_equal(200)
    expect(response.body).to_be_like('OK')
    expect(worker.current_url).to_be_null()
def has_limit_to_work(cls, db, active_domains, url, avg_links_per_page=10):
    from holmes.models import Worker  # avoid circular dependency

    if avg_links_per_page < 1:
        avg_links_per_page = 1

    limiters = cls.get_limiters_for_domains(db, active_domains)
    limiter = cls.__get_limiter_for_url(limiters, url)

    if limiter:
        worker_count = Worker.number_of_workers_in_same_limiter_url(
            db, limiter.url
        )
        # Cap the number of workers on this limiter URL at
        # ceil(limiter.value / avg_links_per_page).
        if worker_count >= math.ceil(float(limiter.value) / float(avg_links_per_page)):
            return False

    return True
def test_worker_alive_can_ping_existing_worker(self):
    date = datetime.now()
    worker = WorkerFactory.create(last_ping=date)
    self.db.flush()

    response = yield self.http_client.fetch(
        self.get_url('/worker/%s/alive' % str(worker.uuid)),
        method='POST',
        body=''
    )

    worker = Worker.by_uuid(worker.uuid, self.db)

    expect(worker).not_to_be_null()
    expect(response.code).to_equal(200)
    expect(response.body).to_be_like(str(worker.uuid))
    expect(worker.last_ping).to_be_greater_than(date)
def _complete_job(self, lock, error=None):
    self.working_url = None
    worker = Worker.by_uuid(self.uuid, self.db)

    if worker:
        # Retry up to three times: concurrent workers updating the
        # same rows can deadlock.
        for i in range(3):
            self.db.begin(subtransactions=True)
            try:
                self.cache.release_next_job(lock)
                worker.current_url = None
                worker.last_ping = datetime.utcnow()
                self.db.flush()
                self.db.commit()
                break
            except Exception:
                err = sys.exc_info()[1]
                if 'Deadlock found' in str(err):
                    # Roll back the failed transaction before retrying;
                    # the session cannot be reused otherwise.
                    self.db.rollback()
                    self.error(
                        'Deadlock happened! Trying again (try number %d)! '
                        '(Details: %s)' % (i + 1, str(err))
                    )
                else:
                    self.db.rollback()
                    raise

    return True