def stop_start_job(self, frequency, start=None, restart=True):
    """
    Restart the crawl job for a particular frequency.

    Stops the currently running Heritrix job for ``frequency`` (if any),
    notifies the status queue, requests assembly of the stopped job's
    output, and then — when ``restart`` is True — relaunches the job with
    a fresh Target export pulled from W3ACT.

    :param frequency: crawl frequency / job name (e.g. 'daily').
    :param start: datetime associated with this stop/start cycle; defaults
        to the current UTC time *at call time*. (The original default of
        ``start=datetime.utcnow()`` was evaluated once at import, freezing
        the timestamp for every subsequent call.)
    :param restart: when True, relaunch the job after stopping it.
    :returns: a human-readable summary string of what was done.
    :raises: re-raises via Celery ``self.retry`` on any failure.
    """
    if start is None:
        start = datetime.utcnow()
    try:
        logger.info("Stopping/starting %s at %s" % (frequency, start))
        # Set up connection to W3ACT:
        w = w3act(cfg.get('act', 'url'), cfg.get('act', 'username'), cfg.get('act', 'password'))
        # Set up connection to H3:
        h = hapyx.HapyX("https://%s:%s" % (cfg.get('h3', 'host'), cfg.get('h3', 'port')),
                        username=cfg.get('h3', 'username'), password=cfg.get('h3', 'password'))
        # Stop job if currently running: a non-empty status means Heritrix
        # considers the job active.
        if frequency in h.list_jobs() and h.status(frequency) != "":
            # Stop the running job, notify RabbitMQ and clean up the directory.
            launch_id = h.get_launch_id(frequency)
            job = W3actJob.from_directory(w, "%s/%s" % (HERITRIX_JOBS, frequency), heritrix=h)
            job.stop()
            remove_action_files(frequency)
            crawl.status.update_job_status.delay(job.name, "%s/%s" % (job.name, launch_id), "STOPPED")
            # Pass on to the next step in the chain:
            logger.info("Requesting assembly of output for: %s/%s" % (frequency, launch_id))
            assemble_job_output.delay(frequency, launch_id)
        else:
            job = None
        # Start job if requested:
        if restart:
            targets = w.get_ld_export(frequency)
            logger.debug("Found %s Targets in date range." % len(targets))
            job = W3actJob(w, targets, frequency, heritrix=h)
            logger.info("Starting job %s..." % job.name)
            job.start()
            launch_id = h.get_launch_id(frequency)
            crawl.status.update_job_status.delay(job.name, "%s/%s" % (job.name, launch_id), "LAUNCHED")
            logger.info("Launched job %s/%s with %s seeds." % (job.name, launch_id, len(job.seeds)))
            return "Launched job %s/%s with %s seeds." % (job.name, launch_id, len(job.seeds))
        else:
            if job:
                logger.info("Stopped job %s/%s without restarting..." % (job.name, launch_id))
                return "Stopped job %s/%s without restarting..." % (job.name, launch_id)
            else:
                logger.warning("No running '%s' job to stop!" % frequency)
                return "No running '%s' job to stop!" % frequency
    except Exception as e:
        # Was 'except BaseException', which also swallowed KeyboardInterrupt
        # and SystemExit; Exception is broad enough for a retry boundary.
        logger.exception(e)
        # Celery's Task.retry() takes 'exc', not 'exe' — the original kwarg
        # was silently ignored, losing the causing exception from the retry.
        raise self.retry(countdown=10, exc=e)
def run(self):
    """Stop this task's running Heritrix job and clean up after it.

    Connects to H3, stops the matching job when it is present and reports
    a non-empty status, removes its action files, and records a 'stopped'
    marker that downstream tasks can use as a Target. Logs a warning when
    there is nothing to stop.
    """
    heritrix = get_hapy_for_job(self.job)
    job_name = self.job.name
    logger.info("I'm stopping %s" % (job_name))
    # Guard: nothing to do unless H3 lists the job with a non-empty status.
    if job_name not in heritrix.list_jobs() or heritrix.status(job_name) == "":
        logger.warning("No {} job to be stopped!".format(job_name))
        return
    # Stop the running job, clean up the directory, initiate job assembly.
    launch_id = heritrix.get_launch_id(job_name)
    w3act_job = W3actJob.from_directory("%s/%s" % (h3().local_job_folder, job_name), heritrix=heritrix)
    w3act_job.stop()
    remove_action_files(job_name, HERITRIX_JOBS=h3().local_job_folder)
    # Record an output file that can be used as a Target by a different task:
    mark_job_as(w3act_job, launch_id, 'stopped')
def stop_start_job(self, frequency, start=None, restart=True):
    """
    Restart the crawl job for a particular frequency.

    Stops the currently running Heritrix job for ``frequency`` (if any),
    notifies the status queue, requests assembly of the stopped job's
    output, and then — when ``restart`` is True — relaunches the job with
    a fresh Target export pulled from W3ACT.

    :param frequency: crawl frequency / job name (e.g. 'daily').
    :param start: datetime associated with this stop/start cycle; defaults
        to the current UTC time *at call time*. (The original default of
        ``start=datetime.utcnow()`` was evaluated once at import, freezing
        the timestamp for every subsequent call.)
    :param restart: when True, relaunch the job after stopping it.
    :returns: a human-readable summary string of what was done.
    :raises: re-raises via Celery ``self.retry`` on any failure.
    """
    if start is None:
        start = datetime.utcnow()
    try:
        logger.info("Stopping/starting %s at %s" % (frequency, start))
        # Set up connection to W3ACT:
        w = w3act(cfg.get('act', 'url'), cfg.get('act', 'username'), cfg.get('act', 'password'))
        # Set up connection to H3:
        h = hapyx.HapyX("https://%s:%s" % (cfg.get('h3', 'host'), cfg.get('h3', 'port')),
                        username=cfg.get('h3', 'username'), password=cfg.get('h3', 'password'))
        # Stop job if currently running: a non-empty status means Heritrix
        # considers the job active.
        if frequency in h.list_jobs() and h.status(frequency) != "":
            # Stop the running job, notify RabbitMQ and clean up the directory.
            launch_id = h.get_launch_id(frequency)
            job = W3actJob.from_directory(w, "%s/%s" % (HERITRIX_JOBS, frequency), heritrix=h)
            job.stop()
            remove_action_files(frequency)
            crawl.status.update_job_status.delay(
                job.name, "%s/%s" % (job.name, launch_id), "STOPPED")
            # Pass on to the next step in the chain:
            logger.info("Requesting assembly of output for: %s/%s" % (frequency, launch_id))
            assemble_job_output.delay(frequency, launch_id)
        else:
            job = None
        # Start job if requested:
        if restart:
            targets = w.get_ld_export(frequency)
            logger.debug("Found %s Targets in date range." % len(targets))
            job = W3actJob(w, targets, frequency, heritrix=h)
            logger.info("Starting job %s..." % job.name)
            job.start()
            launch_id = h.get_launch_id(frequency)
            crawl.status.update_job_status.delay(
                job.name, "%s/%s" % (job.name, launch_id), "LAUNCHED")
            logger.info("Launched job %s/%s with %s seeds." % (job.name, launch_id, len(job.seeds)))
            return "Launched job %s/%s with %s seeds." % (job.name, launch_id, len(job.seeds))
        else:
            if job:
                logger.info("Stopped job %s/%s without restarting..." % (job.name, launch_id))
                return "Stopped job %s/%s without restarting..." % (job.name, launch_id)
            else:
                logger.warning("No running '%s' job to stop!" % frequency)
                return "No running '%s' job to stop!" % frequency
    except Exception as e:
        # Was 'except BaseException', which also swallowed KeyboardInterrupt
        # and SystemExit; Exception is broad enough for a retry boundary.
        logger.exception(e)
        # Celery's Task.retry() takes 'exc', not 'exe' — the original kwarg
        # was silently ignored, losing the causing exception from the retry.
        raise self.retry(countdown=10, exc=e)