Пример #1
0
def stop_start_job(self, frequency, start=None, restart=True):
    """
    Stop the running crawl job for a frequency and optionally relaunch it.

    If Heritrix lists a job named ``frequency`` and reports a non-empty
    status for it, that job is stopped, its action files are removed, a
    "STOPPED" status update is queued and assembly of its output is
    requested. When ``restart`` is true, a new job is then built from the
    W3ACT export for that frequency, started, and a "LAUNCHED" status update
    is queued.

    Args:
        self: Object providing ``retry`` (presumably a bound Celery task --
            confirm against the decorator, which is not visible here).
        frequency: Crawl frequency; also used as the Heritrix job name.
        start: Timestamp reported in the first log line. Defaults to the
            current UTC time, evaluated on every call. It is not used for
            anything else because target date filtering is disabled.
        restart: Whether to launch a new job after stopping the old one.

    Returns:
        A human-readable message describing what was done.

    Raises:
        Whatever ``self.retry`` raises, after the failure has been logged.
    """
    if start is None:
        # Evaluate per call: a ``datetime.utcnow()`` default in the signature
        # is computed once at definition time and then never changes.
        start = datetime.utcnow()

    try:
        logger.info("Stopping/starting %s at %s" % (frequency, start))

        # Set up connection to W3ACT:
        w = w3act(cfg.get('act', 'url'), cfg.get('act', 'username'), cfg.get('act', 'password'))
        # Set up connection to H3:
        h = hapyx.HapyX(
            "https://%s:%s" % (cfg.get('h3', 'host'), cfg.get('h3', 'port')),
            username=cfg.get('h3', 'username'),
            password=cfg.get('h3', 'password'))

        # Remember what (if anything) was stopped so it can be reported
        # when no restart is requested.
        stopped_job = None
        stopped_launch_id = None

        # Stop the job if currently running, queue the status update and
        # clean up its action files:
        if frequency in h.list_jobs() and h.status(frequency) != "":
            stopped_launch_id = h.get_launch_id(frequency)
            stopped_job = W3actJob.from_directory(w, "%s/%s" % (HERITRIX_JOBS, frequency), heritrix=h)
            stopped_job.stop()
            remove_action_files(frequency)
            crawl.status.update_job_status.delay(
                stopped_job.name, "%s/%s" % (stopped_job.name, stopped_launch_id), "STOPPED")

            # Pass on to the next step in the chain:
            logger.info("Requesting assembly of output for: %s/%s" % (frequency, stopped_launch_id))
            assemble_job_output.delay(frequency, stopped_launch_id)

        # Start job if requested:
        if restart:
            # NOTE: the export is used as-is; filtering targets by start/end
            # date against ``start`` is currently disabled.
            targets = w.get_ld_export(frequency)
            logger.debug("Found %s Targets in date range." % len(targets))
            job = W3actJob(w, targets, frequency, heritrix=h)
            logger.info("Starting job %s..." % job.name)
            job.start()
            launch_id = h.get_launch_id(frequency)
            crawl.status.update_job_status.delay(job.name, "%s/%s" % (job.name, launch_id), "LAUNCHED")
            message = "Launched job %s/%s with %s seeds." % (job.name, launch_id, len(job.seeds))
            logger.info(message)
            return message

        if stopped_job is not None:
            message = "Stopped job %s/%s without restarting..." % (stopped_job.name, stopped_launch_id)
            logger.info(message)
            return message

        message = "No running '%s' job to stop!" % frequency
        logger.warning(message)
        return message
    except Exception as e:
        # ``Exception`` rather than ``BaseException`` so KeyboardInterrupt and
        # SystemExit propagate instead of scheduling a retry.
        logger.exception(e)
        # The keyword is ``exc``; the original ``exe`` never passed the
        # failure on to retry().
        raise self.retry(countdown=10, exc=e)
Пример #2
0
    def run(self):
        """
        Stop this task's Heritrix job if it is currently running.

        When Heritrix lists the job and reports a non-empty status for it,
        the job is loaded from its local job folder, stopped, its action
        files are removed and it is marked as 'stopped' via ``mark_job_as``.
        Otherwise a warning is logged and nothing else happens.
        """
        # Set up connection to H3:
        heritrix = get_hapy_for_job(self.job)
        job_name = self.job.name

        logger.info("I'm stopping %s" % (job_name))

        # Bail out early unless the job exists and has a non-empty status:
        if job_name not in heritrix.list_jobs() or heritrix.status(job_name) == "":
            logger.warning("No {} job to be stopped!".format(job_name))
            return

        # Capture the launch id before stopping the job.
        current_launch = heritrix.get_launch_id(job_name)
        job_dir = "%s/%s" % (h3().local_job_folder, job_name)
        running = W3actJob.from_directory(job_dir, heritrix=heritrix)
        running.stop()
        remove_action_files(job_name, HERITRIX_JOBS=h3().local_job_folder)

        # Record an output file that can be used as a Target by a different task:
        mark_job_as(running, current_launch, 'stopped')
Пример #3
0
def stop_start_job(self, frequency, start=None, restart=True):
    """
    Stop the running crawl job for a frequency and optionally relaunch it.

    If Heritrix lists a job named ``frequency`` and reports a non-empty
    status for it, that job is stopped, its action files are removed, a
    "STOPPED" status update is queued and assembly of its output is
    requested. When ``restart`` is true, a new job is then built from the
    W3ACT export for that frequency, started, and a "LAUNCHED" status update
    is queued.

    Args:
        self: Object providing ``retry`` (presumably a bound Celery task --
            confirm against the decorator, which is not visible here).
        frequency: Crawl frequency; also used as the Heritrix job name.
        start: Timestamp reported in the first log line. Defaults to the
            current UTC time, evaluated on every call. It is not used for
            anything else because target date filtering is disabled.
        restart: Whether to launch a new job after stopping the old one.

    Returns:
        A human-readable message describing what was done.

    Raises:
        Whatever ``self.retry`` raises, after the failure has been logged.
    """
    if start is None:
        # Evaluate per call: a ``datetime.utcnow()`` default in the signature
        # is computed once at definition time and then never changes.
        start = datetime.utcnow()

    try:
        logger.info("Stopping/starting %s at %s" % (frequency, start))

        # Set up connection to W3ACT:
        w = w3act(cfg.get('act', 'url'), cfg.get('act', 'username'),
                  cfg.get('act', 'password'))
        # Set up connection to H3:
        h = hapyx.HapyX("https://%s:%s" %
                        (cfg.get('h3', 'host'), cfg.get('h3', 'port')),
                        username=cfg.get('h3', 'username'),
                        password=cfg.get('h3', 'password'))

        # Remember what (if anything) was stopped so it can be reported
        # when no restart is requested.
        stopped_job = None
        stopped_launch_id = None

        # Stop the job if currently running, queue the status update and
        # clean up its action files:
        if frequency in h.list_jobs() and h.status(frequency) != "":
            stopped_launch_id = h.get_launch_id(frequency)
            stopped_job = W3actJob.from_directory(
                w, "%s/%s" % (HERITRIX_JOBS, frequency), heritrix=h)
            stopped_job.stop()
            remove_action_files(frequency)
            crawl.status.update_job_status.delay(
                stopped_job.name,
                "%s/%s" % (stopped_job.name, stopped_launch_id),
                "STOPPED")

            # Pass on to the next step in the chain:
            logger.info("Requesting assembly of output for: %s/%s" %
                        (frequency, stopped_launch_id))
            assemble_job_output.delay(frequency, stopped_launch_id)

        # Start job if requested:
        if restart:
            # NOTE: the export is used as-is; filtering targets by start/end
            # date against ``start`` is currently disabled.
            targets = w.get_ld_export(frequency)
            logger.debug("Found %s Targets in date range." % len(targets))
            job = W3actJob(w, targets, frequency, heritrix=h)
            logger.info("Starting job %s..." % job.name)
            job.start()
            launch_id = h.get_launch_id(frequency)
            crawl.status.update_job_status.delay(
                job.name, "%s/%s" % (job.name, launch_id), "LAUNCHED")
            message = "Launched job %s/%s with %s seeds." % (
                job.name, launch_id, len(job.seeds))
            logger.info(message)
            return message

        if stopped_job is not None:
            message = "Stopped job %s/%s without restarting..." % (
                stopped_job.name, stopped_launch_id)
            logger.info(message)
            return message

        message = "No running '%s' job to stop!" % frequency
        logger.warning(message)
        return message
    except Exception as e:
        # ``Exception`` rather than ``BaseException`` so KeyboardInterrupt and
        # SystemExit propagate instead of scheduling a retry.
        logger.exception(e)
        # The keyword is ``exc``; the original ``exe`` never passed the
        # failure on to retry().
        raise self.retry(countdown=10, exc=e)