Example #1
    def kill_job(self, job):
        """
        Notify Worker that a job should be killed

        Args:
            job (modelrunner.Job):  job instance
        """

        if job.status == Job.STATUS_QUEUED:
            # case 1:  job is in QUEUED state
            #          remove it from the queue and mark as killed

            job_queue = job_queue_name(job.model)
            logger.info(
                "killing job {} by removing from queue {}".
                format(job.uuid, job_queue))

            command_dict = {'command': 'PROCESS_JOB', 'job_uuid': job.uuid}
            remove_command(redis_connection(), job_queue, command_dict)
            job.status = Job.STATUS_KILLED
            # save it
            Job[job.uuid] = job
        elif job.status == Job.STATUS_RUNNING:
            # case 2:  job is in RUNNING state
            #          send message to worker to kill the job
            worker = worker_name(job.worker_url, job.model)
            worker_channel = node_channel_name(worker)
            logger.info("sending command to kill job on channel {}".
                        format(worker_channel))
            command_dict = {'command': "KILL_JOB", 'job_uuid': job.uuid}
            publish_command(redis_connection(), worker_channel, command_dict)
        else:
            logger.info("kill called on job {} in incompatible state {}".
                        format(job.uuid, job.status))
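
The queued branch above depends on remove_command to pull an already-enqueued command back out of the queue. Its implementation is not among these examples; a minimal sketch, assuming commands live as JSON strings in a Redis list (the serialization and key layout are assumptions, not the library's confirmed internals):

import json

def remove_command(redis_conn, queue_name, command_dict):
    # hypothetical sketch: LREM deletes entries that match the serialized
    # command byte-for-byte, so serialization must be deterministic
    # (sort_keys keeps the key order stable across producers)
    redis_conn.lrem(queue_name, 0, json.dumps(command_dict, sort_keys=True))
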
Example #2
def test_run_good_bad():

    model_name = "test"
    config = make_config(model_name)

    worker = get_worker(config)
    sleep8_job = setup_queued_job(config, "processed_test", "sleep_8.zip")
    bad_job = setup_queued_job(config, "failed_test", "bad.zip")
    enqueue_worker_job(sleep8_job)
    enqueue_worker_job(bad_job)

    # process good and bad jobs in bg thread
    tq = Thread(target=worker.wait_for_queue_commands)
    tq.start()

    # give both jobs time to finish (the good job sleeps for 8 seconds)
    time.sleep(10)

    assert Job[sleep8_job.uuid].status == Job.STATUS_PROCESSED
    assert Job[bad_job.uuid].status == Job.STATUS_FAILED

    # stop waiting
    stop_queue_command = {'command': 'STOP_PROCESSING_QUEUE'}
    enqueue_command(
        redis_connection(),
        job_queue_name(model_name),
        stop_queue_command)

    tq.join()

    cleanup(config)
Example #3
def enqueue_complete_job(job):
    """
    Submit job to the primary server's queue
    """
    redis_conn = redis_connection()
    queue_name = primary_queue_name(job.primary_url)
    command_dict = {'command': 'COMPLETE_JOB', 'job_uuid': job.uuid}
    enqueue_command(redis_conn, queue_name, command_dict)
Example #4
def enqueue_worker_job(job):
    """
    Submit job to queue for worker
    """
    redis_conn = redis_connection()
    queue_name = job_queue_name(job.model)
    command_dict = {'command': 'PROCESS_JOB', 'job_uuid': job.uuid}
    enqueue_command(redis_conn, queue_name, command_dict)
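
Examples #3 and #4 (and most snippets below) funnel through enqueue_command. A plausible counterpart to the remove_command sketch above, again assuming JSON on a Redis list rather than the library's confirmed internals:

import json

def enqueue_command(redis_conn, queue_name, command_dict):
    # hypothetical sketch: append the serialized command to the queue;
    # consumers pop from the other end, giving FIFO ordering
    redis_conn.rpush(queue_name, json.dumps(command_dict, sort_keys=True))
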
Example #5
    def enqueue_job(self, queue):
        job = {'id': self.count,
               'origin': self.name,
               'status': 'NEW'}

        command_dict = {'command': 'PROCESS_JOB', 'job': job}
        enqueue_command(redis_connection(), queue, command_dict)
        self.count += 1
Example #6
def publish(channel_name, command_dict, wait_time=0):
    """
    Test worker command processing
    """
    redis_conn = redis_connection()
    if wait_time > 0:
        time.sleep(wait_time)

    publish_command(redis_conn, channel_name, command_dict)
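
publish_command is the pub/sub sibling of enqueue_command: a broadcast with no persistence, so only nodes subscribed at that moment receive it. A sketch under the same JSON-serialization assumption:

import json

def publish_command(redis_conn, channel_name, command_dict):
    # hypothetical sketch: fire-and-forget broadcast; contrast with the
    # durable list used by enqueue_command
    redis_conn.publish(channel_name, json.dumps(command_dict, sort_keys=True))
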
Example #7
def get_primary(config):
    primary_handler = PrimaryServer(
                        config["primary_url"],
                        config["primary_data_dir"])
    channels = [node_channel_name(primary_handler.node.name),
                all_nodes_channel_name()]
    primary = Dispatcher(
                redis_connection(),
                primary_handler,
                primary_queue_name(primary_handler.node.name),
                channels)
    return primary
Example #8
def get_worker(config):
    worker_handler = WorkerServer(
                        config["worker_url"],
                        config["worker_data_dir"],
                        config["model"],
                        config["command_dict"])
    channels = [node_channel_name(worker_handler.node.name),
                all_nodes_channel_name()]
    worker = Dispatcher(redis_connection(),
                        worker_handler,
                        job_queue_name(config["model"]),
                        channels)
    return worker
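
get_primary and get_worker wire a handler to one queue plus a per-node channel and a broadcast channel. The naming helpers themselves are not among these examples; a sketch of the scheme they imply (the key prefixes here are invented for illustration, only the helper names come from the examples):

def job_queue_name(model):
    # hypothetical layout: one durable work queue per model
    return "modelrunner:queue:" + model

def primary_queue_name(primary_url):
    # one completion queue per primary server
    return "modelrunner:queue:" + primary_url

def node_channel_name(node_name):
    # pub/sub channel addressed to a single node
    return "modelrunner:channel:" + node_name

def all_nodes_channel_name():
    # broadcast channel that every node subscribes to
    return "modelrunner:channel:all"
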
Example #9
def cleanup(config):

    redis_conn = redis_connection()

    def delete_subdirs(d):
        for subdir in os.listdir(d):
            full_subdir = os.path.join(d, subdir)
            if os.path.isdir(full_subdir):
                shutil.rmtree(full_subdir, ignore_errors=True)

    delete_subdirs(config["primary_data_dir"])
    delete_subdirs(config["worker_data_dir"])

    redis_conn.flushdb()
Example #10
    def process_job(self, command_dict):
        job = command_dict['job']
        self.jobs[job['id']] = job
        job['status'] = 'PROCESSING'
        # simulate work one second at a time, checking for a kill request
        sleep_amount = 0
        while sleep_amount < self.sleep_time:
            time.sleep(1)
            if job['status'] == 'KILLING':
                job['status'] = 'KILLED'
                break
            sleep_amount += 1

        # slept the full time without being killed: report completion
        if sleep_amount == self.sleep_time:
            job['status'] = 'PROCESSED'
            command_dict = {'command': 'COMPLETE_JOB', 'job': job}
            enqueue_command(redis_connection(), job['origin'], command_dict)
Example #11
    def refresh_node_status(self):
        """
        Refresh the status of all nodes by
        1.  Deleting existing state
        2.  Publishing a request for all nodes to update

        All listening nodes will update the Node hash with their state
        """

        for node in Node.values():
            del Node[node.name]

        status_command = {"command": "UPDATE_STATUS"}
        publish_command(
            redis_connection(),
            all_nodes_channel_name(),
            status_command)
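
This only works if every live node answers the broadcast. The responding side is not shown in these examples; a hedged sketch, assuming the dispatcher routes UPDATE_STATUS to a handler method and that Node supports the same class-level item assignment used with Job:

    def update_status(self, command_dict):
        # hypothetical handler run on each node: re-register this
        # node's state in the shared Node hash after it was cleared
        Node[self.node.name] = self.node
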
Example #12
    def enqueue(self, job, job_data_blob=None, job_data_url=None):
        """
        Write job data to file and queue up for processing

        Note:  This should be run async wrt a web server as it will block
            on fetching/writing data

        Args:
            job (modelrunner.Job):  job instance
            job_data_blob (blob):  blob of a zip file to be written to disk
            job_data_url (str):  the url of a zip file to be fetched

        """

        # only allow job data as blob or url
        assert (job_data_blob is None) ^ (job_data_url is None)

        job_data_dir = os.path.join(self.data_dir, job.uuid)
        if not os.path.exists(job_data_dir):
            os.mkdir(job_data_dir)

        job_data_file = os.path.join(job_data_dir, "input.zip")
        if job_data_blob:
            logger.info("writing input file for job to {}".
                        format(job_data_file))
            with open(job_data_file, 'wb') as file_handle:
                file_handle.write(job_data_blob)
        else:
            logger.info("retrieving input file for job and writing to {}".
                        format(job_data_file))
            fetch_file_from_url(job_data_url, job_data_dir, "input.zip")

        # add to global job list then queue it to be run
        job.primary_url = self.node.node_url
        job.primary_data_dir = self.data_dir  # to know where output.zip is
        job.status = Job.STATUS_QUEUED
        Job[job.uuid] = job
        job_queue = job_queue_name(job.model)
        command_dict = {'command': 'PROCESS_JOB', 'job_uuid': job.uuid}
        enqueue_command(redis_connection(), job_queue, command_dict)
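
A usage sketch for enqueue (primary_handler is assumed to be the PrimaryServer from Example #7, and job/other_job modelrunner.Job instances; the XOR assert means exactly one data source per call):

# submit the job's input as an in-memory blob ...
with open("input.zip", "rb") as f:
    primary_handler.enqueue(job, job_data_blob=f.read())

# ... or have the server fetch it from a URL instead
primary_handler.enqueue(other_job, job_data_url="http://example.com/input.zip")
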
Example #13
def test_primary_worker_scenario():

    primary_handler = PrimaryCommandHandler()
    worker_handler = WorkerCommandHandler()

    primary = Dispatcher(redis_connection(),
                         primary_handler,
                         "primary",
                         ["primary"])

    worker = Dispatcher(redis_connection(),
                        worker_handler,
                        "worker",
                        ["worker"])

    # start them up
    Thread(target=primary.wait_for_queue_commands).start()
    Thread(target=primary.wait_for_channel_commands).start()
    Thread(target=worker.wait_for_queue_commands).start()
    Thread(target=worker.wait_for_channel_commands).start()

    # submit a job
    primary_handler.enqueue_job("worker")

    # wait for it to complete
    sleep_time = 0
    while sleep_time < worker_handler.sleep_time + 1:
        time.sleep(1)
        sleep_time += 1

    assert (len(primary_handler.jobs) == 1 and
            primary_handler.jobs[0]['status'] == 'COMPLETE')

    stop_queue_command = {'command': 'STOP_PROCESSING_QUEUE'}
    stop_channel_command = {'command': 'STOP_PROCESSING_CHANNELS'}

    publish_command(redis_connection(), "worker", stop_queue_command)
    publish_command(redis_connection(), "primary", stop_queue_command)
    publish_command(redis_connection(), "worker", stop_channel_command)
    publish_command(redis_connection(), "primary", stop_channel_command)
Example #14
    def kill_job(self, channel):
        command_dict = {'command': 'KILL_JOB'}
        publish_command(redis_connection(), channel, command_dict)
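
Example #1 publishes this same KILL_JOB command at a running job's worker. The receiving handler is not among these examples; a sketch of what it might do, assuming the job's subprocess pid was recorded via set_node_status as in Example #15 below:

import os
import signal

    def kill_job(self, command_dict):
        # hypothetical worker-side handler: SIGKILL the recorded job
        # subprocess; process_job then sees return code -signal.SIGKILL
        # and marks the job KILLED
        if getattr(self.node, "job_pid", None):
            os.kill(self.node.job_pid, signal.SIGKILL)
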
Example #15
    def process_job(self, command_dict):
        """
        process job
        command format {'command': 'PROCESS_JOB',
                        'job_uuid': <uuid>}

        """
        job_uuid = command_dict['job_uuid']
        try:
            job = Job[job_uuid]
        except KeyError as e:
            # Job not found is not worth re-raising
            logger.warning(e)
            logger.warning("Job {} missing".format(job_uuid))
            return

        # assign the job to this worker
        job.worker_url = self.node.node_url
        job.worker_data_dir = self.data_dir
        job_data_dir = self._setup_job_dir(job)

        # setup subproc to run model command and output to local job log
        logger.info("preparing input for job {}".format(job.uuid))
        job_data_log = open(os.path.join(job_data_dir, "job_log.txt"), 'w')

        # primary_queue to notify primary server of any errors or completion
        primary_queue = primary_queue_name(job.primary_url)

        # update job status
        job.status = Job.STATUS_RUNNING
        job.on_primary = False  # now on worker
        Job[job.uuid] = job

        # catch data prep exceptions so that we mark the job as failed
        try:
            self._prep_input(job)
        except Exception:
            # Fail the job, log it and notify primary
            failure_msg = "Failed prepping data for job {}".format(job.uuid)
            logger.error(failure_msg)
            job_data_log.write(failure_msg)
            job_data_log.close()
            job.status = Job.STATUS_FAILED
            Job[job.uuid] = job
            command_dict = {'command': 'COMPLETE_JOB', 'job_uuid': job.uuid}
            enqueue_command(redis_connection(), primary_queue, command_dict)
            return

        # Input has been prepped so start the job
        command = self.model_commands[self.node.model]
        logger.info("starting job {}".format(job.uuid))

        # add the input and output dir to the command
        popen_proc = self._run_subprocess(command, job, job_data_log)

        # set hidden status attributes
        self.set_node_status(Node.STATUS_RUNNING,
                             job_uuid=job.uuid,
                             job_pid=popen_proc.pid)

        logger.info("job {} running with pid {}".format(
            job.uuid, popen_proc.pid))

        # wait for command to finish or for it to be killed
        return_code = popen_proc.wait()

        # Reset hidden status attributes
        self.set_node_status(Node.STATUS_WAITING)

        # close job log
        job_data_log.close()
        logger.info("finished job {} with return code {}".format(
            job.uuid, return_code))

        # update job status (use command return code for now)
        if return_code == 0:
            logger.info("zipping output of job {}".format(job.uuid))
            self._prep_output(job)
            job.status = Job.STATUS_PROCESSED
        elif return_code == -signal.SIGKILL:
            job.status = Job.STATUS_KILLED
        else:
            job.status = Job.STATUS_FAILED

        Job[job.uuid] = job

        # notify primary server job is done
        command_dict = {'command': 'COMPLETE_JOB', 'job_uuid': job.uuid}
        enqueue_command(redis_connection(), primary_queue, command_dict)
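
Dispatcher and its wait_for_queue_commands loop appear throughout these examples without their implementation. A minimal sketch of the queue side, assuming JSON-serialized commands and a handler method named after each command (both assumptions):

import json

class Dispatcher:
    def __init__(self, redis_conn, handler, queue_name, channel_names):
        self.redis_conn = redis_conn
        self.handler = handler
        self.queue_name = queue_name
        self.channel_names = channel_names

    def wait_for_queue_commands(self):
        # block on the queue; STOP_PROCESSING_QUEUE ends the loop, any
        # other command dispatches to a handler method named after it
        # (e.g. PROCESS_JOB -> handler.process_job)
        while True:
            _, raw = self.redis_conn.blpop(self.queue_name)
            command_dict = json.loads(raw)
            if command_dict['command'] == 'STOP_PROCESSING_QUEUE':
                break
            getattr(self.handler, command_dict['command'].lower())(command_dict)
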
Example #16
# setup log
logger = logging.getLogger('modelrunner')

logger.info("modelrunner %s (Python %s)" %
            (__version__, '.'.join(map(str, sys.version_info[:3]))))

# so we can load config via cmd line args
parse_command_line()
parse_config_file(config.options.config_file)

# initialize the global application settings
initialize(config.options.redis_url)

# get the command_ keys
command_dict = config.options.group_dict("model_command")

worker_handler = WorkerServer(config.options.worker_url,
                              config.options.data_dir, config.options.model,
                              command_dict)
channels = [
    node_channel_name(worker_handler.node.name),
    all_nodes_channel_name()
]
worker = Dispatcher(redis_connection(), worker_handler,
                    job_queue_name(config.options.model), channels)

# start listening for commands on queue and channels in bg
Thread(target=worker.wait_for_queue_commands).start()
Thread(target=worker.wait_for_channel_commands).start()
Example #17
# -*- coding: utf-8 -*-

from modelrunner import settings
from modelrunner.redisent import RedisEntity

import datetime
from six import string_types

RedisEntity._prefix = "test"
RedisEntity._db = settings.redis_connection()


class User(RedisEntity):
    def __init__(self, id=None, name=None, created=None):
        self.id = int(id)
        self.name = name
        self.created = self._init_created(created)

    def _init_created(self, created):
        if isinstance(created, string_types):
            return datetime.datetime.strptime(created, "%Y-%m-%dT%H:%M:%S")
        elif isinstance(created, datetime.datetime):
            return created
        else:
            raise ValueError("Invalid type {} for created attribute".format(
                type(created)))

    def __eq__(self, other):
        return (isinstance(other, self.__class__)
                and self.__dict__ == other.__dict__)
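
A round-trip usage sketch, assuming RedisEntity supports the class-level item syntax seen with Job and Node and rebuilds instances through __init__ (so created returns through the string branch of _init_created):

import datetime

user = User(id=1, name="alice", created=datetime.datetime(2020, 1, 1))
User[user.id] = user            # persist under the "test" prefix
assert User[user.id] == user    # __eq__ compares the attribute dicts
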
Example #18
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Script to stop the configured redis instance
"""

from modelrunner import config
from modelrunner.settings import (initialize, redis_connection)

# setup config options
from tornado.options import parse_command_line, parse_config_file

# so we can load config via cmd line args
parse_command_line()
parse_config_file(config.options.config_file)

# initialize the global application settings
initialize(config.options.redis_url)

# stop redis
redis_connection().shutdown()
Example #19
    def get_queued_commands():
        return get_all_commands(
            redis_connection(),
            job_queue_name(model_name))
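
get_all_commands is a non-destructive peek at everything still queued. A sketch consistent with the list-based queue assumed in the earlier enqueue_command sketch:

import json

def get_all_commands(redis_conn, queue_name):
    # hypothetical sketch: read every pending command without popping it
    return [json.loads(raw) for raw in redis_conn.lrange(queue_name, 0, -1)]
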
Example #20
logger = logging.getLogger('modelrunner')

logger.info("modelrunner %s (Python %s)" %
            (__version__,
             '.'.join(map(str, sys.version_info[:3]))))

# so we can load config via cmd line args
parse_command_line()
parse_config_file(config.options.config_file)

# initialize the global application settings
initialize(config.options.redis_url)

# get the command_ keys
command_dict = config.options.group_dict("model_command")

primary_handler = PrimaryServer(
                    config.options.primary_url,
                    config.options.data_dir)
channels = [node_channel_name(primary_handler.node.name),
            all_nodes_channel_name()]
primary = Dispatcher(
            redis_connection(),
            primary_handler,
            primary_queue_name(primary_handler.node.name),
            channels)

# continuously wait for jobs to complete and for status inquiries
Thread(target=primary.wait_for_queue_commands).start()
Thread(target=primary.wait_for_channel_commands).start()