class Tracker(object): logger = get_logger(__file__.split('/')[-1]) started = False started_lock = threading.Lock() should_stop = False launcher_pid = None submitted_queue = Queue.Queue() waiting_queues_lock = threading.Lock() waiting_queues = {} tasks_waiting_for_input = [] tasks = [] # type: List[Task] fd_to_task_map = {} main_thread = None invocation_loop_thread = None with open(settings['aws_access_key_id_file'], 'r') as f: akid = f.read().strip() with open(settings['aws_secret_access_key_file'], 'r') as f: secret = f.read().strip() cacert = read_pem( settings['cacert_file']) if 'cacert_file' in settings else None srvcrt = read_pem( settings['srvcrt_file']) if 'srvcrt_file' in settings else None srvkey = read_pem( settings['srvkey_file']) if 'srvkey_file' in settings else None @classmethod def _handle_server_sock(cls, daemon_socketpoller): """ :type daemon_socketpoller: SocketPoller :param tasks: :return: """ batched_accept = True if batched_accept: while True: try: # ns :: socket._socketobject, addr :: address of lambda (ns, addr) = daemon_socketpoller.accept() Tracker.logger.debug("new conn from addr: %s:%s", addr[0], addr[1]) except socket.error, e: err = e.args[0] if err != errno.EAGAIN and err != errno.EWOULDBLOCK: Tracker.logger.error("error in accept: %s", e) break ns.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) ns.setblocking(False) # new SocketNB object from raw socket socknb = SocketNB(ns) # authenticate connection socknb.do_handshake() # init TaskStarter object with the created SocketNB task_starter = TaskStarter(socknb) cls.tasks.append(task_starter) cls.fd_to_task_map[ task_starter.current_state.fileno()] = task_starter Tracker.logger.debug("fd_task_map: {}".format( cls.fd_to_task_map)) else:
class SchedulerBase(object):
    """Base class for pipeline schedulers.

    Runs the central scheduling loop: per iteration it (1) moves events from
    each stage's buffer queues to its deliver queue, (2) asks the subclass to
    submit deliverable tasks, (3) harvests finished tasks and fails fast on
    errors, and exits once every queue has drained and no task is in flight.

    Subclasses must implement :meth:`submit_tasks`; they may optionally
    provide ``get_quota`` (and, apparently, ``get_deliverQueueEmpty`` --
    see the NOTE in :meth:`schedule`).
    """

    logger = get_logger("SchedulerBase")
    should_stop = False  # set by stop() to request loop termination

    @classmethod
    def schedule(cls, pipeline):
        """Drive *pipeline* to completion (or until :meth:`stop` is called).

        :param pipeline: object exposing ``pipe_id``, ``pipedata``, and
            ``stages`` (a dict of stage objects with buffer/deliver queues) --
            schema assumed from usage here; confirm against the Pipeline class.
        :raises Exception: if any task ends in an ``ErrorState``; the message
            aggregates every failed task's diagnostic string.
        """
        SchedulerBase.logger.info('start scheduling pipeline: %s', pipeline.pipe_id)
        last_print = 0
        tasks = []  # tasks currently in flight
        while not cls.should_stop:
            buffer_empty = True
            deliver_empty = True

            # Phase 1: deliver buffered events for every stage.
            for key, stage in pipeline.stages.iteritems():
                stage.delivery_func = (default_delivery_func
                                       if stage.delivery_func is None
                                       else stage.delivery_func)
                # Pending events in non-durable queues mean we're not drained.
                if any([not q.empty() and not isinstance(q, DurableQueue)
                        for q in stage.buffer_queues.values()]):
                    buffer_empty = False
                stage.delivery_func(
                    stage.buffer_queues,
                    stage.deliver_queue,
                    stale=len(tasks) == 0 and stage.deliver_queue.empty(),
                    stage_conf=stage.config,
                    stage_context=stage.context,
                    pipedata=pipeline.pipedata)

            # Phase 2: let the concrete scheduler submit deliverable tasks.
            submitted_task_count = cls.submit_tasks(pipeline, tasks)
            if submitted_task_count != 0:
                deliver_empty = False
            if hasattr(cls, 'get_quota'):
                if cls.get_quota(pipeline) == 0:
                    # if pipeline is sleeping
                    deliver_empty = False
                elif not cls.get_deliverQueueEmpty(pipeline):
                    # NOTE(review): ``get_deliverQueueEmpty`` is not defined on
                    # this base class (only ``get_deliver_queue_empty`` is);
                    # subclasses that define ``get_quota`` must also provide
                    # it or this raises AttributeError -- confirm.
                    deliver_empty = False
            # NOTE(review): this unconditionally overwrites the quota-derived
            # value computed just above; kept as-is to preserve behavior.
            deliver_empty = cls.get_deliver_queue_empty(pipeline)

            # Phase 3: collect tasks that reached a terminal-ish state.
            finished_tasks = [
                t for t in tasks
                if isinstance(t.current_state, ErrorState)
                or isinstance(t.current_state, TerminalState)
                or isinstance(t.current_state, WaitForInputState)
            ]
            error_tasks = [
                t for t in tasks if isinstance(t.current_state, ErrorState)
            ]
            SchedulerBase.logger.debug("Have {} finished tasks".format(
                len(finished_tasks)))
            cls.process_finish_tasks(finished_tasks)
            if len(error_tasks) > 0:
                # NOTE(review): uses the root ``logging`` module rather than
                # SchedulerBase.logger -- preserved for compatibility.
                logging.error(str(len(error_tasks)) + " tasks failed: ")
                errmsgs = []
                for et in error_tasks:
                    logging.error(et.current_state.str_extra())
                    errmsgs.append(et.current_state.str_extra())
                raise Exception(
                    str(len(error_tasks)) + " tasks failed\n" + "\n".join(errmsgs))

            # Keep only tasks that are still running.
            tasks = [
                t for t in tasks
                if t not in finished_tasks
                and not isinstance(t.current_state, WaitForInputState)
            ]

            # Everything drained and nothing in flight: we're done.
            if buffer_empty and deliver_empty and len(tasks) == 0:
                break
            if time.time() > last_print + 1:
                print_task_states(tasks)
                # SchedulerBase.logger.debug("buffer empty: "+str(buffer_empty)+', deliver empty: '+str(deliver_empty))
                last_print = time.time()
            time.sleep(1)  # sleep to avoid spinning, we can use notification instead, but so far, this works.
            # it may increase overall latency by at most n*0.01 second, where n is the longest path in the pipeline
        SchedulerBase.logger.info('finish scheduling pipeline')

    @classmethod
    def submit_tasks(cls, pipeline, submitted):
        """Submit deliverable tasks; return how many were submitted.

        Subclass responsibility: append each submitted Task to *submitted*.
        """
        raise NotImplementedError()

    @classmethod
    def stop(cls):
        """Request that the scheduling loop terminate at the next iteration."""
        cls.should_stop = True

    @classmethod
    def get_deliver_queue_empty(cls, pipeline):
        """Return True iff every stage's deliver queue is empty.

        (The original contained a duplicated, unreachable ``return`` after the
        first one -- removed; the early return below is behaviorally identical
        to the original flag-based loop.)
        """
        for key, stage in pipeline.stages.iteritems():
            if not stage.deliver_queue.empty():
                return False
        return True

    @classmethod
    def process_finish_tasks(cls, tasks):
        """Hook for subclasses to react to finished tasks; default no-op."""
        pass
import string import json import logging import boto3 from sprocket.controlling.common.defs import Defs from sprocket.controlling.common.network import connect_socket from sprocket.controlling.common.socket_nb import SocketNB from sprocket.controlling.worker.fd_wrapper import FDWrapper from sprocket.controlling.common.logger import get_logger # from sprocket.util.misc import ForkedPdb s3_client = boto3.client('s3') logger = get_logger(__file__.split('/')[-1]) ### # set a value ### def _do_set(msg, vals, to_int): res = msg.split(':', 1) if len(res) != 2 or len(res[0]) < 1: vals['cmdsock'].enqueue('FAIL(invalid syntax for SET)') return False retval = 'OK:SET(%s)' if to_int: retval = 'OK:SETI(%s)'
class ResourceScheduler(SchedulerBase):
    """Scheduler that only submits a task when every one of its declared
    resource requests can currently be satisfied by the shared
    ResourceManager; tasks that don't fit are pushed back onto their
    stage's deliver queue for a later attempt."""

    resource_manager = ResourceManager()
    logger = get_logger(__file__.split('/')[-1])

    @classmethod
    def submit_tasks(cls, pipeline, submitted=None):
        """Drain every stage's deliver queue, submitting tasks whose
        resource requests fit and redelivering the ones that don't.

        :param pipeline: pipeline whose ``stages`` deliver queues are drained.
        :param submitted: optional list; successfully submitted Tasks are
            appended to it.  (Originally a mutable default ``[]``, which is
            shared across calls -- replaced with the ``None`` idiom; callers
            that pass a list, as SchedulerBase.schedule does, are unaffected.)
        :return: the number of tasks submitted.
        """
        if submitted is None:
            submitted = []
        count_submitted = 0  # number of tasks submitted
        # tasks we failed to submit and must redeliver, paired with the stage
        # they came from so each goes back on its OWN deliver queue.  (The
        # original put every redelivered task onto the queue of whichever
        # stage the outer loop visited last -- wrong for multi-stage pipelines.)
        tasks_to_redeliver = []
        # submit all tasks for all stages in the pipeline
        for key, stage in pipeline.stages.iteritems():
            while not stage.deliver_queue.empty():
                next_task = stage.deliver_queue.get()  # take first task off deliver queue
                t = Task(
                    lambda_func=stage.lambda_function,
                    init_state=stage.init_state,
                    event=stage.event,
                    in_events=next_task,
                    emit_event=stage.emit,
                    config=stage.config,
                    pipe=pipeline.pipedata,
                    regions=stage.region,
                )
                ResourceScheduler.logger.debug('Task requires: "{}"'.format(
                    t.resources))
                resources_sufficient = True
                # Materialize as a list (not a lazy map()) because the
                # requests are iterated more than once below.
                resources = [ResourceRequest.from_json(r) for r in t.resources]
                t.resources = resources  # TODO: instantiate resource requests somewhere else
                for resource in resources:
                    if not cls.resource_manager.resource_exists(resource.name):
                        cls.resource_manager.register_resource(
                            resource.name, resource.max_allocation)
                    if cls.resource_manager.available_resource(
                            resource.name) < resource.required_allocation:
                        ResourceScheduler.logger.debug(
                            "Insufficient resources for task {}. "
                            "Need {} of {} but have {}".format(
                                t, resource.required_allocation, resource.name,
                                cls.resource_manager.available_resource(
                                    resource.name)))
                        resources_sufficient = False
                        tasks_to_redeliver.append((stage, next_task))
                        break
                if resources_sufficient:
                    ResourceScheduler.logger.debug(
                        "Have sufficient resources for task {}".format(t))
                    cls.resource_manager.reserve_resources(resources)
                    # let scheduler, pipeline, and tracker see the created task
                    submitted.append(t)
                    pipeline.tasks.append(t)
                    Tracker.submit(t)
                    count_submitted += 1
                    ResourceScheduler.logger.debug('submitted a task: ' + str(t))
        # if we failed to submit a task due to resource insufficiency,
        # place it back on its stage's deliver_queue
        ResourceScheduler.logger.debug(
            "Placing {} tasks back on delivery queue".format(
                len(tasks_to_redeliver)))
        # Plain loop instead of map(): map() is evaluated lazily under
        # Python 3 and would silently drop the redeliveries.
        for redeliver_stage, ttd in tasks_to_redeliver:
            redeliver_stage.deliver_queue.put(ttd)
        return count_submitted

    @classmethod
    def process_finish_tasks(cls, tasks):
        """Release every finished task's reserved resources back to the pool."""
        for t in tasks:
            ResourceScheduler.logger.debug("Releasing resources")
            for r in t.resources:
                cls.resource_manager.release_resource(r.name,
                                                      r.required_allocation)
                r.required_allocation = 0  # task is done; needs no more resources