Exemplo n.º 1
0
class PostGreSQLEngine(Component):
    """
    PostGreSQL DB Engine
    """
    implements(IDBEngine)

    db_user = ConfigItem('postgresqlengine', 'user', 'postgresql',
                         """User name of the postgresql database.""")

    db_pass = ConfigItem('postgresqlengine', 'password', '',
                         """Password of the postgresql database.""")

    db_host = ConfigItem('postgresqlengine', 'host', 'localhost',
                         """Host of the postgresql database.""")

    db_port = ConfigItem('postgresqlengine', 'port', '5432',
                         """Port of the postgresql database.""")

    db_name = ConfigItem('postgresqlengine', 'databasename', 'mydb',
                         """Name of the postgresql database.""")

    db_echo = BoolItem('postgresqlengine', 'echo', True,
                       """Use SQL Alchemy debug output.""")

    def __init__(self):
        """
        Start without an engine; `get_engine` creates it on first use.
        """
        self.engine = None

    def _get_connection_string(self):
        """
        Assemble the connection URL from the configured user, password,
        host, port and database name.
        """
        url_template = "postgresql://%(user)s:%(password)s@%(host)s:%(port)s/%(dbname)s"
        url_fields = {
            'user': self.db_user,
            'password': self.db_pass,
            'host': self.db_host,
            'port': self.db_port,
            'dbname': self.db_name,
        }
        return _(url_template, **url_fields)

    def get_engine(self):
        """
        Return the PostGreSQL DB engine, creating and caching it on the
        first call.
        """
        if self.engine:
            return self.engine

        connection_url = self._get_connection_string()
        self.engine = create_engine(connection_url, echo=self.db_echo)
        return self.engine

    def initdb(self):
        """
        Hook called from the dbmanager once it gets initialized.

        Currently a no-op: the eager engine-creation check below is disabled,
        so the engine is only created when `get_engine` is called.
        """
        #if not self.get_engine():
        #    raise EngineCreationFailedError(_("Failed to create db engine for: %(connectionstring)s", connectionstring=self._get_connection_string()))

    def dbshutdown(self):
        """
Exemplo n.º 2
0
class GenericTaskScheduleStrategy(Component):
    """
    Schedule strategy that selects the slave which should process the next task.

    Every slave is rated by its task/worker ratio, where a lower rating is better:
     - rating < 1  => the slave has idle workers
     - rating == 1 => the slave has as many tasks as workers
     - rating > 1  => the slave has more tasks than workers
    A slave without workers gets the worst possible rating (sys.float_info.max).
    """
    implements(ITaskScheduleStrategy)

    def setup(self, scheduler, master):
        """
        Remember the scheduler and the master whose node registry is inspected.
        """
        self.scheduler = scheduler
        self.master = master

    def rate(self, slave):
        """
        Rate a slave without any lock. Lower is better.

        Returns sys.float_info.max for a slave without workers, otherwise the
        task/worker ratio clamped to a minimum of 0.
        """
        # Worst rating if we have a slave without workers!
        if slave.workers < 1:
            return sys.float_info.max

        # Basic rating is the task/worker ratio (forced to float division)
        rating = max(0, slave.tasks / (slave.workers * 1.0))

        # TODO: Add task finished per second ratio

        return rating

    def get_next_slave(self):
        """
        Get the slave that should process the next task.

        Returns the registry key of the active slave with the lowest rating,
        or None if the registry is empty or no active slave has a rating
        below sys.float_info.max. No lock is taken here.
        """
        registry = self.master.node_registry
        if not registry:
            return None

        # Find the best (lowest) rating among the active slaves
        best_rating = sys.float_info.max
        best_node = None
        for node_id in registry:
            node = registry[node_id]
            if not node:
                continue
            if not (node.state == NodeState.active and node.type == NodeType.slave):
                continue
            # Strict comparison: a rating of float max is never selected
            if node.rating < best_rating:
                best_rating = node.rating
                best_node = node_id

        # TODO: Collect all nodes with the same or a similar rating and pick a
        # random one from the best 10% of slaves. This would give us a bit of
        # randomness in case sending the task fails.

        # For now we just use the best node
        if best_node:
            return best_node
        return None
Exemplo n.º 3
0
class DNACurveNode(ApplicationNode):
    """
    DNA Curve Analysis application
    """
    implements(IApp)

    factor = IntItem('dnasample', 'factor', 1,
        """How many workloads does a single task get assigned, in our a workload is considered a row""")

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(DNACurveNode, self).app_init()

    def app_main(self):
        """
        Application's main entry; returns the base implementation's result
        """
        return super(DNACurveNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are
        able to send computation tasks over.

        Records the start time and returns the DNACurveTaskSystem to process.
        """
        self.start_time = time.time()
        self.dna_system = DNACurveTaskSystem(
            "ATGCAAATTG" * 1000, "trifonov", name="Example",
            maxlen=1024 * 1024, factor=self.factor)
        return self.dna_system

    def work_finished(self, result, task_system):
        """
        Called when the work has been done, the result is what our ITaskSystem
        sent back to us. Prints the elapsed time and shuts the main loop down.
        """
        try:
            print("Total time: {}".format(time.time() - self.start_time))
        except Exception:
            # A failed report (e.g. start_time was never set) is only printed;
            # system-exiting exceptions are no longer swallowed here
            traceback.print_exc()
        finally:
            # Always stop the main loop, otherwise the application never ends
            self.shutdown_main_loop()

    def push_tasksystem_response(self, result):
        """
        We just added a ITaskSystem on the framework. Check result for more info
        """
        self.log.info("Tasks system send to computation framework")

    def push_tasksystem_failed(self, result):
        """
        We failed to push a ITaskSystem on the computation framework!
        """
        self.log.error("Tasks system failed to be send to framework!")
        # Log the traceback if the result contains one; tolerate an empty result
        if result and "t" in result:
            self.log.error(result["t"])
Exemplo n.º 4
0
class EnvSetup(Component):
    """
    Setup manager component for the environment. It receives the
    delete, backup/restore and upgrade callbacks of an environment and
    currently only logs each of them.
    """
    implements(IEnvUpgrader, IEnvBackup, IEnvDelete)

    # --- IEnvDelete ---
    def env_delete(self):
        """
        Invoked while an env is being deleted; the env is still valid here
        """
        self.log.info("(EnvSetup) Deleting Environment...")

    # --- IEnvBackup ---
    def env_backup(self):
        """
        Invoked when a backup is made; returns the (currently empty) backup data
        """
        self.log.info("(EnvSetup) Backup Environment...")
        return {}

    def env_restore(self):
        """
        Invoked when a restore is made
        """
        self.log.info("(EnvSetup) Restore Environment...")

    # --- IEnvUpgrader ---
    def env_created(self):
        """
        Invoked after a new env has been created
        """
        self.log.info("(EnvSetup) Created Environment...")

    def env_need_upgrade(self, dbManager):
        """
        Invoked when an environment is started. Returning True would block
        loading the env until an upgrade is forced.

        TODO: Not implemented yet, always reports that no upgrade is needed.
        """
        return False

    def env_do_upgrade(self, dbManager):
        """
        Performs the actual upgrade process. Be careful when using db transactions
        """
        self.log.info("(EnvSetup) Uprade Environment...")
Exemplo n.º 5
0
class DefaultApplication(Component):
    """
    Minimal IApp implementation that only logs its lifecycle.
    """
    implements(IApp)

    def app_init(self):
        """
        Log the initialization step that runs just before the application
        """
        self.log.info("Init Default Application...")

    def app_main(self):
        """
        Log start and close of the application and report success
        """
        for message in ("Starting Default Application...",
                        "Closing Default Application..."):
            self.log.info(message)
        return APP_RET_CODE_SUCCESS
Exemplo n.º 6
0
class MD5HashReverseNode(ApplicationNode):
    """
    Reverse hash application
    """
    implements(IApp)

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(MD5HashReverseNode, self).app_init()

    def app_main(self):
        """
        Application's main entry; returns the base implementation's result
        """
        return super(MD5HashReverseNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are
        able to send computation tasks over.

        Records the start time and returns the task system to process.
        """
        self.start_time = time.time()
        return MD5HashReverseTaskSystem(128)

    def work_finished(self, result, task_system):
        """
        Called when the work has been done, the result is what our ITaskSystem
        sent back to us.

        result[0] is treated as the reversed value and result[1] as the error.
        Prints the elapsed time, logs the outcome and shuts the main loop down.
        """
        print("Total time: {}".format(time.time() - self.start_time))
        if not result[1]:
            if result[0]:
                self.log.info("Hash as been reversed. Initial number was %s" %
                              str(result[0]))
            else:
                self.log.info("Failed to reverse the hash :(")
        else:
            self.log.error("Computation failed: %s" % str(result[1]))
        self.shutdown_main_loop()

    def push_tasksystem_response(self, result):
        """
        We just added a ITaskSystem on the framework. Check result for more info
        """
        self.log.info("Tasks system send to computation framework")

    def push_tasksystem_failed(self, result):
        """
        We failed to push a ITaskSystem on the computation framework!

        Without this override the base class raises NotImplementedError when
        the push fails instead of reporting the failure.
        """
        self.log.error("Tasks system failed to be send to framework!")
        # Log the traceback if the result contains one; tolerate an empty result
        if result and "t" in result:
            self.log.error(result["t"])
Exemplo n.º 7
0
class AdvancedTaskManager(Component):
    """
    Advanced task manager skeleton. All of its hooks are currently no-ops.
    """
    implements(ITaskManager)

    #
    # Our async task update logic. This gets executed from our step
    # controller from the nodes main loop.
    #
    # The update loop is composed by:
    #  - Generate task list
    #  - Execute task for current state using current delta time and step count
    #  - Collect results and synchronize between systems (we still need to define what
    #    a system is, just a component that implements the system ExtensionPoint)
    #

    def update(self):
        """
        Update call from the framework's main loop. Not implemented yet.
        """
        pass

    def _pre_execute(self):
        """
        Called before a step is executed. Not implemented yet.
        """
        pass

    def _post_execute(self):
        """
        Called after a step has been performed. Not implemented yet.
        """
        pass

    def _execute(self):
        """
        Execute a given step of the framework execution. Not implemented yet.
        """
        pass
Exemplo n.º 8
0
class ApplicationNode(Component, Node):
    """
    An application node is a consumer node of the framework.

    The node registers itself with a master node, opens a TCP compute channel
    to it and then either pushes an ITaskSystem or individual tasks. Concrete
    applications subclass this node and override the callback methods that
    raise NotImplementedError here.
    """
    implements(IApp)

    master_url = HostItem('applicationnode', 'master', 'localhost:8080',
                          """This slave master node""")

    def app_init(self):
        """
        Initialize application just before running it
        """
        super(ApplicationNode, self).app_init()

        # No compute channel exists until we have registered with the master
        self.master_node_tcp = None

    def app_main(self):
        """
        Launch a concurrent application.

        Runs the base implementation first and returns its code unchanged if
        it is not one of SUCCESS_RET_CODES. Otherwise the registration state is
        reset, the main loop is entered and, once it returns, the node
        unregisters from the master and stops its API thread.
        """
        result = super(ApplicationNode, self).app_main()
        if result not in SUCCESS_RET_CODES:
            return result

        # Flag used to re-create the master handshake if an unexpected connection drop
        # was detected
        self.unexected_connection_error = False
        self.is_registered = False

        # Make sure we clear the system out
        self.task_system = None

        # Enter app main loop
        self.main_loop()

        # Stop all processes and threads
        self.stop_app_node()
        self.stop_api_thread()

        return result

    def stop_app_node(self):
        """
        Unregister from the master and close the TCP compute channel.

        Failures are reported but never propagated so the shutdown can continue.
        """
        try:
            self.unregister_from_master()
            # The channel only exists after a successful registration
            if self.master_node_tcp is not None:
                self.master_node_tcp.close()
        except Exception:
            traceback.print_exc()
            self.log.warn("Failed to close TCP compute channel with master!")

    # ---------------------------------------------------------------------
    # Master Node Registration
    # ---------------------------------------------------------------------

    def master_disconnected(self, gracefully):
        """
        Called when a master is disconnected (gracefully) or we had no response from the master itself (ungracefully)
        """
        self.log.info("Master disconnected (gracefully:%s)" % (gracefully,))
        return True

    def get_master_url(self):
        """
        Get the URL where our master node is hosted, formatted as "host:port"
        """
        host, port = self.master_url[0], self.master_url[1]
        return "%s:%d" % (host, port)

    def get_master_address(self):
        """
        Get the address and port in (host,port) fashion
        """
        # NOTE(review): hard-coded and independent of master_url - confirm this is intended
        return ('localhost', 8081)

    def has_master(self):
        """
        Check if the node has a master or not. Master node has no master itself
        """
        return True

    def generate_client_api(self):
        """
        Generate the client API of our compute channel.

        Registers one remote handler per callback on the TCP client; each
        handler forwards to the method of the same name on this node. Nothing
        is registered while there is no compute channel.
        """
        if self.master_node_tcp:

            @tcpremote(self.master_node_tcp_client)
            def work_finished(handler, request, result, task_system):
                self.work_finished(result, task_system)
                raise NoResponseRequired()

            @tcpremote(self.master_node_tcp_client)
            def task_finished(handler, request, task, result, error):
                self.task_finished(task, result, error)
                raise NoResponseRequired()

            @tcpremote(self.master_node_tcp_client)
            def push_tasksystem_failed(handler, request, result):
                self.push_tasksystem_failed(result)

            @tcpremote(self.master_node_tcp_client)
            def push_tasksystem_response(handler, request, result):
                self.push_tasksystem_response(result)

            @tcpremote(self.master_node_tcp_client)
            def push_task_failed(handler, request, result):
                self.push_task_failed(result)

            @tcpremote(self.master_node_tcp_client)
            def push_task_response(handler, request, result):
                self.push_task_response(result)

            @tcpremote(self.master_node_tcp_client)
            def push_tasks_failed(handler, request, result):
                self.push_tasks_failed(result)

            @tcpremote(self.master_node_tcp_client)
            def push_tasks_response(handler, request, result):
                self.push_tasks_response(result)

            @tcpremote(self.master_node_tcp_client)
            def register_client_failed(handler, request, result):
                self.register_client_failed(result)

            @tcpremote(self.master_node_tcp_client)
            def register_client_response(handler, request, result):
                self.register_client_response(result)

    def work_finished(self, result, task_system):
        """
        Called when the work has been done, the result is what our ITaskSystem
        sent back to us. Must be implemented by the application.
        """
        raise NotImplementedError("Node has not implemented work_finished!")

    def task_finished(self, task, result, error):
        """
        Called when a task has been done. Must be implemented by the application.
        """
        raise NotImplementedError("Node has not implemented task_finished!")

    def push_tasksystem_response(self, result):
        """
        We just added a ITaskSystem on the framework. Must be implemented by the application.
        """
        raise NotImplementedError(
            "Node has not implemented push_tasksystem_response!")

    def push_tasksystem_failed(self, result):
        """
        We failed to push a ITaskSystem on the computation framework!
        Must be implemented by the application.
        """
        raise NotImplementedError(
            "Node has not implemented push_tasksystem_failed!")

    def register_client_failed(self, result):
        """
        Called when we failed to register ourselves to a master node. Raises an exception.
        """
        raise FailedToRegisterWithMaster(
            "Client failed to register with the assigned master!")

    def push_task_response(self, result):
        """
        We just added a Task to the computation framework.
        Must be implemented by the application.
        """
        raise NotImplementedError(
            "Node has not implemented push_task_response!")

    def push_task_failed(self, result):
        """
        We failed to add a Task to the computation framework.
        Must be implemented by the application.
        """
        raise NotImplementedError("Node has not implemented push_task_failed!")

    def push_tasks_response(self, result):
        """
        We just added a set of Tasks to the computation framework.
        Must be implemented by the application.
        """
        raise NotImplementedError(
            "Node has not implemented push_tasks_response!")

    def push_tasks_failed(self, result):
        """
        We failed to add a set of Tasks to the computation framework.
        Must be implemented by the application.
        """
        raise NotImplementedError(
            "Node has not implemented push_tasks_failed!")

    def register_client_response(self, result):
        """
        Called when we finished registering ourselves to a master node. Raises
        FailedToRegisterWithMaster if the master rejected us.
        """
        if not result:
            raise FailedToRegisterWithMaster(
                "Master rejected our registration attempt!")

        # Now that we are registered we can start sending the application to the master and start processing it,
        # if this is a re-register we hope that the computation has not yet been completed!
        if self.task_system is None:
            self._start_processing()

    def register_with_master(self):
        """
        The node will register itself with the expected master node.

        Registers through the RPC proxy, adopts the node ID handed out by the
        master if it differs from ours, then creates and connects the TCP
        compute channel and registers on it. Any failure is logged and the
        node is marked as unregistered so that the next heartbeat retries.
        """
        try:
            # Try to register with the master
            result = self.master_node.register_client(self.node_id_str,
                                                      self.port, {})
            self.is_registered = True

            # If the node ID we are getting from the master is different we
            # are re-registering, so adopt the ID the master knows us by
            if result['id'] != self.node_id_str:
                self.node_id = uuid.UUID(result['id'])
                self.node_id_str = str(self.node_id)

            # Now we try to connect through our compute channel
            self.master_node_tcp, self.master_node_tcp_client = self.create_tcp_proxy(
                self.master_url[0], result['port'])
            self.generate_client_api()

            # Now connect
            self.master_node_tcp.connect()

            # Now register us with the compute channel before the master makes a timeout
            self.master_node_tcp.register_client(self.node_id_str)
        except Exception as e:
            # Registration is best-effort, but a half-finished attempt must not
            # leave us flagged as registered or it would never be retried
            self.is_registered = False
            self.log.warn("Failed to register with master: %s" % str(e))

    def unregister_from_master(self):
        """
        The node will unregister itself with the expected master node.
        The node ID is cleared even if the master could not be reached.
        """
        if self.node_id:
            try:
                self.master_node.unregister_client(self.node_id_str)
            except Exception as e:
                self.log.error("Exception when unregistering from master: %s" %
                               str(e))
            self.node_id = None

    def send_heartbeat(self):
        """
        Send heartbeat to master in case we have one.

        Re-registers first if we are not registered. The master's answer
        becomes the new registration state; a failed heartbeat is only logged.
        """
        self.conditional_register_with_master()
        if self.node_id:
            try:
                self.is_registered = self.master_node.heartbeat_client(
                    self.node_id_str)
            except Exception as e:
                self.log.debug("Heartbeat to master failed: %s" % str(e))

    def conditional_register_with_master(self):
        """
        Try to register with master after an unexpected connection failure
        """
        if not self.is_registered:
            try:
                self.register_with_master()
            except Exception as e:
                self.log.debug("Re-registering with master failed: %s" % str(e))

    def rpc_call_failed(self, proxy, method, reason):
        """
        Called when an RPC call failed for an unexpected reason.

        A network failure marks the node as unregistered so that the next
        heartbeat performs the master handshake again.
        """
        self.log.debug("Method %s failed because of %s" % (method, reason))

        # Handle network connection failures. The reason may be a URLError
        # instance or the exception class itself, so accept both forms
        is_connection_error = isinstance(reason, urllib2.URLError) or (
            isinstance(reason, type) and issubclass(reason, urllib2.URLError))
        if is_connection_error:
            self.unexected_connection_error = True
            self.is_registered = False

    def rpc_call_success(self, proxy, method, result):
        """
        Called when an RPC call succeeded. Clears the connection error flag
        and returns the result unchanged.
        """
        self.log.debug("Method %s succeded with %s" % (method, result))
        self.unexected_connection_error = False

        return result

    # ---------------------------------------------------------------------
    # ITaskSystem Handling
    # ---------------------------------------------------------------------

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are
        able to send computation tasks over. Must be implemented by the application.
        """
        raise NotImplementedError("Node has not implemented get_task_system!")

    def start_processing(self):
        """
        Called when the app is not using a ITaskSystem and will instead just add tasks and
        will take care of the task flow itself. Must be implemented by such applications.
        """
        raise NotImplementedError("Node has not implemented start_processing!")

    def _start_processing(self):
        """
        Called once the application is registered with the framework and we
        are ok to start our processing!

        Pushes the task system returned by get_task_system to the master, or
        falls back to start_processing when there is none. Raises
        NotImplementedError if the task system is not an ITaskSystem.
        """
        # Request task system instance
        self.task_system = self.get_task_system()
        if self.task_system:
            # Make sure its an instance of ITaskSystem
            if not isinstance(self.task_system, ITaskSystem):
                raise NotImplementedError(
                    'TaskSystem "%s" not an instance of ITaskSystem' %
                    str(self.task_system))

            # Pickle and send!
            self.master_node_tcp.push_tasksystem(self.task_system)
        else:
            self.start_processing()

    def push_task(self, task):
        """
        Send a task to the computation framework
        """
        self.master_node_tcp.push_task(task)

    def push_tasks(self, tasks):
        """
        Send a set of tasks to the computation framework
        """
        self.master_node_tcp.push_tasks(tasks)
Exemplo n.º 9
0
class ExpensiveNode(ApplicationNode):
    """
    Application node distributing the computation of an expensive task
    """
    implements(IApp)

    time_per_task = IntItem(
        'expensivesample', 'time_per_task', 1,
        """Time each task will perform on doing nothind (active wait) to simulate an expensive computation"""
    )

    num_tasks = IntItem('expensivesample', 'num_tasks', 8,
                        """Number of tasks that must be performend""")

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(ExpensiveNode, self).app_init()

    def app_main(self):
        """
        Application's main entry; returns the base implementation's result
        """
        return super(ExpensiveNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are
        able to send computation tasks over.

        Records the start time and returns the task system to process.
        """
        self.start_time = time.time()
        self.system = ExpensiveNodeTaskSystem(self.time_per_task,
                                              self.num_tasks)
        return self.system

    def work_finished(self, result, task_system):
        """
        Called when the work has been done, the result is what our ITaskSystem
        sent back to us.

        Logs the elapsed time, the expected sequential time and the ratio
        between both, then shuts the main loop down.
        """
        try:
            end_time = time.time() - self.start_time
            self.log.info("Total time: {}".format(end_time))

            # Log expected single threaded time and improvement
            expected_time = self.time_per_task * self.num_tasks
            self.log.info("Plain python expected time: {}".format(expected_time))
            # A coarse clock can report no elapsed time at all; skip the
            # ratio then instead of dividing by zero
            if end_time > 0:
                self.log.info("Concurrent improvememnet: {}%".format(
                    (expected_time / end_time) * 100.0))
        finally:
            # Always stop the main loop, otherwise the application never ends
            self.shutdown_main_loop()

    def push_tasksystem_response(self, result):
        """
        We just added a ITaskSystem on the framework. Check result for more info
        """
        self.log.info("Tasks system send to computation framework")

    def push_tasksystem_failed(self, result):
        """
        We failed to push a ITaskSystem on the computation framework!
        """
        self.log.error("Tasks system failed to be send to framework!")
        # Log the traceback if the result contains one; tolerate an empty result
        if result and "t" in result:
            self.log.error(result["t"])
Exemplo n.º 10
0
class GenericTaskScheduler(Component):
    """
    Distributed task scheduler. A scheduler receives an implemented system
    that will be executed on the distributed system through pickling Python instances.

    Tasks are put on a global queue; a schedule thread picks them up and sends
    each one to the slave selected by the configured strategy.
    """
    implements(ITaskScheduler)

    strategy = ExtensionPointItem('generictaskscheduler', 'strategy', ITaskScheduleStrategy, 'GenericTaskScheduleStrategy',
        """Task schedulers used to schedule execution""")

    def __init__(self):
        Component.__init__(self)
        self.stats = Stats.getInstance()

        # Map that maps tasks and slaves to be able to resend the tasks if the slave was deleted from the system
        self.task_map = {}

    def setup(self, master):
        """
        Bind the scheduler to its master, create the global task queue and
        start the schedule thread.
        """
        self.master = master
        self.lock = self.master.registry_lock

        # This is the global systems task queue. Every time we add tasks we will add them to this queue.
        # The global queue is where the current strategy will pickup tasks and decide which ones shall
        # be sent over a slave to be processed (this is getting done from a thread that waits for the queue)
        # If a new tasks gets added or a task gets completed we will notify the strategy which then decides to
        # pickup and process a new task.
        self.tasks = multiprocessing.JoinableQueue()

        # Schedule thread which will pickup processable tasks and send them to a good slave
        self.schedule_thread = schedule_thread(self.log, self.tasks, self.handle_task)
        self.schedule_thread.start()

        # Do not pass the lock to the strategy, we have to ensure we handle locks for it
        self.strategy.setup(self, self.master)

    def stop(self):
        """
        Stop the schedule thread
        """
        self.schedule_thread.stop()

    def _valid_id_no_lock(self, slave_id):
        """
        Check if slave id is pointing to a valid slave without any lock
        """
        registry = self.master.node_registry
        return slave_id in registry and self._valid_slave_no_lock(registry[slave_id])

    def _valid_slave_no_lock(self, slave):
        """
        Check if a slave is valid (an active node of type slave) without using any locks
        """
        return slave and slave.type == NodeType.slave and slave.state == NodeState.active

    def rate_slaves(self):
        """
        Re-rate every valid slave under the write lock and record how long it took
        """
        with self.lock.writelock:
            start = time.time()
            registry = self.master.node_registry
            for slave_id in registry:
                slave = registry[slave_id]
                if self._valid_slave_no_lock(slave):
                    slave.rating = self.strategy.rate(slave)
            ellapsed = time.time() - start
            self.stats.add_avg("GenericTaskScheduleStrategy-rate-time", ellapsed)

    def start_system(self, task_system):
        """
        Start an incoming task system by queueing the tasks it generates
        """
        self.push_tasks(task_system.generate_tasks(self.master))

    def push_tasks(self, tasks):
        """
        Push all tasks on the global task queue
        """
        for task in tasks:
            self.push_task(task)

    def push_task(self, task):
        """
        Put a task on the global task queue. Falsy tasks are ignored.
        """
        # Do not poison ourselves!
        if task:
            self.tasks.put(task)

    def handle_task(self, task):
        """
        Send a task to a slave or in case it failed queue the task back.

        Runs under the read lock. The task is queued again when the strategy
        yields no slave or when sending it raises.
        """
        with self.lock.readlock:
            reschedule = True
            try:
                slave_id = self.strategy.get_next_slave()
                if slave_id:
                    #TODO: Pickle task and send to slave
                    task.slave_id = slave_id
                    self.master.node_registry[task.slave_id].tcp_proxy.push_task(task)
                    reschedule = False
                    # Count the task on this slave and refresh its rating
                    self._tasked_pushed(task.slave_id)
            except Exception:
                # Sending failed: only count it, the task is queued again below
                self.stats.add_avg("GenericTaskScheduler-task-send-failed")

            # Make sure we try it again!
            if reschedule:
                self.push_task(task)

    def _tasked_pushed(self, slave_id):
        """
        A slave has acquired a new task, update its task count and rating.
        Takes no lock itself.
        """
        if self._valid_id_no_lock(slave_id):
            slave = self.master.node_registry[slave_id]
            slave.tasks += 1
            slave.rating = self.strategy.rate(slave)

    def task_finished(self, task, result, error):
        """
        A slave has finished a task: notify the task, then update the slave's
        task count and rating. Takes no lock itself.
        """
        task.finished(result, error)
        # Nothing to update if the id is not (or no longer) valid
        if self._valid_id_no_lock(task.slave_id):
            slave = self.master.node_registry[task.slave_id]
            slave.tasks -= 1
            slave.rating = self.strategy.rate(slave)
Exemplo n.º 11
0
class GenericTaskManager(Component):
    """
    Task manager that distributes tasks to a pool of local worker processes.

    Tasks are queued on a joinable multiprocessing queue that every
    TaskProcess is constructed with; the workers are also handed the shared
    results queue exposed through get_results_queue().
    """
    implements(ITaskManager)

    num_workers = IntItem('GenericTaskManager', 'num_workers', -1,
        """Number of worker processed to be created, -1 will spawn as much as physical cores.""")

    def __init__(self, *args, **kwargs):
        """
        Create the manager; the worker pool itself is built later in init().
        """
        Component.__init__(self, *args, **kwargs)
        # Effective worker count, resolved when the pool is created
        self._num_workers = 0
        # Results queue handed to every worker; kept across update_pool()
        self.results = multiprocessing.JoinableQueue()

    def init(self, identity, address):
        """
        Initialize the manager.

        identity is stored and passed on to every worker process; address is
        indexed as (host, port) and passed on the same way. The number of
        workers comes from the num_workers config item.
        """
        self.identity = identity
        self.host = address[0]
        self.port = address[1]

        self._create_workers(self.num_workers)

        # PUSH socket bound on the local worker port. Within this class it is
        # only referenced by the commented-out zmq send paths.
        context = zmq.Context()
        self.ventilator_send = context.socket(zmq.PUSH)
        self.ventilator_send.bind("tcp://127.0.0.1:%d" % WORKER_PORT)

    def _create_workers(self, requested):
        """
        Build a fresh task queue and the (not yet started) worker processes.

        A requested count of zero or less falls back to
        multiprocessing.cpu_count().
        """
        self._num_workers = requested
        if self._num_workers <= 0:
            self._num_workers = multiprocessing.cpu_count()

        # We now prepare our task queue and create a process for each worker
        self.tasks = multiprocessing.JoinableQueue()
        self.processes = [TaskProcess(self.results, i, self.tasks, self.identity, self.host, self.port) for i in range(self._num_workers)]

    def _stop_workers(self):
        """
        Queue one None per worker on the task queue (the shutdown value).
        """
        for _ in range(self._num_workers):
            self.tasks.put(None)

    def get_num_workers(self):
        """
        Return the number of workers we use for our processing
        """
        return self._num_workers

    def start(self):
        """
        Start our worker processes (as daemon processes)
        """
        for worker in self.processes:
            worker.daemon = True
            worker.start()

    def stop(self):
        """
        Stop our worker processes and signal the result listener
        """
        self._stop_workers()
        # Poison for result listener
        self.results.put(None)

    def update_pool(self, _num_workers=-1):
        """
        Set the number of workers the task manager should use.

        Must be called after init(), since the new workers reuse the stored
        identity and address. The current workers receive their shutdown
        value, a new task queue and pool are created and started. The results
        queue is kept and no shutdown value is put on it. A count of zero or
        less means one worker per CPU.
        """
        # init() takes (identity, address); calling it with the worker count
        # (as this method used to) always raised a TypeError.
        self._stop_workers()
        self._create_workers(_num_workers)
        self.start()

    def push_task(self, task):
        """
        Push a task that should be completed by the workers.

        Always returns True; a failure to queue the task is only reported
        through a printed traceback.
        """
        try:
            self.tasks.put(task)
        except Exception:
            traceback.print_exc()
        return True

    def wait_for_all(self):
        """
        Wait until all tasks has been finished (currently a no-op)
        """
        pass

    def get_results_queue(self):
        """
        Return a reference to the result queue
        """
        return self.results

    def task_finished(self, task, result, error):
        """
        Called once a task has been performed; forwards to task.finished()
        """
        task.finished(result, error)
Exemplo n.º 12
0
class MandlebrotSimpleNode(ApplicationNode):
    """
    Sample application that renders the mandlebrot set by pushing plain tasks
    to the framework and assembling the image from the task results itself
    (no ITaskSystem is used).
    """
    implements(IApp)

    use_optimized_task = BoolItem('mandlebrotsample', 'use_optimized_task', True,
                                  """Should we use the data optimized task or the lazy task""")

    send_task_batch = BoolItem('mandlebrotsample', 'task_batch', True,
                               """Should we send all tasks one by one or should we batch them into a hughe list""")

    factor = IntItem('mandlebrotsample', 'factor', 1,
                     """How many workloads does a single task get assigned, in our a workload is considered a row""")

    iters = IntItem('mandlebrotsample', 'iters', 20, """Mandlebrot iterations per pixel""")

    height = IntItem('mandlebrotsample', 'height', 1024, """Height of the mandlebrot set image""")

    width = IntItem('mandlebrotsample', 'width', 1536, """Width of the mandlebrot set image""")

    def app_init(self):
        """
        Initialization hook, run just before the main entry; defers to the base class.
        """
        super(MandlebrotSimpleNode, self).app_init()

    def app_main(self):
        """
        Main entry of the application; returns whatever the base class returns.
        """
        return super(MandlebrotSimpleNode, self).app_main()

    def get_task_system(self):
        """
        Queried by the base class once a MasterNode connection is available.

        Returns None because this sample drives its tasks by hand.
        """
        return None

    def start_processing(self):
        """
        Create all mandlebrot tasks and hand them to the framework.

        Depending on the configuration the tasks are either collected and
        pushed as one batch, or pushed one by one followed by a completion
        check.
        """
        batching = self.send_task_batch

        self.log.info("Starting computation")
        if batching:
            self.log.info(" Task batching enabled")

        self.start_time = time.time()
        self.image = np.zeros((self.height, self.width), dtype=np.uint8)

        # Rendered region and the size of a single pixel inside it
        self.min_x, self.max_x = -2.0, 1.0
        self.min_y, self.max_y = -1.0, 1.0
        self.pixel_size_x = (self.max_x - self.min_x) / self.width
        self.pixel_size_y = (self.max_y - self.min_y) / self.height

        # Bookkeeping used by check_finished()
        self.jobs = 0
        self.finished_jobs = 0

        job_list = []

        if self.use_optimized_task:
            # NOTE(review): for factor > 1 this creates quotient + remainder
            # tasks whose start_x values are consecutive indices rather than
            # multiples of factor - confirm against MandlebrotTaskOptimized.
            full_tasks, leftover = divmod(self.width, self.factor)
            self.jobs = full_tasks + leftover

            for index in xrange(0, self.jobs):
                task = MandlebrotTaskOptimized("m",
                                               None,
                                               self.node_id_str,
                                               iters=self.iters,
                                               start_x=index,
                                               rows=self.factor,
                                               cols=self.height,
                                               pixel_size_x=self.pixel_size_x,
                                               pixel_size_y=self.pixel_size_y,
                                               min_x=self.min_x,
                                               min_y=self.min_y)
                if batching:
                    job_list.append(task)
                else:
                    self.push_task(task)
        else:
            def dispatch(column, cells):
                # The system id is forced to None while the client id is set
                task = MandlebrotTask("mandle_{}".format(column),
                                      None,
                                      self.node_id_str,
                                      iters=self.iters,
                                      workload=cells)
                if batching:
                    job_list.append(task)
                else:
                    self.push_task(task)
                    self.jobs += 1

            pending = []
            pending_columns = 0
            x = 0

            for x in range(self.width):
                pending_columns += 1

                real = self.min_x + x * self.pixel_size_x
                pending.extend(
                    (x, y, real, self.min_y + y * self.pixel_size_y, self.iters)
                    for y in range(self.height))

                # Every self.factor columns the collected workload becomes a task
                if pending_columns == self.factor:
                    dispatch(x, pending)
                    pending = []
                    pending_columns = 0

            # Whatever is left over goes into one final task
            if pending:
                dispatch(x, pending)

            if batching:
                self.jobs = len(job_list)

        if batching:
            self.push_tasks(job_list)
        else:
            # We might already be done at this point
            self.check_finished()

    def task_finished(self, task, result, error):
        """
        Merge the result of a finished task into the image and count the task.

        A truthy result is walked as a mapping of x to a mapping of y to the
        pixel value.
        """
        if result:
            for col, column_values in result.iteritems():
                for row, pixel in column_values.iteritems():
                    self.image[row, col] = pixel

        self.finished_jobs += 1
        self.check_finished()

    def check_finished(self):
        """
        Once every job has reported back: log, print the timing, stop the main
        loop and display the image.
        """
        if self.finished_jobs != self.jobs:
            return

        self.log.info("All tasks finished!!")
        elapsed = time.time() - self.start_time
        print("Calculated in {} seconds!".format(elapsed))
        self.shutdown_main_loop()
        imshow(self.image)
        show()

    def push_task_response(self, result):
        """
        A single task was accepted by the computation framework; nothing to do.
        """

    def push_task_failed(self, result):
        """
        A single task could not be added to the computation framework.
        """
        self.log.info("Failed to send task send to computation framework")

    def push_tasks_response(self, result):
        """
        A batch of tasks was accepted by the computation framework.
        """
        self.log.info("Tasks send to computation framework")

    def push_tasks_failed(self, result):
        """
        A batch of tasks could not be added to the computation framework.
        """
        self.log.info("Failed to send tasks send to computation framework")
Exemplo n.º 13
0
class MandlebrotNode(ApplicationNode):
    """
    Sample application that renders the mandlebrot set by handing a
    MandlebrotTaskSystem to the framework instead of pushing tasks by hand.
    """
    implements(IApp)

    use_optimized_task = BoolItem('mandlebrotsample', 'use_optimized_task', True,
                                  """Should we use the data optimized task or the lazy task""")

    factor = IntItem('mandlebrotsample', 'factor', 1,
                     """How many workloads does a single task get assigned, in our a workload is considered a row""")

    iters = IntItem('mandlebrotsample', 'iters', 20, """Mandlebrot iterations per pixel""")

    height = IntItem('mandlebrotsample', 'height', 1024, """Height of the mandlebrot set image""")

    width = IntItem('mandlebrotsample', 'width', 1536, """Width of the mandlebrot set image""")

    def app_init(self):
        """
        Initialization hook, run just before the main entry; defers to the base class.
        """
        super(MandlebrotNode, self).app_init()

    def app_main(self):
        """
        Main entry of the application; returns whatever the base class returns.
        """
        return super(MandlebrotNode, self).app_main()

    def get_task_system(self):
        """
        Queried by the base class once a MasterNode connection is available.

        Records the start time, builds the task system from the configured
        image size, iterations, factor and task flavour, keeps it on the
        instance and returns it.
        """
        self.start_time = time.time()
        system = MandlebrotTaskSystem(-2.0, 1.0, -1.0, 1.0,
                                      self.height, self.width,
                                      self.iters, self.factor,
                                      self.use_optimized_task)
        self.system = system
        return system

    def work_finished(self, result, task_system):
        """
        Called with the result our task system sent back once all work is done.

        Prints the total time, stops the main loop and lets the local task
        system post-process the result on a fresh image buffer. Any error in
        that last step is only reported through a printed traceback.
        """
        elapsed = time.time() - self.start_time
        print("Total time: {}".format(elapsed))
        self.shutdown_main_loop()

        # Reassemble the result so it can be processed further
        try:
            canvas = np.zeros((self.height, self.width), dtype=np.uint8)
            self.system.image = canvas
            self.system.do_post_run(result)
        except:
            traceback.print_exc()

    def push_tasksystem_response(self, result):
        """
        The task system was accepted by the framework.
        """
        self.log.info("Tasks system send to computation framework")

    def push_tasksystem_failed(self, result):
        """
        The task system could not be pushed to the framework.

        Logs the failure and, when the result carries a "t" entry (the
        traceback), logs that entry as well.
        """
        self.log.error("Tasks system failed to be send to framework!")
        if "t" not in result:
            return
        self.log.error(result["t"])
Exemplo n.º 14
0
class ExpensiveSimpleNode(ApplicationNode):
    """
    Sample application that pushes a configurable number of ExpensiveTask
    instances to the framework (each created with sleep_time set to
    time_per_task) and reports the total time once all of them came back.
    """
    implements(IApp)

    send_task_batch = BoolItem('expensivesample', 'task_batch', True,
                               """Should we send all tasks one by one or should we batch them into a hughe list""")

    time_per_task = IntItem('expensivesample', 'time_per_task', 1,
                            """Time each task will perform on doing nothind (active wait) to simulate an expensive computation""")

    num_tasks = IntItem('expensivesample', 'num_tasks', 8, """Number of tasks that must be performend""")

    def app_init(self):
        """
        Initialization hook, run just before the main entry; defers to the base class.
        """
        super(ExpensiveSimpleNode, self).app_init()

    def app_main(self):
        """
        Main entry of the application; returns whatever the base class returns.
        """
        return super(ExpensiveSimpleNode, self).app_main()

    def get_task_system(self):
        """
        Queried by the base class once a MasterNode connection is available.

        Returns None because this sample drives its tasks by hand.
        """
        return None

    def start_processing(self):
        """
        Create num_tasks tasks and hand them to the framework.

        With batching enabled all tasks go out in a single push_tasks() call;
        otherwise they are pushed one by one, followed by a completion check.
        """
        batching = self.send_task_batch

        self.log.info("Starting computation")
        if batching:
            self.log.info(" Task batching enabled")

        self.start_time = time.time()
        self.finished_jobs = 0

        def make_task(index):
            return ExpensiveTask("expensive_{}".format(index),
                                 None,
                                 self.node_id_str,
                                 sleep_time=self.time_per_task)

        if batching:
            self.push_tasks([make_task(i) for i in range(self.num_tasks)])
            return

        for i in range(self.num_tasks):
            self.push_task(make_task(i))
        self.check_finished()

    def task_finished(self, task, result, error):
        """
        Count a finished task and check whether it was the last one.
        """
        self.finished_jobs += 1
        self.check_finished()

    def check_finished(self):
        """
        Log the progress; once every task is done also log the timing summary
        and stop the main loop.
        """
        self.log.info("%d -> %d" % (self.finished_jobs, self.num_tasks))
        if self.finished_jobs != self.num_tasks:
            return

        self.log.info("All tasks finished!!")
        elapsed = time.time() - self.start_time
        self.log.info("Total time: {}".format(elapsed))

        # Time a plain sequential run would be expected to take, and the
        # resulting ratio in percent
        sequential = self.time_per_task * self.num_tasks
        self.log.info("Plain python expected time: {}".format(sequential))
        self.log.info("Concurrent improvememnet: {}%".format((sequential / elapsed) * 100.0))
        self.shutdown_main_loop()

    def push_task_response(self, result):
        """
        A single task was accepted by the computation framework; nothing to do.
        """

    def push_task_failed(self, result):
        """
        A single task could not be added to the computation framework.
        """
        self.log.info("Failed to send task send to computation framework")

    def push_tasks_response(self, result):
        """
        A batch of tasks was accepted by the computation framework.
        """
        self.log.info("Tasks send to computation framework")

    def push_tasks_failed(self, result):
        """
        A batch of tasks could not be added to the computation framework.
        """
        self.log.info("Failed to send tasks send to computation framework")
Exemplo n.º 15
0
class SlaveNode(Component, ComputeNode):
    """
    A slave is a remote worker node that receives jobs from a master works them out and
    then returns the result to the master.
    """
    implements(IApp)

    master_url = HostItem('slavenode', 'master', 'localhost:8080',
        """This slave master node""")

    def app_init(self):
        """
        Initialize application just before running it
        """
        super(SlaveNode, self).app_init()

        # No compute channel until register_with_master() created one
        self.master_node_tcp = None

    def app_main(self):
        """
        Launch a concurrent application.

        Returns the base class result, or APP_RET_CODE_FAILED when the
        compute node could not be set up.
        """
        result = super(SlaveNode, self).app_main()
        if result not in SUCCESS_RET_CODES:
            return result

        # Start computation
        try:
            self.setup_compute_node()
        except Exception as e:
            traceback.print_exc()
            self.log.error("Failed to set up compute node: %s" % str(e))
            self.stop_compute_node()
            return APP_RET_CODE_FAILED

        # Flag used to re-create the master handshake if an unexpected connection drop
        # was detected
        self.unexected_connection_error = False
        self.is_registered = False

        # Enter app main loop
        self.main_loop()

        # Stop all processes and threads
        self.stop_compute_node()
        self.stop_api_thread()

        return result

    def stop_compute_node(self):
        """
        Stop the compute node, unregister from the master and close the
        compute channel. Failures while doing so are reported, not raised.
        """
        ComputeNode.stop_compute_node(self)
        try:
            self.unregister_from_master()
            # The channel only exists after a registration attempt got far enough
            if self.master_node_tcp is not None:
                self.master_node_tcp.close()
        except Exception:
            traceback.print_exc()
            self.log.warn("Failed to close TCP compute channel with master!")

    # ---------------------------------------------------------------------
    # Master Node Registration
    # ---------------------------------------------------------------------

    def master_disconnected(self, gracefully):
        """
        Called when a master is disconnected (gracefully) or we had no response from the master itself (ungracefull)
        """
        self.log.info("Master disconnected (gracefully:%s)" % (gracefully))
        return True

    def get_master_url(self):
        """
        Get the URL where our master node is hosted, formatted as host:port
        """
        return "%s:%d" % self.master_url

    def get_master_address(self):
        """
        Get the adress and port in (host,port) fashion
        """
        # NOTE(review): hard-coded and independent of master_url - confirm
        return ('localhost', 8081)

    def has_master(self):
        """
        Check if the node has a master or not. Master node has no master itself
        """
        return True

    def generate_client_api(self):
        """
        Generate the client API of our compute channel.

        Does nothing while no compute channel exists.
        """
        if self.master_node_tcp:
            @tcpremote(self.master_node_tcp_client)
            def push_task(handler, request, task):
                self.stats.add_avg('push_task')
                return self.push_task(task)

            @tcpremote(self.master_node_tcp_client)
            def register_slave_failed(handler, request, result):
                self.register_slave_failed(result)

            @tcpremote(self.master_node_tcp_client)
            def register_slave_response(handler, request, result):
                self.register_slave_response(result)

    def register_slave_failed(self, result):
        """
        Called when we failed to register ouselfs to a master node. Raises an exception.
        """
        raise FailedToRegisterWithMaster("Slave failed to register with the assigned master!")

    def register_slave_response(self, result):
        """
        Called when we finished to register ourselves to a master node. Raises an exception if the master
        rejected us.
        """
        if not result:
            raise FailedToRegisterWithMaster("Master rejected our registration attempt!")

    def register_with_master(self):
        """
        The node will register itself with the expected master node and open
        the TCP compute channel on the port the master answered with.

        Any failure is reported through a printed traceback and not raised.
        """
        try:
            # Try to register with the master
            # TODO: Send all data the master requires to use the node best:
            #  - Processor information: Amount, type, cache, ...
            #  - RAM: Amount, speed, type, ECC?
            #  - GPU: Type of cards, OpenCL, Cuda, amount, speed, memory, ...
            #  - Net: Interface speed, connection speed, roundtrip, ...
            #  - OS: Os type, previledges, ...
            result = self.master_node.register_slave(self.node_id_str, self.port, {'workers':self.get_num_workers()})
            self.is_registered = True

            # NOTE(review): the original comment talks about the master
            # returning a *different* id, but the condition tests equality, so
            # this only re-derives node_id from the id we already use - confirm
            # the intended behaviour before changing it.
            if result['id'] == self.node_id_str:
                self.node_id = uuid.UUID(result['id'])
                self.node_id_str = str(self.node_id)

            # Now we try to connect through our compute channel
            self.master_node_tcp, self.master_node_tcp_client = self.create_tcp_proxy(self.master_url[0], result['port'])
            self.generate_client_api()

            # Now connect
            self.master_node_tcp.connect()

            # Now register us with the compute channel before the master makes a timeout
            self.master_node_tcp.register_slave(self.node_id_str)
        except Exception:
            traceback.print_exc()

    def unregister_from_master(self):
        """
        The node will unregister itself with the expected master node.

        Does nothing when the node has no id; afterwards the id is cleared.
        """
        if self.node_id:
            try:
                self.master_node.unregister_slave(self.node_id_str)
                if self.master_node_tcp is not None:
                    self.master_node_tcp.close()
            except Exception as e:
                traceback.print_exc()
                self.log.error("Exception when unregistering from master: %s" % str(e))
            self.node_id = None

    def send_heartbeat(self):
        """
        Send heartbeat to master in case we have one.

        Registers first when the node is not registered. The master's answer
        becomes the new registration state.
        """
        self.conditional_register_with_master()
        if self.node_id:
            try:
                self.is_registered = self.master_node.heartbeat_slave(self.node_id_str)
            except Exception as e:
                # Best effort: a failed heartbeat must not raise out of here
                self.log.debug("Heartbeat to master failed: %s" % str(e))

    def conditional_register_with_master(self):
        """
        Try to register with master after an unexpected connection failure
        """
        if not self.is_registered:
            try:
                self.register_with_master()
            except Exception as e:
                # Best effort: stay unregistered, the condition is checked again on the next call
                self.log.debug("Registration with master failed: %s" % str(e))

    def rpc_call_failed(self, proxy, method, reason):
        """
        Called when an RPC call failed for an unexpected reason.

        A urllib2.URLError marks the node as unregistered. reason may be the
        URLError class itself or a URLError instance; an HTTPError instance is
        not treated as a connection failure.
        """
        self.log.debug("Method %s failed because of %s" % (method, reason))

        # Handle network connection failures. Comparing the class alone never
        # matches an exception instance, so instances are checked as well.
        connection_failed = (
            urllib2.URLError == reason
            or (isinstance(reason, urllib2.URLError)
                and not isinstance(reason, urllib2.HTTPError)))
        if connection_failed:
            self.unexected_connection_error = True
            self.is_registered = False

    def rpc_call_success(self, proxy, method, result):
        """
        Called when an RPC call succeded; returns the result unchanged
        """
        self.log.debug("Method %s succeded with %s" % (method, result))
        self.unexected_connection_error = False

        return result

    # ---------------------------------------------------------------------
    # Task Handling
    # ---------------------------------------------------------------------

    def task_finished(self, task, result, error):
        """
        Called when a task has finished its computation, the result object contains the task,
        the result or an error and additional information.

        Returns True when the outcome was passed to the master through the
        compute channel, False when that raised.
        """
        try:
            self.master_node_tcp.task_finished(task, result, error)
            return True
        except Exception:
            traceback.print_exc()
            return False
Exemplo n.º 16
0
class ZMQTaskManager(Component, threading.Thread):
    """
    Task manager that pulls tasks from the master's backend socket.

    The manager runs as a daemon thread that receives tasks on a ZMQ PULL
    socket and queues them on a joinable multiprocessing queue every
    TaskProcess is constructed with. The workers are also handed the shared
    results queue exposed through get_results_queue().
    """
    implements(ITaskManager)

    num_workers = IntItem('ZMQTaskManager', 'num_workers', -1,
        """Number of worker processed to be created, -1 will spawn as much as physical cores.""")

    master_backend_port = HostItem('ZMQTaskManager', 'master_backend_port', 'localhost:5001',
        """Masters backend port where we will request tasks.""")

    def __init__(self):
        """
        Create the manager; the worker pool itself is built later in init().
        """
        threading.Thread.__init__(self)
        Component.__init__(self)

        # Some thread related stuff
        self.daemon = True
        self.kill_switch = False

        # Context the receiving socket is created from in run()
        self.context = zmq.Context()

        # Effective worker count, resolved when the pool is created
        self._num_workers = 0
        # Results queue handed to every worker; kept across update_pool()
        self.results = multiprocessing.JoinableQueue()

    def init(self, identity, address):
        """
        Initialize the manager.

        identity is stored and passed on to every worker process; address is
        indexed as (host, port) and passed on the same way. The number of
        workers comes from the num_workers config item.
        """
        self.identity = identity
        self.host = address[0]
        self.port = address[1]

        self._create_workers(self.num_workers)

        # PUSH socket bound on the local worker port. Within this class it is
        # only referenced by the commented-out zmq send paths.
        context = zmq.Context()
        self.ventilator_send = context.socket(zmq.PUSH)
        self.ventilator_send.bind("tcp://127.0.0.1:%d" % WORKER_PORT)

    def _create_workers(self, requested):
        """
        Build a fresh task queue and the (not yet started) worker processes.

        A requested count of zero or less falls back to
        multiprocessing.cpu_count().
        """
        self._num_workers = requested
        if self._num_workers <= 0:
            self._num_workers = multiprocessing.cpu_count()

        # We now prepare our task queue and create a process for each worker
        self.tasks = multiprocessing.JoinableQueue()
        self.processes = [TaskProcess(self.results, i, self.tasks, self.identity, self.host, self.port) for i in range(self._num_workers)]

    def _start_workers(self):
        """
        Start every worker process as a daemon process.
        """
        for worker in self.processes:
            worker.daemon = True
            worker.start()

    def _stop_workers(self):
        """
        Queue one None per worker on the task queue (the shutdown value).
        """
        for _ in range(self._num_workers):
            self.tasks.put(None)

    def get_num_workers(self):
        """
        Return the number of workers we use for our processing
        """
        return self._num_workers

    def start(self):
        """
        Start the receiving thread and our worker processes
        """
        threading.Thread.start(self)
        self._start_workers()

    def stop(self):
        """
        Stop our worker processes, signal the result listener and shut the
        receiving thread down.
        """
        self.log.info("Shutting down ZMQTaskManager")
        self._stop_workers()
        # Poison for result listener
        self.results.put(None)

        # Kill our own thread
        self.kill_switch = True
        self.context.term()
        # Thread.join() takes seconds (the previous 5000 meant ~83 minutes) and
        # raises a RuntimeError for a thread that was never started.
        if self.is_alive():
            self.join(5)
        self.log.info("ZMQTaskManager shutdown finished")

    def run(self):
        """
        Thread body: receive tasks from the master backend and queue them for
        the workers until the kill switch is set or the context is terminated.
        """
        self.log.info("ZMQTaskManager started")

        # Create and connect to our scheduler socket
        self.socket = self.context.socket(zmq.PULL)
        self.socket.setsockopt(zmq.LINGER, 0)
        self.socket.set_hwm(0)
        self.socket.connect('tcp://{host}:{port}'.format(host=self.master_backend_port[0], port=self.master_backend_port[1]))

        # Start receiving messages
        while not self.kill_switch:
            try:
                next_task = receive_from_zmq_zipped(self.socket)
                self.push_task(next_task)
            except zmq.ContextTerminated:
                break
            except zmq.ZMQError as e:
                # EAGAIN only means no message was ready; anything else ends the loop
                if e.errno != zmq.EAGAIN:
                    break
            except Exception:
                traceback.print_exc()

        self.socket.close()
        self.log.info("ZMQTaskManager stopped")

    def update_pool(self, _num_workers=-1):
        """
        Set the number of workers the task manager should use.

        Must be called after init(), since the new workers reuse the stored
        identity and address. Only the worker processes are recycled: the
        current workers receive their shutdown value, then a new task queue
        and pool are created and started. The receiving thread, the zmq
        context and the results queue are left alone. A count of zero or less
        means one worker per CPU.
        """
        # Going through stop()/init()/start() cannot work here: init() takes
        # (identity, address), a Thread can only be started once and stop()
        # terminates the zmq context.
        self._stop_workers()
        self._create_workers(_num_workers)
        self._start_workers()

    def push_task(self, task):
        """
        Push a task that should be completed by the workers.

        Always returns True; a failure to queue the task is only reported
        through a printed traceback.
        """
        try:
            self.tasks.put(task)
        except Exception:
            traceback.print_exc()
        return True

    def wait_for_all(self):
        """
        Wait until all tasks has been finished (currently a no-op)
        """
        pass

    def get_results_queue(self):
        """
        Return a reference to the result queue
        """
        return self.results

    def task_finished(self, task, result, error):
        """
        Called once a task has been performed; forwards to task.finished()
        """
        task.finished(result, error)
Exemplo n.º 17
0
class ZMQTaskScheduler(Component, threading.Thread):
    """
    Different task scheduler implementation using ZMQ push/pull sockets. Uses a simple round-robin
    mechanism to handle multiple slaves.

    Tasks are written to a local PUSH socket (protected by a lock), received by
    the scheduler thread on the frontend PULL socket and forwarded unchanged to
    the backend PUSH socket.
    """
    implements(ITaskScheduler)

    frontend_port = IntItem('ZMQTaskScheduler', 'frontend_port', 5000,
        """Frontend port used to send tasks to the scheduler""")

    backend_port = IntItem('ZMQTaskScheduler', 'backend_port', 5001,
        """Backend port used to send tasks to the scheduler. Slaves will receive tasks on it.""")

    def __init__(self):
        """
        Create the ZMQ context, the frontend/backend sockets and the local
        push socket used by push_task/push_tasks.
        """
        threading.Thread.__init__(self)
        Component.__init__(self)
        self.stats = Stats.getInstance()

        # Some thread related stuff
        self.daemon = True
        self.kill_switch = False

        # The socket framework. Socket options are applied BEFORE bind/connect
        # (see _create_socket) because high-water-mark changes only affect
        # binds/connects made afterwards.
        self.context = zmq.Context()
        self.frontend = self._create_socket(zmq.PULL)
        self.frontend.bind('tcp://*:{port}'.format(port=self.frontend_port))
        self.backend = self._create_socket(zmq.PUSH)
        self.backend.bind('tcp://*:{port}'.format(port=self.backend_port))

        # The poller is used to poll for incoming messages on the frontend
        self.poll = zmq.Poller()
        self.poll.register(self.frontend, zmq.POLLIN)

        # Socket connected locally to the frontend to send tasks; access to it
        # is serialized through self.lock to be thread-safe
        self.frontend_push = self._create_socket(zmq.PUSH)
        self.frontend_push.connect('tcp://localhost:{port}'.format(port=self.frontend_port))

        # Our lock used to protect the frontend_push socket
        self.lock = threading.Lock()

    def _create_socket(self, socket_type):
        """
        Create a socket of the given type with LINGER 0 and HWM 0 already set
        """
        sock = self.context.socket(socket_type)
        sock.setsockopt(zmq.LINGER, 0)
        sock.set_hwm(0)
        return sock

    def setup(self, master):
        """
        Remember the master node and start the scheduler thread
        """
        self.master = master
        self.start()

    def run(self):
        """
        Thread body: forward every message received on the frontend to the
        backend until the kill switch is set, then release all sockets.
        """
        self.log.info("ZMQTaskScheduler started")

        try:
            # Start receiving messages
            while not self.kill_switch:
                try:
                    sockets = dict(self.poll.poll(1000))
                    if self.frontend in sockets:
                        msg = self.frontend.recv(flags=zmq.NOBLOCK)
                        # NOTE(review): if this non-blocking send raises
                        # zmq.Again the message received above is dropped.
                        self.backend.send(msg, flags=zmq.NOBLOCK)
                except zmq.Again:
                    # Timeout just fired, no problem!
                    pass
                except (KeyboardInterrupt, zmq.ContextTerminated):
                    break
                except zmq.ZMQError as e:
                    # EAGAIN means no message was ready, everything else is fatal
                    if e.errno != zmq.EAGAIN:
                        break
                except Exception:
                    # Not really good to just continue but safer for now!
                    traceback.print_exc()
        finally:
            # Always release the sockets, even on an unexpected exit
            self.frontend.close()
            self.backend.close()
            with self.lock:
                self.frontend_push.close()
            self.context.term()
        self.log.info("ZMQTaskScheduler stopped")

    def stop(self):
        """
        Ask the scheduler thread to finish and wait for it
        """
        self.log.info("Shutting down ZMQTaskScheduler")
        self.kill_switch = True
        # Thread.join takes seconds (the poll timeout above is milliseconds);
        # joining a thread that was never started raises RuntimeError.
        if self.is_alive():
            self.join(5.0)
        self.log.info("ZMQTaskScheduler shutdown finished")

    def start_system(self, task_system):
        """
        Start an incoming task system by queueing the tasks it generates
        """
        self.push_tasks(task_system.generate_tasks(self.master))

    def _push_task(self, task):
        """
        No lock variant of push task method; callers must hold self.lock
        """
        send_to_zmq_zipped(self.frontend_push, task)

    def push_tasks(self, tasks):
        """
        Push all tasks on the global task queue
        """
        with self.lock:
            # DO NOT USE push_task to queue tasks! It would be a deadlock!
            for task in tasks:
                self._push_task(task)

    def push_task(self, task):
        """
        Put a task on the global task queue; falsy tasks are ignored
        """
        with self.lock:
            # Do not poison ourselves!
            if task:
                self._push_task(task)

    def rate_slaves(self):
        """
        Update slaves (nothing to do for this scheduler)
        """
        pass

    def _tasked_pushed(self, slave_id):
        """
        A slave has acquired a new task, update its rank (no-op here)
        """
        pass

    def task_finished(self, task, result, error):
        """
        A slave has finished a task; forward result and error to the task
        """
        task.finished(result, error)
Exemplo n.º 18
0
class Pickler(Component):
    """
    Class responsible for pickling and unpickling objects

    Failures are reported as PickleException (pickling) or UnpickleException
    (unpickling).
    """
    implements(IPickler)

    pickle_protocol = IntItem(
        'pickler', 'protocol', pickle.HIGHEST_PROTOCOL,
        """Protocol used when pickling, by default pickle.HIGHEST_PROTOCOL""")

    secret = ConfigItem(
        'pickler', 'secret', 'JhTv535Vg385V',
        """Default salt used on decrypting encrypting a pickle""")

    # salt size in bytes
    salt_size = IntItem('pickler', 'salt_size', 16,
                        """Size of the salt used in the encryption process""")

    # number of iterations in the key generation
    num_iterations = IntItem(
        'pickler', 'num_iterations', 20,
        """Number of iterations used in the key generation""")

    # the size multiple required for AES
    aes_padding = IntItem('pickler', 'aes_padding', 16,
                          """Padding used for AES encryption""")

    def __init__(self):
        """
        Create the crypto helper and warn when the default secret is in use
        """
        super(Pickler, self).__init__()
        self.crypto_helper = CryptoHelper(self.salt_size, self.num_iterations,
                                          self.aes_padding)

        if self.secret == Pickler.secret.default.decode('utf-8'):
            self.log.warn(
                "Pickler using default secret, please setup you own to avoid security vulnerabilities!"
            )

    def pickle_f(self, fname, obj):
        """
        Pickle an object into a gzip compressed file

        Raises PickleException when the file cannot be written or the object
        cannot be pickled.
        """
        try:
            handle = gzip.open(fname, "wb")
            try:
                pickle.dump(obj, handle, self.pickle_protocol)
            finally:
                # Close explicitly so the gzip stream is flushed and released
                handle.close()
        except Exception:
            raise PickleException()

    def unpickle_f(self, fname):
        """
        Unpickle an object from a gzip compressed file

        Raises UnpickleException when the file cannot be read or decoded.
        """
        # SECURITY: unpickling executes code chosen by whoever wrote the file;
        # only use this on files from a trusted source.
        try:
            handle = gzip.open(fname, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()
        except Exception:
            raise UnpickleException()

    def pickle_s(self, obj):
        """
        Pickle an object and return the pickled string

        Raises PickleException when the object cannot be pickled.
        """
        try:
            return pickle.dumps(obj, protocol=self.pickle_protocol)
        except Exception:
            raise PickleException()

    def pickle_encode_s(self, obj):
        """
        Pickle an object, encrypt it with the configured secret and return
        the result base64 encoded

        Raises PickleException on any failure.
        """
        try:
            return base64.b64encode(
                self.crypto_helper.encrypt(self.pickle_s(obj), self.secret))
        except Exception:
            raise PickleException()

    def unpickle_s(self, pickle_string):
        """
        Unpickle a string and return an object

        Raises UnpickleException when the string cannot be unpickled.
        """
        # SECURITY: pickle.loads must never be fed untrusted data
        try:
            return pickle.loads(pickle_string)
        except Exception:
            raise UnpickleException()

    def unpickle_decode_s(self, pickle_string):
        """
        Base64 decode a string, decrypt it with the configured secret and
        unpickle the result

        Raises UnpickleException on any failure.
        """
        try:
            return self.unpickle_s(
                self.crypto_helper.decrypt(base64.b64decode(pickle_string),
                                           self.secret))
        except Exception:
            raise UnpickleException()
Exemplo n.º 19
0
class MasterNode(Component, BaseNode):
    implements(IApp)
    """
    A MasterNode is a compute node that can act and be used in computation when in standalone mode
    but is mainly used to dsitribute jobs along registered slaves. Once the jobs of a slave, or
    its own, are finished we will redistribute the results to the responsible client nodes.
    """
    # Configuration items of the master node. Each declaration passes a section,
    # an option name, a default value and a description.
    #
    # NOTE(review): the default below is the string 'False', while other BoolItem
    # declarations in this file pass a real boolean. Confirm that BoolItem parses
    # strings; otherwise the non-empty string would evaluate as truthy.
    is_standalone = BoolItem('masternode', 'is_standalone', 'False',
        """Master node is also a slave and a standalone application""")
    
    # Multiplied with the heartbeat timer in app_init to get the inactivity timer
    inactivity_time_multiplier = IntItem('node', 'inactivity_time_multiplier', 3,
        """Inactivty multiplier multiplies the heartbeat time to ensure inactivity is always several heartbeats""")
    
    # Reload value of the registry mirror countdown used in on_update
    registry_mirror_timer = FloatItem('masternode', 'registry_mirror_timer', 30.0,
        """Timer used to update node registry mirror""")
    
    # Reload value of the registry cleanup countdown used in on_update
    registry_cleanup_timer = FloatItem('masternode', 'registry_cleanup_timer', 60.0,
        """Timer used to cleanup the node registry""")
    
    # Scheduler implementation that receives all pushed tasks and task systems
    task_scheduler= ExtensionPointItem('masternode', 'task_scheduler', ITaskScheduler, 'GenericTaskScheduler',
        """Task scheduler used by the master node""")
    
    # Port handed to the ZMQ server and returned to nodes when they register
    master_port = IntItem('node', 'master_port', 8081,
        """Port used by the master node for high-performance communication and dedicated persistent connections""")
    
    def app_init(self):
        """
        Initialize application just before running it.

        Creates the ZMQ server, the slave/client registries with their locks
        and mirrors, the inactivity timers and the task system registry, and
        hands this node to the task scheduler through ``setup``.
        """
        super(MasterNode, self).app_init()
        
        # Start our TCPServer,
        #self.server = TCPServer("localhost", self.master_port, self)
        #self.server_thread = threading.Thread(name="tcp_server", target=self.server.serve_forever)
        #self.server_thread.daemon = True
        
        # Set up our ZeroMQ async server (created here, started in app_main)
        self.zmq_server = TCPServerZMQ(self.master_port, self.log, 5)
        
        # The node registry holds up-to-date info about slaves/clients and their
        # processing; we keep track of the number of tasks submitted to each
        # slave, how they perform, general statistics and more.
        self.node_registry = defaultdict(self._default_node)
        self.registry_lock = self.lock_cache.registry_lock
        self.node_cleanup_threshold = self.registry_cleanup_timer
        # The scheduler receives this node; the ZMQ scheduler in this file
        # starts its thread inside setup()
        self.task_scheduler.setup(self)
        
        # Our client registry
        self.client_registry = defaultdict(self._default_node)
        self.client_registry_lock = self.lock_cache.client_registry_lock
        
        # The registry mirror is used to send all updates from time to time and cache it.
        # We use a different dict so client status requests do not block
        self.node_registry_mirror = {}
        self.registry_mirror_lock = self.lock_cache.registry_mirror_lock
        self.registry_mirror_threshold = self.registry_mirror_timer
        # Start dirty so the first mirror update copies the registries
        self.registry_mirror_dirty = True
        
        # Client registry mirror
        self.client_registry_mirror = {}
        self.client_registry_mirror_lock = self.lock_cache.client_registry_mirror_lock
        
        # Timers which control inactivity handling of a node, be it a slave or a client:
        # a node becomes inactive after inactivity_timer and is dropped after
        # three times that span (see _handle_timeout)
        self.inactivity_timer = self.heartbeat_timer*self.inactivity_time_multiplier
        self.inactivity_unregister_timer = self.inactivity_timer * 3
        self.inactivity_threshold = self.inactivity_timer
        
        # NOTE(review): test_timer and test_app_id are not read anywhere in the
        # visible part of this class - confirm they are still needed
        self.test_timer = 1
        self.test_app_id = uuid.uuid1()
        
        # Our task system registry, filled by push_tasksystem
        self.tasksystem_registry = defaultdict(self._default_tasksystem)
        self.tasksystem_lock = self.lock_cache.tasksystem_lock
        
        # Create master thread
        #self.master_thread = master_thread(self.log)
    
    def app_main(self):
        """
        Launch a concurrent application.

        Runs the base class main first; when that fails its result code is
        returned right away. Otherwise the ZMQ server is started, the main
        loop is entered and, once it ends, all threads are shut down.

        :return: the result code of the base class ``app_main``
        """
        self.log.info("Initializing MasterNode")
        result = super(MasterNode, self).app_main()
        if result not in SUCCESS_RET_CODES:
            return result

        # Start the main server thread
        self.zmq_server.start()
        try:
            # Enter main loop
            self.main_loop()
        finally:
            # Stop all threads and processes, even when the main loop raised,
            # so nodes are notified and no server thread is left behind
            self.zmq_server.stop()
            self.notify_shutdown()
            self.stop_api_thread()
            self.task_scheduler.stop()

        return result
    
    def handle_echo(self, sock, address):
        """
        Echo every line received on a socket back to the sender.

        :param sock: connected socket object providing ``makefile``
        :param address: peer address, only printed
        """
        print(address)
        # Open for reading AND writing: the default mode is read-only on
        # Python 3, which would make the write below fail
        fp = sock.makefile('rw')
        try:
            while True:
                line = fp.readline()
                if not line:
                    # An empty read means the peer closed the connection
                    break
                fp.write(line)
                fp.flush()
        finally:
            # Release the file object even when reading or writing failed
            fp.close()
    
    def stop_master_thread(self):
        """
        Stop the master thread if one has been created.

        The creation of ``master_thread`` is commented out in ``app_init``, so
        the attribute may not exist; in that case nothing happens.
        """
        master_thread = getattr(self, 'master_thread', None)
        if master_thread is not None:
            master_thread.stop()
         
    def generate_api(self):
        """
        Create all rpc methods the node requires.

        Registration happens through the ``jsonremote`` (bound to
        ``self.api_service_v1``) and ``tcpremote`` (bound to
        ``self.zmq_server``) decorators. The node management endpoints are
        only registered when the node is not standalone; the task pushing
        endpoints are always registered. Every endpoint records a statistic
        and delegates to the method of the same purpose on this class.
        """
        super(MasterNode, self).generate_api()
        if not self.is_standalone:
            # --- registration of slaves and clients ---
            @jsonremote(self.api_service_v1)
            def register_slave(request, node_id, port, data):
                self.stats.add_avg('register_slave')
                # The ip is taken from the web context, not from the caller
                return self.register_node(node_id, web.ctx['ip'], port, data, NodeType.slave)
            
            @tcpremote(self.zmq_server, name='register_slave')
            #@tcpremote(self.server, name='register_slave')
            def register_slave_tcp(handler, request, node_id):
                self.stats.add_avg('register_slave_tcp')
                return self.register_node_tcp(handler, request, node_id, NodeType.slave)
            
            @jsonremote(self.api_service_v1)
            def register_client(request, node_id, port, data):
                self.stats.add_avg('register_client')
                return self.register_node(node_id, web.ctx['ip'], port, data, NodeType.client)
            
            @tcpremote(self.zmq_server, name='register_client')
            #@tcpremote(self.server, name='register_client')
            def register_client_tcp(handler, request, node_id):
                self.stats.add_avg('register_client_tcp')
                return self.register_node_tcp(handler, request, node_id, NodeType.client)
            
            # --- unregistration ---
            @jsonremote(self.api_service_v1)
            def unregister_slave(request, node_id):
                self.stats.add_avg('unregister_slave')
                return self.unregister_node(node_id, NodeType.slave)
            
            @jsonremote(self.api_service_v1)
            def unregister_client(request, node_id):
                self.stats.add_avg('unregister_client')
                return self.unregister_node(node_id, NodeType.client)
            
            # --- heartbeats ---
            @jsonremote(self.api_service_v1)
            def heartbeat_slave(request, node_id):
                self.stats.add_avg('heartbeat_slave')
                return self.heartbeat(node_id, NodeType.slave)
            
            @jsonremote(self.api_service_v1)
            def heartbeat_client(request, node_id):
                self.stats.add_avg('heartbeat_client')
                return self.heartbeat(node_id, NodeType.client)
            
            # --- task results and push acknowledgements ---
            @tcpremote(self.zmq_server)
            #@tcpremote(self.server)
            def task_finished(handler, request, task, result, error):
                self.stats.add_avg('task_finished')
                self.task_finished(task, result, error)
                # This is an end method for the interaction
                raise NoResponseRequired()
            
            @tcpremote(self.zmq_server)
            #@tcpremote(self.server)
            def push_task_response(handler, request, result):
                # TODO: Handle failure when result is False!
                pass
            
            @tcpremote(self.zmq_server)
            #@tcpremote(self.server)
            def push_task_failed(handler, request, result):
                # TODO: Handle failure when pushing tasks failed!
                pass

        # --- task pushing, registered for standalone nodes as well ---
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def push_tasksystem(handler, request, tasksystem):
            """
            Push an application onto the computation framework
            """
            self.stats.add_avg('push_tasksystem')
            return self.push_tasksystem(request, tasksystem)
        
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def push_task(handler, request, task):
            """
            Push a task onto the computation framework
            """
            self.stats.add_avg('push_task')
            return self.push_task(request, task)
        
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def push_tasks(handler, request, tasks):
            """
            Push a set of tasks onto the computation framework.

            Stops at the first task that is rejected and returns False.
            """
            self.stats.add_avg('push_tasks')
            # NOTE(review): anything that is not a list is ignored and still
            # reported as success - confirm this is intended
            if isinstance(tasks, list):
                for task in tasks:
                    if not self.push_task(request, task):
                        return False
            return True
        
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def test_method(handler, request):
            print("test_method from {}".format(request))
            raise NoResponseRequired()
    
    def _generate_status_dict(self, node):
        return {'type':node.type,'state':node.state}
    
    def status(self):
        """
        Extend the ComputeNode status with the mirrored slave and client
        registries, each entry reduced to its type and state
        """
        report = ComputeNode.status(self)
        with self.registry_mirror_lock.readlock:
            nodes = {}
            for node_id, entry in self.node_registry_mirror.iteritems():
                if entry:
                    nodes[node_id] = self._generate_status_dict(entry)
            report['nodes'] = nodes
        with self.client_registry_mirror_lock.readlock:
            clients = {}
            for client_id, entry in self.client_registry_mirror.iteritems():
                if entry:
                    clients[client_id] = self._generate_status_dict(entry)
            report['clients'] = clients
        return report
    
    def on_update(self, delta_time):
        """
        Advance the registry countdowns by delta_time and run whatever
        maintenance job has expired
        """
        super(MasterNode, self).on_update(delta_time)

        # Registry mirror countdown
        self.registry_mirror_threshold -= delta_time
        if self.registry_mirror_threshold < 0:
            self.update_registry_mirror()
            self.registry_mirror_threshold = self.registry_mirror_timer

        # Both remaining countdowns always tick, but at most one job runs per
        # update and the inactivity check takes precedence over the cleanup
        self.inactivity_threshold -= delta_time
        self.node_cleanup_threshold -= delta_time
        if self.inactivity_threshold < 0:
            self.update_inactive_nodes()
            self.inactivity_threshold = self.inactivity_timer
            return
        if self.node_cleanup_threshold < 0:
            self.clean_node_map()
            self.node_cleanup_threshold = self.registry_cleanup_timer
    
    def has_master(self):
        """
        Tell whether this node has a master. A master node never has one.
        """
        master_available = False
        return master_available
    
    def _handle_timeout(self, node):
        """
        Update the state of a node from the age of its last heartbeat.

        Active nodes that were silent for longer than the inactivity timer
        become inactive; inactive nodes that were silent for longer than the
        unregister timer are dropped. Returns the node, or None when it has
        been dropped.
        """
        silent_for = self.current_time - node['heartbeat']
        state = node['state']

        if state == NodeState.active and silent_for > self.inactivity_timer:
            self.log.info("Node %s set to inactive (t:%f)" % (node['node_id'], silent_for))
            node['state'] = NodeState.inactive
            self.set_registry_dirty()
            return node

        if state == NodeState.inactive and silent_for > self.inactivity_unregister_timer:
            # Delete node! Too much time inactive!
            self.log.info("Node %s kicked from system! To much time of inactivity! (t:%f)" % (node['node_id'], silent_for))
            self.set_registry_dirty()
            return None

        return node
    
    def set_registry_dirty(self):
        """
        Flag the registry mirror as outdated and let the task scheduler
        refresh its view of the slaves
        """
        self.registry_mirror_dirty = True
        self.update_scheduler()
        
    def update_scheduler(self):
        """
        Ask the task scheduler to rate the slaves again
        """
        scheduler = self.task_scheduler
        scheduler.rate_slaves()
        
    def update_inactive_nodes(self):
        """
        Run the timeout handling over every non-empty entry of the slave and
        the client registry and replace both registries with the result
        """
        self.log.info("Checking for inactive nodes...") 
        with self.registry_lock.writelock:
            checked_nodes = {}
            for node_id, entry in self.node_registry.iteritems():
                if entry:
                    checked_nodes[node_id] = self._handle_timeout(entry)
            self.node_registry = checked_nodes
        with self.client_registry_lock.writelock:
            checked_clients = {}
            for client_id, entry in self.client_registry.iteritems():
                if entry:
                    checked_clients[client_id] = self._handle_timeout(entry)
            self.client_registry = checked_clients
    
    def update_registry_mirror(self):
        """
        Refresh the public mirrors with a copy of the non-empty entries of
        both registries. Does nothing unless the registry is flagged dirty.
        """
        if not self.registry_mirror_dirty:
            return

        self.log.info("Updating node registry mirror...")
        with self.registry_mirror_lock.writelock:
            node_copy = {}
            for node_id, entry in self.node_registry.iteritems():
                if entry:
                    node_copy[node_id] = entry
            self.node_registry_mirror = node_copy
        with self.client_registry_mirror_lock.writelock:
            client_copy = {}
            for client_id, entry in self.client_registry.iteritems():
                if entry:
                    client_copy[client_id] = entry
            self.client_registry_mirror = client_copy
        self.registry_mirror_dirty = False
    
    def clean_node_map(self):
        """
        Drop every empty entry from the slave and the client registry
        """
        self.log.info("Cleaning node registry...")
        with self.registry_lock.writelock:
            kept_nodes = {}
            for node_id, entry in self.node_registry.iteritems():
                if entry:
                    kept_nodes[node_id] = entry
            self.node_registry = kept_nodes
        with self.client_registry_lock.writelock:
            kept_clients = {}
            for client_id, entry in self.client_registry.iteritems():
                if entry:
                    kept_clients[client_id] = entry
            self.client_registry = kept_clients
    
    def get_node_id_no_lock(self, url):
        """
        Return the id of the first slave registered under url, or None.
        Takes no lock.
        """
        for node_id, entry in self.node_registry.iteritems():
            if entry and entry.url == url:
                return node_id
        return None
    
    def get_node_id(self, url):
        """
        Look up the id of the slave registered under url while holding the
        registry read lock
        """
        with self.registry_lock.readlock:
            return self.get_node_id_no_lock(url)
    
    def get_client_id_no_lock(self, url):
        """
        Return the id of the first client registered under url, or None.
        Takes no lock.
        """
        for client_id, entry in self.client_registry.iteritems():
            if entry and entry.url == url:
                return client_id
        return None
    
    def get_client_id(self, url):
        """
        Look up the id of the client registered under url while holding the
        client registry read lock
        """
        with self.client_registry_lock.readlock:
            return self.get_client_id_no_lock(url)
    
    def get_node(self, url):
        """
        Return the registry entry of the slave registered under url, or None
        when no slave uses that url
        """
        with self.registry_lock.readlock:
            found_id = self.get_node_id_no_lock(url)
            return self.node_registry[found_id] if found_id else None
    
    def get_client(self, url):
        """
        Get a client representation given an url.

        :param url: "ip:port" string the client registered with
        :return: the client registry entry, or None when no client uses url
        """
        # Use the CLIENT lock and the CLIENT registry: the id comes from the
        # client registry, so resolving it against the slave registry would
        # return the wrong entry (or none at all)
        with self.client_registry_lock.readlock:
            client_id = self.get_client_id_no_lock(url)
            if client_id:
                return self.client_registry[client_id]
        return None
    
    def _default_node(self):
        return {}
    
    def _default_tasksystem(self):
        """
        Factory for task system registry entries: a Bunch without any field
        """
        no_fields = {}
        return Bunch(no_fields)
    
    def _default_slave_bunch(self):
        """
        Build the blank registry entry of a slave: inactive, without
        address, proxy or handler and with all counters at zero
        """
        blank_slave = {
            'node_id': '',
            'url': '',
            'ip': '',
            'port': 0,
            'type': NodeType.slave,
            'state': NodeState.inactive,
            'heartbeat': 0,
            'proxy': None,
            'workers': 0,
            'tasks': 0,
            'rating': 0.0,
            'handler': None,
        }
        return Bunch(blank_slave)
    
    def _default_client_bunch(self):
        """
        Build the blank registry entry of a client: inactive and without
        address, proxy or handler.

        :return: a new Bunch typed as NodeType.client
        """
        # A client entry must default to the client type, not the slave type
        return Bunch({'node_id':'', 'url':'', 'ip':'', 'port':0, 'type':NodeType.client, 'state':NodeState.inactive, 'heartbeat':0, 'proxy':None, 'handler': None})
    
    def register_node(self, node_id, ip, port, data, node_type):
        """
        Register a node within our node map.

        A node that registers again from a known url reuses its existing
        entry; otherwise a new entry is created under node_id. The entry is
        left in the pending state.

        :param node_id: id the node wants to be registered with
        :param ip: ip address of the node
        :param port: port of the node (formatted as an integer)
        :param data: extra registration data; slaves must provide 'workers'
        :param node_type: NodeType.slave or NodeType.client
        :return: dict with the node 'id' and the master 'port'
        :raises NotImplementedError: for any other node type
        :raises Exception: any failure is re-raised after the node has been
            unregistered again
        """
        try:
            # TODO: CHECK ALL CLIENT DATA!
            url = ("%s:%d") % (ip, port)
            if NodeType.slave == node_type:
                with self.registry_lock.writelock:
                    # Use the lock free lookup: the write lock is already held
                    known_id = self.get_node_id_no_lock(url)
                    node = self.node_registry[known_id] if known_id else None
                    if node is None:
                        # First registration from this url, create a new entry
                        node = self.node_registry[node_id] = self._default_slave_bunch()

                    self._reset_registered_node(node, node_id, url, ip, port, node_type)

                    # Add slave data
                    node.workers = data['workers']
                    node.tasks = 0
                    # Rating goes from [0, ..); 0 is the best rating and so a suitable candidate
                    node.rating = 0

                    # Make sure the mirror updates properly
                    self.set_registry_dirty()

                    # Send back the generated id
                    return {'id': node.node_id, 'port': self.master_port}
            elif NodeType.client == node_type:
                with self.client_registry_lock.writelock:
                    # Clients live in the client registry, so look there and
                    # never touch a slave that happens to share the url
                    known_id = self.get_client_id_no_lock(url)
                    node = self.client_registry[known_id] if known_id else None
                    if node is None:
                        # First registration from this url, create a new entry
                        node = self.client_registry[node_id] = self._default_client_bunch()

                    self._reset_registered_node(node, node_id, url, ip, port, node_type)

                    # Make sure the mirror updates properly
                    self.set_registry_dirty()

                    # Send back the generated id
                    return {'id': node.node_id, 'port': self.master_port}
            else:
                raise NotImplementedError("Unkown node")
        except Exception as e:
            traceback.print_exc()
            # Make sure to cleanup node from node map!
            if node_id:
                self.unregister_node(node_id, node_type)
            raise e

    def _reset_registered_node(self, node, node_id, url, ip, port, node_type):
        """
        Fill the fields shared by slaves and clients on (re-)registration
        """
        node.node_id = node_id
        node.url = url
        node.ip = ip
        node.port = port
        node.type = node_type
        node.proxy = self.create_node_proxy(url)
        node.state = NodeState.pending
        node.heartbeat = time.time()
        # The tcp handshake (register_node_tcp) fills these in later
        node.handler = None
        node.tcp_proxy = None
    
    def unregister_node(self, node_id, node_type):
        """
        Unregister a node within our node map.

        The registry entry is set to None (the cleanup job removes it later).
        For clients every task system owned by the client is dropped as well.

        :param node_id: id of the node to unregister
        :param node_type: NodeType.slave or NodeType.client
        :return: True when the node was registered, False otherwise
        :raises NotImplementedError: for any other node type
        """
        if NodeType.slave == node_type:
            with self.registry_lock.writelock:
                if node_id not in self.node_registry:
                    return False
                self.node_registry[node_id] = None
                # Flags the mirror as dirty and updates the scheduler
                self.set_registry_dirty()
                return True
        elif NodeType.client == node_type:
            with self.client_registry_lock.writelock:
                if node_id not in self.client_registry:
                    return False
                self.client_registry[node_id] = None
                # Get rid of any registered task system. The registry is keyed
                # by system id (see push_tasksystem), so the owner has to be
                # matched through the client_id stored in each entry.
                with self.tasksystem_lock.writelock:
                    stale_ids = []
                    for system_id, entry in list(self.tasksystem_registry.items()):
                        try:
                            owned = entry.client_id == node_id
                        except (AttributeError, KeyError):
                            owned = False
                        if owned or system_id == node_id:
                            stale_ids.append(system_id)
                    for system_id in stale_ids:
                        del self.tasksystem_registry[system_id]
                # Flags the mirror as dirty and updates the scheduler
                self.set_registry_dirty()
                return True
        else:
            raise NotImplementedError("Unkown node")
    
    def register_node_tcp(self, handler, request, node_id, node_type):
        """
        A node has just registered itself through the compute channel.

        Attaches the handler, its worker socket and a ZMQ client proxy to the
        registry entry and marks the node as active.

        :param handler: connection handler (shared between many client sockets)
        :param request: request passed on to the ZMQ client proxy
        :param node_id: id the node registered with before
        :param node_type: NodeType.slave or NodeType.client
        :return: True when the handshake worked, False when the node is
            unknown or has been unregistered in the meantime
        :raises NotImplementedError: for any other node type
        """
        if NodeType.slave == node_type:
            with self.registry_lock.writelock:
                # Unregistered or timed out nodes are kept as None entries
                node = self.node_registry.get(node_id)
                if not node:
                    return False
                node.handler = handler
                node.socket = handler.worker
                node.tcp_proxy = self.create_tcp_client_proxy_zmq(self.zmq_server.context, request)
                node.state = NodeState.active
                # Let the slave know that the handshake worked
                return True
        elif NodeType.client == node_type:
            with self.client_registry_lock.writelock:
                node = self.client_registry.get(node_id)
                if not node:
                    return False
                node.handler = handler
                node.socket = handler.worker
                node.tcp_proxy = self.create_tcp_client_proxy_zmq(self.zmq_server.context, request)
                node.state = NodeState.active
                # Save some data within the handler itself
                handler.node_id = node_id
                handler.node_type = NodeType.client
                # Let the client know that the handshake worked
                return True
        else:
            raise NotImplementedError("Unkown node")
                    
            
        
    def notify_shutdown(self):
        """
        Notify a global shutdown to all nodes.

        Every registered slave and client with a proxy gets a
        ``master_disconnected`` call. This is best effort: a node that cannot
        be reached is logged and skipped.
        """
        def notify_all(registry):
            for node_id in list(registry):
                node = registry[node_id]
                if node and node.proxy:
                    try:
                        node.proxy.master_disconnected()
                    except Exception as e:
                        # Keep going, the remaining nodes still need the notice
                        self.log.info("Could not notify node %s about the shutdown: %s" % (node_id, e))

        with self.registry_lock.readlock:
            notify_all(self.node_registry)
        with self.client_registry_lock.readlock:
            notify_all(self.client_registry)
    
    def heartbeat(self, node_id, node_type):
        """
        We just received a beat from a node; update its last heartbeat
        timestamp to prevent timeouts. Inactive nodes become active again.

        :param node_id: id of the node that sent the heartbeat
        :param node_type: NodeType.slave or NodeType.client
        :return: True when the node is registered, False when it is unknown
            or has been unregistered
        :raises NotImplementedError: for any other node type
        """
        if NodeType.slave == node_type:
            lock, registry_name = self.registry_lock, 'node_registry'
        elif NodeType.client == node_type:
            lock, registry_name = self.client_registry_lock, 'client_registry'
        else:
            raise NotImplementedError("Unkown node")

        with lock.writelock:
            # Read the registry under the lock, it may be replaced by the
            # maintenance jobs. Unregistered nodes are kept as None entries.
            node = getattr(self, registry_name).get(node_id)
            if not node:
                return False
            node.heartbeat = time.time()
            if node.state == NodeState.inactive:
                node.state = NodeState.active
            return True
    
    def rpc_call_failed(self, proxy, method, reason):
        """
        Log an RPC call that failed for an unexpected reason.

        ``proxy`` is accepted but not used; only the method and the reason
        are written to the log at info level.
        """
        details = (method, reason)
        self.log.info("Method %s failed because of %s" % details)
    
    def rpc_call_success(self, proxy, method, result):
        """
        Log a successful RPC call and hand its result back to the caller.

        ``proxy`` is accepted but not used. The result is returned unchanged.
        """
        details = (method, result)
        self.log.info("Method %s succeded with %s" % details)
        return result
    
    def push_tasksystem(self, request, tasksystem):
        """
        Register a task system received from a client and start it.

        A registry entry is created for the system, remembering which client
        pushed it. The system is then given this object's logger, initialized
        and handed to the task scheduler. All of this happens while the task
        system write lock is held.

        request    -- used as the id of the client node that pushed the system
        tasksystem -- the task system; must expose ``system_id`` and
                      ``init_system``

        Returns True when the system was registered and started, False when a
        system with the same id is already registered (no re-registering).

        Any exception raised while initializing or starting the system is
        propagated to the caller. In that case the registration is rolled
        back, so a failed push does not leave a half-initialized entry behind
        that would block every later push with the same system id.
        """

        # Easier access
        node_id = request

        with self.tasksystem_lock.writelock:
            # No re-registering!
            system_id = tasksystem.system_id
            if system_id in self.tasksystem_registry:
                return False

            # Save the system in the registry
            system_entry = self.tasksystem_registry[system_id] = self._default_tasksystem()
            system_entry.system = tasksystem
            system_entry.client_id = node_id
            system_entry.system_id = system_id

            try:
                # Initialize the system and hand it to the scheduler
                system_entry.system.log = self.log
                system_entry.system.init_system(self)
                self.task_scheduler.start_system(system_entry.system)
            except Exception:
                # Undo the registration so the client can push the system again
                if system_id in self.tasksystem_registry:
                    del self.tasksystem_registry[system_id]
                raise
        return True
    
    def push_task(self, request, task):
        """
        Accept a single task from a client and queue it on the scheduler.

        ``request`` is accepted but not used. Returns True when ``task`` is a
        Task instance and was pushed to the scheduler, False otherwise.
        """
        if not isinstance(task, Task):
            return False
        self.task_scheduler.push_task(task)
        return True
    
    def task_finished(self, task, result, error):
        """
        Called when a task has finished its computation.

        task   -- the finished task; its ``system_id``, ``client_id`` and
                  ``task_id`` attributes are read here
        result -- the result of the task, forwarded unchanged
        error  -- the error of the task, forwarded unchanged

        A task without a system id is reported straight to the client that
        owns it. A task with a system id is handed to its registered task
        system and to the scheduler; once the system reports completion its
        gathered results are sent to the owning client and the system is
        removed from the registry.
        """
        # If the task does not specify an ITaskSystem id it is a single executed task which is not
        # controlled by a dedicated autonomous system on the master
        if task.system_id is None:
            client_id = task.client_id
            with self.client_registry_lock.readlock:
                # Nothing is sent when the client is not in the registry
                if client_id in self.client_registry:
                    self.client_registry[client_id].tcp_proxy.task_finished(task.task_id, result, error)
        else:
            # If we do have a system id let the system process it instead.
            # The write lock is held because the registry entry may be deleted below.
            with self.tasksystem_lock.writelock:
                # Tasks of systems that are not registered are ignored here
                if task.system_id in self.tasksystem_registry:
                    system_entry = self.tasksystem_registry[task.system_id]
                    system_entry.system.task_finished(self, task, result, error)
                    
                    # Inform the scheduler of the task (only done in this branch)
                    self.task_scheduler.task_finished(task, result, error)
                    
                    # Check whether the whole system is done
                    if system_entry.system.is_complete(self):
                        try:
                            # Gather results
                            final_results = system_entry.system.gather_result(self)
                            
                            # Send the results to the client proxy, if the client is still registered
                            client_id = system_entry.client_id
                            with self.client_registry_lock.readlock:
                                if client_id in self.client_registry:
                                    self.client_registry[client_id].tcp_proxy.work_finished(final_results, system_entry.system.system_id)
                        finally:
                            # Always drop the finished system, even if gathering or sending failed
                            del self.tasksystem_registry[task.system_id]