Example #1
class SandboxTaskStateAwaiter(object):
    DEFAULT_UPDATE_INTERVAL = 1.0 # FIXME

    def __init__(self, sandbox, update_interval=DEFAULT_UPDATE_INTERVAL):
        self._sandbox = sandbox
        self._should_stop = False
        self._lock = threading.Lock()
        self._something_happened = threading.Condition(self._lock)
        self._worker_thread = None
        self._update_interval = update_interval
        self._running = {}
        self._incoming = set()

    def start(self):
        self._worker_thread = ProfiledThread(target=self._loop, name_prefix='SbxStateMon')
        self._worker_thread.start()

    def stop(self):
        with self._lock:
            self._should_stop = True
            self._something_happened.notify()

        self._worker_thread.join()

    # NOTE: 'await' became a reserved word in Python 3.7; this code targets Python 2
    def await(self, task_id):
        with self._lock:
            was_empty = not self._incoming

            assert task_id not in self._incoming and task_id not in self._running

            self._incoming.add(task_id)

            if was_empty:
                self._something_happened.notify()
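A note on the omitted worker: start() targets a _loop method that this example does not include. The sketch below shows one plausible shape for such a polling loop; self._sandbox.get_task_state() and the way poll results are stored are assumptions, not part of the original code.

    def _loop(self):
        while True:
            with self._lock:
                if self._should_stop:
                    return

                # Absorb newly awaited task ids into the polling set.
                for task_id in self._incoming:
                    self._running[task_id] = None
                self._incoming.clear()

                if not self._running:
                    # Nothing to poll: sleep until await() or stop() wakes us.
                    self._something_happened.wait()
                    continue

                # Sleep at most update_interval; stop() and await() wake us early.
                self._something_happened.wait(self._update_interval)

                if self._should_stop:
                    return

            # Poll outside the lock so await() and stop() are never blocked.
            for task_id in list(self._running):
                self._running[task_id] = self._sandbox.get_task_state(task_id)  # assumed API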
Example #2
class Packet(object):
    _MAX_TIME_WAIT = 60.0 # FIXME

    def __init__(self, pck_id, graph):
        self.id = pck_id
        self.name = '_TODO_packet_name_for_%s' % pck_id # TODO
        self.history = []
        self._init_non_persistent()

        self.state = None
        #self._update_state_if_need()

        self._graph_executor = rem.job_graph.JobGraphExecutor(
            _ExecutorOps(self),
            self.id,
            graph,
        )

        # TODO Better
        with self._lock:
            self._graph_executor.init()

    def _get_vaults_for(self, job_id):
        vaults = self._vaults_setup
        if not vaults:
            return None

        env = copy.copy(vaults['global']) or {}  # 'global' may be None (see dictify in start())
        env.update(vaults['jobs'].get(job_id, {}))

        logging.debug('Vaults for %d: %s' % (job_id, env))

        return env

    def _mark_as_finished_if_need(self):
        graph = self._graph_executor

        # Finished when the graph reached a terminal state, when the nearest
        # retry lies further in the future than _MAX_TIME_WAIT, or when a
        # stopped or cancelled packet has no jobs left running.
        self._finished = (
            graph.state in [GraphState.SUCCESSFULL, GraphState.ERROR]
            or (graph.state == GraphState.TIME_WAIT
                and graph.get_nearest_retry_deadline() - time.time() > self._MAX_TIME_WAIT)
            or ((self._do_not_run or self._cancelled) and graph.is_null())
        )

    def _update_state(self):
        new_state = self._calc_state()
        if new_state == self.state:
            return
        self.state = new_state
        self.history.append((new_state, time.time()))
        logging.info("new state %s" % new_state)

    def _init_non_persistent(self):
        self._lock = threading.RLock()
        self._something_changed = threading.Condition(self._lock)
        self._main_thread = None
        self._job_threads = []
        self._proc_runner = None
        self._do_not_run = False

        self._finished = False
        self._cancelled = False
        self._has_updates = False

    def vivify_jobs_waiting_stoppers(self):
        with self._lock:
            self._graph_executor.vivify_jobs_waiting_stoppers()

    def __getstate__(self):
        sdict = self.__dict__.copy()
        sdict.pop('_lock', None)
        sdict.pop('_something_changed', None)
        sdict.pop('_working_directory', None)
        sdict.pop('_io_directory', None)
        sdict.pop('_main_thread', None)
        sdict.pop('_proc_runner', None)
        sdict.pop('_job_threads', None)
        sdict.pop('_on_update', None)
        sdict.pop('_do_not_run', None)

        sdict.pop('_finished', None)
        sdict.pop('_cancelled', None)
        sdict.pop('_has_updates', None)
        return sdict

    def __setstate__(self, sdict):
        self.__dict__.update(sdict)
        self._init_non_persistent()

    def start(self, working_directory, io_directory, on_update, reset_tries=False,
              vaults_setup=None):
        self._on_update = on_update
        self._working_directory = working_directory
        self._io_directory = io_directory
        #self._init_non_persistent()
        self._proc_runner = rem.job.create_job_runner(None, None)

        if vaults_setup:
            def dictify(pairs):
                if pairs is None:
                    return None
                return dict(pairs)

            vaults_setup = {
                'global': dictify(vaults_setup['global']),
                'jobs': {
                    int(job_id): dictify(setup)
                        for job_id, setup in vaults_setup['jobs'].items()
                }
            }

        self._vaults_setup = vaults_setup

        with self._lock:
            if reset_tries:
                self._graph_executor.reset_tries()
            #self.resume()
            #self._do_not_run = False

        self._main_thread = ProfiledThread(target=self._main_loop, name_prefix='PckLoop')
        self._main_thread.start()

    def join(self):
        self._main_thread.join()

    def get_working_directory(self):
        return self._working_directory

    def get_io_directory(self):
        return self._io_directory

    def _start_one_another_job(self):
        logging.debug('+ Packet._start_one_another_job')
        job_runner = self._graph_executor.get_job_to_run()
        t = ProfiledThread(target=job_runner.run, name_prefix='Job')
        self._job_threads.append(t)
        t.start()

    def stop(self, kill_jobs):
        with self._lock:
            #if self._do_not_run: # XXX May be called with different kill_jobs
                #return
            if self._finished: # FIXME
                #raise RuntimeError("Already finished")
                return
            if self._cancelled:
                raise RuntimeError("Already cancelled")

            self._do_not_run = True

            if kill_jobs:
                self._graph_executor.cancel()
                self._mark_as_finished_if_need()

            self._something_changed.notify()

    # For those who changed their minds after a call to stop(kill_jobs=False)
    def resume(self):
        with self._lock:
            if self._finished: # FIXME
                raise RuntimeError("Already finished")
            if self._cancelled:
                raise RuntimeError("Already cancelled")

            if self._do_not_run:
                self._do_not_run = False
                self._graph_executor.reset_tries()
                self._mark_as_finished_if_need()
                self._something_changed.notify()

    def cancel(self):
        with self._lock:
            if self._finished:
                raise RuntimeError("Already finished")
            self._cancelled = True
            self._graph_executor.cancel()
            self._mark_as_finished_if_need()
            self._something_changed.notify()

    def is_cancelled(self):
        return self._cancelled

    def restart(self):
        with self._lock:
            if self._finished:
                raise RuntimeError("Already finished")
            if self._cancelled:
                raise RuntimeError("Already cancelled")
            self._do_not_run = False # was any
            self._graph_executor.reset()
            self._something_changed.notify()

    def produce_rem_update_message(self):
        graph = self._graph_executor

        state = {
            #'history': list(self.history), # TODO FIXME
            'state': self.state,
            'detailed_status': graph.produce_detailed_status(),
            'succeed_jobs': map(str, graph.get_succeeded_jobs()),
        }

        if graph.state == GraphState.TIME_WAIT:
            state['nearest_retry_deadline'] = graph.get_nearest_retry_deadline()

        return state

    def _send_update(self):
        self._on_update(self.produce_rem_update_message())

    def _main_loop(self):
        logging.debug('+ Packet.run')

        while True:
            with self._lock:
                logging.debug('_before_job_start_loop')

                if not (self._do_not_run or self._cancelled):
                    logging.debug('_graph_executor.state == %s' \
                        % GraphState.str(self._graph_executor.state))

                    while self._graph_executor.state & GraphState.PENDING_JOBS:
                        self._start_one_another_job()

                logging.debug('_before_send_update_check: %s' % ((self._has_updates, self._finished),))
                if self._has_updates and not self._finished:
                    logging.debug('_before_send_update')
                    self._send_update()
                    self._has_updates = False

                if self._finished:
                    break

                logging.debug('_before_cond_wait')
                self._something_changed.wait()
                logging.debug('_after_cond_wait')

        logging.debug('+ exiting Packet.run')

    def _calc_state(self):
        graph = self._graph_executor
        return graph.state # FIXME: the code below is unreachable until this early return is removed

        if graph.is_null():
            if self._do_not_run:
                return GraphState.SUSPENDED
            elif self._cancelled:
                return GraphState.CANCELLED

        return graph.state

    def _stop_waiting(self, stop_id):
        with self._lock:
            if self._cancelled or self._finished: # FIXME _do_not_run
                return
            self._graph_executor.stop_waiting(stop_id)

    # OPS for rem.job.Job
    def start_process(self, *args, **kwargs):
        return self._proc_runner(*args, **kwargs)

    def notify_long_execution(self, job):
        raise NotImplementedError()

    def _create_job_file_handles(self, job):
        return self._graph_executor.create_job_file_handles(job)

    def on_job_done(self, job_runner):
        self._graph_executor.on_job_done(job_runner)

        with self._lock:
            self._graph_executor.apply_jobs_results()

    def create_file_handles(self, job):
        return self._graph_executor.create_job_file_handles(job)
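A short usage sketch for the Packet lifecycle above; make_graph(), report_to_rem and both directory paths are placeholders, not part of the original code.

    graph = make_graph()                      # placeholder: builds the job graph argument
    pck = Packet('pck-00001', graph)
    pck.start(
        working_directory='/tmp/pck/work',    # placeholder path
        io_directory='/tmp/pck/io',           # placeholder path
        on_update=report_to_rem,              # receives produce_rem_update_message() dicts
    )
    try:
        pck.join()                            # returns once _main_loop() sees _finished
    except KeyboardInterrupt:
        pck.stop(kill_jobs=True)              # cancel running jobs; the loop then exits
        pck.join()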
Example #3
class ConnectionManager(Unpickable(topologyInfo=TopologyInfo,
                                   lock=PickableLock,
                                   alive=(bool, False),
                                   tags_file=str),
                        ICallbackAcceptor):
    def InitXMLRPCServer(self):
        self.rpcserver = SimpleXMLRPCServer(("", self.port), allow_none=True)
        self.rpcserver.register_function(self.set_client_version, "set_client_version")
        self.rpcserver.register_function(self.get_client_version, "get_client_version")
        self.rpcserver.register_function(self.set_tags, "set_tags")
        self.rpcserver.register_function(self.register_tags_events, "register_tags_events")
        self.rpcserver.register_function(self.list_clients, "list_clients")
        self.rpcserver.register_function(self.list_tags, "list_tags")
        self.rpcserver.register_function(self.suspend_client, "suspend_client")
        self.rpcserver.register_function(self.resume_client, "resume_client")
        self.rpcserver.register_function(self.reload_config, "reload_config")
        self.rpcserver.register_function(self.register_share, "register_share")
        self.rpcserver.register_function(self.unregister_share, "unregister_share")
        self.rpcserver.register_function(self.get_client_info, "get_client_info")
        self.rpcserver.register_function(self.list_shares, "list_shares")
        self.rpcserver.register_function(self.list_shared_events, "list_shared_events")
        self.rpcserver.register_function(self.list_subscriptions, "list_subscriptions")
        self.rpcserver.register_function(self.check_connection, "check_connection")
        self.rpcserver.register_function(self.ping, "ping")

    def UpdateContext(self, context):
        self.scheduler = context.Scheduler
        self.network_name = context.network_name
        self.tags_file = context.remote_tags_db_file
        self.port = context.system_port
        if self.tags_file:
            self.acceptors = MapSetDB(self.tags_file)
        self.topologyInfo.UpdateContext(context)
        self.max_remotetags_resend_delay = context.max_remotetags_resend_delay

    def Start(self):
        if not self.network_name or not self.tags_file or not self.port:
            logging.warning("ConnectionManager could'n start: wrong configuration. " +
                            "network_name: %s, remote_tags_db_file: %s, system_port: %r",
                            self.network_name, self.tags_file, self.port)
            return

        self.ReloadConfig()
        logging.debug("after_reload_config")

        for client in self.topologyInfo.servers.values():
            if client.active and client.name != self.network_name:
                client.TryInitializePeersVersions(self.network_name)
        logging.debug("after_clients_versions_init")

        self.alive = True
        self.InitXMLRPCServer()
        self._accept_loop_thread = ProfiledThread(target=self.ServerLoop, name_prefix='ConnManager')
        self._accept_loop_thread.start()
        logging.debug("after_connection_manager_loop_start")

        for client in self.topologyInfo.servers.values():
            self.scheduler.ScheduleTaskT(0, self.SendData, client, skip_logging=True)

    def Stop(self):
        self.alive = False
        self._accept_loop_thread.join()
        self.rpcserver = None # shutdown listening socket

    def ServerLoop(self):
        rpc_fd = self.rpcserver.fileno()
        while self.alive:
            rout, _, _ = select.select((rpc_fd,), (), (), 0.01)
            if rpc_fd in rout:
                self.rpcserver.handle_request()

    def SendData(self, client):
        if self.alive and client.active:
            client.SendDataIfNeed(self.network_name)

        if hasattr(self, "scheduler"):
            self.scheduler.ScheduleTaskT(
                min(client.PENALTY_FACTOR ** client.errorsCnt, self.max_remotetags_resend_delay),
                self.SendData,
                client,
                skip_logging=True
            )

    def RegisterTagEvent(self, tag, event, message=None):
        if not isinstance(tag, TagBase):
            raise RuntimeError("%s is not Tag class instance", tag.GetName())
        if tag.IsRemote():
            return

        tagname = tag.GetName()
        with self.lock: # see register_share
            acceptors = self.acceptors.get(tagname)
            if acceptors:
                logging.debug("on %s connmanager %s with acceptors list %s", TagEventName[event], tagname, acceptors)
                for clientname in acceptors:
                    self.RegisterTagEventForClient(clientname, tagname, event, message)

    def RegisterTagEventForClient(self, clientname, tagname, event, message=None):
        logging.debug("%s remote tag %s on host %s", TagEventName[event], tagname, clientname)
        client = self.topologyInfo.GetClient(clientname, checkname=False)
        if client is None:
            logging.error("unknown client %s appeared", clientname)
            return False
        client.RegisterTagEvent("%s:%s" % (self.network_name, tagname), event, message)

    def ReloadConfig(self, filename=None):
        old_servers = set(self.topologyInfo.servers.keys())
        self.topologyInfo.ReloadConfig()
        new_servers = set(self.topologyInfo.servers.keys())
        new_servers -= old_servers
        if self.alive:
            for client in new_servers:
                self.scheduler.ScheduleTaskT(0, self.SendData, self.topologyInfo.servers[client], skip_logging=True)

    def Subscribe(self, tag):
        if tag.IsRemote():
            client = self.topologyInfo.GetClient(tag.GetRemoteHost(), checkname=True)
            client.Subscribe(tag.GetName())
            return True
        return False

    @traced_rpc_method()
    def set_tags(self, tags): # obsolete
        logging.debug("set %d remote tags", len(tags))
        for tagname in tags:
            self.scheduler.tagRef.AcquireTag(tagname).CheckRemote().Set()
        return True

    @traced_rpc_method()
    def set_client_version(self, clientname, version):
        self.topologyInfo.GetClient(clientname, checkname=True).SetVersion(int(version))
        logging.debug("set client version for %s to %s", clientname, version)
        return True

    @traced_rpc_method()
    def get_client_version(self):
        return PROTOCOL_VERSION

    @traced_rpc_method()
    def register_tags_events(self, updates):
        tagRef = self.scheduler.tagRef
        logging.debug("register_tags_events %d: %s", len(updates), updates)
        for update in updates:
            tagRef.AcquireTag(update[0]).CheckRemote().Modify(*update[1:])
            logging.debug("done with: %s", update)
        logging.debug("register_tags_events %d: done", len(updates))
        return True

    @traced_rpc_method()
    def list_clients(self):
        return [{"name": client.name,
                 "url": client.url,
                 "systemUrl": client.systemUrl,
                 "active": client.active,
                 "version": client.version,
                 "errorsCount": client.errorsCnt,
                 "tagsCount": len(client.events),
                 "subscriptionsCount": len(client.subscriptions),
                 "lastError": str(client.lastError)} for client in self.topologyInfo.servers.values()]

    @traced_rpc_method()
    def list_tags(self, name_prefix):
        data = set()
        for server in self.topologyInfo.servers.values():
            if name_prefix is None or server.name.startswith(name_prefix):
                data.update(server.GetEventsAsTagsToSet())
        return list(data)

    @traced_rpc_method()
    def suspend_client(self, name):
        client = self.topologyInfo.GetClient(name)
        return client.Suspend()

    @traced_rpc_method()
    def resume_client(self, name):
        client = self.topologyInfo.GetClient(name)
        return client.Resume()

    @traced_rpc_method()
    def reload_config(self, location=None):
        self.ReloadConfig(location)

    @traced_rpc_method()
    def register_share(self, tags, clientname):
        tagRef = self.scheduler.tagRef
        logging.debug("register_share %d tags for %s: %s", len(tags), clientname, tags)
        for tagname in tags:
            # XXX
            # 1. this lock only guarantee eventual-consistency of tag's history
            # 2. clients of self may see duplicates of events (even Reset)
            # 3. also guard self.acceptors
            with self.lock:
                self.acceptors.add(tagname, clientname)
                if tagRef._RawTag(tagname).IsLocallySet():
                    self.RegisterTagEventForClient(clientname, tagname, ETagEvent.Set)
        logging.debug("register_share %d tags for %s: done", len(tags), clientname)

    @traced_rpc_method()
    def unregister_share(self, tagname, clientname):
        with self.lock:
            return self.acceptors.remove(tagname, clientname)

    @traced_rpc_method()
    def get_client_info(self, clientname):
        client = self.topologyInfo.GetClient(clientname)
        res = {"name": client.name,
               "url": client.url,
               "systemUrl": client.systemUrl,
               "active": client.active,
               "version": client.version,
               "errorsCount": client.errorsCnt,
               "deferedTagsCount": len(client.events),
               "subscriptionsCount": len(client.subscriptions),
               "lastError": str(client.lastError)}
        return res

    @traced_rpc_method()
    def list_shares(self, clientname):
        client = self.topologyInfo.GetClient(clientname)
        return _get_tags_to_set(client.GetEventsAsList())

    @traced_rpc_method()
    def list_shared_events(self, clientname):
        client = self.topologyInfo.GetClient(clientname)
        return client.GetEventsAsList()

    @traced_rpc_method()
    def list_subscriptions(self, clientname):
        client = self.topologyInfo.GetClient(clientname)
        return list(client.subscriptions)

    @traced_rpc_method()
    def check_connection(self, clientname):
        client = self.topologyInfo.GetClient(clientname)
        return client.connection.ping()

    @traced_rpc_method()
    def ping(self):
        return True

    def __getstate__(self):
        sdict = self.__dict__.copy()
        sdict.pop("scheduler", None)
        sdict.pop("rpcserver", None)
        sdict.pop("acceptors", None)
        sdict.pop("_accept_loop_thread", None)
        sdict["alive"] = False
        return getattr(super(ConnectionManager, self), "__getstate__", lambda: sdict)()
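The methods registered in InitXMLRPCServer are served over plain XML-RPC, so a peer can drive them with the standard-library client. A minimal sketch, assuming a placeholder URL; xmlrpclib is named xmlrpc.client on Python 3.

    import xmlrpclib

    peer = xmlrpclib.ServerProxy("http://peer-host:8104", allow_none=True)  # placeholder URL
    assert peer.ping()
    peer.set_client_version("this-network-name", peer.get_client_version())
    # Each update is (tagname, modify-args...); the server applies Modify(*update[1:]).
    peer.register_tags_events([("peer-network:some-tag", ETagEvent.Set)])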
Example #4
class RemNotifier(object):
    _RETRY_DELAY = 10.0

    class RetriableError(RuntimeError):
        pass

    def __init__(self, send_update):
        self._send_update = send_update
        self._pending_update = None
        self._pck_finished = False
        self._should_stop_max_time = None
        self._lock = threading.Lock()
        self._changed = threading.Condition(self._lock)
        self._worker_thread = ProfiledThread(target=self._the_loop, name_prefix='RemNotifier')
        self._worker_thread.daemon = True # FIXME See failed[0]
        self._worker_thread.start()

    def stop(self, timeout=0):
        with self._lock:
            if self._should_stop_max_time:
                raise RuntimeError("stop() already called")

            self._should_stop_max_time = time.time() + timeout
            self._changed.notify()

        self._worker_thread.join()

    def send_update(self, update, is_final=False):
        with self._lock:
            self._pending_update = (update, is_final)
            self._changed.notify()

    def _the_loop(self):
        next_try_min_time = 0

        while True:
            with self._lock:
                while True:
                    now = time.time()

                    if self._should_stop_max_time:
                        if now > self._should_stop_max_time \
                                or next_try_min_time > self._should_stop_max_time:
                            return

                    if self._pending_update:
                        deadline = next_try_min_time

                        if now > deadline:
                            break

                    else:
                        deadline = None

                    self._changed.wait(deadline - now if deadline is not None else None)

                update, is_final = self._pending_update
                self._pending_update = None

            logging.debug('sending_update: %s' % ((update, is_final),))

            try:
                self._send_update(update, is_final)

            except self.RetriableError:
                logging.exception('Failed to send update')

                with self._lock:
                    if not self._pending_update:
                        self._pending_update = (update, is_final)

                    next_try_min_time = time.time() + self._RETRY_DELAY

            else:
                if is_final:
                    return
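A short usage sketch: send_update may be any callable that raises RemNotifier.RetriableError on transient failures, which makes _the_loop keep the update and retry after _RETRY_DELAY. post_to_rem and rem_api below are placeholders, not part of the original code.

    def post_to_rem(update, is_final):
        try:
            rem_api.send(update, final=is_final)  # placeholder transport call
        except IOError as e:
            # Transient failure: the notifier re-queues the update and retries.
            raise RemNotifier.RetriableError(str(e))

    notifier = RemNotifier(post_to_rem)
    notifier.send_update({'state': 'WORKABLE'})
    notifier.send_update({'state': 'SUCCESSFULL'}, is_final=True)  # worker exits once delivered
    notifier.stop(timeout=30.0)  # allow up to 30 seconds for the final flush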