Exemplo n.º 1
0
    def run_umc(self,umcdef,GlobalContext): 
        """Launch one umc instance as a child process and return its handle.

        Waits, if necessary, so that at least ``UmcRunTask.MIN_RUN_DELAY``
        seconds elapse between two launches, makes sure the instance log
        directory exists, logs what is being started and then spawns the
        launch command through the shell.

        :param umcdef: definition of the umc instance to run (instance id,
            tool id, delay, count, params, rotation time limit, log file
            groups and options are read from it)
        :param GlobalContext: global runner context; ``homeDir`` is read here
        :return: the ``psutil.Popen`` object of the spawned process
        """
        import errno

        # check minimum umc runs delay
        remaining=UmcRunTask.MIN_RUN_DELAY-(time.time()-self.last_run_time)
        if remaining>0:
            Msg.info2_msg("Sleeping %.2f seconds before running the next umc instance..."%(remaining))
            time.sleep(remaining)
        
        # create log directory for this tool if it does not exist; creating it
        # unconditionally and tolerating EEXIST avoids the check-then-create race
        log_dir=get_umc_instance_log_dir(umcdef.umc_instanceid, GlobalContext)
        try:
            os.makedirs(log_dir)
        except OSError as e:
            if e.errno!=errno.EEXIST:
                raise

        # tell what we are doing; log_dir is the directory actually passed to the launch command below
        Msg.info1_msg("Starting umc instance id '{umc_instanceid}': umc='{umc_toolid}', delay={delay}, count={count}, params='{params}', rotation_timelimit={rotation_timelimit}, log_dir='{log_dir}', log_file_groups={log_file_groups}".
            format(umc_instanceid=umcdef.umc_instanceid,umc_toolid=umcdef.umc_toolid,delay=umcdef.delay,count=umcdef.count,params=umcdef.params,
                rotation_timelimit=umcdef.rotation_timelimit,log_dir=log_dir,log_file_groups=umcdef.log_file_groups))

        # it is important to set setsid as there might be child processes that use tty, this should provide a dedicated tty for them
        # example of such process is sqlcl
        preexec=os.setsid if "setsid" in umcdef.options else None

        launch_cmd=UmcRunTask.UMC_LAUNCH_CMD.format(umc_instanceid=umcdef.umc_instanceid,umc_toolid=umcdef.umc_toolid,
                delay=umcdef.delay,count=umcdef.count,params=umcdef.params,rotation_timelimit=umcdef.rotation_timelimit,
                umc_home=GlobalContext.homeDir,log_dir=log_dir,log_file_groups=umcdef.log_file_groups)
        p = psutil.Popen(launch_cmd,
            shell=True, executable=UmcRunTask.DEFAULT_SHELL, preexec_fn=preexec, stdin=None, stdout=None, stderr=None)

        # remember the launch time for the next minimum-delay check
        self.last_run_time=time.time()
        return p    
Exemplo n.º 2
0
    def run_task(self, GlobalContext, tdef):
        """Detect live processes of this process group that are not our children.

        The pids of the current process group are taken from
        ``self.get_all_pgids()``. Every pid that still answers to signal 0
        but is not found among the (recursive) children of this process is
        reported as an orphan.

        :param GlobalContext: global runner context (not used here)
        :param tdef: task definition (not used here)
        :return: ``False`` when at least one orphan exists, ``True`` otherwise
        """
        pids = self.get_all_pgids()[str(os.getpgrp())]

        # pids of all descendants; a set makes each membership test O(1)
        child_pids = set(p.pid for p in psutil.Process().children(recursive=True))

        orphans = []
        for pid in pids:
            # normalize once: the liveness probe and the comparison with
            # psutil's integer pids must use the same representation
            numeric_pid = int(pid)
            try:
                os.kill(numeric_pid, 0)
            except OSError:
                # we are not so fast, the process ended in the meantime 
                continue

            # the process is live; check it exists in the process tree
            if numeric_pid not in child_pids:
                orphans.append(pid)
        # for pid

        # pause if there are orphans
        if orphans:
            Msg.warn_msg("There are %d orphan processes, will pause umcrunner until orphans exist!"%(len(orphans)))
            Msg.info2_msg("The orphans are: %s"%orphans)
            return False
        return True
Exemplo n.º 3
0
 def run_task(self, GlobalContext, tdef):
     """Refresh every umc instance and, in verbose mode, log open file handles.

     :param GlobalContext: global runner context; ``umcdefs`` is iterated
     :param tdef: task definition (not used here)
     :return: always ``True``
     """
     for umcdef in GlobalContext.umcdefs:
         self.refresh_single_instance(umcdef, GlobalContext)
     
     # report number of open handles per type; only the string form is logged,
     # so the status table is not fetched separately
     if Msg.verbose:
         Msg.info2_msg('Open file handles: %s'%utils.fd_table_status_str())
     
     return True
Exemplo n.º 4
0
    def run_task(self, GlobalContext, tdef):
        """Check the number of descendant processes against the configured maximum.

        :param GlobalContext: global runner context; ``params.max_processes`` is read
        :param tdef: task definition (not used here)
        :return: ``False`` when the limit is exceeded, ``True`` otherwise
        """
        child_count=len(psutil.Process().children(True))
        Msg.info2_msg("There are %d children processes."%(child_count))

        limit_exceeded=child_count > GlobalContext.params.max_processes
        if not limit_exceeded:
            return True

        Msg.warn_msg("The current number of child processes %d exceeds the maximum of %d; umcrunner will be paused."
            %(child_count,GlobalContext.params.max_processes))
        return False
Exemplo n.º 5
0
def _coerce_field_value(value):
    """Return the Python literal that the text of *value* denotes, else *value*."""
    import ast

    if isinstance(value, (int, float)):
        return value
    try:
        # textual values such as "12" or "1.5" become numbers, as they did
        # when field values were evaluated as source text
        return ast.literal_eval(str(value))
    except (ValueError, SyntaxError, TypeError):
        return value


def eval_transform(transform_exprs, timestamp, tags, fields):
    """Apply transformation statements to the tags and fields of one datapoint.

    Every tag and field with a non-None value is exposed as a variable to the
    statements in *transform_exprs*; ``timestamp``, ``tags`` and ``fields`` are
    available to them as well. Variables are kept in an explicit namespace
    dictionary rather than being declared by building source text, so values
    containing quotes cannot break or inject code.

    NOTE(review): the statements themselves are still executed with ``exec``;
    they must come from a trusted configuration.

    :param transform_exprs: iterable of Python statements to execute in order
    :param timestamp: timestamp of the datapoint, readable by the statements
    :param tags: dict of tag name -> value
    :param fields: dict of field name -> value
    :return: tuple ``(new_tags, new_fields)``; a variable created by a
        statement becomes a field when it is an int or float and a tag
        otherwise. On an unexpected error the original ``tags`` and
        ``fields`` are returned unchanged.
    """
    try:
        # names the statements may read but that are not datapoint values
        reserved = {"timestamp": timestamp, "tags": tags, "fields": fields}
        namespace = dict(reserved)
        declared = set()

        # declare variables and assign values to them
        for name, value in tags.items():
            if value is not None:
                namespace[name] = value
                declared.add(name)
        for name, value in fields.items():
            if value is not None:
                namespace[name] = _coerce_field_value(value)
                declared.add(name)

        # transform; a failing statement is reported and the others still run
        for expr in transform_exprs:
            try:
                exec(expr, globals(), namespace)
            except Exception as ex:
                Msg.info2_msg("Error when evaluating transformation '%s': %s" %
                              (expr, str(ex)))

        new_tags = {}
        new_fields = {}
        for name, value in namespace.items():
            if name in reserved and name not in declared:
                continue
            if name in tags:
                new_tags[name] = value
            elif name in fields:
                new_fields[name] = value
            elif isinstance(value, (int, float)):
                # new field that resulted from transformation
                new_fields[name] = value
            else:
                # new tag that resulted from transformation
                new_tags[name] = value
        # // for

        return new_tags, new_fields
    except Exception as e:
        # umc_id is not a parameter of this function; look it up defensively so
        # that the error handler itself cannot fail with a NameError
        Msg.err_msg("Error when evaluating transformations for %s: %s" %
                    (globals().get("umc_id", "<unknown>"), str(e)))
        return tags, fields
Exemplo n.º 6
0
    def purge_cache(self):
        """Remove expired, unlocked items from ``self.data``.

        An item is removed when its lock has no owner and either it carries no
        creation time or age, or its age has been exceeded. Each removal is
        logged.
        """
        expired = []
        for key in self.data:
            entry = self.data[key]
            if entry.lock._RLock__owner:
                # the item is locked, leave it alone
                continue
            no_expiry_info = entry.created_time is None or entry.age is None
            if no_expiry_info or time.time() - entry.created_time > entry.age:
                expired.append(key)

        # delete in a second pass so the dictionary is not changed while iterating it
        for key in expired:
            del self.data[key]
            Msg.info2_msg("The cache item %s has been purged from the cache." %
                          key)
Exemplo n.º 7
0
    def run_all(self):
        """Run every task that is due and update the global paused state.

        A task runs when its interval has elapsed, it is allowed under the
        current pause state, its local pause (``run_after``) has expired and
        it is not disabled. A task that returns a false result pauses the
        remaining tasks of this pass unless they run on global pause. Tasks
        exceeding their hard time limit are disabled; tasks exceeding their
        soft limit are paused for ``pause_for`` seconds.
        """
        paused = self.GlobalContext.paused
        for task in self.tasks:
            interval_elapsed = time.time()-task.last_run_time > task.time_interval
            if not (interval_elapsed and (task.run_on_global_pause or not(paused))):
                continue
            # locally paused
            if not (task.run_after==0 or time.time()>task.run_after):
                continue
            if task.disabled:
                continue

            # inform that the task is resumed if it was paused
            if task.run_after>0:
                task.run_after=0
                Msg.info1_msg("The task %s is resumed."%(task.name))

            # run the task
            started=time.time()
            task.result = task.target.run_task(self.GlobalContext, task)
            finished=time.time()
            if not(task.result):
                paused = True
            task.last_run_time = finished
            task.last_run_duration=finished-started

            if task.time_limit_disable>0 and task.last_run_duration > task.time_limit_disable:
                # check to be disabled due to hard limit
                task.disabled=True
                Msg.warn_msg("The task %s was running for %.2f seconds which is more than the hard maximum of %.2f seconds. The task will be disabled."
                    %(task.name, task.last_run_duration, task.time_limit_disable))
            elif task.time_limit_pause>0 and task.last_run_duration > task.time_limit_pause:
                # check to be paused due to soft limit
                task.run_after=finished+task.pause_for
                Msg.warn_msg("The task %s was running for %.2f seconds which is more than the soft maximum of %.2f seconds. The task will be paused for %.2f seconds."
                    %(task.name, task.last_run_duration, task.time_limit_pause, task.pause_for))
            else:
                # report on task duration
                Msg.info2_msg("The task %s was running for %.2f seconds."%(task.name,task.last_run_duration))
        # // for

        # the runner is paused as long as any task with a known result reports a false one
        was_paused = self.GlobalContext.paused
        known_results = [ task.result for task in self.tasks if task.result is not None ]
        self.GlobalContext.paused = not(all(known_results))

        if self.GlobalContext.paused != was_paused:
            Msg.warn_msg("umcrunner state has been %s."%("PAUSED" if self.GlobalContext.paused else "RESUMED"))
Exemplo n.º 8
0
    def read_umcdefs(self, reader, writer):
        """Build the umc definitions for all configured umc instances.

        For every instance returned by ``self.get_umc_instances()`` a ``Map``
        is created holding the umc id, the enabled flag and the reader and
        writer definitions. A definition is disabled when the configuration
        disables it, when its reader or writer definition is missing, or when
        its writer is disabled.

        :param reader: object providing ``read_umcdef(umc_id, instance)``
        :param writer: object providing ``read_umcdef(umc_id, instance)``
        :return: dict of umc id -> definition; for a duplicate id an error is
            logged and the first definition is kept
        """
        allinstances = self.get_umc_instances()
        umcdefs = {}

        for instance in allinstances:
            umc_id = instance["umc-id"]

            umcdef = Map(umcid=umc_id,
                         enabled=False,
                         writer=None,
                         reader=None,
                         instance=instance)
            umcdef.enabled = self.value_element(instance, "enabled", False)
            umcdef.writer = writer.read_umcdef(umc_id, instance)
            umcdef.reader = reader.read_umcdef(umc_id, instance)
            Msg.info1_msg("Definition retrieved for umc %s" % (umc_id))

            if not (umcdef.enabled):
                Msg.info1_msg(
                    "umc id %s is disabled by configuration, no datapoints will be read."
                    % (umc_id))
            elif umcdef.writer is None or umcdef.reader is None:
                Msg.info2_msg(
                    "umc id %s does not have reader or writer definitions and it will be disabled."
                    % (umc_id))
                umcdef.enabled = False

            # disable if the writer is not enabled; a missing writer definition
            # has no 'enabled' attribute and must not be dereferenced
            if umcdef.writer is not None and not (umcdef.writer.enabled):
                Msg.info2_msg(
                    "umc id %s is disabled as its writer is disabled. No data will be read for this umc id."
                    % (umc_id))
                umcdef.enabled = False

            if umc_id in umcdefs:
                Msg.err_msg(
                    "There is a duplicate umc instance with id '%s' in the configuration file!"
                    % (umc_id))
            else:
                umcdefs[umc_id] = umcdef
        # // for

        return umcdefs
Exemplo n.º 9
0
    def run_task(self, GlobalContext, tdef):
        """Count zombie descendants and compare the count with the number of umc instances.

        :param GlobalContext: global runner context; ``umcdefs`` is read
        :param tdef: task definition (not used here)
        :return: ``False`` when there are more zombies than umc instances,
            ``True`` otherwise
        """
        zombies = 0
        for child in psutil.Process().children(True):
            try:
                is_zombie = child.status() == psutil.STATUS_ZOMBIE
            except Exception:
                # the status of this child could not be read; it is not counted
                continue
            if is_zombie:
                zombies += 1

        Msg.info2_msg("There are %d zombie processes"%(zombies))

        too_many = zombies > len(GlobalContext.umcdefs)
        if not too_many:
            return True

        Msg.warn_msg("There are %d zombie processes which exceeds the number of umc instances %d. Will pause umc runner until the zombie processes will disappear!"%
            (zombies,len(GlobalContext.umcdefs)))
        return False
Exemplo n.º 10
0
    def __init__(self, config, writerDef):
        """Initialize the writer and read its OMC connection parameters.

        :param config: configuration object, passed on to the base class
        :param writerDef: writer definition, passed on to the base class
        :raises Exception: when the ``connect.data-url`` parameter is missing
        """
        super(OMCWriter, self).__init__(config, writerDef)
        
        # read params
        params=dict(
            base_url=self.param("connect.base-url"), 
            data_url=self.param("connect.data-url"),
            proxies=self.param("connect.proxies"),
            user=self.param("connect.user"), 
            upass=self.param("connect.pass", ""),
            connect_timeout=self.param("connect.connect-timeout", 5),
            read_timeout=self.param("connect.read-timeout", 10),
            omc_inprogress_timeout=self.param("connect.omc-inprogress-timeout", 120))
        self.omc_params=Map(**params)
       
        # print params; the password must not end up in the log
        printable=dict(params)
        if printable["upass"]:
            printable["upass"]="********"
        Msg.info2_msg("OMC Writer parameters: %s"%printable)
 
        # check the data url was defined
        if self.omc_params.data_url is None:
            raise Exception("Invalid connection details (data_url is missing).")
Exemplo n.º 11
0
 def __send_request(self):
     """Send the proxy request and store the result in ``self.response``.

     Only the "get" and "post" methods are supported. Any failure,
     including an unsupported method, is logged as a warning and not
     raised to the caller.
     """
     try:
         Msg.info2_msg("Sending proxy request %s %s" %
                       (self.method.upper(), self.url))
         # mark the request as proxied by this host
         via_header = {"Via": "1.1 %s" % socket.gethostname()}

         if self.method == "get":
             send = requests.get
         elif self.method == "post":
             send = requests.post
         else:
             raise Exception("Method %s is not supported!" % self.method)

         self.response = send(
             self.url,
             timeout=(GlobalContext.params.proxy_timeout_connect,
                      GlobalContext.params.proxy_timeout_read),
             headers=via_header)
     except Exception as err:
         Msg.warn_msg("Proxy request to %s failed: %s" % (self.url, str(err)))
Exemplo n.º 12
0
    def write(self,datapoints,exit_event=None):
        """Upload a batch of datapoints to OMC and wait for the upload result.

        The batch is posted to the configured data url. While the reported
        status is ``IN_PROGRESS`` the status uri is polled about once a
        second, until the status changes, the in-progress timeout expires or
        *exit_event* is set.

        :param datapoints: batch of records to upload
        :param exit_event: optional event object (``is_set()`` and
            ``wait()`` are used); when set, waiting for the result stops.
            When ``None`` the status is still polled, sleeping between polls.
        :raises Exception: when the upload or a status request returns a
            status code of 300 or more, when the upload is reported as
            ``FAILED``, or when the in-progress timeout expires
        """
        Msg.info2_msg("Uploading %d records to OMC..."%len(datapoints))        
        Msg.info2_msg("The batch contains entity types %s"%self.get_all_entity_types(datapoints))

        response = self.run_request('POST',self.omc_params.data_url, datapoints, 'application/octet-stream')
        if response.status_code>=300:
            raise Exception("OMC data upload request failed with status code %d. Response payload: %s"%(response.status_code,response.text))    

        resp=json.loads(response.text)
        status_uri=resp["statusUri"]
        
        Msg.info2_msg("Upload request sent, waiting for the result at %s up to %s seconds..."%(status_uri,self.omc_params.omc_inprogress_timeout))

        def stop_requested():
            # without an exit event there is nothing that can request a stop
            return exit_event is not None and exit_event.is_set()

        start_t=time.time()
        while resp["status"]=="IN_PROGRESS" and not stop_requested():
            response=self.run_request('GET',status_uri)
            if response.status_code>=300:
                raise Exception("OMC status request failed with status code %d"%response.status_code)    
            
            resp=json.loads(response.text)
            if resp["status"]=="IN_PROGRESS":
                # wait only certain number of seconds
                if time.time()-start_t>self.omc_params.omc_inprogress_timeout:
                    Msg.err_msg("Upload failed, the datapoints in the batch will be discarded, they contain the following entity types: %s"%self.get_all_entity_types(datapoints))
                    raise Exception("OMC upload failed due to a timeout of %d seconds while waiting for OMC to confirm the data was uploaded successfully! The status response payload is %s"%(self.omc_params.omc_inprogress_timeout,resp))   
                # wait
                if exit_event is not None:
                    exit_event.wait(1)
                else:
                    time.sleep(1)
        # // while

        # the payload does not necessarily carry an error message, do not fail on its absence
        if resp["status"]=="FAILED":
            raise Exception("OMC upload request failed. %s. Response payload: %s"
                %(resp.get("errorMessage"),resp))
        elif not stop_requested():
            Msg.info2_msg("OMC upload request processed in %d seconds. %s: %s. Response payload: %s"
                %(time.time()-start_t,resp["status"],resp.get("errorMessage"),resp))
Exemplo n.º 13
0
    def run_task(self, GlobalContext, tdef):
        """Start every enabled umc instance that is due and log the overall state.

        Each enabled definition is handled under its own lock. An instance
        without a process whose ``start_after`` time has passed is started via
        ``self.run_umc`` unless it was last started less than
        ``GlobalContext.params.min_starting_time`` seconds ago. At the end the
        running, started and waiting instances are logged.

        :param GlobalContext: global runner context; ``umcdefs`` and
            ``params.min_starting_time`` are read
        :param tdef: task definition (not used here)
        """
        running, started, waiting = [], [], []
        for umcdef in GlobalContext.umcdefs:
            if not umcdef.enabled:
                continue

            umcdef.lock.acquire()
            try:
                if umcdef.proc is not None:
                    # already running
                    running.append("%s, PID=%d"%(umcdef.umc_instanceid,umcdef.proc.pid))
                elif not time.time()>umcdef.start_after:
                    # not yet allowed to start
                    waiting.append("%s, WT=%.2fs"%(umcdef.umc_instanceid,umcdef.start_after-time.time()))
                elif umcdef.last_started_time is not None and time.time()-umcdef.last_started_time < GlobalContext.params.min_starting_time:
                    Msg.warn_msg("umc instance id '%s' starting frequency is too high (<%d seconds), will not start it now!"
                        %(umcdef.umc_instanceid,GlobalContext.params.min_starting_time))
                    waiting.append("%s, WT=%.2fs"%(umcdef.umc_instanceid,GlobalContext.params.min_starting_time))                        
                else:
                    try:
                        # run umcinstance as a child process
                        umcdef.proc = self.run_umc(umcdef, GlobalContext)

                        # bookkeeping of start times and number of runs
                        launched_at=time.time()
                        umcdef.start_after=0
                        umcdef.last_started_time=launched_at
                        umcdef.num_runs = umcdef.num_runs + 1
                        if umcdef.first_started_time == 0:
                            umcdef.first_started_time = time.time()

                        started.append("%s, PID=%d"%(umcdef.umc_instanceid,umcdef.proc.pid))
                    except Exception as err:
                        Msg.warn_msg("Error occurred while starting umc instance %s. The exception was: %s"%(umcdef.umc_instanceid, str(err)))
            finally:
                umcdef.lock.release()
        # for

        # time_run is not used further in this method
        time_run = time.time()
        for label, items in (("Running", running), ("Started", started), ("Waiting", waiting)):
            Msg.info2_msg("%s: %s"%(label, items))                
Exemplo n.º 14
0
 def log_request(self, size):
     """Log the client address, the request line and *size* of a request."""
     details = (self.address_string(), self.requestline, str(size))
     Msg.info2_msg('HTTP request from (%s) %s %s' % details)
Exemplo n.º 15
0
    def process_cluster_request(self, method, path_def, allow_all,
                                cache_maxage, is_stream, get_content):
        """Serve a request that addresses one or several umcrunner hosts.

        The request path (``self.path``) is matched against *path_def* and the
        list of target servers is obtained from ``self.get_server_list``.

        * More than one server and *allow_all* is true: unless the request
          already carries a "Via" header, the request is sent to every server
          as a proxy request, the responses are merged into one JSON array,
          optionally cached, and returned with status 200. A request carrying
          a "Via" header is rejected with status 400.
        * Exactly one server: a server that is not this one is answered with a
          308 redirect to it; otherwise *get_content* is called locally.
        * Anything else (no server, or several servers while *allow_all* is
          false): status 404.

        :param method: HTTP method handed to ``ProxyRequest``
        :param path_def: path definition used to extract the path parameters
        :param allow_all: whether the request may address more than one host
        :param cache_maxage: the merged result is cached only when this is > 0
        :param is_stream: when true, *get_content* is called and its result is
            not sent by this method (presumably it writes the response
            itself; verify against callers)
        :param get_content: callable taking the path parameters; when not
            streaming, its result must provide ``code`` and ``json``
        :return: ``None`` when the path does not match or after a redirect,
            ``True`` when content was served, ``False`` after a 400 or 404
        """
        params = PathDef(path_def).params(
            self.path)  #get_path_params(path_def, self.path)

        # path must be a valid path and hostname param must exist in it
        if params is None or params.params.hostname is None:
            return None

        # get a list of servers this should be proxied to
        # if there is more than one, then proxy them, otherwise run the locally or redirect via client
        server_list = self.get_server_list(params)

        # hostname is "all", will forward to individual umcrunner servers
        if len(server_list) > 1 and allow_all:
            # check if this has been proxied already
            if self.headers.get("Via") is None:
                # acquire lock on this path to prevent other threads from doing the same
                cache.acquire_lock(self.path)
                try:
                    # check if in cache
                    content = cache.get(self.path)
                    if content is None:
                        # not in cache
                        # proxy to all umcrunner hosts including "me" (this one)
                        Msg.info2_msg("Sending %d proxy requests." %
                                      (len(server_list)))

                        start_t = time.time()
                        prqs = []
                        # create one proxy request per server and send it right away
                        for server_def in server_list:
                            prqs.append(
                                ProxyRequest(
                                    method,
                                    'http://{address}:{tcp_port}{fw_path}'.
                                    format(
                                        address=server_def.address,
                                        tcp_port=server_def.tcp_port,
                                        fw_path=params.replace(
                                            params,
                                            Map(hostname=server_def["hostname"]
                                                ))),
                                    GlobalContext.params.proxy_run_threads))
                            prqs[-1].send_request()

                        # wait for all responses
                        for x in prqs:
                            x.wait_for_response()

                        # get all "valid" responses
                        resp = [r for r in prqs if r.response is not None]
                        Msg.info2_msg(
                            "Data from %d proxy requests retrieved in %.2f seconds."
                            % (len(resp), time.time() - start_t))

                        # add result to cache; the result from individual servers should always be json array
                        # the first and last character of each response text are dropped and the
                        # remainders joined into one array; responses equal to "[]" are skipped
                        content = Map(content="[%s]" % ",".join([
                            r.response.text.strip()[1:-1]
                            for r in resp if r.response.text.strip() != "[]"
                        ]))
                        if cache_maxage > 0:
                            cache.create_data(self.path, content.content,
                                              time.time(), cache_maxage)
                    # if not in cache
                    else:
                        Msg.info2_msg("Serving request for %s from cache." %
                                      self.path)

                    # send back response
                    self.send(200, {"Content-Type": "application/json"},
                              content.content)
                finally:
                    # the lock is released even when proxying or sending fails
                    cache.release_lock(self.path)
                return True
            # if not via
            else:
                Msg.warn_msg(
                    "A request to %s can only come from a client, not a proxy! (%s)"
                    % (self.path, self.headers.get("Via")))
                self.send(
                    400, None,
                    "Request to the resource that comes via a proxy is not allowed!"
                )
                return False
        # // if multiple hostnames
        elif len(server_list) == 1:
            # params.params.hostname should be a valid hostname
            server_def = server_list[0]
            if not (server_def.me):
                # host should be a known host, redirect the request onto it rather than being a proxy
                location_url = "http://{address}:{tcp_port}{fw_path}".format(
                    address=server_def.address,
                    tcp_port=server_def.tcp_port,
                    fw_path=params.replace(
                        params, Map(hostname=server_def["hostname"])))
                Msg.info2_msg("Redirecting the request to '%s'" % location_url)
                self.send(308, {"Location": location_url}, "")
                # NOTE(review): returns None here, unlike the True/False of the other branches
                return
            else:
                if not (is_stream):
                    content = get_content(params)
                    if content is not None:
                        self.send(content.code,
                                  {"Content-Type": "application/json"},
                                  "[%s]" % ",".join(content.json))
                    else:
                        # should not happen really
                        self.send(500, None, "")
                    return True
                else:
                    # nothing is sent from here in the streaming case
                    get_content(params)
                    return True
        # // if one hostname only
        else:
            # also reached when there are several servers but allow_all is false
            self.send(
                404, None, "The host '%s' cannot be found or is not allowed!" %
                params.params.hostname)
            return False
Exemplo n.º 16
0
def on_terminate(proc):
    """Log the pid and the exit code of a process that has terminated."""
    message = "...process {} terminated with exit code {}".format(
        proc.pid, proc.returncode)
    Msg.info2_msg(message)