Example #1
File: elasticbu.py  Project: cmsdaq/hltd
class elasticBandBU:
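    """Injects run metadata and monitoring documents from a BU node into the
    central Elasticsearch cluster, via the runindex_* and boxinfo_* aliases."""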

    def __init__(self,conf,runnumber,startTime,runMode=True,nsslock=None,box_version=None,update_run_mapping=True,update_box_mapping=True):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conf=conf
        self.es_server_url=conf.elastic_runindex_url
        self.runindex_write="runindex_"+conf.elastic_runindex_name+"_write"
        self.runindex_read="runindex_"+conf.elastic_runindex_name+"_read"
        self.runindex_name="runindex_"+conf.elastic_runindex_name
        self.boxinfo_write="boxinfo_"+conf.elastic_runindex_name+"_write"
        self.boxinfo_read="boxinfo_"+conf.elastic_runindex_name+"_read"
        self.boxinfo_name="boxinfo_"+conf.elastic_runindex_name
        self.boxdoc_version=box_version
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.boxinfoFUMap = {}
        self.ip_url=None
        self.nsslock=nsslock
        if update_run_mapping:
            self.updateIndexMaybe(self.runindex_name,self.runindex_write,self.runindex_read,mappings.central_es_settings_runindex,mappings.central_runindex_mapping)
        if update_box_mapping:
            self.updateIndexMaybe(self.boxinfo_name,self.boxinfo_write,self.boxinfo_read,mappings.central_es_settings_boxinfo,mappings.central_boxinfo_mapping)
        #silence verbose logging from the elasticsearch client library
        eslib_logger = logging.getLogger('elasticsearch')
        eslib_logger.setLevel(logging.ERROR)

        self.black_list=None
        if self.conf.instance=='main':
            self.hostinst = self.host
        else:
            self.hostinst = self.host+'_'+self.conf.instance

        #this naturally fits with the 'run' document
        retries = 10
        version = None
        arch = None
        hltmenuname = None
        if runMode:
            while retries:
                retries -= 1
                try:
                    with open(os.path.join(mainDir,'hlt',conf.paramfile_name),'r') as fp:
                        fffparams = json.load(fp)
                        version = fffparams['CMSSW_VERSION']
                        arch = fffparams['SCRAM_ARCH']
                        self.logger.info("parsed FFF parameter file "+conf.paramfile_name)
                    with open(os.path.join(mainDir,'hlt','HltConfig.py'),'r') as fp:
                        firstline = fp.readline().strip() #first line carries the HLT menu name as a comment
                        if firstline.startswith("#"):
                            hltmenuname = firstline.strip("#").strip()
                    break
                except Exception as ex:
                    self.logger.info("failed to parse run metadata file "+str(ex)+". retries left "+str(retries))
                    time.sleep(0.2)

        #write run number document
        if runMode and not self.stopping:
            document = {}
            doc_id = self.runnumber
            document['runNumber'] = doc_id
            document['startTime'] = startTime
            document['activeBUs'] = 1
            document['totalBUs'] = 1
            document['rawDataSeenByHLT']=False
            if version: document['CMSSW_version']=version
            if arch: document['CMSSW_arch']=arch
            if hltmenuname and len(hltmenuname): document['HLT_menu']=hltmenuname
            documents = [document]
            ret = self.index_documents('run',documents,doc_id,bulk=False,overwrite=False)
            if isinstance(ret,tuple) and ret[1]==409:
                #run document was already created by another BU. In that case increase atomically active BU counter
                #self.index_documents('run',[{"inline":"ctx._source.activeBUs+=1;ctx._source.totalBUs+=1","lang":"painless"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)
                self.index_documents('run',[{"inline":"ctx._source.activeBUs+=1;ctx._source.totalBUs+=1"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)


    def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping):
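        """Block until the write alias exists on the central ES cluster, then ensure
        document mappings are present. Reconnects with a fresh IP from DNS load
        balancing after connection or ES errors."""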
        connectionAttempts=0
        retry=False
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                if retry or self.ip_url is None:
                    self.ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                    self.es = ElasticSearch(self.ip_url,timeout=20)

                #check if index alias exists
                if requests.get(self.ip_url+'/_alias/'+alias_write).status_code == 200:
                    self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url )
                    self.createDocMappingsMaybe(alias_write,mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts%10)==0:
                        self.logger.error('unable to access elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ self.es_server_url)
                    sys.exit(1)
                elif not self.runMode and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

            except (socket.gaierror,ConnectionError,Timeout,RequestsConnectionError,RequestsTimeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url)
                    sys.exit(1)
                elif not self.runMode and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

    def createDocMappingsMaybe(self,index_name,mapping):
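        """Insert document-type mappings that are empty or are missing properties
        compared to the mapping definition."""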
        #update in case of new documents added to mapping definition
        for key in mapping:
            doc = {key:mapping[key]}
            res = requests.get(self.ip_url+'/'+index_name+'/'+key+'/_mapping')
            #only update if mapping is empty
            if res.status_code==200:
                if res.content.strip()=='{}':
                    self.logger.info('inserting new mapping for '+str(key))
                    requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                else:
                    #still check if number of properties is identical in each type
                    inmapping = json.loads(res.content)
                    for indexname in inmapping:
                        properties = inmapping[indexname]['mappings'][key]['properties']

                        self.logger.info('checking mapping '+ indexname + '/' + key + ' which has '
                            + str(len(mapping[key]['properties'])) + '(index:' + str(len(properties)) + ') entries..')
                        for pdoc in mapping[key]['properties']:
                            if pdoc not in properties:
                                self.logger.info('inserting mapping for ' + str(key) + ' which is missing mapping property ' + str(pdoc))
                                res = requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                                if res.status_code!=200: self.logger.warning('insert mapping reply status code '+str(res.status_code)+': '+res.content)
                                break
            else:
                self.logger.warning('requests error code '+str(res.status_code)+' in mapping request')

    def read_line(self,fullpath):
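        """Return the first line of the file at fullpath."""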
        with open(fullpath,'r') as fp:
            return fp.readline()

    def elasticize_modulelegend(self,fullpath):
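        """Index the microstate legend as a child of the run document, accepting
        either the .jsn format or the legacy text format."""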

        self.logger.info(os.path.basename(fullpath))
        document = {}
        #document['_parent']= self.runnumber
        doc_id="microstatelegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
            try:
                with open(fullpath,'r') as fp:
                    doc = json.load(fp)
                    document['stateNames'] = doc['names']
                    document['reserved'] = doc.get('reserved',33)
                    document['special'] = doc.get('special',7)
                    nstring = ""
                    cnt = 0
                    outputcnt = 0
                    #fill in also old format for now
                    for sname in doc['names']:
                        nstring+= str(cnt) + "=" + sname + " "
                        cnt+=1
                        if sname.startswith('hltOutput'):outputcnt+=1
                    document['output'] = doc.get('output',outputcnt)
                    #document['names'] = nstring
            except Exception as ex:
                self.logger.warning("can not parse "+fullpath + ' ' + str(ex))
        else:
            #old format
            stub = self.read_line(fullpath)
            docnames= self.read_line(fullpath)
            document['reserved'] = 33
            document['special'] = 7
            outputcnt=0
            for sname in docnames.split():
                if "=hltOutput" in sname: outputcnt+=1
            document['output'] = outputcnt
            document['stateNames']=[]
            nameTokens = docnames.split()
            for nameToken in nameTokens:
                if '=' in nameToken:
                    idx,sn = nameToken.split('=')
                    document["stateNames"].append( sn )

        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('microstatelegend',documents,doc_id,doc_params=doc_pars,bulk=False)


    def elasticize_pathlegend(self,fullpath):
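        """Index the HLT path legend as a child of the run document, accepting
        either the .jsn format or the legacy text format."""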
        self.logger.info(os.path.basename(fullpath))
        document = {}
        #document['_parent']= self.runnumber
        doc_id="pathlegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
            try:
                with open(fullpath,'r') as fp:
                    doc = json.load(fp)
                    document['stateNames'] = doc['names']
                    document['reserved'] = doc['reserved']
                    #put old name format value
                    nstring=""
                    cnt=0
                    for sname in doc['names']:
                        nstring+= str(cnt) + "=" + sname + " "
                        cnt+=1
                    document['names'] = nstring
            except Exception as ex:
                self.logger.warning("can not parse "+fullpath)
        else:
            stub = self.read_line(fullpath)
            document['names']= self.read_line(fullpath)
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('pathlegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_inputlegend(self,fullpath):
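        """Index the input state legend as a child of the run document."""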
        self.logger.info(os.path.basename(fullpath))
        document = {}
        doc_id="inputstatelegend_"+self.runnumber
        try:
            with open(fullpath,'r') as fp:
                doc = json.load(fp)
                document['stateNames'] = doc['names']
        except Exception as ex:
            self.logger.warning("can not parse "+fullpath)
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('inputstatelegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_stream_label(self,infile):
        #elasticize stream name information
        self.logger.info(infile.filepath)
        document = {}
        #document['_parent']= self.runnumber
        document['stream']=infile.stream[6:] #drop the 6-character 'stream' prefix
        doc_id=infile.basename
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('stream_label',[document],doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_runend_time(self,endtime):
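        """Record the run end time and atomically decrement the active BU counter
        on the run document."""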
        self.logger.info(str(endtime)+" going into buffer")
        doc_id = self.runnumber
        #first update: endtime field
        self.index_documents('run',[{"endTime":endtime}],doc_id,bulk=False,update_only=True)
        #second update:decrease atomically active BU counter
        #self.index_documents('run',[{"inline":"ctx._source.activeBUs-=1","lang":"painless"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)
        self.index_documents('run',[{"inline":"ctx._source.activeBUs-=1"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)

    def elasticize_resource_summary(self,jsondoc):
        self.logger.debug('injecting resource summary document')
        jsondoc['appliance']=self.host
        self.index_documents('resource_summary',[jsondoc],bulk=False)

    def elasticize_box(self,infile):
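        """Index a box (monitoring heartbeat) file from a BU or FU host, honoring
        the appliance blacklist and, for FU files, the expected box document version."""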

        basename = infile.basename
        self.logger.debug(basename)
        current_time = time.time()

        if infile.data=={}:return

        bu_doc=False
        if basename.startswith('bu') or basename.startswith('dvbu'):
            bu_doc=True

        #check box file against blacklist
        if bu_doc or self.black_list is None:
            self.black_list=[]

            try:
                with open(os.path.join(self.conf.watch_directory,'appliance','blacklist'),"r") as fi:
                    try:
                        self.black_list = json.load(fi)
                    except ValueError:
                        #file is being written or corrupted
                        return
            except:
                #blacklist file is not present, do not filter
                pass

        if basename in self.black_list:return

        if not bu_doc:
            try:
                if self.boxdoc_version!=infile.data['version']:
                    self.logger.info('skipping '+basename+' box file version '+str(infile.data['version'])+' which is different from '+str(self.boxdoc_version))
                    return
            except:
                self.logger.warning("didn't find version field in box file "+basename)
                return
            try:
                self.boxinfoFUMap[basename] = [infile.data,current_time]
            except Exception as ex:
                self.logger.warning('box info not injected: '+str(ex))
                return
        try:
            document = infile.data
            #unique id for separate instances
            if bu_doc:
                doc_id=self.hostinst
            else:
                doc_id=basename

            document['id']=doc_id
            try:
                document['activeRunList'] = list(map(int,document['activeRuns']))
            except:
                pass
            try:
                document['activeRuns'] = list(map(str,document['activeRuns']))
            except:
                pass
            document['appliance']=self.host
            document['instance']=self.conf.instance
            if bu_doc:
                document['blacklist']=self.black_list
            #only here
            document['host']=basename
            document.pop('version',None)
            document.pop('ip',None)
            document.pop('boot_id',None)
            self.index_documents('boxinfo',[document],doc_id,bulk=False)
        except Exception as ex:
            self.logger.warning('box info not injected: '+str(ex))
            return

    def elasticize_fubox(self,doc):
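        """Index the FU box status document for this host."""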
        try:
            doc_id = self.host
            doc['host']=doc_id
            self.index_documents('fu-box-status',[doc],doc_id,bulk=False)
        except Exception as ex:
            self.logger.warning('fu box status not injected: '+str(ex))

    def elasticize_eols(self,infile):
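        """Index an end-of-lumisection (EoLS) document combining per-lumisection
        counters with the file timestamp, as a child of the run document."""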
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.insert(0,infile.mtime)
        data.insert(0,infile.ls[2:]) #drop the 'ls' prefix from the lumisection string

        values = [int(f) if str(f).isdigit() else str(f) for f in data]
        #the legacy format lacks NBytes; zip stops at the shorter sequence, so that key is simply omitted
        keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents","NBytes"]
        document = dict(zip(keys, values))

        doc_id = infile.name+"_"+self.host
        document['id'] = doc_id
        #document['_parent']= self.runnumber
        document['appliance']=self.host
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        self.index_documents('eols',documents,doc_id,doc_params=doc_pars,bulk=False)

    def index_documents(self,name,documents,doc_id=None,doc_params=None,bulk=True,overwrite=True,update_only=False,retry_on_conflict=0,script=False):
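        """Index documents into the boxinfo or runindex write alias with retry and
        conflict handling. Returns True on success, a (False,status) tuple when a
        'run' document create is rejected, and False when giving up."""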

        if name=='fu-box-status' or name.startswith("boxinfo") or name=='resource_summary':
            destination_index = self.boxinfo_write
            is_box=True
        else:
            destination_index = self.runindex_write
            is_box=False
        attempts=0
        while True:
            attempts+=1
            try:
                if bulk:
                    self.es.bulk_index(destination_index,name,documents)
                else:
                    if doc_id:
                      if update_only:
                        if script:
                          self.es.update(index=destination_index,doc_type=name,id=doc_id,script=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                        else:
                          self.es.update(index=destination_index,doc_type=name,id=doc_id,doc=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                      else:
                        #overwrite existing can be used with id specified
                        if doc_params:
                          self.es.index(destination_index,name,documents[0],doc_id,parent=doc_params['parent'],overwrite_existing=overwrite)
                        else:
                          self.es.index(destination_index,name,documents[0],doc_id,overwrite_existing=overwrite)
                    else:
                        self.es.index(destination_index,name,documents[0])
                return True

            except ElasticHttpError as ex:
                if name=='run' and ex[0]==409: #create failed because overwrite was forbidden
                    return (False,ex[0])

                if ex[0]==429:
                  if attempts<10 and not is_box:
                    self.logger.warning('elasticsearch HTTP error 429 '+str(ex)+'. retrying..')
                    time.sleep(.1)
                    continue
                else:
                  if attempts<=1 and not is_box:continue

                if is_box:
                    self.logger.warning('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                else:
                    self.logger.error('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                return False
            except (socket.gaierror,ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                if is_box or attempts<=1:
                    self.logger.warning('elasticsearch connection error '+str(ex)+'. retry.')
                elif (attempts-2)%10==0:
                    self.logger.error('elasticsearch connection error '+str(ex)+'. retry.')
                if self.stopping:return False
                ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                self.es = ElasticSearch(ip_url,timeout=20)
                time.sleep(0.1)
                if is_box: #give up on too many box retries as they are indexed again every 5 seconds
                  break
        return False