class elasticBandBU:
    """Injects run/box/legend/EoLS metadata documents into central Elasticsearch.

    One instance is created per run on a BU (builder unit) appliance. On
    construction it (optionally) verifies/updates the ES index aliases and
    document mappings, then (in runMode) creates the 'run' document for this
    run, atomically incrementing the active/total BU counters if another BU
    created it first.

    NOTE(review): this code uses Python-2 idioms throughout (`ex[0]` on
    exceptions, `map()` results stored into JSON documents, comparing
    `res.content` to a str) — confirm interpreter version before porting.
    """

    def __init__(self,conf,runnumber,startTime,runMode=True,nsslock=None,box_version=None,update_run_mapping=True,update_box_mapping=True):
        # conf: hltd configuration object (provides elastic_runindex_url,
        # elastic_runindex_name, instance, watch_directory, paramfile_name).
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conf=conf
        self.es_server_url=conf.elastic_runindex_url
        # read/write aliases and base name for the run index and boxinfo index
        self.runindex_write="runindex_"+conf.elastic_runindex_name+"_write"
        self.runindex_read="runindex_"+conf.elastic_runindex_name+"_read"
        self.runindex_name="runindex_"+conf.elastic_runindex_name
        self.boxinfo_write="boxinfo_"+conf.elastic_runindex_name+"_write"
        self.boxinfo_read="boxinfo_"+conf.elastic_runindex_name+"_read"
        self.boxinfo_name="boxinfo_"+conf.elastic_runindex_name
        self.boxdoc_version=box_version
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.boxinfoFUMap = {}
        self.ip_url=None
        self.nsslock=nsslock
        if update_run_mapping:
            self.updateIndexMaybe(self.runindex_name,self.runindex_write,self.runindex_read,mappings.central_es_settings_runindex,mappings.central_runindex_mapping)
        if update_box_mapping:
            self.updateIndexMaybe(self.boxinfo_name,self.boxinfo_write,self.boxinfo_read,mappings.central_es_settings_boxinfo,mappings.central_boxinfo_mapping)
        #silence the elasticsearch client library below ERROR level
        eslib_logger = logging.getLogger('elasticsearch')
        eslib_logger.setLevel(logging.ERROR)
        self.black_list=None
        # per-instance host id: plain hostname for 'main', suffixed otherwise
        if self.conf.instance=='main':
            self.hostinst = self.host
        else:
            self.hostinst = self.host+'_'+self.conf.instance
        #this naturally fits with the 'run' document
        # Try (with retries) to read CMSSW version/arch and HLT menu name from
        # the run's hlt area; files may still be appearing, hence the retry loop.
        retries=10
        if runMode == True:
            while retries:
                retries-=1
                try:
                    version = None
                    arch = None
                    hltmenuname = None
                    with open(os.path.join(mainDir,'hlt',conf.paramfile_name),'r') as fp:
                        fffparams = json.load(fp)
                        version = fffparams['CMSSW_VERSION']
                        arch = fffparams['SCRAM_ARCH']
                        self.logger.info("OK")
                    with open(os.path.join(mainDir,'hlt','HltConfig.py'),'r') as fp:
                        #first line of the menu is expected to carry its name as a comment
                        firstline = fp.readline().strip().strip("\n")
                        if firstline.startswith("#"):
                            hltmenuname = firstline.strip("#").strip()
                    break
                except Exception as ex:
                    self.logger.info("failed to parse run metadata file "+str(ex)+". retries left "+str(retries))
                    time.sleep(0.2)
        #write run number document
        if runMode == True and self.stopping==False:
            document = {}
            doc_id = self.runnumber
            document['runNumber'] = doc_id
            document['startTime'] = startTime
            document['activeBUs'] = 1
            document['totalBUs'] = 1
            document['rawDataSeenByHLT']=False
            if version: document['CMSSW_version']=version
            if arch: document['CMSSW_arch']=arch
            if hltmenuname and len(hltmenuname): document['HLT_menu']=hltmenuname
            documents = [document]
            # overwrite=False: creation fails with 409 if the doc already exists
            ret = self.index_documents('run',documents,doc_id,bulk=False,overwrite=False)
            if isinstance(ret,tuple) and ret[1]==409:
                #run document was already created by another BU. In that case increase atomically active BU counter
                #self.index_documents('run',[{"inline":"ctx._source.activeBUs+=1;ctx._source.totalBUs+=1","lang":"painless"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)
                self.index_documents('run',[{"inline":"ctx._source.activeBUs+=1;ctx._source.totalBUs+=1"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)

    def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping):
        """Block until the write alias is reachable, then verify doc mappings.

        Resolves the ES server URL to a concrete IP (DNS load balancing) and
        retries indefinitely on connection problems; in runMode the process
        exits after 100 failed attempts. `settings` and `alias_read` are
        accepted for interface compatibility but not used here.
        """
        connectionAttempts=0
        retry=False
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                # (re)resolve IP and recreate client on first pass or after an error
                if retry or self.ip_url==None:
                    self.ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                    self.es = ElasticSearch(self.ip_url,timeout=20)
                #check if index alias exists
                if requests.get(self.ip_url+'/_alias/'+alias_write).status_code == 200:
                    self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url )
                    self.createDocMappingsMaybe(alias_write,mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts%10)==0:
                        self.logger.error('unable to access to elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue
            except (socket.gaierror,ConnectionError,Timeout,RequestsConnectionError,RequestsTimeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

    def createDocMappingsMaybe(self,index_name,mapping):
        """Insert missing document-type mappings into the index.

        For each doc type in `mapping`: if the index has no mapping for it,
        POST the full mapping; otherwise POST it again if any property is
        missing (ES merges mappings additively).
        """
        #update in case of new documents added to mapping definition
        for key in mapping:
            doc = {key:mapping[key]}
            res = requests.get(self.ip_url+'/'+index_name+'/'+key+'/_mapping')
            #only update if mapping is empty
            if res.status_code==200:
                if res.content.strip()=='{}':
                    self.logger.info('inserting new mapping for '+str(key))
                    requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                else:
                    #still check if number of properties is identical in each type
                    inmapping = json.loads(res.content)
                    for indexname in inmapping:
                        properties = inmapping[indexname]['mappings'][key]['properties']
                        self.logger.info('checking mapping '+ indexname + '/' + key + ' which has ' + str(len(mapping[key]['properties'])) + '(index:' + str(len(properties)) + ') entries..')
                        for pdoc in mapping[key]['properties']:
                            if pdoc not in properties:
                                self.logger.info('inserting mapping for ' + str(key) + ' which is missing mapping property ' + str(pdoc))
                                # fix: capture and check the POST reply; previously the
                                # stale GET response (known to be 200 here) was re-checked,
                                # so insert failures were never reported
                                postres = requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                                if postres.status_code!=200:
                                    self.logger.warning('insert mapping reply status code '+str(postres.status_code)+': '+postres.content)
                                break
            else:
                # fix: status_code is an int; concatenating it raised TypeError
                self.logger.warning('requests error code '+str(res.status_code)+' in mapping request')

    def read_line(self,fullpath):
        """Return the first line of the file at `fullpath`."""
        with open(fullpath,'r') as fp:
            return fp.readline()

    def elasticize_modulelegend(self,fullpath):
        """Index the microstate (module) legend for this run.

        Supports the .jsn format (JSON with 'names' and optional 'reserved',
        'special', 'output' fields) and the legacy two-line "idx=name ..." text
        format. Returns the result of index_documents.
        """
        self.logger.info(os.path.basename(fullpath))
        document = {}
        #document['_parent']= self.runnumber
        doc_id="microstatelegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
            try:
                with open(fullpath,'r') as fp:
                    doc = json.load(fp)
                document['stateNames'] = doc['names']
                # defaults used when the legend file predates these fields
                try:document['reserved'] = doc['reserved']
                except:document['reserved'] = 33
                try:document['special'] = doc['special']
                except:document['special'] = 7
                nstring = ""
                cnt = 0
                outputcnt = 0
                #fill in also old format for now
                for sname in doc['names']:
                    nstring+= str(cnt) + "=" + sname + " "
                    cnt+=1
                    if sname.startswith('hltOutput'):outputcnt+=1
                try:document['output'] = doc['output']
                except:document['output']=outputcnt
                #document['names'] = nstring
            except Exception as ex:
                self.logger.warning("can not parse "+fullpath + ' ' + str(ex))
        else:
            #old format: first line is a stub, second line holds "idx=name" tokens
            stub = self.read_line(fullpath)
            docnames= self.read_line(fullpath)
            document['reserved'] = 33
            document['special'] = 7
            outputcnt=0
            for sname in docnames.split():
                if "=hltOutput" in sname: outputcnt+=1
            document['output'] = outputcnt
            document['stateNames']=[]
            nameTokens = docnames.split()
            for nameToken in nameTokens:
                if '=' in nameToken:
                    idx,sn = nameToken.split('=')
                    document["stateNames"].append( sn )
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('microstatelegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_pathlegend(self,fullpath):
        """Index the HLT path legend for this run (.jsn or legacy text format)."""
        self.logger.info(os.path.basename(fullpath))
        document = {}
        #document['_parent']= self.runnumber
        doc_id="pathlegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
            try:
                with open(fullpath,'r') as fp:
                    doc = json.load(fp)
                document['stateNames'] = doc['names']
                document['reserved'] = doc['reserved']
                #put old name format value
                nstring=""
                cnt=0
                for sname in doc['names']:
                    nstring+= str(cnt) + "=" + sname + " "
                    cnt+=1
                document['names'] = nstring
            except Exception as ex:
                self.logger.warning("can not parse "+fullpath)
        else:
            # legacy format: skip the first (stub) line, names are on the second
            stub = self.read_line(fullpath)
            document['names']= self.read_line(fullpath)
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('pathlegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_inputlegend(self,fullpath):
        """Index the input-state legend for this run (JSON with 'names')."""
        self.logger.info(os.path.basename(fullpath))
        document = {}
        doc_id="inputstatelegend_"+self.runnumber
        try:
            with open(fullpath,'r') as fp:
                doc = json.load(fp)
            document['stateNames'] = doc['names']
        except Exception as ex:
            self.logger.warning("can not parse "+fullpath)
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('inputstatelegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_stream_label(self,infile):
        """Index the stream label document for one stream file.

        `infile` is a project file-wrapper object (provides filepath, stream,
        basename) — TODO confirm against caller.
        """
        #elasticize stream name information
        self.logger.info(infile.filepath)
        document = {}
        #document['_parent']= self.runnumber
        # strip the leading 'stream' prefix from e.g. 'streamDQM'
        document['stream']=infile.stream[6:]
        doc_id=infile.basename
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('stream_label',[document],doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_runend_time(self,endtime):
        """Record run end time and atomically decrement the active-BU counter."""
        self.logger.info(str(endtime)+" going into buffer")
        doc_id = self.runnumber
        #first update: endtime field
        self.index_documents('run',[{"endTime":endtime}],doc_id,bulk=False,update_only=True)
        #second update:decrease atomically active BU counter
        #self.index_documents('run',[{"inline":"ctx._source.activeBUs-=1","lang":"painless"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)
        self.index_documents('run',[{"inline":"ctx._source.activeBUs-=1"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)

    def elasticize_resource_summary(self,jsondoc):
        """Index an appliance resource-summary document (mutates jsondoc)."""
        self.logger.debug('injecting resource summary document')
        jsondoc['appliance']=self.host
        self.index_documents('resource_summary',[jsondoc],bulk=False)

    def elasticize_box(self,infile):
        """Index one box-info document (BU or FU host status file).

        Applies the appliance blacklist, rejects FU box files whose 'version'
        does not match self.boxdoc_version, caches FU data in boxinfoFUMap,
        and strips fields not meant for the index before injection.
        """
        basename = infile.basename
        self.logger.debug(basename)
        current_time = time.time()
        if infile.data=={}:return
        bu_doc=False
        if basename.startswith('bu') or basename.startswith('dvbu'):
            bu_doc=True
        #check box file against blacklist
        # reload blacklist for every BU doc, or lazily on first use otherwise
        if bu_doc or self.black_list==None:
            self.black_list=[]
            try:
                with open(os.path.join(self.conf.watch_directory,'appliance','blacklist'),"r") as fi:
                    try:
                        self.black_list = json.load(fi)
                    except ValueError:
                        #file is being written or corrupted
                        return
            except:
                #blacklist file is not present, do not filter
                pass
        if basename in self.black_list:return
        if bu_doc==False:
            try:
                if self.boxdoc_version!=infile.data['version']:
                    self.logger.info('skipping '+basename+' box file version '+str(infile.data['version'])+' which is different from '+str(self.boxdoc_version))
                    return;
            except:
                self.logger.warning("didn't find version field in box file "+basename)
                return
            try:
                # keep latest FU box data with its timestamp
                self.boxinfoFUMap[basename] = [infile.data,current_time]
            except Exception as ex:
                self.logger.warning('box info not injected: '+str(ex))
                return
        try:
            document = infile.data
            #unique id for separate instances
            if bu_doc:
                doc_id=self.hostinst
            else:
                doc_id=basename
            document['id']=doc_id
            # NOTE(review): map() is Py2-style here; under Py3 these would store
            # non-serializable map objects — confirm interpreter version
            try:
                document['activeRunList'] = map(int,document['activeRuns'])
            except:
                pass
            try:
                document['activeRuns'] = map(str,document['activeRuns'])
            except:
                pass
            document['appliance']=self.host
            document['instance']=self.conf.instance
            if bu_doc==True:
                document['blacklist']=self.black_list
            #only here
            document['host']=basename
            # drop fields not meant for the boxinfo index
            try:document.pop('version')
            except:pass
            try:document.pop('ip')
            except:pass
            try:document.pop('boot_id')
            except:pass
            self.index_documents('boxinfo',[document],doc_id,bulk=False)
        except Exception as ex:
            self.logger.warning('box info not injected: '+str(ex))
            return

    def elasticize_fubox(self,doc):
        """Index an FU box status document keyed by this host."""
        try:
            doc_id = self.host
            doc['host']=doc_id
            self.index_documents('fu-box-status',[doc],doc_id,bulk=False)
        except Exception as ex:
            self.logger.warning('fu box status not injected: '+str(ex))

    def elasticize_eols(self,infile):
        """Index an end-of-lumisection (EoLS) document.

        Prepends the lumisection number and file mtime to the file's data
        array and zips it against the known key list (with or without NBytes).
        """
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.insert(0,infile.mtime)
        # infile.ls is e.g. 'ls0023'; strip the 'ls' prefix — TODO confirm format
        data.insert(0,infile.ls[2:])
        values = [int(f) if f.isdigit() else str(f) for f in data]
        try:
            keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents","NBytes"]
            document = dict(zip(keys, values))
        except:
            #try without NBytes
            keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents"]
            document = dict(zip(keys, values))
        doc_id = infile.name+"_"+self.host
        document['id'] = doc_id
        #document['_parent']= self.runnumber
        document['appliance']=self.host
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        self.index_documents('eols',documents,doc_id,doc_params=doc_pars,bulk=False)

    def index_documents(self,name,documents,doc_id=None,doc_params=None,bulk=True,overwrite=True,update_only=False,retry_on_conflict=0,script=False):
        """Index document(s) of type `name` with retry handling.

        Box-type documents go to the boxinfo index, everything else to the run
        index. Returns True on success, False on a skipped/failed document, or
        (False, 409) when a 'run' document creation conflicts (already exists).
        Box documents are retried less aggressively since they are re-sent
        every few seconds anyway.
        """
        if name=='fu-box-status' or name.startswith("boxinfo") or name=='resource_summary':
            destination_index = self.boxinfo_write
            is_box=True
        else:
            destination_index = self.runindex_write
            is_box=False
        attempts=0
        while True:
            attempts+=1
            try:
                if bulk:
                    self.es.bulk_index(destination_index,name,documents)
                else:
                    if doc_id:
                        if update_only:
                            if script:
                                self.es.update(index=destination_index,doc_type=name,id=doc_id,script=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                            else:
                                self.es.update(index=destination_index,doc_type=name,id=doc_id,doc=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                        else:
                            #overwrite existing can be used with id specified
                            if doc_params:
                                self.es.index(destination_index,name,documents[0],doc_id,parent=doc_params['parent'],overwrite_existing=overwrite)
                            else:
                                self.es.index(destination_index,name,documents[0],doc_id,overwrite_existing=overwrite)
                    else:
                        self.es.index(destination_index,name,documents[0])
                return True
            except ElasticHttpError as ex:
                # NOTE(review): ex[0] indexing is a Python-2 exception idiom
                if name=='run' and ex[0]==409:
                    #create failed because overwrite was forbidden
                    return (False,ex[0])
                if ex[0]==429:
                    # 429: server overloaded; back off briefly for run documents
                    if attempts<10 and not is_box:
                        self.logger.warning('elasticsearch HTTP error 429'+str(ex)+'. retrying..')
                        time.sleep(.1)
                        continue
                else:
                    # non-429 errors: allow one silent retry for run documents
                    if attempts<=1 and not is_box:continue
                if is_box:
                    self.logger.warning('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                else:
                    self.logger.error('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                return False
            except (socket.gaierror,ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                if is_box or attempts<=1:
                    self.logger.warning('elasticsearch connection error' + str(ex)+'. retry.')
                elif (attempts-2)%10==0:
                    self.logger.error('elasticsearch connection error' + str(ex)+'. retry.')
                if self.stopping:return False
                # reconnect, possibly to a different IP from DNS load balancing
                ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                self.es = ElasticSearch(ip_url,timeout=20)
                time.sleep(0.1)
                if is_box==True:#give up on too many box retries as they are indexed again every 5 seconds
                    break
        return False