def storeMessage(message):
    """
    Put a ProdMgr message into the message queue and commit the session.

    The entry is inserted for "ProdMgrInterface" with the state, server
    url, type and parameters taken from the message, followed by the fixed
    value "00:00:10" (presumably the delay before the retry -- confirm
    against MessageQueue.insert).

    message -- mapping providing the keys 'state', 'server_url', 'type'
               and 'parameters'.
    """
    state = message['state']
    server_url = message['server_url']
    message_type = message['type']
    parameters = message['parameters']

    MessageQueue.insert("ProdMgrInterface", state, server_url,
                        message_type, parameters, "00:00:10")

    logging.debug("Problem connecting to server " + server_url)
    logging.debug("Attempt stored in message queue for later retries")
    Session.commit()
def commit(serverUrl=None,method_name=None,componentID=None):
    """
    Flag a logged service call as "result_retrieved" in ws_last_call.

    serverUrl, method_name, componentID -- identify the row to update.
        If any one of them is None, all three are replaced by the
        module level lastCall value.

    The update runs in its own 'ProdMgr' session; whatever session was
    current on entry is made current again before returning or raising.

    Raises ProdAgentException if any step fails (the 'ProdMgr' session is
    rolled back and closed first).
    """
    global lastCall
    # Other methods that touch the database can use their own private
    # session and connect immediately.  Commit is different: if it fails,
    # all updates in the queues must fail as well (rollback).
    last_session = Session.current_session
    try:
        Session.connect('ProdMgr')
        Session.start_transaction('ProdMgr')
        if serverUrl is None or method_name is None or componentID is None:
            serverUrl, method_name, componentID = lastCall
        # NOTE(review): the values are interpolated straight into the SQL
        # text; switch to bound parameters if Session.execute supports them.
        sqlStr = (
            'UPDATE ws_last_call SET call_state="result_retrieved" '
            'WHERE server_url="%s" AND component_id="%s" '
            'AND service_call="%s"; '
        ) % (serverUrl, componentID, method_name)
        Session.execute(sqlStr)
        Session.commit()
        Session.close()
        Session.set_session(last_session)
    except Exception as ex:
        Session.rollback()
        Session.close()
        Session.set_session(last_session)
        raise ProdAgentException("Service commit Error: "+str(ex))
def execute(self):
    """
    Cleanup state handler.

    Stores "start" as the state and an empty parameter set for
    "ProdMgrInterface", commits, makes "default" the current session
    again and returns the name of the new state ("start").
    """
    logging.debug("Executing state: Cleanup")

    nextState = "start"
    State.setState("ProdMgrInterface", nextState)
    State.setParameters("ProdMgrInterface", {})
    Session.commit()

    # switch back to the default session
    Session.set_session("default")
    return nextState
def testA(self): print('Inserting workflows') try: Session.set_database(dbConfig) Session.connect() Session.start_transaction() Session.execute("SELECT asdf FROM bla;") return workflows=[] for i in xrange(0,10): workflow={} workflow['events_processed']=1000 workflow['id']='workflow_id'+str(i) workflow['owner']='elmo' workflow['priority']=123 workflow['prod_mgr_url']='http://some.where.over.the.rainbow' workflows.append(workflow) Session.insert("we_Workflow",workflows) #now we will brake the session object deliberatly Session.session['default']['connection']=None Session.session['default']['cursor']=None sqlStr="""SELECT COUNT(*) FROM we_Workflow""" print('Case 1 (none type connection and cursor)*****************') Session.execute(sqlStr) rows=Session.fetchall() self.assertEqual(int(rows[0][0]),10) #now we will brake the session object again deliberatly Session.session['default']['connection']=None Session.session['default']['cursor']=None print('Case 2 (none type connection and cursor)****') Session.commit() #put in a query that is incorrect (should raise an error) print('Case 2 (malformed query)*********************************') try: sqlStr="""INSERT some garbage""" Session.execute(sqlStr) except Exception,ex: print("Error testing successful : "+str(ex)) #put in a query that violates a db constraint (should raise an error) print('Case 3 (wellformed query with db constraint violation)***') try: sqlStr="""INSERT INTO we_Workflow(events_processed,id,owner,priority,prod_mgr_url) VALUES("1000","%s","elmo","123","http://some.where.over.the.rainbow") """ %(str("workflow_id"+str(1))) Session.execute(sqlStr) except Exception,ex: print("Error testing successful : "+str(ex))
def cutFile(job_ids,jobCutSize,maxJobs):
    """
    Cut every allocated job in job_ids into smaller job specs and
    register them.

    job_ids    -- comma separated string of job ids; the last element of
                  the split is ignored (presumably the string ends with a
                  trailing comma -- confirm against the caller).
    jobCutSize -- number of events per cut.
    maxJobs    -- upper limit on the number of cuts; only applied while it
                  is greater than zero.  It is reduced by the number of
                  cuts made for each job id.

    Uses the module level jobSpecDir (where spec files are written) and
    maxRetries (copied into every cut when it is set).

    Returns a dict with the keys 'specs' (the cuts produced for the last
    job id handled, or an empty list), 'workflow' and 'priority'.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened source; verify the loop boundaries against the repository.
    """
    global jobSpecDir,maxRetries

    logging.debug("Job_ids: "+str(job_ids))
    listOfSpecs = []
    for job_id in job_ids.split(',')[:-1]:
        details = Allocation.get(job_id)['details']
        logging.debug("Job details: "+str(details))
        workflowSpecFile = Workflow.get(Aux.split(job_id)[1])['workflow_spec_file']

        specPath = jobSpecDir + '/' + job_id + '.xml'
        Allocation.setAllocationSpecFile(job_id, specPath)

        logging.debug("start with local jobspec generation")
        firstEvent = int(details['start_event'])
        totalEvents = int(details['event_count'])

        # how many cuts are needed for this job, capped by what is left
        # of maxJobs
        cuts = int(math.ceil(float(totalEvents) / float(jobCutSize)))
        if maxJobs > 0 and cuts > maxJobs:
            cuts = maxJobs
        maxJobs = maxJobs - cuts

        # the start event doubles as the run number of the parent spec
        EventJobSpec.createJobSpec(job_id, workflowSpecFile, specPath,
                                   firstEvent, totalEvents, firstEvent,
                                   False, False)

        jobSpec = JobSpec()
        jobSpec.load(specPath)
        jobSpec.parameters['ProdMgr'] = 'generated'
        jobSpec.addAssociatedFiles('fileList', {'LFN': details['lfn']})
        jobSpec.save(specPath)

        runNumbers = Workflow.getNewRunNumber(Aux.split(job_id)[1], cuts)

        logging.debug("Starting factorization")
        logging.debug("Writing job cut specs to: "+str(jobSpecDir))
        listOfSpecs = factoriseJobSpec(
            jobSpec, jobSpecDir, runNumbers,
            jobSpec.parameters['EventCount'],
            RunNumber=jobSpec.parameters['RunNumber'],
            FirstEvent=jobSpec.parameters['FirstEvent'])

        logging.debug("Registering job cuts")
        for cutSpec in listOfSpecs:
            cutSpec['owner'] = 'prodmgr'
            cutSpec['job_type'] = 'Processing'
            if maxRetries:
                cutSpec['max_retries'] = maxRetries
        Job.register(None, job_id, listOfSpecs)

    Session.commit()
    return {'specs': listOfSpecs, 'workflow': 'test', 'priority': 1}
def execute(self):
    """
    AcquireRequest state handler: select the request to acquire jobs for.

    Clears the cooloff table, then reads the "ProdMgrInterface" component
    state.  The handler moves to "Cleanup" when the 'numberOfJobs'
    parameter is zero or when no request is left.  Otherwise it takes the
    request at 'requestIndex' (skipping requests whose ProdMgr url is in
    the cooloff table), stores its id, url and type together with the
    updated index in the state parameters and moves to "AcquireJobs".

    Returns the name of the next state: "Cleanup" or "AcquireJobs".
    """
    logging.debug("Executing state: AcquireRequest")
    # remove potential server urls from the cooloff state
    Cooloff.remove()
    Session.commit()

    componentState = State.get("ProdMgrInterface")
    parameters = componentState['parameters']

    # Check if we reached our threshold; if so quit.  The value is
    # converted before the comparison so that a count stored as a string
    # ("0") is recognised as well.
    if int(parameters['numberOfJobs']) == 0:
        State.setState("ProdMgrInterface", "Cleanup")
        Session.commit()
        return 'Cleanup'

    # get request with highest priority:
    logging.debug("Getting request with index: "+str(parameters['requestIndex']))
    requestIndex = parameters['requestIndex']
    request = Workflow.getHighestPriority(requestIndex)

    # an empty result means there are no more requests to check
    if request == []:
        State.setState("ProdMgrInterface", "Cleanup")
        Session.commit()
        return "Cleanup"

    # do not use a request whose url is in the cooloff table; move on to
    # the next index until a usable one is found or none is left
    while Cooloff.hasURL(request['prod_mgr_url']):
        requestIndex = requestIndex + 1
        request = Workflow.getHighestPriority(requestIndex)
        if request == []:
            State.setState("ProdMgrInterface", "Cleanup")
            Session.commit()
            logging.debug("We have no more requests in our queue for allocations"+\
                " and jobs, bailing out")
            return "Cleanup"

    logging.debug("Found request: "+str(request['id']))
    # set parameters and state and commit for next session
    parameters['RequestID'] = request['id']
    parameters['ProdMgrURL'] = request['prod_mgr_url']
    parameters['RequestType'] = request['workflow_type']
    parameters['requestIndex'] = requestIndex
    State.setParameters("ProdMgrInterface", parameters)
    State.setState("ProdMgrInterface", "AcquireJobs")
    Session.commit()
    return "AcquireJobs"
def logCall(serverUrl,method_name,args,componentID="defaultComponent",tag="0"):
    """
    Record in ws_last_call that a service call is being placed.

    serverUrl, method_name, componentID -- stored as server_url,
        service_call and component_id of the row.
    args -- call arguments; stored pickled and base64 encoded in
        service_parameters.
    tag  -- stored in the tag column.

    The row is inserted with call_state "call_placed"; on a duplicate key
    the parameters, state and tag of the existing row are overwritten.
    After a successful commit the module level lastCall is set to
    (serverUrl, method_name, componentID).

    The statement runs in its own 'ProdMgr' session; whatever session was
    current on entry is made current again before returning or raising.

    Raises ProdAgentException if any step fails (the 'ProdMgr' session is
    rolled back and closed first).
    """
    global lastCall
    last_session = Session.current_session
    try:
        Session.connect('ProdMgr')
        Session.start_transaction('ProdMgr')
        # the encoded arguments appear in both the INSERT and the UPDATE
        # part of the statement, so serialise them only once
        encodedArgs = base64.encodestring(cPickle.dumps(args))
        # NOTE(review): the values are interpolated straight into the SQL
        # text; switch to bound parameters if Session.execute supports them.
        sqlStr = (
            'INSERT INTO ws_last_call(server_url,component_id,service_call,'
            'service_parameters,call_state,tag) '
            'VALUES("%s","%s","%s","%s","%s","%s") '
            'ON DUPLICATE KEY UPDATE service_parameters="%s", '
            'call_state="%s", tag="%s"; '
        ) % (serverUrl, componentID, method_name, encodedArgs,
             "call_placed", str(tag), encodedArgs, "call_placed", tag)
        Session.execute(sqlStr)
        Session.commit()
        lastCall = (serverUrl, method_name, componentID)
        Session.close()
        Session.set_session(last_session)
    except Exception as ex:
        Session.rollback()
        Session.close()
        Session.set_session(last_session)
        raise ProdAgentException("Service logging Error: "+str(ex))
def retrieve(serverURL=None,method_name=None,componentID=None):
    """
    Look up a service call in ws_last_call that is still "call_placed".

    Supported argument combinations:
      * nothing given
      * componentID
      * componentID and method_name
      * componentID and serverURL
      * componentID, serverURL and method_name (direct lookup of that row)
    For the first four, the query selects per server url the row with the
    highest id among those carrying a latest log_time.

    Returns [server_url, service_call, component_id, tag] of the first
    row of the result.

    The query runs in its own 'ProdMgr' session; whatever session was
    current on entry is made current again before returning or raising.

    Raises ProdAgentException with code 3000 when the query returns no
    row, and with code 3001 for any other failure, including an
    unsupported argument combination.
    """
    last_session = Session.current_session
    try:
        Session.connect('ProdMgr')
        Session.start_transaction('ProdMgr')
        #NOTE: we do several nested queries and assume that the query engine can rewrite them
        #NOTE: we should rewrite these queries ourselves.
        #NOTE(review): values are interpolated straight into the SQL text;
        # switch to bound parameters if Session.execute supports them.

        # One template for all grouped lookups; the three %s slots take
        # the same (possibly empty) extra selection, one per query level.
        groupedQuery = (
            'SELECT server_url,service_call,component_id,tag FROM ws_last_call '
            'WHERE %scall_state="call_placed" AND id in ( '
            'SELECT max(id) FROM ws_last_call '
            'WHERE %scall_state="call_placed" AND log_time IN ( '
            'SELECT max(log_time) FROM ws_last_call '
            'WHERE %scall_state="call_placed" GROUP BY server_url) '
            'GROUP BY server_url); '
        )

        if componentID is None:
            if serverURL is not None or method_name is not None:
                # no query is defined for this combination; the generic
                # handler below reports it as a ProdAgentException (3001)
                raise ValueError("componentID is required when serverURL "
                                 "or method_name is given")
            sqlStr = groupedQuery % ('', '', '')
        elif serverURL is not None and method_name is not None:
            sqlStr = (
                'SELECT server_url,service_call,component_id,tag FROM ws_last_call '
                'WHERE component_id="%s" AND server_url="%s" '
                'AND call_state="call_placed" AND service_call="%s" '
            ) % (componentID, serverURL, method_name)
        else:
            selection = 'component_id="%s" AND ' % (componentID,)
            if method_name is not None:
                selection += 'service_call="%s" AND ' % (method_name,)
            elif serverURL is not None:
                selection += 'server_url="%s" AND ' % (serverURL,)
            sqlStr = groupedQuery % (selection, selection, selection)

        Session.execute(sqlStr)
        rows = Session.fetchall()
        if len(rows) == 0:
            raise ProdAgentException("No result in local last service call table with componentID :"+\
                str(componentID),3000)
        firstRow = rows[0]
        result = [firstRow[0], firstRow[1], firstRow[2], firstRow[3]]
        Session.commit()
        Session.close()
        Session.set_session(last_session)
        return result
    except ProdAgentException:
        # already carries the right message and code; just clean up
        Session.rollback()
        Session.close()
        Session.set_session(last_session)
        raise
    except Exception as ex:
        Session.rollback()
        Session.close()
        Session.set_session(last_session)
        raise ProdAgentException("Service commit Error: "+str(ex),3001)
logging.debug("Removing allocation spec file for : "+str(allocation['id'])) try: logging.debug("Spec file location is: "+str(allocation['allocation_spec_file'])) os.remove(allocation['allocation_spec_file']) except Exception,ex: logging.debug("WARNING: "+str(ex)) pass logging.debug("All jobs for this allocations have finished: contacting prodmgr") parameters={} parameters['jobSpecId']=str(jobId) parameters['events']=allocation['events_processed'] parameters['request_id']=request_id=Aux.split(jobId)[1] result=sendMessage(allocation['prod_mgr_url'],parameters) parameters['result']=result['result'] newState=handleResult(parameters) Session.commit() else: logging.debug("Not all jobs for this allocation have finised. Not contacting prodmgr") Session.commit() def sendMessage(url,parameters): try: logging.debug("Attempting to connect to server : "+url) finished=ProdMgrAPI.releaseJob(url,str(parameters['jobSpecId']),\ int(parameters['events']),"ProdMgrInterface") # check if the associated allocation needs to be released. request_id=Aux.split(parameters['jobSpecId'])[1] return {'result':finished,'url':'fine'} except Exception, ex: if(ex.faultCode == 2001): logging.debug('For some reason the workflow disappeared at the prodmgr')