def update_jobs_old(self, req):
    """
    Update jobs in bulk on the PanDA server.

    ``req.jobs`` is serialized to JSON and posted as ``jobList`` to the
    ``updateJobsInBulk`` endpoint. The server URL is read from the
    ``PANDA_SERVER_URL`` environment variable, falling back to
    ``config.Pilot.pandaserver``.

    :param req: request object; its ``jobs`` attribute is JSON-encoded and sent.
    :return: CommunicationResponse built with status 0 and the server reply as
             content on success, or with status -1 and an UnknownException
             (carrying the traceback text) when any Exception is raised.
    """
    self.update_jobs_lock.acquire()
    try:
        logger.info("Updating jobs: %s", req)
        data = {'jobList': json.dumps(req.jobs)}
        url = environ.get('PANDA_SERVER_URL', config.Pilot.pandaserver)
        res = https.request(
            '{pandaserver}/server/panda/updateJobsInBulk'.format(pandaserver=url),
            data=data)

        logger.info("Updated jobs status: %s", res)
        resp_attrs = {'status': 0, 'content': res, 'exception': None}
        resp = CommunicationResponse(resp_attrs)
    except Exception as e:  # Python 2/3
        logger.error("Failed to update jobs: %s, %s", e, traceback.format_exc())
        resp_attrs = {
            'status': -1,
            'content': None,
            'exception': exception.UnknownException(
                "Failed to update jobs: %s" % (traceback.format_exc()))
        }
        resp = CommunicationResponse(resp_attrs)
    finally:
        # Release on every exit path (including non-Exception errors and
        # failures inside the handler) so the lock can never stay held.
        self.update_jobs_lock.release()

    return resp
def update_events(self, req):
    """
    Update event ranges on the PanDA server.

    Posts ``req.update_events`` to the ``updateEventRanges`` endpoint. The
    server URL is read from the ``PANDA_SERVER_URL`` environment variable,
    falling back to ``config.Pilot.pandaserver``.

    :param req: request object; its ``update_events`` attribute is sent as the
                request data.
    :return: CommunicationResponse built with status 0 and the server reply as
             content on success, or with status -1 and an UnknownException
             (carrying the traceback text) when any Exception is raised.
    """
    self.update_events_lock.acquire()

    resp = None
    try:
        logger.info("Updating events: %s", req)
        url = environ.get('PANDA_SERVER_URL', config.Pilot.pandaserver)
        res = https.request(
            '{pandaserver}/server/panda/updateEventRanges'.format(pandaserver=url),
            data=req.update_events)

        logger.info("Updated event ranges status: %s", res)
        resp_attrs = {'status': 0, 'content': res, 'exception': None}
        resp = CommunicationResponse(resp_attrs)
    except Exception as e:  # Python 2/3
        logger.error("Failed to update event ranges: %s, %s", e, traceback.format_exc())
        resp_attrs = {
            'status': -1,
            'content': None,
            'exception': exception.UnknownException(
                "Failed to update events: %s" % (traceback.format_exc()))
        }
        resp = CommunicationResponse(resp_attrs)
    finally:
        # Release on every exit path (including non-Exception errors and
        # failures inside the handler) so the lock can never stay held.
        self.update_events_lock.release()

    return resp
def get_events(self, req):
    """
    Get event ranges for a job from the PanDA server.

    Posts the job identifiers and the number of requested ranges to the
    ``getEventRanges`` endpoint at ``config.Pilot.pandaserver``. If
    ``req.num_ranges`` is falsy it is set to 1 on ``req`` before the request.

    :param req: request object providing ``jobid``, ``jobsetid``, ``taskid``
                and ``num_ranges``.
    :return: CommunicationResponse built with
             - status 0 and ``res['eventRanges']`` as content when the server
               ``StatusCode`` is 0 (as int or as the string '0');
             - the server ``StatusCode`` as status and a CommunicationFailure
               otherwise;
             - status -1 and an UnknownException when any Exception is raised.
    """
    self.get_events_lock.acquire()

    resp = None
    # Start with the module logger so the except handler can always log, even
    # when get_logger() itself is what failed.
    log = logger
    try:
        log = get_logger(str(req.jobid), logger)

        if not req.num_ranges:
            # ToBeFix num_ranges with corecount
            req.num_ranges = 1

        data = {
            'pandaID': req.jobid,
            'jobsetID': req.jobsetid,
            'taskID': req.taskid,
            'nRanges': req.num_ranges
        }

        log.info("Downloading new event ranges: %s", data)
        res = https.request(
            '{pandaserver}/server/panda/getEventRanges'.format(
                pandaserver=config.Pilot.pandaserver),
            data=data)
        log.info("Downloaded event ranges: %s", res)

        status_code = res['StatusCode']
        # The status code is accepted both as the int 0 and as the string '0'.
        if status_code == 0 or str(status_code) == '0':
            resp_attrs = {
                'status': 0,
                'content': res['eventRanges'],
                'exception': None
            }
        else:
            resp_attrs = {
                'status': status_code,
                'content': None,
                'exception': exception.CommunicationFailure(
                    "Get events from panda returns non-zero value: %s" % status_code)
            }

        resp = CommunicationResponse(resp_attrs)
    except Exception as e:  # Python 2/3
        log.error("Failed to download event ranges: %s, %s", e, traceback.format_exc())
        resp_attrs = {
            'status': -1,
            'content': None,
            'exception': exception.UnknownException(
                "Failed to get events: %s" % (traceback.format_exc()))
        }
        resp = CommunicationResponse(resp_attrs)
    finally:
        # The lock acquired above must be released on every exit path.
        self.get_events_lock.release()

    return resp
def get_security_key(self, secret_key, access_key):
    """
    Get a security key pair from the PanDA server.

    Posts the two key names to the ``getKeyPair`` endpoint. The server URL is
    read from the ``PANDA_SERVER_URL`` environment variable, falling back to
    ``config.Pilot.pandaserver``.

    :param secret_key: secret key name as string (sent as ``privateKeyName``).
    :param access_key: access key name as string (sent as ``publicKeyName``).
    :return: dictionary with ``publicKey`` and ``privateKey`` taken from the
             server reply when its ``StatusCode`` is 0; an empty dictionary
             when the reply is empty, has another status, or an exception
             was raised.
    """
    key_pair = {}
    try:
        payload = {'privateKeyName': secret_key, 'publicKeyName': access_key}
        logger.info("Getting key pair: %s" % payload)

        server = environ.get('PANDA_SERVER_URL', config.Pilot.pandaserver)
        endpoint = '{pandaserver}/server/panda/getKeyPair'.format(pandaserver=server)
        reply = https.request(endpoint, data=payload)

        if not reply or reply['StatusCode'] != 0:
            logger.info("Got key pair returns wrong value: %s" % reply)
        else:
            key_pair = {
                "publicKey": reply["publicKey"],
                "privateKey": reply["privateKey"]
            }
    except Exception as ex:
        # Any failure is logged and reported to the caller as an empty result.
        logger.error("Failed to get key pair(%s,%s): %s, %s" %
                     (access_key, secret_key, ex, traceback.format_exc()))
    return key_pair
def get_jobs(self, req):
    """
    Get job definitions from the PanDA server.

    Builds the request data from the attributes present on ``req`` and calls
    the ``getJob`` endpoint at ``config.Pilot.pandaserver`` up to
    ``req.num_jobs`` times, collecting every reply whose ``StatusCode`` is 0.

    :param req: request object providing ``num_jobs`` and, optionally, the
                attributes listed in ``kmap`` below.
    :return: CommunicationResponse built with
             - status 0 and the list of collected jobs when at least one job
               was received;
             - the status/exception of the last recorded failure when no job
               was received;
             - status -1 and an UnknownException when nothing was recorded or
               when any Exception is raised.
    """
    self.get_jobs_lock.acquire()

    try:
        jobs = []
        resp_attrs = None

        data = {'getProxyKey': 'False'}
        # Maps the server-side parameter name to the attribute name on req.
        kmap = {'node': 'node', 'mem': 'mem', 'getProxyKey': 'getProxyKey',
                'computingElement': 'queue', 'diskSpace': 'disk_space',
                'siteName': 'site', 'prodSourceLabel': 'job_label',
                'workingGroup': 'working_group', 'cpu': 'cpu'}
        for key, value in kmap.items():
            if hasattr(req, value):
                data[key] = getattr(req, value)

        for _ in range(req.num_jobs):
            logger.info("Getting jobs: %s", data)
            res = https.request(
                '{pandaserver}/server/panda/getJob'.format(
                    pandaserver=config.Pilot.pandaserver),
                data=data)
            logger.info("Got jobs returns: %s", res)

            if res is None:
                resp_attrs = {'status': None,
                              'content': None,
                              'exception': exception.CommunicationFailure(
                                  "Get job failed to get response from Panda.")}
                break
            elif res['StatusCode'] == 20 and 'no jobs in PanDA' in res['errorDialog']:
                # NOTE(review): unlike the other failure branches there is no
                # break here, so the loop keeps asking - confirm this is intended.
                resp_attrs = {'status': res['StatusCode'],
                              'content': None,
                              'exception': exception.CommunicationFailure("No jobs in panda")}
            elif res['StatusCode'] != 0:
                resp_attrs = {'status': res['StatusCode'],
                              'content': None,
                              'exception': exception.CommunicationFailure(
                                  "Get job from Panda returns a non-zero value: %s" % res['StatusCode'])}
                break
            else:
                jobs.append(res)

        # Any received job makes the call a success, overriding earlier failures.
        if jobs:
            resp_attrs = {'status': 0, 'content': jobs, 'exception': None}
        elif not resp_attrs:
            resp_attrs = {'status': -1,
                          'content': None,
                          'exception': exception.UnknownException("Failed to get jobs")}

        resp = CommunicationResponse(resp_attrs)
    except Exception as e:  # Python 2/3
        logger.error("Failed to get jobs: %s, %s", e, traceback.format_exc())
        resp_attrs = {'status': -1,
                      'content': None,
                      'exception': exception.UnknownException(
                          "Failed to get jobs: %s" % (traceback.format_exc()))}
        resp = CommunicationResponse(resp_attrs)
    finally:
        # Release on every exit path (including non-Exception errors and
        # failures inside the handler) so the lock can never stay held.
        self.get_jobs_lock.release()

    return resp
def update_job(self, job):
    """
    Send a single job update to the PanDA server.

    Posts ``job`` as the request data to the ``updateJob`` endpoint at
    ``config.Pilot.pandaserver``.

    :param job: data passed unchanged to the server request.
    :return: the server reply as returned by ``https.request``, or -1 when
             any Exception is raised.
    """
    try:
        logger.info("Updating job: %s" % job)
        endpoint = '{pandaserver}/server/panda/updateJob'.format(
            pandaserver=config.Pilot.pandaserver)
        reply = https.request(endpoint, data=job)
        logger.info("Updated jobs status: %s" % reply)
    except Exception as e:  # Python 2/3
        logger.error("Failed to update jobs: %s, %s" % (e, traceback.format_exc()))
        return -1
    else:
        return reply
def get_payload_proxy(proxy_outfile_name, voms_role='atlas'):
    """
    Fetch a proxy from the PanDA server and store it in a file.

    The proxy is requested from the ``getProxy`` endpoint; the server URL is
    read from the ``PANDA_SERVER_URL`` environment variable, falling back to
    ``config.Pilot.pandaserver``. It assumes that https_setup() was done
    already.

    :param proxy_outfile_name: path of the file the proxy is written to.
    :param voms_role: role of the proxy to request (sent as ``role``).
    :return: False when the request fails, returns nothing or returns a
             non-zero ``StatusCode``; otherwise the result of ``write_file``
             (False if creating or writing the file raised a handled error).
    """
    try:
        server = os.environ.get('PANDA_SERVER_URL', config.Pilot.pandaserver)
        endpoint = '{pandaserver}/server/panda/getProxy'.format(pandaserver=server)
        reply = https.request(endpoint, data={'role': voms_role})

        if reply is None:
            logger.error("Unable to get proxy with role '%s' from panda server", voms_role)
            return False

        if reply['StatusCode'] != 0:
            logger.error("When get proxy with role '%s' panda server returned: %s",
                         voms_role, reply['errorDialog'])
            return False

        proxy_contents = reply['userProxy']
    except Exception as exc:
        logger.error("Get proxy from panda server failed: %s, %s", exc, traceback.format_exc())
        return False

    saved = False
    try:
        # Create (or truncate) the proxy file with owner-only permissions first:
        # write_file() cannot set the permission mode, it only writes into the
        # already existing file.
        os.close(os.open(proxy_outfile_name, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600))
        saved = write_file(proxy_outfile_name, proxy_contents, mute=False)  # returns True on success
    except (IOError, OSError, FileHandlingFailure) as exc:
        logger.error("Exception when try to save proxy to the file '%s': %s, %s",
                     proxy_outfile_name, exc, traceback.format_exc())

    return saved
def send_state(job, state, xml=None):
    """
    Report a job state to the PanDA server.

    Posts the job id and state (plus the URL-quoted ``xml`` when given) to the
    ``updateJob`` endpoint.

    :param job: mapping with a ``PandaID`` entry, used for the request and for
                the name of the child logger.
    :param state: state value to report.
    :param xml: optional string, sent URL-quoted under the ``xml`` key.
    :return: True when the server request returned something other than None;
             False when it returned None or raised an Exception.
    """
    # quote_plus lives in urllib.parse on Python 3 and in urllib on Python 2.
    try:
        from urllib.parse import quote_plus
    except ImportError:
        from urllib import quote_plus

    log = logger.getChild(str(job['PandaID']))
    log.debug('set job state=%s', state)

    data = {'jobId': job['PandaID'], 'state': state}

    if xml is not None:
        data['xml'] = quote_plus(xml)

    try:
        if https.request(
                'https://pandaserver.cern.ch:25443/server/panda/updateJob',
                data=data) is not None:
            log.info('confirmed job state=%s', state)
            return True
    except Exception as e:
        # str(e) instead of e.message: the attribute does not exist on
        # Python 3 exceptions and would raise inside this handler.
        log.warning('while setting job state, Exception caught: %s', str(e))

    log.warning('set job state=%s failed', state)
    return False
def _sleep_unless_stopped(args, ticks=10000, tick=0.1):
    """Sleep for up to ``ticks * tick`` seconds, returning early once ``args.graceful_stop`` is set."""
    for _ in range(ticks):
        if args.graceful_stop.is_set():
            break
        time.sleep(tick)


def retrieve(queues, traces, args):
    """
    Poll the PanDA server for jobs until a graceful stop is requested.

    Each iteration asks the ``getJob`` endpoint for a job for
    ``args.location.queue`` / ``args.job_label``. A reply with ``StatusCode``
    0 is put on ``queues.jobs``. After every attempt, successful or not, the
    loop waits up to 1000 s (10000 polls of 0.1 s), checking
    ``args.graceful_stop`` between polls.

    :param queues: object whose ``jobs`` queue receives the retrieved jobs.
    :param traces: not used by this function.
    :param args: object providing ``graceful_stop``, ``location.queue`` and
                 ``job_label``.
    """
    while not args.graceful_stop.is_set():
        logger.debug('trying to fetch job')

        data = {
            'siteName': args.location.queue,
            'prodSourceLabel': args.job_label
        }

        res = https.request(
            'https://pandaserver.cern.ch:25443/server/panda/getJob',
            data=data)

        if res is None:
            logger.warning('did not get a job -- sleep 1000s and repeat')
        elif res['StatusCode'] != 0:
            logger.warning(
                'did not get a job -- sleep 1000s and repeat -- status: %s',
                res['StatusCode'])
        else:
            logger.info(
                'got job: %s -- sleep 1000s before trying to get another job',
                res['PandaID'])
            queues.jobs.put(res)

        # Every outcome is followed by the same interruptible wait.
        _sleep_unless_stopped(args)