def makeRequest(url, params):
    """
    Fetch *url* with *params* through a RequestHandler and return the raw data.

    Uses the module-level ``ckey``/``cert`` credentials for authentication.
    NOTE: a non-200 response is only printed; the (possibly empty/error)
    data is still returned to the caller.
    """
    mgr = RequestHandler()
    header, data = mgr.request(url, params, ckey=ckey, cert=cert)
    if header.status != 200:
        # use the print *function*: the original py2-only print statement
        # is a syntax error under python3
        print("ERROR")
    return data
def getDataFromURL(url, proxyfilename=None):
    """
    Read the content of a URL and return it as a string.

    Type of content should not matter, it can be a json file or a tarball
    for example.

    url: the link you would like to retrieve
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Returns binary data encoded as a string, which can be later processed
    according to what kind of content it represents.
    """
    # pycurl may choke on unicode URLs, so force a plain ascii string
    asciiUrl = url.encode('ascii')
    handler = RequestHandler()
    _, payload = handler.request(url=asciiUrl,
                                 params={},
                                 ckey=proxyfilename,
                                 cert=proxyfilename,
                                 capath=HTTPRequests.getCACertPath())
    return payload
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""

    def setUp(self):
        "initialization"
        self.mgr = RequestHandler()
        self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem')
        self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the url relies on CERN SSO.
        """
        tfile = tempfile.NamedTemporaryFile()
        url1 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
        cookie = {url3: tfile.name}
        urls = [url1, url2, url3]
        data = getdata(urls, self.ckey, self.cert, cookie=cookie)
        headers = 0
        for row in data:
            if '200 OK' in row['headers']:
                headers += 1
        # assertTrue(headers, 3) was a no-op check: assertTrue's second
        # argument is only the failure message, so it passed for any
        # non-zero count. We want all three fetches to have succeeded.
        self.assertEqual(headers, 3)

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        tfile = tempfile.NamedTemporaryFile()
        cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
        cookie = {url: tfile.name}
        header, _ = self.mgr.request(url, params, cookie=cookie)
        # same fix as above: assert the actual status code
        self.assertEqual(header.status, 200)
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """

    def __init__(self, url='http://localhost', idict=None):
        """
        url should really be host - TODO fix that when have sufficient code
        coverage and change _getURLOpener if needed
        """
        if not idict:
            idict = {}
        dict.__init__(self, idict)
        self.pycurl = idict.get('pycurl', None)
        self.capath = idict.get('capath', None)
        if self.pycurl:
            self.reqmgr = RequestHandler()
        # set up defaults
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}

        # check for basic auth early, as if found this changes the url
        urlComponent = sanitizeURL(url)
        if urlComponent['username'] is not None:
            self.addBasicAuth(urlComponent['username'], urlComponent['password'])
            url = urlComponent['url']  # remove user, password from url
        self.setdefault("host", url)

        # then update with the incoming dict
        self.update(idict)
        self['endpoint_components'] = urlparse.urlparse(self['host'])

        # If cachepath = None disable caching
        if 'cachepath' in idict and idict['cachepath'] is None:
            self["req_cache_path"] = None
        else:
            cache_dir = self.cachePath(idict.get('cachepath'),
                                       idict.get('service_name'))
            self["cachepath"] = cache_dir
            self["req_cache_path"] = os.path.join(cache_dir, '.cache')
        self.setdefault("cert", None)
        self.setdefault("key", None)
        self.setdefault('capath', None)
        self.setdefault("timeout", 300)
        self.setdefault("logger", logging)

        check_server_url(self['host'])

    # NOTE: the HTTP verb helpers below used mutable default arguments
    # (data={}, incoming_headers={}).  makeRequest_httplib mutates its
    # headers dict, so the shared default accumulated state across calls.
    # They now default to None and are normalised inside makeRequest.

    def get(self, uri=None, data=None, incoming_headers=None,
            encode=True, decode=True, contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers,
                                encode, decode, contentType)

    def post(self, uri=None, data=None, incoming_headers=None,
             encode=True, decode=True, contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers,
                                encode, decode, contentType)

    def put(self, uri=None, data=None, incoming_headers=None,
            encode=True, decode=True, contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers,
                                encode, decode, contentType)

    def delete(self, uri=None, data=None, incoming_headers=None,
               encode=True, decode=True, contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers,
                                encode, decode, contentType)

    def makeRequest(self, uri=None, data=None, verb='GET', incoming_headers=None,
                    encoder=True, decoder=True, contentType=None):
        """
        Wrapper around request helper functions.
        """
        data = {} if data is None else data
        incoming_headers = {} if incoming_headers is None else incoming_headers
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb, incoming_headers,
                                              encoder, decoder, contentType)
        return result

    def makeRequest_pycurl(self, uri=None, params=None, verb='GET',
                           incoming_headers=None, encoder=True, decoder=True,
                           contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay compliant with
        makeRequest_httplib method.
        """
        params = {} if params is None else params
        incoming_headers = {} if incoming_headers is None else incoming_headers
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        headers.update(self.additionalHeaders)
        # And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers, verb=verb,
                                             ckey=ckey, cert=cert,
                                             capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self, uri=None, data=None, verb='GET',
                            incoming_headers=None, encoder=True, decoder=True,
                            contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in
        an encoding/decoding function to this method. Your encoded data must
        end up as a string.
        """
        # TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        data = {} if data is None else data
        # work on a copy so neither the caller's dict nor a shared default
        # is mutated by the accept-encoding injection below
        incoming_headers = dict(incoming_headers) if incoming_headers else {}
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        encoded_data = ''

        headers.update(self.additionalHeaders)
        # And now overwrite any headers that have been passed into the call:
        # WARNING: doesn't work with deflate so only accept gzip
        incoming_headers["accept-encoding"] = "gzip,identity"
        headers.update(incoming_headers)

        # httplib2 requires absolute url
        uri = self['host'] + uri

        # If you're posting an attachment, the data might not be a dict
        # please test against ConfigCache_t if you're unsure.
        if verb != 'GET' and data:
            if isinstance(encoder, (types.MethodType, types.FunctionType)):
                encoded_data = encoder(data)
            elif encoder is False:
                # Don't encode the data more than we have to
                # we don't want to URL encode the data blindly,
                # that breaks POSTing attachments... ConfigCache_t
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
        elif verb == 'GET' and data:
            # encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))
        # single, consistent (string-valued) Content-length; the earlier
        # int-valued assignment in the POST branch was immediately
        # overwritten here anyway
        headers["Content-length"] = str(len(encoded_data))

        # PY3 needed for compatibility because str under futurize is not a string.
        # Can be just str in Py3 only
        # PY3 Don't let futurize change this
        assert isinstance(encoded_data, (str, basestring)), \
            "Data in makeRequest is %s and not encoded to a string" % type(encoded_data)

        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            conn = self._getURLOpener()
            response, result = conn.request(uri, method=verb,
                                            body=encoded_data, headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                raise socket.error
        except ServerNotFoundError as ex:
            # DNS cannot resolve this domain name, let's call it
            # 'Service Unavailable'
            e = HTTPException()
            setattr(e, 'url', uri)
            setattr(e, 'status', 503)
            setattr(e, 'reason', 'Service Unavailable')
            setattr(e, 'result', str(ex))
            raise e
        except (socket.error, AttributeError):
            self['logger'].warn("Http request failed, retrying once again..")
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            for con in conn.connections.values():
                con.close()
            conn = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = conn.request(uri, method=verb,
                                                body=encoded_data,
                                                headers=headers)
            except AttributeError:
                msg = traceback.format_exc()
                # socket/httplib really screwed up - nuclear option
                conn.connections = {}
                raise socket.error('Error contacting: %s: %s'
                                   % (self.getDomainName(), msg))
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e

        if isinstance(decoder, (types.MethodType, types.FunctionType)):
            result = decoder(result)
        elif decoder is not False:
            result = self.decode(result)
        # TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)
        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(
                top, '%s-%s' % (self.getUserName(), self.getDomainName()))
        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            Permissions.owner_readwriteexec(cachepath)
        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
        """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir
        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        method getting a secure (HTTPS) connection
        """
        import httplib2
        key, cert = None, None
        if self['endpoint_components'].scheme == 'https':
            # only add certs to https requests
            # if we have a key/cert add to request,
            # if not proceed as not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception as ex:
                msg = 'No certificate or key found, authentication may fail'
                self['logger'].info(msg)
                self['logger'].debug(str(ex))
        try:
            # disable validation as we don't have a single PEM with all ca's
            http = httplib2.Http(self['req_cache_path'], self['timeout'],
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions disable validation by default
            http = httplib2.Http(self['req_cache_path'], self['timeout'])

        # Domain must be just a hostname and port. self[host] is a URL
        # currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http

    def addBasicAuth(self, username, password):
        """Add basic auth headers to request"""
        auth_string = "Basic %s" % base64.encodestring(
            '%s:%s' % (username, password)).strip()
        self.additionalHeaders["Authorization"] = auth_string

    def getKeyCert(self):
        """
        _getKeyCert_

        Get the user credentials if they exist, otherwise throw an exception.
        This code was modified from DBSAPI/dbsHttpService.py
        """
        # Zeroth case is if the class has over ridden the key/cert and has it
        # stored in self
        if self['cert'] and self['key']:
            key = self['key']
            cert = self['cert']
        else:
            key, cert = getKeyCertFromEnv()

        # Set but not found
        if key is None or cert is None:
            raise WMException('Request requires a host certificate and key',
                              "WMCORE-11")

        # All looks OK, still doesn't guarantee proxy's validity etc.
        return key, cert

    def getCAPath(self):
        """
        _getCAPath_

        Return the path of the CA certificates. The check is loose in the
        pycurl_manager: is capath == None then the server identity is not
        verified. To enable this check you need to set either the X509_CERT_DIR
        variable or the cacert key of the request.
        """
        capath = self['capath']
        if not capath:
            capath = getCAPathFromEnv()
        return capath

    def uploadFile(self, fileName, url, fieldName='file1', params=None,
                   verb='POST'):
        """
        Upload a file with curl streaming it directly from disk
        """
        # params defaulted to a shared mutable list; normalise instead
        params = params or []
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        c = pycurl.Curl()
        if verb == 'POST':
            c.setopt(c.POST, 1)
        elif verb == 'PUT':
            c.setopt(pycurl.CUSTOMREQUEST, 'PUT')
        else:
            raise HTTPException("Verb %s not supported for upload." % verb)
        c.setopt(c.URL, url)
        fullParams = [(fieldName, (c.FORM_FILE, fileName))]
        fullParams.extend(params)
        c.setopt(c.HTTPPOST, fullParams)
        bbuf = StringIO.StringIO()
        hbuf = StringIO.StringIO()
        c.setopt(pycurl.WRITEFUNCTION, bbuf.write)
        c.setopt(pycurl.HEADERFUNCTION, hbuf.write)
        if capath:
            c.setopt(pycurl.CAPATH, capath)
            c.setopt(pycurl.SSL_VERIFYPEER, True)
        else:
            c.setopt(pycurl.SSL_VERIFYPEER, False)
        if ckey:
            c.setopt(pycurl.SSLKEY, ckey)
        if cert:
            c.setopt(pycurl.SSLCERT, cert)
        c.perform()
        hres = hbuf.getvalue()
        bres = bbuf.getvalue()
        rh = ResponseHeader(hres)
        c.close()
        if rh.status < 200 or rh.status >= 300:
            exc = HTTPException(bres)
            setattr(exc, 'req_data', fullParams)
            setattr(exc, 'url', url)
            setattr(exc, 'result', bres)
            setattr(exc, 'status', rh.status)
            setattr(exc, 'reason', rh.reason)
            setattr(exc, 'headers', rh.header)
            raise exc
        return bres

    def downloadFile(self, fileName, url):
        """
        Download a file with curl streaming it directly to disk
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        hbuf = StringIO.StringIO()
        with open(fileName, "wb") as fp:
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.setopt(pycurl.HEADERFUNCTION, hbuf.write)
            if capath:
                curl.setopt(pycurl.CAPATH, capath)
                curl.setopt(pycurl.SSL_VERIFYPEER, True)
            else:
                curl.setopt(pycurl.SSL_VERIFYPEER, False)
            if ckey:
                curl.setopt(pycurl.SSLKEY, ckey)
            if cert:
                curl.setopt(pycurl.SSLCERT, cert)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.perform()
            curl.close()
            header = ResponseHeader(hbuf.getvalue())
            if header.status < 200 or header.status >= 300:
                raise RuntimeError('Reading %s failed with code %s'
                                   % (url, header.status))
        return fileName, header
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """

    def __init__(self, url='http://localhost', idict=None):
        """
        url should really be host - TODO fix that when have sufficient code
        coverage and change _getURLOpener if needed
        """
        if not idict:
            idict = {}
        dict.__init__(self, idict)
        self.pycurl = idict.get('pycurl', None)
        self.capath = idict.get('capath', None)
        if self.pycurl:
            self.reqmgr = RequestHandler()
        # set up defaults
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}

        # check for basic auth early, as if found this changes the url
        urlComponent = sanitizeURL(url)
        if urlComponent['username'] is not None:
            self.addBasicAuth(urlComponent['username'],
                              urlComponent['password'])
            url = urlComponent['url']  # remove user, password from url
        self.setdefault("host", url)

        # then update with the incoming dict
        self.update(idict)
        self['endpoint_components'] = urlparse.urlparse(self['host'])

        # If cachepath = None disable caching
        if 'cachepath' in idict and idict['cachepath'] is None:
            self["req_cache_path"] = None
        else:
            cache_dir = self.cachePath(idict.get('cachepath'),
                                       idict.get('service_name'))
            self["cachepath"] = cache_dir
            self["req_cache_path"] = os.path.join(cache_dir, '.cache')
        self.setdefault("timeout", 300)
        self.setdefault("logger", logging)

        check_server_url(self['host'])

    # NOTE: the HTTP verb helpers below used mutable default arguments
    # (data={}, incoming_headers={}).  makeRequest_httplib mutates its
    # headers dict, so the shared default accumulated state across calls.
    # They now default to None and are normalised inside makeRequest.

    def get(self, uri=None, data=None, incoming_headers=None,
            encode=True, decode=True, contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers,
                                encode, decode, contentType)

    def post(self, uri=None, data=None, incoming_headers=None,
             encode=True, decode=True, contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers,
                                encode, decode, contentType)

    def put(self, uri=None, data=None, incoming_headers=None,
            encode=True, decode=True, contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers,
                                encode, decode, contentType)

    def delete(self, uri=None, data=None, incoming_headers=None,
               encode=True, decode=True, contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers,
                                encode, decode, contentType)

    def makeRequest(self, uri=None, data=None, verb='GET', incoming_headers=None,
                    encoder=True, decoder=True, contentType=None):
        """
        Wrapper around request helper functions.
        """
        data = {} if data is None else data
        incoming_headers = {} if incoming_headers is None else incoming_headers
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb, incoming_headers,
                                              encoder, decoder, contentType)
        return result

    def makeRequest_pycurl(self, uri=None, params=None, verb='GET',
                           incoming_headers=None, encoder=True, decoder=True,
                           contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay compliant with
        makeRequest_httplib method.
        """
        params = {} if params is None else params
        incoming_headers = {} if incoming_headers is None else incoming_headers
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        headers.update(self.additionalHeaders)
        # And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers, verb=verb,
                                             ckey=ckey, cert=cert,
                                             capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self, uri=None, data=None, verb='GET',
                            incoming_headers=None, encoder=True, decoder=True,
                            contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in
        an encoding/decoding function to this method. Your encoded data must
        end up as a string.
        """
        # TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        data = {} if data is None else data
        # work on a copy so neither the caller's dict nor a shared default
        # is mutated by the accept-encoding injection below
        incoming_headers = dict(incoming_headers) if incoming_headers else {}
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        encoded_data = ''

        headers.update(self.additionalHeaders)
        # And now overwrite any headers that have been passed into the call:
        # WARNING: doesn't work with deflate so only accept gzip
        incoming_headers["accept-encoding"] = "gzip,identity"
        headers.update(incoming_headers)

        # httplib2 requires absolute url
        uri = self['host'] + uri

        # If you're posting an attachment, the data might not be a dict
        # please test against ConfigCache_t if you're unsure.
        if verb != 'GET' and data:
            if isinstance(encoder, (types.MethodType, types.FunctionType)):
                encoded_data = encoder(data)
            elif encoder is False:
                # Don't encode the data more than we have to
                # we don't want to URL encode the data blindly,
                # that breaks POSTing attachments... ConfigCache_t
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
        elif verb == 'GET' and data:
            # encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))
        # single, consistent (string-valued) Content-length; the earlier
        # int-valued assignment in the POST branch was immediately
        # overwritten here anyway
        headers["Content-length"] = str(len(encoded_data))

        assert isinstance(encoded_data, str), \
            "Data in makeRequest is %s and not encoded to a string" % type(encoded_data)

        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            conn = self._getURLOpener()
            response, result = conn.request(uri, method=verb,
                                            body=encoded_data, headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                raise socket.error
        except (socket.error, AttributeError):
            self['logger'].warn("Http request failed, retrying once again..")
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            for con in conn.connections.values():
                con.close()
            conn = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = conn.request(uri, method=verb,
                                                body=encoded_data,
                                                headers=headers)
            except AttributeError:
                msg = traceback.format_exc()
                # socket/httplib really screwed up - nuclear option
                conn.connections = {}
                raise socket.error('Error contacting: %s: %s'
                                   % (self.getDomainName(), msg))
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e

        if isinstance(decoder, (types.MethodType, types.FunctionType)):
            result = decoder(result)
        elif decoder is not False:
            result = self.decode(result)
        # TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)
        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(
                top, '%s-%s' % (self.getUserName(), self.getDomainName()))
        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            Permissions.owner_readwriteexec(cachepath)
        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
        """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir
        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        method getting a secure (HTTPS) connection
        """
        import httplib2
        key, cert = None, None
        if self['endpoint_components'].scheme == 'https':
            # only add certs to https requests
            # if we have a key/cert add to request,
            # if not proceed as not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception as ex:
                msg = 'No certificate or key found, authentication may fail'
                self['logger'].info(msg)
                self['logger'].debug(str(ex))
        try:
            # disable validation as we don't have a single PEM with all ca's
            http = httplib2.Http(self['req_cache_path'], self['timeout'],
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions disable validation by default
            http = httplib2.Http(self['req_cache_path'], self['timeout'])

        # Domain must be just a hostname and port. self[host] is a URL
        # currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http

    def addBasicAuth(self, username, password):
        """Add basic auth headers to request"""
        auth_string = "Basic %s" % base64.encodestring('%s:%s' % (
            username, password)).strip()
        self.additionalHeaders["Authorization"] = auth_string

    def getKeyCert(self):
        """
        _getKeyCert_

        Get the user credentials if they exist, otherwise throw an exception.
        This code was modified from DBSAPI/dbsHttpService.py
        """
        cert = None
        key = None
        # Zeroth case is if the class has over ridden the key/cert and has it
        # stored in self
        if 'cert' in self and 'key' in self and self['cert'] and self['key']:
            key = self['key']
            cert = self['cert']
        # Now we're trying to guess what the right cert/key combo is...
        # First preference to HOST Certificate, This is how it set in Tier0
        elif 'X509_HOST_CERT' in os.environ:
            cert = os.environ['X509_HOST_CERT']
            key = os.environ['X509_HOST_KEY']
        # Second preference to User Proxy, very common
        elif 'X509_USER_PROXY' in os.environ and os.path.exists(os.environ['X509_USER_PROXY']):
            cert = os.environ['X509_USER_PROXY']
            key = cert
        # Third preference to User Cert/Proxy combinition
        elif 'X509_USER_CERT' in os.environ:
            cert = os.environ['X509_USER_CERT']
            key = os.environ['X509_USER_KEY']
        # TODO: only in linux, unix case, add other os case
        # look for proxy at default location /tmp/x509up_u$uid
        elif os.path.exists('/tmp/x509up_u' + str(os.getuid())):
            cert = '/tmp/x509up_u' + str(os.getuid())
            key = cert
        # if interactive we can use an encrypted certificate
        elif sys.stdin.isatty():
            if os.path.exists(os.environ['HOME'] + '/.globus/usercert.pem'):
                cert = os.environ['HOME'] + '/.globus/usercert.pem'
                if os.path.exists(os.environ['HOME'] + '/.globus/userkey.pem'):
                    key = os.environ['HOME'] + '/.globus/userkey.pem'
                else:
                    key = cert

        # Set but not found
        if key and cert:
            if not os.path.exists(cert) or not os.path.exists(key):
                raise WMException('Request requires a host certificate and key',
                                  "WMCORE-11")

        # All looks OK, still doesn't guarantee proxy's validity etc.
        return key, cert

    def getCAPath(self):
        """
        _getCAPath_

        Return the path of the CA certificates. The check is loose in the
        pycurl_manager: is capath == None then the server identity is not
        verified. To enable this check you need to set either the X509_CERT_DIR
        variable or the cacert key of the request.
        """
        cacert = None
        if 'capath' in self:
            cacert = self['capath']
        elif "X509_CERT_DIR" in os.environ:
            cacert = os.environ["X509_CERT_DIR"]
        return cacert

    def uploadFile(self, fileName, url, fieldName='file1', params=None,
                   verb='POST'):
        """
        Upload a file with curl streaming it directly from disk
        """
        # params defaulted to a shared mutable list; normalise instead
        params = params or []
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        c = pycurl.Curl()
        if verb == 'POST':
            c.setopt(c.POST, 1)
        elif verb == 'PUT':
            c.setopt(pycurl.CUSTOMREQUEST, 'PUT')
        else:
            raise HTTPException("Verb %s not supported for upload." % verb)
        c.setopt(c.URL, url)
        fullParams = [(fieldName, (c.FORM_FILE, fileName))]
        fullParams.extend(params)
        c.setopt(c.HTTPPOST, fullParams)
        bbuf = StringIO.StringIO()
        hbuf = StringIO.StringIO()
        c.setopt(pycurl.WRITEFUNCTION, bbuf.write)
        c.setopt(pycurl.HEADERFUNCTION, hbuf.write)
        if capath:
            c.setopt(pycurl.CAPATH, capath)
            c.setopt(pycurl.SSL_VERIFYPEER, True)
        else:
            c.setopt(pycurl.SSL_VERIFYPEER, False)
        if ckey:
            c.setopt(pycurl.SSLKEY, ckey)
        if cert:
            c.setopt(pycurl.SSLCERT, cert)
        c.perform()
        hres = hbuf.getvalue()
        bres = bbuf.getvalue()
        rh = ResponseHeader(hres)
        c.close()
        if rh.status < 200 or rh.status >= 300:
            exc = HTTPException(bres)
            setattr(exc, 'req_data', fullParams)
            setattr(exc, 'url', url)
            setattr(exc, 'result', bres)
            setattr(exc, 'status', rh.status)
            setattr(exc, 'reason', rh.reason)
            setattr(exc, 'headers', rh.header)
            raise exc
        return bres

    def downloadFile(self, fileName, url):
        """
        Download a file with curl streaming it directly to disk
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        from WMCore.Services.pycurl_manager import ResponseHeader
        hbuf = StringIO.StringIO()
        with open(fileName, "wb") as fp:
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.setopt(pycurl.HEADERFUNCTION, hbuf.write)
            if capath:
                curl.setopt(pycurl.CAPATH, capath)
                curl.setopt(pycurl.SSL_VERIFYPEER, True)
            else:
                curl.setopt(pycurl.SSL_VERIFYPEER, False)
            if ckey:
                curl.setopt(pycurl.SSLKEY, ckey)
            if cert:
                curl.setopt(pycurl.SSLCERT, cert)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.perform()
            curl.close()
            header = ResponseHeader(hbuf.getvalue())
            if header.status < 200 or header.status >= 300:
                raise RuntimeError('Reading %s failed with code %s'
                                   % (url, header.status))
        return fileName, header
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""

    def setUp(self):
        "initialization"
        self.mgr = RequestHandler()
        self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem')
        self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')
        # Canned raw HTTP response headers (without the status line) used by
        # the ResponseHeader parsing tests below.
        self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'
        self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n'
        self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the url relies on CERN SSO.
        """
        tfile = tempfile.NamedTemporaryFile()
        url1 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
        cookie = {url3: tfile.name}
        urls = [url1, url2, url3]
        data = getdata(urls, self.ckey, self.cert, cookie=cookie)
        headers = 0
        for row in data:
            if '200 OK' in row['headers']:
                headers += 1
        # Bug fix: assertTrue(headers, 3) treated 3 as the failure *message*
        # and passed for any non-zero count; an equality check was intended.
        self.assertEqual(headers, 3)

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        tfile = tempfile.NamedTemporaryFile()
        cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
        cookie = {url: tfile.name}
        header, _ = self.mgr.request(url, params, cookie=cookie)
        # Bug fix: assertTrue(header.status, 200) could never fail; compare
        # the status code explicitly.
        self.assertEqual(header.status, 200)

    def testContinue(self):
        """
        Test HTTP exit code 100 - Continue
        """
        header = "HTTP/1.1 100 Continue\r\n" + self.dbsheader
        resp = ResponseHeader(header)
        # a 100 Continue pre-header leaves no final status on the object
        self.assertIsNone(getattr(resp, "status", None))
        self.assertEqual(resp.reason, "")
        self.assertFalse(resp.fromcache)
        self.assertIn("CMS-Server-Time", resp.header)
        self.assertIn("Date", resp.header)
        self.assertEqual(resp.header['Content-Type'], 'text/html')
        self.assertEqual(resp.header['Server'], 'Apache')
        self.assertEqual(resp.header['Transfer-Encoding'], 'chunked')
        return

    def testOK(self):
        """
        Test HTTP exit code 200 - OK
        """
        header = "HTTP/1.1 200 OK\r\n" + self.dbsheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)
        return

    def testForbidden(self):
        """
        Test HTTP exit code 403 - Forbidden
        """
        header = "HTTP/1.1 403 Forbidden\r\n" + self.dbsheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 403)
        self.assertEqual(resp.reason, "Forbidden")
        self.assertFalse(resp.fromcache)
        return

    def testOKCRIC(self):
        """
        Test HTTP exit code 200 - OK for a CRIC response header
        """
        header = "HTTP/1.1 200 OK\r\n" + self.cricheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("sessionid", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')
        return

    def testUnavailableCRICHTTP(self):
        """
        Test HTTP exit code 503 - Service Unavailable for a CRIC
        response header when it also contains a HTTP string in the
        Set-Cookie header section
        """
        header = "HTTP/1.1 503 Service Unavailable\r\n" + self.HTTPheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 503)
        self.assertEqual(resp.reason, "Service Unavailable")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("GRIDHTTP_PASSCODE", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')
        return
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""

    def setUp(self):
        "initialization"
        self.mgr = RequestHandler()
        # Read both halves of the credential pair with a single call instead
        # of invoking getKeyCertFromEnv() twice.
        self.ckey, self.cert = getKeyCertFromEnv()[:2]
        # Canned raw HTTP response headers (without the status line) used by
        # the ResponseHeader parsing tests below.
        self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'
        self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n'
        self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the url relies on CERN SSO.
        """
        tfile = tempfile.NamedTemporaryFile()
        url1 = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
        cookie = {url3: tfile.name}
        urls = [url1, url2, url3]
        data = getdata(urls, self.ckey, self.cert, cookie=cookie)
        headers = 0
        for row in data:
            if '200 OK' in row['headers']:
                headers += 1
        # Bug fix: assertTrue(headers, 3) treated 3 as the failure *message*
        # and passed for any non-zero count; an equality check was intended.
        self.assertEqual(headers, 3)

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        headers = {"Cache-Control": "no-cache"}
        tfile = tempfile.NamedTemporaryFile()
        cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
        cookie = {url: tfile.name}
        header, _ = self.mgr.request(url, params, headers, cookie=cookie)
        # Bug fix: assertTrue(header.status, 200) could never fail; compare
        # the status code explicitly.
        self.assertEqual(header.status, 200)

    def testContinue(self):
        """
        Test HTTP exit code 100 - Continue
        """
        header = "HTTP/1.1 100 Continue\r\n" + self.dbsheader
        resp = ResponseHeader(header)
        # a 100 Continue pre-header leaves no final status on the object
        self.assertIsNone(getattr(resp, "status", None))
        self.assertEqual(resp.reason, "")
        self.assertFalse(resp.fromcache)
        self.assertIn("CMS-Server-Time", resp.header)
        self.assertIn("Date", resp.header)
        self.assertEqual(resp.header['Content-Type'], 'text/html')
        self.assertEqual(resp.header['Server'], 'Apache')
        self.assertEqual(resp.header['Transfer-Encoding'], 'chunked')
        return

    def testOK(self):
        """
        Test HTTP exit code 200 - OK
        """
        header = "HTTP/1.1 200 OK\r\n" + self.dbsheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)
        return

    def testForbidden(self):
        """
        Test HTTP exit code 403 - Forbidden
        """
        header = "HTTP/1.1 403 Forbidden\r\n" + self.dbsheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 403)
        self.assertEqual(resp.reason, "Forbidden")
        self.assertFalse(resp.fromcache)
        return

    def testOKCRIC(self):
        """
        Test HTTP exit code 200 - OK for a CRIC response header
        """
        header = "HTTP/1.1 200 OK\r\n" + self.cricheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("sessionid", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')
        return

    def testUnavailableCRICHTTP(self):
        """
        Test HTTP exit code 503 - Service Unavailable for a CRIC
        response header when it also contains a HTTP string in the
        Set-Cookie header section
        """
        header = "HTTP/1.1 503 Service Unavailable\r\n" + self.HTTPheader
        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 503)
        self.assertEqual(resp.reason, "Service Unavailable")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("GRIDHTTP_PASSCODE", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')
        return

    def testHeadRequest(self):
        """
        Test a HEAD request.
        """
        params = {}
        headers = {}
        url = 'https://cmsweb.cern.ch/reqmgr2/data/info'
        res = self.mgr.getheader(url, params=params, headers=headers,
                                 ckey=self.ckey, cert=self.cert)
        self.assertEqual(res.getReason(), "OK")
        self.assertTrue(len(res.getHeader()) > 10)
        # Kubernetes cluster responds with a different Server header
        serverHeader = res.getHeaderKey("Server")
        self.assertTrue(serverHeader.startswith("nginx/") or
                        serverHeader.startswith("CherryPy/") or
                        serverHeader.startswith("openresty/"))
def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report): """ For each job the worker has to complete: Delete files that have failed previously Create a temporary copyjob file Submit the copyjob to the appropriate FTS server Parse the output of the FTS transfer and return complete and failed files for recording """ # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'} #Loop through all the jobs for the links we have failure_reasons = [] for link, copyjob in jobs.items(): submission_error = False status_error = False fts_job = {} # Validate copyjob file before doing anything self.logger.debug("Valid %s" % self.validate_copyjob(copyjob)) if not self.validate_copyjob(copyjob): continue rest_copyjob = { "params":{ "bring_online": None, "verify_checksum": False, "copy_pin_lifetime": -1, "max_time_in_queue": self.config.max_h_in_queue, "job_metadata":{"issuer": "ASO"}, "spacetoken": None, "source_spacetoken": None, "fail_nearline": False, "overwrite": True, "gridftp": None }, "files":[] } pairs = [] for SrcDest in copyjob: tempDict = {"sources": [], "metadata": None, "destinations": []} tempDict["sources"].append(SrcDest.split(" ")[0]) tempDict["destinations"].append(SrcDest.split(" ")[1]) rest_copyjob["files"].append(tempDict) self.logger.debug("Subbmitting this REST copyjob %s" % rest_copyjob) url = self.fts_server_for_transfer + '/jobs' self.logger.debug("Running FTS submission command") self.logger.debug("FTS server: %s" % self.fts_server_for_transfer) self.logger.debug("link: %s -> %s" % link) heade = {"Content-Type ":"application/json"} buf = StringIO.StringIO() try: connection = RequestHandler(config={'timeout': 300, 'connecttimeout' : 300}) except Exception as ex: msg = str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) try: response, datares = connection.request(url, rest_copyjob, heade, verb='POST', doseq=True, ckey=self.user_proxy, \ 
cert=self.user_proxy, capath='/etc/grid-security/certificates', \ cainfo=self.user_proxy, verbose=True) self.logger.debug("Submission done") self.logger.debug('Submission header status: %s' % response.status) self.logger.debug('Submission header reason: %s' % response.reason) self.logger.debug('Submission result %s' % datares) except Exception as ex: msg = "Error submitting to FTS: %s " % url msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) failure_reasons.append(msg) submission_error = True buf.close() if not submission_error: res = {} try: res = json.loads(datares) except Exception as ex: msg = "Couldn't load submission acknowledgment from FTS" msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) submission_error = True failure_reasons.append(msg) if 'job_id' in res: fileId_list = [] files_res = [] files_ = {} job_id = res['job_id'] file_url = self.fts_server_for_transfer + '/jobs/' + job_id +'/files' self.logger.debug("Submitting to %s" % file_url) file_buf = StringIO.StringIO() try: response, files_ = connection.request(file_url, {}, heade, doseq=True, ckey=self.user_proxy, \ cert=self.user_proxy, capath='/etc/grid-security/certificates', \ cainfo=self.user_proxy, verbose=True) files_res = json.loads(files_) except Exception as ex: msg = "Error contacting FTS to retrieve file: %s " % file_url msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) submission_error = True failure_reasons.append(msg) self.logger.debug("List files in job %s" % files_) file_buf.close() for file_in_job in files_res: if 'file_id' in file_in_job: fileId_list.append(file_in_job['file_id']) else: msg = "Could not load submitted file %s from FTS" % file_url self.logger.debug(msg) submission_error = True failure_reasons.append(msg) self.logger.debug("File id list %s" % fileId_list) if submission_error: self.logger.debug("Submission failed") self.logger.info("Mark failed %s files" % len(jobs_lfn[link])) self.logger.debug("Mark 
failed %s files" % jobs_lfn[link]) failed_files = self.mark_failed(jobs_lfn[link], force_fail=False, submission_error=True, failure_reasons=failure_reasons) self.logger.info("Marked failed %s" % len(failed_files)) continue fts_job['userProxyPath'] = self.user_proxy fts_job['LFNs'] = jobs_lfn[link] fts_job['PFNs'] = jobs_pfn[link] fts_job['FTSJobid'] = job_id fts_job['files_id'] = fileId_list fts_job['username'] = self.user self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir)) ftsjob_file = open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w') jsondata = json.dumps(fts_job) ftsjob_file.write(jsondata) ftsjob_file.close() self.logger.debug("%s ready." % fts_job) # Prepare Dashboard report for lfn in fts_job['LFNs']: lfn_report = {} lfn_report['FTSJobid'] = fts_job['FTSJobid'] index = fts_job['LFNs'].index(lfn) lfn_report['PFN'] = fts_job['PFNs'][index] lfn_report['FTSFileid'] = fts_job['files_id'][index] lfn_report['Workflow'] = jobs_report[link][index][2] lfn_report['JobVersion'] = jobs_report[link][index][1] job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(jobs_report[link][index][0]), int(jobs_report[link][index][0]), lfn_report['Workflow'].replace("_", ":"), lfn_report['JobVersion']) lfn_report['JobId'] = job_id lfn_report['URL'] = self.fts_server_for_transfer self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir)) dash_job_file = open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w') jsondata = json.dumps(lfn_report) dash_job_file.write(jsondata) dash_job_file.close() self.logger.debug("%s ready for FTS Dashboard report." % lfn_report) return
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """
    # NOTE(review): mutable default arguments ({}) below are shared across
    # calls; they appear to be read-only here except for incoming_headers in
    # makeRequest_httplib — confirm callers never rely on that mutation.

    def __init__(self, url='http://localhost', idict=None):
        """
        url should really be host - TODO fix that when have sufficient code coverage and change _getURLOpener if needed
        """
        if not idict:
            idict = {}
        dict.__init__(self, idict)
        # transport selection: pycurl-based RequestHandler vs httplib2
        self.pycurl = idict.get('pycurl', None)
        self.capath = idict.get('capath', None)
        if self.pycurl:
            self.reqmgr = RequestHandler()
        #set up defaults
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}
        # check for basic auth early, as if found this changes the url
        urlComponent = sanitizeURL(url)
        if urlComponent['username'] is not None:
            self.addBasicAuth(urlComponent['username'], urlComponent['password'])
            url = urlComponent['url']  # remove user, password from url
        self.setdefault("host", url)
        # then update with the incoming dict
        self.update(idict)
        self['endpoint_components'] = urlparse.urlparse(self['host'])
        # If cachepath = None disable caching
        if 'cachepath' in idict and idict['cachepath'] is None:
            self["req_cache_path"] = None
        else:
            cache_dir = (self.cachePath(idict.get('cachepath'),
                                        idict.get('service_name')))
            self["cachepath"] = cache_dir
            self["req_cache_path"] = os.path.join(cache_dir, '.cache')
        self.setdefault("timeout", 300)
        self.setdefault("logger", logging)
        check_server_url(self['host'])
        # and then get the URL opener
        self.setdefault("conn", self._getURLOpener())

    def get(self, uri=None, data={}, incoming_headers={}, encode=True,
            decode=True, contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers, encode,
                                decode, contentType)

    def post(self, uri=None, data={}, incoming_headers={}, encode=True,
             decode=True, contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers, encode,
                                decode, contentType)

    def put(self, uri=None, data={}, incoming_headers={}, encode=True,
            decode=True, contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers, encode,
                                decode, contentType)

    def delete(self, uri=None, data={}, incoming_headers={}, encode=True,
               decode=True, contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode,
                                decode, contentType)

    def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={},
                    encoder=True, decoder=True, contentType=None):
        """
        Wrapper around request helper functions.

        Dispatches to the pycurl or the httplib2 implementation depending on
        the 'pycurl' flag given at construction time.
        """
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb, incoming_headers,
                                              encoder, decoder, contentType)
        return result

    def makeRequest_pycurl(self, uri=None, params={}, verb='GET',
                           incoming_headers={}, encoder=True, decoder=True,
                           contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay complaint with
        makeRequest_httplib method.

        Returns (data, status, reason, fromcache) like the httplib variant.
        """
        # NOTE(review): getKeyCert/getCAPath appear to be provided by a
        # subclass or mixin not visible in this chunk — confirm.
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {
            "Content-type": contentType,
            "User-agent": "WMCore.Services.Requests/v001",
            "Accept": self['accept_type']
        }
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        #And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers,
                                             verb=verb, ckey=ckey, cert=cert,
                                             capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self, uri=None, data={}, verb='GET',
                            incoming_headers={}, encoder=True, decoder=True,
                            contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in
        an encoding/decoding function to this method. Your encoded data must
        end up as a string.
        """
        #TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        if not contentType:
            contentType = self['content_type']
        headers = {
            "Content-type": contentType,
            "User-agent": "WMCore.Services.Requests/v001",
            "Accept": self['accept_type']
        }
        encoded_data = ''
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        #And now overwrite any headers that have been passed into the call:
        #WARNING: doesn't work with deplate so only accept gzip
        incoming_headers["accept-encoding"] = "gzip,identity"
        headers.update(incoming_headers)
        # httpib2 requires absolute url
        uri = self['host'] + uri
        # If you're posting an attachment, the data might not be a dict
        # please test against ConfigCache_t if you're unsure.
        #assert type(data) == type({}), \
        #    "makeRequest input data must be a dict (key/value pairs)"
        # There must be a better way to do this...
        def f():
            """Dummy function"""
            pass
        # f serves only as a reference "plain function" type for the
        # callable checks on encoder/decoder below
        if verb != 'GET' and data:
            if type(encoder) == type(self.get) or type(encoder) == type(f):
                encoded_data = encoder(data)
            elif encoder == False:
                # Don't encode the data more than we have to
                # we don't want to URL encode the data blindly,
                # that breaks POSTing attachments... ConfigCache_t
                #encoded_data = urllib.urlencode(data)
                # -- Andrew Melo 25/7/09
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
            headers["Content-length"] = len(encoded_data)
        elif verb == 'GET' and data:
            #encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))
        headers["Content-length"] = str(len(encoded_data))
        assert type(encoded_data) == type('string'), \
            "Data in makeRequest is %s and not encoded to a string" % type(encoded_data)
        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            response, result = self['conn'].request(uri, method=verb,
                                                    body=encoded_data,
                                                    headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                response, result = self['conn'].request(uri, method=verb,
                                                        body=encoded_data,
                                                        headers=headers)
        except (socket.error, AttributeError):
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            [conn.close() for conn in self['conn'].connections.values()]
            self['conn'] = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = self['conn'].request(uri, method=verb,
                                                        body=encoded_data,
                                                        headers=headers)
            except AttributeError:
                # socket/httplib really screwed up - nuclear option
                self['conn'].connections = {}
                raise socket.error, 'Error contacting: %s' % self.getDomainName()
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e
        if type(decoder) == type(self.makeRequest) or type(decoder) == type(f):
            result = decoder(result)
        elif decoder != False:
            result = self.decode(result)
        #TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)
        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(top, '%s-%s'
                                     % (self.getUserName(), self.getDomainName()))
        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            # directory already exists: just enforce owner-only permissions
            Permissions.owner_readwriteexec(cachepath)
        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
        """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            # object to store temporary directory - cleaned up on destruction
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir
        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        method getting a secure (HTTPS) connection
        """
        key, cert = None, None
        if self['endpoint_components'].scheme == 'https':
            # only add certs to https requests
            # if we have a key/cert add to request,
            # if not proceed as not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception, ex:
                msg = 'No certificate or key found, authentication may fail'
                self['logger'].info(msg)
                self['logger'].debug(str(ex))
        try:
            # disable validation as we don't have a single PEM with all ca's
            http = httplib2.Http(self['req_cache_path'], self['timeout'],
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions disable validation by default
            http = httplib2.Http(self['req_cache_path'], self['timeout'])
        # Domain must be just a hostname and port. self[host] is a URL currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """
    # NOTE(review): this variant uses a 30 second default timeout and does not
    # inject an accept-encoding header in makeRequest_httplib.

    def __init__(self, url='http://localhost', idict=None):
        """
        url should really be host - TODO fix that when have sufficient code coverage and change _getURLOpener if needed
        """
        if not idict:
            idict = {}
        dict.__init__(self, idict)
        # transport selection: pycurl-based RequestHandler vs httplib2
        self.pycurl = idict.get('pycurl', None)
        if self.pycurl:
            self.reqmgr = RequestHandler()
        #set up defaults
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}
        # check for basic auth early, as if found this changes the url
        urlComponent = sanitizeURL(url)
        if urlComponent['username'] is not None:
            self.addBasicAuth(urlComponent['username'], urlComponent['password'])
            url = urlComponent['url']  # remove user, password from url
        self.setdefault("host", url)
        # then update with the incoming dict
        self.update(idict)
        self['endpoint_components'] = urlparse.urlparse(self['host'])
        # If cachepath = None disable caching
        if 'cachepath' in idict and idict['cachepath'] is None:
            self["req_cache_path"] = None
        else:
            cache_dir = (self.cachePath(idict.get('cachepath'),
                                        idict.get('service_name')))
            self["cachepath"] = cache_dir
            self["req_cache_path"] = os.path.join(cache_dir, '.cache')
        self.setdefault("timeout", 30)
        self.setdefault("logger", logging)
        check_server_url(self['host'])
        # and then get the URL opener
        self.setdefault("conn", self._getURLOpener())

    def get(self, uri=None, data={}, incoming_headers={}, encode=True,
            decode=True, contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers, encode,
                                decode, contentType)

    def post(self, uri=None, data={}, incoming_headers={}, encode=True,
             decode=True, contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers, encode,
                                decode, contentType)

    def put(self, uri=None, data={}, incoming_headers={}, encode=True,
            decode=True, contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers, encode,
                                decode, contentType)

    def delete(self, uri=None, data={}, incoming_headers={}, encode=True,
               decode=True, contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode,
                                decode, contentType)

    def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={},
                    encoder=True, decoder=True, contentType=None):
        """
        Wrapper around request helper functions.

        Dispatches to the pycurl or the httplib2 implementation depending on
        the 'pycurl' flag given at construction time.
        """
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb, incoming_headers,
                                              encoder, decoder, contentType)
        return result

    def makeRequest_pycurl(self, uri=None, params={}, verb='GET',
                           incoming_headers={}, encoder=True, decoder=True,
                           contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay complaint with
        makeRequest_httplib method.

        Returns (data, status, reason, fromcache) like the httplib variant.
        """
        # NOTE(review): getKeyCert/getCAPath appear to be provided by a
        # subclass or mixin not visible in this chunk — confirm.
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        #And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers,
                                             verb=verb, ckey=ckey, cert=cert,
                                             capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self, uri=None, data={}, verb='GET',
                            incoming_headers={}, encoder=True, decoder=True,
                            contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in
        an encoding/decoding function to this method. Your encoded data must
        end up as a string.
        """
        #TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        encoded_data = ''
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        #And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        # httpib2 requires absolute url
        uri = self['host'] + uri
        # If you're posting an attachment, the data might not be a dict
        # please test against ConfigCache_t if you're unsure.
        #assert type(data) == type({}), \
        #    "makeRequest input data must be a dict (key/value pairs)"
        # There must be a better way to do this...
        def f():
            """Dummy function"""
            pass
        # f serves only as a reference "plain function" type for the
        # callable checks on encoder/decoder below
        if verb != 'GET' and data:
            if type(encoder) == type(self.get) or type(encoder) == type(f):
                encoded_data = encoder(data)
            elif encoder == False:
                # Don't encode the data more than we have to
                # we don't want to URL encode the data blindly,
                # that breaks POSTing attachments... ConfigCache_t
                #encoded_data = urllib.urlencode(data)
                # -- Andrew Melo 25/7/09
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
            headers["Content-length"] = len(encoded_data)
        elif verb == 'GET' and data:
            #encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))
        headers["Content-length"] = str(len(encoded_data))
        assert type(encoded_data) == type('string'), \
            "Data in makeRequest is %s and not encoded to a string" % type(encoded_data)
        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            response, result = self['conn'].request(uri, method=verb,
                                                    body=encoded_data,
                                                    headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                response, result = self['conn'].request(uri, method=verb,
                                                        body=encoded_data,
                                                        headers=headers)
        except (socket.error, AttributeError):
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            [conn.close() for conn in self['conn'].connections.values()]
            self['conn'] = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = self['conn'].request(uri, method=verb,
                                                        body=encoded_data,
                                                        headers=headers)
            except AttributeError:
                # socket/httplib really screwed up - nuclear option
                self['conn'].connections = {}
                raise socket.error, 'Error contacting: %s' % self.getDomainName()
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e
        if type(decoder) == type(self.makeRequest) or type(decoder) == type(f):
            result = decoder(result)
        elif decoder != False:
            result = self.decode(result)
        #TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)
        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(top, '%s-%s'
                                     % (self.getUserName(), self.getDomainName()))
        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            # directory already exists: just enforce owner-only permissions
            Permissions.owner_readwriteexec(cachepath)
        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
        """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            # object to store temporary directory - cleaned up on destruction
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir
        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        method getting a secure (HTTPS) connection
        """
        key, cert = None, None
        if self['endpoint_components'].scheme == 'https':
            # only add certs to https requests
            # if we have a key/cert add to request,
            # if not proceed as not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception, ex:
                msg = 'No certificate or key found, authentication may fail'
                self['logger'].info(msg)
                self['logger'].debug(str(ex))
        try:
            # disable validation as we don't have a single PEM with all ca's
            http = httplib2.Http(self['req_cache_path'], self['timeout'],
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions disable validation by default
            http = httplib2.Http(self['req_cache_path'], self['timeout'])
        # Domain must be just a hostname and port. self[host] is a URL currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http
class Worker(object):
    """
    Publication master: acquires files ready for publication from the
    Oracle-based ASO database, groups them per task, and spawns one slave
    process per task that dumps the publication payload to /tmp and invokes
    the external publisher.sh script.
    """
    def __init__(self, config, quiet):
        """
        Initialise class members

        :param config: configuration object; only config.General is used here
        :param quiet: if True, reduce the root logger verbosity to WARNING
        """
        self.config = config.General
        # maximum number of ready files above which publication is forced
        self.max_files_per_block = self.config.max_files_per_block
        # ops proxy used both as key and cert for authenticated requests
        self.userProxy = self.config.opsProxy
        self.block_publication_timeout = self.config.block_closure_timeout
        # maps destination LFN -> source LFN for files seen by the slaves
        self.lfn_map = {}
        self.force_publication = False
        self.force_failure = False
        #TODO: logger!

        def createLogdir(dirname):
            """ Create the directory dirname ignoring errors in case it
                exists. Exit if the directory cannot be created.
            """
            try:
                os.mkdir(dirname)
            except OSError as ose:
                if ose.errno != 17: #ignore the "Directory already exists error"
                    print(str(ose))
                    print("The task worker need to access the '%s' directory" % dirname)
                    sys.exit(1)

        def setRootLogger(quiet, debug):
            """Sets the root logger with the desired verbosity level
               The root logger logs to logs/twlog.txt and every single
               logging instruction is propagated to it (not really nice
               to read)

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :return logger: a logger with the appropriate logger level."""
            createLogdir('logs')
            createLogdir('logs/processes')
            createLogdir('logs/tasks')
            logHandler = MultiProcessingLog('logs/log.txt', when='midnight')
            logFormatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s")
            logHandler.setFormatter(logFormatter)
            logging.getLogger().addHandler(logHandler)
            loglevel = logging.INFO
            if quiet:
                loglevel = logging.WARNING
            if debug:
                # debug wins over quiet when both are set
                loglevel = logging.DEBUG
            logging.getLogger().setLevel(loglevel)
            logger = setProcessLogger("master")
            logger.debug("PID %s.", os.getpid())
            logger.debug("Logging level initialized to %s.", loglevel)
            return logger

        self.cache_area = self.config.cache_area
        # NOTE(review): debug is hard-wired to True here, so `quiet` can never
        # actually raise the level above DEBUG -- confirm this is intended.
        self.logger = setRootLogger(quiet, True)
        try:
            self.oracleDB = HTTPRequests(self.config.oracleDB,
                                         self.config.opsProxy,
                                         self.config.opsProxy)
            self.logger.debug('Contacting OracleDB:' + self.config.oracleDB)
        except:
            self.logger.exception('Failed when contacting Oracle')
            raise
        try:
            self.connection = RequestHandler(config={'timeout': 900, 'connecttimeout' : 900})
        except Exception as ex:
            # best effort: log and continue; a failed handler will surface
            # later when self.connection is used
            msg = "Error initializing the connection"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.debug(msg)

    def active_tasks(self, db):
        """
        Acquire publications for this asoworker and return the acquired
        entries grouped per task.

        :param db: HTTPRequests-like object used to reach the Oracle REST API
        :return: zip of ([username, group, role, taskname], [file docs]) per
                 unique task with files in transfer_state == 3; empty list on
                 any server error
        """
        fileDoc = {}
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquirePublication'
        self.logger.debug("Retrieving publications from oracleDB")
        results = ''
        try:
            # mark publications as acquired by this worker
            results = db.post(self.config.oracleFileTrans,
                              data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                              from oracleDB: %s" %ex)
            return []
        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquiredPublication'
        fileDoc['grouping'] = 0
        fileDoc['limit'] = 100000
        self.logger.debug("Retrieving max.100000 acquired puclications from oracleDB")
        result = []
        try:
            # fetch back everything this worker has acquired
            results = db.get(self.config.oracleFileTrans,
                             data=encodeRequest(fileDoc))
            result.extend(oracleOutputMapping(results))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                              from oracleDB: %s" %ex)
            return []
        self.logger.debug("publen: %s" % len(result))
        self.logger.debug("%s acquired puclications retrieved" % len(result))
        #TODO: join query for publisher (same of submitter)
        # one entry per unique (username, group, role, taskname) among files
        # that finished their transfer (transfer_state == 3)
        unique_tasks = [list(i) for i in set(tuple([x['username'],
                                                    x['user_group'],
                                                    x['user_role'],
                                                    x['taskname']]
                                                   ) for x in result
                                             if x['transfer_state'] == 3)]
        info = []
        for task in unique_tasks:
            info.append([x for x in result if x['taskname'] == task[3]])
        return zip(unique_tasks, info)

    def getPublDescFiles(self, workflow, lfn_ready):
        """
        Download and read the files describing
        what needs to be published
        """
        data = {}
        data['taskname'] = workflow
        data['filetype'] = 'EDM'
        out = []

        # divide lfn per chunks, avoiding URI-too long exception
        def chunks(l, n):
            """
            Yield successive n-sized chunks from l.
            :param l: list to split in chunks
            :param n: chunk size
            :return: yield the next list chunk
            """
            for i in range(0, len(l), n):
                yield l[i:i + n]

        for lfn_ in chunks(lfn_ready, 50):
            data['lfn'] = lfn_
            try:
                res = self.oracleDB.get('/crabserver/dev/filemetadata',
                                        data=encodeRequest(data, listParams=["lfn"]))
                res = res[0]
            except Exception as ex:
                self.logger.error("Error during metadata retrieving: %s" %ex)
            # NOTE(review): if the very first chunk's request raises, `res` is
            # unbound here and this print raises NameError; also looks like a
            # leftover debug print -- confirm and guard/remove.
            print(len(res['result']))
            for obj in res['result']:
                if isinstance(obj, dict):
                    out.append(obj)
                else:
                    # entries may come back as JSON strings rather than dicts
                    #print type(obj)
                    out.append(json.loads(str(obj)))
        return out

    def algorithm(self):
        """
        1. Get a list of users with files to publish from the couchdb instance
        2. For each user get a suitably sized input for publish
        3. Submit the publish to a subprocess
        """
        tasks = self.active_tasks(self.oracleDB)
        self.logger.debug('kicking off pool %s' % [x[0][3] for x in tasks])
        processes = []
        try:
            # one slave process per task; all are started before any join
            for task in tasks:
                p = Process(target=self.startSlave, args=(task,))
                p.start()
                processes.append(p)
            for proc in processes:
                proc.join()
        except:
            self.logger.exception("Error during process mapping")

    def startSlave(self, task):
        """
        Slave body run in a child process for a single task: decide whether
        publication should be forced (enough files, terminal task status, or
        publication timeout expired), collect the file metadata, dump it to
        /tmp/<workflow>.json and invoke the external publisher script.

        :param task: ([username, group, role, taskname], [file docs]) pair as
                     produced by active_tasks
        :return: 0 on completion or handled error, 1 if the user DN lookup fails
        """
        # TODO: lock task!
        # - process logger
        logger = setProcessLogger(str(task[0][3]))
        logger.info("Process %s is starting. PID %s", task[0][3], os.getpid())
        self.force_publication = False
        workflow = str(task[0][3])
        wfnamemsg = "%s: " % (workflow)
        if len(task[1]) > self.max_files_per_block:
            self.force_publication = True
            msg = "All datasets have more than %s ready files." % (self.max_files_per_block)
            msg += " No need to retrieve task status nor last publication time."
            logger.info(wfnamemsg+msg)
        else:
            msg = "At least one dataset has less than %s ready files." % (self.max_files_per_block)
            logger.info(wfnamemsg+msg)
            # Retrieve the workflow status. If the status can not be retrieved, continue
            # with the next workflow.
            workflow_status = ''
            url = '/'.join(self.cache_area.split('/')[:-1]) + '/workflow'
            msg = "Retrieving status from %s" % (url)
            logger.info(wfnamemsg+msg)
            buf = cStringIO.StringIO()
            header = {"Content-Type":"application/json"}
            data = {'workflow': workflow}#, 'subresource': 'taskads'}
            try:
                _, res_ = self.connection.request(url,
                                                  data,
                                                  header,
                                                  doseq=True,
                                                  ckey=self.userProxy,
                                                  cert=self.userProxy
                                                 )# , verbose=True) # for debug
            except Exception as ex:
                if self.config.isOracle:
                    logger.exception('Error retrieving status from cache.')
                    return 0
            # NOTE(review): if the request above raised and isOracle is false,
            # `res_` is unbound and json.loads(res_) below raises NameError
            # (caught by the generic except) -- confirm intended.
            msg = "Status retrieved from cache. Loading task status."
            logger.info(wfnamemsg+msg)
            try:
                buf.close()
                res = json.loads(res_)
                workflow_status = res['result'][0]['status']
                msg = "Task status is %s." % workflow_status
                logger.info(wfnamemsg+msg)
            except ValueError:
                # non-JSON answer: the workflow is gone from the WM system
                msg = "Workflow removed from WM."
                logger.error(wfnamemsg+msg)
                workflow_status = 'REMOVED'
            except Exception as ex:
                msg = "Error loading task status!"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logger.error(wfnamemsg+msg)
            # If the workflow status is terminal, go ahead and publish all the ready files
            # in the workflow.
            if workflow_status in ['COMPLETED', 'FAILED', 'KILLED', 'REMOVED']:
                self.force_publication = True
                if workflow_status in ['KILLED', 'REMOVED']:
                    self.force_failure = True
                msg = "Considering task status as terminal. Will force publication."
                logger.info(wfnamemsg+msg)
            # Otherwise...
            else:
                msg = "Task status is not considered terminal."
                logger.info(wfnamemsg+msg)
                msg = "Getting last publication time."
                logger.info(wfnamemsg+msg)
                # Get when was the last time a publication was done for this workflow (this
                # should be more or less independent of the output dataset in case there are
                # more than one).
                last_publication_time = None
                data = {}
                data['workflow'] = workflow
                data['subresource'] = 'search'
                try:
                    result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'),
                                               data=encodeRequest(data))
                    logger.debug("task: %s " % str(result[0]))
                    logger.debug("task: %s " % getColumn(result[0], 'tm_last_publication'))
                except Exception as ex:
                    logger.error("Error during task doc retrieving: %s" %ex)
                # NOTE(review): last_publication_time is still None here (it is
                # never assigned from the query above), so this branch is dead
                # code and publication is always forced below -- confirm.
                if last_publication_time:
                    date = oracleOutputMapping(result)['last_publication']
                    seconds = datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f").timetuple()
                    last_publication_time = time.mktime(seconds)
                msg = "Last publication time: %s." % str(last_publication_time)
                logger.debug(wfnamemsg+msg)
                # If this is the first time a publication would be done for this workflow, go
                # ahead and publish.
                if not last_publication_time:
                    self.force_publication = True
                    msg = "There was no previous publication. Will force publication."
                    logger.info(wfnamemsg+msg)
                # Otherwise...
                else:
                    last = last_publication_time
                    msg = "Last published block: %s" % (last)
                    logger.debug(wfnamemsg+msg)
                    # If the last publication was long time ago (> our block publication timeout),
                    # go ahead and publish.
                    now = int(time.time()) - time.timezone
                    time_since_last_publication = now - last
                    hours = int(time_since_last_publication/60/60)
                    minutes = int((time_since_last_publication - hours*60*60)/60)
                    timeout_hours = int(self.block_publication_timeout/60/60)
                    timeout_minutes = int((self.block_publication_timeout - timeout_hours*60*60)/60)
                    msg = "Last publication was %sh:%sm ago" % (hours, minutes)
                    if time_since_last_publication > self.block_publication_timeout:
                        self.force_publication = True
                        msg += " (more than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes)
                        msg += " Will force publication."
                    else:
                        msg += " (less than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes)
                        msg += " Not enough to force publication."
                    logger.info(wfnamemsg+msg)
        #logger.info(task[1])
        try:
            if self.force_publication:
                # - get info
                # files with completed transfers (state 3) not yet in a
                # published/failed publication state (2, 3, 5)
                active_ = [{'key': [x['username'],
                                    x['user_group'],
                                    x['user_role'],
                                    x['taskname']],
                            'value': [x['destination'],
                                      x['source_lfn'],
                                      x['destination_lfn'],
                                      x['input_dataset'],
                                      x['dbs_url'],
                                      x['last_update']
                                     ]}
                           for x in task[1] if x['transfer_state'] == 3 and
                           x['publication_state'] not in [2, 3, 5]]
                lfn_ready = []
                wf_jobs_endtime = []
                pnn, input_dataset, input_dbs_url = "", "", ""
                for active_file in active_:
                    job_end_time = active_file['value'][5]
                    # job end times are normalized to UTC epoch seconds;
                    # Oracle stores them already as epoch
                    if job_end_time and self.config.isOracle:
                        wf_jobs_endtime.append(int(job_end_time) - time.timezone)
                    elif job_end_time:
                        wf_jobs_endtime.append(int(time.mktime(time.strptime(str(job_end_time), '%Y-%m-%d %H:%M:%S'))) - time.timezone)
                    source_lfn = active_file['value'][1]
                    dest_lfn = active_file['value'][2]
                    self.lfn_map[dest_lfn] = source_lfn
                    # pnn/dataset/dbs_url are taken from the first file that
                    # provides them; all files of a task share these
                    if not pnn or not input_dataset or not input_dbs_url:
                        pnn = str(active_file['value'][0])
                        input_dataset = str(active_file['value'][3])
                        input_dbs_url = str(active_file['value'][4])
                    lfn_ready.append(dest_lfn)
                userDN = ''
                username = task[0][0]
                user_group = ""
                if task[0][1]:
                    user_group = task[0][1]
                user_role = ""
                if task[0][2]:
                    user_role = task[0][2]
                logger.debug("Trying to get DN %s %s %s" % (username, user_group, user_role))
                try:
                    userDN = getDNFromUserName(username, logger)
                except Exception as ex:
                    msg = "Error retrieving the user DN"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logger.error(msg)
                    return 1
                # Get metadata
                toPublish = []
                publDescFiles_list = self.getPublDescFiles(workflow, lfn_ready)
                for file_ in active_:
                    for _, doc in enumerate(publDescFiles_list):
                        #logger.info(type(doc))
                        #logger.info(doc)
                        # match metadata docs to active files by destination LFN
                        if doc["lfn"] == file_["value"][2]:
                            doc["User"] = username
                            doc["Group"] = file_["key"][1]
                            doc["Role"] = file_["key"][2]
                            doc["UserDN"] = userDN
                            doc["Destination"] = file_["value"][0]
                            doc["SourceLFN"] = file_["value"][1]
                            toPublish.append(doc)
                # hand the payload to the external publisher via a tmp file
                with open("/tmp/"+workflow+'.json', 'w') as outfile:
                    json.dump(toPublish, outfile)
                logger.info(". publisher.sh %s" % (workflow))
                subprocess.call(["/bin/bash", "/data/user/MicroASO/microPublisher/python/publisher.sh", workflow])
        except:
            logger.exception("Exception!")
        return 0