def validate(self, apiobj, method, api, param, safe):
    """Validate all the input parameters as enforced by the WMCore.REST module.

    The login is checked first, then one set of checks is run depending on
    the HTTP method. Values are checked through the ``validate_*`` helpers
    and read back from ``safe.kwargs`` where a cross-parameter check or a
    normalisation is needed.

    :arg apiobj: not used by this method.
    :arg str method: HTTP method; 'PUT', 'POST', 'GET' and 'DELETE' have
        dedicated checks, any other value only goes through the login check.
    :arg api: not used by this method.
    :arg param: raw request arguments, handed over to the ``validate_*`` helpers.
    :arg safe: holder of the validated arguments (``safe.kwargs``).
    :raises InvalidParameter: on PUT when the runs and lumis lists differ in
        length or when jobid and pandajobid are both set or both unset; on
        DELETE when taskname and hours are both set or both unset.
    """
    authz_login_valid()

    if method in ['PUT']:
        #TODO check optional parameter
        #TODO check all the regexp
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
        validate_numlist("outfileruns", param, safe)
        # one lumi list is expected per run
        if len(safe.kwargs["outfileruns"]) != len(safe.kwargs["outfilelumis"]):
            raise InvalidParameter("The number of runs and the number of lumis lists are different")
        validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
        validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
        validate_str("jobid", param, safe, RX_JOBID, optional=True)
        #TODO: for backward compatibility. Get rid of the pandajobid once all jobs using it
        #      are done (one month after this gets released).
        #      The pandajobid handling below (up to the "outsize" check) can then be deleted.
        validate_num("pandajobid", param, safe, optional=True)
        # exactly one of the two identifiers must be truthy
        if bool(safe.kwargs["jobid"]) == bool(safe.kwargs["pandajobid"]):
            raise InvalidParameter("Only one among jobid and pandajobid should be set")
        #Oracle/cx_oracle/python stack does not like None for numbers, even if they are nullable
        if safe.kwargs["pandajobid"] is None:
            safe.kwargs["pandajobid"] = 0
        validate_num("outsize", param, safe, optional=False)
        validate_str("publishdataname", param, safe, RX_PUBLISH, optional=False)
        validate_str("appver", param, safe, RX_CMSSW, optional=False)
        validate_str("outtype", param, safe, RX_OUTTYPES, optional=False)
        validate_str("checksummd5", param, safe, RX_CHECKSUM, optional=False)
        validate_str("checksumcksum", param, safe, RX_CHECKSUM, optional=False)
        validate_str("checksumadler32", param, safe, RX_CHECKSUM, optional=False)
        validate_str("outlocation", param, safe, RX_CMSSITE, optional=False)
        validate_str("outtmplocation", param, safe, RX_CMSSITE, optional=False)
        validate_str("acquisitionera", param, safe, RX_TASKNAME, optional=False)  #TODO Do we really need this?
        validate_str("outdatasetname", param, safe, RX_OUTDSLFN, optional=False)  #TODO temporary, need to come up with a regex
        validate_str("outlfn", param, safe, RX_PARENTLFN, optional=False)
        validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
        validate_num("events", param, safe, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
        validate_num("directstageout", param, safe, optional=True)
        # stored as a 'T'/'F' flag; 'F' if not provided
        safe.kwargs["directstageout"] = 'T' if safe.kwargs["directstageout"] else 'F'
    elif method in ['POST']:
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_str("outlfn", param, safe, RX_LFN, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
    elif method in ['GET']:
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
        validate_num("howmany", param, safe, optional=True)
    elif method in ['DELETE']:
        authz_operator()
        validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
        validate_str("hours", param, safe, RX_HOURS, optional=True)
        # exactly one of taskname / hours must be given
        if bool(safe.kwargs["taskname"]) == bool(safe.kwargs["hours"]):
            raise InvalidParameter(
                "You have to specify a taskname or a number of hours. "
                "Files of this task or created before the number of hours"
                " will be deleted. Only one of the two parameters can be specified.")
def validate(self, apiobj, method, api, param, safe):
    """Check the request arguments as enforced by the WMCore.REST module.

    After the login check, each supported HTTP method (PUT, POST, GET,
    DELETE) runs its own list of ``validate_*`` calls and returns; any other
    method only goes through the login check.

    :arg str method: HTTP method of the request.
    :arg param: raw request arguments, passed to the ``validate_*`` helpers.
    :arg safe: holder of the validated arguments (``safe.kwargs``).
    :raises InvalidParameter: on PUT when runs and lumis lists have different
        lengths; on DELETE when taskname and hours are both set or both unset.
    """
    authz_login_valid()

    if method == 'PUT':
        #TODO check optional parameter
        #TODO check all the regexp
        validate_str("taskname", param, safe, RX_WORKFLOW, optional=False)
        validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
        validate_numlist("outfileruns", param, safe)
        runsCount = len(safe.kwargs["outfileruns"])
        lumisCount = len(safe.kwargs["outfilelumis"])
        if runsCount != lumisCount:
            raise InvalidParameter(
                "The number of runs and the number of lumis lists are different")
        validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
        validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
        validate_num("pandajobid", param, safe, optional=False)
        validate_num("outsize", param, safe, optional=False)
        validate_str("publishdataname", param, safe, RX_PUBLISH, optional=False)
        validate_str("appver", param, safe, RX_CMSSW, optional=False)
        validate_str("outtype", param, safe, RX_OUTTYPES, optional=False)
        validate_str("checksummd5", param, safe, RX_CHECKSUM, optional=False)
        validate_str("checksumcksum", param, safe, RX_CHECKSUM, optional=False)
        validate_str("checksumadler32", param, safe, RX_CHECKSUM, optional=False)
        validate_str("outlocation", param, safe, RX_CMSSITE, optional=False)
        validate_str("outtmplocation", param, safe, RX_CMSSITE, optional=False)
        #TODO Do we really need this?
        validate_str("acquisitionera", param, safe, RX_WORKFLOW, optional=False)
        #TODO temporary, need to come up with a regex
        validate_str("outdatasetname", param, safe, RX_OUTDSLFN, optional=False)
        validate_str("outlfn", param, safe, RX_PARENTLFN, optional=False)
        validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
        validate_num("events", param, safe, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
        validate_num("directstageout", param, safe, optional=True)
        # turn the number into a 'T'/'F' flag; 'F' if not provided
        if safe.kwargs["directstageout"]:
            safe.kwargs["directstageout"] = 'T'
        else:
            safe.kwargs["directstageout"] = 'F'
        return

    if method == 'POST':
        validate_str("taskname", param, safe, RX_WORKFLOW, optional=False)
        validate_str("outlfn", param, safe, RX_LFN, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
        return

    if method == 'GET':
        validate_str("taskname", param, safe, RX_WORKFLOW, optional=False)
        validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
        return

    if method == 'DELETE':
        authz_operator()
        validate_str("taskname", param, safe, RX_WORKFLOW, optional=True)
        validate_str("hours", param, safe, RX_HOURS, optional=True)
        hasTaskname = bool(safe.kwargs["taskname"])
        hasHours = bool(safe.kwargs["hours"])
        # exactly one of the two selectors is accepted
        if hasTaskname == hasHours:
            raise InvalidParameter(
                "You have to specify a taskname or a number of hours. Files of this task"
                " or created before the number of hours will be deleted."
                " Only one of the two parameters can be specified.")
def validate(self, apiobj, method, api, param, safe):
    """Run the per-method argument checks enforced by the WMCore.REST module.

    The login is verified first. PUT, POST, GET and DELETE each have their
    own sequence of ``validate_*`` calls; the order of the calls is kept
    as-is because it decides which check runs first.

    :arg str method: HTTP method of the request.
    :arg param: raw request arguments, passed to the ``validate_*`` helpers.
    :arg safe: holder of the validated arguments (``safe.kwargs``).
    :raises InvalidParameter: on PUT when runs and lumis lists have different
        lengths; on DELETE when taskname and hours are both set or both unset.
    """
    authz_login_valid()

    if method in ('PUT',):
        #TODO check optional parameter
        #TODO check all the regexp
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
        validate_numlist("outfileruns", param, safe)
        if len(safe.kwargs["outfilelumis"]) != len(safe.kwargs["outfileruns"]):
            raise InvalidParameter(
                "The number of runs and the number of lumis lists are different")
        validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
        validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
        for numericArg in ("pandajobid", "outsize"):
            validate_num(numericArg, param, safe, optional=False)
        # mandatory string arguments, checked in this exact order
        mandatoryStrings = (
            ("publishdataname", RX_PUBLISH),
            ("appver", RX_CMSSW),
            ("outtype", RX_OUTTYPES),
            ("checksummd5", RX_CHECKSUM),
            ("checksumcksum", RX_CHECKSUM),
            ("checksumadler32", RX_CHECKSUM),
            ("outlocation", RX_CMSSITE),
            ("outtmplocation", RX_CMSSITE),
            ("acquisitionera", RX_TASKNAME),  #TODO Do we really need this?
            ("outdatasetname", RX_OUTDSLFN),  #TODO temporary, need to come up with a regex
            ("outlfn", RX_PARENTLFN),
        )
        for argName, regexp in mandatoryStrings:
            validate_str(argName, param, safe, regexp, optional=False)
        validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
        validate_num("events", param, safe, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
        validate_num("directstageout", param, safe, optional=True)
        # 'F' if not provided
        stageoutFlag = 'T' if safe.kwargs["directstageout"] else 'F'
        safe.kwargs["directstageout"] = stageoutFlag
    elif method in ('POST',):
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_str("outlfn", param, safe, RX_LFN, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
    elif method in ('GET',):
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
    elif method in ('DELETE',):
        authz_operator()
        validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
        validate_str("hours", param, safe, RX_HOURS, optional=True)
        # both given or both missing is an error
        if bool(safe.kwargs["taskname"]) is bool(safe.kwargs["hours"]):
            message = ("You have to specify a taskname or a number of hours."
                       " Files of this task or created before the number of hours"
                       " will be deleted."
                       " Only one of the two parameters can be specified.")
            raise InvalidParameter(message)
def validate(self, apiobj, method, api, param, safe):
    """Validate all the input parameters as enforced by the WMCore.REST module.

    The login is checked first, then one set of checks is run depending on
    the HTTP method. Values are checked through the ``validate_*`` helpers
    and read back from ``safe.kwargs`` where a cross-parameter check or a
    normalisation is needed.

    :arg apiobj: not used by this method.
    :arg str method: HTTP method; 'PUT', 'POST', 'GET' and 'DELETE' have
        dedicated checks, any other value only goes through the login check.
    :arg api: not used by this method.
    :arg param: raw request arguments, handed over to the ``validate_*`` helpers.
    :arg safe: holder of the validated arguments (``safe.kwargs``).
    :raises InvalidParameter: on PUT when the runs and lumis lists differ in
        length or when jobid and pandajobid are both set or both unset; on
        DELETE when taskname and hours are both set or both unset.
    """
    authz_login_valid()

    if method in ['PUT']:
        #TODO check optional parameter
        #TODO check all the regexp
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
        validate_numlist("outfileruns", param, safe)
        # one lumi list is expected per run
        if len(safe.kwargs["outfileruns"]) != len(safe.kwargs["outfilelumis"]):
            raise InvalidParameter(
                "The number of runs and the number of lumis lists are different")
        validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
        validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
        validate_str("jobid", param, safe, RX_JOBID, optional=True)
        #TODO: for backward compatibility. Get rid of the pandajobid once all jobs using it
        #      are done (one month after this gets released).
        #      The pandajobid handling below (up to the "outsize" check) can then be deleted.
        validate_num("pandajobid", param, safe, optional=True)
        # exactly one of the two identifiers must be truthy
        if bool(safe.kwargs["jobid"]) == bool(safe.kwargs["pandajobid"]):
            raise InvalidParameter(
                "Only one among jobid and pandajobid should be set")
        #Oracle/cx_oracle/python stack does not like None for numbers, even if they are nullable
        if safe.kwargs["pandajobid"] is None:
            safe.kwargs["pandajobid"] = 0
        validate_num("outsize", param, safe, optional=False)
        validate_str("publishdataname", param, safe, RX_PUBLISH, optional=False)
        validate_str("appver", param, safe, RX_CMSSW, optional=False)
        validate_str("outtype", param, safe, RX_OUTTYPES, optional=False)
        validate_str("checksummd5", param, safe, RX_CHECKSUM, optional=False)
        validate_str("checksumcksum", param, safe, RX_CHECKSUM, optional=False)
        validate_str("checksumadler32", param, safe, RX_CHECKSUM, optional=False)
        validate_str("outlocation", param, safe, RX_CMSSITE, optional=False)
        validate_str("outtmplocation", param, safe, RX_CMSSITE, optional=False)
        validate_str("acquisitionera", param, safe, RX_TASKNAME, optional=False)  #TODO Do we really need this?
        validate_str("outdatasetname", param, safe, RX_OUTDSLFN, optional=False)  #TODO temporary, need to come up with a regex
        validate_str("outlfn", param, safe, RX_PARENTLFN, optional=False)
        validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
        validate_num("events", param, safe, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
        validate_num("directstageout", param, safe, optional=True)
        # stored as a 'T'/'F' flag; 'F' if not provided
        safe.kwargs["directstageout"] = 'T' if safe.kwargs["directstageout"] else 'F'
    elif method in ['POST']:
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_str("outlfn", param, safe, RX_LFN, optional=False)
        validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
    elif method in ['GET']:
        validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
        validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
        validate_num("howmany", param, safe, optional=True)
        validate_strlist("lfn", param, safe, RX_LFN)
    elif method in ['DELETE']:
        authz_operator()
        validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
        validate_str("hours", param, safe, RX_HOURS, optional=True)
        # exactly one of taskname / hours must be given
        if bool(safe.kwargs["taskname"]) == bool(safe.kwargs["hours"]):
            raise InvalidParameter(
                "You have to specify a taskname or a number of hours. "
                "Files of this task or created before the number of hours"
                " will be deleted. Only one of the two parameters can be specified.")
def get(self, subresource, objecttype, taskname, username, tarballname):  # pylint: disable=redefined-builtin
    """
    Serve the S3 cache operations selected by ``subresource``.

    :arg str subresource: the specific information to be accessed; the values
        handled here are 'upload', 'retrieve', 'download', 'list' and 'used'.
    :arg str objecttype: 'sandbox' selects the per-user sandbox area; any other
        value is used as the file name inside the task directory. For 'list'
        it is an optional substring filter on the file names.
    :arg str taskname: task owning the file; required when objecttype is not 'sandbox'.
    :arg str username: owner of the objects; required for 'list', 'used' and
        for retrieving/downloading a sandbox (uploads use the caller's login).
    :arg str tarballname: sandbox file name; required when objecttype is 'sandbox'.
    :return: 'upload': ``[url, fields]`` (``["", {}]`` when the sandbox is
        already stored); 'download': the pre-signed URL; 'retrieve': the file
        content; 'list': list of file names; 'used': ``[MBytes]``.
        Nothing is returned for any other subresource.
    :raises MissingParameter: when an argument needed by the subresource is missing.
    :raises ExecutionError: when an S3 call fails with a ClientError.
    """
    authenticatedUserName = cherrypy.request.user['login']  # the username of who's calling

    # a bit of code common to 3 subresource's: validate args and prepare the s3_objectKey inside the bucket
    if subresource in ['upload', 'retrieve', 'download']:
        if not objecttype:
            raise MissingParameter("objecttype is missing")
        if objecttype == 'sandbox':
            if not tarballname:
                raise MissingParameter("tarballname is missing")
            if subresource == 'upload':
                ownerName = authenticatedUserName
            else:
                # the owner comes from the request: refuse to build a key without it
                if not username:
                    raise MissingParameter("username is missing")
                ownerName = username
            # sandbox goes in bucket/username/sandboxes/
            objectPath = ownerName + '/sandboxes/' + tarballname
        else:
            if not taskname:
                raise MissingParameter("taskname is missing")
            ownerName = getUsernameFromTaskname(taskname)
            # task related files go in bucket/username/taskname/
            objectPath = ownerName + '/' + taskname + '/' + objecttype
        s3_objectKey = fromNewBytesToString(objectPath)

    if subresource == 'upload':
        # returns a dictionary with the information to upload a file with a POST
        # via a "PreSigned URL". It can return an empty string '' as URL to indicate that
        # a sandbox upload request refers to an existing object with same name
        # WMCore REST does not allow to return None
        authz_operator(username=ownerName, group='crab3', role='operator')
        if objecttype == 'sandbox':
            # we only upload same sandbox once
            try:
                # from https://stackoverflow.com/a/38376288
                self.s3_client.head_object(Bucket=self.s3_bucket, Key=s3_objectKey)
            except ClientError:
                # deliberate best effort: any client error is taken as "not there yet"
                pass
            else:
                return ["", {}]  # this tells client not to upload
        expiration = 60 * 60  # 1 hour is good for retries and debugging
        try:
            # this returns a dictionary with a 'url' string and a 'fields'
            # dictionary (policy, AWSAccessKeyId, key, signature)
            preSignedUrl = self.s3_client.generate_presigned_post(
                self.s3_bucket, s3_objectKey, ExpiresIn=expiration)
        except ClientError as e:
            raise ExecutionError("Connection to s3.cern.ch failed:\n%s" % str(e))
        # somehow it does not work to return preSignedUrl as a single object
        return [preSignedUrl['url'], preSignedUrl['fields']]

    if subresource == 'download':
        authz_operator(username=ownerName, group='crab3', role='operator')
        # returns a PreSignedUrl to download the file within the expiration time
        expiration = 60 * 60  # 1 hour default is good for retries and debugging
        if objecttype in ['debugfiles', 'clientlog', 'twlog']:
            expiration = 60 * 60 * 24 * 30  # for logs make url valid as long as we keep files (1 month)
        try:
            preSignedUrl = self.s3_client.generate_presigned_url(
                'get_object',
                Params={'Bucket': self.s3_bucket, 'Key': s3_objectKey},
                ExpiresIn=expiration)
        except ClientError as e:
            raise ExecutionError("Connection to s3.cern.ch failed:\n%s" % str(e))
        return preSignedUrl

    if subresource == 'retrieve':
        # download from S3 into a temporary file, read it, and return content to caller
        authz_operator(username=ownerName, group='crab3', role='operator')
        tempFile = '/tmp/boto.' + uuid.uuid4().hex
        try:
            try:
                self.s3_client.download_file(self.s3_bucket, s3_objectKey, tempFile)
            except ClientError as e:
                raise ExecutionError("Connection to s3.cern.ch failed:\n%s" % str(e))
            with open(tempFile) as f:
                txt = f.read()
        finally:
            # never leave the temporary file behind, whatever failed above
            if os.path.exists(tempFile):
                os.remove(tempFile)
        return txt

    if subresource == 'list':
        # list all files (aka objects, aka keys in S3 lingo) for a given username
        # if arg objecttype is present, returns only the file names for that objecttype
        if not username:
            raise MissingParameter('username is missing')
        # In S3 we always need to retrieve all keys even if some filtering/compression
        # will be applied before reporting, since there is a limit of 1K key per call,
        # multiple calls will be needed, S3 paginators make that easy
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html
        # We use S3 prefix to limit retrieved list to a user, since in our buckets
        # file keys always have the form username/... see:
        # https://github.com/dmwm/CRABServer/wiki/CRABCache-replacement-with-S3#bucket-organization and
        # https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html
        # The trailing '/' keeps user "bob" from also matching keys of "bobby".
        userPrefix = fromNewBytesToString(username) + '/'
        paginator = self.s3_client.get_paginator('list_objects_v2')
        fileNames = []
        for page in paginator.paginate(Bucket=self.s3_bucket, Prefix=userPrefix):
            # 'Contents' is absent from the page when no key matches
            for item in page.get('Contents', []):
                key = item['Key']
                # drop the leading "username/" (a prefix, not a character set)
                if key.startswith(userPrefix):
                    key = key[len(userPrefix):]
                fileNames.append(key)
        if objecttype:
            fileNames = [name for name in fileNames if objecttype in name]
        return fileNames

    if subresource == 'used':
        # return space used by username, in MBytes (rounded to integer)
        if not username:
            raise MissingParameter('username is missing')
        # same "username/" prefix as for 'list', so only this user's keys are counted
        userPrefix = fromNewBytesToString(username) + '/'
        paginator = self.s3_client.get_paginator('list_objects_v2')
        # S3 records object size in bytes, see:
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.list_objects_v2
        usedBytes = 0
        for page in paginator.paginate(Bucket=self.s3_bucket, Prefix=userPrefix):
            # 'Contents' is absent from the page when the user has no objects
            usedBytes += sum(item['Size'] for item in page.get('Contents', []))
        usedMBytes = usedBytes // 1024 // 1024
        # WMCore REST wants to return lists
        return [usedMBytes]
def validate(self, apiobj, method, api, param, safe):
    """Check the request arguments as enforced by the WMCore.REST module.

    The login is verified first; PUT, POST, GET and DELETE then run their
    own sequence of ``validate_*`` calls. Small local wrappers only shorten
    the calls, the order of the checks is unchanged.

    :arg str method: HTTP method of the request.
    :arg param: raw request arguments, passed to the ``validate_*`` helpers.
    :arg safe: holder of the validated arguments (``safe.kwargs``).
    :raises InvalidParameter: on PUT when runs and lumis lists have different
        lengths; on DELETE when taskname and hours are both set or both unset.
    """

    def mandatoryStr(argName, regexp):
        validate_str(argName, param, safe, regexp, optional=False)

    def optionalStr(argName, regexp):
        validate_str(argName, param, safe, regexp, optional=True)

    def mandatoryNum(argName):
        validate_num(argName, param, safe, optional=False)

    def optionalNum(argName):
        validate_num(argName, param, safe, optional=True)

    authz_login_valid()

    if method == 'PUT':
        mandatoryStr("taskname", RX_TASKNAME)
        validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
        validate_strlist("outfileruns", param, safe, RX_RUNS)
        runsCount = len(safe.kwargs["outfileruns"])
        lumisCount = len(safe.kwargs["outfilelumis"])
        if runsCount != lumisCount:
            raise InvalidParameter(
                "The number of runs and the number of lumis lists are different")
        validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
        # inparentlfns will be inserted in Oracle as CLOB, so it must be a string
        parentLfns = safe.kwargs['inparentlfns']
        safe.kwargs['inparentlfns'] = str(parentLfns)
        optionalStr("globalTag", RX_GLOBALTAG)
        optionalStr("jobid", RX_JOBID)
        safe.kwargs["pandajobid"] = 0
        mandatoryNum("outsize")
        mandatoryStr("publishdataname", RX_PUBLISH)
        mandatoryStr("appver", RX_CMSSW)
        mandatoryStr("outtype", RX_OUTTYPES)
        mandatoryStr("checksummd5", RX_CHECKSUM)
        mandatoryNum("checksumcksum")
        mandatoryStr("checksumadler32", RX_CHECKSUM)
        mandatoryStr("outlocation", RX_CMSSITE)
        mandatoryStr("outtmplocation", RX_CMSSITE)
        mandatoryStr("acquisitionera", RX_TASKNAME)
        mandatoryStr("outdatasetname", RX_OUTDSLFN)
        # need to use RX_PARENTLFN because same API is also used for input metadata
        mandatoryStr("outlfn", RX_PARENTLFN)
        optionalStr("outtmplfn", RX_LFN)
        mandatoryNum("events")
        optionalStr("filestate", RX_FILESTATE)
        optionalNum("directstageout")
        # turn the number into a 'T'/'F' flag; 'F' if not provided
        if safe.kwargs["directstageout"]:
            safe.kwargs["directstageout"] = 'T'
        else:
            safe.kwargs["directstageout"] = 'F'
    elif method == 'POST':
        mandatoryStr("taskname", RX_TASKNAME)
        mandatoryStr("outlfn", RX_LFN)
        mandatoryStr("filestate", RX_FILESTATE)
    elif method == 'GET':
        mandatoryStr("taskname", RX_TASKNAME)
        mandatoryStr("filetype", RX_OUTTYPES)
        optionalNum("howmany")
        validate_strlist("lfn", param, safe, RX_LFN)
    elif method == 'DELETE':
        authz_operator()
        optionalStr("taskname", RX_TASKNAME)
        optionalStr("hours", RX_HOURS)
        taskGiven = bool(safe.kwargs["taskname"])
        hoursGiven = bool(safe.kwargs["hours"])
        # exactly one of the two selectors is accepted
        if taskGiven == hoursGiven:
            raise InvalidParameter(
                "You have to specify a taskname or a number of hours."
                " Files of this task or created before the number of hours"
                " will be deleted."
                " Only one of the two parameters can be specified.")