Пример #1
0
def upload(rse_settings,
           lfns,
           source_dir=None,
           force_pfn=None,
           force_scheme=None,
           transfer_timeout=None,
           delete_existing=False,
           sign_service=None,
           auth_token=None):
    """
        Uploads a file to the connected storage.
        Providing a list indicates the bulk mode.

        :param rse_settings:   RSE attributes
        :param lfns:        a single dict or a list with dicts containing 'scope' and 'name'.
                            Examples:
                            [
                            {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                            {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}
                            ]
                            If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
                            Entries without 'adler32' or 'filesize' are rejected without being uploaded.
        :param source_dir:  path to the local directory including the source files
        :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
        :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
        :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
        :param delete_existing: skip the "already exists" check; on protocols with rename support an
                                existing replica at the target PFN is deleted before the upload
        :param sign_service: use the given service (e.g. gcs, s3, swift) to sign the URL
        :param auth_token: Optionally passing JSON Web Token (OIDC) string for authentication

        :returns: if exactly one per-file result was recorded and it is a success:
                  {'success': True, 'pfn': <pfn>}.
                  Otherwise {0: gs, 1: ret, 'success': gs, 'pfn': <last pfn or None>} where gs is the
                  global status and ret maps 'scope:name' to True or to the exception of that file.
                  Successful uploads over a protocol without rename support are not recorded in ret.

        :raises RucioException: the PFN lookup returned an exception, or the only recorded
                                per-file result is an exception (that exception is re-raised)
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode
    # Stays None when no LFN gets as far as PFN resolution, so the final
    # return statement never touches an unbound name.
    pfn = None

    protocol = create_protocol(rse_settings,
                               'write',
                               scheme=force_scheme,
                               auth_token=auth_token)
    protocol.connect()
    protocol_delete = create_protocol(rse_settings,
                                      'delete',
                                      auth_token=auth_token)
    protocol_delete.connect()

    lfns = lfns if isinstance(lfns, list) else [lfns]
    try:
        for lfn in lfns:
            base_name = lfn.get('filename', lfn['name'])
            name = lfn.get('name', base_name)
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException(
                    'Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException(
                    'Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
                readpfn = force_pfn
            else:
                pfn = list(protocol.lfns2pfns(make_valid_did(lfn)).values())[0]
                if isinstance(pfn, exception.RucioException):
                    raise pfn
                readpfn = pfn
                if sign_service is not None:
                    # need a separate signed URL for read operations (exists and stat);
                    # it must be derived before pfn is replaced by the signed write URL
                    readpfn = __get_signed_url(rse_settings['rse'], sign_service,
                                               'read', pfn)  # NOQA pylint: disable=undefined-variable
                    pfn = __get_signed_url(rse_settings['rse'], sign_service,
                                           'write', pfn)  # NOQA pylint: disable=undefined-variable

            # First check if renaming operation is supported
            if protocol.renaming:
                tmp_pfn = '%s.rucio.upload' % pfn

                # Check if file replica is already on the storage system
                if protocol.overwrite is False and delete_existing is False and protocol.exists(pfn):
                    ret[key] = exception.FileReplicaAlreadyExists(
                        'File %s in scope %s already exists on storage as PFN %s' %
                        (name, scope, pfn))
                    gs = False
                    continue

                # Check for left over of previous unsuccessful attempts
                if protocol.exists(tmp_pfn):
                    try:
                        protocol_delete.delete('%s.rucio.upload' % list(
                            protocol_delete.lfns2pfns(
                                make_valid_did(lfn)).values())[0])
                    except Exception as e:
                        ret[key] = exception.RSEOperationNotSupported(
                            'Unable to remove temporary file %s.rucio.upload: %s'
                            % (pfn, str(e)))
                        gs = False
                        continue

                # Check for previous completed uploads that have to be removed before upload
                if delete_existing and protocol.exists('%s' % pfn):
                    try:
                        protocol_delete.delete('%s' % list(
                            protocol_delete.lfns2pfns(
                                make_valid_did(lfn)).values())[0])
                    except Exception as e:
                        ret[key] = exception.RSEOperationNotSupported(
                            'Unable to remove file %s: %s' % (pfn, str(e)))
                        gs = False
                        continue

                try:  # Try uploading file
                    protocol.put(base_name,
                                 tmp_pfn,
                                 source_dir,
                                 transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = _verify_uploaded_replica(protocol, tmp_pfn, lfn,
                                                     rse_settings)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                if valid:  # The upload finished successful and the file can be renamed
                    try:
                        protocol.rename(tmp_pfn, pfn)
                        ret[key] = True
                    except Exception as e:
                        gs = False
                        ret[key] = e
                else:
                    gs = False
                    ret[key] = exception.RucioException(
                        'Replica %s is corrupted.' % pfn)
            else:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and delete_existing is False and protocol.exists(readpfn):
                    ret[key] = exception.FileReplicaAlreadyExists(
                        'File %s in scope %s already exists on storage as PFN %s' %
                        (name, scope, pfn))
                    gs = False
                    continue

                try:  # Try uploading file
                    protocol.put(base_name,
                                 pfn,
                                 source_dir,
                                 transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = _verify_uploaded_replica(protocol, pfn, lfn,
                                                     rse_settings)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                # NOTE: a successful upload is deliberately left unrecorded here,
                # exactly as before; only failures are written to ret.
                if not valid:
                    gs = False
                    ret[key] = exception.RucioException(
                        'Replica %s is corrupted.' % pfn)
    finally:
        # Release both connections even when an error escapes the loop.
        protocol.close()
        protocol_delete.close()

    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        return {'success': outcome, 'pfn': pfn}
    return {0: gs, 1: ret, 'success': gs, 'pfn': pfn}


def _verify_uploaded_replica(protocol, pfn, lfn, rse_settings):
    """
        Compares the storage metadata of an uploaded replica with the LFN metadata.

        :param protocol:     connected protocol object used to stat the replica
        :param pfn:          PFN of the replica to check
        :param lfn:          dict with the expected checksums and 'filesize'
        :param rse_settings: RSE attributes; 'verify_checksum' is read when the stat
                             raises NotImplementedError or RSEChecksumUnavailable

        :returns: True if at least one globally supported checksum present on both sides matches,
                  otherwise the result of the filesize comparison when both sides provide one,
                  otherwise False. True as well when the stat raises NotImplementedError or
                  RSEChecksumUnavailable and rse_settings['verify_checksum'] is False.

        :raises RucioException: the stat raised one of those two errors and verification is not disabled
    """
    try:
        stats = _retry_protocol_stat(protocol, pfn)
        # Verify all supported checksums and keep track of the verified ones
        verified_checksums = [
            stats[checksum_name] == lfn[checksum_name]
            for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS
            if checksum_name in stats and checksum_name in lfn
        ]
        valid = any(verified_checksums)
        # NOTE(review): the filesize fallback also applies when a checksum was
        # compared and did not match -- behaviour kept as is, confirm it is intended.
        if not valid and 'filesize' in stats and 'filesize' in lfn:
            valid = stats['filesize'] == lfn['filesize']
        return valid
    except (NotImplementedError, exception.RSEChecksumUnavailable):
        if rse_settings['verify_checksum'] is False:
            return True
        raise exception.RucioException('Checksum not validated')
Пример #2
0
def upload(rse_settings,
           lfns,
           source_dir=None,
           force_pfn=None,
           force_scheme=None,
           transfer_timeout=None):
    """
        Uploads a file to the connected storage.
        Providing a list indicates the bulk mode.

        :param rse_settings: RSE attributes
        :param lfns:        a single dict or a list with dicts containing 'scope' and 'name'.
                            Examples:
                            [
                            {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                            {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}
                            ]
                            If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
                            Entries without 'adler32' or 'filesize' are rejected without being uploaded.
        :param source_dir:  path to the local directory including the source files
        :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
        :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
        :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it

        :returns: if exactly one per-file result was recorded and it is a success:
                  {'success': True, 'pfn': <pfn>}.
                  Otherwise the list [gs, ret] where gs is the global status and ret maps
                  'scope:name' to True or to the exception of that file.
                  Successful uploads over a protocol without rename support are not recorded in ret.

        :raises RucioException: the PFN lookup returned an exception, or the only recorded
                                per-file result is an exception (that exception is re-raised)
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode

    protocol = create_protocol(rse_settings, 'write', scheme=force_scheme)
    protocol.connect()
    protocol_delete = create_protocol(rse_settings, 'delete')
    protocol_delete.connect()

    lfns = lfns if isinstance(lfns, list) else [lfns]
    try:
        for lfn in lfns:
            base_name = lfn.get('filename', lfn['name'])
            name = lfn.get('name', base_name)
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException(
                    'Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException(
                    'Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
            else:
                pfn = list(protocol.lfns2pfns(make_valid_did(lfn)).values())[0]
                if isinstance(pfn, exception.RucioException):
                    raise pfn

            # Check if file replica is already on the storage system
            if protocol.overwrite is False and protocol.exists(pfn):
                ret[key] = exception.FileReplicaAlreadyExists(
                    'File %s in scope %s already exists on storage as PFN %s' %
                    (name, scope, pfn))
                gs = False
                continue

            # First check if renaming operation is supported
            if protocol.renaming:
                tmp_pfn = '%s.rucio.upload' % pfn

                # Check for left over of previous unsuccessful attempts
                if protocol.exists(tmp_pfn):
                    try:
                        protocol_delete.delete('%s.rucio.upload' % list(
                            protocol_delete.lfns2pfns(
                                make_valid_did(lfn)).values())[0])
                    except Exception as e:
                        ret[key] = exception.RSEOperationNotSupported(
                            'Unable to remove temporary file %s.rucio.upload: %s'
                            % (pfn, str(e)))
                        gs = False
                        continue

                try:  # Try uploading file
                    protocol.put(base_name,
                                 tmp_pfn,
                                 source_dir,
                                 transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = _stat_confirms_upload(protocol, tmp_pfn, lfn,
                                                  rse_settings)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                if valid:  # The upload finished successful and the file can be renamed
                    try:
                        protocol.rename(tmp_pfn, pfn)
                        ret[key] = True
                    except Exception as e:
                        gs = False
                        ret[key] = e
                else:
                    gs = False
                    ret[key] = exception.RucioException(
                        'Replica %s is corrupted.' % pfn)
            else:
                try:  # Try uploading file
                    protocol.put(base_name,
                                 pfn,
                                 source_dir,
                                 transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = _stat_confirms_upload(protocol, pfn, lfn,
                                                  rse_settings)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                # NOTE: a successful upload is deliberately left unrecorded here,
                # exactly as before; only failures are written to ret.
                if not valid:
                    gs = False
                    ret[key] = exception.RucioException(
                        'Replica %s is corrupted.' % pfn)
    finally:
        # Release both connections even when an error escapes the loop.
        protocol.close()
        protocol_delete.close()

    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        # A lone non-exception entry is only ever written after a successful
        # rename, so pfn is bound at this point.
        return {'success': outcome, 'pfn': pfn}
    return [gs, ret]


def _stat_confirms_upload(protocol, pfn, lfn, rse_settings):
    """
        Compares the storage metadata of an uploaded replica with the LFN metadata.

        :param protocol:     connected protocol object used to stat the replica
        :param pfn:          PFN of the replica to check
        :param lfn:          dict with the expected 'adler32' and 'filesize'
        :param rse_settings: RSE attributes; 'verify_checksum' is read when the stat
                             raises RSEChecksumUnavailable

        :returns: the adler32 comparison when both sides provide one; otherwise the filesize
                  comparison when both sides provide one; otherwise None.
                  True when the stat raises RSEChecksumUnavailable and
                  rse_settings['verify_checksum'] is False.

        :raises RucioException: the checksum is unavailable and verification is not disabled
    """
    valid = None
    try:
        stats = protocol.stat(pfn)
        if 'adler32' in stats and 'adler32' in lfn:
            valid = stats['adler32'] == lfn['adler32']
        # The filesize is only consulted when no checksum comparison took place.
        if valid is None and 'filesize' in stats and 'filesize' in lfn:
            valid = stats['filesize'] == lfn['filesize']
    except exception.RSEChecksumUnavailable:
        if rse_settings['verify_checksum'] is False:
            valid = True
        else:
            raise exception.RucioException('Checksum not validated')
    return valid
Пример #3
0
    def _upload_item(self,
                     rse_settings,
                     rse_attributes,
                     lfn,
                     source_dir=None,
                     domain='wan',
                     force_pfn=None,
                     force_scheme=None,
                     transfer_timeout=None,
                     delete_existing=False,
                     sign_service=None):
        """
            Uploads a file to the connected storage.

            :param rse_settings: dictionary containing the RSE settings
            :param rse_attributes: dictionary containing the RSE attribute key value pairs;
                                   'skip_upload_stat' disables the stat-based verification after the upload
            :param lfn:         a single dict containing 'scope' and 'name'.
                                Example:
                             {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'}
                              If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
            :param source_dir:  path to the local directory including the source files
            :param domain:      domain passed on when the write, read and delete protocols are created
            :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
            :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
            :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
            :param delete_existing: skip the "already exists" check and delete the file at the target location before uploading
            :param sign_service: use the given service (e.g. gcs, s3, swift) to sign the URL

            :returns: the PFN the file was uploaded to (the signed write URL if sign_service is set)

            :raises FileReplicaAlreadyExists: the replica exists, overwriting is disabled and delete_existing is False
            :raises RSEOperationNotSupported: removing a previous file failed or the upload itself failed
            :raises RucioException(msg): general exception with msg for more details.
        """
        logger = self.logger

        # Construct protocol for write and read operation.
        protocol_write = self._create_protocol(rse_settings,
                                               'write',
                                               force_scheme=force_scheme,
                                               domain=domain)
        protocol_read = self._create_protocol(rse_settings,
                                              'read',
                                              domain=domain)

        try:
            base_name = lfn.get('filename', lfn['name'])
            name = lfn.get('name', base_name)
            scope = lfn['scope']

            # Conditional lfn properties
            if 'adler32' not in lfn:
                logger(logging.WARNING,
                       'Missing checksum for file %s:%s' % (lfn['scope'], name))

            # Getting pfn
            pfn = None
            readpfn = None
            try:
                pfn = list(protocol_write.lfns2pfns(
                    make_valid_did(lfn)).values())[0]
                readpfn = list(
                    protocol_read.lfns2pfns(make_valid_did(lfn)).values())[0]
                logger(logging.DEBUG,
                       'The PFN created from the LFN: {}'.format(pfn))
            except Exception as error:
                logger(logging.WARNING, 'Failed to create PFN for LFN: %s' % lfn)
                logger(logging.DEBUG, str(error), exc_info=True)
            if force_pfn:
                pfn = force_pfn
                readpfn = pfn
                logger(logging.DEBUG, 'The given PFN is used: {}'.format(pfn))

            # Auth. mostly for object stores
            if sign_service:
                # Sign the read URL first: both URLs have to be derived from the
                # unsigned PFN, so pfn must not be overwritten before this call.
                readpfn = self.client.get_signed_url(rse_settings['rse'],
                                                     sign_service, 'read', pfn)  # NOQA pylint: disable=undefined-variable
                pfn = self.client.get_signed_url(rse_settings['rse'], sign_service,
                                                 'write', pfn)  # NOQA pylint: disable=undefined-variable

            # Create a name of tmp file if renaming operation is supported
            pfn_tmp = '%s.rucio.upload' % pfn if protocol_write.renaming else pfn
            readpfn_tmp = '%s.rucio.upload' % readpfn if protocol_write.renaming else readpfn

            # Either DID exists or not register_after_upload
            if protocol_write.overwrite is False and delete_existing is False and protocol_read.exists(
                    readpfn):
                raise FileReplicaAlreadyExists(
                    'File %s in scope %s already exists on storage as PFN %s' %
                    (name, scope, pfn))  # wrong exception ?

            # Removing tmp from earlier attempts
            if protocol_read.exists(readpfn_tmp):
                logger(logging.DEBUG,
                       'Removing remains of previous upload attemtps.')
                try:
                    # Construct protocol for delete operation.
                    protocol_delete = self._create_protocol(rse_settings,
                                                            'delete',
                                                            domain=domain)
                    try:
                        protocol_delete.delete('%s.rucio.upload' % list(
                            protocol_delete.lfns2pfns(
                                make_valid_did(lfn)).values())[0])
                    finally:
                        protocol_delete.close()
                except Exception as e:
                    raise RSEOperationNotSupported(
                        'Unable to remove temporary file %s.rucio.upload: %s' %
                        (pfn, str(e)))

            # Removing not registered files from earlier attempts
            if delete_existing:
                logger(
                    logging.DEBUG,
                    'Removing not-registered remains of previous upload attemtps.')
                try:
                    # Construct protocol for delete operation.
                    protocol_delete = self._create_protocol(rse_settings,
                                                            'delete',
                                                            domain=domain)
                    try:
                        protocol_delete.delete('%s' % list(
                            protocol_delete.lfns2pfns(
                                make_valid_did(lfn)).values())[0])
                    finally:
                        protocol_delete.close()
                except Exception as error:
                    raise RSEOperationNotSupported('Unable to remove file %s: %s' %
                                                   (pfn, str(error)))

            # Process the upload of the tmp file
            try:
                retry(protocol_write.put,
                      base_name,
                      pfn_tmp,
                      source_dir,
                      transfer_timeout=transfer_timeout)(mtries=2, logger=logger)
                logger(logging.INFO,
                       'Successful upload of temporary file. {}'.format(pfn_tmp))
            except Exception as error:
                raise RSEOperationNotSupported(str(error))

            # Is stat after that upload allowed?
            skip_upload_stat = rse_attributes.get('skip_upload_stat', False)
            logger(logging.DEBUG, 'skip_upload_stat=%s', skip_upload_stat)

            # Verify the uploaded file through a stat, unless the RSE opts out of it.
            if not skip_upload_stat:
                stats = self._retry_protocol_stat(protocol_write, pfn_tmp)
                if not isinstance(stats, dict):
                    raise RucioException(
                        'Could not get protocol.stats for given PFN: %s' % pfn)

                # The checksum and filesize check
                if ('filesize' in stats) and ('filesize' in lfn):
                    logger(
                        logging.DEBUG, 'Filesize: Expected=%s Found=%s' %
                        (lfn['filesize'], stats['filesize']))
                    if int(stats['filesize']) != int(lfn['filesize']):
                        raise RucioException(
                            'Filesize mismatch. Source: %s Destination: %s' %
                            (lfn['filesize'], stats['filesize']))
                if rse_settings['verify_checksum'] is not False:
                    if ('adler32' in stats) and ('adler32' in lfn):
                        logger(
                            logging.DEBUG, 'Checksum: Expected=%s Found=%s' %
                            (lfn['adler32'], stats['adler32']))
                        # Leading zeros are ignored on both sides of the comparison.
                        if str(stats['adler32']).lstrip('0') != str(
                                lfn['adler32']).lstrip('0'):
                            raise RucioException(
                                'Checksum mismatch. Source: %s Destination: %s'
                                % (lfn['adler32'], stats['adler32']))

            # The upload finished successful and the file can be renamed
            try:
                if protocol_write.renaming:
                    logger(logging.DEBUG,
                           'Renaming file %s to %s' % (pfn_tmp, pfn))
                    protocol_write.rename(pfn_tmp, pfn)
            except Exception:
                raise RucioException('Unable to rename the tmp file %s.' % pfn_tmp)

            return pfn
        finally:
            # Release the connections on every exit path, not only on success.
            protocol_write.close()
            protocol_read.close()