Пример #1
0
    def test_put_mgr_ok_multi(self):
        """(RSE/PROTOCOLS): Put multiple files to storage (Success)"""
        # For the 'ssh1' host the files are described by their MD5 checksum and
        # the upload receives an explicit 'impl' argument; for any other host
        # adler32 checksums are used and no 'impl' is passed.
        use_md5 = self.rse_settings['protocols'][0]['hostname'] == 'ssh1'
        scope = 'user.%s' % self.user
        names = ('1_rse_local_put.raw', '2_rse_local_put.raw')

        files = []
        for name in names:
            path = '%s/%s' % (self.tmpdir, name)
            entry = {'name': name, 'scope': scope}
            if use_md5:
                entry['md5'] = md5(path)
            else:
                entry['adler32'] = adler32(path)
            # os.path.getsize is the documented way to get the size; the old
            # os.stat(...)[os.path.stat.ST_SIZE] depended on os.path happening
            # to expose the 'stat' module as an attribute.
            entry['filesize'] = os.path.getsize(path)
            files.append(entry)

        upload_kwargs = {'source_dir': self.tmpdir, 'vo': self.vo}
        if use_md5:
            upload_kwargs['impl'] = self.impl
        result = mgr.upload(self.rse_settings, files, **upload_kwargs)

        status = result[0]
        details = result[1]
        # Both the global status and every per-file entry must be truthy
        if not (status and all(details['%s:%s' % (scope, name)] for name in names)):
            raise Exception('Return not as expected: %s, %s' % (status, details))
Пример #2
0
    def test_utils_md5(self):
        """(COMMON/UTILS): test calculating MD5 of a file"""
        ret = md5(self.temp_file_1.name)
        assert isinstance(ret, str), "Object returned by utils.md5 is not a string"
        # The pattern is anchored at the end with \Z so that a string longer than
        # 32 hex characters is rejected (assuming `match` is re.match, which only
        # anchors the start of the string).
        assert match(r'[a-fA-F0-9]{32}\Z', ret) is not None, "String returned by utils.md5 is not a md5 hex digest"
        assert ret == '31d50dd6285b9ff9f8611d0762265d04', "Hex digest returned by utils.md5 is not the expected MD5 checksum"

        # Raw string: same pattern as before, without doubled backslashes
        expected_error = r"FATAL - could not get MD5 checksum of file no_file - \[Errno 2\] No such file or directory: 'no_file'"
        with pytest.raises(Exception, match=expected_error):
            md5('no_file')
Пример #3
0
    def test_utils_md5(self):
        """(COMMON/UTILS): test calculating MD5 of a file"""
        ret = md5(self.temp_file_1.name)
        assert_is_instance(ret, str, msg="Object returned by utils.md5 is not a string")
        # The pattern is anchored at the end with \Z so that a string longer than
        # 32 hex characters is rejected (assuming `match` is re.match, which only
        # anchors the start of the string).
        assert_is_not_none(match(r'[a-fA-F0-9]{32}\Z', ret), msg="String returned by utils.md5 is not a md5 hex digest")
        assert_equal(ret, '31d50dd6285b9ff9f8611d0762265d04',
                     msg="Hex digest returned by utils.md5 is not the expected MD5 checksum")

        with assert_raises(Exception) as e:
            md5('no_file')
        # str(exception) instead of exception.message: the 'message' attribute
        # only exists on Python 2 exceptions.
        assert_equal('FATAL - could not get MD5 checksum of file no_file - [Errno 2] No such file or directory: \'no_file\'', str(e.exception))
Пример #4
0
    def _collect_file_info(self, filepath, item):
        """
        Build the description of a local file.

        Works on a deep copy of ``item`` and adds path, size, checksum, GUID
        and state entries to it.
        (This function is meant to be used as class internal only)

        :param filepath: path where the file is stored
        :param item: input options for the given file

        :returns: a dictionary containing the input options plus all collected info
        """
        info = copy.deepcopy(item)
        dirname, basename = os.path.split(filepath)
        info.update({
            'path': filepath,
            'dirname': dirname,
            'basename': basename,
            'bytes': os.stat(filepath).st_size,
            'adler32': adler32(filepath),
            'md5': md5(filepath),
        })
        # The GUID lookup receives the dictionary with path/size/checksums already set
        info['meta'] = {'guid': self._get_file_guid(info)}
        info['state'] = 'C'

        # Missing or empty DID fields fall back to the defaults
        info['did_scope'] = info.get('did_scope') or self.default_file_scope
        info['did_name'] = info.get('did_name') or info['basename']

        return info
Пример #5
0
 def test_put_mgr_ok_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (Success)"""
     # For the 'ssh1' host the file is described by its MD5 checksum and the
     # upload receives an explicit 'impl' argument; for any other host the
     # adler32 checksum is used and no 'impl' is passed.
     use_md5 = self.rse_settings['protocols'][0]['hostname'] == 'ssh1'
     name = '3_rse_local_put.raw'
     path = '%s/%s' % (self.tmpdir, name)

     lfn = {'name': name, 'scope': 'user.%s' % self.user}
     if use_md5:
         lfn['md5'] = md5(path)
     else:
         lfn['adler32'] = adler32(path)
     # os.path.getsize is the documented way to get the size; the old
     # os.stat(...)[os.path.stat.ST_SIZE] depended on os.path happening to
     # expose the 'stat' module as an attribute.
     lfn['filesize'] = os.path.getsize(path)

     upload_kwargs = {'source_dir': self.tmpdir, 'vo': self.vo}
     if use_md5:
         upload_kwargs['impl'] = self.impl
     mgr.upload(self.rse_settings, lfn, **upload_kwargs)
Пример #6
0
    def _collect_file_info(self, filepath, settings):
        """
        Gather size, checksums and naming information for a local file.

        The input options are deep-copied; the copy is extended with the
        collected values and returned.
        (This function is meant to be used as class internal only)

        :param filepath: path where the file is stored
        :param settings: input options for the given file

        :returns: a dictionary containing all collected info and the input options
        """
        file_info = copy.deepcopy(settings)

        # Location of the file
        file_info['path'] = filepath
        file_info['dirname'] = os.path.dirname(filepath)
        file_info['basename'] = os.path.basename(filepath)

        # Size and checksums
        file_info['bytes'] = os.path.getsize(filepath)
        file_info['adler32'] = adler32(filepath)
        file_info['md5'] = md5(filepath)

        file_info['meta'] = {'guid': self._get_file_guid(file_info)}
        file_info['state'] = 'C'

        # Only fill in values the caller did not provide
        fallbacks = (
            ('did_scope', self.default_file_scope),
            ('did_name', file_info['basename']),
            ('lifetime', None),
        )
        for key, fallback in fallbacks:
            file_info.setdefault(key, fallback)

        return file_info
Пример #7
0
 def test_download_succeeds_md5only(self):
     """CLIENT(USER): Rucio download succeeds MD5 only"""
     # user has a file to upload
     filename = file_generator()
     file_md5 = md5(filename)
     filesize = stat(filename).st_size
     # The first 5 characters of the generated path are treated as the source
     # directory and the rest as the file name (presumably '/tmp/' - confirm
     # against file_generator).
     source_dir = filename[:5]
     name = filename[5:]
     lfn = {'name': name, 'scope': self.user, 'bytes': filesize, 'md5': file_md5}
     # user uploads file
     self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
     rse_settings = rsemgr.get_rse_info(self.def_rse)
     protocol = rsemgr.create_protocol(rse_settings, 'write')
     protocol.connect()
     # list(...) is required on Python 3, where dict views cannot be indexed
     pfn = list(protocol.lfns2pfns(lfn).values())[0]
     protocol.put(name, pfn, source_dir)
     protocol.close()
     remove(filename)
     # download files
     cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(self.user, name)
     print(self.marker + cmd)
     exitcode, out, err = execute(cmd)
     print(out, err)
     # search for the files with ls
     cmd = 'ls /tmp/{0}'.format(self.user)    # search in /tmp/
     print(self.marker + cmd)
     exitcode, out, err = execute(cmd)
     print(out, err)
     # The file name is escaped so that characters such as '.' are matched literally
     nose.tools.assert_not_equal(re.search(re.escape(name), out), None)
     # Best-effort removal of a local 'data13_hip' directory; failures are ignored on purpose
     try:
         for i in listdir('data13_hip'):
             unlink('data13_hip/%s' % i)
         rmdir('data13_hip')
     except Exception:
         pass
Пример #8
0
 def test_utils_md5(self):
     """(COMMON/UTILS): test calculating MD5 of a file"""
     ret = md5(self.temp_file_1.name)
     assert_is_instance(ret,
                        str,
                        msg="Object returned by tools.md5 is not a string")
     # The pattern is anchored at the end with \Z: without it any string that
     # merely starts with 32 hex characters would pass (assuming `match` is
     # re.match, which only anchors the start of the string).
     assert_is_not_none(
         match(r'[a-fA-F0-9]{32}\Z', ret),
         msg="String returned by tools.md5 is not a md5 hex digest")
Пример #9
0
 def test_utils_md5(self):
     """(COMMON/UTILS): test calculating MD5 of a file"""
     ret = md5(self.temp_file_1.name)
     assert_is_instance(ret,
                        str,
                        msg="Object returned by utils.md5 is not a string")
     # The pattern is anchored at the end with \Z so that a string longer than
     # 32 hex characters is rejected (assuming `match` is re.match, which only
     # anchors the start of the string).
     assert_is_not_none(
         match(r'[a-fA-F0-9]{32}\Z', ret),
         msg="String returned by utils.md5 is not a md5 hex digest")
     assert_equal(
         ret,
         '31d50dd6285b9ff9f8611d0762265d04',
         msg="Hex digest returned by utils.md5 is not the expected MD5 checksum")
Пример #10
0
    def test_download_fails_badmd5(self):
        """CLIENT(USER): Rucio download fails on MD5 mismatch"""
        # user has a file to upload
        filename = file_generator()
        file_md5 = md5(filename)
        filesize = stat(filename).st_size
        # The first 5 characters of the generated path are treated as the source
        # directory and the rest as the file name (presumably '/tmp/' - confirm
        # against file_generator).
        source_dir = filename[:5]
        name = filename[5:]
        lfn = {
            'name': name,
            'scope': self.user,
            'bytes': filesize,
            # deliberately not the real checksum of the file
            'md5': '0123456789abcdef0123456789abcdef'
        }
        # user uploads file
        self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
        rse_settings = rsemgr.get_rse_info(self.def_rse)
        protocol = rsemgr.create_protocol(rse_settings, 'write')
        protocol.connect()
        # list(...) is required on Python 3, where dict views cannot be indexed
        pfn = list(protocol.lfns2pfns(lfn).values())[0]
        protocol.put(name, pfn, source_dir)
        protocol.close()
        remove(filename)

        # download file
        cmd = 'rucio download --dir /tmp {0}:{1}'.format(self.user, name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)

        # a failure message 'Checksum mismatch : local _____ vs recorded _____' appears
        # (raw string: the backslash escapes are meant for the regex engine, not for Python)
        report = r'Checksum\ mismatch\ \:\ local\ {0}\ vs\ recorded\ 0123456789abcdef0123456789abcdef'.format(
            file_md5)
        print('searching', report, 'in', err)
        nose.tools.assert_not_equal(re.search(report, err), None)

        # The file should not exist
        cmd = 'ls /tmp/'  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # The file name is escaped so that characters such as '.' are matched literally
        nose.tools.assert_equal(re.search(re.escape(name), out), None)

        # Best-effort removal of a local 'data13_hip' directory; failures are ignored on purpose
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass
Пример #11
0
    def collect_file_info(self, filepath, settings):
        """
        Collect path, size, checksum, GUID and state information about a file.

        :param filepath: path where the file is stored
        :param settings: input options for the given file; they are deep-copied,
                         not modified

        :returns: a dictionary containing the input options plus the collected info
        """
        collected = copy.deepcopy(settings)
        collected.update({
            'path': filepath,
            'dirname': os.path.dirname(filepath),
            'basename': os.path.basename(filepath),
            'bytes': os.stat(filepath).st_size,
            'adler32': adler32(filepath),
            'md5': md5(filepath),
        })
        # The GUID lookup receives the dictionary with path/size/checksums already set
        guid = self.get_file_guid(collected)
        collected['meta'] = {'guid': guid}
        collected['state'] = 'C'

        # Values supplied by the caller take precedence over these defaults
        collected.setdefault('did_scope', self.default_file_scope)
        collected.setdefault('did_name', collected['basename'])
        collected.setdefault('lifetime', None)

        return collected
Пример #12
0
 def test_upload_adds_md5digest(self):
     """CLIENT(USER): Upload Checksums"""
     # Generate a local file and record its MD5 before uploading it
     local_path = file_generator()
     expected_md5 = md5(local_path)

     # Upload the file through the command line client
     upload_cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(self.def_rse,
                                                                  self.user,
                                                                  local_path)
     print(self.marker + upload_cmd)
     _exitcode, stdout, stderr = execute(upload_cmd)
     print(stdout)
     print(stderr)

     # The metadata of the new DID must carry the MD5 computed above; the DID
     # name is the generated path without its first 5 characters
     did_name = local_path[5:]
     metadata = self.did_client.get_metadata(scope=self.user, name=did_name)
     nose.tools.assert_in('md5', metadata)
     nose.tools.assert_equal(metadata['md5'], expected_md5)
     remove(local_path)
def check_storage(filepath):
    """
    Check size and checksums of a file on storage.

    :param filepath: path of the file to inspect

    :returns: a tuple ``(size, adler32_checksum, md5_checksum)``, or ``False``
              if the size or one of the checksums could not be obtained
    """
    logging.info("Checking %s", filepath)
    try:
        size = os.stat(filepath).st_size
        # Left-pad with zeros to the full 8 characters; some files yield a
        # shorter adler32 string (assumes adler32() returns a string).
        adler_checksum = adler32(filepath).zfill(8)
        md5_checksum = md5(filepath)
    except Exception:
        # 'except Exception' rather than a bare 'except' so that
        # KeyboardInterrupt / SystemExit are not swallowed
        logging.warning("no file found at %s", filepath)
        return False

    logging.info("Got size and checksum of file: %s size=%s adler32 checksum=%s md5 checksum=%s",
                 filepath, size, adler_checksum, md5_checksum)
    return size, adler_checksum, md5_checksum
Пример #14
0
def download(rse_settings,
             files,
             dest_dir=None,
             force_scheme=None,
             ignore_checksum=False,
             printstatements=False,
             domain='wan',
             transfer_timeout=None):
    """
        Copy a file from the connected storage to the local file system.
        Providing a list indicates the bulk mode.


        :param rse_settings:    RSE to use
        :param files:           a single dict or a list with dicts containing 'scope' and 'name'
                                if LFNs are provided and additional 'pfn' if PFNs are provided.
                                Examples:
                                [
                                {'name': '2_rse_remote_get.raw', 'scope': 'user.jdoe'},
                                {'name':'3_rse_remote_get.raw', 'scope': 'user.jdoe', 'pfn': 'user/jdoe/5a/98/3_rse_remote_get.raw'}
                                ]
        :param dest_dir:        path to the directory where the downloaded files will be stored. If not given, each scope is represented by its own directory.
        :param force_scheme:    normally the scheme is dictated by the RSE object, when specifying the PFN it must be forced to the one specified in the PFN, overruling the RSE description.
        :param ignore_checksum: do not verify the checksum - caution: should only be used for rucio download --pfn
        :param printstatements: if True, progress messages are written with print()
        :param domain:          passed to create_protocol() as the 'domain' argument
        :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it

        :returns: if exactly one result was recorded: True (or the recorded exception is raised);
                  otherwise a list [global_status, results] where results maps 'scope:name' to
                  True or to the exception raised for that file

        :raises SourceNotFound: remote source file can not be found on storage
        :raises DestinationNotAccessible: local destination directory is not accessible
        :raises FileConsistencyMismatch: the checksum of the downloaded file does not match the provided one
        :raises ServiceUnavailable: for any other reason

    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode

    protocol = create_protocol(rse_settings,
                               'read',
                               scheme=force_scheme,
                               domain=domain)
    protocol.connect()

    # try/finally guarantees the connection is closed even when something
    # outside the per-file error handling (e.g. the PFN lookup) raises
    try:
        files = files if isinstance(files, list) else [files]
        for f in files:
            pfn = f['pfn'] if 'pfn' in f else list(
                protocol.lfns2pfns(f).values())[0]
            target_dir = "./%s" % f['scope'] if dest_dir is None else dest_dir
            key = '%s:%s' % (f['scope'], f['name'])
            try:
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                # Each scope is stored into a separate folder
                finalfile = '%s/%s' % (target_dir, f['name'])
                # Check if the file already exists, if not download and validate it
                if not os.path.isfile(finalfile):
                    # NOTE(review): validation only happens when the 'adler32'
                    # key is present; an entry carrying only 'md5' is downloaded
                    # without any checksum verification - confirm this is intended.
                    if 'adler32' in f:
                        # Download to a '.part' file first so that a file under its
                        # final name has always passed validation
                        part_file = '%s/%s.part' % (target_dir, f['name'])
                        if os.path.isfile(part_file):
                            if printstatements:
                                print(
                                    '%s already exists, probably from a failed attempt. Will remove it'
                                    % (part_file))
                            os.unlink(part_file)
                        protocol.get(pfn,
                                     part_file,
                                     transfer_timeout=transfer_timeout)
                        if printstatements:
                            print('File downloaded. Will be validated')

                        if ignore_checksum:
                            if printstatements:
                                print('Skipping checksum validation')
                        else:
                            # adler32 is preferred; md5 is the fallback when the
                            # adler32 value is empty
                            if f['adler32']:
                                ruciochecksum = f['adler32']
                                localchecksum = utils.adler32(part_file)
                            else:
                                ruciochecksum = f['md5']
                                localchecksum = utils.md5(part_file)
                            if localchecksum == ruciochecksum:
                                if printstatements:
                                    print('File validated')
                            else:
                                os.unlink(part_file)
                                raise exception.FileConsistencyMismatch(
                                    'Checksum mismatch : local %s vs recorded %s' %
                                    (str(localchecksum), str(ruciochecksum)))
                        os.rename(part_file, finalfile)
                    else:
                        protocol.get(pfn,
                                     finalfile,
                                     transfer_timeout=transfer_timeout)
                # Both a fresh download and an already existing file count as success
                ret[key] = True
            except Exception as e:
                gs = False
                ret[key] = e
    finally:
        protocol.close()

    # With a single recorded result the outcome is returned (or raised) directly
    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        return outcome
    return [gs, ret]
Пример #15
0
    def _download_item(self, item, trace, log_prefix=''):
        """
        Downloads the given item and sends traces for success/failure.
        (This function is meant to be used as class internal only)

        Every entry of item['sources'] is tried in order, with up to two
        attempts each, until one download succeeds. Unless
        item['ignore_checksum'] is set, the downloaded file is compared against
        item['adler32'] (or item['md5'] if no adler32 is given). A validated
        file is renamed from item['temp_file_path'] to item['dest_file_path'].

        :param item: dictionary that describes the item to download; 'scope', 'name' and
                     'dest_file_path' are always read, 'temp_file_path' once a download is attempted
        :param trace: dictionary representing a pattern of trace that will be send
        :param log_prefix: string that will be put at the beginning of every log message

        :returns: dictionary with all attributes from the input item and a clientState attribute
                  ('ALREADY_DONE', 'FILE_NOT_FOUND', 'FAILED' or 'DONE')
        """
        logger = self.logger

        did_scope = item['scope']
        did_name = item['name']
        did_str = '%s:%s' % (did_scope, did_name)

        logger.info('%sPreparing download of %s' % (log_prefix, did_str))

        trace['scope'] = did_scope
        trace['filename'] = did_name
        trace.setdefault('datasetScope', item.get('dataset_scope', ''))
        trace.setdefault('dataset', item.get('dataset_name', ''))
        trace.setdefault('filesize', item.get('bytes'))

        # if file already exists, set state, send trace, and return
        dest_file_path = item['dest_file_path']
        if os.path.isfile(dest_file_path):
            logger.info('%sFile exists already locally: %s' % (log_prefix, did_str))
            item['clientState'] = 'ALREADY_DONE'

            trace['transferStart'] = time.time()
            trace['transferEnd'] = time.time()
            trace['clientState'] = 'ALREADY_DONE'
            send_trace(trace, self.client.host, self.client.user_agent)
            return item

        # check if file has replicas (covers both a missing and an empty source list)
        sources = item.get('sources')
        if not sources:
            logger.warning('%sNo available source found for file: %s' % (log_prefix, did_str))
            item['clientState'] = 'FILE_NOT_FOUND'

            trace['clientState'] = 'FILE_NOT_FOUND'
            send_trace(trace, self.client.host, self.client.user_agent)
            return item

        success = False
        # try different PFNs until one succeeded
        for source in sources:
            pfn = source['pfn']
            rse_name = source['rse']
            scheme = pfn.split(':')[0]

            try:
                rse = rsemgr.get_rse_info(rse_name)
            except RSENotFound:
                logger.warning('%sCould not get info of RSE %s' % (log_prefix, rse_name))
                continue

            trace['remoteSite'] = rse_name
            trace['clientState'] = 'DOWNLOAD_ATTEMPT'
            trace['protocol'] = scheme

            logger.info('%sTrying to download with %s from %s: %s ' % (log_prefix, scheme, rse_name, did_str))

            try:
                protocol = rsemgr.create_protocol(rse, operation='read', scheme=scheme)
                protocol.connect()
            except Exception as error:
                logger.warning('%sFailed to create protocol for PFN: %s' % (log_prefix, pfn))
                logger.debug('scheme: %s, exception: %s' % (scheme, error))
                continue

            attempt = 0
            retries = 2
            # try/finally: the protocol is closed even if checksum computation
            # or temp-file handling raises unexpectedly
            try:
                # do some retries with the same PFN if the download fails
                while not success and attempt < retries:
                    attempt += 1
                    item['attemptnr'] = attempt

                    temp_file_path = item['temp_file_path']
                    if os.path.isfile(temp_file_path):
                        logger.debug('%sDeleting existing temporary file: %s' % (log_prefix, temp_file_path))
                        os.unlink(temp_file_path)

                    start_time = time.time()

                    try:
                        protocol.get(pfn, temp_file_path, transfer_timeout=item.get('transfer_timeout'))
                        success = True
                    except Exception as error:
                        logger.debug(error)
                        trace['clientState'] = str(type(error).__name__)

                    end_time = time.time()

                    if success and not item.get('ignore_checksum', False):
                        # adler32 is preferred; md5 is only used when no adler32 is given
                        rucio_checksum = item.get('adler32')
                        local_checksum = None
                        if not rucio_checksum:
                            rucio_checksum = item.get('md5')
                            local_checksum = md5(temp_file_path)
                        else:
                            local_checksum = adler32(temp_file_path)

                        if rucio_checksum != local_checksum:
                            success = False
                            os.unlink(temp_file_path)
                            logger.warning('%sChecksum validation failed for file: %s' % (log_prefix, did_str))
                            logger.debug('Local checksum: %s, Rucio checksum: %s' % (local_checksum, rucio_checksum))
                            # Reporting the replica is best-effort; a failure here
                            # must not abort the remaining attempts
                            try:
                                self.client.declare_suspicious_file_replicas([pfn], reason='Corrupted')
                            except Exception:
                                pass
                            trace['clientState'] = 'FAIL_VALIDATE'
                    if not success:
                        logger.warning('%sDownload attempt failed. Try %s/%s' % (log_prefix, attempt, retries))
                        send_trace(trace, self.client.host, self.client.user_agent)
            finally:
                protocol.close()

            if success:
                break

        if not success:
            logger.error('%sFailed to download file %s' % (log_prefix, did_str))
            item['clientState'] = 'FAILED'
            return item

        # Only a downloaded (and, unless disabled, validated) file gets its final name
        os.rename(temp_file_path, dest_file_path)

        trace['transferStart'] = start_time
        trace['transferEnd'] = end_time
        trace['clientState'] = 'DONE'
        item['clientState'] = 'DONE'
        send_trace(trace, self.client.host, self.client.user_agent)

        duration = round(end_time - start_time, 2)
        size = item.get('bytes')
        size_str = sizefmt(size, self.is_human_readable)
        # The rate is only reported when both size and duration are non-zero
        if size and duration:
            rate = round((size / duration) * 1e-6, 2)
            logger.info('%sFile %s successfully downloaded. %s in %s seconds = %s MBps' % (log_prefix, did_str, size_str, duration, rate))
        else:
            logger.info('%sFile %s successfully downloaded in %s seconds' % (log_prefix, did_str, duration))
        return item