Пример #1
0
def _stage_in_bulk(dst,
                   files,
                   trace_report_out=None,
                   trace_common_fields=None):
    """
    Download several files with a single call to the Rucio download client.

    :param dst: fallback destination directory, used when fspec.workdir is not set (string).
    :param files: list of fspec objects; status_code is reset to 0 on each of them.
    :param trace_report_out: handed to download_pfns() as traces_copy_out.
    :param trace_common_fields: handed to download_pfns() as trace_custom_fields ({} is used when not set).
    :return: None (the client result is only written to the debug log).
    """
    from rucio.client.downloadclient import DownloadClient

    client = DownloadClient(logger=logger)

    # propagate the module-level tracing flag when the client has such an attribute
    if hasattr(client, 'tracing'):
        client.tracing = tracing_rucio

    # one dictionary per file, kept in the same order as the incoming fspec list
    items = []
    for fspec in files:
        fspec.status_code = 0

        item = {
            'did_scope': fspec.scope,
            'did_name': fspec.lfn,
            'did': '%s:%s' % (fspec.scope, fspec.lfn),
            'rse': fspec.ddmendpoint,
            'base_dir': fspec.workdir or dst,
            'no_subdir': True,
        }

        if not fspec.turl:
            # NOTE: nothing is aborted here; the entry is still queued, just without a 'pfn' key
            logger.warning(
                'cannot perform bulk download since fspec.turl is not set (required by download_pfns()'
            )
        else:
            item['pfn'] = fspec.turl

        if fspec.filesize:
            item['transfer_timeout'] = get_timeout(fspec.filesize)

        items.append(item)

    # one download thread is requested per queued file
    # (original author's note: the download client raises an exception if any file failed)
    outcome = client.download_pfns(items,
                                   len(items),
                                   trace_custom_fields=trace_common_fields or {},
                                   traces_copy_out=trace_report_out)
    logger.debug('Rucio download client returned %s' % outcome)
Пример #2
0
    def _stageInApi(self, dst, fspec):
        """
        Download a single file with the Rucio download client.

        :param dst: destination path; only its directory part is used as 'base_dir'.
        :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
        :return: the 'clientState' of the first result entry, or 'FAILED' when the
                 client returns nothing or the entry has no such key (string).
        """
        from rucio.client.downloadclient import DownloadClient

        # the client logs through the dedicated 'rucio_mover' logger
        client = DownloadClient(logger=logging.getLogger('rucio_mover'))

        # forward this mover's tracing setting when the client has such an attribute
        if hasattr(client, 'tracing'):
            client.tracing = self.tracing

        # description of the file to download
        item = {
            'did_scope': fspec.scope,
            'did_name': fspec.lfn,
            'did': '%s:%s' % (fspec.scope, fspec.lfn),
            'rse': fspec.ddmendpoint,
            'base_dir': dirname(dst),
        }
        if fspec.turl:
            item['pfn'] = fspec.turl
        # no 'transfer_timeout' is set: the original author judged self.getTimeOut()
        # "too harsh, max 3 hours"

        tolog('_stageInApi file: %s' % str(item))

        trace_fields = self.trace_report if self.trace_report else {}

        # a known turl allows a direct pfn download, otherwise go through the did
        if fspec.turl:
            outcome = client.download_pfns(
                [item], 1, trace_custom_fields=trace_fields)
        else:
            outcome = client.download_dids(
                [item], trace_custom_fields=trace_fields)

        if not outcome:
            return 'FAILED'
        return outcome[0].get('clientState', 'FAILED')
Пример #3
0
def _stage_in_api(dst, fspec, trace_report):
    """
    Download a single file with the Rucio download client.

    :param dst: destination directory, used as 'base_dir' (string).
    :param fspec: file specification object (scope, lfn, ddmendpoint, turl and filesize are read).
    :param trace_report: passed to the client as trace_custom_fields ({} is used when not set).
    :return: the 'clientState' of the first result entry, or 'FAILED' when the
             client returns nothing or the entry has no such key (string).
    """
    from rucio.client.downloadclient import DownloadClient

    client = DownloadClient()

    # propagate the module-level tracing flag when the client has such an attribute
    if hasattr(client, 'tracing'):
        client.tracing = tracing_rucio

    # description of the file to download
    item = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        item['pfn'] = fspec.turl
    if fspec.filesize:
        item['transfer_timeout'] = get_timeout(fspec.filesize)

    logger.info('_stage_in_api file: %s' % str(item))

    trace_fields = trace_report or {}

    # a known turl allows a direct pfn download, otherwise go through the did
    if fspec.turl:
        outcome = client.download_pfns(
            [item], 1, trace_custom_fields=trace_fields)
    else:
        outcome = client.download_dids(
            [item], trace_custom_fields=trace_fields)

    if outcome:
        return outcome[0].get('clientState', 'FAILED')
    return 'FAILED'
Пример #4
0
    def _stageInApi(self, dst, fspec):
        """
        Fetch one file through the Rucio download client and report its state.

        :param dst: destination path; dirname(dst) becomes the download 'base_dir'.
        :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
        :return: 'clientState' taken from the first returned entry; 'FAILED' when the
                 result is empty or the key is missing (string).
        """
        from rucio.client.downloadclient import DownloadClient

        # rucio messages go to the 'rucio_mover' logger
        rucio_client = DownloadClient(logger=logging.getLogger('rucio_mover'))

        # hand over this mover's tracing setting if the client supports it
        if hasattr(rucio_client, 'tracing'):
            rucio_client.tracing = self.tracing

        # download request for this file
        request = {
            'did_scope': fspec.scope,
            'did_name': fspec.lfn,
            'did': '%s:%s' % (fspec.scope, fspec.lfn),
            'rse': fspec.ddmendpoint,
            'base_dir': dirname(dst),
        }
        if fspec.turl:
            request['pfn'] = fspec.turl
        # a per-file 'transfer_timeout' is intentionally not set (the original author
        # found self.getTimeOut() "too harsh, max 3 hours")

        tolog('_stageInApi file: %s' % str(request))

        custom_fields = self.trace_report if self.trace_report else {}

        # download by pfn when a turl is known, by did otherwise
        if fspec.turl:
            downloaded = rucio_client.download_pfns([request], 1, trace_custom_fields=custom_fields)
        else:
            downloaded = rucio_client.download_dids([request], trace_custom_fields=custom_fields)

        if not downloaded:
            return 'FAILED'
        return downloaded[0].get('clientState', 'FAILED')
Пример #5
0
def _stage_in_api(dst, fspec, trace_report, trace_report_out, transfer_timeout):
    """
    Download a single file with the Rucio download client.

    :param dst: destination directory, used as 'base_dir' (string).
    :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
    :param trace_report: passed to the client as trace_custom_fields ({} is used when not set).
    :param trace_report_out: passed to the client as traces_copy_out and returned to the caller.
    :param transfer_timeout: stored as 'transfer_timeout' in the request when truthy.
    :return: trace_report_out, the same object that was passed in.
    """
    from rucio.client.downloadclient import DownloadClient

    client = DownloadClient(logger=logger)

    # propagate the module-level tracing flag when the client has such an attribute
    if hasattr(client, 'tracing'):
        client.tracing = tracing_rucio

    # description of the file to download
    item = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        item['pfn'] = fspec.turl
    if transfer_timeout:
        item['transfer_timeout'] = transfer_timeout

    logger.info('_stage_in_api file: %s' % str(item))

    # keyword arguments common to both download entry points
    trace_kwargs = dict(trace_custom_fields=trace_report or {},
                        traces_copy_out=trace_report_out)

    # a known turl allows a direct pfn download, otherwise go through the did
    # (original author's note: the download client raises an exception if any file failed)
    if fspec.turl:
        outcome = client.download_pfns([item], 1, **trace_kwargs)
    else:
        outcome = client.download_dids([item], **trace_kwargs)

    logger.debug('Rucio download client returned %s' % outcome)

    return trace_report_out
Пример #6
0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Authors:
# - Paul Nilsson, [email protected], 2018-9

# This script demonstrates how to download a file using the Rucio download client.
# Note: Rucio needs to be setup with 'lsetup rucio'.

try:
    from rucio.client.downloadclient import DownloadClient
except Exception:
    # any failure while importing the client is reported as a missing Rucio setup
    print("Rucio client has not been setup, please run 'lsetup rucio' first")
else:
    # request for one file; 'pfn' carries the complete source URL
    # (Python 2 - is unicode necessary for the 'rse' value? was u'IFIC-LCG2_DATADISK')
    f_ific = {
        'did_scope': 'mc16_13TeV',
        'did': 'mc16_13TeV:EVNT.16337107._000147.pool.root.1',
        'rse': 'IFIC-LCG2_DATADISK',
        'pfn': ('root://t2fax.ific.uv.es:1094//lustre/ific.uv.es/grid/atlas/atlasdatadisk'
                '/rucio/mc16_13TeV/59/29/EVNT.16337107._000147.pool.root.1'),
        'did_name': 'EVNT.16337107._000147.pool.root.1',
        'transfer_timeout': 3981,
        'base_dir': '.',
    }

    # download through a single thread and show what the client returns
    download_client = DownloadClient()
    print(download_client.download_pfns([f_ific], 1))
Пример #7
0
class TestDownloadClient(unittest.TestCase):
    """
    Integration tests for DownloadClient.download_dids() / download_pfns().

    Each test uploads files through UploadClient and downloads them again, so the
    tests talk to whatever Rucio instance Client() connects to and use the RSEs
    named 'MOCK4' and 'XRD1'.
    """

    def setUp(self):
        """Create the Rucio clients shared by all tests and read the VO settings."""
        if config_get_bool('common',
                           'multi_vo',
                           raise_exception=False,
                           default=False):
            self.vo = {
                'vo':
                config_get('client',
                           'vo',
                           raise_exception=False,
                           default='tst')
            }
        else:
            self.vo = {}

        logger = logging.getLogger('dlul_client')
        # getLogger() returns the same process-wide logger on every call; attach the
        # stream handler only once so that log lines are not duplicated per test
        if not logger.handlers:
            logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.DEBUG)
        self.client = Client()
        self.did_client = DIDClient()
        self.upload_client = UploadClient(_client=self.client, logger=logger)
        self.download_client = DownloadClient(client=self.client,
                                              logger=logger)

    def _upoad_test_file(self, rse, scope, name, path=None):
        """
        Upload one file and return the item dictionary that was uploaded.

        :param rse: name of the RSE to upload to.
        :param scope: scope of the new DID.
        :param name: name of the new DID.
        :param path: local file to upload; file_generator() is called when not given.
        :return: the upload item (keys 'path', 'rse', 'did_scope', 'did_name', 'guid').
        """
        item = {
            'path': path if path else file_generator(),
            'rse': rse,
            'did_scope': scope,
            'did_name': name,
            'guid': generate_uuid(),
        }
        assert self.upload_client.upload([item]) == 0
        return item

    @staticmethod
    def _check_download_result(actual_result, expected_result):
        """
        Assert that the download result matches the expectation.

        Both lists are sorted by their 'did' entry and compared pairwise; only the
        keys present in an expected entry are checked against the actual entry.

        :param actual_result: list of dictionaries returned by the download client.
        :param expected_result: list of dictionaries with the expected key/value pairs.
        """
        assert len(expected_result) == len(actual_result)
        expected_result = sorted(expected_result, key=lambda x: x['did'])
        actual_result = sorted(actual_result, key=lambda x: x['did'])
        for i, expected in enumerate(expected_result):
            for param_name, expected_value in expected.items():
                assert param_name in actual_result[i], \
                    'missing key %r in download result' % param_name
                assert actual_result[i][param_name] == expected_value

    def test_download_without_base_dir(self):
        """Download without 'base_dir' and check that a second download reports ALREADY_DONE."""
        rse = 'MOCK4'
        scope = 'mock'
        item = self._upoad_test_file(rse, scope,
                                     'testDownloadNoBasedir' + generate_uuid())
        did = '%s:%s' % (scope, item['did_name'])
        try:
            # download to the default location, i.e. to ./
            result = self.download_client.download_dids([{'did': did}])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': did,
                    'clientState': 'DONE',
                }],
            )

            # re-downloading the same file again should not overwrite it
            result = self.download_client.download_dids([{'did': did}])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': did,
                    'clientState': 'ALREADY_DONE',
                }],
            )
        finally:
            # the directory does not exist if the first download failed; do not let
            # the cleanup raise and hide the real failure
            shutil.rmtree(scope, ignore_errors=True)

    def test_download_multiple(self):
        """Download by name, wildcard and filter, with and without 'no_subdir'."""
        rse = 'MOCK4'
        scope = 'mock'
        base_name = 'testDownloadItem' + generate_uuid()
        item000 = self._upoad_test_file(rse, scope, base_name + '.000')
        item001 = self._upoad_test_file(rse, scope, base_name + '.001')
        item100 = self._upoad_test_file(rse, scope, base_name + '.100')

        with TemporaryDirectory() as tmp_dir:
            # Download specific DID
            result = self.download_client.download_dids([{
                'did':
                '%s:%s' % (scope, item000['did_name']),
                'base_dir':
                tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item000['did_name']),
                    'clientState': 'DONE',
                }],
            )

            # Download multiple files with wildcard. One file already exists on the file system. Will not be re-downloaded.
            result = self.download_client.download_dids([{
                'did':
                '%s:%s.0*' % (scope, base_name),
                'base_dir':
                tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, item000['did_name']),
                        'clientState': 'ALREADY_DONE',
                    },
                    {
                        'did': '%s:%s' % (scope, item001['did_name']),
                        'clientState': 'DONE',
                    },
                ],
            )

            # Download with filter
            result = self.download_client.download_dids([{
                'filters': {
                    'guid': item000['guid'],
                    'scope': scope
                },
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item000['did_name']),
                }],
            )

            # Download with wildcard and name
            result = self.download_client.download_dids([{
                'did': '%s:*' % scope,
                'filters': {
                    'guid': item100['guid']
                },
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item100['did_name']),
                    'clientState': 'DONE',
                }],
            )

            # Don't create subdirectories by scope
            result = self.download_client.download_dids([{
                'did':
                '%s:%s.*' % (scope, base_name),
                'base_dir':
                tmp_dir,
                'no_subdir':
                True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did':
                        '%s:%s' % (scope, item000['did_name']),
                        'clientState':
                        'DONE',
                        'dest_file_paths':
                        ['%s/%s' % (tmp_dir, item000['did_name'])],
                    },
                    {
                        'did':
                        '%s:%s' % (scope, item001['did_name']),
                        'clientState':
                        'DONE',
                        'dest_file_paths':
                        ['%s/%s' % (tmp_dir, item001['did_name'])],
                    },
                    {
                        'did':
                        '%s:%s' % (scope, item100['did_name']),
                        'clientState':
                        'DONE',
                        'dest_file_paths':
                        ['%s/%s' % (tmp_dir, item100['did_name'])],
                    },
                ],
            )

            # Re-download a file that already exists on the file system with no_subdir set.
            # The assertion below expects ALREADY_DONE.
            # NOTE(review): the previous comment said "It must be overwritten" - confirm
            # which of the two behaviours is intended.
            result = self.download_client.download_dids([{
                'did':
                '%s:%s' % (scope, item100['did_name']),
                'base_dir':
                tmp_dir,
                'no_subdir':
                True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did':
                    '%s:%s' % (scope, item100['did_name']),
                    'clientState':
                    'ALREADY_DONE',
                    'dest_file_paths':
                    ['%s/%s' % (tmp_dir, item100['did_name'])],
                }],
            )

    @pytest.mark.xfail(
        reason=
        'XRD1 must be initialized https://github.com/rucio/rucio/pull/4165/')
    def test_download_from_archive_on_xrd(self):
        """Upload a zip archive to XRD1 and download its constituents by did and by pfn."""
        scope = 'test'
        rse = 'XRD1'
        base_name = 'testDownloadArchive' + generate_uuid()
        with TemporaryDirectory() as tmp_dir:
            # Create a zip archive with two files and upload it
            name000 = base_name + '.000'
            data000 = '000'
            adler000 = '01230091'
            name001 = base_name + '.001'
            data001 = '001'
            adler001 = '01240092'
            zip_name = base_name + '.zip'
            zip_path = '%s/%s' % (tmp_dir, zip_name)
            with ZipFile(zip_path, 'w') as myzip:
                myzip.writestr(name000, data=data000)
                myzip.writestr(name001, data=data001)
            self._upoad_test_file(rse, scope, zip_name, path=zip_path)
            self.did_client.add_files_to_archive(
                scope,
                zip_name,
                [
                    {
                        'scope': scope,
                        'name': name000,
                        'bytes': len(data000),
                        'type': 'FILE',
                        'adler32': adler000,
                        'meta': {
                            'guid': str(generate_uuid())
                        }
                    },
                    {
                        'scope': scope,
                        'name': name001,
                        'bytes': len(data001),
                        'type': 'FILE',
                        'adler32': adler001,
                        'meta': {
                            'guid': str(generate_uuid())
                        }
                    },
                ],
            )

            # Download one file from the archive
            result = self.download_client.download_dids([{
                'did':
                '%s:%s' % (scope, name000),
                'base_dir':
                tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name000),
                        'clientState': 'DONE',
                    },
                ],
            )
            with open('%s/%s/%s' % (tmp_dir, scope, name000), 'r') as file:
                assert file.read() == data000

            # Download both files from the archive
            result = self.download_client.download_dids([{
                'did':
                '%s:%s.00*' % (scope, base_name),
                'base_dir':
                tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name000),
                        'clientState': 'ALREADY_DONE',
                    },
                    {
                        'did': '%s:%s' % (scope, name001),
                        'clientState': 'DONE',
                    },
                ],
            )
            with open('%s/%s/%s' % (tmp_dir, scope, name001), 'r') as file:
                assert file.read() == data001

            # take the pfn of the first source reported for the second file
            pfn = next(filter(lambda r: name001 in r['did'],
                              result))['sources'][0]['pfn']
            # Download by pfn from the archive
            result = self.download_client.download_pfns([{
                'did':
                '%s:%s' % (scope, name001),
                'pfn':
                pfn,
                'rse':
                rse,
                'base_dir':
                tmp_dir,
                'no_subdir':
                True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name001),
                        'clientState': 'DONE',
                    },
                ],
            )

    def test_trace_copy_out_and_checksum_validation(self):
        """Check the traces copied out by the client, including checksum validation failures."""
        rse = 'MOCK4'
        scope = 'mock'
        name = 'testDownloadTraces' + generate_uuid()
        self._upoad_test_file(rse, scope, name)

        with TemporaryDirectory() as tmp_dir:
            # Try downloading non-existing did
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_dids(
                    [{
                        'did': 'some:randomNonExistingDid',
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FILE_NOT_FOUND'

            # Download specific DID
            traces = []
            self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Download same DID again
            traces = []
            result = self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

            # Change the local file and download the same file again. Checksum validation should fail and it must be re-downloaded
            with open(result[0]['dest_file_paths'][0], 'a') as f:
                f.write("more data")
            traces = []
            result = self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # remember the source pfn for the pfn-based downloads below
            pfn = result[0]['sources'][0]['pfn']

        # Switch to a new empty directory
        with TemporaryDirectory() as tmp_dir:
            # Wildcards in did name are not allowed on pfn downloads
            traces = []
            with pytest.raises(InputValidationError):
                self.download_client.download_pfns(
                    [{
                        'did': '%s:*' % scope,
                        'pfn': pfn,
                        'rse': rse,
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert not traces

            # Same pfn, but without wildcard in the did should work
            traces = []
            self.download_client.download_pfns(
                [{
                    'did': '%s:%s' % (scope, name),
                    'pfn': pfn,
                    'rse': rse,
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Same pfn. Local file already present. Shouldn't be overwritten.
            traces = []
            self.download_client.download_pfns(
                [{
                    'did': '%s:%s' % (scope, name),
                    'pfn': pfn,
                    'rse': rse,
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

            # Provide wrong checksum for validation, the file will be re-downloaded but checksum validation fails
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_pfns(
                    [{
                        'did': '%s:%s' % (scope, name),
                        'pfn': pfn,
                        'rse': rse,
                        'adler32': 'wrong',
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

        # Switch to a new empty directory
        with TemporaryDirectory() as tmp_dir:
            # Simulate checksum corruption by changing the source file. We rely on the particularity
            # that the MOCK4 rse uses the posix protocol: files are stored on the local file system
            protocol = rsemgr.create_protocol(rsemgr.get_rse_info(
                rse, vo=self.client.vo),
                                              operation='read')
            assert isinstance(protocol, PosixProtocol)
            mock_rse_local_path = protocol.pfn2path(pfn)
            with open(mock_rse_local_path, 'w') as f:
                f.write('some completely other data')

            # Download fails checksum validation
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_dids(
                    [{
                        'did': '%s:%s' % (scope, name),
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

            # Ignore_checksum set. Download works.
            traces = []
            self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir,
                    'ignore_checksum': True
                }],
                traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'
Пример #8
0
def _stage_in_api(dst, fspec, trace_report, trace_report_out, transfer_timeout,
                  use_pcache):
    """
    Download a single file with the Rucio download client.

    :param dst: destination directory, used as 'base_dir' (string).
    :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
    :param trace_report: passed to the client as trace_custom_fields ({} is used when not set).
    :param trace_report_out: passed to the client as traces_copy_out; also inspected after a failure.
    :param transfer_timeout: stored as 'transfer_timeout' in the request when truthy.
    :param use_pcache: when truthy, check_pcache is set to True on the download client.
    :return: tuple (ec, trace_report_out); ec is 0 after a successful download and -1 when
             the client raised but the first entry of trace_report_out carries a 'stateReason'.
    :raises Exception: the exception raised by the download client is re-raised unchanged
             when trace_report_out is empty or its first entry has no 'stateReason'.
    """

    ec = 0

    # init. download client
    from rucio.client.downloadclient import DownloadClient
    download_client = DownloadClient(logger=logger)
    if use_pcache:
        download_client.check_pcache = True

    # propagate the module-level tracing flag when the client has such an attribute
    if hasattr(download_client, 'tracing'):
        download_client.tracing = tracing_rucio

    # file specifications before the actual download
    f = {}
    f['did_scope'] = fspec.scope
    f['did_name'] = fspec.lfn
    f['did'] = '%s:%s' % (fspec.scope, fspec.lfn)
    f['rse'] = fspec.ddmendpoint
    f['base_dir'] = dst
    f['no_subdir'] = True
    if fspec.turl:
        f['pfn'] = fspec.turl

    if transfer_timeout:
        f['transfer_timeout'] = transfer_timeout
    # fixed value of 3600 (presumably seconds, i.e. one hour - confirm against the rucio client)
    f['connection_timeout'] = 60 * 60

    # proceed with the download
    logger.info('rucio API stage-in dictionary: %s' % f)
    trace_pattern = {}
    if trace_report:
        trace_pattern = trace_report

    # download client raises an exception if any file failed
    try:
        logger.info('*** rucio API downloading file (taking over logging) ***')
        if fspec.turl:
            result = download_client.download_pfns(
                [f],
                1,
                trace_custom_fields=trace_pattern,
                traces_copy_out=trace_report_out)
        else:
            result = download_client.download_dids(
                [f],
                trace_custom_fields=trace_pattern,
                traces_copy_out=trace_report_out)
    except Exception as e:
        logger.warning('*** rucio API download client failed ***')
        logger.warning('caught exception: %s' % e)
        logger.debug('trace_report_out=%s' % trace_report_out)
        # only re-raise if the error info cannot be extracted from the trace; the bare
        # `raise` keeps the original traceback intact (unlike `raise e` on Python 2)
        if not trace_report_out or not trace_report_out[0].get('stateReason'):
            raise
        ec = -1
    else:
        logger.info('*** rucio API download client finished ***')
        logger.debug('client returned %s' % result)

    logger.debug('trace_report_out=%s' % trace_report_out)

    return ec, trace_report_out