示例#1
0
    def setupClass(cls):
        """POSIX (RSE/PROTOCOLS): Creating necessary directories and files

        Creates a temporary local directory holding a 1 MB ``data.raw`` and one
        copy of it per entry of ``MgrTestCases.files_local``. Then creates the
        MOCK-POSIX prefix directory (read from ``etc/rse_repository.json``),
        fills it with a random 1 MB ``data.raw`` and copies that file to the
        storage path of every entry of ``MgrTestCases.files_remote``.

        Sets ``cls.tmpdir``, ``cls.user`` and ``cls.static_file``.
        """
        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = uuid()

        with open("%s/data.raw" % cls.tmpdir, "wb") as out:
            out.seek((1024 * 1024) - 1)  # 1 MB
            # The file is opened in binary mode, so the payload must be bytes.
            out.write(b'\0')
        for f in MgrTestCases.files_local:
            shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

        with open('etc/rse_repository.json') as f:
            data = json.load(f)
        prefix = data['MOCK-POSIX']['protocols']['supported']['file']['prefix']
        try:
            os.mkdir(prefix)
        except Exception as e:
            # Best effort: the directory may be left over from an earlier run.
            print(e)
        os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' %
                  prefix)
        cls.static_file = '%s/data.raw' % prefix

        # The RSE description and the protocol do not depend on the file, so
        # they are built once instead of once per remote file.
        rse_info = mgr.get_rse_info('MOCK-POSIX')
        protocol = mgr.create_protocol(rse_info, 'write')
        for f in MgrTestCases.files_remote:
            # dict.values() is a view on Python 3 and cannot be indexed.
            pfn = list(mgr.lfns2pfns(rse_info, {
                'name': f,
                'scope': 'user.%s' % cls.user
            }).values())[0]
            path = protocol.pfn2path(pfn)
            dirs = os.path.dirname(path)
            if not os.path.exists(dirs):
                os.makedirs(dirs)
            shutil.copy('%s/data.raw' % prefix, path)
示例#2
0
 def test_download_succeeds_md5only(self):
     """CLIENT(USER): Rucio download succeeds MD5 only

     Registers a generated file as a replica carrying only an md5 checksum,
     uploads it through the write protocol of ``self.def_rse``, removes the
     local copy, downloads it again with the ``rucio`` command line client and
     checks that the file name shows up in ``/tmp/<scope>``.
     """
     # user has a file to upload
     filename = file_generator()
     # NOTE(review): the 5-character split assumes file_generator() returns a
     # path of the form '/tmp/<name>' - confirm against the helper.
     source_dir, name = filename[:5], filename[5:]
     file_md5 = md5(filename)
     filesize = stat(filename).st_size
     lfn = {'name': name, 'scope': self.user, 'bytes': filesize, 'md5': file_md5}
     # user uploads file
     self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
     rse_settings = rsemgr.get_rse_info(self.def_rse)
     protocol = rsemgr.create_protocol(rse_settings, 'write')
     protocol.connect()
     try:
         # dict.values() is a view on Python 3 and cannot be indexed.
         pfn = list(protocol.lfns2pfns(lfn).values())[0]
         protocol.put(name, pfn, source_dir)
     finally:
         # Release the connection even when the upload fails.
         protocol.close()
     remove(filename)
     # download files
     cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(self.user, name)
     print(self.marker + cmd)
     exitcode, out, err = execute(cmd)
     print(out, err)
     # search for the files with ls
     cmd = 'ls /tmp/{0}'.format(self.user)    # search in /tmp/
     print(self.marker + cmd)
     exitcode, out, err = execute(cmd)
     print(out, err)
     # The name is matched literally, not interpreted as a regex pattern.
     nose.tools.assert_not_equal(re.search(re.escape(name), out), None)
     try:
         for i in listdir('data13_hip'):
             unlink('data13_hip/%s' % i)
         rmdir('data13_hip')
     except OSError:
         # Best-effort cleanup: the directory usually does not exist.
         pass
示例#3
0
def get_did_from_pfns(pfns, rse, session=None):
    """
    Get the DIDs associated to a PFN on one given RSE

    On a deterministic RSE the scope and name are derived from the parsed PFN
    path; otherwise they are looked up in the replica table by path.

    :param pfns: The list of PFNs.
    :param rse: The RSE name.
    :param session: The database session in use.
    :returns: A generator yielding one dictionary {pfn: {'scope': scope, 'name': name}} per resolved PFN.
    """
    rse_info = rsemgr.get_rse_info(rse, session=session)
    rse_id = rse_info['id']
    p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
    if rse_info['deterministic']:
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            parsed = parsed_pfn[pfn]
            path = parsed['path']
            path_parts = path.split('/')
            if path.startswith(('user', 'group')):
                # user/group scopes span the first two path components
                scope = '%s.%s' % (path_parts[0], path_parts[1])
            else:
                scope = path_parts[0]
            yield {pfn: {'scope': scope, 'name': parsed['name']}}
    else:
        pfndict = {}
        condition = []
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
            pfndict[path] = pfn
            condition.append(and_(models.RSEFileAssociation.path == path, models.RSEFileAssociation.rse_id == rse_id))
        if not condition:
            # Without any clause the OR filter would not restrict the query
            # and every returned path would be missing from pfndict.
            return
        for scope, name, path in session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.path).filter(or_(*condition)):
            yield {pfndict[path]: {'scope': scope, 'name': name}}
    def setupClass(cls):
        """SFTP (RSE/PROTOCOLS): Creating necessary directories and files

        Creates a temporary local directory with a 1 MB ``data.raw`` and one
        symlink to it per entry of ``MgrTestCases.files_local``. Then connects
        to LXPLUS with the credentials from ``etc/rse-accounts.cfg``, creates
        the prefix directory with a random ``data.raw`` in it and links that
        file to the storage path of every entry of
        ``MgrTestCases.files_remote``.

        Sets ``cls.tmpdir``, ``cls.user`` and ``cls.static_file``.
        """
        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = uuid()

        with open("%s/data.raw" % cls.tmpdir, "wb") as out:
            out.seek((1024 * 1024) - 1)  # 1 MB
            # The file is opened in binary mode, so the payload must be bytes.
            out.write(b'\0')
        for f in MgrTestCases.files_local:
            os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

        # Load local credentials from file
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        credentials = data['LXPLUS']
        lxplus = pysftp.Connection(**credentials)
        try:
            with open('etc/rse_repository.json') as f:
                prefix = json.load(f)['LXPLUS']['protocols']['supported']['sftp']['prefix']
            lxplus.execute('mkdir %s' % prefix)
            lxplus.execute('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % prefix)
            cls.static_file = 'sftp://lxplus.cern.ch:22%sdata.raw' % prefix
            protocol = mgr.create_protocol(mgr.get_rse_info('LXPLUS'), 'write')
            for f in MgrTestCases.files_remote:
                # dict.values() is a view on Python 3 and cannot be indexed.
                pfn = list(protocol.lfns2pfns({'name': f, 'scope': 'user.%s' % cls.user}).values())[0]
                tmp = list(protocol.parse_pfns(pfn).values())[0]
                remote_dir = ''.join([tmp['prefix'], tmp['path']])
                remote_file = ''.join([tmp['prefix'], tmp['path'], tmp['name']])
                lxplus.execute('mkdir -p %s' % remote_dir)
                lxplus.execute('ln -s %sdata.raw %s' % (prefix, remote_file))
        finally:
            # Close the SFTP connection even when one of the steps above fails.
            lxplus.close()
示例#5
0
    def preferred_impl(self, rse_settings, domain):
        """
            Finds the optimum protocol impl preferred by the client and
            supported by the remote RSE.

            The comma separated ``preferred_impl`` option of the ``upload``
            config section lists implementations relative to
            ``rucio.rse.protocols``; a bare module name gets ``.Default``
            appended. Protocols of the RSE matching one of them are tried
            first, then the remaining ones, each list in reversed order of
            ``rse_settings['protocols']``. The first protocol that supports
            read, write and delete in the given domain and whose connection can
            be established wins.

            :param rse_settings: dictionary containing the RSE settings
            :param domain:     The network domain, either 'wan' (default) or 'lan'

            :returns: The impl string of the selected protocol, or None if no protocol is suitable.
            :raises RucioException(msg): general exception with msg for more details.
        """

        preferred_protocols = []
        supported_impl = None

        try:
            preferred_impls = config_get('upload', 'preferred_impl')
        except Exception as error:
            self.logger(logging.INFO, 'No preferred protocol impl in rucio.cfg: %s' % (error))
        else:
            # Split on the comma and strip, so that 'a,b' and 'a, b' are both
            # understood; empty entries are ignored.
            qualified_impls = []
            for impl in preferred_impls.split(','):
                impl = impl.strip()
                if not impl:
                    continue
                if '.' not in impl:
                    impl += '.Default'
                qualified_impls.append('rucio.rse.protocols.' + impl)

            preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if protocol['impl'] in qualified_impls]

        if preferred_protocols:
            # Fall back to the remaining protocols after the preferred ones.
            preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if protocol not in preferred_protocols]
        else:
            preferred_protocols = reversed(rse_settings['protocols'])

        for protocol in preferred_protocols:
            if domain not in protocol['domains']:
                self.logger(logging.DEBUG, 'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
                continue
            if not all(operations in protocol['domains'][domain] for operations in ("read", "write", "delete")):
                self.logger(logging.DEBUG, 'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
                continue
            try:
                supported_protocol = rsemgr.create_protocol(rse_settings, 'read', domain=domain, impl=protocol['impl'], auth_token=self.auth_token, logger=self.logger)
                supported_protocol.connect()
            except Exception as error:
                # Any failure only disqualifies this protocol; try the next one.
                self.logger(logging.DEBUG, 'Failed to create protocol "%s", exception: %s' % (protocol['impl'], error))
            else:
                self.logger(logging.INFO, 'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
                supported_impl = protocol['impl']
                break

        return supported_impl
示例#6
0
    def setUpClass(cls):
        """SSH (RSE/PROTOCOLS): Creating necessary directories and files

        Prepares the local temporary directory and the remote SSH storage with
        the files listed in ``MgrTestCases.files_local``, ``files_remote`` and
        ``files_local_and_remote``. Sets ``cls.tmpdir``, ``cls.user`` and
        ``cls.static_file``.
        """

        # Getting info for the test environment (port is returned but unused)
        rse_id, prefix, hostname, port, sshuser = cls.get_rse_info()

        try:
            os.mkdir(prefix)
        except Exception as e:
            print(e)

        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = uuid()

        set_preferred_checksum('md5')
        # NOTE(review): these two commands read cls.hostname, not the local
        # `hostname` unpacked above - confirm both hold the same value.
        execute('ssh-keygen -R %s' % (cls.hostname))
        execute('ssh-keyscan %s  >> /root/.ssh/known_hosts' % (cls.hostname))

        local_seed = "%s/data.raw" % cls.tmpdir
        with open(local_seed, "wb") as out:
            out.seek((1024 * 1024) - 1)  # 1 MB
            out.write(b'\0')
        for local_name in MgrTestCases.files_local:
            shutil.copy(local_seed, '%s/%s' % (cls.tmpdir, local_name))

        rse_settings = rsemanager.get_rse_info(rse_id)
        protocol = rsemanager.create_protocol(rse_settings, 'write')
        protocol.connect()

        # Random 1 MB file that is pushed to the static location and to every
        # remote-only test file.
        os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' %
                  prefix)
        cls.static_file = '%s@%s:/%s/data.raw' % (sshuser, hostname, prefix)
        static_dir = os.path.dirname(prefix)
        execute('ssh %s@%s "mkdir -p %s" && scp %s/data.raw %s' % (
            sshuser, hostname, str(static_dir), prefix, cls.static_file))

        scope = 'user.%s' % cls.user

        for remote_name in MgrTestCases.files_remote:
            remote_path = str(prefix + protocol._get_path(scope, remote_name))
            remote_dir = os.path.dirname(remote_path)
            execute('ssh %s@%s "mkdir -p %s" && scp %s/data.raw %s@%s:%s' % (
                sshuser, hostname, str(remote_dir), prefix, sshuser, hostname,
                remote_path))

        for shared_name in MgrTestCases.files_local_and_remote:
            shutil.copy(local_seed, '%s/%s' % (cls.tmpdir, shared_name))
            remote_path = str(prefix + protocol._get_path(scope, shared_name))
            remote_dir = os.path.dirname(remote_path)
            execute('ssh {0}@{1} "mkdir -p {2}" && scp {3}/{4} {5}@{6}:{7}'.format(
                sshuser, hostname, str(remote_dir), str(cls.tmpdir),
                str(shared_name), sshuser, hostname, remote_path))
示例#7
0
 def test_rename_mgr_ok_multi(self):
     """(RSE/PROTOCOLS): Rename multiple files on storage (Success)

     Renames two files addressed by LFN and two addressed by PFN in a single
     ``mgr.rename`` call and fails unless the overall status and every
     per-file entry of the returned details are truthy.
     """
     scope = 'user.%s' % self.user
     protocol = mgr.create_protocol(self.rse_settings, 'write')

     def pfn_of(name):
         # dict.values() is a view on Python 3 and cannot be indexed.
         return list(protocol.lfns2pfns({'name': name, 'scope': scope}).values())[0]

     pfn_a = pfn_of('7_rse_remote_rename.raw')
     pfn_a_new = pfn_of('7_rse_new_rename.raw')
     pfn_b = pfn_of('8_rse_remote_rename.raw')
     pfn_b_new = pfn_of('8_rse_new_rename.raw')
     status, details = mgr.rename(self.rse_settings, [{'name': '1_rse_remote_rename.raw', 'scope': scope, 'new_name': '1_rse_remote_renamed.raw'},
                                                      {'name': '2_rse_remote_rename.raw', 'scope': scope, 'new_name': '2_rse_remote_renamed.raw'},
                                                      {'name': pfn_a, 'new_name': pfn_a_new},
                                                      {'name': pfn_b, 'new_name': pfn_b_new}])
     # The details are only inspected when the overall status is truthy.
     if not status or not (details['%s:1_rse_remote_rename.raw' % scope] and details['%s:2_rse_remote_rename.raw' % scope] and details[pfn_a] and details[pfn_b]):
         raise Exception('Return not as expected: %s, %s' % (status, details))
示例#8
0
    def test_pfn_https(self):
        """ PFN (CORE): Test the splitting of PFNs with https"""

        pfn = 'https://mock.com:2880/pnfs/rucio/disk-only/scratchdisk/whatever'
        # Components the https protocol of the MOCK RSE must extract, in the
        # order in which they are checked.
        expected = (
            ('scheme', 'https'),
            ('hostname', 'mock.com'),
            ('port', 2880),
            ('prefix', '/pnfs/rucio/disk-only/scratchdisk/'),
            ('path', '/'),
            ('name', 'whatever'),
        )

        mock_rse = rsemgr.get_rse_info('MOCK', **self.vo)
        https_proto = rsemgr.create_protocol(mock_rse, 'read', scheme='https')
        parsed = https_proto.parse_pfns([pfn])
        for field, value in expected:
            assert parsed[pfn][field] == value
示例#9
0
def inject(rse, older_than):
    """
    Queue old objects of an RSE as temporary DIDs.

    Nothing is injected when 1000 or more expired temporary DIDs are already
    queued for the RSE, or when the usage check reports a needed free space
    that is neither None nor positive. Otherwise the storage is listed through
    the s3boto protocol implementation and every object last modified more
    than ``older_than`` days ago is registered in scope 'transient' through
    add_temporary_dids, in batches of 1000 plus one final partial batch.
    Listing stops at the first object that is not old enough, or when
    GRACEFUL_STOP is set. Errors raised while listing or registering are
    logged at critical level and not propagated.

    :param rse: The RSE name.
    :param older_than: Minimum age, in days, of the objects to inject.
    """
    logging.info('Starting to inject objects for RSE: %s' % rse)
    num_of_queued_dids = get_count_of_expired_temporary_dids(rse)
    rse_id = rse_core.get_rse_id(rse)
    if not num_of_queued_dids < 1000:
        logging.info(
            "Number of queued deletion for %s is %s, which is bigger than 1000. quit."
            % (rse, num_of_queued_dids))
        return

    max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(
        rse=rse, rse_id=rse_id)
    logging.info("needed_free_space: %s" % needed_free_space)
    if not (needed_free_space is None or needed_free_space > 0):
        return

    rse_info = rsemgr.get_rse_info(rse)
    # Force the s3boto implementation for every protocol of this RSE.
    for protocol in rse_info['protocols']:
        protocol['impl'] = 'rucio.rse.protocols.s3boto.Default'

    prot = rsemgr.create_protocol(rse_info, 'delete')
    try:
        prot.connect()
        dids = []
        older_than_time = datetime.datetime.utcnow(
        ) - datetime.timedelta(days=older_than)
        older_than_time = older_than_time.replace(tzinfo=pytz.utc)
        for key in prot.list():
            d = dateutil.parser.parse(key.last_modified)
            if not d < older_than_time:
                logging.info(
                    'Found objects newer than %s days, quit to list(normally objects in os are returned with order by time)'
                    % older_than)
                break
            dids.append({
                'scope': 'transient',
                'name': key.name.encode('utf-8'),
                'rse': rse,
                'rse_id': rse_id,
                'bytes': key.size,
                'created_at': d
            })
            if len(dids) == 1000:
                add_temporary_dids(dids=dids, account='root')
                logging.info('Adding 1000 dids to temp dids.')
                dids = []
            if GRACEFUL_STOP.is_set():
                logging.info('GRACEFUL_STOP is set. quit')
                break
        if dids:
            # Flush the last, partial batch; it used to be dropped silently.
            add_temporary_dids(dids=dids, account='root')
            logging.info('Adding %s dids to temp dids.' % len(dids))
    except Exception:
        # Log and carry on; KeyboardInterrupt/SystemExit are not swallowed.
        logging.critical(traceback.format_exc())
示例#10
0
    def setupClass(cls):
        """WebDAV (RSE/PROTOCOLS): Creating necessary directories and files

        Creates a temporary local directory with a 1 kB ``data.raw`` and one
        symlink to it per entry of ``MgrTestCases.files_local`` and
        ``MgrTestCases.files_remote``, uploads the remote files through the
        https protocol of the site and finally PUTs ``data.raw`` to the static
        file URL with a requests session using the X509 user proxy.

        Sets ``cls.site``, ``cls.tmpdir``, ``cls.user`` and
        ``cls.static_file``.
        """
        session = requests.Session()
        session.cert = os.getenv('X509_USER_PROXY')
        session.verify = False
        cls.site = 'FZK-LCG2_SCRATCHDISK'
        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = '******'
        with open('etc/rse_repository.json') as f:
            data = json.load(f)
        https_settings = data[cls.site]['protocols']['supported']['https']
        scheme = https_settings['scheme']
        prefix = https_settings['prefix']
        hostname = https_settings['hostname']
        port = https_settings['port']

        with open("%s/data.raw" % cls.tmpdir, "wb") as out:
            out.seek((1024) - 1)  # 1 kB
            # The file is opened in binary mode, so the payload must be bytes.
            out.write(b'\0')
        for f in MgrTestCases.files_local:
            os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

        cls.static_file = '%s://%s:%s%sdata.raw' % (scheme, hostname, port,
                                                    prefix)

        rse_settings = rsemanager.get_rse_info(cls.site)
        storage = rsemanager.create_protocol(rse_settings,
                                             operation='write',
                                             scheme='https')
        storage.connect()
        for f in MgrTestCases.files_remote:
            os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
            # dict.values() is a view on Python 3 and cannot be indexed.
            destfile = list(rsemanager.lfns2pfns(
                rse_settings,
                [{'name': f, 'scope': 'user.%s' % (cls.user)}],
                operation='write',
                scheme='https').values())[0]
            try:
                storage.put('%s/%s' % (cls.tmpdir, f), destfile)
            except FileReplicaAlreadyExists as e:
                # A leftover from an earlier run is good enough for the tests.
                print(e)
        with open('%s/data.raw' % cls.tmpdir, 'rb') as f_file:
            session.put(cls.static_file,
                        data=f_file.read(),
                        verify=False,
                        allow_redirects=True)
示例#11
0
    def test_pfn_srm(self):
        """ PFN (CORE): Test the splitting of PFNs with SRM"""

        # Three spellings of the same file: plain path, managerv2 and
        # v2/server web service paths.
        srm_pfns = ['srm://mock.com:8443/rucio/tmpdisk/rucio_tests/whatever',
                    'srm://mock.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/whatever',
                    'srm://mock.com:8443/srm/v2/server?SFN=/rucio/tmpdisk/rucio_tests/whatever']
        # Every spelling must be split into the same components, checked in
        # this order.
        expected = (
            ('scheme', 'srm'),
            ('hostname', 'mock.com'),
            ('port', 8443),
            ('prefix', '/rucio/tmpdisk/rucio_tests/'),
            ('path', '/'),
            ('name', 'whatever'),
        )

        mock_rse = rsemgr.get_rse_info('MOCK', **self.vo)
        srm_proto = rsemgr.create_protocol(mock_rse, 'read', scheme='srm')
        for pfn in srm_pfns:
            parsed = srm_proto.parse_pfns([pfn])
            for field, value in expected:
                assert parsed[pfn][field] == value
示例#12
0
 def test_get_mgr_SourceNotFound_multi(self):
     """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs  and PFNs (SourceNotFound)

     Downloads two existing and two missing files into ``self.gettmpdir``.
     When both existing files succeeded and both missing ones are reported as
     SourceNotFound, that exception is re-raised; any other outcome raises a
     generic Exception.
     """
     # NOTE(review): the method always raises - presumably it is wrapped by
     # an expected-exception decorator outside this view; confirm.
     scope = 'user.%s' % self.user
     protocol = mgr.create_protocol(self.rse_settings, 'read')
     # dict.values() is a view on Python 3 and cannot be indexed.
     pfn_a = list(protocol.lfns2pfns({'name': '2_rse_remote_get.raw', 'scope': scope}).values())[0]
     pfn_b = list(protocol.lfns2pfns({'name': '2_rse_remote_get_not_existing.raw', 'scope': scope}).values())[0]
     status, details = mgr.download(self.rse_settings, [{'name': '1_not_existing_data.raw', 'scope': scope},
                                                        {'name': '1_rse_remote_get.raw', 'scope': scope},
                                                        {'name': '2_not_existing_data.raw', 'scope': scope, 'pfn': pfn_b},
                                                        {'name': '2_rse_remote_get.raw', 'scope': scope, 'pfn': pfn_a}], self.gettmpdir)
     missing_1 = '%s:1_not_existing_data.raw' % scope
     missing_2 = '%s:2_not_existing_data.raw' % scope
     if (details['%s:1_rse_remote_get.raw' % scope] and details['%s:2_rse_remote_get.raw' % scope]
             and details[missing_1].__class__.__name__ == 'SourceNotFound'
             and details[missing_2].__class__.__name__ == 'SourceNotFound'):
         raise details[missing_1]
     raise Exception('Return not as expected: %s, %s' % (status, details))
示例#13
0
 def test_get_mgr_SourceNotFound_multi(self):
     """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs  and PFNs (SourceNotFound)

     Downloads two existing and two missing files into ``self.tmpdir``. When
     both existing files succeeded and both missing ones are reported as
     SourceNotFound, that exception is re-raised; any other outcome raises a
     generic Exception.
     """
     # NOTE(review): the method always raises - presumably it is wrapped by
     # an expected-exception decorator outside this view; confirm.
     scope = 'user.%s' % self.user
     protocol = mgr.create_protocol(self.rse_settings, 'read')
     # dict.values() is a view on Python 3 and cannot be indexed.
     pfn_a = list(protocol.lfns2pfns({'name': '2_rse_remote_get.raw', 'scope': scope}).values())[0]
     pfn_b = list(protocol.lfns2pfns({'name': '2_rse_remote_get_not_existing.raw', 'scope': scope}).values())[0]
     status, details = mgr.download(self.rse_settings, [{'name': '1_not_existing_data.raw', 'scope': scope},
                                                        {'name': '1_rse_remote_get.raw', 'scope': scope},
                                                        {'name': '2_not_existing_data.raw', 'scope': scope, 'pfn': pfn_b},
                                                        {'name': '2_rse_remote_get.raw', 'scope': scope, 'pfn': pfn_a}], self.tmpdir)
     missing_1 = '%s:1_not_existing_data.raw' % scope
     missing_2 = '%s:2_not_existing_data.raw' % scope
     if (details['%s:1_rse_remote_get.raw' % scope] and details['%s:2_rse_remote_get.raw' % scope]
             and details[missing_1].__class__.__name__ == 'SourceNotFound'
             and details[missing_2].__class__.__name__ == 'SourceNotFound'):
         raise details[missing_1]
     raise Exception('Return not as expected: %s, %s' % (status, details))
示例#14
0
    def setUpClass(cls):
        """XROOTD (RSE/PROTOCOLS): Creating necessary directories and files

        Prepares the local temporary directory and the xrootd storage with the
        files listed in ``MgrTestCases.files_local``, ``files_remote`` and
        ``files_local_and_remote``. Sets ``cls.tmpdir``, ``cls.user`` and
        ``cls.static_file``.
        """

        # Getting info for the test environment
        rse_id, prefix, hostname, port = cls.get_rse_info()

        try:
            os.mkdir(prefix)
        except Exception as e:
            print(e)

        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = uuid()

        local_seed = "%s/data.raw" % cls.tmpdir
        with open(local_seed, "wb") as out:
            out.seek((1024 * 1024) - 1)  # 1 MB
            out.write(b'\0')
        for local_name in MgrTestCases.files_local:
            shutil.copy(local_seed, '%s/%s' % (cls.tmpdir, local_name))

        rse_settings = rsemanager.get_rse_info(rse_id)
        protocol = rsemanager.create_protocol(rse_settings, 'write')
        protocol.connect()

        # Random 1 MB file that is copied to the static location and to every
        # remote-only test file.
        os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' %
                  prefix)
        cls.static_file = 'xroot://%s:%d/%s/data.raw' % (hostname, port,
                                                         prefix)
        execute('xrdcp %s/data.raw %s' % (prefix, cls.static_file))

        scope = 'user.%s' % cls.user

        for remote_name in MgrTestCases.files_remote:
            remote_pfn = protocol.path2pfn(
                prefix + protocol._get_path(scope, remote_name))
            execute('xrdcp %s/data.raw %s' % (prefix, remote_pfn))

        for shared_name in MgrTestCases.files_local_and_remote:
            shutil.copy(local_seed, '%s/%s' % (cls.tmpdir, shared_name))
            remote_pfn = protocol.path2pfn(
                prefix + protocol._get_path(scope, shared_name))
            execute('xrdcp %s/%s %s' % (cls.tmpdir, shared_name, remote_pfn))
示例#15
0
def add_replicas(rse, files, account, rse_id=None, ignore_availability=True, session=None):
    """
    Bulk add file replicas.

    The RSE is resolved by name, or by id when no name is given. For a
    non-deterministic RSE every file must carry a 'pfn'; its 'path' entry is
    filled in from the parsed PFN before the replicas are registered.

    :param rse:     The rse name.
    :param files:   The list of files.
    :param account: The account owner.
    :param rse_id:  The RSE id. To be used if rse parameter is None.
    :param ignore_availability: Ignore the RSE blacklisting.
    :param session: The database session in use.

    :returns: The value returned by the bulk registration of the file DIDs.
    """
    if rse:
        replica_rse = get_rse(rse=rse, session=session)
    else:
        replica_rse = get_rse(rse=None, rse_id=rse_id, session=session)

    # Bit 2 of the availability mask is the write flag.
    if not (replica_rse.availability & 2 or ignore_availability):
        raise exception.RessourceTemporaryUnavailable('%s is temporary unavailable for writing' % rse)

    replicas = __bulk_add_file_dids(files=files, account=account, session=session)

    if not replica_rse.deterministic:
        pfns = []
        scheme = None
        for entry in files:
            if 'pfn' not in entry:
                raise exception.UnsupportedOperation('PFN needed for this (non deterministic) RSE %(rse)s ' % {'rse': rse})
            # The scheme of the last file is the one handed to the protocol.
            scheme = entry['pfn'].split(':')[0]
            pfns.append(entry['pfn'])

        # NOTE(review): when only rse_id was supplied, rse is None here and is
        # still passed to get_rse_info - confirm this path is supported.
        rse_settings = rsemgr.get_rse_info(rse, session=session)
        protocol = rsemgr.create_protocol(rse_settings=rse_settings, operation='write', scheme=scheme)
        parsed_pfns = protocol.parse_pfns(pfns=pfns)
        for entry in files:
            parsed = parsed_pfns[entry['pfn']]
            entry['path'] = ''.join([parsed['path'], parsed['name']])

    nbfiles, nbytes = __bulk_add_replicas(rse_id=replica_rse.id, files=files, account=account, session=session)
    increase(rse_id=replica_rse.id, files=nbfiles, bytes=nbytes, session=session)
    return replicas
示例#16
0
    def setUpClass(cls):
        """XROOTD (RSE/PROTOCOLS): Creating necessary directories and files

        Prepares the local temporary directory and the WJ-XROOTD storage with
        the files listed in ``MgrTestCases.files_local`` and ``files_remote``.
        Sets ``cls.tmpdir``, ``cls.user`` and ``cls.static_file``.
        """
        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = uuid()

        local_seed = "%s/data.raw" % cls.tmpdir
        with open(local_seed, "wb") as out:
            out.seek((1024 * 1024) - 1)  # 1 MB
            out.write(b'\0')
        for local_name in MgrTestCases.files_local:
            shutil.copy(local_seed, '%s/%s' % (cls.tmpdir, local_name))

        rse_settings = rsemanager.get_rse_info('WJ-XROOTD')
        protocol = rsemanager.create_protocol(rse_settings, 'write')
        protocol.connect()

        # Endpoint of the xroot protocol as described in the RSE repository
        with open('etc/rse_repository.json') as f:
            data = json.load(f)
        xroot_settings = data['WJ-XROOTD']['protocols']['supported']['xroot']
        prefix = xroot_settings['prefix']
        hostname = xroot_settings['hostname']
        port = xroot_settings['port']

        try:
            os.mkdir(prefix)
        except Exception as e:
            print(e)

        # Random 1 MB file that is copied to the static location and to every
        # remote test file.
        os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' %
                  prefix)
        cls.static_file = 'xroot://%s:%d/%s/data.raw' % (hostname, port,
                                                         prefix)
        execute('xrdcp %s/data.raw %s' % (prefix, cls.static_file))

        scope = 'user.%s' % cls.user
        for remote_name in MgrTestCases.files_remote:
            remote_pfn = protocol.path2pfn(
                prefix + protocol._get_path(scope, remote_name))
            execute('xrdcp %s/data.raw %s' % (prefix, remote_pfn))
示例#17
0
def declare_bad_file_replicas(pfns, rse, session=None):
    """
    Declare a list of bad replicas.

    On a deterministic RSE the scope and name are derived from the parsed PFN
    path and the replica states are updated through update_replicas_states.
    Otherwise the replicas are selected by path and flagged in one bulk
    update.

    :param pfns: The list of PFNs.
    :param rse: The RSE name.
    :param session: The database session in use.
    :raises ReplicaNotFound: if at least one of the PFNs does not match an existing replica.
    """
    rse_info = rsemgr.get_rse_info(rse, session=session)
    rse_id = rse_info['id']
    replicas = []
    p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
    if rse_info['deterministic']:
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = parsed_pfn[pfn]['path']
            path_parts = path.split('/')
            if path.startswith(('user', 'group')):
                # user/group scopes span the first two path components
                scope = '%s.%s' % (path_parts[0], path_parts[1])
            else:
                scope = path_parts[0]
            name = parsed_pfn[pfn]['name']
            replicas.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
        try:
            update_replicas_states(replicas, session=session)
        except exception.UnsupportedOperation:
            raise exception.ReplicaNotFound("One or several replicas don't exist.")
    else:
        path_clause = []
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
            path_clause.append(models.RSEFileAssociation.path == path)
        if not path_clause:
            # Without any path clause the OR filter would not restrict the
            # update, which would then hit every replica of the RSE.
            return
        # The optimizer hint needs its closing parenthesis to be well formed.
        query = session.query(models.RSEFileAssociation.path, models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.rse_id).\
            with_hint(models.RSEFileAssociation, "+ index(replicas REPLICAS_PATH_IDX)", 'oracle').\
            filter(models.RSEFileAssociation.rse_id == rse_id).filter(or_(*path_clause))
        rowcount = query.update({'state': ReplicaState.BAD})
        if rowcount != len(parsed_pfn):
            raise exception.ReplicaNotFound("One or several replicas don't exist.")
示例#18
0
    def tearDownClass(cls):
        """S3ES (RSE/PROTOCOLS): Removing created directories and files

        Deletes the 'user' and 'group' entries below the s3+https prefix of
        ``cls.site``; a failing deletion is printed and otherwise ignored.
        """
        rse_settings = rsemanager.get_rse_info(cls.site)
        with open('etc/rse_repository.json') as f:
            data = json.load(f)
        s3_settings = data[cls.site]['protocols']['supported']['s3+https']
        scheme = s3_settings['scheme']
        prefix = s3_settings['prefix']
        hostname = s3_settings['hostname']
        port = s3_settings['port']

        # Use the s3es implementation wherever signeds3 is configured.
        for protocol in rse_settings['protocols']:
            if protocol['impl'] == 'rucio.rse.protocols.signeds3.Default':
                protocol['impl'] = 'rucio.rse.protocols.s3es.Default'

        storage = rsemanager.create_protocol(rse_settings, operation='write', scheme='s3+https')
        storage.connect()
        for top_level in ('user', 'group'):
            try:
                storage.delete('%s://%s:%s/%s/%s' % (scheme, hostname, port, prefix, top_level))
            except Exception as e:
                print(e)
示例#19
0
    def setupClass(cls):
        """S3ES (RSE/PROTOCOLS): Creating necessary directories and files

        Creates a temporary local directory with a 1 kB ``data.raw`` and one
        symlink to it per entry of ``MgrTestCases.files_local`` and
        ``MgrTestCases.files_remote``, then uploads the remote files and
        ``data.raw`` itself through the s3+https protocol of the site.

        Sets ``cls.site``, ``cls.tmpdir``, ``cls.user`` and
        ``cls.static_file``.
        """
        cls.site = 'BNL-OSG2_ES'
        # Creating local files
        cls.tmpdir = tempfile.mkdtemp()
        cls.user = '******'
        with open('etc/rse_repository.json') as f:
            data = json.load(f)
        s3_settings = data[cls.site]['protocols']['supported']['s3+https']
        scheme = s3_settings['scheme']
        prefix = s3_settings['prefix']
        hostname = s3_settings['hostname']
        port = s3_settings['port']

        with open("%s/data.raw" % cls.tmpdir, "wb") as out:
            out.seek((1024) - 1)  # 1 kB
            # The file is opened in binary mode, so the payload must be bytes.
            out.write(b'\0')
        for f in MgrTestCases.files_local:
            os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

        cls.static_file = '%s://%s:%s/%s/user.%s/data.raw' % (scheme, hostname, port, prefix, cls.user)

        rse_settings = rsemanager.get_rse_info(cls.site)
        storage = rsemanager.create_protocol(rse_settings, operation='write', scheme='s3+https')
        storage.connect()

        def upload(name):
            # Push <tmpdir>/<name> to its PFN; an already existing replica is
            # only reported, not treated as an error.
            # dict.values() is a view on Python 3 and cannot be indexed.
            destfile = list(rsemanager.lfns2pfns(rse_settings, [{'name': name, 'scope': 'user.%s' % (cls.user)}, ], operation='write', scheme='s3+https').values())[0]
            try:
                storage.put('%s/%s' % (cls.tmpdir, name), destfile)
            except FileReplicaAlreadyExists as e:
                print(e)

        for f in MgrTestCases.files_remote:
            os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
            upload(f)
        upload('data.raw')
示例#20
0
def _endpoint_protocol(url, impl, scheme):
    """
    Translate an endpoint URL into the protocol description of a temporary RSE.

    The URL is split on ':' and is expected to have the shape
    '<scheme or space token>://<hostname>:<port>/<prefix>'.

    :param url: Endpoint URL of the storage.
    :param impl: Dotted path of the protocol implementation.
    :param scheme: Protocol scheme; for 'srm' the text before the first ':' of the URL is stored as space token.
    :returns: Protocol dictionary in the form handed to rse.add_protocol.
    """
    parts = url.split(':')
    port, slash, path = parts[2].partition('/')
    protocol = {
        'impl': impl,
        'scheme': scheme,
        'hostname': parts[1][2:],  # drop the leading '//'
        'port': port,
        'prefix': slash + path,  # empty if nothing follows the port
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    if scheme == 'srm':
        protocol['extended_attributes'] = {'space_token': parts[0],
                                           'web_service_path': ''}
    return protocol


def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.

    Registers two temporary RSEs (one for src, one for dst) and then, per
    iteration, creates a dataset in scope 'mock', uploads the configured
    injector file to the source RSE, registers the replica, attaches it to
    the dataset and adds a rule that requests one copy on the destination RSE.

    :param once: If True, only runs one iteration of the main loop.
    :param src: Source endpoint URL. A URL starting with 'https://' selects the WebDAV protocol, anything else SRM.
    :param dst: Destination endpoint URL; it is registered with the scheme derived from src.
    :raises ValueError: If src or dst is missing.
    """

    logging.info('request: starting')

    # Both endpoints are dereferenced right away, so fail with a clear message
    # before any RSE has been registered.
    if not src or not dst:
        raise ValueError('request_transfer needs both a source and a destination endpoint')

    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()

    if src.startswith('https://'):
        scheme = 'https'
        impl = 'rucio.rse.protocols.webdav.Default'
    else:
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'

    # Parse both URLs first: a malformed endpoint must not leave a half registered pair of RSEs
    proto_a = _endpoint_protocol(src, impl, scheme)
    proto_b = _endpoint_protocol(dst, impl, scheme)

    rse.add_rse(site_a)
    rse.add_protocol(site_a, proto_a)

    rse.add_rse(site_b)
    rse.add_protocol(site_b, proto_b)

    si = rsemanager.get_rse_info(site_a)

    session = get_session()

    logging.info('request: started')

    while not graceful_stop.is_set():

        try:

            tmp_name = generate_uuid()
            dataset_name = 'dataset-%s' % tmp_name
            file_name = 'file-%s' % tmp_name

            # add a new dataset
            did.add_did(scope='mock', name=dataset_name,
                        type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': file_name}])['mock:%s' % file_name]

            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except Exception as error:
                # best effort: a failure here is tolerated, a real problem shows up in the upload below
                logging.debug('request: mkdir for %s failed: %s', pfn, error)

            # upload the test file
            try:
                injector_file = config_get('injector', 'file')
                p.put(os.path.basename(injector_file), pfn, source_dir=os.path.dirname(injector_file))
            except Exception:
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': dataset_name}], account='root', session=session)
                break

            # the size is handed over as an integer to the replica and to the attachment alike
            nbytes = config_get_int('injector', 'bytes')

            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name=file_name,
                                bytes=nbytes,
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)

            # to the dataset
            did.attach_dids(scope='mock', name=dataset_name,
                            dids=[{'scope': 'mock', 'name': file_name, 'bytes': nbytes}],
                            account='root', session=session)

            # add rule for the dataset
            ts = time.time()

            rule.add_rule(dids=[{'scope': 'mock', 'name': dataset_name}],
                          account='root',
                          copies=1,
                          rse_expression=site_b,
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)

            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time()-ts)*1000)

            record_counter('daemons.mock.conveyorinjector.request_transfer')

            session.commit()
        except Exception:
            # KeyboardInterrupt/SystemExit are no longer swallowed by the loop
            session.rollback()
            logging.critical(traceback.format_exc())

        if once:
            return

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
示例#21
0
def _replica_state_condition(scope, name, unavailable, all_states):
    """
    Build the replica filter that selects the replicas of one file.

    :param scope: Scope of the file.
    :param name: Name of the file.
    :param unavailable: If True, UNAVAILABLE and COPYING replicas are accepted next to AVAILABLE ones.
    :param all_states: If True, no restriction on the state is applied; takes precedence over unavailable.
    :returns: An and_() clause on models.RSEFileAssociation.
    """
    clauses = [models.RSEFileAssociation.scope == scope,
               models.RSEFileAssociation.name == name]
    if not all_states:
        if not unavailable:
            clauses.append(models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
        else:
            clauses.append(or_(models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                               models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE,
                               models.RSEFileAssociation.state == ReplicaState.COPYING))
    return and_(*clauses)


def _read_protocols(rse_settings, schemes):
    """
    Create the read protocols of one RSE.

    :param rse_settings: RSE settings as returned by rsemgr.get_rse_info.
    :param schemes: Schemes to create a protocol for; if empty, a single protocol without scheme restriction is requested.
    :returns: List of the protocol objects that could be created.
    """
    protocols = list()
    if not schemes:
        try:
            protocols.append(rsemgr.create_protocol(rse_settings, 'read'))
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())
    else:
        for scheme in schemes:
            try:
                protocols.append(rsemgr.create_protocol(rse_settings=rse_settings, operation='read', scheme=scheme))
            except exception.RSEProtocolNotSupported:
                pass  # no need to be verbose
            except Exception:
                print(format_exc())
    return protocols


def list_replicas(dids, schemes=None, unavailable=False, request_id=None, ignore_availability=True, all_states=False, session=None):
    """
    List file replicas for a list of data identifiers (DIDs).

    This is a generator. Files with replicas are yielded as soon as the rows of
    the next file are reached; files without any replica are yielded at the end.

    :param dids: The list of data identifiers (DIDs).
    :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
    :param unavailable: Also include unavailable replicas in the list.
    :param request_id: ID associated with the request for debugging. Not used in this function body.
    :param ignore_availability: Ignore the RSE blacklisting. Not used in this function body.
    :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
    :param session: The database session in use.
    :returns: Generator of dictionaries with the keys 'scope', 'name' and 'rses' (RSE name -> list of PFNs);
              'bytes', 'md5' and 'adler32' are added once a replica row was seen, 'states' if all_states is set,
              'space_token' if an srm protocol was used.
    """
    rseinfo = {}
    replicas = {}
    replica_conditions, did_conditions = [], []

    def register_file(scope, name):
        # Exactly one result entry and one filter per file. A file that is reachable
        # through several requested DIDs used to get several filters; when those ended
        # up in different query chunks the entry was yielded and deleted and then
        # looked up again (KeyError), or its PFNs were appended twice.
        file_key = '%s:%s' % (scope, name)
        if file_key in replicas:
            return
        replicas[file_key] = {'scope': scope, 'name': name, 'rses': {}}
        replica_conditions.append(_replica_state_condition(scope, name, unavailable, all_states))

    # remove duplicate did from the list
    for entry in [dict(tupleized) for tupleized in set(tuple(item.items()) for item in dids)]:
        if 'type' in entry and entry['type'] in (DIDType.FILE, DIDType.FILE.value) or 'did_type' in entry and entry['did_type'] in (DIDType.FILE, DIDType.FILE.value):
            register_file(entry['scope'], entry['name'])
        else:
            # type not given as file: has to be looked up in the DID table
            did_conditions.append(and_(models.DataIdentifier.scope == entry['scope'],
                                       models.DataIdentifier.name == entry['name']))

    if did_conditions:
        # Get files
        for scope, name, did_type in session.query(models.DataIdentifier.scope,
                                                   models.DataIdentifier.name,
                                                   models.DataIdentifier.did_type).filter(or_(*did_conditions)):
            if did_type == DIDType.FILE:
                register_file(scope, name)
            else:
                # for dataset/container: walk down the content tree until files are reached
                content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                              models.DataIdentifierAssociation.child_name,
                                              models.DataIdentifierAssociation.child_type)
                content_query = content_query.with_hint(models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle')
                child_dids = [(scope, name)]
                while child_dids:
                    s, n = child_dids.pop()
                    for tmp_did in content_query.filter_by(scope=s, name=n):
                        if tmp_did.child_type == DIDType.FILE:
                            register_file(tmp_did.child_scope, tmp_did.child_name)
                        else:
                            child_dids.append((tmp_did.child_scope, tmp_did.child_name))

    # Get the list of replicas
    is_false = False
    tmp_protocols = {}
    key = None  # 'scope:name' of the file whose rows are currently collected
    for replica_condition in chunks(replica_conditions, 50):

        replica_query = select(columns=(models.RSEFileAssociation.scope,
                                        models.RSEFileAssociation.name,
                                        models.RSEFileAssociation.bytes,
                                        models.RSEFileAssociation.md5,
                                        models.RSEFileAssociation.adler32,
                                        models.RSEFileAssociation.path,
                                        models.RSEFileAssociation.state,
                                        models.RSE.rse),
                               whereclause=and_(models.RSEFileAssociation.rse_id == models.RSE.id,
                                                models.RSE.deleted == is_false,
                                                or_(*replica_condition)),
                               order_by=(models.RSEFileAssociation.scope,
                                         models.RSEFileAssociation.name)).\
            with_hint(models.RSEFileAssociation.scope, text="INDEX(REPLICAS REPLICAS_PK)", dialect_name='oracle').\
            compile()
        # models.RSE.availability.op(avail_op)(0x100) != 0
        for scope, name, nbytes, md5, adler32, path, state, rse in session.execute(replica_query.statement, replica_query.params).fetchall():
            if rse not in rseinfo:
                rseinfo[rse] = rsemgr.get_rse_info(rse, session=session)
            if rseinfo[rse]['staging_area']:
                # replicas on staging areas are not listed
                continue

            # rows are ordered by scope and name: a new key means the previous file is complete
            row_key = '%s:%s' % (scope, name)
            if key and key != row_key:
                yield replicas[key]
                del replicas[key]
            key = row_key

            result = replicas[key]
            if 'bytes' not in result:
                result['bytes'] = nbytes
                result['md5'] = md5
                result['adler32'] = adler32

            if rse not in result['rses']:
                result['rses'][rse] = []

            if all_states:
                if 'states' not in result:
                    result['states'] = {}
                result['states'][rse] = state

            # get protocols
            if rse not in tmp_protocols:
                tmp_protocols[rse] = _read_protocols(rseinfo[rse], schemes)

            # get pfns
            pfns_cache = dict()
            for protocol in tmp_protocols[rse]:
                if 'determinism_type' in protocol.attributes:  # PFN is cachable
                    cache_key = '%s:%s:%s' % (protocol.attributes['determinism_type'], scope, name)
                    try:
                        path = pfns_cache[cache_key]
                    except KeyError:  # No cache entry scope:name found for this protocol
                        path = protocol._get_path(scope, name)
                        pfns_cache[cache_key] = path
                if not schemes or protocol.attributes['scheme'] in schemes:
                    try:
                        result['rses'][rse].append(protocol.lfns2pfns(lfns={'scope': scope, 'name': name, 'path': path}).values()[0])
                    except Exception:
                        # temporary protection
                        print(format_exc())
                    if protocol.attributes['scheme'] == 'srm':
                        try:
                            result['space_token'] = protocol.attributes['extended_attributes']['space_token']
                        except KeyError:
                            result['space_token'] = None
    if key:
        yield replicas[key]

    # Files with no replicas
    for file_key in replicas:
        if not replicas[file_key]['rses']:
            yield replicas[file_key]
示例#22
0
def get_destinations(rse_info, scheme, req, sources):
    """
    Work out the destination PFN(s) and the destination space token of a request.

    For a non-deterministic RSE the path is built with construct_surl_DQ2 from a
    dataset name (the 'ds_name' request attribute, else the first parent dataset,
    else 'other'), prefixed with the SRM prefix of the RSE, and stored as replica
    path via replica.update_replicas_paths. For a deterministic RSE the PFNs are
    taken from rsemgr.lfns2pfns.

    :param rse_info: RSE settings dictionary; reads 'deterministic', 'protocols', 'rse_type' and 'rse'.
    :param scheme: Scheme used to resolve the PFNs and to select the write protocol.
    :param req: Request dictionary; reads 'scope', 'name', 'attributes', 'request_type', 'request_id',
                'dest_rse_id' and, if present, 'previous_attempt_id'.
    :param sources: Sequence of sources; for a STAGEIN request on a non-deterministic RSE, sources[0][1] becomes the destination.
    :returns: Tuple (list of destination PFNs, space token or None); (None, None) if the scheme is not
              supported by the RSE or if a STAGEIN request does not have exactly one source.
    """
    dsn = 'other'
    pfn = {}
    if not rse_info['deterministic']:
        ts = time.time()

        # get rule scope and name
        if req['attributes']:
            if isinstance(req['attributes'], dict):
                req_attributes = json.loads(json.dumps(req['attributes']))
            else:
                req_attributes = json.loads(str(req['attributes']))
            if 'ds_name' in req_attributes:
                dsn = req_attributes["ds_name"]
        if dsn == 'other':
            # select a containing dataset
            for parent in did.list_parent_dids(req['scope'], req['name']):
                if parent['type'] == DIDType.DATASET:
                    dsn = parent['name']
                    break
        record_timer('daemons.conveyor.submitter.list_parent_dids', (time.time() - ts) * 1000)

        # always use SRM
        ts = time.time()
        nondet = rsemgr.create_protocol(rse_info, 'write', scheme='srm')
        record_timer('daemons.conveyor.submitter.create_protocol', (time.time() - ts) * 1000)

        # if there exists a prefix for SRM, use it (the last SRM protocol listed wins)
        prefix = ''
        for s in rse_info['protocols']:
            if s['scheme'] == 'srm':
                prefix = s['prefix']

        # DQ2 path always starts with /, but prefix might not end with /
        path = construct_surl_DQ2(dsn, req['name'])

        # retrial transfers to tape need a new filename - add timestamp
        if (req['request_type'] == RequestType.TRANSFER
                and 'previous_attempt_id' in req
                and req['previous_attempt_id']
                and rse_info['rse_type'] == 'TAPE'):  # TODO: RUCIO-809 - rsemanager: get_rse_info -> rse_type is string instead of RSEType
            path = '%s_%i' % (path, int(time.time()))
            logging.debug('Retrial transfer request %s DID %s:%s to tape %s renamed to %s' % (req['request_id'],
                                                                                              req['scope'],
                                                                                              req['name'],
                                                                                              rse_info['rse'],
                                                                                              path))

        # Avoid a double '/' between prefix and path; endswith() also copes with an
        # empty prefix, where indexing prefix[-1] raised an IndexError.
        if prefix.endswith('/'):
            tmp_path = '%s%s' % (prefix[:-1], path)
        else:
            tmp_path = '%s%s' % (prefix, path)

        # add the hostname
        pfn['%s:%s' % (req['scope'], req['name'])] = nondet.path2pfn(tmp_path)
        if req['request_type'] == RequestType.STAGEIN:
            if len(sources) == 1:
                pfn['%s:%s' % (req['scope'], req['name'])] = sources[0][1]
            else:
                # TODO: need to check
                return None, None

        # we must set the destination path for nondeterministic replicas explicitly
        replica.update_replicas_paths([{'scope': req['scope'],
                                        'name': req['name'],
                                        'rse_id': req['dest_rse_id'],
                                        'path': path}])

    else:
        ts = time.time()
        try:
            pfn = rsemgr.lfns2pfns(rse_info,
                                   lfns=[{'scope': req['scope'],
                                          'name': req['name']}],
                                   scheme=scheme)
        except RSEProtocolNotSupported:
            logging.warning('%s not supported by %s' % (scheme, rse_info['rse']))
            return None, None

        record_timer('daemons.conveyor.submitter.lfns2pfns', (time.time() - ts) * 1000)

    # Flatten the PFN mapping: a value is either a single URL or a sequence of URLs
    destinations = []
    for value in pfn.values():
        if isinstance(value, (str, unicode)):
            destinations.append(value)
        elif isinstance(value, (tuple, list)):
            # the elements are the URLs themselves; indexing the sequence with them raised a TypeError
            destinations.extend(value)

    try:
        protocols = rsemgr.select_protocol(rse_info, 'write', scheme=scheme)
    except RSEProtocolNotSupported:
        logging.warning('%s not supported by %s' % (scheme, rse_info['rse']))
        return None, None

    # we need to set the spacetoken if we use SRM
    dest_spacetoken = None
    if scheme == 'srm':
        dest_spacetoken = protocols['extended_attributes']['space_token']

    return destinations, dest_spacetoken
示例#23
0
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in xrange(nbfiles)]
        for f in files:
            input[f['pfn']] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse, files=files, account='root', ignore_availability=True)
        for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files], schemes=['srm'], ignore_availability=True):
            for rse in replica['rses']:
                pfns.extend(replica['rses'][rse])
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            pfn = result.keys()[0]
            assert_equal(input[pfn], result.values()[0])

    def test_get_did_from_pfns_deterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites"""
        tmp_scope = 'mock'
        rse = 'MOCK3'
        nbfiles = 3
        pfns = []
        input = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], True)
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)]
        p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
        for f in files:
            pfn = p.lfns2pfns(lfns={'scope': f['scope'], 'name': f['name']}).values()[0]
            pfns.append(pfn)
            input[pfn] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse, files=files, account='root', ignore_availability=True)
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            pfn = result.keys()[0]
            assert_equal(input[pfn], result.values()[0])
示例#24
0
def reaper(rses, worker_number=1, child_number=1, total_children=1, chunk_size=100, once=False, greedy=False, scheme=None, exclude_rses=None, delay_seconds=0):
    """
    Main loop to select and delete files.

    For every RSE the replicas returned by list_unlocked_replicas are flagged
    as BEING_DELETED, deleted on storage chunk by chunk (skipped for staging
    areas) and then removed with delete_replicas. Failures are logged and the
    loop carries on with the next replica, chunk or RSE.

    :param rses: List of RSE dictionaries the reaper should work against; the 'rse', 'id' and 'staging_area' keys are read.
    :param worker_number: The worker number (only used in log messages).
    :param child_number: The child number; forwarded to list_unlocked_replicas as worker_number.
    :param total_children: The total number of children created per worker; forwarded to list_unlocked_replicas as total_workers.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone: the RSE usage check is skipped and up to 10000 replicas are requested.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper (not read inside this function).
    :param delay_seconds: Forwarded unchanged to list_unlocked_replicas.
    """
    logging.info('Starting reaper: worker %(worker_number)s, child %(child_number)s' % locals())
    while not graceful_stop.is_set():
        try:
            max_deleting_rate = 0

            for rse in rses:
                deleting_rate = 0
                rse_info = rsemgr.get_rse_info(rse['rse'])
                rse_protocol = rse_core.get_rse_protocols(rse['rse'])

                if not rse_protocol['availability_delete']:
                    logging.info('Reaper %s-%s: RSE %s is not available for deletion' % (worker_number, child_number, rse_info['rse']))
                    continue

                # Temporary hack to force gfal for deletion
                for protocol in rse_info['protocols']:
                    if protocol['impl'] in ('rucio.rse.protocols.srm.Default', 'rucio.rse.protocols.gsiftp.Default'):
                        protocol['impl'] = 'rucio.rse.protocols.gfal.Default'

                logging.info('Reaper %s-%s: Running on RSE %s' % (worker_number, child_number, rse_info['rse']))
                try:
                    needed_free_space, max_being_deleted_files = None, 10000
                    if not greedy:
                        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse=rse['rse'], rse_id=rse['id'])
                        # NOTE: this message is formatted from locals(); keep the local names in sync with it.
                        logging.info('Reaper %(worker_number)s-%(child_number)s: Space usage for RSE %(rse)s - max_being_deleted_files: %(max_being_deleted_files)s, needed_free_space: %(needed_free_space)s, used: %(used)s, free: %(free)s' % locals())
                        if needed_free_space <= 0:
                            needed_free_space = 0
                            logging.info('Reaper %s-%s: free space is above minimum limit for %s' % (worker_number, child_number, rse['rse']))

                    s = time.time()
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_unlocked_replicas(rse=rse['rse'], bytes=needed_free_space, limit=max_being_deleted_files, worker_number=child_number, total_workers=total_children, delay_seconds=delay_seconds)
                    logging.debug('Reaper %s-%s: list_unlocked_replicas %s %s %s' % (worker_number, child_number, rse['rse'], time.time() - s, len(replicas)))

                    if not replicas:
                        logging.info('Reaper %s-%s: nothing to do for %s' % (worker_number, child_number, rse['rse']))
                        continue

                    # Honour the requested scheme. It used to be hard-coded to None here while the
                    # deletion itself was skipped whenever a scheme was given, which left the
                    # replicas stuck in BEING_DELETED.
                    p = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                    for files in chunks(replicas, chunk_size):
                        logging.debug('Reaper %s-%s: Running on : %s' % (worker_number, child_number, str(files)))
                        try:
                            update_replicas_states(replicas=[dict(replica.items() + [('state', ReplicaState.BEING_DELETED), ('rse_id', rse['id'])]) for replica in files])

                            for replica in files:
                                try:
                                    # The PFN must be built with the same scheme as the protocol used to delete it.
                                    replica['pfn'] = str(rsemgr.lfns2pfns(rse_settings=rse_info, lfns=[{'scope': replica['scope'], 'name': replica['name']}, ], operation='delete', scheme=scheme).values()[0])
                                except ReplicaUnAvailable as e:
                                    err_msg = 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s' % (replica['scope'], replica['name'], rse['rse'], str(e))
                                    logging.warning('Reaper %s-%s: %s' % (worker_number, child_number, err_msg))
                                    replica['pfn'] = None

                                add_message('deletion-planned', {'scope': replica['scope'],
                                                                 'name': replica['name'],
                                                                 'file-size': replica['bytes'],
                                                                 'url': replica['pfn'],
                                                                 'rse': rse_info['rse']})

                            monitor.record_counter(counters='reaper.deletion.being_deleted', delta=len(files))

                            try:
                                deleted_files = []
                                p.connect()
                                for replica in files:
                                    try:
                                        logging.info('Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                        s = time.time()
                                        if rse['staging_area'] or rse['rse'].endswith("STAGING"):
                                            logging.warning('Reaper %s-%s: Deletion STAGING of %s:%s as %s on %s, will only delete the catalog and not do physical deletion' % (
                                                worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                        else:
                                            if replica['pfn']:
                                                p.delete(replica['pfn'])
                                            else:
                                                # No PFN could be computed: nothing to remove on storage.
                                                logging.warning('Reaper %s-%s: Deletion UNAVAILABLE of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                        monitor.record_timer('daemons.reaper.delete.%s.%s' % (p.attributes['scheme'], rse['rse']), (time.time()-s)*1000)
                                        duration = time.time() - s

                                        deleted_files.append({'scope': replica['scope'], 'name': replica['name']})

                                        add_message('deletion-done', {'scope': replica['scope'],
                                                                      'name': replica['name'],
                                                                      'rse': rse_info['rse'],
                                                                      'file-size': replica['bytes'],
                                                                      'url': replica['pfn'],
                                                                      'duration': duration})
                                        logging.info('Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                    except SourceNotFound:
                                        # The file is already gone from storage: report the failure but
                                        # still remove the replica through delete_replicas below.
                                        err_msg = 'Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        logging.warning(err_msg)
                                        deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                        add_message('deletion-failed', {'scope': replica['scope'],
                                                                        'name': replica['name'],
                                                                        'rse': rse_info['rse'],
                                                                        'file-size': replica['bytes'],
                                                                        'url': replica['pfn'],
                                                                        'reason': err_msg})
                                    except (ServiceUnavailable, RSEAccessDenied) as e:
                                        logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                        add_message('deletion-failed', {'scope': replica['scope'],
                                                                        'name': replica['name'],
                                                                        'rse': rse_info['rse'],
                                                                        'file-size': replica['bytes'],
                                                                        'url': replica['pfn'],
                                                                        'reason': str(e)})
                                    except Exception as e:
                                        logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                        add_message('deletion-failed', {'scope': replica['scope'],
                                                                        'name': replica['name'],
                                                                        'rse': rse_info['rse'],
                                                                        'file-size': replica['bytes'],
                                                                        'url': replica['pfn'],
                                                                        'reason': str(e)})
                                    except:
                                        # Deliberate catch-all for anything not derived from Exception.
                                        logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                            except (ServiceUnavailable, RSEAccessDenied) as e:
                                # Raised outside the per-replica handler (e.g. by connect()): report every replica of the chunk as failed.
                                for replica in files:
                                    logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                    add_message('deletion-failed', {'scope': replica['scope'],
                                                                    'name': replica['name'],
                                                                    'rse': rse_info['rse'],
                                                                    'file-size': replica['bytes'],
                                                                    'url': replica['pfn'],
                                                                    'reason': str(e)})
                            finally:
                                p.close()
                            s = time.time()
                            with monitor.record_timer_block('reaper.delete_replicas'):
                                delete_replicas(rse=rse['rse'], files=deleted_files)
                            logging.debug('Reaper %s-%s: delete_replicas successes %s %s %s' % (worker_number, child_number, rse['rse'], len(deleted_files), time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                            deleting_rate += len(deleted_files)
                        except:
                            # Best effort: a failing chunk must not stop the remaining chunks.
                            logging.critical(traceback.format_exc())
                    # Fraction of the allowed deletions that were actually carried out on this RSE.
                    deleting_rate = deleting_rate * 1.0 / max_being_deleted_files
                    if deleting_rate > max_deleting_rate:
                        max_deleting_rate = deleting_rate
                except:
                    # Best effort: a failing RSE must not stop the remaining RSEs.
                    logging.critical(traceback.format_exc())

            if once:
                break

            logging.info(" Reaper %s-%s: max_deleting_rate: %s " % (worker_number, child_number, max_deleting_rate))
            # The busier the last pass was, the shorter the pause before the next one.
            sleep_time = int((1 - max_deleting_rate) * 60 + 1)
            time.sleep(sleep_time)

        except:
            logging.critical(traceback.format_exc())

    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')