Exemplo n.º 1
0
            def generate(vo):
                """
                Stream the replicas of the requested DIDs, either as one JSON document
                per line or as a metalink4 XML document.

                ``dids``, ``schemes``, ``metalink``, ``select``, ``client_ip`` and
                ``limit`` are read from the enclosing scope.

                :param vo: The VO forwarded to list_replicas.
                :returns: A generator of string chunks.
                """
                # list_replicas has to produce its first result (or fail) before any
                # output is sent, otherwise its exceptions won't be propagated
                # correctly. "first" tracks whether the metalink header is still pending.
                first = metalink
                # Becomes True when the consumer closes the generator early.
                closed = False

                try:
                    # then, stream the replica information
                    for rfile in list_replicas(dids=dids, schemes=schemes, vo=vo):
                        if first and metalink:
                            # first, set the appropriate content type, and stream the header
                            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                            first = False

                        # flat list of URLs (fallback order) and URL -> RSE lookup
                        replicas = []
                        dictreplica = {}
                        for rse in rfile['rses']:
                            for replica in rfile['rses'][rse]:
                                replicas.append(replica)
                                dictreplica[replica] = rse
                        if select == 'geoip':
                            try:
                                replicas = sort_geoip(dictreplica, client_ip)
                            except AddressNotFoundError:
                                # best effort: keep the unsorted order collected above
                                pass
                        else:
                            replicas = sort_random(dictreplica)
                        if not metalink:
                            yield dumps(rfile) + '\n'
                        else:
                            yield ' <file name="' + rfile['name'] + '">\n'
                            yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                            if rfile['adler32'] is not None:
                                yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                            if rfile['md5'] is not None:
                                yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                            yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                            yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                            yield '</glfn>\n'

                            # priorities are 1-based; stop once "limit" URLs were written
                            for idx, replica in enumerate(replicas, start=1):
                                yield '   <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n'
                                if limit and limit == idx:
                                    break
                            yield ' </file>\n'

                    if metalink and first:
                        # if still first output, i.e. there were no replicas
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                except GeneratorExit:
                    # The consumer closed the stream. A generator must not yield while
                    # handling GeneratorExit (close() would raise RuntimeError), so
                    # remember to skip the footer below.
                    closed = True
                    raise
                finally:
                    # don't forget to send the metalink footer, also when an error
                    # interrupted a document whose header was already sent
                    if metalink and not first and not closed:
                        yield '</metalink>\n'
Exemplo n.º 2
0
            def generate():
                """
                Stream a metalink4 XML document describing each file and its replicas.

                ``first``, ``replicas_iter``, ``select`` and ``client_location`` are read
                from the enclosing scope; ``first`` is emitted ahead of the remaining
                items of ``replicas_iter``.

                :returns: A generator of XML string chunks.
                """
                # Function-scope import keeps this block self-contained. Replica URLs
                # are written as XML text, so '&', '<' and '>' have to be escaped or
                # the document becomes invalid.
                from xml.sax.saxutils import escape

                # first, set the appropriate content type, and stream the header
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

                # iteratively stream the XML per file
                for rfile in itertools.chain((first, ), replicas_iter):
                    # map every replica URL to the RSE it is stored on
                    dictreplica = {}
                    for rse in rfile['rses']:
                        for replica in rfile['rses'][rse]:
                            dictreplica[replica] = rse

                    # stream metadata
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    # order the replica URLs with the requested strategy; anything
                    # unrecognised falls back to a random order
                    if select == 'geoip':
                        replicas = sort_geoip(dictreplica,
                                              client_location['ip'],
                                              ignore_error=True)
                    elif select == 'closeness':
                        replicas = sort_closeness(dictreplica, client_location)
                    elif select == 'dynamic':
                        replicas = sort_dynamic(dictreplica, client_location)
                    elif select == 'ranking':
                        replicas = sort_ranking(dictreplica, client_location)
                    else:
                        replicas = sort_random(dictreplica)

                    # stream URLs, priority is the 1-based position in the sorted list
                    for idx, replica in enumerate(replicas, start=1):
                        yield '  <url location="' + str(dictreplica[replica]) \
                            + '" priority="' + str(idx) \
                            + '">' + escape(replica) + '</url>\n'

                    yield ' </file>\n'

                # don't forget to send the metalink footer
                yield '</metalink>\n'
Exemplo n.º 3
0
    def GET(self, scope, name):
        """
        Metalink redirect

        Streams a metalink4 XML document listing the replicas of one DID. The
        query string may override ``schemes``, the sorting strategy
        (``select`` / ``sort``) and the client location (``ip``, ``fqdn``,
        ``site``).

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound
            406 Not Acceptable

        :param scope: The scope name of the file.
        :param name: The name of the file.
        :returns: A generator of XML string chunks.
        """
        # Function-scope import keeps this block self-contained. Replica URLs
        # are written as XML text, so '&', '<' and '>' have to be escaped or
        # the document becomes invalid.
        from xml.sax.saxutils import escape

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids = [{'scope': scope, 'name': name}]
        schemes = [
            'http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm',
            'davs'
        ]
        select = None

        # set the correct client IP
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if ctx.query:
            # ctx.query starts with '?'; parse_qs maps each key to a list of values
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'sort' in params:
                # 'sort' takes precedence over 'select' when both are given
                select = params['sort'][0]

            if 'ip' in params:
                client_location['ip'] = params['ip'][0]
            if 'fqdn' in params:
                client_location['fqdn'] = params['fqdn'][0]
            if 'site' in params:
                client_location['site'] = params['site'][0]

        try:
            # materialise everything up front so that lookup errors surface
            # before the first chunk of the response is produced
            tmp_replicas = list(list_replicas(dids=dids,
                                              schemes=schemes,
                                              client_location=client_location))

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the appropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                # map every replica URL to the RSE it is stored on
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'],
                                                              rfile['name'])
                yield '</glfn>\n'

                # order the replica URLs with the requested strategy; anything
                # unrecognised falls back to a random order
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica,
                                          client_location['ip'],
                                          ignore_error=True)
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                # stream URLs, priority is the 1-based position in the sorted list
                for idx, replica in enumerate(replicas, start=1):
                    yield '  <url location="' + str(dictreplica[replica]) \
                        + '" priority="' + str(idx) \
                        + '">' + escape(replica) + '</url>\n'

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Exemplo n.º 4
0
    def GET(self, scope, name):
        """
        Header Redirect

        Picks one replica URL of the given file and answers with a redirect to
        it. The query string may select a specific RSE (``rse``), a site
        (``site``), a sorting strategy (``select`` / ``sort``), the schemes to
        consider (``schemes``) and the client location (``ip``, ``fqdn``,
        ``site``). A ``Link`` header pointing at the metalink endpoint is set
        as well.

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, [
                'davs', 'http', 'https', 's3+rucio'
            ]

            # prefer the forwarded address over the address of the direct peer
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            if ctx.query:
                # ctx.query starts with '?'; parse_qs maps each key to a list of values
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'sort' in params:
                    # 'sort' takes precedence over 'select' when both are given
                    select = params['sort'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    site = params['site'][0]
                if 'schemes' in params:
                    # a single string here, turned into a list further down
                    schemes = params['schemes'][0]
                else:
                    # NOTE(review): whenever a query string is present without
                    # 'schemes', this replaces the default list set above with a
                    # different one - confirm that this is intended.
                    schemes = ['davs', 'https', 's3']

                if 'ip' in params:
                    client_location['ip'] = params['ip'][0]
                if 'fqdn' in params:
                    client_location['fqdn'] = params['fqdn'][0]
                if 'site' in params:
                    client_location['site'] = params['site'][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, ','.join(schemes), select))
            else:
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, schemes, select))
                schemes = [schemes]  # list_replicas needs a list

            replicas = [
                r for r in list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                                         schemes=schemes,
                                         client_location=client_location)
            ]

            # if several entries carry replicas, the selection of the last one wins
            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    # candidate URL -> RSE, filled only when no explicit RSE was requested
                    dictreplica = {}

                    if rse:
                        # an explicit RSE was requested: take its first URL or fail
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )
                    else:

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                                # forcibly replace davs and s3 URLs with https
                                replica = replica.replace(
                                    'davs://',
                                    'https://').replace('s3://', 'https://')
                                dictreplica[replica] = rep

                        if not dictreplica:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )

                        elif site:
                            # restrict the candidates to the requested site
                            rep = site_selector(dictreplica, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                raise ReplicaNotFound(
                                    'no redirection possible - no valid RSE for HTTP redirection found'
                                )
                        else:
                            # order the candidates with the requested strategy;
                            # anything unrecognised falls back to a random order
                            if select == 'geoip':
                                rep = sort_geoip(dictreplica,
                                                 client_location['ip'])
                            elif select == 'closeness':
                                rep = sort_closeness(dictreplica,
                                                     client_location)
                            elif select == 'dynamic':
                                rep = sort_dynamic(dictreplica,
                                                   client_location)
                            elif select == 'ranking':
                                rep = sort_ranking(dictreplica,
                                                   client_location)
                            else:
                                rep = sort_random(dictreplica)

                            selected_url = rep[0]

                        # find the RSE the selected URL belongs to
                        # NOTE(review): the comparison is against the unmodified
                        # URLs, so a URL that was rewritten to https:// above will
                        # not match and selected_rse stays None - confirm this is
                        # fine given selected_rse is only used for s3+rucio URLs.
                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    # s3+rucio URLs are redirected to their signed counterpart
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    raise seeother(signed_URLS[selected_url])

                raise seeother(selected_url)

            raise ReplicaNotFound(
                'no redirection possible - file does not exist')

        except seeother:
            # the redirect itself is raised as an exception; let it pass untouched
            raise
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Exemplo n.º 5
0
    def GET(self, scope, name):
        """
        List all replicas for data identifiers.

        Streams one JSON document per line, or a metalink4 XML document when
        ``application/metalink4+xml`` is listed in the Accept header. The query
        string may carry ``schemes``, ``select`` and ``limit`` (maximum number
        of URLs written per file in the metalink output).

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError

        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """
        # Function-scope import keeps this block self-contained. Replica URLs
        # are written as XML text, so '&', '<' and '>' have to be escaped or
        # the document becomes invalid.
        from xml.sax.saxutils import escape

        # Media types in an Accept header are usually separated by ", ", so
        # each entry is stripped before it is compared.
        metalink = False
        accept = ctx.env.get('HTTP_ACCEPT')
        if accept is not None:
            accepted = [mime.strip() for mime in accept.split(',')]
            if 'application/metalink4+xml' in accepted:
                metalink = True

        dids, schemes, select, limit = [{
            'scope': scope,
            'name': name
        }], None, None, None
        if ctx.query:
            # ctx.query starts with '?'; parse_qs maps each key to a list of values
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = int(params['limit'][0])

        try:
            # first, set the appropriate content type, and stream the header
            if not metalink:
                header('Content-Type', 'application/x-json-stream')
            else:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # the client address does not change per file, resolve it once;
            # prefer the forwarded address over the address of the direct peer
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                # flat list of URLs (fallback order) and URL -> RSE lookup
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    try:
                        replicas = sort_geoip(dictreplica, client_ip)
                    except AddressNotFoundError:
                        # best effort: keep the unsorted order collected above
                        pass
                else:
                    replicas = sort_random(dictreplica)
                if not metalink:
                    yield dumps(rfile) + '\n'
                else:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    # priorities are 1-based; stop once "limit" URLs were written
                    for idx, replica in enumerate(replicas, start=1):
                        yield '   <url location="' + str(dictreplica[replica]) \
                            + '" priority="' + str(idx) \
                            + '">' + escape(replica) + '</url>\n'
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Exemplo n.º 6
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The JSON request body may carry ``dids``, ``schemes``, ``unavailable``,
        ``all_states``, ``rse_expression``, ``client_location``, ``sort``,
        ``domain``, ``resolve_archives``, ``resolve_parents`` and
        ``signature_lifetime``. The query string may carry ``select`` / ``sort``
        (sorting strategy) and ``limit`` (maximum number of URLs written per
        file in the metalink output); these are applied after the body.

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            500 InternalError

        :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
        """
        # Function-scope import keeps this block self-contained. Replica URLs
        # are written as XML text, so '&', '<' and '>' have to be escaped or
        # the document becomes invalid.
        from xml.sax.saxutils import escape

        # Media types in an Accept header are usually separated by ", ", so
        # each entry is stripped before it is compared.
        metalink = False
        accept = ctx.env.get('HTTP_ACCEPT')
        if accept is not None:
            accepted = [mime.strip() for mime in accept.split(',')]
            if 'application/metalink4+xml' in accepted:
                metalink = True

        # prefer the forwarded address over the address of the direct peer
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states, domain = False, None, False, None
        signature_lifetime, resolve_archives, resolve_parents = None, True, False
        client_location = {}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location = params['client_location']
                # fall back to the address of the request if none was supplied
                client_location['ip'] = params['client_location'].get(
                    'ip', client_ip)
            if 'sort' in params:
                select = params['sort']
            if 'domain' in params:
                domain = params['domain']
            if 'resolve_archives' in params:
                resolve_archives = params['resolve_archives']
            if 'resolve_parents' in params:
                resolve_parents = params['resolve_parents']
            if 'signature_lifetime' in params:
                signature_lifetime = params['signature_lifetime']
            else:
                # hardcoded default of 10 minutes if config is not parseable
                signature_lifetime = config_get('credentials',
                                                'signature_lifetime',
                                                raise_exception=False,
                                                default=600)
        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            # ctx.query starts with '?'; parse_qs maps each key to a list of
            # strings, so take the first value and convert where needed
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # must be an int, it is compared against the URL counter below
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(400, 'ValueError',
                                              'limit must be an integer')
            if 'sort' in params:
                select = params['sort'][0]

        try:
            # first, set the appropriate content type, and stream the header
            if not metalink:
                header('Content-Type', 'application/x-json-stream')
            else:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       signature_lifetime=signature_lifetime,
                                       resolve_archives=resolve_archives,
                                       resolve_parents=resolve_parents,
                                       issuer=ctx.env.get('issuer')):
                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                else:
                    # URL -> (domain, priority, rse, client_extract)
                    dictreplica = {}
                    for replica, pfn in rfile['pfns'].items():
                        dictreplica[replica] = (pfn['domain'],
                                                pfn['priority'],
                                                pfn['rse'],
                                                pfn['client_extract'])

                    yield ' <file name="' + rfile['name'] + '">\n'

                    if 'parents' in rfile and rfile['parents']:
                        yield '  <parents>\n'
                        for parent in rfile['parents']:
                            yield '   <did>' + parent + '</did>\n'
                        yield '  </parents>\n'

                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (
                        config_get(
                            'policy',
                            'schema',
                            raise_exception=False,
                            default='generic'), rfile['scope'], rfile['name'])

                    # TODO: deprecate this
                    if select == 'geoip':
                        # client_location is empty when the body did not supply
                        # one; fall back to the address of the request
                        replicas = sort_geoip(dictreplica,
                                              client_location.get('ip', client_ip))
                    elif select == 'closeness':
                        replicas = sort_closeness(dictreplica, client_location)
                    elif select == 'dynamic':
                        replicas = sort_dynamic(dictreplica, client_location)
                    elif select == 'ranking':
                        replicas = sort_ranking(dictreplica, client_location)
                    elif select == 'random':
                        replicas = sort_random(dictreplica)
                    else:
                        # default: order by the (domain, priority, rse, client_extract) tuple
                        replicas = sorted(dictreplica, key=dictreplica.get)

                    # stop once "limit" URLs were written for this file
                    for idx, replica in enumerate(replicas, start=1):
                        pfn_domain, pfn_priority, pfn_rse, pfn_extract = dictreplica[replica]
                        yield '  <url location="' + str(pfn_rse) \
                            + '" domain="' + str(pfn_domain) \
                            + '" priority="' + str(pfn_priority) \
                            + '" client_extract="' + str(pfn_extract).lower() \
                            + '">' + escape(replica) + '</url>\n'
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Exemplo n.º 7
0
    def get(self, scope, name):
        """
        List all replicas for data identifiers.

        .. :quickref: Replicas; List all replicas for did
        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError

        :reqheader HTTP_ACCEPT: application/metalink4+xml
        :param scope: data identifier scope.
        :param name: data identifier name.
        :resheader Content-Type: application/x-json-stream
        :resheader Content-Type: application/metalink4+xml
        :status 200: OK.
        :status 401: Invalid auth token.
        :status 404: DID not found.
        :status 500: Internal Error.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        metalink = False
        if request.environ.get('HTTP_ACCEPT') is not None:
            tmp = request.environ.get('HTTP_ACCEPT').split(',')
            if 'application/metalink4+xml' in tmp:
                metalink = True

        dids, schemes, select, limit = [{
            'scope': scope,
            'name': name
        }], None, None, None

        schemes = request.args.get('schemes', None)
        select = request.args.get('select', None)
        limit = request.args.get('limit', None)
        if limit:
            limit = int(limit)

        data = ""
        content_type = 'application/x-json-stream'
        try:
            # first, set the appropriate content type, and stream the header
            if metalink:
                content_type = 'application/metalink4+xml'
                data += '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
                if client_ip is None:
                    client_ip = request.remote_addr

                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    try:
                        replicas = sort_geoip(dictreplica, client_ip)
                    except AddressNotFoundError:
                        pass
                else:
                    replicas = sort_random(dictreplica)
                if not metalink:
                    data += dumps(rfile) + '\n'
                else:
                    data += ' <file name="' + rfile['name'] + '">\n'
                    data += '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        data += '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        data += '  <hash type="md5">' + rfile[
                            'md5'] + '</hash>\n'

                    data += '  <size>' + str(rfile['bytes']) + '</size>\n'

                    data += '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    data += '</glfn>\n'

                    idx = 0
                    for replica in replicas:
                        data += '   <url location="' + str(
                            dictreplica[replica]) + '" priority="' + str(
                                idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    data += ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                data += '</metalink>\n'

            return Response(data, content_type=content_type)
        except DataIdentifierNotFound, e:
            return generate_http_error_flask(404, 'DataIdentifierNotFound',
                                             e.args[0][0])
Exemplo n.º 8
0
    def post(self):
        """
        List all replicas for data identifiers.

        .. :quickref: ListReplicas; List all replicas for did.

        :reqheader HTTP_ACCEPT: application/metalink4+xml
        :query schemes: A list of schemes to filter the replicas. Takes precedence over the JSON value when given.
        :query select: Legacy name of the ``sort`` query parameter.
        :query sort: Requested sorting of the result, e.g., 'geoip', 'closeness', 'dynamic', 'ranking'. Takes precedence over the JSON value when given.
        :query limit: Maximum number of URLs listed per file in the metalink output.
        :<json list dids: list of DIDs.
        :<json list schemes: A list of schemes to filter the replicas.
        :<json bool unavailable: Also include unavailable replicas.
        :<json bool all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
        :<json string rse_expression: The RSE expression to restrict on a list of RSEs.
        :<json dict client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site'}.
        :<json string sort: Requested sorting of the result, e.g., 'geoip', 'closeness', 'dynamic', 'ranking'.
        :<json string domain: The network domain for the call, either None, 'wan' or 'lan'. None is fallback to 'wan', 'all' is both ['lan','wan']
        :resheader Content-Type: application/x-json-stream
        :resheader Content-Type: application/metalink4+xml
        :status 200: OK.
        :status 400: Cannot decode json parameter list.
        :status 401: Invalid auth token.
        :status 404: DID not found.
        :status 500: Internal Error.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """
        # Function-scope import keeps this handler self-contained; only the
        # PFN text of each <url> element is escaped.
        from xml.sax.saxutils import escape

        accept = request.environ.get('HTTP_ACCEPT')
        metalink = accept is not None and 'application/metalink4+xml' in accept.split(',')

        client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = request.remote_addr

        dids, schemes, select, unavailable = [], None, None, False
        ignore_availability, rse_expression, all_states = False, None, False
        # domain must have a default: it is always forwarded to list_replicas,
        # even when the request body does not mention it.
        domain = None
        client_location = {}

        try:
            params = parse_response(request.data)
        except ValueError:
            return generate_http_error_flask(
                400, 'ValueError', 'Cannot decode json parameter list')

        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            client_location = params['client_location']
            # fall back to the address of the caller when the client sent none
            client_location.setdefault('ip', client_ip)
        if 'sort' in params:
            select = params['sort']
        if 'domain' in params:
            domain = params['domain']

        # Query-string values win over the JSON body, but an absent query
        # parameter must not wipe out what the body supplied.
        schemes = request.args.get('schemes', schemes)
        select = request.args.get('select', select)
        select = request.args.get('sort', select)
        # type=int makes werkzeug return the default (None) for non-numeric input
        limit = request.args.get('limit', None, type=int)

        chunks = []
        content_type = 'application/x-json-stream'
        try:
            # first, set the appropriate content type, and emit the header
            if metalink:
                content_type = 'application/metalink4+xml'
                chunks.append('<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n')

            # then, emit the replica information
            for rfile in list_replicas(
                    dids=dids,
                    schemes=schemes,
                    unavailable=unavailable,
                    request_id=request.environ.get('request_id'),
                    ignore_availability=ignore_availability,
                    all_states=all_states,
                    rse_expression=rse_expression,
                    client_location=client_location,
                    domain=domain):

                if not metalink:
                    chunks.append(dumps(rfile, cls=APIEncoder) + '\n')
                    continue

                # map every PFN to the RSE it lives on
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                chunks.append(' <file name="' + rfile['name'] + '">\n')
                chunks.append('  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n')
                if rfile['adler32'] is not None:
                    chunks.append('  <hash type="adler32">' + rfile['adler32'] + '</hash>\n')
                if rfile['md5'] is not None:
                    chunks.append('  <hash type="md5">' + rfile['md5'] + '</hash>\n')
                chunks.append('  <size>' + str(rfile['bytes']) + '</size>\n')

                chunks.append('  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name']))
                chunks.append('</glfn>\n')

                if select == 'geoip':
                    # client_location is empty when the body carried none,
                    # so fall back to the caller's address instead of raising
                    replicas = sort_geoip(dictreplica,
                                          client_location.get('ip', client_ip))
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                for priority, replica in enumerate(replicas, 1):
                    chunks.append('   <url location="' + str(dictreplica[replica])
                                  + '" priority="' + str(priority) + '">'
                                  + escape(replica) + '</url>\n')
                    if limit and limit == priority:
                        break
                chunks.append(' </file>\n')

            # don't forget the metalink footer
            if metalink:
                chunks.append('</metalink>\n')

            return Response(''.join(chunks), content_type=content_type)
        except DataIdentifierNotFound as error:
            # NOTE(review): sibling handlers report error.args[0]; the double
            # index is kept as found here - confirm the shape of args.
            return generate_http_error_flask(404, 'DataIdentifierNotFound',
                                             error.args[0][0])
Exemplo n.º 9
0
    def GET(self, scope, name):
        """
        Stream all replicas of one data identifier.

        The reply is a JSON stream (one document per line) unless the Accept
        header lists application/metalink4+xml, in which case a metalink
        document is streamed instead.

        Query parameters read from the request: ``schemes``, ``select``
        and ``limit``.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            406 Not Acceptable
            500 InternalError

        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        accept_header = ctx.env.get('HTTP_ACCEPT')
        metalink = False
        if accept_header is not None and 'application/metalink4+xml' in accept_header.split(','):
            metalink = True

        dids = [{'scope': scope, 'name': name}]
        schemes = select = limit = None
        if ctx.query:
            # the first character of ctx.query is dropped before parsing
            query = parse_qs(ctx.query[1:])
            if 'schemes' in query:
                schemes = query['schemes']
            if 'select' in query:
                select = query['select'][0]
            if 'limit' in query:
                limit = int(query['limit'][0])

        # Without an explicit scheme filter, metalink resolves every supported protocol
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # The metalink preamble is held back until list_replicas has
            # produced its first item, so that its exceptions still propagate.
            nothing_sent = True

            if metalink:
                content_type = 'application/metalink4+xml'
            else:
                content_type = 'application/x-json-stream'
            header('Content-Type', content_type)

            for rfile in list_replicas(dids=dids, schemes=schemes, vo=ctx.env.get('vo')):

                # first iteration only: open the metalink document
                if nothing_sent:
                    if metalink:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    nothing_sent = False

                # prefer the forwarded address over the direct peer address
                client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                if client_ip is None:
                    client_ip = ctx.ip

                # PFNs in RSE order, plus a PFN -> RSE lookup table
                replicas = [pfn for rse in rfile['rses'] for pfn in rfile['rses'][rse]]
                dictreplica = {}
                for rse in rfile['rses']:
                    for pfn in rfile['rses'][rse]:
                        dictreplica[pfn] = rse

                if select != 'geoip':
                    replicas = sort_random(dictreplica)
                else:
                    try:
                        replicas = sort_geoip(dictreplica, client_ip)
                    except AddressNotFoundError:
                        # keep the unsorted list when the address is unknown
                        pass

                if not metalink:
                    yield dumps(rfile) + '\n'
                    continue

                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                for checksum in ('adler32', 'md5'):
                    if rfile[checksum] is not None:
                        yield '  <hash type="' + checksum + '">' + rfile[checksum] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                for priority, pfn in enumerate(replicas, 1):
                    yield '   <url location="' + str(dictreplica[pfn]) + '" priority="' + str(priority) + '">' + escape(pfn) + '</url>\n'
                    if limit and limit == priority:
                        break
                yield ' </file>\n'

            if metalink:
                if nothing_sent:
                    # no replica at all: still answer with a complete, empty metalink
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                else:
                    # the document was opened above, so close it
                    yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__, error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Exemplo n.º 10
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The request body is a JSON dictionary. Recognised keys are ``dids``,
        ``schemes``, ``unavailable``, ``all_states``, ``rse_expression``,
        ``client_location``, ``sort``, ``domain``, ``resolve_archives``,
        ``resolve_parents``, ``signature_lifetime`` and ``updated_after``
        (epoch number or '%Y-%m-%dT%H:%M:%S' string).

        The query string may carry ``select`` / ``sort`` (both set the
        sorting and override the body) and ``limit`` (maximum number of URLs
        per file in the metalink output).

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            406 Not Acceptable
            500 InternalError

        :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
        """

        accept = ctx.env.get('HTTP_ACCEPT')
        metalink = accept is not None and 'application/metalink4+xml' in accept.split(',')

        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states, domain = False, None, False, None
        signature_lifetime, resolve_archives, resolve_parents = None, True, False
        updated_after = None
        client_location = {}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location = params['client_location']
                # fall back to the address of the caller when the client sent none
                client_location.setdefault('ip', client_ip)
            if 'sort' in params:
                select = params['sort']
            if 'domain' in params:
                domain = params['domain']
            if 'resolve_archives' in params:
                resolve_archives = params['resolve_archives']
            if 'resolve_parents' in params:
                resolve_parents = params['resolve_parents']

            if 'signature_lifetime' in params:
                signature_lifetime = params['signature_lifetime']
            else:
                # hardcoded default of 10 minutes if config is not parseable
                signature_lifetime = config_get('credentials',
                                                'signature_lifetime',
                                                raise_exception=False,
                                                default=600)

            if 'updated_after' in params:
                if isinstance(params['updated_after'], (int, float)):
                    # convert from epoch time stamp to datetime object
                    updated_after = datetime.utcfromtimestamp(params['updated_after'])
                else:
                    # attempt UTC format '%Y-%m-%dT%H:%M:%S' conversion
                    updated_after = datetime.strptime(params['updated_after'],
                                                      '%Y-%m-%dT%H:%M:%S')

        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            # parse_qs maps every key to a *list* of values, so always take
            # the first element before using it as a scalar.
            query = parse_qs(ctx.query[1:])
            if 'select' in query:
                select = query['select'][0]
            if 'limit' in query:
                # must be an int: it is compared against the URL counter below
                try:
                    limit = int(query['limit'][0])
                except ValueError:
                    raise generate_http_error(400, 'ValueError',
                                              'Cannot decode limit parameter')
            if 'sort' in query:
                select = query['sort'][0]

        # Resolve all reasonable protocols when doing metalink for maximum access possibilities
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # we need to call list_replicas before starting to reply
            # otherwise the exceptions won't be propagated correctly
            first = True

            header('Content-Type',
                   'application/metalink4+xml' if metalink else 'application/x-json-stream')

            # the glfn prefix does not depend on the file, look it up only once
            policy_schema = None
            if metalink:
                policy_schema = config_get('policy',
                                           'schema',
                                           raise_exception=False,
                                           default='generic')

            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       signature_lifetime=signature_lifetime,
                                       resolve_archives=resolve_archives,
                                       resolve_parents=resolve_parents,
                                       updated_after=updated_after,
                                       issuer=ctx.env.get('issuer'),
                                       vo=ctx.env.get('vo')):

                # in first round, stream the metalink header
                if first:
                    if metalink:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    first = False

                # ... then, stream the replica information
                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                    continue

                # PFN -> (domain, priority, rse, client_extract); the tuple
                # order is also the default sort key further down
                dictreplica = {}
                for replica in rfile['pfns']:
                    pfn_info = rfile['pfns'][replica]
                    dictreplica[replica] = (pfn_info['domain'],
                                            pfn_info['priority'],
                                            pfn_info['rse'],
                                            pfn_info['client_extract'])

                yield ' <file name="' + rfile['name'] + '">\n'

                if 'parents' in rfile and rfile['parents']:
                    yield '  <parents>\n'
                    for parent in rfile['parents']:
                        yield '   <did>' + parent + '</did>\n'
                    yield '  </parents>\n'

                yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (
                    policy_schema, rfile['scope'], rfile['name'])

                # TODO: deprecate this
                if select == 'geoip':
                    # client_location is empty when the body carried none,
                    # so fall back to the caller's address instead of raising
                    replicas = sort_geoip(dictreplica,
                                          client_location.get('ip', client_ip))
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                elif select == 'random':
                    replicas = sort_random(dictreplica)
                else:
                    replicas = sorted(dictreplica, key=dictreplica.get)

                for count, replica in enumerate(replicas, 1):
                    pfn_domain, priority, rse, client_extract = dictreplica[replica]
                    yield '  <url location="' + str(rse) \
                        + '" domain="' + str(pfn_domain) \
                        + '" priority="' + str(priority) \
                        + '" client_extract="' + str(client_extract).lower() \
                        + '">' + escape(replica) + '</url>\n'
                    if limit and limit == count:
                        break
                yield ' </file>\n'

            if metalink:
                if first:
                    # ensure complete metalink on success without any content
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                else:
                    # if metalink start was already sent, always send the end
                    yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Exemplo n.º 11
0
    def get(self, scope, name):
        """
        Metalink redirect

        .. :quickref: MetaLinkRedirector; Metalink redirect.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        :query schemes: Schemes to filter the replicas, several values separated by commas.
        :query select: Requested sorting of the result, e.g., 'geoip', 'closeness', 'dynamic', 'ranking'.
        :query sort: Same as ``select``; wins when both are given.
        :query ip: Client IP to use instead of the detected one.
        :query fqdn: Client FQDN.
        :query site: Client site.
        :resheader Content-Type: application/metalink4+xml'.
        :status 200: OK.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 406: Not Acceptable.
        :status 500: Internal Error.
        :returns: Metalink file
        """
        # Function-scope import keeps this handler self-contained; only the
        # PFN text of each <url> element is escaped.
        from xml.sax.saxutils import escape

        dids = [{'scope': scope, 'name': name}]

        # set the correct client IP
        client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = request.remote_addr

        # an explicit ?ip= wins, otherwise keep the detected address
        # (defaulting to None here would discard it)
        client_location = {
            'ip': request.args.get('ip', client_ip),
            'fqdn': request.args.get('fqdn', None),
            'site': request.args.get('site', None),
        }

        schemes = request.args.get('schemes', [
            'http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm',
            'davs'
        ])
        if not isinstance(schemes, list):
            # a query value arrives as one string; split it so that
            # list_replicas always receives a list of schemes
            schemes = schemes.split(',')

        select = request.args.get('sort', request.args.get('select', None))

        try:
            tmp_replicas = list(list_replicas(dids=dids,
                                              schemes=schemes,
                                              client_location=client_location))

            if not tmp_replicas:
                return 'no redirection possible - cannot find the DID', 404

            # first, the metalink header
            chunks = ['<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n']

            # then the XML of every file
            for rfile in tmp_replicas:
                # map every PFN to the RSE it lives on
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                # metadata
                chunks.append(' <file name="' + rfile['name'] + '">\n')
                chunks.append('  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n')

                if rfile['adler32'] is not None:
                    chunks.append('  <hash type="adler32">' + rfile['adler32'] + '</hash>\n')
                if rfile['md5'] is not None:
                    chunks.append('  <hash type="md5">' + rfile['md5'] + '</hash>\n')

                chunks.append('  <size>' + str(rfile['bytes']) + '</size>\n')

                chunks.append('  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name']))
                chunks.append('</glfn>\n')

                # sort the actual replicas if necessary
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica,
                                          client_location['ip'],
                                          ignore_error=True)
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                # URLs, priority follows the sort order
                for priority, replica in enumerate(replicas, 1):
                    chunks.append('  <url location="' + str(dictreplica[replica])
                                  + '" priority="' + str(priority) + '">'
                                  + escape(replica) + '</url>\n')

                chunks.append(' </file>\n')

            # don't forget the metalink footer
            chunks.append('</metalink>\n')
            return Response(''.join(chunks), content_type='application/metalink4+xml')
        except DataIdentifierNotFound as error:
            return generate_http_error_flask(404, 'DataIdentifierNotFound',
                                             error.args[0])
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, 'ReplicaNotFound',
                                             error.args[0])
        except RucioException as error:
            return generate_http_error_flask(500, error.__class__.__name__,
                                             error.args[0])
        except Exception as error:
            print(format_exc())
            # a response body has to be text, not the exception object itself
            return str(error), 500
Exemplo n.º 12
0
    def get(self, scope, name):
        """
        Header Redirect

        .. :quickref: HeaderRedirector; Header redirect.

        :param scope: The scope name of the file.
        :param name: The name of the file.
        :resheader Content-Type: application/metalink+xml'.
        :status 303: Redirect.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 500: Internal Error.
        """

        headers = {}
        try:

            # use the default HTTP protocols if no scheme is given

            client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = request.remote_addr

            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            schemes = request.args.get('schemes', ['davs', 'https', 's3'])
            select = request.args.get('select', 'random')
            if 'sort' in request.args:
                select = request.args['sort']
            rse = request.args.get('rse', None)
            site = request.args.get('site', None)

            client_location['ip'] = request.args.get('ip', client_ip)
            client_location['fqdn'] = request.args.get('fqdn', None)
            client_location['site'] = request.args.get('site', None)

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                headers[
                    'Link'] = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (
                        cleaned_url, ','.join(schemes), select)
            else:
                headers[
                    'Link'] = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (
                        cleaned_url, schemes, select)
                schemes = [schemes]  # list_replicas needs a list

            replicas = [
                r for r in list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                                         schemes=schemes,
                                         client_location=client_location)
            ]

            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    dictreplica = {}

                    if rse:
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                    else:

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                                # forcibly replacement davs and s3 URLs to https
                                replica = replica.replace(
                                    'davs://',
                                    'https://').replace('s3://', 'https://')
                                dictreplica[replica] = rep

                        if not dictreplica:
                            return 'no redirection possible - no valid RSE for HTTP redirection found', 404

                        elif site:
                            rep = site_selector(dictreplica, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                        else:
                            if select == 'geoip':
                                rep = sort_geoip(dictreplica,
                                                 client_location['ip'])
                            elif select == 'closeness':
                                rep = sort_closeness(dictreplica,
                                                     client_location)
                            elif select == 'dynamic':
                                rep = sort_dynamic(dictreplica,
                                                   client_location)
                            elif select == 'ranking':
                                rep = sort_ranking(dictreplica,
                                                   client_location)
                            else:
                                rep = sort_random(dictreplica)

                            selected_url = rep[0]

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    res = redirect(signed_URLS[selected_url], code=303)
                    res.header = headers
                    return res

                res = redirect(signed_URLS[selected_url], code=303)
                res.header = headers
                return res

            return 'no redirection possible - file does not exist', 404

        except ReplicaNotFound as error:
            return generate_http_error_flask(404, 'ReplicaNotFound',
                                             error.args[0])
        except RucioException as error:
            return generate_http_error_flask(500, error.__class__.__name__,
                                             error.args[0])
        except Exception as error:
            print(format_exc())
            return error, 500
Exemplo n.º 13
0
    def get(self, scope_name):
        """
        Header Redirect

        .. :quickref: HeaderRedirector; Header redirect.

        Picks one replica of the requested file and answers with a 303
        redirect to its URL. CORS headers and a ``Link`` header pointing at
        the metalink description of the file are attached to every reply
        produced by this method.

        Query parameters read from the request: ``select`` and ``sort``
        (replica ordering; ``sort`` wins when both are given; values other
        than geoip/closeness/dynamic/ranking fall back to random), ``rse``
        (redirect to the first replica on that RSE), ``site`` (hand the choice
        to ``site_selector``), ``schemes``, and the client location hints
        ``ip``, ``fqdn`` and ``site``.

        :param scope_name: data identifier (scope)/(name).
        :resheader Link: metalink follow-up URL, type application/metalink+xml.
        :status 303: Redirect.
        :status 400: scope_name could not be parsed.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 500: Internal Error.
        """
        headers = Headers()
        headers.set('Access-Control-Allow-Origin',
                    request.environ.get('HTTP_ORIGIN'))
        headers.set('Access-Control-Allow-Headers',
                    request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        headers.set('Access-Control-Allow-Methods', '*')
        headers.set('Access-Control-Allow-Credentials', 'true')

        try:
            scope, name = parse_scope_name(scope_name)
        except ValueError as error:
            return generate_http_error_flask(400,
                                             'ValueError',
                                             error.args[0],
                                             headers=headers)
        except Exception as error:
            print(format_exc())
            return str(error), 500, headers

        # every "no usable replica" outcome below answers with this same reply
        no_rse_found = ('no redirection possible - no valid RSE for HTTP redirection found',
                        404, headers)

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, [
                'davs', 'http', 'https'
            ]

            client_ip = request_header_ensure_string('X-Forwarded-For',
                                                     request.remote_addr)

            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            if request.query_string:
                query_string = request.query_string.decode(encoding='utf-8')
                params = parse_qs(query_string)
                if 'select' in params:
                    select = params['select'][0]
                # 'sort' is evaluated after 'select', so it takes precedence
                if 'sort' in params:
                    select = params['sort'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    # the site both restricts the selection and is a location hint
                    site = params['site'][0]
                    client_location['site'] = site
                if 'schemes' in params:
                    schemes = params['schemes'][0]
                else:
                    # NOTE(review): with a query string but no 'schemes' the default
                    # differs from the one used when there is no query string at all
                    # (adds s3, drops http) - kept as is, confirm this is intended
                    schemes = ['davs', 'https', 's3']

                if 'ip' in params:
                    client_location['ip'] = params['ip'][0]
                if 'fqdn' in params:
                    client_location['fqdn'] = params['fqdn'][0]

            # REQUEST_URI is a server-specific extension of the WSGI environ; when the
            # server does not provide it, rebuild the path from the request object
            # instead of failing on None and answering every request with a 500
            request_uri = request.environ.get('REQUEST_URI') or (
                request.script_root + request.path)
            cleaned_url = request_uri.split('?')[0]

            # a scheme taken from the query string is a single string
            if not isinstance(schemes, list):
                schemes = [schemes]  # list_replicas needs a list

            # correctly forward the schemes and select to potential metalink followups
            headers.set(
                'Link',
                '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                % (cleaned_url, ','.join(schemes), select))

            # get vo if given
            vo = request_header_ensure_string('X-Rucio-VO', 'def')

            replicas = list(
                list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                              schemes=schemes,
                              client_location=client_location,
                              vo=vo))

            selected_url = None
            for r in replicas:
                rses = r['rses']
                if not rses:
                    continue

                if rse:
                    # an explicit RSE was requested: use its first replica or give up
                    if rse in rses and rses[rse]:
                        selected_url = rses[rse][0]
                        continue
                    return no_rse_found

                dictreplica = {}
                for rep in rses:
                    for replica in rses[rep]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replace davs and s3 URLs with https
                        replica = replica.replace(
                            'davs://',
                            'https://').replace('s3://', 'https://')
                        dictreplica[replica] = rep

                if not dictreplica:
                    return no_rse_found

                if site:
                    rep = site_selector(dictreplica, site, vo)
                    if not rep:
                        return no_rse_found
                elif select == 'geoip':
                    rep = sort_geoip(dictreplica, client_location['ip'])
                elif select == 'closeness':
                    rep = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    rep = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    rep = sort_ranking(dictreplica, client_location)
                else:
                    rep = sort_random(dictreplica)

                # the selectors return the candidates best-first
                selected_url = rep[0]

            if selected_url:
                response = redirect(selected_url, code=303)
                response.headers.extend(headers)
                return response

            return 'no redirection possible - file does not exist', 404, headers
        except ReplicaNotFound as error:
            return generate_http_error_flask(404,
                                             'ReplicaNotFound',
                                             error.args[0],
                                             headers=headers)
        except RucioException as error:
            return generate_http_error_flask(500,
                                             error.__class__.__name__,
                                             error.args[0],
                                             headers=headers)
        except Exception as error:
            print(format_exc())
            return str(error), 500, headers
Exemplo n.º 14
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The request body is a JSON document that may carry ``dids``,
        ``schemes``, ``unavailable``, ``all_states``, ``rse_expression``,
        ``client_location`` and ``sort``. The query string may carry
        ``select``, ``sort`` and ``limit``; values given there override the
        ones from the body. ``limit`` caps the number of URLs written per file
        in the metalink output.

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            500 InternalError

        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        # metalink output is chosen only by an exact entry in the Accept header
        accept = ctx.env.get('HTTP_ACCEPT')
        metalink = accept is not None and 'application/metalink4+xml' in accept.split(',')

        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states = False, None, False
        client_location = {}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location = params['client_location']
                client_location['ip'] = params['client_location'].get('ip', client_ip)
            if 'sort' in params:
                select = params['sort']
        except ValueError:
            raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

        if ctx.query:
            # parse_qs maps every key to a *list* of values, hence the [0] everywhere
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # the limit is compared with an integer counter below, so it has to
                # be converted; left as a string it would never match
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(400, 'ValueError', 'limit must be an integer')
            if 'sort' in params:
                select = params['sort'][0]

        try:
            # first, set the appropriate content type, and stream the header
            if not metalink:
                header('Content-Type', 'application/x-json-stream')
            else:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location):
                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                    continue

                # replica URL -> RSE holding it; only the metalink output needs it
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                # NOTE(review): the values below are written into the XML unescaped
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                if select == 'geoip':
                    # client_location is empty unless the body supplied one, so fall
                    # back to the address of the caller instead of raising KeyError
                    replicas = sort_geoip(dictreplica, client_location.get('ip', client_ip))
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                # priority is the 1-based position in the sorted list
                for priority, replica in enumerate(replicas, 1):
                    yield '   <url location="' + str(dictreplica[replica]) + '" priority="' + str(priority) + '">' + replica + '</url>\n'
                    if limit and limit == priority:
                        break
                yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0][0])
Exemplo n.º 15
0
            def generate(request_id, issuer, vo):
                # list_replicas has to be invoked before the first chunk is sent,
                # otherwise its exceptions would not reach the caller correctly.
                # The flag stays set until the first file has been seen.
                header_pending = metalink

                try:
                    replica_stream = list_replicas(dids=dids, schemes=schemes,
                                                   unavailable=unavailable,
                                                   request_id=request_id,
                                                   ignore_availability=ignore_availability,
                                                   all_states=all_states,
                                                   rse_expression=rse_expression,
                                                   client_location=client_location,
                                                   domain=domain, signature_lifetime=signature_lifetime,
                                                   resolve_archives=resolve_archives,
                                                   resolve_parents=resolve_parents,
                                                   updated_after=updated_after,
                                                   issuer=issuer,
                                                   vo=vo)
                    for rfile in replica_stream:

                        # the XML prologue is sent once, right before the first file
                        if header_pending and metalink:
                            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                        header_pending = False

                        # plain mode: one JSON document per line, nothing else
                        if not metalink:
                            yield dumps(rfile, cls=APIEncoder) + '\n'
                            continue

                        # PFN -> (domain, priority, rse, client_extract)
                        pfns = rfile['pfns']
                        dictreplica = {
                            pfn: (pfns[pfn]['domain'],
                                  pfns[pfn]['priority'],
                                  pfns[pfn]['rse'],
                                  pfns[pfn]['client_extract'])
                            for pfn in pfns.keys()
                        }

                        yield ' <file name="' + rfile['name'] + '">\n'

                        if 'parents' in rfile and rfile['parents']:
                            yield '  <parents>\n'
                            for parent_did in rfile['parents']:
                                yield '   <did>' + parent_did + '</did>\n'
                            yield '  </parents>\n'

                        yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                        # checksums are optional, emit only the ones that are set
                        for algorithm in ('adler32', 'md5'):
                            if rfile[algorithm] is not None:
                                yield '  <hash type="' + algorithm + '">' + rfile[algorithm] + '</hash>\n'

                        yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                        policy_schema = config_get('policy', 'schema',
                                                   raise_exception=False,
                                                   default='generic')
                        yield '  <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (policy_schema,
                                                                            rfile['scope'],
                                                                            rfile['name'])

                        # TODO: deprecate this
                        if select == 'geoip':
                            ordered = sort_geoip(dictreplica, client_location['ip'])
                        elif select == 'closeness':
                            ordered = sort_closeness(dictreplica, client_location)
                        elif select == 'dynamic':
                            ordered = sort_dynamic(dictreplica, client_location)
                        elif select == 'ranking':
                            ordered = sort_ranking(dictreplica, client_location)
                        elif select == 'random':
                            ordered = sort_random(dictreplica)
                        else:
                            # no strategy requested: order by the stored tuples
                            ordered = sorted(dictreplica, key=dictreplica.get)

                        for emitted, pfn in enumerate(ordered, 1):
                            pfn_domain, pfn_priority, pfn_rse, pfn_extract = dictreplica[pfn]
                            yield ''.join(('  <url location="', str(pfn_rse),
                                           '" domain="', str(pfn_domain),
                                           '" priority="', str(pfn_priority),
                                           '" client_extract="', str(pfn_extract).lower(),
                                           '">', escape(pfn), '</url>\n'))
                            # stop once the requested number of URLs was written
                            if limit and limit == emitted:
                                break
                        yield ' </file>\n'

                    if metalink and header_pending:
                        # nothing was streamed at all, i.e. there were no replicas:
                        # send a complete, empty document in one go
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                finally:
                    # close the document whenever its prologue has been sent
                    if metalink and not header_pending:
                        yield '</metalink>\n'