def generate(vo):
    """
    Stream the replica information either as JSON lines or as a metalink4 document.

    The names ``metalink``, ``dids``, ``schemes``, ``select``, ``limit`` and
    ``client_ip`` are free variables which are not defined in this function.

    :param vo: The VO, passed through to list_replicas.
    :returns: A generator yielding the response chunks as strings.
    """
    # list_replicas has to be called before starting to reply,
    # otherwise the exceptions won't be propagated correctly.
    # 'first' stays True until the metalink header has been sent.
    first = metalink
    consumer_closed = False

    try:
        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes, vo=vo):
            if first and metalink:
                # first, stream the header
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                first = False

            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            if select == 'geoip':
                try:
                    replicas = sort_geoip(dictreplica, client_ip)
                except AddressNotFoundError:
                    # address could not be resolved, keep the unsorted replicas
                    pass
            else:
                replicas = sort_random(dictreplica)

            if not metalink:
                yield dumps(rfile) + '\n'
            else:
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'
                yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                for idx, replica in enumerate(replicas, 1):
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n'
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

        if metalink and first:
            # if still first output, i.e. there were no replicas
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
    except GeneratorExit:
        # the consumer closed the generator; yielding again from the
        # finally clause would raise "generator ignored GeneratorExit"
        consumer_closed = True
        raise
    finally:
        # don't forget to send the metalink footer, also if an error
        # interrupted the stream after the header was sent
        if metalink and not first and not consumer_closed:
            yield '</metalink>\n'
def generate():
    """
    Stream a metalink4 document describing all resolved replicas.

    The names ``first``, ``replicas_iter``, ``select`` and ``client_location``
    are free variables which are not defined in this function.

    :returns: A generator yielding the metalink document in chunks.
    """
    # local import so that the block does not depend on a file-level import
    from xml.sax.saxutils import escape

    # first, stream the header
    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # iteratively stream the XML per file
    for rfile in itertools.chain((first, ), replicas_iter):
        # map every replica URL to the RSE it is stored on
        dictreplica = {}
        for rse in rfile['rses']:
            for replica in rfile['rses'][rse]:
                dictreplica[replica] = rse

        # stream metadata
        yield ' <file name="' + rfile['name'] + '">\n'
        yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
        if rfile['adler32'] is not None:
            yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
        if rfile['md5'] is not None:
            yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
        yield ' <size>' + str(rfile['bytes']) + '</size>\n'
        yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
        yield '</glfn>\n'

        # sort the actual replicas if necessary
        if select == 'geoip':
            replicas = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
        elif select == 'closeness':
            replicas = sort_closeness(dictreplica, client_location)
        elif select == 'dynamic':
            replicas = sort_dynamic(dictreplica, client_location)
        elif select == 'ranking':
            replicas = sort_ranking(dictreplica, client_location)
        else:
            replicas = sort_random(dictreplica)

        # stream URLs; they are escaped because a URL with a query string
        # (e.g. '&' separated parameters) would otherwise break the XML
        for idx, replica in enumerate(replicas, 1):
            yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n'
        yield ' </file>\n'

    # don't forget to send the metalink footer
    yield '</metalink>\n'
def GET(self, scope, name):
    """
    Metalink redirect

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound
        406 Not Acceptable

    :param scope: The scope name of the file.
    :param name: The name of the file.
    :returns: A generator yielding the metalink4 document in chunks.
    """
    # local import so that the block does not depend on a file-level import
    from xml.sax.saxutils import escape

    header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
    header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    header('Access-Control-Allow-Methods', '*')
    header('Access-Control-Allow-Credentials', 'true')

    dids = [{'scope': scope, 'name': name}]
    schemes = ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm', 'davs']
    select = None

    # set the correct client IP
    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        # 'sort' takes precedence over 'select' if both are given
        if 'sort' in params:
            select = params['sort'][0]

        if 'ip' in params:
            client_location['ip'] = params['ip'][0]
        if 'fqdn' in params:
            client_location['fqdn'] = params['fqdn'][0]
        if 'site' in params:
            client_location['site'] = params['site'][0]

    try:
        # resolve everything before the first yield, so that errors
        # can still be reported with the proper HTTP status
        tmp_replicas = list(list_replicas(dids=dids, schemes=schemes, client_location=client_location))

        if not tmp_replicas:
            raise ReplicaNotFound('no redirection possible - cannot find the DID')

        # first, set the appropriate content type, and stream the header
        header('Content-Type', 'application/metalink4+xml')
        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # iteratively stream the XML per file
        for rfile in tmp_replicas:
            # map every replica URL to the RSE it is stored on
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            # stream metadata
            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
            yield ' <size>' + str(rfile['bytes']) + '</size>\n'
            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            # sort the actual replicas if necessary
            if select == 'geoip':
                replicas = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            else:
                replicas = sort_random(dictreplica)

            # stream URLs; they are escaped because a URL with a query string
            # (e.g. '&' separated parameters) would otherwise break the XML
            for idx, replica in enumerate(replicas, 1):
                yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n'
            yield ' </file>\n'

        # don't forget to send the metalink footer
        yield '</metalink>\n'
    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except ReplicaNotFound as error:
        raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    Header Redirect

    HTTP Success:
        303 See Other

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
    header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    header('Access-Control-Allow-Methods', '*')
    header('Access-Control-Allow-Credentials', 'true')

    try:
        # defaults: random selection over the default HTTP protocols
        select = 'random'
        rse = None
        site = None
        schemes = ['davs', 'http', 'https', 's3+rucio']

        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if ctx.query:
            params = parse_qs(ctx.query[1:])

            # 'sort' is evaluated last and therefore wins over 'select'
            for key in ('select', 'sort'):
                if key in params:
                    select = params[key][0]
            if 'rse' in params:
                rse = params['rse'][0]
            if 'site' in params:
                site = params['site'][0]

            # with a query string the schemes are either the single given
            # value or a reduced default list
            schemes = params['schemes'][0] if 'schemes' in params else ['davs', 'https', 's3']

            for key in ('ip', 'fqdn', 'site'):
                if key in params:
                    client_location[key] = params[key][0]

        # correctly forward the schemes and select to potential metalink followups
        cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
        schemes_given_as_list = isinstance(schemes, list)
        link_schemes = ','.join(schemes) if schemes_given_as_list else schemes
        header('Link',
               '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (cleaned_url, link_schemes, select))
        if not schemes_given_as_list:
            # list_replicas needs a list
            schemes = [schemes]

        found = list(list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                                   schemes=schemes,
                                   client_location=client_location))

        selected_url, selected_rse = None, None
        for entry in found:
            if not entry['rses']:
                continue

            url_to_rse = {}

            if rse:
                # an explicit RSE was requested: take its first URL or give up
                if rse not in entry['rses'] or not entry['rses'][rse]:
                    raise ReplicaNotFound('no redirection possible - no valid RSE for HTTP redirection found')
                selected_url = entry['rses'][rse][0]
                selected_rse = rse
            else:
                for rse_name in entry['rses']:
                    for pfn in entry['rses'][rse_name]:
                        # this is an HTTP-only redirection: rewrite davs and s3 URLs
                        # to https to stay compatible with as many HTTP clients as possible
                        pfn = pfn.replace('davs://', 'https://').replace('s3://', 'https://')
                        url_to_rse[pfn] = rse_name

                if not url_to_rse:
                    raise ReplicaNotFound('no redirection possible - no valid RSE for HTTP redirection found')
                elif site:
                    ranked = site_selector(url_to_rse, site)
                    if not ranked:
                        raise ReplicaNotFound('no redirection possible - no valid RSE for HTTP redirection found')
                    selected_url = ranked[0]
                else:
                    if select == 'geoip':
                        ranked = sort_geoip(url_to_rse, client_location['ip'])
                    elif select == 'closeness':
                        ranked = sort_closeness(url_to_rse, client_location)
                    elif select == 'dynamic':
                        ranked = sort_dynamic(url_to_rse, client_location)
                    elif select == 'ranking':
                        ranked = sort_ranking(url_to_rse, client_location)
                    else:
                        ranked = sort_random(url_to_rse)
                    selected_url = ranked[0]

                # look up the RSE of the selected URL among the original (unrewritten) URLs
                for rse_name in entry['rses']:
                    for pfn in entry['rses'][rse_name]:
                        if selected_url == pfn:
                            selected_rse = rse_name

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    # s3+rucio URLs are redirected to their signed counterpart
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    raise seeother(signed_URLS[selected_url])

                raise seeother(selected_url)

        raise ReplicaNotFound('no redirection possible - file does not exist')

    except seeother:
        # the redirect itself is signalled via an exception, pass it on untouched
        raise
    except ReplicaNotFound as error:
        raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    List all replicas for data identifiers.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        404 Notfound
        500 InternalError

    :param scope: The scope name of the data identifier.
    :param name: The name of the data identifier.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    # local import so that the block does not depend on a file-level import
    from xml.sax.saxutils import escape

    metalink = False
    if ctx.env.get('HTTP_ACCEPT') is not None:
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    dids, schemes, select, limit = [{'scope': scope, 'name': name}], None, None, None
    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            limit = int(params['limit'][0])

    # the client address does not change between files, resolve it once
    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    try:
        # first, set the appropriate content type
        if not metalink:
            header('Content-Type', 'application/x-json-stream')
        else:
            header('Content-Type', 'application/metalink4+xml')

        # the metalink header is only streamed once list_replicas delivered
        # its first result, otherwise the exceptions below would be raised
        # after the response has already been started
        header_sent = False

        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes):
            if metalink and not header_sent:
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                header_sent = True

            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            if select == 'geoip':
                try:
                    replicas = sort_geoip(dictreplica, client_ip)
                except AddressNotFoundError:
                    # address could not be resolved, keep the unsorted replicas
                    pass
            else:
                replicas = sort_random(dictreplica)

            if not metalink:
                yield dumps(rfile) + '\n'
            else:
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'
                yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                # URLs are escaped because a URL with a query string
                # (e.g. '&' separated parameters) would otherwise break the XML
                for idx, replica in enumerate(replicas, 1):
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n'
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

        if metalink:
            if header_sent:
                # don't forget to send the metalink footer
                yield '</metalink>\n'
            else:
                # no replicas at all: still send a complete, empty metalink
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def POST(self):
    """
    List all replicas for data identifiers.

    The request body is a JSON document with the listing parameters; the
    query string may additionally carry 'select', 'sort' and 'limit'.

    HTTP Success:
        200 OK

    HTTP Error:
        400 Bad Request
        401 Unauthorized
        404 Notfound
        500 InternalError

    :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
    """
    # local import so that the block does not depend on a file-level import
    from xml.sax.saxutils import escape

    metalink = False
    if ctx.env.get('HTTP_ACCEPT') is not None:
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    ignore_availability, rse_expression, all_states, domain = False, None, False, None
    signature_lifetime, resolve_archives, resolve_parents = None, True, False
    client_location = {}

    json_data = data()
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            client_location = params['client_location']
            client_location['ip'] = params['client_location'].get('ip', client_ip)
        if 'sort' in params:
            select = params['sort']
        if 'domain' in params:
            domain = params['domain']
        if 'resolve_archives' in params:
            resolve_archives = params['resolve_archives']
        if 'resolve_parents' in params:
            resolve_parents = params['resolve_parents']
        if 'signature_lifetime' in params:
            signature_lifetime = params['signature_lifetime']
        else:
            # hardcoded default of 10 minutes if config is not parseable
            signature_lifetime = config_get('credentials', 'signature_lifetime', raise_exception=False, default=600)
    except ValueError:
        raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            # parse_qs delivers strings; without the conversion the
            # comparison against the URL counter below can never match
            try:
                limit = int(params['limit'][0])
            except ValueError:
                raise generate_http_error(400, 'ValueError', 'Cannot decode limit parameter')
        if 'sort' in params:
            # parse_qs delivers a list per key, only the first value is used
            select = params['sort'][0]

    try:
        # first, set the appropriate content type
        if not metalink:
            header('Content-Type', 'application/x-json-stream')
        else:
            header('Content-Type', 'application/metalink4+xml')

        # the metalink header is only streamed once list_replicas delivered
        # its first result, otherwise the exceptions below would be raised
        # after the response has already been started
        header_sent = False

        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=ctx.env.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location,
                                   domain=domain,
                                   signature_lifetime=signature_lifetime,
                                   resolve_archives=resolve_archives,
                                   resolve_parents=resolve_parents,
                                   issuer=ctx.env.get('issuer')):

            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
                continue

            if not header_sent:
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                header_sent = True

            # map every PFN to its (domain, priority, rse, client_extract) tuple
            dictreplica = {}
            for replica in rfile['pfns']:
                dictreplica[replica] = (rfile['pfns'][replica]['domain'],
                                        rfile['pfns'][replica]['priority'],
                                        rfile['pfns'][replica]['rse'],
                                        rfile['pfns'][replica]['client_extract'])

            yield ' <file name="' + rfile['name'] + '">\n'

            if 'parents' in rfile and rfile['parents']:
                yield ' <parents>\n'
                for parent in rfile['parents']:
                    yield ' <did>' + parent + '</did>\n'
                yield ' </parents>\n'

            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (config_get('policy', 'schema', raise_exception=False, default='generic'),
                                                              rfile['scope'],
                                                              rfile['name'])

            # TODO: deprecate this
            if select == 'geoip':
                # fall back to the address of the request if the body did not
                # contain a client location
                replicas = sort_geoip(dictreplica, client_location.get('ip', client_ip))
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            elif select == 'random':
                replicas = sort_random(dictreplica)
            else:
                # default: order by the (domain, priority, ...) tuple
                replicas = sorted(dictreplica, key=dictreplica.get)

            # URLs are escaped because a URL with a query string
            # (e.g. '&' separated parameters) would otherwise break the XML
            for idx, replica in enumerate(replicas, 1):
                yield ' <url location="' + str(dictreplica[replica][2]) \
                    + '" domain="' + str(dictreplica[replica][0]) \
                    + '" priority="' + str(dictreplica[replica][1]) \
                    + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                    + '">' + escape(replica) + '</url>\n'
                if limit and limit == idx:
                    break
            yield ' </file>\n'

        if metalink:
            if header_sent:
                # don't forget to send the metalink footer
                yield '</metalink>\n'
            else:
                # no replicas at all: still send a complete, empty metalink
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def get(self, scope, name):
    """
    List all replicas for data identifiers.

    .. :quickref: Replicas; List all replicas for did

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError

    :reqheader HTTP_ACCEPT: application/metalink4+xml
    :param scope: data identifier scope.
    :param name: data identifier name.
    :resheader Content-Type: application/x-json-stream
    :resheader Content-Type: application/metalink4+xml
    :status 200: OK.
    :status 401: Invalid auth token.
    :status 404: DID not found.
    :status 500: Internal Error.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    # local import so that the block does not depend on a file-level import
    from xml.sax.saxutils import escape

    metalink = False
    if request.environ.get('HTTP_ACCEPT') is not None:
        tmp = request.environ.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    dids = [{'scope': scope, 'name': name}]

    schemes = request.args.get('schemes', None)
    select = request.args.get('select', None)
    limit = request.args.get('limit', None)
    if limit:
        limit = int(limit)

    # the client address does not change between files, resolve it once
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = request.remote_addr

    # the response is collected in chunks and joined once at the end
    chunks = []
    content_type = 'application/x-json-stream'
    try:
        # first, set the appropriate content type, and add the header
        if metalink:
            content_type = 'application/metalink4+xml'
            chunks.append('<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n')

        # then, add the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes):
            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            if select == 'geoip':
                try:
                    replicas = sort_geoip(dictreplica, client_ip)
                except AddressNotFoundError:
                    # address could not be resolved, keep the unsorted replicas
                    pass
            else:
                replicas = sort_random(dictreplica)

            if not metalink:
                chunks.append(dumps(rfile) + '\n')
            else:
                chunks.append(' <file name="' + rfile['name'] + '">\n')
                chunks.append(' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n')
                if rfile['adler32'] is not None:
                    chunks.append(' <hash type="adler32">' + rfile['adler32'] + '</hash>\n')
                if rfile['md5'] is not None:
                    chunks.append(' <hash type="md5">' + rfile['md5'] + '</hash>\n')
                chunks.append(' <size>' + str(rfile['bytes']) + '</size>\n')
                chunks.append(' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name']))
                chunks.append('</glfn>\n')

                # URLs are escaped because a URL with a query string
                # (e.g. '&' separated parameters) would otherwise break the XML
                for idx, replica in enumerate(replicas, 1):
                    chunks.append(' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n')
                    if limit and limit == idx:
                        break
                chunks.append(' </file>\n')

        # don't forget to add the metalink footer
        if metalink:
            chunks.append('</metalink>\n')

        return Response(''.join(chunks), content_type=content_type)
    except DataIdentifierNotFound as e:
        return generate_http_error_flask(404, 'DataIdentifierNotFound', e.args[0][0])
def post(self):
    """
    List all replicas for data identifiers.

    .. :quickref: ListReplicas; List all replicas for did.

    :reqheader HTTP_ACCEPT: application/metalink4+xml
    :query schemes: A list of schemes to filter the replicas.
    :query select: Requested sorting of the result, overridden by 'sort' if both are given.
    :query sort: Requested sorting of the result, e.g., 'geoip', 'closeness', 'dynamic', 'ranking'.
    :query limit: Maximum number of URLs per file in the metalink output.
    :<json list dids: list of DIDs.
    :<json list schemes: A list of schemes to filter the replicas.
    :<json bool unavailable: Also include unavailable replicas.
    :<json bool all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
    :<json string rse_expression: The RSE expression to restrict on a list of RSEs.
    :<json dict client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site'}.
    :<json bool sort: Requested sorting of the result, e.g., 'geoip', 'closeness', 'dynamic', 'ranking'.
    :<json string domain: The network domain for the call, either None, 'wan' or 'lan'. None is fallback to 'wan', 'all' is both ['lan','wan']
    :resheader Content-Type: application/x-json-stream
    :resheader Content-Type: application/metalink4+xml
    :status 200: OK.
    :status 400: Cannot decode json parameter list.
    :status 401: Invalid auth token.
    :status 404: DID not found.
    :status 500: Internal Error.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    # local import so that the block does not depend on a file-level import
    from xml.sax.saxutils import escape

    metalink = False
    if request.environ.get('HTTP_ACCEPT') is not None:
        tmp = request.environ.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = request.remote_addr

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    # domain needs a default as well, it is passed to list_replicas
    # even if the request body does not contain it
    ignore_availability, rse_expression, all_states, domain = False, None, False, None
    client_location = {}

    json_data = request.data
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            client_location = params['client_location']
            client_location['ip'] = params['client_location'].get('ip', client_ip)
        if 'sort' in params:
            select = params['sort']
        if 'domain' in params:
            domain = params['domain']
    except ValueError:
        return generate_http_error_flask(400, 'ValueError', 'Cannot decode json parameter list')

    # query arguments only override the values of the request body if they
    # are actually given; 'sort' wins over 'select'
    if 'schemes' in request.args:
        schemes = request.args.get('schemes')
    if 'select' in request.args:
        select = request.args.get('select')
    if 'sort' in request.args:
        select = request.args.get('sort')
    # a value which is not an integer results in None, i.e. no limit
    limit = request.args.get('limit', default=None, type=int)

    # the response is collected in chunks and joined once at the end
    chunks = []
    content_type = 'application/x-json-stream'
    try:
        # first, set the appropriate content type, and add the header
        if metalink:
            content_type = 'application/metalink4+xml'
            chunks.append('<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n')

        # then, add the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=request.environ.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location,
                                   domain=domain):
            if not metalink:
                chunks.append(dumps(rfile, cls=APIEncoder) + '\n')
                continue

            # map every replica URL to the RSE it is stored on
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            chunks.append(' <file name="' + rfile['name'] + '">\n')
            chunks.append(' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n')
            if rfile['adler32'] is not None:
                chunks.append(' <hash type="adler32">' + rfile['adler32'] + '</hash>\n')
            if rfile['md5'] is not None:
                chunks.append(' <hash type="md5">' + rfile['md5'] + '</hash>\n')
            chunks.append(' <size>' + str(rfile['bytes']) + '</size>\n')
            chunks.append(' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name']))
            chunks.append('</glfn>\n')

            if select == 'geoip':
                # fall back to the address of the request if the body did not
                # contain a client location
                replicas = sort_geoip(dictreplica, client_location.get('ip', client_ip))
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            else:
                replicas = sort_random(dictreplica)

            # URLs are escaped because a URL with a query string
            # (e.g. '&' separated parameters) would otherwise break the XML
            for idx, replica in enumerate(replicas, 1):
                chunks.append(' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n')
                if limit and limit == idx:
                    break
            chunks.append(' </file>\n')

        # don't forget to add the metalink footer
        if metalink:
            chunks.append('</metalink>\n')

        return Response(''.join(chunks), content_type=content_type)
    except DataIdentifierNotFound as e:
        return generate_http_error_flask(404, 'DataIdentifierNotFound', e.args[0][0])
def GET(self, scope, name):
    """
    List all replicas for data identifiers.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    accept = ctx.env.get('HTTP_ACCEPT')
    metalink = accept is not None and 'application/metalink4+xml' in accept.split(',')

    dids = [{'scope': scope, 'name': name}]
    schemes = None
    select = None
    limit = None

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            limit = int(params['limit'][0])

    # Resolve all reasonable protocols when doing metalink for maximum access possibilities
    if metalink and schemes is None:
        schemes = SUPPORTED_PROTOCOLS

    try:
        # list_replicas has to deliver its first result before the reply is
        # started, otherwise the exceptions won't be propagated correctly
        nothing_sent = True

        if metalink:
            header('Content-Type', 'application/metalink4+xml')
        else:
            header('Content-Type', 'application/x-json-stream')

        for rfile in list_replicas(dids=dids, schemes=schemes, vo=ctx.env.get('vo')):

            # in first round, stream the metalink header
            if nothing_sent:
                if metalink:
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                nothing_sent = False

            # ... then, stream the replica information
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            replicas = []
            dictreplica = {}
            for rse, pfns in rfile['rses'].items():
                for pfn in pfns:
                    replicas.append(pfn)
                    dictreplica[pfn] = rse

            if select != 'geoip':
                replicas = sort_random(dictreplica)
            else:
                try:
                    replicas = sort_geoip(dictreplica, client_ip)
                except AddressNotFoundError:
                    # address could not be resolved, keep the unsorted replicas
                    pass

            if not metalink:
                yield dumps(rfile) + '\n'
                continue

            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
            yield ' <size>' + str(rfile['bytes']) + '</size>\n'
            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            for priority, pfn in enumerate(replicas, 1):
                yield ' <url location="' + str(dictreplica[pfn]) + '" priority="' + str(priority) + '">' + escape(pfn) + '</url>\n'
                if limit and limit == priority:
                    break
            yield ' </file>\n'

        if metalink and nothing_sent:
            # ensure complete metalink on success without any content
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
        elif metalink:
            # if metalink start was already sent, always send the end
            yield '</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def POST(self):
    """
    List all replicas for data identifiers.

    HTTP Success:
        200 OK

    HTTP Error:
        400 Bad Request
        401 Unauthorized
        404 Not Found
        406 Not Acceptable
        500 InternalError

    :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
    """

    metalink_header = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # Media types in Accept may be separated by ", " and may carry parameters
    # such as ";q=0.8", so normalise every entry before comparing.
    metalink = False
    accept = ctx.env.get('HTTP_ACCEPT')
    if accept is not None:
        accepted_types = [entry.split(';')[0].strip() for entry in accept.split(',')]
        metalink = 'application/metalink4+xml' in accepted_types

    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    ignore_availability, rse_expression, all_states, domain = False, None, False, None
    signature_lifetime, resolve_archives, resolve_parents = None, True, False
    updated_after = None
    client_location = {}

    json_data = data()
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            client_location = params['client_location']
            client_location['ip'] = params['client_location'].get('ip', client_ip)
        if 'sort' in params:
            select = params['sort']
        if 'domain' in params:
            domain = params['domain']
        if 'resolve_archives' in params:
            resolve_archives = params['resolve_archives']
        if 'resolve_parents' in params:
            resolve_parents = params['resolve_parents']

        if 'signature_lifetime' in params:
            signature_lifetime = params['signature_lifetime']
        else:
            # hardcoded default of 10 minutes if config is not parseable
            signature_lifetime = config_get('credentials', 'signature_lifetime', raise_exception=False, default=600)

        if 'updated_after' in params:
            if isinstance(params['updated_after'], (int, float)):
                # convert from epoch time stamp to datetime object
                updated_after = datetime.utcfromtimestamp(params['updated_after'])
            else:
                # attempt UTC format '%Y-%m-%dT%H:%M:%S' conversion
                updated_after = datetime.strptime(params['updated_after'], '%Y-%m-%dT%H:%M:%S')

    except ValueError:
        raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        # parse_qs maps every key to a LIST of strings: always take the first
        # value, and convert the limit so it can be compared to the URL counter
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            try:
                limit = int(params['limit'][0])
            except ValueError:
                raise generate_http_error(400, 'ValueError', 'Cannot decode limit parameter')
        if 'sort' in params:
            select = params['sort'][0]

    # Resolve all reasonable protocols when doing metalink for maximum access possibilities
    if metalink and schemes is None:
        schemes = SUPPORTED_PROTOCOLS

    try:
        # we need to call list_replicas before starting to reply
        # otherwise the exceptions won't be propagated correctly
        first = True

        header('Content-Type', 'application/metalink4+xml' if metalink else 'application/x-json-stream')

        for rfile in list_replicas(dids=dids, schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=ctx.env.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location,
                                   domain=domain,
                                   signature_lifetime=signature_lifetime,
                                   resolve_archives=resolve_archives,
                                   resolve_parents=resolve_parents,
                                   updated_after=updated_after,
                                   issuer=ctx.env.get('issuer'),
                                   vo=ctx.env.get('vo')):

            # in first round, stream the metalink header
            if first:
                if metalink:
                    yield metalink_header
                first = False

            # ... then, stream the replica information
            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
                continue

            # per PFN: (domain, priority, rse, client_extract)
            dictreplica = {}
            for replica in rfile['pfns']:
                pfn = rfile['pfns'][replica]
                dictreplica[replica] = (pfn['domain'], pfn['priority'], pfn['rse'], pfn['client_extract'])

            yield ' <file name="' + rfile['name'] + '">\n'

            if 'parents' in rfile and rfile['parents']:
                yield ' <parents>\n'
                for parent in rfile['parents']:
                    yield ' <did>' + parent + '</did>\n'
                yield ' </parents>\n'

            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (config_get('policy', 'schema', raise_exception=False, default='generic'),
                                                                rfile['scope'],
                                                                rfile['name'])

            # TODO: deprecate this
            if select == 'geoip':
                # without a client_location in the request body there is no
                # 'ip' key; fall back to the address the request came from
                replicas = sort_geoip(dictreplica, client_location.get('ip', client_ip))
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            elif select == 'random':
                replicas = sort_random(dictreplica)
            else:
                # default ordering: by the (domain, priority, rse, client_extract) tuple
                replicas = sorted(dictreplica, key=dictreplica.get)

            for count, replica in enumerate(replicas, 1):
                yield ' <url location="' + str(dictreplica[replica][2]) \
                    + '" domain="' + str(dictreplica[replica][0]) \
                    + '" priority="' + str(dictreplica[replica][1]) \
                    + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                    + '">' + escape(replica) + '</url>\n'
                if limit and limit == count:
                    break
            yield ' </file>\n'

        if metalink:
            if first:
                # ensure complete metalink on success without any content
                yield metalink_header + '</metalink>\n'
            else:
                # if metalink start was already sent, always send the end
                yield '</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def get(self, scope, name):
    """
    Metalink redirect

    .. :quickref: MetaLinkRedirector; Metalink redirect.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    :resheader Content-Type: application/metalink4+xml'.
    :status 200: OK.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 406: Not Acceptable.
    :status 500: Internal Error.
    :returns: Metalink file
    """
    # imported locally so this handler does not rely on a module-level import
    from xml.sax.saxutils import escape

    dids = [{'scope': scope, 'name': name}]

    # set the correct client IP
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = request.remote_addr

    schemes = request.args.get('schemes', ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm', 'davs'])
    if not isinstance(schemes, list):
        # a query-string value arrives as one string (the header redirector
        # forwards it comma-joined); list_replicas needs a list
        schemes = schemes.split(',')

    select = request.args.get('select', None)
    if 'sort' in request.args:
        select = request.args['sort']

    # an explicit ?ip= overrides the detected address, but must not erase it
    client_location = {'ip': request.args.get('ip', client_ip),
                       'fqdn': request.args.get('fqdn', None),
                       'site': request.args.get('site', None)}

    try:
        tmp_replicas = list(list_replicas(dids=dids, schemes=schemes, client_location=client_location))

        if not tmp_replicas:
            return 'no redirection possible - cannot find the DID', 404

        # collect the document in pieces and join once at the end

        # first, the header
        chunks = ['<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n']

        # then the XML per file
        for rfile in tmp_replicas:
            # map every replica URL to the RSE it was listed under
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            # metadata
            chunks.append(' <file name="' + rfile['name'] + '">\n')
            chunks.append(' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n')

            if rfile['adler32'] is not None:
                chunks.append(' <hash type="adler32">' + rfile['adler32'] + '</hash>\n')
            if rfile['md5'] is not None:
                chunks.append(' <hash type="md5">' + rfile['md5'] + '</hash>\n')

            chunks.append(' <size>' + str(rfile['bytes']) + '</size>\n')

            chunks.append(' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name']))
            chunks.append('</glfn>\n')

            # sort the actual replicas if necessary
            if select == 'geoip':
                replicas = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            else:
                replicas = sort_random(dictreplica)

            # URLs, with 1-based priorities; escape so that '&' and '<' in a
            # URL cannot break the XML document
            for priority, replica in enumerate(replicas, 1):
                chunks.append(' <url location="' + str(dictreplica[replica]) + '" priority="' + str(priority) + '">' + escape(replica) + '</url>\n')

            chunks.append(' </file>\n')

        # don't forget the metalink footer
        chunks.append('</metalink>\n')

        return Response(''.join(chunks), content_type='application/metalink4+xml')
    except DataIdentifierNotFound as error:
        return generate_http_error_flask(404, 'DataIdentifierNotFound', error.args[0])
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        # Flask cannot turn an exception object into a response body
        return str(error), 500
def get(self, scope, name):
    """
    Header Redirect

    .. :quickref: HeaderRedirector; Header redirect.

    :param scope: The scope name of the file.
    :param name: The name of the file.
    :resheader Content-Type: application/metalink+xml'.
    :status 303: Redirect.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 500: Internal Error.
    """

    headers = {}
    try:
        client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = request.remote_addr

        # use the default HTTP protocols if no scheme is given
        schemes = request.args.get('schemes', ['davs', 'https', 's3'])
        select = request.args.get('select', 'random')
        if 'sort' in request.args:
            select = request.args['sort']
        rse = request.args.get('rse', None)
        site = request.args.get('site', None)

        client_location = {'ip': request.args.get('ip', client_ip),
                           'fqdn': request.args.get('fqdn', None),
                           'site': site}

        # correctly forward the schemes and select to potential metalink followups
        cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
        link_template = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
        if isinstance(schemes, list):
            headers['Link'] = link_template % (cleaned_url, ','.join(schemes), select)
        else:
            headers['Link'] = link_template % (cleaned_url, schemes, select)
            schemes = [schemes]  # list_replicas needs a list

        replicas = list(list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                                      schemes=schemes,
                                      client_location=client_location))

        selected_url, selected_rse = None, None
        for r in replicas:
            if not r['rses']:
                continue

            dictreplica = {}

            if rse:
                # the client pinned an RSE: use its first replica or give up
                if rse in r['rses'] and r['rses'][rse]:
                    selected_url = r['rses'][rse][0]
                    selected_rse = rse
                else:
                    return 'no redirection possible - no valid RSE for HTTP redirection found', 404
            else:
                for rep in r['rses']:
                    for replica in r['rses'][rep]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replacement davs and s3 URLs to https
                        replica = replica.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[replica] = rep

                if not dictreplica:
                    return 'no redirection possible - no valid RSE for HTTP redirection found', 404

                elif site:
                    rep = site_selector(dictreplica, site)
                    if rep:
                        selected_url = rep[0]
                    else:
                        return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                else:
                    if select == 'geoip':
                        rep = sort_geoip(dictreplica, client_location['ip'])
                    elif select == 'closeness':
                        rep = sort_closeness(dictreplica, client_location)
                    elif select == 'dynamic':
                        rep = sort_dynamic(dictreplica, client_location)
                    elif select == 'ranking':
                        rep = sort_ranking(dictreplica, client_location)
                    else:
                        rep = sort_random(dictreplica)

                    selected_url = rep[0]

                # recover the RSE of the chosen URL (needed for signing below)
                for rep in r['rses']:
                    for replica in r['rses'][rep]:
                        if selected_url == replica:
                            selected_rse = rep

            if selected_url:
                target_url = selected_url
                if selected_url.startswith('s3+rucio://'):
                    # only s3+rucio URLs are signed; everything else is
                    # redirected to as-is
                    connect(selected_rse, selected_url)
                    signed_urls = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    target_url = signed_urls[selected_url]

                res = redirect(target_url, code=303)
                # attach the Link header to the real response headers
                res.headers.extend(headers)
                return res

        return 'no redirection possible - file does not exist', 404

    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        # Flask cannot turn an exception object into a response body
        return str(error), 500
def get(self, scope_name):
    """
    Header Redirect

    .. :quickref: HeaderRedirector; Header redirect.

    :param scope_name: data identifier (scope)/(name).
    :resheader Content-Type: application/metalink+xml'.
    :status 303: Redirect.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 500: Internal Error.
    """

    no_rse_message = 'no redirection possible - no valid RSE for HTTP redirection found'

    # CORS headers are attached to every response, including errors
    headers = Headers()
    headers.set('Access-Control-Allow-Origin', request.environ.get('HTTP_ORIGIN'))
    headers.set('Access-Control-Allow-Headers', request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    headers.set('Access-Control-Allow-Methods', '*')
    headers.set('Access-Control-Allow-Credentials', 'true')

    try:
        scope, name = parse_scope_name(scope_name)
    except ValueError as error:
        return generate_http_error_flask(400, 'ValueError', error.args[0], headers=headers)
    except Exception as error:
        print(format_exc())
        return str(error), 500, headers

    try:
        # use the default HTTP protocols if no scheme is given
        select = 'random'
        rse = None
        site = None
        schemes = ['davs', 'http', 'https']

        client_ip = request_header_ensure_string('X-Forwarded-For', request.remote_addr)
        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if request.query_string:
            params = parse_qs(request.query_string.decode(encoding='utf-8'))

            # 'sort' is checked after 'select', so it wins when both are given
            for selector_key in ('select', 'sort'):
                if selector_key in params:
                    select = params[selector_key][0]

            if 'rse' in params:
                rse = params['rse'][0]
            if 'site' in params:
                site = params['site'][0]

            # with a query string but no schemes the default set is different
            if 'schemes' not in params:
                schemes = ['davs', 'https', 's3']
            else:
                schemes = params['schemes'][0]

            for location_key in ('ip', 'fqdn', 'site'):
                if location_key in params:
                    client_location[location_key] = params[location_key][0]

        # correctly forward the schemes and select to potential metalink followups
        cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
        schemes_is_list = isinstance(schemes, list)
        forwarded_schemes = ','.join(schemes) if schemes_is_list else schemes
        headers.set('Link', '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (cleaned_url, forwarded_schemes, select))
        if not schemes_is_list:
            schemes = [schemes]  # list_replicas needs a list

        # get vo if given
        vo = request_header_ensure_string('X-Rucio-VO', 'def')

        did = {'scope': scope, 'name': name, 'type': 'FILE'}
        replicas = list(list_replicas(dids=[did], schemes=schemes, client_location=client_location, vo=vo))

        selected_url = None
        for entry in replicas:
            rses = entry['rses']
            if not rses:
                continue

            if rse:
                # the client pinned an RSE: use its first replica or give up
                if not (rse in rses and rses[rse]):
                    return no_rse_message, 404, headers
                selected_url = rses[rse][0]
            else:
                candidates = {}
                for rse_name in rses:
                    for url in rses[rse_name]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replacement davs and s3 URLs to https
                        https_url = url.replace('davs://', 'https://').replace('s3://', 'https://')
                        candidates[https_url] = rse_name

                if not candidates:
                    return no_rse_message, 404, headers

                if site:
                    ranked = site_selector(candidates, site, vo)
                    if not ranked:
                        return no_rse_message, 404, headers
                else:
                    if select == 'geoip':
                        ranked = sort_geoip(candidates, client_location['ip'])
                    elif select == 'closeness':
                        ranked = sort_closeness(candidates, client_location)
                    elif select == 'dynamic':
                        ranked = sort_dynamic(candidates, client_location)
                    elif select == 'ranking':
                        ranked = sort_ranking(candidates, client_location)
                    else:
                        ranked = sort_random(candidates)

                selected_url = ranked[0]

            if selected_url:
                response = redirect(selected_url, code=303)
                response.headers.extend(headers)
                return response

        return 'no redirection possible - file does not exist', 404, headers

    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0], headers=headers)
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0], headers=headers)
    except Exception as error:
        print(format_exc())
        return str(error), 500, headers
def POST(self):
    """
    List all replicas for data identifiers.

    HTTP Success:
        200 OK

    HTTP Error:
        400 Bad Request
        401 Unauthorized
        404 Not Found
        500 InternalError

    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    # imported locally so this handler does not rely on a module-level import
    from xml.sax.saxutils import escape

    metalink_header = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # Media types in Accept may be separated by ", " and may carry parameters
    # such as ";q=0.8", so normalise every entry before comparing.
    metalink = False
    accept = ctx.env.get('HTTP_ACCEPT')
    if accept is not None:
        accepted_types = [entry.split(';')[0].strip() for entry in accept.split(',')]
        metalink = 'application/metalink4+xml' in accepted_types

    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    ignore_availability, rse_expression, all_states = False, None, False
    client_location = {}

    json_data = data()
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            client_location = params['client_location']
            client_location['ip'] = params['client_location'].get('ip', client_ip)
        if 'sort' in params:
            select = params['sort']
    except ValueError:
        raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        # parse_qs maps every key to a LIST of strings: always take the first
        # value, and convert the limit so it can be compared to the URL counter
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            try:
                limit = int(params['limit'][0])
            except ValueError:
                raise generate_http_error(400, 'ValueError', 'Cannot decode limit parameter')
        if 'sort' in params:
            select = params['sort'][0]

    try:
        # list_replicas must be entered before the first chunk is streamed,
        # otherwise a lookup failure cannot be reported as an HTTP error
        first = True

        header('Content-Type', 'application/metalink4+xml' if metalink else 'application/x-json-stream')

        for rfile in list_replicas(dids=dids, schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=ctx.env.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location):

            # in first round, stream the metalink header
            if first:
                if metalink:
                    yield metalink_header
                first = False

            # then, stream the replica information
            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
                continue

            # map every replica URL to the RSE it was listed under
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            if select == 'geoip':
                # without a client_location in the request body there is no
                # 'ip' key; fall back to the address the request came from
                replicas = sort_geoip(dictreplica, client_location.get('ip', client_ip))
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            else:
                replicas = sort_random(dictreplica)

            # priorities are 1-based; escape so that '&' and '<' in a URL
            # cannot break the XML document
            for priority, replica in enumerate(replicas, 1):
                yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(priority) + '">' + escape(replica) + '</url>\n'
                if limit and limit == priority:
                    break
            yield ' </file>\n'

        # don't forget to send the metalink footer
        if metalink:
            if first:
                # nothing was streamed: send a complete, empty document
                yield metalink_header + '</metalink>\n'
            else:
                yield '</metalink>\n'

    except DataIdentifierNotFound as e:
        # NOTE(review): args[0][0] is kept from the original; it presumes
        # args[0] is itself a sequence of messages - confirm
        raise generate_http_error(404, 'DataIdentifierNotFound', e.args[0][0])
def generate(request_id, issuer, vo):
    """
    Stream the replica listing, either as JSON lines or as a metalink4 document.

    All other inputs (dids, schemes, metalink, select, limit, client_location, ...)
    are taken from the enclosing scope.

    :param request_id: Forwarded to list_replicas.
    :param issuer: Forwarded to list_replicas.
    :param vo: Forwarded to list_replicas.
    :returns: A generator of text chunks.
    """
    # list_replicas has to be entered before anything is streamed,
    # otherwise its exceptions won't be propagated correctly

    # stays truthy until the metalink header was sent (never truthy for JSON)
    header_pending = metalink
    try:
        replica_stream = list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=request_id,
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       signature_lifetime=signature_lifetime,
                                       resolve_archives=resolve_archives,
                                       resolve_parents=resolve_parents,
                                       updated_after=updated_after,
                                       issuer=issuer,
                                       vo=vo)
        for rfile in replica_stream:

            # the first record triggers the metalink header
            if header_pending:
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                header_pending = False

            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
                continue

            # per PFN: (domain, priority, rse, client_extract)
            pfns = rfile['pfns']
            dictreplica = {}
            for pfn in pfns.keys():
                dictreplica[pfn] = (pfns[pfn]['domain'],
                                    pfns[pfn]['priority'],
                                    pfns[pfn]['rse'],
                                    pfns[pfn]['client_extract'])

            yield ' <file name="' + rfile['name'] + '">\n'

            if 'parents' in rfile and rfile['parents']:
                yield ' <parents>\n'
                for parent in rfile['parents']:
                    yield ' <did>' + parent + '</did>\n'
                yield ' </parents>\n'

            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            policy_schema = config_get('policy', 'schema', raise_exception=False, default='generic')
            yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (policy_schema, rfile['scope'], rfile['name'])

            # TODO: deprecate this
            if select == 'geoip':
                ordered = sort_geoip(dictreplica, client_location['ip'])
            elif select == 'closeness':
                ordered = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                ordered = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                ordered = sort_ranking(dictreplica, client_location)
            elif select == 'random':
                ordered = sort_random(dictreplica)
            else:
                # default ordering: by the (domain, priority, rse, client_extract) tuple
                ordered = sorted(dictreplica, key=dictreplica.get)

            for emitted, pfn in enumerate(ordered, 1):
                pfn_domain, pfn_priority, pfn_rse, pfn_extract = dictreplica[pfn]
                yield ' <url location="' + str(pfn_rse) \
                    + '" domain="' + str(pfn_domain) \
                    + '" priority="' + str(pfn_priority) \
                    + '" client_extract="' + str(pfn_extract).lower() \
                    + '">' + escape(pfn) + '</url>\n'
                if limit and limit == emitted:
                    break

            yield ' </file>\n'

        if header_pending:
            # if still first output, i.e. there were no replicas
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'

    finally:
        # once the header went out the footer has to follow
        if metalink and not header_pending:
            yield '</metalink>\n'