def _fetch_bundle_contents_info(uuid, path=''):
    """
    Fetch metadata of the bundle contents or a subpath within the bundle.

    Query parameters:
    - `depth`: recursively fetch subdirectory info up to this depth.
      Default is 0.

    Response format:
    ```
    {
      "data": {
          "name": "<name of file or directory>",
          "link": "<string representing target if file is a symbolic link>",
          "type": "<file|directory|link>",
          "size": <size of file in bytes>,
          "perm": <unix permission integer>,
          "contents": [
              {
                "name": ...,
                <each file of directory represented recursively with the same schema>
              },
              ...
          ]
      }
    }
    ```

    Errors:
    - 400 if `depth` is negative, or if looking up the target info raises
      anything other than `NotFoundError`.
    - 404 if looking up the target info raises `NotFoundError`.
    """
    depth = query_get_type(int, 'depth', default=0)
    if depth < 0:
        abort(httplib.BAD_REQUEST, "Depth must be at least 0")

    check_bundles_have_read_permission(local.model, request.user, [uuid])
    try:
        info = local.download_manager.get_target_info(uuid, path, depth)
    except NotFoundError as e:
        # `message` is deprecated and may be missing or empty; fall back to
        # the standard string form so the client always gets a reason.
        abort(httplib.NOT_FOUND, getattr(e, 'message', None) or str(e))
    except Exception as e:
        abort(httplib.BAD_REQUEST, getattr(e, 'message', None) or str(e))

    return {'data': info}
def _fetch_bundles():
    """
    Fetch bundles in the following two ways:
    1. By bundle `specs` OR search `keywords`. Behavior is undefined
    when both `specs` and `keywords` are provided.

    Query parameters:

     - `worksheet`: UUID of the base worksheet. Required when fetching by specs.
     - `specs`: Bundle spec of bundle to fetch. May be provided multiple
       times to fetch multiple bundle specs. A bundle spec is either:
       1. a UUID (8 or 32 hex characters with a preceding '0x')
       2. a bundle name referring to the last bundle with that name on the
          given base worksheet
       3. or a reverse index of the form `^N` referring to the Nth-to-last
          bundle on the given base worksheet.
     - `keywords`: Search keyword. May be provided multiple times for
       multiple keywords. Bare keywords match the names and descriptions of
       bundles. Examples of other special keyword forms:
       - `name=<name>            ` : More targeted search of using metadata fields.
       - `size=.sort             ` : Sort by a particular field.
       - `size=.sort-            ` : Sort by a particular field in reverse.
       - `size=.sum              ` : Compute total of a particular field.
       - `.mine                  ` : Match only bundles I own.
       - `.floating              ` : Match bundles that aren't on any worksheet.
       - `.count                 ` : Count the number of bundles.
       - `.limit=10              ` : Limit the number of results to the top 10.
     - `include_display_metadata`: `1` to include additional metadata helpful
       for displaying the bundle info, `0` to omit them. Default is `0`.
     - `include`: comma-separated list of related resources to include, such as "owner"
     - `depth`: when provided, the matched bundle UUIDs are expanded through
       `get_self_and_descendants` with this depth before the document is built.

    When aggregation keywords such as `.count` are used, the resulting value
    is returned as:
    ```
    {
        "meta": {
            "result": <value>
        }
    }
    ```
    2. By bundle `command` and/or `dependencies` (for `--memoized` option in cl [run/mimic] command).
    When `dependencies` is not defined, the searching result will include bundles that match with command only.

    Query parameters:
     - `command`      : the command of a bundle in string
     - `dependencies` : the dependencies of a bundle in the format of
                        '[{"child_path":key1, "parent_uuid":UUID1},
                        {"child_path":key2, "parent_uuid":UUID2}]'
        1. a UUID should be in the format of 32 hex characters with a preceding '0x' (partial UUID is not allowed).
        2. the key should be able to uniquely identify a (child_path, parent_uuid) pair in the list.
    The returning result will be aggregated in the same way as 1.

    Aborts with 400 when none of `keywords`, `specs` or `command` is given.
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    descendant_depth = query_get_type(int, 'depth', None)
    command = query_get_type(str, 'command', '')
    dependencies = query_get_type(str, 'dependencies', '[]')

    # The branches are tried in priority order: keywords, then specs, then
    # command. Only the first non-empty one is used.
    if keywords:
        # Handle search keywords
        keywords = resolve_owner_in_keywords(keywords)
        search_result = local.model.search_bundles(request.user.user_id, keywords)
        # Return simple dict if scalar result (e.g. .sum or .count queries)
        if search_result['is_aggregate']:
            return json_api_meta({}, {'result': search_result['result']})
        # If not aggregate this is a list
        bundle_uuids = search_result['result']
    elif specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(
            local.model, request.user, worksheet_uuid, specs
        )
    elif command:
        # `dependencies` is forwarded as the raw query string (default '[]').
        bundle_uuids = local.model.get_memoized_bundles(
            request.user.user_id, command, dependencies
        )
    else:
        # The message lists every parameter that selects a lookup mode,
        # including `command`, which the branch above accepts.
        abort(
            http.client.BAD_REQUEST,
            "Request must include either 'keywords', 'specs' "
            "or 'command' query parameter",
        )

    # Find all descendants down to the provided depth
    if descendant_depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(
            bundle_uuids, depth=descendant_depth
        )

    return build_bundles_document(bundle_uuids)
def _fetch_bundle_contents_blob(uuid, path=''):
    """
    API to download the contents of a bundle or a subpath within a bundle.

    For directories, this method always returns a tarred and gzipped archive of
    the directory.

    For files, if the request has an Accept-Encoding header containing gzip,
    then the returned file is gzipped. Otherwise, the file is returned as-is.

    HTTP Request headers:
    - `Range: bytes=<start>-<end>`: fetch the bytes from `<start>` through
      `<end>`, inclusive (the handler requests `end - start + 1` bytes starting
      at `<start>`). Not supported for directories, nor together with
      `head`/`tail`.
    - `Accept-Encoding: <encoding>`: indicate that the client can accept
      encoding `<encoding>`. Currently only `gzip` encoding is supported.

    Query parameters:
    - `head`: number of lines to fetch from the beginning of the file.
      Default is 0, meaning to fetch the entire file.
    - `tail`: number of lines to fetch from the end of the file.
      Default is 0, meaning to fetch the entire file.
    - `max_line_length`: maximum number of characters to fetch from each line,
      if either `head` or `tail` is specified. Default is 128.
    - `truncation_text`: string forwarded to the file summarizer when `head`
      or `tail` is specified. Default is the empty string.

    HTTP Response headers (for single-file targets):
    - `Content-Disposition: inline; filename=<bundle name or target filename>`
    - `Content-Type: <guess of mimetype based on file extension>`
    - `Content-Encoding: [gzip|identity]`
    - `Target-Type: file`

    HTTP Response headers (for directories):
    - `Content-Disposition: attachment; filename=<bundle or directory name>.tar.gz`
    - `Content-Type: application/gzip`
    - `Content-Encoding: identity`
    - `Target-Type: directory`

    Errors:
    - 404 if resolving the target raises `NotFoundError`.
    - 400 if resolving the target raises any other exception, or if the
      combination of range/head/tail is not supported for the target.
    - 403 if the target is neither a file nor a directory.
    """
    byte_range = get_request_range()
    head_lines = query_get_type(int, 'head', default=0)
    tail_lines = query_get_type(int, 'tail', default=0)
    truncation_text = query_get_type(str, 'truncation_text', default='')
    max_line_length = query_get_type(int, 'max_line_length', default=128)
    check_bundles_have_read_permission(local.model, request.user, [uuid])
    target = BundleTarget(uuid, path)

    try:
        target_info = local.download_manager.get_target_info(target, 0)
        # The requested target may resolve to a different target; in that
        # case the read permission is re-checked on the resolved bundle.
        # NOTE(review): any exception raised by this permission check is also
        # turned into a 400 by the generic handler below -- confirm intended.
        if target_info['resolved_target'] != target:
            check_bundles_have_read_permission(
                local.model, request.user, [target_info['resolved_target'].bundle_uuid]
            )
        target = target_info['resolved_target']
    except NotFoundError as e:
        abort(http.client.NOT_FOUND, str(e))
    except Exception as e:
        abort(http.client.BAD_REQUEST, str(e))

    # Figure out the file name.
    bundle_name = local.model.get_bundle(target.bundle_uuid).metadata.name
    if not path and bundle_name:
        filename = bundle_name
    else:
        filename = target_info['name']

    if target_info['type'] == 'directory':
        if byte_range:
            abort(http.client.BAD_REQUEST, 'Range not supported for directory blobs.')
        if head_lines or tail_lines:
            abort(http.client.BAD_REQUEST, 'Head and tail not supported for directory blobs.')
        # Always tar and gzip directories
        gzipped_stream = False  # but don't set the encoding to 'gzip'
        mimetype = 'application/gzip'
        filename += '.tar.gz'
        fileobj = local.download_manager.stream_tarred_gzipped_directory(target)
    elif target_info['type'] == 'file':
        # Let's gzip to save bandwidth.
        # For simplicity, we do this even if the file is already a packed
        # archive (which should be relatively rare).
        # The browser will transparently decode the file.
        gzipped_stream = request_accepts_gzip_encoding()

        # Since guess_type() will interpret '.tar.gz' as an 'application/x-tar' file
        # with 'gzip' encoding, which would usually go into the Content-Encoding
        # header. But if the bundle contents is actually a packed archive, we don't
        # want the client to automatically decompress the file, so we don't want to
        # set the Content-Encoding header. Instead, if guess_type() detects an
        # archive, we just set mimetype to indicate an arbitrary binary file.
        mimetype, encoding = mimetypes.guess_type(filename, strict=False)
        if encoding is not None:
            mimetype = 'application/octet-stream'

        if byte_range and (head_lines or tail_lines):
            abort(http.client.BAD_REQUEST, 'Head and range not supported on the same request.')
        elif byte_range:
            start, end = byte_range
            # `end` is treated as inclusive, hence the `+ 1`; the third
            # argument is presumably the number of bytes to read.
            fileobj = local.download_manager.read_file_section(
                target, start, end - start + 1, gzipped_stream
            )
        elif head_lines or tail_lines:
            fileobj = local.download_manager.summarize_file(
                target, head_lines, tail_lines, max_line_length, truncation_text, gzipped_stream
            )
        else:
            fileobj = local.download_manager.stream_file(target, gzipped_stream)
    else:
        # Symlinks.
        abort(
            http.client.FORBIDDEN,
            'Cannot download files of this type (%s).' % target_info['type'],
        )

    # Set headers.
    response.set_header('Content-Type', mimetype or 'text/plain')
    response.set_header('Content-Encoding', 'gzip' if gzipped_stream else 'identity')
    # The file name goes into a quoted-string; escape backslashes and double
    # quotes so a name containing them cannot terminate or corrupt the value.
    quoted_filename = filename.replace('\\', '\\\\').replace('"', '\\"')
    if target_info['type'] == 'file':
        response.set_header('Content-Disposition', 'inline; filename="%s"' % quoted_filename)
    else:
        response.set_header('Content-Disposition', 'attachment; filename="%s"' % quoted_filename)
    response.set_header('Target-Type', target_info['type'])

    return fileobj
def _fetch_bundles():
    """
    Fetch bundles by bundle `specs` OR search `keywords`. Behavior is undefined
    when both `specs` and `keywords` are provided.

    Query parameters:

     - `worksheet`: UUID of the base worksheet. Required when fetching by specs.
     - `specs`: Bundle spec of bundle to fetch. May be provided multiple
       times to fetch multiple bundle specs. A bundle spec is either:
       1. a UUID (8 or 32 hex characters with a preceding '0x')
       2. a bundle name referring to the last bundle with that name on the
          given base worksheet
       3. or a reverse index of the form `^N` referring to the Nth-to-last
          bundle on the given base worksheet.
     - `keywords`: Search keyword. May be provided multiple times for
       multiple keywords. Bare keywords match the names and descriptions of
       bundles. Examples of other special keyword forms:
       - `name=<name>            ` : More targeted search of using metadata fields.
       - `size=.sort             ` : Sort by a particular field.
       - `size=.sort-            ` : Sort by a particular field in reverse.
       - `size=.sum              ` : Compute total of a particular field.
       - `.mine                  ` : Match only bundles I own.
       - `.floating              ` : Match bundles that aren't on any worksheet.
       - `.count                 ` : Count the number of bundles.
       - `.limit=10              ` : Limit the number of results to the top 10.
     - `include_display_metadata`: `1` to include additional metadata helpful
       for displaying the bundle info, `0` to omit them. Default is `0`.
     - `include`: comma-separated list of related resources to include, such as "owner"
     - `depth`: when provided, the matched bundle UUIDs are expanded through
       `get_self_and_descendants` with this depth before the document is built.

    When aggregation keywords such as `.count` are used, the resulting value
    is returned as:
    ```
    {
        "meta": {
            "result": <value>
        }
    }
    ```

    Aborts with 400 when neither `keywords` nor `specs` is given.
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    descendant_depth = query_get_type(int, 'depth', None)

    if keywords:
        # Handle search keywords
        keywords = resolve_owner_in_keywords(keywords)
        bundle_uuids = local.model.search_bundle_uuids(request.user.user_id, keywords)
    elif specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(
            local.model, request.user, worksheet_uuid, specs
        )
    else:
        abort(
            httplib.BAD_REQUEST,
            "Request must include either 'keywords' "
            "or 'specs' query parameter",
        )

    # Return simple dict if scalar result (e.g. .sum or .count queries).
    # This has to happen before the descendant expansion below: a scalar
    # aggregate is not a collection of UUIDs and must not be handed to
    # get_self_and_descendants when `depth` is also supplied.
    if not isinstance(bundle_uuids, list):
        return json_api_meta({}, {'result': bundle_uuids})

    # Find all descendants down to the provided depth
    if descendant_depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(
            bundle_uuids, depth=descendant_depth
        )

    return build_bundles_document(bundle_uuids)
def _fetch_bundle_contents_blob(uuid, path=''):
    """
    API to download the contents of a bundle or a subpath within a bundle.

    For directories this method always returns a tarred and gzipped archive of
    the directory.

    For files, if the request has an Accept-Encoding header containing gzip,
    then the returned file is gzipped.

    Query parameters:
    - `head`: number of lines to fetch from the beginning of the file.
      Default is 0.
    - `tail`: number of lines to fetch from the end of the file.
      Default is 0.
    - `max_line_length`: forwarded to the file summarizer when `head` or
      `tail` is specified. Default is 128.

    A byte range (as returned by `get_request_range()`) may be used for files
    instead of `head`/`tail`; the two cannot be combined, and neither is
    supported for directories.

    Errors:
    - 404 if no target info is found.
    - 400 for unsupported range/head/tail combinations.
    - 403 if the target is neither a file nor a directory.
    """
    byte_range = get_request_range()
    head_lines = query_get_type(int, 'head', default=0)
    tail_lines = query_get_type(int, 'tail', default=0)
    max_line_length = query_get_type(int, 'max_line_length', default=128)
    check_bundles_have_read_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)

    target_info = local.download_manager.get_target_info(uuid, path, 0)
    if target_info is None:
        abort(httplib.NOT_FOUND, 'Not found.')

    # Figure out the file name.
    if not path and bundle.metadata.name:
        filename = bundle.metadata.name
    else:
        filename = target_info['name']

    if target_info['type'] == 'directory':
        if byte_range:
            abort(httplib.BAD_REQUEST, 'Range not supported for directory blobs.')
        # Reject `tail` as well as `head`: neither can be applied to a
        # tarball, and silently ignoring `tail` would return the full archive.
        if head_lines or tail_lines:
            abort(httplib.BAD_REQUEST, 'Head and tail not supported for directory blobs.')
        # Always tar and gzip directories.
        filename = filename + '.tar.gz'
        fileobj = local.download_manager.stream_tarred_gzipped_directory(uuid, path)
    elif target_info['type'] == 'file':
        gzipped = False
        if not zip_util.path_is_archive(filename) and request_accepts_gzip_encoding():
            # Let's gzip to save bandwidth. The browser will transparently decode
            # the file.
            filename = filename + '.gz'
            gzipped = True

        if byte_range and (head_lines or tail_lines):
            abort(httplib.BAD_REQUEST, 'Head and range not supported on the same request.')
        elif byte_range:
            start, end = byte_range
            # `end` is treated as inclusive, hence the `+ 1`.
            fileobj = local.download_manager.read_file_section(
                uuid, path, start, end - start + 1, gzipped
            )
        elif head_lines or tail_lines:
            fileobj = local.download_manager.summarize_file(
                uuid, path, head_lines, tail_lines, max_line_length, None, gzipped
            )
        else:
            fileobj = local.download_manager.stream_file(uuid, path, gzipped)
    else:
        # Symlinks.
        abort(httplib.FORBIDDEN, 'Cannot download files of this type.')

    # Set headers.
    mimetype, _ = mimetypes.guess_type(filename, strict=False)
    response.set_header('Content-Type', mimetype or 'text/plain')
    if zip_util.get_archive_ext(filename) == '.gz' and request_accepts_gzip_encoding():
        # The '.gz' suffix is dropped from the advertised name because the
        # payload is declared as gzip via Content-Encoding instead.
        filename = zip_util.strip_archive_ext(filename)
        response.set_header('Content-Encoding', 'gzip')
    else:
        response.set_header('Content-Encoding', 'identity')
    # The file name goes into a quoted-string; escape backslashes and double
    # quotes so a name containing them cannot terminate or corrupt the value.
    quoted_filename = filename.replace('\\', '\\\\').replace('"', '\\"')
    response.set_header('Content-Disposition', 'filename="%s"' % quoted_filename)
    return fileobj