def course_image_url(course, image_key='course_image'):
    """
    Build the URL for one of a course's images.

    Args:
        course: course object; `static_asset_path`, `id`, `fields` and the
            attribute named by `image_key` are read from it.
        image_key: name of the image attribute to resolve. Can be one of
            'course_image', 'hero_image', 'thumbnail_image'.

    Returns:
        The URL of the image as a string. Falls back to the default course
        image from settings when the attribute is empty, or when the stock
        'images_course_image.jpg' asset cannot be found by the AssetManager.
    """
    if course.static_asset_path:
        # Static course: a non-default image value is used as a custom path
        # below the static root, otherwise the conventional
        # /images/<image_key>.jpg location is returned.
        static_root = '/static/' + (course.static_asset_path or getattr(course, 'data_dir', ''))
        has_custom_image = (
            hasattr(course, image_key)
            and getattr(course, image_key) != course.fields[image_key].default
        )
        if has_custom_image:
            return static_root + '/' + getattr(course, image_key)
        return static_root + '/images/' + image_key + '.jpg'

    image_name = getattr(course, image_key)
    if not image_name:
        # Nothing configured for this image: use the default image url from settings.
        return settings.STATIC_URL + settings.DEFAULT_COURSE_ABOUT_IMAGE_URL

    loc = StaticContent.compute_location(course.id, image_name)
    if image_name.endswith('images_course_image.jpg'):
        # Only the stock image name is verified against the asset store.
        try:
            AssetManager.find(loc)
        except NotFoundError:
            return '/static/' + settings.DEFAULT_COURSE_ABOUT_IMAGE_URL

    return StaticContent.serialize_asset_key_with_slash(loc)
def test_happy_path(self, modulestore_type, create_after_overview):
    """
    Exercise thumbnail generation when everything works as expected.

    When `create_after_overview` is True, thumbnail creation is disabled while
    the first CourseOverview is built, so it is saved without an image_set;
    the config is then re-enabled and the next `get_from_id()` call is
    expected to create the CourseOverviewImageSet lazily.

    When `create_after_overview` is False, the CourseOverviewImageSet is
    expected to exist right after the first `get_from_id()` call.
    """
    source_name = "big_course_image.jpeg"

    # Build a real (oversized) JPEG in memory.
    source_image = Image.new("RGB", (800, 400), "blue")
    source_buffer = StringIO()
    source_image.save(source_buffer, format="JPEG")
    source_buffer.seek(0)

    with self.store.default_store(modulestore_type):
        course = CourseFactory.create(
            default_store=modulestore_type, course_image=source_name
        )

        # Store the image in the contentstore under the course image key.
        source_key = StaticContent.compute_location(course.id, course.course_image)
        contentstore().save(
            StaticContent(source_key, source_name, 'image/jpeg', source_buffer)
        )

        # Optionally turn thumbnailing off so the first overview is saved
        # without an image_set.
        if create_after_overview:
            self.set_config(enabled=False)

        # Now generate the CourseOverview...
        course_overview = CourseOverview.get_from_id(course.id)

        # With thumbnailing off there must be no image_set yet; turning it
        # back on should create one on the next lookup.
        if create_after_overview:
            self.assertFalse(hasattr(course_overview, 'image_set'))
            self.set_config(enabled=True)
            course_overview = CourseOverview.get_from_id(course.id)

        self.assertTrue(hasattr(course_overview, 'image_set'))
        image_urls = course_overview.image_urls
        config = CourseOverviewImageConfig.current()
        thumbnails = [
            (image_urls['small'], config.small),
            (image_urls['large'], config.large),
        ]

        # Thumbnail names embed the source name and the target dimensions.
        self.assertTrue(image_urls['raw'].endswith('big_course_image.jpeg'))
        for thumb_url, dimensions in thumbnails:
            expected_suffix = 'big_course_image-jpeg-{}x{}.jpg'.format(*dimensions)
            self.assertTrue(thumb_url.endswith(expected_suffix))

        # The stored thumbnails must have exactly the configured sizes.
        for thumb_url, dimensions in thumbnails:
            thumb_key = StaticContent.get_location_from_path(thumb_url)
            thumb_content = AssetManager.find(thumb_key)
            thumb_image = Image.open(StringIO(thumb_content.data))
            self.assertEqual(thumb_image.size, dimensions)
def test_different_resolutions(self, src_dimensions):
    """
    Test various resolutions of images to make thumbnails of.

    Note that our test sizes are small=(200, 100) and large=(400, 200).

    1. Images won't be blown up if they are too small, so a (100, 50)
       resolution image will remain (100, 50).
    2. However, images *will* be converted using our format and quality
       settings (JPEG, 75% -- the PIL default). This is because images with
       relatively small dimensions are not necessarily compressed properly.
    3. Thumbnail names keep the naming convention of the target resolution,
       even if the image was not actually scaled to that size (i.e. it was
       already smaller). This is simpler and more consistent, and it also
       shows which configuration a thumbnail was created under.
    """
    source_name = "src_course_image.png"

    # Build the source image in memory as a PNG.
    source_image = Image.new("RGB", src_dimensions, "blue")
    source_buffer = StringIO()
    source_image.save(source_buffer, format="PNG")
    source_buffer.seek(0)

    course = CourseFactory.create(course_image=source_name)

    # Save the image to the contentstore...
    source_key = StaticContent.compute_location(course.id, course.course_image)
    contentstore().save(
        StaticContent(source_key, source_name, 'image/png', source_buffer)
    )

    # Now generate the CourseOverview...
    config = CourseOverviewImageConfig.current()
    course_overview = CourseOverview.get_from_id(course.id)
    image_urls = course_overview.image_urls

    src_x, src_y = src_dimensions
    thumbnails = [
        (image_urls['small'], config.small),
        (image_urls['large'], config.large),
    ]
    for thumb_url, target in thumbnails:
        thumb_key = StaticContent.get_location_from_path(thumb_url)
        thumb_content = AssetManager.find(thumb_key)
        thumb_image = Image.open(StringIO(thumb_content.data))

        # Naming convention for thumbnail
        self.assertTrue(thumb_url.endswith('src_course_image-png-{}x{}.jpg'.format(*target)))

        # Actual thumbnail data
        target_x, target_y = target
        image_x, image_y = thumb_image.size

        # Aspect-ratio handling is left to the image library. We only check
        # that the thumbnail never exceeds the target size, and that small
        # source images aren't blown up.
        self.assertLessEqual(image_x, target_x)
        self.assertLessEqual(image_y, target_y)

        if src_x < target_x and src_y < target_y:
            self.assertEqual(src_x, image_x)
            self.assertEqual(src_y, image_y)
def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts):
    """
    Returns a fully-qualified path to a piece of static content.

    `base_url` is used as the network location of the result only when the
    asset can be served publicly: it must be found by the AssetManager, must
    not be locked, and its path must not end with an excluded extension.
    Otherwise, the path will simply be relative.

    Args:
        course_key: key to the course which owns this asset
        path: the path to said content; params, query string and fragment
            are carried over to the result
        base_url: network location (e.g. a CDN host) to place in front of
            assets that may be served publicly
        excluded_exts: iterable of file extensions, compared
            case-insensitively against the end of the path, that must never
            be served via `base_url`

    Returns:
        string: fully-qualified path to asset
    """
    # Break down the input path.
    parsed = urlparse(path)
    relative_path = parsed.path
    params = parsed.params
    query_string = parsed.query
    fragment = parsed.fragment

    # Convert our path to an asset key if it isn't one already.
    asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

    # Check the status of the asset to see if this can be served via CDN aka publicly.
    try:
        content = AssetManager.find(asset_key, as_stream=True)
        # Content without a `locked` attribute is treated as locked.
        serve_from_cdn = not getattr(content, "locked", True)
    except (ItemNotFoundError, NotFoundError):
        # If we can't find the item, just treat it as if it's locked.
        serve_from_cdn = False

    # See if this is an allowed file extension to serve.  Some files aren't served through the
    # CDN in order to avoid same-origin policy/CORS-related issues.
    lowered_path = relative_path.lower()
    if any(lowered_path.endswith(excluded_ext.lower()) for excluded_ext in excluded_exts):
        serve_from_cdn = False

    # Update any query parameter values that have asset paths in them. This is for assets that
    # require their own after-the-fact values, like a Flash file that needs the path of a config
    # file passed to it e.g. /static/visualization.swf?configFile=/static/visualization.xml
    updated_query_params = [
        (
            query_name,
            StaticContent.get_canonicalized_asset_path(course_key, query_val, base_url, excluded_exts)
            if query_val.startswith("/static/")
            else query_val,
        )
        for query_name, query_val in parse_qsl(query_string)
    ]

    serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
    netloc = base_url if serve_from_cdn else ''

    # The scheme is an empty string, not None: on Python 3 urlunparse raises
    # TypeError ("Cannot mix str and non-str arguments") when the first
    # component is not a str. Both values give a scheme-less URL.
    return urlunparse(('', netloc, serialized_asset_key, params, urlencode(updated_query_params), fragment))
def create_course_image_thumbnail(course, dimensions):
    """
    Create a thumbnail of the course image and return its URL.

    Args:
        course: course whose `course_image` asset is used as the source.
        dimensions: tuple of (width, height), passed through to the
            contentstore's `generate_thumbnail`.

    Returns:
        The thumbnail's asset key serialized as a slash-prefixed path.
    """
    source_key = StaticContent.compute_location(course.id, course.course_image)
    source_image = AssetManager.find(source_key)  # a StaticContent obj
    # generate_thumbnail returns a (content, location) pair; only the location is needed.
    _, thumbnail_key = contentstore().generate_thumbnail(source_image, dimensions=dimensions)
    return StaticContent.serialize_asset_key_with_slash(thumbnail_key)
def get_asset_content_from_path(course_key, asset_path):
    """
    Locate the given asset content, load it into memory, and return it.

    Args:
        course_key: key of the course that owns the asset.
        asset_path: path that is converted to an asset key.

    Returns:
        The content found by the AssetManager, or None when the lookup raises
        ItemNotFoundError or NotFoundError.
    """
    try:
        located = AssetManager.find(
            StaticContent.get_asset_key_from_path(course_key, asset_path)
        )
    except (ItemNotFoundError, NotFoundError):
        located = None
    return located
def get_versioned_asset_url(asset_path):
    """
    Creates a versioned asset URL.

    The asset is looked up from `asset_path` and its content digest is added
    to the path. If the path is not a valid key or the item is not found
    (InvalidKeyError / ItemNotFoundError), `asset_path` is returned unchanged.

    NOTE(review): any other error from the lookup propagates; confirm
    AssetManager.find cannot raise a different not-found exception here.
    """
    try:
        asset_key = StaticContent.get_location_from_path(asset_path)
        digest = AssetManager.find(asset_key, as_stream=True).content_digest
        versioned_url = StaticContent.add_version_to_asset_path(asset_path, digest)
    except (InvalidKeyError, ItemNotFoundError):
        versioned_url = asset_path
    return versioned_url
def get_old_style_versioned_asset_url(asset_path):
    """
    Creates an old-style versioned asset URL.

    The result is `<VERSIONED_ASSETS_PREFIX>/<content digest><asset_path>`.
    If the path is not a valid key or the item is not found
    (InvalidKeyError / ItemNotFoundError), `asset_path` is returned unchanged.
    """
    result = asset_path
    try:
        asset_key = StaticContent.get_location_from_path(asset_path)
        digest = AssetManager.find(asset_key, as_stream=True).content_digest
        result = f'{VERSIONED_ASSETS_PREFIX}/{digest}{asset_path}'
    except (InvalidKeyError, ItemNotFoundError):
        # Deliberate best-effort: fall back to the unversioned path.
        pass
    return result
def get_old_style_versioned_asset_url(asset_path):
    """
    Creates an old-style versioned asset URL.

    The versioned form is the versioned-assets prefix, a slash, the asset's
    content digest, then the original path. If the path is not a valid key or
    the item is not found (InvalidKeyError / ItemNotFoundError), `asset_path`
    is returned unchanged.
    """
    try:
        asset_key = StaticContent.get_location_from_path(asset_path)
        asset = AssetManager.find(asset_key, as_stream=True)
        versioned_url = u'{}/{}{}'.format(VERSIONED_ASSETS_PREFIX, asset.content_digest, asset_path)
    except (InvalidKeyError, ItemNotFoundError):
        return asset_path
    return versioned_url
def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts):
    """
    Returns a fully-qualified path to a piece of static content.

    If the asset is found, is not locked and does not end with an excluded
    extension, the returned path includes `base_url` as its network location.
    Otherwise, the path will simply be relative.

    Args:
        course_key: key to the course which owns this asset
        path: the path to said content; its params, query string and
            fragment are preserved
        base_url: network location (e.g. a static asset CDN host) used for
            assets that may be served publicly
        excluded_exts: iterable of file extensions that are never served via
            `base_url`; matched case-insensitively against the path suffix

    Returns:
        string: fully-qualified path to asset
    """
    # Break down the input path; scheme and netloc of the input are ignored.
    relative_path, params, query_string, fragment = urlparse(path)[2:6]

    # Convert our path to an asset key if it isn't one already.
    asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

    # Check the status of the asset to see if this can be served via CDN aka publicly.
    try:
        asset = AssetManager.find(asset_key, as_stream=True)
        is_locked = getattr(asset, "locked", True)  # no attribute means locked
        serve_from_cdn = not is_locked
    except (ItemNotFoundError, NotFoundError):
        # If we can't find the item, just treat it as if it's locked.
        serve_from_cdn = False

    # Some files aren't served through the CDN in order to avoid
    # same-origin policy/CORS-related issues.
    excluded_suffixes = tuple(excluded_ext.lower() for excluded_ext in excluded_exts)
    if relative_path.lower().endswith(excluded_suffixes):
        serve_from_cdn = False

    # Query parameter values that are themselves asset paths get
    # canonicalized too, e.g.
    # /static/visualization.swf?configFile=/static/visualization.xml
    updated_query_params = []
    for query_name, query_val in parse_qsl(query_string):
        if query_val.startswith("/static/"):
            query_val = StaticContent.get_canonicalized_asset_path(
                course_key, query_val, base_url, excluded_exts)
        updated_query_params.append((query_name, query_val))

    serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
    netloc = base_url if serve_from_cdn else ''

    # Use '' rather than None for the scheme: Python 3's urlunparse raises
    # TypeError when the first component is not a str, while '' gives the
    # same scheme-less URL on both Python 2 and 3.
    return urlunparse(('', netloc, serialized_asset_key, params, urlencode(updated_query_params), fragment))
def get_content_digest_for_asset_path(cls, prefix, path):
    """
    Return the content digest of the asset that an unprocessed path refers to.

    Args:
        prefix: key into `cls.courses` selecting the course that owns the asset.
        path: asset path; it may be a relative URL with query parameters or an
            absolute URL, since only its path component is used.

    Returns:
        The asset's content digest, or None if the lookup raises
        ItemNotFoundError or NotFoundError.
    """
    # Only the path component is needed to build the asset key.
    relative_path = urlparse(path).path
    course_id = cls.courses[prefix].id
    asset_key = StaticContent.get_asset_key_from_path(course_id, relative_path)
    try:
        return AssetManager.find(asset_key, as_stream=True).content_digest
    except (ItemNotFoundError, NotFoundError):
        return None
def get_asset_content_from_path(course_key, asset_path):
    """
    Locate the given asset content, load it into memory, and return it.

    The edx-platform modules are imported lazily inside the function; an
    ImportError from any of them is re-raised as EdXPlatformImportError.

    Returns:
        The content found by the AssetManager, or None when the lookup raises
        ItemNotFoundError or NotFoundError.
    """
    try:
        from xmodule.contentstore.content import StaticContent
        from xmodule.assetstore.assetmgr import AssetManager
        from xmodule.modulestore.exceptions import ItemNotFoundError
        from xmodule.exceptions import NotFoundError
    except ImportError as exc:
        raise EdXPlatformImportError(exc)

    try:
        located = AssetManager.find(
            StaticContent.get_asset_key_from_path(course_key, asset_path)
        )
    except (ItemNotFoundError, NotFoundError):
        located = None
    return located
def load_asset_from_location(self, location):
    """
    Loads an asset based on its location, either retrieving it from a cache
    or loading it directly from the contentstore.

    Args:
        location: asset location passed to the cache lookup and to
            AssetManager.find.

    Returns:
        The cached content if present; otherwise the content returned by
        AssetManager.find, copied into memory (and cached) when its length is
        known and below 1MB.

    Raises:
        Whatever AssetManager.find raises (callers handle ItemNotFoundError
        and NotFoundError); nothing is caught here.
    """
    # We cap caching at 1MB because it's the default for memcached and also we don't
    # want to do too much buffering in memory when we're serving an actual request.
    max_cacheable_bytes = 1048576

    # See if we can load this item from cache.
    content = get_cached_content(location)
    if content is not None:
        return content

    # Not in cache, so load it from the asset manager. Lookup errors propagate
    # to the caller as-is (the previous try/except only re-raised them).
    content = AssetManager.find(location, as_stream=True)

    # Now that we fetched it, let's go ahead and try to cache it.
    if content.length is not None and content.length < max_cacheable_bytes:
        content = content.copy_to_in_mem()
        set_cached_content(content)

    return content
def process_request(self, request):
    """
    Process the given request.

    Only requests for which `self.is_asset_request(request)` is true are
    handled; for any other request the method falls through and returns None.

    For asset requests, one of the following is returned:
      * HttpResponseBadRequest - the path cannot be turned into a location
      * HttpResponseNotFound - the asset could not be loaded
      * HttpResponsePermanentRedirect - a versioned request whose digest
        differs from the asset's actual digest
      * HttpResponseForbidden - the user is not authorized for the content
      * HttpResponseNotModified - If-Modified-Since equals the asset's
        last-modified string
      * HttpResponse with status 206 - a single satisfiable byte range
      * HttpResponse with status 416 - a single unsatisfiable byte range
      * HttpResponse with the full content otherwise
    """
    asset_path = request.path

    if self.is_asset_request(request):  # lint-amnesty, pylint: disable=too-many-nested-blocks
        # Make sure we can convert this request into a location.
        if AssetLocator.CANONICAL_NAMESPACE in asset_path:
            asset_path = asset_path.replace('block/', 'block@', 1)

        # If this is a versioned request, pull out the digest and chop off the prefix.
        requested_digest = None
        if StaticContent.is_versioned_asset_path(asset_path):
            requested_digest, asset_path = StaticContent.parse_versioned_asset_path(
                asset_path)

        # Make sure we have a valid location value for this asset.
        try:
            loc = StaticContent.get_location_from_path(asset_path)
        except (InvalidLocationError, InvalidKeyError):
            return HttpResponseBadRequest()

        # Attempt to load the asset to make sure it exists, and grab the asset digest
        # if we're able to load it.
        actual_digest = None
        try:
            content = self.load_asset_from_location(loc)
            actual_digest = getattr(content, "content_digest", None)
        except (ItemNotFoundError, NotFoundError):
            return HttpResponseNotFound()

        # If this was a versioned asset, and the digest doesn't match, redirect
        # them to the actual version. Content without a digest is never redirected.
        if requested_digest is not None and actual_digest is not None and (
                actual_digest != requested_digest):
            actual_asset_path = StaticContent.add_version_to_asset_path(
                asset_path, actual_digest)
            return HttpResponsePermanentRedirect(actual_asset_path)

        # Set the basics for this request. Make sure that the course key for this
        # asset has a run, which old-style courses do not. Otherwise, this will
        # explode when the key is serialized to be sent to NR.
        safe_course_key = loc.course_key
        if safe_course_key.run is None:
            safe_course_key = safe_course_key.replace(run='only')

        # Reporting is skipped entirely when `newrelic` is falsy.
        if newrelic:
            newrelic.agent.add_custom_parameter('course_id', safe_course_key)
            newrelic.agent.add_custom_parameter('org', loc.org)
            newrelic.agent.add_custom_parameter('contentserver.path', loc.path)

            # Figure out if this is a CDN using us as the origin.
            is_from_cdn = StaticContentServer.is_cdn_request(request)
            newrelic.agent.add_custom_parameter('contentserver.from_cdn', is_from_cdn)

            # Check if this content is locked or not.
            locked = self.is_content_locked(content)
            newrelic.agent.add_custom_parameter('contentserver.locked', locked)

        # Check that user has access to the content.
        if not self.is_user_authorized(request, content, loc):
            return HttpResponseForbidden('Unauthorized')

        # Figure out if the client sent us a conditional request, and let them know
        # if this asset has changed since then. The check is an exact string
        # comparison against the formatted last-modified timestamp.
        last_modified_at_str = content.last_modified_at.strftime(
            HTTP_DATE_FORMAT)
        if 'HTTP_IF_MODIFIED_SINCE' in request.META:
            if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
            if if_modified_since == last_modified_at_str:
                return HttpResponseNotModified()

        # *** File streaming within a byte range ***
        # If a Range is provided, parse Range attribute of the request
        # Add Content-Range in the response if Range is structurally correct
        # Request -> Range attribute structure: "Range: bytes=first-[last]"
        # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
        response = None
        if request.META.get('HTTP_RANGE'):
            # If we have a StaticContent, get a StaticContentStream.  Can't manipulate the bytes otherwise.
            if isinstance(content, StaticContent):
                content = AssetManager.find(loc, as_stream=True)

            header_value = request.META['HTTP_RANGE']
            try:
                unit, ranges = parse_range_header(header_value, content.length)
            except ValueError as exception:
                # If the header field is syntactically invalid it should be ignored.
                log.exception("%s in Range header: %s for content: %s",
                              str(exception), header_value, str(loc))
            else:
                if unit != 'bytes':
                    # Only accept ranges in bytes
                    log.warning(
                        "Unknown unit in Range header: %s for content: %s",
                        header_value, str(loc))
                elif len(ranges) > 1:
                    # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                    # But we send back the full content.
                    log.warning(
                        "More than 1 ranges in Range header: %s for content: %s",
                        header_value, str(loc))
                else:
                    first, last = ranges[0]

                    if 0 <= first <= last < content.length:
                        # If the byte range is satisfiable
                        response = HttpResponse(
                            content.stream_data_in_range(first, last))
                        response[
                            'Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                first=first, last=last, length=content.length)
                        # Both ends of the range are inclusive.
                        response['Content-Length'] = str(last - first + 1)
                        response.status_code = 206  # Partial Content

                        if newrelic:
                            newrelic.agent.add_custom_parameter(
                                'contentserver.ranged', True)
                    else:
                        log.warning(
                            "Cannot satisfy ranges in Range header: %s for content: %s",
                            header_value, str(loc))
                        return HttpResponse(
                            status=416)  # Requested Range Not Satisfiable

        # If Range header is absent or syntactically invalid return a full content response.
        if response is None:
            response = HttpResponse(content.stream_data())
            response['Content-Length'] = content.length

        if newrelic:
            newrelic.agent.add_custom_parameter(
                'contentserver.content_len', content.length)
            newrelic.agent.add_custom_parameter(
                'contentserver.content_type', content.content_type)

        # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
        response['Accept-Ranges'] = 'bytes'
        response['Content-Type'] = content.content_type
        response['X-Frame-Options'] = 'ALLOW'

        # Set any caching headers, and do any response cleanup needed.  Based on how much
        # middleware we have in place, there's no easy way to use the built-in Django
        # utilities and properly sanitize and modify a response to ensure that it is as
        # cacheable as possible, which is why we do it ourselves.
        self.set_caching_headers(content, response)

        return response
def process_request(self, request):
    """
    Serve a static asset when the request path has an asset prefix.

    Requests whose path starts with neither "/<XASSET_LOCATION_TAG>/" nor
    "/<AssetLocator.CANONICAL_NAMESPACE>" are not handled; the method falls
    through and returns None for them.

    For asset requests, the response is one of: status 400 (malformed
    location), status 404 (asset not found), HttpResponseForbidden (locked
    content and the user lacks access), HttpResponseNotModified, status 206
    (a single satisfiable byte range), status 416 (a single unsatisfiable
    byte range), or the full content.
    """
    # look to see if the request is prefixed with an asset prefix tag
    if request.path.startswith("/" + XASSET_LOCATION_TAG + "/") or request.path.startswith(
        "/" + AssetLocator.CANONICAL_NAMESPACE
    ):
        # NOTE: request.path itself is rewritten here.
        if AssetLocator.CANONICAL_NAMESPACE in request.path:
            request.path = request.path.replace("block/", "block@", 1)
        try:
            loc = StaticContent.get_location_from_path(request.path)
        except (InvalidLocationError, InvalidKeyError):
            # return a 'Bad Request' to browser as we have a malformed Location
            response = HttpResponse()
            response.status_code = 400
            return response

        # first look in our cache so we don't have to round-trip to the DB
        content = get_cached_content(loc)
        if content is None:
            # nope, not in cache, let's fetch from DB
            try:
                content = AssetManager.find(loc, as_stream=True)
            except (ItemNotFoundError, NotFoundError):
                response = HttpResponse()
                response.status_code = 404
                return response

            # since we fetched it from DB, let's cache it going forward, but only if it's < 1MB
            # this is because I haven't been able to find a means to stream data out of memcached
            if content.length is not None:
                if content.length < 1048576:
                    # since we've queried as a stream, let's read in the stream into memory to set in cache
                    content = content.copy_to_in_mem()
                    set_cached_content(content)
        else:
            # NOP here, but we may wish to add a "cache-hit" counter in the future
            pass

        # Check that user has access to content. Only content flagged as locked
        # is restricted; staff users skip the enrollment check.
        if getattr(content, "locked", False):
            if not hasattr(request, "user") or not request.user.is_authenticated():
                return HttpResponseForbidden("Unauthorized")
            if not request.user.is_staff:
                # Deprecated locations are checked with is_enrolled_by_partial,
                # all others with is_enrolled.
                if getattr(loc, "deprecated", False) and not CourseEnrollment.is_enrolled_by_partial(
                    request.user, loc.course_key
                ):
                    return HttpResponseForbidden("Unauthorized")
                if not getattr(loc, "deprecated", False) and not CourseEnrollment.is_enrolled(
                    request.user, loc.course_key
                ):
                    return HttpResponseForbidden("Unauthorized")

        # convert over the DB persistent last modified timestamp to a HTTP compatible
        # timestamp, so we can simply compare the strings
        # NOTE(review): this pattern puts hyphens between day, month and year, which
        # does not look like the usual HTTP-date form -- confirm clients echo it back unchanged.
        last_modified_at_str = content.last_modified_at.strftime("%a, %d-%b-%Y %H:%M:%S GMT")

        # see if the client has cached this content, if so then compare the
        # timestamps, if they are the same then just return a 304 (Not Modified)
        if "HTTP_IF_MODIFIED_SINCE" in request.META:
            if_modified_since = request.META["HTTP_IF_MODIFIED_SINCE"]
            if if_modified_since == last_modified_at_str:
                return HttpResponseNotModified()

        # *** File streaming within a byte range ***
        # If a Range is provided, parse Range attribute of the request
        # Add Content-Range in the response if Range is structurally correct
        # Request -> Range attribute structure: "Range: bytes=first-[last]"
        # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
        response = None
        if request.META.get("HTTP_RANGE"):
            # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
            # The exact-type comparison is intentional: only a plain StaticContent is re-fetched as a stream.
            if type(content) == StaticContent:
                content = AssetManager.find(loc, as_stream=True)

            header_value = request.META["HTTP_RANGE"]
            try:
                unit, ranges = parse_range_header(header_value, content.length)
            except ValueError as exception:
                # If the header field is syntactically invalid it should be ignored.
                log.exception(
                    u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                )
            else:
                if unit != "bytes":
                    # Only accept ranges in bytes
                    log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                elif len(ranges) > 1:
                    # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                    # But we send back the full content.
                    log.warning(
                        u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                    )
                else:
                    first, last = ranges[0]

                    if 0 <= first <= last < content.length:
                        # If the byte range is satisfiable
                        response = HttpResponse(content.stream_data_in_range(first, last))
                        response["Content-Range"] = "bytes {first}-{last}/{length}".format(
                            first=first, last=last, length=content.length
                        )
                        # Both ends of the range are inclusive.
                        response["Content-Length"] = str(last - first + 1)
                        response.status_code = 206  # Partial Content
                    else:
                        log.warning(
                            u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                        return HttpResponse(status=416)  # Requested Range Not Satisfiable

        # If Range header is absent or syntactically invalid return a full content response.
        if response is None:
            response = HttpResponse(content.stream_data())
            response["Content-Length"] = content.length

        # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
        response["Accept-Ranges"] = "bytes"
        response["Content-Type"] = content.content_type
        response["Last-Modified"] = last_modified_at_str

        return response
def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts, encode=True):
    """
    Returns a fully-qualified path to a piece of static content.

    `base_url` is used as the network location of the result when the asset
    is found, is not locked and is not an excluded asset type. Otherwise, the
    path will simply be relative. When the content has a digest and is not
    excluded, the path is versioned with that digest. The fragment of the
    input path is dropped.

    Args:
        course_key: key to the course which owns this asset
        path: the path to said content
        base_url: network location used for assets that may be served publicly
        excluded_exts: passed to StaticContent.is_excluded_asset_type to decide
            whether the asset is excluded
        encode: whether to percent-encode the resulting asset path; the
            recursive calls for query parameter values pass False so the
            values are not encoded twice

    Returns:
        string: fully-qualified path to asset
    """
    # Break down the input path; scheme, netloc and fragment are not used.
    relative_path, params, query_string = urlparse(path)[2:5]

    # Convert our path to an asset key if it isn't one already.
    asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

    # Check the status of the asset to see if this can be served via CDN aka publicly.
    try:
        asset = AssetManager.find(asset_key, as_stream=True)
        serve_from_cdn = not getattr(asset, "locked", True)
        content_digest = getattr(asset, "content_digest", None)
    except (ItemNotFoundError, NotFoundError):
        # If we can't find the item, just treat it as if it's locked.
        serve_from_cdn = False
        content_digest = None

    # Do a generic check to see if anything about this asset disqualifies it from being CDN'd.
    is_excluded = bool(StaticContent.is_excluded_asset_type(relative_path, excluded_exts))
    if is_excluded:
        serve_from_cdn = False

    # Update any query parameter values that have asset paths in them. This is for assets that
    # require their own after-the-fact values, like a Flash file that needs the path of a config
    # file passed to it e.g. /static/visualization.swf?configFile=/static/visualization.xml
    updated_query_params = []
    for query_name, query_val in parse_qsl(query_string):
        if query_val.startswith("/static/"):
            query_val = StaticContent.get_canonicalized_asset_path(
                course_key, query_val, base_url, excluded_exts, encode=False)
        else:
            # Make sure we're encoding Unicode strings down to their byte string
            # representation so that `urlencode` can handle it.
            query_val = query_val.encode('utf-8')
        updated_query_params.append((query_name, query_val))

    serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
    netloc = base_url if serve_from_cdn else ''

    # If the content has a digest (i.e. md5sum) value specified, create a versioned path to the asset using it.
    if content_digest and not is_excluded:
        asset_path = StaticContent.add_version_to_asset_path(serialized_asset_key, content_digest)
    else:
        asset_path = serialized_asset_key

    # Only encode this if told to.  Important so that we don't double encode
    # when working with paths that are in query parameters.
    asset_path = asset_path.encode('utf-8')
    if encode:
        asset_path = quote_plus(asset_path, '/:+@')

    return urlunparse((None, netloc.encode('utf-8'), asset_path, params, urlencode(updated_query_params), None))
def process_request(self, request):
    """
    Process the given request.

    Only requests for which `self.is_asset_request(request)` is true are
    handled; for any other request the method falls through and returns None.

    For asset requests, one of the following is returned:
      * HttpResponseBadRequest - the path cannot be turned into a location
      * HttpResponseNotFound - the asset could not be loaded
      * HttpResponseForbidden - the user is not authorized for the content
      * HttpResponseNotModified - If-Modified-Since equals the asset's
        last-modified string
      * HttpResponse with status 206 - a single satisfiable byte range
      * HttpResponse with status 416 - a single unsatisfiable byte range
      * HttpResponse with the full content otherwise
    """
    if self.is_asset_request(request):
        # Make sure we can convert this request into a location.
        # NOTE: request.path itself is rewritten here.
        if AssetLocator.CANONICAL_NAMESPACE in request.path:
            request.path = request.path.replace('block/', 'block@', 1)
        try:
            loc = StaticContent.get_location_from_path(request.path)
        except (InvalidLocationError, InvalidKeyError):
            return HttpResponseBadRequest()

        # Try and load the asset.
        content = None
        try:
            content = self.load_asset_from_location(loc)
        except (ItemNotFoundError, NotFoundError):
            return HttpResponseNotFound()

        # Set the basics for this request.
        newrelic.agent.add_custom_parameter('course_id', loc.course_key)
        newrelic.agent.add_custom_parameter('org', loc.org)
        newrelic.agent.add_custom_parameter('contentserver.path', loc.path)

        # Figure out if this is a CDN using us as the origin.
        # The value is coerced to a strict boolean before it is reported.
        is_from_cdn = StaticContentServer.is_cdn_request(request)
        newrelic.agent.add_custom_parameter('contentserver.from_cdn', True if is_from_cdn else False)

        # Check if this content is locked or not.
        locked = self.is_content_locked(content)
        newrelic.agent.add_custom_parameter('contentserver.locked', True if locked else False)

        # Check that user has access to the content.
        if not self.is_user_authorized(request, content, loc):
            return HttpResponseForbidden('Unauthorized')

        # Figure out if the client sent us a conditional request, and let them know
        # if this asset has changed since then. The check is an exact string
        # comparison against the formatted last-modified timestamp.
        last_modified_at_str = content.last_modified_at.strftime(
            HTTP_DATE_FORMAT)
        if 'HTTP_IF_MODIFIED_SINCE' in request.META:
            if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
            if if_modified_since == last_modified_at_str:
                return HttpResponseNotModified()

        # *** File streaming within a byte range ***
        # If a Range is provided, parse Range attribute of the request
        # Add Content-Range in the response if Range is structurally correct
        # Request -> Range attribute structure: "Range: bytes=first-[last]"
        # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
        response = None
        if request.META.get('HTTP_RANGE'):
            # If we have a StaticContent, get a StaticContentStream.  Can't manipulate the bytes otherwise.
            # The exact-type comparison is intentional: only a plain StaticContent is re-fetched as a stream.
            if type(content) == StaticContent:
                content = AssetManager.find(loc, as_stream=True)

            header_value = request.META['HTTP_RANGE']
            try:
                unit, ranges = parse_range_header(header_value, content.length)
            except ValueError as exception:
                # If the header field is syntactically invalid it should be ignored.
                log.exception(u"%s in Range header: %s for content: %s",
                              exception.message, header_value, unicode(loc))
            else:
                if unit != 'bytes':
                    # Only accept ranges in bytes
                    log.warning(
                        u"Unknown unit in Range header: %s for content: %s",
                        header_value, unicode(loc))
                elif len(ranges) > 1:
                    # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                    # But we send back the full content.
                    log.warning(
                        u"More than 1 ranges in Range header: %s for content: %s",
                        header_value, unicode(loc))
                else:
                    first, last = ranges[0]

                    if 0 <= first <= last < content.length:
                        # If the byte range is satisfiable
                        response = HttpResponse(
                            content.stream_data_in_range(first, last))
                        response[
                            'Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                first=first, last=last, length=content.length)
                        # Both ends of the range are inclusive.
                        range_len = last - first + 1
                        response['Content-Length'] = str(range_len)
                        response.status_code = 206  # Partial Content

                        newrelic.agent.add_custom_parameter(
                            'contentserver.range_len', range_len)
                    else:
                        log.warning(
                            u"Cannot satisfy ranges in Range header: %s for content: %s",
                            header_value, unicode(loc))
                        return HttpResponse(
                            status=416)  # Requested Range Not Satisfiable

        # If Range header is absent or syntactically invalid return a full content response.
        if response is None:
            response = HttpResponse(content.stream_data())
            response['Content-Length'] = content.length

        newrelic.agent.add_custom_parameter(
            'contentserver.content_len', content.length)
        newrelic.agent.add_custom_parameter(
            'contentserver.content_type', content.content_type)

        # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
        response['Accept-Ranges'] = 'bytes'
        response['Content-Type'] = content.content_type

        # Set any caching headers, and do any response cleanup needed.  Based on how much
        # middleware we have in place, there's no easy way to use the built-in Django
        # utilities and properly sanitize and modify a response to ensure that it is as
        # cacheable as possible, which is why we do it ourselves.
        self.set_caching_headers(content, response)

        return response
def process_request(self, request):
    """
    Serve a contentstore asset when `request` targets one.

    Returns None when `self.is_asset_request(request)` is false. Otherwise
    returns an HTTP response:

    * 400 when the path cannot be converted into an asset location
    * 404 when the asset cannot be loaded
    * a permanent redirect when a versioned path carries a digest that
      differs from the asset's current digest
    * 403 when `self.is_user_authorized` rejects the request
    * 304 when If-Modified-Since equals the asset's last-modified string
    * 206 for a single satisfiable byte range, 416 for an unsatisfiable one
    * the full content in every other case
    """
    asset_path = request.path
    if not self.is_asset_request(request):
        return None

    # Rewrite the first 'block/' so the path can be parsed as a location.
    if AssetLocator.CANONICAL_NAMESPACE in asset_path:
        asset_path = asset_path.replace('block/', 'block@', 1)

    # A versioned path embeds a digest prefix; split it off when present.
    requested_digest = None
    if StaticContent.is_versioned_asset_path(asset_path):
        requested_digest, asset_path = StaticContent.parse_versioned_asset_path(asset_path)

    # The remaining path must map onto a valid location.
    try:
        loc = StaticContent.get_location_from_path(asset_path)
    except (InvalidLocationError, InvalidKeyError):
        return HttpResponseBadRequest()

    # Loading the asset proves it exists and gives us its current digest.
    try:
        content = self.load_asset_from_location(loc)
        actual_digest = content.content_digest
    except (ItemNotFoundError, NotFoundError):
        return HttpResponseNotFound()

    # A versioned request with a stale digest is redirected to the
    # current version of the asset.
    digest_is_stale = requested_digest is not None and (actual_digest != requested_digest)
    if digest_is_stale:
        current_path = StaticContent.add_version_to_asset_path(asset_path, actual_digest)
        return HttpResponsePermanentRedirect(current_path)

    # Old-style course keys have no run; substitute one so the key can be
    # serialized when it is reported to New Relic.
    reported_course_key = loc.course_key
    if reported_course_key.run is None:
        reported_course_key = reported_course_key.replace(run='only')

    newrelic.agent.add_custom_parameter('course_id', reported_course_key)
    newrelic.agent.add_custom_parameter('org', loc.org)
    newrelic.agent.add_custom_parameter('contentserver.path', loc.path)

    # Record whether a CDN is using us as its origin.
    from_cdn = StaticContentServer.is_cdn_request(request)
    newrelic.agent.add_custom_parameter('contentserver.from_cdn', from_cdn)

    # Record whether the content is locked.
    is_locked = self.is_content_locked(content)
    newrelic.agent.add_custom_parameter('contentserver.locked', is_locked)

    # Enforce access control before any bytes are served.
    if not self.is_user_authorized(request, content, loc):
        return HttpResponseForbidden('Unauthorized')

    # Conditional request: the validator is compared as a plain string.
    last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
    if request.META.get('HTTP_IF_MODIFIED_SINCE') == last_modified_at_str:
        return HttpResponseNotModified()

    # *** File streaming within a byte range ***
    # Request  -> "Range: bytes=first-[last]"
    # Response -> "Content-Range: bytes first-last/totalLength"
    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
    response = None
    range_header = request.META.get('HTTP_RANGE')
    if range_header:
        # Byte ranges can only be cut from a stream, so swap a plain
        # StaticContent for its streaming form.  The exact-type comparison
        # is kept on purpose (NOTE(review): presumably the stream type may
        # subclass StaticContent, in which case isinstance would re-fetch
        # needlessly -- confirm before changing).
        if type(content) == StaticContent:
            content = AssetManager.find(loc, as_stream=True)

        try:
            unit, ranges = parse_range_header(range_header, content.length)
        except ValueError as exception:
            # A syntactically invalid header is ignored: log it and fall
            # through to the full-content response below.
            log.exception(
                u"%s in Range header: %s for content: %s",
                exception.message,
                range_header,
                unicode(loc),
            )
        else:
            if unit != 'bytes':
                # Only byte ranges are supported.
                log.warning(
                    u"Unknown unit in Range header: %s for content: %s",
                    range_header,
                    unicode(loc),
                )
            elif len(ranges) > 1:
                # HTTP/1.1 wants a multipart reply for several ranges;
                # we answer with the full content instead.
                # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                log.warning(
                    u"More than 1 ranges in Range header: %s for content: %s",
                    range_header,
                    unicode(loc),
                )
            else:
                first, last = ranges[0]
                if not (0 <= first <= last < content.length):
                    log.warning(
                        u"Cannot satisfy ranges in Range header: %s for content: %s",
                        range_header,
                        unicode(loc),
                    )
                    return HttpResponse(status=416)  # Requested Range Not Satisfiable

                # The range is satisfiable: stream just that slice.
                response = HttpResponse(content.stream_data_in_range(first, last))
                response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                    first=first, last=last, length=content.length
                )
                response['Content-Length'] = str(last - first + 1)
                response.status_code = 206  # Partial Content

                newrelic.agent.add_custom_parameter('contentserver.ranged', True)

    # No Range header, or one we chose to ignore: send everything.
    if response is None:
        response = HttpResponse(content.stream_data())
        response['Content-Length'] = content.length

    newrelic.agent.add_custom_parameter('contentserver.content_len', content.length)
    newrelic.agent.add_custom_parameter('contentserver.content_type', content.content_type)

    # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
    response['Accept-Ranges'] = 'bytes'
    response['Content-Type'] = content.content_type

    # Caching headers and response cleanup are applied by hand because,
    # with the middleware in place, the built-in Django utilities cannot
    # be relied on to make the response as cacheable as possible.
    self.set_caching_headers(content, response)

    return response
def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts, encode=True):
    """
    Build the canonical URL for a piece of static content.

    The result uses `base_url` as its network location only when the asset
    was found, is not locked, and is not of an excluded type; otherwise the
    URL is relative.  When the asset exposes a content digest and is not
    excluded, the path is the versioned form of the asset key.

    Args:
        course_key: key to the course which owns this asset
        path: the path to said content; may carry params and a query string
        base_url: network location to use when the asset may be served
            from it (e.g. a static asset CDN)
        excluded_exts: passed to `StaticContent.is_excluded_asset_type` to
            decide whether this asset type is excluded
        encode: when true, the asset path is UTF-8 encoded and URL-quoted;
            nested calls for query-parameter values pass False so the
            value is not encoded twice

    Returns:
        string: the assembled URL for the asset
    """
    # Only the path, params and query string of the input are used.
    _, _, relative_path, params, query_string, _ = urlparse(path)

    # Turn the path into an asset key (a no-op if it already is one).
    asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

    # An asset is CDN-servable only if it exists and is not locked; a
    # missing `locked` attribute is treated as locked.
    content_digest = None
    try:
        found = AssetManager.find(asset_key, as_stream=True)
        serve_from_cdn = not getattr(found, "locked", True)
        content_digest = getattr(found, "content_digest", None)
    except (ItemNotFoundError, NotFoundError):
        # An asset we cannot find is handled as if it were locked.
        serve_from_cdn = False

    # Excluded asset types are never served from the CDN nor versioned.
    is_excluded = bool(StaticContent.is_excluded_asset_type(relative_path, excluded_exts))
    if is_excluded:
        serve_from_cdn = False

    # Query values that are themselves asset paths get canonicalized too,
    # e.g. /static/visualization.swf?configFile=/static/visualization.xml
    rewritten_params = []
    for param_name, param_value in parse_qsl(query_string):
        if param_value.startswith("/static/"):
            param_value = StaticContent.get_canonicalized_asset_path(
                course_key, param_value, base_url, excluded_exts, encode=False)
        # Encode to a byte string so that `urlencode` can handle it.
        rewritten_params.append((param_name, param_value.encode('utf-8')))

    serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
    netloc = base_url if serve_from_cdn else ''

    # With a digest (i.e. md5sum) available, emit the versioned path.
    if content_digest and not is_excluded:
        asset_path = StaticContent.add_version_to_asset_path(serialized_asset_key, content_digest)
    else:
        asset_path = serialized_asset_key

    # Skipped for paths embedded in query parameters, which the outer
    # call encodes as part of its own query string.
    if encode:
        asset_path = quote_plus(asset_path.encode('utf-8'), '/:+@')

    url_parts = ('', netloc, asset_path, params, urlencode(rewritten_params), '')
    return urlunparse(url_parts)
def process_request(self, request):
    """
    Serve a contentstore asset when `request` targets one.

    Returns None when `self.is_asset_request(request)` is false. Otherwise
    returns 400 for an unparseable path, 404 for a missing asset, 403 when
    `self.is_user_authorized` rejects the request, 304 when
    If-Modified-Since equals the asset's last-modified string, 206/416 for
    a single byte range, or the full content.

    Note that `request.path` is rewritten in place ('block/' -> 'block@')
    when it contains the canonical asset namespace.
    """
    if not self.is_asset_request(request):
        return None

    # Make sure the path can be converted into a location.
    if AssetLocator.CANONICAL_NAMESPACE in request.path:
        request.path = request.path.replace('block/', 'block@', 1)

    try:
        loc = StaticContent.get_location_from_path(request.path)
    except (InvalidLocationError, InvalidKeyError):
        return HttpResponseBadRequest()

    # Load the asset, or report that it does not exist.
    try:
        content = self.load_asset_from_location(loc)
    except (ItemNotFoundError, NotFoundError):
        return HttpResponseNotFound()

    # Enforce access control before any bytes are served.
    if not self.is_user_authorized(request, content, loc):
        return HttpResponseForbidden('Unauthorized')

    # Conditional request: the validator is compared as a plain string.
    last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
    if request.META.get('HTTP_IF_MODIFIED_SINCE') == last_modified_at_str:
        return HttpResponseNotModified()

    # *** File streaming within a byte range ***
    # Request  -> "Range: bytes=first-[last]"
    # Response -> "Content-Range: bytes first-last/totalLength"
    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
    response = None
    range_header = request.META.get('HTTP_RANGE')
    if range_header:
        # Byte ranges can only be cut from a stream.  The exact-type
        # comparison is intentional (NOTE(review): presumably the stream
        # type may subclass StaticContent -- confirm before switching to
        # isinstance).
        if type(content) == StaticContent:
            content = AssetManager.find(loc, as_stream=True)

        try:
            unit, ranges = parse_range_header(range_header, content.length)
        except ValueError as exception:
            # A syntactically invalid header is ignored: log it and fall
            # through to the full-content response below.
            log.exception(
                u"%s in Range header: %s for content: %s",
                exception.message,
                range_header,
                unicode(loc),
            )
        else:
            if unit != 'bytes':
                # Only byte ranges are supported.
                log.warning(
                    u"Unknown unit in Range header: %s for content: %s",
                    range_header,
                    unicode(loc),
                )
            elif len(ranges) > 1:
                # HTTP/1.1 wants a multipart reply for several ranges;
                # we answer with the full content instead.
                # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                log.warning(
                    u"More than 1 ranges in Range header: %s for content: %s",
                    range_header,
                    unicode(loc),
                )
            else:
                first, last = ranges[0]
                if not (0 <= first <= last < content.length):
                    log.warning(
                        u"Cannot satisfy ranges in Range header: %s for content: %s",
                        range_header,
                        unicode(loc),
                    )
                    return HttpResponse(status=416)  # Requested Range Not Satisfiable

                # The range is satisfiable: stream just that slice.
                response = HttpResponse(content.stream_data_in_range(first, last))
                response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                    first=first, last=last, length=content.length
                )
                response['Content-Length'] = str(last - first + 1)
                response.status_code = 206  # Partial Content

    # No Range header, or one we chose to ignore: send everything.
    if response is None:
        response = HttpResponse(content.stream_data())
        response['Content-Length'] = content.length

    # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
    response['Accept-Ranges'] = 'bytes'
    response['Content-Type'] = content.content_type

    # Caching headers and response cleanup are applied by hand because,
    # with the middleware in place, the built-in Django utilities cannot
    # be relied on to make the response as cacheable as possible.
    self.set_caching_headers(content, response)

    return response
def _find_asset_urls_in_block( task_id, value, block_loc, block_assets, course_key, environment, staff_user_id, update, dictionary=None, value_key=None, ): if type(value) == dict: for key, val in value.items(): _find_asset_urls_in_block(task_id, val, block_loc, block_assets, course_key, environment, staff_user_id, update, dictionary=value, value_key=key) elif type(value) == list: for item in value: _find_asset_urls_in_block(task_id, item, block_loc, block_assets, course_key, environment, staff_user_id, update, dictionary=dictionary, value_key=value_key) elif type(value) in (str, unicode): save_updated = False urls = re.findall(URL_RE, value) for url in urls: url = strip_tags(url) parsed_url = urlparse(url) asset_url = StaticContent.ASSET_URL_RE.match(parsed_url.path) if asset_url is not None: # check if asset URL belongs to some other server or course if parsed_url.hostname != environment or \ asset_url.groupdict().get('course') != course_key.course or \ asset_url.groupdict().get('org') != course_key.org: asset_info = { 'name': asset_url.groupdict().get('name'), 'module': block_loc, 'available': False } asset_path = '{}{}'.format( StaticContent.get_base_url_path_for_course_assets( course_key), asset_url.groupdict().get('name')) # check if asset exists in this course try: loc = StaticContent.get_location_from_path(asset_path) except (InvalidLocationError, InvalidKeyError): pass else: try: AssetManager.find(loc, as_stream=True) except (ItemNotFoundError, NotFoundError): pass else: asset_info['available'] = True if update: # replace url with the `asset_path` full_asset_path = urljoin( 'https://{}'.format(environment), asset_path) value = value.replace(url, full_asset_path, 1) save_updated = True log.info( '[{}] Replacing `{}` with new path `{}` in module `{}`' .format(task_id, url, full_asset_path, block_loc)) block_assets.append(asset_info) if urls and save_updated and update: dictionary[value_key] = value
def process_request(self, request):
    """
    Serve a contentstore asset when the request path names one.

    Returns None when the path carries neither asset prefix.  Otherwise
    returns an HTTP response:

    * 400 when the path is not a well-formed asset location
    * 404 when the asset is in neither the cache nor the contentstore
    * 403 when the asset is locked and the user is not authenticated, or
      is neither staff nor enrolled in the asset's course
    * 304 when If-Modified-Since matches the asset's last-modified stamp
    * 206 for a single satisfiable byte range, 416 for an unsatisfiable one
    * the full content in every other case

    Side effects: `request.path` is rewritten in place ('block/' ->
    'block@') for canonical-namespace paths, and assets smaller than 1MB
    fetched from the contentstore are copied into the cache.
    """
    # Only handle paths that start with one of the asset prefixes.
    is_asset_path = (
        request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or
        request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
    )
    if not is_asset_path:
        return None

    if AssetLocator.CANONICAL_NAMESPACE in request.path:
        request.path = request.path.replace('block/', 'block@', 1)

    try:
        loc = StaticContent.get_location_from_path(request.path)
    except (InvalidLocationError, InvalidKeyError):
        # Malformed Location: answer with 'Bad Request'.
        return HttpResponse(status=400)

    # First look in our cache so we don't have to round-trip to the DB.
    content = get_cached_content(loc)
    if content is None:
        try:
            content = AssetManager.find(loc, as_stream=True)
        except (ItemNotFoundError, NotFoundError):
            return HttpResponse(status=404)

        # Cache it going forward, but only if it's < 1MB: the content has
        # to be read fully into memory to be stored in the cache.
        max_cacheable_bytes = 1048576
        if content.length is not None and content.length < max_cacheable_bytes:
            content = content.copy_to_in_mem()
            set_cached_content(content)

    # Locked content is restricted to authenticated users who are either
    # staff or enrolled in the course that owns the asset.
    if getattr(content, "locked", False):
        if not hasattr(request, "user") or not request.user.is_authenticated():
            return HttpResponseForbidden('Unauthorized')
        if not request.user.is_staff:
            # Deprecated locations use the partial-match enrollment lookup.
            if getattr(loc, 'deprecated', False):
                is_enrolled = CourseEnrollment.is_enrolled_by_partial(request.user, loc.course_key)
            else:
                is_enrolled = CourseEnrollment.is_enrolled(request.user, loc.course_key)
            if not is_enrolled:
                return HttpResponseForbidden('Unauthorized')

    # Convert the persisted last-modified timestamp to an HTTP-date so it
    # can be compared with the request header as a plain string.  The
    # space-separated form is the valid HTTP-date (IMF-fixdate); the dashed
    # form emitted previously is not one.
    # NOTE(review): the literal "GMT" suffix presumes last_modified_at is
    # stored in UTC -- confirm.
    last_modified_at = content.last_modified_at
    last_modified_at_str = last_modified_at.strftime("%a, %d %b %Y %H:%M:%S GMT")
    # Clients that cached a response carrying the old dashed validator will
    # echo it back; keep honouring it so their caches stay valid.
    legacy_last_modified_at_str = last_modified_at.strftime("%a, %d-%b-%Y %H:%M:%S GMT")

    # If the client's cached copy carries the same timestamp, answer 304.
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since in (last_modified_at_str, legacy_last_modified_at_str):
        return HttpResponseNotModified()

    # *** File streaming within a byte range ***
    # Request  -> "Range: bytes=first-[last]"
    # Response -> "Content-Range: bytes first-last/totalLength"
    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
    response = None
    header_value = request.META.get('HTTP_RANGE')
    if header_value:
        # Data from cache (StaticContent) has no easy byte management, so
        # fetch the streaming form from the DB instead.  The exact-type
        # comparison is intentional (NOTE(review): presumably the stream
        # type may subclass StaticContent -- confirm before switching to
        # isinstance).
        if type(content) == StaticContent:
            content = AssetManager.find(loc, as_stream=True)

        try:
            unit, ranges = parse_range_header(header_value, content.length)
        except ValueError as exception:
            # A syntactically invalid header is ignored: log it and fall
            # through to the full-content response below.
            log.exception(
                u"%s in Range header: %s for content: %s",
                exception.message,
                header_value,
                unicode(loc),
            )
        else:
            if unit != 'bytes':
                # Only accept ranges in bytes.
                log.warning(
                    u"Unknown unit in Range header: %s for content: %s",
                    header_value,
                    unicode(loc),
                )
            elif len(ranges) > 1:
                # HTTP/1.1 wants a multipart reply for several ranges;
                # we answer with the full content instead.
                # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                log.warning(
                    u"More than 1 ranges in Range header: %s for content: %s",
                    header_value,
                    unicode(loc),
                )
            else:
                first, last = ranges[0]
                if not (0 <= first <= last < content.length):
                    log.warning(
                        u"Cannot satisfy ranges in Range header: %s for content: %s",
                        header_value,
                        unicode(loc),
                    )
                    return HttpResponse(status=416)  # Requested Range Not Satisfiable

                # The range is satisfiable: stream just that slice.
                response = HttpResponse(content.stream_data_in_range(first, last))
                response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                    first=first, last=last, length=content.length
                )
                response['Content-Length'] = str(last - first + 1)
                response.status_code = 206  # Partial Content

    # If Range header is absent or ignored, return a full content response.
    if response is None:
        response = HttpResponse(content.stream_data())
        response['Content-Length'] = content.length

    # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
    response['Accept-Ranges'] = 'bytes'
    response['Content-Type'] = content.content_type
    response['Last-Modified'] = last_modified_at_str

    return response