Example #1
    def test_simple(self):
        result = http.UrlResult('http://example.com', {}, '', 200, None)
        assert discover_sourcemap(result) is None

        result = http.UrlResult('http://example.com', {
            'x-sourcemap': 'http://example.com/source.map.js'
        }, '', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source.map.js'

        result = http.UrlResult('http://example.com', {
            'sourcemap': 'http://example.com/source.map.js'
        }, '', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source.map.js'

        result = http.UrlResult('http://example.com', {}, '//@ sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source.map.js'

        result = http.UrlResult('http://example.com', {}, '//# sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source.map.js'

        result = http.UrlResult('http://example.com', {}, 'console.log(true)\n//@ sourceMappingURL=http://example.com/source.map.js', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source.map.js'

        result = http.UrlResult('http://example.com', {}, 'console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source.map.js'

        result = http.UrlResult('http://example.com', {}, 'console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js\n//# sourceMappingURL=http://example.com/source2.map.js', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/source2.map.js'

        result = http.UrlResult('http://example.com', {}, '//# sourceMappingURL=app.map.js/*ascii:lol*/', 200, None)
        assert discover_sourcemap(result) == 'http://example.com/app.map.js'

        result = http.UrlResult('http://example.com', {}, '//# sourceMappingURL=/*lol*/', 200, None)
        with self.assertRaises(AssertionError):
            discover_sourcemap(result)
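
The assertions above pin down the discovery rules: a SourceMap/X-SourceMap
response header wins over the body, the last sourceMappingURL directive in the
body wins over earlier ones, relative URLs are resolved against the page URL,
and a trailing /*...*/ comment (react-native emits these) is stripped, with a
value that is nothing but a comment tripping an assert. A minimal sketch of
that logic, not Sentry's exact implementation:

import re
from urllib.parse import urljoin

_SOURCEMAP_RE = re.compile(r"//[@#]\s*sourceMappingURL=(.*?)\s*$", re.M)

def discover_sourcemap_sketch(result):
    # Headers take precedence over the body.
    url = result.headers.get("sourcemap", result.headers.get("x-sourcemap"))
    if not url:
        matches = _SOURCEMAP_RE.findall(result.body)
        if matches:
            url = matches[-1]  # the last directive wins
    if url:
        # Strip a react-native style trailing comment; a value that is
        # only a comment cannot be a URL, hence the assert.
        if url.endswith("*/") and "/*" in url:
            index = url.index("/*")
            assert index > 0
            url = url[:index]
        url = urljoin(result.url, url)
    return url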
Example #2
    def test_distribution(self):
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        foo_file = File.objects.create(
            name="file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )
        foo_file.putfile(six.BytesIO(b"foo"))
        foo_dist = release.add_dist("foo")
        ReleaseFile.objects.create(
            name="file.min.js",
            release=release,
            dist=foo_dist,
            organization_id=project.organization_id,
            file=foo_file,
        )

        bar_file = File.objects.create(
            name="file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )
        bar_file.putfile(six.BytesIO(b"bar"))
        bar_dist = release.add_dist("bar")
        ReleaseFile.objects.create(
            name="file.min.js",
            release=release,
            dist=bar_dist,
            organization_id=project.organization_id,
            file=bar_file,
        )

        foo_result = fetch_release_file("file.min.js", release, foo_dist)

        assert isinstance(foo_result.body, six.binary_type)
        assert foo_result == http.UrlResult(
            "file.min.js", {"content-type": "application/json; charset=utf-8"},
            "foo", 200, "utf-8")

        # test that cache pays attention to dist value as well as name
        bar_result = fetch_release_file("file.min.js", release, bar_dist)

        # result is cached, but that's not what we should find
        assert bar_result != foo_result
        assert bar_result == http.UrlResult(
            "file.min.js", {"content-type": "application/json; charset=utf-8"},
            "bar", 200, "utf-8")
Example #3
    def test_unicode(self):
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        file = File.objects.create(
            name="file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )

        binary_body = unicode_body.encode("utf-8")
        file.putfile(six.BytesIO(binary_body))

        ReleaseFile.objects.create(name="file.min.js",
                                   release=release,
                                   organization_id=project.organization_id,
                                   file=file)

        result = fetch_release_file("file.min.js", release)

        assert isinstance(result.body, six.binary_type)
        assert result == http.UrlResult(
            "file.min.js",
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        # looking again should hit the cache - make sure it's come through the
        # caching/uncaching process unscathed
        new_result = fetch_release_file("file.min.js", release)
        assert result == new_result
Example #4
    def test_fallbacks(self):
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id,
            version='abc',
        )
        release.add_project(project)

        file = File.objects.create(
            name='~/file.min.js',
            type='release.file',
            headers={'Content-Type': 'application/json; charset=utf-8'},
        )

        binary_body = unicode_body.encode('utf-8')
        file.putfile(six.BytesIO(binary_body))

        ReleaseFile.objects.create(
            name='~/file.min.js',
            release=release,
            organization_id=project.organization_id,
            file=file,
        )

        result = fetch_release_file('http://example.com/file.min.js?lol',
                                    release)

        assert isinstance(result.body, six.binary_type)
        assert result == http.UrlResult(
            'http://example.com/file.min.js?lol',
            {'content-type': 'application/json; charset=utf-8'},
            binary_body,
            200,
            'utf-8',
        )
Example #5
    def test_unicode(self):
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        file = File.objects.create(
            name="file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )

        binary_body = unicode_body.encode("utf-8")
        file.putfile(six.BytesIO(binary_body))

        ReleaseFile.objects.create(name="file.min.js",
                                   release=release,
                                   organization_id=project.organization_id,
                                   file=file)

        result = fetch_release_file("file.min.js", release)

        assert isinstance(result.body, six.binary_type)
        assert result == http.UrlResult(
            "file.min.js",
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        # test with cache hit, which should be compressed
        new_result = fetch_release_file("file.min.js", release)

        assert result == new_result
Example #6
    def test_tilde(self):
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        file = File.objects.create(
            name="~/file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )

        binary_body = unicode_body.encode("utf-8")
        file.putfile(six.BytesIO(binary_body))

        ReleaseFile.objects.create(
            name="~/file.min.js",
            release=release,
            organization_id=project.organization_id,
            file=file,
        )

        result = fetch_release_file("http://example.com/file.min.js?lol",
                                    release)

        assert isinstance(result.body, six.binary_type)
        assert result == http.UrlResult(
            "http://example.com/file.min.js?lol",
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )
Example #7
def result_from_cache(filename, result):
    # Previous caches would be a 3-tuple instead of a 4-tuple,
    # so this is being maintained for backwards compatibility
    try:
        encoding = result[3]
    except IndexError:
        encoding = None

    return http.UrlResult(filename, result[0], zlib.decompress(result[1]), result[2], encoding)
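
A quick round trip through the helper, under the cache layout used throughout
these examples, (headers, compressed_body, status) with an optional trailing
encoding; the 3-tuple below exercises the backwards-compatibility branch:

import zlib

cached = ({"content-type": "application/json"}, zlib.compress(b"foo"), 200)
result = result_from_cache("file.min.js", cached)
assert result.body == b"foo"
assert result.encoding is None  # no fourth slot, so encoding falls back to None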
Example #8
    def test_distribution(self):
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id,
            version='abc',
        )
        release.add_project(project)

        other_file = File.objects.create(
            name='file.min.js',
            type='release.file',
            headers={'Content-Type': 'application/json; charset=utf-8'},
        )
        file = File.objects.create(
            name='file.min.js',
            type='release.file',
            headers={'Content-Type': 'application/json; charset=utf-8'},
        )

        binary_body = unicode_body.encode('utf-8')
        other_file.putfile(six.BytesIO(b''))
        file.putfile(six.BytesIO(binary_body))

        dist = release.add_dist('foo')

        ReleaseFile.objects.create(
            name='file.min.js',
            release=release,
            organization_id=project.organization_id,
            file=other_file,
        )

        ReleaseFile.objects.create(
            name='file.min.js',
            release=release,
            dist=dist,
            organization_id=project.organization_id,
            file=file,
        )

        result = fetch_release_file('file.min.js', release, dist)

        assert isinstance(result.body, six.binary_type)
        assert result == http.UrlResult(
            'file.min.js',
            {'content-type': 'application/json; charset=utf-8'},
            binary_body,
            200,
            'utf-8',
        )

        # test with cache hit, which should be compressed
        new_result = fetch_release_file('file.min.js', release, dist)

        assert result == new_result
Example #9
    def test_retry_file_open(self) -> None:
        project = self.project

        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        content = b"foo"

        file = File.objects.create(
            name="file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )
        file.putfile(BytesIO(content))

        ReleaseFile.objects.create(
            name=file.name,
            release=release,
            organization_id=project.organization_id,
            file=file,
        )

        stale_file_error = OSError()
        stale_file_error.errno = errno.ESTALE

        bad_file = MagicMock()
        bad_file.chunks.side_effect = stale_file_error

        bad_file_reader = MagicMock()
        bad_file_reader.__enter__.return_value = bad_file

        good_file = MagicMock()
        good_file.chunks.return_value = iter([content])

        good_file_reader = MagicMock()
        good_file_reader.__enter__.return_value = good_file

        with patch(
                "sentry.lang.javascript.processor.ReleaseFile.cache") as cache:
            cache.getfile.side_effect = [bad_file_reader, good_file_reader]

            assert fetch_release_file(file.name, release) == http.UrlResult(
                file.name,
                {k.lower(): v.lower()
                 for k, v in file.headers.items()},
                content,
                200,
                "utf-8",
            )

        assert bad_file.chunks.call_count == 1
        assert good_file.chunks.call_count == 1
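
The mocks above rely on fetch_retry_policy re-opening the file and reading
again when the first attempt dies with OSError/errno.ESTALE (a stale NFS file
handle). A minimal sketch of such a one-retry policy, under that assumption:

import errno

def fetch_retry_policy_sketch(fetch):
    try:
        return fetch()
    except OSError as exc:
        if exc.errno != errno.ESTALE:
            raise
        # Stale handle: the second call re-opens the file via the cache.
        return fetch()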
Example #10
    def test_non_url_with_release(self, mock_fetch_release_file):
        mock_fetch_release_file.return_value = http.UrlResult(
            "/example.js", {"content-type": "application/json"}, b"foo", 200, None
        )

        release = Release.objects.create(version="1", organization_id=self.project.organization_id)
        release.add_project(self.project)

        result = fetch_file("/example.js", release=release)
        assert result.url == "/example.js"
        assert result.body == b"foo"
        assert isinstance(result.body, six.binary_type)
        assert result.headers == {"content-type": "application/json"}
        assert result.encoding is None
Example #11
    def test_non_url_with_release(self, mock_fetch_release_file):
        mock_fetch_release_file.return_value = http.UrlResult(
            '/example.js',
            {'content-type': 'application/json'},
            b'foo',
            200,
            None,
        )

        release = Release.objects.create(version='1', organization_id=self.project.organization_id)
        release.add_project(self.project)

        result = fetch_file('/example.js', release=release)
        assert result.url == '/example.js'
        assert result.body == b'foo'
        assert isinstance(result.body, six.binary_type)
        assert result.headers == {'content-type': 'application/json'}
        assert result.encoding is None
Example #12
def fetch_and_cache_artifact(filename, fetch_fn, cache_key, cache_key_meta,
                             headers, compress_fn):
    # If the release file is not in cache, check if we can retrieve at
    # least the size metadata from cache and prevent compression and
    # caching if payload exceeds the backend limit.
    z_body_size = None

    if CACHE_MAX_VALUE_SIZE:
        cache_meta = cache.get(cache_key_meta)
        if cache_meta:
            z_body_size = int(cache_meta.get("compressed_size"))

    def fetch_release_body():
        with fetch_fn() as fp:
            if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                return None, fp.read()
            else:
                return compress_fn(fp)

    try:
        with metrics.timer("sourcemaps.release_file_read"):
            z_body, body = fetch_retry_policy(fetch_release_body)
    except Exception:
        logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
        result = None
    else:
        headers = {k.lower(): v for k, v in headers.items()}
        encoding = get_encoding_from_headers(headers)
        result = http.UrlResult(filename, headers, body, 200, encoding)

        # If we don't have the compressed body because the cached metadata
        # said the payload is too large for the cache backend, do not
        # attempt to cache it.
        if z_body:
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

            # If the cache.set above silently failed (payload too large),
            # the metadata lets us skip the pointless compression next time,
            # since compression here is done only for caching.
            cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)

    return result
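
A hypothetical call site, mirroring how fetch_release_file (shown in a later
example) could hand its pieces to this helper; every name except
fetch_and_cache_artifact is assumed from the surrounding examples:

result = fetch_and_cache_artifact(
    filename,
    lambda: ReleaseFile.cache.getfile(releasefile),  # fetch_fn: context manager over the blob
    cache_key,
    cache_key_meta,
    releasefile.file.headers,
    compress_file,  # compress_fn: file-like -> (z_body, body)
)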
Example #13
    def test_caching(self):
        # Set the threshold to zero to force caching on the file system
        options.set("releasefile.cache-limit", 0)

        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        file = File.objects.create(
            name="file.min.js",
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )

        binary_body = unicode_body.encode("utf-8")
        file.putfile(BytesIO(binary_body))

        ReleaseFile.objects.create(
            name="file.min.js",
            release_id=release.id,
            organization_id=project.organization_id,
            file=file,
        )

        result = fetch_release_file("file.min.js", release)

        assert isinstance(result.body, bytes)
        assert result == http.UrlResult(
            "file.min.js",
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        # test with cache hit, coming from the FS
        new_result = fetch_release_file("file.min.js", release)

        assert result == new_result
Example #14
def fetch_file(url,
               project=None,
               release=None,
               dist=None,
               allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the database first (assuming there's a release on the
    event), then the internet. Caches the result of each of those two attempts
    separately, whether or not those attempts are successful. Used for both
    source files and source maps.
    """

    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == "...":
        raise http.CannotFetch({
            "type": EventError.JS_MISSING_SOURCE,
            "url": http.expose_url(url)
        })

    # if we've got a release to look on, try that first (incl associated cache)
    if release:
        with metrics.timer("sourcemaps.release_file"):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    # otherwise, try the web-scraping cache and then the web itself

    cache_key = f"source:cache:v4:{md5_text(url).hexdigest()}"

    if result is None:
        if not allow_scraping or not url.startswith(("http:", "https:")):
            error = {
                "type": EventError.JS_MISSING_SOURCE,
                "url": http.expose_url(url)
            }
            raise http.CannotFetch(error)

        logger.debug("Checking cache for url %r", url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 4-tuple instead of a 5-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(result[0], result[1],
                                    zlib.decompress(result[2]), result[3],
                                    encoding)

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option("sentry:verify_ssl", False))
            token = project.get_option("sentry:token")
            if token:
                token_header = project.get_option(
                    "sentry:token_header") or "X-Sentry-Token"
                headers[token_header] = token

        with metrics.timer("sourcemaps.fetch"):
            result = http.fetch_file(url,
                                     headers=headers,
                                     verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url, result.headers, z_body, result.status, result.encoding),
                get_max_age(result.headers),
            )

            # since the cache.set above can fail we can end up in a situation
            # where the file is too large for the cache. In that case we abort
            # the fetch and cache a failure and lock the domain for future
            # http fetches.
            if cache.get(cache_key) is None:
                error = {
                    "type": EventError.TOO_LARGE_FOR_CACHE,
                    "url": http.expose_url(url),
                }
                http.lock_domain(url, error=error)
                raise http.CannotFetch(error)

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch({
            "type": EventError.FETCH_INVALID_HTTP_CODE,
            "value": result.status,
            "url": http.expose_url(url),
        })

    # Make sure the file we're getting back is bytes. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, bytes):
        try:
            result = http.UrlResult(
                result.url,
                result.headers,
                result.body.encode("utf8"),
                result.status,
                result.encoding,
            )
        except UnicodeEncodeError:
            error = {
                "type": EventError.FETCH_INVALID_ENCODING,
                "value": "utf8",
                "url": http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if urlsplit(url).path.endswith(".js"):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == b"<":
            error = {"type": EventError.JS_INVALID_CONTENT, "url": url}
            raise http.CannotFetch(error)

    return result
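
A hypothetical call site; fetch_file, http.CannotFetch, and discover_sourcemap
come from these examples, while frame_url and the surrounding names are
illustrative:

try:
    result = fetch_file(frame_url, project=project, release=release,
                        dist=dist, allow_scraping=True)
except http.CannotFetch:
    # Unfetchable source: record the event error and skip this frame.
    result = None
else:
    sourcemap_url = discover_sourcemap(result)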
Example #15
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """

    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(release_id=release.id,
                                           releasefile_ident=releasefile_ident)
    # Cache key for file metadata, currently only the size of the
    # compressed file. We cannot reuse cache_key because large payloads
    # (silently) fail to cache due to e.g. memcached's payload size limit,
    # and we use the metadata to avoid compressing such files again.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident)

    logger.debug("Checking cache for release artifact %r (release_id=%s)",
                 filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [
            ReleaseFile.get_ident(f, dist_name) for f in filename_choices
        ]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)",
            filename, release.id)

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist,
                ident__in=filename_idents).select_related("file"))

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)",
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        elif len(possible_files) == 1:
            releasefile = possible_files[0]

        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(rf for ident in filename_idents
                               for rf in possible_files if rf.ident == ident)

        logger.debug("Found release artifact %r (id=%s, release_id=%s)",
                     filename, releasefile.id, release.id)

        # If the release file is not in cache, check if we can retrieve at
        # least the size metadata from cache and prevent compression and
        # caching if payload exceeds the backend limit.
        z_body_size = None

        if CACHE_MAX_VALUE_SIZE:
            cache_meta = cache.get(cache_key_meta)
            if cache_meta:
                z_body_size = int(cache_meta.get("compressed_size"))

        def fetch_release_body():
            with ReleaseFile.cache.getfile(releasefile) as fp:
                if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                    return None, fp.read()
                else:
                    return compress_file(fp)

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                z_body, body = fetch_retry_policy(fetch_release_body)
        except Exception:
            logger.error("sourcemap.compress_read_failed",
                         exc_info=sys.exc_info())
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)

            # If we don't have the compressed body because the cached
            # metadata said the payload is too large for the cache backend,
            # do not attempt to cache it.
            if z_body:
                # This will implicitly skip too large payloads. Those will be cached
                # on the file system by `ReleaseFile.cache`, instead.
                cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

                # If the cache.set above silently failed (payload too
                # large), the metadata lets us skip the pointless compression
                # next time, since compression here is done only for caching.
                cache.set(cache_key_meta, {"compressed_size": len(z_body)},
                          3600)

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(filename, result[0],
                                zlib.decompress(result[1]), result[2],
                                encoding)

    return result
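
For reference, a sketch of reading back the three cache states this function
maintains, reusing the names defined above (-1 is the 60-second negative
cache; a success entry stores the zlib-compressed body):

import zlib

cached = cache.get(cache_key)
if cached == -1:
    result = None  # negative cache: the file is known to be missing
elif cached is not None:
    headers, z_body, status = cached[:3]
    encoding = cached[3] if len(cached) > 3 else None  # pre-encoding 3-tuples
    result = http.UrlResult(filename, headers, zlib.decompress(z_body), status, encoding)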
Example #16
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next(
                (f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(filename, result[0],
                                zlib.decompress(result[1]), result[2],
                                encoding)

    return result
Example #17
    def test_compression(self, mock_compress_file):
        """
        For files larger than max memcached payload size we want to avoid
        pointless compression and  caching attempt since it fails silently.

        Tests scenarios:

        - happy path where compressed file is successfully cached
        - compressed payload is too large to cache and we will avoid
          compression and caching while the metadata cache exists

        """
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        filename = "file.min.js"
        file = File.objects.create(
            name=filename,
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )

        binary_body = unicode_body.encode("utf-8")
        file.putfile(BytesIO(binary_body))

        ReleaseFile.objects.create(name="file.min.js",
                                   release=release,
                                   organization_id=project.organization_id,
                                   file=file)

        mock_compress_file.return_value = (binary_body, binary_body)

        releasefile_ident = ReleaseFile.get_ident(filename, None)
        cache_key = get_release_file_cache_key(
            release_id=release.id, releasefile_ident=releasefile_ident)
        cache_key_meta = get_release_file_cache_key_meta(
            release_id=release.id, releasefile_ident=releasefile_ident)

        fetch_release_file(filename, release)

        # Here the ANY is File() retrieved from cache/db
        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)

        # Remove cache and check that calling fetch_release_file will do the
        # compression and caching again

        cache.set(cache_key, None)
        mock_compress_file.reset_mock()

        fetch_release_file(filename, release)

        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)

        # If the file is bigger than the max cache value threshold, avoid
        # compression and caching
        cache.set(cache_key, None)
        mock_compress_file.reset_mock()
        with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE",
                   len(binary_body) - 1):
            result = fetch_release_file(filename, release)

        assert result == http.UrlResult(
            filename,
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        assert mock_compress_file.mock_calls == []
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key) is None

        # If the file is bigger than the max cache value threshold, but the
        # metadata cache is empty as well, compress and attempt to cache anyway
        cache.set(cache_key, None)
        cache.set(cache_key_meta, None)
        mock_compress_file.reset_mock()
        with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE",
                   len(binary_body) - 1):
            result = fetch_release_file(filename, release)

        assert result == http.UrlResult(
            filename,
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)

        # If the file is smaller than the max cache value threshold, but the
        # cache is empty, compress and cache
        cache.set(cache_key, None)
        mock_compress_file.reset_mock()
        with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE",
                   len(binary_body) + 1):
            result = fetch_release_file(filename, release)

        assert result == http.UrlResult(
            filename,
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)
Example #18
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest(), )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
Example #19
def fetch_release_file(filename, release, dist=None):
    dist_name = dist and dist.name or None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )
        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
Example #20
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the release's artifacts first (if a release is
    given), then from the scraping cache, then from the web itself.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch(
            {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
        )
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 4-tuple instead of a 5-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option('sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url,
                 result.headers,
                 z_body,
                 result.status,
                 result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch(
            {
                'type': EventError.FETCH_INVALID_HTTP_CODE,
                'value': result.status,
                'url': http.expose_url(url),
            }
        )

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url, result.headers,
                result.body.encode('utf8'), result.status, result.encoding
            )
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == b'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
Example #21
    def test_simple(self):
        result = http.UrlResult("http://example.com", {}, "", 200, None)
        assert discover_sourcemap(result) is None

        result = http.UrlResult(
            "http://example.com",
            {"x-sourcemap": "http://example.com/source.map.js"}, "", 200, None)
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com",
            {"sourcemap": "http://example.com/source.map.js"}, "", 200, None)
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com",
            {},
            "//@ sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)",
            200,
            None,
        )
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com",
            {},
            "//# sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)",
            200,
            None,
        )
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com",
            {},
            "console.log(true)\n//@ sourceMappingURL=http://example.com/source.map.js",
            200,
            None,
        )
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com",
            {},
            "console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js",
            200,
            None,
        )
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com",
            {},
            "console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js\n//# sourceMappingURL=http://example.com/source2.map.js",
            200,
            None,
        )
        assert discover_sourcemap(result) == "http://example.com/source2.map.js"

        # sourceMappingURL found directly after code w/o newline
        result = http.UrlResult(
            "http://example.com",
            {},
            "console.log(true);//# sourceMappingURL=http://example.com/source.map.js",
            200,
            None,
        )
        assert discover_sourcemap(result) == "http://example.com/source.map.js"

        result = http.UrlResult(
            "http://example.com", {},
            "//# sourceMappingURL=app.map.js/*ascii:lol*/", 200, None)
        assert discover_sourcemap(result) == "http://example.com/app.map.js"

        result = http.UrlResult("http://example.com", {},
                                "//# sourceMappingURL=/*lol*/", 200, None)
        with self.assertRaises(AssertionError):
            discover_sourcemap(result)