def test_artifacts(self):
    bundle_file = self.create_artifact_bundle()
    blob1 = FileBlob.from_file(ContentFile(bundle_file))
    total_checksum = sha1(bundle_file).hexdigest()

    assemble_artifacts(
        org_id=self.organization.id,
        version=self.release.version,
        checksum=total_checksum,
        chunks=[blob1.checksum],
    )

    status, details = get_assemble_status(
        AssembleTask.ARTIFACTS, self.organization.id, total_checksum
    )
    assert status == ChunkFileState.OK
    assert details is None

    release_file = ReleaseFile.objects.get(
        organization=self.organization,
        release=self.release,
        name="~/index.js",
        dist=None,
    )
    assert release_file
    assert release_file.file.headers == {"Sourcemap": "index.js.map"}
def test_wrong_dif(self):
    content1 = b"foo"
    fileobj1 = ContentFile(content1)

    content2 = b"bar"
    fileobj2 = ContentFile(content2)

    content3 = b"baz"
    fileobj3 = ContentFile(content3)

    total_checksum = sha1(content2 + content1 + content3).hexdigest()

    # The order here is on purpose because we check for the order of checksums
    blob1 = FileBlob.from_file(fileobj1)
    blob3 = FileBlob.from_file(fileobj3)
    blob2 = FileBlob.from_file(fileobj2)

    chunks = [blob2.checksum, blob1.checksum, blob3.checksum]

    assemble_dif(
        project_id=self.project.id,
        name="foo.sym",
        checksum=total_checksum,
        chunks=chunks,
    )

    status, _ = get_assemble_status(AssembleTask.DIF, self.project.id, total_checksum)
    assert status == ChunkFileState.ERROR
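# Sketch of the checksum-ordering invariant the comment above refers to
# (assumption made explicit here, matching how total_checksum is computed in
# the test): the total checksum is the sha1 of the chunk contents concatenated
# in the order given by the chunks list. Since the checksums in the test do
# line up, the asserted ERROR comes from the assembled bytes not forming a
# valid debug file, not from a checksum mismatch.
def _sketch_checksum_ordering():
    from hashlib import sha1

    contents = [b"bar", b"foo", b"baz"]  # same order as [blob2, blob1, blob3]
    total = sha1(b"".join(contents)).hexdigest()
    assert total == sha1(b"bar" + b"foo" + b"baz").hexdigest()
    # A different chunk order yields a different total checksum:
    assert sha1(b"".join(reversed(contents))).hexdigest() != total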
def test_artifacts(self):
    bundle_file = self.create_artifact_bundle()
    blob1 = FileBlob.from_file(ContentFile(bundle_file))
    total_checksum = sha1(bundle_file).hexdigest()

    for min_files in (10, 1):
        with self.options(
            {
                "processing.release-archive-min-files": min_files,
            }
        ):
            ReleaseFile.objects.filter(release_id=self.release.id).delete()
            assert self.release.count_artifacts() == 0

            assemble_artifacts(
                org_id=self.organization.id,
                version=self.release.version,
                checksum=total_checksum,
                chunks=[blob1.checksum],
            )

            assert self.release.count_artifacts() == 2

            status, details = get_assemble_status(
                AssembleTask.ARTIFACTS, self.organization.id, total_checksum
            )
            assert status == ChunkFileState.OK
            assert details is None

            if min_files == 1:
                # An archive was saved
                index = read_artifact_index(self.release, dist=None)
                archive_ident = index["files"]["~/index.js"]["archive_ident"]
                releasefile = ReleaseFile.objects.get(
                    release_id=self.release.id, ident=archive_ident
                )
                # Artifact is the same as original bundle
                assert releasefile.file.size == len(bundle_file)
            else:
                # Individual files were saved
                release_file = ReleaseFile.objects.get(
                    organization_id=self.organization.id,
                    release_id=self.release.id,
                    name="~/index.js",
                    dist_id=None,
                )
                assert release_file.file.headers == {"Sourcemap": "index.js.map"}
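# A minimal sketch of the threshold behavior the loop above exercises
# (assumption: the assemble task keeps the uploaded zip as a single release
# archive only when it contains at least "processing.release-archive-min-files"
# files, and extracts individual ReleaseFiles otherwise). The test bundle holds
# 2 artifacts, hence min_files=10 -> individual files, min_files=1 -> archive.
def _should_store_as_archive(artifact_count, min_files):
    return artifact_count >= min_files

assert _should_store_as_archive(2, 1) is True    # archive saved, indexed via ident
assert _should_store_as_archive(2, 10) is False  # files saved individually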
def test_artifacts_invalid_zip(self):
    bundle_file = b""
    blob1 = FileBlob.from_file(ContentFile(bundle_file))
    total_checksum = sha1(bundle_file).hexdigest()

    assemble_artifacts(
        org_id=self.organization.id,
        version=self.release.version,
        checksum=total_checksum,
        chunks=[blob1.checksum],
    )

    status, details = get_assemble_status(
        AssembleTask.ARTIFACTS, self.organization.id, total_checksum
    )
    assert status == ChunkFileState.ERROR
def test_dif(self):
    sym_file = self.load_fixture("crash.sym")
    blob1 = FileBlob.from_file(ContentFile(sym_file))
    total_checksum = sha1(sym_file).hexdigest()

    assemble_dif(
        project_id=self.project.id,
        name="crash.sym",
        checksum=total_checksum,
        chunks=[blob1.checksum],
    )

    status, _ = get_assemble_status(AssembleTask.DIF, self.project.id, total_checksum)
    assert status == ChunkFileState.OK

    dif = ProjectDebugFile.objects.filter(
        project=self.project, checksum=total_checksum
    ).get()
    assert dif.file.headers == {"Content-Type": "text/x-breakpad"}
def test_artifacts(self):
    bundle_file = self.create_artifact_bundle()
    blob1 = FileBlob.from_file(ContentFile(bundle_file))
    total_checksum = sha1(bundle_file).hexdigest()

    for has_release_archives in (True, False):
        with self.options(
            {
                "processing.save-release-archives": has_release_archives,
                "processing.release-archive-min-files": 1,
            }
        ):
            assemble_artifacts(
                org_id=self.organization.id,
                version=self.release.version,
                checksum=total_checksum,
                chunks=[blob1.checksum],
            )

            status, details = get_assemble_status(
                AssembleTask.ARTIFACTS, self.organization.id, total_checksum
            )
            assert status == ChunkFileState.OK
            assert details is None

            release_file = ReleaseFile.objects.get(
                organization=self.organization,
                release=self.release,
                name="release-artifacts.zip" if has_release_archives else "~/index.js",
                dist=None,
            )
            assert release_file

            if has_release_archives:
                assert release_file.file.headers == {}
                # Artifact is the same as original bundle
                assert release_file.file.size == len(bundle_file)
            else:
                assert release_file.file.headers == {"Sourcemap": "index.js.map"}
def test_failing_update(self, _):
    bundle_file = self.create_artifact_bundle()
    blob1 = FileBlob.from_file(ContentFile(bundle_file))
    total_checksum = sha1(bundle_file).hexdigest()

    with self.options(
        {
            "processing.save-release-archives": True,
            "processing.release-archive-min-files": 1,
        }
    ):
        assemble_artifacts(
            org_id=self.organization.id,
            version=self.release.version,
            checksum=total_checksum,
            chunks=[blob1.checksum],
        )

        # Status is still OK:
        status, details = get_assemble_status(
            AssembleTask.ARTIFACTS, self.organization.id, total_checksum
        )
        assert status == ChunkFileState.OK
def test_assemble_debug_id_override(self):
    sym_file = self.load_fixture("crash.sym")
    blob1 = FileBlob.from_file(ContentFile(sym_file))
    total_checksum = sha1(sym_file).hexdigest()

    assemble_dif(
        project_id=self.project.id,
        name="crash.sym",
        checksum=total_checksum,
        chunks=[blob1.checksum],
        debug_id="67e9247c-814e-392b-a027-dbde6748fcbf-beef",
    )

    status, _ = get_assemble_status(AssembleTask.DIF, self.project.id, total_checksum)
    assert status == ChunkFileState.OK

    dif = ProjectDebugFile.objects.filter(
        project=self.project, file__checksum=total_checksum
    ).get()
    assert dif.file.headers == {"Content-Type": "text/x-breakpad"}
    assert dif.debug_id == "67e9247c-814e-392b-a027-dbde6748fcbf-beef"
def post(self, request, project):
    """
    Assemble one or multiple chunks (FileBlob) into debug files
    ````````````````````````````````````````````````````````````

    :auth: required
    """
    schema = {
        "type": "object",
        "patternProperties": {
            "^[0-9a-f]{40}$": {
                "type": "object",
                "required": ["name", "chunks"],
                "properties": {
                    "name": {"type": "string"},
                    "debug_id": {"type": "string"},
                    "chunks": {
                        "type": "array",
                        "items": {"type": "string", "pattern": "^[0-9a-f]{40}$"},
                    },
                },
                "additionalProperties": True,
            }
        },
        "additionalProperties": False,
    }

    try:
        files = json.loads(request.body)
        jsonschema.validate(files, schema)
    except jsonschema.ValidationError as e:
        return Response({"error": str(e).splitlines()[0]}, status=400)
    except BaseException:
        return Response({"error": "Invalid json body"}, status=400)

    file_response = {}

    for checksum, file_to_assemble in six.iteritems(files):
        name = file_to_assemble.get("name", None)
        debug_id = file_to_assemble.get("debug_id", None)
        chunks = file_to_assemble.get("chunks", [])

        # First, check the cached assemble status. During assembling, a
        # ProjectDebugFile will be created and we need to prevent a race
        # condition.
        state, detail = get_assemble_status(AssembleTask.DIF, project.id, checksum)
        if state == ChunkFileState.OK:
            file_response[checksum] = {
                "state": state,
                "detail": None,
                "missingChunks": [],
                "dif": detail,
            }
            continue
        elif state is not None:
            file_response[checksum] = {
                "state": state,
                "detail": detail,
                "missingChunks": [],
            }
            continue

        # Next, check if this project already owns the ProjectDebugFile.
        # This can under rare circumstances yield more than one file
        # which is why we use first() here instead of get().
        dif = (
            ProjectDebugFile.objects.filter(project=project, file__checksum=checksum)
            .select_related("file")
            .order_by("-id")
            .first()
        )

        if dif is not None:
            file_response[checksum] = {
                "state": ChunkFileState.OK,
                "detail": None,
                "missingChunks": [],
                "dif": serialize(dif),
            }
            continue

        # There is neither a known file nor a cached state, so we will
        # have to create a new file. Ensure that there are checksums.
        # If not, we assume this is a poll and report NOT_FOUND.
        if not chunks:
            file_response[checksum] = {
                "state": ChunkFileState.NOT_FOUND,
                "missingChunks": [],
            }
            continue

        # Check if all requested chunks have been uploaded.
        missing_chunks = find_missing_chunks(project.organization, chunks)
        if missing_chunks:
            file_response[checksum] = {
                "state": ChunkFileState.NOT_FOUND,
                "missingChunks": missing_chunks,
            }
            continue

        # We don't have a state yet, which means we can now start
        # an assemble job in the background.
        set_assemble_status(AssembleTask.DIF, project.id, checksum, ChunkFileState.CREATED)

        from sentry.tasks.assemble import assemble_dif

        assemble_dif.apply_async(
            kwargs={
                "project_id": project.id,
                "name": name,
                "debug_id": debug_id,
                "checksum": checksum,
                "chunks": chunks,
            }
        )

        file_response[checksum] = {
            "state": ChunkFileState.CREATED,
            "missingChunks": [],
        }

    return Response(file_response, status=200)
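# Illustrative request body accepted by the schema above: keys are the SHA-1
# of the fully assembled file, "chunks" lists per-chunk SHA-1s in assembly
# order, and "debug_id" is optional. The URL and token below are placeholders
# (an assumed Sentry-style route, not taken from this code).
def _sketch_dif_assemble_request():
    from hashlib import sha1

    import requests

    chunk = b"example chunk bytes"
    checksum = sha1(chunk).hexdigest()  # whole file == its single chunk here

    payload = {
        checksum: {
            "name": "crash.sym",
            "debug_id": "67e9247c-814e-392b-a027-dbde6748fcbf-beef",  # optional
            "chunks": [checksum],
        }
    }
    return requests.post(
        "https://sentry.example.com/api/0/projects/my-org/my-project/files/difs/assemble/",
        json=payload,
        headers={"Authorization": "Bearer <token>"},
    )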
def test_assemble(self, mock_assemble_dif):
    content1 = b"foo"
    fileobj1 = ContentFile(content1)
    checksum1 = sha1(content1).hexdigest()

    content2 = b"bar"
    fileobj2 = ContentFile(content2)
    checksum2 = sha1(content2).hexdigest()

    content3 = b"baz"
    fileobj3 = ContentFile(content3)
    checksum3 = sha1(content3).hexdigest()

    total_checksum = sha1(content2 + content1 + content3).hexdigest()

    # The order here is on purpose because we check for the order of checksums
    blob1 = FileBlob.from_file(fileobj1)
    FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob1)
    blob3 = FileBlob.from_file(fileobj3)
    FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob3)
    blob2 = FileBlob.from_file(fileobj2)

    # we make a request now but we are missing ownership for chunk 2
    response = self.client.post(
        self.url,
        data={total_checksum: {"name": "test", "chunks": [checksum2, checksum1, checksum3]}},
        HTTP_AUTHORIZATION=f"Bearer {self.token.token}",
    )

    assert response.status_code == 200, response.content
    assert response.data[total_checksum]["state"] == ChunkFileState.NOT_FOUND
    assert response.data[total_checksum]["missingChunks"] == [checksum2]

    # we add ownership to chunk 2
    FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob2)

    # new request, ownership for all chunks is there but file does not exist yet
    response = self.client.post(
        self.url,
        data={total_checksum: {"name": "test", "chunks": [checksum2, checksum1, checksum3]}},
        HTTP_AUTHORIZATION=f"Bearer {self.token.token}",
    )

    assert response.status_code == 200, response.content
    assert response.data[total_checksum]["state"] == ChunkFileState.CREATED
    assert response.data[total_checksum]["missingChunks"] == []

    chunks = [checksum2, checksum1, checksum3]
    mock_assemble_dif.apply_async.assert_called_once_with(
        kwargs={
            "project_id": self.project.id,
            "name": "test",
            "chunks": chunks,
            "checksum": total_checksum,
            "debug_id": None,
        }
    )

    file = assemble_file(
        AssembleTask.DIF, self.project, "test", total_checksum, chunks, "project.dif"
    )[0]

    status, _ = get_assemble_status(AssembleTask.DIF, self.project.id, total_checksum)
    assert status != ChunkFileState.ERROR
    assert file.checksum == total_checksum

    file_blob_index = FileBlobIndex.objects.all()
    assert len(file_blob_index) == 3
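# The NOT_FOUND -> CREATED progression above is what a client-side poll loop
# observes. A minimal sketch, assuming (per the NOT_FOUND branch in the
# endpoint above) that re-posting the checksum with an empty "chunks" list
# acts as a pure status poll:
def _poll_assemble(client, url, token, total_checksum, timeout=30.0):
    import time

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        response = client.post(
            url,
            data={total_checksum: {"name": "test", "chunks": []}},
            HTTP_AUTHORIZATION=f"Bearer {token}",
        )
        state = response.data[total_checksum]["state"]
        # NOT_FOUND: nothing assembled yet; CREATED: task still running.
        if state not in (ChunkFileState.NOT_FOUND, ChunkFileState.CREATED):
            return state  # ChunkFileState.OK or ChunkFileState.ERROR
        time.sleep(0.5)
    raise TimeoutError("assemble did not finish in time")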
def test_assemble(self, mock_assemble_dif):
    content1 = 'foo'.encode('utf-8')
    fileobj1 = ContentFile(content1)
    checksum1 = sha1(content1).hexdigest()

    content2 = 'bar'.encode('utf-8')
    fileobj2 = ContentFile(content2)
    checksum2 = sha1(content2).hexdigest()

    content3 = 'baz'.encode('utf-8')
    fileobj3 = ContentFile(content3)
    checksum3 = sha1(content3).hexdigest()

    total_checksum = sha1(content2 + content1 + content3).hexdigest()

    # The order here is on purpose because we check for the order of checksums
    blob1 = FileBlob.from_file(fileobj1)
    FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob1)
    blob3 = FileBlob.from_file(fileobj3)
    FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob3)
    blob2 = FileBlob.from_file(fileobj2)

    # we make a request now but we are missing ownership for chunk 2
    response = self.client.post(
        self.url,
        data={
            total_checksum: {
                'name': 'test',
                'chunks': [checksum2, checksum1, checksum3],
            }
        },
        HTTP_AUTHORIZATION=u'Bearer {}'.format(self.token.token),
    )

    assert response.status_code == 200, response.content
    assert response.data[total_checksum]['state'] == ChunkFileState.NOT_FOUND
    assert response.data[total_checksum]['missingChunks'] == [checksum2]

    # we add ownership to chunk 2
    FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob2)

    # new request, ownership for all chunks is there but file does not exist yet
    response = self.client.post(
        self.url,
        data={
            total_checksum: {
                'name': 'test',
                'chunks': [checksum2, checksum1, checksum3],
            }
        },
        HTTP_AUTHORIZATION=u'Bearer {}'.format(self.token.token),
    )

    assert response.status_code == 200, response.content
    assert response.data[total_checksum]['state'] == ChunkFileState.CREATED
    assert response.data[total_checksum]['missingChunks'] == []

    chunks = [checksum2, checksum1, checksum3]
    mock_assemble_dif.apply_async.assert_called_once_with(
        kwargs={
            'project_id': self.project.id,
            'name': 'test',
            'chunks': chunks,
            'checksum': total_checksum,
        }
    )

    file = assemble_file(
        AssembleTask.DIF, self.project, 'test', total_checksum, chunks, 'project.dif'
    )[0]

    status, _ = get_assemble_status(AssembleTask.DIF, self.project.id, total_checksum)
    assert status != ChunkFileState.ERROR
    assert file.checksum == total_checksum

    file_blob_index = FileBlobIndex.objects.all()
    assert len(file_blob_index) == 3
def post(self, request, organization, version):
    """
    Handle an artifact bundle and merge it into the release
    ```````````````````````````````````````````````````````

    :auth: required
    """
    try:
        release = Release.objects.get(organization_id=organization.id, version=version)
    except Release.DoesNotExist:
        raise ResourceDoesNotExist

    if not self.has_release_permission(request, organization, release):
        raise ResourceDoesNotExist

    schema = {
        "type": "object",
        "properties": {
            "checksum": {"type": "string", "pattern": "^[0-9a-f]{40}$"},
            "chunks": {
                "type": "array",
                "items": {"type": "string", "pattern": "^[0-9a-f]{40}$"},
            },
        },
        "required": ["checksum", "chunks"],
        "additionalProperties": False,
    }

    try:
        data = json.loads(request.body)
        jsonschema.validate(data, schema)
    except jsonschema.ValidationError as e:
        return Response({"error": str(e).splitlines()[0]}, status=400)
    except BaseException:
        return Response({"error": "Invalid json body"}, status=400)

    checksum = data.get("checksum", None)
    chunks = data.get("chunks", [])

    state, detail = get_assemble_status(AssembleTask.ARTIFACTS, organization.id, checksum)
    if state == ChunkFileState.OK:
        return Response({"state": state, "detail": None, "missingChunks": []}, status=200)
    elif state is not None:
        return Response({"state": state, "detail": detail, "missingChunks": []})

    # There is neither a known file nor a cached state, so we will
    # have to create a new file. Ensure that there are checksums.
    # If not, we assume this is a poll and report NOT_FOUND.
    if not chunks:
        return Response({"state": ChunkFileState.NOT_FOUND, "missingChunks": []}, status=200)

    set_assemble_status(
        AssembleTask.ARTIFACTS, organization.id, checksum, ChunkFileState.CREATED
    )

    from sentry.tasks.assemble import assemble_artifacts

    assemble_artifacts.apply_async(
        kwargs={
            "org_id": organization.id,
            "version": version,
            "checksum": checksum,
            "chunks": chunks,
        }
    )

    return Response({"state": ChunkFileState.CREATED, "missingChunks": []}, status=200)
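# Unlike the DIF endpoint, this one takes a single flat checksum/chunks pair
# rather than a per-checksum mapping. A hypothetical client call for the
# endpoint above; the URL and token are placeholders (an assumed Sentry-style
# route, not taken from this code).
def _sketch_artifacts_assemble_request(bundle_path):
    from hashlib import sha1

    import requests

    with open(bundle_path, "rb") as f:
        bundle = f.read()

    checksum = sha1(bundle).hexdigest()
    body = {"checksum": checksum, "chunks": [checksum]}  # single-chunk upload

    return requests.post(
        "https://sentry.example.com/api/0/organizations/my-org/releases/1.0.0/assemble/",
        json=body,
        headers={"Authorization": "Bearer <token>"},
    )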