Example #1
    def test_delete_remove_leaked_stored_objects_and_uploaded_files(self):
        workflow = Workflow.create_and_init()
        # If the user deletes a workflow, all data associated with that
        # workflow should disappear. Postgres handles DB objects, but Django's
        # ORM doesn't do a great job with StoredObjects and UploadedFiles.
        #
        # This test isn't about minutiae. It's just: if the user deletes a
        # Workflow, make sure all data gets deleted.
        #
        # TODO fix all other bugs that leak data.
        wf_module = workflow.tabs.first().wf_modules.create(
            order=0, slug="step-1", module_id_name="x"
        )

        # Add StoredObject ... and leak it
        wf_module.store_fetched_table(pd.DataFrame({"A": [1, 2]}))
        stored_object_key = wf_module.stored_objects.first().key
        wf_module.stored_objects.all()._raw_delete("default")  # skip S3-delete

        # Add an uploaded file on S3 that is missing its DB entry. (Even if we
        # fix all bugs that leak an S3 object after deleting a DB entry [and as
        # of 2019-06-03 there are still more], we'll still need to handle
        # missing DB entries left behind by legacy code.)
        uploaded_file_key = f"{wf_module.uploaded_file_prefix}{uuid.uuid4()}.csv"
        minio.put_bytes(minio.UserFilesBucket, uploaded_file_key, b"A\nb")
        workflow.delete()
        self.assertFalse(minio.exists(minio.StoredObjectsBucket, stored_object_key))
        self.assertFalse(minio.exists(minio.UserFilesBucket, uploaded_file_key))
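All of these examples call minio.exists from the project's own minio wrapper module. A minimal sketch of what such an existence check typically looks like, assuming a plain boto3 client with default credentials; the wrapper's real setup and naming may differ:

# Illustrative sketch only (not the project's wrapper). Assumes boto3 with
# default credentials; minio.exists presumably does something equivalent.
import boto3
from botocore.exceptions import ClientError

s3_client = boto3.client("s3")


def exists(bucket: str, key: str) -> bool:
    """Return True if the object exists, False if S3 answers 404 to a HEAD."""
    try:
        s3_client.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError as err:
        if err.response["Error"]["Code"] == "404":
            return False
        raise  # permissions or network errors should not be mistaken for "absent"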
Example #2
 def test_abort_upload(self):
     user = User.objects.create(username='******', email='*****@*****.**')
     workflow = Workflow.create_and_init(owner=user)
     uuid = str(uuidgen.uuid4())
     key = f'wf-123/wfm-234/{uuid}.csv'
     wf_module = workflow.tabs.first().wf_modules.create(
         order=0,
         module_id_name='x',
         inprogress_file_upload_id=None,
         inprogress_file_upload_key=key,
         inprogress_file_upload_last_accessed_at=timezone.now(),
     )
     # let's pretend the user has uploaded at least partial data.
     minio.put_bytes(
         minio.UserFilesBucket,
         key,
         b'1234567',
         ContentDisposition="attachment; filename*=UTF-8''file.csv",
     )
     response = self.run_handler(abort_upload, user=user,
                                 workflow=workflow, wfModuleId=wf_module.id,
                                 key=key)
     self.assertResponse(response, data=None)
     wf_module.refresh_from_db()
     self.assertIsNone(wf_module.inprogress_file_upload_id)
     self.assertIsNone(wf_module.inprogress_file_upload_key)
     self.assertIsNone(wf_module.inprogress_file_upload_last_accessed_at)
     # Ensure the file is deleted from S3
     self.assertFalse(minio.exists(minio.UserFilesBucket, key))
 def test_convert_to_uploaded_file_happy_path(self):
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(order=0,
                                                         slug="step-1",
                                                         module_id_name="x")
     ipu = wf_module.in_progress_uploads.create()
     minio.put_bytes(ipu.Bucket, ipu.get_upload_key(), b"1234567")
     uploaded_file = ipu.convert_to_uploaded_file("test sheet.xlsx")
     self.assertEqual(uploaded_file.uuid, str(ipu.id))
     final_key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
     # New file on S3 has the right bytes and metadata
     self.assertEqual(
         minio.get_object_with_data(minio.UserFilesBucket,
                                    final_key)["Body"],
         b"1234567",
     )
     self.assertEqual(
         minio.client.head_object(Bucket=minio.UserFilesBucket,
                                  Key=final_key)["ContentDisposition"],
         "attachment; filename*=UTF-8''test%20sheet.xlsx",
     )
     # InProgressUpload is completed
     self.assertEqual(ipu.is_completed, True)
     ipu.refresh_from_db()
     self.assertEqual(ipu.is_completed, True)  # also on DB
     # Uploaded file is deleted
     self.assertFalse(
         minio.exists(minio.UserFilesBucket, ipu.get_upload_key()))
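The ContentDisposition values asserted here ("attachment; filename*=UTF-8''test%20sheet.xlsx") use the RFC 6266 / RFC 5987 filename* encoding. A hedged illustration of how such a value can be built from the standard library; this is not the project's helper, just the encoding step the assertions expect:

# Illustration only: percent-encode a filename into an RFC 5987 filename*
# parameter, matching the header values the tests above assert.
from urllib.parse import quote


def content_disposition(filename: str) -> str:
    # safe="" forces "/" and spaces to be percent-encoded as well
    return "attachment; filename*=UTF-8''" + quote(filename, safe="")


assert content_disposition("test sheet.xlsx") == "attachment; filename*=UTF-8''test%20sheet.xlsx"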
Example #4
    def test_delete_wfmodule(self):
        result = ProcessResult(pandas.DataFrame({"a": [1]}))
        self.wf_module.cache_render_result(self.delta.id, result)

        parquet_key = self.wf_module.cached_render_result.parquet_key
        self.wf_module.delete()
        self.assertFalse(minio.exists(minio.CachedRenderResultsBucket, parquet_key))
Example #5
 def test_delete_deletes_from_s3(self):
     minio.put_bytes(minio.StoredObjectsBucket, "test.dat", b"abcd")
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
     so = wf_module.stored_objects.create(
         size=4, bucket=minio.StoredObjectsBucket, key="test.dat", hash="123"
     )
     so.delete()
     self.assertFalse(minio.exists(minio.StoredObjectsBucket, "test.dat"))
Example #6
 def test_delete_wf_module_deletes_from_s3(self):
     minio.put_bytes(minio.StoredObjectsBucket, 'test.dat', b'abcd')
     workflow = Workflow.objects.create()
     tab = workflow.tabs.create(position=0)
     wf_module = tab.wf_modules.create(order=0)
     wf_module.stored_objects.create(size=4, bucket=minio.StoredObjectsBucket,
                                     key='test.dat', hash='123')
     wf_module.delete()
     self.assertFalse(minio.exists(minio.StoredObjectsBucket, 'test.dat'))
 def test_delete_s3_data_leaked_file(self):
     # Delete a file with our UUID but without an UploadedFile.
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(order=0,
                                                         slug="step-1",
                                                         module_id_name="x")
     ipu = wf_module.in_progress_uploads.create()
     key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
     minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
     ipu.delete_s3_data()
     self.assertFalse(minio.exists(minio.UserFilesBucket, key))
Example #8
    def test_set_to_empty(self):
        result = ProcessResult(pandas.DataFrame({"a": [1]}))
        self.wf_module.cache_render_result(self.delta.id, result)
        parquet_key = self.wf_module.cached_render_result.parquet_key

        db_wf_module = WfModule.objects.get(id=self.wf_module.id)
        db_wf_module.clear_cached_render_result()
        self.assertIsNone(db_wf_module.cached_render_result)

        db_wf_module.refresh_from_db()
        self.assertIsNone(db_wf_module.cached_render_result)

        self.assertFalse(minio.exists(minio.CachedRenderResultsBucket, parquet_key))
Example #9
 def test_finish_upload_happy_path(self, send_delta):
     user = User.objects.create(username="******", email="*****@*****.**")
     workflow = Workflow.create_and_init(owner=user)
     wf_module = workflow.tabs.first().wf_modules.create(
         order=0, slug="step-1", module_id_name="x"
     )
     in_progress_upload = wf_module.in_progress_uploads.create(
         id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
     )
     key = in_progress_upload.get_upload_key()
     minio.put_bytes(in_progress_upload.Bucket, key, b"1234567")
     send_delta.side_effect = async_noop
     response = self.run_handler(
         finish_upload,
         user=user,
         workflow=workflow,
         wfModuleId=wf_module.id,
         key=key,
         filename="test sheet.csv",
     )
     self.assertResponse(
         response, data={"uuid": "147a9f5d-5b3e-41c3-a968-a84a5a9d587f"}
     )
     # The uploaded file is deleted
     self.assertFalse(minio.exists(in_progress_upload.Bucket, key))
     # A new upload is created
     uploaded_file = wf_module.uploaded_files.first()
     self.assertEqual(uploaded_file.name, "test sheet.csv")
     self.assertEqual(uploaded_file.size, 7)
     self.assertEqual(uploaded_file.uuid, "147a9f5d-5b3e-41c3-a968-a84a5a9d587f")
     self.assertEqual(uploaded_file.bucket, in_progress_upload.Bucket)
     final_key = f"wf-{workflow.id}/wfm-{wf_module.id}/147a9f5d-5b3e-41c3-a968-a84a5a9d587f.csv"
     self.assertEqual(uploaded_file.key, final_key)
     # The file has the right bytes and metadata
     self.assertEqual(
         minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
         b"1234567",
     )
     self.assertEqual(
         minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
             "ContentDisposition"
         ],
         "attachment; filename*=UTF-8''test%20sheet.csv",
     )
     # wf_module is updated
     send_delta.assert_called()
Example #10
 def test_abort_upload_happy_path_after_complete(self):
     user = User.objects.create(username="******", email="*****@*****.**")
     workflow = Workflow.create_and_init(owner=user)
     wf_module = workflow.tabs.first().wf_modules.create(
         order=0, slug="step-1", module_id_name="x"
     )
     in_progress_upload = wf_module.in_progress_uploads.create(
         id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
     )
     key = in_progress_upload.get_upload_key()
     minio.put_bytes(in_progress_upload.Bucket, key, b"1234567")
     response = self.run_handler(
         abort_upload, user=user, workflow=workflow, wfModuleId=wf_module.id, key=key
     )
     self.assertResponse(response, data=None)
     wf_module.refresh_from_db()
     self.assertFalse(minio.exists(in_progress_upload.Bucket, key))
 def test_delete_s3_data_ignore_non_leaked_file(self):
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(order=0,
                                                         slug="step-1",
                                                         module_id_name="x")
     ipu = wf_module.in_progress_uploads.create()
     key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
     minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
     wf_module.uploaded_files.create(
         name="text.xlsx",
         size=7,
         uuid=str(ipu.id),
         bucket=minio.UserFilesBucket,
         key=key,
     )
     ipu.delete_s3_data()
     self.assertTrue(minio.exists(minio.UserFilesBucket, key))  # not leaked, so it must survive
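Read together with the leaked-file test in the Example #6 block, this test implies a contract for delete_s3_data: remove the upload's S3 object only when no UploadedFile row still references it. A hypothetical sketch of that contract; the real method likely differs (for example, by listing every object under the upload's key prefix):

# Hypothetical sketch of the contract implied by the two tests, not the
# project's implementation. Assumes the key layout and relation names used above.
def delete_s3_data(self):
    key = self.wf_module.uploaded_file_prefix + str(self.id) + ".xlsx"  # assumed suffix
    referenced = self.wf_module.uploaded_files.filter(key=key).exists()
    if not referenced and minio.exists(minio.UserFilesBucket, key):
        minio.remove(minio.UserFilesBucket, key)  # leaked: safe to delete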
Example #12
 def test_abort(self):
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(
         order=0,
         slug="step-123",
         module_id_name="x",
         file_upload_api_token="abc123")
     upload = wf_module.in_progress_uploads.create()
     key = upload.get_upload_key()
     minio.put_bytes(upload.Bucket, key, b"1234567")
     response = self.client.delete(
         f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
         HTTP_AUTHORIZATION="Bearer abc123",
     )
     self.assertEqual(response.status_code, 200)
     self.assertEqual(json.loads(response.content), {})
     self.assertFalse(minio.exists(upload.Bucket, key))  # file was deleted
     upload.refresh_from_db()
     self.assertTrue(upload.is_completed)
Example #13
 def test_complete_happy_path(self, queue_render, send_delta):
     send_delta.return_value = async_noop()
     queue_render.return_value = async_noop()
     _init_module("x")
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(
         order=0,
         slug="step-123",
         module_id_name="x",
         file_upload_api_token="abc123")
     upload = wf_module.in_progress_uploads.create()
     uuid = str(upload.id)
     key = upload.get_upload_key()
     minio.put_bytes(upload.Bucket, key, b"1234567")
     response = self.client.post(
         f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
         {"filename": "test.csv"},
         content_type="application/json",
         HTTP_AUTHORIZATION="Bearer abc123",
     )
     self.assertEqual(response.status_code, 200)
     # Upload and its S3 data were deleted
     self.assertFalse(minio.exists(upload.Bucket, key))
     upload.refresh_from_db()
     self.assertTrue(upload.is_completed)
     # Final upload was created
     uploaded_file = wf_module.uploaded_files.first()
     self.assertEqual(uploaded_file.key,
                      f"wf-{workflow.id}/wfm-{wf_module.id}/{uuid}.csv")
     self.assertEqual(
         minio.get_object_with_data(minio.UserFilesBucket,
                                    uploaded_file.key)["Body"],
         b"1234567",
     )
     self.assertEqual(uploaded_file.name, "test.csv")
     # Return value includes uuid
     data = json.loads(response.content)
     self.assertEqual(data["uuid"], uuid)
     self.assertEqual(data["name"], "test.csv")
     self.assertEqual(data["size"], 7)
     # Send deltas
     send_delta.assert_called()
     queue_render.assert_called()
import logging

logger = logging.getLogger(__name__)


def move_uploaded_file(workflow, wf_module, uploaded_file):
    """
    Move files from /uuid.ext to /wf-1/wfm-2/uuid.ext.

    This helps delete leaked files and find problem files.
    """
    from server import minio

    bucket = uploaded_file.bucket
    old_key = uploaded_file.key
    if '/' in old_key:
        return

    new_key = f'wf-{workflow.id}/wfm-{wf_module.id}/{old_key}'

    logger.info('Move %s/%s to %s/%s', bucket, old_key, bucket, new_key)
    try:
        minio.copy(bucket, new_key, f'{bucket}/{old_key}')
        minio.remove(bucket, old_key)
    except minio.error.NoSuchKey:
        # old_key is missing. Two possibilities:
        #
        # 1. We're re-running this script after it failed once with
        #    atomic=True (which used to be set, by accident); the move already
        #    succeeded but the DB doesn't know it. In that case, continue
        #    because this error actually means, "all is well."
        # 2. The file didn't exist to begin with. In that case, write a blank
        #    file in its stead. That way the user will remark, "hey, Workbench
        #    ate my file!" instead of undefined behavior (which is worse).
        #    https://www.pivotaltracker.com/story/show/163336822
        if minio.exists(bucket, new_key):
            pass  # "all is well"
        else:
            # write an empty file
            minio.put_bytes(bucket, new_key, b'')
            uploaded_file.size = 0
            uploaded_file.save(update_fields=['size'])
    uploaded_file.key = new_key
    uploaded_file.save(update_fields=['key'])
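For context, a hedged sketch of how a data migration might drive move_uploaded_file over legacy rows; the app label and relation names here are assumptions, not the project's actual migration:

# Hypothetical data-migration wiring; app label and relation names are assumed.
def forward(apps, schema_editor):
    UploadedFile = apps.get_model("server", "UploadedFile")
    for uploaded_file in UploadedFile.objects.select_related("wf_module__tab__workflow"):
        wf_module = uploaded_file.wf_module
        workflow = wf_module.tab.workflow
        move_uploaded_file(workflow, wf_module, uploaded_file)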