Example #1
class HistoriesApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(HistoriesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)

    def test_create_history(self):
        # Create a history.
        create_response = self._create_history("TestHistory1")
        created_id = create_response["id"]

        # Make sure new history appears in index of user's histories.
        index_response = self._get("histories").json()
        indexed_history = [h for h in index_response if h["id"] == created_id][0]
        self.assertEqual(indexed_history["name"], "TestHistory1")

    def test_show_history(self):
        history_id = self._create_history("TestHistoryForShow")["id"]
        show_response = self._show(history_id)
        self._assert_has_key(
            show_response,
            'id', 'name', 'annotation', 'size', 'contents_url',
            'state', 'state_details', 'state_ids'
        )

        state_details = show_response["state_details"]
        state_ids = show_response["state_ids"]
        states = [
            'discarded', 'empty', 'error', 'failed_metadata', 'new',
            'ok', 'paused', 'queued', 'running', 'setting_metadata', 'upload'
        ]
        assert isinstance(state_details, dict)
        assert isinstance(state_ids, dict)
        self._assert_has_keys(state_details, *states)
        self._assert_has_keys(state_ids, *states)

    def test_show_most_recently_used(self):
        history_id = self._create_history("TestHistoryRecent")["id"]
        show_response = self._get("histories/most_recently_used").json()
        assert show_response["id"] == history_id

    def test_index_order(self):
        slightly_older_history_id = self._create_history("TestHistorySlightlyOlder")["id"]
        newer_history_id = self._create_history("TestHistoryNewer")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == newer_history_id
        assert index_response[1]["id"] == slightly_older_history_id

    def test_delete(self):
        # Set up a history and ensure it is in the index
        history_id = self._create_history("TestHistoryForDelete")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == history_id

        show_response = self._show(history_id)
        assert not show_response["deleted"]

        # Delete the history
        self._delete("histories/%s" % history_id)

        # Check that it can still be viewed - but it is deleted
        show_response = self._show(history_id)
        assert show_response["deleted"]

        # Verify it is dropped from history index
        index_response = self._get("histories").json()
        assert len(index_response) == 0 or index_response[0]["id"] != history_id

        # Add deleted filter to index to view it
        index_response = self._get("histories", {"deleted": "true"}).json()
        assert index_response[0]["id"] == history_id

    def test_purge(self):
        history_id = self._create_history("TestHistoryForPurge")["id"]
        data = {'purge': True}
        self._delete("histories/%s" % history_id, data=data)
        show_response = self._show(history_id)
        assert show_response["deleted"]
        assert show_response["purged"]

    def test_undelete(self):
        history_id = self._create_history("TestHistoryForDeleteAndUndelete")["id"]
        self._delete("histories/%s" % history_id)
        self._post("histories/deleted/%s/undelete" % history_id)
        show_response = self._show(history_id)
        assert not show_response["deleted"]

    def test_update(self):
        history_id = self._create_history("TestHistoryForUpdating")["id"]

        self._update(history_id, {"name": "New Name"})
        show_response = self._show(history_id)
        assert show_response["name"] == "New Name"

        unicode_name = u'桜ゲノム'
        self._update(history_id, {"name": unicode_name})
        show_response = self._show(history_id)
        assert show_response["name"] == unicode_name, show_response

        quoted_name = "'MooCow'"
        self._update(history_id, {"name": quoted_name})
        show_response = self._show(history_id)
        assert show_response["name"] == quoted_name

        self._update(history_id, {"deleted": True})
        show_response = self._show(history_id)
        assert show_response["deleted"], show_response

        self._update(history_id, {"deleted": False})
        show_response = self._show(history_id)
        assert not show_response["deleted"]

        self._update(history_id, {"published": True})
        show_response = self._show(history_id)
        assert show_response["published"]

        self._update(history_id, {"genome_build": "hg18"})
        show_response = self._show(history_id)
        assert show_response["genome_build"] == "hg18"

        self._update(history_id, {"annotation": "The annotation is cool"})
        show_response = self._show(history_id)
        assert show_response["annotation"] == "The annotation is cool"

        self._update(history_id, {"annotation": unicode_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == unicode_name, show_response

        self._update(history_id, {"annotation": quoted_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == quoted_name

    def test_update_invalid_attribute(self):
        history_id = self._create_history("TestHistoryForInvalidUpdating")["id"]
        put_response = self._update(history_id, {"invalidkey": "moo"})
        assert "invalidkey" not in put_response.json()

    def test_update_invalid_types(self):
        history_id = self._create_history("TestHistoryForUpdatingInvalidTypes")["id"]
        for str_key in ["name", "annotation"]:
            assert self._update(history_id, {str_key: False}).status_code == 400

        for bool_key in ['deleted', 'importable', 'published']:
            assert self._update(history_id, {bool_key: "a string"}).status_code == 400

        assert self._update(history_id, {"tags": "a simple string"}).status_code == 400
        assert self._update(history_id, {"tags": [True]}).status_code == 400

    def test_invalid_keys(self):
        invalid_history_id = "1234123412341234"

        assert self._get("histories/%s" % invalid_history_id).status_code == 400
        assert self._update(invalid_history_id, {"name": "new name"}).status_code == 400
        assert self._delete("histories/%s" % invalid_history_id).status_code == 400
        assert self._post("histories/deleted/%s/undelete" % invalid_history_id).status_code == 400

    def test_create_anonymous_fails(self):
        post_data = dict(name="CannotCreate")
        # Using the lower-level _api_url means the API key will not be injected.
        histories_url = self._api_url("histories")
        create_response = post(url=histories_url, data=post_data)
        self._assert_status_code_is(create_response, 403)

    def test_import_export(self):
        history_name = "for_export_default"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content="1 2 3")
        deleted_hda = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        self.dataset_populator.delete_dataset(history_id, deleted_hda["id"])
        deleted_details = self.dataset_populator.get_history_dataset_details(history_id, id=deleted_hda["id"])
        assert deleted_details["deleted"]
        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=2)

        def upload_job_check(job):
            assert job["tool_id"] == "upload1"

        def check_discarded(hda):
            assert hda["deleted"]
            assert hda["state"] == "discarded", hda
            assert hda["purged"] is True

        self._check_imported_dataset(history_id=imported_history_id, hid=1, job_checker=upload_job_check)
        self._check_imported_dataset(history_id=imported_history_id, hid=2, has_job=False, hda_checker=check_discarded, job_checker=upload_job_check)

        imported_content = self.dataset_populator.get_history_dataset_content(
            history_id=imported_history_id,
            hid=1,
        )
        assert imported_content == "1 2 3\n"

    def test_import_1901_histories(self):
        f = open(self.test_data_resolver.get_filename("exports/1901_two_datasets.tgz"), 'rb')
        import_data = dict(archive_source='', archive_file=f)
        self._import_history_and_wait(import_data, "API Test History", wait_on_history_length=2)

    def test_import_export_include_deleted(self):
        history_name = "for_export_include_deleted"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content="1 2 3")
        deleted_hda = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        self.dataset_populator.delete_dataset(history_id, deleted_hda["id"])

        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=2, export_kwds={"include_deleted": "True"})
        self._assert_history_length(imported_history_id, 2)

        def upload_job_check(job):
            assert job["tool_id"] == "upload1"

        def check_ok(hda):
            assert hda["state"] == "ok", hda
            assert hda["deleted"] is True, hda

        self._check_imported_dataset(history_id=imported_history_id, hid=1, job_checker=upload_job_check)
        self._check_imported_dataset(history_id=imported_history_id, hid=2, hda_checker=check_ok, job_checker=upload_job_check)

        imported_content = self.dataset_populator.get_history_dataset_content(
            history_id=imported_history_id,
            hid=1,
        )
        assert imported_content == "1 2 3\n"

    def test_import_metadata_regeneration(self):
        history_name = "for_import_metadata_regeneration"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content=open(self.test_data_resolver.get_filename("1.bam"), 'rb'), file_type='bam', wait=True)
        imported_history_id = self._reimport_history(history_id, history_name)
        self._assert_history_length(imported_history_id, 1)
        self._check_imported_dataset(history_id=imported_history_id, hid=1)
        import_bam_metadata = self.dataset_populator.get_history_dataset_details(
            history_id=imported_history_id,
            hid=1,
        )
        # The cleanup() method of the __IMPORT_HISTORY__ job (which is executed
        # after the job has entered its final state):
        # - creates a new dataset with 'ok' state and adds it to the history
        # - starts a __SET_METADATA__ job to regenerate the dataset metadata, if
        #   needed
        # We need to wait a bit for the creation of the __SET_METADATA__ job.
        time.sleep(1)
        self.dataset_populator.wait_for_history_jobs(imported_history_id, assert_ok=True)
        bai_metadata = import_bam_metadata["meta_files"][0]
        assert bai_metadata["file_type"] == "bam_index"
        api_url = bai_metadata["download_url"].split("api/", 1)[1]
        bai_response = self._get(api_url)
        self._assert_status_code_is(bai_response, 200)
        assert len(bai_response.content) > 4

    def test_import_export_collection(self):
        history_name = "for_export_with_collections"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_collection_populator.create_list_in_history(history_id, contents=["Hello", "World"], direct_upload=True)

        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=3)
        self._assert_history_length(imported_history_id, 3)

        def check_elements(elements):
            assert len(elements) == 2
            element0 = elements[0]["object"]
            element1 = elements[1]["object"]
            for element in [element0, element1]:
                assert not element["visible"]
                assert not element["deleted"]
                assert element["state"] == "ok"

            assert element0["hid"] == 2
            assert element1["hid"] == 3

        self._check_imported_collection(imported_history_id, hid=1, collection_type="list", elements_checker=check_elements)

    def test_import_export_nested_collection(self):
        history_name = "for_export_with_nested_collections"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_collection_populator.create_list_of_pairs_in_history(history_id)

        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=3)
        self._assert_history_length(imported_history_id, 3)

        def check_elements(elements):
            assert len(elements) == 1
            element0 = elements[0]["object"]
            self._assert_has_keys(element0, "elements", "collection_type")

            child_elements = element0["elements"]

            assert len(child_elements) == 2
            assert element0["collection_type"] == "paired"

        self._check_imported_collection(imported_history_id, hid=1, collection_type="list:paired", elements_checker=check_elements)

    def _reimport_history(self, history_id, history_name, wait_on_history_length=None, export_kwds={}):
        # Ensure the history is ready to go...
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)

        return self.dataset_populator.reimport_history(
            history_id, history_name, wait_on_history_length=wait_on_history_length, export_kwds=export_kwds, url=self.url, api_key=self.galaxy_interactor.api_key
        )

    def _import_history_and_wait(self, import_data, history_name, wait_on_history_length=None):

        imported_history_id = self.dataset_populator.import_history_and_wait_for_name(import_data, history_name)

        if wait_on_history_length:
            self.dataset_populator.wait_on_history_length(imported_history_id, wait_on_history_length)

        return imported_history_id

    def _assert_history_length(self, history_id, n):
        contents_response = self._get("histories/%s/contents" % history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == n, contents

    def _check_imported_dataset(self, history_id, hid, has_job=True, hda_checker=None, job_checker=None):
        imported_dataset_metadata = self.dataset_populator.get_history_dataset_details(
            history_id=history_id,
            hid=hid,
        )
        assert imported_dataset_metadata["history_content_type"] == "dataset"
        assert imported_dataset_metadata["history_id"] == history_id

        if hda_checker is not None:
            hda_checker(imported_dataset_metadata)

        assert "creating_job" in imported_dataset_metadata
        job_id = imported_dataset_metadata["creating_job"]
        if has_job:
            assert job_id

            job_details = self.dataset_populator.get_job_details(job_id, full=True)
            assert job_details.status_code == 200, job_details.content
            job = job_details.json()
            assert 'history_id' in job, job
            assert job['history_id'] == history_id, job

            if job_checker is not None:
                job_checker(job)

    def _check_imported_collection(self, history_id, hid, collection_type=None, elements_checker=None):
        imported_collection_metadata = self.dataset_populator.get_history_collection_details(
            history_id=history_id,
            hid=hid,
        )
        assert imported_collection_metadata["history_content_type"] == "dataset_collection"
        assert imported_collection_metadata["history_id"] == history_id
        assert "collection_type" in imported_collection_metadata
        assert "elements" in imported_collection_metadata
        if collection_type is not None:
            assert imported_collection_metadata["collection_type"] == collection_type, imported_collection_metadata

        if elements_checker is not None:
            elements_checker(imported_collection_metadata["elements"])

    def test_create_tag(self):
        post_data = dict(name="TestHistoryForTag")
        history_id = self._post("histories", data=post_data).json()["id"]
        tag_data = dict(value="awesometagvalue")
        tag_url = "histories/%s/tags/awesometagname" % history_id
        tag_create_response = self._post(tag_url, data=tag_data)
        self._assert_status_code_is(tag_create_response, 200)

    def _show(self, history_id):
        return self._get("histories/%s" % history_id).json()

    def _update(self, history_id, data):
        update_url = self._api_url("histories/%s" % history_id, use_key=True)
        put_response = put(update_url, json=data)
        return put_response

    def _create_history(self, name):
        post_data = dict(name=name)
        create_response = self._post("histories", data=post_data).json()
        self._assert_has_keys(create_response, "name", "id")
        self.assertEqual(create_response["name"], name)
        return create_response
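
The test helpers above (_get, _post, _update, _delete) are thin wrappers around Galaxy's histories REST API. As a minimal, hedged sketch of the same calls made directly with requests - assuming a Galaxy server reachable at GALAXY_URL and an API_KEY you supply, neither of which appears in the tests themselves:

import requests

GALAXY_URL = "http://localhost:8080"  # assumption: a local Galaxy instance
API_KEY = "your-api-key"              # assumption: replace with a real user key


def api(path):
    # Mirrors _api_url(): endpoints live under /api.
    return "%s/api/%s" % (GALAXY_URL, path)


params = {"key": API_KEY}

# POST /api/histories -> create a history (what _create_history() exercises)
created = requests.post(api("histories"), data={"name": "TestHistory1"}, params=params).json()

# GET /api/histories -> index of the current user's histories
index = requests.get(api("histories"), params=params).json()
assert any(h["id"] == created["id"] for h in index)

# PUT /api/histories/<id> -> update attributes (what _update() exercises)
requests.put(api("histories/%s" % created["id"]), json={"name": "New Name"}, params=params)

# DELETE /api/histories/<id> -> mark the history deleted (what test_delete exercises)
requests.delete(api("histories/%s" % created["id"]), params=params)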
Example #2
class LibrariesApiTestCase(api.ApiTestCase, TestsDatasets):
    def setUp(self):
        super(LibrariesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_create(self):
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "name")
        assert library["name"] == "CreateTestLibrary"

    def test_delete(self):
        library = self.library_populator.new_library("DeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "deleted")
        assert library["deleted"] is True
        # Test undeleting
        data = dict(undelete=True)
        create_response = self._delete("libraries/%s" % library["id"],
                                       data=data,
                                       admin=True)
        library = create_response.json()
        self._assert_status_code_is(create_response, 200)
        assert library["deleted"] is False

    def test_nonadmin(self):
        # Anons can't create libs
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries",
                                     data=data,
                                     admin=False,
                                     anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't delete libs
        library = self.library_populator.new_library("AnonDeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=False,
                                       anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't update libs
        data = dict(name="ChangedName",
                    description="ChangedDescription",
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=False,
                                      anon=True)
        self._assert_status_code_is(create_response, 403)

    def test_update(self):
        library = self.library_populator.new_library("UpdateTestLibrary")
        data = dict(name='ChangedName',
                    description='ChangedDescription',
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, 'name', 'description', 'synopsis')
        assert library['name'] == 'ChangedName'
        assert library['description'] == 'ChangedDescription'
        assert library['synopsis'] == 'ChangedSynopsis'

    def test_create_private_library_permissions(self):
        library = self.library_populator.new_library("PermissionTestLibrary")
        library_id = library["id"]
        role_id = self.library_populator.user_private_role_id()
        self.library_populator.set_permissions(library_id, role_id)
        create_response = self._create_folder(library)
        self._assert_status_code_is(create_response, 200)

    def test_create_dataset_denied(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        with self._different_user():
            payload = {'from_hda_id': hda_id}
            create_response = self._post("folders/%s/contents" % folder_id,
                                         payload)
            self._assert_status_code_is(create_response, 403)

    def test_show_private_dataset_permissions(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "ForCreateDatasets", wait=True)
        with self._different_user():
            response = self.library_populator.show_ldda(
                library["id"], library_dataset["id"])
            # TODO: this should really be 403 and a proper JSON exception.
            self._assert_status_code_is(response, 400)

    def test_create_dataset(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "ForCreateDatasets", wait=True)
        self._assert_has_keys(library_dataset, "peek", "data_type")
        assert library_dataset["peek"].find("create_test") >= 0
        assert library_dataset["file_ext"] == "txt", library_dataset[
            "file_ext"]

    def test_fetch_upload_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "flat_zip")
        items = [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]
        targets = [{"destination": destination, "items": items}]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset
        assert dataset["genome_build"] == "hg19", dataset
        assert dataset["misc_info"] == "my cool bed", dataset
        assert dataset["file_ext"] == "bed", dataset

    def test_fetch_zip_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "flat_zip")
        bed_test_data_path = self.test_data_resolver.get_filename("4.bed.zip")
        targets = [{
            "destination": destination,
            "items_from": "archive",
            "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data": open(bed_test_data_path, 'rb')
            }
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_single_url_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        items = [{
            "src": "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
            "MD5": "37b59762b59fff860460522d271bc111"
        }]
        targets = [{
            "destination": destination,
            "items": items,
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "validate_hashes": True
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_failed_validation(self):
        # Exception handling is really rough here - we should be creating a dataset in error instead
        # of just failing the job like this.
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        items = [{
            "src": "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
            "MD5": "37b59762b59fff860460522d271bc112"
        }]
        targets = [{
            "destination": destination,
            "items": items,
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "validate_hashes": True
        }
        tool_response = self.dataset_populator.fetch(payload, assert_ok=False)
        job = self.dataset_populator.check_run(tool_response)
        self.dataset_populator.wait_for_job(job["id"])

        job = tool_response.json()["jobs"][0]
        details = self.dataset_populator.get_job_details(job["id"]).json()
        assert details["state"] == "error", details

    def test_fetch_url_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        targets = [{
            "destination":
            destination,
            "items_from":
            "archive",
            "src":
            "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed.zip",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    @unittest.skip  # reference URLs changed, checksums now invalid.
    def test_fetch_bagit_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "bagit_archive")
        example_bag_path = self.test_data_resolver.get_filename(
            "example-bag.zip")
        targets = [{
            "destination": destination,
            "items_from": "bagit_archive",
            "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data": open(example_bag_path)
            },
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/README.txt")
        assert dataset["file_size"] == 66, dataset

        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/bdbag-profile.json")
        assert dataset["file_size"] == 723, dataset

    def _setup_fetch_to_folder(self, test_name):
        return self.library_populator.setup_fetch_to_folder(test_name)

    def test_create_dataset_in_folder(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "id")

    def test_update_dataset_in_folder(self):
        ld = self._create_dataset_in_folder_in_library("ForUpdateDataset")
        data = {
            'name': 'updated_name',
            'file_ext': 'fastq',
            'misc_info': 'updated_info',
            'genome_build': 'updated_genome_build'
        }
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "file_ext",
                              "misc_info", "genome_build")

    def test_invalid_update_dataset_in_folder(self):
        ld = self._create_dataset_in_folder_in_library(
            "ForInvalidUpdateDataset")
        data = {'file_ext': 'nonexisting_type'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 400)
        assert 'This Galaxy does not recognize the datatype of:' in create_response.json()['err_msg']

    def test_detect_datatype_of_dataset_in_folder(self):
        ld = self._create_dataset_in_folder_in_library("ForDetectDataset")
        # Wait for metadata job to finish.
        time.sleep(2)
        data = {'file_ext': 'data'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "file_ext")
        assert create_response.json()["file_ext"] == "data"
        # Wait for metadata job to finish.
        time.sleep(2)
        data = {'file_ext': 'auto'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "file_ext")
        assert create_response.json()["file_ext"] == "txt"

    def test_create_datasets_in_library_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"],
            direct_upload=True).json()["outputs"][0]["id"]
        payload = {
            'from_hdca_id': hdca_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        create_response = self._post("libraries/%s/contents" % library['id'],
                                     payload)
        self._assert_status_code_is(create_response, 200)

    def test_create_datasets_in_folder_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"],
            direct_upload=True).json()["outputs"][0]["id"]
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {'from_hdca_id': hdca_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        assert len(create_response.json()) == 2
        # Also test that anything different from a flat dataset collection list
        # is refused
        hdca_pair_id = self.dataset_collection_populator.create_list_of_pairs_in_history(
            history_id).json()["outputs"][0]['id']
        payload = {'from_hdca_id': hdca_pair_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 501)
        assert create_response.json()['err_msg'] == 'Cannot add nested collections to library. Please flatten your collection first.'

    def _create_folder(self, library):
        create_data = dict(
            folder_id=library["root_folder_id"],
            create_type="folder",
            name="New Folder",
        )
        return self._post("libraries/%s/contents" % library["id"],
                          data=create_data)

    def _create_dataset_in_folder_in_library(self, library_name):
        library = self.library_populator.new_private_library(library_name)
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {
            'from_hda_id': hda_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        return ld
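
Several of the fetch tests above rebuild the same "targets" payload for Galaxy's data-fetch API. A minimal sketch of that payload, posted directly with requests, is shown below; it assumes the fetch endpoint is exposed at /api/tools/fetch, that the destination dictionary (which the tests actually obtain from library_populator.setup_fetch_to_folder()) has the shape shown, and that GALAXY_URL, API_KEY, FOLDER_ID and HISTORY_ID are placeholders rather than values taken from the tests:

import json

import requests

GALAXY_URL = "http://localhost:8080"  # assumption: a local Galaxy instance
API_KEY = "your-api-key"              # assumption: replace with a real user key
FOLDER_ID = "..."                     # assumption: id of an existing library folder
HISTORY_ID = "..."                    # assumption: id of an existing history

# Assumed destination shape; the tests get this from setup_fetch_to_folder().
destination = {"type": "library_folder", "library_folder_id": FOLDER_ID}

targets = [{
    "destination": destination,
    "items": [{
        "src": "url",
        "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
        "dbkey": "hg19",
        "info": "my cool bed",
        "MD5": "37b59762b59fff860460522d271bc111",
    }],
}]
payload = {
    "history_id": HISTORY_ID,  # the tests note this shouldn't really be needed
    "targets": json.dumps(targets),
    "validate_hashes": True,
}
response = requests.post("%s/api/tools/fetch" % GALAXY_URL, data=payload, params={"key": API_KEY})
response.raise_for_status()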
Example #3
class JobsApiTestCase(api.ApiTestCase):
    def setUp(self):
        super(JobsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)

    @uses_test_history(require_new=True)
    def test_index(self, history_id):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset(history_id)
        jobs = self.__jobs_index()
        assert "upload1" in map(itemgetter("tool_id"), jobs)

    @uses_test_history(require_new=True)
    def test_system_details_admin_only(self, history_id):
        self.__history_with_new_dataset(history_id)
        jobs = self.__jobs_index(admin=False)
        job = jobs[0]
        self._assert_not_has_keys(job, "command_line", "external_id")

        jobs = self.__jobs_index(admin=True)
        job = jobs[0]
        self._assert_has_keys(job, "command_line", "external_id")

    @uses_test_history(require_new=True)
    def test_index_state_filter(self, history_id):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok"))
        # Run through a dataset upload to ensure the number of ok uploads
        # increases by at least 1.
        self.__history_with_ok_dataset(history_id)

        # Verify number of ok jobs is actually greater.
        count_increased = False
        for i in range(10):
            new_count = len(self.__uploads_with_state("ok"))
            if original_count < new_count:
                count_increased = True
                break
            time.sleep(.1)

        if not count_increased:
            template = "Jobs in ok state did not increase (was %d, now %d)"
            message = template % (original_count, new_count)
            raise AssertionError(message)

    @uses_test_history(require_new=True)
    def test_index_date_filter(self, history_id):
        self.__history_with_new_dataset(history_id)
        two_weeks_ago = (datetime.datetime.utcnow() -
                         datetime.timedelta(14)).isoformat()
        last_week = (datetime.datetime.utcnow() -
                     datetime.timedelta(7)).isoformat()
        next_week = (datetime.datetime.utcnow() +
                     datetime.timedelta(7)).isoformat()
        today = datetime.datetime.utcnow().isoformat()
        tomorrow = (datetime.datetime.utcnow() +
                    datetime.timedelta(1)).isoformat()

        jobs = self.__jobs_index(data={
            "date_range_min": today[0:10],
            "date_range_max": tomorrow[0:10]
        })
        assert len(jobs) > 0
        today_job_id = jobs[0]["id"]

        jobs = self.__jobs_index(data={
            "date_range_min": two_weeks_ago,
            "date_range_max": last_week
        })
        assert today_job_id not in map(itemgetter("id"), jobs)

        jobs = self.__jobs_index(data={
            "date_range_min": last_week,
            "date_range_max": next_week
        })
        assert today_job_id in map(itemgetter("id"), jobs)

    @uses_test_history(require_new=True)
    def test_index_history(self, history_id):
        self.__history_with_new_dataset(history_id)
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) > 0

        with self.dataset_populator.test_history() as other_history_id:
            jobs = self.__jobs_index(data={"history_id": other_history_id})
            assert len(jobs) == 0

    @uses_test_history(require_new=True)
    def test_index_multiple_states_filter(self, history_id):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok", "new"))

        # Run through a dataset upload to ensure the number of ok uploads
        # increases by at least 1.
        self.__history_with_ok_dataset(history_id)

        # Verify number of ok jobs is actually greater.
        new_count = len(self.__uploads_with_state("new", "ok"))
        assert original_count < new_count, new_count

    @uses_test_history(require_new=True)
    def test_show(self, history_id):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset(history_id)

        jobs_response = self._get("jobs")
        first_job = jobs_response.json()[0]
        self._assert_has_key(first_job, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

        job_id = first_job["id"]
        show_jobs_response = self._get("jobs/%s" % job_id)
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

        show_jobs_response = self._get("jobs/%s" % job_id, {"full": True})
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code',
                             'update_time', 'create_time', 'stdout', 'stderr',
                             'job_messages')

    @uses_test_history(require_new=True)
    def test_show_security(self, history_id):
        self.__history_with_new_dataset(history_id)
        jobs_response = self._get("jobs", data={"history_id": history_id})
        job = jobs_response.json()[0]
        job_id = job["id"]

        show_jobs_response = self._get("jobs/%s" % job_id, admin=False)
        self._assert_not_has_keys(show_jobs_response.json(), "command_line",
                                  "external_id")

        # TODO: Re-activate test case when API accepts privacy settings
        # with self._different_user():
        #    show_jobs_response = self._get( "jobs/%s" % job_id, admin=False )
        #    self._assert_status_code_is( show_jobs_response, 200 )

        show_jobs_response = self._get("jobs/%s" % job_id, admin=True)
        self._assert_has_keys(show_jobs_response.json(), "command_line",
                              "external_id")

    def _run_detect_errors(self, history_id, inputs):
        payload = self.dataset_populator.run_tool_payload(
            tool_id='detect_errors_aggressive',
            inputs=inputs,
            history_id=history_id,
        )
        return self._post("tools", data=payload).json()

    @skip_without_tool("detect_errors_aggressive")
    def test_unhide_on_error(self):
        with self.dataset_populator.test_history() as history_id:
            inputs = {'error_bool': 'true'}
            run_response = self._run_detect_errors(history_id=history_id,
                                                   inputs=inputs)
            job_id = run_response['jobs'][0]["id"]
            self.dataset_populator.wait_for_job(job_id)
            job = self.dataset_populator.get_job_details(job_id).json()
            assert job['state'] == 'error'
            dataset = self.dataset_populator.get_history_dataset_details(
                history_id=history_id,
                dataset_id=run_response['outputs'][0]['id'],
                assert_ok=False)
            assert dataset['visible']

    @skip_without_tool("detect_errors_aggressive")
    def test_no_unhide_on_error_if_mapped_over(self):
        with self.dataset_populator.test_history() as history_id:
            hdca1 = self.dataset_collection_populator.create_list_in_history(
                history_id, contents=[("sample1-1", "1 2 3")]).json()
            inputs = {
                'error_bool': 'true',
                'dataset': {
                    'batch': True,
                    'values': [{
                        'src': 'hdca',
                        'id': hdca1['id']
                    }],
                }
            }
            run_response = self._run_detect_errors(history_id=history_id,
                                                   inputs=inputs)
            job_id = run_response['jobs'][0]["id"]
            self.dataset_populator.wait_for_job(job_id)
            job = self.dataset_populator.get_job_details(job_id).json()
            assert job['state'] == 'error'
            dataset = self.dataset_populator.get_history_dataset_details(
                history_id=history_id,
                dataset_id=run_response['outputs'][0]['id'],
                assert_ok=False)
            assert not dataset['visible']

    @skip_without_tool('empty_output')
    def test_common_problems(self):
        with self.dataset_populator.test_history() as history_id:
            empty_run_response = self.dataset_populator.run_tool(
                tool_id='empty_output',
                inputs={},
                history_id=history_id,
            )
            empty_hda = empty_run_response["outputs"][0]
            cat_empty_twice_run_response = self.dataset_populator.run_tool(
                tool_id='cat1',
                inputs={
                    'input1': {
                        'src': 'hda',
                        'id': empty_hda['id']
                    },
                    'queries_0|input2': {
                        'src': 'hda',
                        'id': empty_hda['id']
                    }
                },
                history_id=history_id,
            )
            empty_output_job = empty_run_response["jobs"][0]
            cat_empty_job = cat_empty_twice_run_response["jobs"][0]
            empty_output_common_problems_response = self._get(
                'jobs/%s/common_problems' % empty_output_job["id"]).json()
            cat_empty_common_problems_response = self._get(
                'jobs/%s/common_problems' % cat_empty_job["id"]).json()
            self._assert_has_keys(empty_output_common_problems_response,
                                  "has_empty_inputs", "has_duplicate_inputs")
            self._assert_has_keys(cat_empty_common_problems_response,
                                  "has_empty_inputs", "has_duplicate_inputs")
            assert not empty_output_common_problems_response["has_empty_inputs"]
            assert cat_empty_common_problems_response["has_empty_inputs"]
            assert not empty_output_common_problems_response[
                "has_duplicate_inputs"]
            assert cat_empty_common_problems_response["has_duplicate_inputs"]

    @skip_without_tool('detect_errors_aggressive')
    def test_report_error(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.run_tool_payload(
                tool_id='detect_errors_aggressive',
                inputs={'error_bool': 'true'},
                history_id=history_id,
            )
            run_response = self._post("tools", data=payload).json()
            job_id = run_response['jobs'][0]["id"]
            dataset_id = run_response['outputs'][0]['id']
            response = self._post('jobs/%s/error' % job_id,
                                  data={'dataset_id': dataset_id})
            assert response.status_code == 200

    @skip_without_tool('detect_errors_aggressive')
    def test_report_error_anon(self):
        # Need to get a cookie and use that for anonymous tool runs
        cookies = requests.get(self.url).cookies
        payload = json.dumps({
            "tool_id": "detect_errors_aggressive",
            "inputs": {
                "error_bool": "true"
            }
        })
        run_response = requests.post("%s/tools" %
                                     self.galaxy_interactor.api_url,
                                     data=payload,
                                     cookies=cookies).json()
        job_id = run_response['jobs'][0]["id"]
        dataset_id = run_response['outputs'][0]['id']
        response = requests.post('%s/jobs/%s/error' %
                                 (self.galaxy_interactor.api_url, job_id),
                                 params={
                                     'email': '*****@*****.**',
                                     'dataset_id': dataset_id
                                 },
                                 cookies=cookies)
        assert response.status_code == 200

    @uses_test_history(require_new=True)
    def test_deleting_output_keep_running_until_all_deleted(self, history_id):
        job_state, outputs = self._setup_running_two_output_job(
            history_id, 120)

        self._hack_to_skip_test_if_state_ok(job_state)

        # Delete one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"deleted": True})

        self._hack_to_skip_test_if_state_ok(job_state)

        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        state = job_state().json()["state"]
        assert state == "running", state

        # Delete the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"deleted": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

    @uses_test_history(require_new=True)
    def test_purging_output_keep_running_until_all_purged(self, history_id):
        job_state, outputs = self._setup_running_two_output_job(
            history_id, 120)

        # Almost immediately after the job starts running, these paths should be populated -
        # if they are, grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on whether the test is running locally.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"purged": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

        def paths_deleted():
            if not os.path.exists(
                    output_dataset_paths[0]) and not os.path.exists(
                        output_dataset_paths[1]):
                return True

        if output_dataset_paths_exist:
            wait_on(paths_deleted, "path deletion")

    @uses_test_history(require_new=True)
    def test_purging_output_cleaned_after_ok_run(self, history_id):
        job_state, outputs = self._setup_running_two_output_job(history_id, 10)

        # Almost immediately after the job starts running, these paths should be populated -
        # if they are, grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on whether the test is running locally.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        if not output_dataset_paths_exist:
            # Given this Galaxy configuration - there is nothing more to be tested here.
            # Consider throwing a skip instead.
            return

        # Purge one of the two outputs and wait for the job to complete.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        wait_on_state(job_state, assert_ok=True)

        if output_dataset_paths_exist:
            time.sleep(.5)
            # Make sure the non-purged dataset is on disk and the purged one is not.
            assert os.path.exists(output_dataset_paths[1])
            assert not os.path.exists(output_dataset_paths[0])

    def _hack_to_skip_test_if_state_ok(self, job_state):
        from nose.plugins.skip import SkipTest
        if job_state().json()["state"] == "ok":
            message = "Job state switch from running to ok too quickly - the rest of the test requires the job to be in a running state. Skipping test."
            raise SkipTest(message)

    def _setup_running_two_output_job(self, history_id, sleep_time):
        payload = self.dataset_populator.run_tool_payload(
            tool_id='create_2',
            inputs=dict(sleep_time=sleep_time, ),
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        outputs = run_response["outputs"]
        jobs = run_response["jobs"]

        assert len(outputs) == 2
        assert len(jobs) == 1

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        # Give job some time to get up and running.
        time.sleep(2)
        running_state = wait_on_state(job_state,
                                      skip_states=["queued", "new"],
                                      assert_ok=False,
                                      timeout=15)
        assert running_state == "running", running_state

        return job_state, outputs

    def _raw_update_history_item(self, history_id, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" %
                                   (history_id, item_id),
                                   use_key=True)
        update_response = requests.put(update_url, json=data)
        assert_status_code_is_ok(update_response)
        return update_response

    @skip_without_tool("cat_data_and_sleep")
    @uses_test_history(require_new=True)
    def test_resume_job(self, history_id):
        hda1 = self.dataset_populator.new_dataset(
            history_id, content="samp1\t10.0\nsamp2\t20.0\n")
        hda2 = self.dataset_populator.new_dataset(
            history_id, content="samp1\t30.0\nsamp2\t40.0\n")
        # Submit first job
        payload = self.dataset_populator.run_tool_payload(
            tool_id='cat_data_and_sleep',
            inputs={
                'sleep_time': 15,
                'input1': {
                    'src': 'hda',
                    'id': hda2['id']
                },
                'queries_0|input2': {
                    'src': 'hda',
                    'id': hda2['id']
                }
            },
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        output = run_response["outputs"][0]
        # Submit second job that waits on job1
        payload = self.dataset_populator.run_tool_payload(
            tool_id='cat1',
            inputs={
                'input1': {
                    'src': 'hda',
                    'id': hda1['id']
                },
                'queries_0|input2': {
                    'src': 'hda',
                    'id': output['id']
                }
            },
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        job_id = run_response['jobs'][0]['id']
        output = run_response["outputs"][0]
        # Delete the second job's input while the second job is waiting for the first job
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, hda1['id']))
        self._assert_status_code_is(delete_response, 200)
        self.dataset_populator.wait_for_history_jobs(history_id,
                                                     assert_ok=False)
        dataset_details = self._get("histories/%s/contents/%s" %
                                    (history_id, output['id'])).json()
        assert dataset_details['state'] == 'paused'
        # Undelete input dataset
        undelete_response = self._put("histories/%s/contents/%s" %
                                      (history_id, hda1['id']),
                                      data=json.dumps({'deleted': False}))
        self._assert_status_code_is(undelete_response, 200)
        resume_response = self._put("jobs/%s/resume" % job_id)
        self._assert_status_code_is(resume_response, 200)
        self.dataset_populator.wait_for_history_jobs(history_id,
                                                     assert_ok=True)
        dataset_details = self._get("histories/%s/contents/%s" %
                                    (history_id, output['id'])).json()
        assert dataset_details['state'] == 'ok'
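
    # The test above exercises paused-job handling: deleting an input of a job
    # that is still waiting leaves its output dataset in the 'paused' state;
    # undeleting the input and issuing PUT jobs/<job_id>/resume lets the job run
    # to completion, after which the output reaches 'ok'.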

    def _get_history_item_as_admin(self, history_id, item_id):
        response = self._get("histories/%s/contents/%s?view=detailed" %
                             (history_id, item_id),
                             admin=True)
        assert_status_code_is_ok(response)
        return response.json()

    @uses_test_history(require_new=True)
    def test_search(self, history_id):
        dataset_id = self.__history_with_ok_dataset(history_id)
        # We first copy the dataset, so that the copy's update time is earlier than the job creation time
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {
            "content": dataset_id,
            "source": "hda",
            "type": "dataset"
        }
        copy_response = self._post("histories/%s/contents" % new_history_id,
                                   data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        self._job_search(tool_id='cat1', history_id=history_id, inputs=inputs)
        # We test that a job can be found even if the dataset has been copied to another history
        new_dataset_id = copy_response.json()['id']
        copied_inputs = json.dumps(
            {'input1': {
                'src': 'hda',
                'id': new_dataset_id
            }})
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='cat1',
                                              inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDA that was used -- we should still be able to find the job
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, dataset_id))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (new_history_id, new_dataset_id))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_handle_identifiers(self, history_id):
        # Test that the input's name and element identifier must still match for a job to be returned.
        dataset_id = self.__history_with_ok_dataset(history_id)
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        self._job_search(tool_id='identifier_single',
                         history_id=history_id,
                         inputs=inputs)
        dataset_details = self._get("histories/%s/contents/%s" %
                                    (history_id, dataset_id)).json()
        dataset_details['name'] = 'Renamed Test Dataset'
        dataset_update_response = self._put(
            "histories/%s/contents/%s" % (history_id, dataset_id),
            data=dict(name='Renamed Test Dataset'))
        self._assert_status_code_is(dataset_update_response, 200)
        assert dataset_update_response.json()['name'] == 'Renamed Test Dataset'
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='identifier_single',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_delete_outputs(self, history_id):
        dataset_id = self.__history_with_ok_dataset(history_id)
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        tool_response = self._job_search(tool_id='cat1',
                                         history_id=history_id,
                                         inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, output_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='cat1',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_with_hdca_list_input(self, history_id):
        list_id_a = self.__history_with_ok_collection(collection_type='list',
                                                      history_id=history_id)
        list_id_b = self.__history_with_ok_collection(collection_type='list',
                                                      history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_b
            },
        })
        tool_response = self._job_search(tool_id='multi_data_param',
                                         history_id=history_id,
                                         inputs=inputs)
        # We switch the inputs; searching with them should not return a match
        inputs_switched = json.dumps({
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f1': {
                'src': 'hdca',
                'id': list_id_b
            },
        })
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='multi_data_param',
                                              inputs=inputs_switched)
        self._search(search_payload, expected_search_count=0)
        # We delete the output (this is an HDA, as multi_data_param reduces collections);
        # even with the correct input job definition, the job should not be found
        output_id = tool_response.json()['outputs'][0]['id']
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, output_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='multi_data_param',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_delete_hdca_output(self, history_id):
        list_id_a = self.__history_with_ok_collection(collection_type='list',
                                                      history_id=history_id)
        inputs = json.dumps({
            'input1': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        tool_response = self._job_search(tool_id='collection_creates_list',
                                         history_id=history_id,
                                         inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        # We delete a single tool output; no job should be returned
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, output_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(
            history_id=history_id,
            tool_id='collection_creates_list',
            inputs=inputs)
        self._search(search_payload, expected_search_count=0)
        tool_response = self._job_search(tool_id='collection_creates_list',
                                         history_id=history_id,
                                         inputs=inputs)
        output_collection_id = tool_response.json()['output_collections'][0]['id']
        # We delete a collection output; no job should be returned
        delete_response = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, output_collection_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(
            history_id=history_id,
            tool_id='collection_creates_list',
            inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_with_hdca_pair_input(self, history_id):
        list_id_a = self.__history_with_ok_collection(collection_type='pair',
                                                      history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        self._job_search(tool_id='multi_data_param',
                         history_id=history_id,
                         inputs=inputs)
        # We test that a job can be found even if the collection has been copied to another history
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {
            "content": list_id_a,
            "source": "hdca",
            "type": "dataset_collection"
        }
        copy_response = self._post("histories/%s/contents" % new_history_id,
                                   data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        new_list_a = copy_response.json()['id']
        copied_inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': new_list_a
            },
            'f2': {
                'src': 'hdca',
                'id': new_list_a
            },
        })
        search_payload = self._search_payload(history_id=new_history_id,
                                              tool_id='multi_data_param',
                                              inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDCA that was used -- we should still be able to find the job
        delete_response = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, list_id_a))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_response = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, new_list_a))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_with_hdca_list_pair_input(self, history_id):
        list_id_a = self.__history_with_ok_collection(
            collection_type='list:pair', history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        self._job_search(tool_id='multi_data_param',
                         history_id=history_id,
                         inputs=inputs)

    def _job_search(self, tool_id, history_id, inputs):
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id=tool_id,
                                              inputs=inputs)
        empty_search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(empty_search_response, 200)
        self.assertEqual(len(empty_search_response.json()), 0)
        tool_response = self._post("tools", data=search_payload)
        self.dataset_populator.wait_for_tool_run(history_id,
                                                 run_response=tool_response)
        self._search(search_payload, expected_search_count=1)
        return tool_response
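
    # _job_search is the shared helper behind the search tests above: it first
    # asserts that jobs/search returns nothing for the payload, then runs the tool
    # with that same payload, waits for it, and finally asserts that the search now
    # finds exactly one job. The returned tool response lets callers grab output
    # ids to delete before re-searching.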

    def _search_payload(self, history_id, tool_id, inputs, state='ok'):
        search_payload = dict(tool_id=tool_id,
                              inputs=inputs,
                              history_id=history_id,
                              state=state)
        return search_payload
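
    # For reference, a payload built by _search_payload looks roughly like this
    # (the ids are illustrative placeholders, not real encoded ids):
    #
    #     {
    #         "tool_id": "cat1",
    #         "inputs": '{"input1": {"src": "hda", "id": "<encoded-id>"}}',
    #         "history_id": "<encoded-id>",
    #         "state": "ok",
    #     }
    #
    # "inputs" stays a JSON-encoded string because the tests above build it with
    # json.dumps before passing it in.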

    def _search(self, payload, expected_search_count=1):
        # The job and the history may not be updated at exactly the same time,
        # so poll a few times before asserting on the search count.
        for _ in range(5):
            search_count = self._search_count(payload)
            if search_count == expected_search_count:
                break
            time.sleep(1)
        assert search_count == expected_search_count, "expected to find %d jobs, got %d jobs" % (
            expected_search_count, search_count)
        return search_count

    def _search_count(self, search_payload):
        search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(search_response, 200)
        search_json = search_response.json()
        return len(search_json)

    def __uploads_with_state(self, *states):
        jobs_response = self._get("jobs", data=dict(state=states))
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert all(j['state'] in states for j in jobs)
        return [j for j in jobs if j['tool_id'] == 'upload1']

    def __history_with_new_dataset(self, history_id):
        dataset_id = self.dataset_populator.new_dataset(history_id)["id"]
        return dataset_id

    def __history_with_ok_dataset(self, history_id):
        dataset_id = self.dataset_populator.new_dataset(history_id,
                                                        wait=True)["id"]
        return dataset_id

    def __history_with_ok_collection(self,
                                     collection_type='list',
                                     history_id=None):
        if not history_id:
            history_id = self.dataset_populator.new_history()
        if collection_type == 'list':
            fetch_response = self.dataset_collection_populator.create_list_in_history(
                history_id, direct_upload=True).json()
        elif collection_type == 'pair':
            fetch_response = self.dataset_collection_populator.create_pair_in_history(
                history_id, direct_upload=True).json()
        elif collection_type == 'list:pair':
            fetch_response = self.dataset_collection_populator.create_list_of_pairs_in_history(
                history_id).json()
        else:
            raise ValueError("Unrecognized collection_type: %s" % collection_type)
        self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        return fetch_response["outputs"][0]['id']
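
    # The fetch responses above report the newly created collection under
    # "outputs"; the first output's encoded id is the HDCA id that the search
    # tests pass along as {'src': 'hdca', 'id': ...}.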

    def __jobs_index(self, **kwds):
        jobs_response = self._get("jobs", **kwds)
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert isinstance(jobs, list)
        return jobs