class JobRecoveryAfterHandledIntegerationTestCase(integration_util.IntegrationTestCase):
    framework_tool_and_types = True

    def setUp(self):
        super(JobRecoveryAfterHandledIntegerationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        config["job_config_file"] = DELAY_JOB_CONFIG_FILE

    def handle_reconfigure_galaxy_config_kwds(self, config):
        config["job_config_file"] = SIMPLE_JOB_CONFIG_FILE

    def test_recovery(self):
        history_id = self.dataset_populator.new_history()
        self.dataset_populator.run_tool(
            "exit_code_oom",
            {},
            history_id,
            assert_ok=False,
        ).json()
        self.restart(handle_reconfig=self.handle_reconfigure_galaxy_config_kwds)
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)
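# The DELAY_JOB_CONFIG_FILE and SIMPLE_JOB_CONFIG_FILE constants above point at
# job configuration XML shipped with Galaxy's integration tests. Their exact
# contents are not part of this example; the sketch below is an assumption that
# only illustrates the idea -- a "delay" destination stalls jobs long enough for
# the restart to land while they are in flight, while the simple config used
# after the restart runs jobs normally.
DELAY_JOB_CONFIG_SKETCH = """\
<job_conf>
    <plugins>
        <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner"/>
    </plugins>
    <destinations default="delayed_local">
        <destination id="delayed_local" runner="local">
            <!-- hypothetical delay parameter -->
            <param id="delay">60</param>
        </destination>
    </destinations>
</job_conf>
"""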
class DockerizedJobsIntegrationTestCase(integration_util.IntegrationTestCase, RunsEnvironmentJobs):

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = tempfile.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = DOCKERIZED_JOB_CONFIG_FILE
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"
        config["enable_beta_mulled_containers"] = "true"

    def setUp(self):
        super(DockerizedJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_explicit(self):
        self.dataset_populator.run_tool("mulled_example_explicit", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id)
        assert "0.7.15-r1140" in output

    def test_mulled_simple(self):
        self.dataset_populator.run_tool("mulled_example_simple", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id)
        assert "0.7.15-r1140" in output

    def test_docker_job_environment(self):
        job_env = self._run_and_get_environment_properties("job_environment_default")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        assert job_env.home.startswith(self.jobs_directory)
        assert job_env.home.endswith("/home")

    def test_docker_job_environment_legacy(self):
        job_env = self._run_and_get_environment_properties("job_environment_default_legacy")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        # Should we change env_pass_through to just always include TMP and HOME for docker?
        # I'm not sure, if yes this would change.
        assert job_env.home == "/", job_env.home
class ObjectStoreJobsIntegrationTestCase(integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        temp_directory = cls._test_driver.mkdtemp()
        cls.object_stores_parent = temp_directory
        for disk_store_file_name in ["files1", "files2", "files3"]:
            disk_store_path = os.path.join(temp_directory, disk_store_file_name)
            os.makedirs(disk_store_path)
            setattr(cls, "%s_path" % disk_store_file_name, disk_store_path)
        config_path = os.path.join(temp_directory, "object_store_conf.xml")
        with open(config_path, "w") as f:
            f.write(DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE.safe_substitute({"temp_directory": temp_directory}))
        config["object_store_config_file"] = config_path

    def setUp(self):
        super(ObjectStoreJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_tool_simple_constructs(self):
        with self.dataset_populator.test_history() as history_id:
            hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3")
            create_10_inputs = {
                "input1": {"src": "hda", "id": hda1["id"]},
                "input2": {"src": "hda", "id": hda1["id"]},
            }
            self.dataset_populator.run_tool(
                "create_10",
                create_10_inputs,
                history_id,
                assert_ok=True,
            )
            self.dataset_populator.wait_for_history(history_id)

        files_1_count = _files_count(self.files1_path)
        files_2_count = _files_count(self.files2_path)
        files_3_count = _files_count(self.files3_path)

        # Ensure no files written to the secondary/inactive hierarchical disk store.
        assert files_3_count == 0

        # Ensure the 10 outputs were written to one of the distributed object
        # store's disk stores (that store will have either 10 or 11 files,
        # depending on whether the input was also written there). The other
        # disk store may or may not have the input file, so it should have at
        # most one file.
        assert (files_1_count >= 10) or (files_2_count >= 10)
        assert (files_1_count <= 1) or (files_2_count <= 1)

        # Other sanity checks on the test - just make sure the test was setup as intended
        # and not actually testing object store behavior.
        assert (files_1_count <= 11) and (files_2_count <= 11)
        assert (files_1_count >= 0) and (files_2_count >= 0)
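# DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE and _files_count come from the
# surrounding module and are not shown in this example. Minimal stand-ins
# consistent with how the test uses them might look as follows; the XML layout
# is an assumption, and the real template also defines the inactive "files3"
# backend that the test asserts stays empty.
import os
from string import Template

DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE = Template("""<?xml version="1.0"?>
<object_store type="distributed">
    <backends>
        <backend id="files1" type="disk" weight="1">
            <files_dir path="${temp_directory}/files1"/>
        </backend>
        <backend id="files2" type="disk" weight="1">
            <files_dir path="${temp_directory}/files2"/>
        </backend>
    </backends>
</object_store>
""")


def _files_count(directory):
    # Count regular files anywhere under the given backend directory.
    return sum(len(files) for _, _, files in os.walk(directory))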
class DataManagerIntegrationTestCase(integration_util.IntegrationTestCase, UsesShed):

    """Test data manager installation and table reload through the API"""

    framework_tool_and_types = True

    def setUp(self):
        super(DataManagerIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        try:
            import watchdog  # noqa: F401
        except ImportError:
            raise SkipTest("watchdog library is not available")
        cls.configure_shed_and_conda(config)
        config["tool_data_path"] = cls.shed_tool_data_dir
        config["watch_tool_data_dir"] = True
        cls.username = cls.get_secure_ascii_digits()
        config["admin_users"] = "*****@*****.**" % cls.username

    def test_data_manager_installation_table_reload(self):
        """
        Test that we can install data managers, create a new dbkey, and use that dbkey in a downstream data manager.
        """
        self.install_repository("devteam", "data_manager_fetch_genome_dbkeys_all_fasta", "b1bc53e9bbc5")
        self.install_repository("devteam", "data_manager_sam_fasta_index_builder", "1865e693d8b2")
        with self._different_user(email="*****@*****.**" % self.username):
            with self.dataset_populator.test_history() as history_id:
                run_response = self.dataset_populator.run_tool(tool_id=FETCH_TOOL_ID,
                                                               inputs=FETCH_GENOME_DBKEYS_ALL_FASTA_INPUT,
                                                               history_id=history_id,
                                                               assert_ok=False)
                self.dataset_populator.wait_for_tool_run(history_id=history_id, run_response=run_response, timeout=CONDA_AUTO_INSTALL_JOB_TIMEOUT)
                run_response = self.dataset_populator.run_tool(tool_id=SAM_FASTA_ID,
                                                               inputs=SAM_FASTA_INPUT,
                                                               history_id=history_id,
                                                               assert_ok=False)
                self.dataset_populator.wait_for_tool_run(history_id=history_id, run_response=run_response, timeout=CONDA_AUTO_INSTALL_JOB_TIMEOUT)

    @classmethod
    def get_secure_ascii_digits(cls, n=12):
        return ''.join(random.SystemRandom().choice(string.ascii_lowercase + string.digits) for _ in range(n))
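# FETCH_TOOL_ID, SAM_FASTA_ID, and the two input dictionaries are module-level
# constants not shown in this example. The sketch below only suggests their
# shape -- the real ids are the fully-qualified Tool Shed guids of the installed
# data managers and the real inputs create a new dbkey and then index it, so
# treat every value here as an assumption.
FETCH_TOOL_ID = "toolshed.g2.bx.psu.edu/repos/devteam/.../data_manager_fetch_genome_all_fasta_dbkey/..."
FETCH_GENOME_DBKEYS_ALL_FASTA_INPUT = {
    "dbkey_source|dbkey_source_selector": "new",
    "dbkey_source|dbkey": "NC_001617.1",
    "reference_source|reference_source_selector": "ncbi",
    "reference_source|requested_identifier": "NC_001617.1",
}
SAM_FASTA_INPUT = {"all_fasta_source": "NC_001617.1"}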
    def test_runs_on_mule(self):
        tool_id = 'config_vars'
        expect_server_name = self.expected_server_name
        dataset_populator = DatasetPopulator(self.galaxy_interactor)
        history_id = dataset_populator.new_history()
        payload = dataset_populator.run_tool(
            tool_id=tool_id,
            inputs={'var': 'server_name'},
            history_id=history_id,
        )
        dataset_id = payload['outputs'][0]['id']
        dataset_populator.wait_for_dataset(history_id, dataset_id, assert_ok=True)
        output = dataset_populator.get_history_dataset_content(history_id, dataset_id=dataset_id).strip()
        assert output.startswith(expect_server_name), (
            "Job handler's server name '{output}' does not start with expected string '{expected}'".format(
                output=output,
                expected=expect_server_name,
            )
        )
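# test_runs_on_mule appears here without its enclosing class; in context it
# belongs to an integration test case that boots Galaxy under uWSGI with mule
# job handlers. A hypothetical skeleton with just the attributes the method
# reads (the expected_server_name value is an assumption):
class MuleJobHandlerTestCase(integration_util.IntegrationTestCase):
    require_uwsgi = True
    expected_server_name = "main.job-handlers"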
Example #8
class DockerizedJobsIntegrationTestCase(integration_util.IntegrationTestCase, RunsEnvironmentJobs):

    framework_tool_and_types = True
    job_config_file = DOCKERIZED_JOB_CONFIG_FILE
    build_mulled_resolver = 'build_mulled'
    container_type = 'docker'
    default_container_home_dir = '/'

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = cls._test_driver.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = cls.job_config_file
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"
        config["conda_auto_init"] = False
        config["conda_auto_install"] = False
        config["enable_beta_mulled_containers"] = "true"

    @classmethod
    def setUpClass(cls):
        if not which(cls.container_type):
            raise unittest.SkipTest("Executable '%s' not found on PATH" % cls.container_type)
        super(DockerizedJobsIntegrationTestCase, cls).setUpClass()

    def setUp(self):
        super(DockerizedJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_explicit(self):
        self.dataset_populator.run_tool("mulled_example_explicit", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id, timeout=EXTENDED_TIMEOUT)
        assert "0.7.15-r1140" in output

    def test_mulled_simple(self):
        self.dataset_populator.run_tool("mulled_example_simple", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id, timeout=EXTENDED_TIMEOUT)
        assert "0.7.15-r1140" in output

    def test_container_job_environment(self):
        job_env = self._run_and_get_environment_properties("job_environment_default")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        assert job_env.home.startswith(self.jobs_directory)
        assert job_env.home.endswith("/home")

    def test_container_job_environment_legacy(self):
        job_env = self._run_and_get_environment_properties("job_environment_default_legacy")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        # Should we change env_pass_through to just always include TMP and HOME for docker?
        # I'm not sure, if yes this would change.
        assert job_env.home == self.default_container_home_dir, job_env.home

    def test_build_mulled(self):
        if not which('docker'):
            raise unittest.SkipTest("Docker not found on PATH, required for building images via involucro")
        resolver_type = self.build_mulled_resolver
        tool_id = 'mulled_example_multi_1'
        endpoint = "tools/%s/dependencies" % tool_id
        data = {'id': tool_id, 'resolver_type': resolver_type}
        create_response = self._post(endpoint, data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        response = create_response.json()
        assert any(d['dependency_type'] == self.container_type for d in response)
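# The which() helper used in setUpClass and test_build_mulled resolves an
# executable on PATH. Outside Galaxy's utilities, the standard library offers
# an equivalent:
import unittest
from shutil import which  # returns the full path to the executable, or None

if not which("docker"):
    raise unittest.SkipTest("Executable 'docker' not found on PATH")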
Example #9
class HistoryContentsApiTestCase(api.ApiTestCase, TestsDatasets):
    def setUp(self):
        super(HistoryContentsApiTestCase, self).setUp()
        self.history_id = self._new_history()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_index_hda_summary(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents" %
                                      self.history_id)
        hda_summary = self.__check_for_hda(contents_response, hda1)
        assert "display_types" not in hda_summary  # Quick summary, not full details

    def test_make_private_and_public(self):
        hda1 = self._wait_for_new_hda()
        update_url = "histories/%s/contents/%s/permissions" % (self.history_id,
                                                               hda1["id"])

        role_id = self.dataset_populator.user_private_role_id()
        # Give manage permission to the user.
        payload = {
            "access": [],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url,
                                                   payload,
                                                   admin=True)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])
        # Then we restrict access.
        payload = {
            "action": "make_private",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda1["id"])

        # Then we restrict access.
        payload = {
            "action": "remove_restrictions",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])

    def test_set_permissions_add_admin_history_contents(self):
        self._verify_dataset_permissions("history_contents")

    def test_set_permissions_add_admin_datasets(self):
        self._verify_dataset_permissions("dataset")

    def _verify_dataset_permissions(self, api_endpoint):
        hda1 = self._wait_for_new_hda()
        hda_id = hda1["id"]
        if api_endpoint == "history_contents":
            update_url = "histories/%s/contents/%s/permissions" % (
                self.history_id, hda_id)
        else:
            update_url = "datasets/%s/permissions" % hda_id

        role_id = self.dataset_populator.user_private_role_id()

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }

        # Other users cannot modify permissions.
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

        # First the details render for another user.
        self._assert_other_user_can_access(hda_id)

        # Then we restrict access.
        update_response = self._update_permissions(update_url,
                                                   payload,
                                                   admin=True)
        self._assert_status_code_is(update_response, 200)

        # Finally the details don't render.
        self._assert_other_user_cannot_access(hda_id)

        # But they do for the original user.
        contents_response = self._get("histories/%s/contents/%s" %
                                      (self.history_id, hda_id)).json()
        assert "name" in contents_response

        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda_id)

        user_id = self.dataset_populator.user_id()
        with self._different_user():
            different_user_id = self.dataset_populator.user_id()
        combined_user_role = self.dataset_populator.create_role(
            [user_id, different_user_id],
            description="role for testing permissions")

        payload = {
            "access": [combined_user_role["id"]],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        # Now other user can see dataset again with access permission.
        self._assert_other_user_can_access(hda_id)
        # access doesn't imply management though...
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

    def _assert_other_user_cannot_access(self, history_content_id):
        with self._different_user():
            contents_response = self._get(
                "histories/%s/contents/%s" %
                (self.history_id, history_content_id)).json()
            assert "name" not in contents_response

    def _assert_other_user_can_access(self, history_content_id):
        with self._different_user():
            contents_response = self._get(
                "histories/%s/contents/%s" %
                (self.history_id, history_content_id)).json()
            assert "name" in contents_response

    def test_index_hda_all_details(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=all" %
                                      self.history_id)
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_index_hda_detail_by_id(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=%s" %
                                      (self.history_id, hda1["id"]))
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_show_hda(self):
        hda1 = self._new_dataset(self.history_id)
        show_response = self.__show(hda1)
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def test_hda_copy(self):
        hda1 = self._new_dataset(self.history_id)
        create_data = dict(
            source='hda',
            content=hda1["id"],
        )
        second_history_id = self._new_history()
        assert self.__count_contents(second_history_id) == 0
        create_response = self._post(
            "histories/%s/contents" % second_history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(second_history_id) == 1

    def test_library_copy(self):
        ld = self.library_populator.new_library_dataset("lda_test_library")
        create_data = dict(
            source='library',
            content=ld["id"],
        )
        assert self.__count_contents(self.history_id) == 0
        create_response = self._post("histories/%s/contents" % self.history_id,
                                     create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(self.history_id) == 1

    def test_update(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        update_response = self._raw_update(hda1["id"], dict(deleted=True))
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hda1)
        assert str(show_response.json()["deleted"]).lower() == "true"

        update_response = self._raw_update(hda1["id"],
                                           dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        update_response = self._raw_update(hda1["id"],
                                           dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        unicode_name = u'ржевский сапоги'
        update_response = self._raw_update(hda1["id"], dict(name=unicode_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == unicode_name, updated_hda

        quoted_name = '"Mooo"'
        update_response = self._raw_update(hda1["id"], dict(name=quoted_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == quoted_name, quoted_name

        data = {
            "dataset_id": hda1["id"],
            "name": "moocow",
            "dbkey": "?",
            "annotation": None,
            "info": "my info is",
            "operation": "attributes"
        }
        update_response = self._set_edit_update(data)
        # No key or anything supplied, expect a permission problem.
        # A bit questionable but I think this is a 400 instead of a 403 so that
        # we don't distinguish between this is a valid ID you don't have access to
        # and this is an invalid ID.
        assert update_response.status_code == 400, update_response.content

    def test_update_batch(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        payload = dict(
            items=[{"history_content_type": "dataset", "id": hda1["id"]}],
            deleted=True,
        )
        update_response = self._raw_update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"]

    def test_update_type_failures(self):
        hda1 = self._wait_for_new_hda()
        update_response = self._raw_update(hda1["id"],
                                           dict(deleted='not valid'))
        self._assert_status_code_is(update_response, 400)

    def _wait_for_new_hda(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        return hda1

    def _set_edit_update(self, data):
        set_edit_url = "%s/dataset/set_edit" % self.url
        update_response = put(set_edit_url, json=data)
        return update_response

    def _raw_update(self, item_id, data, admin=False, history_id=None):
        history_id = history_id or self.history_id
        key_param = "use_admin_key" if admin else "use_key"
        update_url = self._api_url(
            "histories/%s/contents/%s" % (history_id, item_id),
            **{key_param: True})
        update_response = put(update_url, json=data)
        return update_response

    def _update_permissions(self, url, data, admin=False):
        key_param = "use_admin_key" if admin else "use_key"
        update_url = self._api_url(url, **{key_param: True})
        update_response = put(update_url, json=data)
        return update_response

    def _raw_update_batch(self, data):
        update_url = self._api_url("histories/%s/contents" % (self.history_id),
                                   use_key=True)
        update_response = put(update_url, json=data)
        return update_response

    def test_delete(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (self.history_id, hda1["id"]))
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_purge(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        assert str(self.__show(hda1).json()["purged"]).lower() == "false"
        data = {'purge': True}
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (self.history_id, hda1["id"]),
                                       data=data)
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"
        assert str(self.__show(hda1).json()["purged"]).lower() == "true"

    def test_dataset_collection_creation_on_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        endpoint = "histories/%s/contents" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_creation_on_typed_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id)
        endpoint = "histories/%s/contents/dataset_collections" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_create_from_existing_datasets_with_new_tags(
            self):
        with self.dataset_populator.test_history() as history_id:
            hda_id = self.dataset_populator.new_dataset(history_id,
                                                        content="1 2 3")['id']
            hda2_id = self.dataset_populator.new_dataset(history_id,
                                                         content="1 2 3")['id']
            update_response = self._raw_update(hda2_id,
                                               dict(tags=['existing:tag']),
                                               history_id=history_id).json()
            assert update_response['tags'] == ['existing:tag']
            creation_payload = {
                'collection_type': 'list',
                'history_id': history_id,
                'element_identifiers': json.dumps([{
                    'id': hda_id,
                    'src': 'hda',
                    'name': 'element_id1',
                    'tags': ['my_new_tag']
                }, {
                    'id': hda2_id,
                    'src': 'hda',
                    'name': 'element_id2',
                    'tags': ['another_new_tag']
                }]),
                'type': 'dataset_collection',
                'copy_elements': True
            }
            r = self._post("histories/%s/contents" % self.history_id,
                           creation_payload).json()
            assert r['elements'][0]['object']['id'] != hda_id, "HDA has not been copied"
            assert len(r['elements'][0]['object']['tags']) == 1
            assert r['elements'][0]['object']['tags'][0] == 'my_new_tag'
            assert len(r['elements'][1]['object']['tags']) == 2, r['elements'][1]['object']['tags']
            original_hda = self.dataset_populator.get_history_dataset_details(
                history_id=history_id, dataset_id=hda_id)
            assert len(original_hda['tags']) == 0, original_hda['tags']

    def _check_pair_creation(self, endpoint, payload):
        pre_collection_count = self.__count_contents(type="dataset_collection")
        pre_dataset_count = self.__count_contents(type="dataset")
        pre_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        dataset_collection_response = self._post(endpoint, payload)

        dataset_collection = self.__check_create_collection_response(
            dataset_collection_response)

        post_collection_count = self.__count_contents(
            type="dataset_collection")
        post_dataset_count = self.__count_contents(type="dataset")
        post_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        # Test filtering types with index.
        assert pre_collection_count == 0
        assert post_collection_count == 1
        assert post_combined_count == pre_dataset_count + 1
        assert post_combined_count == pre_combined_count + 1
        assert pre_dataset_count == post_dataset_count

        # Test show dataset collection.
        collection_url = "histories/%s/contents/dataset_collections/%s" % (
            self.history_id, dataset_collection["id"])
        show_response = self._get(collection_url)
        self._assert_status_code_is(show_response, 200)
        dataset_collection = show_response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted")

        assert not dataset_collection["deleted"]

        delete_response = delete(self._api_url(collection_url, use_key=True))
        self._assert_status_code_is(delete_response, 200)

        show_response = self._get(collection_url)
        dataset_collection = show_response.json()
        assert dataset_collection["deleted"]

    @skip_without_tool("collection_creates_list")
    def test_jobs_summary_simple_hdca(self):
        create_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"])
        hdca_id = create_response.json()["id"]
        run = self.dataset_populator.run_collection_creates_list(
            self.history_id, hdca_id)
        collections = run['output_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (
            self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")

    @skip_without_tool("cat1")
    def test_jobs_summary_implicit_hdca(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, contents=["123", "456"])
        hdca_id = create_response.json()["id"]
        inputs = {
            "input1": {
                'batch': True,
                'values': [{
                    'src': 'hdca',
                    'id': hdca_id
                }]
            },
        }
        run = self.dataset_populator.run_tool("cat1",
                                              inputs=inputs,
                                              history_id=self.history_id)
        self.dataset_populator.wait_for_history_jobs(self.history_id)
        collections = run['implicit_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (
            self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")
        states = jobs_summary["states"]
        assert states.get("ok") == 2, states

    def test_dataset_collection_hide_originals(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")

        payload["hide_source_items"] = True
        dataset_collection_response = self._post(
            "histories/%s/contents" % self.history_id, payload)
        self.__check_create_collection_response(dataset_collection_response)

        contents_response = self._get("histories/%s/contents" %
                                      self.history_id)
        datasets = [
            d for d in contents_response.json()
            if d["history_content_type"] == "dataset" and d["hid"] in [1, 2]
        ]
        # Assert two datasets in source were hidden.
        assert len(datasets) == 2
        assert not datasets[0]["visible"]
        assert not datasets[1]["visible"]

    def test_update_dataset_collection(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        dataset_collection_response = self._post(
            "histories/%s/contents" % self.history_id, payload)
        self._assert_status_code_is(dataset_collection_response, 200)
        hdca = dataset_collection_response.json()
        update_url = self._api_url(
            "histories/%s/contents/dataset_collections/%s" %
            (self.history_id, hdca["id"]),
            use_key=True)
        # Awkward json.dumps required here because of https://trello.com/c/CQwmCeG6
        body = json.dumps(dict(name="newnameforpair"))
        update_response = put(update_url, data=body)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def test_hdca_copy(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
        )
        assert len(
            self._get("histories/%s/contents/dataset_collections" %
                      second_history_id).json()) == 0
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % second_history_id,
            create_data)
        self.__check_create_collection_response(create_response)
        contents = self._get("histories/%s/contents/dataset_collections" %
                             second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == self.history_id

    def test_hdca_copy_and_elements(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
            copy_elements=True,
        )
        assert len(
            self._get("histories/%s/contents/dataset_collections" %
                      second_history_id).json()) == 0
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % second_history_id,
            create_data)
        self.__check_create_collection_response(create_response)

        contents = self._get("histories/%s/contents/dataset_collections" %
                             second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == second_history_id

    def __get_paired_response_elements(self, contents):
        hdca = self.__show(contents).json()
        self._assert_has_keys(hdca, "name", "deleted", "visible", "elements")
        elements = hdca["elements"]
        assert len(elements) == 2
        element0 = elements[0]
        element1 = elements[1]
        self._assert_has_keys(element0, "object")
        self._assert_has_keys(element1, "object")

        return element0["object"], element1["object"]

    def test_hdca_from_library_datasets(self):
        ld = self.library_populator.new_library_dataset("el1")
        ldda_id = ld["ldda_id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % self.history_id,
            create_data)
        hdca = self.__check_create_collection_response(create_response)
        elements = hdca["elements"]
        assert len(elements) == 1
        hda = elements[0]["object"]
        assert hda["hda_ldda"] == "hda"
        assert hda["history_content_type"] == "dataset"
        assert hda["copied_from_ldda_id"] == ldda_id

    def test_hdca_from_inaccessible_library_datasets(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "HDCACreateInaccesibleLibrary")
        ldda_id = library_dataset["id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        with self._different_user():
            second_history_id = self._new_history()
            create_response = self._post(
                "histories/%s/contents/dataset_collections" %
                second_history_id, create_data)
            self._assert_status_code_is(create_response, 403)

    def __check_create_collection_response(self, response):
        self._assert_status_code_is(response, 200)
        dataset_collection = response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted",
                              "visible", "elements")
        return dataset_collection

    def __show(self, contents):
        show_response = self._get(
            "histories/%s/contents/%ss/%s" %
            (self.history_id, contents["history_content_type"],
             contents["id"]))
        return show_response

    def __count_contents(self, history_id=None, **kwds):
        if history_id is None:
            history_id = self.history_id
        contents_response = self._get("histories/%s/contents" % history_id,
                                      kwds)
        return len(contents_response.json())

    def __assert_hda_has_full_details(self, hda_details):
        self._assert_has_keys(hda_details, "display_types", "display_apps")

    def __check_for_hda(self, contents_response, hda):
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hda_summary = contents[0]
        self.__assert_matches_hda(hda, hda_summary)
        return hda_summary

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]
class DataManagerIntegrationTestCase(integration_util.IntegrationTestCase):

    """Test data manager installation and table reload through the API"""

    framework_tool_and_types = True

    def setUp(self):
        super(DataManagerIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        try:
            import watchdog  # noqa: F401
        except ImportError:
            raise SkipTest("watchdog library is not available")
        cls.username = cls.get_secure_ascii_digits()
        cls.conda_tmp_prefix = tempfile.mkdtemp()
        cls.shed_tools_dir = tempfile.mkdtemp()
        cls.shed_tool_data_dir = tempfile.mkdtemp()
        cls._test_driver.temp_directories.extend([cls.conda_tmp_prefix, cls.shed_tool_data_dir, cls.shed_tools_dir])
        config["conda_auto_init"] = True
        config["conda_auto_install"] = True
        config["conda_prefix"] = os.path.join(cls.conda_tmp_prefix, 'conda')
        config["tool_sheds_config_file"] = TOOL_SHEDS_CONF
        config["tool_config_file"] = os.path.join(cls.shed_tools_dir, 'shed_tool_conf.xml')
        config["shed_data_manager_config_file"] = os.path.join(cls.shed_tool_data_dir, 'shed_data_manager_config_file')
        config["shed_tool_data_table_config"] = os.path.join(cls.shed_tool_data_dir, 'shed_data_table_conf.xml')
        config["shed_tool_data_path"] = cls.shed_tool_data_dir
        config["tool_data_path"] = cls.shed_tool_data_dir
        config["watch_tool_data_dir"] = True
        config["admin_users"] = "*****@*****.**" % cls.username
        with open(config["tool_config_file"], 'w') as tool_conf_file:
            tool_conf_file.write(SHED_TOOL_CONF.substitute(shed_tools_path=cls.shed_tools_dir))
        with open(config["shed_data_manager_config_file"], 'w') as shed_data_config:
            shed_data_config.write(SHED_DATA_MANAGER_CONF)
        with open(config["shed_tool_data_table_config"], 'w') as shed_data_table_config:
            shed_data_table_config.write(SHED_DATA_TABLES)

    def test_data_manager_installation_table_reload(self):
        """
        Test that we can install data managers, create a new dbkey, and use that dbkey in a downstream data manager.
        """
        create_response = self._post('/tool_shed_repositories/new/install_repository_revision', data=CREATE_DBKEY_PAYLOAD, admin=True)
        self._assert_status_code_is(create_response, 200)
        create_response = self._post('/tool_shed_repositories/new/install_repository_revision', data=SAM_FASTA_PAYLOAD, admin=True)
        self._assert_status_code_is(create_response, 200)

        with self._different_user(email="*****@*****.**" % self.username):
            with self.dataset_populator.test_history() as history_id:
                run_response = self.dataset_populator.run_tool(tool_id=FETCH_TOOL_ID,
                                                               inputs=FETCH_GENOME_DBKEYS_ALL_FASTA_INPUT,
                                                               history_id=history_id,
                                                               assert_ok=False)
                self.dataset_populator.wait_for_tool_run(history_id=history_id, run_response=run_response)
                run_response = self.dataset_populator.run_tool(tool_id=SAM_FASTA_ID,
                                                               inputs=SAM_FASTA_INPUT,
                                                               history_id=history_id,
                                                               assert_ok=False)
                self.dataset_populator.wait_for_tool_run(history_id=history_id, run_response=run_response)

    def create_local_user(self):
        """Creates a local user and returns the user id."""
        password = self.get_secure_ascii_digits()
        payload = {'username': self.username,
                   'password': password,
                   'email': "*****@*****.**" % self.username}
        create_response = self._post('/users', data=payload, admin=True)
        self._assert_status_code_is(create_response, 200)
        response = create_response.json()
        return response['id']

    def create_api_key_for_user(self, user_id):
        create_response = self._post("/users/%s/api_key" % user_id, data={}, admin=True)
        self._assert_status_code_is(create_response, 200)
        return create_response.json()

    @classmethod
    def get_secure_ascii_digits(cls, n=12):
        return ''.join(random.SystemRandom().choice(string.ascii_lowercase + string.digits) for _ in range(n))
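# CREATE_DBKEY_PAYLOAD and SAM_FASTA_PAYLOAD drive the install endpoint used in
# the test above. Given the repositories installed in the earlier UsesShed
# example, plausible stand-ins would be the following; the exact field set of
# the install_repository_revision API is an assumption here.
CREATE_DBKEY_PAYLOAD = {
    "tool_shed_url": "https://toolshed.g2.bx.psu.edu",
    "owner": "devteam",
    "name": "data_manager_fetch_genome_dbkeys_all_fasta",
    "changeset_revision": "b1bc53e9bbc5",
}
SAM_FASTA_PAYLOAD = {
    "tool_shed_url": "https://toolshed.g2.bx.psu.edu",
    "owner": "devteam",
    "name": "data_manager_sam_fasta_index_builder",
    "changeset_revision": "1865e693d8b2",
}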
Example #11
class InteractiveToolsIntegrationTestCase(ContainerizedIntegrationTestCase):

    framework_tool_and_types = True
    container_type = "docker"
    require_uwsgi = True
    enable_realtime_mapping = True

    def setUp(self):
        super(InteractiveToolsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_simple_execution(self):
        response_dict = self.dataset_populator.run_tool(
            "interactivetool_simple", {}, self.history_id, assert_ok=True)
        assert "jobs" in response_dict, response_dict
        jobs = response_dict["jobs"]
        assert isinstance(jobs, list)
        assert len(jobs) == 1
        job0 = jobs[0]
        entry_points = self.wait_on_entry_points_active(job0["id"])
        assert len(entry_points) == 1
        entry_point0 = entry_points[0]
        target = self.entry_point_target(entry_point0["id"])
        content = self.wait_on_proxied_content(target)
        assert content == "moo cow\n", content

    def test_multi_server_realtime_tool(self):
        response_dict = self.dataset_populator.run_tool(
            "interactivetool_two_entry_points", {},
            self.history_id,
            assert_ok=True)
        assert "jobs" in response_dict, response_dict
        jobs = response_dict["jobs"]
        assert isinstance(jobs, list)
        assert len(jobs) == 1
        job0 = jobs[0]
        entry_points = self.wait_on_entry_points_active(job0["id"])
        assert len(entry_points) == 2
        entry_point0 = entry_points[0]
        entry_point1 = entry_points[1]
        target0 = self.entry_point_target(entry_point0["id"])
        target1 = self.entry_point_target(entry_point1["id"])
        assert target0 != target1
        content0 = self.wait_on_proxied_content(target0)
        assert content0 == "moo cow\n", content0

        content1 = self.wait_on_proxied_content(target1)
        assert content1 == "moo cow\n", content1

    def wait_on_proxied_content(self, target):
        def get_hosted_content():
            try:
                scheme, rest = target.split("://", 1)
                prefix, host_and_port = rest.split(".realtime.")
                print(rest)
                faked_host = rest
                if "/" in rest:
                    faked_host = rest.split("/", 1)[0]
                response = requests.get("%s://%s" % (scheme, host_and_port),
                                        timeout=1,
                                        headers={"Host": faked_host})
                return response.content
            except Exception as e:
                print(e)
                return None

        content = wait_on(get_hosted_content,
                          "realtime hosted content at %s" % target)
        return content

    def entry_point_target(self, entry_point_id):
        entry_point_access_response = self._get("entry_points/%s/access" %
                                                entry_point_id)
        api_asserts.assert_status_code_is(entry_point_access_response, 200)
        access_json = entry_point_access_response.json()
        api_asserts.assert_has_key(access_json, "target")
        return access_json["target"]

    def wait_on_entry_points_active(self, job_id, expected_num=1):
        def active_entry_points():
            entry_points = self.entry_points_for_job(job_id)
            if len(entry_points) != expected_num:
                return None
            elif any([not e["active"] for e in entry_points]):
                return None
            else:
                return entry_points

        return wait_on(active_entry_points, "entry points to become active")

    def entry_points_for_job(self, job_id):
        entry_points_response = self._get("entry_points?job_id=%s" % job_id)
        api_asserts.assert_status_code_is(entry_points_response, 200)
        return entry_points_response.json()
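# wait_on polls a callable until it returns a non-None value. The real helper
# comes from Galaxy's test utilities; a minimal version consistent with its use
# here would be:
import time


def wait_on(function, desc, timeout=60):
    # Poll once a second until the callable yields a value or time runs out.
    start = time.time()
    while time.time() - start < timeout:
        value = function()
        if value is not None:
            return value
        time.sleep(1)
    raise AssertionError("Timed out waiting on %s" % desc)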
class HistoryContentsApiTestCase(api.ApiTestCase, TestsDatasets):

    def setUp(self):
        super(HistoryContentsApiTestCase, self).setUp()
        self.history_id = self._new_history()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_index_hda_summary(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents" % self.history_id)
        hda_summary = self.__check_for_hda(contents_response, hda1)
        assert "display_types" not in hda_summary  # Quick summary, not full details

    def test_make_private_and_public(self):
        hda1 = self._wait_for_new_hda()
        update_url = "histories/%s/contents/%s/permissions" % (self.history_id, hda1["id"])

        role_id = self.dataset_populator.user_private_role_id()
        # Give manage permission to the user.
        payload = {
            "access": [],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload, admin=True)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])
        # Then we restrict access.
        payload = {
            "action": "make_private",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda1["id"])

        # Then we restrict access.
        payload = {
            "action": "remove_restrictions",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])

    def test_set_permissions_add_admin_history_contents(self):
        self._verify_dataset_permissions("history_contents")

    def test_set_permissions_add_admin_datasets(self):
        self._verify_dataset_permissions("dataset")

    def _verify_dataset_permissions(self, api_endpoint):
        hda1 = self._wait_for_new_hda()
        hda_id = hda1["id"]
        if api_endpoint == "history_contents":
            update_url = "histories/%s/contents/%s/permissions" % (self.history_id, hda_id)
        else:
            update_url = "datasets/%s/permissions" % hda_id

        role_id = self.dataset_populator.user_private_role_id()

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }

        # Other users cannot modify permissions.
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

        # First the details render for another user.
        self._assert_other_user_can_access(hda_id)

        # Then we restrict access.
        update_response = self._update_permissions(update_url, payload, admin=True)
        self._assert_status_code_is(update_response, 200)

        # Finally the details don't render.
        self._assert_other_user_cannot_access(hda_id)

        # But they do for the original user.
        contents_response = self._get("histories/%s/contents/%s" % (self.history_id, hda_id)).json()
        assert "name" in contents_response

        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda_id)

        user_id = self.dataset_populator.user_id()
        with self._different_user():
            different_user_id = self.dataset_populator.user_id()
        combined_user_role = self.dataset_populator.create_role([user_id, different_user_id], description="role for testing permissions")

        payload = {
            "access": [combined_user_role["id"]],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        # Now other user can see dataset again with access permission.
        self._assert_other_user_can_access(hda_id)
        # access doesn't imply management though...
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

    def _assert_other_user_cannot_access(self, history_content_id):
        with self._different_user():
            contents_response = self._get("histories/%s/contents/%s" % (self.history_id, history_content_id)).json()
            assert "name" not in contents_response

    def _assert_other_user_can_access(self, history_content_id):
        with self._different_user():
            contents_response = self._get("histories/%s/contents/%s" % (self.history_id, history_content_id)).json()
            assert "name" in contents_response

    def test_index_hda_all_details(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=all" % self.history_id)
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_index_hda_detail_by_id(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=%s" % (self.history_id, hda1["id"]))
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_show_hda(self):
        hda1 = self._new_dataset(self.history_id)
        show_response = self.__show(hda1)
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def test_hda_copy(self):
        hda1 = self._new_dataset(self.history_id)
        create_data = dict(
            source='hda',
            content=hda1["id"],
        )
        second_history_id = self._new_history()
        assert self.__count_contents(second_history_id) == 0
        create_response = self._post("histories/%s/contents" % second_history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(second_history_id) == 1

    def test_library_copy(self):
        ld = self.library_populator.new_library_dataset("lda_test_library")
        create_data = dict(
            source='library',
            content=ld["id"],
        )
        assert self.__count_contents(self.history_id) == 0
        create_response = self._post("histories/%s/contents" % self.history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(self.history_id) == 1

    def test_update(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        update_response = self._raw_update(hda1["id"], dict(deleted=True))
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hda1)
        assert str(show_response.json()["deleted"]).lower() == "true"

        update_response = self._raw_update(hda1["id"], dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        # Updating with the same name again leaves the name unchanged.
        update_response = self._raw_update(hda1["id"], dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        unicode_name = u'ржевский сапоги'
        update_response = self._raw_update(hda1["id"], dict(name=unicode_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == unicode_name, updated_hda

        quoted_name = '"Mooo"'
        update_response = self._raw_update(hda1["id"], dict(name=quoted_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == quoted_name, updated_hda

    def test_update_type_failures(self):
        hda1 = self._wait_for_new_hda()
        update_response = self._raw_update(hda1["id"], dict(deleted='not valid'))
        self._assert_status_code_is(update_response, 400)

    def _wait_for_new_hda(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        return hda1

    def _raw_update(self, item_id, data, admin=False, history_id=None):
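        """PUT ``data`` to a history content item, using the admin key when ``admin`` is set."""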
        history_id = history_id or self.history_id
        key_param = "use_admin_key" if admin else "use_key"
        update_url = self._api_url("histories/%s/contents/%s" % (history_id, item_id), **{key_param: True})
        update_response = put(update_url, json=data)
        return update_response

    def _update_permissions(self, url, data, admin=False):
        key_param = "use_admin_key" if admin else "use_key"
        update_url = self._api_url(url, **{key_param: True})
        update_response = put(update_url, json=data)
        return update_response

    def test_delete(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        delete_response = self._delete("histories/%s/contents/%s" % (self.history_id, hda1["id"]))
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_purge(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        assert str(self.__show(hda1).json()["purged"]).lower() == "false"
        data = {'purge': True}
        delete_response = self._delete("histories/%s/contents/%s" % (self.history_id, hda1["id"]), data=data)
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"
        assert str(self.__show(hda1).json()["purged"]).lower() == "true"

    def test_dataset_collection_creation_on_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            type="dataset_collection"
        )
        endpoint = "histories/%s/contents" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_creation_on_typed_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
        )
        endpoint = "histories/%s/contents/dataset_collections" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_create_from_existing_datasets_with_new_tags(self):
        with self.dataset_populator.test_history() as history_id:
            hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
            hda2_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
            update_response = self._raw_update(hda2_id, dict(tags=['existing:tag']), history_id=history_id).json()
            assert update_response['tags'] == ['existing:tag']
            creation_payload = {'collection_type': 'list',
                                'history_id': history_id,
                                'element_identifiers': json.dumps([{'id': hda_id,
                                                                    'src': 'hda',
                                                                    'name': 'element_id1',
                                                                    'tags': ['my_new_tag']},
                                                                   {'id': hda2_id,
                                                                    'src': 'hda',
                                                                    'name': 'element_id2',
                                                                    'tags': ['another_new_tag']}
                                                                   ]),
                                'type': 'dataset_collection',
                                'copy_elements': True}
            r = self._post("histories/%s/contents" % history_id, creation_payload).json()
            assert r['elements'][0]['object']['id'] != hda_id, "HDA has not been copied"
            assert len(r['elements'][0]['object']['tags']) == 1
            assert r['elements'][0]['object']['tags'][0] == 'my_new_tag'
            assert len(r['elements'][1]['object']['tags']) == 2, r['elements'][1]['object']['tags']
            original_hda = self.dataset_populator.get_history_dataset_details(history_id=history_id, dataset_id=hda_id)
            assert len(original_hda['tags']) == 0, original_hda['tags']

    def _check_pair_creation(self, endpoint, payload):
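        """Create a pair via ``endpoint``, then verify index counts, show, and delete for the new collection."""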
        pre_collection_count = self.__count_contents(type="dataset_collection")
        pre_dataset_count = self.__count_contents(type="dataset")
        pre_combined_count = self.__count_contents(type="dataset,dataset_collection")

        dataset_collection_response = self._post(endpoint, payload)

        dataset_collection = self.__check_create_collection_response(dataset_collection_response)

        post_collection_count = self.__count_contents(type="dataset_collection")
        post_dataset_count = self.__count_contents(type="dataset")
        post_combined_count = self.__count_contents(type="dataset,dataset_collection")

        # Test filtering types with index.
        assert pre_collection_count == 0
        assert post_collection_count == 1
        assert post_combined_count == pre_dataset_count + 1
        assert post_combined_count == pre_combined_count + 1
        assert pre_dataset_count == post_dataset_count

        # Test show dataset collection.
        collection_url = "histories/%s/contents/dataset_collections/%s" % (self.history_id, dataset_collection["id"])
        show_response = self._get(collection_url)
        self._assert_status_code_is(show_response, 200)
        dataset_collection = show_response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted")

        assert not dataset_collection["deleted"]

        delete_response = delete(self._api_url(collection_url, use_key=True))
        self._assert_status_code_is(delete_response, 200)

        show_response = self._get(collection_url)
        dataset_collection = show_response.json()
        assert dataset_collection["deleted"]

    @skip_without_tool("collection_creates_list")
    def test_jobs_summary_simple_hdca(self):
        create_response = self.dataset_collection_populator.create_list_in_history(self.history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"])
        hdca_id = create_response.json()["id"]
        run = self.dataset_populator.run_collection_creates_list(self.history_id, hdca_id)
        collections = run['output_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")

    @skip_without_tool("cat1")
    def test_jobs_summary_implicit_hdca(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(self.history_id, contents=["123", "456"])
        hdca_id = create_response.json()["id"]
        inputs = {
            "input1": {'batch': True, 'values': [{'src': 'hdca', 'id': hdca_id}]},
        }
        run = self.dataset_populator.run_tool("cat1", inputs=inputs, history_id=self.history_id)
        self.dataset_populator.wait_for_history_jobs(self.history_id)
        collections = run['implicit_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")
        states = jobs_summary["states"]
        assert states.get("ok") == 2, states

    def test_dataset_collection_hide_originals(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            type="dataset_collection"
        )

        payload["hide_source_items"] = True
        dataset_collection_response = self._post("histories/%s/contents" % self.history_id, payload)
        self.__check_create_collection_response(dataset_collection_response)

        contents_response = self._get("histories/%s/contents" % self.history_id)
        datasets = [d for d in contents_response.json() if d["history_content_type"] == "dataset" and d["hid"] in [1, 2]]
        # Assert two datasets in source were hidden.
        assert len(datasets) == 2
        assert not datasets[0]["visible"]
        assert not datasets[1]["visible"]

    def test_update_dataset_collection(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            type="dataset_collection"
        )
        dataset_collection_response = self._post("histories/%s/contents" % self.history_id, payload)
        self._assert_status_code_is(dataset_collection_response, 200)
        hdca = dataset_collection_response.json()
        update_url = self._api_url("histories/%s/contents/dataset_collections/%s" % (self.history_id, hdca["id"]), use_key=True)
        # Awkward json.dumps required here because of https://trello.com/c/CQwmCeG6
        body = json.dumps(dict(name="newnameforpair"))
        update_response = put(update_url, data=body)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def test_hdca_copy(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
        )
        assert len(self._get("histories/%s/contents/dataset_collections" % second_history_id).json()) == 0
        create_response = self._post("histories/%s/contents/dataset_collections" % second_history_id, create_data)
        self.__check_create_collection_response(create_response)
        contents = self._get("histories/%s/contents/dataset_collections" % second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == self.history_id

    def test_hdca_copy_and_elements(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
            copy_elements=True,
        )
        assert len(self._get("histories/%s/contents/dataset_collections" % second_history_id).json()) == 0
        create_response = self._post("histories/%s/contents/dataset_collections" % second_history_id, create_data)
        self.__check_create_collection_response(create_response)

        contents = self._get("histories/%s/contents/dataset_collections" % second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == second_history_id

    def __get_paired_response_elements(self, contents):
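        """Show a paired collection and return the forward and reverse element objects."""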
        hdca = self.__show(contents).json()
        self._assert_has_keys(hdca, "name", "deleted", "visible", "elements")
        elements = hdca["elements"]
        assert len(elements) == 2
        element0 = elements[0]
        element1 = elements[1]
        self._assert_has_keys(element0, "object")
        self._assert_has_keys(element1, "object")

        return element0["object"], element1["object"]

    def test_hdca_from_library_datasets(self):
        ld = self.library_populator.new_library_dataset("el1")
        ldda_id = ld["ldda_id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("histories/%s/contents/dataset_collections" % self.history_id, create_data)
        hdca = self.__check_create_collection_response(create_response)
        elements = hdca["elements"]
        assert len(elements) == 1
        hda = elements[0]["object"]
        assert hda["hda_ldda"] == "hda"
        assert hda["history_content_type"] == "dataset"
        assert hda["copied_from_ldda_id"] == ldda_id

    def test_hdca_from_inaccessible_library_datasets(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library("HDCACreateInaccesibleLibrary")
        ldda_id = library_dataset["id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        with self._different_user():
            second_history_id = self._new_history()
            create_response = self._post("histories/%s/contents/dataset_collections" % second_history_id, create_data)
            self._assert_status_code_is(create_response, 403)

    def __check_create_collection_response(self, response):
        self._assert_status_code_is(response, 200)
        dataset_collection = response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted", "visible", "elements")
        return dataset_collection

    def __show(self, contents):
        show_response = self._get("histories/%s/contents/%ss/%s" % (self.history_id, contents["history_content_type"], contents["id"]))
        return show_response

    def __count_contents(self, history_id=None, **kwds):
        if history_id is None:
            history_id = self.history_id
        contents_response = self._get("histories/%s/contents" % history_id, kwds)
        return len(contents_response.json())

    def __assert_hda_has_full_details(self, hda_details):
        self._assert_has_keys(hda_details, "display_types", "display_apps")

    def __check_for_hda(self, contents_response, hda):
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hda_summary = contents[0]
        self.__assert_matches_hda(hda, hda_summary)
        return hda_summary

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]
class JobsApiTestCase(api.ApiTestCase):
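    """Test the jobs API: index filters, job details, error reporting, output deletion semantics, and job search."""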
    def setUp(self):
        super(JobsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)

    @uses_test_history(require_new=True)
    def test_index(self, history_id):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset(history_id)
        jobs = self.__jobs_index()
        assert "upload1" in map(itemgetter("tool_id"), jobs)

    @uses_test_history(require_new=True)
    def test_system_details_admin_only(self, history_id):
        self.__history_with_new_dataset(history_id)
        jobs = self.__jobs_index(admin=False)
        job = jobs[0]
        self._assert_not_has_keys(job, "command_line", "external_id")

        jobs = self.__jobs_index(admin=True)
        job = jobs[0]
        self._assert_has_keys(job, "command_line", "external_id")

    @uses_test_history(require_new=True)
    def test_index_state_filter(self, history_id):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok"))
        # Run through a dataset upload so the number of ok uploads increases
        # by at least 1.
        self.__history_with_ok_dataset(history_id)

        # Verify number of ok jobs is actually greater.
        count_increased = False
        for i in range(10):
            new_count = len(self.__uploads_with_state("ok"))
            if original_count < new_count:
                count_increased = True
                break
            time.sleep(.1)

        if not count_increased:
            template = "Jobs in ok state did not increase (was %d, now %d)"
            message = template % (original_count, new_count)
            raise AssertionError(message)

    @uses_test_history(require_new=True)
    def test_index_date_filter(self, history_id):
        self.__history_with_new_dataset(history_id)
        two_weeks_ago = (datetime.datetime.utcnow() -
                         datetime.timedelta(14)).isoformat()
        last_week = (datetime.datetime.utcnow() -
                     datetime.timedelta(7)).isoformat()
        next_week = (datetime.datetime.utcnow() +
                     datetime.timedelta(7)).isoformat()
        today = datetime.datetime.utcnow().isoformat()
        tomorrow = (datetime.datetime.utcnow() +
                    datetime.timedelta(1)).isoformat()

        jobs = self.__jobs_index(data={
            "date_range_min": today[0:10],
            "date_range_max": tomorrow[0:10]
        })
        assert len(jobs) > 0
        today_job_id = jobs[0]["id"]

        jobs = self.__jobs_index(data={
            "date_range_min": two_weeks_ago,
            "date_range_max": last_week
        })
        assert today_job_id not in map(itemgetter("id"), jobs)

        jobs = self.__jobs_index(data={
            "date_range_min": last_week,
            "date_range_max": next_week
        })
        assert today_job_id in map(itemgetter("id"), jobs)

    @uses_test_history(require_new=True)
    def test_index_history(self, history_id):
        self.__history_with_new_dataset(history_id)
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) > 0

        with self.dataset_populator.test_history() as other_history_id:
            jobs = self.__jobs_index(data={"history_id": other_history_id})
            assert len(jobs) == 0

    @uses_test_history(require_new=True)
    def test_index_multiple_states_filter(self, history_id):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok", "new"))

        # Run through a dataset upload so the number of ok uploads increases
        # by at least 1.
        self.__history_with_ok_dataset(history_id)

        # Verify number of ok jobs is actually greater.
        new_count = len(self.__uploads_with_state("new", "ok"))
        assert original_count < new_count, new_count

    @uses_test_history(require_new=True)
    def test_show(self, history_id):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset(history_id)

        jobs_response = self._get("jobs")
        first_job = jobs_response.json()[0]
        self._assert_has_key(first_job, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

        job_id = first_job["id"]
        show_jobs_response = self._get("jobs/%s" % job_id)
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

        show_jobs_response = self._get("jobs/%s" % job_id, {"full": True})
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code',
                             'update_time', 'create_time', 'stdout', 'stderr',
                             'job_messages')

    @uses_test_history(require_new=True)
    def test_show_security(self, history_id):
        self.__history_with_new_dataset(history_id)
        jobs_response = self._get("jobs", data={"history_id": history_id})
        job = jobs_response.json()[0]
        job_id = job["id"]

        show_jobs_response = self._get("jobs/%s" % job_id, admin=False)
        self._assert_not_has_keys(show_jobs_response.json(), "command_line",
                                  "external_id")

        # TODO: Re-activate test case when API accepts privacy settings
        # with self._different_user():
        #    show_jobs_response = self._get( "jobs/%s" % job_id, admin=False )
        #    self._assert_status_code_is( show_jobs_response, 200 )

        show_jobs_response = self._get("jobs/%s" % job_id, admin=True)
        self._assert_has_keys(show_jobs_response.json(), "command_line",
                              "external_id")

    def _run_detect_errors(self, history_id, inputs):
        payload = self.dataset_populator.run_tool_payload(
            tool_id='detect_errors_aggressive',
            inputs=inputs,
            history_id=history_id,
        )
        return self._post("tools", data=payload).json()

    @skip_without_tool("detect_errors_aggressive")
    def test_unhide_on_error(self):
        with self.dataset_populator.test_history() as history_id:
            inputs = {'error_bool': 'true'}
            run_response = self._run_detect_errors(history_id=history_id,
                                                   inputs=inputs)
            job_id = run_response['jobs'][0]["id"]
            self.dataset_populator.wait_for_job(job_id)
            job = self.dataset_populator.get_job_details(job_id).json()
            assert job['state'] == 'error'
            dataset = self.dataset_populator.get_history_dataset_details(
                history_id=history_id,
                dataset_id=run_response['outputs'][0]['id'],
                assert_ok=False)
            assert dataset['visible']

    @skip_without_tool("detect_errors_aggressive")
    def test_no_unhide_on_error_if_mapped_over(self):
        with self.dataset_populator.test_history() as history_id:
            hdca1 = self.dataset_collection_populator.create_list_in_history(
                history_id, contents=[("sample1-1", "1 2 3")]).json()
            inputs = {
                'error_bool': 'true',
                'dataset': {
                    'batch': True,
                    'values': [{
                        'src': 'hdca',
                        'id': hdca1['id']
                    }],
                }
            }
            run_response = self._run_detect_errors(history_id=history_id,
                                                   inputs=inputs)
            job_id = run_response['jobs'][0]["id"]
            self.dataset_populator.wait_for_job(job_id)
            job = self.dataset_populator.get_job_details(job_id).json()
            assert job['state'] == 'error'
            dataset = self.dataset_populator.get_history_dataset_details(
                history_id=history_id,
                dataset_id=run_response['outputs'][0]['id'],
                assert_ok=False)
            assert not dataset['visible']

    @skip_without_tool('empty_output')
    def test_common_problems(self):
        with self.dataset_populator.test_history() as history_id:
            empty_run_response = self.dataset_populator.run_tool(
                tool_id='empty_output',
                inputs={},
                history_id=history_id,
            )
            empty_hda = empty_run_response["outputs"][0]
            cat_empty_twice_run_response = self.dataset_populator.run_tool(
                tool_id='cat1',
                inputs={
                    'input1': {
                        'src': 'hda',
                        'id': empty_hda['id']
                    },
                    'queries_0|input2': {
                        'src': 'hda',
                        'id': empty_hda['id']
                    }
                },
                history_id=history_id,
            )
            empty_output_job = empty_run_response["jobs"][0]
            cat_empty_job = cat_empty_twice_run_response["jobs"][0]
            empty_output_common_problems_response = self._get(
                'jobs/%s/common_problems' % empty_output_job["id"]).json()
            cat_empty_common_problems_response = self._get(
                'jobs/%s/common_problems' % cat_empty_job["id"]).json()
            self._assert_has_keys(empty_output_common_problems_response,
                                  "has_empty_inputs", "has_duplicate_inputs")
            self._assert_has_keys(cat_empty_common_problems_response,
                                  "has_empty_inputs", "has_duplicate_inputs")
            assert not empty_output_common_problems_response["has_empty_inputs"]
            assert cat_empty_common_problems_response["has_empty_inputs"]
            assert not empty_output_common_problems_response[
                "has_duplicate_inputs"]
            assert cat_empty_common_problems_response["has_duplicate_inputs"]

    @skip_without_tool('detect_errors_aggressive')
    def test_report_error(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.run_tool_payload(
                tool_id='detect_errors_aggressive',
                inputs={'error_bool': 'true'},
                history_id=history_id,
            )
            run_response = self._post("tools", data=payload).json()
            job_id = run_response['jobs'][0]["id"]
            dataset_id = run_response['outputs'][0]['id']
            response = self._post('jobs/%s/error' % job_id,
                                  data={'dataset_id': dataset_id})
            assert response.status_code == 200

    @skip_without_tool('detect_errors_aggressive')
    def test_report_error_anon(self):
        # Need to get a cookie and use that for anonymous tool runs
        cookies = requests.get(self.url).cookies
        payload = json.dumps({
            "tool_id": "detect_errors_aggressive",
            "inputs": {
                "error_bool": "true"
            }
        })
        run_response = requests.post("%s/tools" %
                                     self.galaxy_interactor.api_url,
                                     data=payload,
                                     cookies=cookies).json()
        job_id = run_response['jobs'][0]["id"]
        dataset_id = run_response['outputs'][0]['id']
        response = requests.post('%s/jobs/%s/error' %
                                 (self.galaxy_interactor.api_url, job_id),
                                 params={
                                     'email': '*****@*****.**',
                                     'dataset_id': dataset_id
                                 },
                                 cookies=cookies)
        assert response.status_code == 200

    @uses_test_history(require_new=True)
    def test_deleting_output_keep_running_until_all_deleted(self, history_id):
        job_state, outputs = self._setup_running_two_output_job(
            history_id, 120)

        self._hack_to_skip_test_if_state_ok(job_state)

        # Delete one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"deleted": True})

        self._hack_to_skip_test_if_state_ok(job_state)

        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        state = job_state().json()["state"]
        assert state == "running", state

        # Delete the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"deleted": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

    @uses_test_history(require_new=True)
    def test_purging_output_keep_running_until_all_purged(self, history_id):
        job_state, outputs = self._setup_running_two_output_job(
            history_id, 120)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are, grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"purged": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

        def paths_deleted():
            if not os.path.exists(
                    output_dataset_paths[0]) and not os.path.exists(
                        output_dataset_paths[1]):
                return True

        if output_dataset_paths_exist:
            wait_on(paths_deleted, "path deletion")

    @uses_test_history(require_new=True)
    def test_purging_output_cleaned_after_ok_run(self, history_id):
        job_state, outputs = self._setup_running_two_output_job(history_id, 10)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are, grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        if not output_dataset_paths_exist:
            # Given this Galaxy configuration - there is nothing more to be tested here.
            # Consider throwing a skip instead.
            return

        # Purge one of the two outputs and wait for the job to complete.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        wait_on_state(job_state, assert_ok=True)

        if output_dataset_paths_exist:
            time.sleep(.5)
            # Make sure the non-purged dataset is on disk and the purged one is not.
            assert os.path.exists(output_dataset_paths[1])
            assert not os.path.exists(output_dataset_paths[0])

    def _hack_to_skip_test_if_state_ok(self, job_state):
        from nose.plugins.skip import SkipTest
        if job_state().json()["state"] == "ok":
            message = "Job state switch from running to ok too quickly - the rest of the test requires the job to be in a running state. Skipping test."
            raise SkipTest(message)

    def _setup_running_two_output_job(self, history_id, sleep_time):
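        """Run the ``create_2`` tool (two outputs, configurable sleep) and return a job-state poller along with the outputs."""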
        payload = self.dataset_populator.run_tool_payload(
            tool_id='create_2',
            inputs=dict(sleep_time=sleep_time, ),
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        outputs = run_response["outputs"]
        jobs = run_response["jobs"]

        assert len(outputs) == 2
        assert len(jobs) == 1

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        # Give job some time to get up and running.
        time.sleep(2)
        running_state = wait_on_state(job_state,
                                      skip_states=["queued", "new"],
                                      assert_ok=False,
                                      timeout=15)
        assert running_state == "running", running_state

        return job_state, outputs

    def _raw_update_history_item(self, history_id, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" %
                                   (history_id, item_id),
                                   use_key=True)
        update_response = requests.put(update_url, json=data)
        assert_status_code_is_ok(update_response)
        return update_response

    @skip_without_tool("cat_data_and_sleep")
    @uses_test_history(require_new=True)
    def test_resume_job(self, history_id):
        hda1 = self.dataset_populator.new_dataset(
            history_id, content="samp1\t10.0\nsamp2\t20.0\n")
        hda2 = self.dataset_populator.new_dataset(
            history_id, content="samp1\t30.0\nsamp2\t40.0\n")
        # Submit first job
        payload = self.dataset_populator.run_tool_payload(
            tool_id='cat_data_and_sleep',
            inputs={
                'sleep_time': 15,
                'input1': {
                    'src': 'hda',
                    'id': hda2['id']
                },
                'queries_0|input2': {
                    'src': 'hda',
                    'id': hda2['id']
                }
            },
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        output = run_response["outputs"][0]
        # Submit second job that waits on job1
        payload = self.dataset_populator.run_tool_payload(
            tool_id='cat1',
            inputs={
                'input1': {
                    'src': 'hda',
                    'id': hda1['id']
                },
                'queries_0|input2': {
                    'src': 'hda',
                    'id': output['id']
                }
            },
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        job_id = run_response['jobs'][0]['id']
        output = run_response["outputs"][0]
        # Delete the second job's input while it is waiting on the first job
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, hda1['id']))
        self._assert_status_code_is(delete_response, 200)
        self.dataset_populator.wait_for_history_jobs(history_id,
                                                     assert_ok=False)
        dataset_details = self._get("histories/%s/contents/%s" %
                                    (history_id, output['id'])).json()
        assert dataset_details['state'] == 'paused'
        # Undelete input dataset
        undelete_response = self._put("histories/%s/contents/%s" %
                                      (history_id, hda1['id']),
                                      data=json.dumps({'deleted': False}))
        self._assert_status_code_is(undelete_response, 200)
        resume_response = self._put("jobs/%s/resume" % job_id)
        self._assert_status_code_is(resume_response, 200)
        self.dataset_populator.wait_for_history_jobs(history_id,
                                                     assert_ok=True)
        dataset_details = self._get("histories/%s/contents/%s" %
                                    (history_id, output['id'])).json()
        assert dataset_details['state'] == 'ok'

    def _get_history_item_as_admin(self, history_id, item_id):
        response = self._get("histories/%s/contents/%s?view=detailed" %
                             (history_id, item_id),
                             admin=True)
        assert_status_code_is_ok(response)
        return response.json()

    @uses_test_history(require_new=True)
    def test_search(self, history_id):
        dataset_id = self.__history_with_ok_dataset(history_id)
        # We first copy the dataset, so that its update time predates the job creation time
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {
            "content": dataset_id,
            "source": "hda",
            "type": "dataset"
        }
        copy_response = self._post("histories/%s/contents" % new_history_id,
                                   data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        self._job_search(tool_id='cat1', history_id=history_id, inputs=inputs)
        # We test that a job can be found even if the dataset has been copied to another history
        new_dataset_id = copy_response.json()['id']
        copied_inputs = json.dumps(
            {'input1': {
                'src': 'hda',
                'id': new_dataset_id
            }})
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='cat1',
                                              inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDA that was used -- we should still be able to find the job
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, dataset_id))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (new_history_id, new_dataset_id))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_handle_identifiers(self, history_id):
        # Test that the input name and element identifier of a job's output must match for the job to be returned.
        dataset_id = self.__history_with_ok_dataset(history_id)
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        self._job_search(tool_id='identifier_single',
                         history_id=history_id,
                         inputs=inputs)
        dataset_details = self._get("histories/%s/contents/%s" %
                                    (history_id, dataset_id)).json()
        dataset_details['name'] = 'Renamed Test Dataset'
        dataset_update_response = self._put(
            "histories/%s/contents/%s" % (history_id, dataset_id),
            data=dict(name='Renamed Test Dataset'))
        self._assert_status_code_is(dataset_update_response, 200)
        assert dataset_update_response.json()['name'] == 'Renamed Test Dataset'
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='identifier_single',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_delete_outputs(self, history_id):
        dataset_id = self.__history_with_ok_dataset(history_id)
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        tool_response = self._job_search(tool_id='cat1',
                                         history_id=history_id,
                                         inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, output_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='cat1',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_with_hdca_list_input(self, history_id):
        list_id_a = self.__history_with_ok_collection(collection_type='list',
                                                      history_id=history_id)
        list_id_b = self.__history_with_ok_collection(collection_type='list',
                                                      history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_b
            },
        })
        tool_response = self._job_search(tool_id='multi_data_param',
                                         history_id=history_id,
                                         inputs=inputs)
        # We switch the inputs; this should not return a match
        inputs_switched = json.dumps({
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f1': {
                'src': 'hdca',
                'id': list_id_b
            },
        })
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='multi_data_param',
                                              inputs=inputs_switched)
        self._search(search_payload, expected_search_count=0)
        # We delete the output (an HDA, since multi_data_param reduces collections)
        # and use the correct input job definition; the job should not be found
        output_id = tool_response.json()['outputs'][0]['id']
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, output_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='multi_data_param',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_delete_hdca_output(self, history_id):
        list_id_a = self.__history_with_ok_collection(collection_type='list',
                                                      history_id=history_id)
        inputs = json.dumps({
            'input1': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        tool_response = self._job_search(tool_id='collection_creates_list',
                                         history_id=history_id,
                                         inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        # We delete a single tool output, no job should be returned
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (history_id, output_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(
            history_id=history_id,
            tool_id='collection_creates_list',
            inputs=inputs)
        self._search(search_payload, expected_search_count=0)
        tool_response = self._job_search(tool_id='collection_creates_list',
                                         history_id=history_id,
                                         inputs=inputs)
        output_collection_id = tool_response.json()['output_collections'][0]['id']
        # We delete a collection output, no job should be returned
        delete_response = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, output_collection_id))
        self._assert_status_code_is(delete_response, 200)
        search_payload = self._search_payload(
            history_id=history_id,
            tool_id='collection_creates_list',
            inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_with_hdca_pair_input(self, history_id):
        list_id_a = self.__history_with_ok_collection(collection_type='pair',
                                                      history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        self._job_search(tool_id='multi_data_param',
                         history_id=history_id,
                         inputs=inputs)
        # We test that a job can be found even if the collection has been copied to another history
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {
            "content": list_id_a,
            "source": "hdca",
            "type": "dataset_collection"
        }
        copy_response = self._post("histories/%s/contents" % new_history_id,
                                   data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        new_list_a = copy_response.json()['id']
        copied_inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': new_list_a
            },
            'f2': {
                'src': 'hdca',
                'id': new_list_a
            },
        })
        search_payload = self._search_payload(history_id=new_history_id,
                                              tool_id='multi_data_param',
                                              inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDCA that was used -- we should still be able to find the job
        delete_response = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, list_id_a))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_response = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (new_history_id, new_list_a))
        self._assert_status_code_is(delete_response, 200)
        self._search(search_payload, expected_search_count=0)

    @uses_test_history(require_new=True)
    def test_search_with_hdca_list_pair_input(self, history_id):
        list_id_a = self.__history_with_ok_collection(
            collection_type='list:pair', history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        self._job_search(tool_id='multi_data_param',
                         history_id=history_id,
                         inputs=inputs)

    def _job_search(self, tool_id, history_id, inputs):
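        """Assert jobs/search finds nothing before running ``tool_id`` and exactly one job afterwards."""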
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id=tool_id,
                                              inputs=inputs)
        empty_search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(empty_search_response, 200)
        self.assertEqual(len(empty_search_response.json()), 0)
        tool_response = self._post("tools", data=search_payload)
        self.dataset_populator.wait_for_tool_run(history_id,
                                                 run_response=tool_response)
        self._search(search_payload, expected_search_count=1)
        return tool_response

    def _search_payload(self, history_id, tool_id, inputs, state='ok'):
        search_payload = dict(tool_id=tool_id,
                              inputs=inputs,
                              history_id=history_id,
                              state=state)
        return search_payload

    def _search(self, payload, expected_search_count=1):
        # The job and history may not be updated at exactly the same time,
        # so poll briefly for the expected count.
        for i in range(5):
            search_count = self._search_count(payload)
            if search_count == expected_search_count:
                break
            time.sleep(1)
        assert search_count == expected_search_count, "expected to find %d jobs, got %d jobs" % (
            expected_search_count, search_count)
        return search_count

    def _search_count(self, search_payload):
        search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(search_response, 200)
        search_json = search_response.json()
        return len(search_json)

    def __uploads_with_state(self, *states):
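        """Return jobs for the ``upload1`` tool whose state is one of ``states``."""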
        jobs_response = self._get("jobs", data=dict(state=states))
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert not [j for j in jobs if not j['state'] in states]
        return [j for j in jobs if j['tool_id'] == 'upload1']

    def __history_with_new_dataset(self, history_id):
        dataset_id = self.dataset_populator.new_dataset(history_id)["id"]
        return dataset_id

    def __history_with_ok_dataset(self, history_id):
        dataset_id = self.dataset_populator.new_dataset(history_id,
                                                        wait=True)["id"]
        return dataset_id

    def __history_with_ok_collection(self,
                                     collection_type='list',
                                     history_id=None):
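        """Create a collection of the requested type, wait for it to be ok, and return its HDCA id."""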
        if not history_id:
            history_id = self.dataset_populator.new_history()
        if collection_type == 'list':
            fetch_response = self.dataset_collection_populator.create_list_in_history(
                history_id, direct_upload=True).json()
        elif collection_type == 'pair':
            fetch_response = self.dataset_collection_populator.create_pair_in_history(
                history_id, direct_upload=True).json()
        elif collection_type == 'list:pair':
            fetch_response = self.dataset_collection_populator.create_list_of_pairs_in_history(
                history_id).json()
        self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        return fetch_response["outputs"][0]['id']

    def __jobs_index(self, **kwds):
        jobs_response = self._get("jobs", **kwds)
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert isinstance(jobs, list)
        return jobs
class ObjectStoreJobsIntegrationTestCase(integration_util.IntegrationTestCase):
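    """Test that tool outputs are written to the expected distributed object store backends."""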

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        temp_directory = cls._test_driver.mkdtemp()
        cls.object_stores_parent = temp_directory
        for disk_store_file_name in ["files1", "files2", "files3"]:
            disk_store_path = os.path.join(temp_directory,
                                           disk_store_file_name)
            os.makedirs(disk_store_path)
            setattr(cls, "%s_path" % disk_store_file_name, disk_store_path)
        config_path = os.path.join(temp_directory, "object_store_conf.xml")
        with open(config_path, "w") as f:
            f.write(
                DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE.safe_substitute(
                    {"temp_directory": temp_directory}))
        config["object_store_config_file"] = config_path

    def setUp(self):
        super(ObjectStoreJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_tool_simple_constructs(self):
        with self.dataset_populator.test_history() as history_id:
            hda1 = self.dataset_populator.new_dataset(history_id,
                                                      content="1 2 3")
            create_10_inputs = {
                "input1": {
                    "src": "hda",
                    "id": hda1["id"]
                },
                "input2": {
                    "src": "hda",
                    "id": hda1["id"]
                },
            }
            self.dataset_populator.run_tool(
                "create_10",
                create_10_inputs,
                history_id,
                assert_ok=True,
            )
            self.dataset_populator.wait_for_history(history_id)

        files_1_count = _files_count(self.files1_path)
        files_2_count = _files_count(self.files2_path)
        files_3_count = _files_count(self.files3_path)

        # Ensure no files written to the secondary/inactive hierarchical disk store.
        assert files_3_count == 0

        # Ensure the 10 datasets created by the tool were written to one of the
        # distributed object store's disk stores (it will have either 10 or 11 files,
        # depending on whether the input was also written there). The other disk store
        # may or may not have the input file, so it should have at most one file.
        assert (files_1_count >= 10) or (files_2_count >= 10)
        assert (files_1_count <= 1) or (files_2_count <= 1)

        # Other sanity checks on the test - just make sure the test was setup as intended
        # and not actually testing object store behavior.
        assert (files_1_count <= 11) and (files_2_count <= 11)
        assert (files_1_count >= 0) and (files_2_count >= 0)
class LocalJobCancellationTestCase(integration_util.IntegrationTestCase):
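    """Test that cancelling a job through the API kills the underlying local process."""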

    framework_tool_and_types = True

    def setUp(self):
        super(LocalJobCancellationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_kill_process(self):
        """
        """
        with self.dataset_populator.test_history() as history_id:
            hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3")
            running_inputs = {
                "input1": {"src": "hda", "id": hda1["id"]},
                "sleep_time": 240,
            }
            running_response = self.dataset_populator.run_tool(
                "cat_data_and_sleep",
                running_inputs,
                history_id,
                assert_ok=False,
            ).json()
            job_dict = running_response["jobs"][0]

            app = self._app
            sa_session = app.model.context.current
            external_id = None
            state = None

            job = sa_session.query(app.model.Job).filter_by(tool_id="cat_data_and_sleep").one()
            # Wait until the job is running before cancelling. Cancelling while the job
            # is still queued would let the runner thread's queued -> running update
            # overwrite the queued -> deleted_new change made in the API thread, and the
            # job would still run. See https://github.com/galaxyproject/galaxy/issues/4960.
            while external_id is None or state != app.model.Job.states.RUNNING:
                sa_session.refresh(job)
                assert not job.finished
                external_id = job.job_runner_external_id
                state = job.state

            assert external_id
            external_id = int(external_id)

            pid_exists = psutil.pid_exists(external_id)
            assert pid_exists

            delete_response = self.dataset_populator.cancel_job(job_dict["id"])
            assert delete_response.json() is True

            state = None
            # Now make sure the job becomes complete.
            for i in range(100):
                sa_session.refresh(job)
                state = job.state
                if state == app.model.Job.states.DELETED:
                    break
                time.sleep(.1)

            # Now make sure the pid is actually killed.
            for i in range(100):
                if not pid_exists:
                    break
                pid_exists = psutil.pid_exists(external_id)
                time.sleep(.1)

            final_state = "pid exists? %s, final db job state %s" % (pid_exists, state)
            assert state == app.model.Job.states.DELETED, final_state
            assert not pid_exists, final_state
class ObjectStoreJobsIntegrationTestCase(integration_util.IntegrationTestCase):
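    """Test job-resource-parameter-driven routing of tool outputs to different object stores."""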

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        temp_directory = cls._test_driver.mkdtemp()
        cls.object_stores_parent = temp_directory
        for disk_store_file_name in ["files_default", "files_static", "files_dynamic_ebs", "files_dynamic_s3"]:
            disk_store_path = os.path.join(temp_directory, disk_store_file_name)
            os.makedirs(disk_store_path)
            setattr(cls, "%s_path" % disk_store_file_name, disk_store_path)
        config_path = os.path.join(temp_directory, "object_store_conf.xml")
        with open(config_path, "w") as f:
            f.write(DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE.safe_substitute({"temp_directory": temp_directory}))
        config["object_store_config_file"] = config_path
        config["job_config_file"] = JOB_CONFIG_FILE
        config["job_resource_params_file"] = JOB_RESOURCE_PARAMETERS_CONFIG_FILE

    def setUp(self):
        super(ObjectStoreJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def _object_store_counts(self):
        files_default_count = _files_count(self.files_default_path)
        files_static_count = _files_count(self.files_static_path)
        files_dynamic_ebs_count = _files_count(self.files_dynamic_ebs_path)
        files_dynamic_s3_count = _files_count(self.files_dynamic_s3_path)
        return files_default_count, files_static_count, files_dynamic_ebs_count, files_dynamic_s3_count

    def _assert_file_counts(self, default, static, dynamic_ebs, dynamic_s3):
        files_default_count = _files_count(self.files_default_path)
        files_static_count = _files_count(self.files_static_path)
        files_dynamic_ebs_count = _files_count(self.files_dynamic_ebs_path)
        files_dynamic_s3_count = _files_count(self.files_dynamic_s3_path)
        assert default == files_default_count
        assert static == files_static_count
        assert dynamic_ebs == files_dynamic_ebs_count
        assert dynamic_s3 == files_dynamic_s3_count

    def test_tool_simple_constructs(self):
        with self.dataset_populator.test_history() as history_id:

            def _run_tool(tool_id, inputs):
                self.dataset_populator.run_tool(
                    tool_id,
                    inputs,
                    history_id,
                    assert_ok=True,
                )
                self.dataset_populator.wait_for_history(history_id)

            self._assert_file_counts(0, 0, 0, 0)

            hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3")
            self.dataset_populator.wait_for_history(history_id)
            hda1_input = {"src": "hda", "id": hda1["id"]}

            # One file uploaded, added to default object store ID.
            self._assert_file_counts(1, 0, 0, 0)

            # should create two files in static object store.
            _run_tool("multi_data_param", {"f1": hda1_input, "f2": hda1_input})
            self._assert_file_counts(1, 2, 0, 0)

            # should create 10 files in the ebs object store.
            create_10_inputs = {
                "input1": hda1_input,
                "input2": hda1_input,
            }
            _run_tool("create_10", create_10_inputs)
            self._assert_file_counts(1, 2, 10, 0)

            # should create 10 files in S3 object store.
            create_10_inputs = {
                "__job_resource|__job_resource__select": "yes",
                "__job_resource|how_store": "slow",
                "input1": hda1_input,
                "input2": hda1_input,
            }
            _run_tool("create_10", create_10_inputs)
            self._assert_file_counts(1, 2, 10, 10)
class HistoryContentsApiTestCase(api.ApiTestCase, TestsDatasets):
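    """Test the history contents API for datasets and dataset collections."""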

    def setUp(self):
        super(HistoryContentsApiTestCase, self).setUp()
        self.history_id = self._new_history()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_index_hda_summary(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents" % self.history_id)
        hda_summary = self.__check_for_hda(contents_response, hda1)
        assert "display_types" not in hda_summary  # Quick summary, not full details

    def test_index_hda_all_details(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=all" % self.history_id)
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_index_hda_detail_by_id(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=%s" % (self.history_id, hda1["id"]))
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_show_hda(self):
        hda1 = self._new_dataset(self.history_id)
        show_response = self.__show(hda1)
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def test_hda_copy(self):
        hda1 = self._new_dataset(self.history_id)
        create_data = dict(
            source='hda',
            content=hda1["id"],
        )
        second_history_id = self._new_history()
        assert self.__count_contents(second_history_id) == 0
        create_response = self._post("histories/%s/contents" % second_history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(second_history_id) == 1

    def test_library_copy(self):
        ld = self.library_populator.new_library_dataset("lda_test_library")
        create_data = dict(
            source='library',
            content=ld["id"],
        )
        assert self.__count_contents(self.history_id) == 0
        create_response = self._post("histories/%s/contents" % self.history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(self.history_id) == 1

    def test_update(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        update_response = self._raw_update(hda1["id"], dict(deleted=True))
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hda1)
        assert str(show_response.json()["deleted"]).lower() == "true"

        update_response = self._raw_update(hda1["id"], dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

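        # Repeating the identical rename should still succeed and leave the name unchanged.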
        update_response = self._raw_update(hda1["id"], dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        unicode_name = u'ржевский сапоги'
        update_response = self._raw_update(hda1["id"], dict(name=unicode_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == unicode_name, updated_hda

        quoted_name = '"Mooo"'
        update_response = self._raw_update(hda1["id"], dict(name=quoted_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == quoted_name, quoted_name

    def test_update_type_failures(self):
        hda1 = self._wait_for_new_hda()
        update_response = self._raw_update(hda1["id"], dict(deleted='not valid'))
        self._assert_status_code_is(update_response, 400)

    def _wait_for_new_hda(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        return hda1

    def _raw_update(self, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" % (self.history_id, item_id), use_key=True)
        update_response = put(update_url, json=data)
        return update_response

    def test_delete(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        delete_response = self._delete("histories/%s/contents/%s" % (self.history_id, hda1["id"]))
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_purge(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        assert str(self.__show(hda1).json()["purged"]).lower() == "false"
        data = {'purge': True}
        delete_response = self._delete("histories/%s/contents/%s" % (self.history_id, hda1["id"]), data=data)
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"
        assert str(self.__show(hda1).json()["purged"]).lower() == "true"

    def test_dataset_collection_creation_on_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            type="dataset_collection"
        )
        endpoint = "histories/%s/contents" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_creation_on_typed_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
        )
        endpoint = "histories/%s/contents/dataset_collections" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def _check_pair_creation(self, endpoint, payload):
        pre_collection_count = self.__count_contents(type="dataset_collection")
        pre_dataset_count = self.__count_contents(type="dataset")
        pre_combined_count = self.__count_contents(type="dataset,dataset_collection")

        dataset_collection_response = self._post(endpoint, payload)

        dataset_collection = self.__check_create_collection_response(dataset_collection_response)

        post_collection_count = self.__count_contents(type="dataset_collection")
        post_dataset_count = self.__count_contents(type="dataset")
        post_combined_count = self.__count_contents(type="dataset,dataset_collection")

        # Test filtering types with index.
        assert pre_collection_count == 0
        assert post_collection_count == 1
        assert post_combined_count == pre_dataset_count + 1
        assert post_combined_count == pre_combined_count + 1
        assert pre_dataset_count == post_dataset_count

        # Test show dataset collection.
        collection_url = "histories/%s/contents/dataset_collections/%s" % (self.history_id, dataset_collection["id"])
        show_response = self._get(collection_url)
        self._assert_status_code_is(show_response, 200)
        dataset_collection = show_response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted")

        assert not dataset_collection["deleted"]

        delete_response = delete(self._api_url(collection_url, use_key=True))
        self._assert_status_code_is(delete_response, 200)

        show_response = self._get(collection_url)
        dataset_collection = show_response.json()
        assert dataset_collection["deleted"]

    @skip_without_tool("collection_creates_list")
    def test_jobs_summary_simple_hdca(self):
        create_response = self.dataset_collection_populator.create_list_in_history(self.history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"])
        hdca_id = create_response.json()["id"]
        run = self.dataset_populator.run_collection_creates_list(self.history_id, hdca_id)
        collections = run['output_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")

    @skip_without_tool("cat1")
    def test_jobs_summary_implicit_hdca(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(self.history_id, contents=["123", "456"])
        hdca_id = create_response.json()["id"]
        inputs = {
            "input1": {'batch': True, 'values': [{'src': 'hdca', 'id': hdca_id}]},
        }
        run = self.dataset_populator.run_tool("cat1", inputs=inputs, history_id=self.history_id)
        self.dataset_populator.wait_for_history_jobs(self.history_id)
        collections = run['implicit_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")
        states = jobs_summary["states"]
        assert states.get("ok") == 2, states

    def test_dataset_collection_hide_originals(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            type="dataset_collection"
        )

        payload["hide_source_items"] = True
        dataset_collection_response = self._post("histories/%s/contents" % self.history_id, payload)
        self.__check_create_collection_response(dataset_collection_response)

        contents_response = self._get("histories/%s/contents" % self.history_id)
        datasets = [d for d in contents_response.json() if d["history_content_type"] == "dataset" and d["hid"] in [1, 2]]
        # Assert two datasets in source were hidden.
        assert len(datasets) == 2
        assert not datasets[0]["visible"]
        assert not datasets[1]["visible"]

    def test_update_dataset_collection(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            type="dataset_collection"
        )
        dataset_collection_response = self._post("histories/%s/contents" % self.history_id, payload)
        self._assert_status_code_is(dataset_collection_response, 200)
        hdca = dataset_collection_response.json()
        update_url = self._api_url("histories/%s/contents/dataset_collections/%s" % (self.history_id, hdca["id"]), use_key=True)
        # Awkward json.dumps required here because of https://trello.com/c/CQwmCeG6
        body = json.dumps(dict(name="newnameforpair"))
        update_response = put(update_url, data=body)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def test_hdca_copy(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
        )
        assert len(self._get("histories/%s/contents/dataset_collections" % second_history_id).json()) == 0
        create_response = self._post("histories/%s/contents/dataset_collections" % second_history_id, create_data)
        self.__check_create_collection_response(create_response)
        contents = self._get("histories/%s/contents/dataset_collections" % second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == self.history_id

    def test_hdca_copy_and_elements(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
            copy_elements=True,
        )
        assert len(self._get("histories/%s/contents/dataset_collections" % second_history_id).json()) == 0
        create_response = self._post("histories/%s/contents/dataset_collections" % second_history_id, create_data)
        self.__check_create_collection_response(create_response)

        contents = self._get("histories/%s/contents/dataset_collections" % second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == second_history_id

    def __get_paired_response_elements(self, contents):
        hdca = self.__show(contents).json()
        self._assert_has_keys(hdca, "name", "deleted", "visible", "elements")
        elements = hdca["elements"]
        assert len(elements) == 2
        element0 = elements[0]
        element1 = elements[1]
        self._assert_has_keys(element0, "object")
        self._assert_has_keys(element1, "object")

        return element0["object"], element1["object"]

    def test_hdca_from_library_datasets(self):
        ld = self.library_populator.new_library_dataset("el1")
        ldda_id = ld["ldda_id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("histories/%s/contents/dataset_collections" % self.history_id, create_data)
        hdca = self.__check_create_collection_response(create_response)
        elements = hdca["elements"]
        assert len(elements) == 1
        hda = elements[0]["object"]
        assert hda["hda_ldda"] == "hda"
        assert hda["history_content_type"] == "dataset"
        assert hda["copied_from_ldda_id"] == ldda_id

    def test_hdca_from_inaccessible_library_datasets(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library("HDCACreateInaccesibleLibrary")
        ldda_id = library_dataset["id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        with self._different_user():
            second_history_id = self._new_history()
            create_response = self._post("histories/%s/contents/dataset_collections" % second_history_id, create_data)
            # TODO: This should be 403 and a proper JSON response.
            self._assert_status_code_is(create_response, 400)

    def __check_create_collection_response(self, response):
        self._assert_status_code_is(response, 200)
        dataset_collection = response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted", "visible", "elements")
        return dataset_collection

    def __show(self, contents):
        show_response = self._get("histories/%s/contents/%ss/%s" % (self.history_id, contents["history_content_type"], contents["id"]))
        return show_response

    def __count_contents(self, history_id=None, **kwds):
        if history_id is None:
            history_id = self.history_id
        contents_response = self._get("histories/%s/contents" % history_id, kwds)
        return len(contents_response.json())

    def __assert_hda_has_full_details(self, hda_details):
        self._assert_has_keys(hda_details, "display_types", "display_apps")

    def __check_for_hda(self, contents_response, hda):
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hda_summary = contents[0]
        self.__assert_matches_hda(hda, hda_summary)
        return hda_summary

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]
class ObjectStoreJobsIntegrationTestCase(integration_util.IntegrationTestCase):
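    """Test file counts and contents across each distributed object store backend."""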

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        temp_directory = cls._test_driver.mkdtemp()
        cls.object_stores_parent = temp_directory
        for disk_store_file_name in ["files1", "files2", "files3"]:
            disk_store_path = os.path.join(temp_directory,
                                           disk_store_file_name)
            os.makedirs(disk_store_path)
            setattr(cls, "%s_path" % disk_store_file_name, disk_store_path)
        config_path = os.path.join(temp_directory, "object_store_conf.xml")
        with open(config_path, "w") as f:
            f.write(
                DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE.safe_substitute(
                    {"temp_directory": temp_directory}))
        config["object_store_config_file"] = config_path

    def setUp(self):
        super(ObjectStoreJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        with self.dataset_populator.test_history() as history_id:
            hda1 = self.dataset_populator.new_dataset(
                history_id, content=TEST_INPUT_FILES_CONTENT)
            create_10_inputs = {
                "input1": {
                    "src": "hda",
                    "id": hda1["id"]
                },
                "input2": {
                    "src": "hda",
                    "id": hda1["id"]
                },
            }
            self.dataset_populator.run_tool(
                "create_10",
                create_10_inputs,
                history_id,
                assert_ok=True,
            )
            self.dataset_populator.wait_for_history(history_id)

    def test_files_count_and_content_in_each_objectstore_backend(self):
        """
        According to the ObjectStore configuration given in the
        `DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE` variable, datasets
        can be stored on three backends, named:
            -   primary/files1;
            -   primary/files2;
            -   secondary/files3.

        The ObjectStore randomly distributes tool outputs across
        `primary/files1` and `primary/files2`, and falls back to
        `secondary/files3` only if both `primary` backends fail.

        This test runs a tool that creates ten dummy datasets and
        asserts that the ObjectStore correctly creates ten files
        across `primary/files1` and `primary/files2`, and none in
        `secondary/files3`, assuming it does not fail to persist
        data in the `primary` backends.
        """
        files_1_count = _files_count(self.files1_path)
        files_2_count = _files_count(self.files2_path)
        files_3_count = _files_count(self.files3_path)

        # Ensure no files written to the secondary/inactive hierarchical disk store.
        assert files_3_count == 0

        # Ensure the 10 datasets created by the tool were written to one of the
        # distributed object store's disk stores (it will have either 10 or 11 files,
        # depending on whether the input was also written there). The other disk store
        # may or may not have the input file, so it should have at most one file.
        assert files_1_count + files_2_count in (10, 11)

        # Other sanity checks on the test - just make sure the test was setup as intended
        # and not actually testing object store behavior.
        assert (files_1_count <= 11) and (files_2_count <= 11)
        assert (files_1_count >= 0) and (files_2_count >= 0)

        # TODO: ideally the following assertion should be separated in a different test method.
        contents = []
        path1_files = _get_datasets_files_in_path(self.files1_path)
        path2_files = _get_datasets_files_in_path(self.files2_path)
        path3_files = _get_datasets_files_in_path(self.files3_path)
        for filename in path1_files + path2_files + path3_files:
            with open(filename) as f:
                content = f.read().strip()
                if content != TEST_INPUT_FILES_CONTENT:
                    contents.append(content)

        # The tool creates ten dummy outputs, numbered 1 through 10.
        for expected_content in range(1, 11):
            assert str(expected_content) in contents
class HistoryContentsApiTestCase(api.ApiTestCase, TestsDatasets):
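    """Test history contents API endpoints for datasets and collections."""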
    def setUp(self):
        super(HistoryContentsApiTestCase, self).setUp()
        self.history_id = self._new_history()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_index_hda_summary(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents" %
                                      self.history_id)
        hda_summary = self.__check_for_hda(contents_response, hda1)
        assert "display_types" not in hda_summary  # Quick summary, not full details

    def test_index_hda_all_details(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=all" %
                                      self.history_id)
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_index_hda_detail_by_id(self):
        hda1 = self._new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=%s" %
                                      (self.history_id, hda1["id"]))
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_show_hda(self):
        hda1 = self._new_dataset(self.history_id)
        show_response = self.__show(hda1)
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def test_hda_copy(self):
        hda1 = self._new_dataset(self.history_id)
        create_data = dict(
            source='hda',
            content=hda1["id"],
        )
        second_history_id = self._new_history()
        assert self.__count_contents(second_history_id) == 0
        create_response = self._post(
            "histories/%s/contents" % second_history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(second_history_id) == 1

    def test_library_copy(self):
        ld = self.library_populator.new_library_dataset("lda_test_library")
        create_data = dict(
            source='library',
            content=ld["id"],
        )
        assert self.__count_contents(self.history_id) == 0
        create_response = self._post("histories/%s/contents" % self.history_id,
                                     create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(self.history_id) == 1

    def test_update(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        update_response = self._raw_update(hda1["id"], dict(deleted=True))
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hda1)
        assert str(show_response.json()["deleted"]).lower() == "true"

        update_response = self._raw_update(hda1["id"],
                                           dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

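        # Repeating the identical rename should still succeed and leave the name unchanged.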
        update_response = self._raw_update(hda1["id"],
                                           dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        unicode_name = u'ржевский сапоги'
        update_response = self._raw_update(hda1["id"], dict(name=unicode_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == unicode_name, updated_hda

        quoted_name = '"Mooo"'
        update_response = self._raw_update(hda1["id"], dict(name=quoted_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == quoted_name, quoted_name

    def test_update_type_failures(self):
        hda1 = self._wait_for_new_hda()
        update_response = self._raw_update(hda1["id"],
                                           dict(deleted='not valid'))
        self._assert_status_code_is(update_response, 400)

    def _wait_for_new_hda(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        return hda1

    def _raw_update(self, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" %
                                   (self.history_id, item_id),
                                   use_key=True)
        update_response = put(update_url, json=data)
        return update_response

    def test_delete(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (self.history_id, hda1["id"]))
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_purge(self):
        hda1 = self._new_dataset(self.history_id)
        self._wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        assert str(self.__show(hda1).json()["purged"]).lower() == "false"
        data = {'purge': True}
        delete_response = self._delete("histories/%s/contents/%s" %
                                       (self.history_id, hda1["id"]),
                                       data=data)
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"
        assert str(self.__show(hda1).json()["purged"]).lower() == "true"

    def test_dataset_collection_creation_on_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        endpoint = "histories/%s/contents" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_creation_on_typed_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id)
        endpoint = "histories/%s/contents/dataset_collections" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def _check_pair_creation(self, endpoint, payload):
        pre_collection_count = self.__count_contents(type="dataset_collection")
        pre_dataset_count = self.__count_contents(type="dataset")
        pre_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        dataset_collection_response = self._post(endpoint, payload)

        dataset_collection = self.__check_create_collection_response(
            dataset_collection_response)

        post_collection_count = self.__count_contents(
            type="dataset_collection")
        post_dataset_count = self.__count_contents(type="dataset")
        post_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        # Test filtering types with index.
        assert pre_collection_count == 0
        assert post_collection_count == 1
        assert post_combined_count == pre_dataset_count + 1
        assert post_combined_count == pre_combined_count + 1
        assert pre_dataset_count == post_dataset_count

        # Test show dataset collection.
        collection_url = "histories/%s/contents/dataset_collections/%s" % (
            self.history_id, dataset_collection["id"])
        show_response = self._get(collection_url)
        self._assert_status_code_is(show_response, 200)
        dataset_collection = show_response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted")

        assert not dataset_collection["deleted"]

        delete_response = delete(self._api_url(collection_url, use_key=True))
        self._assert_status_code_is(delete_response, 200)

        show_response = self._get(collection_url)
        dataset_collection = show_response.json()
        assert dataset_collection["deleted"]

    @skip_without_tool("collection_creates_list")
    def test_jobs_summary_simple_hdca(self):
        create_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"])
        hdca_id = create_response.json()["id"]
        run = self.dataset_populator.run_collection_creates_list(
            self.history_id, hdca_id)
        collections = run['output_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (
            self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")

    @skip_without_tool("cat1")
    def test_jobs_summary_implicit_hdca(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, contents=["123", "456"])
        hdca_id = create_response.json()["id"]
        inputs = {
            "input1": {
                'batch': True,
                'values': [{
                    'src': 'hdca',
                    'id': hdca_id
                }]
            },
        }
        run = self.dataset_populator.run_tool("cat1",
                                              inputs=inputs,
                                              history_id=self.history_id)
        self.dataset_populator.wait_for_history_jobs(self.history_id)
        collections = run['implicit_collections']
        collection = collections[0]
        jobs_summary_url = "histories/%s/contents/dataset_collections/%s/jobs_summary" % (
            self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")
        states = jobs_summary["states"]
        assert states.get("ok") == 2, states

    def test_dataset_collection_hide_originals(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")

        payload["hide_source_items"] = True
        dataset_collection_response = self._post(
            "histories/%s/contents" % self.history_id, payload)
        self.__check_create_collection_response(dataset_collection_response)

        contents_response = self._get("histories/%s/contents" %
                                      self.history_id)
        datasets = [
            d for d in contents_response.json()
            if d["history_content_type"] == "dataset" and d["hid"] in [1, 2]
        ]
        # Assert two datasets in source were hidden.
        assert len(datasets) == 2
        assert not datasets[0]["visible"]
        assert not datasets[1]["visible"]

    def test_update_dataset_collection(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        dataset_collection_response = self._post(
            "histories/%s/contents" % self.history_id, payload)
        self._assert_status_code_is(dataset_collection_response, 200)
        hdca = dataset_collection_response.json()
        update_url = self._api_url(
            "histories/%s/contents/dataset_collections/%s" %
            (self.history_id, hdca["id"]),
            use_key=True)
        # Awkward json.dumps required here because of https://trello.com/c/CQwmCeG6
        body = json.dumps(dict(name="newnameforpair"))
        update_response = put(update_url, data=body)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def test_hdca_copy(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
        )
        assert len(
            self._get("histories/%s/contents/dataset_collections" %
                      second_history_id).json()) == 0
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % second_history_id,
            create_data)
        self.__check_create_collection_response(create_response)
        contents = self._get("histories/%s/contents/dataset_collections" %
                             second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == self.history_id

    def test_hdca_copy_and_elements(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self._new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
            copy_elements=True,
        )
        assert len(
            self._get("histories/%s/contents/dataset_collections" %
                      second_history_id).json()) == 0
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % second_history_id,
            create_data)
        self.__check_create_collection_response(create_response)

        contents = self._get("histories/%s/contents/dataset_collections" %
                             second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == second_history_id

    def __get_paired_response_elements(self, contents):
        hdca = self.__show(contents).json()
        self._assert_has_keys(hdca, "name", "deleted", "visible", "elements")
        elements = hdca["elements"]
        assert len(elements) == 2
        element0 = elements[0]
        element1 = elements[1]
        self._assert_has_keys(element0, "object")
        self._assert_has_keys(element1, "object")

        return element0["object"], element1["object"]

    def test_hdca_from_library_datasets(self):
        ld = self.library_populator.new_library_dataset("el1")
        ldda_id = ld["ldda_id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % self.history_id,
            create_data)
        hdca = self.__check_create_collection_response(create_response)
        elements = hdca["elements"]
        assert len(elements) == 1
        hda = elements[0]["object"]
        assert hda["hda_ldda"] == "hda"
        assert hda["history_content_type"] == "dataset"
        assert hda["copied_from_ldda_id"] == ldda_id

    def test_hdca_from_inaccessible_library_datasets(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "HDCACreateInaccesibleLibrary")
        ldda_id = library_dataset["id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        with self._different_user():
            second_history_id = self._new_history()
            create_response = self._post(
                "histories/%s/contents/dataset_collections" %
                second_history_id, create_data)
            # TODO: This should be 403 and a proper JSON response.
            self._assert_status_code_is(create_response, 400)

    def __check_create_collection_response(self, response):
        self._assert_status_code_is(response, 200)
        dataset_collection = response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted",
                              "visible", "elements")
        return dataset_collection

    def __show(self, contents):
        show_response = self._get(
            "histories/%s/contents/%ss/%s" %
            (self.history_id, contents["history_content_type"],
             contents["id"]))
        return show_response

    def __count_contents(self, history_id=None, **kwds):
        if history_id is None:
            history_id = self.history_id
        contents_response = self._get("histories/%s/contents" % history_id,
                                      kwds)
        return len(contents_response.json())

    def __assert_hda_has_full_details(self, hda_details):
        self._assert_has_keys(hda_details, "display_types", "display_apps")

    def __check_for_hda(self, contents_response, hda):
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hda_summary = contents[0]
        self.__assert_matches_hda(hda, hda_summary)
        return hda_summary

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]
class ObjectStoreJobsIntegrationTestCase(integration_util.IntegrationTestCase):
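    """Test that job resource parameters route tool outputs to the dynamic object stores."""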

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        temp_directory = cls._test_driver.mkdtemp()
        cls.object_stores_parent = temp_directory
        for disk_store_file_name in [
                "files_default", "files_static", "files_dynamic_ebs",
                "files_dynamic_s3"
        ]:
            disk_store_path = os.path.join(temp_directory,
                                           disk_store_file_name)
            os.makedirs(disk_store_path)
            setattr(cls, "%s_path" % disk_store_file_name, disk_store_path)
        config_path = os.path.join(temp_directory, "object_store_conf.xml")
        with open(config_path, "w") as f:
            f.write(
                DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE.safe_substitute(
                    {"temp_directory": temp_directory}))
        config["object_store_config_file"] = config_path
        config["job_config_file"] = JOB_CONFIG_FILE
        config["job_resource_params_file"] = JOB_RESOURCE_PARAMETERS_CONFIG_FILE

    def setUp(self):
        super(ObjectStoreJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def _object_store_counts(self):
        files_default_count = _files_count(self.files_default_path)
        files_static_count = _files_count(self.files_static_path)
        files_dynamic_ebs_count = _files_count(self.files_dynamic_ebs_path)
        files_dynamic_s3_count = _files_count(self.files_dynamic_s3_path)
        return files_default_count, files_static_count, files_dynamic_ebs_count, files_dynamic_s3_count

    def _assert_file_counts(self, default, static, dynamic_ebs, dynamic_s3):
        files_default_count = _files_count(self.files_default_path)
        files_static_count = _files_count(self.files_static_path)
        files_dynamic_ebs_count = _files_count(self.files_dynamic_ebs_path)
        files_dynamic_s3_count = _files_count(self.files_dynamic_s3_path)
        assert default == files_default_count
        assert static == files_static_count
        assert dynamic_ebs == files_dynamic_ebs_count
        assert dynamic_s3 == files_dynamic_s3_count

    def test_tool_simple_constructs(self):
        with self.dataset_populator.test_history() as history_id:

            def _run_tool(tool_id, inputs):
                self.dataset_populator.run_tool(
                    tool_id,
                    inputs,
                    history_id,
                    assert_ok=True,
                )
                self.dataset_populator.wait_for_history(history_id)

            self._assert_file_counts(0, 0, 0, 0)

            hda1 = self.dataset_populator.new_dataset(history_id,
                                                      content="1 2 3")
            self.dataset_populator.wait_for_history(history_id)
            hda1_input = {"src": "hda", "id": hda1["id"]}

            # One file uploaded, added to default object store ID.
            self._assert_file_counts(1, 0, 0, 0)

            # should create two files in static object store.
            _run_tool("multi_data_param", {"f1": hda1_input, "f2": hda1_input})
            self._assert_file_counts(1, 2, 0, 0)

            # should create 10 files in the ebs object store.
            create_10_inputs = {
                "input1": hda1_input,
                "input2": hda1_input,
            }
            _run_tool("create_10", create_10_inputs)
            self._assert_file_counts(1, 2, 10, 0)

            # should create 10 files in S3 object store.
            create_10_inputs = {
                "__job_resource|__job_resource__select": "yes",
                "__job_resource|how_store": "slow",
                "input1": hda1_input,
                "input2": hda1_input,
            }
            _run_tool("create_10", create_10_inputs)
            self._assert_file_counts(1, 2, 10, 10)
class DockerizedJobsIntegrationTestCase(integration_util.IntegrationTestCase,
                                        RunsEnvironmentJobs):
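    """Test running tools in Docker (mulled) containers and their job environment."""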

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = tempfile.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = DOCKERIZED_JOB_CONFIG_FILE
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"
        config["enable_beta_mulled_containers"] = "true"

    def setUp(self):
        super(DockerizedJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_explicit(self):
        self.dataset_populator.run_tool("mulled_example_explicit", {},
                                        self.history_id)
        self.dataset_populator.wait_for_history(self.history_id,
                                                assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(
            self.history_id)
        assert "0.7.15-r1140" in output

    def test_mulled_simple(self):
        self.dataset_populator.run_tool("mulled_example_simple", {},
                                        self.history_id)
        self.dataset_populator.wait_for_history(self.history_id,
                                                assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(
            self.history_id)
        assert "0.7.15-r1140" in output

    def test_docker_job_environment(self):
        job_env = self._run_and_get_environment_properties(
            "job_environment_default")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        assert job_env.home.startswith(self.jobs_directory)
        assert job_env.home.endswith("/home")

    def test_docker_job_environment_legacy(self):
        job_env = self._run_and_get_environment_properties(
            "job_environment_default_legacy")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        # Should we change env_pass_through to just always include TMP and HOME for docker?
        # I'm not sure, if yes this would change.
        assert job_env.home == "/", job_env.home