async def run_pc_pre_validation_cli(
            self, pc_instance: PrivateComputationInstance) -> None:
        """Launch the PC pre-validation binary in a OneDocker container.

        Builds CLI args from the instance's input path and the validator
        region, starts the container via RunBinaryBaseService, and appends
        a StageStateInstance tracking the launched container to
        pc_instance.instances.

        Args:
            pc_instance: the private computation instance to validate
        """
        region = self._pc_validator_config.region
        binary_name = OneDockerBinaryNames.PC_PRE_VALIDATION.value
        binary_config = self._onedocker_binary_config_map[binary_name]
        cmd_args = get_cmd_args(pc_instance.input_path, region, binary_config)

        # Propagate the binary repository path into the container environment.
        env_vars = {ONEDOCKER_REPOSITORY_PATH: binary_config.repository_path}
        container_instances = await RunBinaryBaseService().start_containers(
            [cmd_args],
            self._onedocker_svc,
            binary_config.binary_version,
            binary_name,
            timeout=PRE_VALIDATION_CHECKS_TIMEOUT,
            env_vars=env_vars,
        )

        stage_state = StageStateInstance(
            pc_instance.instance_id,
            pc_instance.current_stage.name,
            containers=container_instances,
        )
        pc_instance.instances.append(stage_state)
        # A single cmd_args entry was passed, so index 0 is the only
        # container started for this stage.
        self._logger.info(
            f"[PCPreValidation] - Started container instance_id: {container_instances[0].instance_id} status: {container_instances[0].status}"
        )
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Execute the PID run-protocol stage for this computation.

        Args:
            pc_instance: the private computation instance to start pid run protocol stage service
            server_ips: only used by partner to get server hostnames

        Returns:
            An updated version of pc_instance
        """
        self._logger.info(f"[{self}] Starting PIDRunProtocolStageService")
        containers = await self.start_pid_run_protocol_service(
            pc_instance=pc_instance, server_ips=server_ips
        )
        self._logger.info("PIDRunProtocolStageService finished")
        # Record the launched containers against the current stage so the
        # instance can later report this stage's status.
        pc_instance.instances.append(
            StageStateInstance(
                pc_instance.instance_id,
                pc_instance.current_stage.name,
                containers=containers,
            )
        )
        return pc_instance
    def test_cancel_current_stage_state(self, mock_get_stage_service) -> None:
        """cancel_current_stage should stop the stage service and surface its
        failed status on the returned instance."""
        # Stage-service stub: reports CREATION_FAILED once the stage is stopped.
        mock_stage_svc = Mock(spec=self._get_dummy_stage_svc())
        mock_stage_svc.get_status.return_value = (
            PrivateComputationInstanceStatus.CREATION_FAILED)
        mock_get_stage_service.return_value = mock_stage_svc
        # create one StageStateInstance to be put into PrivateComputationInstance
        # at the beginning of the cancel_current_stage function
        state_instance = StageStateInstance(
            instance_id=self.test_private_computation_id,
            stage_name="test_stage",
        )
        private_computation_instance = self.create_sample_instance(
            status=PrivateComputationInstanceStatus.CREATION_STARTED,
            role=PrivateComputationRole.PARTNER,
            instances=[state_instance],
        )
        # Repository read always returns the prepared instance.
        self.private_computation_service.instance_repository.read = MagicMock(
            return_value=private_computation_instance)

        # call cancel, expect no exception
        private_computation_instance = (
            self.private_computation_service.cancel_current_stage(
                instance_id=self.test_private_computation_id, ))

        # asserts
        mock_stage_svc.stop_service.assert_called_once_with(
            private_computation_instance)
        self.assertEqual(
            PrivateComputationInstanceStatus.CREATION_FAILED,
            private_computation_instance.status,
        )
Example #4
0
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Run the PID MR (MapReduce) match workflow for this computation.

        Args:
            pc_instance: the private computation instance to run mr match
            server_ips: only used by the partner role. These are the ip addresses of the publisher's containers.

        Returns:
            An updated version of pc_instance
        """
        state = StageStateInstance(
            pc_instance.instance_id,
            pc_instance.current_stage.name,
        )
        logging.info("Start PID MR Stage Service")
        pid_configs = pc_instance.pid_configs
        # Only kick off the workflow when every required config section is present.
        has_all_configs = bool(pid_configs) and PIDMR in pid_configs and all(
            section in pid_configs[PIDMR]
            for section in (PIDRunConfigs, PIDWorkflowConfigs, SPARK_CONFIGS)
        )
        if has_all_configs:
            mr_configs = pid_configs[PIDMR]
            data_configs = {
                INTPUT: self.get_s3uri_from_url(pc_instance.input_path),
                OUTPUT: self.get_s3uri_from_url(
                    pc_instance.pid_mr_stage_output_data_path
                ),
                INSTANCE: self.removePrefixForInstance(pc_instance.instance_id),
            }
            # Data configs take precedence over run/spark configs on key clash.
            state.instance_id = self.workflow_svc.start_workflow(
                mr_configs[PIDWorkflowConfigs],
                pc_instance.instance_id,
                {
                    **mr_configs[PIDRunConfigs],
                    **mr_configs[SPARK_CONFIGS],
                    **data_configs,
                },
            )
        pc_instance.instances.append(state)
        return pc_instance
Example #5
0
 async def test_pid_shard_stage_service(
     self,
     pc_role: PrivateComputationRole,
     test_num_containers: int,
     has_hmac_key: bool,
 ) -> None:
     """PIDShardStageService.run_async should start containers with the
     expected arguments and append one StageStateInstance to the instance.
     """
     # Fixed: local was misspelled "hamc_key_expected".
     hmac_key_expected = self.test_hmac_key if has_hmac_key else None
     pc_instance = self.create_sample_pc_instance(
         pc_role, test_num_containers, hmac_key_expected
     )
     stage_svc = PIDShardStageService(
         storage_svc=self.mock_storage_svc,
         onedocker_svc=self.mock_onedocker_svc,
         onedocker_binary_config_map=self.onedocker_binary_config_map,
         container_timeout=self.container_timeout,
     )
     containers = [
         self.create_container_instance() for _ in range(test_num_containers)
     ]
     self.mock_onedocker_svc.start_containers = MagicMock(return_value=containers)
     self.mock_onedocker_svc.wait_for_pending_containers = AsyncMock(
         return_value=containers
     )
     updated_pc_instance = await stage_svc.run_async(pc_instance=pc_instance)
     env_vars = {
         "ONEDOCKER_REPOSITORY_PATH": self.onedocker_binary_config.repository_path
     }
     args_ls_expect = self.get_args_expect(
         pc_role, test_num_containers, has_hmac_key
     )
     # test the start_containers is called with expected parameters
     self.mock_onedocker_svc.start_containers.assert_called_with(
         package_name=self.binary_name,
         version=self.onedocker_binary_config.binary_version,
         cmd_args_list=args_ls_expect,
         timeout=self.container_timeout,
         env_vars=env_vars,
     )
     # test the return value is as expected
     # Fixed: messages previously misspelled the class as "StageStageInstance".
     self.assertEqual(
         len(updated_pc_instance.instances),
         1,
         "Failed to add the StageStateInstance into pc_instance",
     )
     stage_state_expect = StageStateInstance(
         pc_instance.instance_id,
         pc_instance.current_stage.name,
         containers=containers,
     )
     stage_state_actual = updated_pc_instance.instances[0]
     self.assertEqual(
         stage_state_actual,
         stage_state_expect,
         "Appended StageStateInstance is not as expected",
     )
Example #6
0
    async def test_pid_run_protocol_stage(
        self, pc_role: PrivateComputationRole, multikey_enabled: bool
    ) -> None:
        """PIDRunProtocolStageService.run_async should start containers with
        the expected arguments and append a StageStateInstance to the instance.
        """
        # Multikey protocol is only usable with a single container.
        protocol = (
            PIDProtocol.UNION_PID_MULTIKEY
            if self.test_num_containers == 1 and multikey_enabled
            else PIDProtocol.UNION_PID
        )
        pc_instance = self.create_sample_pc_instance(pc_role)
        stage_svc = PIDRunProtocolStageService(
            storage_svc=self.mock_storage_svc,
            onedocker_svc=self.mock_onedocker_svc,
            onedocker_binary_config_map=self.onedocker_binary_config_map,
            multikey_enabled=multikey_enabled,
        )
        containers = [
            await self.create_container_instance()
            for _ in range(self.test_num_containers)
        ]
        self.mock_onedocker_svc.start_containers = MagicMock(return_value=containers)
        self.mock_onedocker_svc.wait_for_pending_containers = AsyncMock(
            return_value=containers
        )
        updated_pc_instance = await stage_svc.run_async(
            pc_instance=pc_instance, server_ips=self.server_ips
        )

        binary_name = PIDRunProtocolBinaryService.get_binary_name(protocol, pc_role)
        binary_config = self.onedocker_binary_config_map[binary_name]
        env_vars = {ONEDOCKER_REPOSITORY_PATH: binary_config.repository_path}
        args_str_expect = self.get_args_expect(pc_role, protocol, self.use_row_numbers)
        # test the start_containers is called with expected parameters
        self.mock_onedocker_svc.start_containers.assert_called_with(
            package_name=binary_name,
            version=binary_config.binary_version,
            cmd_args_list=args_str_expect,
            timeout=DEFAULT_CONTAINER_TIMEOUT_IN_SEC,
            env_vars=env_vars,
        )
        # test the return value is as expected
        # Fixed: messages previously misspelled the class as "StageStageInstance".
        self.assertEqual(
            len(updated_pc_instance.instances),
            self.test_num_containers,
            "Failed to add the StageStateInstance into pc_instance",
        )
        stage_state_expect = StageStateInstance(
            pc_instance.instance_id,
            pc_instance.current_stage.name,
            containers=containers,
        )
        stage_state_actual = updated_pc_instance.instances[0]
        self.assertEqual(
            stage_state_actual,
            stage_state_expect,
            "Appended StageStateInstance is not as expected",
        )
Example #7
0
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Run the prepare-data spine combiner stage of private computation.

        Args:
            pc_instance: the private computation instance to run prepare data with
            server_ips: ignored

        Returns:
            An updated version of pc_instance
        """
        combine_output_path = pc_instance.data_processing_output_path + "_combine"

        self._logger.info(f"[{self}] Starting id spine combiner service")

        # Multikey support only applies when a PID service is configured.
        multikey = (
            self._pid_svc.multikey_enabled if self._pid_svc is not None else False
        )
        pid_protocol = get_pid_protocol_from_num_shards(
            pc_instance.num_pid_containers, multikey
        )

        # TODO: we will write log_cost_to_s3 to the instance, so this function interface
        #   will get simplified
        containers = await start_combiner_service(
            pc_instance,
            self._onedocker_svc,
            self._onedocker_binary_config_map,
            combine_output_path,
            log_cost_to_s3=self._log_cost_to_s3,
            max_id_column_count=get_max_id_column_cnt(pid_protocol),
        )
        self._logger.info("Finished running CombinerService")

        pc_instance.instances.append(
            StageStateInstance(
                pc_instance.instance_id,
                pc_instance.current_stage.name,
                containers=containers,
            )
        )
        return pc_instance
 def setUp(self) -> None:
     """Build a completed StageStateInstance with two finished containers
     as the shared fixture for this test case."""
     self.stage_state_instance = StageStateInstance(
         instance_id="stage_state_instance",
         stage_name="test_stage",
         status=StageStateInstanceStatus.COMPLETED,
         containers=[
             ContainerInstance(
                 instance_id="test_container_instance_1",
                 ip_address="192.0.2.4",
                 status=ContainerInstanceStatus.COMPLETED,
             ),
             ContainerInstance(
                 instance_id="test_container_instance_2",
                 ip_address="192.0.2.5",
                 status=ContainerInstanceStatus.COMPLETED,
             ),
         ],
         # Fixed timestamps: the stage "ran" for 5 seconds.
         creation_ts=1646642432,
         end_ts=1646642432 + 5,
     )
Example #9
0
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Run the prepare-data reshard stage of private computation.

        Args:
            pc_instance: the private computation instance to run prepare data with
            server_ips: ignored

        Returns:
            An updated version of pc_instance
        """
        combine_output_path = pc_instance.data_processing_output_path + "_combine"

        self._logger.info(f"[{self}] Starting reshard service")

        # reshard each file into x shards
        #     note we need each file to be sharded into the same # of files
        #     because we want to keep the data of each existing file to run
        #     on the same container
        containers = await start_sharder_service(
            pc_instance,
            self._onedocker_svc,
            self._onedocker_binary_config_map,
            combine_output_path,
        )
        self._logger.info("All sharding coroutines finished")

        pc_instance.instances.append(
            StageStateInstance(
                pc_instance.instance_id,
                pc_instance.current_stage.name,
                containers=containers,
            )
        )
        return pc_instance
    async def test_get_status_logs_a_helpful_error_when_the_validation_fails(
            self, mock_get_pc_status_from_stage_state) -> None:
        """When the pre-validation stage fails, get_status should log an error
        containing the ECS task id and a console link to the failed task."""
        pc_instance = self._pc_instance
        # Compose an ECS task ARN; get_status is expected to parse the task id
        # and cluster name back out of it for the error message.
        task_id = "test-task-id-123"
        cluster_name = "test-cluster-name"
        account_id = "1234567890"
        region = "us-west-1"
        instance_id = f"arn:aws:ecs:{region}:{account_id}:task/{cluster_name}/{task_id}"
        container_instance = ContainerInstance(instance_id=instance_id)
        stage_state_instance = StageStateInstance(
            instance_id="instance-id-0",
            stage_name="stage-name-1",
            containers=[container_instance],
        )
        unioned_pc_instances = [stage_state_instance]
        # pyre-fixme[8]: Attribute has type `List[Union[StageStateInstance,
        #  PCSMPCInstance, PIDInstance, PostProcessingInstance]]`; used as
        #  `List[StageStateInstance]`.
        pc_instance.instances = unioned_pc_instances
        expected_status = PrivateComputationInstanceStatus.INPUT_DATA_VALIDATION_FAILED
        onedocker_svc_mock = MagicMock()
        onedocker_svc_mock.get_cluster.side_effect = [cluster_name]
        pc_validator_config = PCValidatorConfig(
            region=region,
            pc_pre_validator_enabled=True,
        )
        failed_task_link = f"https://{region}.console.aws.amazon.com/ecs/home?region={region}#/clusters/{cluster_name}/tasks/{task_id}/details"
        logger_mock = MagicMock()
        mock_get_pc_status_from_stage_state.side_effect = [expected_status]

        stage_service = InputDataValidationStageService(
            pc_validator_config, onedocker_svc_mock,
            self.onedocker_binary_config_map)
        # Swap in a mock logger so the error call can be asserted on.
        stage_service._logger = logger_mock
        status = stage_service.get_status(pc_instance)

        self.assertEqual(status, expected_status)
        logger_mock.error.assert_called_with(
            f"[PCPreValidation] - stage failed because of some failed validations. Please check the logs in ECS for task id '{task_id}' to see the validation issues:\n"
            + f"Failed task link: {failed_task_link}")
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Execute the PID shard stage for this computation.

        Args:
            pc_instance: the private computation instance to start pid shard stage service
            server_ips: No need in this stage.

        Returns:
            An updated version of pc_instance
        """
        self._logger.info(f"[{self}] Starting PIDShardStageService")
        containers = await self.start_pid_shard_service(pc_instance)

        self._logger.info("PIDShardStageService finished")
        # Record the launched containers against the current stage so the
        # instance can later report this stage's status.
        pc_instance.instances.append(
            StageStateInstance(
                pc_instance.instance_id,
                pc_instance.current_stage.name,
                containers=containers,
            )
        )
        return pc_instance
Example #12
0
 async def test_pid_prepare_stage_service(
     self,
     pc_role: PrivateComputationRole,
     multikey_enabled: bool,
     test_num_containers: int,
 ) -> None:
     """PIDPrepareStageService.run_async should start containers with the
     expected arguments and append one StageStateInstance to the instance.
     """
     # Multikey protocol is only usable with a single container.
     pid_protocol = (
         PIDProtocol.UNION_PID_MULTIKEY
         if test_num_containers == 1 and multikey_enabled
         else PIDProtocol.UNION_PID
     )
     max_col_cnt_expect = (
         DEFAULT_MULTIKEY_PROTOCOL_MAX_COLUMN_COUNT
         if pid_protocol is PIDProtocol.UNION_PID_MULTIKEY
         else 1
     )
     pc_instance = self.create_sample_pc_instance(pc_role, test_num_containers)
     stage_svc = PIDPrepareStageService(
         storage_svc=self.mock_storage_svc,
         onedocker_svc=self.mock_onedocker_svc,
         onedocker_binary_config_map=self.onedocker_binary_config_map,
         multikey_enabled=multikey_enabled,
     )
     containers = [
         self.create_container_instance() for _ in range(test_num_containers)
     ]
     self.mock_onedocker_svc.start_containers = MagicMock(return_value=containers)
     self.mock_onedocker_svc.wait_for_pending_containers = AsyncMock(
         return_value=containers
     )
     updated_pc_instance = await stage_svc.run_async(pc_instance=pc_instance)
     env_vars = {
         "ONEDOCKER_REPOSITORY_PATH": self.onedocker_binary_config.repository_path
     }
     args_ls_expect = self.get_args_expected(
         pc_role, test_num_containers, max_col_cnt_expect
     )
     # test the start_containers is called with expected parameters
     self.mock_onedocker_svc.start_containers.assert_called_with(
         package_name=self.binary_name,
         version=self.onedocker_binary_config.binary_version,
         cmd_args_list=args_ls_expect,
         timeout=self.container_timeout,
         env_vars=env_vars,
     )
     # test the return value is as expected
     self.assertEqual(
         len(updated_pc_instance.instances),
         1,
         "Failed to add the StageStateInstance into pc_instance",
     )
     stage_state_expect = StageStateInstance(
         pc_instance.instance_id,
         pc_instance.current_stage.name,
         containers=containers,
     )
     stage_state_actual = updated_pc_instance.instances[0]
     # Fixed: message previously misspelled the class as "StageStageInstance".
     self.assertEqual(
         stage_state_actual,
         stage_state_expect,
         "Appended StageStateInstance is not as expected",
     )