def test_update(self) -> None:
    instance_id = self._get_random_id()
    test_update_private_computation_instance = PrivateComputationInstance(
        instance_id=instance_id,
        role=PrivateComputationRole.PUBLISHER,
        instances=[self.test_mpc_instance],
        status=PrivateComputationInstanceStatus.CREATED,
        status_update_ts=1600000000,
        num_files_per_mpc_container=40,
        game_type=PrivateComputationGameType.LIFT,
        input_path="in",
        output_dir="out",
        num_pid_containers=4,
        num_mpc_containers=4,
        concurrency=1,
    )
    self.repo.create(test_update_private_computation_instance)

    # Create a new MPC instance to be added to instances
    test_mpc_instance_new = PCSMPCInstance.create_instance(
        instance_id=instance_id,
        game_name="aggregation",
        mpc_party=MPCParty.SERVER,
        num_workers=1,
    )
    instances_new = [self.test_mpc_instance, test_mpc_instance_new]

    # Update instances
    test_update_private_computation_instance.instances = instances_new
    self.repo.update(test_update_private_computation_instance)

    # Assert that instances was updated
    self.assertEqual(self.repo.read(instance_id).instances, instances_new)
    self.repo.delete(instance_id)
def test_pc_deserialization(self) -> None:
    # This tests that old fields (and instances) can still be deserialized
    with open(LIFT_PC_PATH) as f:
        instance_json = f.read().strip()
    try:
        PrivateComputationInstance.loads_schema(instance_json)
    except Exception as e:
        raise RuntimeError(ERR_MSG) from e
def _update_instance(
    self, private_computation_instance: PrivateComputationInstance
) -> PrivateComputationInstance:
    stage = private_computation_instance.current_stage
    stage_svc = stage.get_stage_service(self.stage_service_args)
    self.logger.info(f"Updating instance | stage={stage!r}")
    new_status = stage_svc.get_status(private_computation_instance)
    private_computation_instance.update_status(new_status, self.logger)
    self.instance_repository.update(private_computation_instance)
    self.logger.info(
        f"Finished updating instance: {private_computation_instance.instance_id}"
    )
    return private_computation_instance
async def test_run_async(self, pid_mr_svc_mock) -> None:
    pc_instance = PrivateComputationInstance(
        instance_id="publisher_123",
        role=PrivateComputationRole.PUBLISHER,
        instances=[],
        status=PrivateComputationInstanceStatus.PID_MR_STARTED,
        status_update_ts=1600000000,
        num_pid_containers=1,
        num_mpc_containers=1,
        num_files_per_mpc_container=1,
        game_type=PrivateComputationGameType.LIFT,
        input_path="https://mpc-aem-exp-platform-input.s3.us-west-2.amazonaws.com/pid_test_data/stress_test/input.csv",
        output_dir="https://mpc-aem-exp-platform-input.s3.us-west-2.amazonaws.com/pid_test/output",
        pid_configs={
            "pid_mr": {
                "PIDWorkflowConfigs": {"state_machine_arn": "machine_arn"},
                "PIDRunConfigs": {"conf": "conf1"},
                "sparkConfigs": {"conf-2": "conf2"},
            }
        },
    )
    flow = PrivateComputationMRStageFlow
    pc_instance._stage_flow_cls_name = flow.get_cls_name()
    service = SfnWorkflowService("us-west-2", "access_key", "access_data")
    service.start_workflow = MagicMock(return_value="execution_arn")
    service.get_workflow_status = MagicMock(return_value=WorkflowStatus.COMPLETED)
    stage_svc = PIDMRStageService(service)
    await stage_svc.run_async(pc_instance)
    self.assertEqual(
        stage_svc.get_status(pc_instance),
        PrivateComputationInstanceStatus.PID_MR_COMPLETED,
    )
    self.assertEqual(
        pc_instance.pid_mr_stage_output_data_path,
        "https://mpc-aem-exp-platform-input.s3.us-west-2.amazonaws.com/pid_test/output/publisher_123_out_dir/pid_mr",
    )
    self.assertEqual(pc_instance.instances[0].instance_id, "execution_arn")
    self.assertIsInstance(pc_instance.instances[0], StageStateInstance)
async def test_run_async(self, pid_svc_mock) -> None:
    pc_instance = PrivateComputationInstance(
        instance_id="123",
        role=PrivateComputationRole.PUBLISHER,
        instances=[],
        status=PrivateComputationInstanceStatus.CREATED,
        status_update_ts=1600000000,
        num_pid_containers=1,
        num_mpc_containers=1,
        num_files_per_mpc_container=1,
        game_type=PrivateComputationGameType.LIFT,
        input_path="456",
        output_dir="789",
    )
    pid_instance = PIDInstance(
        instance_id="123_id_match0",
        protocol=PIDProtocol.UNION_PID,
        pid_role=PIDRole.PUBLISHER,
        num_shards=2,
        input_path=pc_instance.input_path,
        output_path=pc_instance.pid_stage_output_data_path,
        status=PIDInstanceStatus.STARTED,
    )
    pid_svc_mock.run_instance = AsyncMock(return_value=pid_instance)
    stage_svc = IdMatchStageService(pid_svc_mock)
    await stage_svc.run_async(pc_instance)
    self.assertIsInstance(pc_instance.instances[0], PIDInstance)
async def run_async(
    self,
    pc_instance: PrivateComputationInstance,
    server_ips: Optional[List[str]] = None,
) -> PrivateComputationInstance:
    """Runs the private computation prepare data stage

    Args:
        pc_instance: the private computation instance to run prepare data with
        server_ips: ignored

    Returns:
        An updated version of pc_instance
    """
    output_path = pc_instance.data_processing_output_path
    combine_output_path = output_path + "_combine"

    self._logger.info(f"[{self}] Starting id spine combiner service")
    # TODO: we will write log_cost_to_s3 to the instance, so this function
    # interface will get simplified
    await start_combiner_service(
        pc_instance,
        self._onedocker_svc,
        self._onedocker_binary_config_map,
        combine_output_path,
        log_cost_to_s3=self._log_cost_to_s3,
        wait_for_containers=True,
    )
    self._logger.info("Finished running CombinerService, starting to reshard")

    # Reshard each file into x shards. Note: each file must be sharded into the
    # same number of shards so that the data from each existing file runs on
    # the same container.
    await start_sharder_service(
        pc_instance,
        self._onedocker_svc,
        self._onedocker_binary_config_map,
        combine_output_path,
        wait_for_containers=True,
    )
    self._logger.info("All sharding coroutines finished")

    # Currently, prepare data blocks and runs until completion or failure
    # (an exception is thrown). This if statement lets the legacy way of
    # calling prepare data NOT update the status, whereas the new way of
    # calling prepare data can update the status.
    if self._update_status_to_complete:
        pc_instance.status = pc_instance.current_stage.completed_status
    return pc_instance
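# Hedged usage sketch (added for illustration; not part of the original file).
# `stage_svc` is assumed to be an already-constructed instance of the stage
# service defining run_async above, and `pc_instance` a PrivateComputationInstance
# sitting at the prepare data stage:
#
#     pc_instance = await stage_svc.run_async(pc_instance)
#     # With _update_status_to_complete set, the instance now carries the
#     # stage's completed status:
#     assert pc_instance.status is pc_instance.current_stage.completed_status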
def _create_pc_instance(self) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id="123",
        role=PrivateComputationRole.PUBLISHER,
        instances=[],
        status=PrivateComputationInstanceStatus.UNKNOWN,
        status_update_ts=1600000000,
        num_pid_containers=1,
        num_mpc_containers=1,
        num_files_per_mpc_container=1,
        game_type=PrivateComputationGameType.LIFT,
        input_path="456",
        output_dir="789",
    )
def create_sample_instance(self) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id="test_instance_123",
        role=PrivateComputationRole.PARTNER,
        instances=[],
        status=PrivateComputationInstanceStatus.ID_MATCHING_COMPLETED,
        status_update_ts=1600000000,
        num_pid_containers=self.test_num_containers,
        num_mpc_containers=self.test_num_containers,
        num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
        game_type=PrivateComputationGameType.LIFT,
        input_path="456",
        output_dir="789",
    )
def _get_pc_instance(
    self, status: PrivateComputationInstanceStatus
) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id=self.instance_id,
        role=PrivateComputationRole.PARTNER,
        instances=[],
        status=status,
        status_update_ts=1600000000,
        num_pid_containers=self.num_shards,
        num_mpc_containers=self.num_shards,
        num_files_per_mpc_container=40,
        game_type=PrivateComputationGameType.LIFT,
        input_path="fake_input_path",
        output_dir="789",
    )
def _create_pc_instance(self) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id="test_instance_123",
        role=PrivateComputationRole.PARTNER,
        instances=[],
        status=PrivateComputationInstanceStatus.ID_MATCHING_COMPLETED,
        attribution_rule=AttributionRule.LAST_CLICK_1D,
        status_update_ts=1600000000,
        num_pid_containers=2,
        num_mpc_containers=2,
        num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
        game_type=PrivateComputationGameType.ATTRIBUTION,
        input_path="456",
        output_dir="789",
        padding_size=4,
    )
def test_create_with_invalid_num_containers(self) -> None:
    instance_id = self._get_random_id()
    with self.assertRaises(ValueError):
        PrivateComputationInstance(
            instance_id=instance_id,
            role=PrivateComputationRole.PUBLISHER,
            instances=[self.test_mpc_instance],
            status=PrivateComputationInstanceStatus.CREATED,
            status_update_ts=1600000000,
            num_files_per_mpc_container=40,
            game_type=PrivateComputationGameType.LIFT,
            input_path="in",
            output_dir="out",
            num_pid_containers=8,
            num_mpc_containers=4,
            concurrency=1,
        )
def create_sample_pc_instance(
    self, pc_role: PrivateComputationRole
) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id=self.pc_instance_id,
        role=pc_role,
        instances=[],
        status=PrivateComputationInstanceStatus.PID_PREPARE_COMPLETED,
        status_update_ts=1600000000,
        num_pid_containers=self.test_num_containers,
        num_mpc_containers=self.test_num_containers,
        num_files_per_mpc_container=self.test_num_containers,
        game_type=PrivateComputationGameType.LIFT,
        input_path=self.input_path,
        output_dir=self.output_path,
        pid_use_row_numbers=True,
    )
def test_read(self) -> None:
    instance_id = self._get_random_id()
    test_read_private_computation_instance = PrivateComputationInstance(
        instance_id=instance_id,
        role=PrivateComputationRole.PUBLISHER,
        instances=[self.test_mpc_instance],
        status=PrivateComputationInstanceStatus.CREATED,
        status_update_ts=1600000000,
        num_files_per_mpc_container=40,
        game_type=PrivateComputationGameType.LIFT,
        input_path="in",
        output_dir="out",
        num_pid_containers=4,
        num_mpc_containers=4,
        concurrency=1,
    )
    self.repo.create(test_read_private_computation_instance)
    self.assertEqual(
        self.repo.read(instance_id), test_read_private_computation_instance
    )
    self.repo.delete(instance_id)
def create_sample_pc_instance(
    self,
    pc_role: PrivateComputationRole,
    test_num_containers: int,
    hmac_key: Optional[str],
) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id=self.pc_instance_id,
        role=pc_role,
        instances=[],
        status=PrivateComputationInstanceStatus.PID_SHARD_COMPLETED,
        status_update_ts=1600000000,
        num_pid_containers=test_num_containers,
        num_mpc_containers=test_num_containers,
        num_files_per_mpc_container=test_num_containers,
        game_type=PrivateComputationGameType.LIFT,
        input_path=self.input_path,
        output_dir=self.output_path,
        hmac_key=hmac_key,
    )
def get_status(
    self,
    pc_instance: PrivateComputationInstance,
) -> PrivateComputationInstanceStatus:
    """Updates the PIDInstances and gets the latest PrivateComputationInstance status

    Arguments:
        pc_instance: The PC instance that is being updated

    Returns:
        The latest status for pc_instance
    """
    status = pc_instance.status
    if pc_instance.instances:
        # Only need to update the last stage/instance
        last_instance = pc_instance.instances[-1]
        if not isinstance(last_instance, PIDInstance):
            raise ValueError(f"Expected {last_instance} to be a PIDInstance")

        # PID service has to call update_instance to get the newest container
        # information in case the containers are still running
        pc_instance.instances[-1] = self._pid_svc.update_instance(
            last_instance.instance_id
        )

        last_instance = pc_instance.instances[-1]
        assert isinstance(last_instance, PIDInstance)  # appeasing pyre

        pid_current_stage = last_instance.current_stage
        if not pid_current_stage:
            return status
        pid_stage_status = last_instance.stages_status.get(pid_current_stage)

        stage = pc_instance.current_stage
        if pid_stage_status is PIDStageStatus.STARTED:
            status = stage.started_status
        elif pid_stage_status is PIDStageStatus.COMPLETED:
            status = stage.completed_status
        elif pid_stage_status is PIDStageStatus.FAILED:
            status = stage.failed_status

    return status
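# Hedged usage sketch (illustration only; `stage_svc` and the polling interval
# are assumptions): since get_status re-reads container state on every call, a
# caller can poll it until the stage settles.
#
#     import time
#
#     while True:
#         status = stage_svc.get_status(pc_instance)
#         if status in (
#             pc_instance.current_stage.completed_status,
#             pc_instance.current_stage.failed_status,
#         ):
#             break
#         time.sleep(30)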
def gen_dummy_pc_instance() -> PrivateComputationInstance:
    """Creates a dummy private computation instance to be used in unit tests"""
    return PrivateComputationInstance(
        instance_id="pc_instance_id",
        role=PrivateComputationRole.PUBLISHER,
        instances=[
            gen_dummy_pid_instance(),
            gen_dummy_mpc_instance(),
            gen_dummy_post_processing_instance(),
        ],
        status=PrivateComputationInstanceStatus.POST_PROCESSING_HANDLERS_COMPLETED,
        status_update_ts=int(time.time()),
        num_files_per_mpc_container=40,
        game_type=PrivateComputationGameType.LIFT,
        input_path="https://bucket.s3.us-west-2.amazonaws.com/lift/partner/partner_e2e_input.csv",
        output_dir="https://bucket.s3.us-west-2.amazonaws.com/lift/partner",
        num_pid_containers=1,
        num_mpc_containers=1,
        attribution_rule=None,
        aggregation_type=None,
        partial_container_retry_enabled=False,
        is_validating=False,
        synthetic_shard_path=None,
        retry_counter=0,
        hmac_key="",
        concurrency=4,
        padding_size=25,
        is_test=False,
        k_anonymity_threshold=100,
        _stage_flow_cls_name="PrivateComputationStageFlow",
        breakdown_key=BreakdownKey.get_default_key(),
        pce_config=PCEConfig(
            subnets=["subnet"],
            cluster="onedocker-cluster-name",
            region="us-west-2",
            onedocker_task_definition="arn:aws:ecs:us-west-2:000000000000:task/cluster-name/subnet",
        ),
    )
def get_updated_pc_status_mpc_game(
    private_computation_instance: PrivateComputationInstance,
    mpc_svc: MPCService,
) -> PrivateComputationInstanceStatus:
    """Updates the MPCInstances and gets the latest PrivateComputationInstance status

    Arguments:
        private_computation_instance: The PC instance that is being updated
        mpc_svc: Used to update MPC instances stored on private_computation_instance

    Returns:
        The latest status for private_computation_instance
    """
    status = private_computation_instance.status
    if private_computation_instance.instances:
        # Only need to update the last stage/instance
        last_instance = private_computation_instance.instances[-1]
        if not isinstance(last_instance, MPCInstance):
            return status

        # MPC service has to call update_instance to get the newest container
        # information in case the containers are still running
        private_computation_instance.instances[-1] = PCSMPCInstance.from_mpc_instance(
            mpc_svc.update_instance(last_instance.instance_id)
        )

        mpc_instance_status = private_computation_instance.instances[-1].status

        current_stage = private_computation_instance.current_stage
        if mpc_instance_status is MPCInstanceStatus.STARTED:
            status = current_stage.started_status
        elif mpc_instance_status is MPCInstanceStatus.COMPLETED:
            status = current_stage.completed_status
        elif mpc_instance_status in (
            MPCInstanceStatus.FAILED,
            MPCInstanceStatus.CANCELED,
        ):
            status = current_stage.failed_status

    return status
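# Hedged usage sketch (not in the original file): an MPC-backed stage service's
# get_status can simply delegate to this helper. `self._mpc_service` is an
# assumed attribute name, mirroring the PID variant's `self._pid_svc` above.
#
#     def get_status(
#         self, pc_instance: PrivateComputationInstance
#     ) -> PrivateComputationInstanceStatus:
#         return get_updated_pc_status_mpc_game(pc_instance, self._mpc_service)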
def create_sample_instance(
    self,
    status: PrivateComputationInstanceStatus,
    role: PrivateComputationRole = PrivateComputationRole.PUBLISHER,
    instances: Optional[List[UnionedPCInstance]] = None,
) -> PrivateComputationInstance:
    return PrivateComputationInstance(
        instance_id=self.test_private_computation_id,
        role=role,
        instances=instances or [],
        status=status,
        status_update_ts=1600000000,
        num_pid_containers=self.test_num_containers,
        num_mpc_containers=self.test_num_containers,
        concurrency=self.test_concurrency,
        num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
        game_type=PrivateComputationGameType.LIFT,
        input_path=self.test_input_path,
        output_dir=self.test_output_dir,
        k_anonymity_threshold=DEFAULT_K_ANONYMITY_THRESHOLD_PL,
        hmac_key=self.test_hmac_key,
    )
def setUp(self) -> None:
    self._pc_instance = PrivateComputationInstance(
        instance_id="123",
        role=PrivateComputationRole.PARTNER,
        instances=[],
        status=PrivateComputationInstanceStatus.INPUT_DATA_VALIDATION_STARTED,
        status_update_ts=1600000000,
        num_pid_containers=1,
        num_mpc_containers=1,
        num_files_per_mpc_container=1,
        game_type=PrivateComputationGameType.LIFT,
        input_path="https://a-test-bucket.s3.us-west-2.amazonaws.com/lift/test/input_data1.csv",
        output_dir="789",
    )
    self.onedocker_binary_config_map = defaultdict(
        lambda: OneDockerBinaryConfig(
            tmp_directory="/test_tmp_directory/",
            binary_version="latest",
            repository_path="test_path/",
        )
    )
async def test_update_instance(self, mock_update) -> None:
    # Mock pc update_instance to return a pc instance with a specific test
    # status and instances
    test_pid_id = self.test_instance_id
    test_pid_role = PIDRole.PUBLISHER
    test_input_path = "pid_in"
    test_output_path = "pid_out"
    # Create one PID instance to be put into PrivateComputationInstance
    pid_instance = PIDInstance(
        instance_id=test_pid_id,
        protocol=DEFAULT_PID_PROTOCOL,
        pid_role=test_pid_role,
        num_shards=self.test_num_containers,
        input_path=test_input_path,
        output_path=test_output_path,
        status=PIDInstanceStatus.STARTED,
        server_ips=["10.0.10.242"],
    )
    test_instance = PrivateComputationInstance(
        instance_id=self.test_instance_id,
        role=self.test_role,
        instances=[pid_instance],
        status=PrivateComputationInstanceStatus.CREATED,
        status_update_ts=0,
        num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
        game_type=self.test_game_type,
        input_path=self.test_input_path,
        output_dir=self.test_output_path,
        num_pid_containers=self.test_num_containers,
        num_mpc_containers=self.test_num_containers,
    )
    mock_update.return_value = test_instance
    return_state = await self.bolt_pcs_client.update_instance(
        instance_id=self.test_instance_id,
    )
    self.assertEqual(
        return_state.pc_instance_status, PrivateComputationInstanceStatus.CREATED
    )
    self.assertEqual(["10.0.10.242"], return_state.server_ips)
def get_status(
    self,
    pc_instance: PrivateComputationInstance,
) -> PrivateComputationInstanceStatus:
    """Updates the PIDInstances and gets the latest PrivateComputationInstance status

    Arguments:
        pc_instance: The PC instance that is being updated

    Returns:
        The latest status for pc_instance
    """
    status = pc_instance.status
    if pc_instance.instances:
        # Only need to update the last stage/instance
        last_instance = pc_instance.instances[-1]
        if not isinstance(last_instance, PIDInstance):
            return status

        # PID service has to call update_instance to get the newest container
        # information in case the containers are still running
        pc_instance.instances[-1] = self._pid_svc.update_instance(
            last_instance.instance_id
        )
        pid_instance_status = pc_instance.instances[-1].status

        stage = pc_instance.current_stage
        if pid_instance_status is PIDInstanceStatus.STARTED:
            status = stage.started_status
        elif pid_instance_status is PIDInstanceStatus.COMPLETED:
            status = stage.completed_status
        elif pid_instance_status is PIDInstanceStatus.FAILED:
            status = stage.failed_status

    return status
def create_instance(
    self,
    instance_id: str,
    role: PrivateComputationRole,
    game_type: PrivateComputationGameType,
    input_path: str,
    output_dir: str,
    num_pid_containers: int,
    num_mpc_containers: int,
    concurrency: Optional[int] = None,
    attribution_rule: Optional[AttributionRule] = None,
    aggregation_type: Optional[AggregationType] = None,
    num_files_per_mpc_container: Optional[int] = None,
    is_validating: Optional[bool] = False,
    synthetic_shard_path: Optional[str] = None,
    breakdown_key: Optional[BreakdownKey] = None,
    pce_config: Optional[PCEConfig] = None,
    is_test: Optional[bool] = False,
    hmac_key: Optional[str] = None,
    padding_size: Optional[int] = None,
    k_anonymity_threshold: Optional[int] = None,
    stage_flow_cls: Optional[Type[PrivateComputationBaseStageFlow]] = None,
    result_visibility: Optional[ResultVisibility] = None,
    tier: Optional[str] = None,
    pid_use_row_numbers: bool = True,
    post_processing_data_optional: Optional[PostProcessingData] = None,
    pid_configs: Optional[Dict[str, Any]] = None,
) -> PrivateComputationInstance:
    self.logger.info(f"Creating instance: {instance_id}")

    # For Private Attribution daily recurring runs, we need the dataset_timestamp
    # of the data used for computation. Default dataset_timestamp to the day
    # before the computation.
    yesterday_date = datetime.now(tz=timezone.utc) - timedelta(days=1)
    yesterday_timestamp = datetime.timestamp(yesterday_date)
    post_processing_data = post_processing_data_optional or PostProcessingData(
        dataset_timestamp=int(yesterday_timestamp)
    )

    instance = PrivateComputationInstance(
        instance_id=instance_id,
        role=role,
        instances=[],
        status=PrivateComputationInstanceStatus.CREATED,
        status_update_ts=PrivateComputationService.get_ts_now(),
        num_files_per_mpc_container=unwrap_or_default(
            optional=num_files_per_mpc_container,
            default=NUM_NEW_SHARDS_PER_FILE,
        ),
        game_type=game_type,
        is_validating=is_validating,
        synthetic_shard_path=synthetic_shard_path,
        num_pid_containers=num_pid_containers,
        num_mpc_containers=self._get_number_of_mpc_containers(
            game_type, num_pid_containers, num_mpc_containers
        ),
        attribution_rule=attribution_rule,
        aggregation_type=aggregation_type,
        input_path=input_path,
        output_dir=output_dir,
        breakdown_key=breakdown_key,
        pce_config=pce_config,
        is_test=is_test,
        hmac_key=unwrap_or_default(optional=hmac_key, default=DEFAULT_HMAC_KEY),
        padding_size=unwrap_or_default(
            optional=padding_size,
            default=LIFT_DEFAULT_PADDING_SIZE
            if game_type is PrivateComputationGameType.LIFT
            else ATTRIBUTION_DEFAULT_PADDING_SIZE,
        ),
        concurrency=concurrency or DEFAULT_CONCURRENCY,
        k_anonymity_threshold=unwrap_or_default(
            optional=k_anonymity_threshold,
            default=DEFAULT_K_ANONYMITY_THRESHOLD_PA
            if game_type is PrivateComputationGameType.ATTRIBUTION
            else DEFAULT_K_ANONYMITY_THRESHOLD_PL,
        ),
        _stage_flow_cls_name=unwrap_or_default(
            optional=stage_flow_cls,
            default=PrivateComputationPCF2StageFlow
            if game_type is PrivateComputationGameType.ATTRIBUTION
            else PrivateComputationStageFlow,
        ).get_cls_name(),
        result_visibility=result_visibility or ResultVisibility.PUBLIC,
        tier=tier,
        pid_use_row_numbers=pid_use_row_numbers,
        post_processing_data=post_processing_data,
        pid_configs=pid_configs,
    )

    self.instance_repository.create(instance)
    return instance
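# Hedged usage sketch (illustration only; `pc_service`, the paths, and the
# container counts are assumptions): a minimal LIFT creation that relies on
# create_instance to fill in the defaults (concurrency, padding size,
# k-anonymity threshold, stage flow).
#
#     instance = pc_service.create_instance(
#         instance_id="pc_instance_123",
#         role=PrivateComputationRole.PUBLISHER,
#         game_type=PrivateComputationGameType.LIFT,
#         input_path="https://bucket.s3.us-west-2.amazonaws.com/lift/input.csv",
#         output_dir="https://bucket.s3.us-west-2.amazonaws.com/lift/output",
#         num_pid_containers=2,
#         num_mpc_containers=2,
#     )
#     assert instance.status is PrivateComputationInstanceStatus.CREATED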
async def run_async(
    self,
    pc_instance: PrivateComputationInstance,
    server_ips: Optional[List[str]] = None,
) -> PrivateComputationInstance:
    """Runs a pid service stage, e.g. pid shard, pid prepare, pid run

    This function creates a pid instance if necessary, stores it on the
    caller-provided pc_instance, and runs PIDService for a given stage.

    Args:
        pc_instance: the private computation instance to run ID match with
        server_ips: only used by the partner role. These are the ip addresses
            of the publisher's containers.

    Returns:
        An updated version of pc_instance that stores a PIDInstance
    """
    # If this is the shard stage (the first pid stage), then create the pid instance
    if (
        self._publisher_stage is UnionPIDStage.PUBLISHER_SHARD
        and self._partner_stage is UnionPIDStage.ADV_SHARD
    ):
        # increment the retry counter (starts at 0 for first attempt)
        pid_instance_id = (
            f"{pc_instance.instance_id}_id_match{pc_instance.retry_counter}"
        )
        pid_instance = self._pid_svc.create_instance(
            instance_id=pid_instance_id,
            pid_role=self._map_private_computation_role_to_pid_role(
                pc_instance.role
            ),
            num_shards=pc_instance.num_pid_containers,
            input_path=pc_instance.input_path,
            output_path=pc_instance.pid_stage_output_base_path,
            is_validating=self._is_validating or pc_instance.is_validating,
            synthetic_shard_path=self._synthetic_shard_path
            or pc_instance.synthetic_shard_path,
            hmac_key=pc_instance.hmac_key,
            pid_use_row_numbers=pc_instance.pid_use_row_numbers,
        )
    else:
        # If there is no previous instance, then we should run shard first
        if not pc_instance.instances:
            raise RuntimeError(
                f"Cannot run PID stages {self._publisher_stage}, {self._partner_stage}. Run PID shard first."
            )
        pid_instance = pc_instance.instances[-1]
        # If the last instance is not a pid instance, then we are out of order
        if not isinstance(pid_instance, PIDInstance):
            raise ValueError(
                f"Cannot run PID stages {self._publisher_stage}, {self._partner_stage}. Last instance is not a PIDInstance."
            )

    # Run pid
    pid_instance = await self._pid_svc.run_stage_or_next(
        instance_id=pid_instance.instance_id,
        server_ips=server_ips,
        pid_union_stage=self._publisher_stage
        if pc_instance.role is PrivateComputationRole.PUBLISHER
        else self._partner_stage,
        wait_for_containers=False,
        container_timeout=self._container_timeout,
    )

    if not pc_instance.instances or not isinstance(
        pc_instance.instances[-1], PIDInstance
    ):
        # Push PID instance to PrivateComputationInstance.instances
        pc_instance.instances.append(pid_instance)
    else:
        # Replace the outdated pid instance with the updated one
        pc_instance.instances[-1] = pid_instance

    return pc_instance
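# Hedged usage sketch (not in the original file; the constructor shape is an
# assumption): running the shard stage as publisher appends a fresh PIDInstance,
# while later stages update the one already stored on pc_instance.
#
#     stage_svc = PIDStageService(  # hypothetical constructor
#         pid_svc,
#         publisher_stage=UnionPIDStage.PUBLISHER_SHARD,
#         partner_stage=UnionPIDStage.ADV_SHARD,
#     )
#     pc_instance = await stage_svc.run_async(pc_instance)
#     assert isinstance(pc_instance.instances[-1], PIDInstance)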
def read(self, instance_id: str) -> PrivateComputationInstance:
    return PrivateComputationInstance.loads_schema(self.repo.read(instance_id))
async def run_async(
    self,
    pc_instance: PrivateComputationInstance,
    server_ips: Optional[List[str]] = None,
) -> PrivateComputationInstance:
    """Runs the private computation post processing handlers stage

    Post processing handlers are designed to run after final results are
    available. You can write post processing handlers to download results from
    cloud storage, send you an email, etc.

    Args:
        pc_instance: the private computation instance to run post processing
            handlers with
        server_ips: only used by the partner role. These are the ip addresses
            of the publisher's containers.

    Returns:
        An updated version of pc_instance that stores a post processing instance
    """
    post_processing_handlers_statuses = None
    if pc_instance.instances:
        last_instance = pc_instance.instances[-1]
        if (
            isinstance(last_instance, PostProcessingInstance)
            and last_instance.handler_statuses.keys()
            == self._post_processing_handlers.keys()
        ):
            self._logger.info("Copying statuses from last instance")
            post_processing_handlers_statuses = (
                last_instance.handler_statuses.copy()
            )

    post_processing_instance = PostProcessingInstance.create_instance(
        instance_id=pc_instance.instance_id
        + "_post_processing"
        + str(pc_instance.retry_counter),
        handlers=self._post_processing_handlers,
        handler_statuses=post_processing_handlers_statuses,
        status=PostProcessingInstanceStatus.STARTED,
    )

    pc_instance.instances.append(post_processing_instance)

    # If any handler fails, then the post_processing_instance status will be
    # set to failed, as will the pc_instance status.
    await asyncio.gather(
        *[
            self._run_post_processing_handler(
                pc_instance,
                post_processing_instance,
                name,
                handler,
            )
            for name, handler in self._post_processing_handlers.items()
            if post_processing_instance.handler_statuses[name]
            != PostProcessingHandlerStatus.COMPLETED
        ]
    )

    # If any of the handlers failed, the status of the post processing instance
    # was already set to failed. Otherwise, all of the handlers completed, so
    # we can set the status to completed.
    if post_processing_instance.status is not PostProcessingInstanceStatus.FAILED:
        post_processing_instance.status = PostProcessingInstanceStatus.COMPLETED
        pc_instance.update_status(
            pc_instance.current_stage.completed_status, self._logger
        )

    return pc_instance
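# Hedged usage sketch (illustration only; the service wiring and the handler
# are hypothetical placeholders): handlers are passed as a name -> handler
# mapping, and a rerun after a partial failure skips handlers that already
# completed.
#
#     stage_svc = PostProcessingStageService(  # assumed constructor shape
#         storage_svc, {"download": DownloadResultsHandler()}
#     )
#     pc_instance = await stage_svc.run_async(pc_instance)
#     assert (
#         pc_instance.instances[-1].status
#         is PostProcessingInstanceStatus.COMPLETED
#     )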