def _copy_aws_session(
    aws_session: Optional[AwsSession],
    region: Optional[str] = None,
    max_connections: Optional[int] = None,
) -> AwsSession:
    config = Config(max_pool_connections=max_connections) if max_connections else None
    if aws_session:
        session_region = aws_session.boto_session.region_name
        new_region = region or session_region
        if session_region == new_region and not config:
            return aws_session
        else:
            creds = aws_session.boto_session.get_credentials()
            boto_session = boto3.Session(
                aws_access_key_id=creds.access_key,
                aws_secret_access_key=creds.secret_key,
                aws_session_token=creds.token,
                region_name=new_region,
            )
            return AwsSession(boto_session=boto_session, config=config)
    else:
        boto_session = boto3.Session(region_name=region) if region else None
        return AwsSession(boto_session=boto_session, config=config)
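# Hedged usage sketch, not from the source: how a caller might clone a session into
# another region with a larger connection pool. The region and pool size are
# illustrative, and a configured AWS credential chain is assumed.
def _example_copy_session_to_region() -> AwsSession:
    source_session = AwsSession()
    # Same credentials, new region, plus a boto3 Config with a bigger pool,
    # e.g. before polling many quantum tasks concurrently.
    west_session = AwsDevice._copy_aws_session(
        source_session, region="us-west-2", max_connections=100
    )
    assert west_session.boto_session.region_name == "us-west-2"
    return west_session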
def _aws_session_for_qpu(device_arn: str, aws_session: AwsSession) -> AwsSession:
    """
    Get an AwsSession for the device ARN. QPUs are physically located in specific AWS Regions.
    The AWS sessions should connect to the Region that the QPU is located in.

    See `braket.aws.aws_qpu.AwsDevice.QPU_REGIONS` for the AWS Regions the QPUs are located in.
    """
    region_key = device_arn.split("/")[-2]
    qpu_regions = AwsDevice.QPU_REGIONS.get(region_key, [])
    if aws_session:
        if aws_session.boto_session.region_name in qpu_regions:
            return aws_session
        else:
            creds = aws_session.boto_session.get_credentials()
            boto_session = boto3.Session(
                aws_access_key_id=creds.access_key,
                aws_secret_access_key=creds.secret_key,
                aws_session_token=creds.token,
                region_name=qpu_regions[0],
            )
            return AwsSession(boto_session=boto_session)
    else:
        boto_session = boto3.Session(region_name=qpu_regions[0])
        return AwsSession(boto_session=boto_session)
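# Sketch of the region-key lookup above, using a hypothetical Rigetti device ARN.
# Braket device ARNs have the shape arn:aws:braket:::device/<type>/<provider>/<name>,
# so the second-to-last "/" segment is the provider key used in QPU_REGIONS.
def _example_qpu_region_key() -> str:
    device_arn = "arn:aws:braket:::device/qpu/rigetti/Aspen-11"
    region_key = device_arn.split("/")[-2]
    assert region_key == "rigetti"
    return region_key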
def __init__(
    self,
    image_uri: str,
    aws_session: AwsSession = None,
    logger: Logger = getLogger(__name__),
    force_update: bool = False,
):
    """Represents and provides functions for interacting with a Braket Jobs docker container.

    The function "end_session" must be called when the container is no longer needed.

    Args:
        image_uri (str): The URI of the container image to run.
        aws_session (AwsSession): AwsSession for connecting to AWS Services.
            Default: AwsSession()
        logger (Logger): Logger object with which to write logs.
            Default: `getLogger(__name__)`
        force_update (bool): Try to update the container if an update is available.
            Default: False
    """
    self._aws_session = aws_session or AwsSession()
    self.image_uri = image_uri
    self.run_result = None
    self._container_name = None
    self._logger = logger
    self._force_update = force_update
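# Hedged usage sketch: LocalQuantumJob.create (further below) uses this class as a
# context manager, which is the safest way to guarantee the session is ended,
# presumably via "end_session". The image URI here is hypothetical.
def _example_container_lifecycle():
    image_uri = "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-image:latest"
    with _LocalJobContainer(image_uri, force_update=True) as container:
        pass  # run_local_job / copy_from calls would go here
    # On exiting the with-block the container is cleaned up even if an error occurred.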
def _aws_session_for_device(device_arn: str, aws_session: AwsSession) -> AwsSession:
    """AwsSession: Returns an AwsSession for the device ARN."""
    if "qpu" in device_arn:
        return AwsDevice._aws_session_for_qpu(device_arn, aws_session)
    else:
        return aws_session or AwsSession()
def _get_arn_sessions(arns, names, types, statuses, provider_names, aws_session):
    aws_session = aws_session if aws_session else AwsSession()
    sessions_for_arns = {}
    session_region = aws_session.boto_session.region_name
    device_regions_set = AwsDevice._get_devices_regions_set(types, arns, session_region)
    for region in device_regions_set:
        session_for_region = AwsDevice._copy_aws_session(aws_session, region)
        # Simulators are only instantiated in the same region as the AWS session
        types_for_region = sorted(
            types if region == session_region else types - {AwsDeviceType.SIMULATOR}
        )
        region_device_arns = [
            result["deviceArn"]
            for result in session_for_region.search_devices(
                arns=arns,
                names=names,
                types=types_for_region,
                statuses=statuses,
                provider_names=provider_names,
            )
        ]
        sessions_for_arns.update(
            {
                arn: session_for_region
                for arn in region_device_arns
                if arn not in sessions_for_arns
            }
        )
    return sessions_for_arns
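# Hedged sketch of consuming the ARN-to-session map returned above; the filter
# values are illustrative. Each ARN is paired with a session already pointed at
# the region that hosts that device.
def _example_arn_sessions():
    sessions = AwsDevice._get_arn_sessions(
        arns=None,
        names=None,
        types=frozenset({AwsDeviceType.QPU}),
        statuses=["ONLINE"],
        provider_names=None,
        aws_session=None,
    )
    for arn, session in sessions.items():
        print(arn, session.boto_session.region_name)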
def _copy_aws_session(aws_session: Optional[AwsSession], regions: List[str]) -> AwsSession:
    if aws_session:
        if aws_session.boto_session.region_name in regions:
            return aws_session
        else:
            creds = aws_session.boto_session.get_credentials()
            boto_session = boto3.Session(
                aws_access_key_id=creds.access_key,
                aws_secret_access_key=creds.secret_key,
                aws_session_token=creds.token,
                region_name=regions[0],
            )
            return AwsSession(boto_session=boto_session)
    else:
        boto_session = boto3.Session(region_name=regions[0])
        return AwsSession(boto_session=boto_session)
def _initialize_session(session_value: AwsSession, device: str, logger: Logger) -> AwsSession:
    aws_session = session_value or AwsSession()
    if device.startswith("local:"):
        return aws_session
    device_region = AwsDevice.get_device_region(device)
    return (
        AwsQuantumJob._initialize_regional_device_session(aws_session, device, logger)
        if device_region
        else AwsQuantumJob._initialize_non_regional_device_session(aws_session, device, logger)
    )
def _aws_session_for_task_arn(task_arn: str) -> AwsSession:
    """
    Get an AwsSession for the Task ARN. The AWS session should be in the region of the task.

    Returns:
        AwsSession: `AwsSession` object with default `boto_session` in task's region
    """
    task_region = task_arn.split(":")[3]
    boto_session = boto3.Session(region_name=task_region)
    return AwsSession(boto_session=boto_session)
def _default_session_for_job_arn(job_arn: str) -> AwsSession:
    """Get an AwsSession for the Job ARN. The AWS session should be in the region of the job.

    Args:
        job_arn (str): The ARN for the quantum job.

    Returns:
        AwsSession: `AwsSession` object with default `boto_session` in job's region.
    """
    job_region = job_arn.split(":")[3]
    boto_session = boto3.Session(region_name=job_region)
    return AwsSession(boto_session=boto_session)
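# Both ARN helpers above rely on the standard ARN layout
# arn:<partition>:<service>:<region>:<account>:<resource>, so the region is always
# field 3 when splitting on ":". A sketch with a hypothetical job ARN:
def _example_region_from_arn() -> str:
    job_arn = "arn:aws:braket:us-west-2:123456789012:job/my-job"
    region = job_arn.split(":")[3]
    assert region == "us-west-2"
    return region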
def __init__(
    self, image_uri: str, aws_session: AwsSession = None, logger: Logger = getLogger(__name__)
):
    """Represents and provides functions for interacting with a Braket Jobs docker container.

    The function "end_session" must be called when the container is no longer needed.

    Args:
        image_uri (str): The URI of the container image to run.
        aws_session (AwsSession, Optional): AwsSession for connecting to AWS Services.
            Default: AwsSession()
        logger (Logger): Logger object with which to write logs.
            Default: `getLogger(__name__)`
    """
    self._aws_session = aws_session or AwsSession()
    self.image_uri = image_uri
    self.run_log = None
    self._container_name = None
    self._logger = logger
def _initialize_session(session_value, device, logger):
    aws_session = session_value or AwsSession()
    current_region = aws_session.region
    try:
        aws_session.get_device(device)
        return aws_session
    except ClientError as e:
        if e.response["Error"]["Code"] == "ResourceNotFoundException":
            if "qpu" not in device:
                raise ValueError(f"Simulator '{device}' not found in '{current_region}'")
        else:
            raise e
    # A QPU not found in the current region may be hosted elsewhere; search for it.
    return AwsQuantumJob._find_device_session(aws_session, device, current_region, logger)
def _copy_aws_session(
    aws_session: AwsSession,
    region: Optional[str] = None,
    max_connections: Optional[int] = None,
) -> AwsSession:
    config = Config(max_pool_connections=max_connections) if max_connections else None
    session_region = aws_session.boto_session.region_name
    new_region = region or session_region
    creds = aws_session.boto_session.get_credentials()
    # Copy only explicitly supplied credentials forward; for any other provider
    # (profile, environment, instance role) the new boto3.Session resolves
    # credentials itself, so refreshable or temporary credentials do not go stale.
    if creds.method == "explicit":
        boto_session = boto3.Session(
            aws_access_key_id=creds.access_key,
            aws_secret_access_key=creds.secret_key,
            aws_session_token=creds.token,
            region_name=new_region,
        )
    else:
        boto_session = boto3.Session(region_name=new_region)
    return AwsSession(boto_session=boto_session, config=config)
def __init__(self, arn: str, aws_session: Optional[AwsSession] = None):
    """
    Args:
        arn (str): The ARN of the device
        aws_session (AwsSession, optional): An AWS session object. Default is `None`.

    Note:
        Some devices (QPUs) are physically located in specific AWS Regions. In some cases,
        the current `aws_session` connects to a Region other than the Region in which the QPU
        is physically located. When this occurs, a cloned `aws_session` is created for the
        Region the QPU is located in.

        See `braket.aws.aws_device.AwsDevice.DEVICE_REGIONS` for the AWS Regions provider
        devices are located in.
    """
    super().__init__(name=None, status=None)
    self._arn = arn
    self._properties = None
    self._provider_name = None
    self._topology_graph = None
    self._type = None
    self._aws_session = self._get_session_and_initialize(aws_session or AwsSession())
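# Hedged usage sketch: constructing a device with no session falls back to
# AwsSession(), and a session connected to the "wrong" region is cloned into the
# QPU's home region transparently. The ARN below is hypothetical.
def _example_device_session_cloning() -> AwsDevice:
    device = AwsDevice("arn:aws:braket:::device/qpu/ionq/ionQdevice")
    # The session the device ends up with may target a different region than the caller's.
    print(device._aws_session.boto_session.region_name)
    return device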
def prepare_quantum_job(
    device: str,
    source_module: str,
    entry_point: str = None,
    image_uri: str = None,
    job_name: str = None,
    code_location: str = None,
    role_arn: str = None,
    hyperparameters: Dict[str, Any] = None,
    input_data: Union[str, Dict, S3DataSourceConfig] = None,
    instance_config: InstanceConfig = None,
    distribution: str = None,
    stopping_condition: StoppingCondition = None,
    output_data_config: OutputDataConfig = None,
    copy_checkpoints_from_job: str = None,
    checkpoint_config: CheckpointConfig = None,
    aws_session: AwsSession = None,
    tags: Dict[str, str] = None,
) -> Dict:
    """Creates a job by invoking the Braket CreateJob API.

    Args:
        device (str): ARN for the AWS device which is primarily accessed for the execution
            of this job.
        source_module (str): Path (absolute, relative or an S3 URI) to a python module to be
            tarred and uploaded. If `source_module` is an S3 URI, it must point to a
            tar.gz file. Otherwise, source_module may be a file or directory.
        entry_point (str): A str that specifies the entry point of the job, relative to
            the source module. The entry point must be in the format
            `importable.module` or `importable.module:callable`. For example,
            `source_module.submodule:start_here` indicates the `start_here` function
            contained in `source_module.submodule`. If source_module is an S3 URI,
            entry point must be given. Default: source_module's name
        image_uri (str): A str that specifies the ECR image to use for executing the job.
            `image_uris.retrieve_image()` function may be used for retrieving the ECR image
            URIs for the containers supported by Braket. Default = `<Braket base image_uri>`.
        job_name (str): A str that specifies the name with which the job is created.
            Default: f'{image_uri_type}-{timestamp}'.
        code_location (str): The S3 prefix URI where custom code will be uploaded.
            Default: f's3://{default_bucket_name}/jobs/{job_name}/script'.
        role_arn (str): A str providing the IAM role ARN used to execute the script.
            Default: IAM role returned by AwsSession's `get_default_jobs_role()`.
        hyperparameters (Dict[str, Any]): Hyperparameters accessible to the job.
            The hyperparameters are made accessible as a Dict[str, str] to the job.
            For convenience, this accepts other types for keys and values, but `str()`
            is called to convert them before being passed on. Default: None.
        input_data (Union[str, Dict, S3DataSourceConfig]): Information about the training
            data. Dictionary maps channel names to local paths or S3 URIs. Contents found
            at any local paths will be uploaded to S3 at
            f's3://{default_bucket_name}/jobs/{job_name}/data/{channel_name}. If a local
            path, S3 URI, or S3DataSourceConfig is provided, it will be given a default
            channel name "input". Default: {}.
        instance_config (InstanceConfig): Configuration of the instances to be used
            to execute the job. Default: InstanceConfig(instanceType='ml.m5.large',
            instanceCount=1, volumeSizeInGB=30, volumeKmsKey=None).
        distribution (str): A str that specifies how the job should be distributed. If set to
            "data_parallel", the hyperparameters for the job will be set to use data
            parallelism features for PyTorch or TensorFlow. Default: None.
        stopping_condition (StoppingCondition): The maximum length of time, in seconds,
            and the maximum number of tasks that a job can run before being forcefully
            stopped. Default: StoppingCondition(maxRuntimeInSeconds=5 * 24 * 60 * 60).
        output_data_config (OutputDataConfig): Specifies the location for the output of the
            job. Default: OutputDataConfig(s3Path=f's3://{default_bucket_name}/jobs/
            {job_name}/data', kmsKeyId=None).
        copy_checkpoints_from_job (str): A str that specifies the job ARN whose checkpoint
            you want to use in the current job. Specifying this value will copy over the
            checkpoint data from `copy_checkpoints_from_job`'s checkpoint_config s3Uri to
            the current job's checkpoint_config s3Uri, making it available at
            checkpoint_config.localPath during the job execution. Default: None
        checkpoint_config (CheckpointConfig): Configuration that specifies the location where
            checkpoint data is stored.
            Default: CheckpointConfig(localPath='/opt/jobs/checkpoints',
            s3Uri=f's3://{default_bucket_name}/jobs/{job_name}/checkpoints').
        aws_session (AwsSession): AwsSession for connecting to AWS Services.
            Default: AwsSession()
        tags (Dict[str, str]): Dict specifying the key-value pairs for tagging this job.
            Default: {}.

    Returns:
        Dict: Job tracking the execution on Amazon Braket.

    Raises:
        ValueError: Raises ValueError if the parameters are not valid.
    """
    param_datatype_map = {
        "instance_config": (instance_config, InstanceConfig),
        "stopping_condition": (stopping_condition, StoppingCondition),
        "output_data_config": (output_data_config, OutputDataConfig),
        "checkpoint_config": (checkpoint_config, CheckpointConfig),
    }

    _validate_params(param_datatype_map)
    aws_session = aws_session or AwsSession()
    device_config = DeviceConfig(device)
    job_name = job_name or _generate_default_job_name(image_uri)
    role_arn = role_arn or os.getenv("BRAKET_JOBS_ROLE_ARN", aws_session.get_default_jobs_role())
    hyperparameters = hyperparameters or {}
    hyperparameters = {str(key): str(value) for key, value in hyperparameters.items()}
    input_data = input_data or {}
    tags = tags or {}
    default_bucket = aws_session.default_bucket()
    input_data_list = _process_input_data(input_data, job_name, aws_session)
    instance_config = instance_config or InstanceConfig()
    stopping_condition = stopping_condition or StoppingCondition()
    output_data_config = output_data_config or OutputDataConfig()
    checkpoint_config = checkpoint_config or CheckpointConfig()
    code_location = code_location or AwsSession.construct_s3_uri(
        default_bucket,
        "jobs",
        job_name,
        "script",
    )

    if AwsSession.is_s3_uri(source_module):
        _process_s3_source_module(source_module, entry_point, aws_session, code_location)
    else:
        # if entry point is None, it will be set to default here
        entry_point = _process_local_source_module(
            source_module, entry_point, aws_session, code_location
        )
    algorithm_specification = {
        "scriptModeConfig": {
            "entryPoint": entry_point,
            "s3Uri": f"{code_location}/source.tar.gz",
            "compressionType": "GZIP",
        }
    }
    if image_uri:
        algorithm_specification["containerImage"] = {"uri": image_uri}
    if not output_data_config.s3Path:
        output_data_config.s3Path = AwsSession.construct_s3_uri(
            default_bucket,
            "jobs",
            job_name,
            "data",
        )
    if not checkpoint_config.s3Uri:
        checkpoint_config.s3Uri = AwsSession.construct_s3_uri(
            default_bucket,
            "jobs",
            job_name,
            "checkpoints",
        )
    if copy_checkpoints_from_job:
        checkpoints_to_copy = aws_session.get_job(copy_checkpoints_from_job)[
            "checkpointConfig"
        ]["s3Uri"]
        aws_session.copy_s3_directory(checkpoints_to_copy, checkpoint_config.s3Uri)
    if distribution == "data_parallel":
        distributed_hyperparams = {
            "sagemaker_distributed_dataparallel_enabled": "true",
            "sagemaker_instance_type": instance_config.instanceType,
        }
        hyperparameters.update(distributed_hyperparams)

    create_job_kwargs = {
        "jobName": job_name,
        "roleArn": role_arn,
        "algorithmSpecification": algorithm_specification,
        "inputDataConfig": input_data_list,
        "instanceConfig": asdict(instance_config),
        "outputDataConfig": asdict(output_data_config),
        "checkpointConfig": asdict(checkpoint_config),
        "deviceConfig": asdict(device_config),
        "hyperParameters": hyperparameters,
        "stoppingCondition": asdict(stopping_condition),
        "tags": tags,
    }

    return create_job_kwargs
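# Hedged sketch of driving prepare_quantum_job end to end. The device ARN, module
# path, and the create_job call on AwsSession are assumptions consistent with how
# the returned kwargs dict mirrors the Braket CreateJob API payload.
def _example_prepare_and_submit_job():
    create_job_kwargs = prepare_quantum_job(
        device="arn:aws:braket:::device/quantum-simulator/amazon/sv1",
        source_module="my_algorithm",
        entry_point="my_algorithm.train:main",
        hyperparameters={"shots": 1000},  # str() is applied to keys and values
    )
    session = AwsSession()
    return session.create_job(**create_job_kwargs)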
def get_devices(
    arns: Optional[List[str]] = None,
    names: Optional[List[str]] = None,
    types: Optional[List[AwsDeviceType]] = None,
    statuses: Optional[List[str]] = None,
    provider_names: Optional[List[str]] = None,
    order_by: str = "name",
    aws_session: Optional[AwsSession] = None,
) -> List[AwsDevice]:
    """
    Get devices based on filters and desired ordering. The result is the AND of
    all the filters `arns`, `names`, `types`, `statuses`, `provider_names`.

    Examples:
        >>> AwsDevice.get_devices(provider_names=['Rigetti'], statuses=['ONLINE'])
        >>> AwsDevice.get_devices(order_by='provider_name')
        >>> AwsDevice.get_devices(types=['SIMULATOR'])

    Args:
        arns (List[str], optional): device ARN list, default is `None`
        names (List[str], optional): device name list, default is `None`
        types (List[AwsDeviceType], optional): device type list, default is `None`.
            QPUs will be searched for in all regions; simulators will only be searched
            for in the region of the current session.
        statuses (List[str], optional): device status list, default is `None`
        provider_names (List[str], optional): provider name list, default is `None`
        order_by (str, optional): field to order the result by, default is `name`.
            Accepted values are ['arn', 'name', 'type', 'provider_name', 'status']
        aws_session (AwsSession, optional): An AWS session object. Default is `None`.

    Returns:
        List[AwsDevice]: list of AWS devices
    """
    if order_by not in AwsDevice._GET_DEVICES_ORDER_BY_KEYS:
        raise ValueError(
            f"order_by '{order_by}' must be in {AwsDevice._GET_DEVICES_ORDER_BY_KEYS}"
        )
    types = (
        frozenset(types) if types else frozenset({device_type for device_type in AwsDeviceType})
    )
    aws_session = aws_session if aws_session else AwsSession()
    device_map = {}
    session_region = aws_session.boto_session.region_name
    search_regions = (
        (session_region,) if types == {AwsDeviceType.SIMULATOR} else AwsDevice.REGIONS
    )
    for region in search_regions:
        session_for_region = (
            aws_session
            if region == session_region
            else AwsDevice._copy_aws_session(aws_session, region)
        )
        # Simulators are only instantiated in the same region as the AWS session
        types_for_region = sorted(
            types if region == session_region else types - {AwsDeviceType.SIMULATOR}
        )
        region_device_arns = [
            result["deviceArn"]
            for result in session_for_region.search_devices(
                arns=arns,
                names=names,
                types=types_for_region,
                statuses=statuses,
                provider_names=provider_names,
            )
        ]
        device_map.update(
            {
                arn: AwsDevice(arn, session_for_region)
                for arn in region_device_arns
                if arn not in device_map
            }
        )
    devices = list(device_map.values())
    devices.sort(key=lambda x: getattr(x, order_by))
    return devices
def aws_session(boto_session):
    return AwsSession(boto_session)
def create(
    cls,
    device: str,
    source_module: str,
    entry_point: str = None,
    image_uri: str = None,
    job_name: str = None,
    code_location: str = None,
    role_arn: str = None,
    hyperparameters: Dict[str, Any] = None,
    input_data: Union[str, Dict, S3DataSourceConfig] = None,
    output_data_config: OutputDataConfig = None,
    checkpoint_config: CheckpointConfig = None,
    aws_session: AwsSession = None,
) -> LocalQuantumJob:
    """Creates and runs a job by setting up and running the customer script in a local
    docker container.

    Args:
        device (str): ARN for the AWS device which is primarily accessed for the execution
            of this job.
        source_module (str): Path (absolute, relative or an S3 URI) to a python module to be
            tarred and uploaded. If `source_module` is an S3 URI, it must point to a
            tar.gz file. Otherwise, source_module may be a file or directory.
        entry_point (str): A str that specifies the entry point of the job, relative to
            the source module. The entry point must be in the format
            `importable.module` or `importable.module:callable`. For example,
            `source_module.submodule:start_here` indicates the `start_here` function
            contained in `source_module.submodule`. If source_module is an S3 URI,
            entry point must be given. Default: source_module's name
        image_uri (str): A str that specifies the ECR image to use for executing the job.
            `image_uris.retrieve_image()` function may be used for retrieving the ECR image
            URIs for the containers supported by Braket. Default = `<Braket base image_uri>`.
        job_name (str): A str that specifies the name with which the job is created.
            Default: f'{image_uri_type}-{timestamp}'.
        code_location (str): The S3 prefix URI where custom code will be uploaded.
            Default: f's3://{default_bucket_name}/jobs/{job_name}/script'.
        role_arn (str): This field is currently not used for local jobs. Local jobs will use
            the current role's credentials. This may be subject to change.
        hyperparameters (Dict[str, Any]): Hyperparameters accessible to the job.
            The hyperparameters are made accessible as a Dict[str, str] to the job.
            For convenience, this accepts other types for keys and values, but `str()`
            is called to convert them before being passed on. Default: None.
        input_data (Union[str, Dict, S3DataSourceConfig]): Information about the training
            data. Dictionary maps channel names to local paths or S3 URIs. Contents found
            at any local paths will be uploaded to S3 at
            f's3://{default_bucket_name}/jobs/{job_name}/data/{channel_name}. If a local
            path, S3 URI, or S3DataSourceConfig is provided, it will be given a default
            channel name "input". Default: {}.
        output_data_config (OutputDataConfig): Specifies the location for the output of the
            job. Default: OutputDataConfig(s3Path=f's3://{default_bucket_name}/jobs/
            {job_name}/data', kmsKeyId=None).
        checkpoint_config (CheckpointConfig): Configuration that specifies the location where
            checkpoint data is stored.
            Default: CheckpointConfig(localPath='/opt/jobs/checkpoints',
            s3Uri=f's3://{default_bucket_name}/jobs/{job_name}/checkpoints').
        aws_session (AwsSession): AwsSession for connecting to AWS Services.
            Default: AwsSession()

    Returns:
        LocalQuantumJob: The representation of a local Braket Job.
    """
    create_job_kwargs = prepare_quantum_job(
        device=device,
        source_module=source_module,
        entry_point=entry_point,
        image_uri=image_uri,
        job_name=job_name,
        code_location=code_location,
        role_arn=role_arn,
        hyperparameters=hyperparameters,
        input_data=input_data,
        output_data_config=output_data_config,
        checkpoint_config=checkpoint_config,
        aws_session=aws_session,
    )
    job_name = create_job_kwargs["jobName"]
    if os.path.isdir(job_name):
        raise ValueError(
            f"A local directory called {job_name} already exists. "
            f"Please use a different job name."
        )

    session = aws_session or AwsSession()
    algorithm_specification = create_job_kwargs["algorithmSpecification"]
    if "containerImage" in algorithm_specification:
        image_uri = algorithm_specification["containerImage"]["uri"]
    else:
        image_uri = retrieve_image(Framework.BASE, session.region)

    with _LocalJobContainer(image_uri) as container:
        env_variables = setup_container(container, session, **create_job_kwargs)
        container.run_local_job(env_variables)
        container.copy_from("/opt/ml/model", job_name)
        with open(os.path.join(job_name, "log.txt"), "w") as log_file:
            log_file.write(container.run_log)
        if "checkpointConfig" in create_job_kwargs:
            checkpoint_config = create_job_kwargs["checkpointConfig"]
            if "localPath" in checkpoint_config:
                checkpoint_path = checkpoint_config["localPath"]
                container.copy_from(checkpoint_path, os.path.join(job_name, "checkpoints"))
        run_log = container.run_log
    return LocalQuantumJob(f"local:job/{job_name}", run_log)