def set_user(iam: BaseClient, account_id: str, user: str):
    """Create an IAM SFTP user and store its access key locally as CSV.

    If a key file already exists for this account/user pair, nothing is done.

    Args:
        iam: IAM client to use.
        account_id (str): AWS account id that owns the user.
        user (str): Base user name; '-sftp' is appended to it.
    """
    key_path = (f"credentials/{os.environ['ENV']}/users/"
                f"{user}/aws/{account_id}.csv")
    if not os.path.exists(key_path):
        try:
            iam.get_user(UserName=f'{user}-sftp')
            print(f"User {user}-sftp already exists !")
        except iam.exceptions.NoSuchEntityException:
            print(f'Creating user : '
                  f'arn:aws:iam::{account_id}:user/{user}-sftp')
            iam.create_user(
                UserName=f'{user}-sftp',
                PermissionsBoundary=f"arn:aws:iam::{account_id}:policy/sftp-users")
            print(f"Created user : {user}")
        # A key is (re)created whenever the local CSV is missing.
        print(f'Creating access key for user : {user}-sftp !')
        created_key = iam.create_access_key(UserName=f"{user}-sftp")
        key_directory = os.path.join(*key_path.split("/")[:-1])
        if not os.path.exists(key_directory):
            # makedirs: intermediate directories may not exist yet.
            os.makedirs(key_directory)
        with open(key_path, 'w') as file:
            writer = csv.writer(file)
            for row in [["Access key ID", "Secret access key"],
                        [created_key['AccessKey']['AccessKeyId'],
                         created_key['AccessKey']['SecretAccessKey']]]:
                writer.writerow(row)
        print(f'Done creating key at path {key_path}')
    else:
        print(f"Key already found for project {account_id} and user {user}")
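# A minimal usage sketch for set_user, assuming the ENV variable and the
# credentials/ directory layout used above; the account id and user name
# are hypothetical placeholders.
import os

import boto3

os.environ.setdefault("ENV", "dev")
set_user(boto3.client("iam"), account_id="123456789012", user="alice")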
def upload_backup_to_s3(backup_path: str, s3_client: client.BaseClient,
                        config: Dict) -> None:
    """Upload a local backup to S3.

    Delete the local backup file if the upload to S3 was successful.

    Args:
        backup_path: Local path to the backup file.
        s3_client: Client to use for S3 operations.
        config: Complete config for the script.
    """
    backup_s3_key = posixpath.join(config['aws_s3_backup_bucket_prefix'],
                                   os.path.split(backup_path)[1])
    backup_s3_uri = 's3://{}/{}'.format(config['aws_s3_backup_bucket'],
                                        backup_s3_key)
    extra_args = {
        'Metadata': config['aws_s3_object_metadata'],
        'StorageClass': config['aws_s3_storage_class']
    }
    _log.info('Will upload backup at "%s" to "%s" with extra args:\n%s',
              backup_path, backup_s3_uri, pformat(extra_args))
    s3_client.upload_file(Filename=backup_path,
                          Bucket=config['aws_s3_backup_bucket'],
                          Key=backup_s3_key,
                          ExtraArgs=extra_args,
                          Callback=UploadProgressLogger(backup_path))
    _log.info('Backup upload completed')
    _log.info('Removing local backup at "%s"', backup_path)
    os.remove(backup_path)
    _log.info('Local backup at "%s" removed successfully', backup_path)
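# A usage sketch for upload_backup_to_s3 with a hypothetical config dict;
# the bucket name, prefix, metadata, and backup path are illustrative only.
# Note that the local file is deleted after a successful upload.
import boto3

backup_config = {
    'aws_s3_backup_bucket': 'example-backup-bucket',
    'aws_s3_backup_bucket_prefix': 'db-backups',
    'aws_s3_object_metadata': {'origin': 'nightly-backup'},
    'aws_s3_storage_class': 'STANDARD_IA',
}
upload_backup_to_s3('/tmp/backup-2021-01-01.tar.gz',
                    boto3.client('s3'), backup_config)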
def upload_plugin(s3_client: BaseClient, key: str,
                  contents: str) -> Optional[Response]:
    plugin_bucket = (os.environ["BUCKET_PREFIX"] +
                     "-model-plugins-bucket").lower()
    plugin_parts = key.split("/")
    plugin_name = plugin_parts[0]
    plugin_key = "/".join(plugin_parts[1:])
    if not (plugin_name and plugin_key):
        # if we upload a dir that looks like
        #   model_plugins/
        #       __init__.py
        #       grapl_aws_model_plugin/
        #           ...lots of files...
        # we want to skip uploading the initial __init__.py, since we can't
        # figure out what plugin_name it would belong to.
        LOGGER.info(f"Skipping uploading key {key}")
        return None
    try:
        s3_client.put_object(
            Body=contents,
            Bucket=plugin_bucket,
            Key=plugin_name + "/" +
            base64.encodebytes(plugin_key.encode("utf8")).decode(),
        )
    except Exception:
        msg = f"Failed to put_object to s3 {key}"
        LOGGER.error(f"{msg} {traceback.format_exc()}")
        return respond(msg)
    return None
def list_from_aws( cls: Type["OrgsAccountResourceSpec"], client: BaseClient, account_id: str, region: str ) -> ListFromAWSResult: """Return a dict of dicts of the format: {'ou_1_arn': {org_1_dict}, 'ou_2_arn': {org_2_dict}, ...} Where the dicts represent results from list_accounts_for_parent.""" org_resp = client.describe_organization() org_arn = org_resp["Organization"]["Arn"] # get all ou ids and arns as a dict ou_ids_arns = get_ou_ids_arns(client) # now look up accounts for each ou orgs_accounts = {} accounts_paginator = client.get_paginator("list_accounts_for_parent") for parent_id, parent_arn in ou_ids_arns.items(): for accounts_resp in accounts_paginator.paginate(ParentId=parent_id): for account in accounts_resp["Accounts"]: account_id = account["Id"] account_arn = f"arn:aws::::account/{account_id}" account["OrganizationArn"] = org_arn account["OUArn"] = parent_arn orgs_accounts[account_arn] = account return ListFromAWSResult(resources=orgs_accounts)
def list_from_aws(cls: Type["IAMPolicyResourceSpec"], client: BaseClient, account_id: str, region: str) -> ListFromAWSResult: """Return a dict of dicts of the format: {'role_1_arn': {role_1_dict}, 'role_2_arn': {role_2_dict}, ...} Where the dicts represent results from list_policies and additional info per role from list_targets_by_role.""" policies = {} paginator = client.get_paginator("list_policies") for resp in paginator.paginate(Scope="Local"): for policy in resp.get("Policies", []): resource_arn = policy["Arn"] default_policy_version = policy["DefaultVersionId"] policy_version_resp = client.get_policy_version( PolicyArn=resource_arn, VersionId=default_policy_version) default_policy_version_document_text = policy_version_resp[ "PolicyVersion"]["Document"] policy[ "DefaultVersionPolicyDocumentText"] = policy_doc_dict_to_sorted_str( default_policy_version_document_text) policies[resource_arn] = policy return ListFromAWSResult(resources=policies)
def list_from_aws(cls: Type["OUResourceSpec"], client: BaseClient, account_id: str, region: str) -> ListFromAWSResult: """Return a dict of dicts of the format: {'ou_1_arn': {ou_1_dict}, 'ou_2_arn': {ou_2_dict}, ...} Where the dicts represent results from list_organizational_units_for_parent with some additional info 'Path') tagged on.""" org_resp = client.describe_organization() org_arn = org_resp["Organization"]["Arn"] ous = {} paginator = client.get_paginator("list_roots") for resp in paginator.paginate(): for root in resp["Roots"]: root_id, root_arn = root["Id"], root["Arn"] root_path = f"/{root['Name']}" ous[root_arn] = root ous[root_arn]["OrganizationArn"] = org_arn ous[root_arn]["Path"] = root_path ou_details = cls._recursively_get_ou_details_for_parent( client=client, parent_id=root_id, parent_path=root_path) for ou_detail in ou_details: arn = ou_detail["Arn"] ou_detail["OrganizationArn"] = org_arn ous[arn] = ou_detail return ListFromAWSResult(resources=ous)
def start_transcribe_job(job_name: str, media_file_uri: str,
                         media_format: str, language_code: str,
                         client: BaseClient) -> None:
    client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={'MediaFileUri': media_file_uri},
        MediaFormat=media_format,
        LanguageCode=language_code)
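# A usage sketch for start_transcribe_job; the job name, S3 URI, and media
# details are hypothetical.
import boto3

start_transcribe_job(job_name='interview-001',
                     media_file_uri='s3://example-bucket/interview-001.mp3',
                     media_format='mp3',
                     language_code='en-US',
                     client=boto3.client('transcribe'))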
def list_from_aws( cls: Type["TransitGatewayResourceSpec"], client: BaseClient, account_id: str, region: str ) -> ListFromAWSResult: """Return a dict of dicts of the format: {'tgw_1_arn': {tgw_1_dict}, 'tgw_2_arn': {tgw_2_dict}, ...} Where the dicts represent results from describe_transit_gateways.""" tgws = {} paginator = client.get_paginator("describe_transit_gateways") tgw_filters = [{"Name": "owner-id", "Values": [account_id]}] for resp in paginator.paginate(Filters=tgw_filters): for tgw in resp["TransitGateways"]: resource_arn = tgw["TransitGatewayArn"] vpc_attachments: List[Dict[str, Any]] = [] vpc_attachments_paginator = client.get_paginator( "describe_transit_gateway_attachments" ) vpc_filters = [ {"Name": "transit-gateway-id", "Values": [tgw["TransitGatewayId"]]}, {"Name": "resource-type", "Values": ["vpc"]}, ] for vpc_attachments_resp in vpc_attachments_paginator.paginate(Filters=vpc_filters): vpc_attachments += vpc_attachments_resp["TransitGatewayAttachments"] tgw["VPCAttachments"] = vpc_attachments tgws[resource_arn] = tgw return ListFromAWSResult(resources=tgws)
def list_from_aws(cls: Type["RDSInstanceResourceSpec"], client: BaseClient, account_id: str, region: str) -> ListFromAWSResult: logger = Logger() dbinstances = {} paginator = client.get_paginator("describe_db_instances") for resp in paginator.paginate(): for db in resp.get("DBInstances", []): resource_arn = db["DBInstanceArn"] db["Tags"] = client.list_tags_for_resource( ResourceName=resource_arn).get("TagList", []) db["Backup"] = [] dbinstances[resource_arn] = db backup_paginator = client.get_paginator( "describe_db_instance_automated_backups") for resp in backup_paginator.paginate(): for backup in resp.get("DBInstanceAutomatedBackups", []): if backup["DBInstanceArn"] in dbinstances: dbinstances[backup["DBInstanceArn"]]["Backup"].append( backup) else: logger.info( event=AWSLogEvents.ScanAWSResourcesNonFatalError, msg= (f'Unable to find matching DB Instance {backup["DBInstanceArn"]} ' "(Possible Deletion)"), ) return ListFromAWSResult(resources=dbinstances)
def create_pre_signed_url_for_get(id: str, filename: str,
                                  has_thumbnail: Optional[bool],
                                  s3_client: BaseClient) -> dict:
    bucket = get_bucket_name()
    expire = 3600
    method = 'GET'
    url = s3_client.generate_presigned_url(ClientMethod='get_object',
                                           Params={
                                               'Bucket': bucket,
                                               'Key': f'images/{id}/{filename}'
                                           },
                                           ExpiresIn=expire,
                                           HttpMethod=method)
    option = {
        'id': id,
        'url': url,
        'thumbnail_url': None,
        'method': method,
        'expiresIn': expire
    }
    if has_thumbnail:
        thumbnail_url = s3_client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket,
                'Key': f'thumbnails/{id}/{filename}'
            },
            ExpiresIn=expire,
            HttpMethod=method)
        option['thumbnail_url'] = thumbnail_url
    return option
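# A usage sketch for create_pre_signed_url_for_get, assuming get_bucket_name
# resolves the bucket elsewhere; the id and filename are hypothetical.
import boto3

urls = create_pre_signed_url_for_get(id='42', filename='photo.png',
                                     has_thumbnail=True,
                                     s3_client=boto3.client('s3'))
print(urls['url'], urls['thumbnail_url'])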
def put_s3_content(data_file: str, s3_uri: str,
                   s3_client: BaseClient = None) -> Optional[str]:
    """
    Write a file to an s3 URI

    :param data_file: a data file
    :param s3_uri: a fully qualified S3 URI for the s3 object to write
    :param s3_client: an optional botocore.client.BaseClient for s3
    :return: the s3 URI on success
    """
    if s3_client is None:
        s3_client = s3_io_client()
    s3_uri = S3URI(s3_uri)
    try:
        with open(data_file, "rb") as fd:
            response = s3_client.put_object(Bucket=s3_uri.bucket,
                                            Key=s3_uri.key,
                                            Body=fd)
        success = response_success(response)
        if success:
            # Use a boto3 waiter to confirm it worked
            exists_waiter = s3_client.get_waiter("object_exists")
            exists_waiter.wait(Bucket=s3_uri.bucket, Key=s3_uri.key)
            return str(s3_uri)
    except ClientError as err:
        LOGGER.error("Failed S3 PUT to: %s", s3_uri)
        LOGGER.error(err)
def get_detector(cls: Type["DetectorResourceSpec"], client: BaseClient,
                 detector_id: str, region: str) -> Dict[str, Any]:
    detector_resp = client.get_detector(DetectorId=detector_id)
    detector = {
        key: detector_resp[key]
        for key in (
            "CreatedAt",
            "FindingPublishingFrequency",
            "ServiceRole",
            "Status",
            "UpdatedAt",
        )
    }
    detector["Members"] = cls.get_detector_members(client, detector_id, region)
    master_account_resp = client.get_master_account(DetectorId=detector_id)
    master_account_dict = master_account_resp.get("Master")
    if master_account_dict:
        detector["Master"] = {
            # Use .get: these keys are not guaranteed to be present.
            key: master_account_dict.get(key)
            for key in (
                "AccountId",
                "RelationshipStatus",
                "InvitedAt",
            )
        }
    return detector
def _create_database(client: BaseClient, database: str) -> None:
    try:
        client.create_database(DatabaseName=database)
        log.info("Database [%s] created successfully.", database)
    except client.exceptions.ConflictException:
        # Database exists; skip database creation.
        log.debug("Database [%s] exists. Skipping database creation.", database)
def _update_table(client: BaseClient, database: str, table_name: str,
                  mem_ttl: int, mag_ttl: int) -> None:
    client.update_table(DatabaseName=database,
                        TableName=table_name,
                        RetentionProperties={
                            'MemoryStoreRetentionPeriodInHours': mem_ttl,
                            'MagneticStoreRetentionPeriodInDays': mag_ttl
                        })
    log.info("Retention updated to %sh and %sd.", mem_ttl, mag_ttl)
def _has_file(s3_client: BaseClient, *, bucket: str, key: str) -> bool:
    from botocore.errorfactory import ClientError
    try:
        s3_client.head_object(Bucket=bucket, Key=key)
    except ClientError:
        return False
    else:
        return True
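# A usage sketch for _has_file; the bucket and key are hypothetical.
import boto3

if _has_file(boto3.client("s3"), bucket="example-bucket",
             key="data/input.csv"):
    print("object exists")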
def download_s3(
    bucket: str,
    obj: str,
    temp_path: str,
    s3_session: BaseClient,
    download_config: S3DownloadConfig,
) -> str:
    """Attempts to download the file from the S3 uri to a temp location using
    any extra arguments to the download

    Args:
        bucket: s3 bucket
        obj: s3 object
        temp_path: local temporary path to write file
        s3_session: current s3 session
        download_config: S3DownloadConfig with extra options for the file transfer

    Returns:
        temp_path: the temporary path of the config file downloaded from s3

    """
    try:
        # Unroll the extra options for those values that are not None
        extra_options = {
            k: v
            for k, v in attr.asdict(download_config).items() if v is not None
        }
        file_size = s3_session.head_object(Bucket=bucket, Key=obj,
                                           **extra_options)["ContentLength"]
        print(f"Attempting to download s3://{bucket}/{obj} "
              f"(size: {size(file_size)})")
        current_progress = 0
        n_ticks = 50

        def _s3_progress_bar(chunk):
            nonlocal current_progress
            # Increment progress
            current_progress += chunk
            done = int(n_ticks * (current_progress / file_size))
            # Compute the percentage before truncating to an int; the original
            # int(current_progress / file_size) * 100 could only show 0 or 100.
            percent = int(current_progress / file_size * 100)
            sys.stdout.write("\r[%s%s] %s%%" %
                             ("=" * done, " " * (n_ticks - done), percent))
            sys.stdout.flush()

        sys.stdout.write("\n\n")
        # Download with the progress callback
        s3_session.download_file(bucket, obj, temp_path,
                                 Callback=_s3_progress_bar,
                                 ExtraArgs=extra_options)
        return temp_path
    except IOError:
        print(f"Failed to download file from S3 "
              f"(bucket: {bucket}, object: {obj}) "
              f"and write to {temp_path}")
def put_data(process_id: str, page: int, data: dict, bucket_name: str,
             s3_client: BaseClient):
    option = {
        "Bucket": bucket_name,
        "Key": create_key_of_xivapi_item(process_id, page),
        "Body": json.dumps(data["Results"], ensure_ascii=False).encode(),
        "ContentType": "application/json",
    }
    s3_client.put_object(**option)
def put_s3_data(bucket_name: str, key: str, data: List[dict],
                s3_client: BaseClient):
    option = {
        "Bucket": bucket_name,
        "Key": key,
        "Body": json.dumps(data, ensure_ascii=False).encode(),
        "ContentType": "application/json",
    }
    s3_client.put_object(**option)
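# A usage sketch for put_s3_data; the bucket name, key, and payload are
# hypothetical.
import boto3

put_s3_data("example-bucket", "items/merged.json",
            [{"id": 1, "name": "copper ore"}], boto3.client("s3"))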
def put_data(process_id: str, data: List[dict], bucket_name: str,
             s3_client: BaseClient):
    option = {
        "Bucket": bucket_name,
        "Key": create_key_of_eorzea_database_merged_item(process_id),
        "Body": json.dumps(data, ensure_ascii=False).encode(),
        "ContentType": "application/json",
    }
    s3_client.put_object(**option)
def upload_s3(
    bucket: str,
    obj: str,
    temp_path: str,
    s3_session: BaseClient,
    upload_config: S3UploadConfig,
):
    """Attempts to upload the local file to the S3 uri using any extra
    arguments to the upload

    Args:
        bucket: s3 bucket
        obj: s3 object
        temp_path: temporary path of the config file
        s3_session: current s3 session
        upload_config: S3UploadConfig with extra options for the file transfer

    Returns:
    """
    try:
        # Unroll the extra options for those values that are not None
        extra_options = {
            k: v
            for k, v in attr.asdict(upload_config).items() if v is not None
        }
        file_size = os.path.getsize(temp_path)
        print(f"Attempting to upload s3://{bucket}/{obj} "
              f"(size: {size(file_size)})")
        current_progress = 0
        n_ticks = 50

        def _s3_progress_bar(chunk):
            nonlocal current_progress
            # Increment progress
            current_progress += chunk
            done = int(n_ticks * (current_progress / file_size))
            # Compute the percentage before truncating to an int; the original
            # int(current_progress / file_size) * 100 could only show 0 or 100.
            percent = int(current_progress / file_size * 100)
            sys.stdout.write("\r[%s%s] %s%%" %
                             ("=" * done, " " * (n_ticks - done), percent))
            sys.stdout.flush()

        sys.stdout.write("\n\n")
        # Upload with progress callback
        s3_session.upload_file(temp_path, bucket, obj,
                               Callback=_s3_progress_bar,
                               ExtraArgs=extra_options)
    except IOError:
        print(f"Failed to upload file to S3 "
              f"(bucket: {bucket}, object: {obj}) "
              f"from {temp_path}")
def graph_set_from_s3(s3_client: BaseClient, json_bucket: str,
                      json_key: str) -> GraphSet:
    """Load a GraphSet from json located in an s3 object."""
    logger = Logger()
    logger.info(event=LogEvent.ReadFromS3Start)
    with io.BytesIO() as json_bytes_buf:
        s3_client.download_fileobj(json_bucket, json_key, json_bytes_buf)
        json_bytes_buf.flush()
        json_bytes_buf.seek(0)
        graph_set_bytes = json_bytes_buf.read()
    logger.info(event=LogEvent.ReadFromS3End)
    graph_set_str = graph_set_bytes.decode("utf-8")
    graph_set_dict = json.loads(graph_set_str)
    return GraphSet.from_dict(graph_set_dict)
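# A usage sketch for graph_set_from_s3; the bucket and key are hypothetical.
import boto3

graph_set = graph_set_from_s3(boto3.client("s3"),
                              json_bucket="example-graph-bucket",
                              json_key="scans/latest.json")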
def upload_to_s3(file_paths: List[str], s3_client: BaseClient = None):
    if s3_client is None:
        s3_client = boto3.client("s3")
    try:
        bucket_name = _generate_unique_bucket_name()
        print(f"Creating bucket: {bucket_name}")
        s3_client.create_bucket(Bucket=bucket_name)
        for i, file_path in enumerate(file_paths):
            print(f"Uploading ({i + 1}/{len(file_paths)}): {file_path}")
            # The local path doubles as the object key.
            s3_client.upload_file(file_path, bucket_name, file_path)
    except Exception as e:
        print(f"ERROR: Failed uploading to S3: {e}")
def _create_table(client: BaseClient, database: str, table_name: str,
                  mem_ttl: int, mag_ttl: int) -> None:
    try:
        client.create_table(DatabaseName=database,
                            TableName=table_name,
                            RetentionProperties={
                                'MemoryStoreRetentionPeriodInHours': mem_ttl,
                                'MagneticStoreRetentionPeriodInDays': mag_ttl
                            })
        log.info(
            "Table [%s] successfully created (memory ttl: %sh, magnetic ttl: %sd).",
            table_name, mem_ttl, mag_ttl)
    except client.exceptions.ConflictException:
        # Table exists on database; skip table creation.
        log.debug("Table [%s] exists. Skipping table creation.", table_name)
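# A usage sketch wiring the Timestream helpers above together; the database
# and table names and the retention values are hypothetical.
import boto3

ts_client = boto3.client("timestream-write")
_create_database(ts_client, "metrics")
_create_table(ts_client, "metrics", "cpu", mem_ttl=24, mag_ttl=365)
_update_table(ts_client, "metrics", "cpu", mem_ttl=12, mag_ttl=730)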
def get_detector_members(cls: Type["DetectorResourceSpec"],
                         client: BaseClient, detector_id: str,
                         region: str) -> List[Dict[str, Any]]:
    member_resps: List[Dict[str, Any]] = []
    list_members_paginator = client.get_paginator("list_members")
    for list_members_resp in list_members_paginator.paginate(
            DetectorId=detector_id):
        member_resps += list_members_resp.get("Members", [])
    members = []
    if member_resps:
        for member_resp in member_resps:
            member_account_id = member_resp["AccountId"]
            member_detector_id = member_resp["DetectorId"]
            # Email and InvitedAt are not always present in the response.
            member_email = member_resp.get("Email", "[NONE]")
            member_relationship_status = member_resp["RelationshipStatus"]
            member_invited_at = member_resp.get("InvitedAt", "[NONE]")
            member_updated_at = member_resp["UpdatedAt"]
            member_detector_arn = cls.generate_arn(
                account_id=member_account_id,
                region=region,
                resource_id=member_detector_id,
            )
            member = {
                "DetectorArn": member_detector_arn,
                "Email": member_email,
                "RelationshipStatus": member_relationship_status,
                "InvitedAt": member_invited_at,
                "UpdatedAt": member_updated_at,
            }
            members.append(member)
    return members
def s3_file_info(s3_uri: Union[S3URI, str],
                 s3_client: BaseClient = None) -> S3Info:
    """
    Collect data from an S3 HEAD request for an S3URI

    :param s3_uri: a fully qualified S3 URI for the s3 object to read
    :param s3_client: an optional botocore.client.BaseClient for s3
    :return: an S3Info object with HEAD data on success;
        on failure the S3Info object has no HEAD data
    """
    if s3_client is None:
        s3_client = s3_io_client()
    if isinstance(s3_uri, str):
        s3_uri = S3URI(s3_uri)
    s3_info = S3Info(s3_uri=s3_uri)
    try:
        s3_head = s3_client.head_object(Bucket=s3_uri.bucket, Key=s3_uri.key)
        if response_success(s3_head):
            # LastModified is a datetime.datetime
            s3_info.last_modified = s3_head["LastModified"]
            s3_info.s3_size = int(s3_head["ContentLength"])
            LOGGER.debug("Success S3URI info: %s", s3_uri)
    except ClientError as err:
        LOGGER.debug("Failed S3URI info: %s", s3_uri)
        LOGGER.debug(err)
    return s3_info
def copy_object(s3_cli: BaseClient,
                src_bucket_name: str,
                src_object_name: str,
                dest_bucket_name: str,
                dest_object_name: str = None) -> Optional[str]:
    """Copy an Amazon S3 bucket object

    Args:
        s3_cli: An instance of boto3 s3 client.
        src_bucket_name: The source S3 bucket name.
        src_object_name: The key of the s3 object being copied.
        dest_bucket_name: The target S3 bucket name.
        dest_object_name: Optional; The key of the destination object.
            If an object with the same name exists in the given bucket,
            the object is overwritten. Defaults to {src_object_name}.

    Returns:
        None if object was copied, otherwise contains error message.
    """
    if dest_object_name is None:
        dest_object_name = src_object_name
    # Construct source bucket/object parameter
    copy_source = {'Bucket': src_bucket_name, 'Key': src_object_name}
    # Copy the object
    try:
        response = s3_cli.copy_object(CopySource=copy_source,
                                      Bucket=dest_bucket_name,
                                      Key=dest_object_name)
        logging.debug(f"Copy response: {response}")
    except ClientError as ex:
        return str(ex)
    return None
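# A usage sketch for copy_object; the bucket and key names are hypothetical.
import boto3

error = copy_object(boto3.client('s3'),
                    src_bucket_name='example-src-bucket',
                    src_object_name='reports/2021.csv',
                    dest_bucket_name='example-dest-bucket')
if error is not None:
    print(f'Copy failed: {error}')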
def get_instance_tags(
    cls: Type["RDSInstanceResourceSpec"],
    client: BaseClient,
    instance_arn: str,
) -> List[Dict[str, str]]:
    return client.list_tags_for_resource(
        ResourceName=instance_arn).get("TagList", [])
def list_from_aws(cls: Type["IAMRoleResourceSpec"], client: BaseClient, account_id: str, region: str) -> ListFromAWSResult: """Return a dict of dicts of the format: {'role_1_arn': {role_1_dict}, 'role_2_arn': {role_2_dict}, ...} Where the dicts represent results from list_roles and additional info per role from list_targets_by_role.""" roles = {} paginator = client.get_paginator("list_roles") for resp in paginator.paginate(): for role in resp.get("Roles", []): role_name = role["RoleName"] assume_role_policy_document = copy.deepcopy( role["AssumeRolePolicyDocument"]) assume_role_policy_document_text = policy_doc_dict_to_sorted_str( assume_role_policy_document) role[ "AssumeRolePolicyDocumentText"] = assume_role_policy_document_text for statement in assume_role_policy_document.get( "Statement", []): for obj in statement.get("Condition", {}).values(): for obj_key in obj.keys(): if obj_key.lower() == "sts:externalid": obj[obj_key] = "REMOVED" policies_result = get_attached_role_policies(client, role_name) policies = policies_result role["PolicyAttachments"] = policies resource_arn = role["Arn"] roles[resource_arn] = role return ListFromAWSResult(resources=roles)
def list_from_aws(cls: Type["IAMSAMLProviderResourceSpec"], client: BaseClient, account_id: str, region: str) -> ListFromAWSResult: """Return a dict of dicts of the format: {'saml_provider_1_arn': {saml_provider_1_dict}, 'saml_provider_2_arn': {saml_provider_2_dict}, ...} Where the dicts represent results from list_saml_providers and additional info per saml_provider list_saml_providers. An additional 'Name' key is added.""" saml_providers = {} resp = client.list_saml_providers() for saml_provider in resp.get("SAMLProviderList", []): resource_arn = saml_provider["Arn"] saml_provider["Name"] = "/".join(resource_arn.split("/")[1:]) try: saml_metadata_document = cls.get_saml_provider_metadata_doc( client=client, arn=resource_arn) hash_object = hashlib.sha256(saml_metadata_document.encode()) saml_provider[ "MetadataDocumentChecksum"] = hash_object.hexdigest() saml_providers[resource_arn] = saml_provider except ClientError as c_e: error_code = getattr(c_e, "response", {}).get("Error", {}).get("Code", {}) if error_code != "NoSuchEntity": raise c_e return ListFromAWSResult(resources=saml_providers)
def list_from_aws(cls: Type["DetectorResourceSpec"], client: BaseClient, account_id: str, region: str) -> ListFromAWSResult: """Return a dict of dicts of the format: {'detector_1_arn': {detector_1_dict}, 'detector_2_arn': {detector_2_dict}, ...} Where the dicts represent results from list_detectors and list_members, get_detector for each listed detector.""" list_detectors_paginator = client.get_paginator("list_detectors") detectors: Dict[str, Dict[str, Any]] = {} for list_detectors_resp in list_detectors_paginator.paginate(): detector_ids = list_detectors_resp["DetectorIds"] for detector_id in detector_ids: resource_arn = cls.generate_arn(account_id=account_id, region=region, resource_id=detector_id) try: detectors[resource_arn] = cls.get_detector( client, detector_id, region) except ClientError as c_e: error_code = getattr(c_e, "response", {}).get("Error", {}).get("Code", {}) if error_code != "BadRequestException": raise c_e return ListFromAWSResult(resources=detectors)