Example #1
def api_application_dict(application_instance):
    spawner = get_spawner(application_instance.application_template.spawner)
    spawner_state = spawner.state(
        application_instance.spawner_application_template_options,
        application_instance.created_date.replace(tzinfo=None),
        application_instance.spawner_application_instance_id,
        application_instance.public_host,
    )

    # Only pass through the database state if the spawner is running.
    # Otherwise, we are in an error condition, so return the spawner
    # state so the client (i.e. the proxy) knows to take action
    api_state = (application_instance.state
                 if spawner_state == 'RUNNING' else spawner_state)

    sso_id_hex_short = stable_identification_suffix(
        str(application_instance.owner.profile.sso_id), short=True
    )

    return {
        'proxy_url': application_instance.proxy_url,
        'state': api_state,
        'user': sso_id_hex_short,
        'wrap': application_instance.application_template.wrap,
        # Used by metrics to label the application
        'name': application_instance.application_template.nice_name,
    }
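All of the examples on this page call stable_identification_suffix, whose implementation is not shown. The call sites only require a deterministic suffix derived from an identifier string, optionally shortened, so a minimal sketch might look like the following; the hash algorithm and truncation length are assumptions, not the real implementation:

import hashlib

def stable_identification_suffix(identifier, short=False):
    # Hypothetical sketch: hashing gives a stable suffix (the same input
    # always yields the same hex string) without storing any mapping
    digest = hashlib.sha256(identifier.encode('utf-8')).hexdigest()
    return digest[:8] if short else digest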
Example #2
def drop_temporary_results_table(querylog):
    # ID hardcoded in conftest.staff_user_data
    suffix = stable_identification_suffix("aae8901a-082f-4f12-8c6c-fdf4aeba2d68", short=True)
    schema_and_user_name = f"{USER_SCHEMA_STEM}{suffix}"

    with connections["my_database"].cursor() as cursor:
        cursor.execute(
            f"DROP TABLE IF EXISTS {schema_and_user_name}._data_explorer_tmp_query_{querylog.id}"
        )
Example #3
def _get_data_source_id(db_name):
    # env and quicksight_user are free variables resolved from an enclosing scope
    return (
        "data-workspace-"
        + env
        + "-"
        + db_name
        + "-"
        + stable_identification_suffix(quicksight_user['Arn'], short=True)
    )
Example #4
def application_api_is_allowed(request, public_host):
    try:
        (
            application_template,
            _,
            host_user,
            commit_id,
        ) = application_template_tag_user_commit_from_host(public_host)
    except ApplicationTemplate.DoesNotExist:
        return False

    visualisation_catalogue_item = None
    if application_template.application_type == 'VISUALISATION':
        visualisation_catalogue_item = VisualisationCatalogueItem.objects.get(
            visualisation_template=application_template)

    request_sso_id_hex_short = stable_identification_suffix(
        str(request.user.profile.sso_id), short=True
    )
    is_preview = commit_id is not None

    def is_tool_and_correct_user_and_allowed_to_start():
        return (
            application_template.application_type == 'TOOL'
            and host_user == request_sso_id_hex_short
            and request.user.has_perm('applications.start_all_applications')
        )

    def is_published_visualisation_and_requires_authentication():
        return (
            not is_preview
            and application_template.visible is True
            and visualisation_catalogue_item
            and visualisation_catalogue_item.user_access_type == 'REQUIRES_AUTHENTICATION'
        )

    def is_published_visualisation_and_requires_authorisation_and_has_authorisation():
        return (
            not is_preview
            and application_template.visible is True
            and visualisation_catalogue_item
            and visualisation_catalogue_item.user_access_type == 'REQUIRES_AUTHORIZATION'
            and request.user.visualisationuserpermission_set.filter(
                visualisation=visualisation_catalogue_item
            ).exists()
        )

    def is_visualisation_preview_and_has_gitlab_developer():
        return (
            is_preview
            and visualisation_catalogue_item
            and gitlab_has_developer_access(
                request.user, application_template.gitlab_project_id
            )
        )

    return (
        is_tool_and_correct_user_and_allowed_to_start()
        or is_published_visualisation_and_requires_authentication()
        or is_published_visualisation_and_requires_authorisation_and_has_authorisation()
        or is_visualisation_preview_and_has_gitlab_developer()
    )
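In short, access is granted when any one of four conditions holds: the host names the requesting user's own tool and they have permission to start tools; the target is a published visualisation that only requires authentication; it is a published visualisation that requires authorisation and the user has it; or it is a preview and the user has developer access to the underlying GitLab project.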
Example #5
def create_temporary_results_table(querylog):
    # ID hardcoded in conftest.staff_user_data
    suffix = stable_identification_suffix("aae8901a-082f-4f12-8c6c-fdf4aeba2d68", short=True)
    schema_and_user_name = f"{USER_SCHEMA_STEM}{suffix}"

    with connections["my_database"].cursor() as cursor:
        cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_and_user_name}")
        cursor.execute(
            f"DROP TABLE IF EXISTS {schema_and_user_name}._data_explorer_tmp_query_{querylog.id}"
        )
        cursor.execute(
            f"CREATE TABLE {schema_and_user_name}._data_explorer_tmp_query_{querylog.id} "
            "(id int primary key, data text)"
        )
        cursor.execute(
            f"INSERT INTO {schema_and_user_name}._data_explorer_tmp_query_{querylog.id} VALUES (1, '2')"
        )
        cursor.execute(
            f"ALTER TABLE {schema_and_user_name}._data_explorer_tmp_query_{querylog.id} OWNER TO {schema_and_user_name}"
        )
Example #6
    def spawn(
        user,
        tag,
        application_instance,
        spawner_options,
        credentials,
        app_schema,
    ):
        try:
            pipeline_id = None
            task_arn = None
            options = json.loads(spawner_options)

            cluster_name = options['CLUSTER_NAME']
            container_name = options['CONTAINER_NAME']
            definition_arn = options['DEFINITION_ARN']
            ecr_repository_name = options.get('ECR_REPOSITORY_NAME')
            security_groups = options['SECURITY_GROUPS']
            subnets = options['SUBNETS']
            cmd = options['CMD'] if 'CMD' in options else []
            env = options.get('ENV', {})
            port = options['PORT']
            s3_sync = options['S3_SYNC'] == 'true'

            s3_region = options['S3_REGION']
            s3_host = options['S3_HOST']
            s3_bucket = options['S3_BUCKET']

            platform_version = options.get('PLATFORM_VERSION', '1.3.0')

            # The database users are stored so when the database users are cleaned up,
            # we know _not_ to delete any users used by running or spawning apps
            for creds in credentials:
                ApplicationInstanceDbUsers.objects.create(
                    application_instance=application_instance,
                    db_id=creds['db_id'],
                    db_username=creds['db_user'],
                    db_persistent_role=creds['db_persistent_role'],
                )

            database_env = {
                f'DATABASE_DSN__{database["memorable_name"]}':
                f'host={database["db_host"]} '
                f'port={database["db_port"]} sslmode=require dbname={database["db_name"]} '
                f'user={database["db_user"]} password={database["db_password"]}'
                for database in credentials
            }

            schema_env = {'APP_SCHEMA': app_schema}

            user_efs_access_point_id = (
                user.profile.home_directory_efs_access_point_id
                if application_instance.application_template.application_type == 'TOOL'
                else None
            )

            logger.info('Starting %s', cmd)

            role_arn, s3_prefix = create_tools_access_iam_role(
                user.email, str(user.profile.sso_id), user_efs_access_point_id)

            s3_env = {
                'S3_PREFIX': s3_prefix,
                'S3_REGION': s3_region,
                'S3_HOST': s3_host,
                'S3_BUCKET': s3_bucket,
            }

            # Build tag if we can and it doesn't already exist
            if (ecr_repository_name and tag and application_instance.commit_id
                    and application_instance.application_template.gitlab_project_id
                    and not _ecr_tag_exists(ecr_repository_name, tag)):
                pipeline = gitlab_api_v4_ecr_pipeline_trigger(
                    ECR_PROJECT_ID,
                    application_instance.application_template.gitlab_project_id,
                    application_instance.commit_id,
                    ecr_repository_name,
                    tag,
                )
                if 'id' not in pipeline:
                    raise Exception(
                        'Unable to start pipeline: {}'.format(pipeline))
                pipeline_id = pipeline['id']
                application_instance.spawner_application_instance_id = json.dumps(
                    {'pipeline_id': pipeline_id, 'task_arn': None}
                )
                application_instance.save(
                    update_fields=['spawner_application_instance_id'])

                for _ in range(0, 900):
                    gevent.sleep(3)
                    pipeline = _gitlab_ecr_pipeline_get(pipeline_id)
                    logger.info('Fetched pipeline %s', pipeline)
                    if (pipeline['status'] not in RUNNING_PIPELINE_STATUSES
                            and pipeline['status']
                            not in SUCCESS_PIPELINE_STATUSES):
                        raise Exception('Pipeline failed {}'.format(pipeline))
                    if pipeline['status'] in SUCCESS_PIPELINE_STATUSES:
                        break
                else:
                    logger.error('Pipeline took too long, cancelling: %s',
                                 pipeline)
                    _gitlab_ecr_pipeline_cancel(pipeline_id)
                    raise Exception(
                        'Pipeline {} took too long'.format(pipeline))

            # It doesn't really matter what the suffix is: it could even be a random
            # number, but we choose the short hashed version of the SSO ID to help debugging
            task_family_suffix = stable_identification_suffix(
                str(user.profile.sso_id), short=True
            )
            definition_arn_with_image = _fargate_new_task_definition(
                role_arn,
                definition_arn,
                container_name,
                tag,
                task_family_suffix,
                settings.EFS_ID,
                user_efs_access_point_id,
            )

            for i in range(0, 10):
                # Sometimes there is an error assuming the new role: both IAM and
                # ECS are eventually consistent
                try:
                    start_task_response = _fargate_task_run(
                        role_arn,
                        cluster_name,
                        container_name,
                        definition_arn_with_image,
                        security_groups,
                        subnets,
                        application_instance.cpu,
                        application_instance.memory,
                        cmd,
                        {**s3_env, **database_env, **schema_env, **env},
                        s3_sync,
                        platform_version,
                    )
                except ClientError:
                    gevent.sleep(3)
                    if i == 9:
                        raise
                else:
                    break

            task = (
                start_task_response['tasks'][0]
                if 'tasks' in start_task_response
                else start_task_response['task']
            )
            task_arn = task['taskArn']
            application_instance.spawner_application_instance_id = json.dumps(
                {'pipeline_id': pipeline_id, 'task_arn': task_arn}
            )
            application_instance.spawner_created_at = task['createdAt']
            application_instance.spawner_cpu = task['cpu']
            application_instance.spawner_memory = task['memory']
            application_instance.save(update_fields=[
                'spawner_application_instance_id',
                'spawner_created_at',
                'spawner_cpu',
                'spawner_memory',
            ])

            application_instance.refresh_from_db()
            if application_instance.state == 'STOPPED':
                raise Exception(
                    'Application set to stopped before spawning complete')

            for _ in range(0, 60):
                ip_address = _fargate_task_ip(options['CLUSTER_NAME'], task_arn)
                if ip_address:
                    application_instance.proxy_url = f'http://{ip_address}:{port}'
                    application_instance.save(update_fields=['proxy_url'])
                    return
                gevent.sleep(3)

            raise Exception('Spawner timed out before finding ip address')
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                'Spawning %s %s %s',
                pipeline_id,
                application_instance.id,
                spawner_options,
            )
            if task_arn:
                _fargate_task_stop(cluster_name, task_arn)
            if pipeline_id:
                _gitlab_ecr_pipeline_cancel(pipeline_id)
Example #7
    def spawn(
        user,
        tag,
        application_instance,
        spawner_options,
        credentials,
        app_schema,
    ):
        try:
            pipeline_id = None
            task_arn = None
            options = json.loads(spawner_options)

            cluster_name = options["CLUSTER_NAME"]
            container_name = options["CONTAINER_NAME"]
            definition_arn = options["DEFINITION_ARN"]
            ecr_repository_name = options.get("ECR_REPOSITORY_NAME")
            security_groups = options["SECURITY_GROUPS"]
            subnets = options["SUBNETS"]
            cmd = options["CMD"] if "CMD" in options else []
            env = options.get("ENV", {})
            port = options["PORT"]
            s3_sync = options["S3_SYNC"] == "true"

            s3_region = options["S3_REGION"]
            s3_host = options["S3_HOST"]
            s3_bucket = options["S3_BUCKET"]

            platform_version = options.get("PLATFORM_VERSION", "1.3.0")

            # The database users are stored so when the database users are cleaned up,
            # we know _not_ to delete any users used by running or spawning apps
            for creds in credentials:
                ApplicationInstanceDbUsers.objects.create(
                    application_instance=application_instance,
                    db_id=creds["db_id"],
                    db_username=creds["db_user"],
                    db_persistent_role=creds["db_persistent_role"],
                )

            database_env = _creds_to_env_vars(credentials)

            schema_env = {"APP_SCHEMA": app_schema}

            user_efs_access_point_id = (
                user.profile.home_directory_efs_access_point_id
                if application_instance.application_template.application_type == "TOOL"
                else None
            )

            logger.info("Starting %s", cmd)

            user_email = user.email
            user_profile_sso_id = user.profile.sso_id
            close_admin_db_connection_if_not_in_atomic_block()

            role_arn, s3_prefix = create_tools_access_iam_role(
                user_email, str(user_profile_sso_id), user_efs_access_point_id)

            s3_env = {
                "S3_PREFIX": s3_prefix,
                "S3_REGION": s3_region,
                "S3_HOST": s3_host,
                "S3_BUCKET": s3_bucket,
            }

            # Build tag if we can and it doesn't already exist
            if (ecr_repository_name and tag and application_instance.commit_id
                    and application_instance.application_template.gitlab_project_id
                    and not _ecr_tag_exists(ecr_repository_name, tag)):
                gitlab_project_id = application_instance.application_template.gitlab_project_id
                commit_id = application_instance.commit_id
                close_admin_db_connection_if_not_in_atomic_block()

                pipeline = gitlab_api_v4_ecr_pipeline_trigger(
                    ECR_PROJECT_ID,
                    gitlab_project_id,
                    commit_id,
                    ecr_repository_name,
                    tag,
                )
                if "id" not in pipeline:
                    raise Exception(
                        "Unable to start pipeline: {}".format(pipeline))
                pipeline_id = pipeline["id"]
                application_instance.spawner_application_instance_id = json.dumps(
                    {"pipeline_id": pipeline_id, "task_arn": None}
                )
                application_instance.save(
                    update_fields=["spawner_application_instance_id"])
                close_admin_db_connection_if_not_in_atomic_block()

                for _ in range(0, 900):
                    gevent.sleep(3)
                    pipeline = _gitlab_ecr_pipeline_get(pipeline_id)
                    logger.info("Fetched pipeline %s", pipeline)
                    if (pipeline["status"] not in RUNNING_PIPELINE_STATUSES
                            and pipeline["status"]
                            not in SUCCESS_PIPELINE_STATUSES):
                        raise Exception("Pipeline failed {}".format(pipeline))
                    if pipeline["status"] in SUCCESS_PIPELINE_STATUSES:
                        break
                else:
                    logger.error("Pipeline took too long, cancelling: %s",
                                 pipeline)
                    _gitlab_ecr_pipeline_cancel(pipeline_id)
                    raise Exception(
                        "Pipeline {} took too long".format(pipeline))

            # It doesn't really matter what the suffix is: it could even be a random
            # number, but we choose the short hashed version of the SSO ID to help debugging
            task_family_suffix = stable_identification_suffix(
                str(user.profile.sso_id), short=True
            )
            definition_arn_with_image = _fargate_new_task_definition(
                role_arn,
                definition_arn,
                container_name,
                tag,
                task_family_suffix,
                settings.EFS_ID,
                user_efs_access_point_id,
            )

            cpu = application_instance.cpu
            memory = application_instance.memory
            close_admin_db_connection_if_not_in_atomic_block()

            for i in range(0, 10):
                # Sometimes there is an error assuming the new role: both IAM and
                # ECS are eventually consistent
                try:
                    start_task_response = _fargate_task_run(
                        role_arn,
                        cluster_name,
                        container_name,
                        definition_arn_with_image,
                        security_groups,
                        subnets,
                        cpu,
                        memory,
                        cmd,
                        {**s3_env, **database_env, **schema_env, **env},
                        s3_sync,
                        platform_version,
                    )
                except ClientError:
                    gevent.sleep(3)
                    if i == 9:
                        raise
                else:
                    break

            task = (
                start_task_response["tasks"][0]
                if "tasks" in start_task_response
                else start_task_response["task"]
            )
            task_arn = task["taskArn"]
            application_instance.spawner_application_instance_id = json.dumps(
                {"pipeline_id": pipeline_id, "task_arn": task_arn}
            )
            application_instance.spawner_created_at = task["createdAt"]
            application_instance.spawner_cpu = task["cpu"]
            application_instance.spawner_memory = task["memory"]
            application_instance.save(update_fields=[
                "spawner_application_instance_id",
                "spawner_created_at",
                "spawner_cpu",
                "spawner_memory",
            ])

            application_instance.refresh_from_db()
            if application_instance.state == "STOPPED":
                raise Exception(
                    "Application set to stopped before spawning complete")

            close_admin_db_connection_if_not_in_atomic_block()

            for _ in range(0, 60):
                ip_address = _fargate_task_ip(options["CLUSTER_NAME"], task_arn)
                if ip_address:
                    application_instance.proxy_url = f"http://{ip_address}:{port}"
                    application_instance.save(update_fields=["proxy_url"])
                    return
                gevent.sleep(3)

            raise Exception("Spawner timed out before finding ip address")
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Spawning %s %s %s",
                pipeline_id,
                application_instance.id,
                spawner_options,
            )
            if task_arn:
                _fargate_task_stop(cluster_name, task_arn)
            if pipeline_id:
                _gitlab_ecr_pipeline_cancel(pipeline_id)
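Example #7 is the same ECS spawner as Example #6 with one main difference: the database fields it needs are copied into local variables up front, the DSN construction is factored out into _creds_to_env_vars, and close_admin_db_connection_if_not_in_atomic_block() is called before each long-running wait, so a database connection is not held open while polling GitLab and ECS.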
Example #8
def stable_id_suffix(self, instance):
    # Display helper (e.g. a read-only Django admin column) showing a user's short suffix
    return stable_identification_suffix(str(instance.profile.sso_id), short=True)
Example #9
def get_superset_credentials(request):
    superset_endpoint = {
        urlparse(url).netloc: name for name, url in settings.SUPERSET_DOMAINS.items()
    }[request.headers["host"]]

    cache_key = get_cached_credentials_key(
        request.headers["sso-profile-user-id"], superset_endpoint
    )
    response = cache.get(cache_key, None)
    if not response:
        dw_user = get_user_model().objects.get(
            profile__sso_id=request.headers["sso-profile-user-id"]
        )
        if not dw_user.user_permissions.filter(
            codename="start_all_applications",
            content_type=ContentType.objects.get_for_model(ApplicationInstance),
        ).exists():
            return HttpResponse("Unauthorized", status=401)

        duration = timedelta(hours=24)
        cache_duration = (duration - timedelta(minutes=15)).total_seconds()

        # Give "public" users full db credentials
        if superset_endpoint == "view":
            dashboards_user_can_access = [
                d.identifier
                for d in VisualisationLink.objects.filter(visualisation_type="SUPERSET")
                if d.visualisation_catalogue_item.user_has_access(dw_user)
            ]
            credentials = [
                {
                    "memorable_name": alias,
                    "db_name": data["NAME"],
                    "db_host": data["HOST"],
                    "db_port": data["PORT"],
                    "db_user": data["USER"],
                    "db_password": data["PASSWORD"],
                }
                for alias, data in settings.DATABASES_DATA.items()
            ]

        # Give "editor"/"admin" users temp private credentials
        else:
            dashboards_user_can_access = []
            source_tables = source_tables_for_user(dw_user)
            db_role_schema_suffix = stable_identification_suffix(
                str(dw_user.profile.sso_id), short=True
            )
            credentials = new_private_database_credentials(
                db_role_schema_suffix,
                source_tables,
                postgres_user(dw_user.email, suffix="superset"),
                dw_user,
                valid_for=duration,
            )

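        # Note that only the first set of credentials is passed back to Superset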
        response = {
            "credentials": credentials[0],
            "dashboards": dashboards_user_can_access,
        }

        cache.set(cache_key, response, timeout=cache_duration)

    return JsonResponse(response)
Example #10
def sync_quicksight_users(data_client, user_client, account_id,
                          quicksight_user_list):
    for quicksight_user in quicksight_user_list:
        user_arn = quicksight_user['Arn']
        user_email = quicksight_user['Email']
        user_role = quicksight_user['Role']
        user_username = quicksight_user['UserName']

        if user_role not in {"AUTHOR", "ADMIN"}:
            logger.info("Skipping %s with role %s.", user_email, user_role)
            continue

        try:
            # Lightly enforce that only one instance can edit permissions for a user at a time.
            with cache.lock(
                    f"sync-quicksight-permissions-{user_arn}",
                    blocking_timeout=60,
                    timeout=360,
            ):
                try:
                    if user_role == "ADMIN":
                        user_client.update_user(
                            AwsAccountId=account_id,
                            Namespace='default',
                            Role=user_role,
                            UnapplyCustomPermissions=True,
                            UserName=user_username,
                            Email=user_email,
                        )
                    else:
                        user_client.update_user(
                            AwsAccountId=account_id,
                            Namespace="default",
                            Role=user_role,
                            CustomPermissionsName=settings.QUICKSIGHT_AUTHOR_CUSTOM_PERMISSIONS,
                            UserName=user_username,
                            Email=user_email,
                        )

                except botocore.exceptions.ClientError as e:
                    if e.response['Error']['Code'] == 'ResourceNotFoundException':
                        continue  # Can be raised if the user has been deactivated/"deleted"

                    raise e

                dw_user = get_user_model().objects.filter(email=user_email).first()
                if not dw_user:
                    logger.info(
                        "Skipping %s - cannot match with Data Workspace user.",
                        user_email,
                    )
                    continue

                # We technically ignore the case where a single email has multiple matches on DW, but I'm not
                # sure this is a case that can happen - and if it can, we don't care while prototyping.
                logger.info("Syncing QuickSight resources for %s", dw_user)

                source_tables = source_tables_for_user(dw_user)
                db_role_schema_suffix = stable_identification_suffix(
                    user_arn, short=True)

                # This creates a DB user for each of our datasets DBs. These users are intended to be long-lived,
                # so they might already exist. If this is the case, we still generate a new password, as at the moment
                # these user accounts only last for 31 days by default - so we need to update the password to keep them
                # from expiring.
                creds = new_private_database_credentials(
                    db_role_schema_suffix,
                    source_tables,
                    postgres_user(user_email, suffix='qs'),
                    valid_for=datetime.timedelta(
                        days=7
                    ),  # We refresh these creds every night, so they don't need to last long at all.
                )

                create_update_delete_quicksight_user_data_sources(
                    data_client, account_id, quicksight_user, creds)

        except redis.exceptions.LockError:
            logger.exception("Unable to sync permissions for %s",
                             quicksight_user['Arn'])
Example #11
def sync_quicksight_permissions(
    user_sso_ids_to_update=tuple(), poll_for_user_creation=False
):
    logger.info(
        'sync_quicksight_user_datasources(%s, poll_for_user_creation=%s) started',
        user_sso_ids_to_update,
        poll_for_user_creation,
    )

    # QuickSight manages users in a single specific region
    user_client = boto3.client(
        'quicksight', region_name=settings.QUICKSIGHT_USER_REGION
    )
    # Data sources can be in other regions - so here we use the Data Workspace default from its env vars.
    data_client = boto3.client('quicksight')

    account_id = boto3.client('sts').get_caller_identity().get('Account')

    quicksight_user_list: List[Dict[str, str]]
    if len(user_sso_ids_to_update) > 0:
        quicksight_user_list = []

        for user_sso_id in user_sso_ids_to_update:
            # Poll for the user for 5 minutes
            attempts = (5 * 60) if poll_for_user_creation else 1
            for _ in range(attempts):
                attempts -= 1

                try:
                    quicksight_user_list.append(
                        user_client.describe_user(
                            AwsAccountId=account_id,
                            Namespace='default',
                            # \/ This is the format of the user name created by DIT SSO \/
                            UserName=f'quicksight_federation/{user_sso_id}',
                        )['User']
                    )
                    break

                except botocore.exceptions.ClientError as e:
                    if e.response['Error']['Code'] == 'ResourceNotFoundException':
                        if attempts > 0:
                            gevent.sleep(1)
                        elif poll_for_user_creation:
                            logger.exception(
                                "Did not find user with sso id `%s` after 5 minutes",
                                user_sso_id,
                            )
                    else:
                        raise e

    else:
        quicksight_user_list: List[Dict[str, str]] = user_client.list_users(
            AwsAccountId=account_id, Namespace='default'
        )['UserList']

    for quicksight_user in quicksight_user_list:
        user_arn = quicksight_user['Arn']
        user_email = quicksight_user['Email']
        user_role = quicksight_user['Role']
        user_username = quicksight_user['UserName']

        if user_role not in {"AUTHOR", "ADMIN"}:
            logger.info("Skipping %s with role %s.", user_email, user_role)
            continue

        try:
            # Lightly enforce that only one instance can edit permissions for a user at a time.
            with cache.lock(
                f"sync-quicksight-permissions-{user_arn}",
                blocking_timeout=60,
                timeout=360,
            ):
                try:
                    if user_role == "ADMIN":
                        user_client.update_user(
                            AwsAccountId=account_id,
                            Namespace='default',
                            Role=user_role,
                            UnapplyCustomPermissions=True,
                            UserName=user_username,
                            Email=user_email,
                        )
                    else:
                        user_client.update_user(
                            AwsAccountId=account_id,
                            Namespace="default",
                            Role=user_role,
                            CustomPermissionsName=settings.QUICKSIGHT_AUTHOR_CUSTOM_PERMISSIONS,
                            UserName=user_username,
                            Email=user_email,
                        )

                except botocore.exceptions.ClientError as e:
                    if e.response['Error']['Code'] == 'ResourceNotFoundException':
                        continue  # Can be raised if the user has been deactivated/"deleted"

                    raise e

                dw_user = get_user_model().objects.filter(email=user_email).first()
                if not dw_user:
                    logger.info(
                        "Skipping %s - cannot match with Data Workspace user.",
                        user_email,
                    )
                    continue

                # We technically ignore the case where a single email has multiple matches on DW, but I'm not
                # sure this is a case that can happen - and if it can, we don't care while prototyping.
                logger.info("Syncing QuickSight resources for %s", dw_user)

                source_tables = source_tables_for_user(dw_user)
                db_role_schema_suffix = stable_identification_suffix(
                    user_arn, short=True
                )

                # This creates a DB user for each of our datasets DBs. These users are intended to be long-lived,
                # so they might already exist. If this is the case, we still generate a new password, as at the moment
                # these user accounts only last for 31 days by default - so we need to update the password to keep them
                # from expiring.
                creds = new_private_database_credentials(
                    db_role_schema_suffix,
                    source_tables,
                    postgres_user(user_email, suffix='qs'),
                    valid_for=datetime.timedelta(
                        days=7
                    ),  # We refresh these creds every night, so they don't need to last long at all.
                )

                create_update_delete_quicksight_user_data_sources(
                    data_client, account_id, quicksight_user, creds
                )

        except redis.exceptions.LockError:
            logger.exception(
                "Unable to sync permissions for %s", quicksight_user['Arn']
            )

    logger.info(
        'sync_quicksight_user_datasources(%s, poll_for_user_creation=%s) finished',
        user_sso_ids_to_update,
        poll_for_user_creation,
    )
Example #12
def sync_quicksight_permissions(user_sso_ids_to_update=tuple()):
    try:
        # Lightly enforce that only one instance is running the task at a time. The job normally takes just a few minutes.
        with cache.lock(
            "sync-quicksight-permissions", blocking_timeout=360, timeout=3600
        ):
            logger.info(
                f'sync_quicksight_user_datasources({user_sso_ids_to_update}) started'
            )

            # QuickSight manages users in a single specific region
            user_client = boto3.client(
                'quicksight', region_name=settings.QUICKSIGHT_USER_REGION
            )
            # Data sources can be in other regions - so here we use the Data Workspace default from its env vars.
            data_client = boto3.client('quicksight')

            account_id = boto3.client('sts').get_caller_identity().get('Account')

            quicksight_user_list: List[Dict[str, str]]
            if len(user_sso_ids_to_update) > 0:
                quicksight_user_list = []

                for user_sso_id in user_sso_ids_to_update:
                    try:
                        quicksight_user_list.append(
                            user_client.describe_user(
                                AwsAccountId=account_id,
                                Namespace='default',
                                # \/ This is the format of the user name created by DIT SSO \/
                                UserName=f'quicksight_federation/{user_sso_id}',
                            )['User']
                        )

                    except botocore.exceptions.ClientError as e:
                        if e.response['Error']['Code'] == 'ResourceNotFoundException':
                            pass  # If the user isn't an author on QuickSight, just move on.
                        else:
                            raise e

            else:
                quicksight_user_list: List[Dict[str, str]] = user_client.list_users(
                    AwsAccountId=account_id, Namespace='default'
                )['UserList']

            for quicksight_user in quicksight_user_list:
                user_arn = quicksight_user['Arn']
                user_email = quicksight_user['Email']
                user_role = quicksight_user['Role']

                if user_role not in ('AUTHOR', 'ADMIN'):
                    logger.info(f"Skipping {user_email} with role {user_role}.")
                    continue

                dw_user = get_user_model().objects.filter(email=user_email).first()
                if not dw_user:
                    logger.info(
                        f"Skipping {user_email} - cannot match with Data Workspace user."
                    )
                    continue

                # We technically ignore the case where a single email has multiple matches on DW, but I'm not
                # sure this is a case that can happen - and if it can, we don't care while prototyping.
                logger.info(f"Syncing QuickSight resources for {dw_user}")

                source_tables = source_tables_for_user(dw_user)
                db_role_schema_suffix = stable_identification_suffix(user_arn)

                # This creates a DB user for each of our datasets DBs. These users are intended to be long-lived,
                # so they might already exist. If this is the case, we still generate a new password, as at the moment
                # these user accounts only last for 31 days by default - so we need to update the password to keep them
                # from expiring.
                creds = new_private_database_credentials(
                    db_role_schema_suffix,
                    source_tables,
                    postgres_user(user_email, suffix='qs'),
                    valid_for=datetime.timedelta(
                        days=7
                    ),  # We refresh these creds every night, so they don't need to last long at all.
                )

                create_update_delete_quicksight_user_data_sources(
                    data_client, account_id, quicksight_user, creds
                )

            logger.info(
                f'sync_quicksight_user_datasources({user_sso_ids_to_update}) finished'
            )

    except redis.exceptions.LockError:
        pass
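Unlike Examples #10 and #11, this variant serialises the whole run behind a single cache lock rather than one lock per user, and it calls stable_identification_suffix(user_arn) without short=True, so the full-length suffix is used for the database role names.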