def test_as_hive_ddl_with_default_values(
    create_table_ddl, feature_group_dummy_definitions, sagemaker_session_mock
):
    """Check ``as_hive_ddl()`` called without arguments against the DDL template.

    The mocked session supplies the offline-store S3 configuration, the account
    id and the region. The expected DDL is the ``create_table_ddl`` template
    rendered with database ``sagemaker_featurestore`` and a table name equal to
    the feature group name.
    """
    s3_storage_config = {
        "S3Uri": "s3://some-bucket",
        "ResolvedOutputS3Uri": "s3://resolved_output_s3_uri",
    }
    sagemaker_session_mock.describe_feature_group.return_value = {
        "OfflineStoreConfig": {"S3StorageConfig": s3_storage_config}
    }
    sagemaker_session_mock.account_id.return_value = "1234"
    sagemaker_session_mock.boto_session.region_name = "us-west-2"

    group = FeatureGroup(name="MyGroup", sagemaker_session=sagemaker_session_mock)
    group.feature_definitions = feature_group_dummy_definitions

    # Render the template first, then compare it with what the group generates.
    expected_ddl = create_table_ddl.format(
        database="sagemaker_featurestore",
        table_name="MyGroup",
        account="1234",
        region="us-west-2",
        feature_group_name="MyGroup",
    )
    assert expected_ddl == group.as_hive_ddl()
def test_create_feature_store(
    feature_store_session,
    role,
    feature_group_name,
    offline_store_s3_uri,
    pandas_data_frame,
    record,
    create_table_ddl,
):
    """Create a feature group, ingest data, query it via Athena and check the DDL.

    Steps:
      1. Load feature definitions from ``pandas_data_frame`` and create the
         feature group with the online store enabled.
      2. Put a single ``record`` and ingest ``pandas_data_frame`` with three
         workers, asserting that no rows failed.
      3. Poll the Athena table (bounded by a 10 minute timeout) until the
         expected number of rows is visible.
      4. Verify that ``feature4`` is null for the rows whose ``feature1`` is in
         5..9, and that ``as_hive_ddl()`` matches the rendered template.
      5. After cleanup, verify the ARN returned by ``create``.
    """
    # NOTE(review): presumably one row from put_record plus the rows of
    # pandas_data_frame -- confirm against the fixtures.
    expected_row_count = 11

    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=feature_store_session)
    feature_group.load_feature_definitions(data_frame=pandas_data_frame)

    with cleanup_feature_group(feature_group):
        output = feature_group.create(
            s3_uri=offline_store_s3_uri,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
        )
        _wait_for_feature_group_create(feature_group)

        # Ingest data
        feature_group.put_record(record=record)
        ingestion_manager = feature_group.ingest(data_frame=pandas_data_frame,
                                                 max_workers=3,
                                                 wait=False)
        ingestion_manager.wait()
        assert 0 == len(ingestion_manager.failed_rows)

        # Query the integrated Glue table.
        athena_query = feature_group.athena_query()
        df = DataFrame()
        with timeout(minutes=10):
            while df.shape[0] < expected_row_count:
                athena_query.run(
                    query_string=f'SELECT * FROM "{athena_query.table_name}"',
                    output_location=f"{offline_store_s3_uri}/query_results",
                )
                athena_query.wait()
                assert "SUCCEEDED" == athena_query.get_query_execution().get(
                    "QueryExecution").get("Status").get("State")
                df = athena_query.as_dataframe()
                print(f"Found {df.shape[0]} records.")
                # Only back off when another poll is needed; sleeping after the
                # final, complete result just adds a minute to the test.
                if df.shape[0] < expected_row_count:
                    time.sleep(60)

        assert df.shape[0] == expected_row_count

        # Series.items() yields (index, value) tuples, which are always truthy,
        # so asserting on them checks nothing. Assert on the boolean values.
        rows_expected_null = df.loc[df["feature1"].isin([5, 6, 7, 8, 9])]
        nans = pd.isna(rows_expected_null["feature4"])
        assert nans.all()

        assert (create_table_ddl.format(
            feature_group_name=feature_group_name,
            region=feature_store_session.boto_session.region_name,
            account=feature_store_session.account_id(),
        ) == feature_group.as_hive_ddl())
    assert output["FeatureGroupArn"].endswith(
        f"feature-group/{feature_group_name}")