Пример #1
0
def test_ipinsights(sagemaker_session):
    """Train an IPInsights estimator, deploy it, and check one prediction.

    The test counts the lines of the local ``ipinsights/train.csv`` fixture,
    builds a record set from that directory, fits the estimator under a
    unique job name, deploys the resulting model to an endpoint with the same
    name, and asserts that a single prediction carrying a ``dot_product``
    label comes back.

    Args:
        sagemaker_session: Session object handed to the estimator, the
            record-set helper, the endpoint clean-up helper and the model.
    """
    job_name = unique_name_from_base('ipinsights')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'ipinsights')
        data_filename = 'train.csv'

        # Count lines by streaming the file instead of materialising every
        # line in a list; the handle is released as soon as counting is done.
        with open(os.path.join(data_path, data_filename), 'rb') as f:
            num_records = sum(1 for _ in f)

        # Built outside the ``with`` block: the estimator does not need the
        # fixture file to remain open.
        ipinsights = IPInsights(role='SageMakerRole',
                                train_instance_count=1,
                                train_instance_type='ml.c4.xlarge',
                                num_entity_vectors=10,
                                vector_dim=100,
                                sagemaker_session=sagemaker_session)

        record_set = prepare_record_set_from_local_files(
            data_path, ipinsights.data_location, num_records, FEATURE_DIM,
            sagemaker_session)
        ipinsights.fit(records=record_set, job_name=job_name)

    # The endpoint reuses the training job name so the context manager can
    # look it up by that name when the block exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = IPInsightsModel(ipinsights.model_data,
                                role='SageMakerRole',
                                sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=job_name)
        assert isinstance(predictor, RealTimePredictor)

        predict_input = [['user_1', '1.1.1.1']]
        result = predictor.predict(predict_input)

        # One input row is sent, so exactly one record is expected back.
        assert len(result) == 1
        for record in result:
            assert record.label["dot_product"] is not None
def test_ipinsights_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    """Check that the Airflow training config for IPInsights points at S3 data.

    Builds an IPInsights estimator, prepares a record set from the local
    ``ipinsights`` fixture directory, generates the Airflow workflow config
    and asserts on the S3 URI of its first input data source.

    Args:
        sagemaker_session: Session object passed to the estimator and helpers.
        cpu_instance_type: Instance type used both for the estimator and for
            building the Airflow workflow.
    """
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        fixture_dir = os.path.join(DATA_DIR, "ipinsights")
        train_csv = os.path.join(fixture_dir, "train.csv")

        # The record count is simply the number of lines in the fixture file.
        with open(train_csv, "rb") as train_file:
            record_count = len(train_file.readlines())

        estimator = IPInsights(
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            num_entity_vectors=10,
            vector_dim=100,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            fixture_dir, estimator.data_location, record_count, None, sagemaker_session
        )

        workflow_config = _build_airflow_workflow(
            estimator=estimator, instance_type=cpu_instance_type, inputs=record_set
        )

        # Drill down to the S3 URI of the first input channel.
        first_channel = workflow_config["InputDataConfig"][0]
        s3_uri = first_channel["DataSource"]["S3DataSource"]["S3Uri"]
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)
Пример #3
0
def test_ipinsights_serverless_inference(sagemaker_session, cpu_instance_type):
    """Train IPInsights and verify a prediction from a serverless endpoint.

    The test counts the lines of the local ``ipinsights/train.csv`` fixture,
    fits the estimator under a unique job name, deploys the model with a
    serverless inference config to an endpoint of the same name, and checks
    that one prediction with a ``dot_product`` strictly between -1 and 0 is
    returned.

    Args:
        sagemaker_session: Session object handed to the estimator, the
            record-set helper, the endpoint clean-up helper and the model.
        cpu_instance_type: Instance type used for the training job.
    """
    job_name = unique_name_from_base("ipinsights-serverless")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "ipinsights")
        data_filename = "train.csv"

        # Count lines by streaming the file instead of materialising every
        # line in a list; the handle is released as soon as counting is done.
        with open(os.path.join(data_path, data_filename), "rb") as f:
            num_records = sum(1 for _ in f)

        # Built outside the ``with`` block: the estimator does not need the
        # fixture file to remain open.
        ipinsights = IPInsights(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_entity_vectors=10,
            vector_dim=100,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, ipinsights.data_location, num_records, FEATURE_DIM, sagemaker_session
        )
        ipinsights.fit(records=record_set, job_name=job_name)

    # The endpoint reuses the training job name so the context manager can
    # look it up by that name when the block exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = IPInsightsModel(
            ipinsights.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = model.deploy(
            serverless_inference_config=ServerlessInferenceConfig(memory_size_in_mb=6144),
            endpoint_name=job_name,
        )
        assert isinstance(predictor, Predictor)

        predict_input = [["user_1", "1.1.1.1"]]
        result = predictor.predict(predict_input)

        # One input row is sent, so exactly one prediction is expected back.
        assert len(result["predictions"]) == 1
        assert 0 > result["predictions"][0]["dot_product"] > -1  # We expect ~ -0.22