def test_dataset_default(configuration_data):
    """Check the dataset settings that Config resolves for ``some_new_key.csv``.

    Presumably this key has no dedicated entry in the ``configuration_data``
    fixture, so the asserted values are defaults -- confirm against the fixture.
    """
    expected_schema = {
        "Attributes": [
            {"AttributeName": "item_id", "AttributeType": "string"},
            {"AttributeName": "timestamp", "AttributeType": "timestamp"},
            {"AttributeName": "demand", "AttributeType": "float"},
        ]
    }

    cfg = Config()
    cfg.config = configuration_data
    resolved = cfg.dataset(DatasetFile("some_new_key.csv", "some_bucket"))

    assert resolved.data_frequency == "D"
    assert resolved.dataset_type == DatasetType.TARGET_TIME_SERIES
    assert resolved.dataset_domain == DatasetDomain.RETAIL
    # The asserted dataset name equals the file name with ".csv" removed.
    assert resolved.dataset_name == "some_new_key"
    assert resolved.dataset_schema == expected_schema
def test_dataset_import_timestamp_format_none(configuration_data, forecast_stub):
    """With no import jobs listed for the dataset, ``timestamp_format`` is None."""
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    # The stubbed service reports an empty list of import jobs.
    forecast_stub.add_response("list_dataset_import_jobs", {"DatasetImportJobs": []})
    dataset.cli = forecast_stub.client

    # Identity check: `== None` could be satisfied by any object whose
    # __eq__ is permissive, which would let a wrong return value slip through.
    assert dataset.timestamp_format is None
def test_dataset_status_lifecycle(configuration_data, forecast_stub):
    """Read ``status`` twice: first against a not-found error, then "ACTIVE"."""
    cfg = Config()
    cfg.config = configuration_data
    dataset = cfg.dataset(DatasetFile("RetailDemandTRM.csv", "some_bucket"))

    # Two describe_dataset outcomes are queued; presumably each `status` read
    # consumes one of them, in order.
    forecast_stub.add_client_error("describe_dataset", "ResourceNotFoundException")
    forecast_stub.add_response("describe_dataset", {"Status": "ACTIVE"})
    dataset.cli = forecast_stub.client

    status_when_missing = dataset.status
    assert status_when_missing == Status.DOES_NOT_EXIST

    status_when_present = dataset.status
    assert status_when_present == "ACTIVE"
def test_dataset_create_noop_errors(configuration_data, forecast_stub):
    """Re-running ``create()`` after the local settings diverge raises ValueError.

    The error message is expected to mention the dataset type, domain,
    data frequency and schema.

    NOTE(review): a second test with this exact name is defined later in this
    file. Python keeps only the last definition of a name, so this one would
    not be collected by pytest unless one of the two is renamed.
    """
    cfg = Config()
    cfg.config = configuration_data
    dataset = cfg.dataset(DatasetFile("RetailDemandTRM.csv", "some_bucket"))

    dataset_config = configuration_data.get("RetailDemandTRM").get("Datasets")[2]
    described = {
        "DatasetType": dataset_config.get("DatasetType"),
        "DatasetName": "RetailDemandTRM",
        "Domain": dataset_config.get("Domain"),
        "Schema": dataset_config.get("Schema"),
        "DataFrequency": dataset_config.get("DataFrequency"),
    }

    # create_dataset is expected to receive the described fields plus the tag.
    expected_create_params = deepcopy(described)
    expected_create_params["Tags"] = [{"Key": "SolutionId", "Value": "SOL0123"}]

    forecast_stub.add_response("describe_dataset", described)
    forecast_stub.add_response(
        "create_dataset",
        {"DatasetArn": dataset.arn},
        expected_create_params,
    )
    forecast_stub.add_response("describe_dataset", described)

    dataset.cli = forecast_stub.client
    dataset.create()

    # clobber the values to trigger some exceptions
    # this is likely caused by a user changing configuration unexpectedly
    dataset._dataset_type = DatasetType.RELATED_TIME_SERIES
    dataset._dataset_domain = DatasetDomain.WORK_FORCE
    dataset._data_frequency = DataFrequency("1min")
    dataset._dataset_schema = {}

    with pytest.raises(ValueError) as excinfo:
        dataset.create()

    message = str(excinfo.value)
    for fragment in (
        "dataset type",
        "dataset domain",
        "data frequency",
        "dataset schema",
    ):
        assert fragment in message
def test_dataset_create(configuration_data, forecast_stub):
    """``create()`` consumes both stubbed calls when describe_dataset reports not found."""
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    # describe_dataset fails with ResourceNotFoundException, after which a
    # create_dataset call is expected.
    forecast_stub.add_client_error("describe_dataset", "ResourceNotFoundException")
    forecast_stub.add_response("create_dataset", {"DatasetArn": "arn:"})

    dataset.cli = forecast_stub.client
    dataset.create()

    # Fails if either queued response was left unconsumed.
    forecast_stub.assert_no_pending_responses()
def test_dataset_import_timestamp_format(configuration_data, forecast_stub, format):
    """``timestamp_format`` matches the TimestampFormat of the stubbed import job.

    NOTE(review): ``format`` shadows the builtin; it is presumably supplied by
    a parametrization or fixture that is not visible in this block, so the
    name is left as-is.
    """
    cfg = Config()
    cfg.config = configuration_data
    dataset = cfg.dataset(DatasetFile("RetailDemandTRM.csv", "some_bucket"))

    listed_jobs = {
        "DatasetImportJobs": [
            {
                "DatasetImportJobArn": "arn:something",
                "LastModificationTime": datetime(2015, 1, 1),
            }
        ]
    }
    job_description = {"TimestampFormat": format}

    forecast_stub.add_response("list_dataset_import_jobs", listed_jobs)
    forecast_stub.add_response("describe_dataset_import_job", job_description)
    dataset.cli = forecast_stub.client

    assert dataset.timestamp_format == format
def test_dataset_create_noop_errors(configuration_data, forecast_stub):
    """``create()`` on an already-described dataset, then again after settings diverge.

    Only describe_dataset responses are stubbed. The second ``create()`` is
    expected to raise ValueError mentioning the dataset type, domain, data
    frequency and schema.

    NOTE(review): an earlier test in this file has this exact name and is
    shadowed by this definition; one of the two should be renamed.
    """
    cfg = Config()
    cfg.config = configuration_data
    dataset = cfg.dataset(DatasetFile("RetailDemandTRM.csv", "some_bucket"))

    dataset_config = configuration_data.get("RetailDemandTRM").get("Datasets")[2]

    def describe_response():
        # A fresh dict per queued response, mirroring the configured dataset.
        return {
            "DatasetType": dataset_config.get("DatasetType"),
            "DatasetName": "RetailDemandTRM",
            "Domain": dataset_config.get("Domain"),
            "Schema": dataset_config.get("Schema"),
            "DataFrequency": dataset_config.get("DataFrequency"),
        }

    # One describe_dataset response for each of the two create() calls below.
    for _ in range(2):
        forecast_stub.add_response("describe_dataset", describe_response())

    # should not call anything
    dataset.cli = forecast_stub.client
    dataset.create()

    # clobber the values to trigger some exceptions
    # this is likely caused by a user changing configuration unexpectedly
    dataset._dataset_type = DatasetType.RELATED_TIME_SERIES
    dataset._dataset_domain = DatasetDomain.WORK_FORCE
    dataset._data_frequency = DataFrequency("1min")
    dataset._dataset_schema = {}

    with pytest.raises(ValueError) as excinfo:
        dataset.create()

    message = str(excinfo.value)
    for fragment in (
        "dataset type",
        "dataset domain",
        "data frequency",
        "dataset schema",
    ):
        assert fragment in message
def test_dataset_imports(configuration_data, forecast_stub):
    """``imports`` returns the stubbed jobs with the newest LastModificationTime first."""
    cfg = Config()
    cfg.config = configuration_data
    dataset = cfg.dataset(DatasetFile("RetailDemandTRM.csv", "some_bucket"))

    def import_job(name, year):
        # Stubbed job summary, last modified on January 1st of the given year.
        return {
            "DatasetImportJobArn": "arn::",
            "DatasetImportJobName": name,
            "LastModificationTime": datetime(year, 1, 1),
        }

    # Deliberately listed out of chronological order.
    unsorted_jobs = [
        import_job("middle_job", 2018),
        import_job("end_job", 2019),
        import_job("early_job", 2017),
    ]
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {"DatasetImportJobs": unsorted_jobs},
    )
    dataset.cli = forecast_stub.client

    ds_imports = dataset.imports
    expected_order = ("end_job", "middle_job", "early_job")
    for position, expected_name in enumerate(expected_order):
        assert ds_imports[position].get("DatasetImportJobName") == expected_name