def test_factorization_machines(sagemaker_session, cpu_instance_type, training_set):
    """Fit a FactorizationMachines regressor, deploy it, and check predictions.

    The estimator is fitted on the first 200 examples of ``training_set``
    under a job name derived from ``"fm"``. The resulting model is deployed
    to an endpoint that reuses the job name, and the first 10 examples are
    sent for prediction. The test passes when 10 records come back and each
    one carries a non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = FactorizationMachines(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_factors=10,
            predictor_type="regressor",
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
        )

        features = training_set[0][:200]
        # training labels must be 'float32'
        labels = training_set[1][:200].astype("float32")
        records = estimator.record_set(features, labels)
        estimator.fit(records, job_name=job_name)

    # The endpoint is named after the training job.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = FactorizationMachinesModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        predictions = predictor.predict(training_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_factorization_machines(sagemaker_session):
    """Fit FactorizationMachines on the bundled MNIST sample and query an endpoint.

    Loads the training split from ``one_p_mnist/mnist.pkl.gz`` under
    ``DATA_DIR``, fits a regressor on its first 200 examples, deploys the
    model, and asserts that predicting on the first 10 examples returns 10
    records whose ``score`` label is not None.
    """
    with timeout(minutes=15):
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')

        # Only Python 3's pickle.load accepts the ``encoding`` keyword.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **load_kwargs)

        estimator = FactorizationMachines(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_factors=10,
            predictor_type='regressor',
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
            base_job_name='test-fm',
        )

        features = train_set[0][:200]
        # training labels must be 'float32'
        labels = train_set[1][:200].astype('float32')
        estimator.fit(estimator.record_set(features, labels))

    endpoint_name = name_from_base('fm')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = FactorizationMachinesModel(
            estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_factorization_machines(sagemaker_session):
    """End-to-end check of FactorizationMachines training and inference.

    Steps, in order: read the training split of the gzipped MNIST pickle
    under ``DATA_DIR``; fit a regressor on 200 examples; deploy the model
    to a freshly named endpoint; predict on 10 examples and assert that each
    of the 10 returned records has a non-None ``score`` label.
    """
    with timeout(minutes=15):
        archive_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # Python 2's pickle.load takes no ``encoding`` argument.
        on_python2 = sys.version_info.major == 2
        unpickle_options = {} if on_python2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(archive_path, 'rb') as handle:
            train_set, _, _ = pickle.load(handle, **unpickle_options)

        estimator_settings = {
            'role': 'SageMakerRole',
            'train_instance_count': 1,
            'train_instance_type': 'ml.c4.xlarge',
            'num_factors': 10,
            'predictor_type': 'regressor',
            'epochs': 2,
            'clip_gradient': 1e2,
            'eps': 0.001,
            'rescale_grad': 1.0 / 100,
            'sagemaker_session': sagemaker_session,
            'base_job_name': 'test-fm',
        }
        machine = FactorizationMachines(**estimator_settings)

        # training labels must be 'float32'
        training_records = machine.record_set(
            train_set[0][:200],
            train_set[1][:200].astype('float32'),
        )
        machine.fit(training_records)

    endpoint_name = name_from_base('fm')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = FactorizationMachinesModel(
            machine.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        scored = predictor.predict(train_set[0][:10])

        assert len(scored) == 10
        for item in scored:
            assert item.label["score"] is not None
def test_async_factorization_machines():
    """Start training without waiting, re-attach by job name, then deploy and predict.

    Builds its own SageMaker session for ``REGION``. ``fit`` is called with
    ``wait=False``; after a 20 second pause the test re-attaches to the
    training job by name, deploys the attached estimator's model, and asserts
    that predicting on 10 examples yields 10 records with a non-None
    ``score`` label.
    """
    training_job_name = ""
    endpoint_name = name_from_base('factorizationMachines')
    boto_session = boto3.Session(region_name=REGION)
    sagemaker_session = sagemaker.Session(boto_session=boto_session)

    with timeout(minutes=5):
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')

        # Only Python 3's pickle.load accepts the ``encoding`` keyword.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **load_kwargs)

        trainer = FactorizationMachines(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_factors=10,
            predictor_type='regressor',
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
            base_job_name='test-fm',
        )

        features = train_set[0][:200]
        # training labels must be 'float32'
        labels = train_set[1][:200].astype('float32')
        trainer.fit(trainer.record_set(features, labels), wait=False)

        # Keep the job name so the job can be looked up again below.
        training_job_name = trainer.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35):
        attached = FactorizationMachines.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        model = FactorizationMachinesModel(
            attached.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_async_factorization_machines(sagemaker_session, cpu_instance_type):
    """Fit without waiting, re-attach to the job by name, then deploy and predict.

    The MNIST training split is read from ``DATA_DIR``. ``fit`` is called
    with ``wait=False`` and an explicit job name. After a 20 second pause,
    the estimator is recovered with ``FactorizationMachines.attach`` and its
    model is deployed to an endpoint sharing the job name. Passes when 10
    predictions come back, each with a non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")

    with timeout(minutes=5):
        archive_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Python 2's pickle.load takes no ``encoding`` argument.
        on_python2 = sys.version_info.major == 2
        unpickle_options = {} if on_python2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(archive_path, "rb") as handle:
            train_set, _, _ = pickle.load(handle, **unpickle_options)

        trainer = FactorizationMachines(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            num_factors=10,
            predictor_type="regressor",
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
        )

        # training labels must be 'float32'
        training_records = trainer.record_set(
            train_set[0][:200],
            train_set[1][:200].astype("float32"),
        )
        trainer.fit(training_records, job_name=job_name, wait=False)

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = FactorizationMachines.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        model = FactorizationMachinesModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        scored = predictor.predict(train_set[0][:10])

        assert len(scored) == 10
        for item in scored:
            assert item.label["score"] is not None
def test_async_factorization_machines(sagemaker_session, cpu_instance_type, training_set):
    """Fit with ``wait=False``, re-attach by job name, then deploy and predict.

    Fits on the first 200 examples of ``training_set`` without waiting,
    pauses for 20 seconds, and recovers the estimator with
    ``FactorizationMachines.attach`` using the same job name. The attached
    estimator's model is deployed to an endpoint that reuses the job name.
    The test asserts that predicting on the first 10 examples returns 10
    records, each with a non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")

    with timeout(minutes=5):
        trainer = FactorizationMachines(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_factors=10,
            predictor_type="regressor",
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
        )

        features = training_set[0][:200]
        # training labels must be 'float32'
        labels = training_set[1][:200].astype("float32")
        trainer.fit(
            trainer.record_set(features, labels),
            job_name=job_name,
            wait=False,
        )

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    # The endpoint is named after the training job.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = FactorizationMachines.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        model = FactorizationMachinesModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        predictions = predictor.predict(training_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_factorization_machines(sagemaker_session):
    """Fit FactorizationMachines on the MNIST sample, deploy it, and check predictions.

    Reads the training split from ``one_p_mnist/mnist.pkl.gz`` under
    ``DATA_DIR``, fits a regressor on its first 200 examples under an
    explicit job name, and deploys the model to an endpoint with that same
    name. Passes when predicting on the first 10 examples returns 10 records,
    each with a non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")

        # Only Python 3's pickle.load accepts the ``encoding`` keyword.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as archive:
            train_set, _, _ = pickle.load(archive, **load_kwargs)

        estimator = FactorizationMachines(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            num_factors=10,
            predictor_type="regressor",
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
        )

        features = train_set[0][:200]
        # training labels must be 'float32'
        labels = train_set[1][:200].astype("float32")
        records = estimator.record_set(features, labels)
        estimator.fit(records, job_name=job_name)

    # The endpoint is named after the training job.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = FactorizationMachinesModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, "ml.c4.xlarge", endpoint_name=job_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None