def test_chainer_mnist_single_machine(docker_image, sagemaker_local_session, instance_type, tmpdir):
    """Train MNIST on a single machine in local mode and verify artifacts and inference.

    Runs a single-node Chainer training job, checks that the expected model and
    output files were written under ``tmpdir``, then exercises the deployed
    endpoint with numpy and CSV payloads as well as multidimensional JSON
    payloads.
    """
    customer_script = 'single_machine_customer_script.py'
    hyperparameters = {'batch-size': 10000, 'epochs': 1}

    estimator = Chainer(entry_point=customer_script,
                        source_dir=mnist_path,
                        role=role,
                        image_name=docker_image,
                        train_instance_count=1,
                        train_instance_type=instance_type,
                        sagemaker_session=sagemaker_local_session,
                        hyperparameters=hyperparameters,
                        output_path='file://{}'.format(tmpdir))

    estimator.fit({
        'train': 'file://{}'.format(os.path.join(data_dir, 'train')),
        'test': 'file://{}'.format(os.path.join(data_dir, 'test'))
    })

    success_files = {
        'model': ['model.npz'],
        'output': [
            'success', 'data/accuracy.png', 'data/cg.dot', 'data/log', 'data/loss.png'
        ],
    }
    test_utils.files_exist(str(tmpdir), success_files)

    request_data = np.zeros((100, 784), dtype='float32')

    test_utils.predict_and_assert_response_length(estimator, request_data, instance_type)
    test_utils.predict_and_assert_response_length(estimator, request_data, instance_type,
                                                  csv_serializer, csv_deserializer, 'text/csv')

    test_arrays = [
        np.zeros((100, 784), dtype='float32'),
        np.zeros((100, 1, 28, 28), dtype='float32'),
        np.zeros((100, 28, 28), dtype='float32')
    ]

    with test_utils.local_mode_lock():
        # Fix: deploy BEFORE entering try/finally. In the original, the
        # assignment lived inside the try, so a deployment failure left
        # `predictor` unbound and the finally clause raised UnboundLocalError,
        # masking the real error. If deployment fails, there is no endpoint to
        # tear down anyway.
        predictor = _json_predictor(estimator, instance_type)
        try:
            for array in test_arrays:
                # JSON payloads may be multidimensional (n > 2) arrays.
                response = predictor.predict(array)
                assert len(response) == len(array)
        finally:
            predictor.delete_endpoint()
def test_chainer_mnist_distributed(docker_image, sagemaker_local_session, instance_type, customer_script, tmpdir):
    """Run distributed (two-host) Chainer MNIST training in local mode.

    Trains with the hierarchical communicator across two simulated hosts,
    verifies the expected artifacts, and smoke-tests inference with numpy,
    JSON, and CSV payloads.
    """
    if instance_type == 'local_gpu':
        pytest.skip('Local Mode does not support distributed GPU training.')

    # pure_nccl communicator hangs when only one gpu is available.
    num_hosts = 2
    hps = {
        'sagemaker_process_slots_per_host': 1,
        'sagemaker_num_processes': num_hosts,
        'batch-size': 10000,
        'epochs': 1,
        'communicator': 'hierarchical'
    }

    estimator = Chainer(
        entry_point=customer_script,
        source_dir=mnist_path,
        role=role,
        image_name=docker_image,
        train_instance_count=num_hosts,
        train_instance_type=instance_type,
        sagemaker_session=sagemaker_local_session,
        hyperparameters=hps,
        output_path='file://{}'.format(tmpdir),
    )

    channels = {
        name: 'file://{}'.format(os.path.join(data_dir, name))
        for name in ('train', 'test')
    }
    estimator.fit(channels)

    expected_files = {
        'model': ['model.npz'],
        'output': [
            'success', 'data/accuracy.png', 'data/cg.dot', 'data/log', 'data/loss.png'
        ],
    }
    test_utils.files_exist(str(tmpdir), expected_files)

    payload = np.zeros((100, 784), dtype='float32')

    test_utils.predict_and_assert_response_length(estimator, payload, instance_type)
    test_utils.predict_and_assert_response_length(
        estimator, payload, instance_type,
        json_serializer, json_deserializer, 'application/json')
    test_utils.predict_and_assert_response_length(
        estimator, payload, instance_type,
        csv_serializer, csv_deserializer, 'text/csv')
def test_chainer_mnist_single_machine(docker_image, opt_ml, use_gpu):
    """Train MNIST in a single local_mode container and verify artifacts and serving.

    After training, checks the expected output files exist, asserts no failure
    marker was written, then serves the model and sends CSV, JSON, and NPY
    payloads of varying dimensionality.
    """
    customer_script = 'single_machine_customer_script.py'
    hyperparameters = {'batch-size': 10000, 'epochs': 1}

    local_mode.train(customer_script, data_dir, docker_image, opt_ml,
                     hyperparameters=hyperparameters,
                     source_dir=mnist_path, use_gpu=use_gpu)

    files = [
        'model/model.npz', 'output/success', 'output/data/algo-1/accuracy.png',
        'output/data/algo-1/cg.dot', 'output/data/algo-1/log',
        'output/data/algo-1/loss.png'
    ]
    test_utils.files_exist(opt_ml, files)

    assert not local_mode.file_exists(opt_ml, 'output/failure'), 'Failure happened'

    script_path = os.path.join(mnist_path, customer_script)

    with local_mode.serve(script_path, model_dir=None, image_name=docker_image,
                          opt_ml=opt_ml, use_gpu=use_gpu, source_dir=mnist_path):
        test_arrays = [
            np.zeros((100, 784), dtype='float32'),
            np.zeros((100, 1, 28, 28), dtype='float32'),
            np.zeros((100, 28, 28), dtype='float32')
        ]

        # CSV only supports 2-D data, so it is exercised once outside the loop.
        request_data = np.zeros((100, 784), dtype='float32')
        data_as_list = request_data.tolist()
        test_utils.predict_and_assert_response_length(data_as_list, 'text/csv')

        for array in test_arrays:
            # JSON and NPY can take multidimensional (n > 2) arrays
            data_as_list = array.tolist()
            test_utils.predict_and_assert_response_length(data_as_list, 'application/json')
            # Fix: send the current loop array over NPY. The original passed the
            # fixed 2-D `request_data` here, so the multidimensional NPY path
            # the comment above describes was never actually tested.
            test_utils.predict_and_assert_response_length(array, 'application/x-npy')
def test_chainer_mnist_distributed(docker_image, opt_ml, use_gpu, customer_script):
    """Distributed (two-container) MNIST training via local_mode, then a serving smoke test."""
    # pure_nccl communicator hangs when only one gpu is available.
    num_containers = 2
    hps = {
        'sagemaker_process_slots_per_host': 1,
        'sagemaker_num_processes': num_containers,
        'batch-size': 10000,
        'epochs': 1,
        'communicator': 'hierarchical'
    }

    local_mode.train(customer_script, data_dir, docker_image, opt_ml,
                     hyperparameters=hps, cluster_size=num_containers,
                     source_dir=mnist_path, use_gpu=use_gpu)

    artifacts = [
        'model/model.npz',
        'output/success',
        'output/data/algo-1/accuracy.png',
        'output/data/algo-1/cg.dot',
        'output/data/algo-1/log',
        'output/data/algo-1/loss.png',
    ]
    test_utils.files_exist(opt_ml, artifacts)

    assert not local_mode.file_exists(opt_ml, 'output/failure'), 'Failure happened'

    serving_script = os.path.join(mnist_path, customer_script)
    with local_mode.serve(serving_script, model_dir=None,
                          image_name=docker_image, opt_ml=opt_ml):
        payload = np.zeros((100, 784), dtype='float32')
        payload_as_list = payload.tolist()

        test_utils.predict_and_assert_response_length(payload_as_list, 'application/json')
        test_utils.predict_and_assert_response_length(payload_as_list, 'text/csv')
        test_utils.predict_and_assert_response_length(payload, 'application/x-npy')
def test_chainer_mnist_custom_loop(docker_image, sagemaker_local_session, instance_type, tmpdir):
    """Single-machine MNIST training using a hand-written Chainer training loop.

    Verifies the model and success marker are produced, then checks endpoint
    responses for numpy, JSON, and CSV payloads.
    """
    script = 'single_machine_custom_loop.py'
    hps = {'batch-size': 10000, 'epochs': 1}

    estimator = Chainer(
        entry_point=script,
        source_dir=mnist_path,
        role=role,
        image_name=docker_image,
        train_instance_count=1,
        train_instance_type=instance_type,
        sagemaker_session=sagemaker_local_session,
        hyperparameters=hps,
        output_path='file://{}'.format(tmpdir),
    )

    channels = {
        name: 'file://{}'.format(os.path.join(data_dir, name))
        for name in ('train', 'test')
    }
    estimator.fit(channels)

    # The custom loop writes no extension data files — only the model and marker.
    expected_files = {
        'model': ['model.npz'],
        'output': ['success'],
    }
    test_utils.files_exist(str(tmpdir), expected_files)

    payload = np.zeros((100, 784), dtype='float32')

    test_utils.predict_and_assert_response_length(estimator, payload, instance_type)
    test_utils.predict_and_assert_response_length(
        estimator, payload, instance_type,
        json_serializer, json_deserializer, 'application/json')
    test_utils.predict_and_assert_response_length(
        estimator, payload, instance_type,
        csv_serializer, csv_deserializer, 'text/csv')
def test_chainer_mnist_custom_loop(docker_image, opt_ml, use_gpu):
    """Custom-training-loop MNIST in a local_mode container, then serving checks."""
    script = 'single_machine_custom_loop.py'
    hps = {'batch-size': 10000, 'epochs': 1}

    local_mode.train(script, data_dir, docker_image, opt_ml,
                     hyperparameters=hps, source_dir=mnist_path, use_gpu=use_gpu)

    test_utils.files_exist(opt_ml, ['model/model.npz', 'output/success'])

    assert not local_mode.file_exists(opt_ml, 'output/failure'), 'Failure happened'

    serving_script = os.path.join(mnist_path, script)
    with local_mode.serve(serving_script, model_dir=None,
                          image_name=docker_image, opt_ml=opt_ml):
        payload = np.zeros((100, 784), dtype='float32')
        payload_as_list = payload.tolist()

        test_utils.predict_and_assert_response_length(payload_as_list, 'application/json')
        test_utils.predict_and_assert_response_length(payload_as_list, 'text/csv')
        test_utils.predict_and_assert_response_length(payload, 'application/x-npy')