def train_densenet(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
    Starts a train job for model `PyDenseNetBc` on the CIFAR-10 dataset for
    the task `IMAGE_CLASSIFICATION`.

    Demonstrates hyperparameter tuning with distributed parameter sharing on
    SINGA-Auto. The function only submits the job; it does not wait for it to
    finish, and it returns nothing.

    :param client: client object used to create the datasets, the model and
        the train job
    :param train_dataset_path: path handed to `load_cifar10` and then uploaded
        as the train dataset
    :param val_dataset_path: path handed to `load_cifar10` and then uploaded
        as the validation dataset
    :param gpus: value for the `BudgetOption.GPU_COUNT` budget entry
    :param hours: value for the `BudgetOption.TIME_HOURS` budget entry
    '''
    task = 'IMAGE_CLASSIFICATION'

    # A fresh ID is embedded in the app & model names for this run
    run_id = gen_id()
    app = 'cifar10_densenet_{}'.format(run_id)
    model_name = 'PyDenseNetBc_{}'.format(run_id)

    print('Preprocessing datasets...')
    load_cifar10(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_name = app + '_train'
    train_dataset = client.create_dataset(train_name, task, train_dataset_path)
    pprint(train_dataset)
    val_name = app + '_val'
    val_dataset = client.create_dataset(val_name, task, val_dataset_path)
    pprint(val_dataset)

    print('Creating model...')
    model_dependencies = {
        ModelDependency.TORCH: '1.0.1',
        ModelDependency.TORCHVISION: '0.2.2',
    }
    model = client.create_model(
        name=model_name,
        task=task,
        model_file_path='examples/models/image_classification/PyDenseNetBc.py',
        model_class='PyDenseNetBc',
        dependencies=model_dependencies)
    pprint(model)

    print('Creating train job...')
    budget = {
        BudgetOption.TIME_HOURS: hours,
        BudgetOption.GPU_COUNT: gpus,
    }
    model_ids = [model['id']]
    train_job = client.create_train_job(
        app, task, train_dataset['id'], val_dataset['id'], budget,
        models=model_ids)
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
def run_enas(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
    Starts a train job for model `TfEnas` on the CIFAR-10 dataset for the
    task `IMAGE_CLASSIFICATION`.

    Demonstrates architecture tuning with ENAS on SINGA-Auto. The function
    only submits the job; it does not wait for it to finish, and it returns
    nothing.

    :param client: client object used to create the datasets, the model and
        the train job
    :param train_dataset_path: path handed to `load_cifar10` and then uploaded
        as the train dataset
    :param val_dataset_path: path handed to `load_cifar10` and then uploaded
        as the validation dataset
    :param gpus: value for the `BudgetOption.GPU_COUNT` budget entry
    :param hours: value for the `BudgetOption.TIME_HOURS` budget entry
    '''
    task = 'IMAGE_CLASSIFICATION'

    # A fresh ID is embedded in the app & model names for this run
    run_id = gen_id()
    app = 'cifar10_enas_{}'.format(run_id)
    model_name = 'TfEnas_{}'.format(run_id)

    print('Preprocessing datasets...')
    load_cifar10(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_name = app + '_train'
    train_dataset = client.create_dataset(train_name, task, train_dataset_path)
    pprint(train_dataset)
    val_name = app + '_val'
    val_dataset = client.create_dataset(val_name, task, val_dataset_path)
    pprint(val_dataset)

    print('Creating model...')
    model_dependencies = {ModelDependency.TENSORFLOW: '1.12.0'}
    model = client.create_model(
        name=model_name,
        task=task,
        model_file_path='examples/models/image_classification/TfEnas.py',
        model_class='TfEnas',
        dependencies=model_dependencies)
    pprint(model)

    print('Creating train job...')
    budget = {
        BudgetOption.TIME_HOURS: hours,
        BudgetOption.GPU_COUNT: gpus,
    }
    model_ids = [model['id']]
    train_job = client.create_train_job(
        app, task, train_dataset['id'], val_dataset['id'], budget,
        models=model_ids)
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
def quickstart(client, train_dataset_path, val_dataset_path, gpus, hours, query_paths):
    '''
    Conducts a full train-inference flow on the Fashion MNIST dataset with
    models `SkDt` and `TfFeedForward` for the task `IMAGE_CLASSIFICATION`.

    The flow is: preprocess & upload the datasets, register both models,
    create a train job and block until it has stopped, list its best trials,
    create an inference job, make predictions for the query images, and
    finally stop the inference job.

    :param client: client object used for all dataset, model, train job and
        inference job calls
    :param train_dataset_path: path handed to `load_fashion_mnist` and then
        uploaded as the train dataset
    :param val_dataset_path: path handed to `load_fashion_mnist` and then
        uploaded as the validation dataset
    :param gpus: value for the `BudgetOption.GPU_COUNT` budget entry
    :param hours: value for the `BudgetOption.TIME_HOURS` budget entry
    :param query_paths: paths of the query images, loaded with
        `utils.dataset.load_images` and sent to the predictor
    :raises Exception: if `get_predictor_host` yields no predictor host for
        the inference job
    '''
    task = 'IMAGE_CLASSIFICATION'

    # Randomly generate app & model names to avoid naming conflicts
    app_id = gen_id()
    app = 'image_classification_app_{}'.format(app_id)
    tf_model_name = 'TfFeedForward_{}'.format(app_id)
    sk_model_name = 'SkDt_{}'.format(app_id)

    print('Preprocessing datasets...')
    load_fashion_mnist(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Adding models "{}" and "{}" to SINGA-Auto...'.format(
        tf_model_name, sk_model_name))
    tf_model = client.create_model(
        tf_model_name,
        task,
        'examples/models/image_classification/TfFeedForward.py',
        'TfFeedForward',
        dependencies={ModelDependency.TENSORFLOW: '1.12.0'})
    pprint(tf_model)
    sk_model = client.create_model(
        sk_model_name,
        task,
        'examples/models/image_classification/SkDt.py',
        'SkDt',
        dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'})
    pprint(sk_model)
    model_ids = [tf_model['id'], sk_model['id']]

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))
    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset['id'],
                                        val_dataset['id'],
                                        budget,
                                        models=model_ids)
    pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    pprint(client.get_best_trials_of_train_job(app))

    print('Creating inference job for app "{}" on SINGA-Auto...'.format(app))
    pprint(client.create_inference_job(app))
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        raise Exception('Inference job has errored')
    print('Inference job is running!')

    # The inference job is now running. Make sure it is stopped even when
    # loading the query images or making predictions raises, so that a failed
    # run does not leave the job behind.
    try:
        print('Making predictions for query images:')
        print(query_paths)
        queries = utils.dataset.load_images(query_paths).tolist()
        predictions = make_predictions(client, predictor_host, queries)
        print('Predictions are:')
        print(predictions)
    finally:
        print('Stopping inference job...')
        pprint(client.stop_inference_job(app))