def docker_config_cache_from(self):
    """Run the MNIST notebook twice, feeding the first image back as a cache.

    The first job builds an image through `_TEST_BUCKET`. The second job
    passes that job's `docker_image` both as `image` and as `cache_from`.
    Both jobs use the `P100_1X` chief config and the same job labels.

    Returns:
        The value returned by the second `tfc.run` call.
    """
    data_dir = self.test_data_path
    notebook_path = os.path.join(data_dir, "mnist_example_using_fit.ipynb")
    reqs_path = os.path.join(data_dir, "requirements.txt")

    # First pass: plain build, only the build bucket is configured.
    first_job = tfc.run(
        entry_point=notebook_path,
        requirements_txt=reqs_path,
        docker_config=tfc.DockerConfig(image_build_bucket=_TEST_BUCKET),
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
        job_labels={
            "job": "docker_config_cache_from",
            "team": "on_notebook_tests",
        },
    )

    # Second pass: reuse the image produced above as both target and cache.
    built_image = first_job["docker_image"]
    cached_config = tfc.DockerConfig(
        image_build_bucket=_TEST_BUCKET,
        image=built_image,
        cache_from=built_image,
    )
    return tfc.run(
        entry_point=notebook_path,
        requirements_txt=reqs_path,
        docker_config=cached_config,
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
        job_labels={
            "job": "docker_config_cache_from",
            "team": "on_notebook_tests",
        },
    )
def auto_one_device_strategy_cloud_build(self):
    """Submit the MNIST `fit` script with only an image build bucket set.

    No machine config or distribution strategy is passed, so `tfc.run`
    uses its own defaults for those arguments.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements.txt")
    build_config = tfc.DockerConfig(image_build_bucket=_TEST_BUCKET)
    return tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        docker_config=build_config,
    )
def docker_config_cloud_build(self):
    """Submit the MNIST `fit` script on a `P100_1X` chief via a build bucket.

    The Docker config carries only `image_build_bucket=_TEST_BUCKET`.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements.txt")
    build_config = tfc.DockerConfig(image_build_bucket=_TEST_BUCKET)
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        docker_config=build_config,
        chief_config=chief,
    )
def docker_config_parent_img(self):
    """Submit the MNIST `fit` script on top of an explicit parent image.

    Uses the `tf2-gpu.2-2:latest` deep learning image as `parent_image`
    and the `P100_1X` chief config.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements.txt")
    parent_config = tfc.DockerConfig(
        parent_image=(
            "gcr.io/deeplearning-platform-release"
            "/tf2-gpu.2-2:latest"
        ),
    )
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        docker_config=parent_config,
        chief_config=chief,
    )
def auto_dist_strat_mwms_with_parent_img(self):
    """Submit the MNIST `fit` script with `"auto"` strategy and a parent image.

    Uses the `tf2-gpu.2-2:latest` deep learning image as `parent_image`.

    NOTE(review): the method name mentions MWMS, but no `worker_count` or
    `worker_config` is passed in this call -- confirm that is intended.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements.txt")
    parent_config = tfc.DockerConfig(
        parent_image=(
            "gcr.io/deeplearning-platform-release"
            "/tf2-gpu.2-2:latest"
        ),
    )
    return tfc.run(
        entry_point=script_path,
        distribution_strategy="auto",
        requirements_txt=reqs_path,
        docker_config=parent_config,
    )
def auto_tpu_strategy(self):
    """Submit the MNIST `fit` script with a CPU chief and one TPU worker.

    Uses `requirements_tpu_strategy.txt` and builds on the
    `tensorflow/tensorflow:2.1.0` parent image.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements_tpu_strategy.txt")
    machines = tfc.COMMON_MACHINE_CONFIGS
    chief = machines["CPU"]
    tpu_worker = machines["TPU"]
    parent_config = tfc.DockerConfig(
        parent_image="tensorflow/tensorflow:2.1.0",
    )
    return tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        chief_config=chief,
        worker_count=1,
        worker_config=tpu_worker,
        docker_config=parent_config,
    )
def auto_one_device_strategy_with_image(self):
    """Run the MNIST `fit` script twice, reusing the first job's image.

    The first job is submitted without any Docker config. The second job
    passes the first job's `docker_image` as `DockerConfig(image=...)`.

    Returns:
        The value returned by the second `tfc.run` call.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements.txt")

    # First pass only exists to obtain an image URI.
    first_job = tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
    )

    reuse_config = tfc.DockerConfig(image=first_job["docker_image"])
    return tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        docker_config=reuse_config,
    )
def docker_config_parent_img(self):
    """Submit the MNIST notebook on top of an explicit parent image.

    Uses the `tf2-gpu.2-2:latest` deep learning image as `parent_image`,
    the `P100_1X` chief config, and notebook-test job labels.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    notebook_path = os.path.join(data_dir, "mnist_example_using_fit.ipynb")
    reqs_path = os.path.join(data_dir, "requirements.txt")
    parent_config = tfc.DockerConfig(
        parent_image=(
            "gcr.io/deeplearning-platform-release"
            "/tf2-gpu.2-2:latest"
        ),
    )
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    labels = {
        "job": "docker_config_parent_img",
        "team": "on_notebook_tests",
    }
    return tfc.run(
        entry_point=notebook_path,
        requirements_txt=reqs_path,
        docker_config=parent_config,
        chief_config=chief,
        job_labels=labels,
    )
def auto_tpu_strategy(self):
    """Submit the MNIST notebook with a CPU chief and one TPU worker.

    Uses `requirements_tpu_strategy.txt`, the `tensorflow/tensorflow:2.1.0`
    parent image, and notebook-test job labels.

    Returns:
        The value returned by `tfc.run`.
    """
    data_dir = self.test_data_path
    notebook_path = os.path.join(data_dir, "mnist_example_using_fit.ipynb")
    reqs_path = os.path.join(data_dir, "requirements_tpu_strategy.txt")
    machines = tfc.COMMON_MACHINE_CONFIGS
    chief = machines["CPU"]
    tpu_worker = machines["TPU"]
    parent_config = tfc.DockerConfig(
        parent_image="tensorflow/tensorflow:2.1.0",
    )
    labels = {
        "job": "auto_tpu_strategy",
        "team": "on_notebook_tests",
    }
    return tfc.run(
        entry_point=notebook_path,
        requirements_txt=reqs_path,
        chief_config=chief,
        worker_count=1,
        worker_config=tpu_worker,
        docker_config=parent_config,
        job_labels=labels,
    )
def docker_config_image(self):
    """Run the MNIST `fit` script twice on `P100_1X`, reusing the first image.

    The first job is submitted without a Docker config. The second job
    passes the first job's `docker_image` as `DockerConfig(image=...)`.

    Returns:
        The value returned by the second `tfc.run` call.
    """
    data_dir = self.test_data_path
    script_path = os.path.join(data_dir, "mnist_example_using_fit.py")
    reqs_path = os.path.join(data_dir, "requirements.txt")

    # First pass only exists to obtain an image URI.
    first_job = tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
    )

    reuse_config = tfc.DockerConfig(image=first_job["docker_image"])
    return tfc.run(
        entry_point=script_path,
        requirements_txt=reqs_path,
        docker_config=reuse_config,
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
    )
# NOTE(review): this chunk begins mid-script. `callbacks = None` is presumably
# the tail of a branch that starts above this view -- confirm its indentation
# against the full file.
callbacks = None

# Train the model defined earlier in the script, validating on `test_data`.
model.fit(train_data, epochs=epochs, callbacks=callbacks,
          validation_data=test_data, verbose=2)

# Calling `tfc.run` with `auto` distribution strategy with multi-gpu
# chief_config. This will automate TensorFlow Mirrored distribution
# strategy when training this model.
# Tip: Move this call to the top of this file if you do not want to
# train your model locally first.
# No `entry_point` is passed here; the chief is a custom machine with
# 8 CPU cores, memory=30 and two NVIDIA T4 accelerators.
tfc.run(
    requirements_txt="tests/testdata/requirements.txt",
    chief_config=tfc.MachineConfig(
        cpu_cores=8,
        memory=30,
        accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
        accelerator_count=2,
    ),
    docker_config=tfc.DockerConfig(image_build_bucket=GCP_BUCKET),
)

# Save, load and evaluate the model
# Guarded by `tfc.remote()` (presumably true only inside the cloud job --
# confirm), so the GCS round-trip is skipped otherwise.
if tfc.remote():
    # Destination is built from GCP_BUCKET and MODEL_PATH under "gs://".
    SAVE_PATH = os.path.join("gs://", GCP_BUCKET, MODEL_PATH)
    model.save(SAVE_PATH)
    # Reload from the saved location so evaluation runs on the stored model.
    model = tf.keras.models.load_model(SAVE_PATH)
    model.evaluate(test_data)
import os

import autokeras as ak
import tensorflow_cloud as tfc
from tensorflow.keras.datasets import mnist

# Command-line interface: the caller must supply where to save the model.
parser = argparse.ArgumentParser(description="Model save path arguments.")
parser.add_argument("--path", required=True, type=str,
                    help="Keras model save path")
args = parser.parse_args()

# Submit this script to the cloud on a single-V100 chief, building on top of
# the AutoKeras image. `tfc.DockerConfig` names this parameter `parent_image`;
# `base_image` is not an accepted keyword.
tfc.run(
    chief_config=tfc.COMMON_MACHINE_CONFIGS["V100_1X"],
    docker_config=tfc.DockerConfig(parent_image="haifengjin/autokeras:1.0.3"),
)

# Prepare the dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

# Initialize the ImageClassifier.
clf = ak.ImageClassifier(max_trials=2)
# Search for the best model.
clf.fit(x_train, y_train, epochs=10)
# Evaluate on the testing data.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import tensorflow_cloud as tfc

# Command-line interface: the caller must name the bucket used for the
# image build.
parser = argparse.ArgumentParser(
    description="Model cloud bucket name argument.",
)
parser.add_argument(
    "--bucket_name",
    required=True,
    type=str,
    help="Cloud bucket name",
)
args = parser.parse_args()

# Automated MirroredStrategy: chief config with multiple GPUs
multi_gpu_chief = tfc.MachineConfig(
    cpu_cores=8,
    memory=30,
    accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
    accelerator_count=2,
)
build_config = tfc.DockerConfig(image_build_bucket=args.bucket_name)

# Chief only (no workers); logs are streamed back while the job runs.
tfc.run(
    entry_point="tests/testdata/mnist_example_using_fit_no_reqs.py",
    distribution_strategy="auto",
    chief_config=multi_gpu_chief,
    worker_count=0,
    stream_logs=True,
    docker_config=build_config,
)
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow_cloud as tfc

# Placeholder bucket name; replace it before running.
gcp_bucket = "your-gcp-bucket"

# The bucket is handed to the Docker config as the image build bucket.
build_config = tfc.DockerConfig(image_build_bucket=gcp_bucket)

# Submit `train_model.py` with its requirements and stream the job logs.
tfc.run(
    entry_point="train_model.py",
    requirements_txt="requirements.txt",
    docker_config=build_config,
    stream_logs=True,
)
# If you are using a custom image you can install modules via requirements # txt file. with open("requirements.txt", "w") as f: f.write("tensorflow-cloud\n") # Optional: Some recommended base images. If you provide none the system # will choose one for you. TF_GPU_IMAGE = "gcr.io/deeplearning-platform-release/tf2-cpu.2-5" TF_CPU_IMAGE = "gcr.io/deeplearning-platform-release/tf2-gpu.2-5" # Submit a single node training job using GPU. tfc.run( distribution_strategy="auto", requirements_txt="requirements.txt", docker_config=tfc.DockerConfig(parent_image=TF_GPU_IMAGE, image_build_bucket=GCS_BUCKET), chief_config=tfc.COMMON_MACHINE_CONFIGS["K80_1X"], job_labels={"job": JOB_NAME}, ) """ ## Training Results ### Reconnect your Colab instance Most remote training jobs are long running. If you are using Colab, it may time out before the training results are available. In that case, **rerun the following sections in order** to reconnect and configure your Colab instance to access the training results. 1. Import required modules