def add_tasks(self, part_file, job_id):
    """
    Adds one task per gradient partition to the specified job.

    :param part_file: The resource file holding this node's partition data.
    :param str job_id: The ID of the job to which the tasks are added.
    """
    tasks = []
    for i in range(self.K):
        output_file = self.build_output_file(i)
        command_line = (
            "/bin/bash -c 'echo $AZ_BATCH_TASK_WORKING_DIR && daemon status "
            "&& scgrad {} {} {} {}'".format(
                _GRAD_COMMON_FILE, _GRAD_PART_FILE, _CONTAINER_OUTPUT_FILE, i))

        if self.config.REGISTRY_USERNAME:
            registry = models.ContainerRegistry(
                user_name=self.config.REGISTRY_USERNAME,
                password=self.config.REGISTRY_PASSWORD,
                registry_server=self.config.REGISTRY_SERVER,
            )
            task_container_settings = models.TaskContainerSettings(
                image_name=self.config.DOCKER_IMAGE, registry=registry)
        else:
            task_container_settings = models.TaskContainerSettings(
                image_name=self.config.DOCKER_IMAGE)

        tasks.append(
            models.TaskAddParameter(
                id="grad_part_{}".format(i),
                command_line=command_line,
                resource_files=[self.common_file, part_file],
                output_files=[output_file],
                container_settings=task_container_settings,
            ))

    # Submit the whole collection. The original code submitted only
    # [tasks[0]], which contradicts the docstring and looks like a
    # debugging leftover.
    self.batch_client.task.add_collection(job_id, tasks)
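
# The build_output_file helper called above is not shown in this snippet.
# A minimal sketch, assuming each task's _CONTAINER_OUTPUT_FILE is uploaded
# to a blob container whose SAS URL lives in self.config.CONTAINER_SAS_URL
# (a hypothetical config name); the OutputFile shapes match the ones used
# in the test snippet further down:
def build_output_file(self, i):
    """Uploads _CONTAINER_OUTPUT_FILE for partition i when the task completes."""
    destination = models.OutputFileDestination(
        container=models.OutputFileBlobContainerDestination(
            container_url=self.config.CONTAINER_SAS_URL,  # hypothetical
            path="grad_part_{}".format(i)))
    upload_options = models.OutputFileUploadOptions(
        upload_condition=models.OutputFileUploadCondition.task_completion)
    return models.OutputFile(
        file_pattern=_CONTAINER_OUTPUT_FILE,
        destination=destination,
        upload_options=upload_options)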
def create_container_config(config: Dict[str, str]) -> batchmodels.ContainerConfiguration:
    """Builds the container configuration for the ada510 registry and the NEO image."""
    ada_cr = batchmodels.ContainerRegistry(
        registry_server="ada510.azurecr.io",
        user_name="ada510",
        password=config["CR_PASSWORD"],
    )
    return batchmodels.ContainerConfiguration(
        container_image_names=[NEO_IMAGE],
        container_registries=[ada_cr],
    )
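
# The returned ContainerConfiguration plugs into the pool's virtual machine
# configuration. A minimal usage sketch; the pool ID, VM size, node count,
# and the Ubuntu container image reference below are illustrative, mirroring
# the other snippets in this file, and config / batch_service_client are
# assumed to be in scope:
container_conf = create_container_config(config)
new_pool = batchmodels.PoolAddParameter(
    id="neo-pool",
    virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
        image_reference=batchmodels.ImageReference(
            publisher="microsoft-azure-batch",
            offer="ubuntu-server-container",
            sku="16-04-lts",
            version="latest"),
        container_configuration=container_conf,
        node_agent_sku_id="batch.node.ubuntu 16.04"),
    vm_size="STANDARD_D2_V3",
    target_dedicated_nodes=1,
)
batch_service_client.pool.add(new_pool)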
def create_pool(batch_service_client, pool_id):
    """
    Creates a pool of Docker-ready compute nodes with the specified OS settings.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str pool_id: An ID for the new pool.
    """
    print('Creating pool [{}]...'.format(pool_id))

    # Create a new pool of Linux compute nodes using an Azure Virtual Machines
    # Marketplace image. For more information about creating pools of Linux
    # nodes, see:
    # https://azure.microsoft.com/documentation/articles/batch-linux-nodes/
    # This particular image is ready to run Docker containers.
    image_ref_to_use = batch.models.ImageReference(
        publisher='microsoft-azure-batch',
        offer='ubuntu-server-container',
        sku='16-04-lts',
        version='latest'
    )

    # Specify a container registry; the credentials come from config.py.
    container_registry = batchmodels.ContainerRegistry(
        user_name=config._REGISTRY_USER_NAME,
        password=config._REGISTRY_PASSWORD,
        registry_server=config._REGISTRY_SERVER
    )

    # Each node will pull the images listed here when it joins the pool.
    container_conf = batchmodels.ContainerConfiguration(
        container_image_names=[config._DOCKER_IMAGE],
        container_registries=[container_registry]
    )

    new_pool = batch.models.PoolAddParameter(
        id=pool_id,
        virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
            image_reference=image_ref_to_use,
            container_configuration=container_conf,
            node_agent_sku_id='batch.node.ubuntu 16.04'),
        vm_size=config._POOL_VM_SIZE,
        target_dedicated_nodes=config._POOL_NODE_COUNT
    )

    batch_service_client.pool.add(new_pool)
def create_processing_pool(batch_service_client, start_task):
    """
    Creates an auto-scaling pool of compute nodes with the specified OS settings.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str start_task: The start task command line.
    """
    LOGGER.info(f'Creating pool [{PROCESSING_POOL_ID}]...')
    image_ref_to_use = get_image_reference()

    container_registry = batch_models.ContainerRegistry(
        registry_server=REGISTRY_SERVER,
        user_name=REGISTRY_ACCOUNT_USER,
        password=REGISTRY_ACCOUNT_PASSWORD)

    container_conf = batch_models.ContainerConfiguration(
        container_image_names=[DOCKER_CONTAINER_URL],
        container_registries=[container_registry])

    new_pool = batch_models.PoolAddParameter(
        id=PROCESSING_POOL_ID,
        virtual_machine_configuration=batch_models.VirtualMachineConfiguration(
            image_reference=image_ref_to_use,
            container_configuration=container_conf,
            node_agent_sku_id=VM_AGENT_SKU),
        vm_size=PROCESSING_POOL_VM_SIZE,
        start_task=batch_models.StartTask(
            command_line=start_task,
            user_identity=batch_models.UserIdentity(
                auto_user=batch_models.AutoUserSpecification(
                    scope='pool',
                    elevation_level='admin'))),
        enable_auto_scale=True,
        auto_scale_evaluation_interval=datetime.timedelta(
            minutes=PROCESSING_POOL_SCALE_INTERVAL_MINUTES),
        auto_scale_formula=PROCESSING_POOL_SCALE_FORMULA)

    try:
        batch_service_client.pool.add(new_pool)
        LOGGER.info("Processing Pool Created")
    except batch_models.BatchErrorException as err:
        if 'The specified pool already exists.' in err.error.message.value:
            LOGGER.info("Pool already exists...")
        else:
            raise
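
# PROCESSING_POOL_SCALE_FORMULA is defined elsewhere in the module. A
# minimal sketch of what such an Azure Batch autoscale formula can look
# like, scaling dedicated nodes on pending tasks; the 5-minute window and
# the cap of 10 nodes are illustrative, not taken from the source:
PROCESSING_POOL_SCALE_FORMULA = """
pending = max($PendingTasks.GetSample(TimeInterval_Minute * 5));
$TargetDedicatedNodes = min(pending, 10);
$NodeDeallocationOption = taskcompletion;
"""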
def create_pool(config, batch_service_client):
    """
    Creates a pool of compute nodes with the specified OS settings.

    :param config: The pool configuration (pool ID, VM size, node counts,
        Docker container image, and optional registry credentials).
    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    """
    # Create a new pool of Linux compute nodes using an Azure Virtual Machines
    # Marketplace image. For more information about creating pools of Linux
    # nodes, see:
    # https://azure.microsoft.com/documentation/articles/batch-linux-nodes/
    image_ref_to_use = models.ImageReference(
        publisher="microsoft-azure-batch",
        offer="ubuntu-server-container",
        sku="16-04-lts",
        version="latest",
    )

    if config.REGISTRY_USERNAME:
        registry = models.ContainerRegistry(
            user_name=config.REGISTRY_USERNAME,
            password=config.REGISTRY_PASSWORD,
            registry_server=config.REGISTRY_SERVER,
        )
        container_conf = models.ContainerConfiguration(
            container_image_names=[config.DOCKER_CONTAINER],
            container_registries=[registry],
        )
    else:
        container_conf = models.ContainerConfiguration(
            container_image_names=[config.DOCKER_CONTAINER])

    new_pool = models.PoolAddParameter(
        id=config.POOL_ID,
        virtual_machine_configuration=models.VirtualMachineConfiguration(
            image_reference=image_ref_to_use,
            container_configuration=container_conf,
            node_agent_sku_id="batch.node.ubuntu 16.04",
        ),
        vm_size=config.POOL_VM_SIZE,
        target_dedicated_nodes=config.POOL_NODE_COUNT,
        target_low_priority_nodes=config.POOL_LOW_PRIORITY_NODE_COUNT,
    )

    batch_service_client.pool.add(new_pool)
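
# The config object above only needs the attributes the function reads. A
# minimal sketch as a dataclass; every default value here is a placeholder,
# not taken from the source:
from dataclasses import dataclass

@dataclass
class PoolConfig:
    POOL_ID: str = "container-pool"
    POOL_VM_SIZE: str = "STANDARD_D2_V3"
    POOL_NODE_COUNT: int = 2
    POOL_LOW_PRIORITY_NODE_COUNT: int = 0
    DOCKER_CONTAINER: str = "myregistry.azurecr.io/myimage:latest"
    REGISTRY_USERNAME: str = ""  # falsy means an anonymous/public registry
    REGISTRY_PASSWORD: str = ""
    REGISTRY_SERVER: str = ""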
def _create_pool(self):
    """
    Creates a pool of compute nodes with the OS settings from _IMAGE_REF,
    using a private container registry when one is configured.
    """
    if self.config.REGISTRY_SERVER:
        print("Using a private registry")
        registry = models.ContainerRegistry(
            user_name=self.config.REGISTRY_USERNAME,
            password=self.config.REGISTRY_PASSWORD,
            registry_server=self.config.REGISTRY_SERVER,
        )
        container_conf = models.ContainerConfiguration(
            container_image_names=[self.config.DOCKER_IMAGE],
            container_registries=[registry],
        )
    else:
        container_conf = models.ContainerConfiguration(
            container_image_names=[self.config.DOCKER_IMAGE])

    new_pool = models.PoolAddParameter(
        id=self.config.POOL_ID,
        virtual_machine_configuration=models.VirtualMachineConfiguration(
            image_reference=_IMAGE_REF,
            container_configuration=container_conf,
            node_agent_sku_id="batch.node.ubuntu 16.04",
        ),
        vm_size=self.config.POOL_VM_SIZE,
        target_dedicated_nodes=self.config.POOL_NODE_COUNT,
        target_low_priority_nodes=self.config.POOL_LOW_PRIORITY_NODE_COUNT,
    )

    # Create the pool
    self.batch_client.pool.add(new_pool)
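
# _IMAGE_REF is a module-level constant not shown in this snippet. Given the
# "batch.node.ubuntu 16.04" agent SKU above, it presumably mirrors the
# container-ready Marketplace image used in the other snippets; a sketch:
_IMAGE_REF = models.ImageReference(
    publisher="microsoft-azure-batch",
    offer="ubuntu-server-container",
    sku="16-04-lts",
    version="latest",
)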
def test_batch_tasks(self, batch_job, **kwargs):
    client = self.create_sharedkey_client(**kwargs)

    # Test Create Task with Auto Complete
    exit_conditions = models.ExitConditions(
        exit_codes=[models.ExitCodeMapping(1, models.ExitOptions(models.JobAction.terminate))],
        exit_code_ranges=[models.ExitCodeRangeMapping(2, 4, models.ExitOptions(models.JobAction.disable))],
        default=models.ExitOptions(models.JobAction.none))
    task_param = models.TaskAddParameter(
        id=self.get_resource_name('batch_task1_'),
        command_line='cmd /c "echo hello world"',
        exit_conditions=exit_conditions
    )
    try:
        client.task.add(batch_job.id, task_param)
    except models.BatchErrorException as e:
        message = "{}: {}".format(e.error.code, e.error.message)
        for v in e.error.values:
            message += "\n{}: {}".format(v.key, v.value)
        raise Exception(message)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(task.exit_conditions.default.job_action, models.JobAction.none)
    self.assertEqual(task.exit_conditions.exit_codes[0].code, 1)
    self.assertEqual(
        task.exit_conditions.exit_codes[0].exit_options.job_action,
        models.JobAction.terminate)

    # Test Create Task with Output Files
    container_url = "https://test.blob.core.windows.net:443/test-container"
    outputs = [
        models.OutputFile(
            file_pattern="../stdout.txt",
            destination=models.OutputFileDestination(
                container=models.OutputFileBlobContainerDestination(
                    container_url=container_url,
                    path="taskLogs/output.txt")),
            upload_options=models.OutputFileUploadOptions(
                upload_condition=models.OutputFileUploadCondition.task_completion)),
        models.OutputFile(
            file_pattern="../stderr.txt",
            destination=models.OutputFileDestination(
                container=models.OutputFileBlobContainerDestination(
                    container_url=container_url,
                    path="taskLogs/error.txt")),
            upload_options=models.OutputFileUploadOptions(
                upload_condition=models.OutputFileUploadCondition.task_failure)),
    ]
    task_param = models.TaskAddParameter(
        id=self.get_resource_name('batch_task2_'),
        command_line='cmd /c "echo hello world"',
        output_files=outputs
    )
    client.task.add(batch_job.id, task_param)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(len(task.output_files), 2)

    # Test Create Task with Auto User
    auto_user = models.AutoUserSpecification(
        scope=models.AutoUserScope.task,
        elevation_level=models.ElevationLevel.admin)
    task_param = models.TaskAddParameter(
        id=self.get_resource_name('batch_task3_'),
        command_line='cmd /c "echo hello world"',
        user_identity=models.UserIdentity(auto_user=auto_user)
    )
    client.task.add(batch_job.id, task_param)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(task.user_identity.auto_user.scope, models.AutoUserScope.task)
    self.assertEqual(task.user_identity.auto_user.elevation_level, models.ElevationLevel.admin)

    # Test Create Task with Token Settings
    task_param = models.TaskAddParameter(
        id=self.get_resource_name('batch_task4_'),
        command_line='cmd /c "echo hello world"',
        authentication_token_settings=models.AuthenticationTokenSettings(
            access=[models.AccessScope.job])
    )
    client.task.add(batch_job.id, task_param)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(task.authentication_token_settings.access[0], models.AccessScope.job)

    # Test Create Task with Container Settings
    task_param = models.TaskAddParameter(
        id=self.get_resource_name('batch_task5_'),
        command_line='cmd /c "echo hello world"',
        container_settings=models.TaskContainerSettings(
            image_name='windows_container:latest',
            registry=models.ContainerRegistry('username', 'password'))
    )
    client.task.add(batch_job.id, task_param)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(task.container_settings.image_name, 'windows_container:latest')
    self.assertEqual(task.container_settings.registry.user_name, 'username')

    # Test Create Task with Run-As-User
    task_param = models.TaskAddParameter(
        id=self.get_resource_name('batch_task6_'),
        command_line='cmd /c "echo hello world"',
        user_identity=models.UserIdentity(user_name='task-user')
    )
    client.task.add(batch_job.id, task_param)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(task.user_identity.user_name, 'task-user')

    # Test Add Task Collection
    tasks = []
    for i in range(7, 10):
        tasks.append(models.TaskAddParameter(
            self.get_resource_name('batch_task{}_'.format(i)),
            'cmd /c "echo hello world"'))
    result = client.task.add_collection(batch_job.id, tasks)
    self.assertIsInstance(result, models.TaskAddCollectionResult)
    self.assertEqual(len(result.value), 3)
    self.assertEqual(result.value[0].status, models.TaskAddStatus.success)

    # Test List Tasks
    tasks = list(client.task.list(batch_job.id))
    self.assertEqual(len(tasks), 9)

    # Test Count Tasks
    task_counts = client.job.get_task_counts(batch_job.id)
    self.assertIsInstance(task_counts, models.TaskCounts)
    self.assertEqual(task_counts.completed, 0)
    self.assertEqual(task_counts.succeeded, 0)
    self.assertEqual(task_counts.validation_status, models.TaskCountValidationStatus.validated)

    # Test Terminate Task
    response = client.task.terminate(batch_job.id, task_param.id)
    self.assertIsNone(response)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertEqual(task.state, models.TaskState.completed)

    # Test Reactivate Task
    response = client.task.reactivate(batch_job.id, task_param.id)
    self.assertIsNone(response)
    task = client.task.get(batch_job.id, task_param.id)
    self.assertEqual(task.state, models.TaskState.active)

    # Test Update Task
    response = client.task.update(
        batch_job.id, task_param.id,
        constraints=models.TaskConstraints(max_task_retry_count=1))
    self.assertIsNone(response)

    # Test Get Subtasks
    # TODO: Test with actual subtasks
    subtasks = client.task.list_subtasks(batch_job.id, task_param.id)
    self.assertIsInstance(subtasks, models.CloudTaskListSubtasksResult)
    self.assertEqual(subtasks.value, [])

    # Test Delete Task
    response = client.task.delete(batch_job.id, task_param.id)
    self.assertIsNone(response)
def create_pool_with_containers(batch_service_client, pool_id, resource_files,
                                publisher, offer, sku):
    """
    Creates a pool of compute nodes with the specified OS settings.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str pool_id: An ID for the new pool.
    :param list resource_files: A collection of resource files for the pool's
        start task.
    :param str publisher: Marketplace image publisher
    :param str offer: Marketplace image offer
    :param str sku: Marketplace image sku
    """
    print('Creating pool [{}]...'.format(pool_id))

    # Create a new pool of Linux compute nodes using an Azure Virtual Machines
    # Marketplace image. For more information about creating pools of Linux
    # nodes, see:
    # https://azure.microsoft.com/documentation/articles/batch-linux-nodes/

    # Specify the commands for the pool's start task. The start task is run
    # on each node as it joins the pool, and when it's rebooted or re-imaged.
    # We use the start task to prep the node for running our task script.
    task_commands = [
        # Copy the python_tutorial_task.py script to the "shared" directory
        # that all tasks running on the node have access to. Note that we use
        # the -p flag with cp to preserve the file uid/gid; otherwise, since
        # this start task runs as an admin, the file would not be accessible
        # to tasks run as a non-admin user.
        # 'wget https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb',
        # 'sudo dpkg -i packages-microsoft-prod.deb',
        # 'wget -O azcopy.tar.gz https://aka.ms/downloadazcopylinux64',
        # 'tar -xf azcopy.tar.gz',
        # 'sudo ./install.sh',
        # 'wget https://repo.anaconda.com/archive/Anaconda3-5.1.0-Linux-x86_64.sh -O ~/conda.sh',
        # 'bash ~/conda.sh -b -p $AZ_BATCH_NODE_SHARED_DIR/conda',
        # 'export PATH="$AZ_BATCH_NODE_SHARED_DIR/conda/bin:$PATH"',
        # 'sudo apt-get -y update',
        # 'sudo apt-get -y install azcopy',
        'cp -p {} $AZ_BATCH_NODE_SHARED_DIR'.format(_TUTORIAL_TASK_FILE),
        # 'cp -p {} $AZ_BATCH_NODE_SHARED_DIR'.format(_ENV_YML_FILE),
        'azcopy --source https://{0}.blob.core.windows.net/model/ghanamines.h5 '
        '--destination $AZ_BATCH_NODE_SHARED_DIR/ghanamines.h5 --source-key {1}'
        .format(_STORAGE_ACCOUNT_NAME, _STORAGE_ACCOUNT_KEY),
        # 'sudo $AZ_BATCH_NODE_SHARED_DIR/conda/bin/conda env create -f {}'.format(_ENV_YML_FILE)
    ]

    # Get the node agent SKU and image reference for the virtual machine
    # configuration.
    # For more information about the virtual machine configuration, see:
    # https://azure.microsoft.com/documentation/articles/batch-linux-nodes/
    sku_to_use, image_ref_to_use = \
        common_helpers.select_latest_verified_vm_image_with_node_agent_sku(
            batch_service_client, publisher, offer, sku)

    user = batchmodels.AutoUserSpecification(
        scope=batchmodels.AutoUserScope.pool,
        elevation_level=batchmodels.ElevationLevel.admin)

    container_reg = batchmodels.ContainerRegistry(
        user_name=CLIENT_ID,
        password=SECRET,
        registry_server=_ACR_URL)

    container_cfg = batchmodels.ContainerConfiguration(
        container_image_names=[_ACR_IMG_NAME],
        container_registries=[container_reg])

    my_img_ref = batchmodels.ImageReference(
        virtual_machine_image_id=_CUSTOM_VM_IMG_ID)

    vm_cfg = batchmodels.VirtualMachineConfiguration(
        image_reference=my_img_ref,
        # e.g. 'batch.node.ubuntu 16.04'; verify that the ghanaimg image has
        # GPU support.
        node_agent_sku_id=sku_to_use,
        container_configuration=container_cfg)

    task_container_settings = batchmodels.TaskContainerSettings(
        image_name=_ACR_IMG_NAME)

    new_pool = batchmodels.PoolAddParameter(
        id=pool_id,
        virtual_machine_configuration=vm_cfg,
        vm_size=_POOL_VM_SIZE,
        target_dedicated_nodes=_POOL_NODE_COUNT,
        target_low_priority_nodes=1,
        start_task=batchmodels.StartTask(
            command_line=common_helpers.wrap_commands_in_shell(
                'linux', task_commands),
            user_identity=batchmodels.UserIdentity(auto_user=user),
            wait_for_success=True,
            resource_files=resource_files,
            container_settings=task_container_settings))

    try:
        batch_service_client.pool.add(new_pool)
    except batchmodels.batch_error.BatchErrorException as err:
        print_batch_exception(err)
        raise
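
# print_batch_exception is a helper defined elsewhere. A minimal sketch,
# following the BatchErrorException shape the snippets above already rely on
# (error.message.value plus optional key/value detail pairs):
def print_batch_exception(batch_exception):
    """Prints the contents of the given BatchErrorException."""
    print('-------------------------------------------')
    print('Exception encountered:')
    if (batch_exception.error and batch_exception.error.message
            and batch_exception.error.message.value):
        print(batch_exception.error.message.value)
        if batch_exception.error.values:
            for detail in batch_exception.error.values:
                print('{}:\t{}'.format(detail.key, detail.value))
    print('-------------------------------------------')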