from azureml.core import Dataset, Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.exceptions import WorkspaceException


def setup(num):
    # Build a per-run workspace name, e.g. "<prefix>-<location>-01".
    workspace_name = '%s-%s-%02d' % (workspace_prefix, location, num)

    # Load the workspace if it exists; otherwise create it.
    try:
        ws = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group)
        print('Found existing workspace %s' % workspace_name)
    except WorkspaceException:
        print('Creating new workspace %s...' % workspace_name)
        ws = Workspace.create(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group,
            location=location)

    # Load the compute target if it exists; otherwise provision it.
    try:
        compute_target = AmlCompute(ws, compute_name)
        print('Found existing compute %s' % compute_name)
        compute_target.update(min_nodes=min_nodes, max_nodes=max_nodes)
    except ComputeTargetException:
        print('Creating new compute target %s...' % compute_name)
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes)
        compute_target = ComputeTarget.create(ws, compute_name, compute_config)
        compute_target.wait_for_completion(show_output=True,
                                           timeout_in_minutes=20)

    # Upload the contents of the local "testdata" directory to the root of the
    # default datastore; the dataset below expects a "testdata.txt" file there.
    ds = ws.get_default_datastore()
    ds.upload("testdata")

    dataset_name = 'sample_dataset'
    if dataset_name not in ws.datasets:
        data = Dataset.File.from_files(path=[(ds, 'testdata.txt')])
        data.register(
            workspace=ws,
            name=dataset_name,
            description='Sample data for load test')
        print('Dataset successfully registered')
    else:
        print('Dataset already exists')
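# Usage sketch (not from the original source): setup() reads module-level
# configuration that is defined elsewhere. The values below are hypothetical
# placeholders showing what must exist before calling it.
workspace_prefix = 'loadtest'          # assumed workspace name prefix
location = 'westus2'                   # assumed Azure region
subscription_id = '<subscription-id>'  # placeholder
resource_group = '<resource-group>'    # placeholder
compute_name = 'cpu-cluster'           # assumed compute target name
vm_size = 'STANDARD_D2_V2'             # assumed VM SKU
min_nodes, max_nodes = 0, 4            # assumed autoscale bounds

if __name__ == '__main__':
    setup(1)  # creates or loads 'loadtest-westus2-01' and registers the dataset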
import logging
import os
from datetime import datetime
from tempfile import NamedTemporaryFile
from time import sleep

from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from gevent import joinall
from pssh.clients import ParallelSSHClient, SSHClient
from pssh.config import HostConfig
from pssh.utils import enable_host_logger
from termcolor import colored, cprint


class ClusterConnector:
    def __init__(
        self,
        workspace,
        cluster_name,
        ssh_key,
        vm_type,
        admin_username="******",
    ):
        """Thin wrapper class around azureml.core.compute.AmlCompute.

        Provides parallel SSH objects and helpers for running commands and
        copying files on the master node or on all nodes.

        Usage:
        >>> cc = ClusterConnector(workspace, "MyCluster", sshkey, "Standard_ND40rs_v2")
        >>> cc.initialise(min_nodes=0, max_nodes=4, idle_timeout_secs=30)
        >>> cluster = cc.cluster
        >>> [print(node['name']) for node in cc.cluster.list_nodes()]
        """
        self.cluster_name = cluster_name
        self.workspace = workspace
        self.ssh_key = ssh_key
        self.vm_type = vm_type
        self.admin_username = admin_username

        # Route pssh host output to a per-workspace logfile instead of stdout.
        enable_host_logger()
        hlog = logging.getLogger("pssh.host_logger")
        tstr = datetime.now().isoformat(timespec="minutes")
        for handler in list(hlog.handlers):
            if isinstance(handler, logging.StreamHandler):
                hlog.removeHandler(handler)
        os.makedirs("clusterlogs", exist_ok=True)
        self.logfile = "clusterlogs/{}_{}.log".format(self.workspace.name, tstr)
        hlog.addHandler(logging.FileHandler(self.logfile))

        self.cluster = None
        self._master_scp = None
        self._master_ssh = None
        self._all_ssh = None

    def initialise(self, min_nodes=0, max_nodes=0, idle_timeout_secs=1800):
        """Initialise the underlying AmlCompute cluster instance."""
        self._create_or_update_cluster(min_nodes, max_nodes, idle_timeout_secs)

    def _check_logs_emessage(self, host, port):
        return "Remote command failed on {}:{}. For details see {}".format(
            host, port, self.logfile)

    def terminate(self):
        print('Attempting to terminate cluster "{}"'.format(
            colored(self.cluster_name, "green")))
        try:
            self.cluster.update(min_nodes=0,
                                max_nodes=0,
                                idle_seconds_before_scaledown=10)
            self.cluster.wait_for_completion()
        except ComputeTargetException as err:
            raise RuntimeError(
                "Failed to terminate cluster nodes ({})".format(err))
        if len(self.cluster.list_nodes()):
            raise RuntimeError(
                "Failed to terminate cluster nodes (nodes still running)")

    @property
    def cluster_nodes(self):
        self.cluster.refresh_state()
        return sorted(self.cluster.list_nodes(), key=lambda n: n["port"])

    def _create_or_update_cluster(self, min_nodes, max_nodes,
                                  idle_timeout_secs):
        # Update the cluster if it exists; otherwise provision a new one.
        try:
            self.cluster = AmlCompute(workspace=self.workspace,
                                      name=self.cluster_name)
            print('Updating existing cluster "{}"'.format(
                colored(self.cluster_name, "green")))
            self.cluster.update(
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                idle_seconds_before_scaledown=idle_timeout_secs,
            )
        except ComputeTargetException:
            print('Creating new cluster "{}"'.format(
                colored(self.cluster_name, "green")))
            cluster_config = AmlCompute.provisioning_configuration(
                vm_size=self.vm_type,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                idle_seconds_before_scaledown=idle_timeout_secs,
                admin_username=self.admin_username,
                admin_user_ssh_key=self.ssh_key,
                remote_login_port_public_access="Enabled",
            )
            self.cluster = AmlCompute.create(self.workspace,
                                             self.cluster_name,
                                             cluster_config)
        self.cluster.wait_for_completion()
        if len(self.cluster_nodes) < min_nodes:
            # Scaling can lag behind the update call; allow one retry.
            sleep(30)
            if len(self.cluster_nodes) < min_nodes:
                raise RuntimeError("Failed to provision sufficient nodes")

    def _copy_nodefile_to_nodes(self):
        if len(self.cluster_nodes) == 1:
            cprint("Single node cluster -- skipping IB config", "yellow")
            return
        print("Collecting cluster IB info")
        # Extract each node's InfiniBand (ib0) IPv4 address.
        outputs = self._all_ssh.run_command(
            r'ifconfig ib0 | grep -oe "inet[^6][adr: ]*[0-9.]*" | cut -d" " -f2',
            shell="bash -c",
        )
        self._all_ssh.join(outputs)
        ibaddrs = []
        for output in outputs:
            host = output.host
            port = output.client.port
            if output.exit_code != 0:
                print(list(output.stdout))
                print(list(output.stderr))
                raise RuntimeError("Failed to get IB ip for {}:{}".format(
                    host, port))
            try:
                ibaddr = list(output.stdout)[0].split()[0]
            except IndexError:
                raise RuntimeError("Failed to get IB ip for {}:{} - "
                                   "No ib interface found!".format(host, port))
            print("Mapping {}:{} -> {}".format(host, port, ibaddr))
            if port == self._master_scp.port:
                cprint("IB Master: {}".format(ibaddr), "green")
                # Keep the master node's address first in the nodefile.
                ibaddrs = [ibaddr] + ibaddrs
            else:
                ibaddrs.append(ibaddr)
        with NamedTemporaryFile(delete=False, mode="wt") as nfh:
            self.nodefile = nfh.name
            for addr in ibaddrs:
                nfh.write("{}\n".format(addr))
        self.ibaddrs = ibaddrs
        self.copy_to_all_nodes(self.nodefile, "./nodefile")

    def _create_cluster_ssh_conns(self):
        hostips = [n["publicIpAddress"] for n in self.cluster_nodes]
        hostconfigs = [HostConfig(port=n["port"]) for n in self.cluster_nodes]
        self._all_ssh = ParallelSSHClient(hostips,
                                          host_config=hostconfigs,
                                          user=self.admin_username)
        self._master_ssh = ParallelSSHClient(hostips[:1],
                                             host_config=hostconfigs[:1],
                                             user=self.admin_username)
        self._master_scp = SSHClient(hostips[0],
                                     port=hostconfigs[0].port,
                                     user=self.admin_username)

    def copy_to_all_nodes(self, source, dest):
        copy_jobs = self._all_ssh.copy_file(source, dest)
        joinall(copy_jobs, raise_error=True)

    def copy_to_master_node(self, source, dest):
        self._master_scp.copy_file(source, dest)

    def copy_from_master_node(self, source, dest):
        self._master_scp.copy_remote_file(source, dest)

    def run_on_all_nodes(self, command):
        outputs = self._all_ssh.run_command(command, shell="bash -c")
        self._all_ssh.join(outputs, consume_output=True)
        for output in outputs:
            if int(output.exit_code) != 0:
                host = output.host
                port = output.client.port
                raise RuntimeError(self._check_logs_emessage(host, port))

    def run_on_master_node(self, command):
        outputs = self._master_ssh.run_command(command, shell="bash -c")
        self._master_ssh.join(outputs)
        for output in outputs:
            if int(output.exit_code) != 0:
                host = output.host
                port = output.client.port
                raise RuntimeError(self._check_logs_emessage(host, port))

    def attempt_termination(self):
        try:
            self.terminate()
        except RuntimeError as err:
            print(colored("ERROR: {}\n\n", "red", attrs=["bold"]).format(err))
            self.warn_unterminated()

    def warn_unterminated(self):
        print(
            colored("WARNING: {}", "red", attrs=["bold"]).format(
                colored(
                    "Cluster {} is still running - terminate manually to avoid "
                    "additional compute costs".format(
                        colored(self.cluster_name, "green")),
                    "red",
                )))
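# Usage sketch (not part of the original class): the intended lifecycle is
# provision, open SSH connections, run commands, then tear down. `workspace`
# and `sshkey` are assumed to exist already; names here are illustrative.
def run_hostname_everywhere(workspace, sshkey):
    cc = ClusterConnector(workspace, "MyCluster", sshkey, "Standard_ND40rs_v2")
    cc.initialise(min_nodes=2, max_nodes=2, idle_timeout_secs=300)
    try:
        # The run_*/copy_* helpers require the SSH connections built here.
        cc._create_cluster_ssh_conns()
        cc.run_on_all_nodes("hostname")  # per-host output lands in cc.logfile
    finally:
        cc.attempt_termination()  # prints a warning instead of raising on failure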
# Fragment: this code continues inside a `try:` block (not shown) that has
# just loaded an existing AmlCompute cluster into `cluster`; `aml_settings`
# holds the desired cluster configuration.

    # Check settings and redeploy if required settings have changed
    print("Found existing cluster")
    if (cluster.vm_size.lower() != aml_settings["vm_size"].lower()
            or cluster.vm_priority.lower() != aml_settings["vm_priority"].lower()):
        cluster.delete()
        cluster.wait_for_completion(show_output=True)
        raise ComputeTargetException(
            "Cluster is of incorrect size or has incorrect priority. "
            "Deleting cluster and provisioning a new one.")

    # Update AMLCompute
    # if cluster.provisioning_configuration.min_nodes != aml_settings["min_nodes"] \
    #         or cluster.provisioning_configuration.max_nodes != aml_settings["max_nodes"] \
    #         or cluster.provisioning_configuration.idle_seconds_before_scaledown \
    #         != aml_settings["idle_seconds_before_scaledown"]:
    print("Updating settings of Cluster")
    cluster.update(
        min_nodes=aml_settings["min_nodes"],
        max_nodes=aml_settings["max_nodes"],
        idle_seconds_before_scaledown=aml_settings["idle_seconds_before_scaledown"])

    # Wait until the operation has completed
    cluster.wait_for_completion(show_output=True)
    print("Successfully updated Cluster definition")
except ComputeTargetException:
    print("Loading failed")
    print("Creating new AML Compute resource")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=aml_settings["vm_size"],
        vm_priority=aml_settings["vm_priority"],
        min_nodes=aml_settings["min_nodes"],
        max_nodes=aml_settings["max_nodes"],
        idle_seconds_before_scaledown=aml_settings["idle_seconds_before_scaledown"])
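# Sketch of the `aml_settings` dict the fragment above reads from, inferred
# from the keys it accesses; the values shown are illustrative assumptions.
aml_settings = {
    "vm_size": "STANDARD_D2_V2",   # compared case-insensitively with cluster.vm_size
    "vm_priority": "dedicated",    # "dedicated" or "lowpriority"
    "min_nodes": 0,
    "max_nodes": 4,
    "idle_seconds_before_scaledown": 300,
}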
import json
import os

from adal.adal_error import AdalError
from azureml.core import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.exceptions import (AuthenticationException,
                                ComputeTargetException,
                                ProjectSystemException)
from msrest.exceptions import AuthenticationError


def main():
    # Loading input values
    print("::debug::Loading input values")
    parameters_file = os.environ.get("INPUT_PARAMETERSFILE",
                                     default="workspace.json")
    azure_credentials = os.environ.get("INPUT_AZURECREDENTIALS", default="{}")
    azure_credentials = json.loads(azure_credentials)

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file_path = os.path.join(".aml", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::error::Could not find parameter file in {parameters_file_path}. "
            f"Please provide a parameter file in your repository "
            f"(e.g. .aml/workspace.json)."
        )
        return

    # Loading Workspace
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""))
    try:
        print("::debug::Loading existing Workspace")
        ws = Workspace.get(
            name=parameters.get("name", None),
            subscription_id=azure_credentials.get("subscriptionId", ""),
            resource_group=parameters.get("resourceGroup", None),
            auth=sp_auth)
        print("::debug::Successfully loaded existing Workspace")
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste the output of "
            f"`az ad sp create-for-rbac --name <your-sp-name> --role contributor "
            f"--scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> "
            f"--sdk-auth` as the value of secret variable AZURE_CREDENTIALS: {exception}"
        )
        return
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        return
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}")
        return
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorization failed: {exception}")
        return

    # TODO: Create compute if not existing.
    try:
        # Loading AMLCompute
        print("::debug::Loading existing AML Compute")
        cluster = AmlCompute(workspace=ws, name=parameters["name"])

        # Check settings and redeploy if required settings have changed
        print("::debug::Found existing cluster")
        if (cluster.vm_size.lower() != parameters["vm_size"].lower()
                or cluster.vm_priority.lower() != parameters["vm_priority"].lower()):
            cluster.delete()
            cluster.wait_for_completion(show_output=True)
            raise ComputeTargetException(
                "Cluster is of incorrect size or has incorrect priority. "
                "Deleting cluster and provisioning a new one.")

        # Update AMLCompute
        # if cluster.provisioning_configuration.min_nodes != aml_settings["min_nodes"] \
        #         or cluster.provisioning_configuration.max_nodes != aml_settings["max_nodes"] \
        #         or cluster.provisioning_configuration.idle_seconds_before_scaledown \
        #         != aml_settings["idle_seconds_before_scaledown"]:
        print("::debug::Updating settings of Cluster")
        cluster.update(
            min_nodes=parameters["min_nodes"],
            max_nodes=parameters["max_nodes"],
            idle_seconds_before_scaledown=parameters["idle_seconds_before_scaledown"])

        # Wait until the operation has completed
        cluster.wait_for_completion(show_output=True)
        print("::debug::Successfully updated Cluster definition")
    except ComputeTargetException:
        print("::debug::Loading failed")
        print("::debug::Creating new AML Compute resource")
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=parameters["vm_size"],
            vm_priority=parameters["vm_priority"],
            min_nodes=parameters["min_nodes"],
            max_nodes=parameters["max_nodes"],
            idle_seconds_before_scaledown=parameters["idle_seconds_before_scaledown"],
            tags=parameters["tags"],
            description=parameters["description"])

        # Deploy to VNET if provided
        if (parameters["vnet_resource_group_name"] and parameters["vnet_name"]
                and parameters["subnet_name"]):
            compute_config.vnet_resourcegroup_name = parameters["vnet_resource_group_name"]
            compute_config.vnet_name = parameters["vnet_name"]
            compute_config.subnet_name = parameters["subnet_name"]

        # Set Credentials if provided
        if parameters["admin_username"] and parameters["admin_user_password"]:
            compute_config.admin_username = parameters["admin_username"]
            compute_config.admin_user_password = parameters["admin_user_password"]
        elif parameters["admin_username"] and parameters["admin_user_ssh_key"]:
            compute_config.admin_username = parameters["admin_username"]
            compute_config.admin_user_ssh_key = parameters["admin_user_ssh_key"]

        # Create Compute Target
        cluster = ComputeTarget.create(
            workspace=ws,
            name=parameters["name"],
            provisioning_configuration=compute_config)

        # Wait until the cluster is attached
        cluster.wait_for_completion(show_output=True)

    # Checking status of AMLCompute Cluster
    print("::debug::Checking status of AMLCompute Cluster")
    if cluster.provisioning_state == "Failed":
        cluster.delete()
        raise Exception(
            "::debug::Deployment of AMLCompute Cluster failed with the following "
            "status: {} and logs: \n{}".format(
                cluster.provisioning_state, cluster.provisioning_errors))

    print(parameters)
    print("::debug::Successfully finished Azure Machine Learning Compute Action")
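# Entry-point sketch (an assumption; the original action presumably wires this
# up elsewhere). The INPUT_* environment variables mirror what main() reads.
if __name__ == "__main__":
    # os.environ["INPUT_PARAMETERSFILE"] = "compute.json"       # optional override
    # os.environ["INPUT_AZURECREDENTIALS"] = '{"tenantId": "..."}'  # normally a repo secret
    main()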