def affinitize_task_to_master(spark_client, cluster_id, task):
    cluster = spark_client.get_cluster(cluster_id)
    if cluster.master_node_id is None:
        raise AztkError("Master has not yet been selected. "
                        "Please wait until the cluster is finished provisioning.")
    master_node = spark_client.batch_client.compute_node.get(
        pool_id=cluster_id, node_id=cluster.master_node_id)
    task.affinity_info = batch_models.AffinityInformation(affinity_id=master_node.affinity_id)
    return task

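# A minimal usage sketch, assuming an authenticated aztk Spark client. The
# cluster id and task definition are hypothetical, and the sketch assumes the
# Batch job id matches the cluster id (as aztk does by convention):
import azure.batch.models as batch_models

task = batch_models.TaskAddParameter(id="example-task",
                                     command_line="/bin/bash -c 'echo hello'")
task = affinitize_task_to_master(spark_client, "my-cluster", task)
# The task now carries the master node's affinity id, so Batch will try to
# schedule it on the same node as the Spark master.
spark_client.batch_client.task.add(job_id="my-cluster", task=task)
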
def connect(hostname, port=22, username=None, password=None, pkey=None, timeout=None):
    import io
    import socket
    import logging
    import paramiko

    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    # A key passed as a string is wrapped in a file-like object for paramiko.
    if pkey:
        ssh_key = paramiko.RSAKey.from_private_key(file_obj=io.StringIO(pkey))
    else:
        ssh_key = None

    timeout = timeout or 20
    logging.debug("Connecting to %s@%s:%d, timeout=%d", username, hostname, port, timeout)

    try:
        client.connect(hostname, port=port, username=username, password=password,
                       pkey=ssh_key, timeout=timeout)
    except socket.timeout:
        raise AztkError("Connection timed out to: {}".format(hostname))

    return client

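# Hypothetical usage; the host, username, and key path are placeholders. The
# return value is a plain paramiko.SSHClient, so the usual exec_command API
# applies:
with open("/path/to/id_rsa") as f:
    client = connect("10.0.0.4", username="spark", pkey=f.read())
_, stdout, _ = client.exec_command("hostname")
print(stdout.read().decode())
client.close()
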
@classmethod
def from_dict(cls, args: dict):
    """
    Create a new model from dict values.
    The dict is cleaned of null values and expanded into the constructor.
    """
    try:
        return cls._from_dict(args)
    except (ValueError, TypeError) as e:
        pretty_args = yaml.dump(args, default_flow_style=False)
        raise AztkError("{0} {1}\n{2}".format(cls.__name__, str(e), pretty_args))

def merge(self, other):
    if not isinstance(other, self.__class__):
        raise AztkError("Cannot merge {0} as it is not an instance of {1}".format(
            other, self.__class__.__name__))
    # Only fields explicitly set on `other` (present in its _data) are merged.
    for field in other._fields.values():
        if field in other._data:
            field.merge(self, other._data[field])
    return self

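# A hedged sketch of the intended call pattern, assuming two instances of the
# same descriptor-based model class (SparkConfiguration is illustrative, not a
# confirmed API): each field that has a value on `other` is merged into `self`.
defaults = SparkConfiguration.from_dict({"spark_version": "2.3.0"})
overrides = SparkConfiguration.from_dict({"worker_count": 4})
merged = defaults.merge(overrides)
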
@classmethod
def from_dict(cls, args: dict):
    """
    Create a new model from dict values.
    The dict is cleaned of null values and expanded into the constructor.
    """
    try:
        # Drop only None entries, so legitimate falsy values (0, False, "")
        # still reach the constructor.
        clean = dict((k, v) for k, v in args.items() if v is not None)
        return cls(**clean)
    except TypeError as e:
        pretty_args = yaml.dump(args, default_flow_style=False)
        raise AztkError("{0} {1}\n{2}".format(cls.__name__, str(e), pretty_args))

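# Hypothetical usage (ClusterConfiguration stands in for any model class that
# exposes this classmethod): null entries are stripped, so optional keys may
# safely be present with a None value.
raw = {"cluster_id": "my-cluster", "vm_count": 2, "subnet_id": None}
config = ClusterConfiguration.from_dict(raw)
# Equivalent to ClusterConfiguration(cluster_id="my-cluster", vm_count=2).
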
def wait_for_all_nodes(spark_client, cluster_id, nodes):
    while True:
        for node in nodes:
            if node.state in [
                    batch_models.ComputeNodeState.unusable,
                    batch_models.ComputeNodeState.start_task_failed
            ]:
                raise AztkError("Node {} in failed state.".format(node.id))
            if node.state not in [
                    batch_models.ComputeNodeState.idle,
                    batch_models.ComputeNodeState.running
            ]:
                # At least one node is still provisioning; stop scanning,
                # refresh the node list below, and poll again.
                break
        else:
            # The for loop finished without a break: every node is ready.
            break
        nodes = spark_client.cluster.get(cluster_id).nodes

def wait_for_all_nodes(spark_client, cluster_id, nodes):
    nodes = list(nodes)
    start_time = time.time()
    # Poll for up to five minutes; if the budget elapses the function returns
    # without raising.
    while (time.time() - start_time) < 300:
        if any(node.state in [
                batch_models.ComputeNodeState.unusable,
                batch_models.ComputeNodeState.start_task_failed
        ] for node in nodes):
            raise AztkError("A node is unusable or had its start task fail.")
        if not all(node.state in [
                batch_models.ComputeNodeState.idle,
                batch_models.ComputeNodeState.running
        ] for node in nodes):
            # Not every node is ready yet: refresh the node list and retry.
            nodes = list(spark_client.cluster.get(cluster_id).nodes)
            time.sleep(1)
        else:
            break

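# Hypothetical call site for either wait variant above: block until a freshly
# created cluster's nodes are all idle or running before submitting work.
cluster = spark_client.cluster.get("my-cluster")
wait_for_all_nodes(spark_client, "my-cluster", cluster.nodes)
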
def _validate_required(self, attrs):
    for attr in attrs:
        if not getattr(self, attr):
            raise AztkError("{0} missing {1}.".format(
                self.__class__.__name__, attr))

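# A sketch of the intended call pattern, assuming a model class wires this
# helper in as a method (SSHConfiguration and its fields are illustrative):
class SSHConfiguration:
    def __init__(self, cluster_id=None, username=None):
        self.cluster_id = cluster_id
        self.username = username

    _validate_required = _validate_required  # reuse the helper above as a method

SSHConfiguration(cluster_id="my-cluster")._validate_required(["cluster_id", "username"])
# Raises AztkError: "SSHConfiguration missing username."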