def get(self, relpath=None, params=None):
    """Invoke the GET method on a resource.

    :param relpath: Optional. A relative path to this resource's path.
    :param params: Key-value data.
    :return: A dictionary of the JSON result.
    """
    for attempt in six.moves.xrange(self.retries + 1):
        # Pause before every attempt except the first one.
        if attempt:
            context.sleep(self.retry_sleep)
        try:
            return self.invoke("GET", relpath, params)
        except (socket.error, urllib.error.URLError) as e:
            # Only timeouts are retried; anything else propagates.
            if "timed out" not in six.text_type(e).lower():
                raise
            if attempt < self.retries:
                LOG.warning("Timeout issuing GET request for "
                            "{path}. Will retry".format(
                                path=self._join_uri(relpath)))
            else:
                LOG.warning("Timeout issuing GET request for "
                            "{path}. No retries left".format(
                                path=self._join_uri(relpath)))
    # Every attempt timed out.
    raise ex.CMApiException(_("Get retry max time reached."))
def wait_ambari_requests(self, requests, cluster_name):
    """Block until every given Ambari request reaches COMPLETED state.

    :param requests: iterable of Ambari request ids to wait for.
    :param cluster_name: name of the cluster the requests belong to.
    :raises HadoopProvisionError: if any request ends up in a state
        other than COMPLETED, IN_PROGRESS or PENDING.
    """
    requests = set(requests)
    failed = []
    # Give Ambari some time to register freshly submitted requests.
    context.sleep(20)
    while len(requests) > 0:
        completed, not_completed = set(), set()
        for req_id in requests:
            request = self.get_request_info(cluster_name, req_id)
            status = request.get("request_status")
            if status == 'COMPLETED':
                completed.add(req_id)
            elif status in ['IN_PROGRESS', 'PENDING']:
                not_completed.add(req_id)
            else:
                failed.append(request)
        if failed:
            msg = _("Some Ambari request(s) "
                    "not in COMPLETED state: %(description)s.")
            descrs = []
            for req in failed:
                descr = _(
                    "request %(id)d: %(name)s - in status %(status)s")
                descrs.append(descr % {'id': req.get("id"),
                                       'name': req.get("request_context"),
                                       'status': req.get("request_status")})
            raise p_exc.HadoopProvisionError(msg % {'description': descrs})
        requests = not_completed
        # Fix: previously the loop slept another 5 seconds and logged
        # "Waiting for 0 ambari request(s)" even after everything had
        # completed; exit immediately once nothing is left to wait for.
        if not requests:
            break
        context.sleep(5)
        LOG.debug("Waiting for %d ambari request(s) to be completed",
                  len(not_completed))
    LOG.debug("All ambari requests have been completed")
def _await_cldb(self, cluster_context, instances=None, timeout=600):
    """Wait for the CLDB service to come up and nodes to register with it.

    :param cluster_context: plugin cluster context; used to resolve the
        CLDB node and the default instance list.
    :param instances: instances expected to register with CLDB;
        defaults to all cluster instances.
    :param timeout: max number of seconds to keep polling.
    :raises HadoopProvisionError: if CLDB never reports 'ok' status
        within the timeout, or an instance is still missing from CLDB's
        node list after more than DEFAULT_RETRY_COUNT successful polls.
    """
    instances = instances or cluster_context.get_instances()
    cldb_node = cluster_context.get_instance(mfs.CLDB)
    start_time = timeutils.utcnow()
    # Counts polls where CLDB itself answered 'ok'.
    retry_count = 0
    with cldb_node.remote() as r:
        LOG.debug("Waiting {count} seconds for CLDB initialization".format(
            count=timeout))
        # Poll until the deadline; the while/else below raises when the
        # loop exhausts the timeout without reaching the 'break'.
        while timeutils.delta_seconds(start_time,
                                      timeutils.utcnow()) < timeout:
            ec, out = r.execute_command(NODE_LIST_CMD,
                                        raise_when_error=False)
            resp = json.loads(out)
            status = resp['status']
            if str(status).lower() == 'ok':
                # IPs of the nodes CLDB currently knows about.
                ips = [n['ip'] for n in resp['data']]
                retry_count += 1
                for i in instances:
                    # NOTE(review): a node missing from CLDB only fails
                    # provisioning after DEFAULT_RETRY_COUNT 'ok' polls;
                    # before that threshold the loop still breaks below
                    # on the first 'ok' status -- confirm this early
                    # exit is intended.
                    if (i.internal_ip not in ips and
                            (retry_count > DEFAULT_RETRY_COUNT)):
                        msg = _("Node failed to connect to CLDB: %s"
                                ) % i.internal_ip
                        raise ex.HadoopProvisionError(msg)
                break
            else:
                # CLDB not ready yet; wait before the next poll.
                context.sleep(DELAY)
        else:
            raise ex.HadoopProvisionError(_("CLDB failed to start"))
def wait(self, timeout=None):
    """Wait for command to finish.

    :param timeout: (Optional) Max amount of time (in seconds) to wait.
        Wait forever by default.
    :return: The final ApiCommand object, containing the last known
        state. The command may still be running in case of timeout.
    """
    # Synchronous commands are already finished by definition.
    if self.id == ApiCommand.SYNCHRONOUS_COMMAND_ID:
        return self

    poll_interval = 5
    deadline = None if timeout is None else time.time() + timeout

    while True:
        cmd = self.fetch()
        if not cmd.active:
            return cmd
        if deadline is None:
            # No timeout: poll at the fixed interval forever.
            context.sleep(poll_interval)
            continue
        now = time.time()
        if deadline < now:
            # Timed out: return the last known state.
            return cmd
        # Never sleep past the deadline.
        context.sleep(min(poll_interval, deadline - now))
def _wait_all_processes_removed(cluster, instance):
    """Poll Ambari until the instance reports no remaining HDP processes."""
    with _get_ambari_client(cluster) as client:
        # Re-check every 5 seconds; exits as soon as the list is empty.
        while client.list_host_processes(cluster.name, instance):
            context.sleep(5)
def update_configs(self, instances):
    """Push updated configs to every instance, one thread per instance.

    ``instances`` must be non-empty.
    """
    cluster_id = instances[0].cluster_id
    utils.add_provisioning_step(
        cluster_id, _("Update configs"), len(instances))
    with context.PluginsThreadGroup() as tg:
        for inst in instances:
            thread_name = "update-configs-%s" % inst.instance_name
            tg.spawn(thread_name, self._update_configs, inst)
            # Stagger the spawns slightly.
            context.sleep(1)
def wait_ambari_request(self, request_id, cluster_name):
    """Block until a single Ambari request completes.

    :param request_id: id of the Ambari request to wait for.
    :param cluster_name: name of the cluster the request belongs to.
    :raises HadoopProvisionError: if the request enters a state other
        than COMPLETED, IN_PROGRESS or PENDING.
    """
    # Give Ambari some time to register the request before polling.
    context.sleep(20)
    while True:
        status = self.check_request_status(cluster_name, request_id)
        state = status["request_status"]
        LOG.debug("Task %(context)s in %(status)s state. "
                  "Completed %(percent).1f%%",
                  {'context': status["request_context"],
                   'status': state,
                   'percent': status["progress_percent"]})
        if state == "COMPLETED":
            return
        if state not in ["IN_PROGRESS", "PENDING"]:
            raise p_exc.HadoopProvisionError(
                _("Ambari request in %s state") % state)
        context.sleep(5)
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    """Decommission DataNodes through the NameNode's exclude file.

    Writes the instances to delete into ``dn.excl``, asks HDFS to
    refresh its node list, polls until decommissioning finishes, then
    rewrites the include file with the surviving instances and clears
    the exclude file.

    :param nn: the NameNode instance.
    :param inst_to_be_deleted: instances to decommission.
    :param survived_inst: instances that remain in the cluster.
    """
    with utils.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        # Fix: reuse the already-open remote 'r' instead of opening a
        # second, unmanaged connection to the same node via
        # utils.get_remote(nn).
        run.refresh_nodes(r, "dfsadmin")
        context.sleep(3)
        utils.plugin_option_poll(
            nn.cluster, _is_decommissioned,
            c_helper.DECOMMISSIONING_TIMEOUT,
            _("Decommission %s") % "DataNodes", 3,
            {'r': r, 'inst_to_be_deleted': inst_to_be_deleted})
        r.write_files_to({
            '/etc/hadoop/dn.incl':
                utils.generate_fqdn_host_names(survived_inst),
            '/etc/hadoop/dn.excl': ""})
def await_no_heartbeat():
    """Sleep long enough for the node NO_HEARTBEAT alarm to be raised."""
    delay = WAIT_NODE_ALARM_NO_HEARTBEAT
    # Fix: corrected typo in the log message ("NO_HEARBEAT").
    LOG.debug('Waiting for "NO_HEARTBEAT" alarm')
    context.sleep(delay)
def _rebuild(self, cluster_context, instances):
    """Rebuild the Oozie WAR on the instances and restart Oozie."""
    # Stop only the instances where Oozie is currently running.
    started = [inst for inst in instances if OOZIE.is_started(inst)]
    OOZIE.stop(started)
    g.execute_on_instances(
        instances, self._rebuild_oozie_war, cluster_context)
    OOZIE.start(instances)
    # Give Oozie time to come up before callers proceed.
    context.sleep(OOZIE_START_DELAY)