def run(self):
    """Notify (once) when overall cluster used space crosses the configured limit.

    Reads `ceph status` as JSON, derives used/available percentages from the
    pgmap byte counters, and appends a Result to the plugin context the first
    time usage exceeds the configured percent.  A boolean flag in the plugin
    state suppresses repeat notifications; the flag is cleared again once
    usage falls back under the threshold.
    """
    try:
        result = Result()
        ceph_api = CephAPI()
        cluster_status = ceph_api.get_ceph_cluster_status()
        if cluster_status is not None:
            cluster_status = json.loads(cluster_status)
            available_size = 0
            used_size = 0
            # Guard against division by zero on a cluster with no reported capacity.
            if cluster_status['pgmap']['bytes_total'] > 0:
                available_size = cluster_status['pgmap']['bytes_avail'] * 100.0 / cluster_status['pgmap']['bytes_total']
                used_size = cluster_status['pgmap']['bytes_used'] * 100.0 / cluster_status['pgmap']['bytes_total']
            notify_cluster_space_percent = ConfigAPI().get_notify_cluster_used_space_percent()
            if float(used_size) > float(notify_cluster_space_percent):
                # Only alert on the first crossing; the state flag stays True
                # until usage drops below the threshold again.
                check_state = self.__context.state.get(self.get_plugin_name(), False)
                if not check_state:
                    result.title = gettext("core_message_notify_title_cluster_out_space")
                    result.message = '\n'.join(gettext("core_message_notify_cluster_out_space").split("\\n")).format(int(available_size))
                    # logger.warning(result.message)
                    result.plugin_name = str(self.get_plugin_name())
                    self.__context.results.append(result)
                    self.__context.state[self.get_plugin_name()] = True
                    logger.warning("Cluster is running out of disk space")
                return
            # Usage back under threshold: re-arm the notification.
            self.__context.state[self.get_plugin_name()] = False
    except Exception:
        # Fixed: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt.
        logger.exception("Error occur during get cluster state")
def add_journal(args):
    """CLI action: dedicate the given disk as a Ceph journal device.

    Validates that the disk exists and is not already in use (as OSD,
    journal or system disk), wipes it, then registers it as a journal.
    Failures are reported on stdout (after the output split marker) and
    logged; the function never raises.

    :param args: parsed CLI arguments; only ``args.disk_name`` is used.
    """
    try:
        di = ceph_disk_lib.get_disk_info(args.disk_name)
        if di is None:
            print(__output_split_text)
            print(gettext('core_scripts_admin_journal_adding_err'))
            # Fixed typo in log message ("exits" -> "exist").
            logger.error("The disk does not exist.")
            return
        if di.usage == DiskUsage.osd or di.usage == DiskUsage.journal or di.usage == DiskUsage.system:
            print(__output_split_text)
            print(gettext('core_scripts_admin_add_journal_disk_on_use_err'))
            return
        if not ceph_disk_lib.clean_disk(args.disk_name):
            print(__output_split_text)
            print(gettext('core_scripts_admin_journal_adding_err'))
            return
        if not ceph_disk_lib.add_journal(args.disk_name):
            print(__output_split_text)
            print(gettext('core_scripts_admin_journal_adding_err'))
            return
        # Removed a redundant duplicated `return` that followed this branch.
    except Exception as ex:
        err = "Cannot add journal for disk {} , Exception is {}".format(
            args.disk_name, ex.message)
        logger.error(err)
        print(err)
        print(__output_split_text)
        print(gettext('core_scripts_admin_add_journal_exception'))
def run(self):
    """Report failed replication jobs found in Consul and purge them.

    Builds one notification listing every failed job, appends it to the
    plugin context, and deletes each failed job entry from Consul.  Errors
    are logged and never propagated.
    """
    try:
        deletion_ok = False
        consul = ConsulAPI()
        failed_jobs = consul.get_replication_failed_jobs()
        if len(failed_jobs) > 0:
            jobs_summary = ""
            for job_id, job_info in failed_jobs.iteritems():
                jobs_summary += "\n job id: " + job_id + " job name: " + job_info.job_name
                # Only the status of the last deletion is retained (as before).
                deletion_ok = consul.delete_failed_job(job_id)
            notification = Result()
            notification.plugin_name = self.get_plugin_name()
            notification.title = gettext("core_message_notify_failed_jobs_title")
            notification.message = '\n'.join(
                gettext("core_message_notify_failed_jobs_body").split(
                    "\\n")).format(jobs_summary)
            self.__context.results.append(notification)
            logger.info(notification.message)
            logger.info("status of deleting failed jobs from consul is " + str(deletion_ok))
    except Exception as e:
        logger.exception(e)
        logger.error(
            "An error occurred while ReplicationNotificationPlugin was running."
        )
def decorated(*args, **kwargs):
    """Authentication wrapper: run the wrapped view `f` only for a valid session
    or a successful username/password login posted to '/'.

    Returns the login template (optionally with an error message) on any
    failure path, otherwise `f(*args, **kwargs)`.
    """
    status = -1
    if 'sid' in session:
        # NOTE(review): both branches below return the identical Consul-error
        # login page, so ANY request carrying 'sid' in the session short-circuits
        # here and the login/user checks further down are unreachable in that
        # case.  Looks suspicious -- confirm the intended difference between
        # sid == "-1" and other sid values.
        if session['sid'] == "-1":
            return render_template(
                'admin/login.html',
                err=gettext("ui_admin_login_consul_connection_error"))
        else:
            return render_template(
                'admin/login.html',
                err=gettext("ui_admin_login_consul_connection_error"))
    if 'user' in session:
        # Already logged in.
        status = 1
    elif status == -1:
        # Not logged in: only the root route may attempt a form login.
        try:
            if request.url_rule.rule == '/':
                username = request.form["username"]
                password = request.form["password"]
            else:
                return render_template('admin/login.html')
        except Exception as e:
            # Missing form fields (or no url_rule) -> plain login page.
            return render_template('admin/login.html')
        if (username and password and check_auth(username, password)):
            status = 1
        else:
            return render_template(
                'admin/login.html',
                err="Incorrect username or password. Please try again.")
    if status == 1:
        return f(*args, **kwargs)
    else:
        return authenticate()
def interface_form(self, d):
    """Console form for renaming network interfaces by MAC address.

    Shows an editable table (MAC / model / interface name), validates the
    entered names, and on confirmation writes a udev rule file and reboots.

    :param d: pythondialog Dialog instance.
    """
    cust_dialog = Dialog_Customize()
    kwargs = {
        "ok_label": "Apply",
        "no_kill": 1,
        "no_collapse": 1,
        "colors": 1
    }
    code, ifaces = d.form(
        title="Interface Naming",
        text="\n{}\n\n MAC Address\t\tModel\t\t\t\t\t\t Interface".
        format(gettext("console_interface_naming_form_title_msg")),
        elements=cust_dialog.build_interface_elements(),
        **kwargs)
    if code == d.DIALOG_CANCEL or code == d.DIALOG_ESC:
        # NOTE(review): cancel/ESC falls through to validation below instead
        # of leaving the form -- confirm this is intended.
        pass
    # Validate every entered interface name; on the first violation show an
    # error box and restart the form from scratch.
    for i in ifaces:
        if not i.startswith('eth'):
            d.msgbox("{}".format(
                gettext("console_interface_naming_empty_item_validation")),
                     title="Error",
                     height=15,
                     width=45)
            # NOTE(review): `if d.DIALOG_OK:` tests a constant's truthiness,
            # not the msgbox return code; with DIALOG_OK == 0 this branch
            # never runs.  Probably meant `if code == d.DIALOG_OK` -- confirm.
            if d.DIALOG_OK:
                return Console_Forms().interface_form(d)
        elif cust_dialog.Equal_Validation(ifaces) == True:
            # Duplicate interface names entered.
            d.msgbox("{}".format(
                gettext(
                    "console_interface_naming_iterate_fields_validation")),
                     title="Error",
                     height=15,
                     width=45)
            if d.DIALOG_OK:
                return Console_Forms().interface_form(d)
        elif i not in cust_dialog.get_ifaces():
            # Name does not match any existing ethernet device.
            d.msgbox("{}".format(
                gettext("console_interface_naming_ethernet_not_exist")),
                     title="Error",
                     height=15,
                     width=45)
            if d.DIALOG_OK:
                return Console_Forms().interface_form(d)
    # Pair each MAC with the (validated) name the user entered.
    macs = cust_dialog.get_mac_list()
    new_list = dict(zip(macs, ifaces))
    if code == d.DIALOG_OK:
        kwargs = {"extra_button": 1, "extra_label": "Cancel"}
        code = d.msgbox("{}".format(
            gettext("console_reboot_confirmation_apply_changes")),
                        title="Warning",
                        height=15,
                        width=45,
                        **kwargs)
        if code == d.DIALOG_OK:
            # Persist the udev naming rules, then reboot to apply them.
            if cust_dialog.create_rule_file(new_list) == True:
                os.system("reboot")
        if code == d.DIALOG_EXTRA or code == d.DIALOG_ESC:
            return Console_Forms().interface_form(d)
def get_replicated_disks():
    """Return all replicated images of a destination cluster as JSON.

    Reads ``cluster_name`` from the request, queries the remote cluster via
    ManageRemoteReplication, and returns the disk list JSON-encoded.  On
    ReplicationException a translated error message is returned as
    ``{"err": <message>}``; any other exception yields a generic
    connection-timeout error in the same shape.

    Returns:
        str: JSON document (a list on success, an ``{"err": ...}`` object
        on failure).
    """
    if request.method == 'GET' or request.method == 'POST':
        mesg_err = ""
        mesg_success = ""
        mesg_warning = ""
        try:
            # Get data from destination cluster:
            dest_cluster_name = request.values['cluster_name']
            manage_remote_replication = ManageRemoteReplication()
            manage_remote_replication.cluster_name = dest_cluster_name
            # NOTE(review): accessed without parentheses -- assumed to be a
            # property on ManageRemoteReplication; confirm.
            dest_disks_lis = manage_remote_replication.get_replicated_disks_list
            # Pop one-shot flash messages from the session, if present.
            if "err" in session:
                mesg_err = session["err"]
                session.pop("err")
            elif "success" in session:
                mesg_success = session["success"]
                session.pop("success")
            elif "warning" in session:
                mesg_warning = session["warning"]
                session.pop("warning")
            json_data = json.dumps(dest_disks_lis)
            return json_data
        except ReplicationException as e:
            # Map known replication failures to translated messages.
            if e.id == ReplicationException.CONNECTION_REFUSED:
                result = gettext("ui_admin_add_job_connction_refused_err")
            elif e.id == ReplicationException.CONNECTION_TIMEOUT:
                result = gettext("ui_admin_add_job_connction_timeout_err")
            elif e.id == ReplicationException.PERMISSION_DENIED:
                result = gettext("ui_admin_add_dest_cluster_permission_denied")
            else:
                result = gettext("ui_admin_get_rep_disk_list")
            logger.error(e)
            err = {"err": result}
            result = json.dumps(err)
            return result
        except Exception as e:
            result = gettext("ui_admin_add_job_connction_timeout_err")
            # Fixed: the message was json.dumps()'d twice (once on its own and
            # once inside the err dict), producing a doubly-quoted value that
            # was inconsistent with the ReplicationException handler above.
            logger.error(e)
            err = {"err": result}
            result = json.dumps(err)
            return result
def run(self):
    """Notify when any pool's used-space percentage crosses the configured
    threshold, deduplicating against the module-level ``old_pools`` snapshot
    so an unchanged over-threshold pool is not re-reported every cycle.
    """
    try:
        global old_pools
        current_pools = {}
        pools_used_space = {}
        notify_pool_size = ConfigAPI().get_notify_pool_used_space_percent()
        ## Getting the notional percentage of storage used for each pool ##
        ## ------------------------------------------------------------- ##
        pools_used_space = self.get_pools_used_space()
        ## Loop and do checks for each pool ##
        ## -------------------------------- ##
        for pool_name, pool_space in pools_used_space.iteritems():
            # Check if pool space is greater than or equal 85% of pool total space or not #
            if pool_space >= notify_pool_size:
                # Check if the same pool name and the same pool space exist in "old_pools" dictionary #
                key, value = pool_name, pool_space
                if key in old_pools and value == old_pools[key]:
                    # Already reported at exactly this usage level: skip.
                    continue
                current_pools[pool_name] = pool_space
            # If pool space is less than 85% --> remove pool from "old_pools" dictionary if exists #
            else:
                if pool_name in old_pools:
                    del old_pools[pool_name]
        ## Notify user if length of "current_pools" > 0 ##
        ## -------------------------------------------- ##
        if len(current_pools) > 0:
            self.__context.state[self.get_plugin_name()] = current_pools
            result = Result()
            result.plugin_name = self.get_plugin_name()
            result.title = gettext(
                "core_message_notify_pool_used_space_title")
            result.message = '\n'.join(gettext("core_message_notify_pool_used_space_body").split("\\n")).format(''.join('\n - pool : {} , used space = {}%'.format(key, val) for key, val in current_pools.iteritems()))
            # logger.warning(result.message)
            self.__context.results.append(result)
        # Update the dictionary old_pools with the items of the dictionary current_pools #
        old_pools.update(current_pools)
        ## Empty the dictionary current_pools ##
        ## ----------------------------------- ##
        current_pools = dict()
    except Exception as e:
        logger.exception(e)
        logger.error("An error occurred while PoolSizePlugin was running.")
def check_disk_performance(args):
    """Run a matrix of fio benchmarks (4k/4M x seq/sync/random read/write)
    against a raw disk and print the parsed results.

    WARNING: writes directly to /dev/<disk_name>, destroying its contents.

    :param args: parsed CLI arguments; uses ``args.disk_name`` and
        ``args.threads_no``.
    """
    try:
        disk_name = args.disk_name
        threads_no = args.threads_no
        test_modes = []
        test_modes.append(TestModes.four_k)
        test_modes.append(TestModes.four_m)
        test_types = []
        test_types.append(TestTypes.read)
        test_types.append(TestTypes.write)
        test_types.append(TestTypes.sync_write)
        test_types.append(TestTypes.random_read)
        test_types.append(TestTypes.random_write)
        for mode in test_modes:
            for type in test_types:
                # Template re-created each iteration; placeholders are
                # (name, device, rw-mode, blocksize, numjobs).
                cmd = "fio --name={} --filename=/dev/{} --iodepth=1 --rw={} --bs={} " \
                      "--direct=1 --runtime=20 --time_based --numjobs={} --group_reporting --output-format=json"
                # NOTE: the point at which `type` is reassigned matters --
                # before .format() it changes the fio --rw argument, after
                # .format() it only changes the JSON key print_results reads.
                if type == TestTypes.read:
                    test_name = gettext(mode + "_sequential_read")
                    cmd = cmd.format(
                        test_name, disk_name, type, mode, threads_no)
                elif type == TestTypes.write:
                    test_name = gettext(mode + "_sequential_write")
                    cmd = cmd.format(
                        test_name, disk_name, type, mode, threads_no)
                elif type == TestTypes.sync_write:
                    test_name = gettext(mode + "_sequential_write_sync")
                    # fio runs an ordinary write with --sync=1.
                    type = TestTypes.write
                    cmd = cmd.format(
                        test_name, disk_name, type, mode, threads_no) + ' --sync=1'
                elif type == TestTypes.random_read:
                    test_name = gettext(mode + "_random_read")
                    cmd = cmd.format(
                        test_name, disk_name, type, mode, threads_no)
                    # Reassigned after .format(): fio gets randread, but
                    # results live under the "read" key in fio's JSON.
                    type = TestTypes.read
                elif type == TestTypes.random_write:
                    test_name = gettext(mode + "_random_write")
                    cmd = cmd.format(
                        test_name, disk_name, type, mode, threads_no)
                    type = TestTypes.write
                # Flush page cache so each run measures the device, not RAM.
                call_cmd("sync")
                call_cmd("echo 3 > /proc/sys/vm/drop_caches")
                out, err = exec_command(cmd)
                print_results(out, type, mode, disk_name, threads_no)
    except Exception as e:
        print e
        sys.exit(0)
def run(self):
    """Raise a one-time notification when the cluster reports HEALTH_ERR.

    Reads `ceph status --format json-pretty`, extracts the overall health
    (legacy `overall_status` on Ceph 12, otherwise `status`), and on
    HEALTH_ERR appends a Result summarising the health-check messages.  A
    state flag prevents repeat alerts until the cluster recovers.
    """
    try:
        ceph_api = CephAPI()
        raw_status = ceph_api.get_ceph_cluster_status()  # ceph status --format json-pretty
        if raw_status is None:
            return
        status_json = json.loads(raw_status)
        health = status_json["health"]
        # Ceph 12 still exposes the legacy overall_status field:
        if "overall_status" in health and health["overall_status"] is not None:
            overall = health["overall_status"]
        else:
            overall = health["status"]
        if overall == "HEALTH_ERR":
            already_notified = self.__context.state.get(self.get_plugin_name(), False)
            if not already_notified:
                summary_lines = []
                if "checks" in health:
                    for check_name in health["checks"]:
                        if health["checks"][check_name] is not None:
                            summary_lines.append(health["checks"][check_name]["summary"]["message"])
                summary_text = '\n '.join(summary_lines)
                notification = Result()
                notification.title = gettext("core_message_notify_cluster_status_title")
                notification.message = '\n'.join(gettext("core_message_notify_cluster_status_body").split("\\n")).format(summary_text)
                notification.plugin_name = str(self.get_plugin_name())
                self.__context.results.append(notification)
                self.__context.state[self.get_plugin_name()] = True
                logger.warning("Cluster overall health status is HEALTH_ERR")
            return
        # Healthy again: re-arm the notification flag.
        self.__context.state[self.get_plugin_name()] = False
    except Exception as e:
        logger.exception(e)
        logger.error("An error occurred while ClusterStatusPlugin was running.")
def print_results(data, type, mode, disk_name, threads_no):
    """Parse fio JSON output and print a human-readable result line.

    :param data: raw fio JSON output string.
    :param type: fio result key to read ("read" or "write").
    :param mode: block-size mode; 4k results are printed as IOPS, 4M as
        bandwidth.
    :param disk_name: device name, used only in the printed message.
    :param threads_no: numjobs used, only in the printed message.
    """
    sys.stdout.flush()
    output = json.loads(data)
    test_name = output['jobs'][0]['jobname'].replace("_", " ")
    if mode == TestModes.four_k:
        # Small-block tests are reported as IOPS.
        value = output['jobs'][0][type]['iops']
        print gettext("4k_test_result_message").format(disk_name, test_name, threads_no, int(value))
    elif mode == TestModes.four_m:
        # fio reports bw in KiB/s; convert to bytes/s first.
        value = output['jobs'][0][type]['bw'] * 1024
        # NOTE(review): boundaries use <=, so exactly 1024 B/s prints as
        # "1024 B/s" rather than "1 KB/s", and values above 1 TB/s fall
        # through with no unit suffix at all -- confirm acceptable.
        if value <= 1024:
            value = str(value) + ' B/s'
        elif value <= (1024 * 1024):
            value = str((value / 1024)) + ' KB/s'
        elif value <= (1024 * 1024 * 1024):
            value = str(((value / 1024) / 1024)) + ' MB/s'
        elif value <= (1024 * 1024 * 1024 * 1024):
            value = str((((value / 1024) / 1024) / 1024)) + ' GB/s'
        print gettext("4m_test_result_message").format(disk_name, test_name, threads_no, value)
    sleep(1)
def test_email(self, sender_email, server, port, password, security, logged_in_user_name):
    """Send a test e-mail to the logged-in user's configured address.

    Fails early (without sending) when the user has notifications disabled
    or no e-mail address configured.

    :returns: EmailStatus describing success or the reason for failure.
    """
    status = EmailStatus()
    user = UserAPI().get_user(logged_in_user_name)
    # `notfiy` (sic) is the attribute name as defined on the user model.
    if not user.notfiy:
        status.success = False
        status.err_msg = gettext(
            "core_email_test_user_not_set_receive_notify")
        return status
    if len(user.email.strip()) == 0:
        status.success = False
        status.err_msg = gettext(
            "core_email_test_user_not_set_receive_email")
        return status
    test_msg = email_utils.create_msg(sender_email, user.email,
                                      gettext("core_email_test_subject"),
                                      gettext("core_email_test_body"))
    return email_utils.send_email(server, port, test_msg, password, security)
def delete_journal(args):
    """CLI action: remove the journal role from a disk and wipe it.

    Prints a translated error (after the output split marker) when the disk
    is missing, is not a journal, or cannot be cleaned.  Never raises.

    :param args: parsed CLI arguments; only ``args.disk_name`` is used.
    """
    try:
        info = ceph_disk_lib.get_disk_info(args.disk_name)
        # Disk must exist and currently be a journal before we wipe it.
        failed = info is None or info.usage != DiskUsage.journal
        if not failed:
            failed = not ceph_disk_lib.clean_disk(args.disk_name)
        if failed:
            print(__output_split_text)
            print(gettext('core_scripts_admin_delete_journal_err'))
        return
    except Exception as ex:
        logger.exception(ex)
        print(__output_split_text)
        print(gettext('core_scripts_admin_delete_journal_err'))
        return
def disk_menu(self, d):
    """Show the disk-selection menu for the performance test.

    :param d: pythondialog Dialog instance.
    :returns: the selected device name, or None when no disks exist.
    """
    tag_to_name, menu_entries = Dialog_Customize().build_disk_choices()
    if len(tag_to_name) == 0 and len(menu_entries) == 0:
        # No disks available: inform the user and go back to the console.
        code = d.msgbox("{}".format(gettext("console_no_disk_found_msg")),
                        title="{}".format(
                            gettext("console_no_disk_found_msg_title")),
                        height=15,
                        width=45)
        if code == d.DIALOG_OK:
            run_console()
        return
    lookup = dict(tag_to_name)
    code, tag = d.menu("Test Disk Performance",
                       menu_height=len(tag_to_name),
                       height=HEIGHT,
                       width=WIDTH,
                       choices=menu_entries)
    if code == d.DIALOG_CANCEL or code == d.DIALOG_ESC:
        run_console()
    return lookup.get(tag)
def __notify_list(self):
    """Queue a single notification when any node goes down that was not
    already down on the previous check, then remember the current down list
    in the plugin state for the next cycle.
    """
    try:
        result = Result()
        # Down nodes seen on the previous run (empty list on first run).
        old_node_list = self.__context.state.get(self.get_plugin_name(), [])
        down_node_list = self.__get_down_node_list()
        for node in down_node_list:
            if node not in old_node_list:
                # At least one newly-down node: the message lists ALL nodes
                # currently down, so one notification suffices -> break.
                # (The generator variable `node` shadows the loop variable,
                # which is harmless inside the genexp's own scope.)
                result.message = '\n'.join(
                    gettext("core_message_notify_down_node_list").split(
                        "\\n")).format(''.join('\n- node:{} '.format(node)
                                               for node in down_node_list))
                #logger.warning(result.message)
                result.plugin_name = str(self.get_plugin_name())
                result.title = gettext(
                    "core_message_notify_down_node_list_title")
                self.__context.results.append(result)
                break
        self.__context.state[self.get_plugin_name()] = down_node_list
    except Exception as e:
        logger.exception("error notify down node list")
def warning_box(self, d):
    """Show the destructive disk-performance warning box.

    Any cancel-style answer (extra button, Cancel, ESC) re-enters the disk
    performance flow; only an OK answer confirms.

    :param d: pythondialog Dialog instance.
    :returns: True when the user pressed OK, otherwise None.
    """
    opts = {"extra_button": 1, "extra_label": "Cancel"}
    answer = d.msgbox("{}".format(
        gettext("console_disk_performance_warning_msg")),
                      title="Warning",
                      height=15,
                      width=45,
                      **opts)
    if answer in (d.DIALOG_EXTRA, d.DIALOG_CANCEL, d.DIALOG_ESC):
        self.run_disk_performance(d)
    if answer == d.DIALOG_OK:
        return True
def run(self):
    """Notify once when new OSDs appear in the down list.

    Compares the currently-down OSD map against the snapshot stored in the
    plugin state; if any OSD is newly down, a single notification listing
    every currently-down OSD is queued.  The snapshot is refreshed on every
    run.
    """
    try:
        previously_down = self.__context.state.get(
            self.get_plugin_name(), {})
        currently_down = ceph_down_osds()  # {osd_id: node_name}
        for osd_id, node_name in currently_down.iteritems():
            if osd_id in previously_down:
                continue
            # Newly-down OSD found: one notification covers the whole list.
            notification = Result()
            notification.plugin_name = self.get_plugin_name()
            notification.title = gettext(
                "core_message_notify_osd_down_title")
            notification.message = '\n'.join(gettext("core_message_notify_osd_down_body").split("\\n")).format(''.join('\n- osd.{}/{} '.format(key, val) for key, val in currently_down.iteritems()))
            #logger.warning(notification.message)
            self.__context.results.append(notification)
            break
        self.__context.state[
            self.get_plugin_name()] = currently_down
    except Exception as e:
        logger.exception(e)
        logger.error("An error occurred while OSDDownPlugin was running.")
def run(self):
    """Every 10th invocation, diff SMART-failed disks per node against the
    previous snapshot and notify about new failures.

    The notification message contains both the newly-failed disks and the
    full current failure list.  The per-node snapshot is stored in the
    plugin state; a module-level counter throttles the (expensive) SMART
    scan to every 10th run.
    """
    try:
        global smart_counter
        differences_nodes_disks_down = {}
        smart_counter += 1
        if smart_counter % 10 == 0:
            old_node_disks_health = self.__context.state.get(
                self.get_plugin_name(), {})
            current_node_failed_disks = self.__get_failed_disks()
            for node, failed_disks in current_node_failed_disks.iteritems(
            ):
                if node in old_node_disks_health.keys():
                    # Fixed: the accumulator is now reset per node.  It was
                    # previously created once before the loop, so disks from
                    # one node leaked into every later node's "new failures"
                    # list (and the same list object was shared between
                    # nodes in differences_nodes_disks_down).
                    node_failed_disks = []
                    for failed_disk in failed_disks:
                        if failed_disk not in old_node_disks_health[node]:
                            node_failed_disks.append(failed_disk)
                    if len(node_failed_disks) > 0:
                        differences_nodes_disks_down.update(
                            {node: node_failed_disks})
                else:
                    # Node not seen before: all its failed disks are new.
                    differences_nodes_disks_down.update(
                        {node: failed_disks})
            if len(differences_nodes_disks_down) > 0:
                # Comma-separated list of newly-failed disks per node.
                new_failures = ''
                for node, failed_disks in differences_nodes_disks_down.iteritems(
                ):
                    new_failures += str("\n Node : " + node + " Disk(s) : ")
                    for failed_disk in failed_disks:
                        if failed_disk != failed_disks[-1]:
                            new_failures += str(failed_disk + ", ")
                        else:
                            new_failures += str(failed_disk)
                # Same formatting for the complete current failure list.
                all_failures = ''
                for node, failed_disks in current_node_failed_disks.iteritems(
                ):
                    all_failures += str("\n Node : " + node + " Disk(s) : ")
                    for failed_disk in failed_disks:
                        if failed_disk != failed_disks[-1]:
                            all_failures += str(failed_disk + " , ")
                        else:
                            all_failures += str(failed_disk)
                result = Result()
                result.plugin_name = self.get_plugin_name()
                result.title = gettext(
                    "core_message_notify_smart_disk_health_failed_title")
                result.message = '\n'.join(
                    gettext(
                        "core_message_notify_smart_disk_health_failed_body"
                    ).split("\\n")).format(new_failures, all_failures)
                self.__context.results.append(result)
                logger.info(result.message)
            self.__context.state[
                self.get_plugin_name()] = current_node_failed_disks
    except Exception as e:
        logger.exception(e)
        logger.error(
            "An error occurred while SMARTFailurePlugin was running.")
def add_osd(args):
    """CLI action: create an OSD on the given disk, with an optional journal.

    ``args.journal`` may be "" (journal co-located on the same disk), "auto"
    (pick a valid journal device automatically), or a specific device name
    (which must already be a journal disk).  Failures are reported on stdout
    after the output split marker and logged; the function never raises.

    :param args: parsed CLI arguments; uses ``args.disk_name`` and
        ``args.journal``.
    """
    try:
        di = ceph_disk_lib.get_disk_info(args.disk_name)
        # Fixed: guard against a nonexistent disk (get_disk_info -> None),
        # which previously raised AttributeError on di.usage; this matches
        # the sibling add_journal/delete_journal handlers.
        if di is None:
            print(__output_split_text)
            print(gettext('core_scripts_admin_osd_adding_err'))
            logger.error("The disk does not exist.")
            return
        if di.usage == DiskUsage.osd:
            print(__output_split_text)
            print(gettext('core_scripts_admin_add_osd_disk_is_osd_err'))
            return
        if not ceph_disk_lib.clean_disk(args.disk_name):
            print(__output_split_text)
            print(gettext('core_scripts_admin_add_osd_cleaning_err'))
            return
        journal = str(args.journal)
        if journal == "":
            # Journal co-located on the OSD disk itself.
            logger.info(
                "User didn't select a journal for disk {}, so the journal will be on the same disk."
                .format(args.disk_name))
        elif journal == "auto":
            logger.info("Auto select journal for disk {}.".format(
                args.disk_name))
            journal = ceph_disk_lib.get_valid_journal()
            if journal is None:
                print(__output_split_text)
                print(gettext('core_scripts_admin_add_osd_journal_err'))
                logger.error(gettext('core_scripts_admin_add_osd_journal_err'))
                return
            logger.info(
                "User selected Auto journal, selected device is {} disk for disk {}."
                .format(journal, args.disk_name))
        else:
            # Explicit journal device: must exist and already carry the
            # journal role.
            ji = ceph_disk_lib.get_disk_info(journal)
            if ji is None or ji.usage != DiskUsage.journal:
                print(__output_split_text)
                print(gettext('core_scripts_admin_osd_adding_err'))
                logger.info(
                    "User selected journal {} does not exist or is not a valid journal."
                    .format(journal))
                return
            # Wipe the journal disk only if no OSD currently uses it.
            if len(ji.linked_osds) == 0:
                ceph_disk_lib.clean_disk(journal)
            logger.info("User selected journal {} disk for disk {}.".format(
                journal, args.disk_name))
        if not ceph_disk_lib.prepare_osd(args.disk_name, journal):
            print(__output_split_text)
            print(gettext('core_scripts_admin_osd_adding_err'))
            return
        ceph_disk_lib.wait_for_osd_up(args.disk_name)
    except Exception as ex:
        err = "Cannot add osd for disk {} , Exception is {}".format(
            args.disk_name, ex.message)
        logger.error(err)
        print(err)
        print(__output_split_text)
        print(gettext('core_scripts_admin_add_osd_exception'))
def add_osd(args):
    """CLI action: create an OSD on a disk, with optional external journal
    and optional write-cache (dm-writecache style) device.

    ``args.journal`` / ``args.cache`` may each be "" (none), "auto"
    (auto-select a valid device), or a specific device name.  When a cache
    is used, a write-cache LV is created first and the OSD is prepared on
    the resulting LV path instead of the raw disk.  Failures print a
    translated error after the output split marker; the function never
    raises.
    """
    # Read arguments #
    disk_name = str(args.disk_name)
    journal = str(args.journal)
    cache = str(args.cache)
    cache_type = str(args.cache_type)
    # device_name is what prepare_osd receives; it is replaced by the
    # write-cache LV path when a cache is created below.
    device_name = disk_name
    try:
        di = ceph_disk_lib.get_disk_info(disk_name)
        # NOTE(review): di is not checked for None here; a nonexistent disk
        # raises AttributeError below and is only caught by the generic
        # except -- confirm whether an explicit guard is wanted.
        # Check if disk is not used already as OSD #
        if di.usage == DiskUsage.osd:
            print(__output_split_text)
            print(gettext('core_scripts_admin_add_osd_disk_is_osd_err'))
            return
        # Cleaning disk #
        if not ceph_disk_lib.clean_disk(disk_name):
            print(__output_split_text)
            print(gettext('core_scripts_admin_add_osd_cleaning_err'))
            return
        # Journals #
        if journal == "" or journal is None:
            # If user didn't select a journal for disk and cluster is filestore
            logger.info(
                "User didn't select a journal for disk {}, so the journal will be on the same disk."
                .format(disk_name))
            config = configuration()
            storage_engine = config.get_cluster_info().storage_engine
            if storage_engine == "filestore":
                # manually create journal partition in the same disk for filestore
                journal_part_num = ceph_disk.create_journal_partition(
                    device_name, external=False)
                journal = ceph_disk.get_partition_name(device_name,
                                                       journal_part_num)
        # If journal value equals "auto" :
        elif journal == "auto":
            logger.info("Auto select journal for disk {}.".format(disk_name))
            journal = ceph_disk_lib.get_valid_journal()
            if journal is None:
                print(__output_split_text)
                print(gettext('core_scripts_admin_add_osd_journal_err'))
                logger.error(gettext('core_scripts_admin_add_osd_journal_err'))
                return
            logger.info(
                "User selected Auto journal, selected device is {} disk for disk {}."
                .format(journal, disk_name))
        # If user selected a specific journal for disk :
        else:
            ji = ceph_disk_lib.get_disk_info(journal)
            if ji is None or ji.usage != DiskUsage.journal:
                print(__output_split_text)
                print(gettext('core_scripts_admin_osd_adding_err'))
                logger.info(
                    "User selected journal {} does not exist or is not a valid journal."
                    .format(journal))
                return
            # Wipe the journal disk only if no OSD currently links to it.
            if len(ji.linked_osds) == 0:
                ceph_disk_lib.clean_disk(journal)
            logger.info("User selected journal {} disk for disk {}.".format(
                journal, disk_name))
        ############
        # Caches #
        ############
        # If user didn't select a cache for disk :
        if cache == "":
            logger.info(
                "User didn't select a cache for disk {}.".format(disk_name))
        # If cache value equals "auto" :
        elif cache == "auto":
            logger.info("Auto select cache for disk {}.".format(disk_name))
            cache = ceph_disk_lib.get_valid_cache()
            if cache is None:
                print(__output_split_text)
                print(gettext('core_scripts_admin_add_osd_cache_err'))
                logger.error(gettext('core_scripts_admin_add_osd_cache_err'))
                return
            logger.info(
                "User selected Auto cache, selected device is {} disk for disk {}."
                .format(cache, disk_name))
        # If user selected a specific cache for disk :
        else:
            selected_cache = ceph_disk_lib.get_disk_info(cache)
            if selected_cache is None or selected_cache.usage != DiskUsage.cache:
                print(__output_split_text)
                print(gettext('core_scripts_admin_osd_adding_err'))
                logger.info(
                    "User selected cache {} does not exist or is not a valid cache."
                    .format(cache))
                return
            logger.info("User selected cache {} disk for disk {}.".format(
                cache, disk_name))
        # Creating "write-lvm" before creating "OSD" #
        # where lv_name of HDD = main , lv_name of SSD = cache , and vg_name = ps-wc-osd_000{rand_num} #
        # -------------------------------------------------------------------------------------------- #
        main_path = None
        cache_part = None
        vg_name = None
        if cache is not None and len(cache) > 0:
            # create cache #
            cm = CacheManager()
            main_path, cache_part = cm.create(
                disk_name, cache, cache_type)  # main_path = lv_path
            # = vg_name + "/" + main_lv_name
            if main_path is not None and cache_part is not None:
                vg_name = main_path.split("/")[
                    0]  # For renaming VG after the osd id later
                # Prepare the OSD on the write-cache LV instead of the raw disk.
                device_name = main_path
        #################
        # Preparing OSD #
        #################
        if not ceph_disk_lib.prepare_osd(device_name, journal):
            if cache_part is not None:
                cm = CacheManager()
                cm.delete(disk_name, cache_type)
                # After the failure of adding OSD , clean both disk & cache partition and
                # change ptype of cache partition to "cache_avail_ptype" :
                ceph_disk_lib.clean_disk(disk_name)
                ceph_disk_lib.clean_disk(cache_part)
                ceph_disk_lib.set_cache_part_avail(cache_part)
            print(__output_split_text)
            print(gettext('core_scripts_admin_osd_adding_err'))
            return
        ceph_disk_lib.wait_for_osd_up(disk_name)
    except Exception as ex:
        err = "Cannot add osd for disk {} , Exception is : {}".format(
            disk_name, ex.message)
        logger.exception(err)
        print(err)
        print(__output_split_text)
        print(gettext('core_scripts_admin_add_osd_exception'))