def diskgroup_warn(database):
    """Scan Oracle ASM diskgroups and raise offline-disk, state and usage warns.

    Runs one query against V$ASM_DISKGROUP, then feeds each row through the
    customized warn scanner three times — once per warn category. On query
    failure the error payload is printed and returned.
    """
    query = '\n SELECT\n NAME,\n STATE,\n round(TOTAL_MB/1024) TOTAL_GB,\n round((TOTAL_MB-FREE_MB)/1024) USED_GB,\n round((TOTAL_MB-FREE_MB)/TOTAL_MB*100) USED_PCT,\n OFFLINE_DISKS\n FROM\n V$ASM_DISKGROUP'
    flag, json_data = run_sql(database, query)
    if not flag:
        print(str(build_exception_from_java(json_data)))
        return json_data
    created_at = datetime.now().replace(microsecond=0)
    # Offline disk count per diskgroup.
    warn = WARN_ENUM.get(database.db_type).DiskGroup_Offline_Disks_Warn
    for x in json_data:
        options = {'name': x.get('NAME')}
        p = Performance(inst_id=database.db_name, name=warn.name,
                        value=x.get('OFFLINE_DISKS'), created_at=created_at)
        customized_warn_scanner(warn, p, database, False, options)
    # Diskgroup state (string-matched, e.g. MOUNTED / DISMOUNTED).
    warn = WARN_ENUM.get(database.db_type).DiskGroup_Status_Warn
    for x in json_data:
        options = {'name': x.get('NAME')}
        p = Performance(inst_id=database.db_name, name=warn.name,
                        value=x.get('STATE'), created_at=created_at)
        customized_warn_scanner(warn, p, database, False, options)
    # Space-usage percentage per diskgroup.
    warn = WARN_ENUM.get(database.db_type).DiskGroup_Used_Percent_Warn
    for x in json_data:
        options = {'name': x.get('NAME'), 'total': x.get('TOTAL_GB'),
                   'used': x.get('USED_GB'), 'used_pct': x.get('USED_PCT')}
        # BUG FIX: the usage warn must judge USED_PCT; the old code passed the
        # STATE string (copy-paste from the status loop), so the percentage
        # threshold was never evaluated against a number.
        p = Performance(inst_id=database.db_name, name=warn.name,
                        value=x.get('USED_PCT'), created_at=created_at)
        customized_warn_scanner(warn, p, database, False, options)
def get_space(database):
    """Persist per-tablespace usage rows and run the tablespace warn scanner.

    Stores one raw snapshot per database (Space_Detail) plus one Space row
    per tablespace; MySQL and SQL Server skip the warn scan.
    """
    flag, rows = run_sql(database, Space_Query.get(database.db_type))
    if not flag:
        print(str(build_exception_from_java(rows)))
        return
    now = datetime.now().replace(microsecond=0)
    # Keep exactly one up-to-date raw snapshot per database.
    Space_Detail.objects.update_or_create(
        database=database,
        defaults={'detail': rows, 'created_at': now})
    scan_warns = database.db_type not in ('mysql', 'sqlserver')
    for row in rows:
        space = Space()
        space.database = database
        space.name = row.get('TABLESPACE_NAME')
        space.total_mb = row.get('TOTAL_MB')
        space.free = row.get('FREE')
        space.used = row.get('USED')
        space.type = row.get('CONTENTS')
        space.used_pct = row.get('USED_PCT')
        space.created_at = now
        space.save()
        if scan_warns:
            customized_warn_scanner(
                WARN_ENUM.get(database.db_type).Tablespace_Warn,
                space, database, False,
                {'name': row.get('TABLESPACE_NAME'),
                 'total': row.get('TOTAL_MB'),
                 'used': row.get('USED'),
                 'used_pct': row.get('USED_PCT')})
def check_database_alive(self):
    """Probe every monitored database and maintain its `disabled` flag.

    A database that just became unreachable is flagged disabled, persisted,
    and a Database_Access_Warn is recorded and pushed; a database that is
    reachable again simply has the flag cleared.
    """
    # NOTE(review): never appended to or read afterwards — looks like dead code.
    disabled_database_list = []
    for database in Database.objects.exclude(is_switch_off=True):
        if not database.is_alive():
            # Only react on the alive -> dead transition.
            if not database.disabled:
                database.disabled = True
                # Instances not yet persisted cannot be updated in place.
                if not database._state.adding:
                    database.save()
                warn = WARN_ENUM.get(database.db_type).Database_Access_Warn
                # data=1 substitutes for a metric value: there is no
                # performance row backing an access warn.
                warn_level, warn_config = alarm_judger(database, warn.name, None, data=1)
                options = {
                    'warn_level': warn_level,
                    'created_at': datetime.now().replace(microsecond=0),
                    'alias': database.alias
                }
                warn_message = warn.value.get('message_template').format(
                    **options)
                warn_result = Warn_Result(database=database,
                                          warn_message=warn_message,
                                          warn=warn_config)
                warn_result.save()
                # Push the realtime alert; link is empty — nothing to drill into.
                warn_alert = {'warn_message': warn_message, 'link': {}}
                send_alarm(database.id, json.dumps(warn_alert))
        else:
            # Recovered: clear the flag.
            if database.disabled:
                database.disabled = False
                database.save()
def oracle_activity(database):
    """Sample Oracle active-session history into Oracle_ASH rows.

    Pulls the gv$ACTIVE_SESSION_HISTORY samples taken between 2s and 1s ago
    (a fixed one-second window), stores each non-idle sample, then feeds the
    sampled-session count to the Active_Session_Warn scanner.
    """
    if database.version == '10':
        # 10g ASH lacks sql_exec_start / machine / plan-line columns, so they
        # are stubbed with '' / null to keep the result shape uniform.
        query = "\n select /*+ leading(b a)*/\n a.inst_id,\n SESSION_ID sid,\n SESSION_SERIAL# serial,\n SESSION_ID || ',' || SESSION_SERIAL# || '@'|| a.inst_id SESSION_ID,\n (select username from dba_users u where u.user_id = a.user_id) username,\n '' machine,\n program,\n --status,\n case SQL_OPCODE\n when 1 then 'CREATE TABLE'\n when 2 then 'INSERT'\n when 3 then 'SELECT'\n when 6 then 'UPDATE'\n when 7 then 'DELETE'\n when 9 then 'CREATE INDEX'\n when 11 then 'ALTER INDEX'\n when 15 then 'ALTER INDEX' else 'Others' end command,\n SQL_ID,\n SQL_PLAN_HASH_VALUE,\n nvl(event, 'ON CPU') event,\n p1,\n p2,\n p3,\n nvl(wait_class, 'ON CPU') wait_class ,\n module,\n action,\n (select name from V$ACTIVE_SERVICES s where s.NAME_HASH = a.SERVICE_HASH) service_name,\n '' plsql_object_name,\n '' plsql_entry_object_name,\n BLOCKING_SESSION,\n BLOCKING_SESSION_SERIAL# BLOCKING_SESSION_SERIAL,\n null SQL_PLAN_LINE_ID,\n '' SQL_PLAN_OPERATION,\n SESSION_TYPE,\n (select SQL_TEXT from v$sql b where b.sql_id = a.sql_id and rownum =1) SQL_TEXT\n from gv$ACTIVE_SESSION_HISTORY a\n where a.SAMPLE_TIME between systimestamp - numtodsinterval(2,'SECOND') and systimestamp - numtodsinterval(1,'SECOND')\n and nvl(a.wait_class,'ON CPU') <> 'Idle'"
    else:
        if database.version >= '11':
            # NOTE(review): `version` is compared as a string; anything that is
            # neither '10' nor lexically >= '11' leaves `query` unbound and the
            # run_sql call below raises NameError — confirm every supported
            # version hits one of the two branches.
            query = "\n select /*+ leading(b a)*/\n a.inst_id,\n SESSION_ID sid,\n SESSION_SERIAL# serial,\n SESSION_ID || ',' || SESSION_SERIAL# || '@'|| a.inst_id SESSION_ID,\n round((cast(sample_time as date)-a.sql_exec_start)*24*3600) SQL_ELAPSED_TIME,\n (select username from dba_users u where u.user_id = a.user_id) username,\n machine,\n program,\n --status,\n case SQL_OPCODE\n when 1 then 'CREATE TABLE'\n when 2 then 'INSERT'\n when 3 then 'SELECT'\n when 6 then 'UPDATE'\n when 7 then 'DELETE'\n when 9 then 'CREATE INDEX'\n when 11 then 'ALTER INDEX'\n when 15 then 'ALTER INDEX' else 'Others' end command,\n SQL_ID,\n SQL_PLAN_HASH_VALUE,\n nvl(event, 'ON CPU') event,\n p1,\n p2,\n p3,\n nvl(wait_class, 'ON CPU') wait_class,\n module,\n action,\n (select name from V$ACTIVE_SERVICES s where s.NAME_HASH = a.SERVICE_HASH) SERVER_NAME ,\n -- (select object_name from dba_objects s where s.object_id = a.PLSQL_OBJECT_ID) plsql_object_name,\n -- (select object_name from dba_objects s where s.object_id = a.PLSQL_ENTRY_OBJECT_ID) plsql_entry_object_name,\n '' plsql_object_name,\n '' plsql_entry_object_name,\n BLOCKING_SESSION,\n BLOCKING_SESSION_SERIAL# BLOCKING_SESSION_SERIAL,\n SQL_PLAN_LINE_ID,\n SQL_PLAN_OPERATION || ' ' || SQL_PLAN_OPTIONS SQL_PLAN_OPERATION,\n SESSION_TYPE,\n (select sql_fulltext from v$sql b where b.sql_id = a.sql_id and rownum =1) SQL_TEXT\n from gv$ACTIVE_SESSION_HISTORY a\n where a.SAMPLE_TIME between systimestamp - numtodsinterval(2,'SECOND') and systimestamp - numtodsinterval(1,'SECOND')\n and nvl(a.wait_class,'ON CPU') <> 'Idle'\n "
    ash_date = get_10s_time()  # sample timestamp aligned to a 10-second bucket
    flag, json_data = run_sql(database, query)
    if not flag:
        print(str(build_exception_from_java(json_data)))
        return
    for x in json_data:
        ash = Oracle_ASH()
        ash.inst_id = x.get('INST_ID')
        ash.sid = x.get('SID')
        ash.serial = x.get('SERIAL')
        ash.username = x.get('USERNAME')
        # NOTE(review): db_name is filled with USERNAME, not a database name —
        # possibly intentional (schema == user on Oracle), confirm.
        ash.db_name = x.get('USERNAME')
        ash.machine = x.get('MACHINE')
        ash.program = x.get('PROGRAM')
        ash.status = x.get('STATUS')  # always None: both queries comment out status
        ash.command = x.get('COMMAND')
        ash.sql_hash_value = x.get('SQL_HASH_VALUE')  # not selected by either query
        ash.sql_id = x.get('SQL_ID')
        ash.sql_text = x.get('SQL_TEXT')
        ash.sql_plan_hash_value = x.get('SQL_PLAN_HASH_VALUE')
        ash.event = x.get('EVENT')
        ash.p1 = x.get('P1')
        ash.p2 = x.get('P2')
        ash.p3 = x.get('P3')
        ash.wait_class = x.get('WAIT_CLASS')
        ash.module = x.get('MODULE')
        ash.action = x.get('ACTION')
        # NOTE(review): the 11g+ query aliases this column SERVER_NAME, so
        # SERVICE_NAME is likely None on that path — confirm the alias.
        ash.service_name = x.get('SERVICE_NAME')
        ash.plsql_object_name = x.get('PLSQL_OBJECT_NAME')
        ash.plsql_entry_object_name = x.get('PLSQL_ENTRY_OBJECT_NAME')
        ash.blocking_session = x.get('BLOCKING_SESSION')
        ash.blocking_session_serial = x.get('BLOCKING_SESSION_SERIAL')
        ash.sql_plan_line_id = x.get('SQL_PLAN_LINE_ID')
        ash.sql_plan_operation = x.get('SQL_PLAN_OPERATION')
        ash.session_type = x.get('SESSION_TYPE')
        ash.session_id = x.get('SESSION_ID')
        ash.sql_elapsed_time = x.get('SQL_ELAPSED_TIME')  # selected on 11g+ only
        ash.created_at = ash_date
        ash.database = database
        try:
            ash.save()
        except Exception as e:
            # Best-effort persistence: one bad row must not drop the sample.
            logger.error(str(e))
    warn = WARN_ENUM.get(database.db_type).Active_Session_Warn
    p = Performance(inst_id=database.db_name, name=warn.name,
                    value=len(json_data), created_at=ash_date)
    customized_warn_scanner(warn, p, database, False)
def job_failure_warn(database):
    """Raise Job_Warn for failing dba_jobs and scheduler jobs (last hour)."""
    query = "\n select SCHEMA_USER OWNER, job || ' '|| what JOB_NAME, failures from dba_jobs where failures > 0\nunion all\nselect OWNER, JOB_NAME, count(*)\nFROM dba_scheduler_job_log\nwhere\nlog_date > sysdate - 1/24 and\nSTATUS != 'SUCCEEDED'\ngroup by OWNER, job_name"
    ok, rows = run_sql(database, query)
    if not ok:
        print(str(build_exception_from_java(rows)))
        return rows
    now = datetime.now().replace(microsecond=0)
    warn = WARN_ENUM.get(database.db_type).Job_Warn
    for row in rows:
        perf = Performance(inst_id=database.db_name,
                           name=warn.name,
                           value=row.get('FAILURES'),
                           created_at=now)
        customized_warn_scanner(warn, perf, database, True,
                                {'name': row.get('JOB_NAME'),
                                 'schema': row.get('OWNER')})
def plan_change_warn(database):
    """Raise Plan_Change_Warn for statements with several plans whose
    per-execution elapsed times diverge (max/min ratio reported as DIFF)."""
    query = '\nselect sql_id,\n round(max(elapsed_time/decode(executions,0,1,executions))/min(elapsed_time/decode(executions,0,1,executions))) DIFF,\n min(inst_id) INST_ID\nfrom\n gv$sql\nwhere elapsed_time > 0\ngroup by sql_id\nhaving count(distinct plan_hash_value) > 1'
    ok, rows = run_sql(database, query)
    if not ok:
        print(str(build_exception_from_java(rows)))
        return rows
    now = datetime.now().replace(microsecond=0)
    warn = WARN_ENUM.get(database.db_type).Plan_Change_Warn
    for row in rows:
        perf = Performance(inst_id=row.get('INST_ID'),
                           name=warn.name,
                           value=row.get('DIFF'),
                           created_at=now)
        customized_warn_scanner(warn, perf, database, True,
                                {'sql_id': row.get('SQL_ID')})
def object_change_warn(database):
    """Raise DB_Object_Change_Warn for non-system objects with DDL activity
    inside the last hour."""
    query = "\nselect object_name,owner, to_char(last_ddl_time, 'yyyy-mm-dd hh24:mi:ss') last_ddl_time\nfrom dba_objects\nwhere last_ddl_time > sysdate - 1/24\nand owner not in ('SCOTT','MGMT_VIEW','MDDATA','MDSYS','SI_INFORMTN_SCHEMA','ORDPLUGINS','ORDSYS','OLAPSYS','SYSMAN','ANONYMOUS','XDB','CTXSYS','EXFSYS','WMSYS','ORACLE_OCM','DBSNMP','TSMSYS','DMSYS','DIP','OUTLN','SYSTEM','SYS') "
    ok, rows = run_sql(database, query)
    if not ok:
        print(str(build_exception_from_java(rows)))
        return rows
    now = datetime.now().replace(microsecond=0)
    warn = WARN_ENUM.get(database.db_type).DB_Object_Change_Warn
    for row in rows:
        details = {'schema': row.get('OWNER'),
                   'object_name': row.get('OBJECT_NAME'),
                   'last_ddl_time': row.get('LAST_DDL_TIME')}
        # value=1: each changed object counts once against the threshold.
        perf = Performance(inst_id=database.db_name, name=warn.name,
                           value=1, created_at=now)
        customized_warn_scanner(warn, perf, database, True, details)
def oracle_standby_warn(database):
    """Raise Standby_Gap_Warn when the applied archive-log sequence lags the
    newest generated sequence (ARC_DIFF per thread/destination)."""
    query = "\nSELECT a.thread#, b.last_seq, a.applied_seq, a. last_app_timestamp, b.last_seq-a.applied_seq ARC_DIFF, dest_name\nFROM\n (SELECT thread#, dest_name, MAX(sequence#) applied_seq, MAX(next_time) last_app_timestamp\n FROM gv$archived_log log,\n v$ARCHIVE_DEST dest WHERE log.applied = 'YES' and dest.dest_name is not null and log.dest_id = dest.dest_id GROUP BY dest.dest_name, thread#) a,\n (SELECT thread#, MAX (sequence#) last_seq FROM gv$archived_log GROUP BY thread#) b\nWHERE a.thread# = b.thread#"
    ok, rows = run_sql(database, query)
    if not ok:
        print(str(build_exception_from_java(rows)))
        return rows
    now = datetime.now().replace(microsecond=0)
    warn = WARN_ENUM.get(database.db_type).Standby_Gap_Warn
    for row in rows:
        details = {'name': row.get('DEST_NAME'),
                   'applied_seq': row.get('APPLIED_SEQ'),
                   'max_seq': row.get('LAST_SEQ'),
                   'thread': row.get('THREAD#')}
        perf = Performance(inst_id=database.db_name, name=warn.name,
                           value=row.get('ARC_DIFF'), created_at=now)
        customized_warn_scanner(warn, perf, database, False, details)
def db2_activity(database):
    """Sample active DB2 application activity into DB2_ASH rows.

    Chooses one of three monitor queries by server version (v9.5 base, v9.7,
    newer), stores one DB2_ASH row per active statement, then feeds the
    active-session count to the Active_Session_Warn scanner.
    """
    # NOTE(review): computed but never used — likely leftover from an older
    # query-name templating scheme.
    padding_str = '_v97' if database.is_v97() else ''
    query1 = "\n SELECT distinct rtrim(app.db_name) DB_NAME,\n app.agent_id,\n app.appl_id,\n app.appl_name,\n app.appl_status,\n app.authid,\n t.activity_type,\n (select cast(p.stmt_text as varchar(2000)) from table(mon_get_pkg_cache_stmt(NULL, t.executable_id, NULL, -2)) as p FETCH FIRST 1 ROWS ONLY) stmt_text,\n hex(t.EXECUTABLE_ID) EXECUTABLE_ID,\n uow.ELAPSED_TIME_SEC,\n round(uow.TOTAL_CPU_TIME/1000000) TOTAL_CPU_TIME,\n uow.TOTAL_ROWS_READ,\n uow.TOTAL_ROWS_RETURNED\nFROM table(wlm_get_workload_occurrence_activities(NULL, -2)) as t,\n sysibmadm.applications app,\n SYSIBMADM.MON_CURRENT_UOW uow\nWHERE\n app.agent_id = t.application_handle\n and t.application_handle = uow.application_handle\n and app.appl_id != (values application_id())\n and app.appl_status not in ('CONNECTED',\n 'UOWWAIT')"
    # Fallback for v9.7 servers where the richer query below fails; selects
    # no UOW metric columns.
    query1_v97_base = "\n SELECT\n distinct rtrim(app.db_name) DB_NAME, app.agent_id, app.appl_id, app.appl_name, app.appl_status, app.authid,\n t.activity_type, cast(p.stmt_text as varchar(2000)) stmt_text, hex(t.EXECUTABLE_ID) EXECUTABLE_ID\n FROM table(wlm_get_workload_occurrence_activities_v97(NULL, -2)) as t,\n table(mon_get_pkg_cache_stmt(NULL, NULL, NULL, -2)) as p,\n sysibmadm.applications app\n WHERE t.executable_id = p.executable_id\n and app.agent_id = t.application_handle\n and app.appl_id != (values application_id())\n and app.appl_status not in ('CONNECTED','UOWWAIT')"
    query1_v97 = "\n SELECT distinct rtrim(app.db_name) DB_NAME,\n app.agent_id,\n app.appl_id,\n app.appl_name,\n app.appl_status,\n app.authid,\n t.activity_type,\n (select cast(p.stmt_text as varchar(2000)) from table(mon_get_pkg_cache_stmt(NULL, t.executable_id, NULL, -2)) as p FETCH FIRST 1 ROWS ONLY) stmt_text,\n hex(t.EXECUTABLE_ID) EXECUTABLE_ID,\n uow.ELAPSED_TIME_SEC,\n round(uow.TOTAL_CPU_TIME/1000000) TOTAL_CPU_TIME,\n uow.TOTAL_ROWS_READ,\n uow.TOTAL_ROWS_RETURNED\nFROM table(wlm_get_workload_occurrence_activities_v97(NULL, -2)) as t,\n sysibmadm.applications app,\n SYSIBMADM.MON_CURRENT_UOW uow\nWHERE\n app.agent_id = t.application_handle\n and t.application_handle = uow.application_handle\n and app.appl_id != (values application_id())\n and app.appl_status not in ('CONNECTED',\n 'UOWWAIT')"
    # v9.5 base: statement text comes from WLM_GET_ACTIVITY_DETAILS instead.
    query2 = "\n SELECT\n app.db_name, app.agent_id, app.appl_id, app.appl_name, app.appl_status, app.authid,\n t.activity_type, (select VALUE from table(WLM_GET_ACTIVITY_DETAILS(t.application_handle,t.uow_id,t.activity_id,-2)) where name = 'STMT_TEXT') STMT_TEXT\n FROM table(wlm_get_workload_occurrence_activities(cast(null as bigint), -1)) as t,\n sysibmadm.applications app\n WHERE app.agent_id = t.application_handle\n and app.appl_id != (values application_id())\n and app.appl_status not in ('CONNECTED','UOWWAIT')"
    ash_date = get_10s_time()  # sample timestamp aligned to a 10-second bucket
    if not database.is_v95_base():
        if database.is_v97():
            flag, json_data = run_sql(database, query1_v97)
            if not flag:
                # Retry with the simpler v9.7 query when the richer one fails.
                flag, json_data = run_sql(database, query1_v97_base)
        else:
            flag, json_data = run_sql(database, query1)
    else:
        flag, json_data = run_sql(database, query2)
    if not flag:
        print(str(build_exception_from_java(json_data)))
        return
    for x in json_data:
        ash = DB2_ASH()
        # NOTE(review): AUTHID doubles as both db_name and username here —
        # confirm that is intended.
        ash.db_name = x.get('AUTHID').strip()
        ash.session_id = x.get('AGENT_ID')
        ash.machine = x.get('APPL_ID')
        ash.program = x.get('APPL_NAME')
        ash.appl_status = x.get('APPL_STATUS')
        ash.username = x.get('AUTHID').strip()
        ash.command = x.get('ACTIVITY_TYPE')
        ash.sql_text = x.get('STMT_TEXT')
        if not database.is_v95_base():
            ash.sql_id = x.get('EXECUTABLE_ID')
            # UOW metrics are only selected by query1/query1_v97; rows from
            # the v97 fallback query leave these None.
            ash.sql_elapsed_time = x.get('ELAPSED_TIME_SEC')
            ash.total_cpu_time = x.get('TOTAL_CPU_TIME')
            ash.rows_read = x.get('TOTAL_ROWS_READ')
            ash.rows_returned = x.get('TOTAL_ROWS_RETURNED')
        else:
            # v9.5 exposes no executable id; derive a stable id from the text.
            ash.sql_id = gen_sql_id(
                x.get('STMT_TEXT')) if x.get('STMT_TEXT') else None
        ash.created_at = ash_date
        ash.database = database
        ash.save()
    warn = WARN_ENUM.get(database.db_type).Active_Session_Warn
    p = Performance(inst_id=database.db_name, name=warn.name,
                    value=len(json_data), created_at=ash_date)
    customized_warn_scanner(warn, p, database, False)
def get_sqlserver_activity(databases):
    """Sample current SQL Server requests for a batch of databases.

    Runs one DMV query per database, stores each active request as an
    MSSQL_ASH row, then raises Active_Session_Warn per database with the
    sampled request count.

    :param databases: iterable of Database model instances to poll.
    """
    # The /* sample_query */ marker lets the final WHERE clause filter this
    # monitoring query out of its own result set.
    query = "\n SELECT /* sample_query */\n req.SESSION_ID,\n convert(varchar(25), req.START_TIME, 120) START_TIME,\n req.STATUS,\n req.COMMAND,\n (select name from master..sysdatabases where dbid = req.database_id) DB_NAME,\n ses.LOGIN_NAME,\n ses.HOST_NAME,\n ses.PROGRAM_NAME,\n req.BLOCKING_SESSION_ID,\n req.WAIT_TYPE,\n req.WAIT_TIME,\n req.WAIT_RESOURCE,\n req.TOTAL_ELAPSED_TIME,\n req.ROW_COUNT,\n sqltext.TEXT SQLTEXT,\n substring(sys.fn_sqlvarbasetostr(req.sql_handle),3,1000) SQL_HANDLE,\n con.CLIENT_NET_ADDRESS,\n case when req.wait_resource like '%SPID%' then SUBSTRING(wait_resource, 1, CHARINDEX(' ', wait_resource)-1) else '' end LINKED_IP,\n cast(case when req.wait_resource like '%SPID%' then SUBSTRING(wait_resource, CHARINDEX('=', wait_resource)+1, CHARINDEX(')', wait_resource)-CHARINDEX('=', wait_resource)-1) else '0' end as int) LINKED_SPID,\n DATEDIFF(SECOND, req.START_TIME, getdate()) TIME\n FROM sys.dm_exec_requests req\n inner join sys.dm_exec_sessions ses on req.session_id = ses.session_id\n inner join sys.dm_exec_connections con on ses.session_id = con.session_id\n CROSS APPLY sys.dm_exec_sql_text(sql_handle) AS sqltext\n where sqltext.TEXT not like '%sample_query%'"
    ash_date = get_10s_time()  # sample timestamp aligned to a 10-second bucket
    result_set = {}  # str(db.id) -> sampled rows
    db_set = {}      # str(db.id) -> Database instance
    for db in databases:
        flag, json_data = run_sql(db, query)
        if not flag:
            # Skip unreachable databases; keep sampling the rest.
            print(str(build_exception_from_java(json_data)))
            continue
        result_set[str(db.id)] = json_data
        db_set[str(db.id)] = db
    for db_id, ash_data in result_set.items():
        for x in ash_data:
            ash = MSSQL_ASH()
            ash.session_id = x.get('SESSION_ID')
            ash.start_time = x.get('START_TIME')
            ash.status = x.get('STATUS').upper()
            ash.command = x.get('COMMAND')
            ash.db_name = x.get('DB_NAME')
            ash.username = x.get('LOGIN_NAME')
            ash.machine = x.get('HOST_NAME')
            ash.program = x.get('PROGRAM_NAME')
            ash.b_blocker = x.get('BLOCKING_SESSION_ID')
            ash.wait_type = x.get('WAIT_TYPE')
            ash.wait_time = x.get('WAIT_TIME')
            ash.wait_resource = x.get('WAIT_RESOURCE')
            ash.total_elapsed_time = x.get('TOTAL_ELAPSED_TIME')
            ash.row_count = x.get('ROW_COUNT')
            ash.sql_text = x.get('SQLTEXT')
            ash.sql_id = x.get('SQL_HANDLE')
            ash.client_net_address = x.get('CLIENT_NET_ADDRESS')
            # LINKED_* fields are parsed from wait_resource for linked-server
            # (SPID=...) waits, per the CASE expressions in the query.
            ash.linked_ip = x.get('LINKED_IP')
            ash.linked_spid = x.get('LINKED_SPID')
            ash.sql_elapsed_time = x.get('TIME')
            ash.created_at = ash_date
            ash.database = db_set.get(db_id)
            ash.save()
        database = db_set.get(db_id)
        warn = WARN_ENUM.get(database.db_type).Active_Session_Warn
        p = Performance(inst_id=database.db_name, name=warn.name,
                        value=len(ash_data), created_at=ash_date)
        customized_warn_scanner(warn, p, database, False)
def mysql_standby_warn(database):
    """Check `show slave status` and raise Standby_Latency_Warn on replica lag.

    Fires only when a slave-status row exists and Seconds_Behind_Master is
    truthy (zero lag and NULL are deliberately skipped, matching the original
    guard). Returns the error payload when the query fails.
    """
    query = 'show slave status'
    flag, json_data = run_sql(database, query)
    if not flag:
        print(str(build_exception_from_java(json_data)))
        return json_data
    created_at = datetime.now().replace(microsecond=0)
    warn = WARN_ENUM.get(database.db_type).Standby_Latency_Warn
    if json_data:
        # run_sql returns a list of rows; `show slave status` yields at most one.
        row = json_data[0]
        if row.get('Seconds_Behind_Master'):
            # BUG FIX: the original called .get() on the list itself
            # (json_data.get(...)), which raised AttributeError exactly when
            # lag was detected — so this warn could never fire.
            options = {
                'master_host': row.get('Master_Host'),
                'master_user': row.get('Master_User'),
                'master_port': row.get('Master_Port')
            }
            p = Performance(inst_id=database.db_name, name=warn.name,
                            value=row.get('Seconds_Behind_Master'),
                            created_at=created_at)
            customized_warn_scanner(warn, p, database, False, options)
def object_warn_scanner(instance, database, warn, realtime=True, p_options=None):
    """Judge one warn against a model instance and emit the alarm if it fires.

    :param instance: model object carrying the judged value (read via the
        warn's `alarm_attr`) and a `created_at` timestamp.
    :param warn: WARN_ENUM member describing the warn category.
    :param realtime: when True, the pushed link uses a 'realtime' time span.
    :param p_options: extra template/link parameters. BUG FIX: was a mutable
        default argument (`{}`); replaced with the None sentinel idiom.
    :returns: True when an alarm was raised, False otherwise.
    """
    p_options = p_options or {}
    warn_level, warn_config = alarm_judger(database, warn.name, instance)
    if not warn_level:
        return False
    alarm_attr = getattr(WARN_ENUM.get(database.db_type), warn.name).value.get('alarm_attr')
    options = {'warn_level': warn_level,
               'message': getattr(instance, alarm_attr),
               'created_at': instance.created_at,
               'alias': database.alias}
    if p_options:
        options = {**options, **p_options}
    message_template = warn.value.get('message_template')
    warn_message = message_template.format(**options)
    # NOTE(review): `link` is the dict stored on the enum value itself, so the
    # mutations below persist across calls — confirm this sharing is intended.
    link = warn.value.get('link')
    link['database'] = {'id': str(database.id), 'alias': database.alias,
                        'db_type': database.db_type,
                        'instance_id_list': database.instance_id_list,
                        'db_name': database.db_name}
    link['json']['instance_id'] = ''
    # SQL-level warns drill into a wider (hour) window than object warns.
    if 'sql_id' in options:
        link['json']['begin_time'] = instance.created_at.timestamp() - History_Timestamp_GAP_Hour
        link['json']['end_time'] = instance.created_at.timestamp() + History_Timestamp_GAP_Hour
    else:
        link['json']['begin_time'] = instance.created_at.timestamp() - History_Timestamp_GAP_Minute
        link['json']['end_time'] = instance.created_at.timestamp() + History_Timestamp_GAP_Minute
    link['json']['time_span'] = instance.created_at.timestamp()
    if p_options:
        link['json'] = {**(link['json']), **p_options}
    warn_result = Warn_Result(database=database, warn_message=warn_message,
                              warn=warn_config, link=link)
    if realtime:
        link['json']['time_span'] = 'realtime'
    warn_alert = {'warn_message': warn_message, 'link': link}
    send_alarm(database.id, json.dumps(warn_alert))
    warn_result.save()
    return True
def mysql_activity(database):
    """Sample the MySQL processlist into MySQL_ASH rows.

    Filters out sleeping sessions, this connection itself, and idle
    replication states; classifies each remaining session's `state` into a
    coarse wait class; finally raises Active_Session_Warn with the count.
    """
    query = "SELECT * FROM information_schema.processlist\n WHERE command != 'Sleep' and id != CONNECTION_ID()\n and state not in\n ('Master has sent all binlog to slave; waiting for binlog to be up','Slave has read all relay log; waiting for the slave I/O thread t','Waiting for master to send event')\n ORDER BY id"
    # state_list[i] maps onto wait_classses[i]; unmatched states fall back to
    # the final class ('Others').
    state_list = [
        ('optimizing', 'preparing', 'statistics'),
        ('copy to tmp table', 'Copying to tmp table', 'Copying to tmp table on disk', 'Creating tmp table', 'removing tmp table'),
        ('Opening table', 'Opening tables', 'Reopen tables', 'Checking table', 'closing tables', 'creating table', 'discard_or_import_tablespace', 'Flushing tables'),
        ('Copying to group table', 'Sorting for group', 'Sorting for order', 'Sorting index', 'Sorting result'),
        ('update', 'updating', 'updating main table', 'updating reference tables', 'deleting from main table', 'deleting from reference tables'),
        ('System lock', 'User lock', 'Waiting for commit lock', 'Waiting for global read lock', 'Waiting for event metadata lock', 'Waiting for schema metadata lock', 'Waiting for stored function metadata lock', 'Waiting for stored procedure metadata lock', 'Waiting for table level lock', 'Waiting for table metadata lock', 'Waiting for trigger metadata lock'),
        ('checking privileges on cached query', 'checking query cache for query', 'invalidating query cache entries', 'sending cached result to client', 'storing result in query cache', 'Waiting for query cache lock'),
        ('Reading from net', 'Writing to net', 'Sending data'),
        # NOTE(review): next tuple holds 'Waiting to finalize termination'
        # twice, once with a leading space — likely a data-entry slip.
        ('Finished reading one binlog; switching to next binlog', 'Sending binlog event to slave', 'Master has sent all binlog to slave; waiting for binlog to be up', ' Waiting to finalize termination', 'Waiting to finalize termination'),
        ('Waiting for master update', 'Connecting to master', 'Checking master version', 'Registering slave on master', 'Requesting binlog dump', 'Waiting to reconnect after a failed binlog dump request', 'Reconnecting after a failed binlog dump request', 'Waiting for master to send event', 'Queueing master event to the relay log', 'Waiting to reconnect after a failed master event read', 'Reconnecting after a failed master event read', 'Waiting for the slave SQL thread to free enough relay log space', 'Waiting for slave mutex on exit', 'Waiting for its turn to commit'),
        ('Making temp file', 'Waiting for the next event in relay log', 'Reading event from the relay log', 'Slave has read all relay log; waiting for the slave I/O thread t', 'Waiting for slave mutex on exit')
    ]
    # NOTE(review): local name is a typo for wait_classes; harmless (local only).
    wait_classses = [
        'Optimization', 'Tmp Table', 'Table Operation', 'Sort', 'Update/Delete', 'Lock', 'Query Cache', 'Network', 'Master Thread', 'I/O Thread', 'SQL Thread', 'Others'
    ]
    ash_date = get_10s_time()  # sample timestamp aligned to a 10-second bucket
    flag, json_data = run_sql(database, query)
    if not flag:
        print(str(build_exception_from_java(json_data)))
        return
    for x in json_data:
        ash = MySQL_ASH()
        ash.session_id = x.get('ID')
        ash.username = x.get('USER')
        ash.machine = x.get('HOST')
        ash.db_name = x.get('DB')
        ash.command = x.get('COMMAND')
        ash.sql_elapsed_time = x.get('TIME')
        ash.state = x.get('STATE')
        ash.sql_text = x.get('INFO')
        # processlist has no sql id; derive a stable one from the text.
        ash.sql_id = gen_sql_id(x.get('INFO')) if x.get('INFO') else None
        ash.created_at = ash_date
        ash.database = database
        # Map the raw state onto a coarse wait class by table position.
        others_flag = True
        for idx, val in enumerate(state_list):
            if ash.state in val:
                ash.wait_class = wait_classses[idx]
                others_flag = False
                break
        if others_flag:
            ash.wait_class = wait_classses[len(wait_classses) - 1]
        ash.save()
    warn = WARN_ENUM.get(database.db_type).Active_Session_Warn
    p = Performance(inst_id=database.db_name, name=warn.name,
                    value=len(json_data), created_at=ash_date)
    customized_warn_scanner(warn, p, database, False)
def lock_history(database):
    """Snapshot lock/transaction state, persist it, and run the related warns.

    Fixes over the previous revision:
      * ``datetime.now`` / ``Transaction`` / ``trans.save`` were referenced
        but never *called*, so the timestamp computation crashed and no
        Transaction row was ever saved;
      * the big-transaction loop read the stale variable ``t`` left over from
        the long-transaction loop instead of its own loop variable.
    """
    query = get_lock_query(database)
    flag, json_data = run_batch_sql(database, query)
    if not flag:
        print(str(build_exception_from_java(json_data)))
        return
    created_at = datetime.now().replace(microsecond=0)
    session_list = []
    if json_data.get('lock'):
        session_list = get_blocking_session_detail(database)
    lock_list = json_data.get('lock') if json_data.get('lock') else []
    save_lock_history(database, lock_list, created_at)
    transactions = json_data.get('transaction')
    # Persist the raw transaction snapshot for this poll.
    trans = Transaction()
    trans.database = database
    trans.created_at = created_at
    trans.transactions = transactions
    trans.save()
    db_type = database.db_type
    locks = len(json_data.get('lock')) if json_data.get('lock') else 0
    warn = WARN_ENUM.get(db_type).Blocking_Session_Warn
    p = Performance(inst_id=database.db_name, name=warn.name, value=locks,
                    created_at=created_at)
    customized_warn_scanner(warn, p, database, False)
    if transactions:
        # Long-running transaction warn: judged per open transaction.
        warn = WARN_ENUM.get(db_type).Long_Transaction_Warn
        p = Performance(inst_id=database.db_name, name=warn.name, created_at=created_at)
        for t in transactions:
            options = {'SESSION_ID': t.get('SESSION_ID'),
                       'MACHINE': t.get('MACHINE'),
                       'TRX_STARTED': t.get('TRX_STARTED')}
            if t.get('INST_ID'):
                p.inst_id = t.get('INST_ID')
            p.value = t.get('TRX_SECONDS')
            if customized_warn_scanner(warn, p, database, False, options, True):
                session_list.append(t.get('SESSION_ID'))
        # Open-transaction count warn.
        warn = WARN_ENUM.get(db_type).Transaction_Warn
        p = Performance(inst_id=database.db_name, name=warn.name,
                        value=len(transactions), created_at=created_at)
        customized_warn_scanner(warn, p, database, False, {}, True)
        # Big-transaction warn: the size metric column differs per engine.
        warn = WARN_ENUM.get(db_type).Big_Transaction_Warn
        p = Performance(inst_id=database.db_name, name=warn.name, created_at=created_at)
        big_transaction_key_dict = {'oracle': 'USED_UBLK',
                                    'mysql': 'TRX_ROWS_MODIFIED',
                                    'db2': 'UOW_LOG_SPACE_USED',
                                    'sqlserver': 'LOG_BYTES'}
        for x in transactions:
            options = {'session_id': x.get('SESSION_ID'),
                       'start_time': x.get('TRX_STARTED')}
            # BUG FIX: this loop previously read `t` (the last element of the
            # long-transaction loop) instead of its own variable `x`.
            if x.get('INST_ID'):
                p.inst_id = x.get('INST_ID')
            p.value = x.get(big_transaction_key_dict.get(db_type))
            if customized_warn_scanner(warn, p, database, False, options, True):
                session_list.append(x.get('SESSION_ID'))
    # De-duplicate before persisting the interesting sessions.
    save_session_detail_list(database, list(set(session_list)))
    if db_type == 'mysql':
        lock_tables = json_data.get('tables')
        if lock_tables and len(lock_tables):
            warn = WARN_ENUM.get(database.db_type).Locked_Table_Warn
            p = Performance(inst_id=database.db_name, name=warn.name,
                            value=len(lock_tables), created_at=created_at)
            table_list = [('{}.{}').format(x.get('Database'), x.get('Table'))
                          for x in lock_tables]
            options = {'table_list': (' ').join(table_list)}
            customized_warn_scanner(warn, p, database, False, options)
def performance_warn_scanner(**kwargs):
    """Dispatch performance-metric warns for kwargs['instance'].

    Resolves the metric name from name_id, runs the per-engine warn list
    through the general scanner, then clears the transient name attribute.
    """
    instance = kwargs.get('instance')
    instance.name = get_performance_name(instance.name_id)
    database = instance.database
    if not database:
        logger.error('database is required in performance instance.')
        return
    db_type = database.db_type
    if db_type == 'oracle':
        enum = WARN_ENUM.get(db_type)
        for w in (enum.IO_Latency_Warn,
                  enum.Parse_Failure_Warn,
                  enum.Session_Count_Warn,
                  enum.Hard_Parse_Warn,
                  enum.Host_CPU_Warn,
                  enum.RAC_Interconnect_Warn,
                  enum.Standby_Gap_Warn,
                  enum.READ_IOPS_Warn,
                  enum.WRITE_IOPS_Warn):
            # Host CPU is keyed by instance id rather than by metric name.
            label = instance.inst_id if w == enum.Host_CPU_Warn else instance.name
            general_warn_scanner(instance, database, w, p_options={'name': label})
    elif db_type in ('db2', 'mysql', 'sqlserver'):
        # These engines share a single connection-count warn.
        general_warn_scanner(instance, database,
                             WARN_ENUM.get(db_type).Connection_Warn,
                             p_options={'name': instance.name})
    instance.name = None
def alarm_judger(database, alarm_type, alarm_data, data=None):
    """Decide whether a warn should fire and at what level.

    Looks up the Warn_Config for (alarm_type, database), reads the value to
    judge either from ``alarm_data`` (via the warn's ``alarm_attr``) or from
    the explicit ``data`` keyword, applies the redis-backed suppression rules
    (resend interval, pre-warning hit count), and compares the value against
    the configured thresholds — numerically, or string-wise when the value is
    not a number.

    :returns: (level, warn_config); level is None when nothing should fire.

    Fixes over the previous revision:
      * when ``alarm_data`` is None, ``data`` is no longer clobbered by a
        ``getattr(None, None)`` crash (callers such as check_database_alive
        pass ``data=`` explicitly);
      * the resend-interval check no longer subtracts from a None timestamp
        when redis has no cached send time;
      * the threshold chain actually assigns a level — the old code
        unconditionally reset it to None, so no warn could ever fire.
    """
    if not database:
        print('database is None')
        return
    string_match = False
    try:
        warn_config = Warn_Config.objects.get(category=alarm_type, database=database)
    except Exception as e:
        print('alarm_type:' + alarm_type)
        print(str(e))
        return (None, None)
    from monitor.models import Space
    # Space warns are judged per tablespace, so the cache key includes its name.
    if isinstance(alarm_data, Space):
        redis_key = str(database.id) + str(warn_config.id) + alarm_data.name
    else:
        redis_key = str(database.id) + str(warn_config.id)
    if alarm_data is not None:
        alarm_attr = getattr(WARN_ENUM.get(database.db_type), alarm_type).value.get('alarm_attr')
        data = getattr(alarm_data, alarm_attr)
    if data is None:
        return (None, None)
    try:
        data = float(data)
    except ValueError:
        # Non-numeric values (e.g. a STATE string) are compared verbatim.
        string_match = True
    if warn_config.optional:
        # Per-config exclusion list (e.g. tablespaces to ignore).
        exclude_optional = warn_config.optional.get('exclude', [])
        if hasattr(alarm_data, 'name'):
            if alarm_data.name in exclude_optional:
                return (None, None)
    last_datetime = redis.hmget(redis_key, 'last_send_datetime')[0]
    if last_datetime is not None and last_datetime != 'None':
        last_datetime = datetime.datetime.strptime(
            last_datetime[:19], '%Y-%m-%d %H:%M:%S')
    key = redis.hmget(redis_key, 'alarm_times')[0]
    alarm_times = int(key) if key else 0
    if not warn_config.status:
        # Warn disabled in its configuration.
        return (None, None)
    if last_datetime is not None and last_datetime != 'None':
        # Suppress resends inside the configured interval.
        if (datetime.datetime.now() - last_datetime).seconds < int(
                warn_config.warning_interval):
            return (None, None)
    if alarm_times < int(str(warn_config.pre_warning_times)):
        # Not enough consecutive hits yet: count this one and stay silent.
        cache_data = {
            'last_send_datetime': last_datetime,
            'alarm_times': alarm_times + 1
        }
        redis.hmset(redis_key, cache_data)
        return (None, None)
    # Threshold judgement. NOTE(review): the level labels below were
    # reconstructed from broken code that always ended up None — confirm the
    # exact strings the message templates / consumers expect.
    if not string_match and data > float(warn_config.critical_threshold):
        level = 'critical'
    elif not string_match and data > float(warn_config.warn_threshold):
        level = 'warning'
    elif string_match and data.upper() == warn_config.critical_threshold:
        level = 'critical'
    elif string_match and data.upper() == warn_config.warn_threshold:
        level = 'warning'
    else:
        level = None
    if level is not None:
        # Fired: remember when, and reset the pre-warning counter.
        cache_data = {
            'last_send_datetime': datetime.datetime.now(),
            'alarm_times': 0
        }
        redis.hmset(redis_key, cache_data)
    else:
        cache_data = {
            'last_send_datetime': last_datetime,
            'alarm_times': alarm_times
        }
        redis.hmset(redis_key, cache_data)
    return (level, warn_config)