def _format_duration_ms(millis):
    """Render a millisecond duration as a short h/m/s string.

    Mirrors the report's display convention: >1h as '%.2fh', >1m as whole
    minutes, >1s as whole seconds; values <= 1000 ms are returned unchanged.
    """
    as_int = int(float(millis))
    if as_int > 60 * 60 * 1000:
        return ('%.2f' % (float(millis) / 60 / 60 / 1000)) + "h"
    if as_int > 60 * 1000:
        return str(as_int // (60 * 1000)) + "m"  # minutes
    if as_int > 1000:
        return str(as_int // 1000) + "s"  # seconds
    return millis


def do_get_top_user_demo(from_time=None, to_time=None, duration=900):
    """Collect top YARN/Hive applications per user, sorted by runtime.

    :param from_time: window start; defaults to one day ago. BUG FIX: the old
        default called get_one_day_ago_from() at import time, so a long-running
        process always queried a stale, frozen window. Now evaluated per call.
    :param to_time: window end; defaults to now (also evaluated per call).
    :param duration: minimum-duration filter forwarded to the Hive API.
    :return: list of row dicts, sorted by raw duration in ms, longest first.
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    hive_info = HiveInfo()
    top_users = hive_info.get_top_user_demo(from_time=from_time,
                                            to_time=to_time,
                                            duration=duration)
    massage_dfs = []
    if top_users.applications:
        for app in top_users.applications:
            line = {
                'category': "YARN_APPLICATION",
                'service_name': "yarn",
                'pool': app.pool,
                'user': app.user,
            }
            # Not every application reports CPU time; treat missing as 0.
            try:
                line['cpu_milliseconds'] = app.attributes['cpu_milliseconds']
            except KeyError:
                line['cpu_milliseconds'] = 0
            line['name'] = app.attributes['hive_query_string']
            line['entityName'] = app.applicationId
            line['time'] = zone_conversion(timestamp=app.startTime,
                                           format=u'YYYY-MM-DD HH:mm:ss')
            attr_val = round_milli_time(app.startTime, app.endTime)
            # Raw value kept for sorting; pretty string for display.
            line['application_duration1'] = attr_val
            line['application_duration'] = _format_duration_ms(attr_val)
            massage_dfs.append(line)
    return sorted(massage_dfs,
                  key=lambda t: t['application_duration1'],
                  reverse=True)
def do_get_hdfs_used_quarterly(pic=''):
    """Build the quarterly HDFS used-capacity table and (optionally) chart.

    :param pic: truthy to skip rendering the bar-chart PNG.
    :return: tuple of (last four quarterly rows, newest first; column headers).
    """
    days, quarter_from = get_last_n_quarter_from()
    responses = _do_get_hdfs_used(quarter_from, None, 'WEEKLY')
    capacity = []
    labels = []
    color = ['r', 'g', 'b']
    file_path = './hdfs_quarterly.png'
    # Months belonging to the quarter range; only samples whose 'YYYY-MM'
    # label falls in this set are kept. (Was a dict of zeros used solely
    # for membership tests.)
    wanted_months = set(days)
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            per_month = collections.OrderedDict()
            unit = ts.metadata.unitNumerators[0]
            for point in ts.data:
                month = zone_conversion(timestamp=point.timestamp,
                                        format=u'YYYY-MM')
                # Later samples within the same month overwrite earlier ones,
                # so each month keeps its most recent reading.
                if month in wanted_months:
                    per_month[month] = point.value
            bar_labels = []
            hdfs_used_values = []
            for month, value in per_month.items():
                bar_labels.append(month)
                hdfs_used_values.append(
                    check_digital_storage_without_unit(value, unit, 'bytes'))
            capacity.append(hdfs_used_values[-5:])
            labels = bar_labels[-5:]
    labels = [str(Quarter.from_string(text=x)) for x in labels]
    if not pic:
        two_bar_charts(labels[-4:], capacity[-4:], color, file_path)
    # Renamed from 'list' to stop shadowing the builtin.
    rows, remaining = ring_table_clo(capacity[0], labels)
    headers = [u'日期', u'已用容量(T)', u'上季容量(T)', u'增量(T)', u'季度增长率(%)']
    return rows[-4:][::-1], headers
def do_get_hdfs_used_monthly(pic=''):
    """Build the monthly HDFS used-capacity table and (optionally) chart.

    :param pic: truthy to skip rendering the bar-chart PNG.
    :return: tuple of (last four monthly rows, newest first; column headers).
    """
    responses = _do_get_hdfs_used(get_last_n_month_from(), None, 'WEEKLY')
    capacity = []
    labels = []
    color = ['r', 'g', 'b']
    file_path = './hdfs_monthly.png'
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            # Keyed by 'YYYY-MM'; later samples in the same month overwrite
            # earlier ones, so each month keeps its most recent reading.
            per_month = collections.OrderedDict()
            unit = ts.metadata.unitNumerators[0]
            for point in ts.data:
                month = zone_conversion(timestamp=point.timestamp,
                                        format=u'YYYY-MM')
                per_month[month] = point.value
            bar_labels = []
            hdfs_used_values = []
            for month, value in per_month.items():
                bar_labels.append(month)
                hdfs_used_values.append(
                    check_digital_storage_without_unit(value, unit, 'bytes'))
            capacity.append(hdfs_used_values[-5:])
            labels = bar_labels[-5:]
    if not pic:
        two_bar_charts(labels[-4:], capacity[-4:], color, file_path)
    # Renamed from 'list' to stop shadowing the builtin.
    rows, remaining = ring_table_clo(capacity[0], labels)
    headers = [u'日期', u'已用容量(T)', u'上月容量(T)', u'增量(T)', u'月增长率(%)']
    return rows[-4:][::-1], headers
def do_get_dfs_net(pic=''):
    """Collect the 24h cluster network transmit rate and optionally chart it.

    :param pic: truthy to skip rendering './net.png'.
    :return: tuple of (series list as fed to n_lines_charts, unit string).
    """
    responses = do_query_rollup("select total_bytes_transmit_rate_across_network_interfaces where category = CLUSTER", get_n_hour_ago_from(n=24), None, 'HOURLY')
    file_path = "./net.png"
    line_styles = ['-', ':', '--', '-.', '-.']
    series = []
    index = 0
    y_max = 0.0
    unit = ''
    data_type = None
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            x = []
            mean_values = []
            metadata = ts.metadata
            unit = metadata.unitNumerators[0].encode("utf-8")
            for data in ts.data:
                x_time = pl.datetime.datetime.strptime(
                    zone_conversion(timestamp=data.timestamp,
                                    format=u'YYYY-MM-DD HH:mm:ss'),
                    "%Y-%m-%d %H:%M:%S")
                x.append(x_time)
                mean_values.append("%.2f" % data.value)
                # BUG FIX: track the numeric maximum. The old code took
                # max() over the "%.2f"-formatted STRINGS, which compares
                # lexicographically (e.g. "9.50" > "10.25").
                y_max = max(y_max, data.value)
                data_type = data.type
            label_mean = metadata.entityName
            # Wrap the style index so a 6th series no longer raises
            # IndexError. (Also removed: a 'max_values' list that was never
            # appended to, making its branch dead code.)
            series.append((x, mean_values, label_mean,
                           line_styles[index % len(line_styles)], 1))
            index += 1
    if not pic:
        n_lines_charts(series, file_path, unit, data_type,
                       float(y_max), u'群集Net传输量')
    return series, unit
def do_get_dfs_cpu(pic=''):
    """Collect 24h cluster CPU usage (mean, plus max when available) and
    optionally render './cpu.png'.

    :param pic: truthy to skip rendering the chart.
    :return: tuple of (series list as fed to n_lines_charts, unit string).
    """
    responses = do_query_rollup(
        "select cpu_percent_across_hosts WHERE category = CLUSTER",
        get_n_hour_ago_from(n=24), None, 'HOURLY')
    file_path = "./cpu.png"
    code = ['-', ':', '--', '-.', '-.']
    test = []
    index = 0
    y_max = 100  # CPU usage is a percentage, so the axis tops out at 100.
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            metadata = ts.metadata
            unit = metadata.unitNumerators[0].encode("utf-8")
            x = []
            max_values = []
            mean_values = []
            for data in ts.data:
                stamp = zone_conversion(timestamp=data.timestamp,
                                        format=u'YYYY-MM-DD HH:mm:ss')
                x.append(pl.datetime.datetime.strptime(
                    stamp, "%Y-%m-%d %H:%M:%S"))
                # Per-host max is only present when the rollup carries
                # aggregate statistics.
                if data.aggregateStatistics:
                    max_values.append("%.2f" % data.aggregateStatistics.max)
                mean_values.append("%.2f" % data.value)
                data_type = data.type
            legend = metadata.entityName
            if max_values:
                test.append((x, max_values, legend + "Max", code[index], 1))
            test.append((x, mean_values, legend + "Avg", code[index], 1))
            index += 1
    if not pic:
        n_lines_charts(test, file_path, unit, data_type, y_max,
                       u'群集CPU使用率')
    return test, unit
def do_get_impala_top(from_time=None, to_time=None, duration=300000.0):
    """Return Impala queries longer than *duration* ms, slowest first.

    :param from_time: window start; defaults to a week ago. BUG FIX: the old
        default called get_last_week_to() at import time, freezing the window
        for the process lifetime. Now evaluated per call.
    :param to_time: window end; defaults to now (also evaluated per call).
    :param duration: minimum query duration in ms, substituted into
        IMPALA_QUERY.
    :return: list of row dicts sorted by raw duration (ms), descending.
    """
    if from_time is None:
        from_time = get_last_week_to()
    if to_time is None:
        to_time = get_now()
    attrs = [
        'user', 'database', 'query_duration', 'thread_cpu_time', 'category',
        'executing', 'service_name', 'coordinator_host_id', 'stats_missing',
        'statement', 'entityName', 'pool'
    ]
    responses = do_query(IMPALA_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            metadata = ts.metadata
            line = {}
            if metadata.attributes:
                for attr in attrs:
                    if attr not in metadata.attributes:
                        continue
                    attr_val = metadata.attributes[attr]
                    if attr == 'query_duration':
                        # Raw ms kept for sorting; pretty string for display.
                        millis = int(attr_val)
                        line['query_duration1'] = millis
                        if millis > 60 * 60 * 1000:
                            attr_val = ('%.2f' % (float(attr_val)
                                                  / 60 / 60 / 1000)) + "h"
                        elif millis > 60 * 1000:
                            attr_val = str(millis // (60 * 1000)) + "m"  # minutes
                        elif millis > 1000:
                            attr_val = str(millis // 1000) + "s"  # seconds
                    line[attr] = attr_val
            # Only the last data point's timestamp is retained, matching the
            # previous behavior.
            for data in ts.data:
                line['time'] = zone_conversion(
                    timestamp=data.timestamp, format=u'YYYY-MM-DD HH:mm:ss')
            massage_dfs.append(line)
    # BUG FIX: .get() guards rows where 'query_duration' was absent from the
    # metadata attributes; the old lambda raised KeyError on such rows.
    return sorted(massage_dfs,
                  key=lambda t: t.get('query_duration1', 0),
                  reverse=True)
def do_get_hive_top(from_time=None, to_time=None, duration=900000.0):
    """Return Hive applications longer than *duration* ms, slowest first.

    :param from_time: window start; defaults to one day ago. BUG FIX: the old
        default called get_one_day_ago_from() at import time, freezing the
        window for the process lifetime. Now evaluated per call.
    :param to_time: window end; defaults to now (also evaluated per call).
    :param duration: minimum application duration in ms, substituted into
        HIVE_QUERY.
    :return: list of row dicts sorted by raw duration (ms), descending.
    """
    if from_time is None:
        from_time = get_one_day_ago_from()
    if to_time is None:
        to_time = get_now()
    attrs = [
        'user', 'name', 'application_duration', 'entityName', 'pool',
        'cpu_milliseconds', 'category', 'service_name'
    ]
    responses = do_query(HIVE_QUERY % duration, from_time, to_time)
    massage_dfs = []
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            metadata = ts.metadata
            line = {}
            if metadata.attributes:
                for attr in attrs:
                    if attr not in metadata.attributes:
                        continue
                    attr_val = metadata.attributes[attr]
                    if attr == 'application_duration':
                        # Raw ms kept for sorting; pretty string for display.
                        line['application_duration1'] = float(attr_val)
                        millis = int(float(attr_val))
                        if millis > 60 * 60 * 1000:
                            attr_val = ('%.2f' % (float(attr_val)
                                                  / 60 / 60 / 1000)) + "h"
                        elif millis > 60 * 1000:
                            attr_val = str(millis // (60 * 1000)) + "m"  # minutes
                        elif millis > 1000:
                            attr_val = str(millis // 1000) + "s"  # seconds
                    line[attr] = attr_val
            # Only the last data point's timestamp is retained, matching the
            # previous behavior.
            for data in ts.data:
                line['time'] = zone_conversion(
                    timestamp=data.timestamp, format=u'YYYY-MM-DD HH:mm:ss')
            massage_dfs.append(line)
    # BUG FIX: .get() guards rows where 'application_duration' was absent
    # from the metadata attributes; the old lambda raised KeyError there.
    return sorted(massage_dfs,
                  key=lambda t: t.get('application_duration1', 0),
                  reverse=True)
def do_get_hdfs_used_weekly(pic=''):
    """Build the weekly HDFS used-capacity table, chart and usage forecast.

    :param pic: truthy to skip rendering the bar-chart PNG.
    :return: tuple of (last four weekly rows, newest first; projected days of
        capacity remaining as a '%.1f' string; column headers).
    """
    responses = _do_get_hdfs_used(get_last_n_week_from(), get_now(), 'WEEKLY')
    capacity = []
    labels = []
    color = ['r', 'g', 'b']
    file_path = './hdfs_weekly.png'
    for response in responses:
        if not response.timeSeries:
            continue
        for ts in response.timeSeries:
            unit = ts.metadata.unitNumerators[0]
            bar_labels = [zone_conversion(point.timestamp)
                          for point in ts.data]
            hdfs_used_values = [
                check_digital_storage_without_unit(point.value, unit, 'bytes')
                for point in ts.data
            ]
            capacity.append(hdfs_used_values)
            labels = bar_labels
    if not pic:
        two_bar_charts(labels[-4:], capacity[-4:], color, file_path)
    # Renamed from 'list' to stop shadowing the builtin.
    rows, remaining = ring_table_clo(capacity[0], labels)
    headers = [u'日期', u'已用容量(T)', u'上周容量(T)', u'增量(T)', u'周增长率(%)']
    # 'remaining' is weeks of headroom; multiply by 7 to report in days.
    return rows[-4:][::-1], "%.1f" % (remaining * 7), headers
def overall_report(): os.chdir(os.path.dirname(sys.argv[0])) data = do_get_dfs_capacity_email() rows, remaining, metadata = do_get_hdfs_used_weekly() rows1, metadata = do_get_hdfs_used_monthly() rows2, metadata = do_get_hdfs_used_quarterly() rows3 = do_get_hive_top_email() rows4 = do_get_impala_top_email() rows5 = query_small_files() get_impala_job_summary() get_hive_job_summary() query_file_incr_info() do_get_dfs_cpu() do_get_dfs_mem() do_get_dfs_net() with open('./cpu.png', 'rb') as lena1: image1 = lena1.read() with open('./mem.png', 'rb') as lena2: image2 = lena2.read() with open('./net.png', 'rb') as lena3: image3 = lena3.read() with open('./hdfs_weekly.png', 'rb') as lena4: image4 = lena4.read() with open('./hdfs_monthly.png', 'rb') as lena5: image5 = lena5.read() with open('./hdfs_quarterly.png', 'rb') as lena6: image6 = lena6.read() with open('./file.png', 'rb') as lena7: image7 = lena7.read() with open('./file2.png', 'rb') as lena8: image8 = lena8.read() with open('./impala_pie.png', 'rb') as lena9: image9 = lena9.read() with open('./hive_pie.png', 'rb') as lena10: image10 = lena10.read() inline_image1 = InlineImage(filename="cpu.png", content=image1) inline_image2 = InlineImage(filename="mem.png", content=image2) inline_image3 = InlineImage(filename="net.png", content=image3) inline_image4 = InlineImage(filename="hdfs_weekly.png", content=image4) inline_image5 = InlineImage(filename="hdfs_monthly.png", content=image5) inline_image6 = InlineImage(filename="hdfs_quarterly.png", content=image6) inline_image7 = InlineImage(filename="file.png", content=image7) inline_image8 = InlineImage(filename="file2.png", content=image8) inline_image9 = InlineImage(filename="impala_pie.png", content=image9) inline_image10 = InlineImage(filename="hive_pie.png", content=image10) send_templated_mail( template_name='report_template2', from_email='*****@*****.**', recipient_list=['*****@*****.**', '*****@*****.**'], context={ 'time': zone_conversion(get_now()), 'data': data, 
'rows': rows, 'rows1': rows1, 'rows2': rows2, 'rows3': rows3, 'rows4': rows4, 'rows5': rows5, 'remaining': remaining, 'cpu_image': inline_image1, 'mem_image': inline_image2, 'net_image': inline_image3, 'hdfs_weekly_image': inline_image4, 'hdfs_monthly_image': inline_image5, 'hdfs_quarterly_image': inline_image6, 'file_image': inline_image7, 'file2_image': inline_image8, 'impala_pie_image': inline_image9, 'hive_pie_image': inline_image10 }, # Optional: # cc=['*****@*****.**'], # bcc=['*****@*****.**'], # headers={'My-Custom-Header':'Custom Value'}, # template_prefix="my_emails/", # template_suffix="email", ) print 'email send'