def get_freeze_locations(scan_log_filename, scan):
    """
    [Wed Nov 1 23:43:16 2017 - debug] ...
    [Wed Nov 1 23:43:19 2017 - debug] ...

    Several seconds without writing anything to the log? That is a freeze!

    :param scan: The file handler for the scan log
    :return: A KeyValueOutput with the log lines where delays greater than
             5 seconds were detected
    """
    scan.seek(0)
    freezes = []

    previous_line_time = get_line_epoch(scan.readline())

    for line in scan:
        try:
            current_line_epoch = get_line_epoch(line)
        except InvalidTimeStamp:
            continue

        time_spent = current_line_epoch - previous_line_time

        if time_spent > 5:
            line = line.strip()
            freezes.append(line)

        previous_line_time = current_line_epoch

    return KeyValueOutput('debug_log_freeze',
                          'Delays greater than 5 seconds between two log lines',
                          freezes)

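# The helpers used throughout this module (get_line_epoch, InvalidTimeStamp,
# KeyValueOutput, the *_RE regular expressions, plotille setup, etc.) are
# imported from other modules in this package and are not shown here. The
# function below is an illustration only -- an assumption about how such a
# helper could work, not the real implementation -- showing how a prefix like
# "[Wed Nov 1 23:43:16 2017 - debug]" maps to the epoch values compared above.
def _example_get_line_epoch(line):
    """Illustrative sketch (assumption): parse the '[<asctime> - <level>]' prefix."""
    import re
    import time

    match = re.match(r'\[(.*?) - \w+\]', line)
    if match is None:
        # The real helper raises InvalidTimeStamp; ValueError keeps this sketch
        # self-contained.
        raise ValueError('Missing timestamp in line: %r' % line)

    return int(time.mktime(time.strptime(match.group(1), '%a %b %d %H:%M:%S %Y')))
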
def get_http_requests_over_time_data(scan_log_filename, scan):
    scan.seek(0)

    requests_by_minute = []
    requests_in_this_minute = 0

    line = scan.readline()
    prev_line_epoch = get_line_epoch(line)

    for line in scan:
        match = HTTP_CODE_RE.search(line)
        if match:
            requests_in_this_minute += 1

        try:
            cur_line_epoch = get_line_epoch(line)
        except InvalidTimeStamp:
            continue

        if cur_line_epoch - prev_line_epoch > 60:
            prev_line_epoch = cur_line_epoch
            requests_by_minute.append(requests_in_this_minute)
            requests_in_this_minute = 0

    return requests_by_minute

def get_consumer_pool_size_data(scan_log_filename, scan):
    scan.seek(0)

    consumer_pool_perc_audit = []
    consumer_pool_timestamps_audit = []

    consumer_pool_perc_crawl = []
    consumer_pool_timestamps_crawl = []

    worker_pool_perc = []
    worker_pool_timestamps = []

    for line in scan:
        match = IDLE_CONSUMER_WORKERS.search(line)
        if not match:
            continue

        percent = int(match.group(1))
        is_audit = 'audit' in match.group(2).lower()
        is_crawl = 'crawl' in match.group(2).lower()

        if is_audit:
            consumer_pool_perc_audit.append(percent)
            consumer_pool_timestamps_audit.append(get_line_epoch(line))
        elif is_crawl:
            consumer_pool_perc_crawl.append(percent)
            consumer_pool_timestamps_crawl.append(get_line_epoch(line))
        else:
            worker_pool_perc.append(percent)
            worker_pool_timestamps.append(get_line_epoch(line))

    return (consumer_pool_perc_audit, consumer_pool_timestamps_audit,
            consumer_pool_perc_crawl, consumer_pool_timestamps_crawl,
            worker_pool_perc, worker_pool_timestamps)

def show_connection_pool_wait(scan_log_filename, scan):
    scan.seek(0)

    connection_pool_waits = []
    connection_pool_timestamps = []

    for line in scan:
        match = CONNECTION_POOL_WAIT.search(line)
        if match:
            connection_pool_waits.append(float(match.group(1)))
            connection_pool_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp
    connection_pool_timestamps = [ts - first_timestamp for ts in connection_pool_timestamps]

    if not connection_pool_waits:
        print('No connection pool wait data found')
        return

    print('Time waited for worker threads for an available TCP/IP connection')
    print(' Total: %.2f sec' % sum(connection_pool_waits))
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Waited time'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(connection_pool_timestamps,
             connection_pool_waits)

    print(fig.show())
    print('')
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Count'
    fig.x_label = 'Time waiting for available TCP/IP connection'
    fig.set_x_limits(min_=0)
    fig.set_y_limits(min_=0)
    fig.color_mode = 'byte'

    print('Time waiting for available TCP/IP connection')
    print('')
    print(plotille.hist(connection_pool_waits, bins=25))
    print('')
    print('')

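# _num_formatter is a small label formatter shared by every figure in this
# module and defined elsewhere in this package. The sketch below is an
# assumption about what such a formatter could look like, following plotille's
# custom label formatter signature (value, chars, delta, left); it is not the
# real implementation.
def _example_num_formatter(val, chars, delta, left=False):
    """Illustrative only: render axis labels as aligned integers."""
    align = '<' if left else ''
    return '{:{}{}d}'.format(int(val), align, chars)
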
def show_http_requests_over_time(scan_log_filename, scan):
    scan.seek(0)

    requests_by_minute = []
    requests_in_this_minute = 0

    line = scan.readline()
    prev_line_epoch = get_line_epoch(line)

    for line in scan:
        match = HTTP_CODE_RE.search(line)
        if match:
            requests_in_this_minute += 1

        try:
            cur_line_epoch = get_line_epoch(line)
        except InvalidTimeStamp:
            continue

        if cur_line_epoch - prev_line_epoch > 60:
            prev_line_epoch = cur_line_epoch
            requests_by_minute.append(requests_in_this_minute)
            requests_in_this_minute = 0

    print('HTTP requests sent by minute')
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'HTTP requests'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=None)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(xrange(len(requests_by_minute)),
             requests_by_minute)

    print(fig.show())
    print('')
    print('')

def show_freeze_locations(scan_log_filename, scan):
    """
    [Wed Nov 1 23:43:16 2017 - debug] ...
    [Wed Nov 1 23:43:19 2017 - debug] ...

    Several seconds without writing anything to the log? That is a freeze!

    :param scan: The file handler for the scan log
    :return: None, all printed to the output
    """
    scan.seek(0)
    freezes = []

    previous_line_time = get_line_epoch(scan.readline())

    for line in scan:
        try:
            current_line_epoch = get_line_epoch(line)
        except InvalidTimeStamp:
            continue

        time_spent = current_line_epoch - previous_line_time

        if time_spent > 5:
            line = line.strip()

            if len(line) >= 80:
                msg = 'Found %s second freeze at: %s...' % (time_spent, line[:80])
            else:
                msg = 'Found %s second freeze at: %s' % (time_spent, line)

            freezes.append(msg)

        previous_line_time = current_line_epoch

    if not freezes:
        print('No delays greater than 5 seconds were found between two scan log lines')
        return

    print('Found delays greater than 5 seconds around these scan log lines:')
    for freeze in freezes:
        print(' - %s' % freeze)

def get_error_rate_data(scan_log_filename, scan):
    scan.seek(0)

    error_rate = []
    error_rate_timestamps = []

    for line in scan:
        match = EXTENDED_URLLIB_ERRORS_RE.search(line)
        if match:
            error_rate.append(int(match.group(1)))
            error_rate_timestamps.append(get_line_epoch(line))

    return error_rate, error_rate_timestamps

def get_rtt_data(scan_log_filename, scan):
    scan.seek(0)

    rtt = []
    rtt_timestamps = []

    for line in scan:
        match = RTT_RE.search(line)
        if match:
            rtt.append(float(match.group(1)))
            rtt_timestamps.append(get_line_epoch(line))

    return rtt, rtt_timestamps


def get_timeout_data(scan_log_filename, scan):
    scan.seek(0)

    timeouts = []
    timeout_timestamps = []

    for line in scan:
        match = SOCKET_TIMEOUT.search(line)
        if match:
            timeouts.append(float(match.group(1)))
            timeout_timestamps.append(get_line_epoch(line))

    return timeouts, timeout_timestamps


def get_parser_process_memory_limit_data(scan_log_filename, scan):
    scan.seek(0)

    memory_limit = []
    memory_limit_timestamps = []

    for line in scan:
        match = PARSER_PROCESS_MEMORY_LIMIT.search(line)
        if match:
            memory_limit.append(int(match.group(1)))
            memory_limit_timestamps.append(get_line_epoch(line))

    return memory_limit, memory_limit_timestamps


def get_queue_size_crawl_data(scan_log_filename, scan):
    scan.seek(0)

    crawl_queue_sizes = []
    crawl_queue_timestamps = []

    for line in scan:
        match = CRAWLINFRA_DISK_DICT.search(line)
        if match:
            crawl_queue_sizes.append(int(match.group(1)))
            crawl_queue_timestamps.append(get_line_epoch(line))

    return crawl_queue_sizes, crawl_queue_timestamps


def get_queue_size_audit_data(scan_log_filename, scan):
    scan.seek(0)

    auditor_queue_sizes = []
    auditor_queue_timestamps = []

    for line in scan:
        match = AUDITOR_DISK_DICT.search(line)
        if match:
            auditor_queue_sizes.append(int(match.group(1)))
            auditor_queue_timestamps.append(get_line_epoch(line))

    return auditor_queue_sizes, auditor_queue_timestamps


def get_active_threads_data(scan_log_filename, scan):
    scan.seek(0)

    active_threads = []
    active_threads_timestamps = []

    for line in scan:
        match = ACTIVE_THREADS.search(line)
        if match:
            active_threads.append(float(match.group(1)))
            active_threads_timestamps.append(get_line_epoch(line))

    return active_threads, active_threads_timestamps


def get_queue_size_grep_data(scan_log_filename, scan):
    scan.seek(0)

    grep_queue_sizes = []
    grep_queue_timestamps = []

    for line in scan:
        match = GREP_DISK_DICT.search(line)
        if match:
            grep_queue_sizes.append(int(match.group(1)))
            grep_queue_timestamps.append(get_line_epoch(line))

    return grep_queue_sizes, grep_queue_timestamps


def get_connection_pool_wait_data(scan_log_filename, scan):
    scan.seek(0)

    connection_pool_waits = []
    connection_pool_timestamps = []

    for line in scan:
        match = CONNECTION_POOL_WAIT.search(line)
        if match:
            connection_pool_waits.append(float(match.group(1)))
            connection_pool_timestamps.append(get_line_epoch(line))

    return connection_pool_waits, connection_pool_timestamps


def get_worker_pool_size_data(scan_log_filename, scan):
    scan.seek(0)

    worker_pool_sizes = []
    worker_pool_timestamps = []

    for line in scan:
        match = WORKER_POOL_SIZE.search(line)
        if match:
            worker_pool_sizes.append(int(match.group(1)))
            worker_pool_timestamps.append(get_line_epoch(line))

    return worker_pool_sizes, worker_pool_timestamps


def get_parser_errors_data(scan_log_filename, scan):
    scan.seek(0)

    timeout_count = 0
    timeout_errors = []
    timeout_errors_timestamps = []

    memory_count = 0
    memory_errors = []
    memory_errors_timestamps = []

    for line in scan:
        if PARSER_TIMEOUT in line:
            timeout_count += 1
            timeout_errors.append(timeout_count)
            timeout_errors_timestamps.append(get_line_epoch(line))

        if PARSER_MEMORY_LIMIT in line:
            memory_count += 1
            memory_errors.append(memory_count)
            memory_errors_timestamps.append(get_line_epoch(line))

    return (timeout_count, timeout_errors, timeout_errors_timestamps,
            memory_count, memory_errors, memory_errors_timestamps)


def get_not_found_requests_over_time_data(scan_log_filename, scan):
    scan.seek(0)

    requests_by_minute = []
    requests_in_this_minute = 0

    line = scan.readline()
    prev_line_epoch = get_line_epoch(line)

    for line in scan:
        if NOT_FOUND_REQUEST in line:
            requests_in_this_minute += 1

        try:
            cur_line_epoch = get_line_epoch(line)
        except InvalidTimeStamp:
            continue

        if cur_line_epoch - prev_line_epoch > 60:
            prev_line_epoch = cur_line_epoch
            requests_by_minute.append(requests_in_this_minute)
            requests_in_this_minute = 0

    return requests_by_minute

def show_queue_size_audit(scan_log_filename, scan):
    scan.seek(0)

    auditor_queue_sizes = []
    auditor_queue_timestamps = []

    for line in scan:
        match = AUDITOR_DISK_DICT.search(line)
        if match:
            auditor_queue_sizes.append(int(match.group(1)))
            auditor_queue_timestamps.append(get_line_epoch(line))

    # Get the last timestamp to use as max in the graphs
    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp
    auditor_queue_timestamps = [ts - first_timestamp for ts in auditor_queue_timestamps]

    if not auditor_queue_sizes:
        print('No audit consumer queue size data found')
        print('')
        return

    print('Audit consumer queue size')
    print(' Latest queue size value: %s' % auditor_queue_sizes[-1])
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Items in Audit queue'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(auditor_queue_timestamps,
             auditor_queue_sizes,
             label='Audit')

    print(fig.show())
    print('')
    print('')

def show_extended_urllib_error_rate(scan_log_filename, scan):
    scan.seek(0)

    error_rate = []
    error_rate_timestamps = []

    for line in scan:
        match = EXTENDED_URLLIB_ERRORS_RE.search(line)
        if match:
            error_rate.append(int(match.group(1)))
            error_rate_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_epoch = last_timestamp - first_timestamp
    error_rate_timestamps = [ts - first_timestamp for ts in error_rate_timestamps]

    if not error_rate:
        print('No error rate information found')
        print('')
        return

    print('Extended URL library error rate')
    print(' Error rate exceeded 10%%: %s' % (max(error_rate) > 10,))
    print(' Error rate exceeded 20%%: %s' % (max(error_rate) > 20,))
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Error rate'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_epoch)
    fig.set_y_limits(min_=0, max_=max(error_rate) * 1.1)

    fig.plot(error_rate_timestamps,
             error_rate,
             label='Error rate')

    print(fig.show())
    print('')
    print('')

def show_parser_process_memory_limit(scan_log_filename, scan):
    scan.seek(0)

    memory_limit = []
    memory_limit_timestamps = []

    for line in scan:
        match = PARSER_PROCESS_MEMORY_LIMIT.search(line)
        if match:
            memory_limit.append(int(match.group(1)))
            memory_limit_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_epoch = last_timestamp - first_timestamp
    memory_limit_timestamps = [ts - first_timestamp for ts in memory_limit_timestamps]

    if not memory_limit:
        print('No parser process memory limit information found')
        return

    print('Parser process memory limit')
    print(' Latest memory limit: %s MB' % memory_limit[-1])
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Parser memory limit (MB)'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_epoch)
    fig.set_y_limits(min_=0, max_=max(memory_limit) * 1.1)

    fig.plot(memory_limit_timestamps,
             memory_limit,
             label='Memory limit')

    print(fig.show())
    print('')
    print('')

def show_worker_pool_size(scan_log_filename, scan):
    scan.seek(0)

    worker_pool_sizes = []
    worker_pool_timestamps = []

    for line in scan:
        match = WORKER_POOL_SIZE.search(line)
        if match:
            worker_pool_sizes.append(int(match.group(1)))
            worker_pool_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp
    worker_pool_timestamps = [ts - first_timestamp for ts in worker_pool_timestamps]

    if not worker_pool_sizes:
        print('No worker pool size data found')
        return

    print('Worker pool size over time')
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Worker pool size'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(worker_pool_timestamps,
             worker_pool_sizes,
             label='Workers')

    print(fig.show())
    print('')
    print('')

def get_should_grep_data(scan_log_filename, scan):
    scan.seek(0)

    should_grep_data = []
    should_grep_timestamps = []

    for line in scan:
        match = SHOULD_GREP_STATS.search(line)
        if not match:
            continue

        try:
            stats_dict = to_dict(match.group(1))
        except:
            print('Warning: %s is not valid JSON' % match.group(1))
            continue
        else:
            should_grep_data.append(stats_dict)
            should_grep_timestamps.append(get_line_epoch(line))

    return should_grep_data, should_grep_timestamps

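# to_dict() is imported from another module in this package. Judging by the
# warning printed above, it parses the dict-like payload captured by
# SHOULD_GREP_STATS. The sketch below is an assumption, not the real helper:
# a minimal version could simply delegate to the json module.
def _example_to_dict(raw_stats):
    """Illustrative only: parse a JSON-encoded stats payload into a dict."""
    import json
    return json.loads(raw_stats)
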
def show_timeout(scan_log_filename, scan):
    scan.seek(0)

    timeouts = []
    timeout_timestamps = []

    for line in scan:
        match = SOCKET_TIMEOUT.search(line)
        if match:
            timeouts.append(float(match.group(1)))
            timeout_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp
    timeout_timestamps = [ts - first_timestamp for ts in timeout_timestamps]

    if not timeouts:
        print('No socket timeout data found')
        return

    print('Socket timeout over time')
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Socket timeout'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(timeout_timestamps,
             timeouts,
             label='Timeout')

    print(fig.show())
    print('')
    print('')

def show_consumer_pool_size(scan_log_filename, scan):
    scan.seek(0)

    consumer_pool_perc_audit = []
    consumer_pool_timestamps_audit = []

    consumer_pool_perc_crawl = []
    consumer_pool_timestamps_crawl = []

    worker_pool_perc = []
    worker_pool_timestamps = []

    for line in scan:
        match = IDLE_CONSUMER_WORKERS.search(line)
        if not match:
            continue

        percent = int(match.group(1))
        is_audit = 'audit' in match.group(2).lower()
        is_crawl = 'crawl' in match.group(2).lower()

        if is_audit:
            consumer_pool_perc_audit.append(percent)
            consumer_pool_timestamps_audit.append(get_line_epoch(line))
        elif is_crawl:
            consumer_pool_perc_crawl.append(percent)
            consumer_pool_timestamps_crawl.append(get_line_epoch(line))
        else:
            worker_pool_perc.append(percent)
            worker_pool_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    consumer_pool_timestamps_audit = [ts - first_timestamp for ts in consumer_pool_timestamps_audit]
    consumer_pool_timestamps_crawl = [ts - first_timestamp for ts in consumer_pool_timestamps_crawl]
    worker_pool_timestamps = [ts - first_timestamp for ts in worker_pool_timestamps]

    if not consumer_pool_perc_audit and not consumer_pool_perc_crawl:
        print('No thread pool data found')
        return

    print('Idle thread pool workers over time')

    if worker_pool_perc:
        print(' Latest idle core workers %s%%' % worker_pool_perc[-1])

    if consumer_pool_perc_audit:
        print(' Latest idle audit workers %s%%' % consumer_pool_perc_audit[-1])

    if consumer_pool_perc_crawl:
        print(' Latest idle crawl-infra workers %s%%' % consumer_pool_perc_crawl[-1])

    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Idle worker (%)'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=101)

    fig.plot(consumer_pool_timestamps_audit,
             consumer_pool_perc_audit,
             label='Idle audit workers',
             lc=50)

    fig.plot(consumer_pool_timestamps_crawl,
             consumer_pool_perc_crawl,
             label='Idle crawl workers',
             lc=170)

    fig.plot(worker_pool_timestamps,
             worker_pool_perc,
             label='Idle core workers',
             lc=250)

    print(fig.show(legend=True))
    print('')
    print('')

def show_progress_delta(scan_log_filename, scan):
    first_timestamp = get_first_timestamp(scan)

    #
    # Find the end times for crawl, audit, grep
    #
    scan.seek(0)

    crawl_end_timestamp = None
    audit_end_timestamp = None
    grep_end_timestamp = None

    for line in scan:
        if CRAWL_INFRA_FINISHED in line:
            crawl_end_timestamp = get_line_epoch(line)

        if 'seconds to join' not in line:
            continue

        match = JOIN_TIMES.search(line)
        if match:
            if 'audit' in line.lower():
                audit_end_timestamp = get_line_epoch(line)

            if 'grep' in line.lower():
                grep_end_timestamp = get_line_epoch(line)

    #
    # Find the crawl, audit and grep progress estimations
    #
    scan.seek(0)

    crawl_progress = []
    crawl_progress_timestamps = []

    audit_progress = []
    audit_progress_timestamps = []

    grep_progress = []
    grep_progress_timestamps = []

    for line in scan:
        match = CALCULATED_ETA.search(line)
        if match:
            ts = get_line_epoch(line)

            eta = match.group(2)
            if eta == 'None':
                eta = '0.0'
            eta = float(eta)

            percentage = (ts - first_timestamp) / (ts - first_timestamp + eta) * 100

            if 'crawl' in line.lower():
                crawl_progress_timestamps.append(ts)
                crawl_progress.append(percentage)

            if 'audit' in line.lower():
                audit_progress_timestamps.append(ts)
                audit_progress.append(percentage)

            if 'grep' in line.lower():
                grep_progress_timestamps.append(ts)
                grep_progress.append(percentage)

    # Make the timestamps relative to the scan finish
    crawl_progress_timestamps = [ts - first_timestamp for ts in crawl_progress_timestamps]
    audit_progress_timestamps = [ts - first_timestamp for ts in audit_progress_timestamps]
    grep_progress_timestamps = [ts - first_timestamp for ts in grep_progress_timestamps]

    #
    # Find the overall progress estimations
    #
    scan.seek(0)

    progress = []
    progress_timestamps = []

    for line in scan:
        match = SCAN_PROGRESS.search(line)
        if match:
            progress.append(int(match.group(1)))
            progress_timestamps.append(get_line_epoch(line))

    # Get the last timestamp to use as max in the graphs
    progress_timestamps = [ts - first_timestamp for ts in progress_timestamps]

    scan.seek(0)
    first_timestamp = get_first_timestamp(scan)

    finished_timestamp = None
    for line in scan:
        match = SCAN_FINISHED_IN.search(line)
        if match:
            finished_timestamp = get_line_epoch(line)

    finished_timestamp = finished_timestamp or get_last_timestamp(scan)
    spent_time_epoch = finished_timestamp - first_timestamp

    print('Progress delta (estimated vs. real)')
    print('')

    if crawl_progress and crawl_end_timestamp is not None:
        fig = plotille.Figure()
        fig.width = 90
        fig.height = 20
        fig.register_label_formatter(float, _num_formatter)
        fig.register_label_formatter(int, _num_formatter)
        fig.y_label = 'Progress'
        fig.x_label = 'Time'
        fig.color_mode = 'byte'
        fig.set_x_limits(min_=0, max_=spent_time_epoch)
        fig.set_y_limits(min_=0, max_=None)

        fig.plot(crawl_progress_timestamps,
                 crawl_progress,
                 label='Crawl (estimated)')

        crawl_real_spent = int(crawl_end_timestamp) - int(first_timestamp)
        crawl_real_progress_timestamps = range(int(first_timestamp), int(crawl_end_timestamp), 1)
        crawl_real_progress_timestamps = [ts - first_timestamp for ts in crawl_real_progress_timestamps]

        crawl_real_progress = []
        for ts in crawl_real_progress_timestamps:
            crawl_real_progress.append(float(ts) / crawl_real_spent * 100)

        fig.plot(crawl_real_progress_timestamps,
                 crawl_real_progress,
                 label='Crawl (real)')

        print(fig.show(legend=True))
        print('')
        print('')

    if audit_progress and audit_end_timestamp is not None:
        fig = plotille.Figure()
        fig.width = 90
        fig.height = 20
        fig.register_label_formatter(float, _num_formatter)
        fig.register_label_formatter(int, _num_formatter)
        fig.y_label = 'Progress'
        fig.x_label = 'Time'
        fig.color_mode = 'byte'
        fig.set_x_limits(min_=0, max_=spent_time_epoch)
        fig.set_y_limits(min_=0, max_=None)

        fig.plot(audit_progress_timestamps,
                 audit_progress,
                 label='Audit (estimated)')

        audit_real_spent = int(audit_end_timestamp) - int(first_timestamp)
        audit_real_progress_timestamps = range(int(first_timestamp), int(audit_end_timestamp), 1)
        audit_real_progress_timestamps = [ts - first_timestamp for ts in audit_real_progress_timestamps]

        audit_real_progress = []
        for ts in audit_real_progress_timestamps:
            audit_real_progress.append(float(ts) / audit_real_spent * 100)

        fig.plot(audit_real_progress_timestamps,
                 audit_real_progress,
                 label='Audit (real)')

        print(fig.show(legend=True))
        print('')
        print('')

    if grep_progress and grep_end_timestamp is not None:
        fig = plotille.Figure()
        fig.width = 90
        fig.height = 20
        fig.register_label_formatter(float, _num_formatter)
        fig.register_label_formatter(int, _num_formatter)
        fig.y_label = 'Progress'
        fig.x_label = 'Time'
        fig.color_mode = 'byte'
        fig.set_x_limits(min_=0, max_=spent_time_epoch)
        fig.set_y_limits(min_=0, max_=None)

        fig.plot(grep_progress_timestamps,
                 grep_progress,
                 label='Grep (estimated)')

        grep_real_spent = int(grep_end_timestamp) - int(first_timestamp)
        grep_real_progress_timestamps = range(int(first_timestamp), int(grep_end_timestamp), 1)
        grep_real_progress_timestamps = [ts - first_timestamp for ts in grep_real_progress_timestamps]

        grep_real_progress = []
        for ts in grep_real_progress_timestamps:
            grep_real_progress.append(float(ts) / grep_real_spent * 100)

        fig.plot(grep_real_progress_timestamps,
                 grep_real_progress,
                 label='Grep (real)')

        print(fig.show(legend=True))
        print('')
        print('')

    if not progress:
        print('No progress data to calculate deltas (requirement: enable xml_file plugin)')
        return

    if finished_timestamp is None:
        print('The scan did not finish. Can not show progress delta.')
        return

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Progress'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(progress_timestamps,
             progress,
             label='Overall (estimated)')

    real_progress = []
    for ts in progress_timestamps:
        real_progress_i = ts / float(spent_time_epoch) * 100
        real_progress.append(real_progress_i)

    fig.plot(progress_timestamps,
             real_progress,
             label='Overall (real)')

    print(fig.show(legend=True))
    print('')
    print('')

def show_parser_errors(scan_log_filename, scan):
    scan.seek(0)

    timeout_count = 0
    timeout_errors = []
    timeout_errors_timestamps = []

    memory_count = 0
    memory_errors = []
    memory_errors_timestamps = []

    for line in scan:
        if PARSER_TIMEOUT in line:
            timeout_count += 1
            timeout_errors.append(timeout_count)
            timeout_errors_timestamps.append(get_line_epoch(line))

        if PARSER_MEMORY_LIMIT in line:
            memory_count += 1
            memory_errors.append(memory_count)
            memory_errors_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_epoch = last_timestamp - first_timestamp

    timeout_errors_timestamps = [ts - first_timestamp for ts in timeout_errors_timestamps]
    memory_errors_timestamps = [ts - first_timestamp for ts in memory_errors_timestamps]

    if not memory_errors and not timeout_errors:
        print('No parser errors found')
        print('')
        return

    print('Parser errors')
    print(' Timeout errors: %s' % timeout_count)
    print(' Memory errors: %s' % memory_count)
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Parser errors'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    #fig.set_x_limits(min_=0, max_=spent_epoch)
    fig.set_y_limits(min_=0, max_=max(memory_count, timeout_count))

    # Time goes on the x axis and the cumulative error count on the y axis,
    # matching the axis labels configured above
    fig.plot(timeout_errors_timestamps,
             timeout_errors,
             label='Timeout errors',
             lc=50)

    fig.plot(memory_errors_timestamps,
             memory_errors,
             label='Memory errors',
             lc=200)

    print(fig.show(legend=True))
    print('')
    print('')

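# The functions above all share the same calling convention: they receive the
# scan log filename plus an already-open, seekable file handle, and each one
# rewinds the handle itself with scan.seek(0). The snippet below is only an
# illustration of that convention (the real entry point and its list of enabled
# reports live elsewhere in this package); _example_show_report is hypothetical.
def _example_show_report(scan_log_filename):
    """Illustrative only: run a few of the reports defined in this module."""
    with open(scan_log_filename) as scan:
        show_freeze_locations(scan_log_filename, scan)
        show_http_requests_over_time(scan_log_filename, scan)
        show_consumer_pool_size(scan_log_filename, scan)
        show_parser_errors(scan_log_filename, scan)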