def draw_timeout(scan_log_filename, scan):
    """
    Print a terminal chart of the socket timeout values returned by
    get_timeout_data().

    :param scan_log_filename: Forwarded unchanged to get_timeout_data()
    :param scan: Scan log handle, rewound with seek(0) before it is read
    :return: None, everything is printed to stdout
    """
    scan.seek(0)
    values, stamps = get_timeout_data(scan_log_filename, scan)

    scan_start = get_first_timestamp(scan)
    scan_end = get_last_timestamp(scan)
    elapsed = scan_end - scan_start

    # The X axis is relative to the first timestamp of the log
    offsets = [stamp - scan_start for stamp in stamps]

    if not values:
        print('No socket timeout data found')
        return

    print('Socket timeout over time')
    print('')

    chart = plotille.Figure()
    chart.width = 90
    chart.height = 20
    for numeric_type in (float, int):
        chart.register_label_formatter(numeric_type, num_formatter)
    chart.y_label = 'Socket timeout'
    chart.x_label = 'Time'
    chart.color_mode = 'byte'
    chart.set_x_limits(min_=0, max_=elapsed)
    chart.set_y_limits(min_=0, max_=None)
    chart.plot(offsets, values, label='Timeout')

    print(chart.show())
    print('')
def draw_rtt(scan_log_filename, scan):
    """
    Print a terminal chart of the RTT samples returned by get_rtt_data().

    :param scan_log_filename: Forwarded unchanged to get_rtt_data()
    :param scan: Scan log handle, rewound with seek(0) before it is read
    :return: None, everything is printed to stdout
    """
    scan.seek(0)
    samples, sample_times = get_rtt_data(scan_log_filename, scan)

    start = get_first_timestamp(scan)
    end = get_last_timestamp(scan)
    duration = end - start

    # Shift every sample so that the chart starts at zero
    sample_offsets = [moment - start for moment in sample_times]

    if not samples:
        print('No RTT data found')
        return

    print('RTT over time')
    print('')

    figure = plotille.Figure()
    figure.width = 90
    figure.height = 20
    for label_type in (float, int):
        figure.register_label_formatter(label_type, num_formatter)
    figure.y_label = 'RTT'
    figure.x_label = 'Time'
    figure.color_mode = 'byte'
    figure.set_x_limits(min_=0, max_=duration)
    figure.set_y_limits(min_=0, max_=None)
    figure.plot(sample_offsets, samples, label='RTT')

    print(figure.show())
    print('')
def draw_queue_size_grep(scan_log_filename, scan):
    """
    Print the latest grep consumer queue size and a terminal chart of the
    sizes returned by get_queue_size_grep_data().

    :param scan_log_filename: Forwarded unchanged to get_queue_size_grep_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    sizes, stamps = get_queue_size_grep_data(scan_log_filename, scan)

    # The scan duration is used as the upper bound of the X axis
    scan_start = get_first_timestamp(scan)
    scan_end = get_last_timestamp(scan)
    duration = scan_end - scan_start

    offsets = [stamp - scan_start for stamp in stamps]

    if not sizes:
        print('No grep consumer queue size data found')
        return

    print('Grep consumer queue size')
    print(' Latest queue size value: %s' % sizes[-1])
    print('')

    chart = plotille.Figure()
    chart.width = 90
    chart.height = 20
    for numeric_type in (float, int):
        chart.register_label_formatter(numeric_type, num_formatter)
    chart.y_label = 'Items in Grep queue'
    chart.x_label = 'Time'
    chart.color_mode = 'byte'
    chart.set_x_limits(min_=0, max_=duration)
    chart.set_y_limits(min_=0, max_=None)
    chart.plot(offsets, sizes, label='Grep')

    print(chart.show())
    print('')
def draw_worker_pool_size(scan_log_filename, scan):
    """
    Print a terminal chart of the worker pool sizes returned by
    get_worker_pool_size_data().

    :param scan_log_filename: Forwarded unchanged to get_worker_pool_size_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    pool_sizes, pool_stamps = get_worker_pool_size_data(scan_log_filename, scan)

    begin = get_first_timestamp(scan)
    finish = get_last_timestamp(scan)
    total_time = finish - begin

    # Express each sample relative to the first timestamp of the log
    pool_offsets = [stamp - begin for stamp in pool_stamps]

    if not pool_sizes:
        print('No worker pool size data found')
        return

    print('Worker pool size over time')
    print('')

    figure = plotille.Figure()
    figure.width = 90
    figure.height = 20
    for label_type in (float, int):
        figure.register_label_formatter(label_type, num_formatter)
    figure.y_label = 'Worker pool size'
    figure.x_label = 'Time'
    figure.color_mode = 'byte'
    figure.set_x_limits(min_=0, max_=total_time)
    figure.set_y_limits(min_=0, max_=None)
    figure.plot(pool_offsets, pool_sizes, label='Workers')

    print(figure.show())
    print('')
def draw_queue_size_grep(scan_log_filename, scan):
    """
    Chart the grep consumer queue size over the duration of the scan.

    The data comes from get_queue_size_grep_data(); the most recent value is
    printed above the chart.

    :param scan_log_filename: Forwarded unchanged to get_queue_size_grep_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    queue_sizes, queue_stamps = get_queue_size_grep_data(scan_log_filename,
                                                         scan)

    # First and last timestamps define the X range of the chart
    t_first = get_first_timestamp(scan)
    t_last = get_last_timestamp(scan)
    x_max = t_last - t_first

    queue_offsets = [moment - t_first for moment in queue_stamps]

    if not queue_sizes:
        print('No grep consumer queue size data found')
        return

    print('Grep consumer queue size')
    print(' Latest queue size value: %s' % queue_sizes[-1])
    print('')

    plot = plotille.Figure()
    plot.width = 90
    plot.height = 20
    for kind in (float, int):
        plot.register_label_formatter(kind, num_formatter)
    plot.y_label = 'Items in Grep queue'
    plot.x_label = 'Time'
    plot.color_mode = 'byte'
    plot.set_x_limits(min_=0, max_=x_max)
    plot.set_y_limits(min_=0, max_=None)
    plot.plot(queue_offsets, queue_sizes, label='Grep')

    print(plot.show())
    print('')
def draw_parser_process_memory_limit(scan_log_filename, scan):
    """
    Print a terminal chart of the parser process memory limit values
    returned by get_parser_process_memory_limit_data().

    :param scan_log_filename: Forwarded unchanged to the data helper
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    limits, limit_stamps = get_parser_process_memory_limit_data(scan_log_filename, scan)

    scan_start = get_first_timestamp(scan)
    scan_end = get_last_timestamp(scan)
    elapsed = scan_end - scan_start

    limit_offsets = [stamp - scan_start for stamp in limit_stamps]

    if not limits:
        print('No parser process memory limit information found')
        return

    chart = plotille.Figure()
    chart.width = 90
    chart.height = 20
    for numeric_type in (float, int):
        chart.register_label_formatter(numeric_type, num_formatter)
    chart.y_label = 'Parser memory limit (MB)'
    chart.x_label = 'Time'
    chart.color_mode = 'byte'
    chart.set_x_limits(min_=0, max_=elapsed)
    # Leave 10% of headroom above the highest value
    chart.set_y_limits(min_=0, max_=max(limits) * 1.1)
    chart.plot(limit_offsets, limits, label='Memory limit')

    print(chart.show())
    print('')
def draw_active_threads(scan_log_filename, scan):
    """
    Print a terminal chart of the active thread counts returned by
    get_active_threads_data().

    :param scan_log_filename: Forwarded unchanged to get_active_threads_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    thread_counts, thread_stamps = get_active_threads_data(scan_log_filename,
                                                           scan)

    begin = get_first_timestamp(scan)
    finish = get_last_timestamp(scan)
    duration = finish - begin

    # Shift the samples so the chart starts at zero
    thread_offsets = [stamp - begin for stamp in thread_stamps]

    if not thread_counts:
        print('No active thread data found')
        return

    print('Active thread count over time')
    print('')

    figure = plotille.Figure()
    figure.width = 90
    figure.height = 20
    for label_type in (float, int):
        figure.register_label_formatter(label_type, num_formatter)
    figure.y_label = 'Thread count'
    figure.x_label = 'Time'
    figure.color_mode = 'byte'
    figure.set_x_limits(min_=0, max_=duration)
    figure.set_y_limits(min_=0, max_=None)
    figure.plot(thread_offsets, thread_counts)

    print(figure.show())
    print('')
def draw_extended_urllib_error_rate(scan_log_filename, scan):
    """
    Print a terminal chart of the error rate values returned by
    get_error_rate_data().

    :param scan_log_filename: Forwarded unchanged to get_error_rate_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    rates, rate_stamps = get_error_rate_data(scan_log_filename, scan)

    t_first = get_first_timestamp(scan)
    t_last = get_last_timestamp(scan)
    x_max = t_last - t_first

    rate_offsets = [moment - t_first for moment in rate_stamps]

    if not rates:
        print('No error rate information found')
        print('')
        return

    plot = plotille.Figure()
    plot.width = 90
    plot.height = 20
    for kind in (float, int):
        plot.register_label_formatter(kind, num_formatter)
    plot.y_label = 'Error rate'
    plot.x_label = 'Time'
    plot.color_mode = 'byte'
    plot.set_x_limits(min_=0, max_=x_max)
    # Leave 10% of headroom above the highest value
    plot.set_y_limits(min_=0, max_=max(rates) * 1.1)
    plot.plot(rate_offsets, rates, label='Error rate')

    print(plot.show())
    print('')
def show_connection_pool_wait(scan_log_filename, scan):
    """
    Print the time spent waiting for a connection from the pool: the total,
    a chart of the waits over time and a histogram of the wait times.

    The waits are extracted from the lines matching CONNECTION_POOL_WAIT.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    scan.seek(0)

    connection_pool_waits = []
    connection_pool_timestamps = []

    for line in scan:
        match = CONNECTION_POOL_WAIT.search(line)
        if match:
            connection_pool_waits.append(float(match.group(1)))
            connection_pool_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    # The X axis is relative to the first timestamp of the log
    connection_pool_timestamps = [ts - first_timestamp for ts in connection_pool_timestamps]

    if not connection_pool_waits:
        print('No connection pool wait data found')
        return

    print('Time waited for worker threads for an available TCP/IP connection')
    print(' Total: %.2f sec' % sum(connection_pool_waits))
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Waited time'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(connection_pool_timestamps, connection_pool_waits)

    print(fig.show())
    print('')
    print('')

    # The histogram is rendered by plotille.hist() directly, so no second
    # Figure is needed. Same guard as draw_connection_pool_wait(): skip the
    # histogram when there are too few samples to bin.
    if len(connection_pool_waits) <= 2:
        print('Not enough connection pool wait data to create histogram')
        return

    print('Time waiting for available TCP/IP connection')
    print('')
    print(plotille.hist(connection_pool_waits, bins=25))
    print('')
    print('')
def draw_should_grep(scan_log_filename, scan):
    """
    Print the latest should_grep() counters, their percentages and a chart
    with the percentage of each counter over time.

    Each data point returned by get_should_grep_data() is used as a mapping
    from counter name to count.

    :param scan_log_filename: Forwarded unchanged to get_should_grep_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    should_grep_data, should_grep_timestamps = get_should_grep_data(scan_log_filename, scan)

    # The scan duration is used as the upper bound of the X axis
    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    should_grep_timestamps = [ts - first_timestamp for ts in should_grep_timestamps]

    if not should_grep_data:
        print('No should_grep data found')
        return

    last_data = should_grep_data[-1]

    print('should_grep() stats')
    print(' Latest should_grep() count: %r' % last_data)

    # Calculate the percentages; float() forces true division. When every
    # counter is zero report 0% instead of dividing by zero, which is what
    # the plotting loop below does for empty data points.
    total = float(sum(last_data.values()))
    if total:
        data_percent = dict((k, round((v / total) * 100)) for k, v in last_data.items())
    else:
        data_percent = dict((k, 0) for k in last_data)

    print(' Latest should_grep() percentages: %r' % data_percent)
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, num_formatter)
    fig.register_label_formatter(int, num_formatter)
    fig.y_label = 'Percentage of rejected and accepted HTTP request and response grep tasks'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    # The total of each data point does not depend on the key being plotted,
    # compute it only once
    totals = [float(sum(data_point.values())) for data_point in should_grep_data]

    for key in last_data:
        key_slice = []

        for data_point, point_total in zip(should_grep_data, totals):
            if point_total == 0:
                key_slice.append(0)
                continue

            key_slice.append((data_point[key] / point_total) * 100)

        fig.plot(should_grep_timestamps, key_slice, label=key)

    print(fig.show(legend=True))
    print('')
def draw_consumer_pool_size(scan_log_filename, scan):
    """
    Print the latest idle worker percentages and a chart with the idle
    percentage of the audit, crawl and core worker pools over time.

    The six series come from get_consumer_pool_size_data().

    :param scan_log_filename: Forwarded unchanged to get_consumer_pool_size_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    (consumer_pool_perc_audit, consumer_pool_timestamps_audit,
     consumer_pool_perc_crawl, consumer_pool_timestamps_crawl,
     worker_pool_perc, worker_pool_timestamps) = get_consumer_pool_size_data(scan_log_filename, scan)

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    # The X axis is relative to the first timestamp of the log
    consumer_pool_timestamps_audit = [ts - first_timestamp for ts in consumer_pool_timestamps_audit]
    consumer_pool_timestamps_crawl = [ts - first_timestamp for ts in consumer_pool_timestamps_crawl]
    worker_pool_timestamps = [ts - first_timestamp for ts in worker_pool_timestamps]

    if not consumer_pool_perc_audit and not consumer_pool_perc_crawl:
        print('No thread pool data found')
        return

    print('Idle thread pool workers over time')

    # Each series can be empty independently of the others, only print the
    # latest value for the ones that have data
    if worker_pool_perc:
        print(' Latest idle core workers %s%%' % worker_pool_perc[-1])

    if consumer_pool_perc_audit:
        print(' Latest idle audit workers %s%%' % consumer_pool_perc_audit[-1])

    if consumer_pool_perc_crawl:
        print(' Latest idle crawl-infra workers %s%%' % consumer_pool_perc_crawl[-1])

    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, num_formatter)
    fig.register_label_formatter(int, num_formatter)
    fig.y_label = 'Idle worker (%)'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=101)

    fig.plot(consumer_pool_timestamps_audit,
             consumer_pool_perc_audit,
             label='Idle audit workers',
             lc=50)

    fig.plot(consumer_pool_timestamps_crawl,
             consumer_pool_perc_crawl,
             label='Idle crawl workers',
             lc=170)

    fig.plot(worker_pool_timestamps,
             worker_pool_perc,
             label='Idle core workers',
             lc=250)

    print(fig.show(legend=True))
    print('')
def draw_connection_pool_wait(scan_log_filename, scan):
    """
    Print a chart of the connection pool waits over time followed by a
    histogram of the wait times.

    The data comes from get_connection_pool_wait_data(). The histogram is
    skipped when there are two or fewer samples.

    :param scan_log_filename: Forwarded unchanged to get_connection_pool_wait_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    connection_pool_waits, connection_pool_timestamps = get_connection_pool_wait_data(scan_log_filename, scan)

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    # The X axis is relative to the first timestamp of the log
    connection_pool_timestamps = [ts - first_timestamp for ts in connection_pool_timestamps]

    if not connection_pool_waits:
        print('No connection pool wait data found')
        return

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, num_formatter)
    fig.register_label_formatter(int, num_formatter)
    fig.y_label = 'Waited time'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    fig.plot(connection_pool_timestamps, connection_pool_waits)

    print(fig.show())
    print('')
    print('')

    # The histogram is rendered by plotille.hist() directly, so no second
    # Figure needs to be configured for it
    if len(connection_pool_waits) <= 2:
        print('Not enough connection pool wait data to create histogram')
        return

    print('Time waiting for available TCP/IP connection')
    print('')
    print(plotille.hist(connection_pool_waits, bins=25))
    print('')
    print('')
def show_scan_finished_in(scan_log_filename, scan):
    """
    Print the first SCAN_FINISHED_IN match found in the log. When no line
    matches, report that the scan is still running and for how long.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    scan.seek(0)

    started_at = get_first_timestamp(scan)

    for log_line in scan:
        finished = SCAN_FINISHED_IN.search(log_line)
        if not finished:
            continue

        print(finished.group(0))
        return

    # No "finished" line: the run time is measured up to the last timestamp
    running_for = get_last_timestamp(scan) - started_at

    print('Scan is still running!')
    print(' Started %s ago' % epoch_to_string(running_for))
def show_queue_size_audit(scan_log_filename, scan):
    """
    Print the latest audit consumer queue size and a chart of the sizes
    extracted from the lines matching AUDITOR_DISK_DICT.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    scan.seek(0)

    sizes = []
    stamps = []

    for log_line in scan:
        found = AUDITOR_DISK_DICT.search(log_line)
        if not found:
            continue

        sizes.append(int(found.group(1)))
        stamps.append(get_line_epoch(log_line))

    # The scan duration is used as the upper bound of the X axis
    scan_start = get_first_timestamp(scan)
    scan_end = get_last_timestamp(scan)
    duration = scan_end - scan_start

    offsets = [stamp - scan_start for stamp in stamps]

    if not sizes:
        print('No audit consumer queue size data found')
        print('')
        return

    print('Audit consumer queue size')
    print(' Latest queue size value: %s' % sizes[-1])
    print('')

    chart = plotille.Figure()
    chart.width = 90
    chart.height = 20
    for numeric_type in (float, int):
        chart.register_label_formatter(numeric_type, _num_formatter)
    chart.y_label = 'Items in Audit queue'
    chart.x_label = 'Time'
    chart.color_mode = 'byte'
    chart.set_x_limits(min_=0, max_=duration)
    chart.set_y_limits(min_=0, max_=None)
    chart.plot(offsets, sizes, label='Audit')

    print(chart.show())
    print('')
    print('')
def show_parser_process_memory_limit(scan_log_filename, scan):
    """
    Print the latest parser process memory limit and a chart of the limits
    extracted from the lines matching PARSER_PROCESS_MEMORY_LIMIT.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    scan.seek(0)

    limits = []
    limit_stamps = []

    for log_line in scan:
        found = PARSER_PROCESS_MEMORY_LIMIT.search(log_line)
        if not found:
            continue

        limits.append(int(found.group(1)))
        limit_stamps.append(get_line_epoch(log_line))

    begin = get_first_timestamp(scan)
    finish = get_last_timestamp(scan)
    elapsed = finish - begin

    limit_offsets = [stamp - begin for stamp in limit_stamps]

    if not limits:
        print('No parser process memory limit information found')
        return

    print('Parser process memory limit')
    print(' Latest memory limit: %s MB' % limits[-1])
    print('')

    figure = plotille.Figure()
    figure.width = 90
    figure.height = 20
    for label_type in (float, int):
        figure.register_label_formatter(label_type, _num_formatter)
    figure.y_label = 'Parser memory limit (MB)'
    figure.x_label = 'Time'
    figure.color_mode = 'byte'
    figure.set_x_limits(min_=0, max_=elapsed)
    # Leave 10% of headroom above the highest value
    figure.set_y_limits(min_=0, max_=max(limits) * 1.1)
    figure.plot(limit_offsets, limits, label='Memory limit')

    print(figure.show())
    print('')
    print('')
def show_extended_urllib_error_rate(scan_log_filename, scan):
    """
    Print whether the error rate exceeded 10% and 20%, and a chart of the
    error rates extracted from the lines matching EXTENDED_URLLIB_ERRORS_RE.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    # Like the other show_* functions, rewind the log so the loop below
    # starts at the first line regardless of what read it before
    scan.seek(0)

    error_rate = []
    error_rate_timestamps = []

    for line in scan:
        match = EXTENDED_URLLIB_ERRORS_RE.search(line)
        if match:
            error_rate.append(int(match.group(1)))
            error_rate_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_epoch = last_timestamp - first_timestamp

    # The X axis is relative to the first timestamp of the log
    error_rate_timestamps = [ts - first_timestamp for ts in error_rate_timestamps]

    if not error_rate:
        print('No error rate information found')
        print('')
        return

    peak_error_rate = max(error_rate)

    print('Extended URL library error rate')
    print(' Error rate exceeded 10%%: %s' % (peak_error_rate > 10,))
    # Compare against 20 here: this line used to repeat the 10% threshold
    print(' Error rate exceeded 20%%: %s' % (peak_error_rate > 20,))
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Error rate'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_epoch)
    # Leave 10% of headroom above the highest value
    fig.set_y_limits(min_=0, max_=peak_error_rate * 1.1)

    fig.plot(error_rate_timestamps, error_rate, label='Error rate')

    print(fig.show())
    print('')
    print('')
def show_worker_pool_size(scan_log_filename, scan):
    """
    Print a chart of the worker pool sizes extracted from the lines
    matching WORKER_POOL_SIZE.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    scan.seek(0)

    pool_sizes = []
    pool_stamps = []

    for log_line in scan:
        found = WORKER_POOL_SIZE.search(log_line)
        if not found:
            continue

        pool_sizes.append(int(found.group(1)))
        pool_stamps.append(get_line_epoch(log_line))

    t_first = get_first_timestamp(scan)
    t_last = get_last_timestamp(scan)
    x_max = t_last - t_first

    # Express each sample relative to the first timestamp of the log
    pool_offsets = [moment - t_first for moment in pool_stamps]

    if not pool_sizes:
        print('No worker pool size data found')
        return

    print('Worker pool size over time')
    print('')

    plot = plotille.Figure()
    plot.width = 90
    plot.height = 20
    for kind in (float, int):
        plot.register_label_formatter(kind, _num_formatter)
    plot.y_label = 'Worker pool size'
    plot.x_label = 'Time'
    plot.color_mode = 'byte'
    plot.set_x_limits(min_=0, max_=x_max)
    plot.set_y_limits(min_=0, max_=None)
    plot.plot(pool_offsets, pool_sizes, label='Workers')

    print(plot.show())
    print('')
    print('')
def get_scan_finished_in(scan_log_filename, scan):
    """
    Build the "scan_time" output: the time reported by the first
    SCAN_FINISHED_IN match or, when no line matches, the time between the
    first and last timestamps of the log.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: A KeyValueOutput with the keys "finished" and "scan_time"
    """
    scan.seek(0)

    started_at = get_first_timestamp(scan)

    for log_line in scan:
        finished = SCAN_FINISHED_IN.search(log_line)
        if not finished:
            continue

        state = {'finished': True,
                 'scan_time': finished.group(1)}
        return KeyValueOutput('scan_time', 'Scan time and state', state)

    # No "finished" line: the run time is measured up to the last timestamp
    running_for = get_last_timestamp(scan) - started_at

    state = {'finished': False,
             'scan_time': epoch_to_string(running_for)}
    return KeyValueOutput('scan_time', 'Scan time and state', state)
def show_timeout(scan_log_filename, scan):
    """
    Print a chart of the socket timeout values extracted from the lines
    matching SOCKET_TIMEOUT.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    scan.seek(0)

    values = []
    stamps = []

    for log_line in scan:
        found = SOCKET_TIMEOUT.search(log_line)
        if not found:
            continue

        values.append(float(found.group(1)))
        stamps.append(get_line_epoch(log_line))

    scan_start = get_first_timestamp(scan)
    scan_end = get_last_timestamp(scan)
    elapsed = scan_end - scan_start

    # The X axis is relative to the first timestamp of the log
    offsets = [stamp - scan_start for stamp in stamps]

    if not values:
        print('No socket timeout data found')
        return

    print('Socket timeout over time')
    print('')

    chart = plotille.Figure()
    chart.width = 90
    chart.height = 20
    for numeric_type in (float, int):
        chart.register_label_formatter(numeric_type, _num_formatter)
    chart.y_label = 'Socket timeout'
    chart.x_label = 'Time'
    chart.color_mode = 'byte'
    chart.set_x_limits(min_=0, max_=elapsed)
    chart.set_y_limits(min_=0, max_=None)
    chart.plot(offsets, values, label='Timeout')

    print(chart.show())
    print('')
    print('')
def draw_parser_errors(scan_log_filename, scan):
    """
    Print a chart with the parser timeout and memory error counts over
    time, using the series returned by get_parser_errors_data().

    :param scan_log_filename: Forwarded unchanged to get_parser_errors_data()
    :param scan: Scan log handle, passed to the data and timestamp helpers
    :return: None, everything is printed to stdout
    """
    (timeout_count, timeout_errors, timeout_errors_timestamps,
     memory_count, memory_errors, memory_errors_timestamps) = get_parser_errors_data(scan_log_filename, scan)

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_epoch = last_timestamp - first_timestamp

    # The X axis is relative to the first timestamp of the log
    timeout_errors_timestamps = [ts - first_timestamp for ts in timeout_errors_timestamps]
    memory_errors_timestamps = [ts - first_timestamp for ts in memory_errors_timestamps]

    if not memory_errors and not timeout_errors:
        print('No parser errors found')
        print('')
        return

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, num_formatter)
    fig.register_label_formatter(int, num_formatter)
    fig.y_label = 'Parser errors'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'

    # Only pin the X range to the scan duration when that range is not
    # empty, otherwise let plotille derive it from the data
    if spent_epoch > 0:
        fig.set_x_limits(min_=0, max_=spent_epoch)

    fig.set_y_limits(min_=0, max_=max(memory_count, timeout_count))

    # plot() takes X first: time goes on the X axis and the error counters
    # on the Y axis, matching the axis labels and the Y limits set above
    fig.plot(timeout_errors_timestamps,
             timeout_errors,
             label='Timeout errors',
             lc=50)

    fig.plot(memory_errors_timestamps,
             memory_errors,
             label='Memory errors',
             lc=200)

    print(fig.show(legend=True))
    print('')
def show_progress_delta(scan_log_filename, scan):
    """
    Print charts comparing the progress estimated during the scan with the
    real progress, which grows linearly with time.

    One chart is printed for each of the crawl, audit and grep phases that
    has both estimations and an end time in the log, followed by one chart
    for the overall scan progress when SCAN_PROGRESS lines are found.

    When no SCAN_FINISHED_IN line is found the last timestamp of the log is
    used as the end of the scan.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    phases = ('crawl', 'audit', 'grep')

    def new_figure(x_max):
        """Create a figure with the settings shared by all the charts."""
        fig = plotille.Figure()
        fig.width = 90
        fig.height = 20
        fig.register_label_formatter(float, _num_formatter)
        fig.register_label_formatter(int, _num_formatter)
        fig.y_label = 'Progress'
        fig.x_label = 'Time'
        fig.color_mode = 'byte'
        fig.set_x_limits(min_=0, max_=x_max)
        fig.set_y_limits(min_=0, max_=None)
        return fig

    def draw_phase(name, timestamps, percentages, end_timestamp, x_max):
        """Print the estimated vs. real progress chart for one phase."""
        fig = new_figure(x_max)

        fig.plot(timestamps, percentages, label='%s (estimated)' % name)

        # Real progress: one point per unit of time between the scan start
        # and the end of the phase, growing linearly from 0 to 100
        real_spent = int(end_timestamp) - int(first_timestamp)
        real_timestamps = [ts - first_timestamp
                           for ts in range(int(first_timestamp), int(end_timestamp), 1)]
        real_progress = [float(ts) / real_spent * 100 for ts in real_timestamps]

        fig.plot(real_timestamps, real_progress, label='%s (real)' % name)

        print(fig.show(legend=True))
        print('')
        print('')

    first_timestamp = get_first_timestamp(scan)

    #
    # Find the end times for crawl, audit, grep
    #
    scan.seek(0)

    end_timestamps = dict((phase, None) for phase in phases)

    for line in scan:
        if CRAWL_INFRA_FINISHED in line:
            end_timestamps['crawl'] = get_line_epoch(line)

        if 'seconds to join' not in line:
            continue

        if not JOIN_TIMES.search(line):
            continue

        lowered = line.lower()

        if 'audit' in lowered:
            end_timestamps['audit'] = get_line_epoch(line)

        if 'grep' in lowered:
            end_timestamps['grep'] = get_line_epoch(line)

    #
    # Find the crawl, audit and grep progress estimations
    #
    scan.seek(0)

    # phase -> (timestamps relative to the scan start, estimated percentages)
    estimations = dict((phase, ([], [])) for phase in phases)

    for line in scan:
        match = CALCULATED_ETA.search(line)
        if not match:
            continue

        eta = match.group(2)
        eta = 0.0 if eta == 'None' else float(eta)

        elapsed = get_line_epoch(line) - first_timestamp
        expected_total = elapsed + eta

        # No elapsed time and no ETA would be a 0 / 0, report no progress
        percentage = elapsed / expected_total * 100 if expected_total else 0.0

        lowered = line.lower()

        for phase in phases:
            if phase in lowered:
                timestamps, percentages = estimations[phase]
                timestamps.append(elapsed)
                percentages.append(percentage)

    #
    # Find the overall progress estimations
    #
    scan.seek(0)

    progress = []
    progress_timestamps = []

    for line in scan:
        match = SCAN_PROGRESS.search(line)
        if match:
            progress.append(int(match.group(1)))
            progress_timestamps.append(get_line_epoch(line))

    # Make the timestamps relative to the scan start
    progress_timestamps = [ts - first_timestamp for ts in progress_timestamps]

    #
    # Find the scan end, it is used as max in the graphs
    #
    scan.seek(0)

    finished_timestamp = None

    for line in scan:
        if SCAN_FINISHED_IN.search(line):
            finished_timestamp = get_line_epoch(line)

    # Scans which did not finish are measured up to the last log line
    finished_timestamp = finished_timestamp or get_last_timestamp(scan)
    spent_time_epoch = finished_timestamp - first_timestamp

    print('Progress delta (estimated vs. real)')
    print('')

    for phase in phases:
        timestamps, percentages = estimations[phase]
        end_timestamp = end_timestamps[phase]

        if percentages and end_timestamp is not None:
            draw_phase(phase.capitalize(),
                       timestamps,
                       percentages,
                       end_timestamp,
                       spent_time_epoch)

    if not progress:
        print('No progress data to calculate deltas (requirement: enable xml_file plugin)')
        return

    fig = new_figure(spent_time_epoch)

    fig.plot(progress_timestamps, progress, label='Overall (estimated)')

    real_progress = [ts / float(spent_time_epoch) * 100 for ts in progress_timestamps]

    fig.plot(progress_timestamps, real_progress, label='Overall (real)')

    print(fig.show(legend=True))
    print('')
    print('')
def show_progress_delta(scan_log_filename, scan):
    """
    Print charts comparing the progress estimated during the scan with the
    real progress, which grows linearly with time.

    One chart is printed for each of the crawl, audit and grep phases that
    has both estimations and an end time in the log, followed by one chart
    for the overall scan progress when SCAN_PROGRESS lines are found.

    When no SCAN_FINISHED_IN line is found the last timestamp of the log is
    used as the end of the scan.

    :param scan_log_filename: Not used by this function
    :param scan: Scan log handle supporting seek() and line iteration
    :return: None, everything is printed to stdout
    """
    phases = ('crawl', 'audit', 'grep')

    def new_figure(x_max):
        """Create a figure with the settings shared by all the charts."""
        fig = plotille.Figure()
        fig.width = 90
        fig.height = 20
        fig.register_label_formatter(float, num_formatter)
        fig.register_label_formatter(int, num_formatter)
        fig.y_label = 'Progress'
        fig.x_label = 'Time'
        fig.color_mode = 'byte'
        fig.set_x_limits(min_=0, max_=x_max)
        fig.set_y_limits(min_=0, max_=None)
        return fig

    def draw_phase(name, timestamps, percentages, end_timestamp, x_max):
        """Print the estimated vs. real progress chart for one phase."""
        fig = new_figure(x_max)

        fig.plot(timestamps, percentages, label='%s (estimated)' % name)

        # Real progress: one point per unit of time between the scan start
        # and the end of the phase, growing linearly from 0 to 100
        real_spent = int(end_timestamp) - int(first_timestamp)
        real_timestamps = [ts - first_timestamp
                           for ts in range(int(first_timestamp), int(end_timestamp), 1)]
        real_progress = [float(ts) / real_spent * 100 for ts in real_timestamps]

        fig.plot(real_timestamps, real_progress, label='%s (real)' % name)

        print(fig.show(legend=True))
        print('')
        print('')

    first_timestamp = get_first_timestamp(scan)

    #
    # Find the end times for crawl, audit, grep
    #
    scan.seek(0)

    end_timestamps = dict((phase, None) for phase in phases)

    for line in scan:
        if CRAWL_INFRA_FINISHED in line:
            end_timestamps['crawl'] = get_line_epoch(line)

        if 'seconds to join' not in line:
            continue

        if not JOIN_TIMES.search(line):
            continue

        lowered = line.lower()

        if 'audit' in lowered:
            end_timestamps['audit'] = get_line_epoch(line)

        if 'grep' in lowered:
            end_timestamps['grep'] = get_line_epoch(line)

    #
    # Find the crawl, audit and grep progress estimations
    #
    scan.seek(0)

    # phase -> (timestamps relative to the scan start, estimated percentages)
    estimations = dict((phase, ([], [])) for phase in phases)

    for line in scan:
        match = CALCULATED_ETA.search(line)
        if not match:
            continue

        eta = match.group(2)
        eta = 0.0 if eta == 'None' else float(eta)

        elapsed = get_line_epoch(line) - first_timestamp
        expected_total = elapsed + eta

        # No elapsed time and no ETA would be a 0 / 0, report no progress
        percentage = elapsed / expected_total * 100 if expected_total else 0.0

        lowered = line.lower()

        for phase in phases:
            if phase in lowered:
                timestamps, percentages = estimations[phase]
                timestamps.append(elapsed)
                percentages.append(percentage)

    #
    # Find the overall progress estimations
    #
    scan.seek(0)

    progress = []
    progress_timestamps = []

    for line in scan:
        match = SCAN_PROGRESS.search(line)
        if match:
            progress.append(int(match.group(1)))
            progress_timestamps.append(get_line_epoch(line))

    # Make the timestamps relative to the scan start
    progress_timestamps = [ts - first_timestamp for ts in progress_timestamps]

    #
    # Find the scan end, it is used as max in the graphs
    #
    scan.seek(0)

    finished_timestamp = None

    for line in scan:
        if SCAN_FINISHED_IN.search(line):
            finished_timestamp = get_line_epoch(line)

    # Scans which did not finish are measured up to the last log line
    finished_timestamp = finished_timestamp or get_last_timestamp(scan)
    spent_time_epoch = finished_timestamp - first_timestamp

    print('Progress delta (estimated vs. real)')
    print('')

    for phase in phases:
        timestamps, percentages = estimations[phase]
        end_timestamp = end_timestamps[phase]

        if percentages and end_timestamp is not None:
            draw_phase(phase.capitalize(),
                       timestamps,
                       percentages,
                       end_timestamp,
                       spent_time_epoch)

    if not progress:
        print('No progress data to calculate deltas (requirement: enable xml_file plugin)')
        return

    fig = new_figure(spent_time_epoch)

    fig.plot(progress_timestamps, progress, label='Overall (estimated)')

    real_progress = [ts / float(spent_time_epoch) * 100 for ts in progress_timestamps]

    fig.plot(progress_timestamps, real_progress, label='Overall (real)')

    print(fig.show(legend=True))
    print('')
    print('')
def show_parser_errors(scan_log_filename, scan):
    """Print parser error totals and plot their cumulative count over time.

    Every line of the scan log that contains PARSER_TIMEOUT or
    PARSER_MEMORY_LIMIT is counted. The running totals are plotted against
    the time elapsed since the first timestamp in the log.

    :param scan_log_filename: Name of the scan log (not used by this function)
    :param scan: Seekable, iterable file-like object holding the scan log
    :return: None, the output is printed to stdout
    """
    scan.seek(0)

    timeout_count = 0
    timeout_errors = []
    timeout_errors_timestamps = []

    memory_count = 0
    memory_errors = []
    memory_errors_timestamps = []

    for line in scan:
        # Two independent checks: a single line may match both markers
        if PARSER_TIMEOUT in line:
            timeout_count += 1
            timeout_errors.append(timeout_count)
            timeout_errors_timestamps.append(get_line_epoch(line))

        if PARSER_MEMORY_LIMIT in line:
            memory_count += 1
            memory_errors.append(memory_count)
            memory_errors_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_epoch = last_timestamp - first_timestamp

    # Make the timestamps relative to the start of the scan
    timeout_errors_timestamps = [ts - first_timestamp for ts in timeout_errors_timestamps]
    memory_errors_timestamps = [ts - first_timestamp for ts in memory_errors_timestamps]

    if not memory_errors and not timeout_errors:
        print('No parser errors found')
        print('')
        return

    print('Parser errors')
    print(' Timeout errors: %s' % timeout_count)
    print(' Memory errors: %s' % memory_count)
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Parser errors'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'

    # plotille rejects limits where min_ >= max_, so only pin the time axis
    # when the log spans a non-empty period; otherwise let it auto-scale.
    if spent_epoch > 0:
        fig.set_x_limits(min_=0, max_=spent_epoch)

    # At least one error was found above, so the upper limit is always >= 1
    fig.set_y_limits(min_=0, max_=max(memory_count, timeout_count))

    # Figure.plot() takes (X, Y): time goes on the X axis and the cumulative
    # error count on the Y axis, matching the axis labels and Y limits.
    fig.plot(timeout_errors_timestamps,
             timeout_errors,
             label='Timeout errors',
             lc=50)

    fig.plot(memory_errors_timestamps,
             memory_errors,
             label='Memory errors',
             lc=200)

    print(fig.show(legend=True))
    print('')
    print('')
def draw_consumer_pool_size(scan_log_filename, scan):
    """Plot the percentage of idle thread pool workers over time.

    The samples come from get_consumer_pool_size_data() and are split in
    three series: audit consumers, crawl consumers and core workers. The
    timestamps are made relative to the first timestamp in the scan log
    before plotting.

    :param scan_log_filename: Name of the scan log, forwarded to
                              get_consumer_pool_size_data()
    :param scan: File-like object holding the scan log
    :return: None, the output is printed to stdout
    """
    (consumer_pool_perc_audit,
     consumer_pool_timestamps_audit,
     consumer_pool_perc_crawl,
     consumer_pool_timestamps_crawl,
     worker_pool_perc,
     worker_pool_timestamps) = get_consumer_pool_size_data(scan_log_filename, scan)

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    # Make the timestamps relative to the start of the scan
    consumer_pool_timestamps_audit = [ts - first_timestamp for ts in consumer_pool_timestamps_audit]
    consumer_pool_timestamps_crawl = [ts - first_timestamp for ts in consumer_pool_timestamps_crawl]
    worker_pool_timestamps = [ts - first_timestamp for ts in worker_pool_timestamps]

    if not consumer_pool_perc_audit and not consumer_pool_perc_crawl:
        print('No thread pool data found')
        return

    print('Idle thread pool workers over time')

    # The check above does not cover the core worker series, so it may be
    # empty here; indexing it unconditionally would raise IndexError.
    if worker_pool_perc:
        print(' Latest idle core workers %s%%' % worker_pool_perc[-1])

    if consumer_pool_perc_audit:
        print(' Latest idle audit workers %s%%' % consumer_pool_perc_audit[-1])

    if consumer_pool_perc_crawl:
        print(' Latest idle crawl-infra workers %s%%' % consumer_pool_perc_crawl[-1])

    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, num_formatter)
    fig.register_label_formatter(int, num_formatter)
    fig.y_label = 'Idle worker (%)'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    # Upper limit is 101 rather than 100, presumably so 100% samples are
    # not drawn on the very edge of the canvas
    fig.set_y_limits(min_=0, max_=101)

    fig.plot(consumer_pool_timestamps_audit,
             consumer_pool_perc_audit,
             label='Idle audit workers',
             lc=50)

    fig.plot(consumer_pool_timestamps_crawl,
             consumer_pool_perc_crawl,
             label='Idle crawl workers',
             lc=170)

    fig.plot(worker_pool_timestamps,
             worker_pool_perc,
             label='Idle core workers',
             lc=250)

    print(fig.show(legend=True))
    print('')
def show_consumer_pool_size(scan_log_filename, scan):
    """Parse the idle worker samples from the log and plot them over time.

    Every line matching IDLE_CONSUMER_WORKERS contributes one sample. The
    sample goes to the audit series when the second regex group contains
    "audit", to the crawl series when it contains "crawl", and to the core
    worker series otherwise. Timestamps are made relative to the first
    timestamp in the scan log before plotting.

    :param scan_log_filename: Name of the scan log (not used by this function)
    :param scan: Seekable, iterable file-like object holding the scan log
    :return: None, the output is printed to stdout
    """
    scan.seek(0)

    consumer_pool_perc_audit = []
    consumer_pool_timestamps_audit = []

    consumer_pool_perc_crawl = []
    consumer_pool_timestamps_crawl = []

    worker_pool_perc = []
    worker_pool_timestamps = []

    for line in scan:
        match = IDLE_CONSUMER_WORKERS.search(line)
        if not match:
            continue

        percent = int(match.group(1))
        pool_name = match.group(2).lower()

        # "audit" takes precedence over "crawl"; anything else is treated
        # as the core worker pool
        if 'audit' in pool_name:
            consumer_pool_perc_audit.append(percent)
            consumer_pool_timestamps_audit.append(get_line_epoch(line))
        elif 'crawl' in pool_name:
            consumer_pool_perc_crawl.append(percent)
            consumer_pool_timestamps_crawl.append(get_line_epoch(line))
        else:
            worker_pool_perc.append(percent)
            worker_pool_timestamps.append(get_line_epoch(line))

    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp

    # Make the timestamps relative to the start of the scan
    consumer_pool_timestamps_audit = [ts - first_timestamp for ts in consumer_pool_timestamps_audit]
    consumer_pool_timestamps_crawl = [ts - first_timestamp for ts in consumer_pool_timestamps_crawl]
    worker_pool_timestamps = [ts - first_timestamp for ts in worker_pool_timestamps]

    if not consumer_pool_perc_audit and not consumer_pool_perc_crawl:
        print('No thread pool data found')
        return

    print('Idle thread pool workers over time')

    # The check above does not cover the core worker series, so it may be
    # empty here; indexing it unconditionally would raise IndexError.
    if worker_pool_perc:
        print(' Latest idle core workers %s%%' % worker_pool_perc[-1])

    if consumer_pool_perc_audit:
        print(' Latest idle audit workers %s%%' % consumer_pool_perc_audit[-1])

    if consumer_pool_perc_crawl:
        print(' Latest idle crawl-infra workers %s%%' % consumer_pool_perc_crawl[-1])

    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, _num_formatter)
    fig.register_label_formatter(int, _num_formatter)
    fig.y_label = 'Idle worker (%)'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    # Upper limit is 101 rather than 100, presumably so 100% samples are
    # not drawn on the very edge of the canvas
    fig.set_y_limits(min_=0, max_=101)

    fig.plot(consumer_pool_timestamps_audit,
             consumer_pool_perc_audit,
             label='Idle audit workers',
             lc=50)

    fig.plot(consumer_pool_timestamps_crawl,
             consumer_pool_perc_crawl,
             label='Idle crawl workers',
             lc=170)

    fig.plot(worker_pool_timestamps,
             worker_pool_perc,
             label='Idle core workers',
             lc=250)

    print(fig.show(legend=True))
    print('')
    print('')