def main():
    """Generate weekly reports for all products and sync them with storage.

    Steps:

    1. Verify that ``SLR_URI`` is set and that ``gnuplot`` is on ``PATH``;
       exit with status 1 otherwise.
    2. Pull the previously generated reports (``sync_reports(to_local=True)``).
    3. For every product returned by the API that has at least one SLO and
       one SLI, call ``generate_weekly_report``. A failure for one product is
       logged and does not stop the remaining products.
    4. Push the reports back (``sync_reports(to_local=False)``).

    A ``KeyboardInterrupt`` during generation logs a message and returns
    without uploading. Any other error outside the per-product handling is
    logged and terminates the process with status 1.
    """
    # Function-scope import: only this function needs it.
    import shutil

    if not SLR_URI:
        logger.error(
            'SLR_URI environment variable is required. Terminating ...')
        sys.exit(1)

    # shutil.which avoids depending on an external ``which`` binary, whose
    # absence would surface as an unhandled FileNotFoundError.
    if shutil.which('gnuplot') is None:
        logger.error(
            'Missing system dependency. Please install *gnuplot* system package!'
        )
        sys.exit(1)

    def new_client():
        # A fresh token is requested on every call unless SLR_TOKEN is set.
        token = SLR_TOKEN if SLR_TOKEN else zign.api.get_token('uid', ['uid'])
        return Client(SLR_URI, token)

    successful_reports = []
    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments
    # and, unlike ``timedelta.seconds``, is not truncated for runs >= 1 day.
    t_start = time.monotonic()

    # Get last reports
    sync_reports(to_local=True)

    try:
        client = new_client()

        # 1. Get all products
        logger.info('Starting reports generation ...')
        products = client.product_list(limit=1000)

        for product in products:
            # Token could expire if report generation takes a long time!
            client = new_client()
            name = product['name']

            try:
                # Make sure the product has the minimum req for generating a
                # report
                if not client.slo_list(product):
                    logger.info(
                        'Skipping generating report for product "{}". Reason: No SLO defined!'
                        .format(name))
                    continue

                if not client.sli_list(product):
                    logger.info(
                        'Skipping generating report for product "{}". Reason: No SLI defined!'
                        .format(name))
                    continue

                # Finally, generate the report
                logger.info('Generating report for product: {}'.format(name))
                generate_weekly_report(client, product, OUTPUT_DIR)
                logger.info(
                    'Finished generating report for product: {}'.format(name))
                successful_reports.append(name)

                # Pause for a minute after every 10th successful report.
                # NOTE(review): presumably throttles load on the API - confirm.
                if len(successful_reports) % 10 == 0:
                    time.sleep(60)
            except KeyboardInterrupt:
                logger.info('Report generation interrupted. Terminating ...')
                return
            except Exception:
                # ``Exception`` (not a bare except) so that SystemExit is not
                # swallowed and mistaken for a per-product failure.
                logger.exception(
                    'Failed to generate report for product: {}'.format(name))
    except KeyboardInterrupt:
        logger.info('Report generation interrupted. Terminating ...')
        return
    except Exception:
        logger.exception('Failed in generating reports. Terminating ...')
        sys.exit(1)

    elapsed_minutes = round((time.monotonic() - t_start) / 60, 2)

    logger.info('Finished generating reports for products: {}'.format(
        successful_reports))
    logger.info(
        'Finished generating reports for {} products successfully in {} minutes'
        .format(len(successful_reports), elapsed_minutes))

    # Upload latest reports to s3
    sync_reports(to_local=False)

    logger.info('Done!')
def plot(client: Client, product: dict, slo_id: int, output_file):
    """Render a PNG chart of the SLI values behind one SLO using gnuplot.

    For every target of the SLO the values of the matching SLI are fetched
    and written to a tab-separated data file; a gnuplot script referencing
    those files is then piped to a ``gnuplot`` subprocess, which writes the
    image to ``output_file``.

    Targets are grouped by the unit of their SLI. Groups are visited in
    reverse-sorted unit order; the first group with a non-empty unit is drawn
    against the primary y axis and later ones against ``y2``. Targets whose
    SLI has an empty unit are not plotted.

    :param client: API client used to look up the SLO, its targets and SLIs.
    :param product: Product the SLO belongs to.
    :param slo_id: ID of the SLO to chart.
    :param output_file: Path of the PNG file gnuplot should write.
    :raises IndexError: If the SLO, or the SLI named by a target, is not
        found (the first element of an empty result list is accessed).
    """
    # Function-scope import: only this function needs it.
    import tempfile

    slo = client.slo_list(product, id=slo_id)[0]
    targets = client.target_list(slo)
    targets_by_unit = collections.defaultdict(list)

    # A private temporary directory replaces the fixed ``/tmp/data<i>.tsv``
    # paths: concurrent runs can no longer overwrite each other's data and
    # the files are removed once gnuplot has finished with them.
    with tempfile.TemporaryDirectory(prefix='slr-plot-') as data_dir:
        for index, target in enumerate(targets):
            data_file = os.path.join(data_dir, 'data{}.tsv'.format(index))
            target['fn'] = data_file

            sli = client.sli_list(product, name=target['sli_name'])[0]
            target['unit'] = sli['unit']
            targets_by_unit[sli['unit']].append(target)

            # 10080 = 7 * 24 * 60.
            # NOTE(review): presumably a one-week look-back in minutes - confirm.
            data = client.sli_values(sli, sli_from=10080)
            values = [row['value'] for row in data]
            # With no data points both extremes stay at 0.
            target['maxval'] = max(values) if values else 0
            target['minval'] = min(values) if values else 0

            with open(data_file, 'w') as fd:
                for row in data:
                    fd.write('{}\t{}\n'.format(row['timestamp'], row['value']))

        # gnuplot preamble, one command per line (leading and trailing
        # newline included).
        script = ['\n'.join([
            '',
            "set output '{}'".format(output_file),
            'set term png enhanced size 1100, 400',
            'set xdata time',
            'set samples 300',
            'set grid xtics lt 0 lw 1 lc rgb "#bbbbbb"',
            'set format x "%m-%d"',
            'set timefmt "%Y-%m-%dT%H:%M:%SZ"',
            '',
        ])]

        axis_index = 0
        for unit, unit_targets in reversed(sorted(targets_by_unit.items())):
            if not unit:
                continue

            primary = axis_index == 0
            suff = '' if primary else '2'
            script.append('set format y{} "%.{}f {}"\n'.format(
                suff, precision.get(unit, 0), unit.replace('%', '%%')))

            # Finite target bounds take part in the axis range; unbounded
            # ones (None / +-inf) are ignored, falling back to 0.
            from_list = [
                t['from'] for t in unit_targets
                if t['from'] is not None and t['from'] != float('-inf')
            ] or [0]
            to_list = [
                t['to'] for t in unit_targets
                if t['to'] is not None and t['to'] != float('inf')
            ] or [0]
            ymin = min(from_list + [t['minval'] for t in unit_targets])
            ymax = max(to_list + [t['maxval'] for t in unit_targets])
            padding = 0.1 * (ymax - ymin)
            ymin -= padding
            ymax += padding

            # A limit of exactly 0 is emitted as an empty string, which
            # leaves that end of the range unspecified in the gnuplot command.
            script.append('set y{}range [{}:{}]\n'.format(
                suff, ymin or '', ymax or ''))
            script.append('set y{}tics\n'.format(suff))

            coord = 'first' if primary else 'second'
            for target in unit_targets:
                target['yaxis'] = 'y1' if primary else 'y2'

                # NOTE(review): bounds are tested for truthiness, so a bound
                # of exactly 0 draws no arrow although it does take part in
                # the range computation above - confirm this is intended.
                if target['from'] and target['from'] != float('-inf'):
                    script.append((
                        'set arrow from graph 0,{} {} to graph 1, {} {} head linecolor rgb "#ffcece" linewidth 2\n'
                    ).format(coord, target['from'], coord, target['from']))
                if target['to'] and target['to'] != float('inf'):
                    script.append((
                        'set arrow from graph 0,{} {} to graph 1, {} {} backhead linecolor rgb "#ffcece" linewidth 2\n'
                    ).format(coord, target['to'], coord, target['to']))

            axis_index += 1

        plots = [
            '"{}" using 1:2 lw 2 axes x1{} with lines title "{}"'.format(
                target['fn'], target['yaxis'],
                target['sli_name'].replace('_', ' '))
            for target in sorted(targets, key=lambda t: t['unit'])
            if target['unit']
        ]
        script.append('plot ' + ', '.join(plots) + '\n')

        # gnuplot must run while the temporary data files still exist.
        gnuplot = subprocess.Popen(['gnuplot'], stdin=subprocess.PIPE)
        gnuplot.communicate(''.join(script).encode('utf-8'))
        if gnuplot.returncode:
            # Best effort: a failed chart is reported but does not abort the
            # report that embeds it.
            logger.warning(
                'gnuplot exited with status {} while rendering {}'.format(
                    gnuplot.returncode, output_file))
def get_client(config):
    """Build a ``Client`` for the URL stored under ``config['url']``.

    ``SLR_TOKEN`` is used as the access token when it is set (truthy);
    otherwise a token is requested via ``zign.api.get_token('uid', ['uid'])``.
    """
    access_token = SLR_TOKEN or zign.api.get_token('uid', ['uid'])
    base_url = config['url']
    return Client(base_url, access_token)
def _report_period(report_data: dict):
    """Return ``(period_from, period_to)`` taken from the first SLO with days.

    Both values are the first 10 characters of the smallest / largest day
    key. ``(None, None)`` is returned when no SLO has any day data.
    """
    for slo in report_data['slo']:
        days = slo['days']
        if days:
            return min(days)[:10], max(days)[:10]
    return None, None


def _breaches_target(value, target: dict) -> bool:
    """Return True if ``value`` is above ``target['to']`` or below ``target['from']``.

    NOTE(review): bounds are tested for truthiness, so a bound of ``None`` -
    and also one of exactly 0 - is treated as "not set"; confirm that is
    intended for 0.
    """
    upper, lower = target['to'], target['from']
    return bool((upper and value > upper) or (lower and value < lower))


def _classify_daily_sli(sli: str, sli_data: dict, targets: list):
    """Return ``(unit, classes)`` for one SLI on one day.

    ``unit`` comes from the target(s) whose ``sli_name`` equals ``sli``
    (empty string if there is none). ``classes`` is a set of marker strings
    handed to the template (presumably used as CSS classes - confirm).
    """
    classes = set()
    unit = ''

    if sli_data['breaches']:
        classes.add('orange')

    for target in targets:
        if target['sli_name'] != sli:
            continue
        unit = target['unit']
        if _breaches_target(sli_data['avg'], target):
            classes.add('red')

    if not classes:
        classes.add('ok')

    # NOTE(review): 1400 is presumably "close to 1440 samples", i.e. one
    # sample per minute for a full day - confirm.
    if sli_data['count'] < 1400:
        classes.add('not-enough-samples')

    return unit, classes


def generate_weekly_report(client: Client, product: dict, output_dir: str) -> None:
    """Generate the HTML report (and its charts) for one product.

    The report data is fetched with ``client.product_report(product)`` and
    written to ``<output_dir>/<product group slug>/<product slug>/<period id>/``:
    one ``chart-<slo id>.png`` per SLO (rendered by ``plot``) and an
    ``index.html`` rendered from the ``slr-weekly.html`` template. Afterwards
    the directory index below ``output_dir`` is regenerated.

    The SLO dicts inside the fetched report data are enriched in place with
    the keys ``slis``, ``data``, ``breaches``, ``count`` and ``chart`` before
    being handed to the template.

    :param client: API client used to fetch the report and the chart data.
    :param product: Product to report on; its ``slug`` is used in the path.
    :param output_dir: Root directory for all generated reports.
    :raises RuntimeError: If no SLO of the product has any day data, so the
        reporting period cannot be determined.
    """
    report_data = client.product_report(product)
    product_group = report_data['product_group_slug']

    period_from, period_to = _report_period(report_data)
    if not period_from or not period_to:
        raise RuntimeError(
            'Can not determine "period_from" and "period_to" for the report. Aborting!'
        )

    period_id = '{}-{}'.format(period_from.replace('-', ''),
                               period_to.replace('-', ''))
    report_dir = os.path.join(output_dir, product_group, product['slug'],
                              period_id)
    os.makedirs(report_dir, exist_ok=True)

    data = {
        'product': {
            'name': report_data['product_name'],
            'product_group_name': report_data['product_group_name'],
        },
        'period': '{} - {}'.format(period_from, period_to),
        'slos': []
    }

    for slo in report_data['slo']:
        slo['slis'] = {}
        slo['data'] = []

        breaches_by_sli = defaultdict(int)
        counts_by_sli = defaultdict(int)
        # sli name -> statistic name ('avg', 'min', ...) -> one value per day
        values_by_sli = defaultdict(lambda: defaultdict(list))

        for day, day_data in sorted(slo['days'].items()):
            slis = {}
            for sli, sli_data in day_data.items():
                breaches_by_sli[sli] += sli_data['breaches']
                counts_by_sli[sli] += sli_data['count']
                aggregation = sli_data['aggregation']
                for stat in ('avg', 'min', 'max', 'sum'):
                    values_by_sli[sli][stat].append(sli_data[stat])

                unit, classes = _classify_daily_sli(sli, sli_data,
                                                    slo['targets'])

                if sli == 'requests':
                    # interpolate total number of requests per day from
                    # average per sec
                    sli_data['total'] = int(
                        sli_data['avg'] * sli_data['count'] * 60)

                sli_data['unit'] = unit
                sli_data['classes'] = classes
                # The weekly aggregate below is checked for None before it is
                # formatted; apply the same guard here so that a missing
                # daily aggregate renders as '-' instead of raising.
                day_value = get_aggregate(aggregation, sli_data)
                sli_data['aggregate'] = (
                    '-' if day_value is None else '{:.2f} {}'.format(
                        day_value, unit))
                slis[sli] = sli_data

            dt = datetime.datetime.strptime(day[:10], '%Y-%m-%d')
            slo['data'].append({
                'caption': '{} {}'.format(dt.strftime('%a'), day[5:10]),
                'slis': slis
            })

        slo['breaches'] = max_or_zero(breaches_by_sli.values())
        slo['count'] = max_or_zero(counts_by_sli.values())

        # Aggregate over the whole period, one entry per target.
        for target in slo['targets']:
            sli_name = target['sli_name']
            val = get_aggregate(target['aggregation'],
                                values_by_sli[sli_name])
            slo['slis'][sli_name] = {
                'unit': target['unit'],
                'aggregate': '-' if val is None else '{:.2f} {}'.format(
                    val, target['unit']),
                # Without a value there is nothing to compare: counts as ok.
                'ok': val is None or not _breaches_target(val, target),
            }

        fn = os.path.join(report_dir, 'chart-{}.png'.format(slo['id']))
        plot(client, product, slo['id'], fn)
        slo['chart'] = os.path.basename(fn)
        data['slos'].append(slo)

    # NOTE(review): naive UTC timestamp; kept as is because the template
    # filters that consume it are not visible here.
    data['now'] = datetime.datetime.utcnow()

    loader = jinja2.FileSystemLoader(
        os.path.join(os.path.dirname(__file__), 'templates'))
    env = jinja2.Environment(loader=loader)
    env.filters['sli_title'] = title
    env.filters['human_time'] = human_time

    template = env.get_template('slr-weekly.html')
    template.stream(**data).dump(os.path.join(report_dir, 'index.html'))

    generate_directory_index(output_dir)