def report_memcache_statistics(stats, download_dt, graphite_host,
                               verbose=False, dry_run=False):
    """Send memcache statistics to graphite.

    Arguments:
      stats: Dict returned by parsers.Memcache.statistics().
      download_dt: Datetime when /memcache was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not send the report to graphite.
    """
    record = {'utc_datetime': download_dt,
              'hit_count': stats['hit_count'].value(),
              'miss_count': stats['miss_count'].value(),
              'hit_ratio': stats['hit_ratio'].value(),
              'item_count': stats['item_count'].value(),
              'total_cache_size_bytes': stats['total_cache_size'].value(),
              'oldest_item_age_seconds': stats['oldest_item_age'].value(),
              }
    if verbose:
        print record
    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'memcache',
                                             [record])
def report_instance_summary(summary, module, download_dt, graphite_host,
                            verbose=False, dry_run=False):
    """Send instance summary to graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      module: the name of the GAE module that this summary has info for.
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not send the report to graphite.
    """
    record = {
        'utc_datetime': download_dt,
        'num_instances': summary['total_instances'],
        'average_qps': summary['average_qps'],
        'average_latency_ms': summary['average_latency_ms'],
        'average_memory_mb': summary['average_memory_mb'],
    }
    if verbose:
        print record
    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'instances',
                                             [record], module=module)
def report_memcache_statistics(stats, download_dt, graphite_host,
                               verbose=False, dry_run=False):
    """Send memcache statistics to graphite.

    Arguments:
      stats: Dict returned by parsers.Memcache.statistics().
      download_dt: Datetime when /memcache was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not send the report to graphite.
    """
    record = {
        'utc_datetime': download_dt,
        'hit_count': stats['hit_count'].value(),
        'miss_count': stats['miss_count'].value(),
        'hit_ratio': stats['hit_ratio'].value(),
        'item_count': stats['item_count'].value(),
        'total_cache_size_bytes': stats['total_cache_size'].value(),
    }
    if 'oldest_item_age' in stats:
        record['oldest_item_age_seconds'] = stats['oldest_item_age'].value()
    if verbose:
        print record
    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'memcache',
                                             [record])
def report_instance_summary(summary, download_dt, graphite_host='',
                            verbose=False, dry_run=False):
    """Store instance summary in mongo and maybe graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {
        'utc_datetime': download_dt,
        'num_instances': summary['total_instances'],
        'average_qps': summary['average_qps'],
        'average_latency_ms': summary['average_latency_ms'],
        'average_memory_mb': summary['average_memory_mb'],
    }
    if verbose:
        print record
    if not dry_run:
        # Do the graphite send first, since mongo modifies 'records' in place.
        graphite_util.maybe_send_to_graphite(graphite_host, 'instances',
                                             [record])
        _mongo_db()['gae_dashboard_instance_reports'].insert(record)
def main(csv_iter):
    """Parse App Engine usage report CSV and send new data to graphite.

    csv_iter is any object that returns a line of the usage report CSV
    for each iteration.  This includes the header line containing field
    names.
    """
    parser = argparse.ArgumentParser(description=__doc__.split('\n\n', 1)[0])
    parser.add_argument('--graphite_host',
                        default='carbon.hostedgraphite.com:2004',
                        help=('host:port to send stats to graphite '
                              '(using the pickle protocol). '
                              '[default: %(default)s]'))
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print report on stdout')
    parser.add_argument('-n', '--dry-run', action='store_true', default=False,
                        help='do not send data to graphite')
    args = parser.parse_args()

    csvreader = csv.DictReader(csv_iter)

    start_date = _time_t_of_latest_record()
    if start_date is None:
        print 'No record of previous fetches; importing all records as new.'
        start_date = datetime.date(2000, 1, 1)
    else:
        start_date = datetime.date.fromtimestamp(start_date)
    start_date = start_date.strftime('%Y-%m-%d')

    print 'Importing usage reports starting from %s' % start_date

    records_to_add = []
    for (dt, key, value) in _reports_since_dt(csvreader, start_date):
        records_to_add.append({'utc_datetime': dt, _munge_key(key): value})

    if args.verbose:
        print records_to_add

    print 'Importing %s documents' % len(records_to_add)
    if args.dry_run:
        print 'Skipping import during dry-run.'
        records_to_add = []
    elif records_to_add:
        graphite_util.maybe_send_to_graphite(args.graphite_host, 'usage',
                                             records_to_add)

    if records_to_add:
        _write_time_t_of_latest_record(records_to_add)
def main(csv_iter):
    """Parse App Engine usage report CSV and send new data to graphite.

    csv_iter is any object that returns a line of the usage report CSV
    for each iteration.  This includes the header line containing field
    names.
    """
    yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)

    parser = argparse.ArgumentParser(description=__doc__.split('\n\n', 1)[0])
    parser.add_argument('start_date', nargs='?',
                        default=yesterday.strftime('%Y-%m-%d'),
                        help=('Ignore data before this date (YYYY-MM-DD) '
                              '[default: %(default)s]'))
    parser.add_argument('--graphite_host',
                        default='carbon.hostedgraphite.com:2004',
                        help=('host:port to send stats to graphite '
                              '(using the pickle protocol). '
                              '[default: %(default)s]'))
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print report on stdout')
    parser.add_argument('-n', '--dry-run', action='store_true', default=False,
                        help='do not send data to graphite')
    args = parser.parse_args()

    csvreader = csv.DictReader(csv_iter)

    print 'Importing usage reports starting from %s' % args.start_date

    records_to_add = []
    for (dt, key, value) in _reports_since_dt(csvreader, args.start_date):
        records_to_add.append({'utc_datetime': dt, _munge_key(key): value})

    if args.verbose:
        print records_to_add

    print 'Importing %s documents' % len(records_to_add)
    if args.dry_run:
        print >> sys.stderr, 'Skipping import during dry-run.'
    elif records_to_add:
        graphite_util.maybe_send_to_graphite(args.graphite_host, 'usage',
                                             records_to_add)
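# NOTE: a minimal sketch of how this entry point might be wired up.  The
# actual invocation is not shown in this file; feeding the downloaded
# usage-report CSV (header line included) in on stdin is an assumption.
if __name__ == '__main__':
    import sys
    main(sys.stdin)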
def report_instance_summary(summary, download_dt, graphite_host,
                            verbose=False, dry_run=False):
    """Send instance summary to graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not send the report to graphite.
    """
    record = {'utc_datetime': download_dt,
              'num_instances': summary['total_instances'],
              'average_qps': summary['average_qps'],
              'average_latency_ms': summary['average_latency_ms'],
              'average_memory_mb': summary['average_memory_mb'],
              }
    if verbose:
        print record
    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'instances',
                                             [record])
def parse_and_commit_record(input_json, download_time_t, graphite_host='',
                            verbose=False, dry_run=False):
    """Parse and store dashboard chart data.

    Arguments:
      input_json: A JSON list of dicts containing the chart-url for one
        chart, along with an int describing which chart it is and other
        identifying data; see the help for <infile> in main(), or just
        look at how this json is constructed in fetch_stats.sh.
      download_time_t: When /dashboard was downloaded in seconds (UTC).
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    # Strip off the None sentinel we add to the end of the input json.
    input_json = [j for j in input_json if j is not None]
    if not input_json:
        return

    # Extract named time series data from the raw HTML.
    named_series = {}
    for chart_json in input_json:
        chart_label_index = chart_json['chart_num']
        chart_label = _label_to_field_map.keys()[chart_label_index]
        time_label_index = chart_json['time_window']
        (time_label, time_duration) = _time_windows[time_label_index]
        time_delta = datetime.timedelta(hours=time_duration)
        chart_url = chart_json['chart_url_data']['chart_url']
        chart_data = unpack_chart_data(chart_url, time_delta.total_seconds())
        for series_label, xy_pairs in chart_data:
            field_name = lookup_field_name(chart_label, series_label)
            named_series[field_name] = xy_pairs

    # Assume all elements of our input_json list have the same time window.
    assert all(input_json[i]['time_window'] == input_json[0]['time_window']
               for i in xrange(len(input_json)))
    chart_start_time_t = download_time_t - time_delta.total_seconds()

    # Determine the starting point for records we want to add.  This
    # script may be run by cron and fetches a minimum of 6 hours of
    # data, but chances are good that it runs more frequently.
    mongo_collection = _mongo_collection()
    time_t_of_latest_record = _time_t_of_latest_record(mongo_collection)
    if time_t_of_latest_record is None:
        print >>sys.stderr, ('No dashboard records found in mongo. '
                             'Importing all records as new.')
        time_t_of_latest_record = 0

    # Build time-keyed records from the named time series data and
    # decide which records will be stored.
    records = []
    for time_value, record in aggregate_series_by_time(named_series):
        record_time_t = chart_start_time_t + time_value
        if record_time_t > time_t_of_latest_record:
            record['utc_datetime'] = datetime.datetime.utcfromtimestamp(
                record_time_t)
            records.append(record)

    if verbose:
        print records

    print >>sys.stderr, 'Importing %d record%s' % (len(records),
                                                   's'[len(records) == 1:])
    if dry_run:
        print >>sys.stderr, 'Skipping import during dry-run.'
    elif records:
        # Do the graphite send first, since mongo modifies 'records' in place.
        graphite_util.maybe_send_to_graphite(graphite_host, 'summary',
                                             records)
        mongo_collection.insert(records)
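# NOTE: _mongo_collection() and _time_t_of_latest_record() are defined
# elsewhere and not shown here.  Purely to illustrate the "resume from the
# newest stored record" logic above, a hypothetical lookup with pymongo could
# look like the sketch below; the helper name, the sort on 'utc_datetime',
# and the query details are assumptions, not the real code.
import calendar

import pymongo


def _time_t_of_latest_record_sketch(mongo_collection):
    """Return the time_t of the newest stored record, or None if empty."""
    docs = list(mongo_collection.find()
                .sort('utc_datetime', pymongo.DESCENDING)
                .limit(1))
    if not docs:
        return None
    return calendar.timegm(docs[0]['utc_datetime'].timetuple())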
def parse_and_commit_record(input_json, start_time_t, download_time_t,
                            graphite_host, verbose=False, dry_run=False):
    """Parse and send dashboard chart data to graphite.

    Arguments:
      input_json: A JSON list of dicts containing the chart-url for one
        chart, along with an int describing which chart it is and other
        identifying data; see the help for <infile> in main(), or just
        look at how this json is constructed in fetch_stats.sh.
      start_time_t: Ignore all datapoints before this time_t (given that
        the last datapoint is at time download_time_t).
      download_time_t: When /dashboard was downloaded in seconds (UTC).
      graphite_host: host:port of graphite server to send data to.
      verbose: If True, print report to stdout.
      dry_run: If True, do not send the report to graphite.
    """
    # Strip off the None sentinel we add to the end of the input json.
    input_json = [j for j in input_json if j is not None]
    if not input_json:
        return

    # Extract named time series data from the raw HTML.
    named_series = {}
    for chart_json in input_json:
        chart_label_index = chart_json['chart_num']
        chart_label = _label_to_field_map.keys()[chart_label_index]
        time_label_index = chart_json['time_window']
        (time_label, time_duration) = _time_windows[time_label_index]
        time_delta = datetime.timedelta(hours=time_duration)
        chart_url = chart_json['chart_url_data']['chart_url']
        chart_data = unpack_chart_data(chart_url, time_delta.total_seconds())
        for series_label, xy_pairs in chart_data:
            field_name = lookup_field_name(chart_label, series_label)
            named_series[field_name] = xy_pairs

    # Assume all elements of our input_json list have the same time window.
    assert all(input_json[i]['time_window'] == input_json[0]['time_window']
               for i in xrange(len(input_json)))
    chart_start_time_t = download_time_t - time_delta.total_seconds()

    # Build time-keyed records from the named time series data and
    # decide which records will be stored.
    records = []
    for time_value, record in aggregate_series_by_time(named_series):
        record_time_t = chart_start_time_t + time_value
        if record_time_t > start_time_t:
            record['utc_datetime'] = datetime.datetime.utcfromtimestamp(
                record_time_t)
            records.append(record)

    if verbose:
        print records

    print >> sys.stderr, 'Importing %d record%s' % (len(records),
                                                    's'[len(records) == 1:])
    if dry_run:
        print >> sys.stderr, 'Skipping import during dry-run.'
    elif records:
        graphite_util.maybe_send_to_graphite(graphite_host, 'summary',
                                             records)
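# NOTE: graphite_util.maybe_send_to_graphite() is not shown in this file.
# As a point of reference, the sketch below shows one way records like the
# ones built above could be pushed to carbon's pickle listener (the default
# --graphite_host above ends in :2004, carbon's pickle port).  The metric
# naming scheme, the dict flattening, and the helper name are assumptions,
# not the actual graphite_util implementation.
import calendar
import cPickle
import socket
import struct


def _send_records_via_pickle_protocol(graphite_host, prefix, records):
    """Send each non-datetime field of each record as <prefix>.<field>."""
    if not graphite_host:     # mirror the "''/None means don't send" contract
        return
    host, port = graphite_host.split(':')
    tuples = []
    for record in records:
        time_t = calendar.timegm(record['utc_datetime'].timetuple())
        for key, value in record.iteritems():
            if key != 'utc_datetime':
                tuples.append(('%s.%s' % (prefix, key), (time_t, value)))
    payload = cPickle.dumps(tuples, 2)
    # Carbon's pickle receiver expects a 4-byte big-endian length header
    # followed by the pickled list of (path, (timestamp, value)) tuples.
    sock = socket.create_connection((host, int(port)))
    try:
        sock.sendall(struct.pack('!L', len(payload)) + payload)
    finally:
        sock.close()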