def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl,
                            verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_mms_metrics')
    extracted_metrics = extract_metrics_from_mms_dump(args.input_file)
    json_points = []
    for tagset, metrics_for_all_timestamps in extracted_metrics.items():
        for timestamp, metrics_for_one_timestamp in metrics_for_all_timestamps.items():
            json_points.append({
                "timestamp": timestamp,
                "measurement": "cloudmanager_data",
                "tags": {
                    "project": tagset[0],  # Magic number - not great
                    "hostname": tagset[1]
                },
                "fields": metrics_for_one_timestamp
            })
            if len(json_points) >= args.batch_size:
                print(len(json_points))
                write_points(logger, client, json_points, "N/A")
                json_points = []
    # Flush whatever is left over after the last full batch
    write_points(logger, client, json_points, "N/A")
def main():
    logger = configure_logging('parse_serverstatus')
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl,
                            verify_ssl=False, port=8086, database=args.database)
    with open(args.input_file, 'r') as f:
        for line_number, chunk in enumerate(grouper(f, args.batch_size)):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to
                # handle this, otherwise we'll miss the last batch
                if line:
                    try:
                        server_status_json = json.loads(line)
                        common_metric_data = get_metrics("serverstatus", server_status_json,
                                                         common_metrics, line_number)
                        json_points.append(create_point(*common_metric_data))
                        wiredtiger_metric_data = get_metrics("serverstatus_wiredtiger", server_status_json,
                                                             wiredtiger_metrics, line_number)
                        json_points.append(create_point(*wiredtiger_metric_data))
                    except ValueError:
                        logger.error("Line {} does not appear to be valid JSON - \"{}\"".format(line_number, line.strip()))
            write_points(logger, client, json_points, line_number)
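# Note: `grouper` is not defined in any of these snippets. A minimal sketch of the
# helper they appear to rely on is below -- the standard itertools recipe, which pads
# the final chunk with the fill value (hence the "zip_longest will backfill" comments
# above). The name and signature are assumptions based on the call sites.
from itertools import zip_longest

def grouper(iterable, n, fillvalue=None):
    """Collect an iterable into fixed-length chunks, padding the last one with fillvalue."""
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)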
def main(argv):
    user = None
    if argv:
        user = argv[0]
    total, largest_process, largest_process_name = get_memory_usage(user)
    series_name = 'default.{0}.memory.usage'.format(settings.SERVER_NAME)
    data = [{
        'measurement': series_name,
        'columns': ['value', 'largest_process', 'largest_process_name'],
        'points': [[total, largest_process, largest_process_name]],
    }]
    write_points(data)
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl,
                            verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_sar_disk')
    sar_timezone = timezone(args.timezone)
    with open(args.input_file, 'r') as f:
        header_split = f.readline().split()
        hostname = header_split[2].strip("()")
        logger.info("Found hostname {}".format(hostname))
        date = header_split[3]
        logger.info("Found date {} (MM/DD/YYYY)".format(date))
        json_points = []
        for line_number, line in enumerate(f):
            # We skip any empty lines, and also the "Average:" lines at the end
            if line.strip() and 'Average:' not in line:
                if all(header_keyword in line for header_keyword in ['DEV', 'tps', 'rd_sec/s', 'wr_sec/s']):
                    # Skip the header lines - if a device name contains all four of these keywords, I will eat my hat
                    pass
                else:
                    disk_stats = dict(zip(SAR_DISK_HEADERS, line.split()))
                    values = {}
                    local_timestamp = datetime.strptime(
                        "{} {} {}".format(date, disk_stats['timestamp'], disk_stats['AM_OR_PM']),
                        "%m/%d/%Y %I:%M:%S %p")
                    timestamp = sar_timezone.localize(local_timestamp)
                    for metric_name, value in disk_stats.items():
                        if metric_name == 'device':
                            disk_name = value
                        elif metric_name in ['AM_OR_PM', 'timestamp']:
                            pass
                        else:
                            values[metric_name] = float(value)
                    json_points.append({
                        "measurement": "sar_disk",
                        "tags": {
                            "project": args.project,
                            "hostname": hostname,
                            "device": disk_name,
                        },
                        "time": timestamp.isoformat(),
                        "fields": values
                    })
                    if len(json_points) >= args.batch_size:
                        write_points(logger, client, json_points, line_number)
                        json_points = []
        write_points(logger, client, json_points, line_number)
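# Note: SAR_DISK_HEADERS is not defined in the snippet above. Judging by the keys the
# loop consumes ('timestamp', 'AM_OR_PM', 'device', plus numeric metrics) and the header
# keywords it skips, it is presumably something along these lines; the exact metric
# columns depend on the sar version and flags, so treat this as an assumption:
SAR_DISK_HEADERS = ['timestamp', 'AM_OR_PM', 'device', 'tps', 'rd_sec/s', 'wr_sec/s',
                    'avgrq-sz', 'avgqu-sz', 'await', 'svctm', '%util']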
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl,
                            verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r', encoding="latin-1") as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to
                # handle this, otherwise we'll miss the last batch
                line_count += 1
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error("Unable to get operation type - {} - {}".format(e, line))
                        break
                    if tags['operation'] in ['command', 'query', 'getmore', 'insert',
                                             'update', 'remove', 'aggregate', 'mapreduce']:
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split("command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # TODO - Parse locks from 2.6 style loglines
                            # 2.4 logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Should we be splitting on "locks:{" instead?
                            pre_locks, locks = line.rsplit("locks:", 1)
                            # Strip duration from locks
                            locks = locks.rsplit(" ", 1)[0]
                            # Add quotation marks around the keys, so that it is valid JSON
                            locks = re.sub(r"(\w+):", r'"\g<1>":', locks)
                            locks_document = flatdict.FlatDict(json.loads(locks), delimiter="_")
                            for key, value in locks_document.items():
                                values["locks_{}".format(key)] = int(value)
                            # We work backwards from the end, until we run out of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                        # TODO - Parse the full query plan for IXSCAN
                        if 'planSummary: ' in line:
                            tags['plan_summary'] = line.split('planSummary: ', 1)[1].split()[0]
                        json_points.append(create_point(timestamp, "operations", values, tags))
                    else:
                        logger.info("'{}' is not a recognised operation type - not parsing this line ({})".format(tags['operation'], line))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
                try:
                    # TODO - Have a dry-run mode
                    write_points(logger, client, json_points, line_count)
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")
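# Note: `write_points` itself is not shown in these snippets. A hedged sketch that
# matches the call sites (logger, client, points, position marker) is below; the use of
# the `retrying` library is an assumption prompted by the "should be handled by retry
# decorator" TODOs -- any equivalent retry loop would do.
from retrying import retry

@retry(stop_max_attempt_number=5, wait_exponential_multiplier=1000)
def write_points(logger, client, points, line_number):
    """Write one batch of points to InfluxDB and log how far through the input we are."""
    client.write_points(points)
    logger.info("Wrote {} points (up to line {})".format(len(points), line_number))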
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl,
                            verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_iostat')
    iostat_timezone = timezone(args.timezone)
    with open(args.input_file, 'r') as f:
        if args.hostname:
            hostname = args.hostname
            next(f)  # Skip the "Linux..." line
        else:
            hostname = re.split(r'[()]', f.readline())[1]
            logger.info("Found hostname {}".format(hostname))
        next(f)  # Skip the blank line
        line_counter = 2
        for chunk_index, chunk in enumerate(grouper(parse_iostat(f), args.batch_size)):
            json_points = []
            for block in chunk:
                if block:
                    try:
                        for i, line in enumerate(block):
                            line_counter += 1
                            if i == 0:
                                # The first element of each block is the sample's (naive) datetime
                                # TODO: Timezone?
                                # TODO: Better way of storing timestamp
                                timestamp = iostat_timezone.localize(line)
                            elif i == 1:
                                # CPU metric headings
                                pass
                            elif i == 2:
                                system_stats = dict(zip(system_stat_headers, line.split()))
                                values = {}
                                for metric_name, value in system_stats.items():
                                    values[metric_name] = float(value)
                                json_points.append({
                                    "measurement": "iostat",
                                    "tags": {
                                        "project": args.project,
                                        "hostname": hostname
                                    },
                                    "time": timestamp.isoformat(),
                                    "fields": values
                                })
                            elif i == 4:
                                # Disk metric headings
                                pass
                            elif i >= 5 and line:
                                disk_stats = {}
                                device = line.split()[0]
                                disk_stats[device] = dict(zip(disk_stat_headers, line.split()[1:]))
                                for disk_name, metrics in disk_stats.items():
                                    values = {}
                                    for metric_name, value in metrics.items():
                                        # Nasty hack to deal with bad data from Morgan Stanley
                                        # if disk_name not in ['sda', 'sdb', 'dm-0', 'dm-1', 'dm-2']:
                                        #     print(block)
                                        #     raise ValueError
                                        values[metric_name] = float(value)
                                    json_points.append({
                                        "measurement": "iostat",
                                        "tags": {
                                            "project": args.project,
                                            "hostname": hostname,
                                            "device": disk_name,
                                        },
                                        "time": timestamp.isoformat(),
                                        "fields": values
                                    })
                    except ValueError as e:
                        print("Bad output seen - skipping")
                        print(e)
                        print(block)
            write_points(logger, client, json_points, line_counter)
            # (Fragment - continues from a larger log-parsing loop)
            except ValueError as e:
                logger.error("Error parsing line - {} - {}".format(e, line))
                break
            if ' connections now open)' in line:
                connection_count = line.split("(")[1].split()[0]
                # TODO - We should be sending an int, not a float - connection counters are integral values
                json_points.append(create_generic_point('connection_counters', connection_count, timestamp, base_tags))
            if '[initandlisten] connection accepted from' in line:
                event = OpenConnectionEvent(timestamp, logline)
                json_points.append(event.get_json())
            elif '] end connection ' in line:
                event = CloseConnectionEvent(timestamp, logline)
                json_points.append(event.get_json())
        if json_points:
            # We need to deal with 500: timeout - some kind of retry behaviour
            # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
            try:
                write_points(logger, client, json_points, line_counter)
            except Exception as e:
                logger.error("Retries exceeded. Giving up on this point.")
        else:
            print("empty points!!!")
    print("Number of connections: {}".format(len(connections)))
    i = 0
    for connection in connections.items():
        if i < 10:
            print(connection)
            i += 1
        else:
            break
    w_local_phero = 0.1
    c_greed = 0.9
    w_history = 1.0
    best, phero = ants.search(G, max_it, num_ants, decay, w_heur,
                              w_local_phero, w_history, c_greed,
                              cost_func=ants.graph_distance)

    LOGN("\tTransform the resulting nodes permutation into a path on the graph")
    # by finding the shortest path between two cities.
    traj = []
    for start, end in utils.tour(best["permutation"]):
        p, c = shortpath.astar(G, start, end)
        traj += p
    trajs.append(traj)

    with open("d%i_tour.points" % depth, "w") as fd:
        utils.write_points(traj, fd)
    with open("d%i_pheromones.mat" % depth, "w") as fd:
        utils.write_matrix(phero, fd)

########################################################################
# TRIANGULATION
########################################################################

triangulated = []
if ask_for.triangulation:
    with open(ask_for.triangulation) as fd:
        triangulated = triangulation.load(fd)
def main(argv):
    folder = argv[0]
    total = get_disk_usage(folder)
    series_name = 'default.{0}.disk.usage'.format(settings.SERVER_NAME)
    write_points(series_name, total)
calib_path = 'kitti/training/calib/{}.txt'.format(args.idx)
calib = Calibration(calib_path)

points = utils.load_point_clouds(points_path)
bboxes = utils.load_3d_boxes(label_path, args.category)
bboxes = calib.bbox_rect_to_lidar(bboxes)

corners3d = utils.boxes_to_corners_3d(bboxes)
points_flag = utils.is_within_3d_box(points, corners3d)

points_is_within_3d_box = []
for i in range(len(points_flag)):
    p = points[points_flag[i]]
    if len(p) > 0:
        points_is_within_3d_box.append(p)
        box = bboxes[i]
        points_canonical, box_canonical = utils.points_to_canonical(p, box)
        points_canonical, box_canonical = utils.lidar_to_shapenet(points_canonical, box_canonical)
        pts_name = 'output/{}_{}_point_{}'.format(args.idx, args.category, i)
        box_name = 'output/{}_{}_bbox_{}'.format(args.idx, args.category, i)
        utils.write_points(points_canonical, pts_name)
        utils.write_bboxes(box_canonical, box_name)

points_is_within_3d_box = np.concatenate(points_is_within_3d_box, axis=0)
points = points_is_within_3d_box

utils.write_points(points, 'output/points')
utils.write_bboxes(bboxes, 'output/bboxes')
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl,
                            verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r') as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to
                # handle this, otherwise we'll miss the last batch
                line_count += 1
                # Strip the trailing newline before checking for the "ms" suffix
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error("Unable to parse line - {} - {}".format(e, line))
                        break
                    if tags['operation'] in ['command', 'query', 'getmore', 'insert',
                                             'update', 'remove', 'aggregate', 'mapreduce']:
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split("command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # 2.4 logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Parse locks
                            pre_locks, locks = line.split("locks:{", 1)
                            # We work backwards from the end, until we run out of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                        # TODO - Parse the full query plan for IXSCAN
                        if 'planSummary: ' in line:
                            tags['plan_summary'] = line.split('planSummary: ', 1)[1].split()[0]
                        json_points.append(create_point(timestamp, "operations", values, tags))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
                try:
                    write_points(logger, client, json_points, line_count)
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")
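# Note: `create_point` is also not shown in these snippets. Given the call sites
# (create_point(timestamp, "operations", values, tags)) and the dict layout used by the
# other parsers, it presumably just assembles the InfluxDB JSON point -- a sketch, not
# the original implementation:
def create_point(timestamp, measurement, values, tags):
    return {
        "measurement": measurement,
        "tags": tags,
        "time": timestamp.isoformat(),
        "fields": values,
    }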