Example 1
def process_node(output_dir, node_dir):
    """
    Arguments:
        output_dir (Path): directory to write the per-node CSV file to
        node_dir (Path): directory for the node
    """
    ncp = map_utils.node_name_from_dir(node_dir)

    data_written = False
    output_file = output_dir / f"{ncp}.csv"
    with open(output_file, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(
            ['timestamp', 'service', 'source region', 'attribute', 'value'])

        for time_dir in node_dir.iterdir():
            if not time_dir.is_dir():
                continue

            timestamp = int(time_dir.stem)

            total_demand_file = time_dir / 'totalDemand.json'
            if total_demand_file.exists():
                if process_file(writer, timestamp, total_demand_file):
                    data_written = True

    if not data_written:
        # no data for this node
        get_logger().debug("No total demand data for %s", ncp)
        output_file.unlink()
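
Example 1 delegates the per-file work to a process_file helper that is not shown here. A minimal sketch, assuming totalDemand.json nests service -> source region -> attribute -> value (that schema is a guess inferred from the CSV header):

import json

def process_file(writer, timestamp, demand_file):
    """Hypothetical sketch: write one CSV row per leaf value in demand_file.

    Returns True if at least one row was written.
    """
    with open(demand_file, 'r') as f:
        demand = json.load(f)

    wrote = False
    for service, regions in demand.items():
        for region, attributes in regions.items():
            for attribute, value in attributes.items():
                writer.writerow([timestamp, service, region, attribute, value])
                wrote = True
    return wrote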
Example 2
def process_node(node_dir):
    """
    Arguments:
        node_dir (Path): directory to read
    Returns:
        list<str>: lines to add to the timeline
    """
    lines = list()

    node_name = map_utils.node_name_from_dir(node_dir)

    get_logger().debug("Processing %s for node %s", node_dir, node_name)

    agent_log_dir = node_dir / 'agent'
    if agent_log_dir.exists():
        lines.extend(parse_agent_log(agent_log_dir, node_name))

    client_log_dir = node_dir / 'client'
    if client_log_dir.exists():
        lines.extend(parse_client_log(client_log_dir, node_name))

    sim_log_dir = node_dir / 'sim-driver'
    if sim_log_dir.exists():
        lines.extend(parse_sim_log(sim_log_dir, node_name))

    return lines
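
A caller would walk every node directory and merge the returned lines into a single timeline. A minimal driver sketch (build_timeline is hypothetical, and the assumption that the lines sort chronologically as strings is mine):

from pathlib import Path

def build_timeline(sim_output):
    # Hypothetical driver: collect timeline lines from every node directory.
    all_lines = []
    for node_dir in Path(sim_output).iterdir():
        if node_dir.is_dir():
            all_lines.extend(process_node(node_dir))
    # Assumes each line begins with a sortable timestamp.
    return sorted(all_lines)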
Example 3
def process_node(first_timestamp, all_apps, data_load, data_demand_short, data_demand_long, data_capacity, node_dir, container_node):
    """
    Args:
        first_timestamp(datetime.datetime): when the simulation started
    """
    
    node_all_times = set()
    node_containers = set()
    node_name_dir = map_utils.find_ncp_folder(node_dir)
    if node_name_dir is None:
        get_logger().debug("No NCP folder found in %s", node_name_dir)
        return

    ncp = map_utils.node_name_from_dir(node_name_dir)

    first_timestamp_ms = first_timestamp.timestamp() * 1000
    
    get_logger().debug("Processing ncp folder %s from %s. NCP name: %s", node_name_dir, node_dir, ncp)
    for time_dir in node_name_dir.iterdir():
        if not time_dir.is_dir():
            continue

        time = int(time_dir.stem)
        if time < first_timestamp_ms:
            # ignore data before the start of the simulation
            continue
        
        node_all_times.add(time)

        resource_report_file = time_dir / 'resourceReport-SHORT.json'
        if resource_report_file.exists():
            try:
                with open(resource_report_file, 'r') as f:
                    resource_report = json.load(f)
                process_resource_report(ncp, data_load, data_demand_short, data_capacity, node_containers, time, resource_report)
            except json.decoder.JSONDecodeError:
                get_logger().warning("Problem reading %s, skipping", resource_report_file)

        resource_report_file = time_dir / 'resourceReport-LONG.json'
        if resource_report_file.exists():
            try:
                with open(resource_report_file, 'r') as f:
                    resource_report = json.load(f)
                process_resource_report(ncp, None, data_demand_long, None, node_containers, time, resource_report)
            except json.decoder.JSONDecodeError:
                get_logger().warning("Problem reading %s, skipping", resource_report_file)
                
    fill_in_missing_apps(all_apps, data_load, ncp, node_all_times)
    fill_in_missing_apps(all_apps, data_demand_short, ncp, node_all_times)
    fill_in_missing_apps(all_apps, data_demand_long, ncp, node_all_times)
            
    fill_in_missing_containers(all_apps, data_capacity, node_all_times, node_containers)

    for container in node_containers:
        container_node[container] = ncp
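
Example 3 fills in accumulator dictionaries owned by the caller. A hedged setup sketch, assuming one call per node directory (collect_all_nodes and the empty-dict shapes are illustrative, not confirmed by the source):

from pathlib import Path

def collect_all_nodes(first_timestamp, all_apps, sim_output):
    # Hypothetical accumulators, mutated in place by process_node.
    data_load = dict()
    data_demand_short = dict()
    data_demand_long = dict()
    data_capacity = dict()
    container_node = dict()  # container name -> NCP name

    for node_dir in Path(sim_output).iterdir():
        if node_dir.is_dir():
            process_node(first_timestamp, all_apps, data_load,
                         data_demand_short, data_demand_long,
                         data_capacity, node_dir, container_node)

    return data_load, data_demand_short, data_demand_long, data_capacity, container_node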
Example 4
def process_client_node(node_dir, container_data_dir):
    """
    Arguments:
        node_dir (Path): directory for the node
        container_data_dir (Path): container data directory

    Returns:
        str: client
        dict: sums over time (service -> timestamp -> ServiceCounts)
        dict: values to increment at time, can be used to compute
              global sums (service -> timestamp -> ServiceCounts)
    """

    try:
        client = map_utils.node_name_from_dir(node_dir)

        # service -> timestamp -> ServiceCounts
        client_data = dict()
        for service_dir in container_data_dir.iterdir():
            service = service_dir.name

            for time_dir in service_dir.iterdir():
                status_file = time_dir / 'app_metrics_data/request_status.csv'
                if status_file.exists():
                    with open(status_file) as f:
                        reader = csv.DictReader(map_utils.skip_null_lines(f))
                        for row in reader:
                            if 'timestamp' not in row or 'success' not in row:
                                get_logger().debug("Misformed file %s, could not find timestamp or success", status_file)
                                continue

                            time = int(row['timestamp'])

                            service_data = client_data.get(service, dict())
                            count = service_data.get(time, ServiceCounts(0, 0))
                            success = row['success'].lower() == 'true'
                            if success:
                                count.success_count = count.success_count + 1
                            else:
                                count.failure_count = count.failure_count + 1
                            service_data[time] = count
                            client_data[service] = service_data

        client_counts = compute_success_sums(client_data)

        return client, client_counts, client_data
    except Exception:
        get_logger().exception("Unexpected error")
Example 5
def process_server_node(node_dir, container_data_dir):
    """
    Arguments:
        node_dir (Path): directory for the node
        container_data_dir (Path): container data directory

    Returns:
        str: node name
        dict: sums over time (service -> timestamp -> ServiceCounts)
        dict: values to increment at time, can be used to compute
              global sums (service -> timestamp -> ServiceCounts)
    """

    try:
        node_name = map_utils.node_name_from_dir(node_dir)

        # service -> timestamp -> ServiceCounts
        counts = dict()
        for service_dir in container_data_dir.iterdir():
            for container_dir in service_dir.iterdir():
                for time_dir in container_dir.iterdir():
                    service = latency_analysis.get_container_service(time_dir, service_dir.name)
                    status_file = time_dir / 'app_metrics_data/processing_latency.csv'
                    if status_file.exists():
                        with open(status_file) as f:
                            reader = csv.DictReader(map_utils.skip_null_lines(f))
                            for row in reader:
                                if 'timestamp' not in row or 'event' not in row:
                                    get_logger().debug("Misformed file %s, could not find timestamp or event", status_file)
                                    continue

                                time = int(row['timestamp'])

                                service_data = counts.get(service, dict())
                                count = service_data.get(time, ServiceCounts(0, 0))
                                if re.search('failure', row['event'].lower()):
                                    count.failure_count = count.failure_count + 1
                                else:
                                    count.success_count = count.success_count + 1
                                service_data[time] = count
                                counts[service] = service_data

        sums = compute_success_sums(counts)

        return node_name, sums, counts
    except Exception:
        get_logger().exception("Unexpected error")
Example 6
def process_server(name, server_container_data):
    s_frames = list()
    c_frames = list()
    for image_dir in server_container_data.iterdir():
        image = image_dir.name
        for container_dir in image_dir.iterdir():
            container_name = map_utils.node_name_from_dir(container_dir)
            for time_dir in container_dir.iterdir():
                service = get_container_service(time_dir, image)
                latency_file = time_dir / 'app_metrics_data/processing_latency.csv'
                df = process_server_latency_file(latency_file, service, container_name, latency_file)
                if df is not None:
                    s_frames.append(df)

                dependent_services_dir = time_dir / 'dependent-services'
                if dependent_services_dir.exists():
                    for d_service_dir in dependent_services_dir.iterdir():
                        d_latency_file = d_service_dir / 'processing_latency.csv'
                        d_df = process_client_latency_file(d_latency_file)
                        if d_df is not None:
                            c_frames.append(d_df)

    return s_frames, c_frames
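
The lists of frames returned by process_server look destined for pandas concatenation (an assumption; the consumer is not shown). A hypothetical combine_frames wrapper:

import pandas as pd

def combine_frames(name, server_container_data):
    s_frames, c_frames = process_server(name, server_container_data)
    # pd.concat raises on an empty sequence, so guard both lists.
    server_latency = pd.concat(s_frames, ignore_index=True) if s_frames else pd.DataFrame()
    client_latency = pd.concat(c_frames, ignore_index=True) if c_frames else pd.DataFrame()
    return server_latency, client_latency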
Example 7
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Simulation started at %s -> %d", first_timestamp,
                      first_timestamp_ms)

    output = Path(args.output) / 'rlg-resource-report-lag'
    output.mkdir(parents=True, exist_ok=True)

    for node_dir in sim_output.iterdir():
        max_diff = None
        if not node_dir.is_dir():
            continue

        get_logger().debug("Processing node %s", node_dir)
        node_name_dir = map_utils.find_ncp_folder(node_dir)
        if node_name_dir is None:
            get_logger().debug("No NCP folder found")
            continue
        ncp = map_utils.node_name_from_dir(node_name_dir)

        all_known_nodes = set()
        xs = list()
        ys = dict()  # report node -> series of diffs
        for time_dir in sorted(node_name_dir.iterdir()):
            if not time_dir.is_dir():
                continue

            directory_time = int(time_dir.stem)

            get_logger().debug("\t\tProcessing time %s", time_dir)
            resource_report_file = time_dir / 'regionResourceReports-SHORT.json'
            if resource_report_file.exists():
                try:
                    with open(resource_report_file, 'r') as f:
                        resource_reports = json.load(f)
                except json.decoder.JSONDecodeError:
                    get_logger().warning("Problem reading %s, skipping",
                                         resource_report_file)
                    continue

                xs.append(directory_time)

                seen_nodes = set()
                for resource_report in resource_reports:
                    report_time = int(resource_report['timestamp'])
                    if report_time < 1:
                        # skip reports that we don't have a reasonable time for
                        diff = None
                    else:
                        diff = map_utils.timestamp_to_seconds(directory_time -
                                                              report_time)
                    node = resource_report['nodeName']['name']

                    if node in seen_nodes:
                        get_logger().warning(
                            "Saw multiple reports from %s in %s, skipping the second one",
                            node, time_dir)
                        continue

                    seen_nodes.add(node)
                    all_known_nodes.add(node)

                    # the default value is set up to ensure that newly discovered
                    # nodes have a list of values the same length as the other lists
                    node_series = ys.get(node, [None] * (len(xs) - 1))
                    node_series.append(diff)
                    ys[node] = node_series
                    get_logger().debug("Added %s to %s xs: %d node_series: %d",
                                       diff, node, len(xs), len(node_series))

                    if diff is not None and (max_diff is None or diff > max_diff):
                        max_diff = diff

                # make sure we skip values for any nodes that we should have seen
                missing_nodes = all_known_nodes - seen_nodes
                if len(missing_nodes) > 0:
                    get_logger().debug("Missing nodes: %s Seen: %s",
                                       missing_nodes, seen_nodes)
                    for missing_node in missing_nodes:
                        # the key must exist by now
                        node_series = ys[missing_node]
                        node_series.append(None)
                        ys[missing_node] = node_series
                        get_logger().debug("Added None to %s", missing_node)

                for node, node_series in ys.items():
                    if len(xs) != len(node_series):
                        raise RuntimeError(
                            f"List sizes not correct for {node} {len(xs)} != {len(node_series)}"
                        )

        if len(xs) > 0:
            get_logger().info("Maximum diff for %s is %s ms", ncp, max_diff)
            output_graph(output, ncp, xs, ys, first_timestamp_ms)
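
Example 7 leans on two small map_utils helpers whose behavior can be inferred from usage: directory timestamps are milliseconds since the epoch, and the first-timestamp file holds a log-formatted time. A plausible sketch of both (not the library's actual code; the log time format is a guess):

import datetime

def timestamp_to_seconds(timestamp_ms):
    # Hypothetical: millisecond difference -> seconds.
    return timestamp_ms / 1000

def log_timestamp_to_datetime(ts_str):
    # Hypothetical: assumes an ISO-8601 timestamp string.
    return datetime.datetime.fromisoformat(ts_str)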
Example 8
def process_node(all_services, node_dir):
    """
    Arguments:
        all_services(set): names of all services
        node_dir(Path): directory to the node
    Returns:
        str: node name or None if not found
        NodeNetworkData: network load data from resource summaries or None on an error
        NodeNetworkData: network demand data from resource summaries or None on an error
        NodeNetworkData: network load data from resource report or None on an error
        int: first timestamp
    """

    node_name_dir = map_utils.find_ncp_folder(node_dir)
    if node_name_dir is None:
        get_logger().debug("No NCP folder found in %s", node_dir)
        return None, None, None, None, None

    node_name = map_utils.node_name_from_dir(node_name_dir)
    network_summary_load = NodeNetworkData(all_services)
    network_summary_demand = NodeNetworkData(all_services)
    network_report_load = NodeNetworkData(all_services)

    first_timestamp = None
    get_logger().debug("Processing ncp folder %s from %s. NCP name: %s",
                       node_name_dir, node_dir, node_name)
    for time_dir in node_name_dir.iterdir():
        if not time_dir.is_dir():
            continue

        time = int(time_dir.stem)
        if first_timestamp is None or time < first_timestamp:
            first_timestamp = time

        resource_summary_file = time_dir / 'resourceSummary-LONG.json'
        if resource_summary_file.exists():
            try:
                with open(resource_summary_file, 'r') as f:
                    resource_summary = json.load(f)

                if 'networkLoad' in resource_summary:
                    network_load = resource_summary['networkLoad']
                    network_summary_load.add_data(time, network_load)

                if 'networkDemand' in resource_summary:
                    network_demand = resource_summary['networkDemand']
                    network_summary_demand.add_data(time, network_demand)
            except json.decoder.JSONDecodeError:
                get_logger().warning("Problem reading %s, skipping",
                                     resource_summary_file)

        resource_report_file = time_dir / 'resourceReport-SHORT.json'
        if resource_report_file.exists():
            try:
                with open(resource_report_file, 'r') as f:
                    resource_report = json.load(f)

                if 'networkLoad' in resource_report:
                    network_load = resource_report['networkLoad']
                    network_report_load.add_data(time, network_load)

            except json.decoder.JSONDecodeError:
                get_logger().warning("Problem reading %s, skipping",
                                     resource_report_file)

    return node_name, network_summary_load, network_summary_demand, network_report_load, first_timestamp
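
Example 8 only needs NodeNetworkData to accept a service set at construction and an add_data(time, values) call. A minimal stand-in consistent with that interface (the internal layout and the shape of the networkLoad JSON are assumptions):

class NodeNetworkData:
    """Hypothetical sketch: per-service network values indexed by timestamp."""

    def __init__(self, all_services):
        # One timestamp -> value map per known service.
        self.data = {service: dict() for service in all_services}

    def add_data(self, time, network_values):
        # Assumes network_values maps service names to numeric loads.
        for service, value in network_values.items():
            self.data.setdefault(service, dict())[time] = value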