def basic_statistics_per_batch():
    """Write per-batch client/server counts to a summary CSV.

    Every sub-directory of ``{script_dir}/prints`` is treated as one batch,
    identified by its directory name (``str_dt``). For each batch the file
    ``filtered/traceroute_per_mac.csv`` is read and one line is written to
    ``{script_dir}/prints/basic_statistics_per_batch.csv`` containing:

    * ``cnt_servers``: number of distinct ``server`` values among the rows
      counted as valid clients;
    * ``cnt_valid_clients``: rows whose ``valid_cnt_samples`` value is
      truthy and that have at least one truthy valid-traceroute flag;
    * ``cnt_clients``: total number of rows in the batch file.

    Batches are processed in sorted name order so the output is
    reproducible; ``os.listdir`` alone returns entries in arbitrary order.

    A batch directory without ``filtered/traceroute_per_mac.csv`` makes
    ``pd.read_csv`` raise, as before.
    """
    prints_dir = "{}/prints".format(script_dir)
    out_path = "{}/basic_statistics_per_batch.csv".format(prints_dir)

    with open(out_path, "w") as f:
        f.write("str_dt,cnt_servers,cnt_valid_clients,cnt_clients\n")

        # The flag column names depend only on the traceroute type, so
        # resolve them once instead of once per row.
        valid_fields = []
        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            valid_field, _ = cp_utils.get_traceroute_fields(traceroute_type)
            valid_fields.append(valid_field)

        for str_dt in sorted(os.listdir(prints_dir)):
            batch_dir = "{}/{}".format(prints_dir, str_dt)
            # Plain files (including the summary CSV itself) are skipped.
            if not os.path.isdir(batch_dir):
                continue

            df = pd.read_csv(
                "{}/filtered/traceroute_per_mac.csv".format(batch_dir))

            servers = set()
            cnt_valid_clients = 0
            for _, row in df.iterrows():
                # A client is valid as soon as any traceroute type is valid.
                if (row["valid_cnt_samples"]
                        and any(row[field] for field in valid_fields)):
                    servers.add(row["server"])
                    cnt_valid_clients += 1

            f.write("{},{},{},{}\n".format(
                str_dt, len(servers), cnt_valid_clients, len(df)))
def process_graphs(dt_start, dt_end):
    """Build, write and check one graph per traceroute type and server.

    The servers are the unique ``server`` values found in
    ``{script_dir}/prints/{str_dt}/filtered/traceroute_per_mac.csv``. For
    each (traceroute type, server) pair the graph returned by ``get_graph``
    is written to ``.../filtered/graph/{server}/{traceroute_field}_graph.gv``
    via ``write_graph`` and then passed to ``check_graph``.

    Args:
        dt_start: start of the analysed period (forwarded to helpers).
        dt_end: end of the analysed period (forwarded to helpers).
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    graph_root = "{}/prints/{}/filtered/graph/".format(script_dir, str_dt)
    utils.create_dirs([graph_root])

    per_mac_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)
    per_mac = pd.read_csv(per_mac_path)
    servers = np.unique(per_mac["server"].values)

    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        fields = cp_utils.get_traceroute_fields(traceroute_type)
        valid_traceroute_field, traceroute_field = fields

        for server in servers:
            server_dir = "{}/prints/{}/filtered/graph/{}".format(
                script_dir, str_dt, server)
            utils.create_dirs([graph_root, server_dir])

            graph_path = "{}/{}_graph.gv".format(server_dir,
                                                 traceroute_field)
            name_neigh = get_graph(dt_start, dt_end, valid_traceroute_field,
                                   traceroute_field, server)
            write_graph(graph_path, name_neigh)
            check_graph(server_dir, name_neigh, traceroute_field)
def plot_latencies_traceroute(dt_start, dt_end, preprocess_args):
    """Plot the preprocessed latency series of each traceroute hop.

    Rows of ``{script_dir}/prints/{str_dt}/filtered/traceroute_per_mac.csv``
    with a truthy ``valid_cnt_samples`` are processed. For each traceroute
    type whose valid flag is truthy in the row, the stored traceroute
    literal is parsed and, for every hop except the last one, the hop's
    time series is copied, preprocessed and plotted to::

        {script_dir}/plots/paths/{str_dt}/latency/{traceroute_type}/
            {server}/{hops in reverse order, one directory each}/
            traceroute_latencies/{mac}/hopNN_{name}.png

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        preprocess_args: forwarded unchanged to ``cp_utils.preprocess``.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    per_mac_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)
    for _, row in pd.read_csv(per_mac_path).iterrows():
        if not row["valid_cnt_samples"]:
            continue

        client_in_path = utils.get_in_path(row["server"], row["mac"],
                                           dt_start, dt_end)
        ts_traceroute = TimeSeries(in_path=client_in_path,
                                   metric="traceroute",
                                   dt_start=dt_start, dt_end=dt_end)

        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            valid_traceroute_field, traceroute_field = \
                cp_utils.get_traceroute_fields(traceroute_type)
            if not row[valid_traceroute_field]:
                continue

            traceroute = ast.literal_eval(row[traceroute_field])
            name_ts = get_ts_per_name(traceroute_type, ts_traceroute,
                                      dt_start, dt_end)

            # One nested directory per hop, last hop outermost.
            server_dir = ("{}/plots/paths/{}/{}/{}/{}".
                          format(script_dir, str_dt, "latency",
                                 traceroute_type, row["server"]))
            hops_subpath = "/".join(
                str(hop) for hop in reversed(traceroute))
            dir_path = "{}/{}".format(server_dir, hops_subpath)

            utils.create_dirs([
                "{}/traceroute_latencies/".format(dir_path),
                "{}/traceroute_latencies/{}".format(dir_path, row["mac"]),
            ])

            # The final hop is intentionally left out of the plots.
            for hop_idx, hop in enumerate(traceroute[:-1]):
                name = hop[0][0]
                plot_name = "hop{}_{}".format(str(hop_idx).zfill(2), name)
                out_path = ("{}/traceroute_latencies/{}/{}.png".
                            format(dir_path, row["mac"], plot_name))

                # Work on a copy so the cached per-name series stays raw.
                ts_preprocessed = name_ts[name].copy()
                cp_utils.preprocess(ts_preprocessed, preprocess_args)

                # A shared-x raw vs. filtered plot
                # (plot_procedures.plot_ts_share_x) used to be produced
                # here; only the filtered series is plotted now.
                ts_preprocessed.metric = "latency"
                plot_procedures.plot_ts(ts_preprocessed, out_path,
                                        title="median filtered")
def get_first_hops(dt_start, dt_end, server, traceroute_type):
    """Return the set of first traceroute entries seen for a server.

    Reads ``{script_dir}/prints/{str_dt}/filtered/traceroute_per_mac.csv``,
    keeps the rows of ``server`` whose ``valid_cnt_samples`` and
    valid-traceroute flag (for ``traceroute_type``) are both truthy, parses
    each stored traceroute literal and collects its element at index 0.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        server: value matched against the ``server`` column.
        traceroute_type: selects which traceroute columns are used.

    Returns:
        set: the distinct first entries of the selected traceroutes.
    """
    valid_traceroute_field, traceroute_field = \
        cp_utils.get_traceroute_fields(traceroute_type)

    str_dt = utils.get_str_dt(dt_start, dt_end)
    per_mac_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)
    per_mac = pd.read_csv(per_mac_path)
    server_rows = per_mac[per_mac["server"] == server]

    first_hops = set()
    for _, row in server_rows.iterrows():
        if not (row["valid_cnt_samples"] and row[valid_traceroute_field]):
            continue
        hops = ast.literal_eval(row[traceroute_field])
        first_hops.add(hops[0])
    return first_hops
def print_per_path(dt_start, dt_end, metric, file_name):
    """Replicate the rows of a per-client CSV into per-path directories.

    The input is ``{script_dir}/prints/{str_dt}/filtered/{metric}/
    {file_name}``. For every traceroute type and every input row whose
    client has a known traceroute, the row is appended to a ``file_name``
    CSV in each directory of the nested chain::

        {base_dir}/change_point/unsupervised//plots/paths/{str_dt}/{metric}/
            {traceroute_type}/{server}/{last hop}/{previous hop}/...

    Hops are walked in reverse order; a hop whose first name starts with
    the dot-separated component ``"192"`` is skipped and adds no directory
    level. The first time a directory is used for a traceroute type its CSV
    is (re)initialised through ``create_csv_with_same_header``.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        metric: sub-directory name of the metric being processed.
        file_name: name of the input CSV, reused for every output CSV.
    """
    # NOTE(review): sibling functions in this file write under
    # ``script_dir`` and call ``unsupervised_utils.iter_traceroute_types``;
    # this one uses ``base_dir`` and an unqualified ``iter_traceroute_types``
    # -- confirm both names are really in scope here.
    str_dt = utils.get_str_dt(dt_start, dt_end)

    out_dir = "{}/change_point/unsupervised/".format(base_dir)
    utils.create_dirs([
        "{}/plots".format(out_dir), "{}/plots/paths".format(out_dir),
        "{}/plots/paths/{}".format(out_dir, str_dt),
        "{}/plots/paths/{}/{}".format(out_dir, str_dt, metric)
    ])

    in_path = "{}/prints/{}/filtered/{}/{}".format(script_dir, str_dt, metric,
                                                   file_name)
    # The input does not depend on the traceroute type: read it only once.
    df = pd.read_csv(in_path)

    for traceroute_type in iter_traceroute_types():
        client_traceroute = cp_utils.get_client_traceroute(
            dt_start, dt_end, traceroute_type)

        # Directories whose CSV header was already written for this type.
        path_dirs = set()

        for _, row in df.iterrows():
            client = utils.get_client(row["server"], row["mac"])
            if client not in client_traceroute:
                continue

            dir_path = "{}/plots/paths/{}/{}/{}/{}".format(
                out_dir, str_dt, metric, traceroute_type, row["server"])
            utils.create_dirs([dir_path])

            for name in reversed(client_traceroute[client]):
                # presumably filters private 192.x.x.x hops -- TODO confirm
                if name[0][0].split(".")[0] == "192":
                    continue

                # Each kept hop adds one more directory level.
                dir_path = "{}/{}".format(dir_path, name)
                utils.create_dirs([dir_path])

                out_path = "{}/{}".format(dir_path, file_name)
                if dir_path not in path_dirs:
                    create_csv_with_same_header(out_path, df)
                    path_dirs.add(dir_path)
                pd.DataFrame(row).T.to_csv(out_path,
                                           mode="a",
                                           header=False,
                                           index=False)
def plot_per_name(dt_start, dt_end, metric, preprocess_args, plot_cps=True):
    """Plot each valid client's series under every hop name of its route.

    For every traceroute type and every row of
    ``{script_dir}/prints/{str_dt}/filtered/traceroute_per_mac.csv`` whose
    ``valid_cnt_samples`` and valid-traceroute flag are truthy, a PNG is
    placed in::

        {script_dir}/plots/names/{str_dt}/{metric}/{traceroute_type}/
            {server}/{name}/{out_file_name}.png

    for each name yielded by ``cp_utils.iter_names_traceroute_filtered``.
    A client's series is loaded, preprocessed and plotted only once (across
    all traceroute types and names); later occurrences copy the first PNG.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        metric: metric to load and plot; also used as a directory name.
        preprocess_args: forwarded unchanged to ``cp_utils.preprocess``.
        plot_cps: forwarded to ``unsupervised_utils.get_client_cps``.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    utils.create_dirs([
        "{}/plots/".format(script_dir), "{}/plots/names".format(script_dir),
        "{}/plots/names/{}".format(script_dir, str_dt),
        "{}/plots/names/{}/{}".format(script_dir, str_dt, metric)
    ])

    client_cps = unsupervised_utils.get_client_cps(plot_cps, str_dt, metric)

    # client -> path of the first plot produced for it (avoids replotting)
    client_plot_path = {}

    # The per-client table is the same for every traceroute type: read once.
    df = pd.read_csv("{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt))

    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)

        utils.create_dirs([
            "{}/plots/names/{}/{}/{}".format(script_dir, str_dt, metric,
                                             traceroute_type)
        ])

        cnt = 0
        for _, row in df.iterrows():
            if not (row["valid_cnt_samples"]
                    and row[valid_traceroute_field]):
                continue

            # Progress output: index of the valid row within this type.
            print("cnt={}, traceroute_type={}, str_dt={}".format(
                cnt, traceroute_type, str_dt))
            cnt += 1

            client = utils.get_client(row["server"], row["mac"])
            # The file name is the same for every hop name of this row.
            out_file_name = utils.get_out_file_name(
                row["server"], row["mac"], dt_start, dt_end)

            for name in cp_utils.iter_names_traceroute_filtered(
                    ast.literal_eval(row[traceroute_field])):

                utils.create_dirs([
                    "{}/plots/names/{}/{}/{}/{}".format(
                        script_dir, str_dt, metric, traceroute_type,
                        row["server"]),
                    "{}/plots/names/{}/{}/{}/{}/{}".format(
                        script_dir, str_dt, metric, traceroute_type,
                        row["server"], name)
                ])

                out_path = ("{}/plots/names/{}/{}/{}/{}/{}/{}.png".format(
                    script_dir, str_dt, metric, traceroute_type,
                    row["server"], name, out_file_name))

                if client in client_plot_path:
                    # Already rendered for this client: reuse the image.
                    shutil.copyfile(client_plot_path[client], out_path)
                    continue

                client_plot_path[client] = out_path
                # NOTE(review): raises KeyError if the client has no entry
                # in client_cps -- confirm get_client_cps covers all clients.
                cp_dts = client_cps[client]

                in_path = "{}/input/{}/{}/{}.csv".format(
                    base_dir, dt_dir, row["server"], row["mac"])

                ts = TimeSeries(in_path, metric, dt_start, dt_end)
                cp_utils.preprocess(ts, preprocess_args)
                plot_procedures.plot_ts(ts,
                                        out_path,
                                        dt_axvline=cp_dts,
                                        title="median filtered")