def print_names_per_mac(dt_start, dt_end, mac_node):
    """
    write names_per_mac.csv: one row per (server, mac) yielded by
    utils.iter_server_mac, holding the sorted names (as returned by
    get_name) collected from every hop of its traceroute measures in
    [dt_start, dt_end]. The file is sorted by (server, node) at the end.

    mac_node: mapping queried with .get(mac) to fill the "node" column
    """
    # TODO: Probably deprecated

    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)
    out_path = "{}/prints/{}/not_filtered/names_per_mac.csv".format(
        script_dir, str_dt)

    with open(out_path, "w") as out_file:
        out_file.write("server,node,mac,names\n")
        for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
            ts = TimeSeries(in_path=in_path,
                            metric="traceroute",
                            dt_start=dt_start,
                            dt_end=dt_end)

            seen_names = set()
            for traceroute in ts.y:
                # falsy (empty / missing) measures carry no hops
                if not traceroute:
                    continue
                ip_name = get_ip_name(traceroute)
                seen_names.update(
                    get_name(hop_name, ip_name)
                    for hop in traceroute
                    for hop_name in hop["names"])

            # the list repr contains commas, hence the surrounding quotes
            row = "{},{},{},\"{}\"\n".format(server, mac_node.get(mac), mac,
                                             sorted(seen_names))
            out_file.write(row)

    utils.sort_csv_file(out_path, ["server", "node"])
示例#2
0
def add_cp_ids():
    """
    add to data_web_system.csv a new column, "change_points_ids": the change
    points ids (index of change points when points are sorted by measure
    datetime), comma separated. Rows with an empty "loss" time series or
    without change points get an empty string.

    does nothing when the column is already present
    """

    csv_path = "{}/data_web_system.csv".format(script_dir)
    df = pd.read_csv(csv_path)
    if "change_points_ids" in df:
        return

    cp_ids = []
    for _, row in df.iterrows():
        dt_start = dt_procedures.from_strdt_to_dt(row["dt_start"])
        dt_end = dt_procedures.from_strdt_to_dt(row["dt_end"])
        dt_dir = utils.get_dt_dir(dt_start, dt_end)
        ts_path = "{}/input/{}/{}/{}.csv".format(base_dir, dt_dir,
                                                 row["server"], row["mac"])

        ts = TimeSeries(ts_path, "loss", dt_start=dt_start, dt_end=dt_end)
        # no samples, or the literal '' marker for "no change points"
        if not ts.x or str(row["change_points"]) == "\'\'":
            cp_ids.append("")
            continue

        cp_dts = map(dt_procedures.from_js_strdt_to_dt,
                     row["change_points"].split(","))
        ids = from_dt_to_id(ts_path, "loss", dt_start, dt_end, cp_dts)
        cp_ids.append(",".join(map(str, ids)))

    df["change_points_ids"] = cp_ids
    df.to_csv(csv_path, index=False)
def print_macs_per_name(dt_start, dt_end, mac_node):
    """
    write macs_per_name.csv: one row per name (as returned by get_name)
    found in the hops of the traceroute measures in [dt_start, dt_end],
    followed by the sorted list of (server, node, mac) tuples in which the
    name appeared

    mac_node: mapping queried with .get(mac) to fill the node of each tuple
    """
    # TODO: Probably deprecated

    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    name_macs = {}
    for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
        ts = TimeSeries(in_path=in_path,
                        metric="traceroute",
                        dt_start=dt_start,
                        dt_end=dt_end)
        for traceroute in ts.y:
            # falsy (empty / missing) measures carry no hops
            if not traceroute:
                continue
            ip_name = get_ip_name(traceroute)
            for hop in traceroute:
                for raw_name in hop["names"]:
                    key = get_name(raw_name, ip_name)
                    name_macs.setdefault(key, set()).add(
                        (server, mac_node.get(mac), mac))

    out_path = "{}/prints/{}/not_filtered/macs_per_name.csv".format(
        script_dir, str_dt)
    with open(out_path, "w") as out_file:
        out_file.write("name,macs\n")
        for name in sorted(name_macs):
            # the list repr contains commas, hence the surrounding quotes
            out_file.write("{},\"{}\"\n".format(name,
                                                sorted(name_macs[name])))
def print_name_ips(dt_start, dt_end):
    """
    write name_ips.csv: one row per hop name found in the traceroute
    measures in [dt_start, dt_end], followed by the sorted list of ips that
    were paired with that name (hop["names"] and hop["ips"] are walked in
    lockstep)

    the ip list is written between double quotes: its repr contains commas,
    which would otherwise spill into extra columns beyond "name,ips"
    """
    # TODO: Probably deprecated

    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    name_ip = {}
    for _server, _mac, in_path in utils.iter_server_mac(dt_dir, True):
        ts = TimeSeries(in_path=in_path,
                        metric="traceroute",
                        dt_start=dt_start,
                        dt_end=dt_end)
        for traceroute in ts.y:
            # falsy (empty / missing) measures carry no hops
            if not traceroute:
                continue
            for hop in traceroute:
                for name, ip in izip(hop["names"], hop["ips"]):
                    name_ip.setdefault(name, set()).add(ip)

    out_path = "{}/prints/{}/not_filtered/name_ips.csv".format(
        script_dir, str_dt)
    with open(out_path, "w") as f:
        f.write("name,ips\n")
        for name in sorted(name_ip):
            # quote the list so its inner commas stay inside the ips column
            f.write("{},\"{}\"\n".format(name, sorted(name_ip[name])))
示例#5
0
def unpack_pandas_row(row):
    """
    return (in_path, dt_start, dt_end) for a row exposing the keys
    "dt_start", "dt_end", "server" and "mac": both datetimes are parsed with
    dt_procedures.from_strdt_to_dt and in_path points to
    <base_dir>/input/<dt_dir>/<server>/<mac>.csv
    """
    to_dt = dt_procedures.from_strdt_to_dt
    dt_start = to_dt(row["dt_start"])
    dt_end = to_dt(row["dt_end"])

    in_path = "{}/input/{}/{}/{}.csv".format(
        base_dir, utils.get_dt_dir(dt_start, dt_end), row["server"],
        row["mac"])
    return in_path, dt_start, dt_end
def print_traceroute_per_mac(dt_start, dt_end):
    """
    write traceroute_per_mac.csv: for every (server, mac) yielded by
    utils.iter_server_mac, the (valid, traceroute) pair returned by
    get_traceroute for four different flag combinations, in the column
    order given by the header. The file is sorted by (server, mac) at the
    end.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    out_path = "{}/prints/{}/not_filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)

    header = ("server,mac,"
              "valid_traceroute_compress_embratel,"
              "traceroute_compress_embratel,"
              "valid_traceroute_compress_embratel_without_last_hop_embratel,"
              "traceroute_compress_embratel_without_last_hop_embratel,"
              "valid_traceroute_without_embratel,"
              "traceroute_without_embratel,"
              "valid_traceroute,"
              "traceroute\n")

    # positional flags handed to get_traceroute, one triple per pair of
    # output columns, listed in header order
    flag_triples = (
        (True, True, True),
        (True, True, False),
        (False, False, False),
        (True, False, False),
    )

    # each traceroute repr contains commas, hence the quoted second slot
    row_fmt = "{},{}" + ",{},\"{}\"" * 4 + "\n"

    with open(out_path, "w") as out_file:
        out_file.write(header)
        for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
            ts_traceroute = TimeSeries(in_path=in_path,
                                       metric="traceroute",
                                       dt_start=dt_start,
                                       dt_end=dt_end)

            fields = [server, mac]
            for flags in flag_triples:
                valid, traceroute = get_traceroute(ts_traceroute, *flags)
                fields += [valid, traceroute]

            out_file.write(row_fmt.format(*fields))

    utils.sort_csv_file(out_path, ["server", "mac"])
def plot(dt_start, dt_end, metric):
    """
    for every (server, mac) yielded by utils.iter_server_mac, plot the
    `metric` time series after a percentile filter (win_len=5, p=0.5) and
    save it to <script_dir>/<str_dt>/<metric>/<out_file_name>.png
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    metric_dir = "{}/{}/{}".format(script_dir, str_dt, metric)
    utils.create_dirs(["{}/{}".format(script_dir, str_dt), metric_dir])

    for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
        out_file_name = utils.get_out_file_name(server, mac, dt_start, dt_end)
        out_path = "{}/{}.png".format(metric_dir, out_file_name)

        # two independent loads: the untouched one is only used to derive
        # the y label, the other one is filtered and plotted
        raw_ts = TimeSeries(in_path, metric, dt_start, dt_end)
        filtered_ts = TimeSeries(in_path, metric, dt_start, dt_end)
        filtered_ts.percentile_filter(win_len=5, p=0.5)

        # NOTE: earlier experiments at this point (STL decomposition of the
        # filtered series, raw vs. filtered on a shared x axis, cross
        # traffic comparison) are no longer run.
        plot_procedures.plot_ts(
            filtered_ts,
            out_path,
            ylabel=plot_procedures.get_default_ylabel(raw_ts),
            compress=False,
            title="median filtered")
def get_data(dt_start_sp, dt_end_sp, host="cabul", port=27017):
    """
    [dt_start_sp, dt_end_sp) must define a month

    remove any previous output directory for the period, query the
    NET.measures mongo collection for documents whose "_id.date" falls in
    the period (bounds converted with dt_procedures.from_sp_to_utc) and hand
    the cursor to write_csvs

    host, port: mongo server address (defaults keep the previous hard-coded
    values)
    """

    dt_dir = utils.get_dt_dir(dt_start_sp, dt_end_sp)

    # start from a clean output directory
    out_dir = "{}/{}".format(script_dir, dt_dir)
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)

    client = MongoClient(host, port)
    try:
        collection = client["NET"]["measures"]

        dt_start = dt_procedures.from_sp_to_utc(dt_start_sp)
        dt_end = dt_procedures.from_sp_to_utc(dt_end_sp)

        cursor = collection.find(
            {"_id.date": {"$gte": dt_start, "$lt": dt_end}})
        write_csvs(dt_dir, dt_start, dt_end, cursor, collection)
    finally:
        # release the connection even when the export fails
        client.close()
示例#9
0
def plot_per_node(dt_start, dt_end, metric, only_unique_traceroute):
    """
    for every (server, mac) yielded by utils.iter_server_mac whose node is
    in read_input.get_valid_nodes(), plot the raw `metric` series together
    with its percentile filtered version (win_len=13, p=0.5) and save it to
    <script_dir>/plots/nodes/<str_dt>/<metric>/<node>/<out_file_name>.png

    only_unique_traceroute: when truthy, macs that are not returned by
    read_input.get_macs_traceroute_filter(dt_start, dt_end, "filtered") are
    skipped

    macs without an entry in read_input.get_mac_node() are skipped
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    utils.create_dirs([
        "{}/plots/".format(script_dir), "{}/plots/nodes".format(script_dir),
        "{}/plots/nodes/{}".format(script_dir, str_dt),
        "{}/plots/nodes/{}/{}".format(script_dir, str_dt, metric)
    ])

    valid_nodes = read_input.get_valid_nodes()
    mac_node = read_input.get_mac_node()

    macs_unique_traceroute = read_input.get_macs_traceroute_filter(
        dt_start, dt_end, "filtered")

    for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
        if only_unique_traceroute and (mac not in macs_unique_traceroute):
            continue

        # a mac with no known node has no node directory to be plotted
        # into: skip it instead of aborting the whole run with a KeyError
        node = mac_node.get(mac)
        if node is None or node not in valid_nodes:
            continue

        node_dir = "{}/plots/nodes/{}/{}/{}".format(script_dir, str_dt,
                                                    metric, node)
        utils.create_dirs([node_dir])

        out_file_name = utils.get_out_file_name(server, mac, dt_start,
                                                dt_end)
        out_path = "{}/{}.png".format(node_dir, out_file_name)

        # raw on one axis, median filtered on the other
        ts = TimeSeries(in_path, metric, dt_start, dt_end)
        ts_filter = TimeSeries(in_path, metric, dt_start, dt_end)
        ts_filter.percentile_filter(win_len=13, p=0.5)
        plot_procedures.plot_ts_share_x(ts,
                                        ts_filter,
                                        out_path,
                                        compress=False,
                                        plot_type2="scatter")
def plot_per_name(dt_start, dt_end, metric, preprocess_args, plot_cps=True):
    """
    for every traceroute type, every valid row of the filtered
    traceroute_per_mac.csv and every name yielded by
    cp_utils.iter_names_traceroute_filtered for that row's traceroute, save
    the client's preprocessed `metric` plot to
    <script_dir>/plots/names/<str_dt>/<metric>/<traceroute_type>/<server>/<name>/<out_file_name>.png

    each client is rendered only once: later occurrences copy the first png

    preprocess_args: forwarded to cp_utils.preprocess
    plot_cps: forwarded to unsupervised_utils.get_client_cps, whose result
        supplies the vertical lines (dt_axvline) of each client's plot
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    metric_dir = "{}/plots/names/{}/{}".format(script_dir, str_dt, metric)
    utils.create_dirs([
        "{}/plots/".format(script_dir), "{}/plots/names".format(script_dir),
        "{}/plots/names/{}".format(script_dir, str_dt),
        metric_dir
    ])

    client_cps = unsupervised_utils.get_client_cps(plot_cps, str_dt, metric)

    # the csv does not depend on the traceroute type: read it only once
    df = pd.read_csv("{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt))

    # avoid replotting: client -> path of the png already rendered for it
    client_plot_path = {}

    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)

        type_dir = "{}/{}".format(metric_dir, traceroute_type)
        utils.create_dirs([type_dir])

        cnt = 0
        for _, row in df.iterrows():
            if not (row["valid_cnt_samples"] and row[valid_traceroute_field]):
                continue

            # progress report
            print("cnt={}, traceroute_type={}, str_dt={}".format(
                cnt, traceroute_type, str_dt))
            cnt += 1

            client = utils.get_client(row["server"], row["mac"])

            # the traceroute is stored in the csv as a python literal
            traceroute = ast.literal_eval(row[traceroute_field])
            for name in cp_utils.iter_names_traceroute_filtered(traceroute):
                server_dir = "{}/{}".format(type_dir, row["server"])
                name_dir = "{}/{}".format(server_dir, name)
                utils.create_dirs([server_dir, name_dir])

                out_file_name = utils.get_out_file_name(
                    row["server"], row["mac"], dt_start, dt_end)
                out_path = "{}/{}.png".format(name_dir, out_file_name)

                # avoid replotting
                if client in client_plot_path:
                    shutil.copyfile(client_plot_path[client], out_path)
                    continue

                client_plot_path[client] = out_path
                cp_dts = client_cps[client]

                in_path = "{}/input/{}/{}/{}.csv".format(
                    base_dir, dt_dir, row["server"], row["mac"])

                ts = TimeSeries(in_path, metric, dt_start, dt_end)
                cp_utils.preprocess(ts, preprocess_args)
                plot_procedures.plot_ts(ts,
                                        out_path,
                                        dt_axvline=cp_dts,
                                        title="median filtered")
def print_empty_segs(dt_start,
                     dt_end,
                     metric,
                     min_seg_len,
                     filtered,
                     plot=False):
    """
    write empty_segs_per_mac.csv: for every (server, mac) whose mac is
    returned by read_input.get_macs_traceroute_filter(dt_start, dt_end,
    filtered), the list of [start, end] datetime string pairs for which
    is_empty_seg(start, end, min_seg_len) is true. Candidate pairs are
    (dt_start, first sample), every pair of consecutive samples and
    (last sample, dt_end). Series with fewer than 2 samples get an empty
    list.

    filtered: also used as a directory name of the output path
    plot: when truthy, additionally save each series to
        <script_dir>/plots/empty_segs/<str_dt>/<metric>/ with a vertical
        line at every sample that delimits an empty segment
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    utils.create_dirs([
        "{}/prints/".format(script_dir),
        "{}/prints/{}".format(script_dir, str_dt),
        "{}/prints/{}/{}".format(script_dir, str_dt, filtered),
        "{}/prints/{}/{}/{}".format(script_dir, str_dt, filtered, metric)
    ])

    out_path = "{}/prints/{}/{}/{}/empty_segs_per_mac.csv".format(
        script_dir, str_dt, filtered, metric)
    with open(out_path, "w") as f:
        f.write("server,mac,empty_segs\n")

        target_macs = read_input.get_macs_traceroute_filter(
            dt_start, dt_end, filtered)
        for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
            if mac not in target_macs:
                continue

            ts = TimeSeries(in_path=in_path,
                            metric=metric,
                            dt_start=dt_start,
                            dt_end=dt_end)

            axvline_dts = []
            empty_segs = []
            if len(ts.x) >= 2:
                first_dt, last_dt = ts.x[0], ts.x[-1]

                # gap between the period start and the first sample
                if is_empty_seg(dt_start, first_dt, min_seg_len):
                    axvline_dts.append(first_dt)
                    empty_segs.append([str(dt_start), str(first_dt)])

                # gaps between consecutive samples
                for prev_dt, cur_dt in zip(ts.x[:-1], ts.x[1:]):
                    if is_empty_seg(prev_dt, cur_dt, min_seg_len):
                        axvline_dts.append(prev_dt)
                        axvline_dts.append(cur_dt)
                        empty_segs.append([str(prev_dt), str(cur_dt)])

                # gap between the last sample and the period end; the line
                # belongs on the last sample (it used to be drawn on the
                # second-to-last one through a leaked loop index)
                if is_empty_seg(last_dt, dt_end, min_seg_len):
                    axvline_dts.append(last_dt)
                    empty_segs.append([str(last_dt), str(dt_end)])

            # the list repr contains commas, hence the surrounding quotes
            f.write("{},{},\"{}\"\n".format(server, mac, empty_segs))

            if plot:
                utils.create_dirs([
                    "{}/plots/".format(script_dir),
                    "{}/plots/empty_segs".format(script_dir),
                    "{}/plots/empty_segs/{}".format(script_dir, str_dt),
                    "{}/plots/empty_segs/{}/{}".format(script_dir, str_dt,
                                                       metric)
                ])

                out_file_name = utils.get_out_file_name(
                    server, mac, dt_start, dt_end)
                # separate name: out_path still designates the csv
                plot_path = "{}/plots/empty_segs/{}/{}/{}.png".format(
                    script_dir, str_dt, metric, out_file_name)
                plot_procedures.plot_ts(ts, plot_path, dt_axvline=axvline_dts)