def print_names_per_mac(dt_start, dt_end, mac_node):
    """
    Write prints/<str_dt>/not_filtered/names_per_mac.csv.

    One row is written per (server, mac, in_path) triple yielded by
    utils.iter_server_mac. The "names" column is the sorted list of
    get_name(name, ip_name) results over every hop name of every non-empty
    traceroute in the "traceroute" time series of that pair.

    mac_node is read with .get(mac), so a mac without an entry is written
    with node None. The finished file is passed to utils.sort_csv_file with
    the columns ["server", "node"].
    """
    # TODO: Probably deprecated
    period_dir = utils.get_dt_dir(dt_start, dt_end)
    period_label = utils.get_str_dt(dt_start, dt_end)
    out_path = "{}/prints/{}/not_filtered/names_per_mac.csv".format(
        script_dir, period_label)

    with open(out_path, "w") as f:
        f.write("server,node,mac,names\n")
        for server, mac, in_path in utils.iter_server_mac(period_dir, True):
            ts = TimeSeries(in_path=in_path, metric="traceroute",
                            dt_start=dt_start, dt_end=dt_end)

            seen_names = set()
            for traceroute in ts.y:
                # Falsy entries (e.g. None or an empty list) carry no hops.
                if not traceroute:
                    continue
                ip_name = get_ip_name(traceroute)
                for hop in traceroute:
                    seen_names.update(get_name(raw_name, ip_name)
                                      for raw_name in hop["names"])

            # The list repr contains commas, hence the surrounding quotes.
            f.write("{},{},{},\"{}\"\n".format(server, mac_node.get(mac), mac,
                                               sorted(seen_names)))

    utils.sort_csv_file(out_path, ["server", "node"])
def add_cp_ids():
    """
    Add a "change_points_ids" column to <script_dir>/data_web_system.csv.

    Does nothing when the column is already present. Otherwise, for each row
    the "change_points" cell (comma-separated datetimes) is parsed with
    dt_procedures.from_js_strdt_to_dt and handed to from_dt_to_id together
    with the row's "loss" input file and time window; the ids it returns are
    stored comma-joined (per the original note: the index of each change
    point when points are sorted by measure datetime).

    A row gets an empty string when its time series has no points or when
    str(row["change_points"]) equals the two-character string ''.
    The file is rewritten in place without the pandas index.
    """
    csv_path = "{}/data_web_system.csv".format(script_dir)
    df = pd.read_csv(csv_path)
    if "change_points_ids" in df:
        return

    cp_ids = []
    for _, row in df.iterrows():
        dt_start = dt_procedures.from_strdt_to_dt(row["dt_start"])
        dt_end = dt_procedures.from_strdt_to_dt(row["dt_end"])
        dt_dir = utils.get_dt_dir(dt_start, dt_end)
        in_path = "{}/input/{}/{}/{}.csv".format(base_dir, dt_dir,
                                                 row["server"], row["mac"])

        ts = TimeSeries(in_path, "loss", dt_start=dt_start, dt_end=dt_end)

        row_ids = ""
        # Only rows with data and a non-placeholder cell are translated.
        if ts.x and str(row["change_points"]) != "\'\'":
            l_dt = map(dt_procedures.from_js_strdt_to_dt,
                       row["change_points"].split(","))
            l_id = from_dt_to_id(in_path, "loss", dt_start, dt_end, l_dt)
            row_ids = ",".join(map(str, l_id))
        cp_ids.append(row_ids)

    df["change_points_ids"] = cp_ids
    df.to_csv(csv_path, index=False)
def print_macs_per_name(dt_start, dt_end, mac_node):
    """
    Write prints/<str_dt>/not_filtered/macs_per_name.csv.

    Inverts the traceroute data: for every name produced by
    get_name(name, ip_name) over the hops of every non-empty traceroute, the
    set of (server, node, mac) tuples in whose series the name appeared is
    collected. node comes from mac_node.get(mac), so it is None for a mac
    without an entry.

    Rows are written in sorted name order, each with the sorted list of its
    tuples in a quoted "macs" column.
    """
    # TODO: Probably deprecated
    period_dir = utils.get_dt_dir(dt_start, dt_end)
    period_label = utils.get_str_dt(dt_start, dt_end)

    name_macs = {}
    for server, mac, in_path in utils.iter_server_mac(period_dir, True):
        ts = TimeSeries(in_path=in_path, metric="traceroute",
                        dt_start=dt_start, dt_end=dt_end)
        for traceroute in ts.y:
            # Falsy entries (e.g. None or an empty list) carry no hops.
            if not traceroute:
                continue
            ip_name = get_ip_name(traceroute)
            for hop in traceroute:
                for raw_name in hop["names"]:
                    name = get_name(raw_name, ip_name)
                    name_macs.setdefault(name, set()).add(
                        (server, mac_node.get(mac), mac))

    out_path = "{}/prints/{}/not_filtered/macs_per_name.csv".format(
        script_dir, period_label)
    with open(out_path, "w") as f:
        f.write("name,macs\n")
        for name in sorted(name_macs):
            f.write("{},\"{}\"\n".format(name, sorted(name_macs[name])))
def print_name_ips(dt_start, dt_end):
    """
    Write prints/<str_dt>/not_filtered/name_ips.csv.

    For every hop of every non-empty traceroute in the "traceroute" series
    of each (server, mac) pair yielded by utils.iter_server_mac, the entries
    of hop["names"] and hop["ips"] are paired positionally and each name
    accumulates the set of ips it was paired with.

    Rows are written in sorted name order. The "ips" column holds the sorted
    list of ips and is wrapped in double quotes: the list repr contains
    commas, so without quoting a name with several ips would not parse as a
    two-column CSV row.
    """
    # TODO: Probably deprecated
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    name_ip = {}
    for _server, _mac, in_path in utils.iter_server_mac(dt_dir, True):
        ts = TimeSeries(in_path=in_path, metric="traceroute",
                        dt_start=dt_start, dt_end=dt_end)
        for traceroute in ts.y:
            # Falsy entries (e.g. None or an empty list) carry no hops.
            if not traceroute:
                continue
            for hop in traceroute:
                for name, ip in izip(hop["names"], hop["ips"]):
                    name_ip.setdefault(name, set()).add(ip)

    out_path = "{}/prints/{}/not_filtered/name_ips.csv".format(
        script_dir, str_dt)
    with open(out_path, "w") as f:
        f.write("name,ips\n")
        for name in sorted(name_ip):
            f.write("{},\"{}\"\n".format(name, sorted(name_ip[name])))
def unpack_pandas_row(row):
    """
    Derive the input file and time window described by a row.

    row must provide "dt_start", "dt_end", "server" and "mac". The two
    datetime strings are parsed with dt_procedures.from_strdt_to_dt and the
    input path is built as <base_dir>/input/<dt_dir>/<server>/<mac>.csv,
    where dt_dir comes from utils.get_dt_dir.

    Returns the tuple (in_path, dt_start, dt_end).
    """
    parse = dt_procedures.from_strdt_to_dt
    dt_start = parse(row["dt_start"])
    dt_end = parse(row["dt_end"])

    period_dir = utils.get_dt_dir(dt_start, dt_end)
    in_path = "{}/input/{}/{}/{}.csv".format(
        base_dir, period_dir, row["server"], row["mac"])
    return in_path, dt_start, dt_end
def print_traceroute_per_mac(dt_start, dt_end):
    """
    Write prints/<str_dt>/not_filtered/traceroute_per_mac.csv.

    For each (server, mac) pair yielded by utils.iter_server_mac the
    "traceroute" time series is loaded once and get_traceroute is called on
    it four times with different positional flag triples. Each call returns
    a (valid, traceroute) pair, which fills two adjacent columns; the
    traceroute value is written inside double quotes.

    The finished file is passed to utils.sort_csv_file with the columns
    ["server", "mac"].
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)
    out_path = "{}/prints/{}/not_filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)

    header = ("server,mac,"
              "valid_traceroute_compress_embratel,"
              "traceroute_compress_embratel,"
              "valid_traceroute_compress_embratel_without_last_hop_embratel,"
              "traceroute_compress_embratel_without_last_hop_embratel,"
              "valid_traceroute_without_embratel,"
              "traceroute_without_embratel,"
              "valid_traceroute,"
              "traceroute\n")

    # Flag triples forwarded positionally to get_traceroute, listed in the
    # same order as the column pairs of the header.
    variants = (
        (True, True, True),     # *_compress_embratel
        (True, True, False),    # *_compress_embratel_without_last_hop_embratel
        (False, False, False),  # *_without_embratel
        (True, False, False),   # valid_traceroute / traceroute
    )
    row_fmt = "{},{}" + ",{},\"{}\"" * 4 + "\n"

    with open(out_path, "w") as f:
        f.write(header)
        for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
            ts_traceroute = TimeSeries(in_path=in_path, metric="traceroute",
                                       dt_start=dt_start, dt_end=dt_end)

            fields = [server, mac]
            for flags in variants:
                valid, traceroute = get_traceroute(ts_traceroute, *flags)
                fields.append(valid)
                fields.append(traceroute)
            f.write(row_fmt.format(*fields))

    utils.sort_csv_file(out_path, ["server", "mac"])
def plot(dt_start, dt_end, metric):
    """
    Plot the median-filtered series of `metric` for every (server, mac).

    Creates <script_dir>/<str_dt>/<metric>/ and, for each pair yielded by
    utils.iter_server_mac, writes <out_file_name>.png there. The plotted
    series is a TimeSeries passed through percentile_filter(win_len=5,
    p=0.5); a second, unfiltered TimeSeries of the same input is built only
    to obtain the default y-label.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    period_dir = "{}/{}".format(script_dir, str_dt)
    metric_dir = "{}/{}/{}".format(script_dir, str_dt, metric)
    utils.create_dirs([period_dir, metric_dir])

    for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
        out_file_name = utils.get_out_file_name(server, mac, dt_start, dt_end)
        out_path = "{}/{}/{}/{}.png".format(script_dir, str_dt, metric,
                                            out_file_name)

        # Raw series: used for the y-label only.
        ts = TimeSeries(in_path, metric, dt_start, dt_end)

        # Filtered series: this is what ends up in the figure.
        ts_filter = TimeSeries(in_path, metric, dt_start, dt_end)
        ts_filter.percentile_filter(win_len=5, p=0.5)

        plot_procedures.plot_ts(
            ts_filter,
            out_path,
            ylabel=plot_procedures.get_default_ylabel(ts),
            compress=False,
            title="median filtered")
def get_data(dt_start_sp, dt_end_sp):
    """
    Dump the measures of one period from MongoDB through write_csvs.

    [dt_start_sp, dt_end_sp) must define a month.

    Any existing <script_dir>/<dt_dir> directory is deleted first. The two
    bounds are converted with dt_procedures.from_sp_to_utc (presumably local
    Sao Paulo time to UTC -- confirm against dt_procedures) and the
    "measures" collection of the "NET" database is queried for documents
    whose "_id.date" lies in [dt_start, dt_end). The cursor and the
    collection are handed to write_csvs.

    The MongoDB client is closed when the function exits, including when
    write_csvs raises, so repeated calls do not leak connections.
    """
    dt_dir = utils.get_dt_dir(dt_start_sp, dt_end_sp)
    out_dir = "{}/{}".format(script_dir, dt_dir)
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)

    dt_start = dt_procedures.from_sp_to_utc(dt_start_sp)
    dt_end = dt_procedures.from_sp_to_utc(dt_end_sp)

    client = MongoClient("cabul", 27017)
    try:
        collection = client["NET"]["measures"]
        cursor = collection.find(
            {"_id.date": {"$gte": dt_start, "$lt": dt_end}})
        write_csvs(dt_dir, dt_start, dt_end, cursor, collection)
    finally:
        # Release the sockets held by the client.
        client.close()
def plot_per_node(dt_start, dt_end, metric, only_unique_traceroute):
    """
    Plot raw vs. median-filtered series of `metric`, grouped by node.

    For each (server, mac) pair yielded by utils.iter_server_mac whose node
    (mac_node[mac], from read_input.get_mac_node) is listed in
    read_input.get_valid_nodes, a figure is written to
    <script_dir>/plots/nodes/<str_dt>/<metric>/<node>/<out_file_name>.png.
    The figure is drawn by plot_procedures.plot_ts_share_x from the raw
    series and a copy passed through percentile_filter(win_len=13, p=0.5).

    When only_unique_traceroute is truthy, macs that are not in
    read_input.get_macs_traceroute_filter(dt_start, dt_end, "filtered") are
    skipped. A mac missing from mac_node raises KeyError.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    utils.create_dirs([
        "{}/plots/".format(script_dir),
        "{}/plots/nodes".format(script_dir),
        "{}/plots/nodes/{}".format(script_dir, str_dt),
        "{}/plots/nodes/{}/{}".format(script_dir, str_dt, metric),
    ])

    valid_nodes = read_input.get_valid_nodes()
    mac_node = read_input.get_mac_node()
    macs_unique_traceroute = read_input.get_macs_traceroute_filter(
        dt_start, dt_end, "filtered")

    for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
        if only_unique_traceroute and (mac not in macs_unique_traceroute):
            continue

        node = mac_node[mac]
        if node not in valid_nodes:
            continue

        utils.create_dirs([
            "{}/plots/nodes/{}/{}/{}".format(script_dir, str_dt, metric, node)
        ])
        out_file_name = utils.get_out_file_name(server, mac, dt_start, dt_end)
        out_path = "{}/plots/nodes/{}/{}/{}/{}.png".format(
            script_dir, str_dt, metric, node, out_file_name)

        ts = TimeSeries(in_path, metric, dt_start, dt_end)
        ts_filter = TimeSeries(in_path, metric, dt_start, dt_end)
        ts_filter.percentile_filter(win_len=13, p=0.5)

        plot_procedures.plot_ts_share_x(ts, ts_filter, out_path,
                                        compress=False,
                                        plot_type2="scatter")
def plot_per_name(dt_start, dt_end, metric, preprocess_args, plot_cps=True):
    """
    Plot the series of `metric` under one directory per traceroute name.

    For every traceroute type from unsupervised_utils.iter_traceroute_types,
    the rows of prints/<str_dt>/filtered/traceroute_per_mac.csv whose
    "valid_cnt_samples" and valid-traceroute field are both truthy are
    visited. For each name yielded by cp_utils.iter_names_traceroute_filtered
    on the row's parsed traceroute field, a figure is placed at
    plots/names/<str_dt>/<metric>/<traceroute_type>/<server>/<name>/.

    A client (utils.get_client(server, mac)) is drawn only once: the first
    time it is seen its series is loaded, run through cp_utils.preprocess
    with preprocess_args and plotted with the client's change points (from
    unsupervised_utils.get_client_cps) as vertical lines; every later
    occurrence copies that first image file instead of plotting again.

    A progress line is printed for every processed row.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    utils.create_dirs([
        "{}/plots/".format(script_dir),
        "{}/plots/names".format(script_dir),
        "{}/plots/names/{}".format(script_dir, str_dt),
        "{}/plots/names/{}/{}".format(script_dir, str_dt, metric),
    ])

    client_cps = unsupervised_utils.get_client_cps(plot_cps, str_dt, metric)

    # client -> path of the figure already drawn for it (avoids replotting)
    first_plot_of = {}

    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)

        utils.create_dirs([
            "{}/plots/names/{}/{}/{}".format(script_dir, str_dt, metric,
                                             traceroute_type)
        ])

        df = pd.read_csv("{}/prints/{}/filtered/traceroute_per_mac.csv".format(
            script_dir, str_dt))

        cnt = 0
        for _, row in df.iterrows():
            if not (row["valid_cnt_samples"] and row[valid_traceroute_field]):
                continue

            print("cnt={}, traceroute_type={}, str_dt={}".format(
                cnt, traceroute_type, str_dt))
            cnt += 1

            client = utils.get_client(row["server"], row["mac"])
            hops = ast.literal_eval(row[traceroute_field])

            for name in cp_utils.iter_names_traceroute_filtered(hops):
                server_dir = "{}/plots/names/{}/{}/{}/{}".format(
                    script_dir, str_dt, metric, traceroute_type,
                    row["server"])
                name_dir = "{}/plots/names/{}/{}/{}/{}/{}".format(
                    script_dir, str_dt, metric, traceroute_type,
                    row["server"], name)
                utils.create_dirs([server_dir, name_dir])

                out_file_name = utils.get_out_file_name(
                    row["server"], row["mac"], dt_start, dt_end)
                out_path = "{}/plots/names/{}/{}/{}/{}/{}/{}.png".format(
                    script_dir, str_dt, metric, traceroute_type,
                    row["server"], name, out_file_name)

                if client in first_plot_of:
                    # Already drawn once: reuse the image file.
                    shutil.copyfile(first_plot_of[client], out_path)
                    continue

                first_plot_of[client] = out_path
                cp_dts = client_cps[client]

                in_path = "{}/input/{}/{}/{}.csv".format(
                    base_dir, dt_dir, row["server"], row["mac"])
                ts = TimeSeries(in_path, metric, dt_start, dt_end)
                cp_utils.preprocess(ts, preprocess_args)
                plot_procedures.plot_ts(ts, out_path, dt_axvline=cp_dts,
                                        title="median filtered")
def print_empty_segs(dt_start, dt_end, metric, min_seg_len, filtered,
                     plot=False):
    """
    Write prints/<str_dt>/<filtered>/<metric>/empty_segs_per_mac.csv.

    Only macs returned by read_input.get_macs_traceroute_filter(dt_start,
    dt_end, filtered) are considered. For each one the series of `metric` is
    loaded and, when it has at least two points, three kinds of gaps are
    tested with is_empty_seg(left, right, min_seg_len):

      * from dt_start to the first sample,
      * between every pair of consecutive samples,
      * from the last sample to dt_end.

    Every gap that is_empty_seg accepts is recorded as a
    [str(left), str(right)] pair in the quoted "empty_segs" column. Series
    with fewer than two points get an empty list.

    When plot is truthy, each series is also drawn to
    plots/empty_segs/<str_dt>/<metric>/<out_file_name>.png with a vertical
    line at every sample that borders a recorded gap.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    utils.create_dirs([
        "{}/prints/".format(script_dir),
        "{}/prints/{}".format(script_dir, str_dt),
        "{}/prints/{}/{}".format(script_dir, str_dt, filtered),
        "{}/prints/{}/{}/{}".format(script_dir, str_dt, filtered, metric),
    ])

    out_path = "{}/prints/{}/{}/{}/empty_segs_per_mac.csv".format(
        script_dir, str_dt, filtered, metric)
    with open(out_path, "w") as f:
        f.write("server,mac,empty_segs\n")
        target_macs = read_input.get_macs_traceroute_filter(
            dt_start, dt_end, filtered)

        for server, mac, in_path in utils.iter_server_mac(dt_dir, True):
            if mac not in target_macs:
                continue

            ts = TimeSeries(in_path=in_path, metric=metric,
                            dt_start=dt_start, dt_end=dt_end)

            axvline_dts = []
            empty_segs = []
            if len(ts.x) >= 2:
                # Leading gap: window start -> first sample.
                if is_empty_seg(dt_start, ts.x[0], min_seg_len):
                    axvline_dts.append(ts.x[0])
                    empty_segs.append([str(dt_start), str(ts.x[0])])

                # Interior gaps: both bordering samples are marked.
                for i in xrange(1, len(ts.x)):
                    if is_empty_seg(ts.x[i - 1], ts.x[i], min_seg_len):
                        axvline_dts.append(ts.x[i - 1])
                        axvline_dts.append(ts.x[i])
                        empty_segs.append([str(ts.x[i - 1]), str(ts.x[i])])

                # Trailing gap: last sample -> window end. The marker is
                # indexed explicitly with ts.x[-1]; reusing the loop's
                # leftover index would point one sample too early.
                if is_empty_seg(ts.x[-1], dt_end, min_seg_len):
                    axvline_dts.append(ts.x[-1])
                    empty_segs.append([str(ts.x[-1]), str(dt_end)])

            f.write("{},{},\"{}\"\n".format(server, mac, empty_segs))

            if plot:
                utils.create_dirs([
                    "{}/plots/".format(script_dir),
                    "{}/plots/empty_segs".format(script_dir),
                    "{}/plots/empty_segs/{}".format(script_dir, str_dt),
                    "{}/plots/empty_segs/{}/{}".format(script_dir, str_dt,
                                                       metric),
                ])
                out_file_name = utils.get_out_file_name(
                    server, mac, dt_start, dt_end)
                # Kept separate from out_path, which names the CSV above.
                plot_path = "{}/plots/empty_segs/{}/{}/{}.png".format(
                    script_dir, str_dt, metric, out_file_name)
                plot_procedures.plot_ts(ts, plot_path,
                                        dt_axvline=axvline_dts)