def mptcp_compute_throughput_extended(
    rawdf,  # need the rawdf to classify_reinjections
    stats,  # result of mptcp_compute_throughput
    destination: ConnectionRoles,
) -> Tuple[bool, Any]:
    """
    Enrich per-subflow stats with goodput figures and contribution ratios.

    The raw dataframe is run through ``classify_reinjections`` and filtered
    on ``mptcpdest == destination``. Every entry of ``stats["subflow_stats"]``
    is then updated in place with the keys ``tcp_goodput``, ``mptcp_goodput``,
    ``throughput_contribution`` and ``goodput_contribution``.

    Args:
        rawdf: dataframe handed to ``classify_reinjections``; the result is
            read through the columns ``mptcpdest``, ``tcpstream``, ``bytes``,
            ``tcpseq``, ``redundant`` and ``throughput_bytes``.
        stats: mapping providing ``subflow_stats`` (an iterable of dict-like
            entries carrying ``tcpstreamid``), ``mptcp_throughput_bytes`` and
            ``mptcp_goodput``.
        destination: MPTCP destination whose packets are kept.

    Returns:
        ``(True, stats)`` where ``stats`` is the object received, with its
        subflow entries updated.
    """
    df_both = classify_reinjections(rawdf)

    # keep only the packets going towards the requested MPTCP destination
    df = df_both[df_both.mptcpdest == destination]

    log.debug("subflow stats: %s", stats["subflow_stats"])
    log.debug("columns: %s", df.columns)

    for sf in stats["subflow_stats"]:
        log.debug("for tcpstream %d" % sf["tcpstreamid"])
        df_sf = df[df.tcpstream == sf["tcpstreamid"]]

        # TODO eliminate retransmissions too
        # inexact, we should drop lost packets
        tcp_throughput = df_sf["bytes"].sum()

        # span of the TCP sequence numbers seen on this subflow
        tcp_goodput = df_sf.tcpseq.max() - df_sf.tcpseq.min()

        # A (mask, column) pair must go through .loc: plain df[mask, column]
        # is looked up as a single column key and raises.
        # NOTE(review): the "throughput_bytes" column name is kept from the
        # original code - confirm it exists in the classified dataframe.
        mptcp_goodput = df_sf.loc[
            df_sf.redundant == False, "throughput_bytes"
        ].sum()
        sf_mptcp_throughput = tcp_throughput

        sf.update({
            "tcp_goodput": tcp_goodput,
            # cumulative sum of nonredundant dsn packets
            "mptcp_goodput": mptcp_goodput,
            # can be > 1 in case of redundant packets
            "throughput_contribution":
                sf_mptcp_throughput / stats["mptcp_throughput_bytes"],
            "goodput_contribution": mptcp_goodput / stats["mptcp_goodput"],
        })

    return True, stats
def plot(self, pcap, pcapstream, **kwargs):
    """
    Draw a cumulative histogram of the reinjection delays, one per group.

    Destinations are filled in on ``pcap`` for ``pcapstream``, the frame is
    passed through ``classify_reinjections`` and then grouped on the sender
    side ``tcpstream`` / ``mptcpdest`` columns. The ``reinj_delta`` column of
    every group is drawn as a cumulative, normalised 100-bin histogram.

    Args:
        pcap: dataframe exposing the ``mptcp`` accessor.
        pcapstream: stream identifier forwarded to ``mptcp.fill_dest``.
        **kwargs: accepted but not read here.

    Returns:
        The matplotlib figure that was created.
    """
    # Reinjections can only be classified once destinations are known
    pcap.mptcp.fill_dest(pcapstream)
    classified = classify_reinjections(pcap)

    figure = plt.figure()
    log.info("Plotting reinjections ")
    ax = figure.gca()

    figure.suptitle("Reinjections CDF ", verticalalignment="top")

    # it has not been given the destinations yet !!
    debug_dataframe(classified, "DATASET HEAD")

    group_keys = _sender(["tcpstream", "mptcpdest"])
    for _, group in classified.groupby(group_keys, sort=False):
        log.info("len(df)= %d" % len(classified))

        # TODO check destination
        # TODO skip if no reinjection
        debug_dataframe(group, "DATASET HEAD")

        deltas = group[_sender("reinj_delta")]
        deltas.hist(cumulative=True, density=1, bins=100)

    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Reinjection delay")

    handles, labels = ax.get_legend_handles_labels()

    # Generate "subflow X" labels, numbered in legend order
    subflow_names = ["Subflow %d" % position for position in range(len(labels))]
    # location: 3 => bottom left, 4 => bottom right
    ax.legend(handles, subflow_names, loc=4)

    return figure
def classify_reinjections(self):
    """
    Apply the ``classify_reinjections`` function to the wrapped object.

    ``self._obj`` is handed over as is: no copy is made here.

    Returns:
        Whatever ``classify_reinjections`` returns for ``self._obj``.
    """
    wrapped = self._obj
    return classify_reinjections(wrapped)
def do_qualify_reinjections(self, args, unknown):
    """
    Report, for each reinjected packet, how it compares with the original.

    test with:
        mp qualify_reinjections 0

    The two captures are merged, passed through ``classify_reinjections``
    and then either exported as CSV (``args.csv``) or described packet by
    packet through ``self.poutput``.

    Args:
        args: parsed arguments; reads ``pcap1``, ``pcap2``, ``pcap1stream``,
            ``pcap2stream``, ``csv``, ``failed`` and ``destinations``.
        unknown: unused here.

    TODO move the code into a proper function
    """
    # TODO this should be done automatically right ?
    df_all = load_merged_streams_into_pandas(
        args.pcap1,
        args.pcap2,
        args.pcap1stream,
        args.pcap2stream,
        mptcp=True,
        tshark_config=self.tshark_config
    )

    # adds a redundant column
    df = classify_reinjections(df_all)

    def _print_reinjection_comparison(original_packet, reinj, initial_packetid):
        """
        Print one line comparing a reinjection with its original packet.

        Args:
            original_packet: row of the original packet, indexed by column.
            reinj: named tuple describing the reinjection.
            initial_packetid: packet id of the original, used in the message.
        """
        # Plain scalars: a trailing comma here would build 1-tuples and the
        # message would show "(value,)" instead of "value".
        reinjection_packetid = getattr(reinj, _sender("packetid"))
        reinjection_start = getattr(reinj, _sender("abstime"))
        reinjection_arrival = getattr(reinj, _receiver("abstime"))
        original_start = original_packet[_sender("abstime")]
        original_arrival = original_packet[_receiver("abstime")]

        if reinj.redundant == False:
            msg = ("packet {pktid} is a successful reinjection of {initial_packetid}."
                   " It arrived at {reinjection_arrival} to compare with {original_arrival}"
                   " while being transmitted at {reinjection_start} to compare with "
                   "{original_start}, i.e., {reinj_delta} before")
            # TODO use assert instead
            if reinjection_arrival > original_arrival:
                print("BUG: this is not a valid reinjection after all ?")
        elif args.failed:
            # redundant reinjections are only reported when args.failed is set
            msg = "packet {pktid} is a failed reinjection of {initial_packetid}."
        else:
            return

        msg = msg.format(
            pktid=reinjection_packetid,
            initial_packetid=initial_packetid,
            reinjection_start=reinjection_start,
            reinjection_arrival=reinjection_arrival,
            original_start=original_start,
            original_arrival=original_arrival,
            reinj_delta=reinj.reinj_delta,
        )
        self.poutput(msg)

    # TODO filter depending on --failed and --destinations
    if args.csv:
        self.pfeedback("Exporting to csv")
        # only a subset of the columns is exported; redundant is kept
        columns = _sender([
            "abstime", "reinjection_of", "reinjected_in", "packetid",
            "tcpstream", "mptcpstream", "tcpdest", "mptcpdest",
        ])
        columns += _receiver(["abstime", "packetid"])
        columns += ["redundant", "owd", "reinj_delta"]

        df[columns].to_csv(
            self.stdout,
            sep="|",
            index=False,
            header=True,
        )
        return

    for destination in ConnectionRoles:
        if args.destinations and destination not in args.destinations:
            log.debug("ignoring destination %s " % destination)
            continue

        self.poutput("looking for reinjections towards mptcp %s" % destination)
        sender_df = df[df.mptcpdest == destination]
        log.debug("%d reinjections in that direction" % (len(sender_df), ))

        # TODO we now need to display successful reinjections
        reinjections = sender_df[pd.notnull(sender_df[_sender("reinjection_of")])]

        successful_reinjections = reinjections[reinjections.redundant == False]

        self.poutput("%d successful reinjections" % len(successful_reinjections))

        for row in reinjections.itertuples(index=False):
            # loc ? this is an array, sort it and take the first one ?
            initial_packetid = row.reinjection_of[0]

            original_packet = df_all.loc[df_all.packetid == initial_packetid].iloc[0]

            _print_reinjection_comparison(original_packet, row, initial_packetid)
def mptcp_compute_throughput(
    rawdf,
    mptcpstreamid: MpTcpStreamId,
    destination: ConnectionRoles,
    merged_df: bool
) -> MpTcpUnidirectionalStats:
    """
    Compute unidirectional MPTCP statistics for one connection.

    Very raw computation: the DSN range comes from ``transmitted_seq_range``
    and the duration is the difference between the last and the first
    ``abstime`` of the packets going towards ``destination``. Per-subflow
    stats come from ``tcp_get_stats`` and are completed with the MPTCP
    application bytes and the throughput contribution of each subflow.

    Args:
        rawdf: dataframe exposing the ``mptcp`` accessor.
        mptcpstreamid: identifier of the MPTCP connection to analyse.
        destination: direction to keep; must be a ``ConnectionRoles``.
        merged_df: True if ``rawdf`` is a merged dataframe. Column names for
            ``tcpstream`` / ``tcpdest`` then go through ``_sender``,
            reinjections are classified and ``goodput_contribution`` is set
            on every subflow.

    Returns:
        An ``MpTcpUnidirectionalStats`` built from the stream id, the DSN
        range (as ``Byte``), the duration and the list of subflow stats.
    """
    assert isinstance(destination, ConnectionRoles), "destination is %r" % destination

    con = rawdf.mptcp.connection(mptcpstreamid)
    q = con.generate_direction_query(destination)
    df = unidirectional_df = rawdf.query(q, engine="python")

    # -1 because of syn
    dsn_range, dsn_max, dsn_min = transmitted_seq_range(df, "dsn")

    msg = "dsn_range ({}) = {} (dsn_max) - {} (dsn_min) - 1"
    log.debug(msg.format(dsn_range, dsn_max, dsn_min))

    # merged dataframes carry sender-side column names
    _col = _sender if merged_df else lambda x: x

    # Could groupby destination as well
    groups = df.groupby(_col('tcpstream'))

    subflow_stats: List[TcpUnidirectionalStats] = []
    for tcpstream, subdf in groups:
        dest = subdf.iloc[0, subdf.columns.get_loc(_col('tcpdest'))]
        sf_stats = tcp_get_stats(
            subdf, tcpstream,
            # work around pandas issue (since for now it's a float)
            ConnectionRoles(dest),
            True)

        # DSNs can be discontinuous, so we have to look at each packet;
        # duplicated DSNs are only counted once
        transmitted_dsn_df = subdf.drop_duplicates(subset="dsn")

        sf_stats.mptcp_application_bytes = transmitted_dsn_df["tcplen"].sum()

        # + 1 to deal with syn oddity
        assert sf_stats.mptcp_application_bytes <= sf_stats.tcp_byte_range + 1, sf_stats

        log.log(mp.TRACE, "Adding subflow stats %r", sf_stats)
        subflow_stats.append(sf_stats)

    times = df["abstime"]
    duration = times.iloc[-1] - times.iloc[0]

    total_tput = sum(sf.throughput_bytes for sf in subflow_stats)

    for sf in subflow_stats:
        # can be > 1 in case of redundant packets
        if total_tput > 0:
            sf.throughput_contribution = sf.throughput_bytes.bytes / total_tput
        else:
            sf.throughput_contribution = 0
            log.warning("Total Throughput <= 0. Something fishy possibly ?")

    # If it's a merged df, then we can classify reinjections and give more
    # results on the goodput
    if merged_df:
        df = classify_reinjections(unidirectional_df)

        debug_dataframe(df, "after reinjections have been analyzed")

        for sf in subflow_stats:
            log.debug("for tcpstream %d" % sf.tcpstreamid)
            df_sf = df[df.tcpstream == sf.tcpstreamid]
            non_redundant_pkts = df_sf.loc[df_sf.redundant == False, "tcplen"]
            # overrides the value computed above with non-redundant bytes only
            sf.mptcp_application_bytes = non_redundant_pkts.sum()
            sf.goodput_contribution = sf.mptcp_application_bytes / dsn_range

    return MpTcpUnidirectionalStats(
        mptcpstreamid=mptcpstreamid,
        mptcp_application_bytes=Byte(dsn_range),
        mptcp_duration=duration,
        subflow_stats=subflow_stats,
    )
def plot(self, pcap, pcapstream, window, **kwargs):
    """
    Plot the goodput at the MPTCP level and for every subflow.

    Should be very similar to the throughput plot, except that packets
    flagged as redundant by ``classify_reinjections`` are dropped first.

    Args:
        pcap: dataframe handed to ``classify_reinjections``.
        pcapstream: unused here.
        window: forwarded to ``plot_tput``.
        **kwargs: reads ``pcap_destinations`` (destinations to plot) and
            ``skipped_subflows`` (tcp streams to leave out, default ``[]``).

    Returns:
        The matplotlib figure. ``self.title_fmt`` is updated as a side
        effect: the destination suffix is appended when a single destination
        is requested, and the template is formatted with the values of the
        last subflow group when at least one group exists.
    """
    fig = plt.figure()
    # The returned Axes is not needed here. The call is kept because
    # Figure.gca() creates an Axes when the figure has none.
    fig.gca()
    fields = ["tcpdest", "tcpstream", "mptcpdest"]

    # TODO this should be configured in the parser
    destinations = kwargs.get("pcap_destinations")
    skipped = kwargs.get("skipped_subflows", [])

    df_classified = classify_reinjections(pcap)

    # then it's the same as for throughput
    log.debug("Dropping redundant packets")
    df_useful = df_classified[df_classified.redundant == False]

    # work on a copy so the in-place operations below do not touch the input
    df_useful = df_useful.copy()
    df_useful.dropna(
        axis="index",
        subset=[_sender("abstime")],
        inplace=True,
    )

    pd_abstime = pd.to_datetime(df_useful[_sender("abstime")], unit="s", errors="raise")
    df_useful.set_index(pd_abstime, inplace=True)
    df_useful.sort_index(inplace=True)

    suffix = " towards MPTCP {mptcpdest}"

    # plots MPTCP level goodput
    ##################################################
    label_fmt = "Aggregated" + (suffix if len(destinations) > 1 else "")
    for mptcpdest, subdf in df_useful.groupby("mptcpdest"):
        if mptcpdest not in destinations:
            log.debug("Ignoring destination %s", mptcpdest)
            continue

        log.debug("Plotting mptcp destination %s", mptcpdest)

        plot_tput(
            fig, subdf["tcplen"], subdf["abstime"], window,
            label=label_fmt.format(
                mptcpdest=mp.ConnectionRoles(mptcpdest).to_string()),
        )

    # plots subflow level goodput
    label_fmt = "Subflow {tcpstream}"
    if len(destinations) == 1:
        # TODO as we look at acks, it should be swapped !
        self.title_fmt = self.title_fmt + suffix
    else:
        label_fmt = label_fmt + suffix

    # (tcpstream, mptcpdest) of the last group seen, None when there is none
    last_group = None
    for idx, subdf in df_useful.groupby(_sender(fields), as_index=False, sort=False):
        tcpdest, tcpstream, mptcpdest = idx
        last_group = (tcpstream, mptcpdest)
        log.debug("tcpdest= %r, tcpstream %r mptcpdest %r",
                  tcpdest, tcpstream, mptcpdest)

        if mptcpdest not in destinations:
            log.debug("skipping MPTCP dest %s", mptcpdest)
            continue

        if tcpstream in skipped:
            log.debug("skipping subflow %d", tcpstream)
            continue

        plot_tput(
            fig,
            subdf["tcplen"],
            subdf.index,
            window,
            label=label_fmt.format(
                tcpstream=tcpstream,
                mptcpdest=mp.ConnectionRoles(mptcpdest).to_string()),
        )

    if last_group is None:
        # Without any group the loop variables were never bound; formatting
        # the title would have raised UnboundLocalError.
        log.warning("No subflow to plot, title left unformatted")
    else:
        tcpstream, mptcpdest = last_group
        self.title_fmt = self.title_fmt.format(
            tcpstream=tcpstream,
            mptcpdest=mp.ConnectionRoles(mptcpdest).to_string())

    return fig