def __bw_cmds(self):
    """Returns the tc commands to enforce BWs.

    For every configured bandwidth entry this emits, for both the bond
    interface and its IFB device, a command that installs (first entry) or
    changes (later entries) the 1:1 htb class at 100Gbit, followed by a netem
    qdisc command that carries the actual rate, the buffer limit and the
    optional loss and slot settings. The uplink rate and the outbound
    loss/slot go on the bond interface; the downlink rate and the inbound
    loss/slot go on the IFB device.

    Returns:
        A list of (command, abs_time) tuples, where abs_time is the sum of
        the durations of the preceding bandwidth entries. The list is empty
        when no bandwidth entries are configured.

    Raises:
        RuntimeError: When encountered a critical error, i.e. a bandwidth
            entry is set while the buffer size is undefined or zero.
    """
    bws = self.__bws
    if not bws:
        # Nothing configured (bws is None after a reset): there is nothing
        # to enforce, and enumerate(None) would raise a TypeError.
        return []

    # None of the values below depend on the individual bandwidth entry, so
    # they are looked up once instead of once per entry.
    buf = self.__buf
    loss = self.__loss
    oloss = self.__oloss
    slot = self.__slot
    in_slot = slot.in_slot if slot else None
    out_slot = slot.out_slot if slot else None
    out_loss_opt = 'loss %s' % oloss if oloss > 0 else ''
    in_loss_opt = 'loss %s' % loss if loss > 0 else ''
    out_slot_opt = out_slot.netem_str() if out_slot else ''
    in_slot_opt = in_slot.netem_str() if in_slot else ''

    bond_iface = self.get_bond_iface()
    bond_ifb = self.get_ifb_for_iface(bond_iface)
    tc = path.tc()

    # Here we install a 100gbps, and then enforce the BW
    # in netem where we also enforce buffer sizes.
    class_fmt = ('%s class %s dev %s parent 1: '
                 'classid 1:1 htb rate 100Gbit')
    netem_fmt = ('%s qdisc %s dev %s parent 1:1 '
                 'handle 11: netem rate %sMbit limit %s %s %s')

    cmds = []
    abs_time = 0
    for i, bw in enumerate(bws):
        if not bw and not buf:
            continue
        if not buf:
            # netem needs an explicit limit; fail before emitting anything
            # for this entry.
            raise RuntimeError(
                'netem with undefined or zero limit. '
                'Check if buf is misisng or 0 but bw,loss>0 in cfg.')

        # The very first entry creates the class/qdisc, later ones update it.
        tc_cmd = 'add' if i == 0 else 'change'

        for dev in (bond_iface, bond_ifb):
            cmds.append((class_fmt % (tc, tc_cmd, dev), abs_time))

        cmds.append((netem_fmt % (tc, tc_cmd, bond_iface, bw.uplink, buf,
                                  out_loss_opt, out_slot_opt), abs_time))
        cmds.append((netem_fmt % (tc, tc_cmd, bond_ifb, bw.downlink, buf,
                                  in_loss_opt, in_slot_opt), abs_time))

        abs_time += bw.dur
    return cmds
def reset(self, cmd):
    """Brings the sender back to a clean state.

    Joins the sender thread (via maybe_join), recreates an empty temporary
    directory, kills the binaries of every registered tool as well as
    tcpdump, and deletes the root qdisc of every interface.

    Args:
        cmd: The command to run before starting an experiment. Nothing is
            run when it is empty or None.
    """
    self.maybe_join()

    # Start from an empty temporary directory.
    tmp_dir = path.get_tmp_dir()
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    # Kill whatever is left over from a previous experiment.
    for tool in transperf.TOOLS.values():
        for binary in tool.binaries:
            shell.run('pkill %s' % binary)
    shell.run('killall -q tcpdump')

    # Drop the root qdisc of every interface.
    for iface in self.get_all_ifaces():
        shell.run('%s qdisc del dev %s root' % (path.tc(), iface))

    if not cmd:
        return
    shell.run(cmd)
def reset(self, cmd):
    """Cleans all the settings and reinitializes the receiver.

    Kills the binaries of every registered tool and tcpdump, removes the
    pref-10 filters and the ingress, clsact and root qdiscs from every
    interface and its IFB device, runs the optional command, and finally
    clears the bandwidth, policer, buffer, loss and port configuration.

    Args:
        cmd: The command to run before starting an experiment. Nothing is
            run when it is empty or None.
    """
    for tool in transperf.TOOLS.values():
        for binary in tool.binaries:
            shell.run('pkill %s' % binary)
    shell.run('killall -q tcpdump')

    shell.run(path.tc() + ' qdisc show')

    # (text before the device name, text after it) of every tc invocation
    # used to tear a device down, in the order they have to run.
    teardown = (
        (' filter del dev ', ' pref 10 parent ffff:'),
        (' filter del dev ', ' pref 10'),
        (' qdisc del dev ', ' ingress'),
        (' qdisc del dev ', ' clsact'),
        (' qdisc del dev ', ' root'),
    )
    for iface in self.get_all_ifaces():
        for dev in (iface, self.get_ifb_for_iface(iface)):
            for before, after in teardown:
                shell.run(path.tc() + before + dev + after)

    if cmd:
        shell.run(cmd)

    # Forget the configuration of the previous experiment.
    self.__bws = None
    self.__policer = None
    self.__buf = 0
    self.__loss = 0
    self.__oloss = 0
    self.__port_range = None
    self.__port_to_addr = dict()
def __policer_cmds(self):
    """Returns the tc commands to install policer filters.

    Each (bandwidth, policer, duration) entry produced by bw_policers()
    yields one ingress filter on the bond interface that polices matching
    traffic and redirects it to the IFB device. From the second entry on,
    the previously installed filter is deleted first.

    Returns:
        [] if there is no policer config and otherwise returns a list of
        commands along with the time they should run.
    """
    if not self.__policer:
        return []

    iface = self.get_bond_iface()
    ifb = self.get_ifb_for_iface(iface)
    schedule = self.bw_policers()

    # The filter matches on source port with a mask derived from the
    # configured port range.
    port = self.__port_range[0]
    mask = 0xFFFF - (self.__port_range[1] - 1)

    cmds = []
    when = 0
    for idx, (bw, po, dur) in enumerate(schedule):
        if idx > 0:
            # Delete the previous policer filter.
            del_cmd = ('%s filter del dev %s '
                       'parent ffff: protocol %s pref 10 u32 '
                       % (path.tc(), iface, self.__proto))
            cmds.append((del_cmd, when))

        if po.burst:
            burst = 'burst %smb' % po.burst
        else:
            burst = ''

        add_cmd = ('%s filter add dev %s parent ffff: '
                   'protocol %s pref 10 u32 match %s sport %d 0x%04X '
                   'flowid 1:1 '
                   'action police rate %sMbit peakrate %sMbit %s '
                   'buffer 100K mtu 66000 conform-exceed drop/pipe '
                   'action mirred egress redirect dev %s') % (
                       path.tc(), iface, self.__proto, self.__match,
                       port, mask, po.bw, bw.downlink, burst, ifb)
        cmds.append((add_cmd, when))

        when += dur
    return cmds
def __init__(self, iface_cfg, singlesrv_mode, ip_mode, save_pcap,
             save_kern_debug, hosts):
    """Initializes the sender state and resolves its own IP address.

    Args:
        iface_cfg: The interface configuration, handed to __set_ifaces.
        singlesrv_mode: Whether single server mode is on. When set, the IP
            map must be non-empty and contain the local hostname.
        ip_mode: The address family used when resolving the local hostname.
        save_pcap: Stored as-is for later use.
        save_kern_debug: Stored as-is for later use.
        hosts: The hosts input parsed by parse_ip_map, or None to skip
            building the IP map.
    """
    self.__singlesrv_mode = singlesrv_mode
    self.__done = True
    self.__conns = []
    self.__cmds = []
    self.__run_thread = None
    self.__ip_mode = ip_mode

    hostname = socket.gethostname()
    if hosts is None:
        self.__ip_map = {}
        LOG.info('No hosts file provided, skip parsing ip map.')
    else:
        self.__ip_map = parse_ip_map(hosts)

    if singlesrv_mode:
        assert self.__ip_map
        assert hostname in self.__ip_map
    LOG.info('IP Address map is: %s', str(self.__ip_map))

    # Prefer the address from the IP map and fall back to name resolution.
    if hostname in self.__ip_map:
        self.__ip_addr = self.__ip_map[hostname]
    else:
        addr_infos = socket.getaddrinfo(
            socket.gethostname(), 0, self.__ip_mode,
            socket.SOCK_STREAM, socket.IPPROTO_TCP)
        self.__ip_addr = addr_infos[0][4][0]
    LOG.info('IPAddr: %s', self.__ip_addr)

    self.__first_port = -1
    self.__recv = None
    self.__phys_ifaces = []
    self.__set_ifaces(iface_cfg)
    self.__ss_interval_second = 0.1
    self.__save_pcap = save_pcap
    self.__save_kern_debug = save_kern_debug

    # Make sure the binaries we ship are executable.
    for locate in (path.tc, path.netperf, path.netserver):
        if locate():
            shell.run('chmod a+x %s' % locate())
def __init__(self, iface_cfg, singlesrv_mode, ip_mode, hosts):
    """Initializes the receiver state, its interfaces and IFB devices.

    Args:
        iface_cfg: The interface configuration, handed to __set_ifaces.
        singlesrv_mode: Whether single server mode is on. When set, the IP
            map must be non-empty; setup_ifb is called with its negation.
        ip_mode: The address family; also the key used to pick the tc
            protocol and match keywords from the Receiver maps.
        hosts: The hosts input parsed by parse_ip_map, or None to skip
            building the IP map.
    """
    self.__singlesrv_mode = singlesrv_mode
    self.__senders = []
    self.__done = True
    self.__run_thread = None

    # Experiment configuration, empty until it is set for a run.
    self.__bws = None
    self.__slot = None
    self.__policer = None
    self.__buf = 0
    self.__loss = 0
    self.__oloss = 0
    self.__port_range = None
    self.__port_to_addr = dict()
    self.__cmds = []

    if hosts is None:
        self.__ip_map = {}
        LOG.info('No hosts file provided, skip parsing ip map.')
    else:
        self.__ip_map = parse_ip_map(hosts)
    if singlesrv_mode:
        assert self.__ip_map

    self.__ip_mode = ip_mode
    self.__proto = Receiver.tc_protocol_map[ip_mode]
    self.__match = Receiver.tc_match_map[ip_mode]
    self.__prev_lro = None
    self.__prev_gro = None

    self.__bond_iface = None
    self.__phys_ifaces = []
    self.__set_ifaces(iface_cfg)
    self.setup_ifb(not self.__singlesrv_mode)

    # Make sure the binaries we ship are executable.
    for locate in (path.tc, path.netperf, path.netserver):
        if locate():
            shell.run('chmod a+x %s' % locate())
def __do_run(self, tools, start_ts, dur, nsenders, out_dir):
    """Runs the experiment.

    Starts the receiver side servers of every tool, installs the root
    qdiscs, runs the generated tc commands at their scheduled times, waits
    until the experiment is over and finally stores the tc statistics and
    the receiver address in out_dir.

    Args:
        tools: The tool names, paired positionally with the registered ports.
        start_ts: The start time of the experiment in seconds since the
            epoch (UTC).
        dur: The duration of the experiment in seconds.
        nsenders: The number of senders taking part in the experiment.
        out_dir: The output directory. It is deleted and recreated before
            tc.out and recv.info are written into it.

    Raises:
        RuntimeError: When a server cannot be started, when there is no
            server to run, or when setting up a root qdisc fails.
    """
    def tc_failed(stderr):
        # Some tc versions print 'Success' to stderr.
        return any(l and l != 'RTNETLINK answers: Success'
                   for l in stderr.split('\n'))

    self.__servers = []
    till_start_sec = start_ts - calendar.timegm(
        datetime.datetime.utcnow().utctimetuple())

    # Build a set of unique tools and their associated ports.
    tool_to_ports = {}
    for tool, port in zip(tools, self.__port_to_addr.keys()):
        existing = tool_to_ports.setdefault(tool, [])
        existing.append((port, self.__port_to_addr[port]))

    # Have each tool add receiver commands to support the senders.
    # items() rather than iteritems() so this works on Python 2 and 3.
    for tool, ports in tool_to_ports.items():
        toolobj = transperf.TOOLS[tool]
        toolobj.options_dict['ip_mode'] = (
            '-6' if self.__ip_mode == socket.AF_INET6 else '-4')
        for cmd in toolobj.receiver_cmds(ports, till_start_sec):
            proc = shell.bg(cmd)
            self.__servers.append(proc)
            if proc.poll():
                raise RuntimeError('cannot run ' + cmd)

    if not self.__servers:
        raise RuntimeError('no server to run')

    LOG.debug('creating commands')
    if self.__qdisc_noop(nsenders):
        # If there is no RTT, BW, nor Policer, don't install any qdisc.
        cmds = []
    else:
        # Setup root qdiscs.
        for iface in self.get_all_ifaces():
            # Skip setting up eth0 and ifb0, if bandwidth is noop.
            if iface == self.get_bond_iface() and self.__bw_qdisc_noop():
                continue
            iface_ifb = self.get_ifb_for_iface(iface)
            _, err, _ = shell.run('''
                %(tc)s qdisc replace dev %(iface)s handle 1: root htb
                %(tc)s qdisc replace dev %(iface)s handle ffff: ingress
                %(tc)s class replace dev %(iface)s parent 1: classid 1:1 \
                        htb rate 100Gbit
                ''' % {
                    'tc': path.tc(),
                    'iface': iface,
                })
            if tc_failed(err):
                raise RuntimeError('Error in setting up %s: %s' % (iface,
                                                                   err))
            _, err, _ = shell.run('''
                %(tc)s qdisc replace dev %(iface)s handle 1: root htb
                %(tc)s class replace dev %(iface)s parent 1: classid 1:1 \
                        htb rate 100Gbit
                ''' % {
                    'tc': path.tc(),
                    'iface': iface_ifb,
                })
            if tc_failed(err):
                raise RuntimeError('Error setting up %s: %s' % (iface_ifb,
                                                                err))

        # We generate commands and their wait time before starting the loop.
        # Work on a copy: extending and sorting the instance list in place
        # would make the generated tc commands pile up in self.__cmds and be
        # replayed by the next run.
        cmds = list(self.__cmds)
        cmds += self.__bw_cmds()
        cmds += self.__rtt_cmds(nsenders)
        cmds += self.__filter_cmds(nsenders)
        cmds += self.__policer_cmds()

    # The sort is stable, so commands sharing a deadline keep the order in
    # which they were generated.
    cmds.sort(key=lambda c: c[1])
    for cmd in cmds:
        LOG.debug('at %s will run %s', cmd[1], cmd[0])

    cmds_at_zero = [cmd for cmd in cmds if not cmd[1]]
    cmds_rest = [cmd for cmd in cmds if cmd[1]]

    # Run all the commands that should be run at 0.
    for cmd in cmds_at_zero:
        shell.run(cmd[0])

    now = calendar.timegm(datetime.datetime.utcnow().utctimetuple())
    sdur = start_ts - now
    LOG.debug('sleeping for %s seconds', sdur)
    if start_ts > now:
        time.sleep(start_ts - now)

    # From here on, now is the time relative to the start of the experiment.
    # NOTE(review): it only advances by the time slept, so the time spent
    # running the commands themselves is not accounted for -- confirm that
    # this is intended.
    now = 0.0
    # Run the commands that has a later deadline.
    for cmd in cmds_rest:
        if cmd[1] < now:
            LOG.warning('command %s is ran after its deadline', cmd)

        if cmd[1] > now:
            LOG.debug('sleeping from %s til %s', now, cmd[1])
            time.sleep(cmd[1] - now)
            now = cmd[1]

        shell.run(cmd[0])

    # Wait for the remainder of the experiment, if any.
    end_time = datetime.datetime.utcnow().utctimetuple()
    delta = calendar.timegm(end_time) - start_ts
    if delta < dur:
        time.sleep(dur - delta)

    LOG.info('saving qdisc state in %s', out_dir)
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    # Save qdisc stats.
    # NOTE(review): the class and filter statistics are each collected
    # twice with the same command -- confirm whether that is intended.
    tcs = '\n'.join([shell.run(path.tc() + ' -d -s -p qdisc show')[0],
                     shell.run(path.tc() + ' -d -s -p class show')[0],
                     shell.run(path.tc() + ' -d -s -p class show')[0],
                     shell.run(path.tc() + ' -d -s -p filter show')[0],
                     shell.run(path.tc() + ' -d -s -p filter show')[0]])
    # The context manager closes the file even when the write fails.
    with open(os.path.join(out_dir, 'tc.out'), 'w') as tcf:
        tcf.write(tcs)

    hostname = socket.gethostname()
    if self.__singlesrv_mode:
        assert hostname in self.__ip_map
    if hostname in self.__ip_map:
        rcv_ip = self.__ip_map[hostname]
    else:
        rcv_ip = socket.getaddrinfo(hostname, 0, self.__ip_mode,
                                    socket.SOCK_STREAM,
                                    socket.IPPROTO_TCP)[0][4][0]
    with open(os.path.join(out_dir, 'recv.info'), 'w') as ipf:
        ipf.write(rcv_ip)
def __filter_cmds(self, nsenders):
    """Returns the commands to setup flow filters using tc.

    For every port of each of the first nsenders senders this produces the
    filters that steer the flow into class 1:1 on the bond interface and its
    IFB device, the filters that steer it into the per-sender class on every
    interface and its IFB device, and the ingress filter that redirects the
    flow from an interface to its IFB device.

    Args:
        nsenders: The number of senders to generate filters for.

    Returns:
        A list of (command, 0) tuples; all filters are installed at time 0.
    """
    # TODO(arjunroy): There is a regression for IPv6, possibly having to do
    #                 with filter matching for ACK packets. Specifically,
    #                 if we run netperf with a TCP_STREAM test, packets are
    #                 being delayed correctly upon reception via IFB but no
    #                 packets are hitting the outbound delay rule. Thus, we
    #                 see an RTT of X/2 when we configure a link RTT of X.
    #                 On the other hand, for a TCP_RR test, we see hits on
    #                 the outbound delay rule and the measured RTT is X as
    #                 desired. Also, in IPv4, both TCP_STREAM and TCP_RR
    #                 work. Not sure if this is transperf's fault, or an
    #                 issue within linux/tc.
    to_sender_root = (
        '{tc} filter add dev {dev} parent 1: '
        'protocol {proto} pref 10 u32 match {match} dst {ip} '
        'match {match} dport {port} 0xffff flowid 1:1')
    from_sender_root = (
        '{tc} filter add dev {dev} parent 1: '
        'protocol {proto} pref 10 u32 match {match} src {ip} '
        'match {match} sport {port} 0xffff flowid 1:1')
    to_sender_class = (
        '{tc} filter add dev {dev} parent 1: '
        'protocol {proto} pref 10 u32 match {match} dst {ip}'
        ' match {match} dport {port} 0xffff '
        'flowid 1:{class_id}')
    from_sender_class = (
        '{tc} filter add dev {dev} parent 1: '
        'protocol {proto} pref 10 u32 match {match} src {ip}'
        ' match {match} sport {port} 0xffff '
        'flowid 1:{class_id}')
    redirect_to_ifb = (
        '{tc} filter add dev {dev} parent ffff: '
        'protocol {proto} pref 10 u32 '
        'match {match} src {ip} '
        'match {match} sport {port} 0xffff flowid 1:1 '
        'action mirred egress '
        'redirect dev {ifb}')

    cmds = []

    def emit(template, **fields):
        # Every filter is installed at time 0.
        cmds.append((template.format(tc=path.tc(), proto=self.__proto,
                                     match=self.__match, **fields), 0))

    bond_iface = self.get_bond_iface()
    bond_ifb = self.get_ifb_for_iface(bond_iface)
    for idx, sender in enumerate(self.__senders[:nsenders]):
        LOG.debug('generating commands for sender %d', idx)
        class_id, _ = self.__sender_class_handle(idx)

        # Add filters.
        for port in xrange(sender.ports[0], sender.ports[1]):
            emit(to_sender_root, dev=bond_iface, ip=sender.ip, port=port)
            emit(from_sender_root, dev=bond_ifb, ip=sender.ip, port=port)

            for iface in self.get_all_ifaces():
                iface_ifb = self.get_ifb_for_iface(iface)
                emit(to_sender_class, dev=iface, ip=sender.ip, port=port,
                     class_id=class_id)
                emit(from_sender_class, dev=iface_ifb, ip=sender.ip,
                     port=port, class_id=class_id)

                # If there is a policer, do not add the redirect filter
                # on eth0. Note that eth1+ should all have the
                # redirect filter even if there is a policer.
                if iface == self.get_bond_iface() and self.__policer:
                    continue
                emit(redirect_to_ifb, dev=iface, ip=sender.ip, port=port,
                     ifb=iface_ifb)
    return cmds
def __rtt_cmds(self, nsenders):
    """Returns the commands to setup RTT qdiscs.

    For each of the first nsenders senders this creates the per-sender htb
    class on every physical interface and its IFB device, and then one netem
    delay qdisc per RTT entry on each of them. The outbound delay goes on the
    interface and the inbound delay on the IFB device.

    Args:
        nsenders: The number of senders to generate commands for.

    Returns:
        A list of (command, abs_time) tuples. Classes are created at time 0;
        the delay qdisc of an RTT entry is scheduled at the sum of the
        durations of the sender's preceding RTT entries.
    """
    def describe_delay(dist, mean, var):
        # A distribution fully describes the delay; otherwise fall back to
        # the mean, followed by the variation if there is one.
        if dist:
            return '%sms %sms distribution %s' % (
                dist.mean, dist.var, dist.netem_dist_name())
        if var:
            return ('%sms' % mean) + (' %sms' % var)
        return ('%sms' % mean) + ''

    class_fmt = ('%s class add dev %s parent 1: '
                 'classid 1:%s htb rate 100Gbit')
    netem_fmt = ('%s qdisc %s dev %s parent 1:%s '
                 'handle %s: netem limit %s delay %s')

    # eth0 and ifb0 (the bonding interfaces) are only used for enforcing
    # bandwidth, so we should not use them to install delay qdiscs.
    phy_ifaces = self.get_physical_ifaces()

    cmds = []
    for idx, sender in enumerate(self.__senders[:nsenders]):
        LOG.debug('generating commands for sender %d', idx)
        class_id, handle = self.__sender_class_handle(idx)

        for iface in phy_ifaces:
            cmds.append((class_fmt % (path.tc(), iface, class_id), 0))
            iface_ifb = self.get_ifb_for_iface(iface)
            cmds.append((class_fmt % (path.tc(), iface_ifb, class_id), 0))

        when = 0
        for step, rtt in enumerate(sender.rtts):
            tc_cmd = 'change' if step else 'add'

            # Format the delay model descriptions. If we have
            # distributions for both directions, then use that info
            # exactly. Otherwise, compute a mean value for any
            # direction without a distribution. With no distributions,
            # just split rtt between the two directions.
            in_mean = rtt.val / 2
            out_mean = rtt.val - in_mean
            if rtt.in_dist and not rtt.out_dist:
                out_mean = rtt.val - rtt.in_dist.mean
            if rtt.out_dist and not rtt.in_dist:
                in_mean = rtt.val - rtt.out_dist.mean

            out_delay = describe_delay(rtt.out_dist, out_mean, rtt.out_var)
            in_delay = describe_delay(rtt.in_dist, in_mean, rtt.var)

            # TODO(soheil): Actually, we don't update the BW when RTT
            #               changes. It's only the other way around and only
            #               because of buffer sizing. Maybe move into its
            #               own loop.
            for iface in phy_ifaces:
                cmds.append((netem_fmt % (
                    path.tc(), tc_cmd, iface, class_id, handle,
                    INFINITY_BUFFER, out_delay), when))
                iface_ifb = self.get_ifb_for_iface(iface)
                cmds.append((netem_fmt % (
                    path.tc(), tc_cmd, iface_ifb, class_id, handle,
                    INFINITY_BUFFER, in_delay), when))

            when += rtt.dur
    return cmds