def _send_loop(self): if self.is_done: return self.cwnd = 100 # Get next packet, send it while len(self.in_flight) + 1 <= self.cwnd: # What packet? if len(self.retransmit_q) > 0: p = self.retransmit_q.pop() if self.id == PARAMS.flow_print: vprint("flow : %s retransmit" % p) else: try: p = next(self.packets) except StopIteration: # no more packets! break # Check it's not gotten acked... if self.is_acked(p.seq_num): continue if self.id == PARAMS.flow_print: vprint("flow : %s sent, cwnd: %s/%.1f" % (p, len(self.in_flight)+1, self.cwnd)) self.in_flight.add(p.seq_num) p.sent_ms = R.time self.src_send_q.enq(p) #vprint(self, len(self.in_flight)) # Setup the timeout R.call_in(self.rto, self.timeout, p, rto = self.rto)
def open_connection(self, flow, use_gen=True):
    """Register and start `flow`, then schedule the next generated flow.

    When `flow` is not None it receives the next flow id, is attached to
    its source and destination servers (receive side) and to their
    uplinks (send side), is recorded in the global ``FLOWS`` table and is
    started.  When `use_gen` is true the next flow is pulled from
    ``self.flow_gen`` and this method is scheduled again at that flow's
    arrival time; an exhausted generator simply ends the chain.
    """
    if flow is not None:
        # FLOWS and N_FLOWS are only mutated in place, never rebound.
        flow.id = N_FLOWS[0]

        # Server -> flow
        src_server = self.servers[flow.src]
        src_server.add_flow(flow, flow.src_recv)
        dst_server = self.servers[flow.dst]
        dst_server.add_flow(flow, flow.dst_recv)

        # Flow -> server
        flow.add_src_send(src_server.uplink)
        flow.add_dst_send(dst_server.uplink)

        # Global book-keeping
        FLOWS[flow.id] = flow
        N_FLOWS[0] += 1
        flow.add_callback_done(self.del_flow)

        # Actually start things...
        flow.start()

    if not use_gen:
        return

    try:
        upcoming = next(self.flow_gen)
        R.call_at(upcoming.arrival, priority=-1, fn=self.open_connection, flow=upcoming)
    except StopIteration:
        # No more flows
        pass
def start(self):
    """Install the slot-0 matchings and kick off the slice loop.

    ``self.new_slice`` is rebound to ``self._new_slice`` wrapped by
    ``Delay`` with a period of one slot plus the reconfiguration time
    (presumably so each call is deferred by that long -- confirm against
    ``Delay``).  A ``_disable`` call is scheduled one slot duration from
    now and the first slice is run immediately.
    """
    self._disable()

    first_matchings = self.matchings_by_slot[0]
    self.install_matchings(first_matchings)

    # Create a recursive call
    slice_period = PARAMS.slot_duration + PARAMS.reconfiguration_time
    delayed = Delay(slice_period, priority=2)
    self.new_slice = delayed(self._new_slice)

    R.call_in(PARAMS.slot_duration, priority=1, fn=self._disable)
    self._new_slice()
def print_time(time_limit):
    """Print a one-line progress report and schedule the next one.

    The report shows the current simulator time against `time_limit`,
    the number of entries in ``FLOWS`` and the flow counter
    ``N_FLOWS[0]``.  In verbose mode every report ends with a newline;
    otherwise the line is left open so the next report, which begins by
    erasing the line and returning the carriage, overwrites it.
    """
    terminator = "\n" if PARAMS.verbose else ""
    status = "\x1b[2K\r\033[1;91m%.3fms of %dms \033[00m %d (%d)" % (
        R.time, time_limit, len(FLOWS), N_FLOWS[0])
    print(status, end=terminator)

    # Report again one simulator time unit from now (presumably 1 ms).
    R.call_in(1, print_time, time_limit)
def del_flow(flow_id):
    """Retire the flow registered under `flow_id`.

    Stamps the flow's end time, logs it as done, bumps the completed-flow
    counter and removes it from ``FLOWS``.  When all flows arrive at the
    start of the run, the simulator is stopped as soon as the number of
    completed flows equals the number of opened flows.
    """
    # FLOWS and N_DONE are only mutated in place, never rebound.
    finished = FLOWS[flow_id]
    vprint("%s done!" % finished)

    finished.end = R.time
    LOG.log_flow_done(finished)

    N_DONE[0] += 1
    del FLOWS[flow_id]

    if PARAMS.arrive_at_start and N_DONE[0] == N_FLOWS[0]:
        R.stop()
def _new_slice(self):
    """Install the matchings for the current slot and re-arm the loop.

    The slot index is ``self.slot_t`` modulo the number of configured
    slots.  After installing the matchings the switch is enabled, a
    ``_disable`` call is scheduled one slot duration from now, and
    ``self.new_slice`` is invoked to keep the cycle going.

    NOTE(review): this method does not modify ``self.slot_t``; confirm it
    is advanced elsewhere, otherwise the same slot is installed each time.
    """
    slot_count = len(self.matchings_by_slot)
    slot_id = self.slot_t % slot_count

    # Compute our new matching
    self.install_matchings(self.matchings_by_slot[slot_id])

    # Re-call ourselves
    self._enable()
    R.call_in(PARAMS.slot_duration, self._disable)
    self.new_slice()
def connect_to(self, port_id, tor):
    """Attach `tor` to port `port_id` and schedule its disconnection.

    The port's destination is recorded, its transmit queue is resumed and
    ``disconnect_from`` is scheduled shortly before the slot ends.  For
    ports below ``PARAMS.n_rotor`` the peer's ``capacity`` is also copied
    into ``self.capacities`` for indirection.
    """
    # Set the connection
    self.ports_dst[port_id] = tor
    self.ports_tx[port_id].resume()

    # Tear the link down 0.008 time units before the slot is over
    # (presumably 8 us if slot_duration is in ms -- confirm).
    teardown_delay = PARAMS.slot_duration - .008
    R.call_in(teardown_delay, self.disconnect_from, port_id, priority=-1)

    # Get capacities for indirection if rotor
    is_rotor_port = port_id < PARAMS.n_rotor
    if is_rotor_port:
        self.capacities[port_id] = tor.capacity
def run(self, time_limit, flow_gen):
    """Start every switch and ToR, then run the simulator.

    `flow_gen` is stored on the instance and its first flow is scheduled
    to be opened at that flow's arrival time.  Unless all flows arrive at
    the start, `time_limit` is installed as the simulator's limit.

    Raises:
        StopIteration: if `flow_gen` yields no flows at all.
    """
    self.flow_gen = flow_gen

    # Scheduled with negative priority rather than opened directly, so
    # that opening the first flow isn't the first thing we do.
    first_flow = next(flow_gen)
    R.call_at(first_flow.arrival, self.open_connection, first_flow, priority=-1)

    # Register first events
    for switch in self.switches:
        switch.start()
    for tor in self.tors:
        tor.start()

    # Start events
    if not PARAMS.arrive_at_start:
        R.limit = time_limit
    R.run_next()
def start(self):
    """Connect the initial rotor matchings and schedule the first events.

    Meant to be called once at setup.  Every port in ``rotor_ports`` is
    connected to its peer from the slot-0 rotor matching.  When a slot
    duration is configured, ``self.new_slice`` is wrapped by ``Delay``
    with a period of one slot plus the reconfiguration time.  Finally
    ``make_route``, ``new_slice`` and ``_send`` are scheduled at the
    current time with priorities -10, -1 and 10 respectively.
    """
    # Rotor
    #######

    # This is the first time, we need to connect everyone
    initial_slot = 0
    active_matchings = self.matchings_by_slot_rotor[initial_slot % PARAMS.n_slots]

    # For all active matchings, connect them up!
    for port in rotor_ports:
        self.connect_to(port, active_matchings[port])

    # Set a countdown for the next slot, just like normal
    if PARAMS.slot_duration is not None:
        self.slot_id = 0
        slice_period = PARAMS.slot_duration + PARAMS.reconfiguration_time
        delayed = Delay(slice_period, priority=1000)
        self.new_slice = delayed(self.new_slice)

    R.call_in(0, self.make_route, priority=-10)
    R.call_in(0, self.new_slice, priority=-1)
    R.call_in(0, self._send, priority=10)
def _send(self): # Currently sending something, or paused, or no packets to send if not self._enabled or self._paused: return if len(self._queue) == 0: if self.empty_callback is not None: self.empty_callback() return # Disable self._enabled = False # Get packet and compute tx time pkt = self._queue.pop() self.q_size_B -= pkt.size_B tx_delay = pkt.size_B * self.ms_per_byte if pkt.flow_id == PARAMS.flow_print: vprint("queue: %s sent %s tx %.6f lat %.6f" % (pkt, self, tx_delay, self.prop_delay)) R.call_in(tx_delay, self._enable) R.call_in(self.prop_delay + tx_delay, self.dst_recv, pkt)
def recv(self, packet):
    """Accept `packet` at this switch and either deliver or buffer it.

    A packet whose destination is attached locally is enqueued on the
    matching local transmit port.  Any other packet is classified by the
    tag returned from ``ToRSwitch.packet_tag`` (possibly rewritten to
    "rotor" or "rotor-old" below), appended to the buffer for its next
    ToR under that tag, and the send loop is triggered.

    Raises:
        AssertionError: if the packet was intended for a different switch
            or its hop count reaches 50.
    """
    tracing = packet.flow_id == PARAMS.flow_print
    if tracing:
        vprint("%s: %s recv" % (self, packet))

    # Sanity check: a packet pinned to a specific switch must arrive there.
    if packet.intended_dest is not None:
        assert packet.intended_dest == self.id, \
            "@%.3f %s received %s, was intended for %s" % (
                R.time, self, packet, packet.intended_dest)

    # Update hop count
    packet.hop_count += 1
    assert packet.hop_count < 50, "Hop count >50? %s" % packet

    # Deliver locally
    if packet.dst_id in self.local_dests:
        if tracing:
            vprint("%s: %s Local destination" % (self, packet))
        next_port_id = self.local_dests[packet.dst_id]
        self.ports_tx[next_port_id].enq(packet)
    else:
        packet._tor_arrival = R.time
        next_tor_id = self.dst_to_tor[packet.dst_id]
        dst_tag = ToRSwitch.packet_tag(packet.tag)
        from_local_src = packet.src_id in self.local_dests

        # CACHE handling: a locally originated cache packet with no link
        # pending towards its ToR tries to claim a free cache port.
        if (from_local_src and dst_tag == "cache"
                and next_tor_id not in self.will_have_cache_to):
            for port_id in cache_ports:
                if self.ports_dst[port_id] is None:
                    if self.switches[port_id].request_matching(self, next_tor_id):
                        # Stops us from requesting this again
                        self.will_have_cache_to.add(next_tor_id)
                        R.call_in(15, self.activate_cache_link, port_id, next_tor_id)
                        # NOTE(review): deactivate_cache_link is *called*
                        # here and its return value is registered as the
                        # done-callback.  That is only right if it returns
                        # a callable -- confirm, or wrap it in a closure.
                        FLOWS[packet.flow_id].add_callback_done(
                            self.deactivate_cache_link(next_tor_id))
                        break

        # If we don't have a cache yet, make it rotor
        if dst_tag == "cache" and next_tor_id not in self.have_cache_to:
            dst_tag = "rotor"

        # TODO can just enqueue right here?

        # ROTOR requires some handling...
        if dst_tag == "rotor":
            # ...adapt our capacity on rx
            self.capacity[next_tor_id] -= 1

            # ... if indirect, put it in higher queue...
            if not from_local_src:
                if tracing:
                    vprint("%s: %s is old indirect" % (self, packet))
                dst_tag = "rotor-old"
            else:
                self.nonempty_rotor_dst.add(next_tor_id)

        self.buffers_dst_type[next_tor_id][dst_tag].append(packet)
        self.buffers_dst_type_sizes[next_tor_id][dst_tag] += 1

        # Debug print: report the depth of the buffer the packet was
        # actually appended to (keyed by the final dst_tag).
        if tracing:
            vprint("%s: %s Outer destination %s/%s (%d)" %
                   (self, packet, next_tor_id, dst_tag,
                    len(self.buffers_dst_type[next_tor_id][dst_tag])))

        # trigger send loop
        self._send()
def main(load, n_tor, n_switches, n_xpand, n_cache, bandwidth,
         arrive_at_start, latency, time_limit, workload, slice_duration,
         reconfiguration_time, jitter, uuid, log, verbose, no_log, no_pause,
         skewed, cache_policy, is_ml, valiant):
    """Configure the global parameters, build the network and run one simulation.

    Per the unit comments below, `bandwidth` is taken in Mb/s and
    `slice_duration` in microseconds; `slice_duration` and
    `reconfiguration_time` are divided by 1000 to become milliseconds.
    `time_limit` is printed as milliseconds.

    The local namespace is part of this function's behaviour: every local
    bound at the ``PARAMS.set_many(locals())`` call becomes a parameter,
    and every local bound at the ``init_log(**locals())`` call is passed
    to the logger.  Renaming, adding or removing locals above those calls
    changes what they receive.

    Raises:
        AssertionError: if the requested xpander/cache switch counts do
            not fit within `n_switches`.
    """
    # Set parameters
    # (Mb/s)*us/8 works out to (B/s)*s
    packets_per_slot = int(bandwidth * slice_duration / (BYTES_PER_PACKET * 8))
    slice_duration /= 1000 # divide to be in ms
    reconfiguration_time /= 1000 # divide to be in ms
    bandwidth_Bms = bandwidth * 1e6 / 1e3 / 8 # Mb/s -> bytes per ms
    random.seed(40) # TODO Just to make things reproducible

    # Compute switch counts
    if n_xpand is not None:
        assert n_xpand <= n_switches
        n_xpand = n_xpand
    else:
        n_xpand = 0 #round(min(5, n_switches/3))
    if n_cache is not None:
        assert n_cache + n_xpand <= n_switches
        assert n_cache < n_switches
        n_cache = n_cache
    else:
        # Default: half of the non-xpander switches, rounded down.
        n_cache = floor((n_switches - n_xpand) / 2)
    # Whatever is neither xpander nor cache is a rotor switch.
    n_rotor = n_switches - n_xpand - n_cache
    print("%d xpander, %d rotor, %d cache. %d total" % (n_xpand, n_rotor, n_cache, n_switches))

    if uuid is None:
        uuid = _uuid.uuid4()

    slot_duration = slice_duration #*n_rotor
    cycle_duration = slice_duration * n_rotor

    # slice_duration is removed so it is not captured by locals() below;
    # slot_duration and cycle_duration are captured instead.
    del slice_duration
    PARAMS.set_many(locals())
    PARAMS.flow_print = -1
    print(PARAMS)
    gen_ports()

    print("Setting up network...")
    # Uses global params object
    net = RotorNet()

    # NOTE(review): n_slots, n_cycles and max_slots are not used again in
    # this function, but they are bound before init_log(**locals()) and
    # are therefore forwarded to it.
    if n_rotor > 0:
        n_slots = math.ceil(time_limit / slot_duration)
        n_cycles = math.ceil(time_limit / (n_rotor * n_slots * PARAMS.slot_duration))
    else:
        n_cycles = 1
        n_slots = 1
    max_slots = n_cycles * n_slots
    #cycle_duration = slot_duration*n_slots
    # Re-introduced (in ms) for the report below and the pause loop.
    slice_duration = slot_duration

    print("Time limit %dms, cycle %.3fms, slot %.3fms, slice %.3fms" % (PARAMS.time_limit, PARAMS.cycle_duration, PARAMS.slot_duration, slice_duration))
    print("#tor: %d, #rotor: %d, #links: %d, bw: %dGb/s, capacity: %.3fGb/s" % (PARAMS.n_tor, PARAMS.n_rotor, PARAMS.n_tor * PARAMS.n_rotor, PARAMS.bandwidth / 1e3, PARAMS.n_tor * PARAMS.n_switches * PARAMS.bandwidth / 1e3))

    print("Setting up flows, load %d%%..."
          % (100 * load))
    # generate flows
    flow_gen = generate_flows(
        load=load,
        n_tor=n_tor,
        bandwidth=bandwidth,
        time_limit=time_limit,
        n_switches=n_switches,
        workload_name=workload,
        arrive_at_start=arrive_at_start,
        skewed=skewed,
    )

    # Start the log
    if not no_log:
        # Every local bound so far is forwarded as a keyword argument.
        init_log(fn=None, **locals())

    # set up printing
    # Schedules a pause once per slice up to ten times the time limit.
    # NOTE(review): the loop still iterates when pausing is disabled (the
    # condition is tested inside it), and it never terminates if
    # slice_duration is not positive.
    time = 0
    while time < time_limit * 10:
        time += slice_duration
        if verbose and not no_pause:
            R.call_in(time, pause, priority=100)
    #print time
    R.call_in(0, print_time, time_limit)

    print("Starting simulator...")
    if is_ml:
        ml_generator(network=net, n_jobs=3, servers_per_ring=4, model_name="resnet")
        ml_generator(network=net, n_jobs=3, servers_per_ring=4, model_name="vgg")
        ml_generator(network=net, n_jobs=3, servers_per_ring=4, model_name="gpt2")

    # Start the simulator
    net.run(flow_gen=flow_gen, time_limit=time_limit)

    # Force log the unfinished flows
    # NOTE(review): LOG is used unguarded here and in u_fn below, yet it
    # is checked against None before close() -- confirm whether LOG can be
    # None at this point (e.g. when no_log is set).
    for f in FLOWS.values():
        LOG.log_flow_done(f)

    # File name and divisor for the per-port utilization dump; they are
    # only referenced by the disabled block kept in the string below.
    u_fn = "utilization-" + str(LOG.sim_id) + ".csv"
    max_packets = (R.time / 1000) * (bandwidth * 1e6) / (BYTES_PER_PACKET * 8)
    # Disabled utilization dump, kept as a bare string (never executed).
    """ with open(u_fn, "w") as f: print("switch,type,port,n_packets,divisor", file = f) for s in net.switches: #if s.tag == "cache": #divisor = R.time #else: divisor = max_packets for port_id, n in enumerate(s.n_packets): print(",".join(str(x) for x in [s.id, s.tag, port_id, n, divisor]), file = f)# """

    # Done!
    if LOG is not None:
        LOG.close()
    print("done")