def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list. Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = options.l0i_size,
                                assoc = options.l0i_assoc,
                                is_icache = True,
                                start_index_bit = block_size_bits,
                                replacement_policy = LRURP())

            l0d_cache = L0Cache(size = options.l0d_size,
                                assoc = options.l0d_assoc,
                                is_icache = False,
                                start_index_bit = block_size_bits,
                                replacement_policy = LRURP())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[i].clk_domain

            # Ruby prefetcher
            prefetcher = RubyPrefetcher(
                num_streams = 16,
                unit_filter = 256,
                nonunit_filter = 256,
                train_misses = 5,
                num_startup_pfs = 4,
                cross_page = True
            )

            l0_cntrl = L0Cache_Controller(
                   version = i * num_cpus_per_cluster + j,
                   Icache = l0i_cache, Dcache = l0d_cache,
                   transitions_per_cycle = options.l0_transitions_per_cycle,
                   prefetcher = prefetcher,
                   enable_prefetch = options.enable_prefetch,
                   send_evictions = send_evicts(options),
                   clk_domain = clk_domain,
                   ruby_system = ruby_system)

            cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
                                    icache = l0i_cache,
                                    clk_domain = clk_domain,
                                    dcache = l0d_cache,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(
                    version = i * num_cpus_per_cluster + j,
                    cache = l1_cache, l2_select_num_bits = l2_bits,
                    cluster_id = i,
                    transitions_per_cycle = options.l1_transitions_per_cycle,
                    ruby_system = ruby_system)

            exec("ruby_system.l0_cntrl%d = l0_cntrl"
                 % (i * num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl"
                 % (i * num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.prefetchQueue = MessageBuffer()
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in range(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                        version = i * num_l2caches_per_cluster + j,
                        L2cache = l2_cache, cluster_id = i,
                        transitions_per_cycle =
                            options.l2_transitions_per_cycle,
                        ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl"
                 % (i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain = ruby_system.clk_domain,
        clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i,
                                   dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version = len(dma_ports),
                              ruby_system = ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        for i in range(options.num_clusters):
            for j in range(num_cpus_per_cluster):
                FileSystemConfig.register_cpu(physical_package_id = 0,
                        core_siblings = range(options.num_cpus),
                        core_id = i * num_cpus_per_cluster + j,
                        thread_siblings = [])

                FileSystemConfig.register_cache(level = 0,
                        idu_type = 'Instruction',
                        size = options.l0i_size,
                        line_size = options.cacheline_size,
                        assoc = 1,
                        cpus = [i * num_cpus_per_cluster + j])
                FileSystemConfig.register_cache(level = 0,
                        idu_type = 'Data',
                        size = options.l0d_size,
                        line_size = options.cacheline_size,
                        assoc = 1,
                        cpus = [i * num_cpus_per_cluster + j])

                FileSystemConfig.register_cache(level = 1,
                        idu_type = 'Unified',
                        size = options.l1d_size,
                        line_size = options.cacheline_size,
                        assoc = options.l1d_assoc,
                        cpus = [i * num_cpus_per_cluster + j])

            FileSystemConfig.register_cache(level = 2,
                    idu_type = 'Unified',
                    size = str(MemorySize(options.l2_size) *
                               num_l2caches_per_cluster) + 'B',
                    line_size = options.cacheline_size,
                    assoc = options.l2_assoc,
                    cpus = [n for n in range(i * num_cpus_per_cluster,
                                             (i + 1) * num_cpus_per_cluster)])

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)

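# Illustrative sketch (added; not part of the original gem5 script): a worked
# example of the address-bit layout computed above. With 64B cache lines and,
# say, 8 L2 banks per cluster (hypothetical values), block_size_bits =
# log2(64) = 6 and l2_bits = log2(8) = 3, so address bits [8:6] select the L2
# bank and the L2 set index starts at bit 9 (l2_index_start).
def _example_l2_index_layout(cacheline_size=64, num_l2caches_per_cluster=8):
    import math
    block_size_bits = int(math.log(cacheline_size, 2))    # bits [5:0]: block offset
    l2_bits = int(math.log(num_l2caches_per_cluster, 2))  # bits [8:6]: bank select
    l2_index_start = block_size_bits + l2_bits            # set index starts at bit 9
    return block_size_bits, l2_bits, l2_index_start

assert _example_l2_index_layout() == (6, 3, 9)
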
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list. Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache, L2cache=l2_cache,
                                      no_mig_atomic=not \
                                        options.allow_atomic_migration,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
                                dcache=l1d_cache, clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
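    # (Added worked example, not in the original script.) With the common
    # default --l2_size=512kB, pf_size becomes 2 * 524288 = 1048576 bytes
    # (1 MiB), so pf_bits = log2(1048576) = 20; with --num_dirs=2,
    # dir_bits = 1 and the default pf_start_bit lands at
    # dir_bits + block_size_bits - 1.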
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert (options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain, clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        pf = ProbeFilter(size=pf_size, assoc=4,
                         start_index_bit=pf_start_bit)

        dir_cntrl.probeFilter = pf
        dir_cntrl.probe_filter_enabled = options.pf_on
        dir_cntrl.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        FileSystemConfig.config_filesystem(options)

        for i in range(options.num_cpus):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=[],
                                          core_id=i,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=1,
                                            idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i])
            FileSystemConfig.register_cache(level=1,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i])
            FileSystemConfig.register_cache(level=2,
                                            idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i])

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)

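# A note on the exec() idiom used throughout these scripts: each controller
# is attached as a numbered child of the SimObject tree. A minimal sketch
# (added; an alternative style, not code from the scripts themselves) of the
# equivalent without exec(), using plain setattr:
def _attach_numbered_children(parent, prefix, nodes):
    # e.g. _attach_numbered_children(ruby_system, 'l1_cntrl', l1_cntrl_nodes)
    # creates parent.l1_cntrl0, parent.l1_cntrl1, ... exactly as the
    # exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) calls above do.
    for i, node in enumerate(nodes):
        setattr(parent, '%s%d' % (prefix, i), node)
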
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list. Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    cp_cntrl_nodes = []
    tcp_cntrl_nodes = []
    sqc_cntrl_nodes = []
    tcc_cntrl_nodes = []
    dir_cntrl_nodes = []
    l3_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    # For an odd number of CPUs, still create the right number of controllers
    TCC_bits = int(math.log(options.num_tccs, 2))

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    mainCluster = None

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        # Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW=crossbar_bw)
    else:
        mainCluster = Cluster(intBW=8) # 16 GB/s

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size=r.size(),
                                              intlvHighBit=numa_bit,
                                              intlvBits=dir_bits,
                                              intlvMatch=i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(noTCCdir=True, TCC_select_num_bits=TCC_bits)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
        dir_cntrl.requestFromDMA.slave = ruby_system.network.master

        dir_cntrl.responseToDMA = MessageBuffer()
        dir_cntrl.responseToDMA.master = ruby_system.network.slave

        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    cpuCluster = None
    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        cpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    else:
        cpuCluster = Cluster(extBW=8, intBW=8) # 16 GB/s

    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("ruby_system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                              range(options.num_cpus),
                                          core_id = i*2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                              range(options.num_cpus),
                                          core_id = i*2+1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2 + 1])

            FileSystemConfig.register_cache(level=1,
                                            idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(
                level=2,
                idu_type='Unified',
                size=options.l3_size,
                line_size=options.cacheline_size,
                assoc=options.l3_assoc,
                cpus=[n for n in range(options.num_cpus)])

    gpuCluster = None
    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        gpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    else:
        gpuCluster = Cluster(extBW=8, intBW=8) # 16 GB/s

    for i in range(options.num_compute_units):
        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):
        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_scalar_cache):
        scalar_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        scalar_cntrl.create(options, ruby_system, system)

        exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i)

        cpu_sequencers.append(scalar_cntrl.sequencer)

        scalar_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        scalar_cntrl.requestFromSQC.master = ruby_system.network.slave

        scalar_cntrl.probeToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.probeToSQC.slave = ruby_system.network.master

        scalar_cntrl.responseToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.responseToSQC.slave = ruby_system.network.master

        scalar_cntrl.mandatoryQueue = \
            MessageBuffer(buffer_size=options.scalar_buffer_size)

        gpuCluster.add(scalar_cntrl)

    for i in range(options.num_cp):
        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.createCP(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.sequencer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer(ordered=True)
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_tccs):
        tcc_cntrl = TCCCntrl(l2_response_latency=options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl_nodes.append(tcc_cntrl)
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered=True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered=True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered=True)

        exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i)

        # connect all of the wire buffers between L3 and dirs up
        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        exec('system.dma_cntrl%d = dma_cntrl' % i)
        if dma_device.type == 'MemTest':
            exec('system.dma_cntrl%d.dma_sequencer.slave = dma_devices.test'
                 % i)
        else:
            exec('system.dma_cntrl%d.dma_sequencer.slave = dma_device.dma'
                 % i)
        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size=0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)

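# Illustrative sketch (added; the numbers are hypothetical, but the AddrRange
# parameters are the ones used above): how the directory interleaving picks a
# home directory. With 64B lines and 4 directories, block_size_bits = 6,
# dir_bits = 2, and numa_bit = 6 + 2 - 1 = 7, so address bits [7:6] select
# the directory: directory i claims every address whose bits [7:6] equal i
# (intlvHighBit=7, intlvBits=2, intlvMatch=i).
def _example_home_directory(addr, numa_bit=7, dir_bits=2):
    # Return the directory index that AddrRange(intlvHighBit=numa_bit,
    # intlvBits=dir_bits, intlvMatch=i) would match for this address.
    low_bit = numa_bit - dir_bits + 1            # lowest interleave bit (6)
    return (addr >> low_bit) & ((1 << dir_bits) - 1)

assert _example_home_directory(0x00) == 0   # bits [7:6] = 0b00
assert _example_home_directory(0x40) == 1   # bits [7:6] = 0b01
assert _example_home_directory(0xC0) == 3   # bits [7:6] = 0b11
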
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system, cpus):
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    mainCluster = None
    cpuCluster = None
    gpuCluster = None

    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        # Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW = crossbar_bw)
        cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
        gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        mainCluster = Cluster(intBW = 8) # 16 GB/s
        cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
        gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s

    # Create CPU directory controllers
    dir_cntrl_nodes = \
        construct_dirs(options, system, ruby_system, ruby_system.network)
    for dir_cntrl in dir_cntrl_nodes:
        mainCluster.add(dir_cntrl)

    # Create CPU core pairs
    (cp_sequencers, cp_cntrl_nodes) = \
        construct_corepairs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(cp_sequencers)
    for cp_cntrl in cp_cntrl_nodes:
        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                              range(options.num_cpus),
                                          core_id = i*2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                              range(options.num_cpus),
                                          core_id = i*2+1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i*2, i*2+1])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2+1])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i*2, i*2+1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = [n for n in
                                                range(options.num_cpus)])

    # Create TCPs
    (tcp_sequencers, tcp_cntrl_nodes) = \
        construct_tcps(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(tcp_sequencers)
    for tcp_cntrl in tcp_cntrl_nodes:
        gpuCluster.add(tcp_cntrl)

    # Create SQCs
    (sqc_sequencers, sqc_cntrl_nodes) = \
        construct_sqcs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(sqc_sequencers)
    for sqc_cntrl in sqc_cntrl_nodes:
        gpuCluster.add(sqc_cntrl)

    # Create Scalars
    (scalar_sequencers, scalar_cntrl_nodes) = \
        construct_scalars(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(scalar_sequencers)
    for scalar_cntrl in scalar_cntrl_nodes:
        gpuCluster.add(scalar_cntrl)

    # Create command processors
    (cmdproc_sequencers, cmdproc_cntrl_nodes) = \
        construct_cmdprocs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(cmdproc_sequencers)
    for cmdproc_cntrl in cmdproc_cntrl_nodes:
        gpuCluster.add(cmdproc_cntrl)

    # Create TCCs
    tcc_cntrl_nodes = \
        construct_tccs(options, system, ruby_system, ruby_system.network)
    for tcc_cntrl in tcc_cntrl_nodes:
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        exec('system.dma_cntrl%d = dma_cntrl' % i)

        # IDE doesn't have a .type but seems like everything else does.
        if not hasattr(dma_device, 'type'):
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = dma_device'
                 % i)
        elif dma_device.type == 'MemTest':
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = '
                 'dma_devices.test' % i)
        else:
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = dma_device.dma'
                 % i)

        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)

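# A note on terminology (added): this variant uses gem5's renamed port
# vocabulary, while the older scripts above use master/slave. The mapping
# between the two, as the two GPU_VIPER variants in this file illustrate,
# is mechanical:
#
#     buffer.master = network.slave   -->  buffer.out_port = network.in_port
#     buffer.slave  = network.master  -->  buffer.in_port  = network.out_port
#     dma_sequencer.slave = port      -->  dma_sequencer.in_ports = port
#
# i.e. a buffer that sends into the network connects its out_port to the
# network's in_port, and vice versa for buffers that receive.
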
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list. Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l3_cntrl_nodes = []
    dir_cntrl_nodes = []

    control_count = 0

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW=512, intBW=512) # 1 TB/s

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size=r.size(),
                                              intlvHighBit=numa_bit,
                                              intlvBits=dir_bits,
                                              intlvMatch=i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(TCC_select_num_bits=0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused if
    # the number of cpus does not equal the number of sequencers. Thus make
    # sure that an even number of cpus is specified.
    assert ((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW=512, intBW=512) # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=range(
                                              options.num_cpus),
                                          core_id=i * 2,
                                          thread_siblings=[])

            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=range(
                                              options.num_cpus),
                                          core_id=i * 2 + 1,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2 + 1])

            FileSystemConfig.register_cache(level=1,
                                            idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(
                level=2,
                idu_type='Unified',
                size=options.l3_size,
                line_size=options.cacheline_size,
                assoc=options.l3_assoc,
                cpus=[n for n in range(options.num_cpus)])

    # Assuming no DMA devices
    assert (len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)

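# A note on the Cluster bandwidth values (added; inferred from the comments
# in these scripts rather than stated anywhere explicitly): intBW/extBW
# appear to be in units of roughly 2 GB/s each, since 8 is annotated
# "16 GB/s" and 512 is annotated "1 TB/s" (512 * 2 GB/s = 1024 GB/s). Under
# the same reading, the scaled GPU crossbar in the VIPER configs,
# crossbar_bw = 16 * num_compute_units * bw_scalor, would give about
# 32 GB/s per compute unit at bw_scalor = 1, matching the "Assuming a
# 2GHz clock" comment there.
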
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list. Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l3_cntrl_nodes = []
    dir_cntrl_nodes = []

    control_count = 0

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size = r.size(),
                                              intlvHighBit = numa_bit,
                                              intlvBits = dir_bits,
                                              intlvMatch = i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(TCC_select_num_bits = 0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused if
    # the number of cpus does not equal the number of sequencers. Thus make
    # sure that an even number of cpus is specified.
    assert((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        FileSystemConfig.config_filesystem(options)

        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                              range(options.num_cpus),
                                          core_id = i*2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                              range(options.num_cpus),
                                          core_id = i*2+1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i*2, i*2+1])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2+1])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i*2, i*2+1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = [n for n in
                                                range(options.num_cpus)])

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)

def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list. Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = '4096B', assoc = 1, is_icache = True,
                start_index_bit = block_size_bits,
                replacement_policy = LRUReplacementPolicy())

            l0d_cache = L0Cache(size = '4096B', assoc = 1, is_icache = False,
                start_index_bit = block_size_bits,
                replacement_policy = LRUReplacementPolicy())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[i].clk_domain

            l0_cntrl = L0Cache_Controller(
                   version = i * num_cpus_per_cluster + j,
                   Icache = l0i_cache, Dcache = l0d_cache,
                   send_evictions = send_evicts(options),
                   clk_domain = clk_domain,
                   ruby_system = ruby_system)

            cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
                                    icache = l0i_cache,
                                    clk_domain = clk_domain,
                                    dcache = l0d_cache,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(
                    version = i * num_cpus_per_cluster + j,
                    cache = l1_cache, l2_select_num_bits = l2_bits,
                    cluster_id = i, ruby_system = ruby_system)

            exec("ruby_system.l0_cntrl%d = l0_cntrl"
                 % (i * num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl"
                 % (i * num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in range(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                        version = i * num_l2caches_per_cluster + j,
                        L2cache = l2_cache, cluster_id = i,
                        transitions_per_cycle = options.ports,
                        ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl"
                 % (i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
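    # (Added note.) The divider only fixes the ratio, e.g. with the common
    # 2GHz Ruby clock, clk_divider = 3 runs the memory controllers at
    # roughly 666 MHz (one memory-controller cycle every 3 Ruby cycles).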
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain = ruby_system.clk_domain,
        clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i,
                                   dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version = len(dma_ports),
                              ruby_system = ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        FileSystemConfig.config_filesystem(options)

        for i in range(options.num_clusters):
            for j in range(num_cpus_per_cluster):
                FileSystemConfig.register_cpu(physical_package_id = 0,
                        core_siblings = range(options.num_cpus),
                        core_id = i * num_cpus_per_cluster + j,
                        thread_siblings = [])

                FileSystemConfig.register_cache(level = 0,
                        idu_type = 'Instruction',
                        size = '4096B',
                        line_size = options.cacheline_size,
                        assoc = 1,
                        cpus = [i * num_cpus_per_cluster + j])
                FileSystemConfig.register_cache(level = 0,
                        idu_type = 'Data',
                        size = '4096B',
                        line_size = options.cacheline_size,
                        assoc = 1,
                        cpus = [i * num_cpus_per_cluster + j])

                FileSystemConfig.register_cache(level = 1,
                        idu_type = 'Unified',
                        size = options.l1d_size,
                        line_size = options.cacheline_size,
                        assoc = options.l1d_assoc,
                        cpus = [i * num_cpus_per_cluster + j])

            FileSystemConfig.register_cache(level = 2,
                    idu_type = 'Unified',
                    size = str(MemorySize(options.l2_size) *
                               num_l2caches_per_cluster) + 'B',
                    line_size = options.cacheline_size,
                    assoc = options.l2_assoc,
                    cpus = [n for n in range(i * num_cpus_per_cluster,
                                             (i + 1) * num_cpus_per_cluster)])

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)

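# Illustrative sketch (added; the values are hypothetical): the aggregate L2
# size registered above multiplies a per-bank MemorySize by the number of
# banks in the cluster, e.g. with a 512kB bank and 2 banks per cluster:
#
#     str(MemorySize('512kB') * 2) + 'B'   # -> '1048576B' (1 MiB total)
#
# MemorySize arithmetic is in bytes, hence the explicit 'B' suffix appended
# to the stringified product.
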
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list. Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size = options.l1i_size,
                            assoc = options.l1i_assoc,
                            start_index_bit = block_size_bits,
                            is_icache = True)
        l1d_cache = L1Cache(size = options.l1d_size,
                            assoc = options.l1d_assoc,
                            start_index_bit = block_size_bits)
        l2_cache = L2Cache(size = options.l2_size,
                           assoc = options.l2_assoc,
                           start_index_bit = block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache, L2cache=l2_cache,
                                      no_mig_atomic=not \
                                        options.allow_atomic_migration,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
                                dcache=l1d_cache, clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert(options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain, clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        pf = ProbeFilter(size = pf_size, assoc = 4,
                         start_index_bit = pf_start_bit)

        dir_cntrl.probeFilter = pf
        dir_cntrl.probe_filter_enabled = options.pf_on
        dir_cntrl.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered = True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered = True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system,
                               slave = dma_port)

        dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version = len(dma_ports),
                              ruby_system = ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        for i in range(options.num_cpus):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = [],
                                          core_id = i,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i])
            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i])
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i])

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
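
# Usage sketch (added; illustrative, mirroring how gem5's Ruby.py drives
# these factories rather than copied from it): each create_system() above is
# selected by the built protocol and returns the same triple, which the
# caller wires into the RubySystem before instantiation:
#
#     (cpu_sequencers, dir_cntrl_nodes, topology) = \
#         create_system(options, full_system, system, dma_ports, bootmem,
#                       ruby_system)
#
# The sequencers become the CPU-side memory ports, the directory nodes are
# used to size and place the memory controllers, and the topology (or the
# top-level Cluster, for the VIPER/AMD variants) is handed to the network
# constructor.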