Example no. 1
    def estimate_resources(self, N, M, app_settings, sync_buff_total_samps,
                           pre_filt_latency):
        rscrs = rfnocsim.HwRsrcs()

        DSP_BLOCKS_PER_MAC = 3  # DSP blocks for a scaled complex MAC
        MAX_DSP_RATE = 400e6  # Max clock rate for a DSP48E block
        MAX_UNROLL_DEPTH = 2  # How many taps (or FFT bins) to compute in parallel?
        COEFF_SETS = 1  # We need two copies of coefficients: one live
        # and one buffered for dynamic reload. If both
        # live in BRAM, this should be 2. If the live
        # set lives in registers, this should be 1.

        samp_rate = float(app_settings['samp_rate'])
        dsp_cyc_per_samp = MAX_DSP_RATE / samp_rate

        if app_settings['domain'] == 'time':
            fir_taps = app_settings['fir_taps']
            if (fir_taps <= dsp_cyc_per_samp):
                unroll_factor = 1
                dsp_rate = samp_rate * fir_taps
            else:
                unroll_factor = math.ceil((1.0 * fir_taps) / dsp_cyc_per_samp)
                dsp_rate = MAX_DSP_RATE
                if (unroll_factor > MAX_UNROLL_DEPTH):
                    raise self.SimCompError(
                        'Too many FIR coefficients! Reached loop unroll limit.'
                    )

            rscrs.add('DSP', DSP_BLOCKS_PER_MAC * unroll_factor * N * M)
            rscrs.add('BRAM_18kb',
                      math.ceil(ColGlobals.BPI * app_settings['fir_dly_line'] /
                                hw.Bee7Fpga.BRAM_BYTES) * N *
                      M)  # FIR delay line memory
            rscrs.add('BRAM_18kb',
                      math.ceil(ColGlobals.BPI * COEFF_SETS * fir_taps *
                                unroll_factor * N * M /
                                hw.Bee7Fpga.BRAM_BYTES))  # Coefficient storage

            samp_per_tick = dsp_rate / self.get_tick_rate()
            self.update_latency(func=pre_filt_latency +
                                (fir_taps / (samp_per_tick * unroll_factor)))
        else:
            fft_size = app_settings['fft_size']
            rscrs.add('DSP',
                      DSP_BLOCKS_PER_MAC * N * M * MAX_UNROLL_DEPTH)  # MACs
            rscrs.add(
                'BRAM_18kb',
                math.ceil(ColGlobals.BPI * N * M * fft_size * COEFF_SETS /
                          hw.Bee7Fpga.BRAM_BYTES))  # Coeff storage

            samp_per_tick = MAX_DSP_RATE / self.get_tick_rate()
            self.update_latency(func=pre_filt_latency +
                                (fft_size / samp_per_tick))

        rscrs.add(
            'BRAM_18kb',
            math.ceil(ColGlobals.BPI * sync_buff_total_samps /
                      hw.Bee7Fpga.BRAM_BYTES))  # Synchronization buffer
        self.update_rsrcs(rscrs)
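
To make the time-domain branch above concrete, here is a minimal standalone sketch of the FIR sizing math. MAX_DSP_RATE, DSP_BLOCKS_PER_MAC and MAX_UNROLL_DEPTH mirror the constants in the example, while samp_rate, fir_taps, N and M are purely illustrative assumptions:

import math

MAX_DSP_RATE = 400e6     # max DSP48E clock, as in the example above
DSP_BLOCKS_PER_MAC = 3   # DSP blocks per scaled complex MAC
MAX_UNROLL_DEPTH = 2     # parallel taps supported before giving up

samp_rate = 100e6        # assumed sample rate
fir_taps = 6             # assumed number of FIR taps
N, M = 4, 4              # assumed channel matrix dimensions

dsp_cyc_per_samp = MAX_DSP_RATE / samp_rate   # 4 DSP cycles available per sample
if fir_taps <= dsp_cyc_per_samp:
    unroll_factor = 1    # one MAC can be time-shared across all taps
else:
    # Not enough cycles per sample: compute several taps in parallel
    unroll_factor = math.ceil(fir_taps / dsp_cyc_per_samp)
    assert unroll_factor <= MAX_UNROLL_DEPTH, 'Too many FIR coefficients'

dsp_blocks = DSP_BLOCKS_PER_MAC * unroll_factor * N * M
print(dsp_blocks)        # 3 * 2 * 4 * 4 = 96 DSP blocks for a 4x4 channel matrix
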
Example no. 2
    def estimate_resources(self, radix, sync_buff_depth):
        rscrs = rfnocsim.HwRsrcs()
        # Assume that pipelined adders are inferred in logic (not DSP)
        # Assume that buffering uses BRAM
        rscrs.add(
            'BRAM_18kb',
            math.ceil(ColGlobals.BPI * sync_buff_depth * radix /
                      hw.Bee7Fpga.BRAM_BYTES))
        self.update_rsrcs(rscrs)
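
For reference, the BRAM count above is simply ceil(BPI * sync_buff_depth * radix / BRAM_BYTES). A minimal sketch with assumed numbers (BPI and BRAM_BYTES below are illustrative stand-ins for ColGlobals.BPI and hw.Bee7Fpga.BRAM_BYTES):

import math

BPI = 4                  # assumed bytes per item (complex sample)
BRAM_BYTES = 18e3 / 8    # assumed usable bytes in one 18kb BRAM

radix = 8                # assumed number of streams being combined
sync_buff_depth = 1024   # assumed samples buffered per stream

brams = math.ceil(BPI * sync_buff_depth * radix / BRAM_BYTES)
print(brams)             # ceil(32768 / 2250) = 15 BRAM_18kb blocks
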
Example no. 3
    def __init__(self, sim_core, name):
        self.sim_core = sim_core
        rfnocsim.SimComp.__init__(self, sim_core, name,
                                  rfnocsim.comptype.hardware)
        # Max resources from Virtex7 datasheet
        self.max_resources = rfnocsim.HwRsrcs()
        self.max_resources.add('DSP', 3600)
        self.max_resources.add('BRAM_18kb', 2940)
        self.resources = rfnocsim.HwRsrcs()
        # Each FPGA has 80 SERDES lanes
        self.max_io = 80
        self.serdes_i = dict()
        self.serdes_o = dict()
        # Each lane can carry at most 10Gb/s
        # Each SERDES needs to have some buffering. We assume elastic buffering (50% full on avg).
        io_buff_size = (self.IO_LN_BW *
                        self.IO_LN_LATENCY) / self.ELASTIC_BUFF_FULLNESS
        # Worst case lane latency
        lane_latency = self.IO_LN_LATENCY * self.get_tick_rate()
        for i in range(self.max_io):
            self.serdes_i[i] = rfnocsim.Channel(sim_core,
                                                self.__ioln_name(i) + '/I',
                                                self.IO_LN_BW,
                                                lane_latency / 2)
            self.serdes_o[i] = rfnocsim.Channel(sim_core,
                                                self.__ioln_name(i) + '/O',
                                                self.IO_LN_BW,
                                                lane_latency / 2)
            self.resources.add('BRAM_18kb', 1 + math.ceil(
                io_buff_size / self.BRAM_BYTES))  # input buffering per lane
            self.resources.add('BRAM_18kb', 1)  # output buffering per lane
        # Other resources
        self.resources.add('BRAM_18kb', 72)  # BPS infrastructure + microblaze
        self.resources.add('BRAM_18kb', 128)  # 2 MIGs

        self.functions = dict()
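
The per-lane input buffer above is sized to hold IO_LN_BW * IO_LN_LATENCY bytes of in-flight data at an assumed 50% average elastic-buffer fullness, then rounded up to whole 18kb BRAMs (plus one). A minimal sketch with assumed values for the class constants (the IO_LN_BW, IO_LN_LATENCY and ELASTIC_BUFF_FULLNESS numbers here are hypothetical):

import math

IO_LN_BW = 10e9 / 8          # assumed 10Gb/s SERDES lane, expressed in bytes/s
IO_LN_LATENCY = 1.5e-6       # assumed worst-case lane latency in seconds
ELASTIC_BUFF_FULLNESS = 0.5  # elastic buffer assumed 50% full on average
BRAM_BYTES = 18e3 / 8        # assumed usable bytes in one 18kb BRAM

io_buff_size = (IO_LN_BW * IO_LN_LATENCY) / ELASTIC_BUFF_FULLNESS
brams_per_input_lane = 1 + math.ceil(io_buff_size / BRAM_BYTES)
print(brams_per_input_lane)  # 1 + ceil(3750 / 2250) = 3 BRAM_18kb blocks
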