def __init__(self, numcoarsechannels, numfinechannels, accumulation_length, bandwidth, input_bitwidth, fft_coarse_out_bitwidth, antennas=1):
    """Build the block graph for a coarse/fine channelizer design.

    For every antenna index ``i`` the chain
    ADC<i> -> PFB<i> -> FFT_coarse<i> -> Transpose<i> -> FFT_fine<i> -> VAcc<i>
    is registered in ``self.blocks`` and ``self.totalblocks`` is advanced
    by the number of block instances added.

    Parameters:
        numcoarsechannels: channel count handed to the PFB, coarse FFT and
            transpose models; also divides the coarse FFT output bandwidth
            to obtain the per-channel fine FFT bandwidth.
        numfinechannels: channel count handed to the fine FFT and
            transpose models.
        accumulation_length: accepted for interface compatibility; not
            read by this constructor.
        bandwidth: input bandwidth used to scale every block model.
        input_bitwidth: multiplied by ``bandwidth`` to get the ADC output
            bandwidth.
        fft_coarse_out_bitwidth: multiplied by ``2 * bandwidth`` to get
            the coarse FFT output bandwidth.
        antennas: number of identical chains to create (default 1).
    """
    self.blocks = {}
    self.blockalgs = {}
    self.totalblocks = 0
    self.maxdesigns = 1
    self.singleimplementation = 1

    # Platform arguments after the name: cost, inputbw, outputbw, resources.
    self.platforms = {}
    self.platforms['ROACH'] = Platform('ROACH', 6700, 40, 40, ['registers', 'luts', 'dsp', 'bram'])
    self.platforms['GTX580'] = Platform('GTX580', 3500, 10, 1, ['time'])

    # These products do not depend on the antenna index.
    adc_bw = bandwidth * input_bitwidth
    pfb_bw = bandwidth * 32
    fft_coarse_out_bandwidth = bandwidth * fft_coarse_out_bitwidth * 2

    # NOTE(review): CBlock's positional arguments look like
    # (name, model, upstream, <flag>, input bw, downstream, <flag>,
    # output bw, instance count) -- confirm against CBlock.
    for i in range(0, antennas):
        suffix = repr(i)
        adc = 'ADC' + suffix
        pfb = 'PFB' + suffix
        coarse = 'FFT_coarse' + suffix
        transpose = 'Transpose' + suffix
        fine = 'FFT_fine' + suffix
        vacc = 'VAcc' + suffix

        # ADC: source of the chain (-1 marks "no upstream block").
        self.blocks[adc] = CBlock('ADC', CBlock.getADCModel(self.platforms, bandwidth, input_bitwidth), -1, 0, 0, pfb, 0, adc_bw, 1)
        self.totalblocks += 1

        # PFB.
        # NOTE(review): the PFB advertises adc_bw as its output bandwidth
        # while FFT_coarse declares pfb_bw as its input -- confirm intent.
        self.blocks[pfb] = CBlock('PFB', CBlock.getPFBWModel(self.platforms, bandwidth, pfb_bw, numcoarsechannels), adc, 0, adc_bw, coarse, 0, adc_bw, 1)
        self.totalblocks += 1

        # Coarse FFT.
        self.blocks[coarse] = CBlock('FFT_coarse', CBlock.getFFTWModel(self.platforms, bandwidth, numcoarsechannels), pfb, 0, pfb_bw, transpose, 0, fft_coarse_out_bandwidth, 1)
        self.totalblocks += 1

        # Fine FFT sizing: adjust to ensure the block fits on the gpu.
        # When one fine FFT takes less than 0.1 of the GTX580 'time'
        # resource, scale it by the largest power of two that keeps it
        # at or below 0.1 and create proportionally fewer fine blocks.
        fft_fine_in_bandwidth = fft_coarse_out_bandwidth / numcoarsechannels
        finemodel = CBlock.getFFTModel(self.platforms, fft_fine_in_bandwidth, numfinechannels)
        gpu_time = finemodel['GTX580']['time']
        if gpu_time < 0.1:
            multiplier = pow(2, int(log(0.1 / gpu_time, 2)))
        else:
            multiplier = 1
        finemodel['GTX580']['time'] = gpu_time * multiplier
        fine_blocks = int(numcoarsechannels / multiplier)
        fine_block_bandwidth = fft_coarse_out_bandwidth / fine_blocks

        # Transpose between the coarse and fine stages.
        self.blocks[transpose] = CBlock('Transpose', CBlock.getTransposeModel(self.platforms, bandwidth, numcoarsechannels, numfinechannels), coarse, 0, fft_coarse_out_bandwidth, fine, 1, fft_coarse_out_bandwidth, 1)
        self.totalblocks += 1

        # Fine FFTs, one CBlock entry standing for fine_blocks instances.
        self.blocks[fine] = CBlock('FFT_fine', finemodel, transpose, 0, fine_block_bandwidth, vacc, 0, fft_fine_in_bandwidth, fine_blocks)
        self.totalblocks += fine_blocks

        # Vector accumulators: sink of the chain, with a fixed resource model.
        vacc_model = {'ROACH': {'registers': 0.2, 'luts': 0.1, 'dsp': 0, 'bram': 0.4}, 'GTX580': {'time': 0.001}}
        self.blocks[vacc] = CBlock('VAcc', vacc_model, fine, 0, fine_block_bandwidth, -1, 0, 0, fine_blocks)
        self.totalblocks += fine_blocks
def __init__(self, numcoarsechannels, numfinechannels, accumulation_length, bandwidth, input_bitwidth, fft_coarse_out_bitwidth, antennas=1):
    """Create the platforms and per-antenna blocks of a two-stage channelizer.

    Each antenna ``i`` contributes six entries to ``self.blocks``, linked
    in this order: ``ADC<i>``, ``PFB<i>``, ``FFT_coarse<i>``,
    ``Transpose<i>``, ``FFT_fine<i>`` and ``VAcc<i>``.
    ``self.totalblocks`` counts block instances: the first four count
    once each, the last two count ``fine_blocks`` times each.

    Parameters:
        numcoarsechannels: passed to the PFB, coarse FFT and transpose
            models; the coarse FFT output bandwidth is divided by it to
            give the fine FFT input bandwidth.
        numfinechannels: passed to the fine FFT and transpose models.
        accumulation_length: not used by this constructor; kept so the
            signature stays unchanged.
        bandwidth: bandwidth from which all block bandwidths are derived.
        input_bitwidth: scales ``bandwidth`` into the ADC output bandwidth.
        fft_coarse_out_bitwidth: scales ``2 * bandwidth`` into the coarse
            FFT output bandwidth.
        antennas: how many chains to build (default 1).
    """
    self.blocks = {}
    self.blockalgs = {}
    self.totalblocks = 0
    self.maxdesigns = 1
    self.singleimplementation = 1

    # add platforms: cost, inputbw, outputbw, resources
    self.platforms = {}
    self.platforms['ROACH'] = Platform(
        'ROACH', 6700, 40, 40, ['registers', 'luts', 'dsp', 'bram'])
    self.platforms['GTX580'] = Platform('GTX580', 3500, 10, 1, ['time'])

    # Bandwidth products shared by every antenna chain.
    adc_bw = bandwidth * input_bitwidth
    pfb_bw = bandwidth * 32
    fft_coarse_out_bandwidth = bandwidth * fft_coarse_out_bitwidth * 2

    for i in range(0, antennas):
        tag = repr(i)

        # add the ADC (-1 as upstream: nothing feeds it)
        self.blocks['ADC' + tag] = CBlock(
            'ADC',
            CBlock.getADCModel(self.platforms, bandwidth, input_bitwidth),
            -1, 0, 0, 'PFB' + tag, 0, adc_bw, 1)
        self.totalblocks += 1

        # add the PFB
        # NOTE(review): output bandwidth here is adc_bw, but FFT_coarse
        # below lists pfb_bw as its input bandwidth -- verify.
        self.blocks['PFB' + tag] = CBlock(
            'PFB',
            CBlock.getPFBWModel(self.platforms, bandwidth, pfb_bw,
                                numcoarsechannels),
            'ADC' + tag, 0, adc_bw, 'FFT_coarse' + tag, 0, adc_bw, 1)
        self.totalblocks += 1

        # add the coarse FFT
        self.blocks['FFT_coarse' + tag] = CBlock(
            'FFT_coarse',
            CBlock.getFFTWModel(self.platforms, bandwidth,
                                numcoarsechannels),
            'PFB' + tag, 0, pfb_bw, 'Transpose' + tag, 0,
            fft_coarse_out_bandwidth, 1)
        self.totalblocks += 1

        # adjust to ensure the block fits on the gpu: if a single fine
        # FFT uses under 0.1 of the GTX580 'time' budget, multiply its
        # cost by a power of two (staying <= 0.1) and divide the number
        # of fine blocks by the same factor.
        fft_fine_in_bandwidth = fft_coarse_out_bandwidth / numcoarsechannels
        finemodel = CBlock.getFFTModel(self.platforms,
                                       fft_fine_in_bandwidth,
                                       numfinechannels)
        if finemodel['GTX580']['time'] < 0.1:
            multiplier = pow(
                2, int(log(0.1 / finemodel['GTX580']['time'], 2)))
        else:
            multiplier = 1
        finemodel['GTX580']['time'] = finemodel['GTX580']['time'] * multiplier
        fine_blocks = int(numcoarsechannels / multiplier)
        fine_block_bandwidth = fft_coarse_out_bandwidth / fine_blocks

        # add the transpose feeding the fine FFTs
        self.blocks['Transpose' + tag] = CBlock(
            'Transpose',
            CBlock.getTransposeModel(self.platforms, bandwidth,
                                     numcoarsechannels, numfinechannels),
            'FFT_coarse' + tag, 0, fft_coarse_out_bandwidth,
            'FFT_fine' + tag, 1, fft_coarse_out_bandwidth, 1)
        self.totalblocks += 1

        # add the fine FFTs (fine_blocks instances)
        self.blocks['FFT_fine' + tag] = CBlock(
            'FFT_fine', finemodel, 'Transpose' + tag, 0,
            fine_block_bandwidth, 'VAcc' + tag, 0, fft_fine_in_bandwidth,
            fine_blocks)
        self.totalblocks += fine_blocks

        # add the vector accumulators (-1 as downstream: end of chain)
        self.blocks['VAcc' + tag] = CBlock(
            'VAcc',
            {
                'ROACH': {
                    'registers': 0.2,
                    'luts': 0.1,
                    'dsp': 0,
                    'bram': 0.4,
                },
                'GTX580': {
                    'time': 0.001,
                },
            },
            'FFT_fine' + tag, 0, fine_block_bandwidth, -1, 0, 0,
            fine_blocks)
        self.totalblocks += fine_blocks
def __init__(self, numchannels, numantpol, accumulation_length, skybandwidth, input_bitwidth, fft_out_bitwidth):
    """Build the block graph ADC -> PFB -> Transpose -> XEng for a correlator.

    Parameters:
        numchannels: channel count passed to the PFB, FFT and transpose
            models.
        numantpol: number of antenna-polarisation inputs; must be a key of
            the GTX580 bandwidth table below, otherwise KeyError is raised.
        accumulation_length: not read by this constructor.
        skybandwidth: bandwidth to process. The 0.2 / numantpol limit
            below is described as "200MHz / nantpol", so this is
            presumably in GHz -- TODO confirm.
        input_bitwidth: scales the ADC output bandwidth.
        fft_out_bitwidth: scales the FFT output bandwidth.
    """
    self.blocks = {}
    self.totalblocks = 0
    self.maxdesigns = 1
    self.singleimplementation = 1
    self.windowsize = 1024

    # Platform cost metric; 'power' is the alternative noted in the
    # original source.
    cost_metric = 'dollars'

    # Platforms in use (a GTX580 server and a ROACH were earlier options).
    self.platforms = {}
    self.platforms['DualGTX690'] = Platform.createDualGTX690Server(cost_metric)
    self.platforms['ROACH2'] = Platform.createRoach2(cost_metric)
    platforms = self.platforms

    # --- ADC -----------------------------------------------------------
    # We are using a 16 input adc board. The multiplier needs to be a
    # multiple of 4 because the benchmarks do 4 parallel firs and ffts,
    # so 4 streams are processed per block.
    streams_per_block = 4
    adc_stream_bw = skybandwidth * 2 * input_bitwidth
    adc_model = CBlock.getADCModel(platforms, skybandwidth, input_bitwidth)
    grouped_adc_bw = streams_per_block * adc_stream_bw
    front_end_count = numantpol / streams_per_block
    adc_limits = CBlock.getADCMaximums(platforms, streams_per_block)
    self.blocks['ADC'] = CBlock('ADC', adc_model, -1, 0, 0, 'PFB', 0, grouped_adc_bw, front_end_count, adc_limits)
    self.totalblocks += front_end_count

    # --- PFB (FIR + real FFT combined into one block) --------------------
    fft_out_bandwidth = skybandwidth * 2 * fft_out_bitwidth
    fir_part = CBlock.getPFBModel(platforms, skybandwidth, input_bitwidth, numchannels)
    fft_part = CBlock.getFFTRealModel(platforms, skybandwidth, numchannels)
    fir_fft_model = CBlock.combineModels(fir_part, fft_part)
    grouped_fft_bw = streams_per_block * fft_out_bandwidth
    self.blocks['PFB'] = CBlock('PFB', fir_fft_model, 'ADC', 0, grouped_adc_bw, 'Transpose', 0, grouped_fft_bw, front_end_count)
    self.totalblocks += front_end_count

    # --- Transpose -----------------------------------------------------
    corner_turn_model = CBlock.getTransposeModel(platforms, skybandwidth, numchannels, self.windowsize)
    self.blocks['Transpose'] = CBlock('Transpose', corner_turn_model, 'PFB', 0, grouped_fft_bw, 'XEng', 0, grouped_fft_bw, front_end_count)
    self.totalblocks += front_end_count

    # --- X-engines -----------------------------------------------------
    # Maximum bandwidth per X-engine on the GTX580, keyed by numantpol.
    gtx580_max_bw = {
        32: 0.06914,
        64: 0.03095,
        96: 0.01748,
        128: 0.01069,
        192: 0.00536,
        256: 0.00318,
        512: 0.00087,
        1024: 0.00023,
    }
    gpu_limit = gtx580_max_bw[numantpol]

    # The minimum number of xengines that fit on the gpu, rounded up to a
    # power of two. Computed as in the original but not used for sizing.
    gpu_ratio_log2 = numpy.ceil(numpy.log2(skybandwidth / gpu_limit))
    mingpuxengines = int(numpy.power(2, gpu_ratio_log2))

    # Assume the xengine is running at 200MHz and takes nantpol clock
    # cycles to get the data out for a single frequency channel, so the
    # maximum bandwidth it can process is 200MHz/nantpol.
    fpga_limit = 0.2 / numantpol

    # Largest per-xengine bandwidth that every platform can still handle.
    per_engine_limit = min(fpga_limit, gpu_limit)

    # Number of xengines needed to meet the spec (should be a power of 2).
    # NOTE(review): int() truncates, so a skybandwidth below the limit
    # gives 0 engines and the divisions below fail -- confirm the
    # intended minimum.
    minxengines = int(skybandwidth / per_engine_limit)
    numxengines = 8 * minxengines

    xengine_sky_bandwidth = skybandwidth / numxengines
    xengine_in_bandwidth = numantpol * fft_out_bandwidth / numxengines
    xeng_model = CBlock.getXEngModel(platforms, xengine_sky_bandwidth, numantpol)
    self.blocks['XEng'] = CBlock('XEng', xeng_model, 'Transpose', 1, xengine_in_bandwidth, -1, 0, 0, numxengines)
    self.totalblocks += numxengines
def __init__(self, numchannels, numantpol, accumulation_length, skybandwidth, input_bitwidth, fft_out_bitwidth):
    """Create the platforms and the four block groups of a correlator design.

    The blocks registered in ``self.blocks`` are ``'ADC'``, ``'PFB'``,
    ``'Transpose'`` and ``'XEng'``, linked in that order.
    ``self.totalblocks`` accumulates each group's instance count.

    Parameters:
        numchannels: channel count given to the PFB, FFT and transpose
            models.
        numantpol: number of antenna-polarisation inputs; looked up in the
            GTX580 bandwidth table, so unsupported values raise KeyError.
        accumulation_length: unused here.
        skybandwidth: bandwidth to be processed (units presumably GHz,
            judging by the "200MHz" note on the 0.2 constant -- verify).
        input_bitwidth: factor in the ADC output bandwidth.
        fft_out_bitwidth: factor in the FFT output bandwidth.
    """
    self.blocks = {}
    self.totalblocks = 0
    self.maxdesigns = 1
    self.singleimplementation = 1
    self.windowsize = 1024

    def register(key, block, count):
        # Store a block and account for the instances it represents.
        self.blocks[key] = block
        self.totalblocks += count

    # add the platforms, costed in dollars
    cost = 'dollars'
    self.platforms = {}
    self.platforms['DualGTX690'] = Platform.createDualGTX690Server(cost)
    self.platforms['ROACH2'] = Platform.createRoach2(cost)

    # add the ADC
    # we are using a 16 input adc board; the multiplier needs to be a
    # multiple of 4 because the benchmarks do 4 parallel firs and ffts
    adcmultiplier = 4  # process 4 streams at a time
    adc_bw = skybandwidth * 2 * input_bitwidth
    register(
        'ADC',
        CBlock(
            'ADC',
            CBlock.getADCModel(self.platforms, skybandwidth,
                               input_bitwidth),
            -1, 0, 0, 'PFB', 0, adcmultiplier * adc_bw,
            numantpol / adcmultiplier,
            CBlock.getADCMaximums(self.platforms, adcmultiplier)),
        numantpol / adcmultiplier)

    # use pfb to process 4 channels at a time; the FIR and real-FFT
    # models are merged into one block model
    fft_out_bandwidth = skybandwidth * 2 * fft_out_bitwidth
    firfftmodel = CBlock.combineModels(
        CBlock.getPFBModel(self.platforms, skybandwidth, input_bitwidth,
                           numchannels),
        CBlock.getFFTRealModel(self.platforms, skybandwidth, numchannels))
    register(
        'PFB',
        CBlock('PFB', firfftmodel, 'ADC', 0, adcmultiplier * adc_bw,
               'Transpose', 0, adcmultiplier * fft_out_bandwidth,
               numantpol / adcmultiplier),
        numantpol / adcmultiplier)

    # add the transpose
    transposemodel = CBlock.getTransposeModel(
        self.platforms, skybandwidth, numchannels, self.windowsize)
    register(
        'Transpose',
        CBlock('Transpose', transposemodel, 'PFB', 0,
               adcmultiplier * fft_out_bandwidth, 'XEng', 0,
               adcmultiplier * fft_out_bandwidth,
               numantpol / adcmultiplier),
        numantpol / adcmultiplier)

    # add the XEngines
    gtx580_max_bw = {
        32: 0.06914,
        64: 0.03095,
        96: 0.01748,
        128: 0.01069,
        192: 0.00536,
        256: 0.00318,
        512: 0.00087,
        1024: 0.00023,
    }

    # the minimum number of xengines that fit on the gpu (power of two);
    # evaluated as in the original although nothing below reads it
    mingpuxengines = int(
        numpy.power(
            2,
            numpy.ceil(
                numpy.log2(skybandwidth / gtx580_max_bw[numantpol]))))

    # assume xengine is running at 200MHz and takes nantpol clock cycles
    # to get the data out for a single frequency channel, so the maximum
    # bandwidth it can process is 200MHz/nantpol
    maxfpgaxengbw = 0.2 / numantpol

    # the maximum bandwidth one xengine can process while still
    # supporting all our platforms
    maxxenginebw = min(maxfpgaxengbw, gtx580_max_bw[numantpol])

    # we need this many xengines to meet the spec
    # note: this needs to be a power of 2
    # NOTE(review): truncation gives 0 when skybandwidth < maxxenginebw,
    # which makes the divisions below fail -- confirm expected inputs.
    minxengines = int(skybandwidth / maxxenginebw)
    numxengines = 8 * minxengines

    xengine_sky_bandwidth = skybandwidth / numxengines
    xengine_in_bandwidth = numantpol * fft_out_bandwidth / numxengines
    register(
        'XEng',
        CBlock(
            'XEng',
            CBlock.getXEngModel(self.platforms, xengine_sky_bandwidth,
                                numantpol),
            'Transpose', 1, xengine_in_bandwidth, -1, 0, 0, numxengines),
        numxengines)