def __str__(self):
    strbuf = 'NSWG: \n'
    strbuf += '{0:>8} {1:>8}\n'.format('MatrixH', 'MatrixW')
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            strbuf += '{0:>8} {1:>8}\n'.format(str(self.matrixH[i]),
                                               str(self.matrixW[i]))
    strbuf += '\n'
    strbuf += '{0:>8} {1:>8}\n'.format('Synaptic', 'Neuron')
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            strbuf += '{0:>8} {1:>8}\n'.format(self.synapse_fold[i],
                                               self.neuron_fold[i])
    strbuf += '\n'
    strbuf += '{0:>16} {1:>20} {2:>20} {3:>20} {4:>20}\n'.format(
        'Initial Buffer', 'Write Block Cycles', 'Read Block Cycles',
        'Total Cycles', 'Input Multiplier')
    for i in range(len(self.net.layers)):
        if (lb.isConvLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            strbuf += '{0:>16} {1:>20} {2:>20} {3:>20} {4:>20}\n'.format(
                self.initial_buffer[i], self.write_block_cycles[i],
                self.read_block_cycles[i], self.total_cycles[i],
                self.input_multiplier[i])
    return strbuf
def print_folding_factors(self):
    print "\nFolding factors: "
    print '{0:>35} {1:>8} {2:>5} {3:>5} {4:>5}'.format(
        'NAME', 'idx', 'SIMD', 'PE', 'MMV')
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            print '{0:>35} {1:>8} {2:>5} {3:>5} {4:>5}'.format(
                self.net.layers[i].get_type(), i,
                self.SIMD[i], self.PE[i], self.MMV[i])
    print ""
def print_hardware_cost(self):
    print "\nHardware Cost:"
    print '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'.format(
        'Layer', 'idx', 'Input BRAMs', 'Weight BRAMs', 'Total LUTs',
        'Total BRAMs')
    total_input_brams = 0
    total_weights_brams = 0
    total_luts = 0
    total_brams = 0
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            # Compute the per-layer costs once and reuse them below.
            brams = self.bram_cost(i)
            luts = self.lut_cost(i)
            print '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'.format(
                self.net.layers[i].get_type(), i,
                brams[0], brams[1], luts, sum(brams))
            total_input_brams += brams[0]
            total_weights_brams += brams[1]
            total_luts += luts
            total_brams += sum(brams)
    print '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'.format(
        "Totals", "ALL", total_input_brams, total_weights_brams,
        total_luts, total_brams)
    print ""
def print_topology(self):
    print "\nNetwork Topology: "
    print '{0:>35} {1:>10} {2:>10} {3:>10} {4:>10} {5:>8} {6:>8} {7:>8}'.format(
        'NAME', 'idx', 'out_dim', 'filter_dim', 'in_chan', 'out_chan',
        'stride', 'in_dim')
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            print '{0:>35} {1:>10} {2:>10} {3:>10} {4:>10} {5:>8} {6:>8} {7:>8}'.format(
                self.net.layers[i].get_type(), i,
                self.net.layers[i].get_out_dim(),
                self.net.layers[i].get_filter_dim(),
                self.net.layers[i].getInputSize(),
                self.net.layers[i].getOutputSize(),
                self.net.layers[i].get_stride(),
                self.net.layers[i].get_in_dim())
    print ""
def ops_per_cycle(self, layer_idx):
    if (lb.isMatrixLayer(self.net.layers[layer_idx]) or
            lfpga.isFPGAMatrixLayer(self.net.layers[layer_idx])):
        # 2 because each MAC counts as a multiply and an add
        return (self.SIMD[layer_idx] * self.PE[layer_idx] *
                self.MMV[layer_idx] * 2)
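# Worked example (assumed folding factors, not from the source): with
# SIMD=16, PE=32 and MMV=1, the compute array above sustains
# 16 * 32 * 1 * 2 = 1024 ops per cycle, the factor of 2 counting each
# MAC as a multiply plus an add.
def _example_ops_per_cycle():
    simd, pe, mmv = 16, 32, 1  # hypothetical folding factors
    assert simd * pe * mmv * 2 == 1024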
def print_cycles(self):
    print "\nCycles per layer: "
    layer_cycles = self.calculate_layer_cycles()  # Same as estimated MVC
    print '{0:>35} {1:>8} {2:>10} {3:>10}'.format(
        'NAME', 'idx', 'ops/layer', 'MVC')
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            print '{0:>35} {1:>8} {2:>10} {3:>10}'.format(
                self.net.layers[i].get_type(), i,
                self.net.ops_per_layer(self.net.layers[i]),
                layer_cycles[i])
    print ""
def find_first_matrix_layer(self):
    first = -1
    for idx, layer in enumerate(self.net.layers):
        if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
            first = idx
            break
    assert first != -1
    return first
def ops_per_layer(self, layer):
    """Return the operation count for a layer.

    For a pool layer: out_dim * out_dim * filter_dim * filter_dim.
    For a conv or fc layer: parallel * 2 * out_dim * out_dim *
    filter_dim * filter_dim * in_channels * out_channels.
    """
    if layers.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
        return layer.getNumOps()  # /2
    return 0
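# Worked example (assumed values, not from the source): a conv layer with
# out_dim=32, filter_dim=3, in_channels=64, out_channels=128 and
# parallel=1 performs, per the docstring formula above,
# 1 * 2 * 32*32 * 3*3 * 64 * 128 = 150994944 ops.
def _example_conv_ops():
    parallel, out_dim, filter_dim, in_ch, out_ch = 1, 32, 3, 64, 128
    ops = (parallel * 2 * out_dim * out_dim *
           filter_dim * filter_dim * in_ch * out_ch)
    assert ops == 150994944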
def calculate_matrix_cycles(self):
    # Named 'cycles' rather than 'layers' to avoid shadowing the layers module.
    cycles = []
    for idx, layer in enumerate(self.net.layers):
        if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
            cycles.append(self.net.ops_per_layer(layer) /
                          (self.ops_per_cycle(idx) * layer.get_parallel()))
        else:
            cycles.append(0)
    return cycles
def find_slowest_layer(self):
    """Find the worst-case layer, returned as an index into layers."""
    slowest_layer = self.find_first_matrix_layer()
    cycles = self.calculate_layer_cycles()
    for idx, cycle in enumerate(cycles):
        if cycle > cycles[slowest_layer] and (
                lb.isMatrixLayer(self.net.layers[idx]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[idx])):
            slowest_layer = idx
    return slowest_layer
def calculate_neural_folding(self):
    self.synapse_fold = self._zeros()
    self.neuron_fold = self._zeros()
    for i in range(len(self.net.layers)):
        layer = self.net.layers[i]
        if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
            self.synapse_fold[i] = self.matrixH[i] / self.perf.SIMD[i]
            self.neuron_fold[i] = self.matrixW[i] / self.perf.PE[i]
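# Worked example (assumed values, not from the source): an interleaved
# matrix of 576 rows by 128 columns folded onto SIMD=16 input lanes and
# PE=32 processing elements gives synapse_fold = 576 / 16 = 36 and
# neuron_fold = 128 / 32 = 4, i.e. each output neuron takes 36 passes
# and the 128 neurons are computed 32 at a time.
def _example_neural_folding():
    matrix_h, matrix_w, simd, pe = 576, 128, 16, 32  # hypothetical layer
    assert matrix_h / simd == 36
    assert matrix_w / pe == 4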
def calculate_layer_cycles(self):  # Same as estimated MVC
    """For each layer, calculate the cycles required.

    The formula is ops_per_layer() / (ops_per_cycle() * parallel_per_layer()).
    """
    layer_cycles = []
    for idx, layer in enumerate(self.net.layers):
        if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
            layer_cycles.append(
                self.net.ops_per_layer(layer) /
                (self.ops_per_cycle(idx) *
                 self.net.parallel_per_layer(layer)))
        else:
            layer_cycles.append(0)
    return layer_cycles
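# Worked example (assumed numbers, not from the source): a layer of
# 150994944 ops on an array sustaining 1024 ops/cycle with a parallelism
# factor of 1 takes 150994944 / 1024 = 147456 cycles; overall pipeline
# throughput is set by the largest such count (see find_slowest_layer).
def _example_layer_cycles():
    ops, ops_per_cycle, parallel = 150994944, 1024, 1
    assert ops / (ops_per_cycle * parallel) == 147456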
def calculate_matrix_sizes(self):
    self.matrixH = self._zeros()
    self.matrixW = self._zeros()
    for i in range(len(self.net.layers)):
        if (lb.isMatrixLayer(self.net.layers[i]) or
                lfpga.isFPGAMatrixLayer(self.net.layers[i])):
            self.matrixW[i] = self.net.layers[i].getOutputSize()
            self.matrixH[i] = (self.net.layers[i].getInputSize() *
                               self.net.layers[i].get_filter_dim() *
                               self.net.layers[i].get_filter_dim())
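# Worked example (assumed layer shape, not from the source): a conv layer
# with 64 input channels, 3x3 filters and 128 output channels lowers to a
# weight matrix of matrixH = 64 * 3 * 3 = 576 rows (one per synapse of an
# output neuron) and matrixW = 128 columns (one per output neuron).
def _example_matrix_sizes():
    in_ch, filter_dim, out_ch = 64, 3, 128  # hypothetical conv layer
    matrix_h = in_ch * filter_dim * filter_dim
    matrix_w = out_ch
    assert (matrix_h, matrix_w) == (576, 128)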
def determine_memory_resources(pipeline):
    # If the pipeline is short, this does not apply.
    if count_matrix_layers(pipeline) < 5:
        return ""
    maxWeights = 0
    maxIdx = 0
    mem_resources = "#pragma HLS RESOURCE core=RAM_S2P_LUTRAM variable="
    for idx, layer in enumerate(pipeline):
        if layers_fpga.isFPGAMatrixLayer(layer):
            if layer.getWMemCount() > maxWeights:
                maxWeights = layer.getWMemCount()
                maxIdx = idx
    return mem_resources + pipeline[maxIdx].getWMemName()
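# Usage sketch (hypothetical weight-memory name, for illustration only):
# with five or more matrix layers, determine_memory_resources() returns an
# HLS pragma that maps the single largest weight memory to LUTRAM, e.g.
# "#pragma HLS RESOURCE core=RAM_S2P_LUTRAM variable=weights_mem_3".
def _example_memory_pragma():
    wmem_name = "weights_mem_3"  # hypothetical name from getWMemName()
    pragma = "#pragma HLS RESOURCE core=RAM_S2P_LUTRAM variable=" + wmem_name
    assert pragma.endswith("variable=weights_mem_3")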
def res_alloc_interactive(pipeline):
    """Ask the user to input the PE/SIMD/MMV for each layer in the pipeline;
    return a copy of the pipeline with the adjusted PE/SIMD/MMV values.
    """
    ret = []
    for L in pipeline:
        Lnew = copy.deepcopy(L)
        if layers_fpga.isFPGAMatrixLayer(Lnew):
            print("Please enter compute resources for layer %s" % Lnew.name)
            print("Weight matrix shape: %s" % str(L.getW().shape))
            print("Operations in layer = %d" % L.layer_ops())
            Lnew.simd = int(raw_input("SIMD: "))
            Lnew.pe = int(raw_input("PE: "))
            # no MMV support for now
            # Lnew.mmv = int(raw_input("MMV: "))
            Lnew.mmv = 1
        ret.append(Lnew)  # non-matrix layers are copied through unchanged
    return ret
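# Usage note (an assumption, not enforced by res_alloc_interactive itself):
# SIMD and PE are typically chosen to divide the weight-matrix height and
# width evenly, so the folds computed in calculate_neural_folding() come
# out as whole numbers. A minimal check, with hypothetical shapes:
def _example_folding_divides_evenly(matrix_h=576, matrix_w=128, simd=16, pe=32):
    return matrix_h % simd == 0 and matrix_w % pe == 0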
def calculate_activation_counts(self):
    for layer in self.layers:
        if layers.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
            self.num_activations.append(layer.get_out_dim() *
                                        layer.get_out_dim() *
                                        layer.getOutputSize())
def count_matrix_layers(pipeline):
    count = 0
    for layer in pipeline:
        if layers_fpga.isFPGAMatrixLayer(layer):
            count += 1
    return count