Exemplo n.º 1
0
 def __str__(self):
     """Return a printable NSWG summary: matrix sizes, folds and cycle stats."""
     out = ['NSWG: ']
     # Section 1: lowered weight-matrix dimensions for matrix-style layers.
     out.append('{0:>8} {1:>8}'.format('MatrixH', 'MatrixW'))
     for idx, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             out.append('{0:>8} {1:>8}'.format(str(self.matrixH[idx]),
                                               str(self.matrixW[idx])))
     out.append('')
     # Section 2: synapse/neuron folding factors.
     out.append('{0:>8} {1:>8}'.format('Synaptic', 'Neuron'))
     for idx, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             out.append('{0:>8} {1:>8}'.format(self.synapse_fold[idx],
                                               self.neuron_fold[idx]))
     out.append('')
     # Section 3: buffering / cycle statistics.  NOTE: this section keys on
     # isConvLayer (not isMatrixLayer) for non-FPGA layers, as the original did.
     out.append('{0:>16} {1:>20} {2:>20} {3:>20} {4:>20}'.format(
         'Initial Buffer', 'Write Block Cycles', 'Read Block Cycles',
         'Total Cycles', 'Input Multiplier'))
     for idx, layer in enumerate(self.net.layers):
         if lb.isConvLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             out.append('{0:>16} {1:>20} {2:>20} {3:>20} {4:>20}'.format(
                 self.initial_buffer[idx], self.write_block_cycles[idx],
                 self.read_block_cycles[idx], self.total_cycles[idx],
                 self.input_multiplier[idx]))
     # Each original section row ended with '\n', so keep a trailing newline.
     return '\n'.join(out) + '\n'
Exemplo n.º 2
0
 def print_folding_factors(self):
     """Print the SIMD/PE/MMV folding factors for every matrix-style layer.

     All prints take a single argument, so the parenthesised form behaves
     identically under Python 2 and Python 3.
     """
     print("\nFolding factors: ")
     print('{0:>35} {1:>8} {2:>5} {3:>5} {4:>5}'.format('NAME', 'idx', 'SIMD', 'PE', 'MMV'))
     for i, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             # Trailing space kept to match the historical column layout.
             print('{0:>35} {1:>8} {2:>5} {3:>5} {4:>5} '.format(
                 layer.get_type(), i, self.SIMD[i], self.PE[i], self.MMV[i]))
     print("")
Exemplo n.º 3
0
 def print_hardware_cost(self):
     """Print per-layer BRAM/LUT cost estimates plus a totals row.

     Fixes: Python-2-only print statements parenthesised (all single-arg,
     so output is unchanged); the cost estimators ``bram_cost``/``lut_cost``
     are now called once per layer instead of 4x/2x respectively.
     """
     print("\nHardware Cost:")
     row_fmt = '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'
     print(row_fmt.format('Layer', 'idx', 'Input BRAMS', 'Weights BRAM',
                          'Total LUTS', 'Total BRAM'))
     total_input_brams = 0
     total_weights_brams = 0
     total_luts = 0
     total_brams = 0
     for i, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             # Compute the (possibly expensive) cost estimates exactly once.
             brams = self.bram_cost(i)
             luts = self.lut_cost(i)
             print(row_fmt.format(layer.get_type(), i, brams[0], brams[1],
                                  luts, sum(brams)))
             total_input_brams += brams[0]
             total_weights_brams += brams[1]
             total_luts += luts
             total_brams += sum(brams)
     print(row_fmt.format("Totals", "ALL", total_input_brams,
                          total_weights_brams, total_luts, total_brams))
     print("")
Exemplo n.º 4
0
 def print_topology(self):
     """Print one row of geometry information per matrix-style layer.

     Prints are parenthesised single-argument calls, identical under
     Python 2 and Python 3.
     """
     print("\nNetwork Topology: ")
     row_fmt = '{0:>35} {1:>10} {2:>10} {3:>10} {4:>10} {5:>8} {6:>8} {7:>8}'
     print(row_fmt.format('NAME', 'idx', 'out_dim', 'filter_dim', 'in_chan',
                          'out_chan', 'stride', 'in_dim'))
     for i, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             print(row_fmt.format(layer.get_type(), i, layer.get_out_dim(),
                                  layer.get_filter_dim(), layer.getInputSize(),
                                  layer.getOutputSize(), layer.get_stride(),
                                  layer.get_in_dim()))
     print("")
Exemplo n.º 5
0
 def ops_per_cycle(self, layer_idx):
     """Peak operations per cycle for the layer at ``layer_idx``.

     Returns None (implicitly, as before) for non-matrix layers.
     """
     layer = self.net.layers[layer_idx]
     if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
         # Factor of 2: each MAC counts as a multiply plus an add.
         return 2 * self.SIMD[layer_idx] * self.PE[layer_idx] * self.MMV[layer_idx]
Exemplo n.º 6
0
 def print_cycles(self):
     """Print operation count and estimated cycle count per matrix layer.

     Prints are parenthesised single-argument calls, identical under
     Python 2 and Python 3.
     """
     print("\nCycles per layer: ")
     layer_cycles = self.calculate_layer_cycles()  # Same as est MVC
     row_fmt = '{0:>35} {1:>8}  {2:>10} {3:>10}'
     print(row_fmt.format('NAME', 'idx', 'ops/layer', 'MVC'))
     for i, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             print(row_fmt.format(layer.get_type(), i,
                                  self.net.ops_per_layer(layer),
                                  layer_cycles[i]))
     print("")
Exemplo n.º 7
0
 def find_first_matrix_layer(self):
     """Return the index of the first matrix-style layer in the network.

     Asserts that at least one such layer exists (unchanged contract:
     under ``-O`` the sentinel -1 would be returned, as before).
     """
     first = next((idx for idx, layer in enumerate(self.net.layers)
                   if lb.isMatrixLayer(layer)
                   or lfpga.isFPGAMatrixLayer(layer)), -1)
     assert (first != -1)
     return first
Exemplo n.º 8
0
	def ops_per_layer(self, layer):
		"""Number of arithmetic operations performed by ``layer``.

		Matrix-style layers (conv / fully-connected) report their own op
		count via ``getNumOps``; every other layer type contributes 0.
		"""
		is_matrix = layers.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer)
		return layer.getNumOps() if is_matrix else 0
Exemplo n.º 9
0
 def calculate_matrix_cycles(self):
     """Estimated cycle count per layer; non-matrix layers report 0."""
     cycles = []
     for idx, layer in enumerate(self.net.layers):
         if not (lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer)):
             cycles.append(0)
             continue
         # Total layer ops divided by the parallel compute throughput.
         throughput = self.ops_per_cycle(idx) * layer.get_parallel()
         cycles.append(self.net.ops_per_layer(layer) / throughput)
     return cycles
Exemplo n.º 10
0
 def find_slowest_layer(self):
     """Find the worst-case (highest cycle count) matrix layer index."""
     slowest = self.find_first_matrix_layer()
     cycles = self.calculate_layer_cycles()
     for idx, cycle_count in enumerate(cycles):
         # Only consult the layer-type predicates for candidates that are
         # actually slower (preserves the original short-circuit order).
         if cycle_count <= cycles[slowest]:
             continue
         layer = self.net.layers[idx]
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             slowest = idx
     return slowest
Exemplo n.º 11
0
    def calculate_neural_folding(self):
        """Populate ``synapse_fold`` and ``neuron_fold`` for matrix layers.

        Folds are the matrix dimensions divided by the per-layer SIMD/PE
        allocation from ``self.perf``; non-matrix layers keep the zero
        placeholder produced by ``_zeros``.
        """
        self.synapse_fold = self._zeros()
        self.neuron_fold = self._zeros()

        for idx, layer in enumerate(self.net.layers):
            if not (lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer)):
                continue
            self.synapse_fold[idx] = self.matrixH[idx] / self.perf.SIMD[idx]
            self.neuron_fold[idx] = self.matrixW[idx] / self.perf.PE[idx]
Exemplo n.º 12
0
 def calculate_layer_cycles(self):  # a.k.a. estimated MVC
     """Cycles required per layer: ops_per_layer / ops_per_cycle.

     Non-matrix layers are reported as 0 cycles.
     """
     result = []
     for idx, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             throughput = (self.ops_per_cycle(idx)
                           * self.net.parallel_per_layer(layer))
             result.append(self.net.ops_per_layer(layer) / throughput)
         else:
             result.append(0)
     return result
Exemplo n.º 13
0
    def calculate_matrix_sizes(self):
        """Derive the weight-matrix shape for every matrix-style layer.

        Width is the layer's output size; height is the input size times
        the squared filter dimension.  Non-matrix layers keep the zero
        placeholder produced by ``_zeros``.
        """
        self.matrixH = self._zeros()
        self.matrixW = self._zeros()

        for idx, layer in enumerate(self.net.layers):
            if not (lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer)):
                continue
            fdim = layer.get_filter_dim()
            self.matrixW[idx] = layer.getOutputSize()
            self.matrixH[idx] = layer.getInputSize() * fdim * fdim
Exemplo n.º 14
0
def determine_memory_resources(pipeline):
    """Build an HLS pragma pinning the largest weight memory to LUTRAM.

    Returns the empty string for pipelines with fewer than 5 matrix
    layers (the pragma does not apply to short pipelines).
    """
    if count_matrix_layers(pipeline) < 5:
        return ""
    best_idx = 0
    best_weights = 0
    # Locate the FPGA matrix layer with the largest weight memory.
    for idx, layer in enumerate(pipeline):
        if layers_fpga.isFPGAMatrixLayer(layer) and layer.getWMemCount() > best_weights:
            best_weights = layer.getWMemCount()
            best_idx = idx
    prefix = "#pragma HLS RESOURCE core=RAM_S2P_LUTRAM variable="
    return prefix + pipeline[best_idx].getWMemName()
Exemplo n.º 15
0
def res_alloc_interactive(pipeline):
    """
    Interactively collect PE/SIMD/MMV settings for each FPGA matrix layer.

    Prompts the user on stdin for every matrix layer and returns a deep
    copy of ``pipeline`` with the chosen resource values applied; the
    input pipeline itself is left untouched.
    """
    adjusted = []
    for original in pipeline:
        layer = copy.deepcopy(original)
        if layers_fpga.isFPGAMatrixLayer(layer):
            print("Please enter compute resources for layer %s" % layer.name)
            print("Weight matrix shape: %s" % str(original.getW().shape))
            print("Operations in layer = %d" % original.layer_ops())
            layer.simd = int(raw_input("SIMD: "))
            layer.pe = int(raw_input("PE: "))
            # MMV is pinned to 1 — no MMV support for now.
            layer.mmv = 1
        adjusted.append(layer)
    return adjusted
Exemplo n.º 16
0
	def calculate_activation_counts(self):
		"""Accumulate the activation element count of each matrix layer.

		Appends out_dim * out_dim * output_size per matrix-style layer to
		``self.num_activations``.
		"""
		for layer in self.layers:
			if not (layers.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer)):
				continue
			count = layer.get_out_dim() * layer.get_out_dim() * layer.getOutputSize()
			self.num_activations.append(count)
Exemplo n.º 17
0
def count_matrix_layers(pipeline):
    """Return how many layers in ``pipeline`` are FPGA matrix layers."""
    return sum(1 for layer in pipeline if layers_fpga.isFPGAMatrixLayer(layer))