Пример #1
0
    def test_cycles_per_layer(self):
        """Check the per-layer folding factors after ``maximise_fps``.

        Builds a network from the pruned DoReFa-Net prototxt, creates a
        performance model for the 'XLNX:KU115.json' device and asserts, for
        every layer index, that ``SIMD[idx]``, ``PE[idx]`` and ``MMV[idx]``
        do not exceed the layer's input size, output size and output
        dimension respectively.
        """
        loader = CaffeLoader(
            None,
            "./FINN/inputs/dorefanet-pruned-without-extra-messages.prototxt")
        net = nn.NN(loader)
        dev = device.Device('XLNX:KU115.json')
        perfmodel = pm.PerfModel(net, dev)
        # The returned fps is not checked here; the call is presumably what
        # populates perfmodel.SIMD / PE / MMV -- TODO confirm.
        perfmodel.maximise_fps()

        def first_if_tuple(size):
            # A size may be reported as a tuple; in that case it is printed
            # for inspection and only its first element is compared.
            if isinstance(size, tuple):
                print(size)
                return size[0]
            return size

        for idx, layer in enumerate(net.layers):
            in_chans = first_if_tuple(layer.getInputSize())
            out_chans = first_if_tuple(layer.getOutputSize())
            out_dim = first_if_tuple(layer.get_out_dim())

            # Single-argument print with explicit formatting produces the
            # same text under both the Python 2 statement and the Python 3
            # function form.
            print("%s %s" % (perfmodel.SIMD[idx], in_chans))
            print("%s %s" % (perfmodel.PE[idx], out_chans))
            print("%s %s" % (perfmodel.MMV[idx], out_dim))
            self.assertLessEqual(perfmodel.SIMD[idx], in_chans)
            self.assertLessEqual(perfmodel.PE[idx], out_chans)
            self.assertLessEqual(perfmodel.MMV[idx], out_dim)
Пример #2
0
 def test_simd_pe_mmv_constraints(self):
     """Assert SIMD/PE/MMV stay within each layer's sizes for the SFC net.

     After ``maximise_fps`` has run on a model built from ``sfc.prototxt``
     and the 'XLNX:KU115.json' device, every layer index must satisfy
     ``SIMD <= getInputSize()``, ``PE <= getOutputSize()`` and
     ``MMV <= get_out_dim()``.
     """
     loader = CaffeLoader(None, "./FINN/inputs/sfc.prototxt")
     network = nn.NN(loader)
     target = device.Device('XLNX:KU115.json')
     model = pm.PerfModel(network, target)
     # Return value (fps) is irrelevant for these checks.
     model.maximise_fps()
     for position, current in enumerate(network.layers):
         simd = model.SIMD[position]
         input_size = current.getInputSize()
         self.assertLessEqual(simd, input_size)

         pe = model.PE[position]
         output_size = current.getOutputSize()
         self.assertLessEqual(pe, output_size)

         mmv = model.MMV[position]
         output_dim = current.get_out_dim()
         self.assertLessEqual(mmv, output_dim)
Пример #3
0
 def test_cycles_per_op(self):
     """Assert the reported LUT usage of the SFC network on the VU9P device.

     The 'luts' entry of ``network_utilisation()`` must equal
     ``2 * count_matrix_layers() * lut_cost_per_op()``.
     """
     loader = CaffeLoader("./FINN/inputs/sfc.caffemodel",
                          "./FINN/inputs/sfc.prototxt")
     network = nn.NN(loader)
     target = device.Device('XLNX:VU9P.json')
     model = pm.PerfModel(network, target)
     utilisation = model.network_utilisation()
     matrix_layer_count = network.count_matrix_layers()
     reported_luts = utilisation['luts']
     expected_luts = 2 * matrix_layer_count * target.lut_cost_per_op()
     self.assertEqual(reported_luts, expected_luts)
Пример #4
0
 def res_alloc_predetermined(pipeline, net, dev):
     """Return a deep copy of ``pipeline`` annotated with SIMD and PE values.

     The copy is installed as ``net.layers`` (so ``net`` is modified), a
     ``PerfModel`` is built for ``net`` on ``dev`` and ``maximise_fps`` is
     run. Each copied layer then receives ``simd`` and ``pe`` attributes
     taken from the model's ``SIMD`` and ``PE`` entries at the same index.

     Args:
         pipeline: sequence of layer objects; it is deep-copied and the
             caller's objects are not modified by this function.
         net: network object whose ``layers`` attribute is replaced.
         dev: device description passed to ``PerfModel``.

     Returns:
         The annotated copy of ``pipeline``.
     """
     ret_pipeline = copy.deepcopy(pipeline)
     # Explicit formatting keeps the printed text identical to the former
     # Python 2 ``print a, b`` statements while also parsing on Python 3.
     print("PIPELINE:  %s" % (ret_pipeline,))
     net.layers = ret_pipeline
     perfmodel = pm.PerfModel(net, dev)
     # Only the SIMD/PE values read below are used; the fps result is not.
     perfmodel.maximise_fps()
     for idx, layer in enumerate(ret_pipeline):
         layer.simd = perfmodel.SIMD[idx]
         print("SIMD: %s" % (layer.simd,))
         layer.pe = perfmodel.PE[idx]
         print("PE: %s" % (layer.pe,))
     return ret_pipeline
Пример #5
0
def demo_lfc():
    """Build a four-layer fully connected network and print its performance.

    Logging is sent to ``FINN.log`` at INFO level. The network is modelled
    on the 'XLNX:VU9P.json' device at frequency 192.4, ``maximise_fps`` is
    run, the ``nswg`` calculations and the model's print helpers are
    invoked, and a summary line with fps and LUT/BRAM utilisation
    percentages is printed.
    """
    logging.basicConfig(
        filename='FINN.log',
        level=logging.INFO)  # use logging.WARNING to silence INFO messages

    # Zero-filled placeholder weights, shaped (OutChans, InChans).
    weight_shapes = [(1024, 832), (1024, 1024), (1024, 1024), (64, 1024)]
    # The three trailing arguments are wbits, ibits, obits.
    lfcnetwork = [
        layers.FullyConnectedLayer(np.zeros(shape), 1, 1, 1)
        for shape in weight_shapes
    ]

    net = FINN.core.nn.NN(layers=lfcnetwork)

    dev = device.Device('XLNX:VU9P.json', frequency=192.4)
    perf = perf_model.PerfModel(net, dev)

    perf.maximise_fps()

    # Optional manual override of the folding factors (disabled):
    #   perf.SIMD = [64, 64, 64, 64]
    #   perf.PE   = [256, 256, 256, 16]

    # Kept from the original call sequence; the value used in the summary
    # is the one read after the nswg calculations below.
    perf.fps()

    perf.nswg.calculate_neural_folding()
    perf.nswg.calculate_write_block_cycles()
    perf.nswg.calculate_read_block_cycles()
    perf.nswg.calculate_total_cycles()
    perf.nswg.calculate_input_multipliers()
    perf.print_folding_factors()
    perf.print_hardware_cost()
    perf.print_topology()
    perf.print_cycles()
    fps = perf.fps()

    # Query the utilisation once, and force float division so that integer
    # resource counts are not truncated to 0 under Python 2.
    utilisation = perf.network_utilisation()
    lut_percent = float(utilisation['luts']) / dev.luts * 100
    bram_percent = float(utilisation['brams']) / dev.brams * 100
    print("Achieved fps of %f with %f%% LUT utilisation and %f%% BRAM utilisation at %f Mhz" % (
        fps, lut_percent, bram_percent, dev.frequency))
Пример #6
0
def demo_hwgq_import():
    """Show how folding factors and hardware cost react to the SIMD setting.

    A model is built from ``inputs/sfc.prototxt`` for the 'XLNX:KU115.json'
    device. The folding factors and hardware cost are printed for the
    initial state, then again after every SIMD entry is set to 5, and once
    more after every SIMD entry is set to 20. PE is left untouched.
    """
    perf = perf_model.PerfModel(
        FINN.core.nn.NN(CaffeLoader(None, "inputs/sfc.prototxt")),
        device.Device('XLNX:KU115.json'))

    def report():
        # Print the current folding factors followed by the hardware cost.
        perf.print_folding_factors()
        perf.print_hardware_cost()

    report()
    for simd_value in (5, 20):
        for position, _ in enumerate(perf.SIMD):
            perf.SIMD[position] = simd_value
        report()
Пример #7
0
def demo_sfc():
    """Build a 6-conv + 3-FC network and print its performance figures.

    Logging is sent to ``FINN.log`` at INFO level. The network is modelled
    on the 'XLNX:VU9P.json' device at frequency 248.5, ``maximise_fps`` is
    run and the SIMD/PE/MMV entries are then overwritten with fixed values.
    The ``nswg`` calculations and print helpers are invoked, and a summary
    line with fps and LUT/BRAM utilisation percentages is printed.
    """
    logging.basicConfig(
        filename='FINN.log',
        level=logging.INFO)  # use logging.WARNING to silence INFO messages

    # (weight shape as (out, in, kernel, kernel), in_dim) per convolution.
    conv_specs = [
        ((64, 3, 3, 3), 32),
        ((64, 64, 3, 3), 30),
        ((128, 64, 3, 3), 14),
        ((128, 128, 3, 3), 12),
        ((256, 128, 3, 3), 5),
        ((256, 256, 3, 3), 3),
    ]
    # Weight shapes for the fully connected layers.
    fc_shapes = [(512, 256), (512, 512), (10, 512)]

    # Arguments after the weights were annotated in the original as
    # "in_dim, pad, stride, wbits, ibits, obits".
    # NOTE(review): seven values are passed but only six names were listed;
    # confirm the meaning of the last argument against ConvolutionLayer.
    sfcnetwork = [
        layers.ConvolutionLayer(np.zeros(shape), in_dim, 0, 1, 1, 1, 1, 0)
        for shape, in_dim in conv_specs
    ]
    sfcnetwork.extend(
        layers.FullyConnectedLayer(np.zeros(shape), 1, 1, 1)
        for shape in fc_shapes)

    net = FINN.core.nn.NN(layers=sfcnetwork)

    dev = device.Device('XLNX:VU9P.json', frequency=248.5)  # Measured on AWS
    perf = perf_model.PerfModel(net, dev)

    perf.maximise_fps()

    # From BNN spreadsheet, t3 (one entry per layer, applied after
    # maximise_fps()).
    simd = [3, 64, 64, 64, 64, 64, 16, 16, 16]
    pe = [64, 64, 64, 64, 64, 64, 16, 16, 4]
    mmv = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    # SIMD is written completely first, then PE, then MMV.
    for factors, values in ((perf.SIMD, simd), (perf.PE, pe), (perf.MMV, mmv)):
        for idx, value in enumerate(values):
            factors[idx] = value

    # FPS given the above folding factors; kept from the original call
    # sequence, the reported value is re-read after the calculations below.
    perf.fps()

    perf.nswg.calculate_neural_folding()
    perf.nswg.calculate_write_block_cycles()
    perf.nswg.calculate_read_block_cycles()
    perf.nswg.calculate_total_cycles()
    perf.nswg.calculate_input_multipliers()
    perf.print_folding_factors()
    perf.print_hardware_cost()
    perf.print_topology()
    perf.print_cycles()
    fps = perf.fps()

    print(perf.nswg)

    # Query the utilisation once, and force float division so that integer
    # resource counts are not truncated to 0 under Python 2.
    utilisation = perf.network_utilisation()
    lut_percent = float(utilisation['luts']) / dev.luts * 100
    bram_percent = float(utilisation['brams']) / dev.brams * 100
    print("Achieved fps of %f with %f%% LUT utilisation and %f%% BRAM utilisation at %f Mhz" % (
        fps, lut_percent, bram_percent, dev.frequency))
Пример #8
0
def demo_dorefa():
    """Build a 5-conv + 3-FC network and print its performance figures.

    Logging is sent to ``FINN.log`` at INFO level. The network is modelled
    on the 'XLNX:VU9P.json' device at frequency 101 and the SIMD/PE/MMV
    entries are set to fixed values (``maximise_fps`` is not called here).
    The ``nswg`` calculations and print helpers are invoked, and a summary
    line with fps and LUT/BRAM utilisation percentages is printed.
    """
    logging.basicConfig(
        filename='FINN.log',
        level=logging.INFO)  # use logging.WARNING to silence INFO messages

    # Per convolution: weight shape as (out, in, kernel, kernel), in_dim,
    # stride, and the value for the layer's ``parallel`` attribute (None
    # means the attribute is not assigned).
    conv_specs = [
        ((68, 3, 12, 12), 227, 4, None),
        ((90, 34, 5, 5), 58, 1, 2),
        ((272, 180, 3, 3), 29, 1, None),
        ((192, 136, 3, 3), 16, 1, 2),
        ((128, 192, 3, 3), 16, 1, 2),
    ]
    # Weight shapes for the fully connected layers.
    fc_shapes = [(4096, 9216), (4096, 4096), (1000, 4096)]

    dorefanetwork = []
    # Arguments after the weights were annotated in the original as
    # "in_dim, pad, stride, wbits, ibits, obits".
    # NOTE(review): seven values are passed but only six names were listed;
    # confirm the meaning of the last argument against ConvolutionLayer.
    for shape, in_dim, stride, parallel in conv_specs:
        conv = layers.ConvolutionLayer(
            np.zeros(shape), in_dim, 0, stride, 1, 1, 1, 0)
        if parallel is not None:
            conv.parallel = parallel
        dorefanetwork.append(conv)

    for shape in fc_shapes:
        dorefanetwork.append(
            layers.FullyConnectedLayer(np.zeros(shape), 1, 1, 1))

    net = FINN.core.nn.NN(layers=dorefanetwork)

    dev = device.Device('XLNX:VU9P.json', frequency=101)  # Measured on AWS
    perf = perf_model.PerfModel(net, dev)

    # From BNN spreadsheet, t3 (one entry per layer).
    simd = [3, 34, 45, 34, 64, 64, 64, 8]
    pe = [68, 90, 136, 64, 32, 32, 16, 32]
    mmv = [18, 3, 3, 1, 1, 1, 1, 1]
    # SIMD is written completely first, then PE, then MMV.
    for factors, values in ((perf.SIMD, simd), (perf.PE, pe), (perf.MMV, mmv)):
        for idx, value in enumerate(values):
            factors[idx] = value

    # FPS given the above folding factors; kept from the original call
    # sequence, the reported value is re-read after the calculations below.
    perf.fps()

    perf.nswg.calculate_neural_folding()
    perf.nswg.calculate_write_block_cycles()
    perf.nswg.calculate_read_block_cycles()
    perf.nswg.calculate_total_cycles()
    perf.nswg.calculate_input_multipliers()
    perf.print_folding_factors()
    perf.print_hardware_cost()
    perf.print_topology()
    perf.print_cycles()
    fps = perf.fps()

    print(perf.nswg)

    # Query the utilisation once, and force float division so that integer
    # resource counts are not truncated to 0 under Python 2.
    utilisation = perf.network_utilisation()
    lut_percent = float(utilisation['luts']) / dev.luts * 100
    bram_percent = float(utilisation['brams']) / dev.brams * 100
    print("Achieved fps of %f with %f%% LUT utilisation and %f%% BRAM utilisation at %f Mhz" % (
        fps, lut_percent, bram_percent, dev.frequency))
Пример #9
0
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import FINN.core.perf_model as pm
import FINN.core.device as device
import FINN.core.nn as nn
from FINN.frontend.caffeloader import CaffeLoader

# Script: model the LFC (w1a1) network on a device and print the LUT and
# BRAM utilisation as percentages of the device totals.
print("Hardware model")

# Passed to Device as its frequency; presumably MHz -- TODO confirm.
frequency = 200
# Alternative target device (disabled):
#dev = device.Device('XLNX:PYNQ-Z1.json', frequency)
dev = device.Device('XLNX:VU9P.json', frequency)
net = nn.NN(CaffeLoader(None, './FINN/inputs/lfc-w1a1.prototxt'))
perfmodel = pm.PerfModel(net, dev)
fps = perfmodel.maximise_fps()
print("Network Utilization")

# Query the utilisation once, and force float division so that integer
# resource counts are not truncated to 0 under Python 2.
utilisation = perfmodel.network_utilisation()
print(float(utilisation['luts']) / dev.luts * 100)
print(float(utilisation['brams']) / dev.brams * 100)