def setUp(self):
    """Load the HWGQ LeNet (w1a2) model and build a streamlined copy.

    Also fixes the verification parameters: number of test images and the
    expected ok/not-ok prediction counts (golden values).
    """
    nname = "lenet-hwgq-w1a2"
    proto = FINN_ROOT + "/inputs/%s.prototxt" % nname
    weights = FINN_ROOT + "/inputs/%s.caffemodel" % nname
    self.net = nn.NN(CaffeLoader(weights, proto))
    self.streamlined_net = nn.NN(
        layers=transform.makeCromulent(self.net.layers))
    # use the first numImagesToTest of the test set for verification
    self.numImagesToTest = 10
    # expected number of successful predictions
    self.ok_golden = 10
    # expected number of unsuccessful predictions
    self.nok_golden = 0
def test_quantize_first_layer(self):
    """Quantize the first (float) conv layer to 8-bit weights.

    Accuracy on the ImageNet subset must match the golden ok/nok counts.
    """
    first_quantized = transform.directlyQuantizeLayer(self.net.layers[0], 8)
    remaining = self.net.layers[1:]
    qnt_net = nn.NN(
        layers=transform.makeCromulent(first_quantized + remaining))
    (ok, nok) = testOnImageNet1kSubset(qnt_net, self.numImagesToTest)
    self.assertTrue(ok == self.ok_golden and nok == self.nok_golden)
def test_0_dataflow_1convlayer(self):
    """Synthesize a small sub-pipeline (1 quantized conv + 1 threshold) to
    an HLS simulation, run it inside a mixed pipeline on MNIST, and check
    both prediction accuracy and the performance model's LUT/BRAM figures.

    Fix: the temp dir is now removed in a ``finally`` block, so a failure
    during synthesis or simulation no longer leaks the generated HLS files.
    """
    net = self.streamlined_net.layers
    # make a temp dir for generated HLS
    dirpath = tempfile.mkdtemp()
    try:
        # subset of layers for dataflow synthesis -- 1 quantized conv,
        # 1 threshold
        hlslayers = net[3:5]

        def myresalloc(pipeline):
            ret = copy.deepcopy(pipeline)
            # weights matrix is (ofm=50) x (k=5 * k=5 * ifm=20)
            # set simd = 20 for faster sim
            ret[0].simd = 20
            return ret

        ret = be_fpga.synthesize(hlslayers, myresalloc, dirpath)
        hlspipeline = ret.getSimLayer()
        # set up mixed pipeline: device-neutral layers around the HLS sim
        preproc = net[:3]
        postproc = net[5:]
        mixed_net = nn.NN(layers=preproc + hlspipeline + postproc)
        (ok, nok) = testOnMNIST(mixed_net, self.numImagesToTest)
        pm = ret.getFPGAPerformanceModel()
        cost = pm.network_utilisation()
    finally:
        # remove temp dir even if synthesis or simulation raised
        shutil.rmtree(dirpath)
    # check result correctness
    self.assertTrue(ok == self.ok_golden and nok == self.nok_golden)
    # check BRAM and LUT usage from performance model
    self.assertTrue(cost['luts'] == 164)
    self.assertTrue(cost['brams'] == 16)
def test_fpgabackend_rawhls(self):
    """Synthesize the middle of the streamlined network to a raw HLS sim
    executable, wrap it in a mixed pipeline, and verify MNIST accuracy.

    Fix: the cleanup call was commented out, leaking a temp dir on every
    run; the dir is now removed in a ``finally`` block so it is cleaned
    up even when synthesis or simulation fails.
    """
    # resource allocation function to set number of PE/SIMD per layer;
    # the allocation is statically determined for this test case.
    def res_alloc_predetermined(pipeline, net, dev):
        ret_pipeline = copy.deepcopy(pipeline)
        layer_simd = [16, 64, 64, 64]
        layer_pe = [64, 64, 64, 10]
        for i in range(4):
            ret_pipeline[i].simd = layer_simd[i]
            ret_pipeline[i].pe = layer_pe[i]
        return ret_pipeline

    # make a temp dir for generated HLS
    dirpath = tempfile.mkdtemp()
    try:
        # pick all layers except first (input quantization) and last
        # (final batchnorm) of the streamlined network
        hlslayers = self.streamlined_net.layers[1:-1]
        # call the FPGA backend to generate HLS and compile raw HLS sim
        dev = device.Device('XLNX:PYNQ-Z1.json', 100)
        ret = fpga_backend.synthesize(hlslayers, self.net, dev,
                                      res_alloc_predetermined, dirpath,
                                      "sfcall-")
        hlspipeline = ret.getSimLayer()
        # build a "mixed pipeline", where the first and last layers are in
        # device-neutral simulation, and everything in the middle is handled
        # by the HLS sim executable
        mixed_pipeline = [self.streamlined_net.layers[0]] + hlspipeline + \
            [self.streamlined_net.layers[-1]]
        # test on MNIST
        (ok, nok) = testOnMNIST(nn.NN(layers=mixed_pipeline),
                                self.numImagesToTest)
    finally:
        # remove temp dir (previously left behind by a commented-out call)
        shutil.rmtree(dirpath)
    self.assertTrue(ok == self.ok_golden and nok == self.nok_golden)
def test_cycles_per_layer(self):
    # After maximise_fps(), the per-layer parallelism chosen by the
    # performance model must never exceed the layer's own dimensions:
    # SIMD <= input channels, PE <= output channels, MMV <= output dim.
    l = CaffeLoader(
        None, "./FINN/inputs/dorefanet-pruned-without-extra-messages.prototxt")
    net = nn.NN(l)
    dev = device.Device('XLNX:KU115.json')
    perfmodel = pm.PerfModel(net, dev)
    fps = perfmodel.maximise_fps()
    for idx, layer in enumerate(net.layers):
        in_chans = net.layers[idx].getInputSize()
        out_chans = net.layers[idx].getOutputSize()
        out_dim = net.layers[idx].get_out_dim()
        # some layers report sizes as tuples; presumably the first entry
        # is the relevant channel/dim count -- compare against that
        if isinstance(in_chans, tuple):
            print in_chans
            in_chans = in_chans[0]
        if isinstance(out_chans, tuple):
            print out_chans
            out_chans = out_chans[0]
        if isinstance(out_dim, tuple):
            print out_dim
            out_dim = out_dim[0]
        # debug output: chosen parallelism vs. the layer limit
        print perfmodel.SIMD[idx], in_chans
        print perfmodel.PE[idx], out_chans
        print perfmodel.MMV[idx], out_dim
        self.assertLessEqual(perfmodel.SIMD[idx], in_chans)
        self.assertLessEqual(perfmodel.PE[idx], out_chans)
        self.assertLessEqual(perfmodel.MMV[idx], out_dim)
def test_simd_pe_mmv_constraints(self):
    """Parallelism picked by maximise_fps() must fit within each layer:
    SIMD bounded by input size, PE by output size, MMV by output dim."""
    net = nn.NN(CaffeLoader(None, "./FINN/inputs/sfc.prototxt"))
    dev = device.Device('XLNX:KU115.json')
    perfmodel = pm.PerfModel(net, dev)
    fps = perfmodel.maximise_fps()
    for idx in range(len(net.layers)):
        layer = net.layers[idx]
        self.assertLessEqual(perfmodel.SIMD[idx], layer.getInputSize())
        self.assertLessEqual(perfmodel.PE[idx], layer.getOutputSize())
        self.assertLessEqual(perfmodel.MMV[idx], layer.get_out_dim())
def test_cycles_per_op(self):
    """The model's reported LUT count must equal the analytic expectation:
    2 * (number of matrix layers) * (device LUT cost per op)."""
    loader = CaffeLoader("./FINN/inputs/sfc.caffemodel",
                         "./FINN/inputs/sfc.prototxt")
    net = nn.NN(loader)
    dev = device.Device('XLNX:VU9P.json')
    perfmodel = pm.PerfModel(net, dev)
    ops = perfmodel.network_utilisation()
    num_matrix_layers = net.count_matrix_layers()
    expected_luts = 2 * num_matrix_layers * dev.lut_cost_per_op()
    self.assertEqual(ops['luts'], expected_luts)
def setUp(self):
    """Load the HWGQ CaffeNet (w1a2) model, downloading the pretrained
    weights on first use, and fix the golden verification parameters."""
    self.nname = "caffenet-hwgq-w1a2"
    proto = config.FINN_ROOT + "/inputs/%s.prototxt" % self.nname
    weights = config.FINN_ROOT + "/inputs/%s.caffemodel" % self.nname
    weights_url = "http://www.svcl.ucsd.edu/projects/hwgq/AlexNet_HWGQ.caffemodel"
    # download weights only if not already on disk
    if not os.path.exists(weights):
        print("Downloading HWGQ CaffeNet weights")
        urlretrieve(weights_url, weights)
    self.net = nn.NN(CaffeLoader(weights, proto))
    # use the first numImagesToTest of the test set for verification
    self.numImagesToTest = 10
    # expected number of successful predictions
    self.ok_golden = 7
    # expected number of unsuccessful predictions
    self.nok_golden = 3
def setUp(self):
    # Load the LFC-w1a1 model, create a VU9P device model at 300 MHz, and
    # prepare a streamlined (makeCromulent) deep copy of the network.
    nname = "lfc-w1a1"
    proto = FINN_ROOT + "/inputs/%s.prototxt" % nname
    weights = FINN_ROOT + "/inputs/%s.caffemodel" % nname
    l = CaffeLoader(weights, proto)
    self.net = nn.NN(l)
    frequency = 300
    self.dev = device.Device('XLNX:VU9P.json', frequency)
    # deep copy so streamlining does not mutate the original net's layers
    self.streamlined_net = copy.deepcopy(self.net)
    # debug output: layer list before and after streamlining
    print self.streamlined_net.layers
    self.streamlined_net.layers = transform.makeCromulent(
        self.streamlined_net.layers)
    print self.streamlined_net.layers
    # use the first numImagesToTest of the test set for verification
    self.numImagesToTest = 1000
    # expected number of successful predictions
    self.ok_golden = 967
    # expected number of unsuccessful predictions
    self.nok_golden = 33
def test_fpgabackend_rawhls(self):
    # Synthesize the middle of the streamlined network to a raw HLS sim
    # executable, with PE/SIMD chosen by the performance model's
    # maximise_fps(), then verify MNIST accuracy via a mixed pipeline.

    # resource allocation function to set number of PE/SIMD per layer
    # the allocation is statically determined for this test case.
    def res_alloc_predetermined(pipeline, net, dev):
        ret_pipeline = copy.deepcopy(pipeline)
        print "PIPELINE: ", ret_pipeline
        # NOTE(review): this rebinds net.layers to the copied pipeline so
        # the perf model sees the HLS layer subset -- it mutates the net
        # passed in by the backend; confirm callers expect that.
        net.layers = ret_pipeline
        perfmodel = pm.PerfModel(net, dev)
        fps = perfmodel.maximise_fps()
        # apply the model's chosen parallelism to each layer
        for i in range(len(ret_pipeline)):
            ret_pipeline[i].simd = perfmodel.SIMD[i]
            print "SIMD:", ret_pipeline[i].simd
            ret_pipeline[i].pe = perfmodel.PE[i]
            print "PE:", ret_pipeline[i].pe
        return ret_pipeline

    # make a temp dir for generated HLS
    dirpath = tempfile.mkdtemp()
    # pick all layers except first (input quantization) and last
    # (final batchnorm) of the streamlined network
    hlslayers = self.streamlined_net.layers[1:-1]
    # call the FPGA backend to generate HLS and compile raw HLS sim
    print "Synthesising"
    ret = fpga_backend.synthesize(hlslayers, self.net, self.dev,
                                  res_alloc_predetermined, dirpath,
                                  "sfcall-")
    print "Synthesised"
    hlspipeline = ret.getSimLayer()
    # build a "mixed pipeline", where the first and last layers are in
    # device-neutral simulation, and everything in the middle is handled
    # by the HLS sim executable
    mixed_pipeline = [self.streamlined_net.layers[0]] + \
        hlspipeline + [self.streamlined_net.layers[-1]]
    # test on MNIST
    (ok, nok) = testOnMNIST(nn.NN(layers=mixed_pipeline),
                            self.numImagesToTest)
    # remove temp dir
    # NOTE(review): cleanup is disabled, so dirpath is left behind --
    # presumably for inspecting generated HLS; confirm this is intended.
    # shutil.rmtree(dirpath)
    self.assertTrue(ok == self.ok_golden and nok == self.nok_golden)
def test_0_quantize_all_float_layers(self):
    """Quantizing every float-weight layer to 8 bits must keep the golden
    ok/nok prediction counts on the ImageNet subset."""
    quantized_layers = transform.directlyQuantizeAllFloatWeights(
        self.net.layers, 8)
    qnt_net = nn.NN(layers=transform.makeCromulent(quantized_layers))
    (ok, nok) = testOnImageNet1kSubset(qnt_net, self.numImagesToTest)
    self.assertTrue(ok == self.ok_golden and nok == self.nok_golden)
def getFPGAPerformanceModel(self):
    """Build and return an FPGA performance/cost model for this result's
    internal pipeline (self.ir) on its target device (self.dev)."""
    ir_net = nn.NN(layers=self.ir)
    return PerfModel(ir_net, self.dev)
# derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import FINN.core.perf_model as pm import FINN.core.device as device import FINN.core.nn as nn from FINN.frontend.caffeloader import CaffeLoader print "Hardware model" frequency = 200 #dev = device.Device('XLNX:PYNQ-Z1.json', frequency) dev = device.Device('XLNX:VU9P.json', frequency) net = nn.NN(CaffeLoader(None, './FINN/inputs/lfc-w1a1.prototxt')) perfmodel = pm.PerfModel(net, dev) fps = perfmodel.maximise_fps() print "Network Utilization" print perfmodel.network_utilisation()['luts']/dev.luts*100 print perfmodel.network_utilisation()['brams']/dev.brams*100
def setUp(self):
    """Load the SFC model (weights + prototxt) from the inputs directory."""
    loader = CaffeLoader("inputs/sfc.caffemodel", "inputs/sfc.prototxt")
    self.net = nn.NN(loader)