def main():
    """Train a simple dense (DenseAffine) MLP on MNIST with BinaryBrain.

    Builds a 1024-512-output fully-connected network, optionally wrapped in a
    binary modulator, and trains it with Adam + softmax cross-entropy.
    """
    # config
    binary_mode = True
    epoch = 4
    mini_batch = 32
    training_modulation_size = 3

    # load MNIST data
    td = bb.load_mnist()

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create network
    main_net = bb.Sequential.create()
    main_net.add(bb.DenseAffine.create(output_shape=[1024]))
    main_net.add(bb.BatchNormalization.create())
    main_net.add(bb.ReLU.create())
    main_net.add(bb.DenseAffine.create([512]))
    main_net.add(bb.BatchNormalization.create())
    main_net.add(bb.ReLU.create())
    # bug fix: td is a dict (see td['t_shape'] / td['x_shape'] below), so the
    # original attribute access td.t_shape would raise AttributeError
    main_net.add(bb.DenseAffine.create(td['t_shape']))
    if binary_mode:
        # extra normalization + activation before binarization
        main_net.add(bb.BatchNormalization.create())
        main_net.add(bb.ReLU.create())

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    # print model information
    print(net.get_info())

    # set binary mode on every layer that understands the command
    if binary_mode:
        net.send_command("binary true")
    else:
        net.send_command("binary false")

    # learning
    print('\n[learning]')
    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())
    runner = bb.Runner(net, "mnist-dense-simple", loss, metrics, optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)
def main():
    """Train a SparseLut6 MLP on MNIST, mirror the trained layers into a
    binary LUT6 network, evaluate that network, and export it as Verilog."""
    epoch = 4
    mini_batch = 32
    training_modulation_size = 7
    test_modulation_size = 7

    # load MNIST data
    td = bb.load_mnist()

    # create layer
    layer_sl0 = bb.SparseLut6.create([1024])
    layer_sl1 = bb.SparseLut6.create([480])
    layer_sl2 = bb.SparseLut6.create([70])

    # create network
    main_net = bb.Sequential.create()
    main_net.add(layer_sl0)
    main_net.add(layer_sl1)
    main_net.add(layer_sl2)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    print(net.get_info())

    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    runner = bb.Runner(net, "mnist-sparse-lut6-simple", loss, metrics,
                       optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)

    # LUT-network: binary LUT layers sized from the trained layers' shapes
    layer_bl0 = bb.BinaryLut6Bit.create(layer_sl0.get_output_shape())
    layer_bl1 = bb.BinaryLut6Bit.create(layer_sl1.get_output_shape())
    layer_bl2 = bb.BinaryLut6Bit.create(layer_sl2.get_output_shape())
    lut_net = bb.Sequential.create()
    lut_net.add(layer_bl0)
    lut_net.add(layer_bl1)
    lut_net.add(layer_bl2)

    # evaluation network (Bit variant of the modulator for the LUT net)
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulationBit.create(
            lut_net, inference_modulation_size=test_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))

    # set input shape
    eval_net.set_input_shape(td['x_shape'])

    # import table — copy trained parameters into the binary LUT layers
    print('parameter copy to binary LUT-Network')
    layer_bl0.import_parameter(layer_sl0)
    layer_bl1.import_parameter(layer_sl1)
    layer_bl2.import_parameter(layer_sl2)

    # evaluation
    lut_runner = bb.Runner(eval_net, "mnist-binary-lut6-simple",
                           bb.LossSoftmaxCrossEntropy.create(),
                           bb.MetricsCategoricalAccuracy.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # Verilog output
    with open('MnistLutSimple.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(
            bb.make_verilog_from_lut_bit('MnistLutSimple',
                                         [layer_bl0, layer_bl1, layer_bl2]))
# Ad-hoc dataset inspection script (debug scaffolding).
# NOTE(review): `td` is used before any visible assignment here — this chunk
# presumably follows a load elsewhere in the file; confirm against the caller.
# NOTE(review): the reshape(3, 32, 32) below suggests a CIFAR-style 3-channel
# 32x32 dataset, not MNIST — TODO confirm which loader produced td.
print(td['x_train'].shape)
print(td['t_train'].shape)
print(td['x_test'].shape)
print(td['t_test'].shape)
for i in range(10):
    print(td['t_test'][i])
    # channel-first image; cv2.merge reorders planes (2, 1, 0) to BGR for display
    img = td['x_test'][i].reshape(3, 32, 32)
    img = cv2.merge((img[2], img[1], img[0]))
    cv2.imshow('img', img)
    cv2.waitKey()
sys.exit()  # everything below this point is unreachable debug code

td = bb.load_mnist()
print(len(td['x_train']))
print(len(td['t_train']))
print(len(td['x_test']))
print(len(td['t_test']))
print(td['x_train'][0].shape)
sys.exit()  # unreachable

for i in range(10):
    print(td['t_train'][i])
    cv2.imshow('img', td['x_train'][i].reshape(28, 28))
    cv2.waitKey()

print(bb.TYPE_BIT)
print(bb.get_version())
def main():
    """Train a SparseLut6 CNN on MNIST, mirror the trained layers into a
    binary LUT6 CNN, evaluate it, and export AXI4-Stream Verilog for FPGA."""
    epoch = 4
    mini_batch = 32
    training_modulation_size = 3
    inference_modulation_size = 3

    # load MNIST data
    td = bb.load_mnist()
    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create layer — each conv block is a 2-stage SparseLut6 sub-network
    layer_cnv0_sl0 = bb.SparseLut6.create([192])
    layer_cnv0_sl1 = bb.SparseLut6.create([32])
    layer_cnv1_sl0 = bb.SparseLut6.create([192])
    layer_cnv1_sl1 = bb.SparseLut6.create([32])
    layer_cnv2_sl0 = bb.SparseLut6.create([384])
    layer_cnv2_sl1 = bb.SparseLut6.create([64])
    layer_cnv3_sl0 = bb.SparseLut6.create([384])
    layer_cnv3_sl1 = bb.SparseLut6.create([64])
    layer_sl4 = bb.SparseLut6.create([420])
    layer_sl5 = bb.SparseLut6.create([70])

    # main network
    cnv0_sub = bb.Sequential.create()
    cnv0_sub.add(layer_cnv0_sl0)
    cnv0_sub.add(layer_cnv0_sl1)

    cnv1_sub = bb.Sequential.create()
    cnv1_sub.add(layer_cnv1_sl0)
    cnv1_sub.add(layer_cnv1_sl1)

    cnv2_sub = bb.Sequential.create()
    cnv2_sub.add(layer_cnv2_sl0)
    cnv2_sub.add(layer_cnv2_sl1)

    cnv3_sub = bb.Sequential.create()
    cnv3_sub.add(layer_cnv3_sl0)
    cnv3_sub.add(layer_cnv3_sl1)

    # conv(3x3) x2 -> pool -> conv(3x3) x2 -> pool -> two dense LUT layers
    main_net = bb.Sequential.create()
    main_net.add(bb.LoweringConvolution.create(cnv0_sub, 3, 3))
    main_net.add(bb.LoweringConvolution.create(cnv1_sub, 3, 3))
    main_net.add(bb.MaxPooling.create(2, 2))
    main_net.add(bb.LoweringConvolution.create(cnv2_sub, 3, 3))
    main_net.add(bb.LoweringConvolution.create(cnv3_sub, 3, 3))
    main_net.add(bb.MaxPooling.create(2, 2))
    main_net.add(layer_sl4)
    main_net.add(layer_sl5)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    # print model information
    print(net.get_info())

    # learning
    print('\n[learning]')
    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())
    runner = bb.Runner(net, "mnist-sparse-lut6-cnn", loss, metrics, optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)

    ################################
    # convert to FPGA
    ################################

    print('\n[convert to Binary LUT]')

    # LUT-network — binary LUT layers sized from the trained layers' shapes
    layer_cnv0_bl0 = bb.BinaryLut6.create(layer_cnv0_sl0.get_output_shape())
    layer_cnv0_bl1 = bb.BinaryLut6.create(layer_cnv0_sl1.get_output_shape())
    layer_cnv1_bl0 = bb.BinaryLut6.create(layer_cnv1_sl0.get_output_shape())
    layer_cnv1_bl1 = bb.BinaryLut6.create(layer_cnv1_sl1.get_output_shape())
    layer_cnv2_bl0 = bb.BinaryLut6.create(layer_cnv2_sl0.get_output_shape())
    layer_cnv2_bl1 = bb.BinaryLut6.create(layer_cnv2_sl1.get_output_shape())
    layer_cnv3_bl0 = bb.BinaryLut6.create(layer_cnv3_sl0.get_output_shape())
    layer_cnv3_bl1 = bb.BinaryLut6.create(layer_cnv3_sl1.get_output_shape())
    layer_bl4 = bb.BinaryLut6.create(layer_sl4.get_output_shape())
    layer_bl5 = bb.BinaryLut6.create(layer_sl5.get_output_shape())

    # NOTE: the cnvN_sub names are deliberately rebound here to fresh
    # Sequentials holding the binary LUT layers
    cnv0_sub = bb.Sequential.create()
    cnv0_sub.add(layer_cnv0_bl0)
    cnv0_sub.add(layer_cnv0_bl1)

    cnv1_sub = bb.Sequential.create()
    cnv1_sub.add(layer_cnv1_bl0)
    cnv1_sub.add(layer_cnv1_bl1)

    cnv2_sub = bb.Sequential.create()
    cnv2_sub.add(layer_cnv2_bl0)
    cnv2_sub.add(layer_cnv2_bl1)

    cnv3_sub = bb.Sequential.create()
    cnv3_sub.add(layer_cnv3_bl0)
    cnv3_sub.add(layer_cnv3_bl1)

    # final dense stage packaged as a 4x4 convolution for the FPGA pipeline
    cnv4_sub = bb.Sequential.create()
    cnv4_sub.add(layer_bl4)
    cnv4_sub.add(layer_bl5)

    cnv0 = bb.LoweringConvolution.create(cnv0_sub, 3, 3)
    cnv1 = bb.LoweringConvolution.create(cnv1_sub, 3, 3)
    pol0 = bb.MaxPooling.create(2, 2)
    cnv2 = bb.LoweringConvolution.create(cnv2_sub, 3, 3)
    cnv3 = bb.LoweringConvolution.create(cnv3_sub, 3, 3)
    pol1 = bb.MaxPooling.create(2, 2)
    cnv4 = bb.LoweringConvolution.create(cnv4_sub, 4, 4)

    lut_net = bb.Sequential.create()
    lut_net.add(cnv0)
    lut_net.add(cnv1)
    lut_net.add(pol0)
    lut_net.add(cnv2)
    lut_net.add(cnv3)
    lut_net.add(pol1)
    lut_net.add(cnv4)

    # evaluate network
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulation.create(
            lut_net, inference_modulation_size=inference_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))

    # set input shape
    eval_net.set_input_shape(td['x_shape'])

    # parameter copy — every trained layer feeds its binary counterpart
    print('parameter copy to binary LUT-Network')
    layer_cnv0_bl0.import_parameter(layer_cnv0_sl0)
    layer_cnv0_bl1.import_parameter(layer_cnv0_sl1)
    layer_cnv1_bl0.import_parameter(layer_cnv1_sl0)
    layer_cnv1_bl1.import_parameter(layer_cnv1_sl1)
    layer_cnv2_bl0.import_parameter(layer_cnv2_sl0)
    layer_cnv2_bl1.import_parameter(layer_cnv2_sl1)
    layer_cnv3_bl0.import_parameter(layer_cnv3_sl0)
    layer_cnv3_bl1.import_parameter(layer_cnv3_sl1)
    layer_bl4.import_parameter(layer_sl4)
    layer_bl5.import_parameter(layer_sl5)

    # evaluate network
    print('evaluate LUT-Network')
    lut_runner = bb.Runner(eval_net, "mnist-binary-lut6-cnn",
                           bb.LossSoftmaxCrossEntropy.create(),
                           bb.MetricsCategoricalAccuracy.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # write Verilog — the pipeline is exported as three AXI4-Stream stages
    print('write verilog file')
    with open('MnistLutCnn.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(
            bb.make_verilog_axi4s_from_lut_cnn('MnistLutCnnCnv0',
                                               [cnv0, cnv1, pol0]))
        f.write(
            bb.make_verilog_axi4s_from_lut_cnn('MnistLutCnnCnv1',
                                               [cnv2, cnv3, pol1]))
        f.write(bb.make_verilog_axi4s_from_lut_cnn('MnistLutCnnCnv2', [cnv4]))
def main():
    """Train a SparseLut6 MLP on MNIST (with checkpoint read/write), mirror
    it into a binary LUT6 network, evaluate it, and export Verilog."""
    # config
    epoch = 4
    mini_batch = 32
    training_modulation_size = 3
    inference_modulation_size = 3

    # download mnist
    # bb.download_mnist()

    # load MNIST data
    # td = bb.LoadMnist.load()
    td = bb.load_mnist()
    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create layer
    layer_sl0 = bb.SparseLut6.create([1024])
    layer_sl1 = bb.SparseLut6.create([480])
    layer_sl2 = bb.SparseLut6.create([70])

    # create network
    main_net = bb.Sequential.create()
    main_net.add(layer_sl0)
    main_net.add(layer_sl1)
    main_net.add(layer_sl2)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    # print model information
    print(net.get_info())

    # learning — file_read/file_write resume from and save checkpoints
    print('\n[learning]')
    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())
    runner = bb.Runner(net, "mnist-sparse-lut6-simple", loss, metrics,
                       optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch,
                   file_read=True, file_write=True)

    ################################
    # convert to FPGA
    ################################

    print('\n[convert to Binary LUT]')

    # LUT-network — binary LUT layers sized from the trained layers' shapes
    layer_bl0 = bb.BinaryLut6.create(layer_sl0.get_output_shape())
    layer_bl1 = bb.BinaryLut6.create(layer_sl1.get_output_shape())
    layer_bl2 = bb.BinaryLut6.create(layer_sl2.get_output_shape())
    lut_net = bb.Sequential.create()
    lut_net.add(layer_bl0)
    lut_net.add(layer_bl1)
    lut_net.add(layer_bl2)

    # evaluate network
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulation.create(
            lut_net, inference_modulation_size=inference_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))

    # set input shape
    eval_net.set_input_shape(td['x_shape'])

    # parameter copy
    print('parameter copy to binary LUT-Network')
    layer_bl0.import_parameter(layer_sl0)
    layer_bl1.import_parameter(layer_sl1)
    layer_bl2.import_parameter(layer_sl2)

    # evaluate network
    print('evaluate LUT-Network')
    lut_runner = bb.Runner(eval_net, "mnist-binary-lut6-simple",
                           bb.LossSoftmaxCrossEntropy.create(),
                           bb.MetricsCategoricalAccuracy.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # write Verilog
    print('write verilog file')
    with open('MnistLutSimple.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(
            bb.make_verilog_from_lut('MnistLutSimple',
                                     [layer_bl0, layer_bl1, layer_bl2]))
def main():
    """Train a SparseLut6 autoencoder on MNIST, save reconstruction images
    each epoch, mirror the trained layers into a binary LUT6 network,
    evaluate it, and export it as Verilog.
    """
    epoch = 8
    mini_batch = 32
    training_modulation_size = 7
    test_modulation_size = 7

    # load MNIST data
    td = bb.load_mnist()

    # set teaching signal (autoencoder: the target is the input itself)
    td['t_shape'] = td['x_shape']
    td['t_train'] = td['x_train']
    td['t_test'] = td['x_test']

    # create layer — encoder narrows to 32 units, decoder widens back to 28*28
    layer_enc_sl0 = bb.SparseLut6.create([32 * 6 * 6 * 6])
    layer_enc_sl1 = bb.SparseLut6.create([32 * 6 * 6])
    layer_enc_sl2 = bb.SparseLut6.create([32 * 6])
    layer_enc_sl3 = bb.SparseLut6.create([32])
    layer_dec_sl2 = bb.SparseLut6.create([28 * 28 * 6 * 6])
    layer_dec_sl1 = bb.SparseLut6.create([28 * 28 * 6])
    layer_dec_sl0 = bb.SparseLut6.create([28 * 28], False)  # disable BatchNorm

    # create network
    main_net = bb.Sequential.create()
    main_net.add(layer_enc_sl0)
    main_net.add(layer_enc_sl1)
    main_net.add(layer_enc_sl2)
    main_net.add(layer_enc_sl3)
    main_net.add(layer_dec_sl2)
    main_net.add(layer_dec_sl1)
    main_net.add(layer_dec_sl0)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.set_input_shape(td['x_shape'])

    print(net.get_info())

    loss = bb.LossMeanSquaredError.create()
    metrics = bb.MetricsMeanSquaredError.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    # NOTE: a plain bb.Runner fitting would work here, but the manual loop
    # below is used instead so a reconstruction image can be saved per epoch.
    # runner = bb.Runner(net, "mnist-autoencoder-sparse-lut6-simple", loss, metrics, optimizer)
    # runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)

    result_img = None

    x_train = td['x_train']
    t_train = td['t_train']
    x_test = td['x_test']

    x_buf = bb.FrameBuffer()
    t_buf = bb.FrameBuffer()
    for epoch_num in range(epoch):
        # train
        for index in tqdm(range(0, batch_size, mini_batch)):
            # last mini-batch may be smaller than mini_batch
            mini_batch_size = min(mini_batch, batch_size - index)

            x_buf.resize(mini_batch_size, td['x_shape'], bb.TYPE_FP32)
            x_buf.set_data(x_train[index:index + mini_batch_size])

            y_buf = net.forward(x_buf)

            t_buf.resize(mini_batch_size, td['t_shape'], bb.TYPE_FP32)
            t_buf.set_data(t_train[index:index + mini_batch_size])

            dy_buf = loss.calculate_loss(y_buf, t_buf, mini_batch_size)
            metrics.calculate_metrics(y_buf, t_buf)
            net.backward(dy_buf)
            optimizer.update()

            cv2.waitKey(1)  # keep the preview window responsive

        print('loss =', loss.get_loss())
        print('metrics =', metrics.get_metrics())

        # test: reconstruct the first 16 test images and stack the results
        x_buf.resize(16, td['x_shape'], bb.TYPE_FP32)
        x_buf.set_data(x_test[0:16])
        y_buf = net.forward(x_buf)

        if result_img is None:
            # first epoch: start the montage with the original inputs
            x_img = make_image_block(x_buf.get_data())
            cv2.imwrite('mnist-autoencoder-sparse-lut6-simple_x.png',
                        x_img * 255)
            result_img = x_img

        y_img = make_image_block(y_buf.get_data())
        cv2.imwrite('mnist-autoencoder-sparse-lut6-simple_%d.png' % epoch_num,
                    y_img * 255)
        result_img = np.vstack((result_img, y_img))
        cv2.imshow('result_img', result_img)
        cv2.waitKey(1)

    cv2.destroyAllWindows()
    cv2.imwrite("mnist-autoencoder-sparse-lut6-simple.png", result_img * 255)

    # LUT-network — binary LUT layers sized from the trained layers' shapes
    layer_enc_bl0 = bb.BinaryLut6Bit.create(layer_enc_sl0.get_output_shape())
    layer_enc_bl1 = bb.BinaryLut6Bit.create(layer_enc_sl1.get_output_shape())
    layer_enc_bl2 = bb.BinaryLut6Bit.create(layer_enc_sl2.get_output_shape())
    layer_enc_bl3 = bb.BinaryLut6Bit.create(layer_enc_sl3.get_output_shape())
    layer_dec_bl2 = bb.BinaryLut6Bit.create(layer_dec_sl2.get_output_shape())
    layer_dec_bl1 = bb.BinaryLut6Bit.create(layer_dec_sl1.get_output_shape())
    layer_dec_bl0 = bb.BinaryLut6Bit.create(layer_dec_sl0.get_output_shape())

    lut_net = bb.Sequential.create()
    lut_net.add(layer_enc_bl0)
    lut_net.add(layer_enc_bl1)
    lut_net.add(layer_enc_bl2)
    lut_net.add(layer_enc_bl3)
    lut_net.add(layer_dec_bl2)
    lut_net.add(layer_dec_bl1)
    lut_net.add(layer_dec_bl0)

    # evaluation network
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulationBit.create(
            lut_net, inference_modulation_size=test_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))

    # set input shape
    eval_net.set_input_shape(td['x_shape'])

    # import table — copy trained parameters into the binary LUT layers
    print('parameter copy to binary LUT-Network')
    layer_enc_bl0.import_parameter(layer_enc_sl0)
    layer_enc_bl1.import_parameter(layer_enc_sl1)
    layer_enc_bl2.import_parameter(layer_enc_sl2)
    layer_enc_bl3.import_parameter(layer_enc_sl3)
    layer_dec_bl2.import_parameter(layer_dec_sl2)
    layer_dec_bl1.import_parameter(layer_dec_sl1)
    layer_dec_bl0.import_parameter(layer_dec_sl0)

    # evaluation (runner name typo fixed: "autoencpder" -> "autoencoder")
    lut_runner = bb.Runner(eval_net, "mnist-autoencoder-binary-lut6-simple",
                           bb.LossMeanSquaredError.create(),
                           bb.MetricsMeanSquaredError.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # Verilog output
    with open('MnistAeLutSimple.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        # bug fix: layer_enc_bl3 was missing from the export list, so the
        # generated Verilog omitted a layer that lut_net actually contains
        f.write(
            bb.make_verilog_from_lut_bit('MnistAeLutSimple', [
                layer_enc_bl0, layer_enc_bl1, layer_enc_bl2, layer_enc_bl3,
                layer_dec_bl2, layer_dec_bl1, layer_dec_bl0
            ]))