def cmd_mfcc_kws_frame():
    """Exercise the ``mfcc_kws_frame`` MCU command with all-zero audio frames.

    Sends 62 frames of 1024 zero-valued int16 samples, waiting for the MCU
    after every frame and once more while it runs the inference, then
    downloads two data packets (net input and net output).

    Returns:
        0 on success, -1 if the command is rejected or a wait for the MCU
        reports a negative status.
    """
    cmd = 'mfcc_kws_frame'
    print('--------------------------------------------------')
    print(' Testing command %s' % (cmd))
    print('--------------------------------------------------')

    if mcu.sendCommand(cmd) < 0:
        print('FAIL')
        return -1

    frame_step = 1024
    n_frames = 62
    silence = np.zeros(frame_step, dtype='int16')

    print('Sending %d frames' % (n_frames))
    for _ in tqdm(range(n_frames)):
        mcu.sendData(silence, 0, progress=False)
        # A negative status is treated as a timeout, as in mfccAndInfereOnMCU.
        if mcu.waitForMcuReady() < 0:
            print('Wait for MCU timed out')
            print('FAIL')
            return -1

    # MCU now runs inference, wait for complete
    if mcu.waitForMcuReady() < 0:
        print('Wait for MCU timed out')
        print('FAIL')
        return -1

    # MCU returns net input and output
    mcu_mfccs, tag = mcu.receiveData()
    print('Received %s type with tag 0x%x len %d' %
          (mcu_mfccs.dtype, tag, mcu_mfccs.shape[0]))
    mcu_pred, tag = mcu.receiveData()
    print('Received %s type with tag 0x%x len %d' %
          (mcu_pred.dtype, tag, mcu_pred.shape[0]))

    print('SUCCESS')
    return 0
def cmd_mel_one_batch():
    """Exercise the ``mel_one_batch`` MCU command with one random sample.

    Uploads 1024 random int16 values and downloads four result packets in
    order: FFT, spectrum, mel spectrum and DCT.

    Returns:
        0 on success, -1 if the MCU rejects the command.
    """
    cmd = 'mel_one_batch'
    print('--------------------------------------------------')
    print(' Testing command %s' % (cmd))
    print('--------------------------------------------------')

    if mcu.sendCommand(cmd) < 0:
        print('FAIL')
        return -1

    print('Upload sample')
    sample_size = 1024
    # Uniform random values spread over the int16 range.
    y = np.array(65536 * np.random.rand(sample_size) - 65536 // 2,
                 dtype='int16')
    mcu.sendData(y, 0)

    print('Download samples')
    mcu_fft, tag = mcu.receiveData()
    print('Received %s type with tag 0x%x len %d' %
          (mcu_fft.dtype, tag, mcu_fft.shape[0]))
    mcu_spec, tag = mcu.receiveData()
    # Report the length of the spectrum packet itself, not of the FFT packet.
    print('Received %s type with tag 0x%x len %d' %
          (mcu_spec.dtype, tag, mcu_spec.shape[0]))
    mcu_melspec, tag = mcu.receiveData()
    print('Received %s type with tag 0x%x len %d' %
          (mcu_melspec.dtype, tag, mcu_melspec.shape[0]))
    mcu_dct, tag = mcu.receiveData()
    print('Received %s type with tag 0x%x len %d' %
          (mcu_dct.dtype, tag, mcu_dct.shape[0]))

    print('SUCCESS')
    return 0
def infereOnMCU(net_input, progress=False):
    """Run one ``kws_single_inference`` on the MCU and return its output.

    Args:
        net_input: Array holding the network input; it is flattened with
            ``reshape(-1)`` before upload.
        progress: Forwarded to ``mcu.sendData`` as its ``progress`` argument.

    Returns:
        The data packet received from the MCU after the upload.

    The process exits if the MCU rejects the command.
    """
    import edison.mcu.mcu_util as mcu

    status = mcu.sendCommand('kws_single_inference')
    if status < 0:
        exit()

    flat_input = net_input.reshape(-1)
    mcu.sendData(flat_input, 0, progress=progress)

    mcu_pred, tag = mcu.receiveData()
    report = (mcu_pred.dtype, tag, mcu_pred.shape[0])
    print('Received %s type with tag 0x%x len %d' % report)
    return mcu_pred
def cmd_kws_single_inference():
    """Exercise the ``kws_single_inference`` MCU command with random input.

    Uploads a float32 array of 62 * 13 random values, flattened, and
    downloads one result packet.

    Returns:
        0 on success, -1 if the MCU rejects the command.
    """
    command = 'kws_single_inference'
    for banner_line in ('--------------------------------------------------',
                        ' Testing command %s' % (command),
                        '--------------------------------------------------'):
        print(banner_line)

    if mcu.sendCommand(command) < 0:
        print('FAIL')
        return -1

    shape = [62, 13]
    n_values = np.prod(shape)
    random_values = np.random.rand(n_values).reshape([1] + shape)
    net_input = np.array(random_values, dtype='float32')
    mcu.sendData(net_input.reshape(-1), 0)

    mcu_pred, tag = mcu.receiveData()
    report = (mcu_pred.dtype, tag, mcu_pred.shape[0])
    print('Received %s type with tag 0x%x len %d' % report)

    print('SUCCESS')
    return 0
def mfccAndInfereOnMCU(data, progress=False):
    """Stream raw audio to the MCU frame by frame and fetch MFCCs and prediction.

    Args:
        data: Audio samples; a ``float32`` array is scaled by ``2**15 - 1``
            and cast to ``int16`` before upload.
        progress: Accepted but not used inside this function.

    Returns:
        Tuple ``(mcu_mfccs, mcu_pred)`` of the two packets received from the
        MCU (net input and net output).

    Relies on ``n_frames``, ``frame_step``, ``frame_length`` and ``tqdm``
    being available outside this function. The process exits if the MCU
    rejects the command; timeouts are only reported, not treated as fatal.
    """
    import edison.mcu.mcu_util as mcu

    timeout_msg = 'Wait for MCU timed out'
    received_fmt = 'Received %s type with tag 0x%x len %d'

    if data.dtype == 'float32':
        data = ((2**15 - 1) * data).astype('int16')

    if mcu.sendCommand('mfcc_kws_frame') < 0:
        exit()

    print('Sending %d frames' % (n_frames))
    for idx in tqdm(range(n_frames)):
        start = idx * frame_step
        chunk = data[start:start + frame_length]
        mcu.sendData(chunk, 0, progress=False)
        if mcu.waitForMcuReady() < 0:
            print(timeout_msg)

    # All frames are uploaded; the MCU now runs the inference.
    if mcu.waitForMcuReady() < 0:
        print(timeout_msg)
    print('Inference complete')

    # First packet is the net input, second the net output.
    mcu_mfccs, tag = mcu.receiveData()
    print(received_fmt % (mcu_mfccs.dtype, tag, mcu_mfccs.shape[0]))
    mcu_pred, tag = mcu.receiveData()
    print(received_fmt % (mcu_pred.dtype, tag, mcu_pred.shape[0]))

    return mcu_mfccs, mcu_pred
def main(argv):
    """Train, evaluate or single-file-test the keyword spotting model.

    Loads the cached dataset arrays from ``in_dir``, adds a channel axis,
    scales the inputs by ``nnom_net_input_scale`` and clips them to the
    configured range. What happens next depends on ``argv[1]``:

    * ``train``: write test binaries, train, reload the model from
      ``model_path``, evaluate it, generate ``weights.h`` (with the input
      scaling appended as macros) and plot the accuracy history.
    * ``test``: load the model and print confusion matrices for the train,
      test and validation splits.
    * ``testfile``: read the wav file ``argv[2]``, compute MFCCs on the host,
      predict on host and MCU and print a comparison.

    Any other mode only loads the model.

    Args:
        argv: Command line arguments; ``argv[1]`` is the mode and ``argv[2]``
            the wav file for ``testfile``.

    The process exits on missing arguments, unreadable cache files, a sample
    rate mismatch or when the MCU rejects the inference command.
    """
    if len(argv) < 2:
        print('Usage:')
        print(' kws_nnom <mode>')
        print(' Modes:')
        print(
            ' train Train model and quantise/implement with NNoM'
        )
        print(
            ' test Load model from file and test on it'
        )
        print(
            ' testfile <file> Load data from file and compute MFCC on host, infere on MCU'
        )
        exit()

    # Fail early instead of raising IndexError after the data has been loaded.
    if argv[1] == 'testfile' and len(argv) < 3:
        print('Specify input file')
        exit()

    try:
        x_train = np.load(in_dir + '/x_train.npy')
        x_test = np.load(in_dir + '/x_test.npy')
        x_val = np.load(in_dir + '/x_val.npy')
        y_train = np.load(in_dir + '/y_train.npy')
        y_test = np.load(in_dir + '/y_test.npy')
        y_val = np.load(in_dir + '/y_val.npy')
        keywords = np.load(in_dir + '/keywords.npy')
        print('Load data from cache success!')
    except (OSError, ValueError, EOFError) as err:
        # Only file/format problems are handled here; anything else is a
        # programming error and should surface with its traceback.
        print('Could not load')
        print(err)
        exit()

    # label: the selected label will be recognised, while the others will be
    # classified to "unknown".
    selected_label = keywords

    print('y_val.shape', y_val.shape)
    print('x_train.shape', x_train.shape)
    print('y_train.shape', y_train.shape)

    # parameters
    epochs = 10
    batch_size = 64
    num_type = len(selected_label)

    # NOTE: the original carried a disabled option to keep only the first
    # num_mfcc coefficients ("only take 2~13 coefficient. 1 is destructive").

    # expand on channel axis because we only have one channel
    x_train = x_train.reshape(
        (x_train.shape[0], x_train.shape[1], x_train.shape[2], 1))
    x_test = x_test.reshape(
        (x_test.shape[0], x_test.shape[1], x_test.shape[2], 1))
    x_val = x_val.reshape((x_val.shape[0], x_val.shape[1], x_val.shape[2], 1))
    print('x_train shape:', x_train.shape, 'max', x_train.max(), 'min',
          x_train.min())

    # fake quantised
    # instead of using the maximum value for quantisation, some saturation is
    # allowed to keep more detail in small values.
    quantise_factor = nnom_net_input_scale
    print('x_train.max()', x_train.max())
    print('x_train.min()', x_train.min())
    print("scale by", quantise_factor, 'clip to', nnom_net_input_clip_min,
          nnom_net_input_clip_max)
    x_train = np.clip((x_train * quantise_factor), nnom_net_input_clip_min,
                      nnom_net_input_clip_max)
    x_test = np.clip((x_test * quantise_factor), nnom_net_input_clip_min,
                     nnom_net_input_clip_max)
    x_val = np.clip((x_val * quantise_factor), nnom_net_input_clip_min,
                    nnom_net_input_clip_max)
    print('x_train.max()', x_train.max())
    print('x_train.min()', x_train.min())

    print(y_train.shape)

    print('quantised', 'x_train shape:', x_train.shape, 'max', x_train.max(),
          'min', x_train.min())

    # Debug aid, disabled: flip to True to browse a few MFCC images.
    if False:
        which = 232
        while True:
            mfcc_plot(x_train[which].reshape((31, 13)) * 128,
                      keywords[y_train[which].argmax()])
            which += 352

    if argv[1] == 'train':
        # generate test data for MCU
        generate_test_bin(x_test, y_test, cache_dir + '/test_data.bin')
        generate_test_bin(x_train, y_train, cache_dir + '/train_data.bin')

        # do the job
        print('num_type', num_type)
        print('len', len(keywords))
        print(keywords)
        print('y_train.shape', y_train.shape)
        history = train(x_train,
                        y_train,
                        x_val,
                        y_val,
                        type=num_type,
                        batch_size=batch_size,
                        epochs=epochs)
        print(history)
        print(history.history)

        # reload the best model
        model = load_model(model_path)
        evaluate_model(model, x_test, y_test)
        generate_model(model,
                       np.vstack((x_test, x_val)),
                       name=cache_dir + '/weights.h')
        print('Wrote weights in', cache_dir + '/weights.h')

        # append scaling as macros
        with open(cache_dir + '/weights.h', 'a+') as fd:
            fd.write('#define NNOM_INPUT_SCALE ' +
                     str(int(1 / quantise_factor)) + '\n')
            fd.write('#define NNOM_INPUT_MIN ' +
                     str(int(nnom_net_input_clip_min)) + '\n')
            fd.write('#define NNOM_INPUT_MAX ' +
                     str(int(nnom_net_input_clip_max)) + '\n')

        acc = history.history['accuracy']
        val_acc = history.history['val_accuracy']
        # Use the recorded history length so the plot still works when
        # training stopped before `epochs` epochs.
        plt.plot(range(len(acc)), acc, color='red', label='Training acc')
        plt.plot(range(len(val_acc)),
                 val_acc,
                 color='green',
                 label='Validation acc')
        plt.title('Training and validation accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.show()
    else:
        model = load_model(model_path)

    if argv[1] == 'test':
        print('Performance on train data')
        predictWithConfMatrix(model, x_train, y_train)
        print('Performance on test data')
        predictWithConfMatrix(model, x_test, y_test)
        print('Performance on val data')
        predictWithConfMatrix(model, x_val, y_val)

    if argv[1] == 'testfile':
        import scipy.io.wavfile as wavfile
        in_fs, data = wavfile.read(argv[2])
        # Only floating point audio needs rescaling; multiplying integer PCM
        # by 32767 would overflow int16.
        if np.issubdtype(data.dtype, np.floating):
            data = ((2**15 - 1) * data).astype('int16')

        if (in_fs != fs):
            print('Sample rate of file %d doesn\'t match %d' % (in_fs, fs))
            exit()

        # Cut/pad sample
        if data.shape[0] < sample_len:
            data = np.pad(data, (0, sample_len - data.shape[0]))
        else:
            data = data[:sample_len]

        # Calculate MFCC
        import edison.mfcc.mfcc_utils as mfu
        o_mfcc = mfu.mfcc_mcu(data, fs, nSamples, frame_len, frame_step,
                              frame_count, fft_len, num_mel_bins,
                              lower_edge_hertz, upper_edge_hertz,
                              mel_mtx_scale)
        data_mfcc = np.array([x['mfcc'][:num_mfcc] for x in o_mfcc])

        # make fit shape and dtype
        input_shape = model.input.shape.as_list()[1:]
        print('data_mfcc.max()', data_mfcc.max())
        print('data_mfcc.min()', data_mfcc.min())
        print("scale by", quantise_factor, 'clip to', nnom_net_input_clip_min,
              nnom_net_input_clip_max)
        net_input = np.array(data_mfcc.reshape([1] + input_shape),
                             dtype='float32')
        net_input = np.clip((net_input * quantise_factor),
                            nnom_net_input_clip_min,
                            nnom_net_input_clip_max).round()
        print('net_input.max()', net_input.max())
        print('net_input.min()', net_input.min())
        np.set_printoptions(precision=1, suppress=True)
        print('net input', net_input.ravel())

        # predict on CPU and MCU
        host_preds, mcu_preds = [], []
        host_preds.append((127 * model.predict(net_input)[0]).round())

        # infere on MCU
        import edison.mcu.mcu_util as mcu
        if mcu.sendCommand('kws_single_inference') < 0:
            exit()
        mcu.sendData(net_input.reshape(-1).astype('int8'), 0, progress=True)
        mcu_pred, tag = mcu.receiveData()
        print('Received %s type with tag 0x%x len %d' %
              (mcu_pred.dtype, tag, mcu_pred.shape[0]))
        mcu_preds.append(mcu_pred)

        def rmse(a, b):
            return np.sqrt(np.mean((a - b)**2))

        # report
        rmserror = rmse(host_preds[-1], mcu_preds[-1])
        np.set_printoptions(precision=3, suppress=True)
        print('keywords:', keywords)
        print('host prediction:', host_preds[-1],
              keywords[host_preds[-1].argmax()])
        print('mcu prediction: ', mcu_preds[-1],
              keywords[mcu_preds[-1].argmax()])
        print('rmse:', rmserror)

        mcu_preds = np.array(mcu_preds)
        host_preds = np.array(host_preds)
        # The small offset keeps the ratio finite where a host value is zero.
        deviations = 100.0 * (1.0 - (mcu_preds.ravel() + 1e-9) /
                              (host_preds.ravel() + 1e-9))
        print(
            '_________________________________________________________________'
        )
        print('Comparing: %s' % ('stuffs'))
        print("Deviation: max %.3f%% min %.3f%% avg %.3f%% \nrmse %.3f" %
              (deviations.max(), deviations.min(), np.mean(deviations),
               rmse(mcu_preds.ravel(), host_preds.ravel())))
        print('scale %.3f=1/%.3f' % (mcu_preds.max() / host_preds.max(),
                                     host_preds.max() / mcu_preds.max()))
        print('correlation coeff %.3f' %
              (np.corrcoef(host_preds.ravel(), mcu_preds.ravel())[0, 1]))
        print(
            '_________________________________________________________________'
        )
def modeFile(from_files, argv):
    """Compare host and MCU audio processing for every frame of a wav file.

    Reads the wav file ``argv[2]``, computes the MFCC stages on the host with
    ``mfu.mfcc_mcu`` and obtains the same stages from the MCU (or from the
    cache files written by an earlier run), then plots the comparison. The
    results are published through module-level globals.

    Args:
        from_files: Overwritten before use; the effective value is 1 when
            more than three arguments are given and 0 otherwise.
            NOTE(review): confirm that ignoring the caller's value is
            intended.
        argv: Command line arguments; ``argv[2]`` is the input wav file.

    The process exits when no input file is given or the MCU rejects a
    command.
    """
    global fs, y, host_fft, mcu_fft, mel_mtx, host_spec, mcu_spec, host_melspec, mcu_melspec, host_dct, mcu_dct, host_logmelspec
    global host_dct_reorder, host_dct_fft, host_dct_makhoul, nSamples, fname

    if len(argv) < 3:
        print('Specify input file')
        exit()

    fname = argv[2]
    from_files = int(len(argv) > 3)
    print('Working with %s' % (fname))

    # Read the audio and bring float samples into the int16 range.
    in_fs, wav = wavfile.read(fname)
    in_data = np.array(wav)
    if in_data.dtype == 'float32':
        in_data = np.array((2**15 - 1) * in_data, dtype='int16')
    y = in_data

    # MFCC settings
    fs = in_fs
    nSamples = len(in_data)
    frame_len = sample_size
    frame_step = 1024
    frame_count = 0  # 0 for auto
    fft_len = sample_size

    print("Frame length in seconds = %.3fs" % (frame_len / fs))
    print("Number of input samples = %d" % (nSamples))

    # Host-side reference calculation
    o_mfcc = mfu.mfcc_mcu(in_data, fs, nSamples, frame_len, frame_step,
                          frame_count, fft_len, num_mel_bins,
                          lower_edge_hertz, upper_edge_hertz, mel_mtx_scale)

    half = sample_size // 2

    def _stack(key):
        # One row per frame, each row cut to the first half of the bins.
        return np.array([entry[key][:half] for entry in o_mfcc])

    host_fft = _stack('fft')[:sample_size]
    host_spec = _stack('spectrogram')
    host_melspec = _stack('mel_spectrogram')
    host_logmelspec = _stack('log_mel_spectrogram')
    host_dct = np.array([entry['mfcc'] for entry in o_mfcc])

    # MCU-side calculation
    frames = mfu.frames(in_data,
                        frame_length=sample_size,
                        frame_step=frame_step)
    mcu_fft = []
    mcu_spec = []
    mcu_melspec = []
    mcu_dct = []
    frame_ctr = 0

    print('Running on MCU')
    from tqdm import tqdm
    if from_files:
        mcu_fft = np.load(cache_dir + '/mcu_fft_file.npy')
        mcu_spec = np.load(cache_dir + '/mcu_spec_file.npy')
        mcu_melspec = np.load(cache_dir + '/mcu_melspec_file.npy')
        mcu_dct = np.load(cache_dir + '/mcu_dct_file.npy')
    else:
        # The MCU answers each frame with four packets in this fixed order.
        sinks = (mcu_fft, mcu_spec, mcu_melspec, mcu_dct)
        for frame in tqdm(frames):
            yf = np.array(frame, dtype='int16')

            if mcu.sendCommand('mel_one_batch') < 0:
                exit()
            mcu.sendData(yf, 0, progress=False)

            for sink in sinks:
                dat, tag = mcu.receiveData()
                sink.append(dat)

            frame_ctr += 1

        mcu_fft = np.array(mcu_fft)
        mcu_spec = np.array(mcu_spec)
        mcu_melspec = np.array(mcu_melspec)
        mcu_dct = np.array(mcu_dct)

        # Cache the MCU results for later runs.
        import pathlib
        pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
        np.save(cache_dir + '/mcu_fft_file.npy', mcu_fft)
        np.save(cache_dir + '/mcu_spec_file.npy', mcu_spec)
        np.save(cache_dir + '/mcu_melspec_file.npy', mcu_melspec)
        np.save(cache_dir + '/mcu_dct_file.npy', mcu_dct)

    ######################################################################
    # plot
    stats = mcu.getStats()
    print('MCU Audio processing took %.2fms' %
          (stats['AudioLastProcessingTime']))

    fig = plotFileMode()
    plt.show()
def modeSingle(from_files):
    """Compare host and MCU audio processing for one frame of audio.

    Loads ``data/hey_short_16k.wav``, fits it to ``sample_size`` samples,
    obtains FFT, spectrum, mel spectrum and DCT from the MCU (or from the
    cache files of an earlier run), computes the same stages on the host,
    prints the host/MCU scale factors and plots the comparison. The results
    are published through module-level globals.

    Args:
        from_files: When truthy, load cached MCU results from ``cache_dir``
            instead of exchanging data with the MCU.

    The process exits if the MCU rejects the command.
    """
    global fs, y, host_fft, mcu_fft, mel_mtx, host_spec, mcu_spec, host_melspec, mcu_melspec, host_dct, mcu_dct
    global host_dct_reorder, host_dct_fft, host_dct_makhoul

    # Create synthetic sample.
    # NOTE: this signal is replaced by the natural sample loaded below.
    fs = sample_rate
    t = np.linspace(0, sample_size / fs, sample_size)
    y = np.array(1000 * np.cos(2 * np.pi * (fs / 16) * t) +
                 500 * np.cos(2 * np.pi * (fs / 128) * t),
                 dtype='int16')

    # natural sample
    _, in_data = wavfile.read('data/hey_short_16k.wav')
    # Cut first so the pad width below can never become negative.
    in_data = in_data[:sample_size]
    in_data = np.pad(in_data, (0, sample_size - in_data.shape[0]),
                     'constant',
                     constant_values=(4, 6))
    y = in_data

    if not from_files:
        # Exchange some data
        if mcu.sendCommand('mel_one_batch') < 0:
            exit()
        print('Upload sample')
        mcu.sendData(y, 0)

        print('Download samples')
        mcu_fft, tag = mcu.receiveData()
        print('Received %s type with tag 0x%x len %d' %
              (mcu_fft.dtype, tag, mcu_fft.shape[0]))
        mcu_spec, tag = mcu.receiveData()
        # Report the length of the spectrum packet itself, not of the FFT
        # packet.
        print('Received %s type with tag 0x%x len %d' %
              (mcu_spec.dtype, tag, mcu_spec.shape[0]))
        mcu_melspec, tag = mcu.receiveData()
        print('Received %s type with tag 0x%x len %d' %
              (mcu_melspec.dtype, tag, mcu_melspec.shape[0]))
        mcu_dct, tag = mcu.receiveData()
        print('Received %s type with tag 0x%x len %d' %
              (mcu_dct.dtype, tag, mcu_dct.shape[0]))

        # store this valuable data!
        import pathlib
        pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
        np.save(cache_dir + '/mcu_fft.npy', mcu_fft)
        np.save(cache_dir + '/mcu_spec.npy', mcu_spec)
        np.save(cache_dir + '/mcu_melspec.npy', mcu_melspec)
        np.save(cache_dir + '/mcu_dct.npy', mcu_dct)
    else:
        mcu_fft = np.load(cache_dir + '/mcu_fft.npy')
        mcu_spec = np.load(cache_dir + '/mcu_spec.npy')
        mcu_melspec = np.load(cache_dir + '/mcu_melspec.npy')
        mcu_dct = np.load(cache_dir + '/mcu_dct.npy')

    ######################################################################
    # Same calculations on host
    # These plain numpy results feed the mel matrix and the Makhoul DCT
    # variants; the four host_* stage arrays are replaced further down.
    host_fft = np.fft.fft(y)
    host_spec = np.abs(host_fft)
    mel_mtx = mfu.gen_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=num_spectrogram_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=lower_edge_hertz,
        upper_edge_hertz=upper_edge_hertz)
    mel_mtx_s16 = np.array(mel_mtx_scale * mel_mtx, dtype='int16')
    host_melspec = host_spec[:(sample_size // 2) + 1].dot(mel_mtx_s16)
    host_dct = dct(host_melspec, type=2)
    host_dct_makhoul, host_dct_reorder, host_dct_fft = mfu.dct2Makhoul(
        host_melspec)

    # Reference values from mfu.mfcc_mcu; these are the ones compared with
    # the MCU output below.
    o_mfcc = mfu.mfcc_mcu(y,
                          fs,
                          nSamples=1024,
                          frame_len=1024,
                          frame_step=1024,
                          frame_count=1,
                          fft_len=1024,
                          mel_nbins=num_mel_bins,
                          mel_lower_hz=lower_edge_hertz,
                          mel_upper_hz=upper_edge_hertz,
                          mel_mtx_scale=mel_mtx_scale)
    host_fft = o_mfcc[0]['fft']
    host_spec = o_mfcc[0]['spectrogram']
    host_melspec = o_mfcc[0]['mel_spectrogram']
    host_dct = o_mfcc[0]['mfcc']

    ######################################################################
    # Print some facts
    # Only every second MCU FFT value is compared against the host's real
    # part. NOTE(review): presumably the MCU packet interleaves real and
    # imaginary parts - confirm.
    scale = np.real(host_fft).max() / mcu_fft[0::2].max()
    print('host/mcu fft scale %f' % (scale))

    scale = host_spec.max() / mcu_spec.max()
    print('host/mcu spectrum scale %f' % (scale))

    scale = host_melspec.max() / mcu_melspec.max()
    print('host/mcu mel spectrum scale %f' % (scale))

    scale = host_dct.max() / mcu_dct.max()
    print('host/mcu dct scale %f' % (scale))

    ######################################################################
    # plot
    print('MCU Audio processing took %.2fms' %
          (mcu.getStats()['AudioLastProcessingTime']))

    fig = plotCompare()
    plt.show()