def specaug_fbank_test():
    """Run ``SPLayer`` with an fbank + ``spec_aug`` config and plot one result.

    Two hard-coded test recordings (a wav file and a flac decoded through a
    pipe) are loaded with ``utils.load_wave``, zero-padded to a common length
    and passed through ``SPLayer``.  The feature map at batch index 1 is
    rendered with matplotlib's non-interactive ``Agg`` backend and written to
    ``test.png``.
    """
    conf = {
        "feature_type": "fbank",
        "sample_rate": 16000,
        "num_mel_bins": 80,
        "use_energy": False,
        "spec_aug": {
            "freq_mask_num": 2,
            "freq_mask_width": 27,
            "time_mask_num": 2,
            "time_mask_width": 100,
        },
    }
    sources = (
        "file:testdata/100-121669-0000.wav",
        "pipe:flac -c -d -s testdata/103-1240-0005.flac |",
    )

    # Load everything first, then convert, mirroring the original call order.
    loaded = [utils.load_wave(source) for source in sources]
    waveforms = [torch.from_numpy(samples) for _, samples in loaded]
    lengths = [waveform.shape[0] for waveform in waveforms]
    longest = max(lengths)
    print(lengths)

    # Right-pad with zeros so both recordings fit in one (2, longest) tensor.
    batch = torch.zeros(2, longest)
    for row, (waveform, length) in enumerate(zip(waveforms, lengths)):
        batch[row, :length] += waveform

    layer = SPLayer(conf)
    features, feature_lengths = layer(batch, lengths)

    # The backend must be selected before pyplot is imported.
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    plt.imshow(features[1].numpy())
    plt.savefig("test.png")
def get_dur(wav_dic):
    """Map every key of ``wav_dic`` to the duration of its recording.

    Args:
        wav_dic: mapping from a key to a source that ``utils.load_wave``
            accepts.

    Returns:
        A new dict with the same keys; each value is the number of samples
        along the first axis of the loaded data divided by the sample rate.
    """
    def _duration(path):
        # load_wave yields (sample_rate, data); the first axis counts samples.
        rate, samples = utils.load_wave(path)
        return samples.shape[0] / float(rate)

    return {key: _duration(path) for key, path in wav_dic.items()}
def __getitem__(self, idx):
    """Return the ``(input, label)`` pair stored at position ``idx``.

    ``self.item[idx]`` is unpacked into ``(path, label)``.  What is returned
    as the input depends on the dataset settings:

    * ``feature_type == "waveform"``: the result of ``load_wave(path)``.
    * otherwise: ``read_ark(path)`` wrapped in a ``torch.FloatTensor``,
      additionally passed through ``F_Mel`` when ``self.mel`` is truthy.
    """
    wants_waveform = self.feature_type == "waveform"
    path, label = self.item[idx]

    if wants_waveform:
        return (load_wave(path), label)

    feature = torch.FloatTensor(read_ark(path))
    if not self.mel:
        return (feature, label)

    # NOTE(review): ``audio_conf`` is not a local or an attribute here; it is
    # presumably a module-level name - confirm it is defined before use.
    return (F_Mel(feature, audio_conf), label)
def load_wave_batch(paths):
    """Load several waveforms and zero-pad them into one 2-D tensor.

    Args:
        paths: iterable of sources accepted by ``utils.load_wave``.

    Returns:
        A tuple ``(padded_waveforms, lengths)``:

        * ``padded_waveforms`` - tensor of shape ``(len(paths), max_length)``
          created with ``torch.zeros``; row ``i`` holds waveform ``i``
          followed by zeros.
        * ``lengths`` - ``torch.long`` tensor with the unpadded length of
          each waveform.

        For an empty ``paths`` the result is a ``(0, 0)`` tensor and an empty
        length tensor instead of a ``ValueError`` from ``max()``.
    """
    waveforms = []
    for path in paths:
        # The sample rate returned by load_wave is not needed here.
        _, samples = utils.load_wave(path)
        waveforms.append(torch.from_numpy(samples))

    lengths = [waveform.shape[0] for waveform in waveforms]
    # default=0 keeps an empty batch from raising inside max().
    max_length = max(lengths, default=0)

    padded_waveforms = torch.zeros(len(lengths), max_length)
    for row, (waveform, length) in enumerate(zip(waveforms, lengths)):
        # In-place add onto zeros copies the samples into the row prefix
        # while keeping the dtype of the zero-initialised batch.
        padded_waveforms[row, :length] += waveform

    return padded_waveforms, torch.tensor(lengths).long()
def specaug_test():
    """Print ``SPLayer`` output for the same batch with and without ``spec_aug``.

    Two layers are built: one from a plain fbank config and one from the same
    config extended with a ``spec_aug`` section.  Both receive the same
    zero-padded batch of two test recordings, and the two feature tensors are
    printed one after the other.
    """
    featconf = {
        "feature_type": "fbank",
        "sample_rate": 16000,
        "num_mel_bins": 40,
        "use_energy": False,
    }
    # Same base settings as featconf, plus the augmentation section.
    augconf = dict(
        featconf,
        spec_aug={
            "freq_mask_width": 10,
            "freq_mask_num": 2,
            "time_mask_width": 100,
            "time_mask_num": 2,
        },
    )

    sources = (
        "file:testdata/100-121669-0000.wav",
        "pipe:flac -c -d -s testdata/103-1240-0005.flac |",
    )
    loaded = [utils.load_wave(source) for source in sources]
    waveforms = [torch.from_numpy(samples) for _, samples in loaded]
    lengths = [waveform.shape[0] for waveform in waveforms]

    # Right-pad with zeros so both recordings share one (2, longest) tensor.
    batch = torch.zeros(2, max(lengths))
    for row, (waveform, length) in enumerate(zip(waveforms, lengths)):
        batch[row, :length] += waveform

    splayer = SPLayer(featconf)
    auglayer = SPLayer(augconf)
    features, feature_lengths = splayer(batch, lengths)
    features2, feature_lengths2 = auglayer(batch, lengths)

    print("Before augmentation")
    print(features)
    print("After augmentation")
    print(features2)
def test_load_wave():
    """Check that ``utils.load_wave`` agrees across ark, file and pipe sources.

    The same utterance is loaded from an ark offset, a wav file and a flac
    pipe, and each load is timed with ``utils.Timer``.  A second ark offset
    is loaded only to confirm it can be read.  The sample rates must match
    and the sample arrays must be element-wise equal.
    """
    pipe_source = "pipe:flac -c -d -s testdata/100-121669-0000.flac | "
    file_source = "file:testdata/100-121669-0000.wav"
    ark_source = "ark:/data1/Corpora/LibriSpeech/ark/train_960.ark:16"
    ark2_source = "ark:/data1/Corpora/LibriSpeech/ark/train_960.ark:2591436"

    timer = utils.Timer()

    def timed_load(source, message):
        # Time a single load and report it using the given message template.
        timer.tic()
        rate, data = utils.load_wave(source)
        print(message.format(timer.toc()))
        return rate, data

    ark_rate, ark_data = timed_load(ark_source, "Load ark time: {}s")
    file_rate, file_data = timed_load(file_source, "Load file time: {}s")
    pipe_rate, pipe_data = timed_load(pipe_source, "Load flac pipe time: {}s")

    # Only checks that the second ark entry loads; its content is not compared.
    print("Load ark2")
    _, _ = utils.load_wave(ark2_source)

    assert pipe_rate == file_rate
    assert ark_rate == file_rate
    assert np.sum(pipe_data != file_data) == 0
    assert np.sum(ark_data != file_data) == 0
def fbank_test():
    """Print the ``SPLayer`` fbank output for two padded test recordings.

    A wav file and a flac decoded through a pipe are loaded with
    ``utils.load_wave``, zero-padded to the longer length and passed through
    an ``SPLayer`` built from a 40-bin fbank config.  The resulting features
    and feature lengths are printed.
    """
    conf = {
        "feature_type": "fbank",
        "sample_rate": 16000,
        "num_mel_bins": 40,
        "use_energy": False,
    }
    sources = (
        "file:testdata/100-121669-0000.wav",
        "pipe:flac -c -d -s testdata/103-1240-0005.flac |",
    )

    loaded = [utils.load_wave(source) for source in sources]
    waveforms = [torch.from_numpy(samples) for _, samples in loaded]
    lengths = [waveform.shape[0] for waveform in waveforms]

    # Right-pad with zeros so both recordings share one (2, longest) tensor.
    batch = torch.zeros(2, max(lengths))
    for row, (waveform, length) in enumerate(zip(waveforms, lengths)):
        batch[row, :length] += waveform

    layer = SPLayer(conf)
    features, feature_lengths = layer(batch, lengths)
    print(features)
    print(feature_lengths)
def decode_file():
    """Load the received recording, decode it and save the decoded text.

    Steps:
        1. Load ``../receive/output.wav`` through ``utils.load_wave`` and
           print how long the load took.
        2. If the loaded array is 2-D, keep only its first column
           (presumably the first audio channel - confirm against
           ``utils.load_wave``).
        3. Run ``FSK.demodulation`` followed by
           ``utils.decode_bluetooth_packet`` and write the decoded text to
           ``../result.txt`` (UTF-8).
        4. Print the decoding time, the packet count and the decoded text.

    The result file is written inside a ``with`` block so the handle is
    closed even if the write raises.
    """
    start = time.time_ns()
    get_wave = utils.load_wave(save_base='../receive', file_name='output.wav')
    end = time.time_ns()
    # time_ns() differences are nanoseconds; / 1e6 converts to milliseconds.
    print('读取文件耗时:', (end - start) / 1e6, 'ms')

    if len(get_wave.shape) == 2:
        get_wave = get_wave[:, 0]

    start = time.time_ns()
    packets = FSK.demodulation(utils.init_args(), get_wave)
    count, result = utils.decode_bluetooth_packet(utils.init_args(), packets)
    # The measured time deliberately includes writing the result file.
    with open('../result.txt', 'w', encoding='utf-8') as f:
        f.write(result)
    end = time.time_ns()

    print('解码文本耗时:', (end - start) / 1e6, 'ms')
    print('蓝牙包成功解码数量:{}\n解码信息:{}\n'.format(count, result))
def get_result(self):
    '''
    Description: decode the recorded audio and show the transmission result
    Parameters: none
    Returns: none
    '''
    # NOTE(review): this method reads both a non-local ``args`` and
    # ``self.args``; presumably they hold the same settings - confirm.
    recorded = load_wave(save_base=args.save_base_receive,
                         file_name=self.save_place)

    # Keep a band of +/- 500 around the configured frequency before demodulating.
    centre = self.args.frequency
    filtered = bandpass(recorded, self.args.framerate, centre - 500, centre + 500)
    received_seq = pulse_demodulation(filtered, args)
    print(received_seq)

    received_text = decode(received_seq)
    original_text = decode(self.original_seq)

    # Transfer rate: sequence length over recording duration, divided by 8
    # for the byte/s figure shown in the message.
    duration = len(recorded) / self.args.framerate
    speed = len(self.original_seq) / duration / 8

    # Packet loss: share of the original decoded items missing from the
    # received result, clamped at zero.
    missing = len(original_text) - len(received_text)
    packet_loss = max(0, missing / len(original_text))

    template = "原始结果:{}\n接收结果:{} \n传输速率:{:.2f}byte/s\n丢包率:{:.2f}%\n"
    show_text = template.format(original_text, received_text, speed,
                                100 * packet_loss)
    tkinter.messagebox.showinfo('传输结果', show_text)
# Remaining command-line options.  ``parser`` is created above this excerpt,
# and ``--framerate`` / ``--sample_width`` (read below) are expected to be
# registered there as well.
parser.add_argument("--nchannels", type=int, default=1)
parser.add_argument("--volume", type=float, default=1.0)
parser.add_argument("--start_place", type=int, default=0)
parser.add_argument("--pulse_length", type=float, default=0.01)
parser.add_argument("--interval_0", type=float, default=0.01)
parser.add_argument("--interval_1", type=float, default=0.02)
parser.add_argument("--save_base", type=str, default='sound')
parser.add_argument("--file_name", type=str, default='pulse.wav')
parser.add_argument("--seq_len", type=int, default=100)
args = parser.parse_args()

# Round trip: build a sequence with generate_random_seq, turn it into a wave
# with pulse_modulation and write it out with save_wave.
original_seq = generate_random_seq(args.seq_len)
print("The original seq is:\n", original_seq)
the_wave = pulse_modulation(original_seq, args)
save_wave(the_wave,
          framerate=args.framerate,
          sample_width=args.sample_width,
          nchannels=args.nchannels,
          save_base=args.save_base,
          file_name=args.file_name)

# Read the same file back, demodulate it and compare with what was sent.
get_wave = load_wave(save_base=args.save_base, file_name=args.file_name)
get_seq = pulse_demodulation(get_wave, args)
print("The loaded seq is:\n", get_seq)
result = compare_seqs(original_seq, get_seq)
if result:
    print("The original seq and the seq I get is identical, right!")
else:
    print("The original seq and the seq I get is not identical, wrong!")
def main():
    """Demodulate the hard-coded recording ``_tmp_a_0_6535.wav``.

    The arguments come from ``utils.init_args()`` and the samples from
    ``utils.load_wave``; the demodulated packets are bound to ``packets``.
    """
    demod = FSK.demodulation
    options = utils.init_args()
    samples = utils.load_wave("", "_tmp_a_0_6535.wav")
    packets = demod(options, samples)