def main():
    """Augment a dataset of audio files with overlaid background noise.

    Loads every train/test wav under ``args.folder``, mixes in random noise
    from ``args.noise_folder`` at a ratio drawn uniformly from
    [noise_ratio_low, noise_ratio_high], and writes ``inflation_factor``
    noised copies of each file into ``args.output_folder``, mirroring the
    input directory layout. If the tags file lives inside the input folder,
    it is copied alongside the output so the augmented set stays usable.
    """
    args = create_parser(usage).parse_args()
    args.tags_file = abspath(args.tags_file) if args.tags_file else None
    args.folder = abspath(args.folder)
    args.output_folder = abspath(args.output_folder)
    noise_min, noise_max = args.noise_ratio_low, args.noise_ratio_high

    data = TrainData.from_both(args.tags_file, args.folder, args.folder)
    noise_data = NoiseData(args.noise_folder)
    print('Data:', data)

    def translate_filename(source: str, n=0) -> str:
        """Map an input filename to its output path, suffixing copy n > 0 with '.n'."""
        assert source.startswith(args.folder)
        relative_file = source[len(args.folder):].strip(os.path.sep)
        if n > 0:
            base, ext = splitext(relative_file)
            relative_file = base + '.' + str(n) + ext
        return join(args.output_folder, relative_file)

    all_filenames = sum(data.train_files + data.test_files, [])
    for i, filename in enumerate(all_filenames):
        # max(1, ...) prevents ZeroDivisionError when there is exactly
        # one input file (len(all_filenames) - 1 == 0)
        print('{0:.2%}  \r'.format(i / max(1, len(all_filenames) - 1)), end='', flush=True)

        audio = load_audio(filename)
        for n in range(args.inflation_factor):
            # Noise ratio is re-rolled per generated copy
            altered = noise_data.noised_audio(audio, noise_min + (noise_max - noise_min) * random())
            output_filename = translate_filename(filename, n)

            makedirs(dirname(output_filename), exist_ok=True)
            save_audio(output_filename, altered)
    print('Done!')

    if args.tags_file and args.tags_file.startswith(args.folder):
        shutil.copy2(args.tags_file, translate_filename(args.tags_file))
def train_on_audio(self, fn: str):
    """Run through a single audio file, saving chunks the model falsely
    activates on as not-wake-word samples, retraining periodically.

    Roughly 20% of files (random() > 0.8) are routed to the test split
    and never trigger retraining.
    """
    save_test = random() > 0.8
    audio = load_audio(fn)
    num_chunks = len(audio) // self.args.chunk_size

    self.listener.clear()

    for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
        # max(1, ...) guards against ZeroDivisionError when the file is
        # shorter than one chunk (num_chunks == 0)
        print('\r' + str(i * 100. / max(1, num_chunks)) + '%', end='', flush=True)
        # Rolling window over the most recent audio
        self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
        conf = self.listener.update(chunk)
        if conf > self.args.threshold:
            # False activation on background audio: harvest it as a
            # generated not-wake-word sample
            self.samples_since_train += 1
            name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
            name = join(self.args.folder, 'test' if save_test else '', 'not-wake-word',
                        'generated', name)
            save_audio(name, self.audio_buffer)
            print()
            print('Saved to:', name)

        if not save_test and self.samples_since_train >= self.args.delay_samples and \
                self.args.epochs > 0:
            self.samples_since_train = 0
            self.retrain()
def on_activation_safe():
    """Write the current audio buffer to a numbered wav file and bump the counter."""
    global chunk_num
    filename = args.save_prefix + session_id + '.' + str(chunk_num) + '.wav'
    wav_path = join(args.save_dir, filename)
    save_audio(wav_path, audio_buffer)
    print()
    print('Saved to ' + wav_path + '.')
    chunk_num += 1
def on_activation(self):
    """Notify the user of an activation and, when a save directory is
    configured, dump the audio buffer to a numbered wav file."""
    activate_notify()

    if not self.args.save_dir:
        return
    filename = self.args.save_prefix + self.session_id + '.' + str(self.chunk_num) + '.wav'
    wav_path = join(self.args.save_dir, filename)
    save_audio(wav_path, self.audio_buffer)
    print()
    print('Saved to ' + wav_path + '.')
    self.chunk_num += 1
def on_activation():
    """Play the activation sound; optionally save the audio buffer to a
    numbered wav file under args.save_dir."""
    global chunk_num
    Popen(['aplay', '-q', 'data/activate.wav'])

    if not args.save_dir:
        return
    filename = args.save_prefix + session_id + '.' + str(chunk_num) + '.wav'
    wav_path = join(args.save_dir, filename)
    save_audio(wav_path, audio_buffer)
    print()
    print('Saved to ' + wav_path + '.')
    chunk_num += 1
def vectors_from_fn(self, fn: str):
    """
    Run through a single background audio file, overlaying with wake words.
    Generates (mfccs, target) where mfccs is a series of mfcc values and
    target is a single integer classification of the target network output for that chunk.

    Chunks that are ambiguous (partially overlapping a wake word) are
    skipped rather than yielded.
    """
    audio = load_audio(fn)
    audio_volume = self.calc_volume(audio)
    # Randomly attenuate the background to between 40% and 90% volume
    audio_volume *= 0.4 + 0.5 * random()
    audio = self.normalize_volume_to(audio, audio_volume)

    self.listener.clear()
    chunked_bg = chunk_audio(audio, self.args.chunk_size)
    chunked_ww = self.chunk_audio_pieces(self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

    for i, (chunk_bg, (chunk_ww, targets)) in enumerate(zip(chunked_bg, chunked_ww)):
        # Mix wake-word audio over the background chunk
        chunk = self.merge(chunk_bg, chunk_ww, 0.6)
        # Rolling windows: per-sample wake-word labels and the raw audio
        self.vals_buffer = np.concatenate((self.vals_buffer[len(targets):], targets))
        self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
        mfccs = self.listener.update_vectors(chunk)
        # Fraction of the label buffer covered by its longest run of 1s
        percent_overlapping = self.max_run_length(self.vals_buffer, 1) / len(self.vals_buffer)

        if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
            # Buffer is mostly wake word and the word has just ended:
            # positive sample
            target = 1
        elif percent_overlapping < 0.5:
            # Little to no wake word present: negative sample
            target = 0
        else:
            # Ambiguous overlap (0.5..0.8, or word still in progress): skip
            continue

        if random() > 1.0 - self.args.save_prob:
            # Occasionally dump the mixed audio for manual inspection
            name = splitext(basename(fn))[0]
            wav_file = join('debug', 'ww' if target == 1 else 'nww', '{} - {}.wav'.format(name, i))
            save_audio(wav_file, self.audio_buffer)
        yield mfccs, target
def generate_samples(self, folder, name, value, duration):
    """Write ``self.count`` wav files filled with a constant sample value.

    Args:
        folder: Destination directory for the generated files
        name: Filename template formatted with the file index (e.g. '{}.wav')
        value: Constant sample value every frame is set to
        duration: Length of each file in seconds (at pr.sample_rate)
    """
    # The sample data is identical for every file, so build the array
    # once instead of reconstructing it on each iteration
    samples = np.full(int(duration * pr.sample_rate), value)
    for i in range(self.count):
        save_audio(join(folder, name.format(i)), samples)