def initialize(self):
    """Build the job list, validate bounding boxes and prepare the output.

    Reads ``self.size``, ``self.segments``, ``self.video_ext``,
    ``self.boundingbox`` and ``self.output``; writes ``self.size``,
    ``self.jobs``, ``self.dataset`` and ``self._temp_path``.

    ``self.segments`` may be one of:

    * path to a directory: every file returned by ``get_all_files`` becomes
      one record ``(basename, path, 0.0, -1.0)``;
    * path to a space-delimited text file whose rows are
      ``name path start end``;
    * list/tuple of file paths (same record layout as the directory case,
      with the path made absolute);
    * list/tuple of 4-item ``(name, path, start, end)`` records.

    After this call ``self.jobs`` is a list of
    ``(path, [(name, start, end), ...])`` pairs sorted by path.

    Raises
    ------
    ValueError
        If the segments path does not exist, ``segments`` has an unsupported
        type, or ``self.boundingbox`` is not a dictionary.
    Exception
        If a segment record does not have exactly 4 fields, or the segment
        names do not match the bounding-box keys.
    """
    # reversed to height width for easy processing
    if self.size is not None:
        self.size = as_tuple(self.size, N=2, t=int)
    segments = self.segments
    video_ext = as_tuple('' if self.video_ext is None else self.video_ext,
                         1, str)
    # ====== load jobs ====== #
    if isinstance(segments, str):
        if not os.path.exists(segments):
            raise ValueError('Path to segments must exists, however, '
                             'exist(segments)={}'.format(
                                 os.path.exists(segments)))
        if os.path.isdir(segments):
            # segment, path, start, end
            file_list = [(os.path.basename(i), i, 0.0, -1.0)
                         for i in get_all_files(segments)]
        else:  # csv file
            # atleast_2d: a file with a single row is parsed as a 1-D array,
            # which would otherwise be iterated field-by-field below.
            file_list = np.atleast_2d(
                np.genfromtxt(segments, dtype=str, delimiter=' '))
    elif isinstance(segments, (tuple, list)):
        if len(segments) == 0:
            # nothing to process; avoid IndexError on segments[0]
            file_list = []
        elif isinstance(segments[0], str):
            # just a list of path to file
            file_list = [(os.path.basename(i), os.path.abspath(i), 0.0, -1.0)
                         for i in segments]
        elif isinstance(segments[0], (tuple, list)):
            if len(segments[0]) != 4:
                raise Exception(
                    'segments must contain information in following for:'
                    '[name] [path] [start] [end]')
            file_list = segments
        else:
            raise ValueError(
                'Elements of `segments` must be path strings or '
                '(name, path, start, end) records, but given: %s' %
                str(type(segments[0])))
    else:
        raise ValueError(
            '`segments` must be a path string, or a list/tuple of paths or '
            'records, but given: %s' % str(type(segments)))
    # filter using supported video extensions (suffix match, so an extension
    # appearing in the middle of a path does not count)
    file_list = [
        f for f in file_list if any(f[1].endswith(ext) for ext in video_ext)
    ]
    # convert into: video_path -> segment(name, start, end)
    jobs = defaultdict(list)
    names = []
    for segment, path, start, end in file_list:
        jobs[path].append((segment, float(start), float(end)))
        names.append(segment)
    self.jobs = sorted(jobs.items(), key=lambda x: x[0])
    # ====== load bounding box ====== #
    if self.boundingbox is not None:
        if not isinstance(self.boundingbox, dict):
            raise ValueError('Bounding box must be a dictionary')
        if set(names) != set(self.boundingbox.keys()):
            raise Exception(
                'Segments names and boundingbox keys mismatch.')
    # ====== check output ====== #
    self.dataset = Dataset(self.output)
    self._temp_path = get_tempdir()
    print('Temporary dir created at:', self._temp_path)
    # remove old cache files
    for p in os.listdir(self._temp_path):
        os.remove(os.path.join(self._temp_path, p))
def load_command(self):
    r""" Load the Speech Commands dataset and split it into train/valid/test.

    Warden P. Speech Commands: A public dataset for single-word speech
    recognition, 2017. Available from
    http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz

    Sample rate: 16,000

    The archive is downloaded and extracted under
    ``<self.save_path>/speech_commands/audio`` whenever the checksum of the
    concatenated ``.wav`` base names found there differs from the expected
    one. Files whose path contains ``_background_noise_`` are excluded, and
    the remaining files are assigned to a split using
    ``validation_list.txt`` and ``testing_list.txt`` from the archive.

    Returns
    -------
    (train_files, valid_files, test_files) : three lists of ``.wav`` paths

    Example:

    ds = AudioFeatureLoader(sample_rate=16000,
                            frame_length=int(0.025 * 16000),
                            frame_step=int(0.005 * 16000))
    train, valid, test = ds.load_command()
    train = ds.create_dataset(train, max_length=40, return_path=True)
    """
    LINK = "http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz"
    MD5 = "a08eb256cea8cbb427c6c0035fffd881"
    save_path = os.path.join(self.save_path, 'speech_commands')
    # makedirs also creates a missing parent and tolerates an existing folder
    os.makedirs(save_path, exist_ok=True)
    audio_path = os.path.join(save_path, 'audio')

    def _list_wav_files():
        return sorted(
            get_all_files(audio_path, filter_func=lambda x: '.wav' == x[-4:]))

    audio_files = _list_wav_files()
    md5 = md5_checksum(''.join([os.path.basename(i) for i in audio_files]))
    # ====== Download and extract the data ====== #
    if md5 != MD5:
        zip_path = get_file(fname='speech_commands_v0.01.tar.gz',
                            origin=LINK,
                            outdir=save_path,
                            verbose=True)
        # NOTE(review): the archive comes over plain HTTP and is extracted
        # without member validation; consider an extraction filter.
        with tarfile.open(zip_path, 'r:gz') as tar:
            tar.extractall(audio_path)
        # the listing above predates the extraction, so it must be refreshed
        # or the very first call would return three empty lists
        audio_files = _list_wav_files()
    # ====== processing the audio file list ====== #
    audio_files = [i for i in audio_files if '_background_noise_' not in i]
    with open(os.path.join(audio_path, 'validation_list.txt'), 'r') as f:
        valid_list = {i.strip() for i in f}
    with open(os.path.join(audio_path, 'testing_list.txt'), 'r') as f:
        test_list = {i.strip() for i in f}
    train_files = []
    valid_files = []
    test_files = []
    for f in audio_files:
        # the split lists store "<folder>/<file>.wav" with forward slashes
        name = '/'.join(f.replace(os.sep, '/').split('/')[-2:])
        if name in valid_list:
            valid_files.append(f)
        elif name in test_list:
            test_files.append(f)
        else:
            train_files.append(f)
    return train_files, valid_files, test_files
def zip_aes(in_path, out_path, password=None, compression=True, verbose=False):
    r""" Encrypt every file of a folder and store the results in a zip file.

    Each file found by ``get_all_files(in_path)`` is passed to
    ``encrypt_aes`` with the key ``password + <file base name>`` and written
    to the archive under its base name. A pickled ``{name: md5}`` mapping is
    stored as the extra entry ``_MD5_CHECKSUM_``.

    Parameters
    ----------
    in_path : string
        path to a folder
    out_path : string
        path to output zip file
    password : string or None
        encryption password; when None it is requested via ``input``
    compression : bool
        use ``ZIP_DEFLATED`` when true, ``ZIP_STORED`` otherwise
    verbose : bool
        print the name and MD5 of every stored file

    Raises
    ------
    ValueError
        If the password is empty, ``in_path`` is not a folder, or
        ``out_path`` is not a string.
    """
    # NOTE(review): the prompt/validation statements were garbled in the
    # source this was recovered from; the three statements below are a
    # reconstruction (including the exception type) -- confirm against the
    # upstream implementation.
    if password is None:
        password = input("Your password:")
    password = str(password)
    if len(password) == 0:
        raise ValueError("`password`=%s length must be greater than 0" %
                         password)
    # ====== prepare input ====== #
    if not os.path.isdir(in_path):
        raise ValueError("`in_path` to %s is not a folder" % str(in_path))
    from odin.utils import get_all_files
    all_files = get_all_files(in_path)
    # ====== prepare output ====== #
    if not isinstance(out_path, string_types):
        raise ValueError("`out_path` must be string")
    zip_mode = zipfile.ZIP_DEFLATED if bool(compression) else zipfile.ZIP_STORED
    # ====== compression ====== #
    md5_map = {}
    # context manager: the archive is closed even if encryption fails midway
    with zipfile.ZipFile(out_path, 'w', compression=zip_mode,
                         allowZip64=True) as f:
        for path in all_files:
            # NOTE(review): entries are keyed by base name only, so files
            # with the same name in different sub-folders collide.
            name = os.path.basename(path)
            md5_map[name] = md5_checksum(path)
            f.writestr(name, encrypt_aes(path, password + name))
            if verbose:
                print('Compressed: "%s"' % name, "(MD5:%s)" % md5_map[name])
        f.writestr('_MD5_CHECKSUM_', pickle.dumps(md5_map))
def run(self, overrides=[], ncpu=None, **configs):
    r""" Run the experiment(s) through hydra using the command line arguments.

    The command line (``sys.argv``) is rewritten in place before it is handed
    to hydra: the ``ncpu`` argument and the reset flags (``--reset``,
    ``--clear``, ``--clean``) are consumed here, the given overrides are
    appended, and the hydra run/sweep directories are inserted.

    ``Hydra.run`` and ``Hydra.multirun`` are temporarily replaced so that
    sweeps are launched with ``ParallelLauncher(ncpu=ncpu)``; the original
    methods are always restored before returning.

    Arguments:
      overrides: extra hydra overrides, normalized by `_overrides`.
      ncpu: number of processes for multirun; `self.ncpu` when None. An
        `ncpu` argument on the command line takes precedence.
      **configs: additional overrides given as keyword arguments.

    Note: configurations are always composed with `strict=False`.

    Returns:
      `self`, after `self.summary()` has been called.

    Example:
      exp = SisuaExperimenter(ncpu=1)
      exp.run(
          overrides={
              'model': ['sisua', 'dca', 'vae'],
              'dataset.name': ['cortex', 'pbmc8kly'],
              'train.verbose': 0,
              'train.epochs': 2,
              'train': ['adam'],
          })
    """
    # NOTE(review): the mutable default of `overrides` is only handed to
    # `_overrides`; confirm that helper never mutates its argument.
    overrides = _overrides(overrides) + _overrides(configs)
    strict = False
    command = ' '.join(sys.argv)
    # parse ncpu
    if ncpu is None:
        ncpu = self.ncpu
    ncpu = int(ncpu)
    for idx, arg in enumerate(list(sys.argv)):
        if 'ncpu' in arg:
            if '=' in arg:
                # form: ncpu=4
                ncpu = int(arg.split('=')[-1])
                sys.argv.pop(idx)
            else:
                # form: ncpu 4 -> drop both the flag and its value
                ncpu = int(sys.argv[idx + 1])
                sys.argv.pop(idx)
                sys.argv.pop(idx)
            break
    # check reset
    reset_flags = ('--reset', '--clear', '--clean')
    if any(arg in reset_flags for arg in sys.argv):
        configs_filter = lambda f: 'configs' != f.split('/')[-1]
        if len(get_all_files(self._save_path,
                             filter_func=configs_filter)) > 0:
            old_exps = '\n'.join([
                " - %s" % i
                for i in os.listdir(self._save_path)
                if configs_filter(i)
            ])
            inp = input("<Enter> to clear all exists experiments:"
                        "\n%s\n'n' to cancel, otherwise continue:" % old_exps)
            if inp.strip().lower() != 'n':
                clean_folder(self._save_path,
                             filter=configs_filter,
                             verbose=True)
        # Remove every reset flag at once. Popping by the index of a stale
        # copy removed the wrong argument when more than one flag was given.
        sys.argv[:] = [arg for arg in sys.argv if arg not in reset_flags]
    # check multirun
    is_multirun = any(',' in ovr for ovr in overrides) or \
      any(',' in arg and '=' in arg for arg in sys.argv)
    # write history
    self.write_history(command, "overrides: %s" % str(overrides),
                       "strict: %s" % str(strict), "ncpu: %d" % ncpu,
                       "multirun: %s" % str(is_multirun))
    # generate app help (only referenced by the disabled --help code below)
    hlp = '\n\n'.join([
        "%s - %s" % (str(key), ', '.join(sorted(as_tuple(val, t=str))))
        for key, val in dict(self.args_help).items()
    ])

    # The two functions below are installed as methods of `Hydra`, so their
    # `self` is the Hydra instance, not this experimenter.
    def _run(self, config_file, task_function, overrides):
        if is_multirun:
            raise RuntimeError(
                "Performing single run with multiple overrides in hydra "
                "(use '-m' for multirun): %s" % str(overrides))
        cfg = self.compose_config(config_file=config_file,
                                  overrides=overrides,
                                  strict=strict,
                                  with_log_configuration=True)
        HydraConfig().set_config(cfg)
        return run_job(
            config=cfg,
            task_function=task_function,
            job_dir_key="hydra.run.dir",
            job_subdir_key=None,
        )

    def _multirun(self, config_file, task_function, overrides):
        # Initial config is loaded without strict (individual job configs may
        # have strict).
        from hydra._internal.plugins import Plugins
        cfg = self.compose_config(config_file=config_file,
                                  overrides=overrides,
                                  strict=strict,
                                  with_log_configuration=True)
        HydraConfig().set_config(cfg)
        sweeper = Plugins.instantiate_sweeper(
            config=cfg,
            config_loader=self.config_loader,
            task_function=task_function)
        # override launcher for using multiprocessing
        sweeper.launcher = ParallelLauncher(ncpu=ncpu)
        sweeper.launcher.setup(config=cfg,
                               config_loader=self.config_loader,
                               task_function=task_function)
        return sweeper.sweep(arguments=cfg.hydra.overrides.task)

    old_multirun = (Hydra.run, Hydra.multirun)
    Hydra.run = _run
    Hydra.multirun = _multirun
    try:
        # append the new override
        if len(overrides) > 0:
            sys.argv += overrides
        # help for arguments
        if '--help' in sys.argv:
            # sys.argv.append("hydra.help.header='**** %s ****'" %
            #                 self.__class__.__name__)
            # sys.argv.append("hydra.help.template=%s" % (_APP_HELP % hlp))
            # TODO : fix bug here
            pass
        # append the hydra log path
        job_fmt = "/${now:%d%b%y_%H%M%S}"
        sys.argv.insert(
            1, "hydra.run.dir=%s" % self.get_hydra_path() + job_fmt)
        sys.argv.insert(
            1, "hydra.sweep.dir=%s" % self.get_hydra_path() + job_fmt)
        sys.argv.insert(1, "hydra.sweep.subdir=${hydra.job.id}")
        # sys.argv.append(r"hydra.job_logging.formatters.simple.format=" +
        #                 r"[\%(asctime)s][\%(name)s][\%(levelname)s] - \%(message)s")
        args_parser = get_args_parser()
        run_hydra(
            args_parser=args_parser,
            task_function=self._run,
            config_path=self.config_path,
            strict=strict,
        )
    except KeyboardInterrupt:
        sys.exit(-1)
    except SystemExit:
        pass
    finally:
        # always undo the monkey-patch, also when an unexpected error
        # propagates to the caller
        Hydra.run = old_multirun[0]
        Hydra.multirun = old_multirun[1]
    # update the summary
    self.summary()
    return self
help='Override exist models', action='store_true') parser.add_argument('-ds', default='') args = parser.parse_args() OVERRIDE = bool(args.override) ## create the filter if args.ds: ds = set([str(i).lower() for i in args.ds.split(',')]) fn_filter = lambda job: job['ds'] in ds else: fn_filter = lambda job: True ## just print some debugging if not args.train and not args.eval: log = sorted([ (path.split('/')[-3:-1], path) for path in get_all_files(outdir, lambda path: 'log.txt' in path) ], key=lambda x: x[0][0] + x[0][1]) for (ds, model), path in log: print(ds, model) with open(path, 'r') as f: lines = [ line[:-1].split('at ')[-1] for line in f.readlines() if 'best' in line ][-1:] for l in lines: print(' ', l) ## run train or evaluation tasks else: main(mode='evaluate' if args.eval else 'train', fn_filter=fn_filter)
def __init__(self, segments, output_path, sr=None, win=0.02, shift=0.01,
             nb_melfilters=24, nb_ceps=12, get_spec=True, get_mspec=False,
             get_mfcc=False, get_qspec=False, get_phase=False,
             get_pitch=False, get_vad=True, get_energy=False,
             get_delta=False, fmin=64, fmax=None, sr_new=None,
             preemphasis=0.97, pitch_threshold=0.8, pitch_fmax=800,
             vad_smooth=3, vad_minlen=0.1, cqt_bins=96, pca=True,
             pca_whiten=False, center=True, audio_ext=None, save_stats=True,
             substitute_nan=None, dtype='float16', datatype='memmap',
             ncache=0.12, ncpu=1):
    """Collect the audio jobs and record which features must be extracted.

    ``segments`` may be one of:

    * path to a directory: every file returned by ``get_all_files`` becomes
      one record ``(basename, path, 0.0, -1.0)``;
    * path to a space-delimited text file whose rows are
      ``name path start end`` (optionally followed by ``channel``);
    * list/tuple of file paths;
    * list/tuple of 4- or 5-item records
      ``(name, path, start, end[, channel])``.

    Records without a channel get channel 0. Only records whose path ends
    with one of ``audio_ext`` are kept (``None`` keeps everything). The
    result is stored in ``self.jobs`` as a list of
    ``(path, [(name, start, end, channel), ...])`` pairs sorted by path, and
    ``self.njobs`` is the number of kept records.

    ``output_path``, ``datatype``, ``pca``, ``pca_whiten``, ``save_stats``,
    ``substitute_nan``, ``ncache`` and ``ncpu`` are forwarded to the parent
    constructor. The remaining signal-processing arguments are stored
    unchanged as attributes of the same name, except that
    ``pitch_threshold`` is clipped to ``[0, 1]`` and ``get_delta`` is
    converted to ``int``.

    Raises
    ------
    ValueError
        If the segments path does not exist or ``segments`` has an
        unsupported type.
    Exception
        If a record has neither 4 nor 5 fields, no job remains after
        filtering, or none of spec/mspec/mfcc/pitch/energy/vad is requested.
    """
    super(SpeechProcessor, self).__init__(output_path=output_path,
                                          datatype=datatype,
                                          pca=pca,
                                          pca_whiten=pca_whiten,
                                          save_stats=save_stats,
                                          substitute_nan=substitute_nan,
                                          ncache=ncache,
                                          ncpu=ncpu)
    audio_ext = as_tuple('' if audio_ext is None else audio_ext,
                         t=string_types)
    # ====== load jobs ====== #
    # NOT loaded segments
    if isinstance(segments, str):
        if not os.path.exists(segments):
            raise ValueError('Path to segments must exists, however, '
                             'exist(segments)={}'.format(
                                 os.path.exists(segments)))
        # given a directory
        if os.path.isdir(segments):
            # segment, path, start, end
            file_list = [(os.path.basename(i), i, 0.0, -1.0)
                         for i in get_all_files(segments)]
        # given csv file
        else:
            # atleast_2d: a file with a single row is parsed as a 1-D array,
            # which would otherwise be iterated field-by-field below.
            file_list = np.atleast_2d(
                np.genfromtxt(segments, dtype=str, delimiter=' '))
    # LOADED segments
    elif isinstance(segments, (tuple, list)):
        if len(segments) == 0:
            # reported below as 'NO jobs found' instead of an IndexError
            file_list = []
        # just a list of path to file
        elif isinstance(segments[0], str):
            file_list = [(os.path.basename(i), os.path.abspath(i), 0.0, -1.0)
                         for i in segments]
        # list of all information
        elif isinstance(segments[0], (tuple, list)):
            if len(segments[0]) not in (4, 5):
                raise Exception(
                    'segments must contain information in following for:'
                    '[name] [path] [start] [end]')
            file_list = segments
        else:
            raise ValueError(
                'Elements of `segments` must be path strings or '
                '(name, path, start, end[, channel]) records, but given: %s'
                % str(type(segments[0])))
    else:
        raise ValueError(
            '`segments` must be a path string, or a list/tuple of paths or '
            'records, but given: %s' % str(type(segments)))
    # filter using support audio extension (suffix match)
    file_list = [
        f for f in file_list if any(f[1].endswith(ext) for ext in audio_ext)
    ]
    # if no channel is provided, append the channel
    file_list = [list(f) + [0] if len(f) == 4 else f for f in file_list]
    self.njobs = len(file_list)
    # convert into: audio_path -> segment(name, start, end, channel)
    jobs = defaultdict(list)
    for segment, path, start, end, channel in file_list:
        jobs[path].append(
            (segment, float(start), float(end), int(channel)))
    self.jobs = sorted(jobs.items(), key=lambda x: x[0])
    # check empty jobs
    if len(self.jobs) == 0:
        raise Exception('NO jobs found for processing.')
    # ====== which features to get ====== #
    if not get_spec and not get_mspec and not get_mfcc \
    and not get_pitch and not get_energy and not get_vad:
        raise Exception('You must specify which features you want: '
                        'spectrogram, filter-banks, MFCC, or pitch.')
    # each entry is a 3-tuple: (feature name, dtype, flag)
    features_properties = []
    if get_mfcc:
        features_properties.append(('mfcc', dtype, True))
    if get_energy:
        features_properties.append(('energy', dtype, True))
    if get_spec:
        features_properties.append(('spec', dtype, True))
    if get_mspec:
        features_properties.append(('mspec', dtype, True))
    if get_qspec:
        features_properties.append(('qspec', dtype, True))
        # NOTE(review): the q* variants are taken to be nested under
        # `get_qspec`; the original indentation was not recoverable --
        # confirm against upstream.
        if get_mspec:
            features_properties.append(('qmspec', dtype, True))
        if get_mfcc:
            features_properties.append(('qmfcc', dtype, True))
        if get_phase:
            features_properties.append(('qphase', dtype, True))
    if get_phase:
        features_properties.append(('phase', dtype, True))
    if get_pitch:
        features_properties.append(('pitch', dtype, True))
    if get_vad:
        features_properties.append(('vad', 'uint8', False))
        features_properties.append(('vadids', 'dict', False))
    self.__features_properties = features_properties
    self.get_spec = get_spec
    self.get_mspec = get_mspec
    self.get_mfcc = get_mfcc
    self.get_pitch = get_pitch
    self.get_qspec = get_qspec
    self.get_phase = get_phase
    self.get_vad = get_vad
    self.get_energy = get_energy
    self.get_delta = int(get_delta)
    self.primary_indices = ['mfcc']
    # ====== feature information ====== #
    self.sr = sr
    self.win = win
    self.shift = shift
    self.nb_melfilters = nb_melfilters
    self.nb_ceps = nb_ceps
    # constraint pitch threshold in 0-1
    self.pitch_threshold = min(max(pitch_threshold, 0.), 1.)
    self.pitch_fmax = pitch_fmax
    self.vad_smooth = vad_smooth
    self.vad_minlen = vad_minlen
    self.cqt_bins = cqt_bins
    self.fmin = fmin
    self.fmax = fmax
    self.sr_new = sr_new
    self.preemphasis = preemphasis
    self.center = center
# ====== input / output locations ====== #
inpath = args.path
outpath = '/home/trung/data/TIDIGITS_wav'
compress_path = '/home/trung/data/TIDIGITS.zip'
# ====== others ====== #
wav_path, infopath, logpath = (
    os.path.join(inpath, _rel)
    for _rel in ("wave", 'data/children/doc/spkrinfo.txt', 'log.txt'))
for _label, _location in (("Input path: ", inpath),
                          ("Output path: ", outpath),
                          ("Convert to WAV at:", wav_path),
                          ("Log path: ", logpath)):
    print(_label, ctext(_location, 'cyan'))
stdio(logpath)
exts = get_all_ext(inpath)


def _is_speaker_wav(f):
    """Keep `.wav` files whose third-from-last path component is a speaker group."""
    if f[-4:] != '.wav':
        return False
    return f.split('/')[-3] in ('girl', 'boy', 'man', 'woman')


audio_files = get_all_files(inpath, filter_func=_is_speaker_wav)


# ID Gender Age Dialect Usage
# ID - Unique 2-character speaker identifier
# Gender - (M-man, W-woman, B-boy, G-girl)
# Age - Speaker age at time of recording
# Dialect - Dialect region identifier (see file "dialects.txt" for decode)
# Usage - (TST-test material, TRN-training material)
def _index_speaker_info(rows):
    """Map lower-cased speaker ID -> (lower-cased gender, age, dialect, usage)."""
    table = {}
    for speaker_id, gender, age, dialect, usage in rows:
        table[speaker_id.lower()] = (gender.lower(), age, dialect, usage)
    return table


info = _index_speaker_info(np.genfromtxt(infopath, dtype=str, skip_header=12))
gender_map = dict(man="m", woman="w", boy="b", girl="g")
usage_map = dict(TST="test", TRN="train")
from odin import fuel as F, nnet as N, backend as K, training from odin.utils import get_all_files, get_datasetpath from odin.stats import freqcount from odin.basic import has_roles, WEIGHT, BIAS # =========================================================================== # Const # =========================================================================== FEAT = 'mspec' # using mel-spectrogram np.random.seed(12082518) # =========================================================================== # Load wav files # =========================================================================== wav_path = F.load_commands_wav() print("Found:", len(get_all_files(wav_path, filter_func=lambda x: '.wav' in x)), " .wav files") datapath = get_datasetpath("commands", override=False) # ====== start preprocessing audio files ====== # if False: speech = F.SpeechProcessor(wav_path, datapath, win=0.025, shift=0.01, nb_melfilters=40, nb_ceps=13, get_spec=True, get_mspec=True, get_mfcc=True, get_qspec=True, get_phase=True,
inpath = args.path outpath = '/home/trung/data/TIDIGITS_wav' compress_path = '/home/trung/data/TIDIGITS.zip' # ====== others ====== # wav_path = os.path.join(inpath, "wave") infopath = os.path.join(inpath, 'data/children/doc/spkrinfo.txt') logpath = os.path.join(inpath, 'log.txt') print("Input path: ", ctext(inpath, 'cyan')) print("Output path: ", ctext(outpath, 'cyan')) print("Convert to WAV at:", ctext(wav_path, 'cyan')) print("Log path: ", ctext(logpath, 'cyan')) stdio(logpath) exts = get_all_ext(inpath) audio_files = get_all_files(inpath, filter_func=lambda f: f[-4:] == '.wav' and f.split('/')[-3] in ('girl', 'boy', 'man', 'woman')) # ID Gender Age Dialect Usage # ID - Unique 2-character speaker identifier # Gender - (M-man, W-woman, B-boy, G-girl) # Age - Speaker age at time of recording # Dialect - Dialect region identifier (see file "dialects.txt" for decode) # Usage - (TST-test material, TRN-training material) info = np.genfromtxt(infopath, dtype=str, skip_header=12) info = {ID.lower(): (Gender.lower(), Age, Dialect, Usage) for ID, Gender, Age, Dialect, Usage in info} gender_map = { "man": "m", "woman": "w", "boy": "b", "girl": "g"
import matplotlib matplotlib.use('TkAgg') from matplotlib import pyplot as plt import seaborn import numpy as np import shutil import os from odin import fuel as F, utils from odin.preprocessing import speech from odin import visual datapath = F.load_digit_wav() print(datapath) files = utils.get_all_files(datapath, lambda x: '.wav' in x) y, sr = speech.read(files[0]) print('Raw signal:', y.shape, sr) feat = speech.speech_features(y, sr, win=0.02, shift=0.01, nb_melfilters=40, nb_ceps=13, get_spec=True, get_mspec=True, get_mfcc=True, get_qspec=True, get_phase=True, get_pitch=True,