def find_data(ref, db):
    """Return the first entry in *db* whose "symbol" matches ``ref.parent.name``.

    Args:
        ref: Object whose ``parent.name`` attribute holds the symbol to look up.
        db: Iterable of dict-like entries, each expected to carry a "symbol" key.

    Returns:
        The first matching entry, or an empty dict (with a warning logged)
        when no entry matches.
    """
    idx = ref.parent.name
    # next() with a default stops at the first hit instead of materializing
    # every matching entry just to take element 0.
    match = next((entry for entry in db if entry["symbol"] == idx), None)
    if match is None:
        LOG.warning(f"WARNING: Reference {idx} not found")
        return {}
    return match
async def read_info_file(info_file, check=True, verbose=VERBOSE):
    """Asynchronously read and parse a JSON info file.

    Args:
        info_file: Path-like object pointing at the JSON file; may be falsy,
            in which case nothing is read.
        check: When True, log an error if the file does not exist.
        verbose: Verbosity level; values above 1 enable per-file log lines.

    Returns:
        The parsed JSON content, or ``{}`` when *info_file* is falsy or the
        file is missing.
    """
    # Guard clause: nothing to read.
    if not info_file:
        return {}
    if not info_file.exists():
        if check:
            LOG.error(f"ERROR: No info found at {info_file}")
        if verbose > 1:
            # Fixed grammar of the original "DO NOT EXISTS!" message.
            LOG.warning(f"Info file: {info_file}\tDOES NOT EXIST!")
        return {}
    async with aiofiles.open(info_file, "r") as info:
        data = await info.read()
    if verbose > 1:
        LOG.info(f"Info file read:{get_tabs('', prev=15)}{info_file}")
    return json.loads(data)
def _process_audio_files(worker_id: int, tasks: pd.DataFrame, p_out: PosixPath,
                         p_raw: PosixPath, n_samples: int = 59049,
                         sample_rate: int = 22050, topk: int = 50,
                         file_pattern: str = 'clip-{}-seg-{}-of-{}') -> None:
    """Segment each task's audio clip and save (segment, labels) npz files.

    For every row in *tasks*, loads the mp3 under ``p_raw``, cuts it into
    fixed-length segments, and writes each segment together with the row's
    top-k boolean labels to ``p_out/<split>/``. Clips that fail to load are
    logged and skipped. Progress is logged roughly every 10% of tasks.

    Args:
        worker_id: Identifier used to tag log lines.
        tasks: DataFrame whose rows carry ``split``, ``mp3_path``, ``clip_id``
            and (presumably) label columns in the first *topk* positions
            — TODO confirm column layout against the caller.
        p_out: Root output directory; a per-split subdirectory is used.
        p_raw: Root directory containing the raw mp3 files.
        n_samples: Samples per segment passed to ``_segment_audio``.
        sample_rate: Target sample rate passed to ``_load_audio``.
        topk: Number of leading columns treated as labels.
        file_pattern: Output filename template: (clip_id, seg_idx, n_segments).
    """
    n_tasks = tasks.shape[0]
    t_start = time.time()
    # Report progress roughly every 10% of tasks; max(1, ...) prevents a
    # ZeroDivisionError in `idx % n_parts` below when n_tasks < 10.
    n_parts = max(1, n_tasks // 10)
    idx = 0
    LOG.info(f"[Worker {worker_id:02d}]: Received {n_tasks} tasks.")
    for _, t in tasks.iterrows():
        # Output directory is keyed by the clip's dataset split.
        out_dir = p_out.joinpath(t.split)
        # Load and segment the audio file; a clip that fails to decode is
        # logged and skipped rather than aborting the whole worker.
        try:
            audio = _load_audio(p_raw.joinpath(t.mp3_path),
                                sample_rate=sample_rate)
            segments = _segment_audio(audio, n_samples=n_samples, center=False)
            loaded = True
        except (RuntimeError, EOFError):
            LOG.warning(f"[Worker {worker_id:02d}]: Failed load audio: {t.mp3_path}. Ignored.")
            loaded = False
        # Save labels and segments to compressed npy files.
        if loaded:
            # First `topk` columns of the row are taken as boolean labels.
            labels = t[t.index.tolist()[:topk]].values.astype(bool)
            n_segments = len(segments)
            for j, seg in enumerate(segments):
                out_path = out_dir.joinpath(
                    file_pattern.format(t.clip_id, j + 1, n_segments))
                np.savez_compressed(out_path.as_posix(), data=seg, labels=labels)
        # Report progress.
        idx += 1
        if idx == n_tasks:
            LOG.info(f"[Worker {worker_id:02d}]: Job finished. Quit. (time usage: {(time.time() - t_start) / 60:.02f} min)")
        elif idx % n_parts == 0:
            LOG.info(f"[Worker {worker_id:02d}]: {idx//n_parts*10}% tasks done. (time usage: {(time.time() - t_start) / 60:.02f} min)")
    return