示例#1
0
 def __init__(self, rt60_opt, absc_opt, room_dim):
     """
     Build the uniform samplers for rt60 (or absc) and for the room dimensions.

     Args:
         rt60_opt: "" or a string like "a,b"; when non-empty it takes
             priority over absc_opt
         absc_opt: tuple like (a, b), only used when rt60_opt is empty
         room_dim: str like "a,b;c,d;e,f", i.e., three ranges joined by ";"
     Raises:
         RuntimeError: room_dim does not split into exactly three ranges
     """
     self.rt60_opt = rt60_opt
     if rt60_opt:
         # rt60 is passed as a string and must be parsed before sampling
         self.rt60 = UniformSampler(str2tuple(rt60_opt))
     else:
         # absc_opt is handed to the sampler as it is
         self.absc = UniformSampler(absc_opt)
     # one range per ";"-separated field
     ranges = list(map(str2tuple, room_dim.split(";")))
     if len(ranges) != 3:
         raise RuntimeError(f"Wrong format with --room-dim={room_dim}")
     self.dim_sampler = list(map(UniformSampler, ranges))
示例#2
0
def run(args):
    """
    Compute one steer vector per candidate DoA and dump them with np.save.

    For the "linear" geometry, args.num_doas candidates are spaced evenly
    over [0, 180] (both ends included). For any other geometry the
    candidates start at 0 and advance by 360 / args.num_doas, stopping
    before 360. The stacked result is optionally scaled by
    1 / sqrt(size of its last axis), its last two axes are swapped and it
    is written to args.steer_vector.
    """
    if args.geometry == "linear":
        topo = np.array(str2tuple(args.linear_topo))
        candidate_doa = np.linspace(0, 180, args.num_doas)
        per_doa = [
            linear_steer_vector(topo,
                                angle,
                                args.num_bins,
                                c=args.speed,
                                sr=args.sr) for angle in candidate_doa
        ]
    else:
        candidate_doa = np.arange(0, 360, 360 / args.num_doas)
        per_doa = [
            circular_steer_vector(args.circular_radius,
                                  args.circular_around,
                                  angle,
                                  args.num_bins,
                                  c=args.speed,
                                  sr=args.sr,
                                  center=args.circular_center)
            for angle in candidate_doa
        ]
    # A x F x M
    sv = np.stack(per_doa)
    if args.normalize:
        # scale by the square root of the last axis size
        scale = sv.shape[-1]**0.5
        sv = sv / scale
    # A x F x M => A x M x F
    sv = np.transpose(sv, (0, 2, 1))
    np.save(args.steer_vector, sv)
示例#3
0
def run(args):
    """
    Beamform every utterance of args.wav_scp with a LinearSDBeamformer or a
    CircularSDBeamformer and write the results to args.dst_dir.

    The DoA is read per utterance from args.utt2doa when that option is set,
    otherwise args.doa is used for all utterances. Utterances that are
    missing from utt2doa, or whose DoA is rejected by check_doa, are skipped.

    Raises:
        RuntimeError: args.doa is rejected by check_doa for args.geometry
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }

    if args.geometry == "linear":
        topo = str2tuple(args.linear_topo)
        beamformer = LinearSDBeamformer(topo)
        logger.info(f"Initialize LinearSDBeamformer for array: {topo}")
    else:
        beamformer = CircularSDBeamformer(args.circular_radius,
                                          args.circular_around,
                                          center=args.circular_center)
        logger.info(
            "Initialize CircularSDBeamformer for " +
            f"radius = {args.circular_radius}, center = {args.circular_center}"
        )

    utt2doa = None
    doa = None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=float)
        logger.info(f"Use --utt2doa={args.utt2doa} for each utterance")
    else:
        doa = args.doa
        # An invalid global DoA would be applied to every utterance, so
        # stop here instead of only logging it and carrying on
        if not check_doa(args.geometry, doa):
            raise RuntimeError(
                f"Invalid doa {doa:.2f} for {args.geometry} array")
        logger.info(f"Use --doa={doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    done = 0
    with WaveWriter(args.dst_dir, sr=args.sr) as writer:
        for key, stft_src in spectrogram_reader:
            if utt2doa:
                # no DoA available for this utterance
                if key not in utt2doa:
                    continue
                doa = utt2doa[key]
                if not check_doa(args.geometry, doa):
                    logger.info(f"Invalid DoA {doa:.2f} for utterance {key}")
                    continue
            stft_enh = beamformer.run(doa, stft_src, c=args.speed, sr=args.sr)
            done += 1
            # value from the reader, passed as norm to inverse_stft
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {done} utterances over {len(spectrogram_reader)}")
示例#4
0
 def __init__(self, args):
     """
     Check the gpuRIR requirement, create the dump directory and keep the
     options needed later on the instance.

     Raises:
         RuntimeError: args.gpu is set while gpuRIR is not available
     """
     gpu_unusable = args.gpu and not gpu_rir_available
     if gpu_unusable:
         raise RuntimeError("Please install gpuRIR first if --gpu=True")
     # create the dump directory (with parents), fine if it already exists
     dump_dir = Path(args.dump_dir)
     dump_dir.mkdir(parents=True, exist_ok=True)
     self.rirs_cfg = []
     self.room_generator = RoomGenerator(args.rt60, args.abs_range,
                                         args.room_dim)
     self.mx = args.array_relx
     self.my = args.array_rely
     # one tuple per ";"-separated field of --array-topo
     self.array_topo = list(map(str2tuple, args.array_topo.split(";")))
     self.sr = args.sample_rate
     self.args = args
示例#5
0
def run(args):
    """
    Beamform every utterance of args.wav_scp with a LinearDSBeamformer and
    write the results to args.dst_dir.

    The DoA comes from args.utt2doa (one value per utterance) when that
    option is set, otherwise args.doa is used for all utterances. A negative
    DoA is shifted by +180 before it is checked against [0, 180].
    Utterances that are missing from utt2doa or whose DoA is out of range
    are skipped.

    Raises:
        RuntimeError: args.doa is outside [0, 180] after the shift
    """

    def fold(angle):
        # negative angles are shifted by 180
        return 180 + angle if angle < 0 else angle

    def out_of_range(angle):
        return angle < 0 or angle > 180

    stft_kwargs = dict(frame_len=args.frame_len,
                       frame_hop=args.frame_hop,
                       window=args.window,
                       center=args.center,
                       transpose=False)

    utt2doa, doa = None, None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=lambda x: float(x))
        logger.info(f"Use utt2doa {args.utt2doa} for each utterance")
    else:
        doa = fold(args.doa)
        if out_of_range(doa):
            raise RuntimeError(f"Invalid doa {doa:.2f} for --doa")
        logger.info(f"Use DoA {doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    topo = str2tuple(args.linear_topo)
    beamformer = LinearDSBeamformer(topo)
    logger.info(f"Initialize channel LinearDSBeamformer for array: {topo}")

    done = 0
    with WaveWriter(args.dst_dir, fs=args.fs) as writer:
        for key, stft_src in spectrogram_reader:
            if utt2doa:
                # no DoA available for this utterance
                if key not in utt2doa:
                    continue
                doa = fold(utt2doa[key])
                if out_of_range(doa):
                    logger.info(f"Invalid doa {doa:.2f} for utterance {key}")
                    continue
            stft_enh = beamformer.run(doa, stft_src, c=args.speed, sr=args.fs)
            done += 1
            # value from the reader, passed as norm to inverse_stft
            samps = inverse_stft(stft_enh,
                                 **stft_kwargs,
                                 norm=spectrogram_reader.maxabs(key))
            writer.write(key, samps)
    logger.info(f"Processed {done} utterances over {len(spectrogram_reader)}")
示例#6
0
def run(args):
    """
    Estimate the DoA of every utterance in args.wav_scp and write the
    results to args.doa_scp, one "key<TAB>value(s)" line per utterance.

    The steer vector loaded from args.steer_vector provides the candidate
    directions. The selected candidate index is mapped to an angle between
    the two ends of args.doa_range (in radians if args.output is "radian").
    With args.backend == "srp" the index comes from srp_ssl, otherwise from
    ml_ssl. When both args.chunk_len and args.look_back are positive, the
    utterance is processed chunk by chunk (online mode) and one angle per
    chunk is written, separated by spaces.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "round_power_of_two": args.round_power_of_two,
        "window": args.window,
        "center": args.center,
        "transpose": True
    }
    steer_vector = np.load(args.steer_vector)
    logger.info(f"Shape of the steer vector: {steer_vector.shape}")
    num_doa, _, _ = steer_vector.shape
    min_doa, max_doa = str2tuple(args.doa_range)
    # angle grid with num_doa + 1 points, indexed by the estimated idx
    if args.output == "radian":
        angles = np.linspace(min_doa * np.pi / 180, max_doa * np.pi / 180,
                             num_doa + 1)
    else:
        angles = np.linspace(min_doa, max_doa, num_doa + 1)

    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)
    mask_reader = None
    if args.mask_scp:
        mask_reader = [NumpyReader(scp) for scp in args.mask_scp.split(",")]
    online = (args.chunk_len > 0 and args.look_back > 0)
    if online:
        logger.info("Set up in online mode: chunk_len " +
                    f"= {args.chunk_len}, look_back = {args.look_back}")

    if args.backend == "srp":
        # "a,b;c,d" => ([a, c], [b, d])
        pairs = [
            tuple(map(int, p.split(","))) for p in args.srp_pair.split(";")
        ]
        srp_pair = ([t[0] for t in pairs], [t[1] for t in pairs])
        logger.info(f"Choose srp-based algorithm, srp pair is {srp_pair}")
    else:
        srp_pair = None

    def locate(obs, obs_mask):
        # Run the chosen backend and return the index into angles
        if srp_pair:
            return srp_ssl(obs,
                           steer_vector,
                           srp_pair=srp_pair,
                           mask=obs_mask)
        return ml_ssl(obs,
                      steer_vector,
                      mask=obs_mask,
                      compression=-1,
                      eps=EPSILON)

    with open(args.doa_scp, "w") as doa_out:
        for key, stft in spectrogram_reader:
            # stft: M x T x F
            _, T, F = stft.shape
            if mask_reader:
                mask = [r[key] for r in mask_reader]
                if args.mask_eps >= 0 and len(mask_reader) > 1:
                    mask = add_wta(mask, eps=args.mask_eps)
                # only the first mask is used
                mask = mask[0]
                # F x T => T x F
                if mask.shape[-1] != F:
                    mask = mask.transpose()
            else:
                mask = None
            if not online:
                doa = angles[locate(stft, mask)]
                logger.info(f"Processing utterance {key}: {doa:.4f}")
                doa_out.write(f"{key}\t{doa:.4f}\n")
            else:
                logger.info(f"Processing utterance {key}...")
                online_doa = []
                for t in range(0, T, args.chunk_len):
                    # extend the chunk backwards by look_back frames
                    s = max(t - args.look_back, 0)
                    e = t + args.chunk_len
                    if mask is not None:
                        # mask is T x F here, so the chunk must be cut on
                        # the time axis, exactly like the STFT chunk below
                        # (cutting the last axis would slice frequencies)
                        chunk_mask = mask[..., s:e, :]
                    else:
                        chunk_mask = None
                    stft_chunk = stft[:, s:e, :]
                    online_doa.append(angles[locate(stft_chunk, chunk_mask)])
                doa_str = " ".join([f"{d:.4f}" for d in online_doa])
                doa_out.write(f"{key}\t{doa_str}\n")
    logger.info(f"Processing {len(spectrogram_reader)} utterance done")