示例#1
0
def compute_mask(mixture, targets_list, mask_type):
    """
    Compute one time-frequency mask per target signal.

    Arguments:
        mixture: STFT of the mixture signal (complex matrix)
        targets_list: python list with the STFT (complex matrix) of each
            target signal
        mask_type: one of ["irm", "ibm", "iam", "psm"]; any value other
            than "ibm", "irm" and "psm" takes the "iam" path
    Return:
        masks_list: python list holding one mask per entry of
            targets_list, in the same order ("ibm" masks are boolean)
    """
    # Binary mask: a bin belongs to the target with the largest magnitude.
    if mask_type == "ibm":
        stacked_mags = np.stack([cmat_abs(spec) for spec in targets_list])
        dominant = np.argmax(stacked_mags, 0)
        return [dominant == idx for idx in range(len(targets_list))]

    # Ratio masks: "irm" normalises by the summed target magnitudes, the
    # remaining types by the mixture magnitude. EPSILON is added to the
    # denominator in both cases.
    if mask_type == "irm":
        norm = sum([cmat_abs(spec) for spec in targets_list]) + EPSILON
    else:
        norm = cmat_abs(mixture) + EPSILON

    if mask_type == "psm":
        # Phase-sensitive: scale by the cosine of the phase difference
        # between the mixture and each target.
        ref_phase = np.angle(mixture)
        return [
            cmat_abs(spec) * np.cos(ref_phase - np.angle(spec)) / norm
            for spec in targets_list
        ]

    return [cmat_abs(spec) / norm for spec in targets_list]
示例#2
0
def compute_mask(speech, noise_or_mixture, mask):
    """
    Compute a time-frequency mask for the speech signal.

    for signal model:
        y = x1 + x2
    def f = STFT(x):
        f(y) = f(x1) + f(x2) => |f(y)| = |f(x1) + f(x2)| <= |f(x1)| + |f(x2)|
    for ibm:
        M(x1) = 1 if |f(x1)| > |f(x2)| else 0
    for irm:
        1) M(x1) = |f(x1)| / (|f(x1)| + |f(x2)|)            DongYu
        2) M(x1) = |f(x1)| / sqrt(|f(x1)|^2 + |f(x2)|^2)    DeliangWang
        s.t. 1 >= 2) >= 1) >= 0
        (form 2 is the one implemented here)
    for iam(FFT-mask, smm):
        M(x1) = |f(x1)| / |f(y)| = |f(x1)| / |f(x1) + f(x2)| in [0, oo]
    for psm:
        M(x1) = Re(f(x1) / f(y)) = |f(x1)| * cos(delta_phase) / |f(y)|
    for psa:
        A(x1) = |f(x1)| * cos(delta_phase)      (psm numerator only)

    Arguments:
        speech: STFT of the target speech, f(x1)
        noise_or_mixture: STFT the speech is compared against; going by
            the formulas above it plays the role of f(x2) for ibm/irm and
            of f(y) for iam/psm/psa
        mask: one of "ibm", "irm", "iam", "psm", "psa"; any other value
            is handled like "iam"
    Return:
        the mask (or, for psa, the phase-sensitive amplitude) computed
        element-wise from the two inputs; "ibm" yields 0.0/1.0 float64
    """
    if mask == "ibm":
        binary_mask = cmat_abs(speech) > cmat_abs(noise_or_mixture)
        # np.float was a plain alias of the builtin float (float64) and
        # was removed in NumPy 1.24; use the explicit dtype instead.
        return binary_mask.astype(np.float64)
    # irm/iam/psm
    # NOTE(review): the denominators below are not guarded against zero,
    # so all-zero bins produce inf/NaN.
    if mask == "irm":
        # alternative (form 1): cmat_abs(speech) + cmat_abs(noise_or_mixture)
        denominator = np.sqrt(
            cmat_abs(speech)**2 + cmat_abs(noise_or_mixture)**2)
    else:
        denominator = cmat_abs(noise_or_mixture)
    if mask == "psm":
        return cmat_abs(speech) * np.cos(
            np.angle(noise_or_mixture) - np.angle(speech)) / denominator
    elif mask == "psa":
        # keep numerator only
        return cmat_abs(speech) * np.cos(
            np.angle(noise_or_mixture) - np.angle(speech))
    else:
        # irm/iam
        return cmat_abs(speech) / denominator
示例#3
0
def compute_mask(tgt, mix, mask):
    """
    Compute a time-frequency mask for the target from target/mixture STFTs.

    for signal model:
        y = x1 + x2
    def f = STFT(x):
        f(y) = f(x1) + f(x2) => |f(y)| = |f(x1) + f(x2)| <= |f(x1)| + |f(x2)|
    for ibm:
        M(x1) = 1 if |f(x1)| > |f(x2)| else 0
    for irm:
        1) M(x1) = |f(x1)| / (|f(x1)| + |f(x2)|)            DongYu
        2) M(x1) = |f(x1)| / sqrt(|f(x1)|^2 + |f(x2)|^2)    Deliang Wang
        s.t. 1 >= 2) >= 1) >= 0
        (form 2 is the one implemented here)
    for iam(FFT-mask, smm):
        M(x1) = |f(x1)| / |f(y)| = |f(x1)| / |f(x1) + f(x2)| in [0, oo]
    for psm:
        M(x1) = Re(f(x1) / f(y)) = |f(x1)| * cos(delta_phase) / |f(y)|
    for psa:
        A(x1) = |f(x1)| * max(0, cos(delta_phase))
    for crm:
        M(x1) = f(x1) / f(y)

    Arguments:
        tgt: STFT of the target signal, f(x1)
        mix: STFT of the mixture, f(y); the interference is taken to be
            mix - tgt
        mask: one of "ibm", "irm", "iam", "psm", "psa", "crm"; any other
            value is handled like "iam"
    Return:
        the requested mask. "ibm" is float32 0/1. "crm" is the real and
        imaginary parts of the complex mask, each passed through
        tangent() (a helper defined outside this block) and joined with
        np.hstack. EPSILON is added to every denominator so that empty
        bins do not produce inf/NaN.
    """
    # target speech
    tgt_abs = cmat_abs(tgt)
    # mixture
    mix_abs = cmat_abs(mix)

    if mask == "ibm":
        # interference magnitude is only needed by ibm/irm
        inf_abs = cmat_abs(mix - tgt)
        return (tgt_abs > inf_abs).astype(np.float32)

    if mask == "irm":
        inf_abs = cmat_abs(mix - tgt)
        # alternative (form 1): tgt_abs + inf_abs
        return tgt_abs / np.sqrt(tgt_abs**2 + inf_abs**2 + EPSILON)

    if mask == "crm":
        # complex ratio; stack real/imag part
        cpx_mask = tgt / (mix + EPSILON)
        return np.hstack(
            [tangent(np.real(cpx_mask)),
             tangent(np.imag(cpx_mask))])

    # phase sensitive amplitude
    if mask == "psa":
        # keep numerator only, with negative cosines clipped to zero
        non_neg = np.maximum(0, np.cos(np.angle(mix) - np.angle(tgt)))
        return tgt_abs * non_neg

    # iam/psm: guard the mixture magnitude the same way the irm/crm
    # denominators are guarded, otherwise silent bins give 0/0 -> NaN.
    denominator = mix_abs + EPSILON
    if mask == "psm":
        return tgt_abs * np.cos(np.angle(mix) - np.angle(tgt)) / denominator
    # iam (and any unrecognised mask name)
    return tgt_abs / denominator
def compute_vad_masks(spectrogram, proportion):
    """
    Mark the lowest-valued bins of a spectrogram as silence.

    Bins are sorted by the value cmat_abs() returns for them, and the
    smallest ones are dropped until their running sum exceeds
    (1 - proportion) of the total, i.e. about proportion*100% of the
    total is kept.

    Arguments:
        spectrogram: F x T
        proportion: fraction of the total to keep
    Return:
        vad_mask: T x F boolean matrix, True where the bin is below the
            threshold (silence)
        index: position in the ascending-sorted values at which the
            running sum first exceeds the discarded budget; equals the
            number of bins when it never does
    """
    energy_mat = cmat_abs(spectrogram)
    energy_vec = np.sort(energy_mat.flatten())
    num_bins = energy_vec.shape[0]
    filter_energy = np.sum(energy_vec) * (1 - proportion)

    # First position whose running sum is strictly greater than the
    # budget. The running sum is non-decreasing as long as the values are
    # non-negative (assumed of cmat_abs output), which is what
    # searchsorted requires.
    running_sum = np.cumsum(energy_vec)
    index = int(np.searchsorted(running_sum, filter_energy, side="right"))

    if index < num_bins:
        threshold = energy_vec[index]
    elif num_bins > 0:
        # budget never exceeded: fall back to the largest value
        threshold = energy_vec[-1]
    else:
        # empty spectrogram
        threshold = 0

    # silence if 1
    vad_mask = (energy_mat < threshold)
    return vad_mask.transpose(), index