Exemplo n.º 1
0
    def __init__(
        self,
        bidim,
        btype: str = "blstmp",
        blayers: int = 3,
        bunits: int = 300,
        bprojs: int = 320,
        num_spk: int = 1,
        use_noise_mask: bool = True,
        dropout_rate: float = 0.0,
        badim: int = 320,
        ref_channel: int = -1,
        beamformer_type: str = "mvdr",
        eps: float = 1e-6,
        # only for WPD beamformer
        btaps: int = 5,
        bdelay: int = 3,
    ):
        """Create the mask estimator, the optional reference selector and settings.

        Args:
            bidim: Forwarded to ``MaskEstimator`` and ``AttentionReference``.
            btype: Forwarded to ``MaskEstimator``.
            blayers: Forwarded to ``MaskEstimator``.
            bunits: Forwarded to ``MaskEstimator``.
            bprojs: Forwarded to ``MaskEstimator``.
            num_spk: Number of speakers; asserted to be at least 1.
            use_noise_mask: When true, one mask beyond ``num_spk`` is
                requested from the estimator.
            dropout_rate: Forwarded to ``MaskEstimator``.
            badim: Forwarded to ``AttentionReference``.
            ref_channel: When negative, an ``AttentionReference`` is built;
                otherwise ``self.ref`` is ``None``.
            beamformer_type: One of ``"mvdr"``, ``"mpdr"`` or ``"wpd"``.
            eps: Stored unchanged as ``self.eps``.
            btaps: Asserted to be non-negative; stored as ``self.btaps``.
            bdelay: Asserted to be non-negative; stored as ``self.bdelay``,
                except that 1 is stored instead when ``btaps`` is not
                positive.

        Raises:
            ValueError: If ``beamformer_type`` is not a supported name.
            AssertionError: If ``num_spk < 1``, ``btaps < 0`` or
                ``bdelay < 0``.
        """
        super().__init__()

        # One mask per speaker, plus an extra one when a noise mask is wanted.
        if use_noise_mask:
            mask_count = num_spk + 1
        else:
            mask_count = num_spk

        estimator_args = (btype, bidim, blayers, bunits, bprojs, dropout_rate)
        self.mask = MaskEstimator(*estimator_args, nmask=mask_count)

        # A negative reference channel means no fixed channel was given, so
        # an attention-based reference is instantiated instead.
        if ref_channel < 0:
            self.ref = AttentionReference(bidim, badim)
        else:
            self.ref = None
        self.ref_channel = ref_channel

        self.use_noise_mask = use_noise_mask
        assert num_spk >= 1, num_spk
        self.num_spk = num_spk
        self.nmask = mask_count

        supported_types = ("mvdr", "mpdr", "wpd")
        if beamformer_type not in supported_types:
            raise ValueError(
                "Not supporting beamformer_type={}".format(beamformer_type))

        if beamformer_type == "mvdr" and not use_noise_mask:
            # Without a noise mask, tell the user what the noise PSD estimate
            # will be based on; the wording depends on the speaker count.
            if num_spk == 1:
                notices = (
                    "Initializing MVDR beamformer without noise mask "
                    "estimator (single-speaker case)",
                    "(1 - speech_mask) will be used for estimating noise "
                    "PSD in MVDR beamformer!",
                )
            else:
                notices = (
                    "Initializing MVDR beamformer without noise mask "
                    "estimator (multi-speaker case)",
                    "Interference speech masks will be used for estimating "
                    "noise PSD in MVDR beamformer!",
                )
            for notice in notices:
                logging.warning(notice)

        self.beamformer_type = beamformer_type
        assert btaps >= 0 and bdelay >= 0, (btaps, bdelay)
        self.btaps = btaps
        # The requested delay is only kept when there is at least one tap.
        if self.btaps > 0:
            self.bdelay = bdelay
        else:
            self.bdelay = 1
        self.eps = eps
Exemplo n.º 2
0
    def __init__(self,
                 bidim,
                 btype='blstmp',
                 blayers=3,
                 bunits=300,
                 bprojs=320,
                 bnmask=2,
                 dropout_rate=0.0,
                 badim=320,
                 ref_channel: int = -1,
                 beamformer_type='mvdr'):
        """Create the mask estimator and attention reference, then store settings.

        Args:
            bidim: Forwarded to ``MaskEstimator`` and ``AttentionReference``.
            btype: Forwarded to ``MaskEstimator``.
            blayers: Forwarded to ``MaskEstimator``.
            bunits: Forwarded to ``MaskEstimator``.
            bprojs: Forwarded to ``MaskEstimator``.
            bnmask: Passed to ``MaskEstimator`` as ``nmask`` and stored as
                ``self.nmask``.
            dropout_rate: Forwarded to ``MaskEstimator``.
            badim: Forwarded to ``AttentionReference``.
            ref_channel: Stored unchanged as ``self.ref_channel``.
            beamformer_type: Only ``'mvdr'`` is accepted.

        Raises:
            ValueError: If ``beamformer_type`` is anything but ``'mvdr'``.
        """
        super().__init__()

        estimator_args = (btype, bidim, blayers, bunits, bprojs, dropout_rate)
        self.mask = MaskEstimator(*estimator_args, nmask=bnmask)
        # The attention reference is always created here, regardless of the
        # value of ref_channel.
        self.ref = AttentionReference(bidim, badim)

        self.ref_channel = ref_channel
        self.nmask = bnmask

        if beamformer_type == 'mvdr':
            self.beamformer_type = beamformer_type
        else:
            raise ValueError(
                'Not supporting beamformer_type={}'.format(beamformer_type))
0
    def __init__(
        self,
        wtype: str = 'blstmp',
        widim: int = 257,
        wlayers: int = 3,
        wunits: int = 300,
        wprojs: int = 320,
        dropout_rate: float = 0.0,
        taps: int = 5,
        delay: int = 3,
        use_dnn_mask: bool = True,
        iterations: int = 1,
        normalization: bool = False,
    ):
        """Store the configuration and optionally create a mask estimator.

        Args:
            wtype: Forwarded to ``MaskEstimator`` when a DNN mask is used.
            widim: Forwarded to ``MaskEstimator`` when a DNN mask is used.
            wlayers: Forwarded to ``MaskEstimator`` when a DNN mask is used.
            wunits: Forwarded to ``MaskEstimator`` when a DNN mask is used.
            wprojs: Forwarded to ``MaskEstimator`` when a DNN mask is used.
            dropout_rate: Forwarded to ``MaskEstimator`` when a DNN mask is
                used.
            taps: Stored unchanged as ``self.taps``.
            delay: Stored unchanged as ``self.delay``.
            use_dnn_mask: When true, ``self.mask_est`` is created with a
                single mask (``nmask=1``); otherwise that attribute is not
                set by this method.
            iterations: Stored unchanged as ``self.iterations``.
            normalization: Stored unchanged as ``self.normalization``.
        """
        super().__init__()

        self.taps, self.delay = taps, delay
        self.iterations = iterations
        self.use_dnn_mask = use_dnn_mask
        self.normalization = normalization

        # Not configurable through the constructor; always enabled here.
        self.inverse_power = True

        if self.use_dnn_mask:
            estimator_args = (
                wtype, widim, wlayers, wunits, wprojs, dropout_rate)
            self.mask_est = MaskEstimator(*estimator_args, nmask=1)