def __init__(
    self,
    wtype: str = "blstmp",
    widim: int = 257,
    wlayers: int = 3,
    wunits: int = 300,
    wprojs: int = 320,
    dropout_rate: float = 0.0,
    taps: int = 5,
    delay: int = 3,
    use_dnn_mask: bool = True,
    nmask: int = 1,
    nonlinear: str = "sigmoid",
    iterations: int = 1,
    normalization: bool = False,
    eps: float = 1e-6,
    diagonal_loading: bool = True,
    diag_eps: float = 1e-7,
    mask_flooring: bool = False,
    flooring_thres: float = 1e-6,
    use_torch_solver: bool = True,
):
    """Record the configuration and optionally build a mask estimator.

    Args:
        wtype, widim, wlayers, wunits, wprojs, dropout_rate: Passed
            positionally, in this order, to ``MaskEstimator``. Only used
            when ``use_dnn_mask`` is true.
        taps: Stored as ``self.taps``.
        delay: Stored as ``self.delay``.
        use_dnn_mask: Stored as ``self.use_dnn_mask``; when true, a
            ``MaskEstimator`` is created as ``self.mask_est``.
        nmask: Number of masks requested from ``MaskEstimator``. Ignored
            (``self.nmask`` becomes 1) when ``use_dnn_mask`` is false.
        nonlinear: Passed to ``MaskEstimator`` as ``nonlinear``.
        iterations: Stored as ``self.iterations``.
        normalization: Stored as ``self.normalization``.
        eps: Stored as ``self.eps``.
        diagonal_loading: Stored as ``self.diagonal_loading``.
        diag_eps: Stored as ``self.diag_eps``.
        mask_flooring: Stored as ``self.mask_flooring``.
        flooring_thres: Stored as ``self.flooring_thres``.
        use_torch_solver: Stored as ``self.use_torch_solver``.
    """
    super().__init__()

    # Settings that are copied onto the instance unchanged.
    self.taps = taps
    self.delay = delay
    self.iterations = iterations
    self.eps = eps
    self.normalization = normalization
    self.use_dnn_mask = use_dnn_mask

    # Not configurable through the constructor: always enabled.
    self.inverse_power = True

    self.diagonal_loading = diagonal_loading
    self.diag_eps = diag_eps
    self.mask_flooring = mask_flooring
    self.flooring_thres = flooring_thres
    self.use_torch_solver = use_torch_solver

    # Without a mask network the requested nmask is ignored and 1 is used.
    self.nmask = nmask if use_dnn_mask else 1
    if use_dnn_mask:
        estimator_args = (wtype, widim, wlayers, wunits, wprojs, dropout_rate)
        self.mask_est = MaskEstimator(
            *estimator_args,
            nmask=nmask,
            nonlinear=nonlinear,
        )
def __init__(
    self,
    wtype: str = "blstmp",
    widim: int = 257,
    wlayers: int = 3,
    wunits: int = 300,
    wprojs: int = 320,
    dropout_rate: float = 0.0,
    taps: int = 5,
    delay: int = 3,
    use_dnn_mask: bool = True,
    nonlinear: str = "sigmoid",
    iterations: int = 1,
    normalization: bool = False,
):
    """Record the configuration and optionally build a mask estimator.

    Args:
        wtype, widim, wlayers, wunits, wprojs, dropout_rate: Passed
            positionally, in this order, to ``MaskEstimator``. Only used
            when ``use_dnn_mask`` is true.
        taps: Stored as ``self.taps``.
        delay: Stored as ``self.delay``.
        use_dnn_mask: Stored as ``self.use_dnn_mask``; when true, a
            ``MaskEstimator`` with ``nmask=1`` is created as
            ``self.mask_est``.
        nonlinear: Passed to ``MaskEstimator`` as ``nonlinear``.
        iterations: Stored as ``self.iterations``.
        normalization: Stored as ``self.normalization``.
    """
    super().__init__()

    # Settings that are copied onto the instance unchanged.
    self.taps = taps
    self.delay = delay
    self.iterations = iterations
    self.normalization = normalization
    self.use_dnn_mask = use_dnn_mask

    # Not configurable through the constructor: always enabled.
    self.inverse_power = True

    if use_dnn_mask:
        # This variant always asks the estimator for exactly one mask.
        estimator_args = (wtype, widim, wlayers, wunits, wprojs, dropout_rate)
        self.mask_est = MaskEstimator(
            *estimator_args,
            nmask=1,
            nonlinear=nonlinear,
        )
def __init__(
    self,
    bidim,
    btype: str = "blstmp",
    blayers: int = 3,
    bunits: int = 300,
    bprojs: int = 320,
    num_spk: int = 1,
    use_noise_mask: bool = True,
    nonlinear: str = "sigmoid",
    dropout_rate: float = 0.0,
    badim: int = 320,
    ref_channel: int = -1,
    beamformer_type: str = "mvdr_souden",
    rtf_iterations: int = 2,
    mwf_mu: float = 1.0,
    eps: float = 1e-6,
    diagonal_loading: bool = True,
    diag_eps: float = 1e-7,
    mask_flooring: bool = False,
    flooring_thres: float = 1e-6,
    use_torch_solver: bool = True,
    # only for WPD beamformer
    btaps: int = 5,
    bdelay: int = 3,
):
    """Build the mask estimator / reference selector and store settings.

    Args:
        bidim: Passed to ``MaskEstimator`` and ``AttentionReference``.
        btype, blayers, bunits, bprojs, dropout_rate: Passed to
            ``MaskEstimator`` together with ``bidim``.
        num_spk: Number of speakers; must be at least 1.
        use_noise_mask: When true, one extra mask is requested from
            ``MaskEstimator`` (``num_spk + 1`` instead of ``num_spk``).
        nonlinear: Passed to ``MaskEstimator`` as ``nonlinear``.
        badim: Passed to ``AttentionReference``.
        ref_channel: When negative, an ``AttentionReference`` is created
            as ``self.ref``; otherwise ``self.ref`` is ``None``.
        beamformer_type: Must be a member of ``BEAMFORMER_TYPES``.
        rtf_iterations: Number of iterations in the power method for
            estimating the RTF; must be >= 2 unless ``beamformer_type``
            ends with ``"_souden"``.
        mwf_mu: Noise suppression weight in SDW-MWF.
        eps: Passed to ``AttentionReference`` (when built) and stored.
        diagonal_loading, diag_eps, mask_flooring, flooring_thres,
            use_torch_solver: Stored on the instance unchanged.
        btaps: Only for WPD beamformer; must be non-negative.
        bdelay: Only for WPD beamformer; must be non-negative. Replaced
            by 1 when ``btaps`` is 0.

    Raises:
        ValueError: If ``beamformer_type`` is not in ``BEAMFORMER_TYPES``.
        AssertionError: If ``num_spk < 1``, if ``rtf_iterations < 2`` for
            a type not ending in ``"_souden"``, or if ``btaps`` or
            ``bdelay`` is negative.
    """
    super().__init__()

    # One mask per speaker, plus one more when a noise mask is requested.
    if use_noise_mask:
        bnmask = num_spk + 1
    else:
        bnmask = num_spk
    self.mask = MaskEstimator(
        btype,
        bidim,
        blayers,
        bunits,
        bprojs,
        dropout_rate,
        nmask=bnmask,
        nonlinear=nonlinear,
    )

    # A negative reference channel selects the attention-based reference.
    if ref_channel < 0:
        self.ref = AttentionReference(bidim, badim, eps=eps)
    else:
        self.ref = None
    self.ref_channel = ref_channel

    self.use_noise_mask = use_noise_mask
    assert num_spk >= 1, num_spk
    self.num_spk = num_spk
    self.nmask = bnmask

    if beamformer_type not in BEAMFORMER_TYPES:
        raise ValueError("Not supporting beamformer_type=%s" % beamformer_type)

    souden_variant = beamformer_type.endswith("_souden")
    # Every type except the non-MVDR "*_souden" ones triggers the notice
    # below when no noise mask is estimated.
    warn_about_noise_psd = beamformer_type == "mvdr_souden" or not souden_variant
    if warn_about_noise_psd and not use_noise_mask:
        if num_spk == 1:
            notices = (
                "Initializing %s beamformer without noise mask "
                "estimator (single-speaker case)",
                "(1 - speech_mask) will be used for estimating noise "
                "PSD in %s beamformer!",
            )
        else:
            notices = (
                "Initializing %s beamformer without noise mask "
                "estimator (multi-speaker case)",
                "Interference speech masks will be used for estimating "
                "noise PSD in %s beamformer!",
            )
        type_label = beamformer_type.upper()
        for template in notices:
            logging.warning(template % type_label)

    self.beamformer_type = beamformer_type
    # The iteration bound is only enforced for non-"_souden" types.
    assert souden_variant or rtf_iterations >= 2, rtf_iterations
    # number of iterations in power method for estimating the RTF
    self.rtf_iterations = rtf_iterations
    # noise suppression weight in SDW-MWF
    self.mwf_mu = mwf_mu

    assert btaps >= 0 and bdelay >= 0, (btaps, bdelay)
    self.btaps = btaps
    # With zero taps the configured delay is discarded in favour of 1.
    if btaps > 0:
        self.bdelay = bdelay
    else:
        self.bdelay = 1

    self.eps = eps
    self.diagonal_loading = diagonal_loading
    self.diag_eps = diag_eps
    self.mask_flooring = mask_flooring
    self.flooring_thres = flooring_thres
    self.use_torch_solver = use_torch_solver
def __init__(
    self,
    bidim,
    btype: str = "blstmp",
    blayers: int = 3,
    bunits: int = 300,
    bprojs: int = 320,
    num_spk: int = 1,
    use_noise_mask: bool = True,
    nonlinear: str = "sigmoid",
    dropout_rate: float = 0.0,
    badim: int = 320,
    ref_channel: int = -1,
    beamformer_type: str = "mvdr",
    eps: float = 1e-6,
    # only for WPD beamformer
    btaps: int = 5,
    bdelay: int = 3,
):
    """Build the mask estimator / reference selector and store settings.

    Args:
        bidim: Passed to ``MaskEstimator`` and ``AttentionReference``.
        btype, blayers, bunits, bprojs, dropout_rate: Passed to
            ``MaskEstimator`` together with ``bidim``.
        num_spk: Number of speakers; must be at least 1.
        use_noise_mask: When true, one extra mask is requested from
            ``MaskEstimator`` (``num_spk + 1`` instead of ``num_spk``).
        nonlinear: Passed to ``MaskEstimator`` as ``nonlinear``.
        badim: Passed to ``AttentionReference``.
        ref_channel: When negative, an ``AttentionReference`` is created
            as ``self.ref``; otherwise ``self.ref`` is ``None``.
        beamformer_type: One of ``"mvdr"``, ``"mpdr"`` or ``"wpd"``.
        eps: Stored as ``self.eps``.
        btaps: Only for WPD beamformer; must be non-negative.
        bdelay: Only for WPD beamformer; must be non-negative. Replaced
            by 1 when ``btaps`` is 0.

    Raises:
        ValueError: If ``beamformer_type`` is not a supported value.
        AssertionError: If ``num_spk < 1`` or if ``btaps`` or ``bdelay``
            is negative.
    """
    super().__init__()

    # One mask per speaker, plus one more when a noise mask is requested.
    if use_noise_mask:
        bnmask = num_spk + 1
    else:
        bnmask = num_spk
    self.mask = MaskEstimator(
        btype,
        bidim,
        blayers,
        bunits,
        bprojs,
        dropout_rate,
        nmask=bnmask,
        nonlinear=nonlinear,
    )

    # A negative reference channel selects the attention-based reference.
    if ref_channel < 0:
        self.ref = AttentionReference(bidim, badim)
    else:
        self.ref = None
    self.ref_channel = ref_channel

    self.use_noise_mask = use_noise_mask
    assert num_spk >= 1, num_spk
    self.num_spk = num_spk
    self.nmask = bnmask

    supported_types = ("mvdr", "mpdr", "wpd")
    if beamformer_type not in supported_types:
        raise ValueError(
            "Not supporting beamformer_type={}".format(beamformer_type))

    # Only the MVDR type emits the notice about the missing noise mask.
    if beamformer_type == "mvdr" and not use_noise_mask:
        if num_spk == 1:
            notices = (
                "Initializing MVDR beamformer without noise mask "
                "estimator (single-speaker case)",
                "(1 - speech_mask) will be used for estimating noise "
                "PSD in MVDR beamformer!",
            )
        else:
            notices = (
                "Initializing MVDR beamformer without noise mask "
                "estimator (multi-speaker case)",
                "Interference speech masks will be used for estimating "
                "noise PSD in MVDR beamformer!",
            )
        for notice in notices:
            logging.warning(notice)

    self.beamformer_type = beamformer_type

    assert btaps >= 0 and bdelay >= 0, (btaps, bdelay)
    self.btaps = btaps
    # With zero taps the configured delay is discarded in favour of 1.
    if btaps > 0:
        self.bdelay = bdelay
    else:
        self.bdelay = 1

    self.eps = eps