Example #1
    def __init__(
        self,
        etype,
        idim,
        elayers_sd,
        elayers_rec,
        eunits,
        eprojs,
        subsample,
        dropout,
        num_spkrs=2,
        in_channel=1,
    ):
        """Initialize the encoder of single-channel multi-speaker ASR."""
        super(EncoderMix, self).__init__()
        # Strip the "vgg" prefix and the trailing projection flag "p" to get the
        # (possibly bidirectional) RNN type, e.g. "vggblstmp" -> "blstm".
        typ = etype.lstrip("vgg").rstrip("p")
        if typ not in ["lstm", "gru", "blstm", "bgru"]:
            logging.error(
                "Error: need to specify an appropriate encoder architecture")
        if etype.startswith("vgg"):
            if etype[-1] == "p":
                # Shared VGG2L front-end applied to the input mixture.
                self.enc_mix = torch.nn.ModuleList([VGG2L(in_channel)])
                # Speaker-differentiating (SD) RNNP branches, one per speaker.
                self.enc_sd = torch.nn.ModuleList([
                    torch.nn.ModuleList([
                        RNNP(
                            get_vgg2l_odim(idim, in_channel=in_channel),
                            elayers_sd,
                            eunits,
                            eprojs,
                            subsample[:elayers_sd + 1],
                            dropout,
                            typ=typ,
                        )
                    ]) for _ in range(num_spkrs)
                ])
                # Shared recognition RNNP applied after the SD branches.
                self.enc_rec = torch.nn.ModuleList([
                    RNNP(
                        eprojs,
                        elayers_rec,
                        eunits,
                        eprojs,
                        subsample[elayers_sd:],
                        dropout,
                        typ=typ,
                    )
                ])
                logging.info("Use CNN-VGG + B" + typ.upper() + "P for encoder")
            else:
                logging.error(
                    f"Error: need to specify an appropriate encoder architecture. "
                    f"Illegal name {etype}")
                sys.exit()
        else:
            logging.error(
                f"Error: need to specify an appropriate encoder architecture. "
                f"Illegal name {etype}")
            sys.exit()

        self.num_spkrs = num_spkrs
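
For context, a minimal instantiation sketch of the encoder above. It assumes that EncoderMix and its ESPnet dependencies (VGG2L, RNNP, get_vgg2l_odim) are importable from the surrounding code base; all argument values are illustrative only.

    # Illustrative sketch; EncoderMix is assumed to be importable from the
    # surrounding code base. etype must start with "vgg" and end with "p",
    # and subsample is assumed to hold elayers_sd + elayers_rec + 1 entries.
    encoder = EncoderMix(
        etype="vggblstmp",          # VGG2L front-end + projected BLSTM layers
        idim=83,                    # e.g. 80-dim fbank + 3-dim pitch features
        elayers_sd=2,               # SD layers per speaker
        elayers_rec=2,              # shared recognition layers
        eunits=320,
        eprojs=320,
        subsample=[1, 2, 2, 1, 1],
        dropout=0.0,
        num_spkrs=2,
    )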
Example #2
    def __init__(self,
                 type,
                 idim,
                 layers,
                 units,
                 projs,
                 dropout,
                 nmask=1,
                 nonlinear="sigmoid"):
        super().__init__()
        subsample = np.ones(layers + 1, dtype=np.int64)  # all ones: no subsampling

        typ = type.lstrip("vgg").rstrip("p")
        if type[-1] == "p":
            self.brnn = RNNP(idim,
                             layers,
                             units,
                             projs,
                             subsample,
                             dropout,
                             typ=typ)
        else:
            self.brnn = RNN(idim, layers, units, projs, dropout, typ=typ)

        self.type = type
        self.nmask = nmask
        self.linears = torch.nn.ModuleList(
            [torch.nn.Linear(projs, idim) for _ in range(nmask)])

        if nonlinear not in ("sigmoid", "relu", "tanh", "crelu"):
            raise ValueError("Not supporting nonlinear={}".format(nonlinear))

        self.nonlinear = nonlinear
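
A minimal instantiation sketch for the mask estimator above. The snippet does not name its enclosing class, so MaskEstimator below is a stand-in used only for illustration; the argument values are illustrative as well.

    # Illustrative sketch; "MaskEstimator" is a stand-in name for the class above.
    estimator = MaskEstimator(
        type="blstmp",      # ends with "p", so the projected RNNP branch is used
        idim=257,           # e.g. number of STFT frequency bins
        layers=3,
        units=300,
        projs=320,
        dropout=0.0,
        nmask=2,            # one Linear mask head per estimated mask
        nonlinear="sigmoid",
    )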
Example #3
    def __init__(
            self,
            input_size: int,
            rnn_type: str = "lstm",
            bidirectional: bool = True,
            use_projection: bool = True,
            num_layers: int = 4,
            hidden_size: int = 320,
            output_size: int = 320,
            dropout: float = 0.0,
            subsample: Optional[Sequence[int]] = (2, 2, 1, 1),
    ):
        assert check_argument_types()
        super().__init__()
        self._output_size = output_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.use_projection = use_projection

        if rnn_type not in {"lstm", "gru"}:
            raise ValueError(f"Not supported rnn_type={rnn_type}")

        if subsample is None:
            subsample = np.ones(num_layers + 1, dtype=np.int64)
        else:
            subsample = subsample[:num_layers]
            # Pad with a leading 1 (only the second and later entries are used)
            # and trailing 1s so that len(subsample) == num_layers + 1.
            subsample = np.pad(
                np.array(subsample, dtype=np.int64),
                [1, num_layers - len(subsample)],
                mode="constant",
                constant_values=1,
            )

        rnn_type = ("b" if bidirectional else "") + rnn_type
        if use_projection:
            self.enc = torch.nn.ModuleList([
                RNNP(
                    input_size,
                    num_layers,
                    hidden_size,
                    output_size,
                    subsample,
                    dropout,
                    typ=rnn_type,
                )
            ])

        else:
            self.enc = torch.nn.ModuleList([
                RNN(
                    input_size,
                    num_layers,
                    hidden_size,
                    output_size,
                    dropout,
                    typ=rnn_type,
                )
            ])
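
The subsample handling above is the least obvious step, so here is a standalone sketch (NumPy only, illustrative values) of what it produces: the user-supplied factors are truncated to num_layers entries, then padded with a leading 1 (only the second and later entries are used) and trailing 1s so the result always has num_layers + 1 entries.

    import numpy as np

    num_layers = 4
    subsample = (2, 2, 1, 1)            # user-supplied per-layer factors
    subsample = subsample[:num_layers]  # keep at most num_layers entries
    subsample = np.pad(
        np.array(subsample, dtype=np.int64),
        [1, num_layers - len(subsample)],
        mode="constant",
        constant_values=1,
    )
    print(subsample)  # [1 2 2 1 1], i.e. num_layers + 1 entries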
Example #4
    def __init__(self,
                 etype,
                 idim,
                 elayers_sd,
                 elayers_rec,
                 eunits,
                 eprojs,
                 subsample,
                 dropout,
                 num_spkrs=2,
                 in_channel=1):
        super(Encoder, self).__init__()
        # Strip the "vgg" prefix, the bidirectional marker "b" and the projection
        # flag "p" to get the bare RNN cell type, e.g. "vggblstmp" -> "lstm".
        typ = etype.lstrip("vgg").lstrip("b").rstrip("p")
        if typ not in ("lstm", "gru"):
            logging.error(
                "Error: need to specify an appropriate encoder architecture")
        if etype.startswith("vgg"):
            if etype[-1] == "p":
                self.enc_mix = torch.nn.ModuleList([VGG2L(in_channel)])
                self.enc_sd = torch.nn.ModuleList([
                    torch.nn.ModuleList([
                        RNNP(get_vgg2l_odim(idim, in_channel=in_channel),
                             elayers_sd,
                             eunits,
                             eprojs,
                             subsample[:elayers_sd + 1],
                             dropout,
                             typ=typ)
                    ]) for _ in range(num_spkrs)
                ])
                self.enc_rec = torch.nn.ModuleList([
                    RNNP(eprojs,
                         elayers_rec,
                         eunits,
                         eprojs,
                         subsample[elayers_sd:],
                         dropout,
                         typ=typ)
                ])
                logging.info("Use CNN-VGG + B" + typ.upper() + "P for encoder")
            else:
                logging.error(
                    "Error: need to specify an appropriate encoder architecture")
                sys.exit()
        else:
            logging.error(
                "Error: need to specify an appropriate encoder architecture")
            sys.exit()

        self.num_spkrs = num_spkrs
Example #5
    def __init__(
        self,
        input_size: int,
        rnn_type: str = "lstm",
        bidirectional: bool = True,
        use_projection: bool = True,
        num_layers: int = 4,
        hidden_size: int = 320,
        output_size: int = 320,
        dropout: float = 0.0,
        in_channel: int = 1,
    ):
        assert check_argument_types()
        super().__init__()
        self._output_size = output_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.use_projection = use_projection
        if rnn_type not in {"lstm", "gru"}:
            raise ValueError(f"Not supported rnn_type={rnn_type}")

        # Subsample is not used for VGGRNN
        subsample = np.ones(num_layers + 1, dtype=np.int64)
        rnn_type = ("b" if bidirectional else "") + rnn_type
        if use_projection:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNNP(
                    get_vgg2l_odim(input_size, in_channel=in_channel),
                    num_layers,
                    hidden_size,
                    output_size,
                    subsample,
                    dropout,
                    typ=rnn_type,
                ),
            ])

        else:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNN(
                    get_vgg2l_odim(input_size, in_channel=in_channel),
                    num_layers,
                    hidden_size,
                    output_size,
                    dropout,
                    typ=rnn_type,
                ),
            ])
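
A minimal instantiation sketch for the VGG + RNN encoder above. The snippet does not name its enclosing class, so VGGRNNEncoder below is a stand-in used only for illustration; the argument values are illustrative as well.

    # Illustrative sketch; "VGGRNNEncoder" is a stand-in name for the class above.
    encoder = VGGRNNEncoder(
        input_size=80,          # e.g. 80-dim log-mel features
        rnn_type="lstm",
        bidirectional=True,
        use_projection=True,    # selects the VGG2L + RNNP branch
        num_layers=4,
        hidden_size=320,
        output_size=320,
        dropout=0.0,
        in_channel=1,
    )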
Example #6
    def __init__(self, type, idim, layers, units, projs, dropout, nmask=1):
        super().__init__()
        subsample = np.ones(layers + 1, dtype=np.int64)  # all ones: no subsampling

        typ = type.lstrip("vgg").rstrip("p")
        if type[-1] == "p":
            self.brnn = RNNP(idim,
                             layers,
                             units,
                             projs,
                             subsample,
                             dropout,
                             typ=typ)
        else:
            self.brnn = RNN(idim, layers, units, projs, dropout, typ=typ)

        self.type = type
        self.nmask = nmask
        self.linears = torch.nn.ModuleList(
            [torch.nn.Linear(projs, idim) for _ in range(nmask)])