def __init__(self, in_chan, n_src, rnn_type='lstm', n_layers=2, hidden_size=600, bidirectional=True, dropout=0.3, embedding_dim=20, take_log=False):
    """Build the shared RNN trunk plus the two output heads.

    Args:
        in_chan (int): Number of input frequency bins / features per frame.
        n_src (int): Number of sources to separate (one mask per source).
        rnn_type (str): Recurrent cell type passed to ``SingleRNN``.
        n_layers (int): Number of stacked recurrent layers.
        hidden_size (int): Hidden units per direction in the RNN.
        bidirectional (bool): If True, the RNN output is ``2 * hidden_size``.
        dropout (float): Dropout used both inside the RNN and after it.
        embedding_dim (int): Per-bin embedding size for the DC head.
        take_log (bool): Whether the forward pass should log-compress input.
    """
    super().__init__()
    # Bookkeeping attributes read elsewhere on the module.
    self.input_dim = in_chan
    self.n_src = n_src
    self.take_log = take_log
    self.embedding_dim = embedding_dim

    # Shared recurrent trunk feeding both heads.
    self.rnn = SingleRNN(
        rnn_type,
        in_chan,
        hidden_size,
        n_layers=n_layers,
        dropout=dropout,
        bidirectional=bidirectional,
    )
    self.dropout = nn.Dropout(dropout)

    # A bidirectional RNN concatenates both directions' hidden states.
    trunk_out = 2 * hidden_size if bidirectional else hidden_size

    # Mask-inference head: one mask of size in_chan per source.
    self.mask_layer = nn.Linear(trunk_out, in_chan * self.n_src)
    self.mask_act = nn.Sigmoid()  # sigmoid or relu or softmax

    # Deep-clustering head: an embedding_dim vector per frequency bin.
    self.embedding_layer = nn.Linear(trunk_out, in_chan * embedding_dim)
    self.embedding_act = nn.Tanh()  # sigmoid or tanh
def __init__(self, in_chan=129, n_src=2, rnn_type='lstm', embedding_dim=20,
             n_layers=2, hidden_size=600, dropout=0.5, bidirectional=True):
    """Build the RNN trunk, mask head, embedding head and auxiliary linears.

    All parameters default to the values that were previously hard-coded,
    so existing ``Model()`` call sites behave exactly as before.

    Args:
        in_chan (int): Number of input frequency bins / features per frame.
        n_src (int): Number of sources; the mask head emits one mask each.
        rnn_type (str): Recurrent cell type passed to ``SingleRNN``.
        embedding_dim (int): Per-bin embedding size for the DC head.
        n_layers (int): Number of stacked recurrent layers.
        hidden_size (int): Hidden units per direction in the RNN.
        dropout (float): Dropout used inside the RNN and after it.
        bidirectional (bool): If True, the RNN output is ``2 * hidden_size``.
    """
    super().__init__()
    self.input_dim = in_chan
    self.n_src = n_src
    self.embedding_dim = embedding_dim

    # Shared recurrent trunk feeding both heads.
    self.rnn = SingleRNN(
        rnn_type,
        in_chan,
        hidden_size,
        n_layers=n_layers,
        dropout=dropout,
        bidirectional=bidirectional,
    )
    self.dropout = nn.Dropout(dropout)

    # A bidirectional RNN concatenates both directions' hidden states.
    rnn_out_dim = hidden_size * 2 if bidirectional else hidden_size

    # Deep-clustering head: an embedding_dim vector per frequency bin.
    self.embedding_layer = nn.Linear(rnn_out_dim, in_chan * embedding_dim)
    # Mask-inference head: one mask of size in_chan per source
    # (was hard-coded to 2 sources before n_src was parameterized).
    self.mask_layer = nn.Linear(rnn_out_dim, in_chan * n_src)
    self.non_linearity = nn.Sigmoid()

    # Numerical floor used to avoid division by zero downstream.
    self.EPS = torch.finfo(torch.float32).eps

    # Temp check
    self.lin1 = nn.Linear(in_chan, in_chan)
    self.lin2 = nn.Linear(in_chan, in_chan)
def __init__(self, fb_conf, mask_conf):
    """Assemble a TasNet-style model: learned filterbanks plus an RNN masker.

    Args:
        fb_conf (dict): ``FreeFB`` keyword arguments; must contain
            ``'n_filters'``.
        mask_conf (dict): Masker settings with keys ``'n_src'``,
            ``'n_units'``, ``'n_layers'`` and ``'dropout'``.
    """
    super().__init__()
    self.n_src = mask_conf['n_src']
    self.n_filters = fb_conf['n_filters']

    # Two parallel learned encoders and one decoder built from free
    # filterbanks (could use nn.Conv1D as well).
    self.encoder_sig = Encoder(FreeFB(**fb_conf))
    self.encoder_relu = Encoder(FreeFB(**fb_conf))
    self.decoder = Decoder(FreeFB(**fb_conf))
    self.bn_layer = GlobLN(fb_conf['n_filters'])

    # Masker: BLSTM trunk -> linear projection to one mask per source
    # -> sigmoid to keep mask values in (0, 1).
    trunk = SingleRNN(
        'lstm',
        fb_conf['n_filters'],
        hidden_size=mask_conf['n_units'],
        n_layers=mask_conf['n_layers'],
        bidirectional=True,
        dropout=mask_conf['dropout'],
    )
    # The BLSTM output is 2 * n_units (both directions concatenated).
    projection = nn.Linear(2 * mask_conf['n_units'], self.n_src * self.n_filters)
    self.masker = nn.Sequential(trunk, projection, nn.Sigmoid())