def __init__(self, speech_encoder, config):
    """Assemble the speech-to-text model and its optimizer.

    Args:
        speech_encoder: Object stored as ``self.SpeechEncoderBottom``;
            ``self.parameters()`` (provided by the base class that is
            initialised here) is what the optimizer is built from.
        config: Mapping that must provide the keys ``'SpeechEncoderTop'``
            and ``'TextDecoder'`` (keyword arguments for the respective
            constructors) and ``'lr'`` (passed to Adam as ``lr``).
    """
    super(SpeechTranscriber, self).__init__()
    self.config = config

    # Sub-modules are attached in a fixed order; keep it stable.
    self.SpeechEncoderBottom = speech_encoder
    top_kwargs = config['SpeechEncoderTop']
    self.SpeechEncoderTop = SpeechEncoderTop(**top_kwargs)
    decoder_kwargs = config['TextDecoder']
    self.TextDecoder = DecoderWithAttn(**decoder_kwargs)

    # Built last so that every attribute assigned above is already in
    # place when ``self.parameters()`` is evaluated.
    learning_rate = config['lr']
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
def __init__(self, speech_encoder, config):
    """Assemble the speech / correlation-encoder model and its optimizer.

    Args:
        speech_encoder: Object stored as ``self.SpeechEncoderBottom``.
        config: Mapping that must provide ``'SpeechEncoderTop'`` (keyword
            arguments for ``SpeechEncoderTop``) and ``'lr'`` (passed to
            Adam as ``lr``).
    """
    super(SpeechCorrText, self).__init__()
    self.config = config

    self.SpeechEncoderBottom = speech_encoder
    top_kwargs = config['SpeechEncoderTop']
    self.SpeechEncoderTop = SpeechEncoderTop(**top_kwargs)

    # The correlation encoder sizes are fixed here rather than read from
    # ``config``: input size 1024, target size four times that.
    corr_size = 1024
    self.CorrEncoder = ImageEncoder(size=corr_size, size_target=corr_size * 4)

    # Built last so that every attribute assigned above is already in
    # place when ``self.parameters()`` is evaluated.
    learning_rate = config['lr']
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
def __init__(self, speech_encoder, config):
    """Assemble the speech / image model and its optimizer.

    Args:
        speech_encoder: Object stored as ``self.SpeechEncoderBottom``.
        config: Mapping that must provide ``'SpeechEncoderTop'`` and
            ``'ImageEncoder'`` (keyword arguments for the respective
            constructors) and ``'lr'`` (passed to Adam as ``lr``).
    """
    super(SpeechImage, self).__init__()
    self.config = config

    # Sub-modules are attached in a fixed order; keep it stable.
    self.SpeechEncoderBottom = speech_encoder
    top_kwargs = config['SpeechEncoderTop']
    self.SpeechEncoderTop = SpeechEncoderTop(**top_kwargs)
    image_kwargs = config['ImageEncoder']
    self.ImageEncoder = ImageEncoder(**image_kwargs)

    # Built last so that every attribute assigned above is already in
    # place when ``self.parameters()`` is evaluated.
    learning_rate = config['lr']
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
class SpeechTranscriber(nn.Module):
    """Speech encoder stack followed by an attention decoder over text.

    The model owns its own Adam optimizer, created in ``__init__`` from
    ``config['lr']``.
    """

    def __init__(self, speech_encoder, config):
        """Assemble the sub-modules and the optimizer.

        Args:
            speech_encoder: Object stored as ``self.SpeechEncoderBottom``
                and called on the raw speech input in ``cost``.
            config: Mapping that must provide ``'SpeechEncoderTop'`` and
                ``'TextDecoder'`` (keyword arguments for the respective
                constructors) and ``'lr'`` (passed to Adam as ``lr``).
        """
        super(SpeechTranscriber, self).__init__()
        self.config = config

        # Sub-modules are attached in a fixed order; keep it stable.
        self.SpeechEncoderBottom = speech_encoder
        top_kwargs = config['SpeechEncoderTop']
        self.SpeechEncoderTop = SpeechEncoderTop(**top_kwargs)
        decoder_kwargs = config['TextDecoder']
        self.TextDecoder = DecoderWithAttn(**decoder_kwargs)

        # Built last so that all sub-modules are registered before
        # ``self.parameters()`` is evaluated.
        learning_rate = config['lr']
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)

    def cost(self, speech, target, target_prev):
        """Return the cross-entropy between decoder logits and ``target``.

        Args:
            speech: Input handed to ``self.SpeechEncoderBottom``.
            target: Tensor of class indices; its first two dimensions are
                flattened into one before the loss is computed.
            target_prev: Passed to the decoder as its third argument
                (presumably the previous-token inputs -- confirm against
                ``DecoderWithAttn``).

        Returns:
            The value of ``F.cross_entropy`` with its default reduction.
        """
        bottom_out = self.SpeechEncoderBottom(speech)
        states, rep = self.SpeechEncoderTop.states(bottom_out)
        logits = self.TextDecoder(states, rep, target_prev)

        # Collapse the two leading dimensions so that each row of logits
        # lines up with exactly one target index.
        flat_logits = logits.view(logits.size(0) * logits.size(1), -1)
        flat_target = target.view(target.size(0) * target.size(1))
        return F.cross_entropy(flat_logits, flat_target)

    def args(self, item):
        """Extract the positional arguments for ``cost`` from a batch item.

        Args:
            item: Mapping with keys ``'audio'``, ``'target_t'`` and
                ``'target_prev_t'``; the latter two must support
                ``.astype``.

        Returns:
            A 3-tuple ``(audio, target, target_prev)`` where both targets
            have been converted with ``.astype('int64')``.
        """
        audio = item['audio']
        target = item['target_t'].astype('int64')
        target_prev = item['target_prev_t'].astype('int64')
        return (audio, target, target_prev)

    def test_cost(self, *args):
        """Compute ``cost(*args)`` inside the ``testing(self)`` context.

        ``testing`` is defined elsewhere; presumably it puts the model
        into evaluation mode for the duration of the call -- confirm.
        """
        with testing(self):
            loss = self.cost(*args)
        return loss
def __init__(self, speech_encoder, text_encoder, config):
    """Assemble the paired speech / text encoders and the optimizer.

    Args:
        speech_encoder: Object stored as ``self.SpeechEncoderBottom``.
        text_encoder: Object stored as ``self.TextEncoderBottom``.
        config: Mapping that must provide ``'SpeechEncoderTop'`` and
            ``'TextEncoderTop'`` (keyword arguments for the respective
            constructors) and ``'lr'`` (passed to Adam as ``lr``).
    """
    super(SpeechText, self).__init__()
    self.config = config

    # Bottom encoders are supplied by the caller; the top encoders are
    # constructed here from ``config``. Attachment order is kept fixed.
    self.SpeechEncoderBottom = speech_encoder
    self.TextEncoderBottom = text_encoder
    speech_top_kwargs = config['SpeechEncoderTop']
    self.SpeechEncoderTop = SpeechEncoderTop(**speech_top_kwargs)
    text_top_kwargs = config['TextEncoderTop']
    self.TextEncoderTop = TextEncoderTop(**text_top_kwargs)

    # Built last so that every attribute assigned above is already in
    # place when ``self.parameters()`` is evaluated.
    learning_rate = config['lr']
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)