def __init__(self, size_input, size, depth=1, size_attn=512, dropout_p=0.0):
    """Top of a speech encoder: dropout -> stacked GRU -> self-attention.

    With depth == 0 the recurrent part is omitted and only the
    self-attention layer is built.
    """
    super(SpeechEncoderTopStack, self).__init__()
    # Copy every constructor argument onto self under the same name.
    util.autoassign(locals())
    if self.depth > 0:
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = GRUStack(self.size_input, self.size, self.depth)
    # Attention pooling is present regardless of depth.
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
def __init__(self, size_feature, size, depth=1, size_attn=512, dropout_p=0.0):
    """Top of a text encoder: dropout -> GRU -> self-attention.

    With depth == 0 only the self-attention layer is built.
    """
    super(TextEncoderTop, self).__init__()
    # Stash all constructor arguments as attributes of the same name.
    util.autoassign(locals())
    if self.depth > 0:
        # Initial hidden-state template of shape (depth, 1, size).
        # NOTE(review): uses the legacy Variable API and is a plain
        # attribute, not a registered buffer, so it will not follow the
        # module across devices -- confirm callers handle placement.
        self.h0 = torch.autograd.Variable(
            torch.zeros(self.depth, 1, self.size))
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = nn.GRU(self.size_feature, self.size, self.depth,
                          batch_first=True)
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
def __init__(self, size_input, size, depth=1, size_attn=512, dropout_p=0.0):
    """Top of a speech encoder using a bidirectional GRU.

    The two GRU directions are projected back down to `size` by a
    linear layer before self-attention. With depth == 0 only the
    attention layer is built.
    """
    super(SpeechEncoderTopBidi, self).__init__()
    # Copy every constructor argument onto self under the same name.
    util.autoassign(locals())
    if self.depth > 0:
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = nn.GRU(self.size_input, self.size, self.depth,
                          batch_first=True, bidirectional=True)
        # Bidirectional output is 2*size wide; map it back to size.
        self.Down = nn.Linear(self.size * 2, self.size)
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
def __init__(self, config):
    """Audio model with margin loss: conv/GRU encoder, self-attention,
    and two linear projections into the target space.

    All hyperparameters come from the `config` dict; optional keys
    fall back to defaults via `config.get`.
    """
    super(Audio, self).__init__()
    util.autoassign(locals())
    self.margin_size = config.get('margin_size', 0.2)
    # TODO: gradient clipping is still missing here (original FIXME).
    # NOTE(review): an Adam updater built from config['lr'] was commented
    # out in the original; only max_norm is read for now.
    self.max_norm = config['max_norm']
    size = config['size']
    self.Encode = Encoder(config['size_vocab'], size,
                          filter_length=config.get('filter_length', 6),
                          filter_size=config.get('filter_size', 1024),
                          stride=config.get('stride', 3),
                          depth=config.get('depth', 1))
    self.Attn = attention.SelfAttention(size,
                                        size=config.get('size_attn', 512))
    # Separate projections for the two ends of the contrastive pair.
    self.ProjBeg = nn.Linear(size, config['size_target'])
    self.ProjEnd = nn.Linear(size, config['size_target'])
def __init__(self, config):
    """Audio model with two decoders: conv/GRU encoder (optionally
    residual), self-attention, and a pair of vocabulary decoders.

    All hyperparameters come from the `config` dict; optional keys
    fall back to defaults via `config.get`.
    """
    super(Audio, self).__init__()
    util.autoassign(locals())
    # TODO: gradient clipping is still missing here (original FIXME).
    # NOTE(review): an Adam updater built from config['lr'] was commented
    # out in the original; only max_norm is read for now.
    self.max_norm = config['max_norm']
    vocab = config['size_vocab']
    size = config['size']
    self.Encode = Encoder(vocab, size,
                          filter_length=config.get('filter_length', 6),
                          filter_size=config.get('filter_size', 1024),
                          stride=config.get('stride', 3),
                          depth=config.get('depth', 1),
                          residual=config.get('residual', False))
    self.Attn = attention.SelfAttention(size,
                                        size=config.get('size_attn', 512))
    # Two decoders back into the vocabulary space.
    self.Decode1 = Decoder(vocab, size)
    self.Decode3 = Decoder(vocab, size)
def __init__(self, config):
    """Visual grounding model: conv/GRU encoder with dropout and
    residual options, self-attention, and a linear image encoder
    mapping image features into the shared space.

    All hyperparameters come from the `config` dict; optional keys
    fall back to defaults via `config.get`.
    """
    super(Visual, self).__init__()
    util.autoassign(locals())
    self.margin_size = config.get('margin_size', 0.2)
    # TODO: gradient clipping is still missing here (original FIXME).
    # NOTE(review): an Adam updater built from config['lr'] was commented
    # out in the original; only max_norm is read for now.
    self.max_norm = config['max_norm']
    size = config['size']
    self.Encode = Encoder(config['size_vocab'], size,
                          filter_length=config.get('filter_length', 6),
                          filter_size=config.get('filter_size', 1024),
                          stride=config.get('stride', 3),
                          depth=config.get('depth', 1),
                          recur_depth=config.get('recur_depth', 1),
                          drop_i=config.get('drop_i', 0.75),
                          drop_s=config.get('drop_s', 0.25),
                          residual=config.get('residual', False),
                          seed=config.get('seed', 1))
    self.Attn = attention.SelfAttention(size,
                                        size=config.get('size_attn', 512))
    # Projects image features (size_target) into the speech space (size).
    self.ImgEncoder = util.make_linear(config['size_target'], size)
def __init__(self, size_vocab, size, depth=1, filter_length=6,
             filter_size=64, stride=2, size_attn=512, dropout_p=0.0):
    """Speech encoder: 1-D convolution -> dropout -> GRU -> self-attention.

    Unlike the *Top variants, all layers here are built unconditionally.
    """
    super(SpeechEncoder, self).__init__()
    # Copy every constructor argument onto self under the same name.
    util.autoassign(locals())
    # Initial hidden-state template of shape (depth, 1, size).
    # NOTE(review): uses the legacy Variable API and is a plain attribute,
    # not a registered buffer, so it will not follow the module across
    # devices -- confirm callers handle placement.
    self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))
    self.Conv = conv.Convolution1D(self.size_vocab, self.filter_length,
                                   self.filter_size, stride=self.stride)
    self.Dropout = nn.Dropout(p=self.dropout_p)
    self.RNN = nn.GRU(self.filter_size, self.size, self.depth,
                      batch_first=True)
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)