Example #1
 def __init__(self,
              size_input,
              size,
              depth=1,
              size_attn=512,
              dropout_p=0.0):
     super(SpeechEncoderTopStack, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = GRUStack(self.size_input, self.size, self.depth)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
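Every example on this page relies on util.autoassign(locals()) to copy the constructor arguments onto self, so that self.size_input, self.depth, etc. exist without explicit assignments. The util module itself is not shown, so the following is only a minimal sketch of what such a helper presumably does, not the actual implementation:

def autoassign(local_vars):
    # Bind every local variable except 'self' (and the implicit
    # __class__ cell, if present) as an attribute on the instance.
    obj = local_vars.pop('self')
    local_vars.pop('__class__', None)
    for name, value in local_vars.items():
        setattr(obj, name, value)

class Demo:
    def __init__(self, size, depth=1):
        autoassign(locals())

assert Demo(64).size == 64 and Demo(64).depth == 1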
Example #2
 def __init__(self,
              size_feature,
              size,
              depth=1,
              size_attn=512,
              dropout_p=0.0):
     super(TextEncoderTop, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.h0 = torch.autograd.Variable(
             torch.zeros(self.depth, 1, self.size))
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = nn.GRU(self.size_feature,
                           self.size,
                           self.depth,
                           batch_first=True)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
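Note that torch.autograd.Variable has been a no-op wrapper since PyTorch 0.4, and a hidden state stored as a plain attribute does not follow .to(device) and does not show up in state_dict. A sketch of the modern equivalent using register_buffer (the class name is invented for illustration):

import torch
import torch.nn as nn

class TextEncoderTopModern(nn.Module):
    def __init__(self, size_feature, size, depth=1):
        super().__init__()
        self.RNN = nn.GRU(size_feature, size, depth, batch_first=True)
        # Buffers move with .to(device)/.cuda() and are saved in state_dict.
        self.register_buffer('h0', torch.zeros(depth, 1, size))

    def forward(self, x):
        # Expand the stored initial state to the current batch size.
        h0 = self.h0.expand(-1, x.size(0), -1).contiguous()
        out, _ = self.RNN(x, h0)
        return out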
Example #3
 def __init__(self,
              size_input,
              size,
              depth=1,
              size_attn=512,
              dropout_p=0.0):
     super(SpeechEncoderTopBidi, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = nn.GRU(self.size_input,
                           self.size,
                           self.depth,
                           batch_first=True,
                           bidirectional=True)
         self.Down = nn.Linear(self.size * 2, self.size)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
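In Example #3 the bidirectional GRU concatenates forward and backward states, so its output is 2 * size wide; the Down linear layer projects it back to size. A quick shape check using only torch (the sizes are arbitrary):

import torch
import torch.nn as nn

size_input, size, depth = 39, 256, 2
rnn = nn.GRU(size_input, size, depth, batch_first=True, bidirectional=True)
down = nn.Linear(size * 2, size)

x = torch.randn(4, 100, size_input)   # (batch, time, features)
out, _ = rnn(x)                       # (4, 100, 512): both directions concatenated
print(down(out).shape)                # torch.Size([4, 100, 256])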
Example #4
 def __init__(self, config):
     super(Audio, self).__init__()
     util.autoassign(locals())
     self.margin_size = config.get('margin_size', 0.2)
     # FIXME FIXME ADD gradient clipping!
     #self.make_updater = lambda: optim.Adam(self.parameters(), lr=config['lr'])
     self.max_norm = config['max_norm']
     self.Encode = Encoder(config['size_vocab'],
                           config['size'],
                           filter_length=config.get('filter_length', 6),
                           filter_size=config.get('filter_size', 1024),
                           stride=config.get('stride', 3),
                           depth=config.get('depth', 1))
     self.Attn = attention.SelfAttention(config['size'],
                                         size=config.get('size_attn', 512))
     self.ProjBeg = nn.Linear(config['size'], config['size_target'])
     self.ProjEnd = nn.Linear(config['size'], config['size_target'])
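The FIXME here (it recurs in Examples #5 and #6) asks for gradient clipping, and self.max_norm = config['max_norm'] shows where the threshold was meant to come from. A sketch of how a training step would typically apply it; the step function and its arguments are assumptions, only clip_grad_norm_ and model.max_norm are grounded in the snippet:

import torch

def training_step(model, optimizer, loss):
    # Hypothetical training step, not code from this repository.
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), model.max_norm)
    optimizer.step()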
Example #5
 def __init__(self, config):
     super(Audio, self).__init__()
     util.autoassign(locals())
     # FIXME FIXME ADD gradient clipping!
     #self.make_updater = lambda: optim.Adam(self.parameters(), lr=config['lr'])
     self.max_norm = config['max_norm']
     self.Encode = Encoder(config['size_vocab'],
                           config['size'],
                           filter_length=config.get('filter_length', 6),
                           filter_size=config.get('filter_size', 1024),
                           stride=config.get('stride', 3),
                           depth=config.get('depth', 1),
                           residual=config.get('residual', False))
     self.Attn = attention.SelfAttention(config['size'],
                                         size=config.get('size_attn', 512))
     self.Decode1 = Decoder(config['size_vocab'], config['size'])
     self.Decode3 = Decoder(config['size_vocab'], config['size'])
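Example #5 passes a residual flag to the Encoder (Example #6 does too). Its implementation is not shown; a common reading, sketched below as an assumption, is that each recurrent layer adds its input to its output (which requires matching sizes):

import torch
import torch.nn as nn

class ResidualGRUStack(nn.Module):
    def __init__(self, size, depth):
        super().__init__()
        self.layers = nn.ModuleList(
            nn.GRU(size, size, batch_first=True) for _ in range(depth))

    def forward(self, x):
        for layer in self.layers:
            out, _ = layer(x)
            x = x + out  # residual connection around each layer
        return x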
Example #6
 def __init__(self, config):
     super(Visual, self).__init__()
     util.autoassign(locals())
     self.margin_size = config.get('margin_size', 0.2)
     # FIXME FIXME ADD gradient clipping!
     #self.make_updater = lambda: optim.Adam(self.parameters(), lr=config['lr'])
     self.max_norm = config['max_norm']
     self.Encode = Encoder(config['size_vocab'],
                           config['size'],
                           filter_length=config.get('filter_length', 6),
                           filter_size=config.get('filter_size', 1024),
                           stride=config.get('stride', 3),
                           depth=config.get('depth', 1),
                           recur_depth=config.get('recur_depth', 1),
                           drop_i=config.get('drop_i', 0.75),
                           drop_s=config.get('drop_s', 0.25),
                           residual=config.get('residual', False),
                           seed=config.get('seed', 1))
     self.Attn = attention.SelfAttention(config['size'],
                                         size=config.get('size_attn', 512))
     self.ImgEncoder = util.make_linear(config['size_target'],
                                        config['size'])
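margin_size in Examples #4 and #6 defaults to 0.2, the usual margin for a max-margin ranking loss over paired embeddings; in Example #6 the pairing is presumably the encoded audio against ImgEncoder's image embeddings. Whether the model uses exactly this formulation is an assumption; the sketch below is the common bidirectional hinge loss:

import torch
import torch.nn.functional as F

def contrastive_margin_loss(A, I, margin=0.2):
    # A, I: (batch, dim) embeddings where A[i] and I[i] are a matched pair.
    A = F.normalize(A, dim=1)
    I = F.normalize(I, dim=1)
    S = A @ I.t()                    # cosine similarity matrix
    diag = S.diag().unsqueeze(1)     # similarities of the matched pairs
    # Hinge in both retrieval directions, excluding the matched pairs.
    cost_a = torch.clamp(margin + S - diag, min=0)
    cost_i = torch.clamp(margin + S - diag.t(), min=0)
    eye = torch.eye(S.size(0), dtype=torch.bool, device=S.device)
    return cost_a.masked_fill(eye, 0).mean() + cost_i.masked_fill(eye, 0).mean()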
Example #7
 def __init__(self,
              size_vocab,
              size,
              depth=1,
              filter_length=6,
              filter_size=64,
              stride=2,
              size_attn=512,
              dropout_p=0.0):
     super(SpeechEncoder, self).__init__()
     util.autoassign(locals())
     self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1,
                                                   self.size))
     self.Conv = conv.Convolution1D(self.size_vocab,
                                    self.filter_length,
                                    self.filter_size,
                                    stride=self.stride)
     self.Dropout = nn.Dropout(p=self.dropout_p)
     self.RNN = nn.GRU(self.filter_size,
                       self.size,
                       self.depth,
                       batch_first=True)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
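Every encoder on this page ends with attention.SelfAttention(self.size, size=self.size_attn), whose implementation is not shown. A common formulation matching that call signature is attention pooling over time: score each timestep with a small MLP, softmax the scores over the time axis, and return the weighted sum. The sketch below is an assumption about the interface, not the repository's actual module:

import torch
import torch.nn as nn

class SelfAttentionPool(nn.Module):
    def __init__(self, in_size, size=512):
        super().__init__()
        self.hidden = nn.Linear(in_size, size)
        self.out = nn.Linear(size, 1)

    def forward(self, x):
        # x: (batch, time, in_size) -> one attention weight per timestep.
        alpha = torch.softmax(self.out(torch.tanh(self.hidden(x))), dim=1)
        return (alpha * x).sum(dim=1)  # (batch, in_size) pooled representation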