def __init__(self, size_vocab, size, depth=1, recur_depth=1,
             bidirectional=False, filter_length=6, filter_size=64,
             stride=2, drop_i=0.75, drop_s=0.25):
    super(Encoder, self).__init__()
    util.autoassign(locals())
    self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))
    self.Conv = conv.Convolution1D(self.size_vocab, self.filter_length,
                                   self.filter_size, stride=self.stride)
    # self.RNN = nn.GRU(self.filter_size, self.size, self.depth,
    #                   batch_first=True)
    self.RNN = stacked_gru.StackedGRU(self.filter_size, self.size,
                                      self.depth,
                                      bidirectional=bidirectional,
                                      residual=True, batch_first=True)
def __init__(self, size_vocab, size, depth=1, recur_depth=1,
             filter_length=6, filter_size=64, stride=2,
             drop_i=0.75, drop_s=0.25, residual=False, seed=1):
    super(Encoder, self).__init__()
    util.autoassign(locals())
    # self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))
    self.Conv = conv.Convolution1D(self.size_vocab, self.filter_length,
                                   self.filter_size, stride=self.stride)
    self.RHN = rhn.StackedRHNH0(self.filter_size, self.size,
                                depth=self.depth,
                                recur_depth=self.recur_depth,
                                drop_i=self.drop_i, drop_s=self.drop_s,
                                residual=self.residual, seed=self.seed)
def __init__(self, input_size, hidden_size, num_layers, residual=False,
             bidirectional=False, **kwargs):
    super(StackedGRU, self).__init__()
    assert num_layers > 0
    util.autoassign(locals())
    # The bottom layer maps input_size to hidden_size; the remaining
    # num_layers - 1 layers are hidden-to-hidden.
    self.bottom = nn.GRU(input_size, hidden_size, 1,
                         bidirectional=bidirectional, **kwargs)
    self.layers = nn.ModuleList()
    if bidirectional:
        # Project concatenated forward/backward states back to hidden_size.
        self.downscale = nn.Linear(hidden_size * 2, hidden_size)
    for i in range(num_layers - 1):
        layer = nn.GRU(hidden_size, hidden_size, 1,
                       bidirectional=self.bidirectional, **kwargs)
        self.layers.append(layer)
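# A minimal, self-contained sketch of the forward pass such a residual GRU
# stack typically implies. StackedGRU.forward is not shown above, so the
# downscaling point and skip connections below are assumptions; upper layers
# are kept unidirectional here for simplicity.
import torch
import torch.nn as nn

bottom = nn.GRU(13, 32, 1, batch_first=True, bidirectional=True)
downscale = nn.Linear(32 * 2, 32)
layers = nn.ModuleList([nn.GRU(32, 32, 1, batch_first=True) for _ in range(2)])

x = torch.randn(4, 50, 13)   # (batch, time, features)
h, _ = bottom(x)             # (4, 50, 64): forward and backward states concatenated
h = downscale(h)             # back to (4, 50, 32)
for layer in layers:
    out, _ = layer(h)
    h = h + out              # residual connection between stacked layers
print(h.shape)               # torch.Size([4, 50, 32])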
def __init__(self, config):
    super(Segmatch, self).__init__()
    util.autoassign(locals())
    self.Encode = Encoder(**config['encoder'])
    self.ProjBeg = nn.Linear(config['segmatch']['size'],
                             config['segmatch']['size_target'])
    self.ProjEnd = nn.Linear(config['segmatch']['size'],
                             config['segmatch']['size_target'])
    self.optimizer = optim.Adam(self.parameters(),
                                lr=config['segmatch']['lr'])
def __init__(self, size_feature, size, depth=1):
    super(Decoder, self).__init__()
    util.autoassign(locals())
    self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))
    self.RNN = nn.GRU(self.size, self.size, self.depth, batch_first=True)
    self.Proj = nn.Linear(self.size, self.size_feature)
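# Several constructors here store h0 with a batch dimension of 1, i.e. shape
# (depth, 1, size). A plausible reason, sketched as an assumption since the
# forward passes are not shown: the initial state can be broadcast to any
# batch size at call time without allocating per-batch parameters.
import torch
import torch.nn as nn

depth, size, batch = 1, 16, 4
h0 = torch.zeros(depth, 1, size)                       # stored once in __init__
rnn = nn.GRU(size, size, depth, batch_first=True)

x = torch.randn(batch, 10, size)
h0_batch = h0.expand(depth, batch, size).contiguous()  # broadcast to the batch
out, hn = rnn(x, h0_batch)
print(out.shape)                                       # torch.Size([4, 10, 16])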
def __init__(self, size_in, length, size, stride=1, padding=None):
    super(Convolution1D, self).__init__()
    util.autoassign(locals())
    padding = padding if padding is not None else self.length
    self.Conv = nn.Conv1d(self.size_in, self.size, self.length,
                          stride=self.stride, padding=padding, bias=False)
    # use Glorot uniform initialization
    self.Conv.weight.data = init.glorot_uniform(
        (self.size, self.size_in, self.length, 1)).squeeze()
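# The custom init.glorot_uniform helper is not shown in this section; the
# standard PyTorch equivalent is nn.init.xavier_uniform_, sketched here.
# Glorot uniform samples weights from U(-a, a) with
# a = sqrt(6 / (fan_in + fan_out)).
import torch.nn as nn

conv1d = nn.Conv1d(in_channels=13, out_channels=64, kernel_size=6,
                   stride=2, padding=6, bias=False)
nn.init.xavier_uniform_(conv1d.weight)   # in-place Glorot uniform init
print(conv1d.weight.shape)               # torch.Size([64, 13, 6])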
def __init__(self, size):
    super(FixedZeros, self).__init__()
    util.autoassign(locals())
    self.zeros = torch.autograd.Variable(torch.zeros(self.size),
                                         requires_grad=True)
    if torch.cuda.is_available():
        self.zeros = self.zeros.cuda()
def __init__(self, size_in, length, size, stride=1, padding=None,
             maxpool=False, relu=False):
    super(Convolution2D, self).__init__()
    util.autoassign(locals())
    padding = padding if padding is not None else self.length
    # self.Conv = nn.Conv2d(self.size_in, self.size, self.length,
    #                       stride=self.stride, padding=padding, bias=False)
    self.Conv = nn.Conv2d(self.size_in, self.size, self.length,
                          stride=self.stride, padding=padding)
    # use Glorot uniform initialization
    # TODO: decide which initialization to use
    # self.Conv.weight.data = init.glorot_uniform((self.size, self.size_in,
    #                                              self.length, self.length))
    if self.relu:
        self.Relu = nn.ReLU(True)
    # FIXME what is the correct padding???
    if self.maxpool:
        self.Maxpool = nn.MaxPool2d(2, 2, ceil_mode=True)
def __init__(self, in_size, out_size, bias_init=None, init_scale=0.04):
    super(Linear, self).__init__()
    util.autoassign(locals())
    self.w = torch.nn.Parameter(
        self.make_param((self.in_size, self.out_size), 'uniform'))
    if bias_init is not None:
        self.b = torch.nn.Parameter(
            self.make_param((self.out_size,), self.bias_init))
def __init__(self, size_in, size, depth=2, residual=False, fixed=False,
             **kwargs):
    super(StackedRHN, self).__init__()
    util.autoassign(locals())
    # Optionally wrap each upper layer in a residual connection.
    f = lambda x: Residual(x) if self.residual else x
    self.layers = torch.nn.ModuleList(
        [f(RHNH0(self.size, self.size, fixed=self.fixed, **self.kwargs))
         for _ in range(1, self.depth)])
    self.bottom = RHN(self.size_in, self.size, **self.kwargs)
    # Fold the layer list into one callable: Identity first, then each
    # layer applied in list order.
    self.stack = reduce(lambda z, x: Compose(x, z), self.layers, Identity())
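# Compose and Identity are not shown in this section; a minimal sketch of
# the folding pattern with plain callables, assuming Compose(f, g) applies
# g first and then f.
from functools import reduce

class Identity:
    def __call__(self, x):
        return x

class Compose:
    def __init__(self, f, g):
        self.f, self.g = f, g
    def __call__(self, x):
        return self.f(self.g(x))

layers = [lambda x: x + 1, lambda x: x * 2, lambda x: x - 3]
stack = reduce(lambda z, x: Compose(x, z), layers, Identity())
print(stack(5))  # ((5 + 1) * 2) - 3 = 9: layers applied in list order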
def __init__(self, size, size_target_vocab, size_embed=64, depth=1):
    super(DecoderWithAttn, self).__init__()
    util.autoassign(locals())
    self.Decoder = SimpleDecoder(self.size_target_vocab, self.size,
                                 size_embed=self.size_embed,
                                 depth=self.depth)
    self.BAttn = BilinearAttention(self.size)
    self.Proj = nn.Linear(self.size * 2, self.size_target_vocab)
def __init__(self, mapper, pad_end=False, visual=True, erasure=(5, 5),
             sigma=None, noise_tied=False, midpoint=False):
    autoassign(locals())
    self.BEG = self.mapper.BEG_ID
    self.END = self.mapper.END_ID
    # erasure may be a (low, high) pair or a single scalar; normalize it
    # to a (gap_low, gap_high) pair either way.
    try:
        self.gap_low = self.erasure[0]
        self.gap_high = self.erasure[1]
    except TypeError:
        self.gap_low = self.erasure
        self.gap_high = self.erasure + 1
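# A runnable sketch of the erasure normalization above. How the gap is
# actually sampled and applied during batching is not shown in this section,
# so the randrange usage below is purely an illustrative assumption.
import random

def gap_range(erasure):
    try:
        return erasure[0], erasure[1]
    except TypeError:
        return erasure, erasure + 1

print(gap_range((3, 8)))            # (3, 8)
print(gap_range(5))                 # (5, 6)

low, high = gap_range(5)
gap = random.randrange(low, high)   # assumed half-open sampling [low, high)
print(gap)                          # always 5 for a scalar erasure of 5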
def __init__(self, size_vocab, size, depth=1, dropout_p=0.0):
    super(SpeechEncoderBottomBidi, self).__init__()
    util.autoassign(locals())
    if self.depth > 0:
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = nn.GRU(self.size_vocab, self.size, self.depth,
                          batch_first=True, bidirectional=True)
        self.Down = nn.Linear(self.size * 2, self.size)
def __init__(self, size_vocab, size, depth=1, dropout_p=0.0):
    super(SpeechEncoderBottomNoConv, self).__init__()
    util.autoassign(locals())
    if self.depth > 0:
        self.h0 = torch.autograd.Variable(
            torch.zeros(self.depth, 1, self.size))
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = nn.GRU(self.size_vocab, self.size, self.depth,
                          batch_first=True)
def __init__(self, size_in, length, size, stride=1):
    super(Convolution1D, self).__init__()
    util.autoassign(locals())
    self.Conv = nn.Conv1d(self.size_in, self.size, self.length,
                          stride=self.stride, padding=self.length)
    # use Glorot uniform initialization
    self.Conv.weight.data = init.glorot_uniform(
        (self.size, self.size_in, self.length))
def __init__(self, size_input, size, depth=1, size_attn=512, dropout_p=0.0):
    super(SpeechEncoderTopStack, self).__init__()
    util.autoassign(locals())
    if self.depth > 0:
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = GRUStack(self.size_input, self.size, self.depth)
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
def __init__(self, size_feature, size, size_embed=64, depth=1):
    super(SimpleDecoder, self).__init__()
    util.autoassign(locals())
    # Why not share embeddings with encoder?
    self.Embed = nn.Embedding(self.size_feature, self.size_embed)
    self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))
    self.RNN = nn.GRU(self.size_embed, self.size, self.depth,
                      batch_first=True)
def __init__(self, config):
    super(Audio, self).__init__()
    util.autoassign(locals())
    self.Encode = Encoder(**config['encoder'])
    self.Decode1 = Decoder(config['audio']['size_feature'],
                           config['audio']['size'])
    self.Decode3 = Decoder(config['audio']['size_feature'],
                           config['audio']['size'])
    self.optimizer = optim.Adam(self.parameters(), lr=config['audio']['lr'])
def __init__(self, provider, tokenize=words, min_df=10, scale=True,
             scale_input=False, scale_utt=False, batch_size=64,
             shuffle=False, limit=None, limit_val=None, curriculum=False,
             by_speaker=False, val_vocab=False, visual=True, erasure=5,
             midpoint=False, sigma=None, noise_tied=False, speakers=None):
    autoassign(locals())
    self.data = {}
    self.mapper = IdMapper(min_df=self.min_df)
    self.scaler = StandardScaler() if scale else NoScaler()
    self.audio_scaler = InputScaler() if scale_input else NoScaler()
    self.speaker_encoder = LabelEncoder()
    parts = insideout(self.shuffled(arrange(
        provider.iterImages(split='train'), tokenize=self.tokenize,
        limit=limit, speakers=speakers)))
    parts_val = insideout(self.shuffled(arrange(
        provider.iterImages(split='val'), tokenize=self.tokenize,
        limit=limit_val)))
    # TRAINING
    if self.val_vocab:
        _ = list(self.mapper.fit_transform(parts['tokens_in']
                                           + parts_val['tokens_in']))
        parts['tokens_in'] = self.mapper.transform(
            parts['tokens_in'])  # FIXME UGLY HACK
    else:
        parts['tokens_in'] = self.mapper.fit_transform(parts['tokens_in'])
    parts['tokens_out'] = self.mapper.transform(parts['tokens_out'])
    parts['img'] = self.scaler.fit_transform(parts['img'])
    self.speaker_encoder.fit(parts['speaker'] + parts_val['speaker'])
    parts['speaker_id'] = self.speaker_encoder.transform(parts['speaker'])
    if scale_input:
        parts['audio'] = self.audio_scaler.fit_transform(parts['audio'])
    elif scale_utt:
        parts['audio'] = scale_utterance(parts['audio'])
    self.data['train'] = outsidein(parts)
    # VALIDATION
    parts_val['tokens_in'] = self.mapper.transform(parts_val['tokens_in'])
    parts_val['tokens_out'] = self.mapper.transform(parts_val['tokens_out'])
    if self.visual:
        parts_val['img'] = self.scaler.transform(parts_val['img'])
    if scale_input:
        parts_val['audio'] = self.audio_scaler.transform(parts_val['audio'])
    elif scale_utt:
        parts_val['audio'] = scale_utterance(parts_val['audio'])
    parts_val['speaker_id'] = self.speaker_encoder.transform(
        parts_val['speaker'])
    self.data['valid'] = outsidein(parts_val)
    self.batcher = Batcher(self.mapper, pad_end=True, visual=visual,
                           erasure=erasure, sigma=sigma,
                           noise_tied=noise_tied, midpoint=midpoint)
def __init__(self, provider, tokenize=words, min_df=10, scale=True,
             scale_input=False, batch_size=64, shuffle=False, limit=None,
             curriculum=False, val_vocab=False):
    autoassign(locals())
    self.data = {}
    self.mapper = IdMapper(min_df=self.min_df)
    self.scaler = StandardScaler() if scale else NoScaler()
    self.audio_scaler = InputScaler() if scale_input else NoScaler()
    parts = insideout(self.shuffled(arrange(
        provider.iterImages(split='train'), tokenize=self.tokenize,
        limit=limit)))
    parts_val = insideout(self.shuffled(arrange(
        provider.iterImages(split='val'), tokenize=self.tokenize)))
    # TRAINING
    if self.val_vocab:
        _ = list(self.mapper.fit_transform(parts['tokens_in']
                                           + parts_val['tokens_in']))
        parts['tokens_in'] = self.mapper.transform(
            parts['tokens_in'])  # FIXME UGLY HACK
    else:
        parts['tokens_in'] = self.mapper.fit_transform(parts['tokens_in'])
    parts['tokens_out'] = self.mapper.transform(parts['tokens_out'])
    parts['img'] = self.scaler.fit_transform(parts['img'])
    parts['audio'] = self.audio_scaler.fit_transform(parts['audio'])
    self.data['train'] = outsidein(parts)
    # VALIDATION
    parts_val['tokens_in'] = self.mapper.transform(parts_val['tokens_in'])
    parts_val['tokens_out'] = self.mapper.transform(parts_val['tokens_out'])
    parts_val['img'] = self.scaler.transform(parts_val['img'])
    parts_val['audio'] = self.audio_scaler.transform(parts_val['audio'])
    self.data['valid'] = outsidein(parts_val)
    self.batcher = Batcher(self.mapper, pad_end=False)
def __init__(self, size_vocab, size, nb_conv_layer=1, depth=1,
             filter_length=6, filter_size=[64], stride=2, dropout_p=0.0,
             relu=False, maxpool=False, bidirectional=False):
    super(SpeechEncoderBottom, self).__init__()
    util.autoassign(locals())
    layers = []
    size_in = self.size_vocab
    for i_conv in range(self.nb_conv_layer):
        layers.append(conv.Convolution1D(size_in, self.filter_length,
                                         self.filter_size[i_conv],
                                         stride=self.stride,
                                         maxpool=self.maxpool))
        if self.relu:
            layers.append(nn.ReLU(True))
        size_in = self.filter_size[i_conv]
    self.Conv = nn.Sequential(*layers)
    if self.depth > 0:
        # TODO: LSTM/GRU?
        if self.bidirectional:
            self.h0 = torch.autograd.Variable(
                torch.zeros(self.depth * 2, 1, self.size))
            self.c0 = torch.autograd.Variable(
                torch.zeros(self.depth * 2, 1, self.size))
        else:
            self.h0 = torch.autograd.Variable(
                torch.zeros(self.depth, 1, self.size))
            self.c0 = torch.autograd.Variable(
                torch.zeros(self.depth, 1, self.size))
        self.Dropout = nn.Dropout(p=self.dropout_p)
        # TODO: LSTM/GRU?
        # self.RNN = nn.GRU(self.filter_size[self.nb_conv_layer - 1],
        #                   self.size, self.depth, batch_first=True,
        #                   bidirectional=self.bidirectional)
        self.RNN = nn.LSTM(self.filter_size[self.nb_conv_layer - 1],
                           self.size, self.depth, batch_first=True,
                           bidirectional=self.bidirectional)
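# A quick shape check of the conv-then-LSTM pipeline this constructor builds,
# using plain torch modules since the custom Convolution1D wrapper is defined
# elsewhere. The dimensions are illustrative, not taken from a real config.
import torch
import torch.nn as nn

conv1 = nn.Conv1d(13, 64, kernel_size=6, stride=2, padding=6)
rnn = nn.LSTM(64, 256, num_layers=1, batch_first=True)

x = torch.randn(4, 13, 100)   # Conv1d expects (batch, channels, time)
h = conv1(x)                  # the strided conv downsamples the time axis
h = h.transpose(1, 2)         # batch_first LSTM expects (batch, time, features)
out, (hn, cn) = rnn(h)
print(h.shape, out.shape)     # torch.Size([4, 54, 64]) torch.Size([4, 54, 256])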
def __init__(self, in_size, out_size, bias_init='uniform', init_scale=0.04):
    super(Linear, self).__init__()
    util.autoassign(locals())
    self.layer = nn.Linear(in_size, out_size, bias=bias_init is not None)
    self.layer.weight.data.uniform_(-init_scale, init_scale)
    if bias_init is None:
        pass  # no bias term
    elif isinstance(self.bias_init, numbers.Number):
        # a numeric bias_init is a constant initializer; uniform_(bias_init)
        # would instead sample from U(bias_init, 1)
        self.layer.bias.data.fill_(bias_init)
    elif bias_init == 'uniform':
        self.layer.bias.data.uniform_(-init_scale, init_scale)
    else:
        raise AssertionError('unsupported bias_init')
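# Sketch of the two bias schemes, on a plain nn.Linear for self-containment:
# 'uniform' draws the bias from U(-init_scale, init_scale), while a number
# (e.g. the -2.0 transform-gate bias used by the RHN below) fills the bias
# with that constant.
import torch.nn as nn

init_scale = 0.04
lin_u = nn.Linear(8, 8)
lin_u.bias.data.uniform_(-init_scale, init_scale)   # bias_init='uniform'

lin_c = nn.Linear(8, 8)
lin_c.bias.data.fill_(-2.0)                         # bias_init=-2.0
print(lin_c.bias.data[:3])                          # tensor([-2., -2., -2.])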
def __init__(self, size_in, size, depth=2, residual=False, fixed=False,
             **kwargs):
    super(StackedRHNH0, self).__init__()
    util.autoassign(locals())
    self.layer = WithH0(StackedRHN(size_in, size, depth=depth,
                                   residual=residual, **kwargs),
                        fixed=fixed)
def __init__(self, size_in, size, recur_depth=1, drop_i=0.75, drop_s=0.25,
             init_T_bias=-2.0, init_H_bias='uniform', tied_noise=True,
             init_scale=0.04, seed=1):
    super(RHN, self).__init__()
    util.autoassign(locals())
    hidden_size = self.size
    # H is the candidate transform, T the transform gate; the negative
    # T bias pushes the gates toward carrying the state at initialization.
    self.LinearH = Linear(in_size=self.size_in, out_size=hidden_size,
                          bias_init=self.init_H_bias)
    self.LinearT = Linear(in_size=self.size_in, out_size=hidden_size,
                          bias_init=self.init_T_bias)
    self.recurH = nn.ModuleList()
    self.recurT = nn.ModuleList()
    for l in range(self.recur_depth):
        if l == 0:
            # the first micro-layer also receives the input projections
            # LinearH/LinearT, which already carry biases
            self.recurH.append(Linear(in_size=hidden_size,
                                      out_size=hidden_size))
            self.recurT.append(Linear(in_size=hidden_size,
                                      out_size=hidden_size))
        else:
            self.recurH.append(Linear(in_size=hidden_size,
                                      out_size=hidden_size,
                                      bias_init=self.init_H_bias))
            self.recurT.append(Linear(in_size=hidden_size,
                                      out_size=hidden_size,
                                      bias_init=self.init_T_bias))
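# The forward pass is not shown above; this is a minimal sketch of one
# recurrent-highway time step (Zilly et al. 2017) consistent with the
# modules the constructor creates. The drop_i/drop_s dropout noise is
# omitted for brevity.
import torch

def rhn_step(x, s, LinearH, LinearT, recurH, recurT):
    """One time step: x is the input, s the previous hidden state."""
    for l in range(len(recurH)):
        if l == 0:
            h = torch.tanh(LinearH(x) + recurH[l](s))
            t = torch.sigmoid(LinearT(x) + recurT[l](s))
        else:
            h = torch.tanh(recurH[l](s))
            t = torch.sigmoid(recurT[l](s))
        s = h * t + s * (1 - t)   # highway: gate between candidate and carry
    return s

# illustrative usage with plain nn.Linear standing in for the Linear wrapper
lin = torch.nn.Linear
s = rhn_step(torch.randn(4, 8), torch.zeros(4, 16),
             lin(8, 16), lin(8, 16), [lin(16, 16)], [lin(16, 16)])
print(s.shape)                    # torch.Size([4, 16])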
def __init__(self, size_feature, size, size_embed=64, depth=1,
             dropout_p=0.0):
    super(TextEncoderBottom, self).__init__()
    util.autoassign(locals())
    self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))
    self.Embed = nn.Embedding(self.size_feature, self.size_embed)
    self.Dropout = nn.Dropout(p=self.dropout_p)
    self.RNN = nn.GRU(self.size_embed, self.size, self.depth,
                      batch_first=True)
def __init__(self, config):
    super(Audio, self).__init__()
    util.autoassign(locals())
    self.margin_size = config.get('margin_size', 0.2)
    # FIXME FIXME ADD gradient clipping!
    # self.make_updater = lambda: optim.Adam(self.parameters(),
    #                                        lr=config['lr'])
    self.max_norm = config['max_norm']
    self.Encode = Encoder(config['size_vocab'], config['size'],
                          filter_length=config.get('filter_length', 6),
                          filter_size=config.get('filter_size', 1024),
                          stride=config.get('stride', 3),
                          depth=config.get('depth', 1))
    self.Attn = attention.SelfAttention(config['size'],
                                        size=config.get('size_attn', 512))
    self.ProjBeg = nn.Linear(config['size'], config['size_target'])
    self.ProjEnd = nn.Linear(config['size'], config['size_target'])
def __init__(self, size_feature, size, depth=1, size_attn=512,
             dropout_p=0.0):
    super(TextEncoderTop, self).__init__()
    util.autoassign(locals())
    if self.depth > 0:
        self.h0 = torch.autograd.Variable(
            torch.zeros(self.depth, 1, self.size))
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = nn.GRU(self.size_feature, self.size, self.depth,
                          batch_first=True)
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
def __init__(self, size_vocab, size, depth=1, filter_length=6,
             filter_size=64, stride=2, dropout_p=0.0):
    super(SpeechEncoderBottomStack, self).__init__()
    util.autoassign(locals())
    self.Conv = conv.Convolution1D(self.size_vocab, self.filter_length,
                                   self.filter_size, stride=self.stride)
    if self.depth > 0:
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = GRUStack(self.filter_size, self.size, self.depth)
def __init__(self, size_input, size, depth=1, size_attn=512, dropout_p=0.0):
    super(SpeechEncoderTopBidi, self).__init__()
    util.autoassign(locals())
    if self.depth > 0:
        self.Dropout = nn.Dropout(p=self.dropout_p)
        self.RNN = nn.GRU(self.size_input, self.size, self.depth,
                          batch_first=True, bidirectional=True)
        self.Down = nn.Linear(self.size * 2, self.size)
    self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
def __init__(self, config):
    super(Audio, self).__init__()
    util.autoassign(locals())
    # FIXME FIXME ADD gradient clipping!
    # self.make_updater = lambda: optim.Adam(self.parameters(),
    #                                        lr=config['lr'])
    self.max_norm = config['max_norm']
    self.Encode = Encoder(config['size_vocab'], config['size'],
                          filter_length=config.get('filter_length', 6),
                          filter_size=config.get('filter_size', 1024),
                          stride=config.get('stride', 3),
                          depth=config.get('depth', 1),
                          residual=config.get('residual', False))
    self.Attn = attention.SelfAttention(config['size'],
                                        size=config.get('size_attn', 512))
    self.Decode1 = Decoder(config['size_vocab'], config['size'])
    self.Decode3 = Decoder(config['size_vocab'], config['size'])