Example #1
def test_regression(sru_prev_version):
    """
    IMPORTANT:

    You need to run:

        test/regression/build_artifact.sh [SRU VERSION]

    and commit the resulting artifact in test/regression/artifacts to GitHub
    for each sru_prev_version you want to test.
    """
    torch.manual_seed(2)  # different seed from the artifact-building stage, so the model starts from different weights

    artifact_path = f'{ARTIFACT_DIR}/{sru_prev_version}.pt'
    artifact_dict = torch.load(artifact_path)
    assert artifact_dict['sru.__version__'] == sru_prev_version

    model = sru.SRU(**artifact_dict['sru_kwargs']).eval()

    output_artifact = artifact_dict['outputs']
    model.load_state_dict(artifact_dict['model_state'])
    with torch.no_grad():
        output_current = model(artifact_dict['inputs'])

    assert len(output_artifact) == len(output_current) == 2
    max_diff0 = (output_artifact[0] - output_current[0]).abs().max().item()
    max_diff1 = (output_artifact[1] - output_current[1]).abs().max().item()
    assert max_diff0 <= EPSILON
    assert max_diff1 <= EPSILON
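
This test relies on module-level constants and a pytest argument that the snippet does not show. Below is a minimal sketch of that scaffolding, assuming a fixture-based setup; ARTIFACT_DIR, EPSILON, and the version list are illustrative values, not the repository's actual configuration.

# Hypothetical scaffolding for test_regression; the path, tolerance, and
# version list are assumptions for illustration only.
import os

import pytest

ARTIFACT_DIR = os.path.join(os.path.dirname(__file__), 'artifacts')  # assumed layout
EPSILON = 1e-5  # assumed tolerance


@pytest.fixture(params=['2.0.0'])  # list every SRU version with a committed artifact
def sru_prev_version(request):
    return request.param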
Example #2
    def __init__(self, words, args):
        super(Model, self).__init__()
        self.args = args
        if args.n_e:
            self.n_e = args.n_e
        else:
            self.n_e = len(words) if len(words) < args.n_d else args.n_d
        self.n_d = args.n_d
        self.depth = args.depth
        self.drop = nn.Dropout(args.dropout)
        self.embedding_layer = nn.Embedding(len(words), self.n_e)
        self.n_V = len(words)
        custom_m_list = [CustomLinear(self.n_e, self.n_d * 4, bias=False)]
        for i in range(self.depth - 1):
            custom_m_list.append(
                flop.ProjectedLinear(self.n_d,
                                     self.n_d * 3,
                                     proj_features=args.n_proj,
                                     bias=False))
        self.rnn = sru.SRU(
            self.n_e,
            self.n_d,
            self.depth,
            dropout=args.dropout,
            highway_bias=args.bias,
            layer_norm=args.layer_norm,
            rescale=args.rescale,
            custom_m=custom_m_list,
        )
        self.output_layer = nn.Linear(self.n_d, self.n_V)
        self.init_weights()
Example #3
    def __init__(self, words, args):
        super(Model, self).__init__()
        self.args = args
        if args.n_e:
            self.n_e = args.n_e
        else:
            self.n_e = len(words) if len(words) < args.n_d else args.n_d
        self.n_d = args.n_d
        self.depth = args.depth
        self.drop = nn.Dropout(args.dropout)
        self.embedding_layer = nn.Embedding(len(words), self.n_e)
        self.n_V = len(words)
        if args.lstm:
            self.rnn = nn.LSTM(self.n_e, self.n_d,
                               self.depth,
                               dropout=args.dropout)
        else:
            self.rnn = sru.SRU(self.n_e, self.n_d, self.depth,
                               dropout=args.dropout,
                               n_proj=args.n_proj,
                               # use_tanh=0,
                               highway_bias=args.bias,
                               layer_norm=args.layer_norm)
        self.output_layer = nn.Linear(self.n_d, self.n_V)
        self.init_weights()
Example #4
    def __init__(self, words, args):
        super(Model, self).__init__()
        self.args = args
        self.n_d = args.d
        self.depth = args.depth
        self.drop = nn.Dropout(args.dropout)
        self.embedding_layer = EmbeddingLayer(self.n_d, words)
        self.n_V = self.embedding_layer.n_V
        if args.lstm:
            self.rnn = nn.LSTM(self.n_d,
                               self.n_d,
                               self.depth,
                               dropout=args.rnn_dropout)
        else:
            self.rnn = sru.SRU(
                self.n_d,
                self.n_d,
                self.depth,
                dropout=args.rnn_dropout,
                rnn_dropout=args.rnn_dropout,
                use_tanh=0,
                rescale=False,  # make sure the behavior is the same as before
                v1=True,  # use the older v1 SRU formulation
                highway_bias=args.bias)
        self.output_layer = nn.Linear(self.n_d, self.n_V)
        # tie weights
        self.output_layer.weight = self.embedding_layer.embedding.weight

        self.init_weights()
Example #5
    def __init__(self, labels):
        super().__init__()

        self.labels = labels
        self.output_numbers = max(labels.values()) + 1
        self.rnn_size = self.output_numbers

        print_normal("Creating resSru with " + str(self.output_numbers) + " labels")

        self.convolutions = torch.nn.Sequential(OrderedDict([
            ('conv1', torch.nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)),
            ('bn1', torch.nn.BatchNorm2d(64)),
            ('activation', torch.nn.ReLU(inplace=True)),
            ('maxpool', torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=(1, 1))),
            ('resnet', ResNet(BasicBlock, [2, 2, 2, 2], strides=[1, (2, 1), (2, 1), (2, 1)], bn=True)),
        ]))
        self.convolutions_output_size = self.get_cnn_output_size()

        self.rnn = sru.SRU(self.convolutions_output_size[1] * self.convolutions_output_size[2],
                           self.output_numbers,
                           num_layers=4,
                           bidirectional=False,
                           rnn_dropout=0.3,
                           use_tanh=1,
                           use_relu=0,
                           layer_norm=False,
                           weight_norm=True)

        # self.rnn = torch.nn.GRU(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, num_layers=1, bidirectional=True)
        # self.rnn = IndRNN(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, n_layer=3, bidirectional=True, batch_norm=True, batch_first=True, dropout=0.1, nonlinearity='relu')
        # self.fc = torch.nn.Linear(2 * self.rnn_size, self.output_numbers)

        self.softmax = torch.nn.Softmax(dim=2)
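
The forward pass of this class is not shown in the source. As a rough, hypothetical sketch of the reshaping such a model typically needs, the convolutional feature maps of shape (B, C, H, W) are flattened so the width axis becomes the sequence dimension, which matches the SRU input size convolutions_output_size[1] * convolutions_output_size[2] used above.

# Hypothetical helper (not from the original source): reshape CNN feature maps
# of shape (batch, channels, height, width) into an SRU input sequence of
# shape (width, batch, channels * height).
import torch

def cnn_features_to_sru_input(features: torch.Tensor) -> torch.Tensor:
    B, C, H, W = features.shape
    return features.permute(3, 0, 1, 2).reshape(W, B, C * H)

# e.g. cnn_features_to_sru_input(torch.rand(2, 64, 4, 50)).shape == (50, 2, 256)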
Example #6
def test_all(bidirectional, rescale, proj, layer_norm):
    eps = 1e-4
    torch.manual_seed(1234)
    L = 16
    B = 8
    D = 32
    x = torch.randn(L, B, D)
    model = sru.SRU(D,
                    D,
                    bidirectional=bidirectional,
                    projection_size=proj,
                    layer_norm=layer_norm,
                    rescale=rescale)
    model.eval()

    h, c = model(x)
    h, c = h.detach(), c.detach()

    with torch.no_grad():
        h_, c_ = model(x)
        assert (h - h_).abs().max() <= eps
        assert (c - c_).abs().max() <= eps

    ts_model = torch.jit.script(model)
    h_, c_ = ts_model(x)
    assert (h - h_).abs().max() <= eps
    assert (c - c_).abs().max() <= eps
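
The arguments of test_all suggest a pytest parametrization that is not shown here; a minimal sketch of what the decorators might look like, with assumed value grids:

# Hypothetical parametrization for test_all; the value grids are assumptions.
import pytest

@pytest.mark.parametrize("bidirectional", [False, True])
@pytest.mark.parametrize("rescale", [False, True])
@pytest.mark.parametrize("proj", [0, 2])
@pytest.mark.parametrize("layer_norm", [False, True])
def test_all(bidirectional, rescale, proj, layer_norm):
    ...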
Example #7
def test_all(cuda, bidirectional, rescale, proj, layer_norm):
    eps = 1e-4
    torch.manual_seed(1234)
    if cuda:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    L = 16
    B = 8
    D = 32
    x = torch.randn(L, B, D)
    model = sru.SRU(D,
                    D,
                    bidirectional=bidirectional,
                    projection_size=proj,
                    layer_norm=layer_norm,
                    rescale=rescale)
    if cuda:
        model = model.cuda()
        x = x.cuda()
    model.eval()

    h, c = model(x)
    h, c = h.detach(), c.detach()

    with torch.no_grad():
        h_, c_ = model(x)
        assert (h - h_).abs().max() <= eps
        assert (c - c_).abs().max() <= eps

    ts_model = torch.jit.script(model)
    h_, c_ = ts_model(x)
    assert (h - h_).abs().max() <= eps
    assert (c - c_).abs().max() <= eps
Example #8
def run(args):
    torch.manual_seed(1)

    batch_size = 3
    input_size = 5
    hidden_size = 7
    seq_len = 4
    num_layers = 2

    sru_kwargs = {
        'input_size': input_size,
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'bidirectional': True,
        'dropout': 0.1,
        'rescale': False
    }

    inputs = torch.rand(seq_len, batch_size, input_size)
    model = sru.SRU(**sru_kwargs).eval()

    with torch.no_grad():
        outputs = model(inputs)

    artifact_dict = {
        'outputs': outputs,
        'inputs': inputs,
        'model_state': model.state_dict(),
        'sru_kwargs': sru_kwargs,
        'sru.__version__': sru.__version__
    }
    torch.save(artifact_dict, args.out_artifact)
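
run() only needs an args object with an out_artifact attribute; below is a minimal command-line driver sketch, where the flag name is an assumption inferred from that attribute.

# Hypothetical entry point for the artifact builder above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Build an SRU regression artifact')
    parser.add_argument('--out-artifact', dest='out_artifact', required=True,
                        help='path of the .pt artifact to write')
    run(parser.parse_args())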
Example #9
def test_sru_backward_simple(cuda, bidirectional, layer_norm, normalize_after,
                             rescale, has_skip_term):
    torch.manual_seed(123)
    if cuda:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    input_length = 3
    batch_size = 5
    input_size = 4
    hidden_size = 2
    encoder = sru.SRU(input_size,
                      hidden_size,
                      bidirectional=bidirectional,
                      layer_norm=layer_norm,
                      normalize_after=normalize_after,
                      rescale=rescale,
                      has_skip_term=has_skip_term)
    if cuda:
        encoder = encoder.cuda()

    def run(x):
        if cuda:
            x = x.cuda()
        output, state = encoder(x)
        output.mean().backward()

    # test batch size > 1
    input_data = torch.rand(input_length, batch_size, input_size)
    run(input_data)
Example #10
def test_sru_backward(bidirectional, layer_norm, normalize_after):
    eps = 1e-4
    torch.manual_seed(123)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    input_length = 3
    batch_size = 5
    input_size = 4
    hidden_size = 2
    encoder = sru.SRU(input_size,
                      hidden_size,
                      bidirectional=bidirectional,
                      layer_norm=layer_norm,
                      normalize_after=normalize_after)
    x = torch.randn(input_length, batch_size, input_size)

    # backward in CPU mode
    h, c = encoder(x)
    h.sum().backward()
    grads = [p.grad.clone() for p in encoder.parameters() if p.requires_grad]

    # backward in GPU mode
    encoder.zero_grad()
    encoder, x = encoder.cuda(), x.cuda()
    h_, c_ = encoder(x)
    h_.sum().backward()
    grads_ = [
        p.grad.cpu().clone() for p in encoder.parameters() if p.requires_grad
    ]

    assert len(grads) == len(grads_)
    for g1, g2 in zip(grads, grads_):
        assert (g1 - g2).abs().max() <= eps
Example #11
def run(args):
    D = 4
    model = sru.SRU(D, D, num_layers=2, normalize_after=args.normalize_after)
    model.eval()

    ts_model = torch.jit.script(model)
    ts_model.save('sru_ts.pt')

    with torch.no_grad():
        x = torch.ones(3, 2, D)
        h, c = model(x)
        h, c = h.view(-1), c.view(-1)
        print(''.join(["{:.4f} ".format(v.item()) for v in h]))
        print(''.join(["{:.4f} ".format(v.item()) for v in c]))
Example #12
    def __init__(self, args):
        super(Model, self).__init__()
        self.args = args
        # self.cutoffs = [20000, 60000]
        self.cutoffs = [10000, 20000, 40000, 60000, 100000]
        self.n_V = args.n_token
        self.n_e = args.n_e or args.n_proj
        self.n_d = args.n_d
        self.depth = args.depth
        self.drop = nn.Dropout(args.dropout)
        self.embedding_layer = AdaptiveEmbedding(
            self.n_V,
            self.n_e,
            self.n_d,
            self.cutoffs,
            div_val=args.div_val,
            div_freq=2,
            dropout=args.dropout_e,
        )
        self.rnn = sru.SRU(
            self.n_d,
            self.n_d,
            self.depth,
            projection_size=args.n_proj,
            dropout=args.dropout,
            highway_bias=args.bias,
            layer_norm=args.layer_norm,
            rescale=args.rescale,
            custom_m=flop.ProjectedLinear(self.n_d,
                                          self.n_d * 3,
                                          proj_features=args.n_proj,
                                          bias=False),
        )
        self.output_layer = AdaptiveLogSoftmax(
            self.n_V,
            self.n_e,
            self.n_d,
            self.cutoffs,
            div_val=args.div_val,
            div_freq=2,
            dropout=args.dropout_e,
            keep_order=False,
        )
        self.init_weights()
        if not args.not_tie:
            self.tie_weights()
Example #13
    def run():
        eps = 1e-4
        num_sentences = 3
        embedding_size = 7
        rnn_hidden = 4
        max_len = 4
        layers = 5
        bidirectional = True
        encoder = sru.SRU(
            embedding_size,
            rnn_hidden,
            layers,
            bidirectional=bidirectional,
            nn_rnn_compatible_return=compat,
        )
        words_embeddings = torch.rand((max_len, num_sentences, embedding_size),
                                      dtype=torch.float32)
        if cuda:
            words_embeddings = words_embeddings.to("cuda")
            encoder.cuda()
        encoder.eval()
        hidden, cell = encoder(words_embeddings)

        def cell_to_emb(cell, batch_size):
            if compat:
                # should arrive as:
                # (num_layers * num_directions, batch, hidden_size)
                cell = cell.view(layers, 2 if bidirectional else 1, batch_size,
                                 rnn_hidden)
                cell = cell[-1].transpose(0, 1)
                # (batch, num_directions, hidden_size)
                cell = cell.contiguous().view(batch_size, -1)
            else:
                # should arrive as:
                # (num_layers, batch_size, num_directions * hidden_size)
                cell = cell[-1].view(batch_size, -1)
            return cell

        scores = cell_to_emb(cell, num_sentences)
        for i in range(num_sentences):
            hidden, cell = encoder(words_embeddings[:, i:i + 1])
            score = cell_to_emb(cell, 1)
            assert (score.detach() - scores[i].detach()).abs().max() <= eps
Example #14
    def __init__(self,
                 vocabulary_size,
                 embedding_dim=1024,
                 dropout=0.5,
                 rnn_dropout=0.1,
                 depth=2,
                 bias=-3,
                 trainable=False):
        """
        num_embeddings -- size of Embedding Layer input (num of words)
        embedding_dim -- size of Embedding Layer output (n_d)
        dropout -- % of neurons to disable in dropout layer
        rnn_dropout -- % of neurons to disable in rnn layer (HAHA NO!)
        depth -- number of rnn layers
        bias -- I don't know what is it
        """
        super(Model, self).__init__()

        self.embedding_dim = embedding_dim
        self.depth = depth
        self.bias = bias
        self.trainable = trainable

        self.relu = nn.ReLU()

        self.rnn = sru.SRU(
            input_size=embedding_dim,
            hidden_size=embedding_dim,
            num_layers=depth,
            dropout=dropout,
            rnn_dropout=rnn_dropout,
            bidirectional=True,
            rescale=False,  # no rescaling correction on the highway connection
            v1=True,  # use the older v1 SRU formulation
            highway_bias=bias)

        if trainable:
            self.output_layer = nn.Linear(embedding_dim, vocabulary_size)

        self.drop = nn.Dropout(dropout)

        self.init_weights()
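
For reference, a standalone illustration (not part of the original example) of the shapes a bidirectional SRU like the one above returns: the hidden states concatenate both directions, so the last dimension is 2 * hidden_size.

# Standalone shape illustration for a bidirectional SRU.
import torch
import sru

rnn = sru.SRU(input_size=1024, hidden_size=1024, num_layers=2, bidirectional=True)
x = torch.rand(20, 8, 1024)   # (seq_len, batch, input_size)
h, c = rnn(x)
print(h.shape)  # (seq_len, batch, 2 * hidden_size) -> torch.Size([20, 8, 2048])
print(c.shape)  # (num_layers, batch, 2 * hidden_size) -> torch.Size([2, 8, 2048])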
Example #15
import torch
import sru

D = 4
model = sru.SRU(D, D, num_layers=2)
model.eval()

ts_model = torch.jit.script(model)
ts_model.save('sru_ts.pt')

with torch.no_grad():
    x = torch.ones(3, 2, D)
    h, c = model(x)
    h, c = h.view(-1), c.view(-1)
    print(''.join(["{:.4f} ".format(v.item()) for v in h]))
    print(''.join(["{:.4f} ".format(v.item()) for v in c]))
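
As a follow-up to the script above (not in the original), the saved TorchScript module can be loaded back and should reproduce the same outputs:

# Continuation of the example above: reload the scripted model and compare.
loaded = torch.jit.load('sru_ts.pt')
loaded.eval()
with torch.no_grad():
    h2, c2 = loaded(torch.ones(3, 2, D))
    assert torch.allclose(h, h2.view(-1), atol=1e-6)
    assert torch.allclose(c, c2.view(-1), atol=1e-6)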