Example #1
    def test_regex_matches_are_initialized_correctly(self):
        class Net(torch.nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.linear_1_with_funky_name = torch.nn.Linear(5, 10)
                self.linear_2 = torch.nn.Linear(10, 5)
                self.conv = torch.nn.Conv1d(5, 5, 5)

            def forward(self, inputs):  # pylint: disable=arguments-differ
                pass

        # pyhocon does funny things if there's a . in a key.  This test makes sure that we
        # handle these kinds of regexes correctly.
        json_params = """{"initializer": [
        ["conv", {"type": "constant", "val": 5}],
        ["funky_na.*bi", {"type": "constant", "val": 7}]
        ]}
        """
        params = Params(pyhocon.ConfigFactory.parse_string(json_params))
        initializers = InitializerApplicator.from_params(params['initializer'])
        model = Net()
        initializers(model)

        for parameter in model.conv.parameters():
            assert torch.equal(parameter.data, torch.ones(parameter.size()) * 5)

        parameter = model.linear_1_with_funky_name.bias
        assert torch.equal(parameter.data, torch.ones(parameter.size()) * 7)
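A note on the pattern used throughout these examples: torch.equal(a, b) returns True only when both tensors have the same size and identical element values; a shape mismatch alone makes it False. A minimal sketch of that behavior (plain PyTorch, nothing beyond the public API is assumed):

    import torch

    a = torch.ones(2, 2) * 5
    b = torch.full((2, 2), 5.0)
    assert torch.equal(a, b)              # same shape, same values -> True
    assert not torch.equal(a, a.view(4))  # same values, different shape -> False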
Example #2
    def test_remote_tensor_multi_var_methods(self):
        hook = TorchHook(verbose=False)
        local = hook.local_worker
        remote = VirtualWorker(hook, 1)
        local.add_worker(remote)

        x = torch.FloatTensor([[1, 2], [4, 3], [5, 6]])
        x.send(remote)
        y, z = torch.max(x, 1)
        assert torch.equal(y.get(), torch.FloatTensor([2, 4, 6]))
        assert torch.equal(z.get(), torch.LongTensor([1, 0, 1]))

        x = torch.FloatTensor([[0, 0], [1, 0]]).send(remote)
        y, z = torch.qr(x)
        assert (y.get() == torch.FloatTensor([[0, -1], [-1, 0]])).all()
        assert (z.get() == torch.FloatTensor([[-1, 0], [0, 0]])).all()

        x = torch.arange(1, 6).send(remote)
        y, z = torch.kthvalue(x, 4)
        assert (y.get() == torch.FloatTensor([4])).all()
        assert (z.get() == torch.LongTensor([3])).all()

        x = torch.FloatTensor([[0, 0], [1, 1]]).send(remote)
        y, z = torch.eig(x, True)
        assert (y.get() == torch.FloatTensor([[1, 0], [0, 0]])).all()
        assert ((z.get() == torch.FloatTensor([[0, 0], [1, 0]])) == torch.ByteTensor([[1, 0], [1, 0]])).all()

        x = torch.zeros(3, 3).send(remote)
        w, y, z = torch.svd(x)
        assert (w.get() == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
        assert (y.get() == torch.FloatTensor([0, 0, 0])).all()
        assert (z.get() == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
Example #3
 def test_add_output_dim(self, cuda=False):
     for double in (False, True):
         tkwargs = {
             "device": torch.device("cuda") if cuda else torch.device("cpu"),
             "dtype": torch.double if double else torch.float,
         }
         original_batch_shape = torch.Size([2])
         # check exception is raised
         X = torch.rand(2, 1, **tkwargs)
         with self.assertRaises(ValueError):
             add_output_dim(X=X, original_batch_shape=original_batch_shape)
         # test no new batch dims
         X = torch.rand(2, 2, 1, **tkwargs)
         X_out, output_dim_idx = add_output_dim(
             X=X, original_batch_shape=original_batch_shape
         )
         self.assertTrue(torch.equal(X_out, X.unsqueeze(0)))
         self.assertEqual(output_dim_idx, 0)
         # test new batch dims
         X = torch.rand(3, 2, 2, 1, **tkwargs)
         X_out, output_dim_idx = add_output_dim(
             X=X, original_batch_shape=original_batch_shape
         )
         self.assertTrue(torch.equal(X_out, X.unsqueeze(1)))
         self.assertEqual(output_dim_idx, 1)
Example #4
    def test_degenerate_GPyTorchPosterior(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # singular covariance matrix
            degenerate_covar = torch.tensor(
                [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
            )
            mean = torch.rand(3, dtype=dtype, device=device)
            mvn = MultivariateNormal(mean, lazify(degenerate_covar))
            posterior = GPyTorchPosterior(mvn=mvn)
            # basics
            self.assertEqual(posterior.device.type, device.type)
            self.assertTrue(posterior.dtype == dtype)
            self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
            self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
            variance_exp = degenerate_covar.diag().unsqueeze(-1)
            self.assertTrue(torch.equal(posterior.variance, variance_exp))

            # rsample
            with warnings.catch_warnings(record=True) as w:
                # we check that the p.d. warning is emitted - this only
                # happens once per posterior, so we need to check only once
                samples = posterior.rsample(sample_shape=torch.Size([4]))
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertTrue("not p.d." in str(w[-1].message))
            self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
            samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
            self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
            # rsample w/ base samples
            base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
            samples_b1 = posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            samples_b2 = posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            self.assertTrue(torch.allclose(samples_b1, samples_b2))
            base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
            samples2_b1 = posterior.rsample(
                sample_shape=torch.Size([4, 2]), base_samples=base_samples2
            )
            samples2_b2 = posterior.rsample(
                sample_shape=torch.Size([4, 2]), base_samples=base_samples2
            )
            self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
            # collapse_batch_dims
            b_mean = torch.rand(2, 3, dtype=dtype, device=device)
            b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
            b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
            b_posterior = GPyTorchPosterior(mvn=b_mvn)
            b_base_samples = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
            with warnings.catch_warnings(record=True) as w:
                b_samples = b_posterior.rsample(
                    sample_shape=torch.Size([4]), base_samples=b_base_samples
                )
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertTrue("not p.d." in str(w[-1].message))
            self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
Example #5
    def test_q_noisy_expected_improvement(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # the event shape is `b x q x t` = 1 x 2 x 1
            samples_noisy = torch.tensor([1.0, 0.0], device=device, dtype=dtype)
            samples_noisy = samples_noisy.view(1, 2, 1)
            # X_baseline is `q' x d` = 1 x 1
            X_baseline = torch.zeros(1, 1, device=device, dtype=dtype)
            mm_noisy = MockModel(MockPosterior(samples=samples_noisy))
            # X is `q x d` = 1 x 1
            X = torch.zeros(1, 1, device=device, dtype=dtype)

            # basic test
            sampler = IIDNormalSampler(num_samples=2)
            acqf = qNoisyExpectedImprovement(
                model=mm_noisy, X_baseline=X_baseline, sampler=sampler
            )
            res = acqf(X)
            self.assertEqual(res.item(), 1.0)

            # basic test, no resample
            sampler = IIDNormalSampler(num_samples=2, seed=12345)
            acqf = qNoisyExpectedImprovement(
                model=mm_noisy, X_baseline=X_baseline, sampler=sampler
            )
            res = acqf(X)
            self.assertEqual(res.item(), 1.0)
            self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]))
            bs = acqf.sampler.base_samples.clone()
            acqf(X)
            self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))

            # basic test, qmc, no resample
            sampler = SobolQMCNormalSampler(num_samples=2)
            acqf = qNoisyExpectedImprovement(
                model=mm_noisy, X_baseline=X_baseline, sampler=sampler
            )
            res = acqf(X)
            self.assertEqual(res.item(), 1.0)
            self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]))
            bs = acqf.sampler.base_samples.clone()
            acqf(X)
            self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))

            # basic test, qmc, resample
            sampler = SobolQMCNormalSampler(num_samples=2, resample=True, seed=12345)
            acqf = qNoisyExpectedImprovement(
                model=mm_noisy, X_baseline=X_baseline, sampler=sampler
            )
            res = acqf(X)
            self.assertEqual(res.item(), 1.0)
            self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]))
            bs = acqf.sampler.base_samples.clone()
            acqf(X)
            self.assertFalse(torch.equal(acqf.sampler.base_samples, bs))
Example #6
 def test_MockPosterior(self):
     mean = torch.rand(2)
     variance = torch.eye(2)
     samples = torch.rand(1, 2)
     mp = MockPosterior(mean=mean, variance=variance, samples=samples)
     self.assertTrue(torch.equal(mp.mean, mean))
     self.assertTrue(torch.equal(mp.variance, variance))
     self.assertTrue(torch.all(mp.sample() == samples.unsqueeze(0)))
     self.assertTrue(
         torch.all(mp.sample(torch.Size([2])) == samples.repeat(2, 1, 1))
     )
Example #7
 def test_GPyTorchPosterior(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         mean = torch.rand(3, dtype=dtype, device=device)
         variance = 1 + torch.rand(3, dtype=dtype, device=device)
         covar = variance.diag()
         mvn = MultivariateNormal(mean, lazify(covar))
         posterior = GPyTorchPosterior(mvn=mvn)
         # basics
         self.assertEqual(posterior.device.type, device.type)
         self.assertTrue(posterior.dtype == dtype)
         self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
         self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
         self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
         # rsample
         samples = posterior.rsample()
         self.assertEqual(samples.shape, torch.Size([1, 3, 1]))
         samples = posterior.rsample(sample_shape=torch.Size([4]))
         self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
         samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
         self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
         # rsample w/ base samples
         base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
         # incompatible shapes
         with self.assertRaises(RuntimeError):
             posterior.rsample(
                 sample_shape=torch.Size([3]), base_samples=base_samples
             )
         samples_b1 = posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=base_samples
         )
         samples_b2 = posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=base_samples
         )
         self.assertTrue(torch.allclose(samples_b1, samples_b2))
         base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
         samples2_b1 = posterior.rsample(
             sample_shape=torch.Size([4, 2]), base_samples=base_samples2
         )
         samples2_b2 = posterior.rsample(
             sample_shape=torch.Size([4, 2]), base_samples=base_samples2
         )
         self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
         # collapse_batch_dims
         b_mean = torch.rand(2, 3, dtype=dtype, device=device)
         b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=device)
         b_covar = b_variance.unsqueeze(-1) * torch.eye(3).type_as(b_variance)
         b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
         b_posterior = GPyTorchPosterior(mvn=b_mvn)
         b_base_samples = torch.randn(4, 1, 3, 1, device=device, dtype=dtype)
         b_samples = b_posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=b_base_samples
         )
         self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
Example #8
    def test_local_tensor_iterable_methods(self):
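        # torch.stack joins tensors along a new leading dimension; torch.cat concatenates along an existing one.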

        x = torch.FloatTensor([1, 2, 3])
        y = torch.FloatTensor([2, 3, 4])
        z = torch.FloatTensor([5, 6, 7])
        assert(torch.equal(torch.stack([x, y, z]), torch.FloatTensor([[1, 2, 3], [2, 3, 4], [5, 6, 7]])))

        x = torch.FloatTensor([1, 2, 3])
        y = torch.FloatTensor([2, 3, 4])
        z = torch.FloatTensor([5, 6, 7])
        assert (torch.equal(torch.cat([x, y, z]), torch.FloatTensor([1, 2, 3, 2, 3, 4, 5, 6, 7])))
Example #9
 def test_generic_mc_objective(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         obj = GenericMCObjective(generic_obj)
         samples = torch.randn(1, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(2, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 1, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 2, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
Example #10
    def test_make_grid_not_inplace(self):
        t = torch.rand(5, 3, 10, 10)
        t_clone = t.clone()

        utils.make_grid(t, normalize=False)
        assert torch.equal(t, t_clone), 'make_grid modified tensor in-place'

        utils.make_grid(t, normalize=True, scale_each=False)
        assert torch.equal(t, t_clone), 'make_grid modified tensor in-place'

        utils.make_grid(t, normalize=True, scale_each=True)
        assert torch.equal(t, t_clone), 'make_grid modified tensor in-place'
Example #11
    def test_q_expected_improvement(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # the event shape is `b x q x t` = 1 x 1 x 1
            samples = torch.zeros(1, 1, 1, device=device, dtype=dtype)
            mm = MockModel(MockPosterior(samples=samples))
            # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, device=device, dtype=dtype)

            # basic test
            sampler = IIDNormalSampler(num_samples=2)
            acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
            res = acqf(X)
            self.assertEqual(res.item(), 0.0)

            # test shifting best_f value
            acqf = qExpectedImprovement(model=mm, best_f=-1, sampler=sampler)
            res = acqf(X)
            self.assertEqual(res.item(), 1.0)

            # basic test, no resample
            sampler = IIDNormalSampler(num_samples=2, seed=12345)
            acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
            res = acqf(X)
            self.assertEqual(res.item(), 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1]))
            bs = acqf.sampler.base_samples.clone()
            res = acqf(X)
            self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))

            # basic test, qmc, no resample
            sampler = SobolQMCNormalSampler(num_samples=2)
            acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
            res = acqf(X)
            self.assertEqual(res.item(), 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1]))
            bs = acqf.sampler.base_samples.clone()
            acqf(X)
            self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))

            # basic test, qmc, resample
            sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
            acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
            res = acqf(X)
            self.assertEqual(res.item(), 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1]))
            bs = acqf.sampler.base_samples.clone()
            acqf(X)
            self.assertFalse(torch.equal(acqf.sampler.base_samples, bs))
Example #12
    def do_test_per_param_optim(self, fixed_param, free_param):
        pyro.clear_param_store()

        def model():
            prior_dist = Normal(self.mu0, torch.pow(self.lam0, -0.5))
            mu_latent = pyro.sample("mu_latent", prior_dist)
            x_dist = Normal(mu_latent, torch.pow(self.lam, -0.5))
            pyro.observe("obs", x_dist, self.data)
            return mu_latent

        def guide():
            mu_q = pyro.param(
                "mu_q",
                Variable(
                    torch.zeros(1),
                    requires_grad=True))
            log_sig_q = pyro.param(
                "log_sig_q", Variable(
                    torch.zeros(1), requires_grad=True))
            sig_q = torch.exp(log_sig_q)
            pyro.sample("mu_latent", Normal(mu_q, sig_q))

        def optim_params(module_name, param_name, tags):
            if param_name == fixed_param:
                return {'lr': 0.00}
            elif param_name == free_param:
                return {'lr': 0.01}

        adam = optim.Adam(optim_params)
        adam2 = optim.Adam(optim_params)
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)
        svi2 = SVI(model, guide, adam2, loss="ELBO", trace_graph=True)

        svi.step()
        adam_initial_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
        adam.save('adam.unittest.save')
        svi.step()
        adam_final_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
        adam2.load('adam.unittest.save')
        svi2.step()
        adam2_step_count_after_load_and_step = list(adam2.get_state()['mu_q']['state'].items())[0][1]['step']

        assert adam_initial_step_count == 1
        assert adam_final_step_count == 2
        assert adam2_step_count_after_load_and_step == 2

        free_param_unchanged = torch.equal(pyro.param(free_param).data, torch.zeros(1))
        fixed_param_unchanged = torch.equal(pyro.param(fixed_param).data, torch.zeros(1))
        assert fixed_param_unchanged and not free_param_unchanged
Example #13
    def test_match_batch_shape(self):
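        # match_batch_shape adds or broadcasts X's batch dimensions to match Y's; it cannot shrink them, hence the RuntimeError below.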
        X = torch.rand(3, 2)
        Y = torch.rand(1, 3, 2)
        X_tf = match_batch_shape(X, Y)
        self.assertTrue(torch.equal(X_tf, X.unsqueeze(0)))

        X = torch.rand(1, 3, 2)
        Y = torch.rand(2, 3, 2)
        X_tf = match_batch_shape(X, Y)
        self.assertTrue(torch.equal(X_tf, X.repeat(2, 1, 1)))

        X = torch.rand(2, 3, 2)
        Y = torch.rand(1, 3, 2)
        with self.assertRaises(RuntimeError):
            match_batch_shape(X, Y)
Example #14
 def test_standardize(self, cuda=False):
     tkwargs = {"device": torch.device("cuda" if cuda else "cpu")}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         X = torch.tensor([0.0, 0.0], **tkwargs)
         self.assertTrue(torch.equal(X, standardize(X)))
         X2 = torch.tensor([0.0, 1.0, 1.0, 1.0], **tkwargs)
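          # mean(X2) = 0.75 and unbiased std = 0.5, so standardize maps [0, 1, 1, 1] to [-1.5, 0.5, 0.5, 0.5].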
         expected_X2_stdized = torch.tensor([-1.5, 0.5, 0.5, 0.5], **tkwargs)
         self.assertTrue(torch.equal(expected_X2_stdized, standardize(X2)))
         X3 = torch.tensor(
             [[0.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]], **tkwargs
         ).transpose(1, 0)
         X3_stdized = standardize(X3)
         self.assertTrue(torch.equal(X3_stdized[:, 0], expected_X2_stdized))
         self.assertTrue(torch.equal(X3_stdized[:, 1], torch.zeros(4, **tkwargs)))
Example #15
 def test_identity_mc_objective(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         obj = IdentityMCObjective()
         # single-element tensor
         samples = torch.randn(1, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), samples[0]))
         # single-dimensional non-squeezable tensor
         samples = torch.randn(2, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), samples))
         # two-dimensional squeezable tensor
         samples = torch.randn(3, 1, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), samples.squeeze(-1)))
         # two-dimensional non-squeezable tensor
         samples = torch.randn(3, 2, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), samples))
Example #16
    def test_read_embedding_file_inside_archive(self):
        token2vec = {
                "think": torch.Tensor([0.143, 0.189, 0.555, 0.361, 0.472]),
                "make": torch.Tensor([0.878, 0.651, 0.044, 0.264, 0.872]),
                "difference": torch.Tensor([0.053, 0.162, 0.671, 0.110, 0.259]),
                "àèìòù": torch.Tensor([1.0, 2.0, 3.0, 4.0, 5.0])
                }
        vocab = Vocabulary()
        for token in token2vec:
            vocab.add_token_to_namespace(token)

        params = Params({
                'pretrained_file': str(self.FIXTURES_ROOT / 'embeddings/multi-file-archive.zip'),
                'embedding_dim': 5
                })
        with pytest.raises(ValueError, message="No ValueError when pretrained_file is a multi-file archive"):
            Embedding.from_params(vocab, params)

        for ext in ['.zip', '.tar.gz']:
            archive_path = str(self.FIXTURES_ROOT / 'embeddings/multi-file-archive') + ext
            file_uri = format_embeddings_file_uri(archive_path, 'folder/fake_embeddings.5d.txt')
            params = Params({
                    'pretrained_file': file_uri,
                    'embedding_dim': 5
                    })
            embeddings = Embedding.from_params(vocab, params).weight.data
            for tok, vec in token2vec.items():
                i = vocab.get_token_index(tok)
                assert torch.equal(embeddings[i], vec), 'Problem with format ' + archive_path
Example #17
    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / 'archive_test'
        model = train_model(self.params, serialization_dir=serialization_dir)

        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model

        # check that model weights are the same
        keys = set(model.state_dict().keys())
        keys2 = set(model2.state_dict().keys())

        assert keys == keys2

        for key in keys:
            assert torch.equal(model.state_dict()[key], model2.state_dict()[key])

        # check that vocabularies are the same
        vocab = model.vocab
        vocab2 = model2.vocab

        assert vocab._token_to_index == vocab2._token_to_index  # pylint: disable=protected-access
        assert vocab._index_to_token == vocab2._index_to_token  # pylint: disable=protected-access

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy
Example #18
def compare_state_dict(sa, sb):
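    # Two state dicts match only if their key sets are identical and every tensor is element-wise equal.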
    if sa.keys() != sb.keys():
        return False
    for k, va in sa.items():
        if not torch.equal(va, sb[k]):
            return False
    return True
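A usage sketch for the helper above; the nn.Linear modules are illustrative, any pair of same-architecture modules would do:

    import torch
    import torch.nn as nn

    m1 = nn.Linear(4, 2)
    m2 = nn.Linear(4, 2)
    m2.load_state_dict(m1.state_dict())  # copy weights so both dicts hold equal tensors
    assert compare_state_dict(m1.state_dict(), m2.state_dict())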
Example #19
    def test_local_tensor_multi_var_methods(self):
        x = torch.FloatTensor([[1, 2], [2, 3], [5, 6]])
        t, s = torch.max(x, 1)
        assert (t == torch.FloatTensor([2, 3, 6])).float().sum() == 3
        assert (s == torch.LongTensor([1, 1, 1])).float().sum() == 3

        x = torch.FloatTensor([[0, 0], [1, 1]])
        y, z = torch.eig(x, True)
        assert (y == torch.FloatTensor([[1, 0], [0, 0]])).all()
        assert (torch.equal(z == torch.FloatTensor([[0, 0], [1, 0]]), torch.ByteTensor([[1, 0], [1, 0]])))

        x = torch.FloatTensor([[0, 0], [1, 0]])
        y, z = torch.qr(x)
        assert (y == torch.FloatTensor([[0, -1], [-1, 0]])).all()
        assert (z == torch.FloatTensor([[-1, 0], [0, 0]])).all()

        x = torch.arange(1, 6)
        y, z = torch.kthvalue(x, 4)
        assert (y == torch.FloatTensor([4])).all()
        assert (z == torch.LongTensor([3])).all()

        x = torch.zeros(3, 3)
        w, y, z = torch.svd(x)
        assert (w == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
        assert (y == torch.FloatTensor([0, 0, 0])).all()
        assert (z == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
Example #20
 def test_joint_optimize(
     self,
     mock_get_best_candidates,
     mock_gen_candidates,
     mock_gen_batch_initial_conditions,
     cuda=False,
 ):
     q = 3
     num_restarts = 2
     raw_samples = 10
     options = {}
     mock_acq_function = MockAcquisitionFunction()
     tkwargs = {"device": torch.device("cuda") if cuda else torch.device("cpu")}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         mock_gen_batch_initial_conditions.return_value = torch.zeros(
             num_restarts, q, 3, **tkwargs
         )
         mock_gen_candidates.return_value = torch.cat(
             [i * torch.ones(1, q, 3, **tkwargs) for i in range(num_restarts)], dim=0
         )
         mock_get_best_candidates.return_value = torch.ones(1, q, 3, **tkwargs)
         expected_candidates = mock_get_best_candidates.return_value
         bounds = torch.stack(
             [torch.zeros(3, **tkwargs), 4 * torch.ones(3, **tkwargs)]
         )
         candidates = joint_optimize(
             acq_function=mock_acq_function,
             bounds=bounds,
             q=q,
             num_restarts=num_restarts,
             raw_samples=raw_samples,
             options=options,
         )
         self.assertTrue(torch.equal(candidates, expected_candidates))
Example #21
    def assertNotEqual(self, x, y, prec=None, message=''):
        if prec is None:
            prec = self.precision

        x, y = self.unwrapVariables(x, y)

        if torch.is_tensor(x) and torch.is_tensor(y):
            if x.size() != y.size():
                super(TestCase, self).assertNotEqual(x.size(), y.size())
            self.assertGreater(x.numel(), 0)
            y = y.type_as(x)
            y = y.cuda(device=x.get_device()) if x.is_cuda else y.cpu()
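            # NaN != NaN, so `x != x` marks NaN positions; equal masks mean both tensors have NaNs in the same places.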
            nan_mask = x != x
            if torch.equal(nan_mask, y != y):
                diff = x - y
                if diff.is_signed():
                    diff = diff.abs()
                diff[nan_mask] = 0
                max_err = diff.max()
                self.assertGreaterEqual(max_err, prec, message)
        elif type(x) == str and type(y) == str:
            super(TestCase, self).assertNotEqual(x, y)
        elif is_iterable(x) and is_iterable(y):
            super(TestCase, self).assertNotEqual(x, y)
        else:
            try:
                self.assertGreaterEqual(abs(x - y), prec, message)
                return
            except (TypeError, AssertionError):
                pass
            super(TestCase, self).assertNotEqual(x, y, message)
Example #22
 def test_gen_batch_initial_conditions_simple_warning(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         bounds = torch.tensor([[0, 0], [1, 1]], device=device, dtype=dtype)
         with warnings.catch_warnings(record=True) as ws:
             with mock.patch(
                 "botorch.optim.optimize.draw_sobol_samples",
                 return_value=torch.zeros(10, 1, 2, device=device, dtype=dtype),
             ):
                 batch_initial_conditions = gen_batch_initial_conditions(
                     acq_function=MockAcquisitionFunction(),
                     bounds=bounds,
                     q=1,
                     num_restarts=2,
                     raw_samples=10,
                 )
                 self.assertEqual(len(ws), 1)
                 self.assertTrue(
                     issubclass(ws[-1].category, BadInitialCandidatesWarning)
                 )
                 self.assertTrue(
                     torch.equal(
                         batch_initial_conditions,
                         torch.zeros(2, 1, 2, device=device, dtype=dtype),
                     )
                 )
Example #23
def geometric(p, t=None):
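    # Sample Bernoulli draws until the first 0; the recursion sums the draws, so the result counts the 1s seen before stopping.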
    t = 0 if t is None else t
    x = pyro.sample("x_{}".format(t), dist.bernoulli, p)
    if torch.equal(x.data, torch.zeros(1)):
        return x
    else:
        return x + geometric(p, t+1)
Example #24
    def test_python_ir(self):
        x = Variable(torch.Tensor([0.4]), requires_grad=True)
        y = Variable(torch.Tensor([0.7]), requires_grad=True)

        def doit(x, y):
            return torch.sigmoid(torch.tanh(x * (x + y)))

        traced, _ = torch.jit.trace(doit, (x, y))
        g = torch._C._jit_get_graph(traced)
        g2 = torch._C.Graph()
        g_to_g2 = {}
        for node in g.inputs():
            g_to_g2[node] = g2.addInput()
        for node in g.nodes():
            n_ = g2.createClone(node, lambda x: g_to_g2[x])
            g2.appendNode(n_)
            for o, no in zip(node.outputs(), n_.outputs()):
                g_to_g2[o] = no

        for node in g.outputs():
            g2.registerOutput(g_to_g2[node])

        t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
        assert(t_node.attributeNames() == ["a"])
        g2.appendNode(t_node)
        assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
        self.assertExpected(str(g2))
Example #25
    def test_forward_pass_runs_correctly(self):
        """
        Check to make sure a forward pass on an ensemble of two identical copies of a model yields the same
        results as the model itself.
        """
        bidaf_ensemble = BidafEnsemble([self.model, self.model])

        batch = Batch(self.instances)
        batch.index_instances(self.vocab)
        training_tensors = batch.as_tensor_dict()

        bidaf_output_dict = self.model(**training_tensors)
        ensemble_output_dict = bidaf_ensemble(**training_tensors)

        metrics = self.model.get_metrics(reset=True)

        # We've set up the data such that there's a fake answer that consists of the whole
        # paragraph.  _Any_ valid prediction for that question should produce an F1 of greater than
        # zero, while if we somehow haven't been able to load the evaluation data, or there was an
        # error with using the evaluation script, this will fail.  This makes sure that we've
        # loaded the evaluation data correctly and have hooked things up to the official evaluation
        # script.
        assert metrics['f1'] > 0
        assert torch.equal(ensemble_output_dict['best_span'], bidaf_output_dict['best_span'])
        assert ensemble_output_dict['best_span_str'] == bidaf_output_dict['best_span_str']
Example #26
    def test_torch_function_with_multiple_output_on_remote_var(self):
        hook = TorchHook(verbose=False)
        me = hook.local_worker
        remote = VirtualWorker(id=2, hook=hook)
        me.add_worker(remote)

        x = Var(torch.FloatTensor([[1, 2], [4, 3], [5, 6]]))
        x.send(remote)
        y, z = torch.max(x, 1)
        y.get()
        assert torch.equal(y, Var(torch.FloatTensor([2, 4, 6])))

        x = Var(torch.FloatTensor([[0, 0], [1, 0]])).send(remote)
        y, z = torch.qr(x)
        assert (y.get() == Var(torch.FloatTensor([[0, -1], [-1, 0]]))).all()
        assert (z.get() == Var(torch.FloatTensor([[-1, 0], [0, 0]]))).all()

        x = Var(torch.arange(1, 6)).send(remote)
        y, z = torch.kthvalue(x, 4)
        assert (y.get() == Var(torch.FloatTensor([4]))).all()
        assert (z.get() == Var(torch.LongTensor([3]))).all()

        x = Var(torch.FloatTensor([[0, 0], [0, 0]]))
        x.send(remote)
        y, z = torch.eig(x, True)
        assert (y.get() == Var(torch.FloatTensor([[0, 0], [0, 0]]))).all()
        assert (z.get() == Var(torch.FloatTensor([[1, 0.], [0, 1]]))).all()


        x = Var(torch.zeros(3, 3)).send(remote)
        w, y, z = torch.svd(x)
        assert (w.get() == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
        assert (y.get() == Var(torch.FloatTensor([0, 0, 0]))).all()
        assert (z.get() == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
Example #27
 def test_GPyTorchPosterior_Multitask(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         mean = torch.rand(3, 2, dtype=dtype, device=device)
         variance = 1 + torch.rand(3, 2, dtype=dtype, device=device)
         covar = variance.view(-1).diag()
         mvn = MultitaskMultivariateNormal(mean, lazify(covar))
         posterior = GPyTorchPosterior(mvn=mvn)
         # basics
         self.assertEqual(posterior.device.type, device.type)
         self.assertTrue(posterior.dtype == dtype)
         self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
         self.assertTrue(torch.equal(posterior.mean, mean))
         self.assertTrue(torch.equal(posterior.variance, variance))
         # rsample
         samples = posterior.rsample(sample_shape=torch.Size([4]))
         self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
         samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
         self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
         # rsample w/ base samples
         base_samples = torch.randn(4, 3, 2, device=device, dtype=dtype)
         samples_b1 = posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=base_samples
         )
         samples_b2 = posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=base_samples
         )
         self.assertTrue(torch.allclose(samples_b1, samples_b2))
         base_samples2 = torch.randn(4, 2, 3, 2, device=device, dtype=dtype)
         samples2_b1 = posterior.rsample(
             sample_shape=torch.Size([4, 2]), base_samples=base_samples2
         )
         samples2_b2 = posterior.rsample(
             sample_shape=torch.Size([4, 2]), base_samples=base_samples2
         )
         self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
         # collapse_batch_dims
         b_mean = torch.rand(2, 3, 2, dtype=dtype, device=device)
         b_variance = 1 + torch.rand(2, 3, 2, dtype=dtype, device=device)
         b_covar = b_variance.view(2, 6, 1) * torch.eye(6).type_as(b_variance)
         b_mvn = MultitaskMultivariateNormal(b_mean, lazify(b_covar))
         b_posterior = GPyTorchPosterior(mvn=b_mvn)
         b_base_samples = torch.randn(4, 1, 3, 2, device=device, dtype=dtype)
         b_samples = b_posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=b_base_samples
         )
         self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
Example #28
    def test_sequential_optimize(self, mock_joint_optimize, cuda=False):
        q = 3
        num_restarts = 2
        raw_samples = 10
        options = {}
        tkwargs = {"device": torch.device("cuda") if cuda else torch.device("cpu")}
        for dtype in (torch.float, torch.double):
            mock_acq_function = MockAcquisitionFunction()
            tkwargs["dtype"] = dtype
            joint_optimize_return_values = [
                torch.tensor([[[1.1, 2.1, 3.1]]], **tkwargs) for _ in range(q)
            ]
            mock_joint_optimize.side_effect = joint_optimize_return_values
            expected_candidates = torch.cat(
                joint_optimize_return_values, dim=-2
            ).round()
            bounds = torch.stack(
                [torch.zeros(3, **tkwargs), 4 * torch.ones(3, **tkwargs)]
            )
            inequality_constraints = [
                (torch.tensor([3]), torch.tensor([4]), torch.tensor(5))
            ]
            candidates = sequential_optimize(
                acq_function=mock_acq_function,
                bounds=bounds,
                q=q,
                num_restarts=num_restarts,
                raw_samples=raw_samples,
                options=options,
                inequality_constraints=inequality_constraints,
                post_processing_func=rounding_func,
            )
            self.assertTrue(torch.equal(candidates, expected_candidates))

            expected_call_kwargs = {
                "acq_function": mock_acq_function,
                "bounds": bounds,
                "q": 1,
                "num_restarts": num_restarts,
                "raw_samples": raw_samples,
                "options": options,
                "inequality_constraints": inequality_constraints,
                "equality_constraints": None,
                "fixed_features": None,
            }
            call_args_list = mock_joint_optimize.call_args_list[-q:]
            for i in range(q):
                self.assertEqual(call_args_list[i][1], expected_call_kwargs)

            # test that error is raised for acquisition functions without X_baseline
            mock_acq_function = MockAcquisitionFunction(has_X_baseline_attr=False)
            with self.assertRaises(UnsupportedError):
                sequential_optimize(
                    acq_function=mock_acq_function,
                    bounds=bounds,
                    q=q,
                    num_restarts=num_restarts,
                    raw_samples=raw_samples,
                )
Example #29
    def test_local_tensor_tertiary_methods(self):

        x = torch.FloatTensor([1, 2, 3])
        y = torch.FloatTensor([1, 2, 3])
        z = torch.FloatTensor([1, 2, 3])
        assert (torch.equal(torch.addcmul(z, 2, x, y), torch.FloatTensor([3.,  10.,  21.])))

        x = torch.FloatTensor([1, 2, 3])
        y = torch.FloatTensor([1, 2, 3])
        z = torch.FloatTensor([1, 2, 3])
        z.addcmul_(2, x, y)
        assert (torch.equal(z, torch.FloatTensor([3., 10., 21.])))

        x = torch.FloatTensor([[1, 2]])
        y = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
        z = torch.FloatTensor([1, 2, 3])
        assert(torch.equal(torch.addmm(z, x, y), torch.FloatTensor([[10., 14., 18.]])))
Example #30
    def test_remote_var_binary_methods(self):
        ''' Unit tests for methods mentioned on issue 1385
            https://github.com/OpenMined/PySyft/issues/1385'''
        hook = TorchHook(verbose=False)
        local = hook.local_worker
        remote = VirtualWorker(hook, 1)
        local.add_worker(remote)

        x = Var(torch.FloatTensor([1, 2, 3, 4])).send(remote)
        y = Var(torch.FloatTensor([[1, 2, 3, 4]])).send(remote)
        z = torch.matmul(x, y.t())
        assert (torch.equal(z.get(), Var(torch.FloatTensor([30]))))
        z = torch.add(x, y)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([[2, 4, 6, 8]]))))
        x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        z = torch.cross(x, y, dim=1)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))))
        x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        z = torch.dist(x, y)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([0.]))))
        x = Var(torch.FloatTensor([1, 2, 3])).send(remote)
        y = Var(torch.FloatTensor([1, 2, 3])).send(remote)
        z = torch.dot(x, y)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([14]))))
        z = torch.eq(x, y)
        assert (torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1]))))
        z = torch.ge(x, y)
        assert (torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1]))))
Example #31
    def forward(self, inputs, is_training, add_noise):
        """
        Args:
            inputs: [batch_size x 1 x sourceL]
        """
        batch_size = inputs.size(0)
        seq_len = inputs.size(2)
        assert seq_len == self.seq_len + 1

        embedded = self.embedding(inputs)
        embedded = self.gaussian(embedded, is_training, add_noise)
        encoder_outputs, (hidden, context) = self.encoder(embedded)

        prev_probs = []
        prev_idxs = []
        mask = torch.zeros(batch_size, seq_len).byte()
        if self.use_cuda:
            mask = mask.cuda()

        idxs = None

        # decoder_input = [batch, embedding_size]
        decoder_input = self.decoder_start_input.unsqueeze(0).repeat(
            batch_size, 1)
        stop_dict = {}
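        # stop_dict records, for each batch element, the first decoding step at which index 0 (the stop symbol) was sampled; from then on that element keeps emitting 0.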

        # for CH and DT, this for loop should be replaced by a while loop
        loop_idx = 0
        while True:

            _, (hidden, context) = self.decoder(decoder_input.unsqueeze(1),
                                                (hidden, context))

            query = hidden.squeeze(0)
            for i in range(self.n_glimpses):
                ref, logits = self.glimpse(query, encoder_outputs)
                logits, mask = self.apply_mask_to_logits(logits, mask, idxs)
                query = torch.bmm(
                    ref,
                    F.softmax(logits).unsqueeze(2),
                ).squeeze(2)

            _, logits = self.pointer(query, encoder_outputs)
            logits, mask = self.apply_mask_to_logits(logits, mask, idxs)
            probs = F.softmax(logits)

            # torch.multinomial: sampling with multinomial distribution
            # torch.squeeze: eliminate the dimension of size 1    e.g. (4, 1, 3) -> (4, 3)
            idxs = probs.multinomial(1).squeeze(1)
            '''
            for old_idxs in prev_idxs:
                if old_idxs.eq(idxs).data.any():
                    print(seq_len)
                    print('RESAMPLE!')
                    idxs = probs.multinomial(1).squeeze(1)
                    break
            '''

            # check and process idxs
            for i in range(batch_size):
                if idxs[i] == 0 and not (i in stop_dict.keys()):
                    stop_dict[i] = loop_idx
                if i in stop_dict.keys():
                    idxs[i] = 0

            # decoder_input = [batch_size, embedding_size]: the embedding of each sampled index
            decoder_input = embedded[[i for i in range(batch_size)],
                                     idxs.data, :]

            prev_probs.append(probs)
            prev_idxs.append(idxs)
            loop_idx += 1

            if torch.equal(idxs, torch.zeros_like(idxs)):
                break

        return prev_probs, prev_idxs, stop_dict, loop_idx
Example #32
def run_test_pipe(rank,
                  world_size,
                  filename,
                  filename_rpc,
                  skip_dist_init=False):
    pipe_world_size = 2

    if world_size == 1:
        return

    if not skip_dist_init:
        dist_init(rank, world_size, filename, filename_rpc)
    else:
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = "29502"
        rpc.init_rpc(f"Test{rank}", rank=rank, world_size=world_size)

    mpu.initialize_model_parallel(world_size // pipe_world_size,
                                  pipe_world_size)
    model_parallel_size = mpu.get_model_parallel_world_size()
    if torch.distributed.get_rank() == 0:
        print(
            "> testing Sequential + MultiProcessPipe with model parallel size: {}, pipe: {}"
            .format(model_parallel_size, pipe_world_size))
    chunk_size = 4

    seed = 12345
    set_random_seed(seed)
    input_size_coeff = 3
    input_size = input_size_coeff * model_parallel_size
    output_size_coeff = 7
    output_size = output_size_coeff * model_parallel_size
    batch_size = 3 * chunk_size

    target = torch.rand((batch_size, input_size), requires_grad=True).cuda()
    print(f"target = {target}")

    identity = IdentityLayer2D(batch_size, input_size).cuda()

    pipeline_devices = mpu.get_pipeline_parallel_group()

    set_random_seed(seed)
    model = nn.Sequential(
        layers.ColumnParallelLinear(input_size,
                                    output_size,
                                    keep_master_weight_for_test=True,
                                    bias=False).cuda(),
        nn.ReLU(),
        layers.RowParallelLinear(output_size,
                                 input_size,
                                 keep_master_weight_for_test=True,
                                 bias=False).cuda(),
    )
    set_random_seed(seed)

    reference = [
        nn.Linear(input_size, output_size, bias=False).cuda(),
        nn.ReLU(),
        nn.Linear(output_size, input_size, bias=False).cuda(),
    ]

    print(
        f"setup {reference[0].weight.size()}, {model[0].weight.size()}, {(input_size, output_size)}"
    )
    print(f"setup {reference[2].weight.size()}, {(output_size, input_size)}")

    reference[0].weight = Parameter(
        model[0].get_master_weight().clone()).cuda()
    reference[2].weight = Parameter(
        model[2].get_master_weight().clone()).cuda()

    reference = nn.Sequential(*reference)

    def grad_graph(depth, grad):
        result = depth * " " + str(grad)
        if grad:
            for x in grad.next_functions:
                result += "\n" + grad_graph(depth + 1, x[0])
        return result

    def check_weights(x, y, key: str, index=None):
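        # Compare each layer's master weight against the reference layer's weight (layers 2, then 0); print the tensors when they differ beyond tolerance or when a specific index is requested, then assert closeness.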
        for i in [2, 0]:
            if index is not None and i != index:
                continue
            left = x[i].get_master_weight()
            right = y[i].weight.data
            if not torch.allclose(left, right,
                                  atol=1.0e-6) or index is not None:
                print(
                    f"check_weights {key}-{i}: left = {left}, \nright = {right}"
                )
            if not torch.equal(left, right):
                print(
                    f"check_weights NOT_EQUAL {key}-{i}: left = {left}, \nright = {right}"
                )
            assert torch.allclose(left, right, atol=1.0e-6)

    def dump_opt_params(opt):
        for i, group in enumerate(opt.param_groups):
            for j, p in enumerate(group["params"]):
                print(f"{torch.distributed.get_rank()}:param {(i,j)} = {p}")
                print(
                    f"{torch.distributed.get_rank()}:param.grad {(i,j)} = {p.grad}"
                )

    def forward_model(model_, target, step=False):
        optimizer = torch.optim.SGD(model_.parameters(), lr=0.01, momentum=0.9)
        optimizer.zero_grad()
        model_.zero_grad()
        output = model_(identity())
        loss = nn.MSELoss()
        model_.zero_grad()
        if step:
            loss(output, target).backward()
            saved_weight_0 = model_[0].weight.data.clone()
            saved_weight_2 = model_[2].weight.data.clone()
            dump_opt_params(optimizer)
            optimizer.step()
            assert not torch.allclose(
                saved_weight_0, model_[0].weight.data, atol=1.0e-6)
            assert not torch.allclose(
                saved_weight_2, model_[2].weight.data, atol=1.0e-6)
        return output

    output = forward_model(model, target)
    reference_output = forward_model(reference, target)

    error = reference_output.sub(output).max()
    torch.distributed.barrier()
    assert error < 1.0e-6

    output = forward_model(model, target)
    error = reference_output.sub(output).max()
    torch.distributed.barrier()
    assert error < 1.0e-6

    output = forward_model(model, target)
    error = reference_output.sub(output).max()
    torch.distributed.barrier()
    assert error < 1.0e-6

    check_weights(model, reference, "before")
    saved_weight_0 = model[0].weight.data.clone()
    saved_weight_2 = model[2].weight.data.clone()
    output = forward_model(model, target, step=True)
    error = reference_output.sub(output).max()
    assert error < 1.0e-6
    model[0].weight.data = saved_weight_0
    model[2].weight.data = saved_weight_2

    worker_map = {
        i: f"Test{i}"
        for i in range(torch.distributed.get_world_size())
    }

    if pipe_world_size == 2:
        print(f"actually doing pipe stuff now")
        assert torch.equal(saved_weight_0, model[0].weight.data)
        assert torch.equal(saved_weight_2, model[2].weight.data)
        pipe_model = MultiProcessPipe(
            model,
            [2, 1],
            group=pipeline_devices,
            worker_map=worker_map,
            input_device=torch.cuda.current_device(),
            chunks=chunk_size,
        ).cuda()
        torch.distributed.barrier()
        pipe_rank = torch.distributed.get_rank(
            group=mpu.get_pipeline_parallel_group())
        print(f"pipe rank is {pipe_rank}")
        if pipe_rank == 0:
            assert torch.equal(saved_weight_0, pipe_model[0].weight.data)
        else:
            if not torch.equal(saved_weight_2, pipe_model[0].weight.data):
                print(
                    f"ne {pipe_rank}: left\n{saved_weight_2}\nright:\n{pipe_model[0].weight.data}"
                )
                assert torch.equal(saved_weight_2, pipe_model[0].weight.data)
        optimizer = torch.optim.SGD(pipe_model.parameters(),
                                    lr=0.01,
                                    momentum=0.9)
        optimizer.zero_grad()
        if pipe_rank == 0:
            assert torch.equal(saved_weight_0, pipe_model[0].weight.data)
            print(f"runner {rank}:\n{pipe_model[0].weight.data}")
        else:
            assert torch.equal(saved_weight_2, pipe_model[0].weight.data)
            print(f"runner {rank}:\n{pipe_model[0].weight.data}")

        if torch.distributed.get_rank(mpu.get_pipeline_parallel_group()) == 1:
            check_weights(model, reference, "pre-pipe", index=2)
        else:
            check_weights(model, reference, "pre-pipe", index=0)

        pipe_output = pipe_model(identity())
        print(f"exited pipe for {rank}")
        forward_model(reference, target, step=True)

        print(f"pipe_output {rank} = {pipe_output}")
        print(f"reference_output {rank} = {reference_output}")

        torch.distributed.barrier()

        if torch.distributed.get_rank(mpu.get_pipeline_parallel_group()) == 1:
            error = reference_output.sub(pipe_output.cuda()).max()
            if error >= 1.0e-6:
                print(f"error bad {error}")
            assert error < 1.0e-6

            loss = nn.MSELoss()
            failed = False
            pipe_output.retain_grad()
            with torch.autograd.profiler.profile() as prof:
                try:
                    loss(pipe_output, target).backward()
                except Exception as e:
                    failed = True
                    print(f"got {e} while doing backward, deadlock?")
            if failed:
                raise RuntimeError("failed somehow")
            dump_opt_params(optimizer)
            optimizer.step()

            print(f"calling check_weights on master")
            check_weights(model, reference, "pipe", index=2)
            print(f"waiting for barrier on master, pid={os.getpid()}")
        else:
            print(f"calling backwards on slave, pid={os.getpid()}")
            failed = False
            with torch.autograd.profiler.profile() as prof:
                try:
                    pipe_model.back_helper(pipe_output)
                except Exception as e:
                    failed = True
                    print(f"got {e} while doing backward, deadlock?")
            if failed:
                raise RuntimeError("failed somehow")
            dump_opt_params(optimizer)
            print(f"calling step on slave")
            optimizer.step()
            print(f"calling check_weights on slave")
            check_weights(model, reference, "pipe", index=0)
            print(f"waiting for barrier on slave")

        pipe_model.zero_grad()
        torch.distributed.barrier()

        pipe_model.eval()
        pipe_output = pipe_model(identity())
        updated_ref_output = forward_model(reference, target)
        if torch.distributed.get_rank(mpu.get_pipeline_parallel_group()) == 1:
            error = updated_ref_output.sub(pipe_output.cuda()).max()
            print(
                f"outputs are ref:\n{updated_ref_output}\npipe:\n{pipe_output}"
            )
            assert error < 1.0e-6
        torch.distributed.barrier()

        print(f"finished waiting for barrier on, pid={os.getpid()}")

    print(f"really exited pipe for {rank}")

    rpc.shutdown()
    torch.distributed.destroy_process_group()
Example #33
def test_conv_module():
    with pytest.raises(AssertionError):
        # conv_cfg must be a dict or None
        conv_cfg = 'conv'
        ConvModule(3, 8, 2, conv_cfg=conv_cfg)

    with pytest.raises(AssertionError):
        # norm_cfg must be a dict or None
        norm_cfg = 'norm'
        ConvModule(3, 8, 2, norm_cfg=norm_cfg)

    with pytest.raises(KeyError):
        # softmax is not supported
        act_cfg = dict(type='softmax')
        ConvModule(3, 8, 2, act_cfg=act_cfg)

    # conv + norm + act
    conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert conv.with_activation
    assert hasattr(conv, 'activate')
    assert conv.with_norm
    assert hasattr(conv, 'norm')
    x = torch.rand(1, 3, 256, 256)
    output = conv(x)
    assert output.shape == (1, 8, 255, 255)

    # conv + act
    conv = ConvModule(3, 8, 2)
    assert conv.with_activation
    assert hasattr(conv, 'activate')
    assert not conv.with_norm
    assert not hasattr(conv, 'norm')
    x = torch.rand(1, 3, 256, 256)
    output = conv(x)
    assert output.shape == (1, 8, 255, 255)

    # conv
    conv = ConvModule(3, 8, 2, act_cfg=None)
    assert not conv.with_norm
    assert not hasattr(conv, 'norm')
    assert not conv.with_activation
    assert not hasattr(conv, 'activate')
    x = torch.rand(1, 3, 256, 256)
    output = conv(x)
    assert output.shape == (1, 8, 255, 255)

    # conv with its own `init_weights` method
    conv_module = ConvModule(3,
                             8,
                             2,
                             conv_cfg=dict(type='ExampleConv'),
                             act_cfg=None)
    assert torch.equal(conv_module.conv.conv0.weight, torch.zeros(8, 3, 2, 2))

    # with_spectral_norm=True
    conv = ConvModule(3, 8, 3, padding=1, with_spectral_norm=True)
    assert hasattr(conv.conv, 'weight_orig')
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # padding_mode='reflect'
    conv = ConvModule(3, 8, 3, padding=1, padding_mode='reflect')
    assert isinstance(conv.padding_layer, nn.ReflectionPad2d)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # non-existing padding mode
    with pytest.raises(KeyError):
        conv = ConvModule(3, 8, 3, padding=1, padding_mode='non_exists')

    # leaky relu
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU'))
    assert isinstance(conv.activate, nn.LeakyReLU)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # tanh
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='Tanh'))
    assert isinstance(conv.activate, nn.Tanh)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # Sigmoid
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='Sigmoid'))
    assert isinstance(conv.activate, nn.Sigmoid)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # PReLU
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='PReLU'))
    assert isinstance(conv.activate, nn.PReLU)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # HSwish
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='HSwish'))
    assert isinstance(conv.activate, HSwish)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)

    # HSigmoid
    conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='HSigmoid'))
    assert isinstance(conv.activate, HSigmoid)
    output = conv(x)
    assert output.shape == (1, 8, 256, 256)
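
# A worked note on the shape asserts above (my own sketch, not part of the
# original test): each expected spatial size follows from the standard
# Conv2d output-size formula.
def conv_out_size(in_size, kernel, stride=1, padding=0):
    # out = floor((in + 2 * padding - kernel) / stride) + 1
    return (in_size + 2 * padding - kernel) // stride + 1

assert conv_out_size(256, 2) == 255             # kernel 2, no padding
assert conv_out_size(256, 3, padding=1) == 256  # kernel 3, padding 1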
Exemplo n.º 34
0
    for lstm, name in lstms:
        th.manual_seed(1234)
        x = V(th.rand(1, 1, 256))
        hiddens = (V(th.rand(1, 1, 256)), V(th.rand(1, 1, 256)))
        ref = nn.LSTM(256, 256, bias=False, dropout=0.0)
        cus = lstm(256, 256, bias=False, dropout=0.0)

        # Make sure they have the same parameters:
        val = th.rand(1)[0]
        for c in cus.parameters():
            c.data.fill_(val)
        for r in ref.parameters():
            r.data.fill_(val)

        objective = V(th.zeros(1, 256))

        i, j = x.clone(), tuple(h.clone() for h in hiddens)
        g, h = x.clone(), tuple(h.clone() for h in hiddens)
        for _ in range(10):
            i, j = ref(i, j)
            g, h = cus(g, h)
            assert (th.equal(g.data, i.data))
            assert (th.equal(j[0].data, h[0].data))
            assert (th.equal(j[1].data, h[1].data))
            ref_loss = th.sum((i - objective)**2)
            cus_loss = th.sum((g - objective)**2)
            ref_loss.backward(retain_graph=True)
            cus_loss.backward(retain_graph=True)
        print('Correct: ', name)
    print('Test passed')
Exemplo n.º 35
0
    def test_multi_objective_max_value_entropy(self):
        for dtype, m in product((torch.float, torch.double), (2, 3)):
            torch.manual_seed(7)
            # test batched model
            train_X = torch.rand(1, 1, 2, dtype=dtype, device=self.device)
            train_Y = torch.rand(1, 1, m, dtype=dtype, device=self.device)
            model = SingleTaskGP(train_X, train_Y)
            with self.assertRaises(NotImplementedError):
                qMultiObjectiveMaxValueEntropy(model,
                                               dummy_sample_pareto_frontiers)
            # test initialization
            train_X = torch.rand(4, 2, dtype=dtype, device=self.device)
            train_Y = torch.rand(4, m, dtype=dtype, device=self.device)
            # test batched MO model
            model = SingleTaskGP(train_X, train_Y)
            mesmo = qMultiObjectiveMaxValueEntropy(
                model, dummy_sample_pareto_frontiers)
            self.assertEqual(mesmo.num_fantasies, 16)
            self.assertIsInstance(mesmo.sampler, SobolQMCNormalSampler)
            self.assertEqual(mesmo.sampler.sample_shape, torch.Size([128]))
            self.assertIsInstance(mesmo.fantasies_sampler,
                                  SobolQMCNormalSampler)
            self.assertEqual(mesmo.posterior_max_values.shape,
                             torch.Size([3, 1, m]))
            # test conversion to single-output model
            self.assertIs(mesmo.mo_model, model)
            self.assertEqual(mesmo.mo_model.num_outputs, m)
            self.assertIsInstance(mesmo.model, SingleTaskGP)
            self.assertEqual(mesmo.model.num_outputs, 1)
            self.assertEqual(mesmo.model._aug_batch_shape,
                             mesmo.model._input_batch_shape)
            # test ModelListGP
            model = ModelListGP(
                *[SingleTaskGP(train_X, train_Y[:, i:i + 1]) for i in range(m)])
            mock_sample_pfs = mock.Mock()
            mock_sample_pfs.return_value = dummy_sample_pareto_frontiers(
                model=model)
            mesmo = qMultiObjectiveMaxValueEntropy(model, mock_sample_pfs)
            self.assertEqual(mesmo.num_fantasies, 16)
            self.assertIsInstance(mesmo.sampler, SobolQMCNormalSampler)
            self.assertEqual(mesmo.sampler.sample_shape, torch.Size([128]))
            self.assertIsInstance(mesmo.fantasies_sampler,
                                  SobolQMCNormalSampler)
            self.assertEqual(mesmo.posterior_max_values.shape,
                             torch.Size([3, 1, m]))
            # test conversion to batched MO model
            self.assertIsInstance(mesmo.mo_model, SingleTaskGP)
            self.assertEqual(mesmo.mo_model.num_outputs, m)
            self.assertIs(mesmo.mo_model, mesmo._init_model)
            # test conversion to single-output model
            self.assertIsInstance(mesmo.model, SingleTaskGP)
            self.assertEqual(mesmo.model.num_outputs, 1)
            self.assertEqual(mesmo.model._aug_batch_shape,
                             mesmo.model._input_batch_shape)
            # test that we call sample_pareto_frontiers with the multi-output model
            mock_sample_pfs.assert_called_once_with(mesmo.mo_model)
            # test basic evaluation
            X = torch.rand(1, 2, device=self.device, dtype=dtype)
            with torch.no_grad():
                vals = mesmo(X)
                igs = qMaxValueEntropy.forward(mesmo, X=X.view(1, 1, 1, 2))
            self.assertEqual(vals.shape, torch.Size([1]))
            self.assertTrue(torch.equal(vals, igs.sum(dim=-1)))

            # test batched evaluation
            X = torch.rand(4, 1, 2, device=self.device, dtype=dtype)
            with torch.no_grad():
                vals = mesmo(X)
                igs = qMaxValueEntropy.forward(mesmo, X=X.view(4, 1, 1, 2))
            self.assertEqual(vals.shape, torch.Size([4]))
            self.assertTrue(torch.equal(vals, igs.sum(dim=-1)))

            # test set X pending to None
            mesmo.set_X_pending(None)
            self.assertIs(mesmo.mo_model, mesmo._init_model)
            fant_X = torch.cat(
                [
                    train_X.expand(16, 4, 2),
                    torch.rand(16, 1, 2, device=self.device, dtype=dtype),
                ],
                dim=1,
            )
            fant_Y = torch.cat(
                [
                    train_Y.expand(16, 4, m),
                    torch.rand(16, 1, m, device=self.device, dtype=dtype),
                ],
                dim=1,
            )
            fantasy_model = SingleTaskGP(fant_X, fant_Y)

            # test with X_pending is not None
            with mock.patch.object(
                    SingleTaskGP, "fantasize",
                    return_value=fantasy_model) as mock_fantasize:
                qMultiObjectiveMaxValueEntropy(
                    model,
                    dummy_sample_pareto_frontiers,
                    X_pending=torch.rand(1, 2, device=self.device,
                                         dtype=dtype),
                )
                mock_fantasize.assert_called_once()
Exemplo n.º 36
0
def test_dataset_rng_states_restart(dataset_class, num_workers, batch_size):
    """Test that the sequence of batches coming from a random number generator continues with the correct sequence
    after reloading the state."""

    def create_dataset_sampler():
        dset = CaptureMapDataset(dataset_class(16, 8))
        random_sampler = RandomSampler(dset, generator=torch.Generator())
        return dset, random_sampler

    def create_dataloader_sampler(dset, sampler):
        sampler = FastForwardSampler(sampler)
        sampler.setup(batch_size)
        dl = DataLoader(dset, num_workers=num_workers, sampler=sampler, batch_size=batch_size)
        _add_capture_metadata_collate(dl)
        return dl, sampler

    def fetch(fetcher, prefetch_iter, num_batches_fetched):
        batch, _ = next(prefetch_iter)

        state: List[MergedIteratorState] = fetcher.state
        assert len(state) == 1
        assert isinstance(state[0], MergedIteratorState)

        assert len(fetcher.dataloader_iter.cache_states) == 1
        if num_workers == 0:
            assert state[0].state[0].num_batches_fetched == num_batches_fetched
        return state

    dataset, random_sampler = create_dataset_sampler()
    dataloader, ff_sampler = create_dataloader_sampler(dataset, random_sampler)

    fetcher = DataFetcher()
    fetcher.setup(dataloader)
    prefetch_iter = iter(fetcher)

    # fetch the first 3 batches
    fetch(fetcher, prefetch_iter, 1)
    fetch(fetcher, prefetch_iter, 2)
    fetch(fetcher, prefetch_iter, 3)

    # (A) capture the state after fetching 4 batches
    state = fetch(fetcher, prefetch_iter, 4)
    state = deepcopy(state[0])

    # (B) simulate 2 additional batches
    batch05, _ = next(prefetch_iter)
    batch06, _ = next(prefetch_iter)

    # start reloading
    dataset, random_sampler = create_dataset_sampler()
    dataloader, ff_sampler = create_dataloader_sampler(dataset, random_sampler)

    # load the state dict saved at (A)
    ff_sampler.load_state_dict(state.sampler_states)
    dataset.load_state_dict(state.dataset_states, latest_worker_id=state.latest_worker_id, num_workers=num_workers)

    prefetcher = DataFetcher()
    prefetcher.setup(dataloader)
    prefetch_iter = iter(prefetcher)

    # fetch 2 random batches, these should match exactly the batches seen at (B)
    batch05_restart, _ = next(prefetch_iter)
    batch06_restart, _ = next(prefetch_iter)

    assert torch.equal(batch05, batch05_restart)
    assert torch.equal(batch06, batch06_restart)
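
# A minimal sketch of the capture/restore idea this test exercises (my own
# illustration using only torch.Generator; FastForwardSampler and
# CaptureMapDataset apply the same principle to dataloader state):
import torch

g = torch.Generator()
g.manual_seed(42)
_ = torch.rand(2, generator=g)          # advance the generator
saved = g.get_state()                   # capture, as at (A)
expected = torch.rand(2, generator=g)   # the extra batches, as at (B)

g.set_state(saved)                      # reload the saved state
restored = torch.rand(2, generator=g)
assert torch.equal(expected, restored)  # the sequence continues identically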
Exemplo n.º 37
0
def _test_fast_forward_sampler_with_distributed_sampler_and_iterative_dataset(rank, worldsize):
    if worldsize > 1:
        _setup_ddp(rank, worldsize)

    def all_gather(tensor, world_size):
        tensor_list = [torch.zeros_like(tensor, dtype=torch.int64) for _ in range(world_size)]
        torch.distributed.all_gather(tensor_list, tensor)
        return tensor_list

    initial_seed = seed_everything(42)

    generator = torch.Generator()
    generator.manual_seed(initial_seed)

    num_workers = 2
    batch_size = 4
    dataset_length = 60
    num_classes = 10

    labels = np.random.randint(0, num_classes, dataset_length)

    dataset = ClassificationDataset(range(dataset_length), labels)
    dataset = MetaLearningDataset(
        dataset,
        batch_size=batch_size,
        drop_last=True,
        num_workers=num_workers,
        global_rank=rank,
        world_size=worldsize,
        initial_seed=initial_seed,
        debugging=True,
        shuffle=True,
    )
    dataset = CaptureIterableDataset(dataset)
    dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, generator=generator)
    _add_capture_metadata_collate(dataloader)

    epoch_results = []
    for _ in range(2):
        iter_dataloader = iter(dataloader)
        batches = []
        while True:
            try:
                batches.append(next(iter_dataloader))
            except StopIteration:
                break
        epoch_results.append(batches)
        dataloader.dataset.dataset.current_task_iteration += 1

    assert len(epoch_results) == 2

    assert len(epoch_results[0]) == math.ceil((dataset_length / (num_workers * worldsize)) / batch_size) + 2

    if worldsize == 1:
        assert epoch_results[0][0]["data"]["task_length"] == epoch_results[0][1]["data"]["task_length"]
        assert torch.equal(
            epoch_results[0][0]["data"]["selected_indexes"], epoch_results[0][1]["data"]["selected_indexes"]
        )
        assert 0 in epoch_results[0][2][AutoRestartBatchKeys.PL_RESTART_META]["iter_sampler"]  # worker id 0
        assert 1 in epoch_results[0][3][AutoRestartBatchKeys.PL_RESTART_META]["iter_sampler"]  # worker id 1
        assert not torch.equal(epoch_results[0][2]["data"][0], epoch_results[0][3]["data"][0])
    else:
        first_task_metadata = all_gather(epoch_results[0][0]["data"]["task_length"], worldsize)
        second_task_metadata = all_gather(epoch_results[0][1]["data"]["task_length"], worldsize)
        assert torch.equal(first_task_metadata[0], first_task_metadata[1])
        assert torch.equal(second_task_metadata[0], second_task_metadata[1])
        assert torch.equal(first_task_metadata[0], second_task_metadata[1])

        first_batch_list = all_gather(epoch_results[0][2]["data"][0], worldsize)
        assert not torch.equal(first_batch_list[0], first_batch_list[1])
        second_batch_list = all_gather(epoch_results[0][3]["data"][0], worldsize)
        assert not torch.equal(second_batch_list[0], second_batch_list[1])

    # restarting on epoch 0 / real batch 2
    state_dict = {"iter_sampler": {}}
    for batch in epoch_results[0][2:4]:
        batch, _state_dict = batch["data"], batch[AutoRestartBatchKeys.PL_RESTART_META]
        for k, v in _state_dict.items():
            state_dict[k].update(v)

    dataset = ClassificationDataset(range(dataset_length), labels)
    dataset = MetaLearningDataset(
        dataset,
        batch_size=batch_size,
        drop_last=True,
        num_workers=num_workers,
        global_rank=rank,
        world_size=worldsize,
        initial_seed=initial_seed,
        debugging=True,
        shuffle=True,
    )

    dataset = CaptureIterableDataset(dataset)
    dataset.load_state_dict(state_dict)
    dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, generator=generator)
    _add_capture_metadata_collate(dataloader)

    epoch_results_restart = []
    for _ in range(2):
        iter_dataloader = iter(dataloader)
        batches = []
        while True:
            try:
                batches.append(next(iter_dataloader))
            except StopIteration:
                break
        epoch_results_restart.append(batches)
        dataloader.dataset.dataset.increment_iteration()
        dataloader.dataset.reset_on_epoch()

    assert len(epoch_results_restart[0]) + 2 == len(epoch_results[0])
    epoch_tensors = [e["data"][0] for e in epoch_results[0][4:]]
    epoch_tensors_restart = [e["data"][0] for e in epoch_results_restart[0][2:]]

    for t, tr in zip(epoch_tensors, epoch_tensors_restart):
        assert torch.equal(t, tr)

    epoch_tensors = [e["data"][0] for e in epoch_results[1][2:]]
    epoch_tensors_restart = [e["data"][0] for e in epoch_results_restart[1][2:]]

    for t, tr in zip(epoch_tensors, epoch_tensors_restart):
        assert torch.equal(t, tr)
Exemplo n.º 38
0
 def test_init_copy(self):
     t = torch.randn(2, 3)
     t_other = torch.tensor([[1, 2, 3], [4, 5, 6]]).to(dtype=t.dtype)
     init_copy_(t, t_other)
     assert torch.equal(t, t_other)
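
# A plausible minimal implementation of an init_copy_-style helper
# (hypothetical sketch for illustration; the real helper may differ):
import torch

def init_copy_sketch_(tensor: torch.Tensor, other: torch.Tensor) -> None:
    # in-place copy under no_grad, as parameter initializers conventionally do
    with torch.no_grad():
        tensor.copy_(other)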
Exemplo n.º 39
0
def test_add_diag():
    diag = Variable(torch.Tensor([4]))
    lazy_var = make_sum_lazy_var().add_diag(diag)
    assert torch.equal(lazy_var.evaluate().data,
                       (t1_eval + t2_eval + torch.eye(4) * 4))
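
# The semantics being checked, written densely (my own note): add_diag(d)
# adds d * I to the lazily represented sum before evaluation.
import torch

dense = torch.ones(4, 4) + 2 * torch.ones(4, 4)  # stand-in for t1_eval + t2_eval
with_diag = dense + 4 * torch.eye(4)
assert torch.equal(with_diag.diagonal(), dense.diagonal() + 4)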
Exemplo n.º 40
0
    def forward(self, input):
        # Training mode
        if self.training:
            self.step += 1
            if self.bn:
                # first run a plain convolution so the BN statistics can be computed
                output = F.conv2d(input=input,
                                  weight=self.weight,
                                  bias=self.bias,
                                  stride=self.stride,
                                  padding=self.padding,
                                  dilation=self.dilation,
                                  groups=self.groups)
                # update the BN statistics (batch and running)
                dims = [dim for dim in range(4) if dim != 1]
                self.batch_mean = torch.mean(output, dim=dims)
                self.batch_var = torch.var(output, dim=dims)

                with torch.no_grad():
                    if self.first_bn == 0 and torch.equal(
                            self.running_mean,
                            torch.zeros_like(
                                self.running_mean)) and torch.equal(
                                    self.running_var,
                                    torch.zeros_like(self.running_var)):
                        self.first_bn.add_(1)
                        self.running_mean.add_(self.batch_mean)
                        self.running_var.add_(self.batch_var)
                    else:
                        self.running_mean.mul_(1 - self.momentum).add_(
                            self.momentum * self.batch_mean)
                        self.running_var.mul_(1 - self.momentum).add_(
                            self.momentum * self.batch_var)
                # BN fusion
                if self.step < self.freeze_step:
                    if self.bias is not None:
                        bias = reshape_to_bias(
                            self.beta + (self.bias - self.batch_mean) *
                            (self.gamma /
                             torch.sqrt(self.batch_var + self.eps)))
                    else:
                        bias = reshape_to_bias(
                            self.beta - self.batch_mean *
                            (self.gamma /
                             torch.sqrt(self.batch_var + self.eps)))  # fold batch stats into bias
                    weight = self.weight * reshape_to_weight(
                        self.gamma /
                        torch.sqrt(self.batch_var + self.eps))  # fold batch stats into weight
                else:
                    if self.bias is not None:
                        bias = reshape_to_bias(
                            self.beta + (self.bias - self.running_mean) *
                            (self.gamma /
                             torch.sqrt(self.running_var + self.eps)))
                    else:
                        bias = reshape_to_bias(
                            self.beta - self.running_mean *
                            (self.gamma / torch.sqrt(self.running_var +
                                                     self.eps)))  # fold running stats into bias
                    weight = self.weight * reshape_to_weight(
                        self.gamma /
                        torch.sqrt(self.running_var + self.eps))  # fold running stats into weight

            else:
                bias = self.bias
                weight = self.weight
        # Inference mode
        else:
            # print(self.running_mean, self.running_var)
            # BN fusion
            if self.bn:
                if self.bias is not None:
                    bias = reshape_to_bias(
                        self.beta + (self.bias - self.running_mean) *
                        (self.gamma / torch.sqrt(self.running_var + self.eps)))
                else:
                    bias = reshape_to_bias(
                        self.beta - self.running_mean *
                        (self.gamma / torch.sqrt(self.running_var + self.eps))
                    )  # fold running stats into bias
                weight = self.weight * reshape_to_weight(
                    self.gamma /
                    torch.sqrt(self.running_var + self.eps))  # fold running stats into weight
            else:
                bias = self.bias
                weight = self.weight
        # quantize activations A and the BN-fused weights W
        q_weight = self.weight_quantizer(weight)
        q_bias = self.bias_quantizer(bias)

        if self.quantizer_output:  # dump quantization parameters to txt files

            # create the quantizer_output directory
            if not os.path.isdir('./quantizer_output'):
                os.makedirs('./quantizer_output')

            if not os.path.isdir('./quantizer_output/q_weight_out'):
                os.makedirs('./quantizer_output/q_weight_out')
            if not os.path.isdir('./quantizer_output/w_scale_out'):
                os.makedirs('./quantizer_output/w_scale_out')
            if not os.path.isdir('./quantizer_output/q_weight_max'):
                os.makedirs('./quantizer_output/q_weight_max')
            if not os.path.isdir('./quantizer_output/max_weight_count'):
                os.makedirs('./quantizer_output/max_weight_count')
            # dump this layer's weight quantization scale
            weight_scale = self.weight_quantizer.get_scale()
            np.savetxt(
                ('./quantizer_output/w_scale_out/scale %f.txt' % time.time()),
                weight_scale,
                delimiter='\n')
            # dump this layer's quantized weights
            q_weight_txt = self.weight_quantizer.get_quantize_value(weight)
            q_weight_txt = np.array(q_weight_txt.cpu()).reshape(1, -1)
            q_weight_max = [np.max(q_weight_txt)]
            # q_weight_max = np.argmax(q_weight_txt)
            max_weight_count = [np.sum(abs(q_weight_txt) >= 255)]  # count of overflowing values in this layer
            np.savetxt(
                ('./quantizer_output/max_weight_count/max_weight_count %f.txt'
                 % time.time()), max_weight_count)
            np.savetxt(('./quantizer_output/q_weight_max/max_weight %f.txt' %
                        time.time()), q_weight_max)
            np.savetxt(('./quantizer_output/q_weight_out/weight %f.txt' %
                        time.time()),
                       q_weight_txt,
                       delimiter='\n')
            # io.savemat('save.mat',{'q_weight_txt':q_weight_txt})

            # create directories for the bias txt dumps
            if not os.path.isdir('./quantizer_output/q_bias_out'):
                os.makedirs('./quantizer_output/q_bias_out')
            if not os.path.isdir('./quantizer_output/b_scale_out'):
                os.makedirs('./quantizer_output/b_scale_out')
            # dump this layer's bias quantization scale
            bias_scale = self.bias_quantizer.get_scale()
            np.savetxt(
                ('./quantizer_output/b_scale_out/scale %f.txt' % time.time()),
                bias_scale,
                delimiter='\n')
            # dump this layer's quantized bias
            q_bias_txt = self.bias_quantizer.get_quantize_value(bias)
            q_bias_txt = np.array(q_bias_txt.cpu()).reshape(1, -1)
            np.savetxt(
                ('./quantizer_output/q_bias_out/bias %f.txt' % time.time()),
                q_bias_txt,
                delimiter='\n')

        # quantized convolution
        if self.training:  # training mode
            output = F.conv2d(
                input=input,
                weight=q_weight,
                # bias=self.bias,  # note: no bias added here (self.bias is None)
                bias=q_bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups)

        else:  # inference mode
            output = F.conv2d(
                input=input,
                weight=q_weight,
                bias=q_bias,  # note: bias included here for the full conv+bn
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups)
        if self.activate == 'leaky':
            output = F.leaky_relu(output,
                                  0.125 if not self.maxabsscaler else 0.25,
                                  inplace=True)
        elif self.activate == 'relu6':
            output = F.relu6(output, inplace=True)
        elif self.activate == 'h_swish':
            output = output * (F.relu6(output + 3.0, inplace=True) / 6.0)
        elif self.activate == 'relu':
            output = F.relu(output, inplace=True)
        elif self.activate == 'mish':
            output = output * F.softplus(output).tanh()
        elif self.activate == 'linear':
            return output
        else:
            print(self.activate + " is not supported!")

        if self.quantizer_output:

            if not os.path.isdir('./quantizer_output/q_activation_out'):
                os.makedirs('./quantizer_output/q_activation_out')
            if not os.path.isdir('./quantizer_output/a_scale_out'):
                os.makedirs('./quantizer_output/a_scale_out')
            if not os.path.isdir('./quantizer_output/q_activation_max'):
                os.makedirs('./quantizer_output/q_activation_max')
            if not os.path.isdir('./quantizer_output/max_activation_count'):
                os.makedirs('./quantizer_output/max_activation_count')
            # dump the current activation quantization scale
            activation_scale = self.activation_quantizer.get_scale()
            np.savetxt(
                ('./quantizer_output/a_scale_out/scale %f.txt' % time.time()),
                activation_scale,
                delimiter='\n')
            # dump this layer's quantized activations
            q_activation_txt = self.activation_quantizer.get_quantize_value(
                output)
            q_activation_txt = np.array(q_activation_txt.cpu()).reshape(1, -1)
            q_activation_max = [np.max(q_activation_txt)]  # layer max (to check for overflow)
            max_activation_count = [np.sum(abs(q_activation_txt) >= 255)]  # count of overflowing activations
            np.savetxt((
                './quantizer_output/max_activation_count/max_activation_count %f.txt'
                % time.time()), max_activation_count)
            np.savetxt(
                ('./quantizer_output/q_activation_max/max_activation %f.txt' %
                 time.time()), q_activation_max)
            np.savetxt(
                ('./quantizer_output/q_activation_out/activation %f.txt' %
                 time.time()),
                q_activation_txt,
                delimiter='\n')

        output = self.activation_quantizer(output)
        return output
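
# A standalone sketch of the conv+BN folding identity used in the forward
# pass above (my own illustration; names differ from the module):
#   w_fused = w * gamma / sqrt(var + eps)
#   b_fused = beta + (b - mean) * gamma / sqrt(var + eps)
import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(1, 3, 8, 8)
conv = torch.nn.Conv2d(3, 4, 3, padding=1)
bn = torch.nn.BatchNorm2d(4).eval()  # eval mode: uses running statistics
with torch.no_grad():
    bn.running_mean.uniform_(-0.5, 0.5)
    bn.running_var.uniform_(0.5, 1.5)
    bn.weight.uniform_(0.5, 1.5)
    bn.bias.uniform_(-0.5, 0.5)

scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
w_fused = conv.weight * scale.reshape(-1, 1, 1, 1)
b_fused = bn.bias + (conv.bias - bn.running_mean) * scale

ref = bn(conv(x))
fused = F.conv2d(x, w_fused, b_fused, padding=1)
assert torch.allclose(ref, fused, atol=1e-5)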
Exemplo n.º 41
0
 def train_per_batch_transform_on_device(self, batch: Any) -> Any:
     assert self.training
     assert self.current_fn == "per_batch_transform_on_device"
     self.train_per_batch_transform_on_device_called = True
     assert torch.equal(batch, tensor([[0, 1, 2, 3, 5], [0, 1, 2, 3, 5]]))