Exemplo n.º 1
0
    def fit(self,
            dataset,
            nb_epoch=10,
            batch_size=50,
            pad_batches=False,
            **kwargs):
        """Train the model on `dataset` for `nb_epoch` passes.

        Every epoch walks the batches produced by
        ``dataset.iterbatches(batch_size, pad_batches=pad_batches)``, hands
        each one to ``self.fit_on_batch`` and logs the mean of the per-batch
        losses through ``log`` at ``self.verbosity``.

        Args:
            dataset: Object exposing ``iterbatches``; every item it yields is
                unpacked as ``(X, y, w, ids)``.
            nb_epoch: Number of passes over ``dataset``.
            batch_size: Batch size forwarded to ``iterbatches`` and to
                ``pad_batch``.
            pad_batches: Forwarded to ``iterbatches``; when truthy each batch
                is additionally passed through ``pad_batch``.
            **kwargs: Accepted but not used by this method.

        Returns:
            None.
        """
        # TODO(rbharath/enf): We need a structured way to deal with potential GPU
        #                     memory overflows.
        for epoch_idx in range(nb_epoch):
            # Epochs are reported 1-based in the log output.
            epoch_number = epoch_idx + 1
            log("Starting epoch %s" % str(epoch_number), self.verbosity)

            epoch_losses = []
            batches = dataset.iterbatches(batch_size, pad_batches=pad_batches)
            for batch in batches:
                X_batch, y_batch, w_batch, ids_batch = batch

                if self.fit_transformers:
                    transformed = self.transform_on_batch(
                        X_batch, y_batch, w_batch)
                    X_batch, y_batch, w_batch = transformed

                if pad_batches:
                    padded = pad_batch(batch_size, X_batch, y_batch, w_batch,
                                       ids_batch)
                    X_batch, y_batch, w_batch, ids_batch = padded

                batch_loss = self.fit_on_batch(X_batch, y_batch, w_batch)
                epoch_losses.append(batch_loss)

            mean_loss = np.array(epoch_losses).mean()
            log("Avg loss for epoch %d: %f" % (epoch_number, mean_loss),
                self.verbosity)
Exemplo n.º 2
0
 def predict_proba_on_batch(self, support, test_batch):
     """Run the prediction ops on one test batch and one-hot encode the result.

     The batch is first padded to ``self.test_batch_size`` with ``pad_batch``
     and wrapped in a ``NumpyDataset``; the feed dict is built from that
     padded dataset together with ``support``.

     Args:
         support: Passed unchanged to ``self.construct_feed_dict``.
         test_batch: Object supporting ``len()`` and exposing ``X``, ``y``,
             ``w`` and ``ids`` attributes.

     Returns:
         ``to_one_hot(np.round(pred))`` where ``pred`` is the output of
         ``self.pred_op``.
     """
     # NOTE(review): n_samples is computed but never used below, so nothing
     # here trims the result back to the unpadded length -- confirm whether
     # callers are expected to do that themselves.
     n_samples = len(test_batch)

     padded_arrays = pad_batch(self.test_batch_size, test_batch.X,
                               test_batch.y, test_batch.w, test_batch.ids)
     padded_test_batch = NumpyDataset(*padded_arrays)
     feed_dict = self.construct_feed_dict(padded_test_batch, support)

     # Both ops are evaluated in a single session call; only the predictions
     # feed into the return value, the scores are discarded.
     fetches = [self.pred_op, self.scores_op]
     pred, scores = self.sess.run(fetches, feed_dict=feed_dict)

     return to_one_hot(np.round(pred))
Exemplo n.º 3
0
    def fit(self, dataset):
        """Train the model on `dataset` using settings from ``self.model_params``.

        ``self.model_params`` must contain ``"batch_size"`` and ``"nb_epoch"``;
        ``"pad_batches"`` is optional and treated as ``False`` when absent.
        Every epoch walks ``dataset.iterbatches(batch_size,
        pad_batches=pad_batches)``, hands each batch to ``self.fit_on_batch``
        and logs the mean of the per-batch losses through ``log`` at
        ``self.verbosity``.

        Args:
            dataset: Object exposing ``iterbatches``; every item it yields is
                unpacked as ``(X, y, w, ids)``.

        Returns:
            None.

        Raises:
            KeyError: If ``"batch_size"`` or ``"nb_epoch"`` is missing from a
                dict-like ``self.model_params``.
        """
        # TODO(rbharath/enf): We need a structured way to deal with potential GPU
        #                     memory overflows.
        params = self.model_params
        batch_size = params["batch_size"]
        pad_batches = params["pad_batches"] if "pad_batches" in params else False
        nb_epoch = params["nb_epoch"]

        for epoch_idx in range(nb_epoch):
            # Epochs are reported 1-based in the log output.
            epoch_number = epoch_idx + 1
            log("Starting epoch %s" % str(epoch_number), self.verbosity)

            epoch_losses = []
            for batch in dataset.iterbatches(batch_size, pad_batches=pad_batches):
                X_batch, y_batch, w_batch, ids_batch = batch

                if self.fit_transformers:
                    transformed = self.transform_on_batch(X_batch, y_batch, w_batch)
                    X_batch, y_batch, w_batch = transformed

                if pad_batches:
                    padded = pad_batch(batch_size, X_batch, y_batch, w_batch, ids_batch)
                    X_batch, y_batch, w_batch, ids_batch = padded

                epoch_losses.append(self.fit_on_batch(X_batch, y_batch, w_batch))

            mean_loss = np.array(epoch_losses).mean()
            log("Avg loss for epoch %d: %f" % (epoch_number, mean_loss), self.verbosity)
Exemplo n.º 4
0
  def test_pad_batches(self):
    """Check that pad_batch returns four arrays of length batch_size.

    Each scenario builds a feature array X_b with n_samples rows, pairs it
    with zero-filled label, weight and id arrays of matching length, and
    asserts that every array returned by pad_batch has exactly batch_size
    entries.
    """
    batch_size = 100
    num_features = 10
    num_tasks = 5
    num_atoms = 15
    d = 3

    # (n_samples, X_b) pairs, exercised in this order.
    scenarios = [
        # n_samples < 2*n_samples < batch_size
        (29, np.zeros((29, num_features))),
        # n_samples < batch_size
        (79, np.zeros((79, num_features))),
        # n_samples == batch_size
        (100, np.zeros((100, num_features))),
        # Object featurization.
        (2, np.array([{"a": 1}, {"b": 2}])),
        # More complicated object featurization.
        (2, np.array([(1, {"a": 1}), (2, {"b": 2})])),
        # Multidimensional data.
        (50, np.zeros((50, num_atoms, d))),
    ]

    for n_samples, X_b in scenarios:
      y_b = np.zeros((n_samples, num_tasks))
      w_b = np.zeros((n_samples, num_tasks))
      ids_b = np.zeros((n_samples,))

      padded = pad_batch(batch_size, X_b, y_b, w_b, ids_b)
      X_out, y_out, w_out, ids_out = padded

      lengths = [len(X_out), len(y_out), len(w_out), len(ids_out)]
      assert lengths == [batch_size] * 4
Exemplo n.º 5
0
    def test_pad_batches(self):
        """Check that pad_batch returns four arrays of length batch_size.

        Six feature-array layouts are exercised: three 2-D zero arrays with
        29, 79 and 100 rows, two object arrays (dicts, and tuples holding
        dicts) and one 3-D zero array. In every case the labels, weights and
        ids are zero-filled arrays with the same number of rows.
        """
        batch_size = 100
        num_features = 10
        num_tasks = 5

        def check_padding(n_samples, X_b):
            # Only X_b varies between scenarios; y, w and ids are always
            # zero-filled with n_samples rows.
            y_b = np.zeros((n_samples, num_tasks))
            w_b = np.zeros((n_samples, num_tasks))
            ids_b = np.zeros((n_samples, ))

            outputs = pad_batch(batch_size, X_b, y_b, w_b, ids_b)
            X_out, y_out, w_out, ids_out = outputs
            for padded in (X_out, y_out, w_out, ids_out):
                assert len(padded) == batch_size

        # n_samples < 2*n_samples < batch_size
        check_padding(29, np.zeros((29, num_features)))

        # n_samples < batch_size
        check_padding(79, np.zeros((79, num_features)))

        # n_samples == batch_size
        check_padding(100, np.zeros((100, num_features)))

        # Object featurization.
        check_padding(2, np.array([{"a": 1}, {"b": 2}]))

        # More complicated object featurization.
        check_padding(2, np.array([(1, {"a": 1}), (2, {"b": 2})]))

        # Multidimensional data.
        num_atoms = 15
        d = 3
        check_padding(50, np.zeros((50, num_atoms, d)))