Example #1
    def predict_proba(self, X):
        """Predict label probabilities with the fitted estimator on 
        predictor(s) X.

        Returns
        -------
        proba : array of shape = [n_samples, n_classes]
            The predicted label probabilities of the input samples.
        """
        proba = []

        X_subs = self._get_subdata(X)

        for i in range(self.n_classes_):
            e = self.estimators_[i]
            X_i = X_subs[i]
            pred = e.predict(X_i).reshape(-1, 1)
            proba.append(pred)
        proba = np.hstack(proba)

        normalizer = proba.sum(axis=1)[:, np.newaxis]
        normalizer[normalizer == 0.0] = 1.0
        proba /= normalizer

        assert_all_finite(proba)

        return proba
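The normalization step above is worth isolating: dividing each row by its sum turns per-class scores into probabilities, and rows that sum to zero are left as zeros instead of producing NaN. A minimal sketch with made-up scores:

import numpy as np

# Hypothetical per-class scores for three samples; the middle row sums to zero.
scores = np.array([[0.2, 0.8],
                   [0.0, 0.0],
                   [1.5, 0.5]])

normalizer = scores.sum(axis=1)[:, np.newaxis]
normalizer[normalizer == 0.0] = 1.0  # avoid division by zero on all-zero rows
proba = scores / normalizer
print(proba)  # nonzero rows are normalized; the zero row stays zero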
Example #2
    def predict(self, X):
        """
        Perform regression on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Test vectors.

        Returns
        -------
        p : array, shape = [n_samples] or [n_samples, n_targets]
            Predicted target values for X.
        """
        try:
            assert_all_finite(self.coef_)
            pred = safe_sparse_dot(X, self.coef_.T)
        except ValueError:
            n_samples = X.shape[0]
            n_vectors = self.coef_.shape[0]
            pred = np.zeros((n_samples, n_vectors))

        if not self.outputs_2d_:
            pred = pred.ravel()

        return pred
Example #3
def _check_X_y(X,
               y,
               dtype="numeric",
               accept_sparse=False,
               order=None,
               copy=False,
               force_all_finite=True,
               ensure_2d=True):
    if y is None:
        raise ValueError("y cannot be None")

    X = _check_array(X,
                     accept_sparse=accept_sparse,
                     dtype=dtype,
                     order=order,
                     copy=copy,
                     force_all_finite=force_all_finite,
                     ensure_2d=ensure_2d)

    y = _column_or_1d(y)
    if y.dtype.kind == 'O':
        y = y.astype(np.float64)
    # TODO: replace with the daal4py implementation
    from sklearn.utils.validation import assert_all_finite
    assert_all_finite(y)

    lengths = [len(X), len(y)]
    uniques = np.unique(lengths)
    if len(uniques) > 1:
        raise ValueError("Found input variables with inconsistent numbers of"
                         " samples: %r" % [int(length) for length in lengths])

    return X, y
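_check_X_y here is a private wrapper; the public sklearn.utils.check_X_y performs the same consistency validation. A quick sketch of the length-mismatch error using only public APIs:

import numpy as np
from sklearn.utils import check_X_y

X = np.arange(6, dtype=np.float64).reshape(3, 2)
y = np.array([1.0, 0.0])  # deliberately one sample short

try:
    check_X_y(X, y)
except ValueError as exc:
    print(exc)  # reports inconsistent numbers of samples: [3, 2]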
Example #4
  def _fit_diag(self, pairs, y):
    """Learn diagonal metric using MMC.
    Parameters
    ----------
    X : (n x d) data matrix
        each row corresponds to a single instance
    constraints : 4-tuple of arrays
        (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d)
        dissimilar pairs
    """
    num_dim = pairs.shape[2]
    pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1]
    s_sum = np.sum((pos_pairs[:, 0, :] - pos_pairs[:, 1, :]) ** 2, axis=0)

    it = 0
    error = 1.0
    eps = 1e-6
    reduction = 2.0
    w = np.diag(self.A_).copy()

    while error > self.convergence_threshold and it < self.max_iter:

      fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w)
      obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
      fS_1st_d = s_sum  # first derivative of the similarity constraints

      gradient = fS_1st_d - self.diagonal_c * fD_1st_d               # gradient of the objective
      hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim)  # Hessian of the objective
      step = np.dot(np.linalg.inv(hessian), gradient)

      # Newton-Raphson update
      # search over optimal lambda
      lambd = 1  # initial step-size
      w_tmp = np.maximum(0, w - lambd * step)
      obj = (np.dot(s_sum, w_tmp) + self.diagonal_c *
             self._D_objective(neg_pairs, w_tmp))
      assert_all_finite(obj)
      obj_previous = obj + 1  # just to get the while-loop started

      inner_it = 0
      while obj < obj_previous:
        obj_previous = obj
        w_previous = w_tmp.copy()
        lambd /= reduction
        w_tmp = np.maximum(0, w - lambd * step)
        obj = (np.dot(s_sum, w_tmp) + self.diagonal_c *
               self._D_objective(neg_pairs, w_tmp))
        inner_it += 1
        assert_all_finite(obj)

      w[:] = w_previous
      error = np.abs((obj_previous - obj_initial) / obj_previous)
      if self.verbose:
        print('mmc iter: %d, conv = %f' % (it, error))
      it += 1

    self.A_ = np.diag(w)

    self.transformer_ = transformer_from_metric(self.A_)
    return self
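The inner loop above is a plain step-halving line search along the projected Newton direction: keep halving lambd while the objective improves, then back up one step. The same idea on a toy quadratic, with hypothetical names:

import numpy as np

def objective(w):
    return float(np.dot(w - 3.0, w - 3.0))  # toy quadratic, minimum at w = 3

w = np.array([0.0])
step = np.array([-6.0])  # pretend Newton direction (here it happens to be exact)
lambd, reduction = 1.0, 2.0

w_tmp = np.maximum(0, w - lambd * step)
obj = objective(w_tmp)
obj_previous = obj + 1  # just to get the while-loop started

while obj < obj_previous:
    obj_previous = obj
    w_previous = w_tmp.copy()
    lambd /= reduction
    w_tmp = np.maximum(0, w - lambd * step)
    obj = objective(w_tmp)

print(w_previous)  # [3.] -- the best iterate found by the halving search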
Example #5
    def _scrub_x(self, X, missing, **kwargs):
        '''
        Sanitize input predictors and extract column names if appropriate.
        '''
        # Check for sparseness
        if sparse.issparse(X):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use X.toarray() to convert to dense.')
        
        # Figure out missingness
        if missing is None:
            # Infer missingness
            missing = np.isnan(X)
            
        # Convert to internally used data type
        missing = np.asarray(missing, dtype=BOOL, order='F')
        assert_all_finite(missing)
        if missing.ndim == 1:
            missing = missing[:, np.newaxis]
        X = np.asarray(X, dtype=np.float64, order='F')
        if not self.allow_missing:
            try:
                assert_all_finite(X)
            except ValueError:
                raise ValueError("Input contains NaN, infinity or a value that's too large.  Did you mean to set allow_missing=True?")
        if X.ndim == 1:
            X = X[:, np.newaxis]

        # Ensure correct number of columns
        if hasattr(self, 'basis_') and self.basis_ is not None:
            if X.shape[1] != self.basis_.num_variables:
                raise ValueError('Wrong number of columns in X')
        
        return X, missing
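The missingness handling above can be exercised on its own: infer a boolean mask with np.isnan, cast it to the internal dtype, and confirm that the mask itself is finite. A minimal sketch that substitutes plain bool for the module's BOOL constant:

import numpy as np
from sklearn.utils.validation import assert_all_finite

X = np.array([[1.0, np.nan],
              [3.0, 4.0]], order='F')
missing = np.isnan(X)  # infer missingness
missing = np.asarray(missing, dtype=bool, order='F')
assert_all_finite(missing)  # a boolean mask is always finite
print(missing)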
Example #6
    def predict_proba(self, X):
        """Predict label probabilities with the fitted estimator
        on predictor(s) X.

        Returns
        -------
        proba : array of shape = [n_samples, n_classes]
            The predicted label probabilities of the input samples.
        """
        proba = []

        X_subs = self._get_subdata(X)

        for i in range(self.n_classes_):
            e = self.estimators_[i]
            X_i = X_subs[i]
            pred = e.predict(X_i).reshape(-1, 1)
            proba.append(pred)
        proba = np.hstack(proba)

        normalizer = proba.sum(axis=1)[:, np.newaxis]
        normalizer[normalizer == 0.0] = 1.0
        proba /= normalizer

        assert_all_finite(proba)

        return proba
Example #7
 def _make_meta(self, X):
     rows = []
     for e in self.estimators_:
         proba = e.predict_proba(X)
         assert_all_finite(proba)
         rows.append(proba)
     return np.hstack(rows)
Example #8
    def _svd(self, array, n_components, n_discard):
        """Returns first `n_components` left and right singular
        vectors u and v, discarding the first `n_discard`.

        """
        if self.svd_method == 'randomized':
            kwargs = {}
            if self.n_svd_vecs is not None:
                kwargs['n_oversamples'] = self.n_svd_vecs
            u, _, vt = randomized_svd(array,
                                      n_components,
                                      random_state=self.random_state,
                                      **kwargs)

        elif self.svd_method == 'arpack':
            u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)
            if np.any(np.isnan(vt)):
                # some eigenvalues of A * A.T are negative, causing
                # sqrt() to be np.nan. This causes some vectors in vt
                # to be np.nan.
                _, v = eigsh(safe_sparse_dot(array.T, array),
                             ncv=self.n_svd_vecs)
                vt = v.T
            if np.any(np.isnan(u)):
                _, u = eigsh(safe_sparse_dot(array, array.T),
                             ncv=self.n_svd_vecs)

        assert_all_finite(u)
        assert_all_finite(vt)
        u = u[:, n_discard:]
        vt = vt[n_discard:]
        return u, vt.T
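The same pattern (compute a truncated SVD, then check that the factors are finite before using them) can be exercised directly with sklearn's public randomized_svd. A small sketch on a random matrix:

import numpy as np
from sklearn.utils.extmath import randomized_svd
from sklearn.utils.validation import assert_all_finite

rng = np.random.RandomState(0)
array = rng.rand(20, 10)

u, s, vt = randomized_svd(array, n_components=3, random_state=0)
assert_all_finite(u)
assert_all_finite(vt)
print(u.shape, vt.shape)  # (20, 3) (3, 10)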
Example #9
    def _svd(self, array, n_components, n_discard):
        """Returns first `n_components` left and right singular
        vectors u and v, discarding the first `n_discard`.

        """
        if self.svd_method == "randomized":
            kwargs = {}
            if self.n_svd_vecs is not None:
                kwargs["n_oversamples"] = self.n_svd_vecs
            u, _, vt = randomized_svd(array, n_components, random_state=self.random_state, **kwargs)

        elif self.svd_method == "arpack":
            u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)
            if np.any(np.isnan(vt)):
                # some eigenvalues of A * A.T are negative, causing
                # sqrt() to be np.nan. This causes some vectors in vt
                # to be np.nan.
                _, v = eigsh(safe_sparse_dot(array.T, array), ncv=self.n_svd_vecs)
                vt = v.T
            if np.any(np.isnan(u)):
                _, u = eigsh(safe_sparse_dot(array, array.T), ncv=self.n_svd_vecs)

        assert_all_finite(u)
        assert_all_finite(vt)
        u = u[:, n_discard:]
        vt = vt[n_discard:]
        return u, vt.T
Example #10
    def predict(self, X):
        """
        Perform regression on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Test vectors.

        Returns
        -------
        p : array, shape = [n_samples] or [n_samples, n_targets]
            Predicted target values for X.
        """
        try:
            assert_all_finite(self.coef_)
            pred = safe_sparse_dot(X, self.coef_.T)
            pred += self.intercept_
        except ValueError:
            n_samples = X.shape[0]
            n_vectors = self.coef_.shape[0]
            pred = np.zeros((n_samples, n_vectors))

        if not self.outputs_2d_:
            pred = pred.ravel()

        return pred
Example #11
def test_suppress_validation():
    X = np.array([0, np.inf])
    assert_raises(ValueError, assert_all_finite, X)
    sklearn.set_config(assume_finite=True)
    assert_all_finite(X)
    sklearn.set_config(assume_finite=False)
    assert_raises(ValueError, assert_all_finite, X)
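For a scoped alternative to flipping the global flag, sklearn also provides config_context, which restores the previous setting on exit:

import numpy as np
import sklearn
from sklearn.utils.validation import assert_all_finite

X = np.array([0.0, np.inf])

with sklearn.config_context(assume_finite=True):
    assert_all_finite(X)  # validation is skipped inside the block

try:
    assert_all_finite(X)  # outside the block the check is active again
except ValueError as exc:
    print(exc)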
Example #12
  def _fit_diag(self, pairs, y):
    """Learn diagonal metric using MMC.
    Parameters
    ----------
    X : (n x d) data matrix
        each row corresponds to a single instance
    constraints : 4-tuple of arrays
        (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d)
        dissimilar pairs
    """
    num_dim = pairs.shape[2]
    pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1]
    s_sum = np.sum((pos_pairs[:, 0, :] - pos_pairs[:, 1, :]) ** 2, axis=0)

    it = 0
    error = 1.0
    eps = 1e-6
    reduction = 2.0
    w = np.diag(self.A_).copy()

    while error > self.convergence_threshold and it < self.max_iter:

      fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w)
      obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
      fS_1st_d = s_sum  # first derivative of the similarity constraints

      gradient = fS_1st_d - self.diagonal_c * fD_1st_d               # gradient of the objective
      hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim)  # Hessian of the objective
      step = np.dot(np.linalg.inv(hessian), gradient)

      # Newton-Raphson update
      # search over optimal lambda
      lambd = 1  # initial step-size
      w_tmp = np.maximum(0, w - lambd * step)
      obj = (np.dot(s_sum, w_tmp) + self.diagonal_c *
             self._D_objective(neg_pairs, w_tmp))
      assert_all_finite(obj)
      obj_previous = obj + 1  # just to get the while-loop started

      inner_it = 0
      while obj < obj_previous:
        obj_previous = obj
        w_previous = w_tmp.copy()
        lambd /= reduction
        w_tmp = np.maximum(0, w - lambd * step)
        obj = (np.dot(s_sum, w_tmp) + self.diagonal_c *
               self._D_objective(neg_pairs, w_tmp))
        inner_it += 1
        assert_all_finite(obj)

      w[:] = w_previous
      error = np.abs((obj_previous - obj_initial) / obj_previous)
      if self.verbose:
        print('mmc iter: %d, conv = %f' % (it, error))
      it += 1

    self.A_ = np.diag(w)

    self.transformer_ = transformer_from_metric(self.A_)
    return self
Example #13
    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : classifier
            Returns self.
        """
        rs = check_random_state(self.random_state)

        reencode = self.multiclass
        y, n_classes, n_vectors = self._set_label_transformers(y, reencode)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)

        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        if n_vectors == 1 or not self.multiclass:
            Y = np.asfortranarray(self.label_binarizer_.fit_transform(y),
                                  dtype=np.float64)
            for i in xrange(n_vectors):
                _binary_sgd(self, self.coef_, self.intercept_, i, ds,
                            Y[:, i], loss, penalty, self.alpha,
                            self._get_learning_rate(), self.eta0, self.power_t,
                            self.fit_intercept, self.intercept_decay,
                            int(self.max_iter * n_samples), self.shuffle, rs,
                            self.callback, self.n_calls, self.verbose)

        elif self.multiclass:
            _multiclass_sgd(self, self.coef_, self.intercept_, ds,
                            y.astype(np.int32), loss, penalty, self.alpha,
                            self._get_learning_rate(), self.eta0, self.power_t,
                            self.fit_intercept, self.intercept_decay,
                            int(self.max_iter * n_samples), self.shuffle, rs,
                            self.callback, self.n_calls, self.verbose)

        else:
            raise ValueError("Wrong value for multiclass.")

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
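Note how fit downgrades non-finite coefficients from a hard error to a warning; the same pattern in isolation:

import warnings
import numpy as np
from sklearn.utils.validation import assert_all_finite

coef = np.array([[1.0, np.inf]])  # pretend training diverged

try:
    assert_all_finite(coef)
except ValueError:
    warnings.warn("coef_ contains infinite values")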
Example #14
def test_gibbs_smoke():
    """Check if we don't get NaNs sampling the full digits dataset."""
    rng = np.random.RandomState(42)
    X = Xdigits.astype(np.float32)
    rbm1 = BernoulliRBM(X.shape[1], n_hidden=42, batch_size=40,
                        n_iter=20, random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
Example #15
def test_gibbs_smoke():
    """ just seek if we don't get NaNs sampling the full digits dataset """
    rng = np.random.RandomState(42)
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=10,
                        n_iter=20, random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
Example #16
def test_gibbs_smoke():
    """Check if we don't get NaNs sampling the full digits dataset.
    Also check that sampling again will yield different results."""
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40, n_iter=20, random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert_true(np.all((X_sampled != X_sampled2).max(axis=1)))
Example #17
def test_gibbs_smoke():
    # Check if we don't get NaNs sampling the full digits dataset.
    # Also check that sampling again will yield different results.
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40, n_iter=20, random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert np.all((X_sampled != X_sampled2).max(axis=1))
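The variants above assume a preprocessed Xdigits array; a self-contained version, assuming the standard digits dataset scaled to [0, 1] as in scikit-learn's own RBM tests:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.neural_network import BernoulliRBM
from sklearn.utils.validation import assert_all_finite

X = np.asarray(load_digits().data, dtype=np.float64)
X = (X - X.min()) / (X.max() - X.min())  # scale to [0, 1]

rbm = BernoulliRBM(n_components=42, batch_size=40, n_iter=5, random_state=42)
rbm.fit(X)
X_sampled = rbm.gibbs(X)
assert_all_finite(X_sampled)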
Example #18
def bad_rows(X, is_X=True):
    bad = []
    from sklearn.utils.validation import assert_all_finite
    for i in range(X.shape[0]):
        try:
            assert_all_finite(X[i])
        except ValueError:
            print("Index %s was not finite" % i)
            bad.append(i)
            print_bad(X[i], i, is_X)
    return bad
Example #19
def custom_svd(array, n_components, n_discard, n_svd_vecs):
    u, _, vt = svds(array, k=n_components, ncv=n_svd_vecs)
    if np.any(np.isnan(vt)):
        _, v = eigsh(safe_sparse_dot(array.T, array), ncv=n_svd_vecs)
        vt = v.T
    if np.any(np.isnan(u)):
        _, u = eigsh(safe_sparse_dot(array, array.T), ncv=n_svd_vecs)
    assert_all_finite(u)
    assert_all_finite(vt)
    u = u[:, n_discard:]
    vt = vt[n_discard:]
    return u, vt.T
Example #20
    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        self : regressor
            Returns self.
        """
        rs = check_random_state(self.random_state)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()

        self.outputs_2d_ = len(y.shape) == 2
        if self.outputs_2d_:
            Y = y
        else:
            Y = y.reshape(-1, 1)
        Y = np.asfortranarray(Y)
        n_vectors = Y.shape[1]
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        for k in range(n_vectors):
            _binary_sgd(self,
                        self.coef_, self.intercept_, k,
                        ds, Y[:, k], loss, penalty, self.alpha,
                        self._get_learning_rate(),
                        self.eta0, self.power_t,
                        self.fit_intercept,
                        self.intercept_decay,
                        int(self.max_iter * n_samples), self.shuffle, rs,
                        self.callback, self.n_calls, self.verbose)

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
Example #21
def test_gibbs_smoke():
    """Check if we don't get NaNs sampling the full digits dataset."""
    rng = np.random.RandomState(42)
    X = Xdigits.astype(np.float32)
    rbm1 = BernoulliRBM(X.shape[1],
                        n_hidden=42,
                        batch_size=40,
                        n_iter=20,
                        random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
Example #22
    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        self : regressor
            Returns self.
        """
        rs = check_random_state(self.random_state)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()

        self.outputs_2d_ = len(y.shape) == 2
        if self.outputs_2d_:
            Y = y
        else:
            Y = y.reshape(-1, 1)
        Y = np.asfortranarray(Y)
        n_vectors = Y.shape[1]
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        for k in xrange(n_vectors):
            _binary_sgd(self,
                        self.coef_, self.intercept_, k,
                        ds, Y[:, k], loss, penalty, self.alpha,
                        self._get_learning_rate(),
                        self.eta0, self.power_t,
                        self.fit_intercept,
                        self.intercept_decay,
                        int(self.max_iter * n_samples), self.shuffle, rs,
                        self.callback, self.n_calls, self.verbose)

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
Example #23
def main(name, num, useSpecial=False):

    labels = []
    with open("C:/MissingWord/corrScoring/" + name + "Labels.txt", "r") as f:
        for line in f:
            labels.append(float(line))

    features = []
    with open("C:/MissingWord/corrScoring/1000features.txt", "r") as f:
        for line in f:
            features.append([float(elem) for elem in line.split(",")])

    specialFeatures = getSpecialFeatures(len(features))

    if useSpecial:
        for i in range(min(len(specialFeatures), len(features))):
            features[i].extend(specialFeatures[i])

    features = features[:num]
    labels = labels[:num]

    for i in range(len(features)):
        if len(features[i]) != len(features[0]):
            print(i)
        try:
            assert_all_finite(features[i])
        except ValueError:
            print(i)

    cutoff = int(len(features) * 7 / 10)

    trainFeatures = features[:cutoff]
    testFeatures = features[cutoff:]

    trainLabels = labels[:cutoff]
    testLabels = labels[cutoff:]

    #regr = svm.SVR(C=1)
    regr = RandomForestRegressor(n_estimators=300, n_jobs=7)
    #regr = linear_model.LinearRegression()

    regr.fit(trainFeatures, trainLabels)

    print("Train Residual sum of squares: %.2f" % np.mean(
        (regr.predict(trainFeatures) - trainLabels)**2))
    print("Test Residual sum of squares: %.2f" % np.mean(
        (regr.predict(testFeatures) - testLabels)**2))

    print('Variance score: %.2f' % regr.score(testFeatures, testLabels))

    with open("C:/MissingWord/corrScoring/" + name + ".regr", "wb") as f:
        pickle.dump(regr, f)
Example #24
def check_input_arrays(*args, validate_len=True, force_all_finite=True):
    """Cast input sequences into numpy arrays.

    Only inputs that are sequence-like will be converted, all other inputs will be left as is.
    When `validate_len` is True, the sequences will be checked for equal length.

    Parameters
    ----------
    args : scalar or array_like
        Inputs to be checked.

    validate_len : bool (default=True)
        Whether to check if the input arrays have the same length.

    force_all_finite : bool (default=True)
        Whether to raise an error on inf and nan in the input arrays.

    Returns
    -------
    args: array-like
        List of inputs where sequence-like objects have been cast to numpy arrays.

    """
    n = None
    args = list(args)
    for i, arg in enumerate(args):
        if np.ndim(arg) > 0:
            new_arg = check_array(arg,
                                  dtype=None,
                                  ensure_2d=False,
                                  accept_sparse=True,
                                  force_all_finite=force_all_finite)
            if not force_all_finite:
                # For when checking input values is disabled
                try:
                    assert_all_finite(new_arg)
                except ValueError:
                    warnings.warn(
                        "Input contains NaN, infinity or a value too large for dtype('float64') "
                        "but input check is disabled. Check the inputs before proceeding."
                    )
            if validate_len:
                m = new_arg.shape[0]
                if n is None:
                    n = m
                else:
                    assert (
                        m == n
                    ), "Input arrays have incompatible lengths: {} and {}".format(
                        n, m)
            args[i] = new_arg
    return args
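A quick usage sketch of check_input_arrays as defined above (assuming its definition and imports are in scope): sequence-like inputs come back as numpy arrays of matching length, while scalars pass through untouched.

import numpy as np

a, b, c = check_input_arrays([1, 2, 3], np.array([4.0, 5.0, 6.0]), 7)
print(type(a).__name__, type(b).__name__, c)  # ndarray ndarray 7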
Example #25
def test_cd_linear_trivial():
    # trivial example that failed due to gh#4
    loss = Squared()
    alpha = 1e-5
    n_features = 100
    x = np.zeros((1, n_features))
    x[0, 1] = 1
    y = np.ones(1)
    cb = Callback(x, y, alpha)
    w = _fit_linear(x, y, alpha, n_iter=20, loss=loss, callback=cb)

    assert_all_finite(w)
    assert_all_finite(cb.losses_)
Example #26
    def predict(self, X):
        try:
            assert_all_finite(self.coef_)
            pred = safe_sparse_dot(X, self.coef_.T)
        except ValueError:
            n_samples = X.shape[0]
            n_vectors = self.coef_.shape[0]
            pred = np.zeros((n_samples, n_vectors))

        if not self.outputs_2d_:
            pred = pred.ravel()

        return pred
Example #27
    def _check_alphas(self):
        create_path = self.alphas is None
        if create_path:
            if self.n_alphas <= 0:
                raise ValueError("n_alphas must be a positive integer")

            alphas = numpy.empty(int(self.n_alphas), dtype=numpy.float64)
        else:
            alphas = column_or_1d(self.alphas, warn=True)
            assert_all_finite(alphas)
            check_non_negative(alphas, "alphas")
            assert_all_finite(alphas)
        return alphas, create_path
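The validation chain here uses only public sklearn utilities; a standalone sketch of the same checks on a hand-written alpha grid:

import numpy as np
from sklearn.utils import column_or_1d
from sklearn.utils.validation import assert_all_finite, check_non_negative

alphas = column_or_1d(np.array([[0.5], [0.1], [0.01]]), warn=True)  # flattens, with a warning
assert_all_finite(alphas)
check_non_negative(alphas, "alphas")
print(alphas)  # [0.5  0.1  0.01]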
Example #28
    def fit(self, X, y):
        rs = check_random_state(self.random_state)

        reencode = self.multiclass
        y, n_classes, n_vectors = self._set_label_transformers(y, reencode)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)

        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        if n_vectors == 1 or not self.multiclass:
            Y = np.asfortranarray(self.label_binarizer_.fit_transform(y),
                                  dtype=np.float64)
            for i in xrange(n_vectors):
                _binary_sgd(self,
                            self.coef_, self.intercept_, i,
                            ds, Y[:, i], loss, penalty,
                            self.alpha,
                            self._get_learning_rate(),
                            self.eta0, self.power_t,
                            self.fit_intercept,
                            self.intercept_decay,
                            int(self.max_iter * n_samples), self.shuffle, rs,
                            self.callback, self.n_calls, self.verbose)

        elif self.multiclass:
            _multiclass_sgd(self, self.coef_, self.intercept_,
                            ds, y.astype(np.int32), loss, penalty,
                            self.alpha, self._get_learning_rate(),
                            self.eta0, self.power_t, self.fit_intercept,
                            self.intercept_decay,
                            int(self.max_iter * n_samples),
                            self.shuffle, rs, self.callback, self.n_calls,
                            self.verbose)

        else:
            raise ValueError("Wrong value for multiclass.")

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
Example #29
def main(name, num, useSpecial=False):

    labels = []
    with open("C:/MissingWord/corrScoring/"+name+"Labels.txt", "r") as f:
        for line in f:
            labels.append(float(line))

    features = []
    with open("C:/MissingWord/corrScoring/1000features.txt", "r") as f:
        for line in f:
            features.append([float(elem) for elem in line.split(",")])

    specialFeatures = getSpecialFeatures(len(features))

    if useSpecial:
        for i in range(min(len(specialFeatures), len(features))):
            features[i].extend(specialFeatures[i])

    features = features[:num]
    labels = labels[:num]

    for i in range(len(features)):
        if len(features[i]) != len(features[0]):
            print(i)
        try:
            assert_all_finite(features[i])
        except ValueError:
            print(i)

    cutoff = int(len(features) * 7 / 10)

    trainFeatures = features[:cutoff]
    testFeatures = features[cutoff:]

    trainLabels = labels[:cutoff]
    testLabels = labels[cutoff:]

    #regr = svm.SVR(C=1)
    regr = RandomForestRegressor(n_estimators=300, n_jobs=7)
    #regr = linear_model.LinearRegression()

    regr.fit(trainFeatures, trainLabels)

    print("Train Residual sum of squares: %.2f"% np.mean((regr.predict(trainFeatures) - trainLabels) ** 2))
    print("Test Residual sum of squares: %.2f"% np.mean((regr.predict(testFeatures) - testLabels) ** 2))

    print('Variance score: %.2f' % regr.score(testFeatures, testLabels))

    with open("C:/MissingWord/corrScoring/"+name+".regr", "wb") as f:
        pickle.dump(regr, f)
Example #30
    def _check_params(self, n_features):
        if not 0 < self.l1_ratio <= 1:
            raise ValueError("l1_ratio must be in interval ]0;1], but was %f" % self.l1_ratio)

        if self.tol <= 0:
            raise ValueError("tolerance must be positive, but was %f" % self.tol)

        if self.penalty_factor is None:
            penalty_factor = numpy.ones(n_features, dtype=numpy.float64)
        else:
            pf = column_or_1d(self.penalty_factor, warn=True)
            if pf.shape[0] != n_features:
                raise ValueError("penalty_factor must be array of length n_features (%d), "
                                 "but got %d" % (n_features, pf.shape[0]))
            assert_all_finite(pf)
            check_non_negative(pf, "penalty_factor")
            penalty_factor = pf * n_features / pf.sum()
            assert_all_finite(penalty_factor)

        create_path = self.alphas is None
        if create_path:
            if self.n_alphas <= 0:
                raise ValueError("n_alphas must be a positive integer")

            alphas = numpy.empty(int(self.n_alphas), dtype=numpy.float64)
        else:
            alphas = column_or_1d(self.alphas, warn=True)
            assert_all_finite(alphas)
            check_non_negative(alphas, "alphas")
            assert_all_finite(alphas)

        if self.max_iter <= 0:
            raise ValueError("max_iter must be a positive integer")

        return create_path, alphas.astype(numpy.float64), penalty_factor.astype(numpy.float64)
Example #31
 def _check_penalty_factor(self, n_features):
     if self.penalty_factor is None:
         penalty_factor = numpy.ones(n_features, dtype=numpy.float64)
     else:
         pf = column_or_1d(self.penalty_factor, warn=True)
         if pf.shape[0] != n_features:
             raise ValueError(
                 "penalty_factor must be array of length n_features (%d), "
                 "but got %d" % (n_features, pf.shape[0]))
         assert_all_finite(pf)
         check_non_negative(pf, "penalty_factor")
         penalty_factor = pf * n_features / pf.sum()
         assert_all_finite(penalty_factor)
     return penalty_factor
Example #32
    def fit(self, X, X_error=None):
        """Implements the standard fitting function for a DL8.5 classifier.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples. If X_error is provided, X serves only as the explanation input.
        X_error : array-like, shape (n_samples, n_features_1)
            The training input used to calculate the error. If it is not provided, X is used to calculate the error.

        Returns
        -------
        self : object
            Returns self.
        """

        # Check that X_error has correct shape and raise ValueError if not
        if X_error is not None:
            assert_all_finite(X_error)
            X_error = check_array(X_error, dtype='int32')

        if self.error_function is None:
            if X_error is None:
                self.error_function = lambda tids: self.default_error(tids, X)
            else:
                if X_error.shape[0] == X.shape[0]:
                    self.error_function = lambda tids: self.default_error(
                        tids, X_error)
                else:
                    raise ValueError(
                        "X_error does not have the same number of rows as X")

        if self.leaf_value_function is None:
            if X_error is None:
                self.leaf_value_function = lambda tids: self.default_leaf_value(
                    tids, X)
            else:
                if X_error.shape[0] == X.shape[0]:
                    self.leaf_value_function = lambda tids: self.default_leaf_value(
                        tids, X_error)
                else:
                    raise ValueError(
                        "X_error does not have the same number of rows as X")

        # call fit method of the predictor
        DL85Predictor.fit(self, X)
        # print(self.tree_)

        # Return the classifier
        return self
Example #33
    def _scrub(self, X, y, sample_weight, **kwargs):
        '''
        Sanitize input data.
        '''
        # Check for sparseness
        if sparse.issparse(y):
            raise TypeError(
                'A sparse matrix was passed, but dense data '
                'is required. Use y.toarray() to convert to dense.')
        if sparse.issparse(sample_weight):
            raise TypeError(
                'A sparse matrix was passed, but dense data '
                'is required. Use sample_weight.toarray() to convert to dense.'
            )

        # Check whether X is the output of patsy.dmatrices
        if y is None and isinstance(X, tuple):
            y, X = X

        # Handle X separately
        X = self._scrub_x(X, **kwargs)

        # Convert y to internally used data type
        y = np.asarray(y, dtype=np.float64)
        assert_all_finite(y)
        y = y.reshape(y.shape[0])

        # Deal with sample_weight
        if sample_weight is None:
            sample_weight = np.ones(y.shape[0], dtype=y.dtype)
        else:
            sample_weight = np.asarray(sample_weight)
            assert_all_finite(sample_weight)
            sample_weight = sample_weight.reshape(sample_weight.shape[0])

        # Make sure dimensions match
        if y.shape[0] != X.shape[0]:
            raise ValueError('X and y do not have compatible dimensions.')
        if y.shape != sample_weight.shape:
            raise ValueError(
                'y and sample_weight do not have compatible dimensions.')

        # Make sure everything is finite
        assert_all_finite(X)
        assert_all_finite(y)
        assert_all_finite(sample_weight)

        return X, y, sample_weight
Example #34
    def _validate_inputs(self, X, y):
        X, y = check_X_y(X, y, accept_sparse=False)

        assert_all_finite(X)
        assert_all_finite(y)

        if np.any(np.iscomplex(X)) or np.any(np.iscomplex(y)):
            raise ValueError("Complex data not supported")
        if np.issubdtype(X.dtype, np.object_) or np.issubdtype(
                y.dtype, np.object_):
            try:
                X = X.astype(float)
                y = y.astype(int)
            except TypeError:
                raise TypeError("argument must be a string.* number")

        return (X, y)
Example #35
    def predict(self, X):
        assert_all_finite(X)
        check_is_fitted(self, 'is_fitted_')
        X = check_array(X, accept_sparse=True)

        n_iteration = len(self.clf)  # number of chosen classifiers in the ensemble
        self.pred1 = np.zeros((X.shape[0], n_iteration))
        self.ensemble_pred1 = np.zeros((X.shape[0], ))

        for i in range(n_iteration):
            pred1 = self.clf[i].predict(X)
            self.pred1[:, i] = pred1
            self.ensemble_pred1 = (self.pred1 *
                                   self.alpha[:, :n_iteration]).sum(axis=1)
        result = sgn(self.ensemble_pred1)
        return np.where(result == -1, 0, result)
Example #36
    def _scrub(self, X, y, sample_weight, **kwargs):
        '''
        Sanitize input data.
        '''
        # Check for sparseness
        if sparse.issparse(y):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use y.toarray() to convert to dense.')
        if sparse.issparse(sample_weight):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use sample_weight.toarray() '
                            'to convert to dense.')

        # Check whether X is the output of patsy.dmatrices
        if y is None and isinstance(X, tuple):
            y, X = X

        # Handle X separately
        X = self._scrub_x(X, **kwargs)

        # Convert y to internally used data type
        y = np.asarray(y, dtype=np.float64)
        assert_all_finite(y)
        y = y.reshape(y.shape[0])

        # Deal with sample_weight
        if sample_weight is None:
            sample_weight = np.ones(y.shape[0], dtype=y.dtype)
        else:
            sample_weight = np.asarray(sample_weight)
            assert_all_finite(sample_weight)
            sample_weight = sample_weight.reshape(sample_weight.shape[0])

        # Make sure dimensions match
        if y.shape[0] != X.shape[0]:
            raise ValueError('X and y do not have compatible dimensions.')
        if y.shape != sample_weight.shape:
            raise ValueError(
                'y and sample_weight do not have compatible dimensions.')

        # Make sure everything is finite
        assert_all_finite(X)
        assert_all_finite(y)
        assert_all_finite(sample_weight)

        return X, y, sample_weight
Example #37
    def predict_proba(self, X):
        proba = []

        X_subs = self._get_subdata(X)

        for i in range(self.n_classes_):
            e = self.estimators_[i]
            X_i = X_subs[i]
            pred = e.predict(X_i).reshape(-1, 1)
            proba.append(pred)
        proba = np.hstack(proba)

        normalizer = proba.sum(axis=1)[:, np.newaxis]
        normalizer[normalizer == 0.0] = 1.0
        proba /= normalizer

        assert_all_finite(proba)

        return proba
Example #38
    def _base_estimator_predict(self, e, X):
        """Predict label values with the specified estimator on predictor(s) X.

        Parameters
        ----------
        e : estimator
            The estimator object.

        X : np.ndarray, shape=(n, m)
            The feature data for which to compute the predicted outputs.

        Returns
        -------
        pred : np.ndarray, shape=(len(X), 1)
            The mean of the label probabilities predicted by the specified 
            estimator for each fold for each instance X.
        """
        # Generate array for the base-level testing set, which is n x n_folds.
        pred = e.predict(X)
        assert_all_finite(pred)
        return pred
Example #39
    def _scrub_x(self, X, **kwargs):
        '''
        Sanitize input predictors and extract column names if appropriate.
        '''
        # Check for sparseness
        if sparse.issparse(X):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use X.toarray() to convert to dense.')

        # Convert to internally used data type
        X = np.asarray(X, dtype=np.float64, order='F')
        assert_all_finite(X)
        if X.ndim == 1:
            X = X[:, np.newaxis]

        # Ensure correct number of columns
        if hasattr(self, 'basis_') and self.basis_ is not None:
            if X.shape[1] != self.basis_.num_variables:
                raise ValueError('Wrong number of columns in X')

        return X
Example #40
    def _base_estimator_predict(self, e, X):
        """Predict label values with the specified estimator on predictor(s) X.

        Parameters
        ----------
        e : estimator
            The estimator object.

        X : np.ndarray, shape=(n, m)
            The feature data for which to compute the predicted outputs.

        Returns
        -------
        pred : np.ndarray, shape=(len(X), 1)
            The mean of the label probabilities predicted by the specified 
            estimator for each fold for each instance X.
        """
        # Generate array for the base-level testing set, which is n x n_folds.
        pred = e.predict(X)
        assert_all_finite(pred)
        return pred
Example #41
    def _scrub_x(self, X, **kwargs):
        '''
        Sanitize input predictors and extract column names if appropriate.
        '''
        # Check for sparseness
        if sparse.issparse(X):
            raise TypeError(
                'A sparse matrix was passed, but dense data '
                'is required. Use X.toarray() to convert to dense.')

        # Convert to internally used data type
        X = np.asarray(X, dtype=np.float64)
        assert_all_finite(X)
        if len(X.shape) == 1:
            X = X.reshape((X.shape[0], 1))

        # Ensure correct number of columns
        if hasattr(self, 'basis_') and self.basis_ is not None:
            if X.shape[1] != self.basis_.num_variables:
                raise ValueError('Wrong number of columns in X')

        return X
Example #42
    def _validate_inputs(self, X):
        # Things we don't want to allow until we've tested them:
        # - Sparse inputs
        # - Multiclass outputs (e.g., more than 2 classes in `y`)
        # - Non-finite inputs
        # - Complex inputs

        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()

        X = check_array(X, accept_sparse=False, allow_nd=False)

        assert_all_finite(X)

        if np.any(np.iscomplex(X)):
            raise ValueError("Complex data not supported")
        if np.issubdtype(X.dtype, np.object_):
            try:
                X = X.astype(float)
            except (TypeError, ValueError):
                raise ValueError("argument must be a string.* number")

        return X
Example #43
    def fit(self, X, y):
        rs = check_random_state(self.random_state)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()

        self.outputs_2d_ = len(y.shape) == 2
        if self.outputs_2d_:
            Y = y
        else:
            Y = y.reshape(-1, 1)
        Y = np.asfortranarray(Y)
        n_vectors = Y.shape[1]
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        for k in xrange(n_vectors):
            _binary_sgd(self,
                        self.coef_, self.intercept_, k,
                        ds, Y[:, k], loss, penalty, self.alpha,
                        self._get_learning_rate(),
                        self.eta0, self.power_t,
                        self.fit_intercept,
                        self.intercept_decay,
                        int(self.max_iter * n_samples), self.shuffle, rs,
                        self.callback, self.n_calls, self.verbose)

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
Example #44
    def _scrub_x(self, X, missing, **kwargs):
        '''
        Sanitize input predictors and extract column names if appropriate.
        '''
        # Check for sparseness
        if sparse.issparse(X):
            raise TypeError(
                'A sparse matrix was passed, but dense data '
                'is required. Use X.toarray() to convert to dense.')
        X = np.asarray(X, dtype=np.float64, order='F')
        # Figure out missingness
        if missing is None:
            # Infer missingness
            missing = np.isnan(X)

        # Convert to internally used data type
        missing = np.asarray(missing, dtype=BOOL, order='F')
        assert_all_finite(missing)
        if missing.ndim == 1:
            missing = missing[:, np.newaxis]
        if not self.allow_missing:
            try:
                assert_all_finite(X)
            except ValueError:
                raise ValueError(
                    "Input contains NaN, infinity or a value that's too large.  Did you mean to set allow_missing=True?"
                )
        if X.ndim == 1:
            X = X[:, np.newaxis]

        # Ensure correct number of columns
        if hasattr(self, 'basis_') and self.basis_ is not None:
            if X.shape[1] != self.basis_.num_variables:
                raise ValueError('Wrong number of columns in X')

        return X, missing
Example #45
def test_make_biclusters():
    X, rows, cols = make_biclusters(shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)
    assert_equal(X.shape, (100, 100), "X shape mismatch")
    assert_equal(rows.shape, (4, 100), "rows shape mismatch")
    assert_equal(cols.shape, (4, 100), "columns shape mismatch")
    assert_all_finite(X)
    assert_all_finite(rows)
    assert_all_finite(cols)

    X2, _, _ = make_biclusters(shape=(100, 100), n_clusters=4, shuffle=True, random_state=0)
    assert_array_equal(X, X2)
Example #46
def test_make_checkerboard():
    X, rows, cols = make_checkerboard(shape=(100, 100), n_clusters=(20, 5), shuffle=True, random_state=0)
    assert_equal(X.shape, (100, 100), "X shape mismatch")
    assert_equal(rows.shape, (100, 100), "rows shape mismatch")
    assert_equal(cols.shape, (100, 100), "columns shape mismatch")

    X, rows, cols = make_checkerboard(shape=(100, 100), n_clusters=2, shuffle=True, random_state=0)
    assert_all_finite(X)
    assert_all_finite(rows)
    assert_all_finite(cols)

    X1, _, _ = make_checkerboard(shape=(100, 100), n_clusters=2, shuffle=True, random_state=0)
    X2, _, _ = make_checkerboard(shape=(100, 100), n_clusters=2, shuffle=True, random_state=0)
    assert_array_equal(X1, X2)
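The generators under test are public; a compact sketch of make_checkerboard, whose row and column indicator arrays have one row per bicluster (n_row_clusters * n_col_clusters in total):

import numpy as np
from sklearn.datasets import make_checkerboard
from sklearn.utils.validation import assert_all_finite

X, rows, cols = make_checkerboard(shape=(30, 30), n_clusters=(3, 2),
                                  shuffle=True, random_state=0)
assert_all_finite(X)
print(X.shape, rows.shape, cols.shape)  # (30, 30) (6, 30) (6, 30)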
Example #47
    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : classifier
            Returns self.
        """
        rs = check_random_state(self.random_state)

        reencode = self.multiclass
        y, n_classes, n_vectors = self._set_label_transformers(y, reencode)

        self.train_x = get_dataset(X)
        n_samples = self.train_x.get_n_samples()
        n_features = self.train_x.get_n_features()
        #self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.upd_ = np.zeros(int(self.max_iter * n_samples)+1, dtype=np.float64)
        self.seq_ = np.zeros(int(self.max_iter * n_samples)+1, dtype=np.int32)

        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        if n_vectors == 1 or not self.multiclass:
            Y = np.asfortranarray(self.label_binarizer_.fit_transform(y),
                                  dtype=np.float64)
            for i in xrange(n_vectors):
                
                (self.upd_, self.tr_err) = _karma_sgd(self,
                            #self.coef_,
                            self.upd_, self.seq_,
                            self.intercept_, i,
                            self.train_x, Y[:, i], loss, penalty,
                            self.alpha,
                            self._get_learning_rate(),
                            self.eta0, self.power_t,
                            self.fit_intercept,
                            self.intercept_decay,
                            int(self.max_iter * n_samples), self.shuffle, rs,
                            self.callback, self.n_calls, self.gamma, self.verbose)
                
        # elif self.multiclass:
        #     _multiclass_sgd(self, self.coef_, self.intercept_,
        #                     ds, y.astype(np.int32), loss, penalty,
        #                     self.alpha, self._get_learning_rate(),
        #                     self.eta0, self.power_t, self.fit_intercept,
        #                     self.intercept_decay,
        #                     int(self.max_iter * n_samples),
        #                     self.shuffle, rs, self.callback, self.n_calls,
        #                     self.verbose)

        else:
            raise ValueError("Wrong value for multiclass.")

        try:
            assert_all_finite(self.upd_)
            assert_all_finite(self.seq_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
Example #48
    def fit(self, X, X_test, y, y_test):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : classifier
            Returns self.
        """
        rs = check_random_state(self.random_state)

        reencode = self.multiclass
        y, _, n_vectors = self._set_label_transformers(y, reencode)
        y_test, _, n_vectors_test = self._set_label_transformers(y_test, reencode)
        #assert n_vectors==n_vectors_test
        ds = get_dataset(X)
        ds_test = get_dataset(X_test)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)

        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        if n_vectors == 1 or not self.multiclass:
            Y = np.asfortranarray(self.label_binarizer_.fit_transform(y),
                                  dtype=np.float64)
            Y_test = np.asfortranarray(self.label_binarizer_.fit_transform(y_test),
                                  dtype=np.float64)
            for i in xrange(n_vectors):
                _binary_sgd_test(self,
                            self.coef_, self.intercept_, i,
                            ds, Y[:, i], ds_test, Y_test[:, i], loss, penalty,
                            self.alpha,
                            self._get_learning_rate(),
                            self.eta0, self.power_t,
                            self.fit_intercept,
                            self.intercept_decay,
                            int(self.max_iter * n_samples), self.shuffle, rs,
                            self.callback, self.n_calls, self.verbose, self.black_out, (self.disp_freq * n_samples), (self.test_freq * n_samples))

        elif self.multiclass:
            _multiclass_sgd(self, self.coef_, self.intercept_,
                            ds, y.astype(np.int32), loss, penalty,
                            self.alpha, self._get_learning_rate(),
                            self.eta0, self.power_t, self.fit_intercept,
                            self.intercept_decay,
                            int(self.max_iter * n_samples),
                            self.shuffle, rs, self.callback, self.n_calls,
                            self.verbose)

        else:
            raise ValueError("Wrong value for multiclass.")

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
Example #49
    def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
        '''
        Sanitize input data.
        '''
        # Check for sparseness
        if sparse.issparse(y):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use y.toarray() to convert to dense.')
        if sparse.issparse(sample_weight):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use sample_weight.toarray() '
                            'to convert to dense.')
        if sparse.issparse(output_weight):
            raise TypeError('A sparse matrix was passed, but dense data '
                            'is required. Use output_weight.toarray() '
                            'to convert to dense.')

        # Check whether X is the output of patsy.dmatrices
        if y is None and isinstance(X, tuple):
            y, X = X

        # Handle X separately
        X, missing = self._scrub_x(X, missing, **kwargs)

        # Convert y to internally used data type
        y = np.asarray(y, dtype=np.float64)
        assert_all_finite(y)

        if len(y.shape) == 1:
            y = y[:, np.newaxis]

        # Deal with sample_weight
        if sample_weight is None:
            sample_weight = np.ones(y.shape[0], dtype=y.dtype)
        else:
            sample_weight = np.asarray(sample_weight)
            assert_all_finite(sample_weight)
        # Deal with output_weight
        if output_weight is None:
            output_weight = np.ones(y.shape[1], dtype=y.dtype)
        else:
            output_weight = np.asarray(output_weight)
            assert_all_finite(output_weight)

        # Make sure dimensions match
        if y.shape[0] != X.shape[0]:
            raise ValueError('X and y do not have compatible dimensions.')
        if y.shape[0] != sample_weight.shape[0]:
            raise ValueError(
                'y and sample_weight do not have compatible dimensions.')
        if y.shape[1] != output_weight.shape[0]:
            raise ValueError(
                'y and output_weight do not have compatible dimensions.')

        # Make sure everything is finite (except X, which is allowed to have
        # missing values)
        assert_all_finite(missing)
        assert_all_finite(y)
        assert_all_finite(sample_weight)
        assert_all_finite(output_weight)
        
        # Make sure everything is consistent
        check_X_y(X, y, accept_sparse=None, multi_output=True,
                  force_all_finite=False)

        return X, y, sample_weight, output_weight, missing
Example #50
 def _base_estimator_predict_proba(self, e, X):
     pred = e.predict_proba(X)
     assert_all_finite(pred)
     return pred
Example #51
__author__ = 'SEOKHO'

from sklearn.utils.validation import assert_all_finite
import numpy as np

print(assert_all_finite(np.array([1.5465978588774336, 13.173744467937684, 0.7164582594283925, 6.073044534100405, 0.563888990932914, 9.253016646256619, 1.5479898935732566, 13.172805509656142, 1.76135884564872, 15.4882753202587, 0.6621080655920463, 7.368970402194793, 0.5638305796422928, 9.249753374333018, 0.6931471805599453, 3.4011973816621555, 1.7616927591930054, 15.488775300844194, 0.5637344396624081, 9.249657234353133, 1.791759469228055, 1.791759469228055, 4.240504214996096, 9.587611745713565, 0.8472978603872037, 4.584967478670572, 1.78673244403277, 15.47979284224048, 1.7552292288982276, 15.488951473171578, 1.5461643256320923, 13.173248488496412, 1.7612086097539006, 15.48839602164465, 1.6084169832999933, 7.579678823090456, 1.5475324485137967, 13.172303356473655, 1.761260622260961, 15.488375937043429, 6.163314804034641, 3.258096538021482])))