class image_data_repo:
    """Repository of image feature vectors keyed by image file name.

    New images are compared against the stored ones by reconstructing their
    feature vector with Orthogonal Matching Pursuit (OMP) over the stored
    vectors; ``update`` evicts one stored image and moves its file away.
    """

    def __init__(self, name='image_', image_feature_dict=None, reconsitution_element_nums=6, error_limit=0.1):
        """Create the repository.

        Parameters
        ----------
        name : str
            Label stored on the instance.
        image_feature_dict : dict or None
            Initial ``{file name: feature vector}`` mapping.  It is copied,
            so later changes never touch the caller's mapping.  ``None``
            (the default) starts with an empty repository.
        reconsitution_element_nums : int
            Passed to OMP as ``n_nonzero_coefs``.
        error_limit : float
            Reconstruction error threshold used by ``use_image``.
        """
        self.name = name
        if image_feature_dict is None:
            self.image_feature_dict = {}
        else:
            self.image_feature_dict = image_feature_dict.copy()
        self.reconsitution_element_nums = reconsitution_element_nums
        self.error_limit = error_limit
        self.omp = OrthogonalMatchingPursuit(n_nonzero_coefs=reconsitution_element_nums)

    def image_nums(self):
        """Return the number of stored images."""
        # len() is correct on both Python 2 and 3; np.size() of a Python 3
        # dict view is always 1.
        return len(self.image_feature_dict)

    def add_element(self, keys, values):
        """Store (or overwrite) the feature vector ``values`` under ``keys``."""
        self.image_feature_dict[keys] = values

    def use_image(self, image_feature):
        """Decide whether ``image_feature`` adds information to the repository.

        The stored feature vectors are used as the columns of a dictionary
        and ``image_feature`` is reconstructed from them with OMP.

        Returns
        -------
        (bool, float)
            ``(False, err)`` when the error ``err = 1 - R^2`` is below
            ``error_limit``, otherwise ``(True, err)``.
        """
        # list(): a Python 3 dict view is not a sequence numpy can stack.
        data = np.array(list(self.image_feature_dict.values())).T
        self.omp.fit(data, image_feature)
        err = 1 - self.omp.score(data, image_feature)
        # Written as "not <" so a NaN error still yields True, as before.
        return not (err < self.error_limit), err

    def _most_similar_key(self):
        """Return the key of the image most similar to another stored image.

        Similarity is the dot product between feature vectors; an image's
        similarity with itself is ignored.  Because the similarity is
        symmetric the best pair always ties, and the earlier-inserted image
        of that pair is returned.  Returns ``None`` for an empty repository.
        """
        if not self.image_feature_dict:
            return None
        names = list(self.image_feature_dict)
        data = np.array([self.image_feature_dict[key] for key in names], dtype=float)
        # image x image Gram matrix (rows of ``data`` are images).
        gram = np.dot(data, data.T)
        np.fill_diagonal(gram, -np.inf)
        return names[int(np.argmax(gram.max(axis=1)))]

    def update(self):
        """Evict the most redundant image and move its file to ``~/rubbish``.

        The image chosen by ``_most_similar_key`` is removed from the
        repository and its file is moved from ``~/caffe`` to ``~/rubbish``.
        The destination directory is created when missing, and nothing is
        deleted unless the move succeeds.  Does nothing when the repository
        is empty.
        """
        import shutil

        filename = self._most_similar_key()
        if filename is None:
            return
        self.image_feature_dict.pop(filename)

        # Plain path operations instead of a shell command line, so unusual
        # characters in the file name cannot be interpreted by a shell.
        source = os.path.join(os.path.expanduser('~/caffe'), filename)
        target_dir = os.path.expanduser('~/rubbish')
        if not os.path.exists(source):
            return
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        shutil.move(source, os.path.join(target_dir, os.path.basename(filename)))
        return
def omp(data, label, ll, ul, step, weight, state):
    """Sweep OMP sparsity levels and score a linear SVR on the selected columns.

    ``step`` integer sparsity levels are taken evenly between ``ll`` and
    ``ul``.  For each level a shuffled 10-fold split is run: OMP is fitted on
    the training fold, the columns with a strictly positive coefficient are
    kept, and a linear-kernel SVR trained on those columns is scored on the
    held-out fold.  The weighted mean R^2 per level is plotted against the
    level, and the results of the best level are returned.

    Parameters
    ----------
    data, label : indexable by integer index arrays
        Samples and targets.
    ll, ul : number
        Lowest and highest number of non-zero coefficients to try.
    step : int
        Number of levels between ``ll`` and ``ul``.
    weight : sequence or None
        Per-fold weights for ``np.average`` (one per fold).
    state : int or None
        ``random_state`` of the K-fold shuffling.

    Returns
    -------
    pred, true, r2, mse, feature
        Per-fold predictions, targets, R^2 and MSE of the best level, plus the
        selected column indices of the best-scoring fold at that level.
    """
    # NOTE(review): only coefficients > 0 are kept, so columns that enter the
    # OMP model with a negative weight are dropped -- confirm this is intended.
    splitter = KFold(n_splits=10, shuffle=True, random_state=state)
    X, y = data, label
    levels = np.linspace(ll, ul, step).astype(int)
    r2, mse, pred, true, feature = [], [], [], [], []
    pbar = tnrange(step * 10, desc='loop')

    for n_coefs in levels:
        fold_r2, fold_mse, fold_pred, fold_true, fold_feature = [], [], [], [], []
        for train_index, test_index in splitter.split(X):
            y_fit, y_eval = y[train_index], y[test_index]
            X_fit, X_eval = X[train_index], X[test_index]

            selector = OrthogonalMatchingPursuit(n_nonzero_coefs=n_coefs, normalize=False)
            selector.fit(X_fit, np.ravel(y_fit))
            keep = np.where(selector.coef_ > 0)[0]

            regressor = svm.SVR(kernel='linear')
            regressor.fit(X_fit[:, keep], np.ravel(y_fit))
            y_hat = regressor.predict(X_eval[:, keep])

            fold_feature.append(keep)
            fold_pred.append(y_hat)
            fold_true.append(np.ravel(y_eval))
            fold_r2.append(r2_score(y_eval, y_hat))
            fold_mse.append(mean_squared_error(y_eval, y_hat))
            pbar.update(1)
        r2.append(fold_r2)
        mse.append(fold_mse)
        pred.append(fold_pred)
        true.append(fold_true)
        feature.append(fold_feature)

    r2 = np.array(r2)
    r2_mean = np.average(r2, axis=1, weights=weight)
    pbar.close()

    plt.figure()
    plt.plot(np.linspace(ll, ul, step), r2_mean)
    plt.xlabel('$non-zero coefficients$')
    plt.ylabel('$R^2$')

    # First level that reaches the best weighted mean R^2.
    best = np.where(r2_mean == max(r2_mean))[0][0]
    pred = np.array(pred)[best]
    true = np.array(true)[best]
    r2 = r2[best]
    mse = np.array(mse)[best]
    feature = np.array(feature)[best]
    print('max r2_score=', np.max(r2_mean), ', number of non-zero coefs=', levels[best])

    # Within the best level, report the columns of the best-scoring fold.
    feature = feature[np.where(r2 == max(r2))][0]
    print('number of selected features:', len(feature))
    return pred, true, r2, mse, feature
def Linear_Regression(R_data):  # return data
    """Fit an OMP regression of the first security on the factor columns.

    ``R_data`` is indexed by column label.  The columns named in the
    module-level ``factors`` sequence are the regressors and the column named
    ``securities[0]`` is the response; both names come from the enclosing
    module, not from this function.

    Returns the fitted ``OrthogonalMatchingPursuit`` estimator, configured
    with one non-zero coefficient per factor and an intercept.
    """
    factor_columns = [R_data[label] for label in factors]
    response = R_data[securities[0]]
    regressors = pd.concat(factor_columns, axis=1)

    model = OrthogonalMatchingPursuit(n_nonzero_coefs=len(factors),
                                      fit_intercept=True)
    model.fit(regressors, response)
    return model
示例#4
0
def omp0(y, A, normalize=False, tol=1.0e-6, verbose=False):
    r"""Fit Orthogonal Matching Pursuit and return its coefficients.

    Arguments
    ---------------------
     y -- target, passed as the second argument of ``fit``
     A -- design matrix, passed as the first argument of ``fit``

    Keyword Arguments
    ---------------------
     normalize {boolean} -- forwarded to OrthogonalMatchingPursuit (default: {False})
     tol {float} -- forwarded to OrthogonalMatchingPursuit (default: {1.0e-6})
     verbose {boolean} -- print progress banners before and after the fit
                          (default: {False})

    Returns
    ---------------------
     The ``coef_`` attribute of the fitted estimator.
    """

    def _announce(*lines):
        # Progress output is opt-in.
        if verbose:
            for line in lines:
                print(line)

    _announce("================in omp================", "===Do OMP...")
    solver = OrthogonalMatchingPursuit(normalize=normalize, tol=tol)
    solver.fit(A, y)
    coefficients = solver.coef_
    _announce("===Done!")

    return coefficients
def Linear_Regression(R_data):# return data
    """Regress ``securities[0]`` on the ``factors`` columns with OMP.

    ``R_data`` is looked up by column label; ``factors`` and ``securities``
    are names defined outside this function.  The estimator is allowed as
    many non-zero coefficients as there are factors and fits an intercept.

    Returns the fitted ``OrthogonalMatchingPursuit`` estimator.
    """
    selected = []
    for label in factors:
        selected.append(R_data[label])

    target = R_data[securities[0]]
    design = pd.concat(selected, axis=1)

    estimator = OrthogonalMatchingPursuit(n_nonzero_coefs=len(factors), fit_intercept=True)
    estimator.fit(design, target)
    return estimator
示例#6
0
 def fit_predict_omp(self, X, y=None):
     """Cluster the rows of ``X`` by a sparse self-expressive representation.

     ``X`` is mapped through ``NRP_ELM``; each mapped sample is then
     regressed on all the other mapped samples with OMP.  The coefficients,
     scaled by their largest absolute value, fill one column of a
     sample-by-sample matrix.  The symmetrised absolute matrix is stored in
     ``self.affinity_matrix`` and passed to spectral clustering with a
     precomputed affinity.  ``y`` is not used.

     Returns the cluster labels from ``SpectralClustering``.
     """
     # NOTE(review): scikit-learn documents that ``tol``, when given,
     # overrides ``n_nonzero_coefs``; confirm tol=1e20 is intended.
     n_sample = X.shape[0]
     hidden = NRP_ELM(self.n_hidden, sparse=False).fit(X).predict(X)
     coefficients = np.zeros((n_sample, n_sample))

     for idx in range(n_sample):
         # Dictionary: every mapped sample except the one being expressed.
         others = np.delete(hidden, idx, axis=0)
         solver = OrthogonalMatchingPursuit(
             n_nonzero_coefs=int(n_sample * 0.5), tol=1e20)
         solver.fit(others.transpose(), hidden[idx])
         scaled = solver.coef_ / np.max(np.abs(solver.coef_))
         # Re-insert the skipped self position; the diagonal stays zero.
         coefficients[:idx, idx] = scaled[:idx]
         coefficients[idx + 1:, idx] = scaled[idx:]

     # Symmetric, non-negative affinity graph.
     self.affinity_matrix = 0.5 * (np.abs(coefficients) +
                                   np.abs(coefficients.T))

     clusterer = SpectralClustering(n_clusters=self.n_clusters,
                                    affinity='precomputed')
     clusterer.fit(self.affinity_matrix)
     return clusterer.labels_
示例#7
0
def restore_cs1_signal(non_zero_features,
                       sdm_signal,
                       transformation,
                       error_handler=print) -> np.ndarray:
    """Recover a binary signal from ``sdm_signal`` with OMP.

    Parameters
    ----------
    non_zero_features : sequence
        Only ``len(non_zero_features[0])`` is used: it is the number of
        non-zero coefficients OMP is allowed to select.
    sdm_signal : iterable of numbers
        Target passed to the OMP fit.
    transformation : 2-D array
        Design matrix of the fit; its column count is the output length.
    error_handler : callable, optional
        Called with the exception when anything fails.  A non-callable value
        falls back to ``print``.

    Returns
    -------
    numpy.ndarray or None
        A vector with one entry per column of ``transformation`` holding 1
        where OMP selected a coefficient and 0 elsewhere.  An all-zero
        ``sdm_signal`` yields an all-zero vector without running OMP.
        ``None`` is returned after the error handler has been called.
    """
    try:
        len_non_zero_features = len(non_zero_features[0])
        if len_non_zero_features == 0:
            raise ValueError("No features in array")

        if set(sdm_signal) == {0}:
            # Return an array here as well so callers always get the
            # annotated type, not a list in this one branch.
            return np.zeros(transformation.shape[1])

        omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=len_non_zero_features)
        omp.fit(transformation, sdm_signal)
        cs1_signal = omp.coef_
        # Binarise: keep only which coefficients were selected.
        cs1_signal[cs1_signal != 0] = 1
        return cs1_signal
    except Exception as error:
        # Deliberate best-effort: report the problem and return None.
        if callable(error_handler):
            error_handler(error)
        else:
            print(error)
        return None
示例#8
0
def OMP(problem, **kwargs):
    r"""Select the data series that best explain the goal series via OMP.

    Parameters
    ----------
    problem : object
        Must expose ``problem.data`` (a sequence of mappings with
        ``['data']['values']``) and ``problem.goal['data']['values']``.
    kwargs : dictionary
        kwargs['choose'] is passed to OMP as ``n_nonzero_coefs``
        kwargs['coef_tolerance'] is the absolute coefficient threshold above
        which a series counts as selected

    Returns
    -------
    tuple
        ``(optimum, maximum)``: the entries of ``problem.data`` whose
        coefficient magnitude exceeds the tolerance, and the estimator's
        ``score`` on the same data it was fitted on.
    """
    series = numpy.array([datum['data']['values'] for datum in problem.data])
    # One column per candidate series.
    design = series.T
    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=kwargs['choose'])
    solver.fit(design, problem.goal['data']['values'])

    optimum = []
    for datum, weight in zip(problem.data, solver.coef_):
        if abs(weight) > kwargs['coef_tolerance']:
            optimum.append(datum)

    maximum = solver.score(design, problem.goal['data']['values'])
    return (optimum, maximum)
示例#9
0
def csper(t,
          y,
          fmin=None,
          fmax=None,
          nfreqs=5000,
          nsines=4,
          polyorder=2,
          sig=5):
    """Sparse periodogram: fit a few sinusoids plus a polynomial with OMP.

    A design matrix is built from ``sin(t * f)`` and ``cos(t * f)`` for
    ``nfreqs`` evenly spaced values ``f`` in ``[fmin, fmax]``, followed by
    the polynomial columns ``t**1 ... t**polyorder``.  OMP with
    ``nsines + polyorder`` non-zero coefficients is fitted to
    ``y - nanmedian(y)``.  The selected sine and cosine frequencies are
    printed.

    Parameters
    ----------
    t, y : 1-D arrays
        Sample positions and values.
    fmin, fmax : float, optional
        Frequency range.  Default to ``1 / (max(t) - min(t))`` and
        ``2 / median sample spacing``.
    nfreqs : int
        Number of trial frequencies.
    nsines : int
        Number of sinusoidal terms OMP may select.
    polyorder : int
        Highest polynomial power; ``0`` disables the polynomial columns.
    sig : float
        Width passed to ``gaussian_filter1d`` when smoothing the amplitudes.

    Returns
    -------
    dict
        ``'Frequencies'``, ``'Raw_Amplitudes'`` (sine then cosine
        coefficients), ``'Polynomial'`` (coefficients of ``t**1`` upward),
        ``'Reconstruction'`` (``X . coef``, without the intercept) and
        ``'Amplitude'`` (smoothed ``sqrt(sin^2 + cos^2)`` per frequency).
    """
    trange = np.nanmax(t) - np.nanmin(t)
    dt = np.abs(np.nanmedian(t - np.roll(t, -1)))
    nt = np.size(t)

    # make defaults
    if fmin is None:
        fmin = 1. / trange
    if fmax is None:
        fmax = 2. / dt

    freqs = np.linspace(fmin, fmax, nfreqs)
    n_trig = 2 * nfreqs

    # Columns: [sines | cosines | polynomial terms].
    X = np.zeros((nt, n_trig + polyorder))
    phase = np.outer(t, freqs)
    X[:, :nfreqs] = np.sin(phase)
    X[:, nfreqs:n_trig] = np.cos(phase)

    # Index the polynomial columns explicitly from the end of the
    # trigonometric block.  Negative indexing with j == 0 would address
    # column 0 and overwrite the first sine column.
    for j in range(polyorder):
        X[:, n_trig + j] = t**(j + 1)

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=nsines + polyorder)
    omp.fit(X, y - np.nanmedian(y))

    coef = omp.coef_
    # Slice by position, not by -polyorder, so polyorder == 0 also works.
    trig_coef = coef[:n_trig]
    idx_r, = trig_coef.nonzero()
    sines = freqs[idx_r[idx_r < nfreqs]]
    # >= so the first cosine column (index nfreqs) is not dropped.
    cosines = freqs[idx_r[idx_r >= nfreqs] - nfreqs]
    print('Sine components: {}'.format(sines))
    print('Cosine components: {}'.format(cosines))

    amp_raw = np.sqrt(trig_coef[:nfreqs]**2. + trig_coef[nfreqs:]**2)
    amp = gaussian_filter1d(amp_raw, sig)

    recon = np.dot(X, coef)

    output = {
        'Frequencies': freqs,
        'Raw_Amplitudes': trig_coef,
        'Polynomial': coef[n_trig:],
        'Reconstruction': recon,
        'Amplitude': amp
    }

    return output
def omp_2D(dictionary, samples, n_nonzero_coefs, params=None):
    """2D Orthogonal Matching Pursuit with a separable dictionary.

    Each sample ``S`` (rows x cols) is modelled as ``D0 @ C @ D1.T`` with
    ``D0 = dictionary[0]`` and ``D1 = dictionary[1]``.  The problem is
    solved in vectorised form, ``vec(S) = kron(D1, D0) @ vec(C)`` with
    column-major ``vec``, by a single multi-target OMP fit.

    Parameters
    ----------
    dictionary : pair of 2-D arrays
        ``dictionary[0]`` has one row per sample row, ``dictionary[1]`` one
        row per sample column.  Their column counts (atoms) may differ.
    samples : 3-D array, shape (n_samples, rows, cols)
    n_nonzero_coefs : int
        Sparsity passed to OMP.
    params : unused
        Kept for interface compatibility.

    Returns
    -------
    codes : ndarray, shape (n_samples, atoms of D0, atoms of D1)
    err : float
        Squared Frobenius norm of the reconstruction residual.
    """
    n_samples, n_rows, n_cols = samples.shape
    atoms_0 = dictionary[0].shape[1]
    atoms_1 = dictionary[1].shape[1]

    ompfun = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs,
                                       fit_intercept=False,
                                       normalize=False,
                                       precompute=True)

    # Column i holds the column-major vectorisation of sample i.
    samples_vec = np.zeros((n_rows * n_cols, n_samples))
    for i in range(n_samples):
        samples_vec[:, i] = samples[i].T.reshape(n_rows * n_cols)

    dictionary_vec = np.kron(dictionary[1], dictionary[0])

    # atleast_2d: with a single sample scikit-learn returns a 1-D coef_.
    codes_vec = np.atleast_2d(
        ompfun.fit(dictionary_vec, samples_vec).coef_).T

    err = np.linalg.norm(samples_vec - dictionary_vec @ codes_vec, 'fro')**2

    codes = np.zeros((n_samples, atoms_0, atoms_1))
    for i in range(n_samples):
        # Undo the column-major vectorisation: reshape to (atoms_1, atoms_0)
        # and transpose.  The result matches the previous behaviour when
        # both dictionaries have the same number of atoms and also works
        # when they differ.
        codes[i] = codes_vec[:, i].reshape((atoms_1, atoms_0)).T

    return codes, err
示例#11
0
 def __init__(self, patch_size=(12, 12), max_samples=1000000, **omp_args):
     """Store the patch settings and build the OMP estimator.

     ``patch_size`` and ``max_samples`` are kept on the instance as given.
     Every extra keyword argument is forwarded to
     ``OrthogonalMatchingPursuit``.  ``D``, ``data`` and ``components``
     start as ``None``; ``zscore`` and ``log_amplitude`` start as ``False``.
     """
     self.patch_size, self.max_samples = patch_size, max_samples
     self.omp = OrthogonalMatchingPursuit(**omp_args)
     # Filled in by later processing steps.
     self.D = self.data = self.components = None
     # Optional preprocessing switches, off by default.
     self.zscore = self.log_amplitude = False
示例#12
0
def omp_batch_recon(signal,
                    ind,
                    target_pts,
                    n_nonzero_coefs=20,
                    transform='dct',
                    retCoefs=False):
    """Reconstruct a down-sampled signal with Orthogonal Matching Pursuit.

    The inverse transform of the identity gives a synthesis basis with
    ``target_pts`` atoms.  Its rows at the sampled positions ``ind`` are
    used as the OMP dictionary, the sparse coefficients are fitted to
    ``signal``, and the full-length signal is rebuilt by applying the
    inverse transform to those coefficients and adding ``mean(signal)``.
    The result is passed through ``utils.normalize``.

     Parameters
     ----------
    signal : list
        the down-sampled signal to reconstruct
    ind : list
        positions of the down-sampled points within the full-length signal
    target_pts : integer
        number of points of the reconstructed signal
    n_nonzero_coefs : integer
        number of non-zero transform coefficients OMP may select
    transform : 'dct' or 'dst'
        ``'dst'`` selects the inverse discrete sine transform; any other
        value selects the inverse discrete cosine transform
    retCoefs : boolean
        if True, also return the fitted transform coefficients

     Returns
     -------
    x : array
        the reconstructed, normalised signal
    coef : array
        the fitted coefficients (only when ``retCoefs`` is True)

    """
    # Pick the inverse transform once; it is needed for both the dictionary
    # and the final synthesis.
    inverse = spfft.idst if transform == 'dst' else spfft.idct

    basis = inverse(np.identity(target_pts), axis=0)
    sampled_basis = basis[ind]

    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    solver.fit(sampled_basis, signal)
    coef = solver.coef_

    x = utils.normalize(inverse(coef, axis=0) + np.mean(signal))
    if not retCoefs:
        return x
    return (x, coef)
示例#13
0
def orthogonal_matching_pursuit(A, y, sparsity_level, **kwargs):
    """Timed wrapper around ``OrthogonalMatchingPursuit``.

    Fits ``y`` on ``A`` with ``sparsity_level`` non-zero coefficients.
    Extra keyword arguments are accepted but not used.

    Returns ``(coefs, elapsed_time, support)``: the fitted coefficients, the
    time spent constructing and fitting the estimator as measured by
    ``timer()``, and the first index array of ``coefs.nonzero()``.
    """
    began = timer()
    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=sparsity_level).fit(A, y)
    elapsed_time = timer() - began
    coefs = solver.coef_
    return coefs, elapsed_time, coefs.nonzero()[0]
示例#14
0
 def __init__(self, n_components=49, patch_size=(8,8), max_samples=1000000, **kwargs):
     """Store the dictionary settings and create a default OMP estimator.

     ``n_components``, ``patch_size`` and ``max_samples`` are kept on the
     instance as given.  Extra keyword arguments are accepted but not used.
     ``D``, ``data`` and ``components`` start as ``None`` and
     ``standardize`` starts as ``False``.
     """
     self.omp = OrthogonalMatchingPursuit()
     self.n_components, self.patch_size = n_components, patch_size
     self.max_samples = max_samples
     # Filled in by later processing steps.
     self.D = self.data = self.components = None
     self.standardize = False
示例#15
0
 def __init__(self,
              name='image_',
              image_feature_dict={},
              reconsitution_element_nums=8,
              error_limit=0.2):
     """Set up the feature store and its OMP estimator.

     ``image_feature_dict`` is copied, so the instance never shares state
     with the caller's mapping or with the default argument.
     ``reconsitution_element_nums`` is stored and also passed to OMP as
     ``n_nonzero_coefs``.  ``name`` and ``error_limit`` are stored as given.
     """
     self.name, self.error_limit = name, error_limit
     self.image_feature_dict = image_feature_dict.copy()
     self.reconsitution_element_nums = reconsitution_element_nums
     self.omp = OrthogonalMatchingPursuit(
         n_nonzero_coefs=self.reconsitution_element_nums)
示例#16
0
def test_omp_reaches_least_squares():
    """OMP allowed to use every feature must match ordinary least squares."""
    # Small random problem: a sanity check only, since OMP can stop early.
    n_samples, n_features, n_targets = 10, 8, 3
    rng = check_random_state(0)
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_targets)

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
    lstsq = LinearRegression()
    for estimator in (omp, lstsq):
        estimator.fit(X, Y)

    assert_array_almost_equal(omp.coef_, lstsq.coef_)
示例#17
0
def test_omp_reaches_least_squares():
    """With ``n_nonzero_coefs == n_features`` OMP equals least squares."""
    # Tiny seeded data set; a sanity check only, since OMP can stop early.
    shape_X, shape_Y = (10, 8), (10, 3)
    rng = check_random_state(0)
    X = rng.randn(*shape_X)
    Y = rng.randn(*shape_Y)

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=shape_X[1])
    lstsq = LinearRegression()
    omp.fit(X, Y)
    lstsq.fit(X, Y)

    assert_array_almost_equal(omp.coef_, lstsq.coef_)
示例#18
0
def GetNeighborDims(data, paras):
    """Express every column of ``data`` sparsely by the other columns.

    For each column ``kk`` an OMP model with ``paras["kND"]`` non-zero
    coefficients is fitted to predict that column from all remaining
    columns.

    Parameters
    ----------
    data : 2-D array, shape (n_samples, n_dims)
    paras : mapping
        Must contain ``"kND"``, the number of non-zero coefficients.

    Returns
    -------
    w : float32 array, shape (n_dims - 1, n_dims)
        Column ``kk`` holds the coefficients of the other columns, in their
        original order with column ``kk`` itself left out.
    """
    ndata, ndim = data.shape
    kND = paras["kND"]
    objOMP = OMP(n_nonzero_coefs=kND)
    # Builtin ``bool``: the ``npy.bool`` alias was deprecated in NumPy 1.20
    # and removed in 1.24.
    idxDict = npy.ones(ndim, dtype=bool)
    w = npy.zeros((ndim - 1, ndim), dtype=npy.float32)
    for kk in range(ndim):
        # Mask selecting every column except the current target.
        idxDict.fill(True)
        idxDict[kk] = False
        objOMP.fit(data[:, idxDict], data[:, kk])
        w[:, kk] = objOMP.coef_.astype(npy.float32)
    return w
示例#19
0
def test_omp_cv():
    """OMP-CV must pick the expected sparsity and agree with plain OMP."""
    target, expected_coef = y[:, 0], gamma[:, 0]
    shared = dict(normalize=True, fit_intercept=False)

    ompcv = OrthogonalMatchingPursuitCV(max_iter=10, cv=5, **shared)
    ompcv.fit(X, target)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, expected_coef)

    # Plain OMP at the cross-validated sparsity gives the same coefficients.
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=ompcv.n_nonzero_coefs_,
                                    **shared)
    omp.fit(X, target)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
示例#20
0
文件: ksvd.py 项目: wsuzume/ksvd
    def _estimate_X(self,Y,A):
        """Sparse-code ``Y`` over the dictionary ``A`` with OMP.

        The number of non-zero coefficients is ``self.num_of_NZ`` or, when
        that is ``None``, 10% of the number of columns of ``A`` rounded up.
        The columns of ``A`` are scaled to unit l2 norm IN PLACE before the
        fit; the norm is floored at 1e-20 to avoid dividing by zero.

        Returns the transposed ``coef_`` of the fitted estimator.
        """
        sparsity = self.num_of_NZ
        if sparsity is None:
            sparsity = np.ceil(0.1 * A.shape[1])

        omp = OrthogonalMatchingPursuit(n_nonzero_coefs = int(sparsity))

        # Rows of A.T are views on the columns of A, so this edits A itself.
        for atom in A.T:
            atom /= max(np.linalg.norm(atom),1e-20)

        omp.fit(A,Y)
        return omp.coef_.T
示例#21
0
def test_omp_cv():
    """Cross-validated OMP recovers the sparsity and the coefficients."""
    first_target = y[:, 0]
    first_gamma = gamma[:, 0]

    ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False,
                                        max_iter=10, cv=5).fit(X, first_target)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, first_gamma)

    # Refit plain OMP with the sparsity selected by cross-validation.
    omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False,
                                    n_nonzero_coefs=ompcv.n_nonzero_coefs_)
    omp.fit(X, first_target)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
示例#22
0
def classify_OMP(train, test):
	"""Fit default OMP on vector-encoded labels and predict the test samples.

	``train`` and ``test`` are ``(x, y)`` pairs.  Each training label is
	turned into a vector by ``tovec(label, n_classes)``, where ``n_classes``
	is the number of distinct training labels.  The test labels are not
	used.

	Returns the raw output of ``predict`` on the test samples.
	"""
	from sklearn.linear_model import OrthogonalMatchingPursuit as OMP

	train_x, train_labels = train
	n_classes = np.unique(train_labels).shape[0]
	targets = [tovec(label, n_classes) for label in train_labels]

	clf = OMP()
	clf.fit(train_x, targets)

	test_x, _ = test
	return clf.predict(test_x)
示例#23
0
def constrained_binary_solve(
    w, psi, fit_intercept=True, normalize=True, precompute="auto"
):
    """Fit OMP with zero tolerance of ``w`` on ``psi``; return the coefficients.

    ``w`` must be one-dimensional according to ``ndim``, otherwise a
    ``ValueError`` is raised.  ``fit_intercept``, ``normalize`` and
    ``precompute`` are forwarded to ``OrthogonalMatchingPursuit``.
    """
    w_rank = ndim(w)
    if w_rank != 1:
        raise ValueError(
            f"w must be a 1D vector; received a vector of dimension {w_rank}"
        )

    settings = {
        "tol": 0,
        "fit_intercept": fit_intercept,
        "normalize": normalize,
        "precompute": precompute,
    }
    return OrthogonalMatchingPursuit(**settings).fit(psi, w).coef_
示例#24
0
    def fit_model_14(self,toWrite=False):
        """Fit default OMP on every cross-validation split and print its score.

        ``self.cv_data`` yields ``(X_train, X_test, Y_train, Y_test)``
        tuples.  The same estimator is refitted on each split, so it ends up
        holding the fit of the last split.  When ``toWrite`` is true that
        estimator is pickled to ``model14/model.pkl``.
        """
        model = OrthogonalMatchingPursuit()

        for X_train, X_test, Y_train, Y_test in self.cv_data:
            model.fit(X_train,Y_train)
            pred = model.predict(X_test)
            print("Model 14 score %f" % (logloss(Y_test,pred),))

        if toWrite:
            # pickle writes bytes, so the file must be opened in binary
            # mode; ``with`` closes it even if dumping fails.
            with open('model14/model.pkl','wb') as f2:
                pickle.dump(model,f2)
def _solver_OMP(A, b, K):
    """
    Compute a K-sparse solution of Ax = b with Orthogonal Matching Pursuit.

    @param A    Matrix passed as the design matrix of the fit.
    @param b    Right-hand side passed as the target of the fit.
    @param K    Sparsity of the solution (number of non-zero coefficients).
    @return     The fitted coefficient vector.
    """

    from sklearn.linear_model import OrthogonalMatchingPursuit

    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=K)
    return solver.fit(A, b).coef_
class _OrthogonalMatchingPursuitImpl:
    """Thin adapter that owns an ``Op`` instance built from hyper-parameters."""

    def __init__(self, **hyperparams):
        """Remember ``hyperparams`` and build the wrapped ``Op`` from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is passed on only when it is given.

        Returns ``self`` so calls can be chained.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
示例#27
0
def solve_preconditioned_orthogonal_matching_pursuit(basis_matrix_func,
                                                     samples,values,
                                                     precond_func,
                                                     tol=1e-8):
    """Solve a row-weighted sparse regression with OMP.

    ``basis_matrix_func(samples)`` builds the basis matrix and
    ``precond_func(basis_matrix, samples)`` returns one weight per row.
    Both the basis matrix and ``values`` are multiplied row-wise by those
    weights before an intercept-free OMP with tolerance ``tol`` is fitted.
    The number of non-zero coefficients is printed.

    Returns the fitted coefficients with a new axis appended after the
    first one (``coef[:, np.newaxis]``).
    """
    from sklearn.linear_model import OrthogonalMatchingPursuit

    unweighted = basis_matrix_func(samples)
    # One weight per row, shaped as a column so it broadcasts across rows.
    row_weights = precond_func(unweighted, samples)[:, np.newaxis]
    weighted_basis = unweighted * row_weights
    weighted_values = values * row_weights

    solver = OrthogonalMatchingPursuit(tol=tol, fit_intercept=False)
    solver.fit(weighted_basis, weighted_values)
    coef = solver.coef_
    print('nnz_terms',np.count_nonzero(coef))
    return coef[:,np.newaxis]
示例#28
0
def test_omp_cv():
    """OMP-CV selects the expected sparsity and matches plain OMP."""
    # FIXME: This test is unstable on Travis, see issue #3190 for more detail.
    check_skip_travis()

    target = y[:, 0]
    reference_coef = gamma[:, 0]
    common = {'normalize': True, 'fit_intercept': False}

    ompcv = OrthogonalMatchingPursuitCV(max_iter=10, cv=5, **common)
    ompcv.fit(X, target)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, reference_coef)

    # Plain OMP at the selected sparsity must give the same coefficients.
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=ompcv.n_nonzero_coefs_,
                                    **common)
    omp.fit(X, target)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
示例#29
0
def test_scaling_with_gram():
    """Passing ``Gram`` (with or without ``Xy``) to ``fit`` must warn.

    Checked for three single-atom estimators: no intercept and no
    normalisation, intercept only, and normalisation only.
    """
    # (fit_intercept, normalize) for each estimator, in the checked order.
    configurations = ((False, False), (True, False), (False, True))
    estimators = [
        OrthogonalMatchingPursuit(n_nonzero_coefs=1,
                                  fit_intercept=with_intercept,
                                  normalize=with_normalize)
        for with_intercept, with_normalize in configurations
    ]

    for estimator in estimators:
        assert_warns(DeprecationWarning, estimator.fit, X, y, Gram=G)
        assert_warns(DeprecationWarning, estimator.fit, X, y, Gram=G, Xy=Xy)
示例#30
0
def compare_linear_classifiers():
    """Train and compare a fixed set of linear models.

    Each estimator is wrapped in ``ScikitClassifierWrapper`` and handed to a
    ``ClassifierComparator``, which loads its data, trains the classifiers
    without saving them, and runs the comparison.
    """
    # Listed under "No way" by the original author and left out here:
    #   Lars, LassoLars, Lasso, ElasticNet, HuberRegressor, Hinge,
    #   SGDRegressor, Ridge, RANSACRegressor, TheilSenRegressor
    # ElasticNetCV(cv=5, eps=0.01) is also disabled.
    specs = (
        (SGDClassifier, {'max_iter': 1000, 'tol': 0.01}),
        (LogisticRegression, {}),
        (RidgeClassifier, {}),
        (LogisticRegressionCV, {}),
        (OrthogonalMatchingPursuit, {}),
        (PassiveAggressiveClassifier, {}),
        (Perceptron, {'max_iter': 1000, 'tol': 0.01}),
    )
    classifiers = tuple(
        ScikitClassifierWrapper(estimator_cls(**options))
        for estimator_cls, options in specs
    )

    comparator = ClassifierComparator(classifiers=classifiers)
    comparator.load_data()
    comparator.train_classifiers(save_trained=False)
    comparator.compare()
def get_regressors_variable(nmodels='all'):
    """Return one of, or all, variable-selection regressors.

    The regressors are numbered as follows:

    1. ElasticNet
    2. Lars
    3. Lasso
    4. LassoLars
    5. OrthogonalMatchingPursuit

    Parameters
    ----------
    nmodels : 'all' or int (or anything ``int()`` accepts)
        ``'all'`` returns all five regressors in the order above; a number
        from 1 to 5 returns only that regressor.

    Returns
    -------
    list
        Freshly constructed estimator instances.  The list has one element
        when a single model is requested.

    Raises
    ------
    ValueError
        If ``nmodels`` is neither ``'all'`` nor a number from 1 to 5.
    """
    # Position k (1-based) is the model selected by nmodels=k.
    factories = (ElasticNet, Lars, Lasso, LassoLars,
                 OrthogonalMatchingPursuit)

    if nmodels == 'all':
        return [make() for make in factories]

    try:
        position = int(nmodels)
    except (TypeError, ValueError):
        position = 0
    if not 1 <= position <= len(factories):
        raise ValueError(
            "nmodels must be 'all' or an integer from 1 to {}; got {!r}".format(
                len(factories), nmodels))

    # Return the estimator itself, not the text 'lr<N>'.
    return [factories[position - 1]()]
示例#32
0
def test_transform_target_regressor_error():
    """Check the three error paths of ``TransformedTargetRegressor.fit``."""
    X, y = friedman

    # 1. A transformer and func/inverse_func are mutually exclusive.
    both_set = "'transformer' and functions 'func'/'inverse_func' cannot both be set."
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=StandardScaler(),
                                      func=np.exp,
                                      inverse_func=np.log)
    with pytest.raises(ValueError, match=both_set):
        regr.fit(X, y)

    # 2. sample_weight is rejected when the wrapped regressor does not
    #    accept it.
    unexpected_kwarg = r"fit\(\) got an unexpected keyword argument 'sample_weight'"
    sample_weight = np.ones((y.shape[0], ))
    regr = TransformedTargetRegressor(regressor=OrthogonalMatchingPursuit(),
                                      transformer=StandardScaler())
    with pytest.raises(TypeError, match=unexpected_kwarg):
        regr.fit(X, y, sample_weight=sample_weight)

    # 3. func without inverse_func is incomplete.
    missing_inverse = "When 'func' is provided, 'inverse_func' must also be provided"
    regr = TransformedTargetRegressor(func=np.exp)
    with pytest.raises(ValueError, match=missing_inverse):
        regr.fit(X, y)
def _orthogonal_matching_pursuit(response_mat, diff_vec, opt):
    """Fit ``diff_vec`` with ``opt.n_correctors`` columns of ``response_mat``.

    Parameters
    ----------
    response_mat : object with ``.columns``
        Design matrix; its column labels are used for the debug log.
    diff_vec : array-like
        Target of the fit.
    opt : object
        Must provide ``n_correctors``, the number of non-zero coefficients.

    Returns
    -------
    The fitted coefficient array (``coef_``).

    Raises
    ------
    ValueError
        If ``opt.n_correctors`` is ``None``.
    """
    if opt.n_correctors is None:
        raise ValueError(
            "n_correctors setting needed for orthogonal matching pursuit.")

    # return orthogonal_mp(response_mat, diff_vec, opt.n_correctors)
    # Pass the hyper-parameter by keyword: scikit-learn estimators take
    # keyword-only constructor arguments.
    res = OrthogonalMatchingPursuit(n_nonzero_coefs=opt.n_correctors).fit(
        response_mat, diff_vec)
    coef = res.coef_
    LOG.debug("Orthogonal Matching Pursuit Results:")
    LOG.debug("  Chosen variables: {:s}".format(
        str(response_mat.columns.values[coef.nonzero()])))
    LOG.debug("  Score: {:f}".format(res.score(response_mat, diff_vec)))

    return coef
示例#34
0
def get_model_from_name(model_name):
    """Return a freshly constructed estimator for ``model_name``.

    Only the requested estimator is constructed.  Looking up one model
    therefore does not pay for, and cannot be broken by, the constructors of
    the others.

    Parameters
    ----------
    model_name : str
        One of the keys of the table below.

    Raises
    ------
    KeyError
        If ``model_name`` is not a known model.
    """
    # Each entry is a zero-argument factory, called only for the match.
    model_factories = {
        # Classifiers
        'LogisticRegression': lambda: LogisticRegression(n_jobs=-2),
        'RandomForestClassifier': lambda: RandomForestClassifier(n_jobs=-2),
        'RidgeClassifier': lambda: RidgeClassifier(),
        'XGBClassifier': lambda: xgb.XGBClassifier(),
        'GradientBoostingClassifier': lambda: GradientBoostingClassifier(),
        'SGDClassifier': lambda: SGDClassifier(n_jobs=-1),
        'Perceptron': lambda: Perceptron(n_jobs=-1),
        'PassiveAggressiveClassifier': lambda: PassiveAggressiveClassifier(),

        # Regressors
        'LinearRegression': lambda: LinearRegression(n_jobs=-2),
        'RandomForestRegressor': lambda: RandomForestRegressor(n_jobs=-2),
        'Ridge': lambda: Ridge(),
        'XGBRegressor': lambda: xgb.XGBRegressor(),
        'ExtraTreesRegressor': lambda: ExtraTreesRegressor(n_jobs=-1),
        'AdaBoostRegressor': lambda: AdaBoostRegressor(n_estimators=5),
        'RANSACRegressor': lambda: RANSACRegressor(),
        'GradientBoostingRegressor':
        lambda: GradientBoostingRegressor(presort=False),
        'Lasso': lambda: Lasso(),
        'ElasticNet': lambda: ElasticNet(),
        'LassoLars': lambda: LassoLars(),
        'OrthogonalMatchingPursuit': lambda: OrthogonalMatchingPursuit(),
        'BayesianRidge': lambda: BayesianRidge(),
        'ARDRegression': lambda: ARDRegression(),
        'SGDRegressor': lambda: SGDRegressor(shuffle=False),
        'PassiveAggressiveRegressor':
        lambda: PassiveAggressiveRegressor(shuffle=False),

        # Clustering
        'MiniBatchKMeans': lambda: MiniBatchKMeans(n_clusters=8)
    }
    return model_factories[model_name]()
    def run(self):
        """Collect the OMP settings from the widgets.

        Returns ``(params, changed)``: the full parameter dictionary read
        from the spin box and check boxes (``precompute`` is always True),
        and the result of ``self.getChangedValues`` comparing it with a
        default ``OrthogonalMatchingPursuit``.
        """
        params = dict(
            n_nonzero_coefs=self.n_coef_spin.value(),
            fit_intercept=self.fitInterceptCheckBox.isChecked(),
            normalize=self.normalizeCheckBox.isChecked(),
            precompute=True,
        )
        defaults = OrthogonalMatchingPursuit()
        changed = self.getChangedValues(params, defaults)
        return params, changed
示例#36
0
    def __init__(self, models_parameters, base_forest_estimator):
        """Pick the OMP variant for the extraction strategy, then init the base.

        With ``extraction_strategy == 'omp_nn'`` a
        ``NonNegativeOrthogonalMatchingPursuit`` is built; any other value
        gets a regular ``OrthogonalMatchingPursuit``.  Both are limited to
        ``models_parameters.extracted_forest_size``.
        """
        if models_parameters.extraction_strategy != 'omp_nn':
            # Keep fit_intercept=True: the data is not necessarily centered
            # here.  Normalization is handled outside OMP.
            self._omp = OrthogonalMatchingPursuit(
                n_nonzero_coefs=models_parameters.extracted_forest_size,
                fit_intercept=True,
                normalize=False)
        else:
            sizes = models_parameters.intermediate_solutions_sizes
            self._omp = NonNegativeOrthogonalMatchingPursuit(
                max_iter=models_parameters.extracted_forest_size,
                intermediate_solutions_sizes=sizes,
                fill_with_final_solution=True)

        super().__init__(models_parameters, base_forest_estimator)
 def __init__(self, patch_size=(12,12), max_samples=1000000, **omp_args):
     """Keep the patch settings and create the OMP estimator.

     Any keyword argument other than ``patch_size`` and ``max_samples`` is
     passed straight to ``OrthogonalMatchingPursuit``.  ``D``, ``data`` and
     ``components`` start as ``None``; ``zscore`` and ``log_amplitude``
     start as ``False``.
     """
     self.patch_size = patch_size
     self.max_samples = max_samples
     self.omp = OrthogonalMatchingPursuit(**omp_args)
     # Nothing has been learned or loaded yet.
     self.D, self.data, self.components = None, None, None
     # Preprocessing flags, off by default.
     self.zscore, self.log_amplitude = False, False
示例#38
0
 def __init__(self, n_components=49, patch_size=(8,8), max_samples=1000000, **kwargs):
     """Keep the settings and create a default OMP estimator.

     ``n_components``, ``patch_size`` and ``max_samples`` are stored as
     given; extra keyword arguments are accepted and ignored.  ``D``,
     ``data`` and ``components`` start as ``None`` and ``standardize``
     starts as ``False``.
     """
     self.omp = OrthogonalMatchingPursuit()
     self.n_components = n_components
     self.patch_size, self.max_samples = patch_size, max_samples
     # Nothing has been learned or loaded yet.
     self.D, self.data, self.components = None, None, None
     self.standardize = False
示例#39
0
def SparseDeconvolution(x,y,p,rtype='omp'):
    """Estimate a sparse filter ``h`` such that ``y`` is about ``x`` convolved with ``h``.

    ``x`` is scaled by its largest-magnitude sample and then to unit l2
    norm; ``y`` is scaled by the same largest-magnitude sample.  A Toeplitz
    matrix ``X`` is built from the scaled ``x``, ``y`` is zero-padded with
    ``len(x) - 1`` samples on each side, and a sparse linear model is
    fitted to the pair.

    Parameters
    ----------
    x, y : 1-D numpy arrays
    p : int or float
        * ``rtype='omp'`` and ``p`` an ``int``: number of non-zero
          coefficients of OMP.
        * ``rtype='omp'`` and ``p < 1.0``: residual tolerance of OMP.
        * ``rtype='lasso'``: the Lasso ``alpha``.
    rtype : 'omp' or 'lasso'

    Returns
    -------
    (Y - b, X, h)
        The padded target minus the fitted intercept, the Toeplitz design
        matrix, and the fitted coefficients.

    Raises
    ------
    ValueError
        If the ``rtype`` / ``p`` combination selects no model.
    """
    from numpy import zeros, hstack
    from scipy.linalg import toeplitz, norm

    # Choose the solver first so an unsupported request fails with a clear
    # error before any work is done.
    p_is_count = isinstance(p, int) and not isinstance(p, bool)
    if rtype == 'omp' and p_is_count:
        solver = 'omp_count'
    elif rtype == 'omp' and p < 1.0:
        solver = 'omp_tol'
    elif rtype == 'lasso':
        solver = 'lasso'
    else:
        raise ValueError(
            "unsupported combination rtype={!r}, p={!r}: use rtype='omp' with "
            "an int p or p < 1.0, or rtype='lasso'".format(rtype, p))

    from sklearn.linear_model import OrthogonalMatchingPursuit, Lasso

    # Largest-magnitude sample, sign included.
    xm = x[abs(x).argmax()]

    x = (x.copy())/xm
    x = x/norm(x)

    y = (y.copy())/xm

    Nx = len(x)
    Ny = len(y)

    # Convolution matrix of x against a filter of length Nx + Ny - 1.
    X = toeplitz(hstack((x,zeros(Nx+Ny-2))),r=zeros(Ny+Nx-1))

    Y = hstack((zeros(Nx-1),y,zeros(Nx-1)))

    if solver == 'omp_count':
        model = OrthogonalMatchingPursuit(n_nonzero_coefs=p,normalize=False)
    elif solver == 'omp_tol':
        model = OrthogonalMatchingPursuit(tol=p,normalize=False)
    else:
        model = Lasso(alpha=p)

    model.fit(X,Y)

    h = model.coef_
    b = model.intercept_

    return Y-b,X,h
示例#40
0
def CSSK(h,const=5.0,noise=0.0000001):
    """Compressed-sensing replacement of the Fourier transform on 1D array h.

    A sine dictionary with ``int(const * len(h))`` frequencies is built and
    ``h`` is fitted with Orthogonal Matching Pursuit.

         h       = sampled time signal
         const   = scalar multiple of the length of h giving the number of
                     frequencies; larger values give greater resolution
                     albeit with increased cost.
         noise   = residual tolerance passed to OMP, to account for
                     numerical noise

         returns:
         g       = OMP coefficients of h over the sine dictionary

    An earlier cvxpy formulation (minimise ``|g|_1`` subject to
    ``|F.g - h|_2 <= noise``) is not used by this implementation, so cvxpy
    is not required.
    """

    h = np.asarray(h, dtype=float)
    Nt = len(h)
    Nw = int(const*Nt)
    t = np.arange(Nt)
    w = np.arange(Nw)
    # Builtin float(): the np.float alias was deprecated in NumPy 1.20 and
    # removed in 1.24.
    F = (1/float(Nw))*np.sin(2.0*np.pi*np.outer(t,w)/float(Nw))

    omp_cv = OrthogonalMatchingPursuit(tol=noise)
    omp_cv.fit(F, h)
    g = omp_cv.coef_

    return g
示例#41
0
def test_estimator_shapes():
    """Check ``coef_``/``intercept_`` shapes and sparsity after each fit.

    Covers single- and multi-target fits, each with and without a
    precomputed ``Gram``/``Xy``.
    """
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    def check_single_target():
        assert_equal(omp.coef_.shape, (n_features,))
        assert_equal(omp.intercept_.shape, ())
        assert_true(count_nonzero(omp.coef_) <= n_nonzero_coefs)

    def check_multi_target():
        assert_equal(omp.coef_.shape, (n_targets, n_features))
        assert_equal(omp.intercept_.shape, (n_targets,))
        assert_true(count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs)

    omp.fit(X, y[:, 0])
    check_single_target()

    omp.fit(X, y)
    check_multi_target()

    omp.fit(X, y[:, 0], Gram=G, Xy=Xy[:, 0])
    check_single_target()

    omp.fit(X, y, Gram=G, Xy=Xy)
    check_multi_target()
示例#42
0
def test_estimator():
    """Check fitted shapes and sparsity, then the warning raised for Gram/Xy fits.

    After switching off fit_intercept and normalize, fitting with ``Gram`` and
    ``Xy`` is expected to emit a DeprecationWarning and leave intercept_ at 0.
    """
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # (target, expected coef_ shape, expected intercept_ shape, nonzero budget)
    default_cases = (
        (y[:, 0], (n_features,), (), n_nonzero_coefs),
        (y, (n_targets, n_features), (n_targets,),
         n_targets * n_nonzero_coefs),
    )
    for target, coef_shape, intercept_shape, budget in default_cases:
        omp.fit(X, target)
        assert_equal(omp.coef_.shape, coef_shape)
        assert_equal(omp.intercept_.shape, intercept_shape)
        assert_true(count_nonzero(omp.coef_) <= budget)

    omp.set_params(fit_intercept=False, normalize=False)

    # (target, Xy argument, expected coef_ shape, nonzero budget)
    gram_cases = (
        (y[:, 0], Xy[:, 0], (n_features,), n_nonzero_coefs),
        (y, Xy, (n_targets, n_features), n_targets * n_nonzero_coefs),
    )
    for target, xy_arg, coef_shape, budget in gram_cases:
        assert_warns(DeprecationWarning, omp.fit, X, target, Gram=G, Xy=xy_arg)
        assert_equal(omp.coef_.shape, coef_shape)
        assert_equal(omp.intercept_, 0)
        assert_true(count_nonzero(omp.coef_) <= budget)
示例#43
0
class SparseApproxSpectrum(object):
    """Sparse approximation of a 2D spectrum from a learned patch dictionary.

    Patches of size ``patch_size`` are extracted from a 2D array, a dictionary
    of ``n_components`` atoms is learned with MiniBatchDictionaryLearning, and
    the patches are re-synthesised from OrthogonalMatchingPursuit weights.

    initialization:
        n_components - number of dictionary atoms to learn [49]
        patch_size - 2D patch size passed to extract_patches_2d [(8,8)]
        max_samples - randomly subsample patches above this count [1000000]
        **kwargs - accepted for call compatibility; currently unused
    """
    def __init__(self, n_components=49, patch_size=(8,8), max_samples=1000000, **kwargs):
        self.omp = OrthogonalMatchingPursuit()
        self.n_components = n_components
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.D = None
        self.data = None
        self.components = None
        self.standardize = False

    def _extract_data_patches(self, X):
        """Cut X into flattened 2D patches and store them in self.data.

        When self.standardize is set, each patch dimension is centred and
        scaled in place; the statistics are kept in self.mn / self.std so the
        scaling can be undone at reconstruction time.
        """
        self.X = X
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data) > self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print(data.shape)
        if self.standardize:
            self.mn = np.mean(data, axis=0)
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    def extract_codes(self, X, standardize=False):
        """Learn the patch dictionary from X; the fitted learner is stored in self.D.

        inputs:
            X - 2D array to cut into patches
            standardize - whether to standardize the patch ensemble [False]
        returns:
            self
        """
        self.standardize = standardize
        self._extract_data_patches(X)
        self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, alpha=1, n_iter=500)
        print("Dictionary learning from data...")
        self.D = self.dico.fit(self.data)
        return self

    def plot_codes(self, cbar=False, **kwargs):
        """Plot every dictionary atom as an image on a square subplot grid.

        inputs:
            cbar - add a colorbar to each subplot [False]
            **kwargs - forwarded to plt.imshow
        """
        N = int(np.ceil(np.sqrt(self.n_components)))
        # The rest of the class uses the `plt` alias; `pl.cm` was the odd one out.
        kwargs.setdefault('cmap', plt.cm.gray_r)
        # imshow documents only 'upper' and 'lower' for origin.
        kwargs.setdefault('origin', 'lower')
        kwargs.setdefault('interpolation', 'nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(N, N, i + 1)
            comp = comp * self.std + self.mn if self.standardize else comp
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from spectrum patches\n', fontsize=16)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',**kwargs):
        """Learn and plot a dictionary from all files matching dir_expr.

        inputs:
            dir_expr - glob expression selecting the audio files
            **kwargs - 'standardize' is passed to extract_codes; everything
                       else is passed to plot_codes
        returns:
            self
        """
        flist = glob.glob(dir_expr)
        self.X = np.vstack([feature_scale(LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T
        # extract_codes is a method and already stores the fitted dictionary in
        # self.D; assigning its return value (self) to self.D would clobber it.
        standardize = kwargs.pop('standardize', False)
        self.extract_codes(self.X, standardize=standardize)
        self.plot_codes(**kwargs)
        return self

    def _get_approximation_coefs(self, data, components):
        """Fit self.omp to each row of data against the atoms; return the coef_ rows."""
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
        """Rebuild the spectrum from the dictionary; the result is stored in self.X_hat.

        inputs:
            w - per-patch atom weights [None = compute them, stored in self.w]
            randomize - randomly permute the atoms after the weights are known [False]
        returns:
            self
        """
        data = self.data
        components = self.D.components_
        if w is None:
            self.w = self._get_approximation_coefs(data, components)
            w = self.w
        if randomize:
            components = np.random.permutation(components)
        recon = np.dot(w, components)
        if self.standardize:
            # Undo the patch standardisation on the reconstruction itself. The
            # previous per-component loop only rebound a local name and so
            # changed nothing.
            recon = recon * self.std + self.mn
        recon = recon.reshape(-1, self.patch_size[0], self.patch_size[1])
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        return self

    def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, **kwargs):
        """Rebuild one spectrum per dictionary atom into the list self.X_hat_l.

        inputs:
            w - per-patch atom weights [None = use the weights computed by
                reconstruct_spectrum]
            randomize - forwarded to reconstruct_spectrum [False]
            plotting - plot each per-atom reconstruction in a subplot [False]
            **kwargs - forwarded to feature_plot when plotting
        returns:
            self
        """
        self.reconstruct_spectrum(w, randomize)
        if w is None:
            w = self.w
        # Honour caller-supplied weights; self.w may be stale or not set at all.
        w = np.asarray(w)
        components = self.D.components_
        self.X_hat_l = []
        for i in range(w.shape[1]):
            # Contribution of atom i alone: outer product of its weight column
            # with the atom itself.
            r = np.outer(w[:, i], components[i, :]).reshape(-1, self.patch_size[0], self.patch_size[1])
            self.X_hat_l.append(reconstruct_from_patches_2d(r, self.X.shape))
        if plotting:
            plt.figure()
            # subplot needs integer grid sizes; round up as plot_codes does.
            side = int(np.ceil(np.sqrt(self.n_components)))
            for k in range(self.n_components):
                plt.subplot(side, side, k + 1)
                feature_plot(self.X_hat_l[k], nofig=1, **kwargs)
        return self
class SparseApproxSpectrum(object):
    """class for 2D patch analysis of audio files
    initialization:
        patch_size - size of time-frequency 2D patches in spectrogram units (freq,time) [(12,12)]
        max_samples - if num audio patches exceeds this threshold, randomly sample spectrum [1000000]
        **omp_args - keyword arguments to OrthogonalMatchingPursuit(...) [None]
    """
    def __init__(self, patch_size=(12,12), max_samples=1000000, **omp_args):
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.omp = OrthogonalMatchingPursuit(**omp_args)
        self.D = None
        self.data = None
        self.components = None
        self.zscore = False
        self.log_amplitude = False

    def _extract_data_patches(self, X, zscore, log_amplitude):
        "utility method for converting spectrogram data to 2D patches "
        self.zscore = zscore
        self.log_amplitude = log_amplitude
        self.X = X
        if self.log_amplitude:
            X = np.log(1 + X)
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data) > self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print(data.shape)
        if self.zscore:
            # Keep the statistics so reconstruct_spectrum can undo the scaling.
            self.mn = np.mean(data, axis=0)
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    @staticmethod
    def _fit_kernel_to_patch(kernel, patch_size):
        """Centre-pad or centre-crop a 2D kernel to patch_size, one axis at a time.

        An axis where the kernel is no larger than the patch is zero-padded;
        an axis where it is larger is cropped. Offsets use floor division, so
        they are the same integers on Python 2 and 3.
        """
        fitted = np.zeros(patch_size)
        src = []
        dst = []
        for k_len, p_len in zip(kernel.shape, patch_size):
            if k_len <= p_len:
                start = p_len // 2 - k_len // 2
                src.append(slice(0, k_len))
                dst.append(slice(start, start + k_len))
            else:
                start = k_len // 2 - p_len // 2
                src.append(slice(start, start + p_len))
                dst.append(slice(0, p_len))
        fitted[tuple(dst)] = kernel[tuple(src)]
        return fitted

    def make_gabor_field(self, X, zscore=True, log_amplitude=True, thetas=range(4),
                         sigmas=(1,3), frequencies=(0.05, 0.25)):
        """Given a spectrogram, prepare 2D patches and Gabor filter bank kernels
        inputs:
           X - spectrogram data (frequency x time)
           zscore - whether to zscore the ensemble of 2D patches [True]
           log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True]
           thetas - list of 2D Gabor filter orientations in units of pi/4. [range(4)]
           sigmas - list of 2D Gabor filter standard deviations in oriented direction [(1,3)]
           frequencies - list of 2D Gabor filter frequencies [(0.05,0.25)]
        outputs:
           self.data - 2D patches of input spectrogram
           self.D.components_ - Gabor dictionary of thetas x sigmas x frequencies atoms
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = len(thetas) * len(sigmas) * len(frequencies)
        self.thetas = thetas
        self.sigmas = sigmas
        self.frequencies = frequencies
        self.kernels = []
        for theta in thetas:
            theta = theta / 4. * np.pi
            for sigma in sigmas:
                for frequency in frequencies:
                    kernel = np.real(gabor_kernel(frequency, theta=theta,
                                                  sigma_x=sigma, sigma_y=sigma))
                    z = self._fit_kernel_to_patch(kernel, self.patch_size)
                    self.kernels.append(z.flatten())

        class Bunch:
            # Minimal attribute holder so self.D exposes .components_ like a
            # fitted dictionary learner does.
            def __init__(self, **kwds):
                self.__dict__.update(kwds)
        self.D = Bunch(components_=np.vstack(self.kernels))

    def extract_codes(self, X, n_components=16, zscore=True, log_amplitude=True, **mbl_args):
        """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data
        inputs:
            X - spectrogram data (frequency x time)
            n_components - how many components to extract [16]
            zscore - whether to zscore the ensemble of 2D patches [True]
            log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True]
            **mbl_args - keyword arguments for the MiniBatchDictionaryLearning(...) constructor [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of learned 2D atoms for sparse coding
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = n_components
        self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, **mbl_args)
        print("Dictionary learning from data...")
        self.D = self.dico.fit(self.data)

    def plot_codes(self, cbar=False, show_axis=False, **kwargs):
        "plot the learned or generated 2D sparse code dictionary"
        N = int(np.ceil(np.sqrt(self.n_components)))
        kwargs.setdefault('cmap', plt.cm.gray_r)
        # imshow documents only 'upper' and 'lower' for origin.
        kwargs.setdefault('origin', 'lower')
        kwargs.setdefault('interpolation', 'nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(N, N, i+1)
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            if not show_axis:
                plt.axis('off')
            plt.xticks(())
            plt.yticks(())
            plt.title('%d'%(i))
        plt.suptitle('Dictionary of Spectrum Patches\n', fontsize=14)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav', **mbl_args):
        """apply dictionary learning to entire directory of audio files (requires LOTS of RAM)
            inputs:
                dir_expr - glob expression selecting the audio files
                **mbl_args - keyword arguments forwarded to extract_codes(...) [None]
        """
        flist = glob.glob(dir_expr)
        self.X = np.vstack([br.feature_scale(br.LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T
        # extract_codes is a method that sets self.D itself and returns None;
        # assigning its result to self.D would discard the learned dictionary.
        self.extract_codes(self.X, **mbl_args)

    def _get_approximation_coefs(self, data, components):
        """utility function to fit dictionary components to data
        inputs:
            data - flattened 2D patches, one per row (self.data)
            components - the dictionary components to fit to the data
        returns:
            array with one row of OMP coefficients per row of data
        """
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
        """reconstruct by fitting current 2D dictionary to self.data
        inputs:
            w - per-component reconstruction weights [None=calculate weights]
            randomize - randomly permute components after getting weights [False]
        returns:
            self.X_hat - spectral reconstruction of self.data
        """
        data = self.data
        components = self.D.components_
        if w is None:
            self.w = self._get_approximation_coefs(data, components)
            w = self.w
        if randomize:
            components = np.random.permutation(components)
        recon = np.dot(w, components)
        if self.zscore:
            # Undo the patch standardisation applied in _extract_data_patches.
            recon = recon * self.std
            recon = recon + self.mn
        recon = recon.reshape(-1, *self.patch_size)
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        if self.log_amplitude:
            self.X_hat = np.exp(self.X_hat) - 1.0 # invert log transform

    def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, rectify=True, **kwargs):
        """fit each dictionary component to self.data
        inputs:
            w - per-component reconstruction weights [None=calculate weights]
            randomize - randomly permute components after getting weights [False]
            plotting - whether to subplot individual spectrum reconstructions [False]
            rectify- remove negative ("dark energy") from individual reconstructions [True]
            **kwargs - keyword arguments for plotting
        returns:
            self.X_hat_l - list of indvidual spectrum reconstructions per dictionary atom
        """
        self.reconstruct_spectrum(w, randomize)
        if w is None:
            w = self.w
        # Honour caller-supplied weights; self.w may be stale or not set at all.
        w = np.asarray(w)
        components = self.D.components_
        self.X_hat_l = []
        for i in range(w.shape[1]):
            # Contribution of atom i alone: outer product of its weight column
            # with the atom itself.
            r = np.outer(w[:, i], components[i, :]).reshape(-1, *self.patch_size)
            X_hat = reconstruct_from_patches_2d(r, self.X.shape)
            if self.log_amplitude:
                X_hat = np.exp(X_hat) - 1.0
            if rectify: # half wave rectification
                X_hat[X_hat<0] = 0
            self.X_hat_l.append(X_hat)
        if plotting:
            self.plot_individual_spectra(**kwargs)

    def plot_individual_spectra(self, **kwargs):
        "plot individual spectrum reconstructions for self.X_hat_l"
        # X_hat_l only exists after reconstruct_individual_spectra has run.
        X_hat_l = getattr(self, 'X_hat_l', None)
        if X_hat_l is None: return
        plt.figure()
        # subplot needs integer grid sizes.
        rn = int(np.ceil(self.n_components**0.5))
        for k in range(self.n_components):
            plt.subplot(rn, rn, k+1)
            br.feature_plot(X_hat_l[k], nofig=1, **kwargs)
            plt.title('%d'%(k))
        plt.suptitle('Component Reconstructions\n', fontsize=14)
示例#45
0
# distort the clean signal
##########################
# Add zero-mean Gaussian noise, scaled by 0.05, to every sample of y.
y_noisy = y + 0.05 * np.random.randn(len(y))

# plot the sparse signal
########################
# NOTE(review): `w` and `idx` are defined outside this fragment; presumably the
# true coefficient vector and the indices of its nonzero entries -- confirm.
pl.figure(figsize=(7, 7))
pl.subplot(4, 1, 1)
pl.xlim(0, 512)
pl.title("Sparse signal")
pl.stem(idx, w[idx])

# plot the noise-free reconstruction
####################################

# Fit OMP on the clean targets and stem-plot only the nonzero coefficients.
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
omp.fit(X, y)
coef = omp.coef_
idx_r, = coef.nonzero()
pl.subplot(4, 1, 2)
pl.xlim(0, 512)
pl.title("Recovered signal from noise-free measurements")
pl.stem(idx_r, coef[idx_r])

# plot the noisy reconstruction
###############################
# Refit the same estimator on the noisy targets; coef and idx_r are overwritten.
omp.fit(X, y_noisy)
coef = omp.coef_
idx_r, = coef.nonzero()
pl.subplot(4, 1, 3)
pl.xlim(0, 512)
示例#46
0
def orthogonal_matching_pursuit(y, D):
    """Fit a default OrthogonalMatchingPursuit with D as inputs and y as targets.

    Returns the fitted estimator.
    """
    # fit() returns the estimator itself, so the call can be chained.
    return OrthogonalMatchingPursuit().fit(D, y)
示例#47
0
def test_scaling_with_gram():
    """Count the warnings emitted when fitting with a caller-supplied Gram matrix.

    No warning is expected with fit_intercept and normalize both off; every
    fit with either option on is expected to add exactly one warning.
    """
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Use only 1 nonzero coef to be faster and to avoid warnings
        plain = OrthogonalMatchingPursuit(n_nonzero_coefs=1, fit_intercept=False, normalize=False)
        with_intercept = OrthogonalMatchingPursuit(n_nonzero_coefs=1, fit_intercept=True, normalize=False)
        with_normalize = OrthogonalMatchingPursuit(n_nonzero_coefs=1, fit_intercept=False, normalize=True)

        plain.fit(X, y, Gram=G)
        plain.fit(X, y, Gram=G, Xy=Xy)
        assert_true(len(caught) == 0)

        expected = 0
        for estimator in (with_intercept, with_normalize):
            # First without Xy, then with it; each fit should add one warning.
            for extra in ({}, {'Xy': Xy}):
                estimator.fit(X, y, Gram=G, **extra)
                expected += 1
                assert_true(len(caught) == expected)
示例#48
0
def test_estimator():
    """Check fitted shapes and sparsity with default options and with
    fit_intercept/normalize switched off (intercept_ must then equal 0)."""
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    def fit_and_check_coefs(target, coef_shape, budget):
        # Fit, check the coefficient shape, and hand back the sparsity check
        # so the caller can run it after its own intercept assertion.
        omp.fit(X, target)
        assert_equal(omp.coef_.shape, coef_shape)
        return lambda: assert_true(np.count_nonzero(omp.coef_) <= budget)

    multi_shape = (n_targets, n_features)
    multi_budget = n_targets * n_nonzero_coefs

    # Default options: the intercept has one entry per target.
    check_sparsity = fit_and_check_coefs(y[:, 0], (n_features,), n_nonzero_coefs)
    assert_equal(omp.intercept_.shape, ())
    check_sparsity()

    check_sparsity = fit_and_check_coefs(y, multi_shape, multi_budget)
    assert_equal(omp.intercept_.shape, (n_targets,))
    check_sparsity()

    omp.set_params(fit_intercept=False, normalize=False)

    # Without an intercept term the attribute is expected to equal 0.
    check_sparsity = fit_and_check_coefs(y[:, 0], (n_features,), n_nonzero_coefs)
    assert_equal(omp.intercept_, 0)
    check_sparsity()

    check_sparsity = fit_and_check_coefs(y, multi_shape, multi_budget)
    assert_equal(omp.intercept_, 0)
    check_sparsity()
示例#49
0
def test_estimator():
    """Check fitted shapes, sparsity, and that coefficients for the first target
    are unchanged when normalize is switched off."""
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    first_target = y[:, 0]
    multi_budget = n_targets * n_nonzero_coefs

    # Default options, single target.
    omp.fit(X, first_target)
    assert_equal(omp.coef_.shape, (n_features,))
    assert_equal(omp.intercept_.shape, ())
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs

    # Default options, all targets.
    omp.fit(X, y)
    assert_equal(omp.coef_.shape, (n_targets, n_features))
    assert_equal(omp.intercept_.shape, (n_targets,))
    assert np.count_nonzero(omp.coef_) <= multi_budget

    # Keep the first target's row from the multi-target fit for comparison.
    reference_coef = omp.coef_[0].copy()
    omp.set_params(fit_intercept=True, normalize=False)
    omp.fit(X, first_target)
    assert_array_almost_equal(reference_coef, omp.coef_)

    # No intercept and no normalisation.
    omp.set_params(fit_intercept=False, normalize=False)
    omp.fit(X, first_target)
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
    assert_equal(omp.coef_.shape, (n_features,))
    assert_equal(omp.intercept_, 0)

    omp.fit(X, y)
    assert_equal(omp.coef_.shape, (n_targets, n_features))
    assert_equal(omp.intercept_, 0)
    assert np.count_nonzero(omp.coef_) <= multi_budget
示例#50
0
# Build one ANOVA feature-selection + linear SVC pipeline per index i, fitted
# on row i of y_train, and collect the fitted pipelines.
for i in range(n_features):
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print("Count %d of %d" % ((i + 1), n_features))
    clf = SVC(kernel='linear', C=1.)
    # Keep the 50 highest-scoring features under f_classif.
    feature_selection = SelectKBest(f_classif, k=50)
    anova_svc = Pipeline([('anova', feature_selection), ('svc', clf)])
    anova_svc.fit(X_train, y_train[i, :])
    pipelines.append(anova_svc)

"""
"""
    f_classif 100 + Ridge
"""

from sklearn.linear_model import OrthogonalMatchingPursuit as OMP

# Orthogonal matching pursuit limited to 20 nonzero coefficients.
clf = OMP(n_nonzero_coefs=20)

# Fit on the training split, then predict for the test inputs.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

"""
clf.fit(X_train, y_train_tall.T)
y_pred_tall = clf.predict(X_test)

clf.fit(X_train, y_train_large.T)
y_pred_large = clf.predict(X_test)

clf.fit(X_train, y_train_big.T)
y_pred_big = clf.predict(X_test)
"""
    def sparse_encode(self, X, dictionary, n_nonzero_coefs=None, verbose=0):
        """Return the transposed OMP coefficients of X.T fitted against dictionary.

        n_nonzero_coefs is forwarded to OrthogonalMatchingPursuit; verbose is
        accepted but not used.
        """
        pursuit = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
        # fit() returns the estimator, so coef_ can be read off the same call.
        return pursuit.fit(dictionary, X.T).coef_.T
        def sparse_encode(self, n_nonzero_coefs):
            """Return the transposed OMP coefficients of self.X_residual.T fitted
            against self.dictionary, using at most n_nonzero_coefs nonzeros."""
            pursuit = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
            # fit() returns the estimator, so coef_ can be read off the same call.
            return pursuit.fit(self.dictionary, self.X_residual.T).coef_.T
示例#53
0
def sparse_code(Y, D, X = None):
    """Sparse-code every column of Y against the dictionary D with OMP.

    Parameters:
        Y - 2D signal matrix; each column is coded independently.
        D - dictionary passed to OMP.fit as the design matrix (one atom per
            column).
        X - optional 2D output container with one column per column of Y;
            when None a new matrix of shape (D.shape[1], Y.shape[1]) is
            allocated.

    Returns:
        X, with column k overwritten by the OMP coefficients for Y[:, k].
    """
    if X is None:
        y_cols, d_cols = Y.shape[1], D.shape[1]
        X = np.asmatrix(np.empty((d_cols, y_cols)))

    # One estimator is enough: every fit() call replaces coef_.
    omp = OMP()
    for k in range(X.shape[1]):
        # Flatten the column so a matrix-typed Y still yields a 1-D target.
        omp.fit(D, np.asarray(Y[:, k]).ravel())
        # Match the shape of the destination column so that both np.matrix
        # and plain ndarray containers are accepted.
        X[:, k] = np.asarray(omp.coef_).reshape(X[:, k].shape)

    return X


"""
Forms a matrix for a given vector x to enforce that the new update x will be
sparse. Here N is the columns of Y. Returns the matrix omega.
"""
def form_omega(x, N):
    """Build the column-selection matrix omega for the nonzero entries of x.

    Parameters:
        x - coefficient vector (any array-like; it is flattened first).
        N - number of rows of omega, i.e. the number of columns of Y.

    Returns:
        An N x m np.matrix, where m is the number of nonzero entries of x.
        Column j holds a single 1 in the row given by the index of the j-th
        nonzero entry, so ``E * omega`` keeps exactly those columns of E.
    """
    flat = np.asarray(x).ravel()
    support = np.flatnonzero(np.abs(flat) > 0)

    omega = np.zeros((N, support.size))
    # Row = position of the nonzero entry in x, column = its rank among the
    # nonzero entries.
    omega[support, np.arange(support.size)] = 1

    return np.asmatrix(omega)
    

"""
Update the dictionary D and the matrix X (phase 2)
"""
def update_dictionary(Y, D, X):
    """Update each dictionary atom and its coefficients in place (phase 2).

    For every atom k the error with atom k removed is restricted to the
    signals that currently use that atom. Its best rank-1 approximation,
    taken from the SVD, then replaces column k of D and the nonzero entries
    of row k of X. Atoms that no signal uses are left untouched.

    Parameters:
        Y - signal matrix, one signal per column.
        D - dictionary, one atom per column; modified in place.
        X - coefficient matrix, one row per atom; modified in place.

    D and X must be floating-point ndarray or np.matrix objects for the
    in-place update to reach the caller. Nothing is returned.
    """
    # np.asarray gives ndarray views of ndarray/np.matrix inputs, so writes
    # below land in the caller's objects while indexing stays 1-D.
    Y_arr = np.asarray(Y)
    D_arr = np.asarray(D)
    X_arr = np.asarray(X)
    K = D_arr.shape[1]

    # Residual of the full model, kept up to date as atoms change.
    residual = Y_arr - np.dot(D_arr, X_arr)

    for k in range(K):
        # Signals that use atom k; restricting to them keeps the update sparse.
        support = np.flatnonzero(X_arr[k, :])
        if support.size == 0:
            continue

        # Error without atom k's contribution, on the supporting signals only.
        E_kr = residual[:, support] + np.outer(D_arr[:, k], X_arr[k, support])

        # svd returns the singular values as a 1-D array and V already
        # transposed, so the leading right vector is the first ROW of Vh.
        U, sig, Vh = np.linalg.svd(E_kr, full_matrices=False)

        D_arr[:, k] = U[:, 0]
        X_arr[k, support] = sig[0] * Vh[0, :]

        residual[:, support] = E_kr - np.outer(D_arr[:, k], X_arr[k, support])
    
def main():
    """Script entry point; currently does nothing."""
    return None


if __name__ == '__main__':
    main()