Пример #1
0
def test_pca_mortgage(nrows=1000, ncols=100, n_components=10,
                      svd_solver='full', whiten=False, random_state=42,
                      threshold=1e-3, data_source='random', use_assert=True,
                      quarters=8, rows_per_quarter=100000, test_model='cuml'):
    """Build a feature DataFrame and hand it to ``test_pca_helper``.

    The input matrix is either uniform random noise or a random sample of
    rows (drawn with replacement) from the data returned by
    ``load_mortgage``. The matrix is converted to a pandas DataFrame with
    columns named ``fea0``, ``fea1``, ... before being passed on.

    Args:
        nrows: Number of rows to generate (random) or sample (mortgage).
        ncols: Number of columns to generate (random) or the maximum number
            of leading columns to keep (mortgage).
        n_components, svd_solver, whiten, random_state, threshold,
        use_assert, test_model: Forwarded unchanged to ``test_pca_helper``;
            their meaning is defined there.
        data_source: ``'random'`` or ``'mortgage'``.
        quarters, rows_per_quarter: Forwarded to ``load_mortgage`` when
            ``data_source == 'mortgage'``; unused otherwise.

    Raises:
        NotImplementedError: If ``data_source`` is not a supported value.
    """
    print()
    if data_source == 'random':
        X = np.random.rand(nrows, ncols)
    elif data_source == 'mortgage':
        # NOTE(review): assumes load_mortgage returns a 2-D NumPy-style array
        # (it is indexed with [rows, :cols] below) -- confirm against helper.
        X = load_mortgage(quarters=quarters, rows_per_quarter=rows_per_quarter)
        # np.random.randint excludes its upper bound, so the bound must be the
        # row count itself; using ``shape[0] - 1`` would never pick the last
        # row and would fail outright on a single-row dataset.
        row_idx = np.random.randint(0, X.shape[0], nrows)
        X = X[row_idx, :ncols]
    else:
        raise NotImplementedError(
            'unsupported data_source: %r' % (data_source,))
    X = pd.DataFrame({'fea%d' % i: X[:, i] for i in range(X.shape[1])})
    print('%s data' % data_source, X.shape)
    test_pca_helper(X, n_components, svd_solver, whiten, random_state,
                    threshold, use_assert, test_model)
Пример #2
0
def test_dbscan(nrows=1000,
                ncols=100,
                eps=3,
                min_samples=2,
                threshold=1e-3,
                data_source='random',
                use_assert=True,
                quarters=8,
                rows_per_quarter=100000,
                test_model='cuml'):
    """Build a feature DataFrame and hand it to ``test_dbscan_helper``.

    The input matrix is either uniform random noise or a random sample of
    rows (drawn with replacement) from the data returned by
    ``load_mortgage``. The matrix is converted to a pandas DataFrame with
    columns named ``fea0``, ``fea1``, ... before being passed on.

    Args:
        nrows: Number of rows to generate (random) or sample (mortgage).
        ncols: Number of columns to generate (random) or the maximum number
            of leading columns to keep (mortgage).
        eps, min_samples, threshold, use_assert, test_model: Forwarded
            unchanged to ``test_dbscan_helper``; their meaning is defined
            there.
        data_source: ``'random'`` or ``'mortgage'``.
        quarters, rows_per_quarter: Forwarded to ``load_mortgage`` when
            ``data_source == 'mortgage'``; unused otherwise.

    Raises:
        NotImplementedError: If ``data_source`` is not a supported value.
    """
    print()
    if data_source == 'random':
        X = np.random.rand(nrows, ncols)
    elif data_source == 'mortgage':
        # NOTE(review): assumes load_mortgage returns a 2-D NumPy-style array
        # (it is indexed with [rows, :cols] below) -- confirm against helper.
        X = load_mortgage(quarters=quarters, rows_per_quarter=rows_per_quarter)
        # np.random.randint excludes its upper bound, so the bound must be the
        # row count itself; using ``shape[0] - 1`` would never pick the last
        # row and would fail outright on a single-row dataset.
        row_idx = np.random.randint(0, X.shape[0], nrows)
        X = X[row_idx, :ncols]
    else:
        raise NotImplementedError(
            'unsupported data_source: %r' % (data_source,))
    X = pd.DataFrame({'fea%d' % i: X[:, i] for i in range(X.shape[1])})
    print('%s data' % data_source, X.shape)
    test_dbscan_helper(X, eps, min_samples, threshold, use_assert, test_model)