示例#1
0
文件: 01.py 项目: brenden17/infinity
def _fit_plot_report(frame, observed_style, fitted_style, degree=10):
    """Fit a polynomial to b(a) on ``frame``, plot it and print the error.

    The fitted values are stored in a new column ``'c'`` of ``frame``.
    Column ``'b'`` is plotted with ``observed_style`` and ``'c'`` with
    ``fitted_style``. The printed figure is the sum of squared residuals
    between ``'b'`` and ``'c'``.
    """
    poly = np.poly1d(np.polyfit(frame['a'], frame['b'], degree))
    frame['c'] = poly(frame['a'])
    frame['b'].plot(style=[observed_style])
    frame['c'].plot(style=[fitted_style])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('error rate {:,}'.format(np.sum((frame['b'] - frame['c']) ** 2)))


def resolve():
    """Fit degree-10 polynomials to the web-traffic data and plot the fits.

    Loads ``web_traffic.tsv`` through ``loadzipdata``, fits column ``b``
    against column ``a`` on the full frame, then repeats the fit using only
    every 15th row. Each fit is plotted together with the observed values
    and its sum of squared residuals is printed. The figure is shown at the
    end via ``pl.show()``.
    """
    df = loadzipdata('01', 'web_traffic.tsv', get_data_by_pd)
    _fit_plot_report(df, 'ro-', 'bs-')

    sampling_factor = 15
    # .iloc replaces the removed .ix indexer; .copy() makes the sample an
    # independent frame so that adding column 'c' to it is well defined
    # (no SettingWithCopy ambiguity with the parent frame).
    sampling_df = df.iloc[::sampling_factor].copy()
    _fit_plot_report(sampling_df, 'go-', 'ys-')

    pl.show()
示例#2
0
文件: 02.py 项目: brenden17/infinity
def _fit_and_report(search, data, target):
    """Fit ``search`` on (data, target) and print a summary of the result.

    Prints, in order: the score of the fitted search on the same data it
    was trained on, the best estimator, the best cross-validation score
    and the best parameter combination.
    """
    search.fit(data, target)
    # Scored on the training data itself, so this is an optimistic figure.
    print(search.score(data, target))

    print(search.best_estimator_)
    print(search.best_score_)
    print(search.best_params_)


def resolve():
    """Grid-search three classification pipelines on the seeds data set.

    Loads ``seeds.tsv`` through ``loadzipdata``, encodes the class column
    ``X7`` as integer codes, uses the first seven columns as features and
    then tunes, in turn:

    1. SelectKBest -> SVC
    2. PCA -> SVC
    3. SelectKBest -> GaussianNB

    A summary of each search is printed.
    """
    print('===== load data =====')
    df = loadzipdata('02', 'seeds.tsv', get_data_by_pd)
    # Map each distinct label to its rank among the sorted unique labels.
    # Doing this in one step avoids chained-indexing assignment (which is
    # not guaranteed to write back into df) and cannot merge classes when
    # a raw label happens to equal an already-assigned code.
    _, codes = np.unique(df.X7.values, return_inverse=True)
    df['X7'] = codes
    data = df[df.columns[0:7]].values
    target = df.X7.values

    # NOTE(review): SVC() is left on its default kernel; scikit-learn
    # documents `degree` as used only by the 'poly' kernel, so this part of
    # the grid likely only multiplies the search cost - confirm intent.
    # NOTE(review): gamma=0.0 was the legacy spelling of 'auto' in old
    # scikit-learn releases and may be rejected by newer ones - confirm
    # against the installed version.
    svm_grid = dict(svm__degree=[1, 3, 5],
                    svm__gamma=[0.0, 0.2, 2.4],
                    svm__C=[0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2])
    select_grid = dict(feats__score_func=[f_regression],
                       feats__k=[1, 2, 6, 7])

    print('===== preprocessing : selectk with SVM =====')
    pipe = Pipeline([('feats', SelectKBest()), ('svm', SVC())])
    _fit_and_report(GridSearchCV(pipe, dict(select_grid, **svm_grid)),
                    data, target)

    print('===== preprocessing : pca with SVM =====')
    # No standalone pca.fit(): GridSearchCV clones the pipeline and fits
    # the clones itself, so a pre-fitted PCA would never be used.
    pipe = Pipeline([('pca', PCA()), ('svm', SVC())])
    pca_grid = dict(svm_grid, pca__n_components=[2, 3, 4])
    _fit_and_report(GridSearchCV(pipe, pca_grid), data, target)

    # This stage uses SelectKBest, not PCA, so the banner says so.
    print('===== preprocessing : selectk with GaussianNB =====')
    from sklearn.naive_bayes import GaussianNB
    pipe = Pipeline([('feats', SelectKBest()), ('gnb', GaussianNB())])
    _fit_and_report(GridSearchCV(pipe, select_grid), data, target)