Exemplo n.º 1
0
Arquivo: ml.py Projeto: raddy/copper
        return accuracy_score(y, y_pred)

    def predict(self, X):
        """Return ``self.base_clf.predict`` applied to the transformed input.

        ``X`` is first passed through ``self.pca_model.transform`` and the
        result is handed to the wrapped classifier unchanged.
        """
        return self.base_clf.predict(self.pca_model.transform(X))

    def predict_proba(self, X):
        """Return ``self.base_clf.predict_proba`` for the transformed input.

        ``X`` is first passed through ``self.pca_model.transform`` and the
        result is handed to the wrapped classifier unchanged.
        """
        return self.base_clf.predict_proba(self.pca_model.transform(X))

if __name__ == '__main__':
    # Demo script: fit an SVM and the same SVM behind a PCA_wrapper through
    # copper.MachineLearning, then print the class probabilities produced by
    # a MaxProbaBag built from the fitted classifiers.
    from sklearn import svm

    copper.project.path = '../../../data-mining/data-science-london/'
    train = copper.load('train')
    test = copper.load('test')

    # probability=True is required for SVC to expose predict_proba.
    clf = svm.SVC(kernel='rbf', gamma=0.02, C=10, probability=True)
    # NOTE(review): the same `clf` instance is registered directly below and
    # also handed to PCA_wrapper; if the wrapper fits it in place instead of
    # cloning it, both entries share one fitted estimator -- confirm.
    pca_clf = PCA_wrapper(clf, n_components=13)

    ml = copper.MachineLearning()
    ml.train = train
    ml.add_clf(clf, 'svm')
    ml.add_clf(pca_clf, 'pca')
    ml.fit()

    bag = MaxProbaBag()
    bag.add_clf(ml.clfs)
    # print(ml.predict_proba(test).head(3))
    print(bag.predict_proba(test))
    
Exemplo n.º 2
0
import copper
# copper.r.install_packages()

# Tell copper where the project lives before loading anything from it.
copper.project.path = '..'
ds = copper.load('cleaned')

# Function-call form: prints the same on Python 2 for a single argument and
# is valid syntax on Python 3, where the bare `print ds` statement is not.
print(ds)

# NOTE(review): the return value is discarded; presumably impute() works on
# `ds` in place (or is run only for its output) -- confirm against copper.r.
copper.r.impute(ds)
Exemplo n.º 3
0
import copper
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm

# Tell copper where the project lives before loading anything from it.
project_root = '..'
copper.project.path = project_root

# Load the 'loans' dataset and prepare it.
# NOTE(review): presumably fix_names() tidies the column names and
# fillna(method='mean') fills missing values with column means -- confirm
# against the copper Dataset API.
loans = copper.load('loans')
loans.fix_names()
loans.fillna(method='mean')

# print(loans.frame)
# print (loans.metadata)
# loans.role['InterestRate'] = loans.TARGET
# print (loans.frame.skew())
# print (loans.corr())

# loans.histogram('Employment.Length')
# plt.draw()
# plt.figure()

# loans.histogram('MonthlyIncome')
# plt.show()


# mod = sm.ols(formula='InterestRate ~ FICORange + LoanLength', data=loans.frame)
# mod = sm.ols(formula='InterestRate ~ FICORange + LoanLength + C(LoanPurpose)', data=loans.frame)
# mod = sm.ols(formula='InterestRate ~ C(LoanPurpose)', data=loans.frame)
# res = mod.fit()
# print (res.summary())
# print (res.pvalues)
Exemplo n.º 4
0
import copper
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

copper.project.path = "../"

# 'train.dataset' is the variant currently loaded; the commented-out lines
# are alternative files that can be swapped in.
train = copper.load("train.dataset")
# train = copper.load("train_mean.dataset")
# train = copper.load("train_mean_log.dataset")
# train = copper.load("train_imp.dataset")
test = copper.load("test.dataset")

# Disabled sanity checks:
# print(len(test), len(train))
# print(train.corr("depend"))

# Active plot: scatter of columns 'x39' and 'x40'. The other plots below are
# disabled alternatives.
# train.histogram("depend")
marker_options = {"s": 100, "alpha": 0.2}
train.scatter("x39", "x40", **marker_options)
# train.frame[train.frame.columns[36:40]].boxplot()
plt.show()

# from pandas.tools.plotting import scatter_matrix
# plot1 = scatter_matrix(train.frame[train.frame.columns[-5:]], alpha=0.2, figsize=(8, 8))
# plt.show()
# plt.savefig('fig6.pdf')

# from pandas.tools.plotting import radviz
# radviz(train.frame[['depend', 'x1', 'x2', 'x3', 'x4']], 'depend')
# plt.show()
Exemplo n.º 5
0
import copper
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

copper.project.path = '../'

# The 'train_imp' / 'test_imp' pair is the one currently loaded; the
# commented-out lines are alternative dataset files that can be swapped in.
# train = copper.load('train.dataset')
# train = copper.load('train_mean.dataset')
# train = copper.load('train_mean_log.dataset')
train = copper.load('train_imp.dataset')
# test = copper.load('test.dataset')
# test = copper.load('test_mean.dataset')
test = copper.load('test_imp.dataset')

# print(test.inputs)

ml = copper.MachineLearning()
ml.set_train(train)
ml.set_test(test)
# NOTE(review): looks like a 2x2 misclassification cost matrix in which one
# kind of error costs 5 and the other 1 -- confirm the row/column convention
# against copper.MachineLearning.
ml.costs = [[0, 1], [5, 0]]

from PyWiseRF import WiseRF
rf = WiseRF(n_estimators=50, n_jobs=2)
ml.add_clf(rf, 'RF')

from sklearn import tree
tree_clf = tree.DecisionTreeClassifier(max_depth=5)
ml.add_clf(tree_clf, 'Tree')

ml.fit()
# Function-call form: prints the same on Python 2 for a single argument and
# is valid syntax on Python 3, where the bare print statement is not.
print(ml.accuracy())