示例#1
0
class knn_classifier(Classifier):
    """k-nearest-neighbours model exposed through the ``Classifier`` interface.

    In binary mode the wrapped estimator is a distance-weighted
    ``KNeighborsClassifier``; otherwise it is a distance-weighted
    ``KNeighborsRegressor``, optionally boosted with ``AdaBoostRegressor``.
    """

    def __init__(self, ticker, inputSize=5, binary=True, n_neighbors=15, risk=0.5, adaboost=False):
        """Store the configuration and build the underlying estimator.

        Args:
            ticker: Identifier stored on the instance as ``self.ticker``.
            inputSize: Stored as both ``self.days`` and ``self.inputSize``.
            binary: If true build a classifier, otherwise a regressor.
            n_neighbors: Number of neighbours handed to the KNN estimator.
            risk: Probability threshold used by ``predict`` in binary mode.
            adaboost: If true *and* ``binary`` is false, wrap the regressor
                in ``AdaBoostRegressor`` with 100 estimators. In binary mode
                the flag is only stored; no booster is built.
        """
        self.type = 'KNN'
        self.ticker = ticker
        self.days = inputSize
        self.inputSize = inputSize
        self.binary = binary
        self.risk_thresh = risk
        self.adaboost = adaboost
        if binary:
            self.clf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
        else:
            self.clf = neighbors.KNeighborsRegressor(n_neighbors, weights='distance')
            if adaboost:
                self.clf = AdaBoostRegressor(base_estimator=self.clf, n_estimators=100)

    def predict(self, inputArray):
        """Predict for one sample (1-D input) or a batch of samples (2-D input).

        Args:
            inputArray: Array-like of features. A 1-D input is treated as a
                single sample and promoted to shape ``(1, n_features)``.

        Returns:
            In binary mode, an integer array holding 1 where the second
            column of ``predict_proba`` exceeds ``self.risk_thresh`` and 0
            elsewhere. Otherwise the estimator's raw ``predict`` output.
        """
        inputArray = np.array(inputArray)
        # ndarray.reshape returns a new array instead of modifying in place,
        # so the result has to be bound. Only 1-D input is promoted, leaving
        # 2-D batches exactly as the caller supplied them.
        if inputArray.ndim == 1:
            inputArray = inputArray.reshape(1, -1)

        if self.binary:
            pred = self.clf.predict_proba(inputArray)
            pred = (np.array(pred)[:, 1] > self.risk_thresh) * 1
        else:
            pred = self.clf.predict(inputArray)
        return pred

    def fit(self, X, Y):
        """Fit the wrapped estimator on features ``X`` and targets ``Y``."""
        self.clf.fit(X, Y)
class gaussProcess_classifier(Classifier):
    """Gaussian-process model exposed through the ``Classifier`` interface.

    In binary mode the wrapped estimator is a ``GaussianProcessClassifier``;
    otherwise it is a ``GaussianProcessRegressor``, optionally boosted with
    ``AdaBoostRegressor``.
    """

    def __init__(self,
                 ticker,
                 inputSize=5,
                 binary=True,
                 risk=0.5,
                 numTrainDays=300,
                 adaboost=False):
        """Store the configuration and build the underlying estimator.

        Args:
            ticker: Identifier stored on the instance as ``self.ticker``.
            inputSize: Stored as both ``self.days`` and ``self.inputSize``.
            binary: If true build a classifier, otherwise a regressor.
            risk: Probability threshold used by ``predict`` in binary mode.
            numTrainDays: Training-window size that ``trainClf`` passes to
                ``processData``.
            adaboost: If true *and* ``binary`` is false, wrap the regressor
                in ``AdaBoostRegressor`` with 100 estimators. In binary mode
                the flag is only stored; no booster is built.
        """
        self.type = 'Gaussian Process'
        self.ticker = ticker
        self.days = inputSize
        self.inputSize = inputSize
        self.binary = binary
        self.risk_thresh = risk
        self.adaboost = adaboost
        self.numTrainDays = numTrainDays
        if binary:
            self.clf = GaussianProcessClassifier()
        else:
            self.clf = GaussianProcessRegressor()
            if adaboost:
                self.clf = AdaBoostRegressor(base_estimator=self.clf,
                                             n_estimators=100)

    def trainClf(self, endDay=None, numTrainDays=100):
        """Build the training set via ``processData`` and fit the estimator.

        Args:
            endDay: Last day passed to ``processData``. ``None`` (the
                default) means "today", resolved at call time.
            numTrainDays: Accepted but not used; the window size always
                comes from ``self.numTrainDays``.
                NOTE(review): presumably kept to match a base-class
                signature - confirm before removing or honouring it.
        """
        # A ``date.today()`` default would be evaluated once, when the
        # function is defined, and go stale in a long-running process.
        if endDay is None:
            endDay = date.today()
        X, Y = self.processData(endDay, self.numTrainDays)
        self.fit(X, Y)

    def predict(self, inputArray):
        """Predict for one sample (1-D input) or a batch of samples (2-D input).

        Args:
            inputArray: Array-like of features. A 1-D input is treated as a
                single sample and promoted to shape ``(1, n_features)``.

        Returns:
            In binary mode, an integer array holding 1 where the second
            column of ``predict_proba`` exceeds ``self.risk_thresh`` and 0
            elsewhere. Otherwise the estimator's raw ``predict`` output.
        """
        inputArray = np.array(inputArray)
        # ndarray.reshape returns a new array instead of modifying in place,
        # so the result has to be bound. Only 1-D input is promoted, leaving
        # 2-D batches exactly as the caller supplied them.
        if inputArray.ndim == 1:
            inputArray = inputArray.reshape(1, -1)

        if self.binary:
            pred = self.clf.predict_proba(inputArray)
            pred = (np.array(pred)[:, 1] > self.risk_thresh) * 1
        else:
            pred = self.clf.predict(inputArray)
        return pred

    def fit(self, X, Y):
        """Fit the wrapped estimator on features ``X`` and targets ``Y``."""
        self.clf.fit(X, Y)
示例#3
0
class dt_class(Classifier):
    """Decision-tree model exposed through the ``Classifier`` interface.

    Builds a ``DecisionTreeClassifier`` (binary mode) or a
    ``DecisionTreeRegressor``, either of which can be wrapped in the
    matching AdaBoost ensemble.
    """

    def __init__(self,
                 ticker,
                 inputSize=5,
                 binary=True,
                 risk=0.5,
                 adaboost=False):
        """Store the configuration and build the underlying estimator.

        Args:
            ticker: Identifier stored on the instance as ``self.ticker``.
            inputSize: Stored as ``self.days`` and ``self.inputSize``; also
                used as the tree's ``max_depth``.
            binary: If true build a classifier, otherwise a regressor.
            risk: Probability threshold used by ``predict`` in binary mode.
            adaboost: If true, wrap the tree in ``AdaBoostClassifier`` or
                ``AdaBoostRegressor`` with 100 estimators.
        """
        self.type = 'Decision Tree'
        self.ticker = ticker
        self.days = inputSize
        self.inputSize = inputSize
        self.binary = binary
        self.adaboost = adaboost
        self.risk_thresh = risk

        if binary:
            self.clf = tree.DecisionTreeClassifier(max_depth=inputSize)
            if adaboost:
                self.clf = AdaBoostClassifier(base_estimator=self.clf,
                                              n_estimators=100)
        else:
            self.clf = tree.DecisionTreeRegressor(max_depth=inputSize)
            if adaboost:
                self.clf = AdaBoostRegressor(base_estimator=self.clf,
                                             n_estimators=100)

    def predict(self, inputArray):
        """Predict for one sample (1-D input) or a batch of samples (2-D input).

        Args:
            inputArray: Array-like of features. A 1-D input is treated as a
                single sample and promoted to shape ``(1, n_features)``.

        Returns:
            In binary mode, an integer array holding 1 where the second
            column of ``predict_proba`` exceeds ``self.risk_thresh`` and 0
            elsewhere. Otherwise the estimator's raw ``predict`` output.
        """
        inputArray = np.array(inputArray)
        # ndarray.reshape returns a new array instead of modifying in place,
        # so the result has to be bound. Only 1-D input is promoted, leaving
        # 2-D batches exactly as the caller supplied them.
        if inputArray.ndim == 1:
            inputArray = inputArray.reshape(1, -1)

        if self.binary:
            pred = self.clf.predict_proba(inputArray)
            pred = (np.array(pred)[:, 1] > self.risk_thresh) * 1
        else:
            pred = self.clf.predict(inputArray)
        return pred

    def fit(self, X, Y):
        """Fit the wrapped estimator on features ``X`` and targets ``Y``."""
        self.clf.fit(X, Y)
示例#4
0
class svm_class(Classifier):
    """Sigmoid-kernel SVM model exposed through the ``Classifier`` interface.

    Builds an ``SVC`` with probability estimates enabled (binary mode) or
    an ``SVR``, either of which can be wrapped in the matching AdaBoost
    ensemble.
    """

    def __init__(self, ticker, inputSize=5, binary=True, risk=0.5, adaboost=False):
        """Store the configuration and build the underlying estimator.

        Args:
            ticker: Identifier stored on the instance as ``self.ticker``.
            inputSize: Stored as both ``self.days`` and ``self.inputSize``.
            binary: If true build a classifier, otherwise a regressor.
            risk: Used to derive the decision threshold; the value stored
                in ``self.risk_thresh`` is ``1 - risk``.
            adaboost: If true, wrap the SVM in ``AdaBoostClassifier`` or
                ``AdaBoostRegressor`` with 100 estimators.
        """
        self.type = 'SVM'
        self.ticker = ticker
        self.days = inputSize
        self.inputSize = inputSize
        self.binary = binary
        kern = 'sigmoid'
        # NOTE(review): the threshold is inverted (1 - risk) rather than
        # stored as given - confirm this is intentional.
        self.risk_thresh = 1 - risk
        self.adaboost = adaboost

        if binary:
            # probability=True is required for predict_proba, which
            # ``predict`` relies on in binary mode.
            self.clf = svm.SVC(kernel=kern, probability=True)
            if adaboost:
                self.clf = AdaBoostClassifier(base_estimator=self.clf, n_estimators=100)
        else:
            self.clf = svm.SVR(kernel=kern)
            if adaboost:
                self.clf = AdaBoostRegressor(base_estimator=self.clf, n_estimators=100)

    def predict(self, inputArray):
        """Predict for one sample (1-D input) or a batch of samples (2-D input).

        Args:
            inputArray: Array-like of features. A 1-D input is treated as a
                single sample and promoted to shape ``(1, n_features)``.

        Returns:
            In binary mode, an integer array holding 1 where the second
            column of ``predict_proba`` exceeds ``self.risk_thresh`` and 0
            elsewhere. Otherwise the estimator's raw ``predict`` output.
        """
        inputArray = np.array(inputArray)
        # ndarray.reshape returns a new array instead of modifying in place,
        # so the result has to be bound. Only 1-D input is promoted, leaving
        # 2-D batches exactly as the caller supplied them.
        if inputArray.ndim == 1:
            inputArray = inputArray.reshape(1, -1)

        if self.binary:
            pred = self.clf.predict_proba(inputArray)
            pred = (np.array(pred)[:, 1] > self.risk_thresh) * 1
        else:
            pred = self.clf.predict(inputArray)
        return pred

    def fit(self, X, Y):
        """Fit the wrapped estimator on features ``X`` and targets ``Y``."""
        self.clf.fit(X, Y)
示例#5
0
        clf.fit(subTrainFeature, subTrainLabel)
        predictedTrainProb = clf.predict(trainFeature)
        predictedTestProb = clf.predict(testFeature)

        for item in predictedTrainProb:
            newTrainFeature_temp.append(item)
        for item in predictedTestProb:
            newTestFeature_temp.append(item)
        newTrainFeature.append(newTrainFeature_temp)
        newTestFeature.append(newTestFeature_temp)

    newTrainFeature = np.array(newTrainFeature).T
    newTestFeature = np.array(newTestFeature).T
    clf = linear_model.LogisticRegression(penalty='l2', dual=False, class_weight='auto')
    clf.fit(newTrainFeature, trainLabel)
    predictedLabel = clf.predict_proba(newTestFeature)
    return(predictedLabel[:, 0])

if(__name__ == "__main__"):
    trainFeature, trainLabel, testFeature, testPlatform = readFeature(5, 0.5, 10, 0.6, 15, 0.6, 5, 0.6, 1)
    '''
    selectFeature = SelectKBest(chi2, k = 55)
    selectFeature.fit(trainFeature, trainLabel)
    trainFeature_new = selectFeature.transform(trainFeature)
    testFeature_new = selectFeature.transform(testFeature)
    '''
    trainFeature_new = trainFeature[:, :]
    testFeature_new = testFeature[:, :]
    '''
    trainFeature_new = trainFeature[:, :26]
    testFeature_new = testFeature[:, :26]
示例#6
0
# Write `preds` to CSV with no header row and no index column.
preds.to_csv('/Users/IkkiTanaka/Documents/KDDCup/pred/xgb/sk_GBM2.csv',
             header=None,
             index=False)

# Keep the rows of the sorted `a` whose column 0 exceeds 0.01 and take their
# column-1 values. `a.sort(0)` is evaluated twice in this expression.
# NOTE(review): presumably `a` pairs a feature score (column 0) with a
# feature label (column 1) - confirm against where `a` is built.
new_label = a.sort(0).iloc[(a.sort(0)[0] > 0.01).values][1].values
clf = GradientBoostingClassifier(n_estimators=400,
                                 learning_rate=0.05,
                                 subsample=.96,
                                 max_depth=4,
                                 verbose=1,
                                 max_features=.96,
                                 random_state=None)
# NOTE(review): the filtered `new_dtrain_sp` / `new_dval` are built here, but
# the fit and predict_proba calls below use the unfiltered `dtrain_sp` / `dval`.
new_dtrain_sp = dtrain_sp[new_label]
new_dval = dval[new_label]
clf.fit(dtrain_sp, label_dtrain[0].values)
pred = clf.predict_proba(dval)
# Score column 1 of the predicted probabilities against the validation labels.
print("ROC score", metrics.roc_auc_score(label_dval[0].values, pred[:, 1]))

# GaussianNB: fit on the same training data and score it the same way.
# This rebinds `clf` and `pred`.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(dtrain_sp, label_dtrain[0].values)
pred = clf.predict_proba(dval)
print("ROC score", metrics.roc_auc_score(label_dval[0].values, pred[:, 1]))

# Fit the scaler on the training data only, then apply the same transform to
# the validation data. Both names are rebound to the scaled results.
scaler = StandardScaler()
dtrain_sp = scaler.fit_transform(dtrain_sp)
dval = scaler.transform(dval)

from sklearn import svm
clf = svm.SVC(C=1,
示例#7
0
# Predict test-set probabilities with the calibrated model, pair column 0 of
# the sample submission with the column-1 probabilities, and write the result
# to CSV with no header row and no index column.
pred = calibrated_clf.predict_proba(dtest)
sample = pd.read_csv('/Users/IkkiTanaka/Documents/KDDCup/sampleSubmission.csv',header=None)
preds = pd.concat([sample[0],pd.DataFrame(pred[:,1])],axis=1)
preds.to_csv('/Users/IkkiTanaka/Documents/KDDCup/pred/xgb/sk_GBM2.csv' ,header=None,index=False)





# Keep the rows of the sorted `a` whose column 0 exceeds 0.01 and take their
# column-1 values. `a.sort(0)` is evaluated twice in this expression.
# NOTE(review): presumably `a` pairs a feature score (column 0) with a
# feature label (column 1) - confirm against where `a` is built.
new_label = a.sort(0).iloc[(a.sort(0)[0]>0.01).values][1].values
clf = GradientBoostingClassifier(n_estimators=400,learning_rate=0.05,subsample=.96,max_depth=4,verbose=1,max_features=.96, random_state=None)
# NOTE(review): the filtered `new_dtrain_sp` / `new_dval` are built here, but
# the fit and predict_proba calls below use the unfiltered `dtrain_sp` / `dval`.
new_dtrain_sp = dtrain_sp[new_label]
new_dval = dval[new_label]
clf.fit(dtrain_sp, label_dtrain[0].values)
pred = clf.predict_proba(dval)
# Score column 1 of the predicted probabilities against the validation labels.
print("ROC score", metrics.roc_auc_score(label_dval[0].values, pred[:,1]))



# GaussianNB: fit on the same training data and score it the same way.
# This rebinds `clf` and `pred`.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(dtrain_sp, label_dtrain[0].values)
pred = clf.predict_proba(dval)
print("ROC score", metrics.roc_auc_score(label_dval[0].values, pred[:,1]))


# Fit the scaler on the training data only, then apply the same transform to
# the validation data. Both names are rebound to the scaled results.
scaler = StandardScaler()
dtrain_sp = scaler.fit_transform(dtrain_sp)
dval = scaler.transform(dval)