Exemplo n.º 1
0
def getModelFromDataframe(df,
                          max_depth=MAX_DEPTH,
                          min_samples_leaf=MIN_SAMPLES_LEAF):
    """Build a model from a dataframe.

    The dataframe is converted with ``dataToNumpy`` and the resulting pair
    is handed to ``getModelFromNumpy`` together with the hyperparameters.

    Args:
        df: Input data, passed unchanged to ``dataToNumpy``.
        max_depth: Forwarded to ``getModelFromNumpy`` as ``maxDepth``.
        min_samples_leaf: Forwarded to ``getModelFromNumpy`` as
            ``minSamplesLeaf``.

    Returns:
        Whatever ``getModelFromNumpy`` returns (presumably a fitted
        estimator -- confirm against that helper).
    """
    features, labels = dataToNumpy(df)
    hyperparams = dict(maxDepth=max_depth, minSamplesLeaf=min_samples_leaf)
    return getModelFromNumpy(features, labels, **hyperparams)
Exemplo n.º 2
0
def trainAndTestFromDataframes(
        trainDf,
        testDf,
        max_depth=MAX_DEPTH,
        min_samples_leaf=MIN_SAMPLES_LEAF):  # method to be used in backtester
    """Build a model from ``trainDf`` and predict on ``testDf``.

    Args:
        trainDf: Data handed to ``getModelFromDataframe`` to obtain the model.
        testDf: Data converted with ``dataToNumpy`` and then predicted on.
        max_depth: Forwarded to ``getModelFromDataframe``.
        min_samples_leaf: Forwarded to ``getModelFromDataframe``.

    Returns:
        The result of ``model.predict`` on the converted test features.
    """
    hyperparams = {
        'max_depth': max_depth,
        'min_samples_leaf': min_samples_leaf,
    }
    fitted = getModelFromDataframe(trainDf, **hyperparams)
    # Only the features are needed here; the test labels are ignored.
    features, _labels = dataToNumpy(testDf, 'testing df')
    return fitted.predict(features)
Exemplo n.º 3
0
def trainAndTestFromDataframes(trainDf, testDf):
    """Build a model from ``trainDf`` and return its predictions for ``testDf``.

    Args:
        trainDf: Data handed to ``getModelFromDataframe`` to obtain the model.
        testDf: Data converted with ``dataToNumpy`` and then predicted on.

    Returns:
        The result of ``model.predict`` on the converted test features.
    """
    fitted = getModelFromDataframe(trainDf)
    # Only the features are needed here; the test labels are ignored.
    features, _labels = dataToNumpy(testDf)
    return fitted.predict(features)
Exemplo n.º 4
0
def getModelFromDataframe(df):  # method to be used in backtester
    """Build a model from a dataframe.

    Args:
        df: Input data, passed unchanged to ``dataToNumpy``.

    Returns:
        Whatever ``getModelFromNumpy`` returns for the converted data
        (presumably a fitted estimator -- confirm against that helper).
    """
    features, labels = dataToNumpy(df)
    model = getModelFromNumpy(features, labels)
    return model
Exemplo n.º 5
0
def getModelFromCSV(csv='data/postsWithDate.csv'):
    """Load a CSV file and build a model from its contents.

    Args:
        csv: Path (or any source accepted by ``pd.read_csv``) of the data.

    Returns:
        The model produced by ``getModelFromDataframe`` for the loaded frame.
    """
    df = pd.read_csv(csv)
    # The model used to be built and then discarded, so callers always
    # received None; return it so the function does what its name says.
    return getModelFromDataframe(df)


def trainAndTestFromDataframes(trainDf, testDf):
    """Return predictions for ``testDf`` from a model built on ``trainDf``.

    Args:
        trainDf: Data handed to ``getModelFromDataframe`` to obtain the model.
        testDf: Data converted with ``dataToNumpy`` and then predicted on.

    Returns:
        The result of ``model.predict`` on the converted test features.
    """
    classifier = getModelFromDataframe(trainDf)
    # The second element of the converted pair (the labels) is not used.
    test_features, _test_labels = dataToNumpy(testDf)
    return classifier.predict(test_features)


if __name__ == '__main__':
    # Ad-hoc evaluation script: build a model from the posts data set and
    # print some diagnostics about its predictions.

    X_scaled, y = dataToNumpy('data/postsWithDate.csv')
    clf = getModelFromNumpy(X_scaled, y)

    length = X_scaled.shape[0]  # number of rows in the feature matrix

    # MANUAL TUNING DONE HERE
    # Predictions are made on the same X_scaled that was passed to
    # getModelFromNumpy, so the figures below are in-sample.
    predictions = clf.predict(X_scaled)
    predictions_proba = clf.predict_proba(X_scaled)

    print("confusion matrix:")
    print(confusion_matrix(y, predictions))

    # Fixed: this line used to index an undefined name `X`, which raised
    # NameError; the feature matrix in scope is X_scaled.
    # NOTE(review): row 140 is hard-coded -- assumes at least 141 rows.
    print(f'prediction: {clf.predict_proba([X_scaled[140]])}')

    zeroCounter = 0
    oneCounter = 0
Exemplo n.º 6
0
def trainAndTestFromDataframes(
        trainDf,
        testDf,
        max_depth=MAX_DEPTH,
        min_samples_leaf=MIN_SAMPLES_LEAF):  # method to be used in backtester
    """Return predictions for ``testDf`` from a model built on ``trainDf``.

    Args:
        trainDf: Data handed to ``getModelFromDataframe`` to obtain the model.
        testDf: Data converted with ``dataToNumpy`` and then predicted on.
        max_depth: Forwarded to ``getModelFromDataframe``.
        min_samples_leaf: Forwarded to ``getModelFromDataframe``.

    Returns:
        The result of ``model.predict`` on the converted test features.
    """
    classifier = getModelFromDataframe(
        trainDf, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
    # The second element of the converted pair (the labels) is not used.
    test_features, _test_labels = dataToNumpy(testDf, 'testing df')
    predicted = classifier.predict(test_features)
    return predicted


if __name__ == '__main__':
    # Ad-hoc evaluation script: build a model from 'data/bigOne.csv' and
    # print diagnostics about its predictions.

    X_scaled, y = dataToNumpy('data/bigOne.csv')
    clf = getModelFromNumpy(X_scaled, y)

    length = X_scaled.shape[0]  # number of rows in the feature matrix

    # MANUAL TUNING DONE HERE
    # Predictions are made on the same X_scaled that was passed to
    # getModelFromNumpy, so the figures below are in-sample.
    predictions = clf.predict(X_scaled)
    predictions_proba = clf.predict_proba(X_scaled)

    print("confusion matrix:")
    print(confusion_matrix(y, predictions))

    # NOTE(review): row 140 is hard-coded -- assumes at least 141 rows.
    print(f'prediction: {clf.predict_proba([X_scaled[140]])}')

    # Counters initialised here; their use is not visible in this excerpt.
    zeroCounter = 0
    oneCounter = 0