def getModelFromDataframe(df, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF):
    """Build a model from ``df``.

    ``df`` is converted with ``dataToNumpy``; the two arrays it returns are
    passed, together with the tuning values, to ``getModelFromNumpy``.

    Args:
        df: Input accepted by ``dataToNumpy``.
        max_depth: Forwarded to ``getModelFromNumpy`` as ``maxDepth``.
        min_samples_leaf: Forwarded to ``getModelFromNumpy`` as
            ``minSamplesLeaf``.

    Returns:
        Whatever ``getModelFromNumpy`` returns.
    """
    features, labels = dataToNumpy(df)
    model = getModelFromNumpy(
        features,
        labels,
        maxDepth=max_depth,
        minSamplesLeaf=min_samples_leaf,
    )
    return model
def trainAndTestFromDataframes(
        trainDf, testDf, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF):
    """Build a model from ``trainDf`` and return its predictions for ``testDf``.

    Method to be used in the backtester.

    Args:
        trainDf: Passed to ``getModelFromDataframe`` to build the model.
        testDf: Converted with ``dataToNumpy`` (called with the extra
            argument ``'testing df'``); only the first returned array is used.
        max_depth: Forwarded to ``getModelFromDataframe``.
        min_samples_leaf: Forwarded to ``getModelFromDataframe``.

    Returns:
        The result of ``model.predict`` on the converted test data.
    """
    fitted = getModelFromDataframe(
        trainDf,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
    )
    # The second array returned for the test frame is not needed here.
    testFeatures, _ = dataToNumpy(testDf, 'testing df')
    return fitted.predict(testFeatures)
def trainAndTestFromDataframes(trainDf, testDf):
    """Build a model from ``trainDf`` and return its predictions for ``testDf``.

    Args:
        trainDf: Passed to ``getModelFromDataframe`` to build the model.
        testDf: Converted with ``dataToNumpy``; only the first returned array
            is used.

    Returns:
        The result of ``model.predict`` on the converted test data.
    """
    fitted = getModelFromDataframe(trainDf)
    # The second array returned for the test frame is not needed here.
    testFeatures, _ = dataToNumpy(testDf)
    return fitted.predict(testFeatures)
def getModelFromDataframe(df, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF):
    """Build a model from ``df``; method to be used in the backtester.

    This definition rebinds ``getModelFromDataframe``, so it must accept the
    same tuning keyword arguments that callers in this file pass
    (``max_depth=...``, ``min_samples_leaf=...``); otherwise those calls fail
    with ``TypeError``. Calling it with ``df`` alone behaves as before as long
    as ``getModelFromNumpy``'s own defaults equal ``MAX_DEPTH`` and
    ``MIN_SAMPLES_LEAF`` -- TODO confirm against its definition.

    Args:
        df: Input accepted by ``dataToNumpy``.
        max_depth: Forwarded to ``getModelFromNumpy`` as ``maxDepth``.
        min_samples_leaf: Forwarded to ``getModelFromNumpy`` as
            ``minSamplesLeaf``.

    Returns:
        Whatever ``getModelFromNumpy`` returns.
    """
    X_scaled, y = dataToNumpy(df)
    return getModelFromNumpy(
        X_scaled,
        y,
        maxDepth=max_depth,
        minSamplesLeaf=min_samples_leaf,
    )
def getModelFromCSV(csv='data/postsWithDate.csv'):
    """Read ``csv`` with pandas and build a model from the resulting frame.

    Args:
        csv: Path (or other input accepted by ``pd.read_csv``) of the data.

    Returns:
        The model returned by ``getModelFromDataframe``. The value used to be
        dropped, which made this function always return ``None``.
    """
    df = pd.read_csv(csv)
    return getModelFromDataframe(df)


def trainAndTestFromDataframes(trainDf, testDf):
    """Build a model from ``trainDf`` and return its predictions for ``testDf``.

    Args:
        trainDf: Passed to ``getModelFromDataframe`` to build the model.
        testDf: Converted with ``dataToNumpy``; only the first returned array
            is used.

    Returns:
        The result of ``model.predict`` on the converted test data.
    """
    model = getModelFromDataframe(trainDf)
    testX, _ = dataToNumpy(testDf)
    return model.predict(testX)


if __name__ == '__main__':
    # Script entry point: build a model from the CSV data and report how it
    # does on that same data.
    X_scaled, y = dataToNumpy('data/postsWithDate.csv')
    clf = getModelFromNumpy(X_scaled, y)
    length = X_scaled.shape[0]

    # MANUAL TUNING DONE HERE
    predictions = clf.predict(X_scaled)
    predictions_proba = clf.predict_proba(X_scaled)

    print("confusion matrix:")
    print(confusion_matrix(y, predictions))
    # Index the array that actually exists; the bare name ``X`` is never
    # defined here and would raise NameError.
    print(f'prediction: {clf.predict_proba([X_scaled[140]])}')

    zeroCounter = 0
    oneCounter = 0
def trainAndTestFromDataframes(
        trainDf, testDf, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF):
    """Build a model from ``trainDf`` and return its predictions for ``testDf``.

    Method to be used in the backtester.

    Args:
        trainDf: Passed to ``getModelFromDataframe`` to build the model.
        testDf: Converted with ``dataToNumpy`` (called with the extra
            argument ``'testing df'``); only the first returned array is used.
        max_depth: Forwarded to ``getModelFromDataframe``.
        min_samples_leaf: Forwarded to ``getModelFromDataframe``.

    Returns:
        The result of ``model.predict`` on the converted test data.
    """
    fitted = getModelFromDataframe(
        trainDf,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
    )
    # The second array returned for the test frame is not needed here.
    testFeatures, _ = dataToNumpy(testDf, 'testing df')
    return fitted.predict(testFeatures)


if __name__ == '__main__':
    # Script entry point: build a model from the CSV data and report how it
    # does on that same data. Names are kept as-is because later script code
    # may rely on them.
    X_scaled, y = dataToNumpy('data/bigOne.csv')
    clf = getModelFromNumpy(X_scaled, y)
    length = X_scaled.shape[0]

    # MANUAL TUNING DONE HERE
    predictions = clf.predict(X_scaled)
    predictions_proba = clf.predict_proba(X_scaled)

    print("confusion matrix:")
    print(confusion_matrix(y, predictions))
    print(f'prediction: {clf.predict_proba([X_scaled[140]])}')

    zeroCounter = 0
    oneCounter = 0