forked from adrian-aley/sample_trading_system
-
Notifications
You must be signed in to change notification settings - Fork 0
/
k_fold_cross_val.py
60 lines (50 loc) · 1.96 KB
/
k_fold_cross_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python
# -*- coding: utf-8 -*-
# k_fold_cross_val.py
import datetime

import pandas as pd
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.svm import SVC

try:
    # Legacy module: deprecated in scikit-learn 0.18 and removed in 0.20.
    # Guarded so the script still imports on current releases.
    from sklearn import cross_validation
except ImportError:
    cross_validation = None

from create_lagged_series import create_lagged_series
if __name__ == "__main__":
    # Script entry point: run a 10-fold cross validation of a radial-kernel
    # SVM that predicts the "Direction" column from the "Lag1" and "Lag2"
    # columns, printing the hit rate and confusion matrix of every fold.

    # Create a lagged series of the S&P500 US stock market index
    snpret = create_lagged_series(
        "^GSPC", datetime.datetime(2001, 1, 10),
        datetime.datetime(2005, 12, 31), lags=5
    )

    # Use the prior two days of returns as predictor
    # values, with direction as the response
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]

    # Create a k-fold cross validation object. The splitter comes from
    # sklearn.model_selection (the old sklearn.cross_validation module no
    # longer exists): it takes the number of folds as n_splits and receives
    # the data only when split() is called.
    kf = KFold(n_splits=10, shuffle=True, random_state=42)

    # Use the kf object to create index arrays that
    # state which elements have been retained for training
    # and which elements have been retained for testing
    # for each k-element iteration
    for train_index, test_index in kf.split(X):
        # The fold indices are positions, not labels, so select rows with
        # .iloc (the removed .ix indexer is no longer available and the
        # label round-trip broke on duplicate index labels).
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # In this instance only use the
        # Radial Support Vector Machine (SVM)
        print("Hit Rate/Confusion Matrix:")
        model = SVC(
            C=1000000.0, cache_size=200, class_weight=None,
            coef0=0.0, degree=3, gamma=0.0001, kernel='rbf',
            max_iter=-1, probability=False, random_state=None,
            shrinking=True, tol=0.001, verbose=False
        )

        # Train the model on the retained training data
        model.fit(X_train, y_train)

        # Make an array of predictions on the test set
        pred = model.predict(X_test)

        # Output the hit-rate and the confusion matrix for each model.
        # NOTE(review): the arguments are passed as (pred, y_test), so rows
        # index the predicted class and columns the actual class - the
        # transpose of scikit-learn's documented (y_true, y_pred) layout.
        # Kept as-is to preserve the printed output.
        print("%0.3f" % model.score(X_test, y_test))
        print("%s\n" % confusion_matrix(pred, y_test))