# Ishan_Ben_Final.py
# coding: utf-8
# # Stock Market Prediction
# ## Ben Welkie & Ishan Jain
# In[33]:
import edward as ed
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from edward.models import Categorical, Normal, PointMass
import warnings
from hmmlearn.hmm import GaussianHMM
from statsmodels.tsa import ar_model
warnings.filterwarnings('ignore')
# Load the Google stock history and keep only the closing-price series.
# NOTE(review): the path is absolute from the filesystem root — presumably
# '~/Downloads/Google.csv' was intended; confirm against the data location.
data = pd.read_csv('/Downloads/Google.csv')
data = data['Close']
# DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed in
# 1.0; to_numpy() is the supported replacement and returns the same ndarray.
data = data.to_numpy()
plt.plot(data)
plt.show()
# Re-express the price series as day-over-day percent change, rounded to two
# decimal places. The first entry has no predecessor, so it is pinned to 0.
pct = np.empty_like(data)
pct[0] = 0
for i in range(1, data.size):
    pct[i] = round((data[i] - data[i - 1]) / data[i - 1], 2)
data = pct
plt.plot(data)
plt.show()
N = data.size
print("Number of data points: {}".format(N))
# Length of the modelled chain — roughly one month of trading days.
timelen = 50
# Hidden regimes: increasing, decreasing, and stable stock.
numhidden = 3
# Each distinct rounded percent change is one observable symbol.
unique_vals = np.sort(np.unique(data))
numobs = np.unique(data).size
print(unique_vals)
print(numobs)
# Uniform prior over the initial hidden state.
p_init = Categorical(probs=tf.fill([numhidden], 1.0 / numhidden))
# Transition Matrix
# Softmax over dim=0 normalises each COLUMN, so Trans[:, j] is a
# distribution over the next state given current state j.
Trans = tf.nn.softmax(tf.Variable(tf.zeros([numhidden, numhidden])), dim=0)
# Emission Matrix
# Likewise Emiss[:, j] is a distribution over observations given state j.
Emiss = tf.nn.softmax(tf.Variable(tf.zeros([numobs, numhidden])), dim=0)
# HMM model
# Unroll the chain for `timelen` steps: x collects the hidden-state random
# variables, y the corresponding observation random variables.
x = []
y = []
for t in range(timelen):
    # Previous hidden state, or the initial-state prior at t == 0.
    x_tmp = x[-1] if x else p_init
    # Column lookup indexed by a Categorical random variable — Edward
    # overloads tensor indexing with RVs, yielding the conditional probs.
    x_i = Categorical(probs=Trans[:, x_tmp])
    y_i = Categorical(probs=Emiss[:, x_i])
    x.append(x_i)
    y.append(y_i)
# Mean-field variational posterior: one independent Categorical per step.
qf = [Categorical(probs=tf.nn.softmax(tf.Variable(tf.ones(numhidden))))
      for t in range(timelen)]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # zip() stops at the shorter sequence, so only the first `timelen`
    # observations of `data` are actually used as evidence.
    inf_KLqp = ed.KLqp(dict(zip(x, qf)), dict(zip(y, data)))
    # BUG FIX: under Python 3, 5000/10 is the float 500.0; n_print must be
    # an int, so use floor division.
    inf_KLqp.run(n_iter=5000, n_print=5000 // 10)
    print(sess.run(Trans))
    print(sess.run(Emiss))
# hmmlearn expects 2-D input of shape (n_samples, n_features). The original
# np.column_stack(data) produced a (1, N) array that was then transposed —
# an obscure round-trip that is exactly equivalent to this reshape.
data = data.reshape(-1, 1)
print("fitting to HMM and decoding ...")
# Make an HMM instance and execute fit
print(data.shape[0])
model = GaussianHMM(n_components=3, covariance_type="full",
                    n_iter=1000).fit(data)
# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(data)
print("done")
print("Transition matrix")
print(model.transmat_)
print()
print("Means and vars of each hidden state")
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    # covariance_type="full" gives one (n_features, n_features) matrix per
    # state; the diagonal holds the per-feature variances.
    print("var = ", np.diag(model.covars_[i]))
    print()
# --- Synthetic AR(2) data for sanity-checking the Bayesian AR model ---
# (the `mu = 0.` that used to sit here was dead code: it was never read
# before being rebound to a Normal prior further down the script)
beta_true = np.array([0.7, 0.25])  # true AR coefficients
noise_obs = 0.1                    # observation-noise standard deviation
T = 128                            # series length actually used downstream
p = 2                              # AR order
# Generate synthetic data: start from pure noise, then layer the AR part on.
# NOTE(review): x_true is allocated with T+1 entries but only indices
# [0, T) are written by the loop or consumed downstream; the last entry
# remains pure noise — confirm whether T+1 was intentional.
x_true = np.random.randn(T + 1) * noise_obs
for t in range(p, T):
    # Dot with the reversed window gives sum_j beta_true[j] * x_true[t-1-j].
    x_true[t] += beta_true.dot(x_true[t - p:t][::-1])
plt.plot(x_true)
plt.show()
# --- Bayesian AR(p) model in Edward ---
# Priors on the intercept and the AR coefficients.
mu = Normal(loc=0.0, scale=10.0)
beta = [Normal(loc=0.0, scale=2.0) for i in range(p)]
# Process and observation noise are held fixed; the commented lines record
# the InverseGamma priors considered as an alternative.
noise_proc = tf.constant(0.1)
# InverseGamma(alpha=1.0, beta=1.0)
noise_obs = tf.constant(0.1)
# InverseGamma(alpha=1.0, beta=1.0)
# NOTE: this rebinds the module-level `x` (previously the HMM hidden-state
# list) to the latent AR series.
x = [0] * T
for n in range(p):
    x[n] = Normal(loc=mu, scale=10.0)  # fat prior on x
for n in range(p, T):
    # Mean of x[n] is mu + sum_j beta[j] * x[n-1-j].
    mu_ = mu
    for j in range(p):
        mu_ += beta[j] * x[n-j-1]
    x[n] = Normal(loc=mu_, scale=noise_proc)
print("setting up distributions")
# Point-mass (MAP) "posteriors": one trainable scalar per parameter.
qmu = PointMass(params=tf.Variable(0.))
qbeta = [PointMass(params=tf.Variable(0.)) for i in range(p)]
print("constructing inference object")
vdict = {mu: qmu}
vdict.update({b: qb for b, qb in zip(beta, qbeta)})
# Condition each latent x[t] on the corresponding synthetic observation;
# zip() truncates, so the surplus final entry of x_true is ignored.
inference = ed.MAP(vdict, data={xt: xt_true for xt, xt_true in zip(x, x_true)})
print("running inference")
inference.run()
print("parameter estimates:")
print("beta: ", [qb.value().eval() for qb in qbeta])
print("mu: ", qmu.value().eval())
print("setting up variational distributions")
# Gaussian mean-field posteriors; softplus keeps the scale positive.
qmu = Normal(loc=tf.Variable(0.), scale=tf.nn.softplus(tf.Variable(0.)))
qbeta = [Normal(loc=tf.Variable(0.), scale=tf.nn.softplus(tf.Variable(0.)))
         for i in range(p)]
print("constructing inference object")
vdict = {mu: qmu}
vdict.update({b: qb for b, qb in zip(beta, qbeta)})
inference_vb = ed.KLqp(vdict, data={xt: xt_true for xt,
                                    xt_true in zip(x, x_true)})
print("running inference")
inference_vb.run()
print("parameter estimates:")
for j in range(p):
    print("beta[%d]: " % j, qbeta[j].mean().eval(),)
# BUG FIX: this line printed qmu.variance() under the label "mu: ";
# report the posterior mean, consistent with the beta estimates above.
print("mu: ", qmu.mean().eval())
# Classical OLS AR(2) fit as a baseline for the Bayesian estimates above.
# ar_model.AR was deprecated in statsmodels 0.11 and removed in 0.13;
# AutoReg is the supported replacement (params order: [const, lag1, lag2]).
ar2_sm = ar_model.AutoReg(x_true, lags=2, trend='c')
res = ar2_sm.fit()
print("statsmodels AR(2) params: ", res.params)