-
Notifications
You must be signed in to change notification settings - Fork 0
/
stocks.py
200 lines (159 loc) · 5.79 KB
/
stocks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
Python Project MA 792
Laurie Cuffney
"""
import datetime
import csv
import urllib2
import sys
import scipy
import numpy
from csv import Sniffer
from datetime import datetime
from matplotlib import pylab
from scipy import polyval
from scipy.stats import linregress
"""
This assignment will walk you through gathering data for a
pair of stocks and calculating the correlation between those stocks
using the daily returns.
Yahoo makes their historical stock data available in CSV format.
Below is the url for IBM stock data from Jan 1, 2010 to Dec 31, 2010.
http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2010&d=11&e=31&f=2010&g=d&ignore=.csv
The data is returned in CSV (comma-separated values) format with the following columns:
Date, Open, High, Low, Close, Volume, Adj Close
The daily return of a stock is defined by:
(C_n - C_n-1) / C_n-1
where C_n denotes the nth adjusted close and C_n-1 denotes the (n-1)th
adjusted close.
The function signatures for various steps of this process have been
given below. The names and parameters of these functions should not
be changed. You are free to write additional functions or classes as
needed. You are welcome to use any modules in the Python
standard library as well as NumPy, SciPy, and Matplotlib external
libraries. All code must run on Python 2.6.5.
"""
def build_request_url(symbol, start_date, end_date):
    """
    Build the yahoo csv download url for one stock and date range.

    symbol is the stock symbol as a string; start_date and end_date are
    Python dates bounding the requested range.  Returns the url as a
    string.
    """
    url_template = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&g=d&ignore=.csv'
    # The url carries each date as (month, day, year) with the month
    # shifted down by one, so January is sent as 0 and December as 11.
    range_start = (start_date.month - 1, start_date.day, start_date.year)
    range_end = (end_date.month - 1, end_date.day, end_date.year)
    return url_template % ((symbol,) + range_start + range_end)
def get_yahoo_data(url):
    """
    Download a yahoo csv file and return its dates and adjusted closes.

    url is a download url as returned by build_request_url.  The first
    row of the response is treated as a header and skipped.  Every
    other non-empty row yields one (date, adjusted close) tuple: the
    date is a datetime parsed from the first column (YYYY-MM-DD) and
    the adjusted close is the seventh column converted to a float.

    The returned list is in the reverse of the order in which the rows
    appear in the download.

    If the download fails with an HTTPError or URLError, the error is
    printed and the program exits with status 1.
    """
    try:
        response = urllib2.urlopen(url, timeout=10)
    except urllib2.HTTPError as e:
        print(u'HTTPError getting stock data: %s' % e)
        sys.exit(1)
    except urllib2.URLError as e:
        print(u'URLError getting stock data: %s' % e)
        sys.exit(1)

    data_list = []
    try:
        reader = csv.reader(response)
        # Skip the header row; the default keeps an empty response from
        # raising StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            date = datetime.strptime(row[0], '%Y-%m-%d')
            adj_close = float(row[6])
            data_list.append((date, adj_close))
    finally:
        # Always release the connection, even if a row fails to parse.
        response.close()

    data_list.reverse()
    return data_list
def calculate_stock_correlation(data):
    """
    Return the correlation of the daily returns of two stocks.

    data is a list containing two lists of the form returned by
    get_yahoo_data (lists of (date, adj. close) tuples).  Only the
    adjusted close at index 1 of each tuple is read, and the two
    series are paired by position rather than by date.

    The daily return for the nth adjusted close is
    (C_n - C_n-1) / C_n-1.  The result is the off-diagonal entry of
    the numpy.corrcoef matrix for the two return series.

    Raises ValueError if the two series have different lengths.
    """
    return_series = []
    for series in (data[0], data[1]):
        closes = [row[1] for row in series]
        return_series.append([
            # float() keeps int prices from being truncated by
            # Python 2 integer division.
            (current - previous) / float(previous)
            for previous, current in zip(closes, closes[1:])
        ])
    first_returns, second_returns = return_series

    if len(first_returns) != len(second_returns):
        raise ValueError(
            'cannot correlate series of different lengths: %d and %d daily returns'
            % (len(first_returns), len(second_returns)))

    corr_matrix = numpy.corrcoef(second_returns, first_returns)
    return corr_matrix[0][1]
def graph_stock_regression(data, filename):
    """
    Plot the daily returns of two stocks with their regression line.

    data is a list containing two lists of the form returned by
    get_yahoo_data (lists of (date, adj. close) tuples).  The daily
    returns of data[0] go on the y axis, labelled 'Apple', and the
    daily returns of data[1] go on the x axis, labelled 'Google'.
    The return pairs are drawn as red points and the least-squares
    line fitted by linregress is drawn in black.

    The graph is saved to filename.  It is drawn on its own figure,
    which is closed afterwards, so repeated calls do not pile up on
    the same axes.
    """
    return_series = []
    for series in (data[0], data[1]):
        closes = [row[1] for row in series]
        return_series.append([
            # float() keeps int prices from being truncated by
            # Python 2 integer division.
            (current - previous) / float(previous)
            for previous, current in zip(closes, closes[1:])
        ])
    apple_returns, google_returns = return_series

    # Only the slope and intercept are needed to draw the line.
    slope, intercept, _r, _p, _stderr = linregress(google_returns, apple_returns)
    line = numpy.polyval([slope, intercept], google_returns)

    figure = pylab.figure()
    try:
        pylab.title('Linear Regression')
        pylab.plot(google_returns, apple_returns, 'r.', google_returns, line, 'k')
        pylab.xlabel('Google')
        pylab.ylabel('Apple')
        pylab.legend(['data', 'regression'])
        pylab.savefig(filename)
    finally:
        # Release the figure even if plotting or saving fails.
        pylab.close(figure)
def main():
    """
    Correlate and graph the 2010 daily returns of Apple and Google.

    Gets the stock data for Apple (AAPL) and Google (GOOG) for
    Jan 1, 2010 to Dec 31, 2010, calculates and prints the correlation
    of the daily returns, and saves the regression graph of Google vs
    Apple as GOOGvsAAPL.png.
    """
    start = datetime(2010, 1, 1)
    end = datetime(2010, 12, 31)

    # Order matters: the graph treats data[0] as Apple and data[1] as
    # Google.
    data = [
        get_yahoo_data(build_request_url(symbol, start, end))
        for symbol in ("AAPL", "GOOG")
    ]

    # The correlation must be printed, not just computed.
    correlation = calculate_stock_correlation(data)
    print('Correlation of GOOG and AAPL daily returns: %s' % correlation)

    graph_stock_regression(data, 'GOOGvsAAPL.png')
if __name__ == "__main__":
    # Script entry point: call main() only when this module is run
    # as a script.  The assignment asks that this guard be left as is.
    main()