-
Notifications
You must be signed in to change notification settings - Fork 1
/
scrape_app.py
111 lines (86 loc) · 4.03 KB
/
scrape_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pandas as pd
import requests
from bs4 import BeautifulSoup
from urllib2 import urlopen
from math import pi
from bokeh.plotting import figure, show, output_file, save
# from bokeh.sampledata.stocks import MSFT
from bokeh.io import output_notebook
from bokeh.models.widgets import TextInput
from bokeh.io import vform
from testing.tweepy_search import get_twitter_data
from testing.tfidf_tweets import create_d3_list
from flask import Flask, render_template, request
app = Flask(__name__)
# ---------- DEFINE FUNCTIONS ----------
def scrape_page(sym):
url = 'http://www.nasdaq.com/symbol/{0}/historical'.format(sym.strip())
try:
company_info = []
content = urlopen(url).read()
soup = BeautifulSoup(content)
titles = soup.select('div#quotes_content_left_pnlAJAX table tbody tr td')
list_of_numbers = []
for row in titles: # parses all the values returned from beautiful soup pull
if row.text.strip() != '':
list_of_numbers.append(row.text.strip())
formatted_list = []
# Need to skip the first row of data becuase it is not useful...
for n in xrange(5, len(list_of_numbers)/6): # creates an n by 6 list for each company
#formatted_list.append([list_of_numbers[n * 6], list_of_numbers[n * 6 + 1], list_of_numbers[n * 6 + 2], list_of_numbers[n * 6 + 3], list_of_numbers[n * 6 + 4], list_of_numbers[n * 6 + 5]])
company_info.append(['aapl', list_of_numbers[n * 6],
float(list_of_numbers[n * 6 + 1]),
float(list_of_numbers[n * 6 + 2]),
float(list_of_numbers[n * 6 + 3]),
float(list_of_numbers[n * 6 + 4]),
int(list_of_numbers[n * 6 + 5].replace(",",''))])
return company_info
except:
return "Could not find company"
def render_table(sym):
company_info = scrape_page(sym)
columns = ['name', 'date', 'open', 'high', 'low', 'close', 'volume']
df = pd.DataFrame(company_info, columns=columns)
df['date'] = pd.to_datetime(df['date']).apply(pd.datetools.normalize_date)
text_input = TextInput(value=sym.upper(), title="Label:")
mids = (df['open'] + df['close'])/2
spans = abs(df['open'] - df['close'])
inc = df['close'] > df['open']
dec = df['close'] < df['open']
w = 12*60*60*1000 # half day in ms
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=750, plot_height=300, toolbar_location="left")
p.title = sym.upper() + " Candlestick"
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3
p.segment(df.date, df.high, df.date, df.low, color="black")
p.rect(df.date[inc], mids[inc], w, spans[inc], fill_color="#D5E1DD", line_color="black")
p.rect(df.date[dec], mids[dec], w, spans[dec], fill_color="#F2583E", line_color="black")
output_file("templates/candlestick.html", title="candlestick.py example")
save(p)
return "candlestick.html"
def get_com_name(sym):
df = pd.read_csv('data/companylist.csv')
try:
return df[df['Symbol']==sym.strip().upper()]['Name'].values[0]
except:
return 'Nada.....'
# ---------- START APP ----------
@app.route('/')
def form():
return render_template('form.html')
@app.route('/display', methods=['POST'])
def display():
sym = str(request.form['user_input']).strip().upper()
com_name = get_com_name(sym)
link_to_table = render_table(sym)
historical_url = 'http://www.nasdaq.com/symbol/{0}/historical'.format(sym.strip())
d3_data = create_d3_list(sym)[:30]
return render_template('display.html',
sym=sym,
link_to_table=link_to_table,
historical_url=historical_url,
com_name=com_name,
d3_data=d3_data)
if __name__ == "__main__":
app.run(host='0.0.0.0', port=8080, debug=True)