def process_ibd_50(self, msg, stocks):
    # Parse an IBD 50 notification: field 1 is the date, the last word of
    # field 2 is the URL of the posted list.
    print 'Process IBD 50'
    date = msg.split(',')[1]
    words = msg.split(',')[2].split(' ')
    url = words[-1]
    print date, url
    soup = utils.get_url_soup(url)
    post = soup.body.find('p', attrs={'id': 'posttext'})
    items = post.contents
    for idx, item in enumerate(items):
        # Skip the header, the tag nodes at odd indices, and empty strings.
        if (idx == 0) or (idx % 2 == 1) or len(item) < 2:
            continue
        fields = item.lstrip('\n').split('\t')
        num = fields[0]
        sym = fields[1]
        name = fields[2]
        print sym, name
        if sym in stocks:
            entry = stocks[sym]
            if 'name' not in entry:
                entry['name'] = name
            if 'ibd 50' in entry:
                if date not in entry['ibd 50']:
                    entry['ibd 50'].append(date)
            else:
                entry['ibd 50'] = [date]
        else:
            entry = {}
            entry['name'] = name
            entry['ibd 50'] = [date]
            stocks[sym] = entry
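# Illustrative only: a self-contained sketch of the message parsing and the
# dict-of-entries bookkeeping used by process_ibd_50 above, with the network
# scrape omitted and a made-up symbol/name pair standing in for one list row.
def _demo_ibd_50_entry():
    stocks = {}
    msg = 'IBD 50,2016-01-15,list posted at http://example.com/ibd50'
    date = msg.split(',')[1]
    url = msg.split(',')[2].split(' ')[-1]
    print date, url
    sym, name = 'AAPL', 'Apple Inc'
    entry = stocks.setdefault(sym, {})
    entry.setdefault('name', name)
    entry.setdefault('ibd 50', [])
    if date not in entry['ibd 50']:
        entry['ibd 50'].append(date)
    # stocks == {'AAPL': {'name': 'Apple Inc', 'ibd 50': ['2016-01-15']}}
    return stocks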
def get_classification(self, sym):
    # Scrape the finviz quote page title block: the company name plus the
    # first two classification links (sector and industry).
    url = 'http://finviz.com/quote.ashx?t=' + sym
    soup = utils.get_url_soup(url)
    table = soup.body.find_all('table', attrs={'class': 'fullview-title'})[0]
    rows = table.find_all('tr')
    entries = rows[2].find_all('a')
    return rows[1].find('b').contents[0], entries[0].contents[0], entries[1].contents[0]
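# Illustrative only: the same BeautifulSoup extraction pattern used by
# get_classification, run against a made-up snippet shaped like the finviz
# title block (the live page markup may differ).
def _demo_classification():
    from bs4 import BeautifulSoup
    demo_html = """
    <table class="fullview-title">
      <tr><td>CSCO</td></tr>
      <tr><td><b>Cisco Systems Inc.</b></td></tr>
      <tr><td><a href="#">Technology</a> | <a href="#">Communication Equipment</a></td></tr>
    </table>
    """
    soup = BeautifulSoup(demo_html, 'html.parser')
    table = soup.find_all('table', attrs={'class': 'fullview-title'})[0]
    rows = table.find_all('tr')
    entries = rows[2].find_all('a')
    # -> ('Cisco Systems Inc.', 'Technology', 'Communication Equipment')
    return rows[1].find('b').contents[0], entries[0].contents[0], entries[1].contents[0]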
def screen(self, base_url):
    # Walk a paginated screener: get_total() gives the total row count,
    # get_result() the symbols on the current page, and '&r=' is the
    # 1-based row offset of the next page to fetch.
    url = base_url
    soup = utils.get_url_soup(base_url)
    total = self.get_total(soup)
    sym_list = self.get_result(soup)
    print sym_list
    num = len(sym_list) + 1
    # Use <= so the final row is still fetched when it starts a new page.
    while num <= total:
        url = base_url + '&r=' + str(num)
        print url
        soup = utils.get_url_soup(url)
        syms = self.get_result(soup)
        num = num + len(syms)
        print syms
        sym_list = sym_list + syms
        time.sleep(1)
    return sym_list
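# Illustrative only: the pagination pattern screen() relies on, with a fake
# fetch standing in for utils.get_url_soup/get_result and made-up page sizes.
def _demo_screen_paging(total=7, page_size=3):
    def fake_fetch(offset):
        # Pretend each page returns up to page_size symbols starting at offset.
        return ['SYM%d' % i for i in range(offset, min(offset + page_size, total + 1))]
    sym_list = fake_fetch(1)
    num = len(sym_list) + 1
    while num <= total:
        syms = fake_fetch(num)
        num = num + len(syms)
        sym_list = sym_list + syms
    return sym_list  # ['SYM1', 'SYM2', ..., 'SYM7']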
def basic(self, sym):
    # Dump the main content blocks of the profile page for the given symbol.
    url = self.sym2url[sym]
    print url
    soup = utils.get_url_soup(url)
    #print soup
    stock_content = soup.body.find('div', attrs={'class': 'stockContent'})
    print stock_content
    company_content = soup.body.find('div', attrs={'class': 'companyContent'})
    print company_content
    group_leaderships = soup.body.find_all('div', attrs={'class': 'group_leadership_block'})
    print group_leaderships[0]
    print group_leaderships[1]
def basic(self, sym):
    # Dump the WSJ quote page for a quick look at its structure.
    url = 'http://quotes.wsj.com/' + sym.upper()
    print url
    soup = utils.get_url_soup(url)
    print soup
def get_quotes_nasdaq(self, sym):
    # Scrape the NASDAQ historical-quotes table and return it as an OHLCV
    # DataFrame indexed by 'YYYY-MM-DD' date strings.
    url = self.form_url_nasdaq(sym)
    print url
    try:
        soup = utils.get_url_soup(url)
    except Exception:
        print 'Request for', url, 'failed'
        return None
    quote_div = soup.body.find('div', attrs={'id': 'historicalContainer'})
    tbody = quote_div.find('tbody')
    if tbody is None:
        return None
    tr = tbody.find_all('tr')
    lines = []
    for item in tr:
        td = item.find_all('td')
        line = []
        for i in td:
            tokens = i.contents[0].splitlines()
            if (len(tokens) == 1) or (tokens[1] == ''):
                continue
            line.append(tokens[1].strip())
        lines.append(line)
    # The page lists newest first; reverse to chronological order.
    lines = lines[::-1]
    dates = []
    opens = []
    highs = []
    lows = []
    closes = []
    volumes = []
    for line in lines:
        if len(line) == 0:
            continue
        if line[0] == '16:00':
            # Intraday row for the current session; skip it.
            #dates.append(time.strftime('%Y-%m-%d'))
            continue
        if line[1] == 'N/A' or line[2] == 'N/A' or line[3] == 'N/A' or line[4] == 'N/A':
            continue
        # Convert MM/DD/YYYY to YYYY-MM-DD.
        tmp = line[0].split('/')
        dates.append(tmp[2] + '-' + tmp[0] + '-' + tmp[1])
        opens.append(float(line[1].replace(',', '')))
        highs.append(float(line[2].replace(',', '')))
        lows.append(float(line[3].replace(',', '')))
        closes.append(float(line[4].replace(',', '')))
        volumes.append(int(line[5].replace(',', '')))
    df = pd.DataFrame(index=dates)
    df['open'] = opens
    df['high'] = highs
    df['low'] = lows
    df['close'] = closes
    df['volume'] = volumes
    return df
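# Illustrative only: a made-up DataFrame with the same shape as the one
# get_quotes_nasdaq returns (index of 'YYYY-MM-DD' strings, columns
# open/high/low/close/volume), plus one example of downstream use.
def _demo_quotes_frame():
    import pandas as pd
    df = pd.DataFrame(index=['2016-01-04', '2016-01-05', '2016-01-06'])
    df['open'] = [26.41, 26.30, 25.90]
    df['high'] = [26.55, 26.45, 26.10]
    df['low'] = [26.05, 26.10, 25.60]
    df['close'] = [26.20, 26.35, 25.75]
    df['volume'] = [21000000, 18000000, 25000000]
    # e.g. day-over-day percentage change of the close
    df['close_chg'] = df['close'].pct_change()
    return df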