def parse_earnings_date(self,data,):
    '''Store the earnings date of every ticker in data under the key 'Date'.

    data maps ticker -> dict; it is updated in place and also returned.
    The date is written as '<year><3-letter month><2-digit day>'. 'N/A' is
    stored when the fetched page is empty or holds no
    'Earnings Calendar for' line.
    '''

    print('parsing earnings dates')

    tickers = sorted(data.keys())

    for i, ticker in enumerate(tickers):

        if i % 10 == 0:
            print('%s/%s %s' %(i+1, len(tickers), ticker))

        url = 'http://biz.yahoo.com/research/earncal/%s/%s.html' %(ticker[0], ticker.lower())
        lines = screener.finance().read_url(url, ticker)

        ## Reset for every ticker. Previously a page without the marker line
        ## silently reused the date parsed for the preceding ticker (or raised
        ## NameError when it happened for the first ticker).
        results = 'N/A'

        ## A separate index name keeps the ticker counter i intact.
        for j, line in enumerate(lines):
            if 'Earnings Calendar for' in line:
                ## The date text is on the following line, up to the first tag.
                index2 = lines[j+1].index('<')
                results = lines[j+1][:index2]
                year = int(results[-4:])
                month = results[:3]
                day = int(results[-8:-6])
                results = '%s%s%s' %(year, month, str(day).zfill(2))

        data[ticker]['Date'] = '%s' %(results)

    return data
def parse_ownership(self, d):
    '''Print the major-holder table cells found for every ticker in d.

    d is only iterated for its keys and is returned unchanged; the cells
    (td elements with class "yfnc_tabledata1") are printed, not stored.
    '''

    ## Compiled once; the pattern does not depend on the ticker or the line.
    p = re.compile(
        r'<table.*?><tr><th.*?>Holder</th><th.*?>Shares</th>.*?(<tr>.*?</tr>)</table>'
        )

    for ticker in d:

        url = 'https://finance.yahoo.com/q/mh?s={}+Major+Holders'.format(ticker)
        lines = screener.finance().read_url(url, '0')

        for line in lines:
            for match in p.finditer(line):
                ## Name the parser explicitly so the result does not depend on
                ## which optional parsers happen to be installed.
                soup = BeautifulSoup(match.group(0), 'html.parser')
                letters = soup.find_all("td", class_="yfnc_tabledata1")
                print(letters)
                for element in letters:
                    print(element)

    return d
def parse_financial_highlights(self,ticker):
    '''Return the payout ratio of ticker as a fraction, or 'N/A'.

    The value is read from the line following '<td>Payout Ratio</td>'.
    'N/A' is returned when the page reports 'NA' and also when the row is
    not present at all (previously that case raised UnboundLocalError).
    '''

    url = 'http://moneycentral.msn.com/investor/invsub/results/hilite.asp?Symbol=%s' %(ticker)
    lines = screener.finance().read_url(url, ticker)

    payout_ratio = 'N/A'

    for i, line in enumerate(lines):

        if '<td>Payout Ratio</td>' in line:

            index1 = lines[i+1].index('<td>')+len('<td>')
            index2 = lines[i+1].index('</td>')
            s = lines[i+1][index1:index2]
            if s == 'NA':
                payout_ratio = 'N/A'
            else:
                ## the page gives a percentage, e.g. '35.2%'
                payout_ratio = float(s.replace('%',''))/100.

    return payout_ratio
def parse_historical_prices(self, url):
    '''Return {year: sum of dividends} parsed from a two-column CSV at url.

    The first line is skipped as a header. Every other non-blank line must
    be '<date>,<dividend>' with the year in the first four characters of
    the date.
    '''

    print(url)

    ## e.g. http://real-chart.finance.yahoo.com/table.csv?s=AVV.L&g=v&ignore=.csv

    ## key = year, value = sum of dividends
    d = {}

    lines = screener.finance().read_url(url, '0')

    for line in lines[1:]:
        ## A blank line (e.g. after a trailing newline) cannot be unpacked
        ## into two fields and used to raise ValueError.
        if not line.strip():
            continue
        date, dividend = line.split(',')
        year = int(date[:4])
        dividend = float(dividend)
        if year in d:
            d[year] += dividend
        else:
            d[year] = dividend

    return d
def parse_insidertrading(self,data, tickers):
    '''Fetch the insider-trading page of each ticker and print the activity line.

    data is returned untouched. The offset computed from the '<tr' / '<td'
    markers is not used for anything yet; the lookups still raise
    ValueError when a marker is missing from the line.
    '''

    print('parsing insider trading')

    total = len(tickers)
    for position, ticker in enumerate(tickers):

        print(('\n%s/%s' %(position+1, total), ticker))

        url = 'http://moneycentral.msn.com/investor/invsub/insider/trans.asp?Symbol=%s' %(ticker)
        page = screener.finance().read_url(url, ticker)

        for row in page:

            if 'Recent Insider Trading Activity' not in row:
                continue

            print(row)

            ## NOTE(review): the nesting of these two loops was reconstructed
            ## from a whitespace-collapsed source - confirm they are sequential.
            offset = 0
            for _ in range(3):
                offset += row[offset:].index('<tr')
            for _ in range(7):
                offset += row[offset:].index('<td')

    return data
def parse_statement(
    self,
    url,
    dic,
    statement,
    ):
    '''Parse one financial-statement HTML page into a dict of columns.

    url is fetched through screener.finance().read_url. statement selects
    the kind of page: the values compared against here are 'balance' (no
    reporting period is parsed) and 'income' (an unrecognised period marks
    the statement as not available). dic is not used in this method.

    Returns (dic_out, statementNA, currency). dic_out holds 'period',
    'date' and one list of floats per table row, keyed by the text of the
    row's first cell; values are scaled by the units line
    ('Millions' / 'Thousands').

    Bare names such as stop5, stop and stop_loop are not defined in this
    method; evaluating them presumably raises NameError as a deliberate
    abort - TODO confirm they are not defined at module level.

    NOTE(review): the indentation of this method was reconstructed from a
    whitespace-collapsed source; confirm the loop nesting.
    '''

    print(url)

    ## Reporting periods that are one week off are mapped to the standard
    ## quarter lengths.
    d_periods = {
        '12 Weeks': '13 Weeks',
        '14 Weeks': '13 Weeks',  #'16 Weeks':'13 Weeks',
        '25 Weeks': '26 Weeks',
        '27 Weeks': '26 Weeks',  #'24 Weeks':'26 Weeks','28 Weeks':'26 Weeks','29 Weeks':'26 Weeks',
        '38 Weeks': '39 Weeks',
        '40 Weeks': '39 Weeks',  #'35 Weeks':'39 Weeks','36 Weeks':'39 Weeks',
        '51 Weeks': '52 Weeks',
        '53 Weeks': '52 Weeks',  #'48 Weeks':'52 Weeks',
        ##        '11 Months':'12 Months',
    }

    dic_out = {
        'period': [],
        'date': [],
    }

    lines = screener.finance().read_url(url, '0')

    statementNA = False
    bool_no_financials = False
    for i1 in range(len(lines)):

        if 'No Financials Data Available</div>' in lines[i1]:
            print('No Financials Data Available</div>')
            statementNA = True
            bool_no_financials = True
            ## abort sentinel; the break below is only reached if stop5 exists
            stop5
            break

        if '<table class="dataTable financials" cellspacing="1" cellpadding="0" width="100%">' in lines[
                i1]:

            for i2 in range(i1 + 1, len(lines)):

                ## units line: scale factor and currency
                if '<span class="units">' in lines[i2]:
                    l = lines[i2 + 1].split()
                    s = l[1]
                    d_factors = {'Millions': 1000000., 'Thousands': 1000.}
                    factor = d_factors[s.strip()]
                    ## currency is the text between ' of' and the first tag
                    index1 = str(lines[i2 + 1]).index(' of') + 3
                    index2 = str(lines[i2 + 1]).index('<')
                    currency = str(lines[i2 + 1])[index1:index2].strip()

                ## column header: date (previous line) and period length
                elif '<span class="period">' in lines[i2]:
                    index1 = 0
                    index2 = str(lines[i2 - 1]).index('<')
                    s = str(lines[i2 - 1])[index1:index2].strip()
                    ## when the last two characters are <= 6 the field s[5:-3]
                    ## is decremented by one
                    if int(s[-2:]) <= 6:  ## e.g. FXJ.AX, BBY, BKS
                        s = '%5s%02i%3s' % (
                            s[:5],
                            int(s[5:-3]) - 1,
                            s[-3:],
                        )
                    dic_out['date'] += [s[:-3]]
                    if statement != 'balance':
                        for i3 in range(i2 + 1, len(lines)):
                            if '</span>' in lines[i3]:
                                index1 = 0
                                index2 = lines[i3].index('</span>')
                                ## NOTE(review): the first replace argument may
                                ## originally have been a non-breaking space or
                                ## '&nbsp;' - confirm against the repository.
                                s = lines[i3][index1:index2].strip(
                                ).replace(' ', ' ')
                                if s in list(d_periods.keys()):
                                    s = d_periods[s]
                                if s not in [
                                        '3 Months',
                                        '6 Months',
                                        '9 Months',
                                        '12 Months',
                                        '13 Weeks',
                                        '26 Weeks',
                                        '39 Weeks',
                                        '52 Weeks',
                                        ##                                    '27 Weeks','53 Weeks','25 Weeks',
                                ]:
                                    if statement == 'income':
                                        statementNA = True
                                        break
                                    else:
                                        print(s)
                                        print((lines[i3]))
                                        print((lines[i3]))
                                        print((lines[i3][index1:index2]))
                                        stop
                                l = s.split()
                                dic_out['period'] += [[
                                    int(l[0]),
                                    l[1],
                                ]]
                                break

                ## data row: first cell is the key, the rest are values
                elif '<tr ' in lines[i2]:
                    col1 = True
                    for i3 in range(i2 + 1, len(lines)):
                        if '<td ' in lines[i3]:
                            index1 = lines[i3].index('>') + 1
                            index2 = lines[i3].rindex('<')
                            s = lines[i3][index1:index2].replace(' ', ' ')
                            if col1 == True:
                                key = s
                                dic_out[key] = []
                                col1 = False
                            else:
                                s = s.replace(',', '').replace('(', '').replace(
                                    ')', '')
                                if s == '--':
                                    s = 0
                                value = factor * float(s)
                                ## the sign is taken from the word 'minus' in the line
                                if 'minus' in lines[i3]:
                                    value *= -1
                                dic_out[key] += [value]
                        if lines[i3].strip() == '</tr>':
                            break
                        if '<th>' in lines[i3].strip():
                            break

            break  ## break loop over lines

    ## NOTE(review): currency is only bound by the units line above or by the
    ## branch below; otherwise the return statement raises UnboundLocalError.
    if lines == []:
        if statementNA == True:
            currency = 'N/A'
        else:
            stop_loop
        if bool_no_financials == False:
            stop

    return dic_out, statementNA, currency
def find_candidates_TA(
    self, l_tickers, l_time, months, l_statementNA, d_portfolio, d_ADR,
    ):
    '''Download daily price history per ticker and compute technical indicators.

    l_tickers  tickers to analyse; each is converted to a Yahoo symbol for
               the download but results are stored under the original ticker
    l_time     sequence whose first three items are used as year, month, day
               of the end date; the start date is eleven years earlier
    d_ADR      dict whose values are compared with tickers; the matching key
               is printed instead of the ticker in the final summary
    months, l_statementNA and d_portfolio are not used by the active code.

    Returns (TAcandidates, TAdata). TAcandidates is never appended to by the
    active code, so it is returned empty. TAdata maps ticker -> period ->
    data produced by self.data_conversion, self.MA, self.RSI, self.MFI and
    self.MACD plus the 52-week / 10-year statistics computed here.

    Side effects: prints summary URLs and lists, and writes l_tickers and
    TAcandidates to 'TAcandidates.txt'.

    NOTE(review): the indentation of this method was reconstructed from a
    whitespace-collapsed source; confirm the loop nesting, in particular of
    the summary section at the end.
    '''

    print('finding TA candidates')

    year2 = l_time[0] ; month2 = l_time[1] ; day2 = l_time[2]
    year1 = year2-11 ; month1 = month2 ; day1 = day2

    TAcandidates = []
    TAdata = {}
    l_supports = []
    l_breakouts = []
    l_MA50_increasing = []
    l_MA50_bounce = []
    l_52w_low = []
    l_down10percent_morethansp500 = []

    ##
    ## S&P 500 (benchmark for the 52-week comparison further down)
    ##
    ticker = ticker_yahoo = '^GSPC' ## S&P500
    TAdata[ticker] = {}
    period = 'daily'
    TAdata[ticker][period] = {}
    ## read url
    url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv' %(
        ticker_yahoo, month2-1, day2, year2, month1-1, day1, year1,
        ) ## g=d signifies daily_weekly_monthly graph
    linesd = screener.finance().read_url(url, ticker)
    ## first line is dropped (presumably the CSV header)
    data = TAdata[ticker][period]['raw'] = linesd[1:]
    ## parse lines
    TAdata[ticker][period]['price'] = {
        'date':[], 'open':[], 'high':[], 'low':[], 'close':[], 'volume':[],'adjclose':[],
        }
    TAdata = self.data_conversion(ticker,period,data,TAdata,)
    ## price today
    price_today = TAdata[ticker][period]['price']['adjclose'][-1]
    date_today = TAdata[ticker][period]['price']['date'][-1]
    ## price 52w: same date string with the year reduced by one
    date_52w = '%4s%s' %(int(date_today[:4])-1,date_today[4:])
    price_52w = None
    ## walk backwards from the second-to-last entry
    for i in range(2,len(TAdata[ticker][period]['price']['date'])):
        if TAdata[ticker][period]['price']['date'][-i] <= date_52w:
            price_52w = TAdata[ticker][period]['price']['adjclose'][-i]
            break
    ## change 52w
    ## NOTE(review): raises TypeError if no date <= date_52w was found
    ## (price_52w is still None).
    sp500_52w_change = (price_today-price_52w)/price_52w

    for ticker in l_tickers:

        ticker_FA = ticker

        if ticker[-3:] in [
            '.BO', ## India not on Yahoo
            ]:
            continue
        if ticker == 'INGC.BO':
            continue
        if ticker == 'HUVR.BO':
            continue

        ##
        ## convert ticker to the Yahoo symbol
        ##
        if '.' in ticker and ticker[-2:] in ['.A','.B',] and ticker[-2:] not in ['.O']:
            index = ticker.index('.')
            ticker = ticker[:index]+'-'+ticker[index+1:]
        ## NOTE(review): the next four replacements act on every lowercase
        ## 'a'/'b' in the ticker, not only on a share-class suffix.
        ticker = ticker.replace('.a','-a')
        ticker = ticker.replace('.b','-b')
        ticker = ticker.replace('b','-B') ## HUBb, NOVO-B.CO
        ticker = ticker.replace('a','-A') ## BFa
        if ':' in ticker:
            index = ticker.index(':')
            if ticker[:index] == 'CA' and '.' in ticker:
                ## NOTE(review): the result of replace() is discarded, and
                ## stop is an undefined name (aborts with NameError).
                ticker.replace('.','-')
                stop
            ticker = ticker_conversion.unknown2yahoo(ticker)
        elif '.' in ticker:
            ticker = ticker_conversion.unknown2yahoo(ticker)
        ticker = ticker.replace('..','.') ## RB..L

        ## download under the Yahoo symbol, store under the original ticker
        ticker_yahoo = ticker
        ticker = ticker_FA

        ##
        ## parse historical data
        ##
        TAdata[ticker] = {}

        ## daily
        url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv' %(
            ticker_yahoo, month2-1, day2, year2, month1-1, day1, year1,
            ) ## g=d signifies daily_weekly_monthly graph
        linesd = screener.finance().read_url(url, ticker)

        ## fp is not used below
        fp = 'urls/%s' %(url.replace(':','').replace('/','').replace('.','').replace('?',''))

        ## no data (TAdata[ticker] stays an empty dict in that case)
        if linesd == ['']:
            continue

        ## (weekly and monthly downloads were disabled in the original source)

        TAdata[ticker]['daily'] = {
            'raw':linesd[1:],
            }

        ## find TA candidates
        periods = list(TAdata[ticker].keys())
        TAcandidate = True
        for period in [
            'daily',
            ## 'weekly','monthly',
            ]:

            TAdata[ticker][period]['price'] = {
                'date':[], 'open':[], 'high':[], 'low':[], 'close':[], 'volume':[],'adjclose':[],
                }

            data = TAdata[ticker][period]['raw']
            n = len(data)

            TAdata = self.data_conversion(ticker,period,data,TAdata,)

            ## calculate MA
            if period == 'daily':
                TAdata, MA50, MA200, l_MA50_increasing, l_MA50_bounce = self.MA(
                    ticker,period,TAdata,l_MA50_increasing,l_MA50_bounce,
                    )
                TAdata[ticker][period]['MA50'] = MA50
                TAdata[ticker][period]['MA200'] = MA200

            price_today = TAdata[ticker][period]['price']['adjclose'][-1]
            date_today = TAdata[ticker][period]['price']['date'][-1]

            ## price one year back (first entry at or before that date)
            date_52w = '%4s%s' %(int(date_today[:4])-1,date_today[4:])
            price_52w = None
            for i in range(2,len(TAdata[ticker][period]['price']['date'])):
                if TAdata[ticker][period]['price']['date'][-i] <= date_52w:
                    price_52w = TAdata[ticker][period]['price']['adjclose'][-i]
                    break
            ## less than a year of history: skip this period
            if price_52w == None:
                continue

            ## price ten years back, may stay None
            date_10y = '%4s%s' %(int(date_today[:4])-10,date_today[4:])
            price_10y = None
            for i in range(2,len(TAdata[ticker][period]['price']['date'])):
                if TAdata[ticker][period]['price']['date'][-i] <= date_10y:
                    price_10y = TAdata[ticker][period]['price']['adjclose'][-i]
                    break

            ## all prices of the last year (the latest entry is not included
            ## because the index starts at 2)
            l_prices_52w = []
            for i in range(2,len(TAdata[ticker][period]['price']['date'])):
                if TAdata[ticker][period]['price']['date'][-i] >= date_52w:
                    l_prices_52w += [TAdata[ticker][period]['price']['adjclose'][-i]]
                    continue

            if price_52w:
                change_52w = ( price_today - price_52w ) / price_52w
                TAdata[ticker][period]['change_52w'] = round(100*change_52w,0)
            else:
                change_52w = None

            if price_10y:
                change_10y = (price_today-price_10y)/price_10y
                TAdata[ticker][period]['change_10y'] = round(100*change_10y,0)
            else:
                change_10y = None

            ## NOTE(review): min()/max() raise ValueError if l_prices_52w is empty.
            price_52w_min = min(l_prices_52w)
            price_52w_max = max(l_prices_52w)
            above_52w = (price_today-price_52w_min)/price_52w_min
            below_52w_max = (price_today-price_52w_max)/price_52w_max
            TAdata[ticker][period]['above_52w'] = round(100*above_52w,0)
            TAdata[ticker][period]['below_52w_max'] = round(100*below_52w_max,0)

            ## within 5% of the 52-week low
            if price_today < 1.05*price_52w_min:
                l_52w_low += [ticker]

            ## dropped more than 10% relative to market
            if (price_today-price_52w)/price_52w < sp500_52w_change-0.1:
                l_down10percent_morethansp500 += [ticker]

            ## find support and resistance
            ## conflicts if support or resistance while paying out dividend...
            if period == 'daily':
                l_supports, l_breakouts = self.support_and_resistance(
                    ticker,TAdata, l_supports,l_breakouts,
                    )

            ## find gap support/resistance (l_gaps is not used below)
            if period == 'daily':
                l_gaps = self.gaps(ticker,data,)

            ## calculate RSI
            if period == 'daily':
                TAdata = self.RSI(ticker,period,TAdata)

            ## calculate MFI
            if period == 'daily':
                TAdata = self.MFI(ticker,period,TAdata)

            ## calculate MACD
            TAdata = self.MACD(ticker,period,TAdata,)

            ## evaluate MACD (bullish): DIV rising while the previous value is
            ## still negative. TAcandidate is not read after this loop.
            if period != 'monthly' and not (TAdata[ticker][period]['MACD']['DIV'][-1] > TAdata[ticker][period]['MACD']['DIV'][-2] and TAdata[ticker][period]['MACD']['DIV'][-2] < 0):
                TAcandidate = False
            elif period == 'monthly' and not TAdata[ticker][period]['MACD']['DIV'][-2] < 0:
                TAcandidate = False

        ## end of loop over periods
        ## (the combined daily/weekly/monthly MACD evaluation that filled
        ## TAcandidates was disabled in the original source)

    ##
    ## summary output: one Yahoo quote-list URL per indicator
    ##
    d_TA = {
        'MACD':TAcandidates,
        'bouncing at support level':l_supports,
        'breaking resistance level':l_breakouts,
        'MA50 increasing':l_MA50_increasing,
        'MA50 bounce':l_MA50_bounce,
        }
    for s_TA in list(d_TA.keys()):
        l_TA = d_TA[s_TA]
        yahoo = 'http://finance.yahoo.com/q/cq?d=v1&s='
        for ticker in l_TA:
            yahoo += '%s+' %(ticker)
        print('\n')
        print(s_TA)
        print((yahoo[:-1]))
    print('\n')
    print(('l_52w_low', l_52w_low))
    print(('l_down10percent_morethansp500', l_down10percent_morethansp500))
    print('\n')

    ## NOTE(review): l_TA is whatever list the loop above handled last;
    ## confirm whether this section belongs inside that loop.
    s_CAPS = ''
    for ticker in l_TA:
        if ticker in list(d_ADR.values()):
            ## look up the d_ADR key whose value is this ticker
            for ADR,v in list(d_ADR.items()):
                if v == ticker:
                    break
            ticker_US = ADR
        else:
            ticker_US = ticker
        s_CAPS += '%s,' %(ticker_US)
    print((s_CAPS[:-1]))

    ## NOTE(review): the file is not closed if write() raises.
    fd = open('TAcandidates.txt', 'w')
    fd.write('%s\n%s' %(l_tickers, TAcandidates))
    fd.close()

    return TAcandidates, TAdata
def parse_currencies(self):
    '''Look up the USD rate of a fixed list of currencies.

    Returns (rates by currency name, rates by currency code). One quote is
    fetched per currency; field 1 of the first returned CSV line is taken
    as the rate. A rate of zero prints diagnostics and then aborts through
    the undefined name stop_zero_rate. After the downloads a number of
    alias keys (plural forms, alternative spellings, GBX/TRL/ZAX) are added.
    '''

    ## flat list of alternating name, ISO 4217 code
    ## NOTE(review): which entries are active was reconstructed from a
    ## whitespace-collapsed source - confirm against the repository.
    names_and_codes = [
        ## most traded currencies
        'Euro','EUR',
        'Japanese Yen','JPY',
        'British Pounds','GBP',
        'Swiss Francs','CHF',
        'Australian Dollars','AUD',
        ## Asia
        'Chinese Renminbi','CNY', ## China
        'Taiwanese Dollars','TWD',
        'Hong Kong Dollars','HKD',
        'Philippine Pesos','PHP',
        'Singapore Dollars','SGD',
        'Indonesian Rupiah','IDR', ## Indonesia
        'Indian Rupee','INR',
        'Thai Bahts','THB',
        'South Korean Won','KRW',
        'Malaysian Ringgit','MYR', ## Malaysia
        ## Europe
        'Swedish Krona','SEK',
        'Norwegian Krone','NOK',
        'Danish Krone','DKK',
        ## 'Iceland Krona','ISK',
        'Icelandic Kronas','ISK',
        'Hungarian Forint','HUF',
        'Czech Korunas','CZK',
        ## 'Estonian Kroon','EEK', ## Euro 2011-
        'Lithuanian Lita','LTL', # Euro 2015-
        'Russian Rouble','RUB', ## msn
        'Turkish Lira','TRY',
        'Polish Zlotys','PLN',
        ## North America
        'Canadian Dollars','CAD',
        'Mexican Pesos','MXN',
        ## Oceania
        'N.Z. Dollars','NZD',
        ## South America
        'Colombian Peso','COP',
        'Argentine Peso','ARS',
        'Chilean Peso','CLP',
        'Brazilian Real','BRL',
        'Peruvian Nuevo Sol','PEN',
        ## 'New Sol','PEN',
        'Bolivar','VEB',
        ## Middle East
        'Israeli Shekel','ILS', ## Israel
        'Kuwait Dinars','KWD',
        'Saudi Arabian Riyals','SAR',
        'Qatari Rials','QAR',
        ## Africa
        'South African Rand','ZAR', ## South Africa (ZAR...)
        'Nigerian Naira','NGN',
        ]

    rates_by_code = {}
    rates_by_name = {}

    for name, symbol in zip(names_and_codes[0::2], names_and_codes[1::2]):

        url = 'http://download.finance.yahoo.com/d/quotes.csv?s=USD%s=X&f=sl1d1t1c1ohgv&e=.csv' %(symbol,)
        quote = screener.finance().read_url(url, '0')[0]
        fields = str(quote).split(',')
        rate = float(fields[1])

        if rate == 0:
            for item in (symbol, rate, url, quote):
                print(item)
            ## undefined name: aborts with NameError
            stop_zero_rate

        rates_by_code[symbol] = rate
        rates_by_name[name] = rate

        padding = (24-len(name))*'-'
        print('%s%s %3s %7.2f' %(name, padding, symbol, rate))

    ## the base currency itself
    rates_by_name['U.S. Dollars'] = 1.
    rates_by_code['USD'] = 1.
    rates_by_name['US Dollars'] = 1.

    ## alternative codes; GBX is one hundredth of GBP
    rates_by_code['TRL'] = rates_by_code['TRY']
    rates_by_code['ZAX'] = rates_by_code['ZAR']
    rates_by_code['GBX'] = rates_by_code['GBP']*100.

    ## every name known so far also gets a key with an 's' appended
    for singular in list(rates_by_name.keys()):
        rates_by_name['%ss' %(singular)] = rates_by_name[singular]

    ## alternative spellings
    for alias, known in (
        ('Chinese Renminbi (Yuan)s', 'Chinese Renminbi'),
        ('Taiwan Dollars', 'Taiwanese Dollars'),
        ('Turkish New Liras', 'Turkish Lira'),
        ('Philippines Pesos', 'Philippine Pesos'),
        ('New Zealand Dollars', 'N.Z. Dollars'),
        ('Won', 'South Korean Won'),
        ):
        rates_by_name[alias] = rates_by_name[known]

    return rates_by_name, rates_by_code
def parseKeyRatios(self, url, ticker):
    '''Parse a ten-year key-ratio table into a dict of lists.

    Returns {} when the page is empty / has no component data, or when
    'Revenue' still has a missing value after interpolation. Otherwise
    every key in the tuple below maps to ten values, newest year first
    (the trailing TTM column is dropped), scaled so that 'Bil' rows are
    multiplied by 1000 and 'Mil' rows by 1. Missing cells are '-'; a single
    missing cell per row is filled from its neighbours. The keys 'DATE',
    'SHARES OUTSTANDING' and 'SALES' are added at the end.

    Raises ValueError when a row does not have exactly eleven value cells
    and AttributeError when an expected row is absent from the page.
    '''

    print(url)

    dic_10year = {}

    d_factors = {'Mil': 1., 'Bil': 1000.}

    lines = screener.finance().read_url(url, ticker)
    s_html = '\n'.join(lines)

    ## ticker wrong or simply no data on morningstar.com
    if s_html == '' or s_html == '?({"componentData":null})':
        return dic_10year

    for k in (
        'Revenue',
        'Operating Income',
        'Net Income',
        'Earnings Per Share',
        'Dividends',
        'Shares',
        'Book Value Per Share',
        'Operating Cash Flow',
        'Cap Spending',
        'Free Cash Flow',
        'Free Cash Flow Per Share',
        'Working Capital',
        ):

        ## \w\w\w is the currency code
        p = r'>{} <span>\w\w\w.*?<\\/tr\>'.format(k)
        s_tr = re.search(p, s_html).group()

        ## rows without a Mil/Bil unit (e.g. per-share values) are not scaled
        try:
            factor = d_factors[re.search(r'[MB]il', s_tr).group()]
        except AttributeError:
            factor = 1

        l = []
        for match in re.finditer(r'>([-\d,.]+|—)<', s_tr):
            s = match.group(1)
            if s == '—':
                l.append('-')
            else:
                l.append(factor * float(s.replace(',', '')))

        ## Do not include TTM.
        if len(l) != 11:
            print(l)
            print(s_tr)
            ## was a bare undefined name (stop) that aborted with NameError
            raise ValueError(
                'expected 11 values for %s, found %s' %(k, len(l)))
        l = l[:-1]
        l.reverse()

        if l.count('-') == 1:
            i = l.index('-')
            ## At either end only one neighbour exists, so copy it. (For
            ## i == 0 the old code averaged with l[-1], the other end of
            ## the list.)
            if i == 9:
                l[9] = l[8]
            elif i == 0:
                l[0] = l[1]
            ## otherwise average of neighbouring years
            else:
                l[i] = (l[i - 1] + l[i + 1]) / 2

        if k == 'Revenue' and '-' in l:
            print(ticker, k, l)
            return {}

        print(k, len(l), l)

        dic_10year[k] = l

    s_tr = re.search(r'>Shares <span>[MB]il.*?<\\/tr\>', s_html).group()
    factor = d_factors[re.search(r'[MB]il', s_tr).group()]
    l_shares = list(
        factor * float(x.replace(',', ''))
        for x in reversed(re.findall(r'>([\d,]+)<', s_tr)))[1:]

    ## The header cells have to be searched in the whole document; the old
    ## code searched s, the last value cell, and always found nothing.
    p = r'<th scope=\\"col\\" align=\\"right\\" id=\\"Y\d+\\">(\d\d\d\d-\d\d)<\\/th>'
    l_dates = list(reversed(re.findall(p, s_html)))

    dic_10year['DATE'] = l_dates
    dic_10year['SHARES OUTSTANDING'] = l_shares
    dic_10year['SALES'] = dic_10year['Revenue']

    return dic_10year
def parse_company_report(self,ticker,rate,):
    '''Parse the company report page of ticker into a dict.

    rate divides the market capitalisation (presumably an exchange rate to
    the reference currency - confirm against the caller).

    The returned dict always has 'ma50', 'ma200' and 'relative strength'
    (default 'N/A'). Depending on what the page contains it also gets
    'price', 'name', 'beta', 'growth_sales_5y', 'growth_income_5y', 'mc',
    'div_yield' and 'debt_equity_ratio'. Values reported as 'NA' become
    'N/A'.

    Raises ValueError when the market capitalisation is reported as 'NA'
    or when an expected tag is missing from a matched line.
    '''

    def td_text(text):
        ## text between the first '<td>' and the first '</td>'
        return text[text.index('<td>')+4:text.index('</td>')]

    def first_cell(text):
        ## text between the first '>' and the first '</td>'
        index2 = text.index('</td>')
        index1 = text[:index2].index('>')+1
        return text[index1:index2]

    def number_or_na(s):
        ## 'NA' -> 'N/A', otherwise a float without thousands separators
        if s == 'NA':
            return 'N/A'
        return float(s.replace(',',''))

    url = 'http://moneycentral.msn.com/companyreport?Symbol='+ticker
    lines = screener.finance().read_url(url, ticker)

    d = {
        'ma50':'N/A',
        'ma200':'N/A',
        'relative strength':'N/A',
        }

    for i, line in enumerate(lines):

        if 'Last Price' in line and '<meta ' not in line:
            d['price'] = float(td_text(lines[i+1]).replace(',',''))

        if '50 Day Moving Average' in line:
            d['ma50'] = number_or_na(td_text(lines[i+1]))

        if '200 Day Moving Average' in line:
            d['ma200'] = number_or_na(td_text(lines[i+1]))

        if ': Company Report</' in line:
            index2 = line.index(': Company Report</')
            index1 = line[:index2].rindex('>')+1
            d['name'] = line[index1:index2]

        if 'Volatility (beta)' in line:
            index2 = lines[i+1].index('</')
            index1 = lines[i+1][:index2].rindex('>')+1
            d['beta'] = number_or_na(lines[i+1][index1:index2])

        if '<td>Sales</td>' in line:
            s = first_cell(lines[i+2])
            if s == 'NA':
                print('sales 5y N/A (maybe because negative)')
                growth_sales_5y = 'N/A'
            elif '-' in s or '<span ' in s:
                ## negative growth is recorded as zero
                growth_sales_5y = 0
            else:
                growth_sales_5y = float(s[:-1])
            d['growth_sales_5y'] = growth_sales_5y

        if '<td>Income</td>' in line:
            s = first_cell(lines[i+2])
            if s == 'NA':
                print('income 5y N/A (maybe because negative)')
                growth_income_5y = 'N/A'
            elif '-' in s:
                growth_income_5y = 0
            else:
                growth_income_5y = float(s[:-1])
            d['growth_income_5y'] = growth_income_5y

        if 'Market Capitalization' in line and '<meta ' not in line:
            index = line.index('Market Capitalization')
            index1 = index+line[index:].index('<td>')+len('<td>')
            index2 = index1+line[index1:].index('</td>')
            if line[index1:index1+2] == 'NA':
                ## was a bare undefined name (stop_mc) that aborted with NameError
                raise ValueError('market capitalisation not available for %s' %(ticker))
            unit = line[index2-3:index2]
            if unit == 'Bil':
                mc = 1000000000.*float(line[index1:index2-4])/rate
            elif unit == 'Mil':
                mc = 1000000.*float(line[index1:index2-4])/rate
            else:
                ## no unit suffix: the whole cell is the number
                mc = .001*float(line[index1:index2])/rate
            d['mc'] = mc

        if '<td>Dividend Yield</td>' in line:
            s = first_cell(lines[i+1])
            if s == 'NA':
                div_yield = 'N/A'
            else:
                div_yield = float(s[:-1].replace(',',''))/100
            d['div_yield'] = div_yield

        if '<td>Debt/Equity Ratio</td>' in line:
            index = line.index('Debt/Equity Ratio')
            index1 = index+line[index:].index('<td>')+len('<td>')
            index2 = index1+line[index1:].index('</td>')
            s = line[index1:index2]
            if s == 'NA':
                debt_equity_ratio = 'N/A'
            else:
                debt_equity_ratio = float(s)
            d['debt_equity_ratio'] = debt_equity_ratio

        if '<td>Last 12 Months</td>' in line:
            s = lines[i+2]
            ## Peel off the enclosing tags one layer at a time. The old loop
            ## computed the two indexes but never shortened s, so it could
            ## not terminate normally.
            while '<' in s:
                index1 = s.index('>')+1
                index2 = s.rindex('<')
                s = s[index1:index2]
            s = s.strip()
            if s == 'NA':
                d['relative strength'] = 'N/A'
            else:
                ## '%%' is only an escape inside %-formatting; the literal
                ## character to remove here is a single '%'
                d['relative strength'] = int(s.replace('%',''))

    return d
def parse_overview(self,ticker,):
    '''Parse name, currency, price, sector, industry and market cap of ticker.

    Returns (name, currencyCode, price, sector, industry, mc, statementNA).
    Fields that are not found keep the empty string they start with;
    statementNA is set when the price is '--', the market cap is '-' or
    either of the two is missing.

    retry_name and stop_loop are not defined in this method; evaluating
    them presumably raises NameError as a deliberate abort - TODO confirm.

    NOTE(review): a second parse_overview(self, ticker, url) appears later
    in this file; if both are in the same class the later one replaces
    this one.
    NOTE(review): the indentation of this method was reconstructed from a
    whitespace-collapsed source; confirm the nesting.
    '''

    ## suffix of the market cap figure -> multiplier
    d_factors = {
        'K':1000,
        'M':1000000,
        'B':1000000000,
        'T':1000000000000,
        }

    name = ''
    mc = ''
    currencyCode = ''
    price = ''
    sector = ''
    industry = ''
    statementNA = False

    url = 'http://investing.businessweek.com/research/stocks/snapshot/snapshot.asp?ticker=%s' %(ticker)
    lines = screener.finance().read_url(url, ticker)

    for i1 in range(len(lines)):

        ## name: text after the itemprop span, upper-cased, with a trailing
        ## '(...)' part removed
        if '<span itemprop="name">' in lines[i1]:
            index1 = lines[i1].index('<span itemprop="name">')+len('<span itemprop="name">')
            index2 = index1+lines[i1][index1:].index('<')
            ## debug output
            print(111,lines[i1])
            s = lines[i1][index1:index2].strip()
            print(222,s)
            if '(' in s:
                s = s[:s.rindex('(')]
            name = s.upper()

        ## price, currency
        if (
            '<div class="dataPoint"><span class="quoteHeading">LAST</span> <span class="quoteData">' in lines[i1]
            or
            '<div class="dataPoint"><span class="quoteHeading">Last</span> <span class="quoteData">' in lines[i1]
            ):
            index1 = lines[i1].index('<span class="quoteData">')+len('<span class="quoteData">')
            index2 = index1+lines[i1][index1:].index('<')
            s = lines[i1][index1:index2].strip()
            ## strip letters and '&', ';', '$' (currency symbols and entities)
            for x in 'abcdefghijklmnopqrstuvwxyz&;$':
                s = s.replace(x,'')
                s = s.replace(x.upper(),'')
            s = s.replace(',','')
            ## NOTE(review): s[0] raises IndexError if nothing is left of s
            if s[0] == '.': s = s[1:] ## e.g. SFr.
            if s == '--':
                statementNA = True
                break
            price = float(s)
            if '<span class="xSmGreyTxt">' in lines[i1]:
                ## index1 still points at the start of the price here
                index2 = index1+lines[i1][index1:].index('</span>')
                index1 = lines[i1][:index2].rindex('>')+1
                s = lines[i1][index1:index2].strip()
                currencyCode = s.upper()
            else:
                print('@@@', lines[i1])
                currencyCode = None

        ## sector, industry: the first two content="..." attributes from the
        ## sector meta tag onwards
        if '<meta name="sector"' in lines[i1]:
            index2 = lines[i1].index('<meta name="sector"')
            index1 = index2+lines[i1][index2:].index('content="')+len(('content="'))
            index2 = index1+lines[i1][index1:].index('"')
            sector = lines[i1][index1:index2]
            index1 = index2+lines[i1][index2:].index('content="')+len(('content="'))
            index2 = index1+lines[i1][index1:].index('"')
            industry = lines[i1][index1:index2]

        ## market cap: value on the next line, last character is the factor
        if '>MARKET CAP<' in lines[i1] or '>Market Cap<' in lines[i1]:
            index1 = lines[i1+1].index('>')+1
            index2 = lines[i1+1].rindex('<')
            factor = lines[i1+1][index2-1]
            s = lines[i1+1][index1:index2-1]
            if s == '-':
                mc = None
                statementNA = True
            else:
                mc = float(s)*d_factors[factor]

    if name == '':
        print(len(lines))
        retry_name

    ## NOTE(review): with lines == [] price and mc are still '', so this
    ## condition cannot be true as written.
    if lines == [] and price != '' and mc != '':
        stop_loop

    if mc == '' or price == '':
        statementNA = True

    return (
        name, currencyCode, price, sector, industry, mc, statementNA,
        )
def parseIncomeStatement(self, url):
    '''Parse an income-statement HTML page into {row label: [three values]}.

    Returns (d, False, currency) on success and (None, True, None) when the
    statement table is not found. 'Period Ending' maps to three years
    (int); every other row maps to three floats multiplied by 1000, with
    values in parentheses negated and '-' kept as the string '-'. Rows with
    at most three cells, and rows containing 'style="display:block', are
    skipped.

    NOTE(review): an empty page returns a bare {} rather than a 3-tuple;
    kept as is because callers are not visible here - confirm what they
    expect.

    Raises AssertionError when a numeric cell is found but the page does
    not state 'All numbers in thousands', and AttributeError when the
    'Currency in ...' text is missing.
    '''

    print(url)

    lines = screener.finance().read_url(url, '0')
    s_html = '\n'.join(lines)

    ## ticker wrong or simply no data
    if s_html == '':
        return {}

    ## DOTALL: the table spans several lines
    r = r'<TABLE class="yfnc_tabledata1".*?><TR><TD>(<TABLE.*?></TABLE>)</TD></TR></TABLE>'
    m = re.search(r, s_html, re.DOTALL)
    if not m:
        return None, True, None
    s_table = m.group(1)

    d = {}
    for match in re.finditer(r'<[tT][rR]>(.*?)</[tT][rR]>', s_table, re.DOTALL):

        row = match.group(0)
        if 'style="display:block' in row:
            continue

        s_tr = row.replace('<strong>','').replace('</strong>','')

        matches2 = re.findall(
            r'<[tT][dD].*?>(.*?)</[tT][dD]>', s_tr, re.DOTALL)
        if len(matches2) <= 3:
            continue
        ## rows with a leading spacer cell: drop that cell
        if len(matches2) >= 5 and 'spacer' in row:
            matches2 = matches2[1:]

        ## row label without any remaining tags
        k = re.sub('<.*?>', '', matches2[0].strip())

        l = []
        for i in range(1,4):
            s = re.sub('<.*?>', '', matches2[i])
            ## NOTE(review): the second replace argument may originally have
            ## been a non-breaking space or '&nbsp;' - confirm.
            s = s.replace(',','').replace(' ','').strip()
            if k == 'Period Ending':
                l.append(int(s[-4:]))
            elif s == '-':
                l.append(s)
            else:
                ## 1000 if all numbers in thousands
                assert 'All numbers in thousands' in s_html
                factor = 1000
                if s[0] == '(' and s[-1] == ')':
                    l.append(-factor*float(s[1:-1]))
                else:
                    l.append(factor*float(s))

        d[k] = l

    ## raw string: '\w' in a normal string literal is an invalid escape
    ## sequence and triggers a warning on current Python versions
    m = re.search(r'>Currency in (\w*)', s_html)
    currency = m.group(1)

    return d, False, currency
def parse_financial_10_year_summary(self,url):

    '''return values in millions

    Scans the page at url for the 'INCOME STATEMENT: 10-YEAR SUMMARY' and
    'BALANCE SHEET: 10 YEAR SUMMARY' tables and collects one list per
    column named in dic_10year_summary. Before returning, every non-empty
    list is reordered so that the 'DATE' strings are in descending order.

    stop is not defined in this method; evaluating it presumably raises
    NameError as a deliberate abort - TODO confirm.

    NOTE(review): the indentation of this method was reconstructed from a
    whitespace-collapsed source; confirm the nesting, in particular of the
    cell handling after the header check.
    '''

    dic_10year_summary = {
        ' ':[],
        'SALES':[],
        'EBIT':[],
        'CURRENT ASSETS':[],
        'CURRENT LIABILITIES':[],
        'SHARES OUTSTANDING':[],
        }
    dic_10year_summary['DATE'] = []

    ## http://investing.money.msn.com/investments/financial-statements?symbol=aap not sorted by year...

    d_factors = {'Mil':1.,'Bil':1000.}

    lines = screener.finance().read_url(url, '0')

    for i1 in range(len(lines)):

        if 'INCOME STATEMENT: 10-YEAR SUMMARY' in lines[i1] or 'BALANCE SHEET: 10 YEAR SUMMARY' in lines[i1]:

            ## column index -> column name, filled from the header cells
            d_cols = {}
            col = 0
            ## whether the scan is currently inside a row / inside a cell
            bool_tr = False
            bool_td = False
            for i2 in range(i1+1,len(lines)):

                if '</table>' in lines[i2]:
                    break
                    ## (not reached: follows the break)
                    bool_tr = bool_td = False
                    d_cols = {}
                    continue
                if '</tr>' in lines[i2]:
                    col = 0
                    bool_tr = False
                    continue
                elif '<tr' in lines[i2]:
                    bool_tr = True
                    continue
                elif '<td' in lines[i2] or '<th' in lines[i2]:
                    bool_td = True
                    continue
                elif '</td>' in lines[i2] or '</th>' in lines[i2]:
                    col += 1
                    bool_td = False
                    continue

                ## only text lines inside a cell of a row are of interest
                if (bool_tr == False or bool_td == False):
                    continue

                ## header
                k = lines[i2].strip().replace('<br />',' ').replace('<br/>',' ')
                if k in list(dic_10year_summary.keys()):
                    d_cols[col] = k

                ## data
                ## NOTE(review): as reconstructed, a header cell also falls
                ## through to the data handling below - confirm whether an
                ## else/continue was lost.
                s = lines[i2].strip()
                if d_cols[col] == 'SHARES OUTSTANDING':
                    if 'Mil' in s or 'Bil' in s:
                        f = float(s[:-len(' Xil')])
                        f *= d_factors[s[-3:]]
                    elif s == '0.00':
                        f = 0.
                    elif s == 'NA':
                        f = 0.
                    ## thousands
                    elif ',' in s and '.' in s:
                        f = float(s.replace(',',''))
                    else:
                        print(url)
                        print(d_cols[col])
                        print(s)
                        stop
                elif d_cols[col] == 'DATE':
                    ## don't add dates again and assume same sequence in both (income/balance) tables
                    if 'BALANCE SHEET: 10 YEAR SUMMARY' in lines[i1]:
                        continue
                    ## last two characters first, so that the strings sort by them
                    f = '%s/%s' %(s[-2:],s[:2],)
                else:
                    ## year
                    if d_cols[col] == ' ':
                        f = int(s[-2:])
                        if f > 90:
                            stop
                            f += 1900
                        else:
                            f += 2000
                    elif s == 'NA':
                        f = 0.
                    ## float
                    else:
                        if 'Mil' in s or 'Bil' in s:
                            f = float(s.replace(',','')[:-len(' Xil')])
                            f *= d_factors[s[-3:]]
                        ## make sure not a ratio with a number from financial stmt later on...
                        else:
                            f = float(s.replace(',',''))

                dic_10year_summary[d_cols[col]] += [f]

            ## break when second table is reached
            if 'BALANCE SHEET: 10 YEAR SUMMARY' in lines[i1]:
                break

    ##
    ## MSN sorts dates and data by month/year; sort by year/date instead
    ##
    l_dates = list(dic_10year_summary['DATE'])
    l_dates.sort()
    l_dates.reverse()
    ## NOTE(review): list.index returns the first match, so duplicate date
    ## strings would map to the same position.
    l_indexes = [dic_10year_summary['DATE'].index(date) for date in l_dates]
    for k in list(dic_10year_summary.keys()):
        if len(dic_10year_summary[k]) == 0:
            continue
        l = [dic_10year_summary[k][index] for index in l_indexes]
        dic_10year_summary[k] = l

    return dic_10year_summary
def parse_overview(self, ticker, url):
    '''Parse name, price, currency, sector, industry, beta and market cap.

    The page at ``url`` is fetched with ``screener.finance().read_url`` and
    scanned line by line for fixed HTML markers.

    Returns the tuple ``(name, currencyCode, price, sector, industry,
    statementNA, mc, beta)``.  Fields that were not found stay ``''``
    (``currencyCode`` defaults to ``'USD'``).  ``statementNA`` is True when
    the name, price or market cap is missing.

    Raises ValueError when the market cap is not quoted in ``Mil.``.
    '''
    import re  ## local import: keeps this block self-contained

    d_factors = {'Mil.': 1000000,}
    ## first run of digits (with thousands separators / decimals) in a cell
    p_number = re.compile(r'\d[\d,]*(?:\.\d*)?|\.\d+')

    name = ''
    mc = ''
    currencyCode = 'USD'
    price = ''
    sector = ''
    industry = ''
    statementNA = False
    beta = ''

    print(url)
    lines = screener.finance().read_url(url, ticker)

    for i1 in range(len(lines)):
        line = lines[i1]

        if '<div id="sectionTitle">' in line:
            nxt = lines[i1 + 1]
            name = nxt[nxt.index('<h1>') + 4:nxt.index('</h1>')].strip()

        ## price, currency
        if '<div class="sectionQuoteDetail">' in line:
            for i2 in range(i1, len(lines)):
                if '<span style="font-size: 23px;">' not in lines[i2]:
                    continue
                nxt = lines[i2 + 1]
                index2 = nxt.index('</span>')
                s = nxt[:index2].replace(',', '')
                if s.strip() == '--':
                    statementNA = True
                else:
                    price = float(s)
                ## the currency code sits in the next <span> on that line
                index1 = nxt.index('<span>', index2) + 6
                index2 = nxt.index('</span>', index1)
                currencyCode = nxt[index1:index2].upper()  ## upper if GBp
                break

        ## sector, industry
        if '<div id="sectionHeaderTopics"><div id="headerTopics">' in line:
            topics = lines[i1 + 5]
            index1 = topics.index('/sectors') + len('/sectors') + 1
            index2 = topics.index('"', index1)
            sector = topics[index1:index2]
            index1 = topics.index('/sectors/industries/')
            index1 += len('/sectors/industries/')
            index1 = topics.index('>', index1) + 1
            index2 = topics.index('<', index1)
            industry = topics[index1:index2]

        ## beta
        if '<td>Beta:</td>' in line:
            nxt = lines[i1 + 1]
            beta = nxt[nxt.index('<strong>') + 8:nxt.index('</strong>')]

        ## market cap
        if '<td>Market Cap' in line:
            factor = 'Mil.'
            if factor not in line:
                raise ValueError(
                    'market cap for %s is not in Mil.: %r' % (ticker, line))
            nxt = lines[i1 + 1]
            s = nxt[nxt.index('<strong>') + 8:nxt.index('</strong>')]
            ## Pull the number out of the cell rather than stripping a fixed
            ## list of currency symbols, so a new symbol cannot break float()
            match = p_number.search(s)
            if match is None:
                ## e.g. '--'
                print('mc', s)
            else:
                mc = float(match.group(0).replace(',', ''))
                mc *= d_factors[factor]

    if name == '':
        statementNA = True
    if mc == '' or price == '' or mc == '--':
        statementNA = True

    return (
        name, currencyCode, price, sector, industry, statementNA, mc, beta,
        )
def parse_CEOcompensation(self, ticker):
    '''Sum the compensation figures on the company officers page.

    The officers page for ``ticker`` is fetched with
    ``screener.finance().read_url``.  Every data row of the "Basic
    Compensation" and "Options Compensation" tables adds its value to the
    total ('--' counts as nothing).  Rows of the basic table also yield a
    link to the executive's own page; those pages are read in order until
    one states the currency after 'Fiscal Year Total, '.

    Returns ``(compensation, currency)``.  ``currency`` is ``''`` when no
    executive page provides it (it used to be left unbound in that case).
    '''
    header_basic = '<div class="moduleHeader"><h3>Basic Compensation</h3></div>'
    header_options = '<div class="moduleHeader"><h3>Options Compensation</h3></div>'
    marker_currency = 'Fiscal Year Total, '

    url = 'http://www.reuters.com/finance/stocks/companyOfficers?symbol=%s&viewId=comp' % (ticker)
    lines = screener.finance().read_url(url, ticker)

    compensation = 0
    currency = ''
    l_urls = []
    i2 = 0
    for i1 in range(len(lines)):
        ## skip the lines already consumed by the previous table
        if i1 < i2:
            continue
        is_options = header_options in lines[i1]
        if header_basic not in lines[i1] and not is_options:
            continue

        ## offset from the <tr> line to the line holding the value cell
        i_add = 3 if is_options else 2

        row = 0
        for i2 in range(i1 + 1, len(lines)):
            if '<tr' in lines[i2]:
                row += 1
                ## the first row is the table header
                if row >= 2:
                    cell = str(lines[i2 + i_add])
                    index2 = cell.index('</td>')
                    index1 = cell[:index2].index('>') + 1
                    s = cell[index1:index2].replace(',', '')
                    if s != '--':
                        compensation += float(s)
                    ## if basic compensation table
                    if not is_options:
                        link = lines[i2 + i_add - 1]
                        index1 = link.index('<a href="') + len('<a href="')
                        index2 = link.index('"', index1)
                        l_urls += ['http://www.reuters.com%s' % (link[index1:index2])]
            if '</table>' in lines[i2]:
                break

        ## the options table is the last one of interest
        if is_options:
            break

    for url_executive in l_urls:
        for line in screener.finance().read_url(url_executive, '0'):
            if marker_currency in line:
                index1 = line.index(marker_currency) + len(marker_currency)
                index2 = line.index('<', index1)
                currency = line[index1:index2]
                break
        if currency != '':
            break

    return compensation, currency
def parse_statement(self, url, dic, statement,):
    '''Parse a "dataTable financials" statement table.

    The page at ``url`` is fetched with ``screener.finance().read_url``.
    ``statement`` selects behaviour: for anything other than ``'balance'``
    the reporting period of each column is read, and for ``'income'`` an
    unrecognised period marks the statement as not available instead of
    halting.  ``dic`` is accepted but not used.

    Returns ``(dic_out, statementNA, currency)`` where ``dic_out`` maps
    ``'date'``, ``'period'`` and each row label to a list of values scaled
    by the page's unit factor.

    NOTE(review): the original code ended every path in an undefined-name
    sentinel (``stop5``, ``stop_loop``, ``stop``), which looks like a
    deliberate trip-wire on a retired parser.  Those halts are kept, but as
    explicit RuntimeError/ValueError with a message; confirm whether the
    final halt should be removed to bring the parser back to life.
    '''
    print(url)

    ## fiscal periods that are one week off a standard quarter length
    d_periods = {
        '12 Weeks': '13 Weeks', '14 Weeks': '13 Weeks',
        '25 Weeks': '26 Weeks', '27 Weeks': '26 Weeks',
        '38 Weeks': '39 Weeks', '40 Weeks': '39 Weeks',
        '51 Weeks': '52 Weeks', '53 Weeks': '52 Weeks',
        }
    l_periods = [
        '3 Months', '6 Months', '9 Months', '12 Months',
        '13 Weeks', '26 Weeks', '39 Weeks', '52 Weeks',
        ]
    d_factors = {'Millions': 1000000., 'Thousands': 1000.}
    table_marker = '<table class="dataTable financials" cellspacing="1" cellpadding="0" width="100%">'

    dic_out = {
        'period': [],
        'date': [],
        }
    currency = None
    factor = None

    lines = screener.finance().read_url(url, '0')

    statementNA = False
    bool_no_financials = False

    for i1 in range(len(lines)):

        if 'No Financials Data Available</div>' in lines[i1]:
            print('No Financials Data Available</div>')
            statementNA = True
            bool_no_financials = True
            raise RuntimeError('No Financials Data Available: %s' % (url))

        if table_marker not in lines[i1]:
            continue

        for i2 in range(i1 + 1, len(lines)):

            if '<span class="units">' in lines[i2]:
                ## second word of the units line names the factor;
                ## the currency follows ' of'
                units = str(lines[i2 + 1])
                factor = d_factors[units.split()[1].strip()]
                index1 = units.index(' of') + 3
                index2 = units.index('<')
                currency = units[index1:index2].strip()

            elif '<span class="period">' in lines[i2]:
                previous = str(lines[i2 - 1])
                s = previous[:previous.index('<')].strip()
                ## period ending in the first days of a month is booked
                ## on the previous month, e.g. FXJ.AX, BBY, BKS
                if int(s[-2:]) <= 6:
                    prefix = s[:5]
                    month = int(s[5:-3]) - 1
                    if month == 0:
                        ## January rolls back to December of the year before
                        prefix = '%04i%s' % (int(s[:4]) - 1, s[4])
                        month = 12
                    s = '%5s%02i%3s' % (prefix, month, s[-3:],)
                dic_out['date'] += [s[:-3]]

                if statement != 'balance':
                    for i3 in range(i2 + 1, len(lines)):
                        if '</span>' not in lines[i3]:
                            continue
                        index2 = lines[i3].index('</span>')
                        s = lines[i3][:index2].strip().replace(' ', ' ')
                        s = d_periods.get(s, s)
                        if s not in l_periods:
                            if statement == 'income':
                                statementNA = True
                                break
                            raise ValueError(
                                'unexpected period %r in %r' % (s, lines[i3]))
                        l = s.split()
                        dic_out['period'] += [[int(l[0]), l[1],]]
                        break

            elif '<tr ' in lines[i2]:
                ## first cell of a row is its label, the others are values
                key = None
                for i3 in range(i2 + 1, len(lines)):
                    if '<td ' in lines[i3]:
                        index1 = lines[i3].index('>') + 1
                        index2 = lines[i3].rindex('<')
                        s = lines[i3][index1:index2].replace(' ', ' ')
                        if key is None:
                            key = s
                            dic_out[key] = []
                        else:
                            if factor is None:
                                raise RuntimeError(
                                    'value row before units line: %s' % (url))
                            s = s.replace(',', '').replace('(', '').replace(')', '')
                            if s == '--':
                                s = 0
                            value = factor * float(s)
                            if 'minus' in lines[i3]:
                                value *= -1
                            dic_out[key] += [value]
                    if lines[i3].strip() == '</tr>':
                        break
                    if '<th>' in lines[i3].strip():
                        break

        break  ## break loop over lines

    if lines == []:
        if statementNA == True:
            currency = 'N/A'
        else:
            raise RuntimeError('no page content returned for %s' % (url))

    ## NOTE(review): trip-wire kept from the original (bare `stop`)
    if bool_no_financials == False:
        raise RuntimeError(
            'parse_statement halted after parsing %s; see NOTE(review)' % (url))

    return dic_out, statementNA, currency
def find_candidates_TA(
    self,
    l_tickers,
    l_time,
    months,
    l_statementNA,
    d_portfolio,
    d_ADR,
):
    '''Download daily prices and run the technical-analysis screens.

    ``l_time`` is ``[year, month, day]`` of the end date; 11 years of daily
    prices ending there are requested for the S&P 500 (``^GSPC``) and for
    every ticker in ``l_tickers``.  For each ticker the 52-week and 10-year
    changes, the position within the 52-week range, moving averages,
    support/resistance, gaps, RSI, MFI and MACD are computed through the
    corresponding ``self.*`` helpers and stored in ``TAdata``.  Summary
    lists are printed and ``TAcandidates.txt`` is written.

    ``months``, ``l_statementNA`` and ``d_portfolio`` are accepted but not
    used.  ``d_ADR`` maps ADR ticker -> local ticker and is only used for
    the final printout.

    Returns ``(TAcandidates, TAdata)``.

    Raises ValueError when the S&P 500 series has no price one year back,
    and RuntimeError for a 'CA:' ticker containing '.' (unhandled case).

    NOTE(review): ``TAcandidate`` is evaluated from the MACD but nothing
    appends to ``TAcandidates``, so the returned list is always empty.
    '''

    url_daily = 'http://ichart.finance.yahoo.com/table.csv?s=%s&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv'

    def empty_price():
        ## one list per column filled in by self.data_conversion
        return {
            'date': [],
            'open': [],
            'high': [],
            'low': [],
            'close': [],
            'volume': [],
            'adjclose': [],
        }

    def price_on_or_before(price, date):
        ## walk back from yesterday; the oldest entry is not inspected
        for i in range(2, len(price['date'])):
            if price['date'][-i] <= date:
                return price['adjclose'][-i]
        return None

    def years_back(date, years):
        return '%4s%s' % (int(date[:4]) - years, date[4:])

    print('finding TA candidates')

    year2 = l_time[0]
    month2 = l_time[1]
    day2 = l_time[2]
    year1 = year2 - 11
    month1 = month2
    day1 = day2

    TAcandidates = []
    TAdata = {}

    l_supports = []
    l_breakouts = []
    l_MA50_increasing = []
    l_MA50_bounce = []
    l_52w_low = []
    l_down10percent_morethansp500 = []

    ##
    ## S&P 500
    ##
    ticker = ticker_yahoo = '^GSPC'
    period = 'daily'
    TAdata[ticker] = {period: {}}
    url = url_daily % (
        ticker_yahoo, month2 - 1, day2, year2, month1 - 1, day1, year1,
    )  ## g=d signifies daily_weekly_monthly graph
    linesd = screener.finance().read_url(url, ticker)
    data = TAdata[ticker][period]['raw'] = linesd[1:]
    TAdata[ticker][period]['price'] = empty_price()
    TAdata = self.data_conversion(ticker, period, data, TAdata)

    price = TAdata[ticker][period]['price']
    price_today = price['adjclose'][-1]
    date_today = price['date'][-1]
    price_52w = price_on_or_before(price, years_back(date_today, 1))
    if not price_52w:
        raise ValueError('no S&P 500 price one year before %s' % (date_today))
    sp500_52w_change = (price_today - price_52w) / price_52w

    for ticker in l_tickers:

        ticker_FA = ticker

        ## India not on Yahoo
        if ticker[-3:] in ['.BO']:
            continue
        if ticker == 'INGC.BO':
            continue
        if ticker == 'HUVR.BO':
            continue

        ## share classes: Yahoo writes BRK-A where the input has BRK.A
        if ticker[-2:] in ['.A', '.B']:
            index = ticker.index('.')
            ticker = ticker[:index] + '-' + ticker[index + 1:]
        ticker = ticker.replace('.a', '-a')
        ticker = ticker.replace('.b', '-b')
        ticker = ticker.replace('b', '-B')  ## HUBb, NOVO-B.CO
        ticker = ticker.replace('a', '-A')  ## BFa

        if ':' in ticker:
            index = ticker.index(':')
            if ticker[:index] == 'CA' and '.' in ticker:
                ## the original discarded the replace('.', '-') result and
                ## halted here; keep the halt but make it explicit
                raise RuntimeError(
                    'Canadian ticker with "." not handled: %s' % (ticker))
            ticker = ticker_conversion.unknown2yahoo(ticker)
        elif '.' in ticker:
            ticker = ticker_conversion.unknown2yahoo(ticker)
        ticker = ticker.replace('..', '.')  ## RB..L

        ticker_yahoo = ticker
        ticker = ticker_FA

        ##
        ## parse historical data
        ##
        TAdata[ticker] = {}

        url = url_daily % (
            ticker_yahoo, month2 - 1, day2, year2, month1 - 1, day1, year1,
        )  ## g=d signifies daily_weekly_monthly graph
        linesd = screener.finance().read_url(url, ticker)

        ## no data
        if linesd == ['']:
            continue

        TAdata[ticker]['daily'] = {
            'raw': linesd[1:],
        }

        ## find TA candidates
        TAcandidate = True  ## NOTE(review): never read, see docstring

        for period in [
                'daily',
        ]:

            TAdata[ticker][period]['price'] = empty_price()
            data = TAdata[ticker][period]['raw']
            TAdata = self.data_conversion(ticker, period, data, TAdata)

            ## calculate MA
            if period == 'daily':
                TAdata, MA50, MA200, l_MA50_increasing, l_MA50_bounce = self.MA(
                    ticker, period, TAdata, l_MA50_increasing, l_MA50_bounce,
                )
                TAdata[ticker][period]['MA50'] = MA50
                TAdata[ticker][period]['MA200'] = MA200

            price = TAdata[ticker][period]['price']
            price_today = price['adjclose'][-1]
            date_today = price['date'][-1]

            date_52w = years_back(date_today, 1)
            price_52w = price_on_or_before(price, date_52w)
            if price_52w is None:
                continue
            price_10y = price_on_or_before(price, years_back(date_today, 10))

            ## prices within the last 52 weeks (today excluded)
            l_prices_52w = [
                price['adjclose'][-i]
                for i in range(2, len(price['date']))
                if price['date'][-i] >= date_52w
            ]
            ## a gap of more than a year before the latest quote leaves
            ## nothing to take min/max of
            if not l_prices_52w:
                continue

            if price_52w:
                change_52w = (price_today - price_52w) / price_52w
                TAdata[ticker][period]['change_52w'] = round(
                    100 * change_52w, 0)
            else:
                change_52w = None
            if price_10y:
                change_10y = (price_today - price_10y) / price_10y
                TAdata[ticker][period]['change_10y'] = round(
                    100 * change_10y, 0)
            else:
                change_10y = None

            price_52w_min = min(l_prices_52w)
            price_52w_max = max(l_prices_52w)
            above_52w = (price_today - price_52w_min) / price_52w_min
            below_52w_max = (price_today - price_52w_max) / price_52w_max
            TAdata[ticker][period]['above_52w'] = round(100 * above_52w, 0)
            TAdata[ticker][period]['below_52w_max'] = round(
                100 * below_52w_max, 0)

            if price_today < 1.05 * price_52w_min:
                l_52w_low += [ticker]

            ## dropped more than 10% relative to market
            if change_52w is not None and change_52w < sp500_52w_change - 0.1:
                l_down10percent_morethansp500 += [ticker]

            if period == 'daily':
                ## find support and resistance
                ## conflicts if support or resistance while paying out dividend...
                l_supports, l_breakouts = self.support_and_resistance(
                    ticker, TAdata, l_supports, l_breakouts,
                )
                ## find gap support/resistance (result currently unused)
                self.gaps(ticker, data)
                ## calculate RSI
                TAdata = self.RSI(ticker, period, TAdata)
                ## calculate MFI
                TAdata = self.MFI(ticker, period, TAdata)

            ## calculate MACD
            TAdata = self.MACD(ticker, period, TAdata)

            ## evaluate MACD (bullish)
            l_div = TAdata[ticker][period]['MACD']['DIV']
            if period != 'monthly' and not (
                    l_div[-1] > l_div[-2] and l_div[-2] < 0):
                TAcandidate = False
            elif period == 'monthly' and not l_div[-2] < 0:
                TAcandidate = False

        ## end of loop over periods

    d_TA = {
        'MACD': TAcandidates,
        'bouncing at support level': l_supports,
        'breaking resistance level': l_breakouts,
        'MA50 increasing': l_MA50_increasing,
        'MA50 bounce': l_MA50_bounce,
    }
    for s_TA in list(d_TA.keys()):
        l_TA = d_TA[s_TA]
        ## join instead of trimming a trailing '+': an empty list used to
        ## chop the '=' off the URL
        yahoo = 'http://finance.yahoo.com/q/cq?d=v1&s=' + '+'.join(
            '%s' % (ticker) for ticker in l_TA)
        print('\n')
        print(s_TA)
        print(yahoo)
        print('\n')

    print(('l_52w_low', l_52w_low))
    print(('l_down10percent_morethansp500', l_down10percent_morethansp500))

    print('\n')
    ## NOTE(review): l_TA is whatever the loop above left behind, i.e. the
    ## last category of d_TA - confirm this is the intended list
    l_CAPS = []
    for ticker in l_TA:
        ## print the ADR ticker when the local ticker has one
        ticker_US = next(
            (ADR for ADR, v in d_ADR.items() if v == ticker), ticker)
        l_CAPS += ['%s' % (ticker_US)]
    print(','.join(l_CAPS))

    with open('TAcandidates.txt', 'w') as fd:
        fd.write('%s\n%s' % (l_tickers, TAcandidates))

    return TAcandidates, TAdata
def parse_statement(self, url,):
    '''Parse a financial statement that is delivered on one long HTML line.

    The page at ``url`` is fetched with ``screener.finance().read_url``.
    Scanning stops early when the page reports the financials sector, no
    available statements, multiple/zero search results or an empty table.
    Otherwise the line containing the ``financialsSelectContainer`` div is
    parsed: unit factor and currency from the 'Currency in' cell, the years
    from the 'As of:' row, and every ``<tr>`` row as label + values scaled
    by the unit factor.

    Returns ``(dic_out, statementNA, statement_pending, currency)``.
    ``dic_out`` always has the keys ``'period'`` (left empty here) and
    ``'date'`` plus one key per parsed row.  ``statementNA`` is also set
    when no row or no currency was found.
    '''
    d_factors = {'Millions': 1000000., 'Thousands': 1000.}
    ## 10-character previews after '<' of '<br />' that are not a year
    l_not_year = [
        'br /><span',
        'br />Trend',  ## 4-Year Trend
        'br />Relea',  ## Press Release
        'br />--<br',
        ]

    dic_out = {
        'period': [],
        'date': [],
        }
    currency = None
    statementNA = False
    statement_pending = False

    lines = screener.finance().read_url(url, '0')

    for s in lines:

        if '>FINANCIALS SECTOR<' in s:
            statementNA = True
            break

        if 'There are no Income Statements available at this time for' in s:
            print('There are no Income Statements available at this time')
            statement_pending = True
            break

        if ' public company results.' in s:
            print('Multiple or zero results')
            print(s)
            statementNA = True
            break

        ## Empty Table (nothing between end of table headers and end of table)
        if '4-Year<br />Trend</td></tr></table>' in s:
            print('Blank Statement')
            statementNA = True
            break

        if '<div class="financialsSelectContainer">' not in s:
            continue

        ## 'Currency in<br />{factor} of {currency}</td>'
        index1 = s.index('Currency in<br />') + len('Currency in<br />')
        index2 = s.index('</td>', index1)
        index_space = s.index(' ', index1, index2)
        factor = d_factors[s[index1:index_space]]
        index_space = s.index(' ', index_space + 1, index2)
        currency = s[index_space + 1:index2]

        ## the remainder of that row, from 'As of:', lists the years
        index2 = s.index('</tr>', index1)
        index1 = s.index('As of:', index1, index2)
        s_year = s[index1:index2]

        index = 0
        while '<br />' in s_year[index:]:
            ## step to just after the '<' so the next search moves on
            index = s_year.index('<br />', index) + 1
            s_peek = s_year[index:index + 10]
            ## company has been around for less than 4 years
            if s_peek == 'br />--</t':
                print('Less than 4 years of data')
                statement_pending = True
                break
            if s_peek not in l_not_year:
                dic_out['date'] += [int(s_year[index + 5:index + 9])]

        index = 0
        while '<tr>' in s[index:]:
            index = s.index('<tr>', index) + 1
            ## row label = text directly before the first '</td>'
            index2 = s.index('</td>', index)
            index1 = s.rindex('>', index, index2) + 1
            key = s[index1:index2]
            ## skip the header row ('Millions of ...')
            if ' ' in key:
                if key[:key.index(' ')] in d_factors:
                    index += 1
                    continue
            dic_out[key] = []

            s_row = s[index2:s.index('</tr>', index2)]
            index_row = 0
            while '<td' in s_row[index_row:]:
                index_row = s_row.index('<td', index_row) + 1
                index_row1 = index_row + len('<td') - 1
                index_row1 = s_row.index('>', index_row1) + 1
                index_row2 = s_row.index('</td>', index_row1)
                s_value = s_row[index_row1:index_row2]
                ## the trend image closes the row
                if s_value[:10] == '<img src="':
                    break
                s_value = s_value.replace(',', '')
                if s_value == '--':
                    s_value = 0
                if s_value == ' ':
                    continue
                dic_out[key] += [factor * float(s_value)]

    ## nothing but the two initial keys: no row was parsed
    if len(dic_out) == 2:
        statementNA = True

    ## ticker wrong or not existing
    if currency is None and statementNA == False:
        statementNA = True

    return dic_out, statementNA, statement_pending, currency
} d = instance_MSN.parse_financial_10_year_summary(url,dic_10year,) print(d) stop d = instance_MSN.key_ratios_10_year_summary('MSFT',{},) print(d) stop d = {'ABT':{}} d = instance_MSN.parse_ownership(d,) print(d) stop_end import screener instance_finance = screener.finance() ( tickers, months, time, d_indexes, d_msn2yahoo, d_msn2currency, d_ADR, d_yahoo2reuters, ) = instance_finance.init() d = {} for ticker in tickers: d[ticker] = {} d = instance_MSN.parse_ownership(d,) d2 = {} for ticker in list(d.keys()): holders = d[ticker]['holders'] for holder in holders: if not holder in list(d2.keys()): d2[holder] = 0
def parse_CEOcompensation(self, ticker):
    '''Sum the compensation figures on the company officers page.

    The officers page for ``ticker`` is fetched with
    ``screener.finance().read_url``.  Every data row of the "Basic
    Compensation" and "Options Compensation" tables adds its value to the
    total ('--' counts as nothing).  Rows of the basic table also yield a
    link to the executive's own page; those pages are read in order until
    one states the currency after 'Fiscal Year Total, '.

    Returns ``(compensation, currency)``.  ``currency`` is ``''`` when no
    executive page provides it (it used to be left unbound in that case).
    '''
    header_basic = '<div class="moduleHeader"><h3>Basic Compensation</h3></div>'
    header_options = '<div class="moduleHeader"><h3>Options Compensation</h3></div>'
    marker_currency = 'Fiscal Year Total, '

    url = 'http://www.reuters.com/finance/stocks/companyOfficers?symbol=%s&viewId=comp' % (
        ticker)
    lines = screener.finance().read_url(url, ticker)

    compensation = 0
    currency = ''
    l_urls = []
    i2 = 0
    for i1 in range(len(lines)):
        ## skip the lines already consumed by the previous table
        if i1 < i2:
            continue
        is_options = header_options in lines[i1]
        if header_basic not in lines[i1] and not is_options:
            continue

        ## offset from the <tr> line to the line holding the value cell
        i_add = 3 if is_options else 2

        row = 0
        for i2 in range(i1 + 1, len(lines)):
            if '<tr' in lines[i2]:
                row += 1
                ## the first row is the table header
                if row >= 2:
                    cell = str(lines[i2 + i_add])
                    index2 = cell.index('</td>')
                    index1 = cell[:index2].index('>') + 1
                    s = cell[index1:index2].replace(',', '')
                    if s != '--':
                        compensation += float(s)
                    ## if basic compensation table
                    if not is_options:
                        link = lines[i2 + i_add - 1]
                        index1 = link.index('<a href="') + len('<a href="')
                        index2 = link.index('"', index1)
                        l_urls += [
                            'http://www.reuters.com%s' % (link[index1:index2])]
            if '</table>' in lines[i2]:
                break

        ## the options table is the last one of interest
        if is_options:
            break

    for url_executive in l_urls:
        for line in screener.finance().read_url(url_executive, '0'):
            if marker_currency in line:
                index1 = line.index(marker_currency) + len(marker_currency)
                index2 = line.index('<', index1)
                currency = line[index1:index2]
                break
        if currency != '':
            break

    return compensation, currency
def key_ratios_10_year_summary(self, ticker, dic_10year_summary):
    '''Add the key-ratio ten-year-summary columns to ``dic_10year_summary``.

    The key-ratios page for ``ticker`` is fetched with
    ``screener.finance().read_url`` and scanned line by line.  Parsing
    starts 8 lines before the line containing ' AVG P/E<' and covers that
    table and the one after it.  Header cells (lines ending in '</span>')
    create an empty list under the header text; data cells are converted to
    float and appended to the list of their column.  The first column and
    'NA' cells are skipped.

    The dict passed in is modified in place and also returned.
    '''
    url = 'http://investing.money.msn.com/investments/key-ratios?symbol=%s&page=TenYearSummary' % (ticker)
    lines = screener.finance().read_url(url, ticker)

    d_cols = {}  ## column number -> header text
    bool_init = 0  ## 0: before first table, 1: in first, 2: in second
    bool_tr = False
    bool_td = False
    col = 0

    for i1, line in enumerate(lines):

        if (
            ' is not available.</p>' in line
            or
            '<span>SEARCH RESULTS</span>' in line
            ):
            break

        ## look-ahead must stay inside the page: the last 8 lines used to
        ## raise IndexError here
        bool_ahead = i1 + 8 < len(lines) and ' AVG P/E<' in lines[i1 + 8]
        if bool_ahead or ('<table' in line and bool_init == 1):
            bool_init += 1
            continue
        if '</table' in line and bool_init == 2:
            break
        if bool_init == 0:
            continue

        if '<tr' in line:
            bool_tr = True
            continue
        if '</tr' in line:
            bool_tr = False
            col = 0
            continue
        if not bool_tr:
            continue

        if '<td' in line or '<th' in line:
            bool_td = True
            col += 1
            continue
        if '</td' in line or '</th' in line:
            bool_td = False
            continue
        if not bool_td:
            continue

        if '<span' in line:
            continue
        s = line.strip()
        if s == '</span>':
            continue

        if '</span>' in line:
            ## header: drop the trailing '</span>'
            s = s[:-7].replace('<br/>', ' ')
            d_cols[col] = s
            dic_10year_summary[s] = []
            continue

        ## data; the first column is skipped
        if col == 1:
            continue
        s = s.replace(',', '')  ## thousand separator
        if s == 'NA':
            continue
        dic_10year_summary[d_cols[col]] += [float(s)]

    return dic_10year_summary
def parse_overview(self, ticker, url):
    """Scrape name, price, currency, sector, industry, beta and market cap.

    The page at *url* is fetched with ``screener.finance().read_url`` and
    scanned line by line for a handful of fixed HTML markers; the value for
    each marker is sliced out of a neighbouring line.

    Args:
        ticker: Forwarded to ``read_url``.
        url: Address of the overview page.

    Returns:
        An 8-tuple ``(name, currencyCode, price, sector, industry,
        statementNA, mc, beta)``.  Fields that were not found keep their
        defaults (``''``; ``'USD'`` for the currency).  ``statementNA`` is
        True when the name, the price or the market cap is missing.

    Raises:
        ValueError: If the market-cap label does not contain ``'Mil.'``, if
            an expected tag is missing from a value line, or if a numeric
            field cannot be converted to ``float``.
        IndexError: If a marker sits too close to the end of the page for
            its value line to exist.
    """
    d_factors = {
        'Mil.': 1000000,
    }
    # Currency markers stripped from the market-cap text, applied in this
    # order (e.g. 'Rs' before 'R', 'kr.' before 'kr', 'HK$'/'NT$' before
    # '$').  Visually identical entries are kept as in the original chain.
    # NOTE(review): 'Â¥' and 'Â' look like mis-decoded UTF-8 -- confirm
    # against the bytes read_url actually returns.
    currency_markers = (
        '₩',  ## KRW
        '€',  ## EUR
        'HK$',  ## HKD
        'Â¥',  ## CNY
        'Â',  ## CNY
        '¥',  ## CNY
        '¥',  ## JPY
        'CHF',  ## CHF
        '£',  ## GBP
        'Rs',  ## INR
        'kr.',  ## DKK
        'kr',  ## NOK
        'TL',  ## Turkish Lira
        'R',  ## Brazil
        'NT$',  ## TWD
        'руб',  ## Russia
        'Lt',  ## Lithuania
        '$',  ## USD (dollar symbol)
        'M',  ## MYR (Malaysian ringgit - MR)
        '฿',  ## THB
        '฿',  ## IDR Indonesian Rupiah
        '₨',  ## PKR
        '₪',
        ',',  ## thousand separator
    )

    name = ''
    mc = ''
    currencyCode = 'USD'
    price = ''
    sector = ''
    industry = ''
    statementNA = False
    beta = ''

    print(url)
    lines = screener.finance().read_url(url, ticker)

    for i1, line in enumerate(lines):

        ## name
        if '<div id="sectionTitle">' in line:
            title = lines[i1 + 1]
            name = title[title.index('<h1>') + 4:title.index('</h1>')].strip()

        ## price, currency
        if '<div class="sectionQuoteDetail">' in line:
            for i2 in range(i1, len(lines)):
                if '<span style="font-size: 23px;">' not in lines[i2]:
                    continue
                quote = lines[i2 + 1]
                end = quote.index('</span>')
                s = quote[:end].replace(',', '')
                if s in ('--', '\t\t\t\t--'):
                    statementNA = True
                else:
                    price = float(s)
                # The currency code is in the next <span> on the same line.
                start = end + quote[end:].index('<span>') + 6
                end = start + quote[start:].index('</span>')
                currencyCode = quote[start:end].upper()  ## upper if GBp
                break

        ## sector, industry
        if '<div id="sectionHeaderTopics"><div id="headerTopics">' in line:
            topics = lines[i1 + 5]
            start = topics.index('/sectors') + 8 + 1
            end = start + topics[start:].index('"')
            sector = topics[start:end]
            start = topics.index('/sectors/industries/') + len(
                '/sectors/industries/')
            start += topics[start:].index('>') + 1
            end = start + topics[start:].index('<')
            industry = topics[start:end]

        ## beta
        if '<td>Beta:</td>' in line:
            value = lines[i1 + 1]
            beta = value[value.index('<strong>') + 8:value.index('</strong>')]

        ## market cap
        if '<td>Market Cap' in line:
            factor = 'Mil.'
            if 'Mil.' not in line:
                # Only millions are handled; fail loudly on any other unit
                # (previously done through an undefined name).
                print(line)
                raise ValueError(
                    'unexpected market cap unit for %s: %r' % (ticker, line))
            value = lines[i1 + 1]
            s = value[value.index('<strong>') + 8:value.index('</strong>')]
            for marker in currency_markers:
                s = s.replace(marker, '')
            if s == '--':
                print('mc', s)
            else:
                mc = float(s) * d_factors[factor]

    if name == '':
        statementNA = True
    if mc == '' or price == '':
        statementNA = True

    return (
        name,
        currencyCode,
        price,
        sector,
        industry,
        statementNA,
        mc,
        beta,
    )
def parse_statement_quarterly(self, url, dic, statement, d_currency):
    """Parse one interim financial statement page into *dic*.

    Every existing value in *dic* is first reset to ``None``.  The page at
    *url* is then read line by line: bookkeeping entries (``'rate'``,
    ``'factor'``, ``'periods'``, ``'column1'``, ``'source'`` and, for cash
    flow statements, ``'cf_columns_y'`` / ``'cf_columns_qoq'``) are stored
    in *dic*, and each key of *dic* found as ``>key<`` in a line is filled
    through ``self.parse_statement_multiple``.

    Args:
        url: Address of the statement page.
        dic: Dict whose keys name the statement rows to collect; updated
            in place.
        statement: Statement type; period lengths are only parsed when it
            equals ``'cashflow'``.
        d_currency: Mapping from the page's currency text to a rate.

    Returns:
        ``(dic, statementNA, s_business)``.  ``s_business`` is the page's
        business type, or ``'Industry'`` when the page does not state one.
        Parsing stops early (with *dic* only partly filled) for business
        types ``'Bank'`` and ``'Insurance'``.

    Raises:
        ValueError: On page content the parser does not handle: a period
            length with surrounding whitespace, a ``'PRESS'`` statement
            source, a row that occurs twice, or a row whose first column
            of interest is empty.
        KeyError: For an unknown currency or unit ("Values in ...") text.
    """

    def row_cells(text, anchor, count=5):
        """Return *count* cell texts following the cell that holds *anchor*."""
        pos = text.index(anchor)
        pos += text[pos:].index('</td>') + 1
        cells = []
        for _ in range(count):
            end = pos + text[pos:].index('</td>')
            start = text[:end].rindex('>') + 1
            pos = end + 1
            cells.append(text[start:end])
        return cells

    statementNA = False
    # Same placeholder the early returns below use; initialised up front so
    # the final return cannot hit an unbound local when the page has no
    # "Business Type" line.
    s_business = 'Industry'
    d_factors = {'Millions':1000000.,'Thousands':1000.}
    share_count_keys = (
        'Total Common Shares Outstanding',
        'Total Preferred Shares Outstanding',
    )

    lines = screener.finance().read_url(url, '0')

    ## reset dictionary
    for key in list(dic.keys()):
        dic[key] = None

    # Pages without a statement are recognised before any parsing.  (A later
    # check for the lblErrorMessage span was removed: its text contains the
    # second message below, so it could never be reached.)
    for line in lines:
        if (
            'The financial statement for this symbol, is currently not available.' in line
            or 'Statement information for this ticker symbol is not available at this time.' in line
        ):
            return dic, True, 'Industry'

    for line in lines:

        ## business type
        if ' <p><b>Business Type:</b> <span id="lblBusinessType">' in line:
            i2 = line.index('</span></p>')
            i1 = line[:i2].rindex('>')+1
            s_business = line[i1:i2]
            if s_business in ['Bank','Insurance',]:
                return dic, statementNA, s_business

        ## currency
        if 'Financial data in' in line:
            index1 = line.index('>')+1
            index2 = index1+line[index1:].index('<')
            rate = d_currency[line[index1:index2]]
            dic['rate'] = rate

        ## multiple
        if 'Values in ' in line and ' (Except for per share items)' in line:
            index1 = line.index('Values in')+len('Values in')
            index2 = line.index('(Except for per share items)')
            factor = d_factors[line[index1:index2].strip()]
            dic['factor'] = factor

        ## filing dates
        if 'class="ftable"' in line:
            periods = [
                ## empty cell, e.g. GB:SSE
                None if s == '' else [int(s[:4]), int(s[-1:])]
                for s in row_cells(line, '<tr class="r1">')
            ]

        ## period lengths
        if statement == 'cashflow' and '>Period Length<' in line:
            period_lengths = row_cells(line, '>Period Length<')
            for s in period_lengths:
                if s != s.strip():
                    print(s)
                    raise ValueError('unexpected period length %r' % (s,))
            cf_columns_y,cf_columns_qoq = self.columns_of_interim_statement(periods,period_lengths)
            dic['cf_columns_y'] = cf_columns_y
            dic['cf_columns_qoq'] = cf_columns_qoq

        ## statement sources
        if '>Stmt Source<' in line:
            column1,stmt_source = self.parse_stmt_source(line)
            dic['column1'] = column1
            dic['periods'] = periods
            dic['source'] = stmt_source
            if stmt_source == 'PRESS':
                # The original halted here through an undefined name; keep
                # failing loudly rather than silently accepting the data.
                raise ValueError('statement source is PRESS: %s' % (url,))

        for key in dic:
            marker = '>%s<' %(key)
            if marker not in line:
                continue
            ## key already in another row?
            if dic[key] not in ['N/A',None]:
                print(key, dic[key])
                print(stmt_source)
                raise ValueError('row %r occurs more than once' % (key,))
            # Share counts are not currency amounts, so no rate is applied.
            row_rate = 1. if key in share_count_keys else rate
            dic[key] = self.parse_statement_multiple(marker, line, row_rate, factor)
            if dic[key][column1] in ['N/A',None]:
                print(key)
                raise ValueError('row %r has no value in column %r' % (key, column1))
            break

    return dic, statementNA, s_business
def parse_statements(self, url):
    """Parse income, balance-sheet, cash-flow and indicator rows from *url*.

    NOTE(review): the file contains a second ``def parse_statements`` with
    the same logic further down; if both live in the same scope the later
    one replaces this one -- confirm and remove the duplicate.

    The header line containing ``'All amounts in'`` supplies the scaling
    unit and the currency text.  Table rows are collected from the first
    line mentioning ``'INDICATORS'`` until ``'RATIOS CALCULATIONS'``; rows
    go to the dict of the most recently seen section heading.  A row whose
    cells all convert to ``float`` is stored scaled by the unit, otherwise
    its cells are stored as the raw strings.

    Args:
        url: Address of the statements page.

    Returns:
        ``(d_income, d_balance, d_cash, statement_error, currency_string,
        d_indicators)``.  When the page is empty or has no usable header
        the result is ``(None, None, None, True, None, None)``.
        ``statement_error`` is True when the ``'total net income'`` row is
        missing, empty, or ends (before reversal) in an empty cell.

    Raises:
        KeyError: If the unit named in the header is not in ``d_factors``.
    """
    print(url)
    d_factors = {'Millions':1000000.,'Bil':1000000000.}
    failure = (None, None, None, True, None, None)

    lines = screener.finance().read_url(url, '0')

    ## ticker wrong or simply no data on URL
    if not lines:
        return failure

    header = next((text for text in lines if 'All amounts in' in text), None)
    match = None
    if header is not None:
        match = re.search(r'All amounts in (.*?) of', header)
    ## No financial data available.
    if not match:
        return failure

    ## Millions or Billions or Thousands?
    factor = d_factors[match.group(1)]

    matches = re.findall(
        r'''<font face='arial' size='2'>(.*?)</font>''', header)
    if len(matches) < 2:
        # The currency text is the second <font> cell; treat a header
        # without it as "no data" instead of raising IndexError.
        return failure
    currency_string = matches[1].strip()

    pattern1 = re.compile(r'<TR.*?(<td.*?</td>)</tr>', re.DOTALL)
    pattern2 = re.compile(r'<td.*?>(.*?)</td>', re.DOTALL)

    started = False
    d_income = {}
    d_balance = {}
    d_cash = {}
    d = d_indicators = {}
    for line in lines:

        if 'INDICATORS' in line:
            started = True
        if not started:
            continue

        # Section headings switch the dict that receives the next rows.
        if 'INCOME STATEMENT' in line:
            d = d_income
        if 'CASH-FLOW STATEMENT' in line or 'CASH FLOW STATEMENT' in line:
            d = d_cash
        if 'BALANCE SHEET' in line:
            d = d_balance
        if 'RATIOS CALCULATIONS' in line:
            break

        match1 = re.match(pattern1, line)
        if not match1:
            continue
        cells = re.findall(pattern2, match1.group(0))
        try:
            d[cells[0]] = [
                factor*float(cell.replace(',','')) for cell in cells[1:]
            ]
        except ValueError:
            d[cells[0]] = cells[1:]

    print(d_income.keys())

    ## less than 5 years of data
    # A missing or empty row is reported through statement_error as well,
    # rather than surfacing as KeyError / IndexError.
    net_income = d_income.get('total net income')
    statement_error = not net_income or net_income[-1] == ''

    for d in (d_income, d_balance, d_cash):
        for k in d:
            # reverse the column order (original note: old-to-new becomes
            # new-to-old)
            d[k] = list(reversed(d[k]))

    return d_income, d_balance, d_cash, statement_error, currency_string, d_indicators
def parse_ownership(self,data):
    """Record the non-excluded 5% holders of every ticker in *data*.

    NOTE(review): an earlier ``def parse_ownership`` appears higher up in
    the file; if both live in the same scope this later definition is the
    one in effect -- confirm and remove the other.

    For each ticker (in sorted order) the MSN five-percent-ownership page
    is fetched and the table row line containing ``'Holder Name'`` is
    walked.  Holders on the exact-name list or starting with one of the
    listed fund-family prefixes are skipped; all others are collected as
    ``'<holder> (<percentage>)'``.  Collected holders without parentheses
    in their name (and not containing ``'Berkshire'`` or ``'Walton'``) are
    also appended to ``investors_notexpected.txt`` in the working
    directory.

    Args:
        data: Dict keyed by ticker whose values are dicts; updated in
            place.  ``data[ticker]['holders']`` becomes a comma-separated
            string (``''`` when the page has no data).

    Returns:
        The same ``data`` object.

    Raises:
        ValueError: If a prefix-excluded holder carries a parenthesised tag
            that is not on the known list (it might be a private person),
            or if a percentage cell is not numeric.
    """
    # Holders skipped by exact name.
    excluded_holders = (
        'GAMCO Investors, Inc.',
        'Royce & Associates, LLC',
        'Franklin Advisory Services, LLC',
        'State Street Global Advisors (US)',
        'Neuberger Berman, LLC',
        'Dimensional Fund Advisors, LP',
        'Capital World Investors',
        'Capital Research Global Investors',
        'Renaissance Technologies Corp.',
        'State Farm Insurance Companies',
        'Wellington Management Company, LLP',
        'Lord, Abbett & Co. LLC',
        'Keeley Asset Management Corp.',
        'Wells Capital Management Inc.',
        'First Eagle Global Fund',
        'First Eagle Investment Management LLC',
    )
    # Holders skipped by name prefix (trailing space is significant).
    excluded_prefixes = (
        'Vanguard ',
        'Fidelity ',
        'American Funds ',
        'Columbia ',
        'BlackRock ',
        'T. Rowe Price ',
        'Ruane, Cunniff & Goldfarb, Inc. ',
    )
    # Parenthesised tags known not to indicate a private person.
    known_paren_tags = (
        '(Grove Creek)',
        '(US)',
        '(UK)',
        '(Americas)',
        '401(k)',
        '(New York)',
        '(Switzerland)',
        '(Singapore)',
        '(International)',
        '(ESOP)',
    )

    print('parsing ownership')

    tickers = sorted(data.keys())
    for i, ticker in enumerate(tickers):

        ticker_msn = ticker_conversion.yahoo2msn(ticker)

        if i % 10 == 0:
            print('\n%s/%s %s' %(i+1, len(tickers), ticker))

        url = 'http://investing.money.msn.com/investments/five-percent-ownership?symbol={}'.format(ticker_msn)
        lines = screener.finance().read_url(url, ticker)

        if lines == [''] or lines == []:
            print('no data', url)
            data[ticker]['holders'] = ''
            continue

        holders = data[ticker]['holders'] = []

        for line in lines:

            # The original test read `'Ownership' and 'Holder Name' in line`,
            # which Python evaluates as just `'Holder Name' in line`; the
            # no-op operand is dropped, the effective condition is unchanged.
            if 'Holder Name' not in line:
                continue
            if 'No data available' in line:
                break

            index = line.index('Holder Name')
            while '<tr>' in line[index:]:
                index += line[index:].index('<tr>')

                # Read the four cells of this row.
                cells = []
                for _ in range(4):
                    index2 = index+line[index:].index('</td>')
                    index1 = line[:index2].rindex('>')+1
                    index = index2+1
                    cells.append(line[index1:index2])
                holder = cells[0]
                percentage = float(cells[3])

                if holder in excluded_holders:
                    continue

                if holder.startswith(excluded_prefixes):
                    ## check that a private person is not being excluded
                    if (
                        ('(' in holder or ')' in holder)
                        and not any(tag in holder for tag in known_paren_tags)
                    ):
                        # Previously halted through an undefined name.
                        print(holder)
                        raise ValueError(
                            'unrecognised tag in excluded holder %r' % (holder,))
                    continue

                holders.append('%s (%.1f)' %(holder,percentage,))
                if (
                    '(' not in holder and ')' not in holder
                    and 'Berkshire' not in holder
                    and 'Walton' not in holder
                ):
                    with open('investors_notexpected.txt','a') as fd:
                        fd.write('%s\t%s\t%s\n' %(holder.split()[-1], ticker, holder))

            ## break loop over lines
            break

        data[ticker]['holders'] = ', '.join(holders)

    return data
def parse_stmt(self, url, ticker):
    """Parse the ``mod-ui-table`` financial table found at *url*.

    NOTE(review): the file contains a second ``def parse_stmt`` with the
    same logic directly below; if both live in the same scope the later one
    replaces this one -- confirm and remove the duplicate.

    The line after the first one containing ``'mod-main-content'`` must
    hold both the ``>In <unit> of <currency><`` disclaimer and the table.
    Each table row becomes ``d[first cell] = [remaining cells]``; the row
    whose first cell contains ``'Fiscal data as of'`` is stored under
    ``'date'``.  Cells are converted as follows: ``(1,234)`` becomes the
    negative scaled number, ``--`` becomes ``0``, other numeric text is
    scaled by the unit, and anything else is kept as the original string.

    Args:
        url: Address of the statement page.
        ticker: Forwarded to ``read_url`` and printed with every row.

    Returns:
        ``(d, statement_error, currency_string)``.  On an empty page, a
        page without the marker/disclaimer/table, the result is
        ``(None, True, None)``.  (The failure paths used to return six
        values while success returned three.)

    Raises:
        KeyError: If the unit in the disclaimer is not in ``d_factors``.
    """
    print(url)
    d_factors = {'millions': 1000000., 'Bil': 1000000000.}
    d = {}
    statement_error = False
    failure = (None, True, None)

    lines = screener.finance().read_url(url, ticker)

    ## ticker wrong or simply no data on URL
    if not lines:
        return failure

    #<div class="currencyDisclaimer contains"><span class="fleft">In millions of EUR<
    marker_at = next(
        (i for i, text in enumerate(lines) if 'mod-main-content' in text),
        None)
    if marker_at is None or marker_at + 1 >= len(lines):
        # Previously an IndexError (or a parse of an unrelated line).
        return failure
    line = lines[marker_at + 1]

    match = re.findall(r'>In (.*?) of (.*?)<', line)
    ## No financial data available.
    if not match:
        return failure

    ## Millions or Billions or Thousands?
    unit, currency_string = match[0]
    factor = d_factors[unit]

    table = re.search(r'<table class="mod-ui-table">(.*?)</table>', line)
    if table is None:
        return failure

    for _attrs, row_html in re.findall(r'<tr(.*?)>(.*?)</tr>', table.group(1)):
        l = re.findall(r'<t[dh].*?>(.*?)</t[dh]>', row_html)
        if not l:
            continue
        print(ticker, l)
        if 'Fiscal data as of' in l[0]:
            l[0] = 'date'
        values = d[l[0]] = l[1:]
        for j, x in enumerate(values):
            if x == '--':
                values[j] = 0
                continue
            try:
                if x.startswith('(') and x.endswith(')'):
                    # Parentheses denote a negative amount.
                    values[j] = factor * -float(x[1:-1].replace(',', ''))
                else:
                    values[j] = factor * float(x.replace(',', ''))
            except ValueError:
                # Non-numeric cell (e.g. a date): keep the text.
                values[j] = x

    return d, statement_error, currency_string
def parse_stmt(self, url, ticker):
    """Parse the ``mod-ui-table`` financial table found at *url*.

    NOTE(review): the file contains another ``def parse_stmt`` with the
    same logic directly above; if both live in the same scope this later
    definition is the one in effect -- confirm and remove the duplicate.

    The line after the first one containing ``'mod-main-content'`` must
    hold both the ``>In <unit> of <currency><`` disclaimer and the table.
    Each table row becomes ``d[first cell] = [remaining cells]``; the row
    whose first cell contains ``'Fiscal data as of'`` is stored under
    ``'date'``.  Cells are converted as follows: ``(1,234)`` becomes the
    negative scaled number, ``--`` becomes ``0``, other numeric text is
    scaled by the unit, and anything else is kept as the original string.

    Args:
        url: Address of the statement page.
        ticker: Forwarded to ``read_url`` and printed with every row.

    Returns:
        ``(d, statement_error, currency_string)``.  On an empty page, a
        page without the marker/disclaimer/table, the result is
        ``(None, True, None)``.  (The failure paths used to return six
        values while success returned three.)

    Raises:
        KeyError: If the unit in the disclaimer is not in ``d_factors``.
    """
    print(url)
    d_factors = {'millions':1000000.,'Bil':1000000000.}
    d = {}
    statement_error = False
    failure = (None, True, None)

    lines = screener.finance().read_url(url, ticker)

    ## ticker wrong or simply no data on URL
    if not lines:
        return failure

    #<div class="currencyDisclaimer contains"><span class="fleft">In millions of EUR<
    marker_at = next(
        (i for i, text in enumerate(lines) if 'mod-main-content' in text),
        None)
    if marker_at is None or marker_at + 1 >= len(lines):
        # Previously an IndexError (or a parse of an unrelated line).
        return failure
    line = lines[marker_at + 1]

    match = re.findall(r'>In (.*?) of (.*?)<', line)
    ## No financial data available.
    if not match:
        return failure

    ## Millions or Billions or Thousands?
    unit, currency_string = match[0]
    factor = d_factors[unit]

    table = re.search(r'<table class="mod-ui-table">(.*?)</table>', line)
    if table is None:
        return failure

    for _attrs, row_html in re.findall(r'<tr(.*?)>(.*?)</tr>', table.group(1)):
        l = re.findall(r'<t[dh].*?>(.*?)</t[dh]>', row_html)
        if not l:
            continue
        print(ticker, l)
        if 'Fiscal data as of' in l[0]:
            l[0] = 'date'
        values = d[l[0]] = l[1:]
        for j, x in enumerate(values):
            if x == '--':
                values[j] = 0
                continue
            try:
                if x.startswith('(') and x.endswith(')'):
                    # Parentheses denote a negative amount.
                    values[j] = factor*-float(x[1:-1].replace(',',''))
                else:
                    values[j] = factor*float(x.replace(',',''))
            except ValueError:
                # Non-numeric cell (e.g. a date): keep the text.
                values[j] = x

    return d, statement_error, currency_string
def parse_statements(self, url):
    """Parse income, balance-sheet, cash-flow and indicator rows from *url*.

    NOTE(review): the file contains an earlier ``def parse_statements``
    with the same logic; if both live in the same scope this later
    definition is the one in effect -- confirm and remove the duplicate.

    The header line containing ``'All amounts in'`` supplies the scaling
    unit and the currency text.  Table rows are collected from the first
    line mentioning ``'INDICATORS'`` until ``'RATIOS CALCULATIONS'``; rows
    go to the dict of the most recently seen section heading.  A row whose
    cells all convert to ``float`` is stored scaled by the unit, otherwise
    its cells are stored as the raw strings.

    Args:
        url: Address of the statements page.

    Returns:
        ``(d_income, d_balance, d_cash, statement_error, currency_string,
        d_indicators)``.  When the page is empty or has no usable header
        the result is ``(None, None, None, True, None, None)``.
        ``statement_error`` is True when the ``'total net income'`` row is
        missing, empty, or ends (before reversal) in an empty cell.

    Raises:
        KeyError: If the unit named in the header is not in ``d_factors``.
    """
    print(url)
    d_factors = {'Millions': 1000000., 'Bil': 1000000000.}
    failure = (None, None, None, True, None, None)

    lines = screener.finance().read_url(url, '0')

    ## ticker wrong or simply no data on URL
    if not lines:
        return failure

    header = next((text for text in lines if 'All amounts in' in text), None)
    match = None
    if header is not None:
        match = re.search(r'All amounts in (.*?) of', header)
    ## No financial data available.
    if not match:
        return failure

    ## Millions or Billions or Thousands?
    factor = d_factors[match.group(1)]

    matches = re.findall(r'''<font face='arial' size='2'>(.*?)</font>''',
                         header)
    if len(matches) < 2:
        # The currency text is the second <font> cell; treat a header
        # without it as "no data" instead of raising IndexError.
        return failure
    currency_string = matches[1].strip()

    pattern1 = re.compile(r'<TR.*?(<td.*?</td>)</tr>', re.DOTALL)
    pattern2 = re.compile(r'<td.*?>(.*?)</td>', re.DOTALL)

    started = False
    d_income = {}
    d_balance = {}
    d_cash = {}
    d = d_indicators = {}
    for line in lines:

        if 'INDICATORS' in line:
            started = True
        if not started:
            continue

        # Section headings switch the dict that receives the next rows.
        if 'INCOME STATEMENT' in line:
            d = d_income
        if 'CASH-FLOW STATEMENT' in line or 'CASH FLOW STATEMENT' in line:
            d = d_cash
        if 'BALANCE SHEET' in line:
            d = d_balance
        if 'RATIOS CALCULATIONS' in line:
            break

        match1 = re.match(pattern1, line)
        if not match1:
            continue
        cells = re.findall(pattern2, match1.group(0))
        try:
            d[cells[0]] = [
                factor * float(cell.replace(',', '')) for cell in cells[1:]
            ]
        except ValueError:
            d[cells[0]] = cells[1:]

    print(d_income.keys())

    ## less than 5 years of data
    # A missing or empty row is reported through statement_error as well,
    # rather than surfacing as KeyError / IndexError.
    net_income = d_income.get('total net income')
    statement_error = not net_income or net_income[-1] == ''

    for d in (d_income, d_balance, d_cash):
        for k in d:
            # reverse the column order (original note: old-to-new becomes
            # new-to-old)
            d[k] = list(reversed(d[k]))

    return d_income, d_balance, d_cash, statement_error, currency_string, d_indicators