def _html_to_pe_df(lst):
    """Build a P/E, P/B and dividend-yield DataFrame from HTML table rows.

    The first three rows (text headers) and the final row of ``lst`` are
    ignored. In every remaining row the first ``<td>`` cell is converted to
    a date with ``str_to_date`` and the next three cells are parsed as
    floats after stripping commas and spaces.

    Args:
        lst: Sliceable sequence of row objects exposing ``find_all('td')``
            (presumably BeautifulSoup ``<tr>`` tags -- confirm against the
            caller). The sequence is not modified.

    Returns:
        pandas.DataFrame indexed by date with the columns ``'P/E'``,
        ``'P/B'`` and ``'Div Yield'``. Cells that are missing or not
        numeric are NaN; a row without a date cell gets a NaT index entry.
        Input with four rows or fewer yields an empty frame.
    """
    columns = ('P/E', 'P/B', 'Div Yield')

    # Slice rather than delete in place so the caller's list stays intact
    # and short input produces an empty frame instead of an IndexError.
    rows = lst[3:-1]
    n_rows = len(rows)

    # Pre-fill with NaT/NaN so rows with fewer cells than expected never
    # expose uninitialised memory in the result.
    dates = np.full(n_rows, np.datetime64('NaT'), dtype='datetime64[D]')
    values = np.full((len(columns), n_rows), np.nan)

    for r_idx, row in enumerate(rows):
        for c_idx, cell in enumerate(row.find_all('td')):
            if c_idx == 0:
                dates[r_idx] = str_to_date(cell.text)
            elif c_idx <= len(columns):
                text = cell.get_text().replace(',', '').replace(' ', '')
                try:
                    values[c_idx - 1][r_idx] = float(text)
                except ValueError:
                    # Non-numeric cell: keep the pre-filled NaN.
                    pass
            # Cells beyond the known columns are ignored.

    return pd.DataFrame(
        {name: values[i] for i, name in enumerate(columns)},
        index=dates,
    )
def _html_to_index_df(lst):
    """Build an OHLC / volume / turnover DataFrame from HTML table rows.

    The first three rows (text headers) and the final row of ``lst`` are
    ignored. In every remaining row the first ``<td>`` cell is converted to
    a date with ``str_to_date`` and the next six cells are parsed as floats
    after stripping commas and spaces.

    Args:
        lst: Sliceable sequence of row objects exposing ``find_all('td')``
            (presumably BeautifulSoup ``<tr>`` tags -- confirm against the
            caller). The sequence is not modified.

    Returns:
        pandas.DataFrame indexed by date with the columns ``'Open'``,
        ``'High'``, ``'Low'``, ``'Close'``, ``'Shares Traded'`` and
        ``'Turnover'``. Cells that are missing or not numeric are NaN; a
        row without a date cell gets a NaT index entry. Input with four
        rows or fewer yields an empty frame.
    """
    columns = ('Open', 'High', 'Low', 'Close', 'Shares Traded', 'Turnover')

    # Slice rather than delete in place so the caller's list stays intact
    # and short input produces an empty frame instead of an IndexError.
    rows = lst[3:-1]
    n_rows = len(rows)

    # Pre-fill with NaT/NaN so rows with fewer cells than expected never
    # expose uninitialised memory in the result.
    dates = np.full(n_rows, np.datetime64('NaT'), dtype='datetime64[D]')
    values = np.full((len(columns), n_rows), np.nan)

    for r_idx, row in enumerate(rows):
        for c_idx, cell in enumerate(row.find_all('td')):
            if c_idx == 0:
                dates[r_idx] = str_to_date(cell.text)
            elif c_idx <= len(columns):
                text = cell.get_text().replace(',', '').replace(' ', '')
                try:
                    values[c_idx - 1][r_idx] = float(text)
                except ValueError:
                    # Non-numeric cell: keep the pre-filled NaN.
                    pass
            # Cells beyond the known columns are ignored.

    return pd.DataFrame(
        {name: values[i] for i, name in enumerate(columns)},
        index=dates,
    )