def scrape_latest(soup, url, trading_date, formatted_output): if soup is None: print ('Insider ERR: no result for <' + url + '>') return None table = soup.find('table', {'class': 'nc'}) if table is None: if S.DBG_ALL: print ('INFO: No insider data is available for <' + url + '>') return None insiders = {} director = "director" in url # for each row, there are many rows including no table for tr in table.findAll('tr'): td = tr.findAll('td') if S.DBG_INSIDER: print("DBG:") for x in td: print repr(x) # u'\u2019' is the last char in DATO' which can't be encoded to ascii # insider = [x.text.replace(u'\u2019', '').strip().encode("ascii") for x in td] insider = [printable(x.text.replace(u'\u2019', '').encode("ascii")).strip() for x in td] if len(insider) >= 10: name, chg_date, price, view = "", "", "", "" from_date, to_date, min_price, max_price = "", "", "", "" if len(insider) == 11: stock, announce_date, name, chg_date, chg_type, shares, price, direct, indirect, total = \ unpack_latest_td(*insider) view = S.I3_KLSE_URL + td[10].find('a').get('href').encode("ascii") if S.DBG_ALL or S.DBG_INSIDER: print("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s" % (stock, announce_date, chg_date, chg_type, shares, price, direct, indirect, total, view)) else: stock, announce_date, from_date, to_date, chg_type, shares, min_price, max_price, total = \ unpack_company_td(*insider) view = S.I3_KLSE_URL + td[9].find('a').get('href').encode("ascii") if S.DBG_ALL or S.DBG_INSIDER: print("%s, %s, %s, %s, %s, %s, %s, %s, %s" % (stock, announce_date, from_date, to_date, chg_type, shares, min_price, max_price, total)) ann_date = change2KlseDateFmt(announce_date, "%d-%b-%Y") trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y") if S.DBG_QR: print("DBG:dates:{0}:{1}".format(ann_date, trd_date)) if ann_date >= trd_date: if len(insider) == 11: if stock not in insiders: insiders[stock] = [] insiders[stock].append(format_insider( formatted_output, director, stock, announce_date, name, chg_date, chg_type, 
shares, price, view)) else: if stock not in insiders: insiders[stock] = [] insiders[stock].append(format_company( formatted_output, stock, announce_date,from_date, to_date, chg_type, shares, min_price, max_price, total, view)) else: break return insiders
def scrape_entitlement(soup, url, trading_date, formatted_output): if soup is None: print('Insider ERR: no result for <' + url + '>') return None table = soup.find('table', {'class': 'nc'}) if table is None: if S.DBG_ALL: print('INFO: No insider data is available for <' + url + '>') return None entitlements = {} others = "others" in url # for each row, there are many rows including no table for tr in table.findAll('tr'): td = tr.findAll('td') if S.DBG_INSIDER: print("DBG:") for x in td: print repr(x) # u'\u2019' is the last char in DATO' which can't be encoded to ascii # insider = [x.text.replace(u'\u2019', '').strip().encode("ascii") for x in td] insider = [ printable(x.text.replace(u'\u2019', '').encode("ascii")).strip() for x in td ] if len(insider) >= 7: if len(insider) == 7: announce_date, stock, open_price, current_price, dividend, ex_date = \ unpack_dividend_td(*insider) view = S.I3_KLSE_URL + td[6].find('a').get('href').encode( "ascii") else: announce_date, stock, subject, open_price, current_price, ratio, ex_date = \ unpack_others_td(*insider) view = S.I3_KLSE_URL + td[7].find('a').get('href').encode( "ascii") if S.DBG_ALL or S.DBG_INSIDER: print "view: {}".format(view) ann_date = change2KlseDateFmt(announce_date, "%d-%b-%Y") trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y") if S.DBG_QR: print("DBG:dates:{0}:{1}".format(ann_date, trd_date)) if ann_date >= trd_date: if len(insider) == 7: entitlements[stock] = format_dividend( formatted_output, others, announce_date, stock, "", open_price, current_price, dividend, ex_date, view) else: entitlements[stock] = format_dividend( formatted_output, others, announce_date, stock, subject, open_price, current_price, ratio, ex_date, view) else: break return entitlements
def scrape_insider(counter, stk_code, soup, last_date, show_latest=False):
    """Scrape insider-dealing rows for a single counter.

    Args:
        counter: stock short name, used in messages and formatted rows.
        stk_code: stock code, used only in the "no data" message.
        soup: BeautifulSoup of the insider page, or None/empty on failure.
        last_date: date string of the most recently recorded entry; rows
            whose date equals it are skipped.
        show_latest: when True, collection is capped at 10 rows.

    Returns:
        List of formatted rows (possibly empty), or None when the page or
        its 'nc' table is missing.
    """
    if soup is None or len(soup) <= 0:
        print ('Insider ERR: no result for <' + counter + '>')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print ('INFO: No insider data is available for <' + counter + "." + stk_code + '>')
        return None
    insiders = []
    count = 0
    # for each row, there are many rows including no table
    for tr in table.findAll('tr'):
        td = tr.findAll('td')
        if S.DBG_INSIDER:
            print("DBG:")
            for x in td:
                print repr(x)
        # u'\u2019' is the last char in DATO' which can't be encoded to ascii
        # insider = [x.text.replace(u'\u2019', '').strip().encode("ascii") for x in td]
        insider = [printable(x.text.replace(u'\u2019', '').encode("ascii")).strip() for x in td]
        # Only fully-populated data rows have exactly 10 cells.
        if len(insider) == 10:
            anndt, name, dt, notice, shares, price, direct, indirect, total = unpack_insider_td(*insider)
            view = S.I3_KLSE_URL + td[9].find('a').get('href').encode("ascii")
            if S.DBG_ALL or S.DBG_INSIDER:
                # print("%s, %s, %s, %s, %s, %s, %.2f, %.2f, %.2f, %.2f, %s" %
                print("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s" %
                      (counter, anndt, name, dt, notice, shares, price, direct, indirect, total, view))
            # tdstr = counter + ", " + name + ", " + dt + ", " + notice + ", " + shares + ", " + price + ", " + view
            # Rows dated exactly last_date are already recorded and skipped.
            if dt != last_date:
                if show_latest:
                    if S.DBG_ALL or S.DBG_INSIDER:
                        # NOTE(review): %f here assumes price/direct/indirect/total are
                        # numeric; the %s debug print above treats them as opaque —
                        # confirm unpack_insider_td's return types before relying on it.
                        print("%s, %s, %s, %s, %s, %s, %f, %f, %f, %f, %s" %
                              (counter, anndt, name, dt, notice, shares, price, direct, indirect, total, view))
                    # insiders.append(tdstr)
                    insiders.append(format_insider(counter, name, dt, notice, shares, price, view))
                    count += 1
                    # Cap at 10 rows when only the latest entries are wanted.
                    if count > 9:
                        break
                    # NOTE(review): this `continue` is redundant — nothing follows
                    # it in the loop body for this branch.
                    continue
                else:
                    insiders.append(format_insider(counter, name, dt, notice, shares, price, view))
    return insiders
def scrape_listing(soup, trading_date, formatted_output): if soup is None: print('Insider ERR: no result for <' + I3_LISTING_URL + '>') return None table = soup.find('table', {'class': 'nc'}) if table is None: if S.DBG_ALL: print('INFO: No insider data is available for <' + I3_LISTING_URL + '>') return None listings = {} for tr in table.findAll('tr'): td = tr.findAll('td') if S.DBG_INSIDER: print("DBG:") for x in td: print repr(x) # u'\u2019' is the last char in DATO' which can't be encoded to ascii # insider = [x.text.replace(u'\u2019', '').strip().encode("ascii") for x in td] insider = [ printable(x.text.replace(u'\u2019', '').encode("ascii")).strip() for x in td ] if len(insider) >= 7: stock, announce_date, listing_date, type, units, price = \ unpack_listing_td(*insider) view = S.I3_KLSE_URL + td[6].find('a').get('href').encode("ascii") if S.DBG_ALL or S.DBG_INSIDER: print "view: {}".format(view) ann_date = change2KlseDateFmt(announce_date, "%d-%b-%Y") trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y") if S.DBG_QR: print("DBG:dates:{0}:{1}".format(ann_date, trd_date)) if ann_date >= trd_date: listings[stock] = format_listing(formatted_output, stock, announce_date, listing_date, type, units, price, view) else: break return listings
def scrape_qr(counter, stk_code, soup): if soup is None or len(soup) <= 0: print ('QR ERR: no result for <' + counter + '>') return None table = soup.find('table', {'class': 'nc'}) if table is None: if S.DBG_ALL: print ('INFO: No QR data is available for <' + counter + "." + stk_code + '>') return None qr_list = [] for tr in table.findAll('tr'): td = tr.findAll('td') # qr = [x.text.strip().replace(' ', '').encode("ascii") for x in td] qr = [printable(x.text.replace(' ', '').encode("ascii")).strip() for x in td] if S.DBG_QR: print("DBG:") for x in qr: print repr(x) if len(qr) > 0: qr_list = unpack_qr(*qr) break return qr_list
def scrape_target(soup, trading_date, formatted_output): if soup is None: print('Insider ERR: no result for <' + I3_TARGET_URL + '>') return None table = soup.find('table', {'class': 'nc'}) if table is None: if S.DBG_ALL: print('INFO: No insider data is available for <' + I3_TARGET_URL + '>') return None targets = {} for tr in table.findAll('tr'): td = tr.findAll('td') if S.DBG_INSIDER: print("DBG:") for x in td: print repr(x) # u'\u2019' is the last char in DATO' which can't be encoded to ascii # insider = [x.text.replace(u'\u2019', '').strip().encode("ascii") for x in td] insider = [ printable(x.text.replace(u'\u2019', '').encode("ascii")).strip() for x in td ] if len(insider) >= 7: announce_date, stock, last_price, target, upside_down, call, source = \ unpack_listing_td(*insider) ann_date = change2KlseDateFmt(announce_date, "%d/%m/%Y") trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y") if S.DBG_QR: print("DBG:dates:{0}:{1}".format(ann_date, trd_date)) if ann_date >= trd_date: targets[stock] = format_target(formatted_output, announce_date, stock, last_price, target, upside_down, call, source) else: break return targets