def street_rating(sym):
    """Read TheStreet quote page for *sym* and return its numeric rating.

    :param sym: ticker symbol string, appended to the quote URL
    :return: float rating scraped from the ``<span class="rating">`` element;
        0.0 when the span is missing, empty, or not parseable as a number
    """
    STREET_QUOTE = "http://www.thestreet.com/quote/"
    url = STREET_QUOTE + sym + ".html"
    html_soup = TurtleSoup.getSoup(url)
    try:
        rating = html_soup.find('span', {'class': 'rating'}).text
        return float(rating)
    except (AttributeError, ValueError):
        # AttributeError: no rating span on the page (find() returned None).
        # ValueError: span text is empty or non-numeric (float('') / float('N/A')).
        # Catching ValueError also covers the old explicit '' check and makes
        # this consistent with the letter-grade variant of street_rating.
        return 0.0
def street_rating(sym):
    """Read TheStreet quote page for *sym* and return its letter-grade rating.

    The grade's first character (A best .. F worst) is mapped onto 0-5 via
    its index in ``rating_label``: 'F' -> 0.0, 'A' -> 5.0.

    :param sym: ticker symbol string, appended to the quote URL
    :return: float rating in [0.0, 5.0]; 0.0 when the rating span is
        missing or its first character is not a letter grade
    """
    STREET_QUOTE = "http://www.thestreet.com/quote/"
    rating_label = 'FEDCBA'
    url = STREET_QUOTE + sym + ".html"
    html_soup = TurtleSoup.getSoup(url)
    try:
        # Match any rating variant of the class ('... hold', '... buy',
        # '... sell'), not only the previously hard-coded 'hold' one.
        span = html_soup.find(
            'span', {'class': re.compile('quote-nav-rating-qr-rating.*')})
        rating = rating_label.index(span.text[0])
    except (AttributeError, ValueError):
        # AttributeError: span not found (find() returned None).
        # ValueError: first character is not one of F..A.
        rating = 0.0
    return float(rating)
def getPrice(web_url):
    """Scrape an Amazon product page for its sale price.

    Looks for exactly one ``<span id="priceblock...">`` element; the
    wildcard id match is deliberately loose and may need tightening as
    page layouts change.

    :param web_url: full product-page URL
    :return: price as a float, with the currency symbol and thousands
        separators stripped (e.g. "$1,234.56" -> 1234.56)
    :raises Exception: when zero or more than one priceblock span is found
    """
    html_soup = TurtleSoup.getSoup(web_url)
    price_all = html_soup.findAll("span", {"id": re.compile("priceblock.*")})
    if len(price_all) == 0:
        raise Exception("no 'priceblock' attribute found")
    if len(price_all) > 1:
        # TODO: loop over results to extract the exact match if this fires
        raise Exception("more than one price tag found")
    # Strip surrounding whitespace and the leading '$' explicitly instead
    # of blindly dropping the first character with text[1:], which broke
    # when the text had leading whitespace or no currency symbol.
    price_txt = price_all[0].text.strip().lstrip("$")
    return float(price_txt.replace(",", ""))
def html_table_2_dict(web_url, table_names):
    """Scrape one or more HTML tables from a page into a column dictionary.

    :param web_url: URL of the website holding the tables
    :param table_names: mapping of group label -> table ``id`` attribute
    :return: dict mapping each cell's ``headers`` attribute (stringified)
        to a list of cell texts; an extra 'exchange' key records which
        group each row came from
    :raises Exception: when a named table is not present on the page
    """
    # Read and make an html soup
    html_soup = TurtleSoup.getSoup(web_url)
    data_out = {}
    for stockgroup in table_names:
        table = html_soup.find('table', {'id': table_names[stockgroup]})
        if table is None:
            # Fail with a clear message instead of the cryptic
            # AttributeError that table.tbody would raise on None.
            raise Exception(
                "table id %r not found on page" % table_names[stockgroup])
        for trow in table.tbody.findAll('tr'):
            # Tag each row with the group it belongs to.
            data_out.setdefault('exchange', []).append(stockgroup)
            for tcol in trow.findAll('td'):
                theaders = str(tcol.get('headers'))
                data_out.setdefault(theaders, []).append(str(tcol.getText()))
    return data_out