def state_data(s): data = [{"state": _} for _ in s] territories = ["american samoa", "district of columbia", "guam", "marshall islands", "micronesia", "northern mariana islands", "palau", "puerto rico", "u.s. virgin islands"] unitedstates = s + territories page3 = requests.get("https://www.cdc.gov/coronavirus/2019-ncov/vaccines/index.html") soup3 = bs(page3.content, 'html.parser') soup3.find_all('a', attrs={"class":"dropdown-item noLinking"}) links = [] for link in soup3.find_all('a', attrs={"class":"dropdown-item noLinking"}, href=True): links.append(link['href']) statelinks = dict(zip(unitedstates, links)) for territory in territories: del statelinks[territory] for state in s: url = f"https://www.nytimes.com/interactive/2020/us/{state.replace(' ', '-').lower()}-coronavirus-cases.html" req = requests.Session() response = req.get(url) strainer = ss("td", attrs={"class": "num yesterday svelte-fin3s2"}) soup = bs(response.content, features="html.parser", parse_only=strainer) counts = soup.find_all("span", attrs={"class": "svelte-fin3s2"}) strainer = ss("tr", attrs={"class": "svelte-fin3s2"}) soup = bs(response.content, features="html.parser", parse_only=strainer) yesterday = str(soup.find_all("th", attrs={"class": "header yesterday svelte-fin3s2"})[0].text).split("On ")[1] data[s.index(state)]["link"] = statelinks[state] data[s.index(state)]["cases"] = counts[0].text data[s.index(state)]["deaths"] = counts[1].text data[s.index(state)]["hospitalized"] = counts[2].text return data, yesterday
def scrapeFun1(lemonde_url, from_lang, to_lang):
    lemond_worldnews = SimpleUrl(lemonde_url)
    page = lemond_worldnews()
    filtertag = ss("article")
    titletag = ss("title")
    souptitle = bs(page, "html.parser", parse_only=titletag)
    subj = souptitle.get_text()
    soup = bs(page, "html.parser", parse_only=filtertag)
    soupstr = soup.get_text()
    markup = marksoup(soupstr, from_lang, to_lang)
    sendnews(subj, markup)
def find_price_nasdaq(t):
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    from selenium import webdriver
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('user-agent={0}'.format(user_agent))
    capa = DesiredCapabilities.CHROME
    capa["pageLoadStrategy"] = "none"
    driver = webdriver.Chrome(options=options, desired_capabilities=capa)
    driver.set_window_size(1440, 900)
    driver.get('https://www.nasdaq.com/market-activity/stocks/{}'.format(t))
    time.sleep(4)
    plain_text = driver.page_source
    driver.quit()
    only_class = ss(class_='symbol-page-header__pricing-price')
    soup = bs(plain_text, 'html.parser', parse_only=only_class)
    prices_found = []
    for result in soup:
        if result.text != '':
            price = result.text
            prices_found.append(price)
    price = float(max(prices_found).replace('$', ''))
    return {'price': price}
def __parse_data(self: object, start_date: str, end_date: str) -> None:
    data = self.__get_html(start_date, end_date)
    print('Your request is in progress, please wait...')
    for x in data:
        link = f'https://www1.arun.gov.uk/aplanning/OcellaWeb/{x.get("href")}'
        item = {}
        r = requests_session.get(link)
        innerstrainter = ss('table')
        innerhtml = bs(r.text, 'lxml', parse_only=innerstrainter)
        innerinfoblocks = innerhtml.findAll('tr')
        # Parse data from the html document
        item['Url'] = link
        item['Address'] = innerinfoblocks[4].findAll('td')[1].text
        item['ReferenceNumber'] = innerinfoblocks[1].findAll('td')[1].text
        item['Validateddate'] = innerinfoblocks[8].findAll('td')[1].text
        item['Status'] = innerinfoblocks[2].findAll('td')[1].text
        item['Proposal'] = innerinfoblocks[3].findAll('td')[1].text
        # Write data to the csv file
        writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
        writer.writerow(item)
def bbHeatMapEval(date, size, puzzlesEvaluated):
    puzzleLink = "https://www.xwordinfo.com/Crossword?date=" + date
    puzzle = requests.get(puzzleLink)
    # populates bbHeatMapDict for the evaluated puzzle
    tablesoup = bs(puzzle.content, 'html.parser', parse_only=ss(id="PuzTable"))
    rowsoup = tablesoup.find_all('tr')
    num_rows = len(rowsoup)
    cellsoup = tablesoup.find_all('td')
    num_cols = len(cellsoup) // len(rowsoup)
    if num_rows != size or num_cols != size:
        print("Crossword puzzle board for", date, "is nonstandard size!")
        return puzzlesEvaluated
    bbNum = 0
    for x in range(len(rowsoup)):
        cellsoup = rowsoup[x].find_all('td')
        cellList = str(cellsoup).split(',')
        for y in range(len(cellList)):
            if 'class="black"' in cellList[y]:
                bbNum += 1
                bbHeatMapDict[x][y] += 1
    bbNumList.append(bbNum)
    datesEvaluated.append(date)
    puzzlesEvaluated += 1
    if puzzlesEvaluated % 5 == 0:
        print(puzzlesEvaluated, "puzzles evaluated so far")
    return puzzlesEvaluated
def find_price_tradingview(t):
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    from selenium import webdriver
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('user-agent={0}'.format(user_agent))
    capa = DesiredCapabilities.CHROME
    capa["pageLoadStrategy"] = "none"
    driver = webdriver.Chrome(options=options, desired_capabilities=capa)
    driver.set_window_size(1440, 900)
    driver.get('https://www.tradingview.com/symbols/{}/'.format(t))
    time.sleep(4)
    plain_text = driver.page_source
    driver.quit()
    only_class = ss(class_='tv-symbol-price-quote__value js-symbol-last')
    price = float(bs(plain_text, 'html.parser', parse_only=only_class).text)
    return {'price': price}
def __get_html(self: object, fromm: str, to: str) -> list:
    r = requests_session.get(f'https://www1.arun.gov.uk/aplanning/OcellaWeb/planningSearch?action=Search&showall=showall&reference=&location=&OcellaPlanningSearch.postcode=&area=&applicant=&agent=&undecided=&receivedFrom={fromm}&receivedTo={to}&decidedFrom=&decidedTo=')
    strainter = ss('table')
    html = bs(r.text, 'lxml', parse_only=strainter)
    # Catch the link for every item
    infoblocks = html.findAll('a', href=True)
    return infoblocks
def marketwatch_other_data(t):
    import requests
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    # get market cap and 52-week range from the summary table
    header = {'user-agent': user_agent}
    url = 'https://www.marketwatch.com/investing/stock/{}?mod=over_search'
    f = requests.get(url.format(t), headers=header)
    time.sleep(4)
    only_class = ss(class_='list list--kv list--col50')
    soup = bs(f.content, 'html.parser', parse_only=only_class)
    mlist = soup.find_all(class_='primary')
    fifty_two_week_range = mlist[2].text.split(' - ')
    fifty_two_week_low = float(fifty_two_week_range[0])
    fifty_two_week_high = float(fifty_two_week_range[1])
    market_cap_string = mlist[3].text.replace('$', '')
    if market_cap_string[-1] == 'T':
        split = market_cap_string.split('.')
        first = int(split[0]) * (10**12)
        second = int(split[1].replace('T', '')) * (10**9)
        market_cap = first + second
    elif market_cap_string[-1] == 'B':
        split = market_cap_string.split('.')
        first = int(split[0]) * (10**9)
        second = int(split[1].replace('B', '')) * (10**6)
        market_cap = first + second
    elif market_cap_string[-1] == 'M':
        split = market_cap_string.split('.')
        first = int(split[0]) * (10**6)
        second = int(split[1].replace('M', '')) * (10**3)
        market_cap = first + second
    data = {
        'market_cap': market_cap,
        'fifty_two_week_low': fifty_two_week_low,
        'fifty_two_week_high': fifty_two_week_high
    }
    return data
def nasdaq_other_data(t):
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    from selenium import webdriver
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('user-agent={0}'.format(user_agent))
    capa = DesiredCapabilities.CHROME
    capa["pageLoadStrategy"] = "none"
    driver = webdriver.Chrome(options=options, desired_capabilities=capa)
    driver.set_window_size(1440, 900)
    driver.get('https://www.nasdaq.com/market-activity/stocks/{}'.format(t))
    time.sleep(2)
    driver.execute_script("window.scrollTo(0, 800)")
    time.sleep(4)
    plain_text = driver.page_source
    driver.quit()
    # find table
    only_table_class = ss(class_="summary-data__cell")
    table = bs(plain_text, 'html.parser', parse_only=only_table_class)
    cell_list = []
    for x in table:
        cell_list.append(x.text)
    fifty_two_week_high_low = cell_list[8].split('/')
    # find 52-week low
    fifty_two_week_low = float(fifty_two_week_high_low[1].replace('$', ''))
    # find 52-week high
    fifty_two_week_high = float(fifty_two_week_high_low[0].replace('$', ''))
    # market cap
    market_cap = int(cell_list[9].replace(',', ''))
    return {
        'fifty_two_week_low': fifty_two_week_low,
        'fifty_two_week_high': fifty_two_week_high,
        'market_cap': market_cap
    }
def check_url(self, url):
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36')
    try:
        html = urllib.request.urlopen(req).read()
    except urllib.error.HTTPError as e:
        html = e.read()
    soup = bs(html, 'html.parser', parse_only=ss('a'))
    link = soup.find_all('a', attrs={'href': re.compile(self.domain)})
    if len(link) > 0:
        # a link from the domain was found
        for i in link:
            if i.has_attr('rel'):
                return 'NOFOLLOWED'
            else:
                return 'EXISTS'
    else:
        return 'REMOVED'
def find_price_marketwatch(t):
    import requests
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    header = {'user-agent': user_agent}
    url = 'https://www.marketwatch.com/investing/stock/{}?mod=over_search'
    f = requests.get(url.format(t), headers=header)
    time.sleep(2)
    only_class = ss(class_='intraday__data')
    soup = bs(f.content, 'html.parser', parse_only=only_class)
    price = float(soup.find(class_='value').text)
    return {'price': price}
def fromMonthPage(url):
    """
    return: [{"title": item_title, "url": url_of_next_page}, more...]
    """
    rlt = []
    request = urllib2.Request(url, headers=Common.HEADER)
    try:
        html = urllib2.urlopen(request).read()
    except:
        if Common.DEBUG:
            print "Error: Reading MonthPage From %s" % url
        return rlt
    bx = bs(html, "html.parser", parse_only=ss("div", id="content"))
    aas = bx.find_all("a")
    for a in aas:
        href = a.get("href")
        if re.search(r'(\d\d-){5}\d\d\.html', href):
            rlt.append({"title": a.text, "url": urlparse.urljoin(url, href)})
    return rlt
def find_price_barrons(t):
    import requests
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    header = {'user-agent': user_agent}
    url = 'https://www.barrons.com/quote/stock/{}'
    f = requests.get(url.format(t), headers=header)
    time.sleep(2)
    html_test = f.content
    price_class = ss(class_='market__price bgLast')
    price = float(bs(html_test, 'html.parser', parse_only=price_class).text)
    return {'price': price}
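# Hedged usage sketch (not part of the original sources): the find_price_* scrapers above all
# return {'price': float}, so one way to use them together is to query several and keep the
# median. find_price_consensus is a hypothetical helper name; the selenium-backed scrapers
# (nasdaq, tradingview) additionally need a local Chrome driver, and any scraper may fail when
# its site markup changes, hence the per-source try/except.
def find_price_consensus(t):
    import statistics
    prices = []
    for scraper in (find_price_marketwatch, find_price_barrons,
                    find_price_nasdaq, find_price_tradingview):
        try:
            prices.append(scraper(t)['price'])
        except Exception:
            # a scraper breaks whenever the page layout changes; skip it
            continue
    return {'price': statistics.median(prices)} if prices else None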
def fromItemPage(url):
    """
    return: [{"tor": url_of_torrent_page, "img": [url_of_one_img, url_of_other_img], "purl": parent_url}, {more...}]
    """
    request = urllib2.Request(url, headers=Common.HEADER)
    rlt = []
    try:
        html = urllib2.urlopen(request).read()
    except:
        if Common.DEBUG:
            print "Error: Reading %s" % url
        return {url: rlt}
    bx = bs(html, "html.parser", parse_only=ss("div", id="content"))
    nexxt = bx.find(["a", "img"])
    while nexxt:
        if nexxt.name == "a":
            try:
                # fails when the link is a picture instead of a link to a torrent download page
                nexxt.img.get("src")
            except:
                # to get a torrent page's link
                if len(rlt) != 0:
                    href = nexxt.get("href").strip()
                    if not rlt[-1].get("tor", None) and re.search(r'([A-Z0-9]{6,10}\.html$)|([a-z0-9]{16}\.html$)', href):
                        # get the first tor; the page link looks like XJKJDL.htm or eab34dfa8ab.html
                        rlt[-1]["tor"] = href
        elif nexxt.name == "img":
            src = nexxt.get("src").strip()
            if re.search(r'jpg$', src, re.I):
                if len(rlt) == 0 or rlt[-1].get("tor", None):
                    rlt.append({"img": [src]})
                else:
                    rlt[-1]["img"].append(src)
        nexxt = nexxt.find_next(["a", "img"])
    return {url: rlt}
from bs4 import BeautifulSoup as bs
from bs4 import SoupStrainer as ss
import requests
import sys

# request page, then load only the div tags with 'menu-item' class attribute into memory
url = requests.get('http://www.teiteirobata.com/new-page-2/')
items = bs(url.text, 'html.parser', parse_only=ss('div', class_='menu-item')).contents

# create menu dictionary, then fill it with menu items and their descriptions
menu = {}
for x in range(len(items)):
    # checks to see if a description is available for the menu item
    # note: items with '3' as their length do not have descriptions
    if len(items[x]) == 3:
        item = str(items[x].contents[1].contents[0]).strip()
        desc = ''
    else:
        item = str(items[x].contents[1].contents[0]).strip()
        desc = str(items[x].contents[3].contents[0]).strip()
    menu[item] = desc


# function to perform a search on the menu dictionary
# note: default search is for carpaccio, because it is awesome.
def find_in_menu(sub='carpaccio'):
    try:
        search = [s for s in menu if sub in s]
        result_item, result_desc = str(search[0]), menu[str(search[0])]
        print(result_item.title() + '\n' + result_desc)
    except:
        print('Sorry, "' + str(sub) + '" is not on the menu today.')
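# Hedged example calls for find_in_menu above (not in the original script): 'salmon' is an
# arbitrary search term and may or may not be on the scraped menu; if it is missing, the
# function falls back to its "not on the menu today" message.
find_in_menu()          # default search: carpaccio
find_in_menu('salmon')  # any substring of a menu item name works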
import bleach
import re
import csv
import urllib as ul
import urllib.request
from bs4 import BeautifulSoup as bs
from bs4 import SoupStrainer as ss

# open csv and write the header row
with open('<csv file goes here>', 'w') as fout:
    w = csv.writer(fout)
    w.writerow([
        'URL', 'Keyterm Density', 'Keyterm Score', 'PageSpeed Score',
        'Backlink Score', 'Content Score', 'Total Score'
    ])

# open sitemap
with ul.request.urlopen('<domain/sitemap url goes here>') as response:
    html = response.read()

# extract sitemap links within the domain
articles = ss('a')
linklist = []
soup = bs(html, "lxml", parse_only=articles)
for link in soup.find_all('a', href=True):
    linklist.append(link['href'])

clearflags = ['/', '/rss', '#weatherinline', '/happenings/', '/sitemap']
clearlist = [x for x in linklist if '.' not in x]
clearlinks = [y for y in clearlist if y not in clearflags]

finallinks = []
a = 0
while a < len(clearlinks):
    fulllink = '<domain url goes here>' + clearlinks[a]
    finallinks.append(fulllink)
    a = a + 1
def tradingview_other_data(t):
    from bs4 import BeautifulSoup as bs
    from bs4 import SoupStrainer as ss
    from selenium import webdriver
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    import time
    from fake_useragent import UserAgent

    ua = UserAgent()
    user_agent = ua.random
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('user-agent={0}'.format(user_agent))
    capa = DesiredCapabilities.CHROME
    capa["pageLoadStrategy"] = "none"
    driver = webdriver.Chrome(options=options, desired_capabilities=capa)
    driver.set_window_size(1440, 900)
    driver.get('https://www.tradingview.com/symbols/{}/'.format(t))
    time.sleep(4)
    plain_text = driver.page_source
    driver.quit()
    only_class = ss(class_='tv-widget-fundamentals__value apply-overflow-tooltip')
    found = bs(plain_text, 'html.parser', parse_only=only_class)
    found_list = []
    for x in found:
        found_list.append(x.text)
    # market cap
    market_cap_string = found_list[0].strip()
    # find 52-week low
    fifty_two_week_low = float(found_list[23].strip())
    # find 52-week high
    fifty_two_week_high = float(found_list[22].strip())
    split = market_cap_string.split('.')
    if len(market_cap_string) > 1:
        if market_cap_string[-1] == 'T':
            first = int(split[0]) * (10**12)
            second = int(split[1].replace('T', '')) * (10**9)
            market_cap = first + second
        elif market_cap_string[-1] == 'B':
            first = int(split[0]) * (10**9)
            second = int(split[1].replace('B', '')) * (10**6)
            market_cap = first + second
        elif market_cap_string[-1] == 'M':
            first = int(split[0]) * (10**6)
            second = int(split[1].replace('M', '')) * (10**3)
            market_cap = first + second
        elif market_cap_string[-1] == 'K':
            first = int(split[0]) * (10**3)
            second = int(split[1].replace('K', '')) * (10**0)
            market_cap = first + second
        data = {
            'market_cap': market_cap,
            'fifty_two_week_low': fifty_two_week_low,
            'fifty_two_week_high': fifty_two_week_high
        }
    else:
        data = {
            'fifty_two_week_low': fifty_two_week_low,
            'fifty_two_week_high': fifty_two_week_high
        }
    return data
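# Hedged sketch (not in the original code): both marketwatch_other_data and
# tradingview_other_data above turn a suffixed market-cap string such as "1.23B" into an
# integer. parse_market_cap is a hypothetical helper name; it scales the decimal part by
# its actual number of digits instead of the fixed 10^(n-3) factor used in the originals.
def parse_market_cap(cap_string):
    multipliers = {'T': 10**12, 'B': 10**9, 'M': 10**6, 'K': 10**3}
    if not cap_string or cap_string[-1] not in multipliers:
        return None
    suffix = cap_string[-1]
    whole, _, frac = cap_string[:-1].partition('.')
    value = int(whole) * multipliers[suffix]
    if frac:
        # e.g. "1.23B" -> 1 * 10^9 + 23 * 10^7
        value += int(frac) * (multipliers[suffix] // 10**len(frac))
    return value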
from bs4 import BeautifulSoup as bs
from bs4 import SoupStrainer as ss
import urllib as ul
import urllib.request
import bleach
import re

with ul.request.urlopen('http://www.premier-mountain-properties.net/sitemap/') as response:
    html = response.read()

articles = ss('a')
linklist = []
soup = bs(html, "lxml", parse_only=articles)
for link in soup.find_all('a', href=True):
    linklist.append(link['href'])
print(linklist)

clearflags = ['/', '/rss', '#weatherinline', '/happenings/', '/sitemap']
clearlist = [x for x in linklist if '.' not in x]
clearlinks = [y for y in clearlist if y not in clearflags]
print(clearlinks)

finallinks = []
a = 0
while a < len(clearlinks):
    fulllink = 'http://www.premier-mountain-properties.net' + clearlinks[a]
    finallinks.append(fulllink)
    a = a + 1
print(finallinks)
print(SC[b] + ": " + str(sctotal)) seccontent = seccontent + sctotal b = b+1 maincontent = maincontent*10 seccontent = seccontent*5 tags = html.count(UN[0]) uncontent = tags - maincontent - seccontent print("Total Unrelated Tags: " + str(uncontent)) #Calculate % of MC and SC. mcper = round((maincontent / uncontent)*100, 2) scper = round((seccontent / uncontent)*100, 2) print("Main: " + str(mcper)) print("Secondary: " + str(scper)) #pull just the article from the html articles = ss('article') #clean all tags from the article soup = bs(html, "lxml", parse_only=articles) article = soup.find_all('article') thing = bleach.clean(str(article), strip=True) thing = re.sub('\W+',' ', thing) thing = re.sub( r"([A-Z])", r" \1", thing) #remove extraneous words from the text text = thing text = text.lower() text = text.split() cleanlist = ["a", "an", "the", "for", "of", "it", "but", "nor", "so", "and", "but", "or", "yet", "is", "to", "at", "i", "if", "as", "in", "by", "on", "li", "ul", "p"] cleantext = [word for word in text if word.lower() not in cleanlist] #count keyterms in the text izer = len(keyterm) x = 0
async def _dict(self, ctx, *, term: str = None):
    """(∩`-´)⊃━☆゚.*・。゚ Search definitions in English using Oxford English Dictionary database

    Usage: {prefix}dict <word> [synonyms|proverbs|examples]"""
    if term is None:
        # Simple usage return for invoking an empty cmd
        sample = random.choice([
            'lecture', 'fantasy', 'gathering', 'gradually', 'international', 'desire'
        ])
        v = f'{ctx.prefix}{ctx.invoked_with} {sample}'
        usage = f'**Usage:** basic results\n{v}\n\n' \
                f'**Advanced Usage:** add any parameter\n{v} `examples` `synonyms` `proverbs` `sourcecode`'
        return await ctx.send(usage)

    await ctx.channel.trigger_typing()
    # We only want to search the first term, the rest is for extra results
    query = ''.join(term.split(' ')[0])
    # we lower it so it works as part of the search link
    url = f"{self.query_url}{query.lower()}"
    # requests code, use the headers to appear like a normal browser
    page = requests.get(url, headers=_HEADERS)
    # This command is EMBED-only, it doesn't work without embed perms
    e = discord.Embed(color=self.user_color)
    x = "https://media.discordapp.net/attachments/541059392951418880/557660549073207296/oxford_favicon.png"
    try:
        e.set_author(
            name=f'Definition of {query.title()} in English by Oxford Dictionaries',
            url=url,
            icon_url=x)
        # SoupStrainer is required to load 1/3 of the page, discarding unnecessary content
        # "gramb" contains the definition, "etym" contains pronunciation and origin
        _section_content = ss(
            "section",
            attrs={"class": ["gramb", "etymology etym", "pronSection etym"]})
        # Then we parse the resulting web page with Beautiful Soup 4
        soup = bs(page.content, "html.parser", parse_only=_section_content,
                  from_encoding="utf-8")

        # ================= Send HTML5 code as a message into chat ====================
        if ctx.message.content.endswith('sourcecode') and query != 'sourcecode':
            # This is mostly for debugging purposes: if the cmd doesn't give a result, check that the code works;
            # if `code` returns empty, the command couldn't find a valid page for {query}
            defs = soup.find('section', attrs={"class": "gramb"})
            # sends page parsed as HTML5
            if defs is not None:
                block = await ctx.send(
                    f'```html\n{defs.prettify()[:1970]}``` Chars: `{len(defs.text)}`')
                await block.add_reaction('\N{WHITE HEAVY CHECK MARK}')

        # ============= Word and its classification and pronunciation ================
        # noun, verb, adjective, adverb, etc...
        classification = soup.find('span', attrs={"class": "pos"})
        if classification is not None:
            cl = f"*`[{classification.text}]`* " or "\u200b"
            e.title = cl  # f"{cl}{query.title()}{pr.replace('/', '')}"

        # =============================================================================
        # first description
        definition = soup.find('span', attrs={"class": "ind"})
        if definition is not None:  # BUG-HUNTER, 1ˢᵗ 2ⁿᵈ 3ʳᵈ, 4ᵗʰ
            # Checks for a definition; if not found, it defaults to the fail-safe description below
            e.description = f"1. {definition.text[:500]}"
            # await ctx.send(first.text[:500])  # BUG-HUNTER

        # ===================== if cmd *args == 'examples' ============================
        if 'examples' in ctx.message.content and query != 'examples':
            example_1 = soup.find('div', attrs={"class": "exg"})  # first example
            if example_1 is not None:
                ex_1 = f'*{example_1.text[1:]}*' or "\u200b"
                try:
                    example_2 = soup.find_all('div', attrs={"class": "exg"})[1]
                    list_1 = example_2.text[1:].replace("’ ‘", "’*\n*‘")
                    ex_2 = f'\n*{list_1}'
                except IndexError:  # ResultSet object has no attribute '.text'
                    ex_2 = "\u200b"
                result = f"{ex_1}{ex_2}"
                # This is merely aesthetic so that it ends with ... or not
                if result[:800].endswith("’"):
                    # We expect it to end well
                    complete = f'{result[:800]}*'
                else:
                    # if it doesn't, then we format it properly here
                    complete = f'{result[:800]}...*'
                e.add_field(name='Examples', value=complete, inline=False)  # BUG-HUNTER

        # ======================= First Synonyms in result =============================
        try:
            # Synonyms for search  # .find_all('strong')
            synonyms_1 = soup.find('div', attrs={"class": "synonyms"})
            if synonyms_1 is not None:
                results = synonyms_1.text
                syns = results.replace('Synonyms', '').replace('View synonyms', '') or "#z"
                if 'synonyms' in ctx.message.content and query != 'synonyms':
                    e.add_field(name='Synonyms', value=f'```bf\n{syns[:460]}```',
                                inline=False)  # BUG-HUNTER
                else:
                    synonyms_2 = soup.find('div', attrs={"class": "exs"})
                    res = synonyms_2.find_all('strong').text
                    e.add_field(name='Synonyms', value=f'```bf\n{res}```',
                                inline=False)  # BUG-HUNTER
                    # await ctx.send(phrases.text[:270])  # BUG-HUNTER
        except AttributeError:  # ResultSet object has no attribute '.text'
            pass

        # ======================= Output proverbs and samples ==========================
        proverb = soup.find('div', attrs={"class": "trg"})
        if proverb is not None:
            try:
                # Proverb, {query} used in sentences
                proverb.find('div', attrs={"span": "sense-registers"})
                x = proverb.text.replace("’ ‘", "’\n‘").replace(". ‘", ".\n\n‘")
                if 'proverbs' in ctx.message.content and query != 'proverbs':
                    # split x and output after 'More example sentences...'
                    z = '’'.join(x.split("’")[3:-4])
                    e.add_field(name='Proverb', value=f"*{z[1:][:960]}...*", inline=False)
                else:
                    z = '’'.join(x.split("’")[3:-2])
                    e.add_field(name='Proverb', value=f"*{z[1:][:240]}...*", inline=False)
                    # return await ctx.send(z[:1600])  # BUG-HUNTER
            except TypeError:  # TypeError: unhashable type: 'slice' in [:260]
                pass

        # =================== Word Origin ETYMOLOGY [working] =========================
        try:
            # etymology & pronunciation
            pronunciation_2 = soup.find('span', attrs={"class": "phoneticspelling"})
            if pronunciation_2 is not None:
                try:
                    classification_2 = soup.find_all(
                        'section', attrs={"class": "etymology etym"})[1].find('p').text
                    msg = f'\n**Origin:** *{classification_2}*'
                except IndexError:  # ResultSet object has no attribute '.text'
                    msg = ""
                pro = f"**Pronunciation:** `({pronunciation_2.text})`" or "N/A"
                e.add_field(name=f'Etymology of {query.title()}',
                            value=f"{pro.replace('/', '')}{msg[:750]}", inline=False)
                # await ctx.send(msg[:750])  # BUG-HUNTER
        except IndexError:  # ResultSet object has no attribute '.text'
            pass

        # ================== copyright acknowledgments ================================
        e.set_footer(
            text=f'Oxford University Press © 2020 | Duration: {self.bot.ws.latency * 1000:.2f} ms')

        # ================== Fail-safe for words without a definition =================
        if not definition:
            e.description = f"Whoopsie! I couldn't find a definition for *{query}*.\n" \
                            f"Check spelling, or look for a variation of {query} as verb, noun, etc."

        try:
            return await ctx.send(embed=e)
        except Exception as e:
            tb = traceback.format_exc()
            return await ctx.send(
                f'```css\n[DAFUQ]\n{e}```\n```py\n、ヽ`、ヽ`个o(・・。)`ヽ、`ヽ、\n\n{tb}```')
        # await ctx.message.add_reaction('thankful:389969145019498502')
    except Exception as e:
        tb = traceback.format_exc()
        return await ctx.send(
            f'```css\n[OOPS, I DID IT AGAIN]\n{e}```\n```py\nヾ(゚∀゚○)ツ三ヾ(●゚д゚)ノ\n\n{tb}```')
from bs4 import BeautifulSoup as bs, SoupStrainer as ss
from lxml.cssselect import CSSSelector
import const
import lxml.html as lh
import string

BOX_SELECTOR = CSSSelector(".center a ")
UNFORMATTED_URL = "https://www.basketball-reference.com{}"
basic_stats_strainer = ss("table", id=lambda x: x and x.endswith('-game-basic'))


def extract_box_scores_from_raw_box_score_pages(authorizer, season):
    '''
    takes raw html and returns a list of string matrices
    '''
    steve_cur = authorizer.conn.cursor()
    steve_cur.execute("""
        select url,raw_html
        from raw_box_score_pages
        where (season,type) = (%s,%s)
        """, (season, const.BOX_SCORE_PAGE_TYPE))
    box_scores = []
    for url, raw_html in steve_cur:
        game_soup = bs(raw_html, 'lxml', parse_only=basic_stats_strainer)
        date = url[47:55]
        game_stats = []
        scores = []
        teams = []
        for table in game_soup.find_all('table'):
            team = table.get('id')
            team = team[4:7]
            teams.append(team)