def scrapeMoneySection(URL):
    """Scrape a section page of CNN Money and return its article links.

    The page at ``URL`` is fetched and parsed with BeautifulSoup.  The
    ``div`` with class ``cnnHeadline`` (if any) and every ``h2`` element
    are each passed to ``checkVal(element, True)``; every result that is
    not ``None`` is collected.

    Args:
        URL: Address of the section page to fetch.

    Returns:
        A list of the non-None ``checkVal`` results, with the headline
        result (when present) first, followed by the ``h2`` results in
        document order.
    """
    from contextlib import closing

    # The urlopen response is not a context manager on older Pythons, so
    # wrap it in closing() to make sure the connection is released once
    # BeautifulSoup has read the markup.
    with closing(urllib.urlopen(URL)) as response:
        soup = BeautifulSoup(response)

    linkList = []

    # soup.find may return None here; checkVal receives it as-is, exactly
    # as before.
    header = checkVal(soup.find('div', attrs={'class': 'cnnHeadline'}), True)
    if header is not None:
        linkList.append(header)

    for heading in soup.find_all('h2'):
        link = checkVal(heading, True)
        if link is not None:
            linkList.append(link)

    return linkList
def scrapeSection(URL):
    """Scrape any section of CNN.com except the tech section.

    The page at ``URL`` is fetched and parsed with BeautifulSoup.  The
    ``div`` with class ``zn-banner`` (if any) and every ``h3`` element
    with class ``cd__headline`` are each passed to
    ``checkVal(element, False)``; every result that is not ``None`` is
    collected.

    Args:
        URL: Address of the section page to fetch.

    Returns:
        A list of the non-None ``checkVal`` results, with the banner
        result (when present) first, followed by the headline results in
        document order.
    """
    # NOTE(review): a second function with this same name is visible right
    # after this one; if both live in the same module, the later definition
    # replaces this one at import time -- confirm which is intended.
    from contextlib import closing

    # The urlopen response is not a context manager on older Pythons, so
    # wrap it in closing() to make sure the connection is released once
    # BeautifulSoup has read the markup.
    with closing(urllib.urlopen(URL)) as response:
        soup = BeautifulSoup(response)

    linkList = []

    # soup.find may return None here; checkVal receives it as-is, exactly
    # as before.
    header = checkVal(soup.find('div', attrs={'class': 'zn-banner'}), False)
    if header is not None:
        linkList.append(header)

    for heading in soup.find_all('h3', attrs={'class': 'cd__headline'}):
        link = checkVal(heading, False)
        if link is not None:
            linkList.append(link)

    return linkList
def scrapeSection(URL):
    """Scrape a section page and return its unique article links.

    The page at ``URL`` is fetched and parsed with BeautifulSoup.  Every
    ``h2`` element with class ``article-headline`` is passed to
    ``checkVal(element, False)``.  Results that are ``None`` are dropped
    and duplicates are removed.

    Args:
        URL: Address of the section page to fetch.

    Returns:
        A list of the distinct non-None ``checkVal`` results, in the order
        they first appear in the document.
    """
    # NOTE(review): an earlier function with this same name is visible just
    # before this one; if both live in the same module, this definition
    # replaces it at import time -- confirm which is intended.
    from contextlib import closing

    # The urlopen response is not a context manager on older Pythons, so
    # wrap it in closing() to make sure the connection is released once
    # BeautifulSoup has read the markup.
    with closing(urllib.urlopen(URL)) as response:
        soup = BeautifulSoup(response)

    seen = set()
    linkList = []
    for heading in soup.find_all('h2', attrs={'class': 'article-headline'}):
        link = checkVal(heading, False)
        # Skip None so it never ends up in the result, and skip repeats so
        # each link is reported once in a stable, document-ordered list
        # (a plain set() gives arbitrary ordering).
        if link is None or link in seen:
            continue
        seen.add(link)
        linkList.append(link)

    return linkList