示例#1
0
def scrapeMoneySection(URL):
    """Scrape a section page of CNN Money and return its article links.

    The page at ``URL`` is fetched and parsed. The first ``div`` with class
    ``cnnHeadline`` and then every ``<h2>`` element are passed through
    ``checkVal(element, True)``; results that are ``None`` are skipped.

    NOTE(review): ``checkVal`` is defined outside this block. It is presumed
    to return the article link for an element, or ``None`` when there is no
    usable link -- confirm against its definition. It is also called with
    ``None`` when the page has no ``cnnHeadline`` div, so it must accept that.

    Args:
        URL: Address of the CNN Money section page to scrape.

    Returns:
        A list of the non-``None`` ``checkVal`` results: the headline first
        (when present), followed by the ``<h2>`` results in the order
        ``find_all`` yields them.
    """
    # Close the HTTP response explicitly rather than leaving it to the
    # garbage collector; BeautifulSoup consumes the whole document when
    # it is constructed, so the handle is not needed afterwards.
    response = urllib.urlopen(URL)
    try:
        soup = BeautifulSoup(response)
    finally:
        response.close()

    links = []

    headline = checkVal(soup.find('div', attrs={'class': 'cnnHeadline'}), True)
    if headline is not None:
        links.append(headline)

    for heading in soup.find_all('h2'):
        link = checkVal(heading, True)
        if link is not None:
            links.append(link)

    return links
示例#2
0
def scrapeSection(URL):
    """Scrape a section of CNN.com (except the tech section) for article links.

    The page at ``URL`` is fetched and parsed. The first ``div`` with class
    ``zn-banner`` and then every ``<h3 class="cd__headline">`` element are
    passed through ``checkVal(element, False)``; results that are ``None``
    are skipped.

    NOTE(review): ``checkVal`` is defined outside this block. It is presumed
    to return the article link for an element, or ``None`` when there is no
    usable link -- confirm against its definition. It is also called with
    ``None`` when the page has no ``zn-banner`` div, so it must accept that.

    Args:
        URL: Address of the CNN.com section page to scrape.

    Returns:
        A list of the non-``None`` ``checkVal`` results: the banner first
        (when present), followed by the headline results in the order
        ``find_all`` yields them.
    """
    # Close the HTTP response explicitly rather than leaving it to the
    # garbage collector; BeautifulSoup consumes the whole document when
    # it is constructed, so the handle is not needed afterwards.
    response = urllib.urlopen(URL)
    try:
        soup = BeautifulSoup(response)
    finally:
        response.close()

    links = []

    banner = checkVal(soup.find('div', attrs={'class': 'zn-banner'}), False)
    if banner is not None:
        links.append(banner)

    for heading in soup.find_all('h3', attrs={'class': 'cd__headline'}):
        link = checkVal(heading, False)
        if link is not None:
            links.append(link)

    return links
示例#3
0
def scrapeSection(URL):
    """Scrape a section page and return its de-duplicated headline results.

    The page at ``URL`` is fetched and parsed, and every
    ``<h2 class="article-headline">`` element is passed through
    ``checkVal(element, False)``. Duplicate results are collapsed.

    NOTE(review): ``checkVal`` is defined outside this block and is presumed
    to return the article link for an element -- confirm. Its results are
    NOT filtered here, so if it can return ``None`` the returned list may
    contain a ``None`` entry.

    Args:
        URL: Address of the section page to scrape.

    Returns:
        A list of the distinct ``checkVal`` results. Because the values pass
        through a ``set``, their order is arbitrary and they must be
        hashable.
    """
    # Close the HTTP response explicitly rather than leaving it to the
    # garbage collector; BeautifulSoup consumes the whole document when
    # it is constructed, so the handle is not needed afterwards.
    response = urllib.urlopen(URL)
    try:
        soup = BeautifulSoup(response)
    finally:
        response.close()

    headlines = soup.find_all('h2', attrs={'class': 'article-headline'})
    # A set removes duplicate links before converting back to a list.
    return list({checkVal(headline, False) for headline in headlines})