示例#1
0
def scrape(url):
    """Scrape a recipe listing page and store every recipe found on it.

    Each ``div`` with class ``txt-container`` on the listing is treated as
    one recipe entry. The title and ingredients are read from the listing
    entry itself; the rating and review count are read from the recipe's
    own page, whose path is the ``href`` of the entry's parent element.

    Args:
        url: Address of the listing page. It is also recorded as the
            ``source`` of every stored recipe.

    Entries that have no title element or no ``href`` on their parent are
    skipped. A missing rating or review count is stored as ``0``.
    """
    listingSoup = BeautifulSoup(requests.get(url).text)

    # Ingredient text clean-up, applied in this order: drop newlines and
    # bullet dots, and spell out the vulgar-fraction characters.
    replacements = (
        ("\n", ""),
        (u"·", ""),
        (u"½", "1/2"),
        (u"¾", "3/4"),
        (u"¼", "1/4"),
        (u"⅓", "1/3"),
    )

    for entry in listingSoup.find_all("div", {"class": "txt-container"}):
        titleTag = entry.find("div", {"class": "title"})
        link = entry.parent.get("href")
        if titleTag is None or link is None:
            # One malformed entry should not abort the whole listing:
            # without a title or a link there is nothing usable to store.
            continue
        title = titleTag.text.strip()

        recipePage = requests.get("http://www.allthecooks.com" + link)
        recipeSoup = BeautifulSoup(recipePage.text)

        ratingTag = recipeSoup.find("span", {"itemprop": "ratingValue"})
        rating = ratingTag.text if ratingTag is not None else 0

        countTag = recipeSoup.find("span", {"itemprop": "reviewCount"})
        numRatings = countTag.text if countTag is not None else 0

        # NOTE(review): ingredients are taken from the listing entry, not
        # from the fetched recipe page - confirm the listing carries them.
        ingredients = []
        for ingredientTag in entry.find_all("div", {"class": "ingredient"}):
            ingredName = ingredientTag.text
            for old, new in replacements:
                ingredName = ingredName.replace(old, new)
            ingredients.append(ingredName)

        helper.store(recipeName=title, rating=rating, numRatings=numRatings, source=url, ingredients=ingredients)
示例#2
0
def scrapeRecipe(url, title):
    """Scrape a single recipe page and store it under the given title.

    Args:
        url: Address of the recipe page. It is also recorded as the
            ``source`` of the stored recipe.
        title: Recipe name to store; it is not read from the page.

    The rating is the ``content`` attribute of the ``meta`` tag inside the
    rating-stars ``div``. The rating count is the first whitespace-separated
    token of the ``pRatings`` paragraph with thousands separators removed.
    Either value is stored as ``0`` when it cannot be found on the page.
    List items without an ingredient-name span are skipped.
    """
    soup = BeautifulSoup(requests.get(url).text)

    rating = 0
    ratingDiv = soup.find("div", {"class": "rating-stars stars113x20 fl-left"})
    ratingMeta = ratingDiv.find("meta") if ratingDiv is not None else None
    if ratingMeta is not None:
        rating = ratingMeta.get("content", 0)

    numRatings = 0
    ratingsPara = soup.find("p", {"id": "pRatings"})
    if ratingsPara is not None:
        words = ratingsPara.text.split()
        if words:
            numRatings = words[0].replace(",", "")

    ingredients = []
    for item in soup.find_all("li", {"id": "liIngredient"}):
        nameTag = item.find("span", {"class": "ingredient-name"})
        if nameTag is None:
            continue
        # ``.text`` is already decoded text, so it must not be decoded
        # again: doing so made names be dropped instead of stored.
        ingredName = nameTag.text.strip()

        # The amount is optional; prefix it only when the page supplies one.
        amountTag = item.find("span", {"class": "ingredient-amount"})
        if amountTag is not None:
            ingredName = amountTag.text.strip() + " " + ingredName

        ingredients.append(ingredName)

    helper.store(recipeName=title, rating=rating, numRatings=numRatings, source=url, ingredients=ingredients)
示例#3
0
def scrape(url):
    """Scrape a single recipe page and store the recipe it describes.

    Args:
        url: Address of the recipe page. It is also recorded as the
            ``source`` of the stored recipe.

    The title comes from the ``h1`` with ``itemprop="name"``. The rating and
    rating count are the first whitespace-separated token of the ``average``
    and ``total`` spans inside the ``post-ratings`` div; either is stored as
    ``0`` when it cannot be found. The title and every ingredient are
    encoded to ASCII with unencodable characters dropped.
    """
    soup = BeautifulSoup(requests.get(url).text)

    title = soup.find("h1", {"itemprop": "name"}).text.strip().encode("ascii", "ignore")

    ratingsDiv = soup.find("div", {"class": "post-ratings"})

    def firstWord(cssClass):
        """Return the first token of the ratings span with that class, or 0."""
        if ratingsDiv is None:
            return 0
        # The attribute filter must be a dict. A set such as
        # {"class", "average"} is read as a list of class names and would
        # also match any span whose class is literally "class".
        span = ratingsDiv.find("span", {"class": cssClass})
        words = span.text.split() if span is not None else []
        return words[0] if words else 0

    rating = firstWord("average")
    numRatings = firstWord("total")

    ingredients = []
    for ingredientTag in soup.find_all("span", {"itemprop": "ingredient"}):
        parts = ingredientTag.find_all("span")
        if len(parts) > 1:
            amount = parts[0].text
            name = parts[1].text
        elif parts:
            amount = ""
            name = parts[0].text
        else:
            # No nested spans: fall back to the ingredient span's own text
            # instead of failing on an empty list.
            amount = ""
            name = ingredientTag.text

        ingredients.append((amount + " " + name).strip().encode("ascii", "ignore"))

    helper.store(recipeName=title, rating=rating, numRatings=numRatings, source=url, ingredients=ingredients)