Пример #1
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per ``div.recipe-info`` element.

        The id is the third ``/``-separated piece of the ``a.title`` link's
        ``href`` and the title is that link's text.  The thumbnail is the
        first ``img`` found under the parent of the info block, kept only
        when its ``src`` starts with ``http://``.  The short description is
        always ``NotAvailable``; the detail fields are left ``NotLoaded``.
        """
        select = self.parser.select
        for info in select(self.document.getroot(), 'div.recipe-info'):
            # An absent image yields an empty src, which fails the scheme
            # test below and therefore leaves the thumbnail unavailable.
            images = select(info.getparent(), 'img')
            src = unicode(images[0].attrib.get('src', '')) if len(images) > 0 else u''
            thumbnail_url = src if src.startswith('http://') else NotAvailable

            anchor = select(info, 'a.title', 1)
            title = unicode(anchor.text)
            recipe_id = unicode(anchor.attrib.get('href', '').split('/')[2])

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = NotAvailable
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #2
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per two-cell ``div.m_search_result``.

        Results whose number of ``td`` cells is not exactly two are skipped.
        The first cell supplies the thumbnail (first ``img``'s ``src``); the
        second supplies the title link and the ``div.m_search_result_part4``
        text used as short description.  Detail fields are ``NotLoaded``.
        """
        select = self.parser.select
        for result in select(self.document.getroot(), 'div.m_search_result'):
            cells = select(result, 'td')
            if len(cells) != 2:
                continue
            image_cell = cells[0]
            text_cell = cells[1]

            thumbnail_url = NotAvailable
            images = select(image_cell, 'img')
            if len(images) > 0:
                thumbnail_url = unicode(images[0].attrib.get('src', ''))

            anchor = select(text_cell, 'div.m_search_titre_recette a', 1)
            title = unicode(anchor.text)
            # The id is the href with its '.aspx' suffix and the
            # '/recettes/recette_' prefix removed.
            href = anchor.attrib.get('href', '')
            recipe_id = href.replace('.aspx', '').replace('/recettes/recette_', '')

            summary = select(text_cell, 'div.m_search_result_part4', 1)
            short_description = unicode(' '.join(summary.text.strip().split('\n')))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            for field in ('instructions', 'author', 'ingredients',
                          'nb_person', 'cooking_time', 'preparation_time'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #3
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per ``div.result-recipe`` element.

        The thumbnail comes from ``a.pull-image-left img`` and is kept only
        when its ``src`` starts with ``http://``.  Title and id come from the
        ``div.result-text a`` link (id = third ``/``-separated piece of its
        ``href``); the short description is the text content of
        ``div.result-text p``.  Detail fields are left ``NotLoaded``.
        """
        select = self.parser.select
        for result in select(self.document.getroot(), 'div.result-recipe'):
            images = select(result, 'a.pull-image-left img')
            src = unicode(images[0].attrib.get('src', '')) if len(images) > 0 else u''
            thumbnail_url = src if src.startswith('http://') else NotAvailable

            anchor = select(result, 'div.result-text a', 1)
            title = unicode(anchor.text)
            recipe_id = unicode(anchor.attrib.get('href', '').split('/')[2])

            paragraph = select(result, 'div.result-text p', 1)
            short_description = unicode(paragraph.text_content())

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #4
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per ``div.recette_description > div.data``.

        Blocks without a ``div.info > p.title > a.fn`` link are skipped.  The
        short description is the whitespace-normalised text of
        ``div.infos_column`` followed by `` <on>/<total>``, where ``total``
        counts the images in that column and ``on`` counts those whose
        ``src`` ends with ``euro_on.png``.  Detail fields are ``NotLoaded``.
        """
        select = self.parser.select
        for data in select(self.document.getroot(), 'div.recette_description > div.data'):
            anchors = select(data, 'div.info > p.title > a.fn')
            if len(anchors) == 0:
                continue

            title = unicode(anchors[0].text)
            # The id is whatever follows the last '=' in the href of the
            # ``div.carnet-add a`` link, not the title link's own href.
            add_link = select(data, 'div.carnet-add a', 1)
            recipe_id = unicode(add_link.attrib.get('href', '').split('=')[-1])

            thumbnail_url = NotAvailable
            pictures = select(data, 'img.recipe-image')
            if len(pictures) > 0:
                thumbnail_url = unicode(pictures[0].attrib.get('src', ''))

            column = select(data, 'div.infos_column', 1)
            short_description = unicode(' '.join(column.text_content().split()).strip())
            cost_images = select(data, 'div.infos_column img')
            cost_on = sum(1 for img in cost_images
                          if img.attrib.get('src', '').endswith('euro_on.png'))
            short_description += u' %s/%s' % (cost_on, len(cost_images))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #5
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per two-cell ``div.m_search_result``.

        A result is used only when it holds exactly two ``td`` cells: the
        first provides the thumbnail image, the second the title link and
        the ``div.m_search_result_part4`` description text.  All detail
        fields of the yielded recipe are ``NotLoaded``.
        """
        select = self.parser.select
        root = self.document.getroot()
        for result in select(root, 'div.m_search_result'):
            cells = select(result, 'td')
            if len(cells) != 2:
                continue
            picture_cell = cells[0]
            info_cell = cells[1]

            pictures = select(picture_cell, 'img')
            if len(pictures) > 0:
                thumbnail_url = unicode(pictures[0].attrib.get('src', ''))
            else:
                thumbnail_url = NotAvailable

            title_link = select(info_cell, 'div.m_search_titre_recette a', 1)
            title = unicode(title_link.text)
            # Strip the '.aspx' suffix and the '/recettes/recette_' prefix
            # from the href to obtain the id.
            recipe_id = title_link.attrib.get('href', '')
            recipe_id = recipe_id.replace('.aspx', '')
            recipe_id = recipe_id.replace('/recettes/recette_', '')

            part4 = select(info_cell, 'div.m_search_result_part4', 1)
            description_lines = part4.text.strip().split('\n')
            short_description = unicode(' '.join(description_lines))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            for field in ('instructions', 'author', 'ingredients',
                          'nb_person', 'cooking_time', 'preparation_time'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #6
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, times, ingredients, instructions,
                  picture and comments read from the page; ``author`` is
                  ``NotAvailable`` and ``thumbnail_url`` is ``NotLoaded``
        :raises ValueError: if the prep/cook time text (or the person count)
                            is not an integer, since it is passed to ``int``
        """
        root = self.document.getroot()
        select = self.parser.select

        title = unicode(select(root, 'h1.m_title', 1).text_content().strip())
        main = select(root, 'div.m_content_recette_main', 1)
        preparation_time = int(
            select(main, 'p.m_content_recette_info span.preptime', 1).text_content())
        cooking_time = int(
            select(main, 'p.m_content_recette_info span.cooktime', 1).text_content())

        # The person count is the first token after 'pour ' in the
        # ingredients header, when that header has a '(pour ...)' part.
        nb_person = NotAvailable
        header = select(main, 'p.m_content_recette_ingredients span', 1).text_content()
        if '(pour' in header and ')' in header:
            count = header.split('pour ')[-1].split('personnes)')[0].split()[0]
            nb_person = [int(count)]

        # Everything before the first '- ' marker is dropped.
        ingredients_text = select(main, 'p.m_content_recette_ingredients', 1).text_content()
        ingredients = ingredients_text.strip().split('- ')[1:]

        raw_instructions = select(main, 'div.m_content_recette_todo', 1).text_content().strip()
        instructions = u''.join(
            '%s\n' % line.strip() for line in raw_instructions.split('\n')).strip('\n')

        picture_url = NotAvailable
        illustrations = select(root, 'a.m_content_recette_illu img')
        if len(illustrations) > 0:
            picture_url = unicode(illustrations[0].attrib.get('src', ''))

        comments = NotAvailable
        rows = select(root, 'div.m_commentaire_row')
        if len(rows) > 0:
            comments = []
            for row in rows:
                rate = select(row, 'div.m_commentaire_note span', 1).text.strip()
                user = select(row, 'div.m_commentaire_content span', 1).text.strip()
                body = select(row, 'div.m_commentaire_content p', 1).text.strip()
                comments.append(Comment(author=user, rate=rate, text=body))

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.thumbnail_url = NotLoaded
        recipe.author = NotAvailable
        return recipe
Пример #7
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per ``div.rechRecette`` element.

        The short description is assembled from three parts: an optional
        ``Cost: x/y ; `` prefix derived from counting euro signs in
        ``div.prix``, the text of ``div.rechResume`` with euro signs removed,
        and the text of ``div.rechIngredients``.  Detail fields are left
        ``NotLoaded``.
        """
        select = self.parser.select
        for result in select(self.document.getroot(), 'div.rechRecette'):
            images = select(result, 'img')
            src = unicode(images[0].attrib.get('src', '')) if len(images) > 0 else u''
            thumbnail_url = src if src.startswith('http://') else NotAvailable

            anchor = select(result, 'a.rechRecetTitle', 1)
            title = unicode(anchor.text)
            # Last path component of the href, without its '.aspx' suffix.
            recipe_id = unicode(
                anchor.attrib.get('href', '').split('/')[-1].replace('.aspx', ''))

            cost = u''
            price_blocks = select(result, 'div.prix')
            if len(price_blocks) > 0:
                price = price_blocks[0]
                spans = select(price, 'span')
                # Euro signs inside the first span are subtracted from the
                # total number of euro signs in the whole price block.
                neg_count = unicode(spans[0].text).count(u'€') if len(spans) > 0 else 0
                total_count = unicode(price.text_content()).count(u'€')
                cost = u'Cost: %s/%s ; ' % (total_count - neg_count, total_count)

            resume = unicode(' '.join(
                select(result, 'div.rechResume', 1).text_content().split()).strip()
            ).replace(u'€', '')
            ingredients = unicode(' '.join(
                select(result, 'div.rechIngredients', 1).text_content().split()).strip())
            short_description = cost + resume + u' ' + ingredients

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #8
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per ``div.recette_description > div.data``.

        A block is used only if it has a ``div.info > p.title > a.fn`` link,
        whose text becomes the title.  The short description is the
        whitespace-collapsed ``div.infos_column`` text plus a trailing
        `` <on>/<total>`` built from the images of that column (``on`` =
        images whose ``src`` ends with ``euro_on.png``).  Detail fields are
        ``NotLoaded``.
        """
        root = self.document.getroot()
        select = self.parser.select
        for block in select(root, 'div.recette_description > div.data'):
            title_links = select(block, 'div.info > p.title > a.fn')
            if len(title_links) == 0:
                continue
            title = unicode(title_links[0].text)

            # The id is the part after the last '=' of the
            # ``div.carnet-add a`` href.
            carnet_href = select(block, 'div.carnet-add a', 1).attrib.get('href', '')
            recipe_id = unicode(carnet_href.split('=')[-1])

            photos = select(block, 'img.recipe-image')
            if len(photos) > 0:
                thumbnail_url = unicode(photos[0].attrib.get('src', ''))
            else:
                thumbnail_url = NotAvailable

            infos_text = select(block, 'div.infos_column', 1).text_content()
            euro_images = select(block, 'div.infos_column img')
            euros_on = sum(
                1 for image in euro_images
                if image.attrib.get('src', '').endswith('euro_on.png'))
            short_description = unicode(' '.join(infos_text.split()).strip())
            short_description += u' %s/%s' % (euros_on, len(euro_images))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #9
0
    def iter_recipes(self):
        """Yield one summary ``Recipe`` per ``div.recipe-info`` element.

        Title and id are read from the ``a.title`` link (id = third
        ``/``-separated piece of its ``href``).  The thumbnail is the ``src``
        of the first ``img`` under the block's parent, used only if it starts
        with ``http://``.  The short description is ``NotAvailable`` and the
        detail fields are ``NotLoaded``.
        """
        root = self.document.getroot()
        select = self.parser.select
        for info in select(root, "div.recipe-info"):
            thumbnail_url = NotAvailable
            pictures = select(info.getparent(), "img")
            if len(pictures) > 0:
                candidate = unicode(pictures[0].attrib.get("src", ""))
                # Only URLs with the plain http scheme are accepted.
                if candidate.startswith("http://"):
                    thumbnail_url = candidate

            title_link = select(info, "a.title", 1)
            title = unicode(title_link.text)
            href_parts = title_link.attrib.get("href", "").split("/")
            recipe_id = unicode(href_parts[2])

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = NotAvailable
            for field in ("instructions", "ingredients", "nb_person",
                          "cooking_time", "preparation_time", "author"):
                setattr(recipe, field, NotLoaded)
            yield recipe
Пример #10
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, picture, ingredients, numbered
                  instructions, times (in minutes) and servings read from the
                  page; ``author`` and ``comments`` are ``NotAvailable`` and
                  ``thumbnail_url`` is ``NotLoaded``
        """
        root = self.document.getroot()
        select = self.parser.select

        def total_minutes(hours_css, mins_css):
            # hours * 60 + minutes; a total of 0 is reported as NotAvailable.
            total = 0
            hours = select(root, hours_css)
            if len(hours) > 0:
                total += int(hours[0].text) * 60
            mins = select(root, mins_css)
            if len(mins) > 0:
                total += int(mins[0].text)
            return total if total != 0 else NotAvailable

        title = unicode(select(root, 'h1#itemTitle', 1).text)

        picture_url = NotAvailable
        photos = select(root, 'img#imgPhoto')
        if len(photos) > 0:
            picture_url = unicode(photos[0].attrib.get('src', ''))

        # Ingredients whose stripped text is empty are skipped.
        ingredients = []
        for item in select(root, 'li#liIngredient'):
            text = unicode(item.text_content().strip())
            if text != '':
                ingredients.append(text)

        # Each step becomes "<n>: <text>\n", numbered from 1.
        steps = select(root, 'div.directLeft li')
        instructions = u''.join(
            '%s: %s\n' % (number, step.text_content())
            for number, step in enumerate(steps, start=1))

        preparation_time = total_minutes('span#prepHoursSpan em',
                                         'span#prepMinsSpan em')
        cooking_time = total_minutes('span#cookHoursSpan em',
                                     'span#cookMinsSpan em')

        nb_person = NotAvailable
        yields = select(root, 'span#lblYield[itemprop=recipeYield]')
        if len(yields) > 0 and 'servings' in yields[0].text:
            nb_person = [int(yields[0].text.split()[0])]

        recipe = Recipe(id, title)
        recipe.picture_url = picture_url
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.comments = NotAvailable
        recipe.author = NotAvailable
        recipe.thumbnail_url = NotLoaded
        return recipe
Пример #11
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        Times are read from the ``content`` attribute of the prep/cook time
        spans, taking the number before ``H`` as hours and the number before
        ``M`` as minutes.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, picture, times (in minutes),
                  servings, ingredients and numbered instructions;
                  ``author`` and ``comments`` are ``NotAvailable`` and
                  ``thumbnail_url`` is ``NotLoaded``
        """
        root = self.document.getroot()
        select = self.parser.select

        def to_minutes(spans):
            # No matching span leaves the time unavailable; otherwise the
            # total starts from 0 and adds the H and M parts when present.
            if len(spans) == 0:
                return NotAvailable
            raw = spans[0].attrib.get('content', '')
            total = 0
            if 'H' in raw:
                total += 60 * (int(raw.split('PT')[-1].split('H')[0]))
            if 'M' in raw:
                total += int(raw.split('PT')[-1].split('H')[-1].split('M')[0])
            return total

        title = unicode(select(root, 'h1 span[property$=name]', 1).text)
        main = select(root, 'div[typeof$=Recipe]', 1)

        picture_url = NotAvailable
        illustrations = select(main, 'div.image-with-credit img')
        if len(illustrations) > 0:
            picture_url = unicode(illustrations[0].attrib.get('src', ''))

        preparation_time = to_minutes(select(root, 'span.preptime[property$=prepTime]'))
        cooking_time = to_minutes(select(main, 'span.cooktime[property$=cookTime]'))

        nb_person = NotAvailable
        servings = select(main, 'div.ingredients p.servings')
        if len(servings) > 0:
            # Letters and spaces are stripped from both ends; what is left is
            # either a single number or an 'a/b' pair.
            raw_count = servings[0].text.strip(string.letters+' ')
            nb_person = [int(part) for part in raw_count.split('/')[:2]]

        ingredients = []
        for dotlist in select(main, 'div.ingredients ul.dotlist'):
            for item in select(dotlist, 'li'):
                text = unicode(item.text_content().strip())
                if text != '':
                    ingredients.append(' '.join(text.split()))

        steps = select(root, 'div#recipe-steps-list p.step-details')
        instructions = u''.join(
            '%s: %s\n' % (number, step.text_content())
            for number, step in enumerate(steps, start=1))

        recipe = Recipe(id, title)
        recipe.picture_url = picture_url
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.comments = NotAvailable
        recipe.author = NotAvailable
        recipe.thumbnail_url = NotLoaded
        return recipe
Пример #12
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, times (in minutes), servings,
                  ingredients, instructions, picture, comments and author
                  read from the page; anything not found stays
                  ``NotAvailable`` and ``thumbnail_url`` is ``NotLoaded``
        :raises IndexError: if the page ``<title>`` has no ``' - '``
                            separator (the title is its second piece)
        :raises ValueError: if a time or person-count value is not an integer
        """
        root = self.document.getroot()
        select = self.parser.select

        preparation_time = NotAvailable
        cooking_time = NotAvailable
        nb_person = NotAvailable
        picture_url = NotAvailable
        instructions = NotAvailable
        author = NotAvailable
        comments = NotAvailable

        title = unicode(select(root, 'head > title', 1).text.split(' - ')[1])
        main = select(root, 'div.recette_description', 1)

        for info_title in select(root,
                                 'div.recette_infos div.infos_column strong'):
            # lxml reports missing text as None; normalise label and tail so
            # an empty <strong> or a label with nothing after it is skipped
            # instead of raising.
            label = unicode(info_title.text or u'')
            tail = info_title.tail or u''
            if tail.strip() == '':
                continue
            if u'Temps de préparation' in label:
                preparation_time = int(tail.split()[0])
                # A value expressed in hours is converted to minutes.
                if 'h' in tail:
                    preparation_time = 60 * preparation_time
            if 'Temps de cuisson' in label:
                cooking_time = int(tail.split()[0])
                if 'h' in tail:
                    cooking_time = 60 * cooking_time
            if 'Nombre de personnes' in label:
                nb_person = [int(tail)]

        ingredients = []
        for ing in select(main, 'div.data.top.left > div.content p'):
            ingtxt = unicode(ing.text_content().strip())
            if ingtxt != '':
                ingredients.append(ingtxt)

        lines_instr = select(main, 'div.data.top.right div.content li')
        if len(lines_instr) > 0:
            # One whitespace-normalised line per step.
            instructions = u'\n'.join(
                ' '.join(line.text_content().strip().split())
                for line in lines_instr).strip('\n')

        imgillu = select(root, 'div.resume_recette_illustree img.photo')
        if len(imgillu) > 0:
            picture_url = unicode(imgillu[0].attrib.get('src', ''))

        divcoms = select(root, 'div.comment-outer')
        if len(divcoms) > 0:
            comments = []
            for divcom in divcoms:
                comtxt = unicode(' '.join(
                    divcom.text_content().strip().split()))
                # Per-comment author, reset on every iteration and kept apart
                # from the recipe-level ``author`` so that neither the
                # previous comment nor the recipe is affected by it.
                com_author = None
                if u'| Répondre' in comtxt:
                    comtxt = comtxt.strip('0123456789').replace(
                        u' | Répondre', '')
                    if 'par ' in comtxt:
                        com_author = comtxt.split('par ')[-1].split('|')[0]
                        comtxt = comtxt.replace('par %s' % com_author, '')
                comments.append(Comment(text=comtxt, author=com_author))

        links_author = select(root, 'p.auteur a.couleur_membre')
        if len(links_author) > 0:
            author = unicode(links_author[0].text.strip())

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.author = author
        recipe.thumbnail_url = NotLoaded
        return recipe
Пример #13
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        Preparation and cooking times come from the ``content`` attribute of
        their spans: the number before ``H`` counts as hours, the number
        before ``M`` as minutes, and the result is expressed in minutes.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, picture, times, servings,
                  ingredients and numbered instructions; ``author`` and
                  ``comments`` are ``NotAvailable`` and ``thumbnail_url`` is
                  ``NotLoaded``
        """
        select = self.parser.select
        root = self.document.getroot()

        title = unicode(select(root, 'h1 span[property$=name]', 1).text)
        main = select(root, 'div[typeof$=Recipe]', 1)

        pictures = select(main, 'div.image-with-credit img')
        if len(pictures) > 0:
            picture_url = unicode(pictures[0].attrib.get('src', ''))
        else:
            picture_url = NotAvailable

        # The prep time span is looked up from the document root, the cook
        # time span from the recipe container.
        durations = []
        for scope, css in ((root, 'span.preptime[property$=prepTime]'),
                           (main, 'span.cooktime[property$=cookTime]')):
            minutes = NotAvailable
            spans = select(scope, css)
            if len(spans) > 0:
                minutes = 0
                content = spans[0].attrib.get('content', '')
                after_pt = content.split('PT')[-1]
                if 'H' in content:
                    minutes += 60 * (int(after_pt.split('H')[0]))
                if 'M' in content:
                    minutes += int(after_pt.split('H')[-1].split('M')[0])
            durations.append(minutes)
        preparation_time, cooking_time = durations

        nb_person = NotAvailable
        servings = select(main, 'div.ingredients p.servings')
        if len(servings) > 0:
            # After stripping letters and spaces the text is either one
            # number or an 'a/b' pair.
            numbers = servings[0].text.strip(string.letters + ' ')
            nb_person = [int(piece) for piece in numbers.split('/')[:2]]

        ingredients = []
        for dotlist in select(main, 'div.ingredients ul.dotlist'):
            for entry in select(dotlist, 'li'):
                entry_text = unicode(entry.text_content().strip())
                if entry_text != '':
                    ingredients.append(' '.join(entry_text.split()))

        steps = select(root, 'div#recipe-steps-list p.step-details')
        instructions = u''.join(
            '%s: %s\n' % (position, step.text_content())
            for position, step in enumerate(steps, start=1))

        recipe = Recipe(id, title)
        recipe.picture_url = picture_url
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.comments = NotAvailable
        recipe.author = NotAvailable
        recipe.thumbnail_url = NotLoaded
        return recipe
Пример #14
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, times (in minutes), servings,
                  ingredients, instructions, picture, comments and author
                  read from the page; anything not found stays
                  ``NotAvailable`` and ``thumbnail_url`` is ``NotLoaded``
        :raises IndexError: if the page ``<title>`` has no ``' - '``
                            separator (the title is its second piece)
        :raises ValueError: if a time or person-count value is not an integer
        """
        root = self.document.getroot()
        select = self.parser.select

        preparation_time = NotAvailable
        cooking_time = NotAvailable
        nb_person = NotAvailable
        picture_url = NotAvailable
        instructions = NotAvailable
        author = NotAvailable
        comments = NotAvailable

        title = unicode(select(root, 'head > title', 1).text.split(' - ')[1])
        main = select(root, 'div.recette_description', 1)

        for info_title in select(root, 'div.recette_infos div.infos_column strong'):
            # lxml reports missing text as None; normalise label and tail so
            # an empty <strong> or a label with nothing after it is skipped
            # instead of raising.
            label = unicode(info_title.text or u'')
            tail = info_title.tail or u''
            if tail.strip() == '':
                continue
            if u'Temps de préparation' in label:
                preparation_time = int(tail.split()[0])
                # A value expressed in hours is converted to minutes.
                if 'h' in tail:
                    preparation_time = 60 * preparation_time
            if 'Temps de cuisson' in label:
                cooking_time = int(tail.split()[0])
                if 'h' in tail:
                    cooking_time = 60 * cooking_time
            if 'Nombre de personnes' in label:
                nb_person = [int(tail)]

        ingredients = []
        for ing in select(main, 'div.data.top.left > div.content p'):
            ingtxt = unicode(ing.text_content().strip())
            if ingtxt != '':
                ingredients.append(ingtxt)

        lines_instr = select(main, 'div.data.top.right div.content li')
        if len(lines_instr) > 0:
            # One whitespace-normalised line per step.
            instructions = u'\n'.join(
                ' '.join(line.text_content().strip().split())
                for line in lines_instr).strip('\n')

        imgillu = select(root, 'div.resume_recette_illustree img.photo')
        if len(imgillu) > 0:
            picture_url = unicode(imgillu[0].attrib.get('src', ''))

        divcoms = select(root, 'div.comment-outer')
        if len(divcoms) > 0:
            comments = []
            for divcom in divcoms:
                comtxt = unicode(' '.join(divcom.text_content().strip().split()))
                # Per-comment author, reset on every iteration and kept apart
                # from the recipe-level ``author`` so that neither the
                # previous comment nor the recipe is affected by it.
                com_author = None
                if u'| Répondre' in comtxt:
                    comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
                    if 'par ' in comtxt:
                        com_author = comtxt.split('par ')[-1].split('|')[0]
                        comtxt = comtxt.replace('par %s' % com_author, '')
                comments.append(Comment(text=comtxt, author=com_author))

        links_author = select(root, 'p.auteur a.couleur_membre')
        if len(links_author) > 0:
            author = unicode(links_author[0].text.strip())

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.author = author
        recipe.thumbnail_url = NotLoaded
        return recipe
Пример #15
0
    def get_recipe(self, id):
        """Build a detailed ``Recipe`` from the current recipe page.

        :param id: identifier handed straight to ``Recipe``
        :returns: a ``Recipe`` with title, picture, ingredients, numbered
                  instructions, times (in minutes) and servings read from the
                  page; ``author`` and ``comments`` are ``NotAvailable`` and
                  ``thumbnail_url`` is ``NotLoaded``
        """
        select = self.parser.select
        root = self.document.getroot()

        title = unicode(select(root, 'h1#itemTitle', 1).text)

        photos = select(root, 'img#imgPhoto')
        if len(photos) > 0:
            picture_url = unicode(photos[0].attrib.get('src', ''))
        else:
            picture_url = NotAvailable

        # Ingredients whose stripped text is empty are skipped.
        ingredients = []
        for entry in select(root, 'li#liIngredient'):
            entry_text = unicode(entry.text_content().strip())
            if entry_text != '':
                ingredients.append(entry_text)

        # Each step becomes "<n>: <text>\n", numbered from 1.
        steps = select(root, 'div.directLeft li')
        instructions = u''.join(
            '%s: %s\n' % (position, step.text_content())
            for position, step in enumerate(steps, start=1))

        # Preparation first, then cooking: hours * 60 + minutes, with a
        # total of 0 reported as NotAvailable.
        durations = []
        for hours_css, mins_css in (
                ('span#prepHoursSpan em', 'span#prepMinsSpan em'),
                ('span#cookHoursSpan em', 'span#cookMinsSpan em')):
            total = 0
            hours = select(root, hours_css)
            if len(hours) > 0:
                total += int(hours[0].text) * 60
            mins = select(root, mins_css)
            if len(mins) > 0:
                total += int(mins[0].text)
            durations.append(total if total != 0 else NotAvailable)
        preparation_time, cooking_time = durations

        nb_person = NotAvailable
        yields = select(root, 'span#lblYield[itemprop=recipeYield]')
        if len(yields) > 0 and 'servings' in yields[0].text:
            nb_person = [int(yields[0].text.split()[0])]

        recipe = Recipe(id, title)
        recipe.picture_url = picture_url
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.comments = NotAvailable
        recipe.author = NotAvailable
        recipe.thumbnail_url = NotLoaded
        return recipe
Пример #16
0
    def get_recipe(self, id):
        """Build a ``Recipe`` for ``id`` from the currently loaded document.

        Title, preparation and cooking times, ingredients and instructions
        are read unconditionally, so a page lacking one of those elements
        fails inside ``self.parser.select`` or ``int()`` instead of yielding
        a partial recipe.  The serving count, picture and comments are
        optional and default to ``NotAvailable``.  ``author`` is always
        ``NotAvailable`` and ``thumbnail_url`` is ``NotLoaded``.

        :param id: identifier handed straight to the ``Recipe`` constructor
        :returns: the populated ``Recipe`` object
        """
        select = self.parser.select
        root = self.document.getroot()

        heading = select(root, 'h1.m_title', 1)
        title = unicode(heading.text_content().strip())

        main = select(root, 'div.m_content_recette_main', 1)
        prep_span = select(main, 'p.m_content_recette_info span.preptime', 1)
        preparation_time = int(prep_span.text_content())
        cook_span = select(main, 'p.m_content_recette_info span.cooktime', 1)
        cooking_time = int(cook_span.text_content())

        # The ingredients header may read "(pour N personnes)"; N is the
        # first token found after "pour ".
        nb_person = NotAvailable
        header_span = select(main, 'p.m_content_recette_ingredients span', 1)
        ing_header_line = header_span.text_content()
        if '(pour' in ing_header_line and ')' in ing_header_line:
            after_pour = ing_header_line.split('pour ')[-1]
            count = after_pour.split('personnes)')[0].split()[0]
            nb_person = [int(count)]

        # Ingredients are "- " separated; whatever precedes the first
        # separator is dropped.
        ingredients_par = select(main, 'p.m_content_recette_ingredients', 1)
        ingredients = ingredients_par.text_content().strip().split('- ')[1:]

        # Strip every line individually, then trim surrounding newlines.
        todo = select(main, 'div.m_content_recette_todo', 1)
        raw_lines = todo.text_content().strip().split('\n')
        instructions = u'\n'.join(
            line.strip() for line in raw_lines).strip('\n')

        picture_url = NotAvailable
        illustrations = select(root, 'a.m_content_recette_illu img')
        if illustrations:
            picture_url = unicode(illustrations[0].attrib.get('src', ''))

        def cell_text(row, selector):
            # Stripped direct text of the single element matching selector.
            return select(row, selector, 1).text.strip()

        comments = NotAvailable
        rows = select(root, 'div.m_commentaire_row')
        if rows:
            comments = []
            for row in rows:
                note = cell_text(row, 'div.m_commentaire_note span')
                user = cell_text(row, 'div.m_commentaire_content span')
                content = cell_text(row, 'div.m_commentaire_content p')
                comments.append(Comment(author=user, rate=note, text=content))

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.thumbnail_url = NotLoaded
        recipe.author = NotAvailable
        return recipe
Пример #17
0
    def get_recipe(self, id):
        """Build a ``Recipe`` for ``id`` from the currently loaded document.

        The title and the ``div#ficheRecette`` container are required; the
        picture, preparation/cooking times, serving count, comments and
        recipe author are optional and stay ``NotAvailable`` when their
        elements are missing.  ``thumbnail_url`` is set to ``NotLoaded``.

        The recipe author comes from the first ``span.author`` element only.
        Comment authors are held in a separate variable, so a page that has
        comments but no ``span.author`` reports ``NotAvailable`` as the
        recipe author instead of the name of the last commenter.

        :param id: identifier handed straight to the ``Recipe`` constructor
        :returns: the populated ``Recipe`` object
        :raises ValueError: from the unguarded ``int()`` conversions when a
            time or serving count is not numeric
        """
        root = self.document.getroot()
        select = self.parser.select

        preparation_time = NotAvailable
        cooking_time = NotAvailable
        author = NotAvailable
        nb_person = NotAvailable
        picture_url = NotAvailable
        comments = NotAvailable

        title = unicode(select(
            root, 'div#ficheRecette h1.fn.recetteH1', 1).text)
        main = select(root, 'div#ficheRecette', 1)

        imgillu = select(main, 'div#recetteLeft img.photo')
        if imgillu:
            picture_url = unicode(imgillu[0].attrib.get('src', ''))

        # Times: the first whitespace-separated token of the cleaned text.
        l_spanprep = select(main, 'span.preptime')
        if l_spanprep:
            preparation_time = int(
                self.parser.tocleanstring(l_spanprep[0]).split()[0])
        l_cooktime = select(main, 'span.cooktime')
        if l_cooktime:
            cooking_time = int(
                self.parser.tocleanstring(l_cooktime[0]).split()[0])

        # Serving count is either a single number or a "min/max" pair.
        l_nbpers = select(main, 'td#recipeQuantity span')
        if l_nbpers:
            rawnb = l_nbpers[0].text.split()[0]
            if '/' in rawnb:
                nbs = rawnb.split('/')
                nb_person = [int(nbs[0]), int(nbs[1])]
            else:
                nb_person = [int(rawnb)]

        ingredient_texts = (
            unicode(ing.text_content().strip())
            for ing in select(main, 'div#ingredients li.ingredient'))
        ingredients = [ingtxt for ingtxt in ingredient_texts if ingtxt != '']

        # Each matched div holds a step label; the step text is read from
        # the element that immediately follows it.
        l_divinst = select(main, 'div#preparation span.instructions div')
        instructions = u''.join(
            '%s: %s\n' % (inst.text, inst.getnext().text)
            for inst in l_divinst)

        divcoms = select(root, 'div.comment')
        if divcoms:
            comments = []
            for divcom in divcoms:
                # Deliberately not named ``author``: that local holds the
                # recipe author and must not be clobbered by commenters.
                com_author = unicode(select(
                    divcom, 'div.commentAuthor span', 1).text)
                comtxt = unicode(select(
                    divcom, 'p', 1).text_content().strip())
                comments.append(Comment(author=com_author, text=comtxt))

        spans_author = select(root, 'span.author')
        if spans_author:
            author = unicode(spans_author[0].text_content().strip())

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.author = author
        recipe.thumbnail_url = NotLoaded
        return recipe