Пример #1
0
    def _parse_soup(self):
        """Extract the description and image links from the provider-guide page.

        Reads ``self.soup`` and updates ``self.listing`` with two keys:

        * ``"description"`` -- the result of ``text.body_format`` applied to
          the ``<div id="post_bodytext">`` element.
        * ``"images"`` -- a comma-separated string of image URLs taken from
          the ``href`` of each anchor inside ``<div id="post_photos">``.
          It is an empty string when the container is missing or holds no
          usable links.
        """
        # NOTE(review): if the body div is absent, find() presumably returns
        # None and that value is handed straight to text.body_format --
        # confirm the helper copes with it.
        body = text.body_format(self.soup.find("div", {"id": "post_bodytext"}))

        site_root = "http://www.myproviderguide.com"
        images = []
        photo_box = self.soup.find("div", {"id": "post_photos"})
        if photo_box:
            for anchor in photo_box.find_all("a"):
                # Anchors without an href carry no link; indexing attrs
                # directly would raise KeyError and abort the whole parse.
                href = anchor.attrs.get("href")
                if not href:
                    continue

                already_absolute = (
                    "myproviderguide.com" in href
                    or href.startswith(("http://", "https://"))
                )
                if already_absolute:
                    # Prefixing a full URL with the site root would corrupt it.
                    images.append(href)
                else:
                    # Site-relative path: make sure a slash joins root and path.
                    separator = "" if href.startswith("/") else "/"
                    images.append(site_root + separator + href)

        self.listing.update({
            "description": body,
            "images": ",".join(images),
        })
Пример #2
0
    def _parse_soup(self):
        """Pull the description and image URLs out of the parsed listing page.

        Reads ``self.soup`` and writes into ``self.listing``:

        * ``"description"`` -- set only when a ``<div class="postingBody">``
          exists and ``text.body_format`` returns a truthy value for it;
          otherwise the key is left untouched.
        * ``"images"`` -- always set, and always a string: the
          ``text.image_format``-ed ``src`` of every ``<img>`` inside
          ``<ul id="viewAdPhotoLayout">``, joined with commas. It is ``""``
          when the gallery is missing or contains no usable images.
        """
        # Description: keep whatever is already stored unless we find real text.
        posting = self.soup.find("div", "postingBody")
        if posting:
            description = text.body_format(posting)
            if description:
                self.listing["description"] = description

        # Images: gather the raw src values first so that a missing gallery
        # and an empty gallery end up with the same (string) result.
        sources = []
        gallery = self.soup.find("ul", {"id": "viewAdPhotoLayout"})
        if gallery:
            for img in gallery.find_all("img"):
                # Skip <img> tags with no src instead of raising KeyError.
                src = img.attrs.get("src")
                if src:
                    sources.append(src)

        self.listing["images"] = ",".join(
            [text.image_format(src) for src in sources]
        )