Exemplo n.º 1
0
    def process_item(self, item, spider):
        """Store a scraped note and its images, then hand the item back.

        Items whose slug appears in ``BLOCK_LIST`` are returned untouched.
        For every other item, one ``Image`` row is inserted per URL in
        ``item["images"]``, each occurrence of that URL in the content is
        rewritten to ``../images/<path>``, and a ``Note`` row is inserted
        with the rewritten content.

        Args:
            item: Mapping that provides the keys ``slug``, ``title``,
                ``url``, ``content``, ``images``, ``likes_count`` and
                ``views_count``.
            spider: Not used by this method (presumably required by the
                pipeline interface of the crawling framework -- confirm).

        Returns:
            The same ``item`` object that was passed in.
        """
        if item["slug"] in BLOCK_LIST:
            return item

        # Progress message on stdout; the literal is kept exactly as-is.
        print(u"抓取完毕: %s" % item["title"])

        # Replace each remote image URL in the content with its local path.
        content = item["content"]
        for img in item["images"]:
            path = get_image_name(img)
            # NOTE(review): this insert sits outside the try block below, so
            # any error it raises propagates to the caller -- confirm that
            # this is intended.
            Image.insert(slug=item["slug"], url=img, path=path).execute()
            content = content.replace(img, '../images/%s' % path)

        try:
            Note.insert(
                title=item["title"],
                slug=item["slug"],
                url=item["url"],
                content=content,
                likes_count=int(item["likes_count"]),
                views_count=int(item["views_count"]),
            ).execute()
        except IntegrityError as e:
            # The note could not be stored (integrity violation): log it and
            # keep going. ``warning`` replaces the deprecated ``warn`` alias,
            # and the arguments are passed lazily so the message is only
            # rendered when the record is actually emitted.
            logger.warning('%s SKIP E: (%s)', dict(item), e)

        return item
Exemplo n.º 2
0
    def process_item(self, item, spider):
        """Persist one scraped note together with its image records.

        An item whose slug is in ``BLOCK_LIST`` is returned immediately with
        nothing written. Otherwise an ``Image`` row is inserted for each URL
        in ``item["images"]``, the URL is replaced inside the content by
        ``../images/<path>``, and finally a ``Note`` row holding the
        rewritten content is inserted. An ``IntegrityError`` raised by the
        ``Note`` insert is logged as a warning and not re-raised.

        Args:
            item: Mapping with the keys ``slug``, ``title``, ``url``,
                ``content``, ``images``, ``likes_count`` and ``views_count``.
            spider: Unused here.

        Returns:
            The ``item`` that was passed in, in every code path.
        """
        if item["slug"] in BLOCK_LIST:
            return item

        # Console progress output; the string literal is runtime text and is
        # left unchanged.
        print(u"抓取完毕: %s" % item["title"])

        # Point image references in the content at the local copies.
        content = item["content"]
        for img in item["images"]:
            path = get_image_name(img)
            Image.insert(slug=item["slug"], url=img, path=path).execute()
            content = content.replace(img, '../images/%s' % path)

        try:
            Note.insert(title=item["title"],
                        slug=item["slug"],
                        url=item["url"],
                        content=content,
                        likes_count=int(item["likes_count"]),
                        views_count=int(item["views_count"])).execute()
        except IntegrityError as e:
            # Use ``warning`` (``warn`` is a deprecated alias) and let the
            # logging module do the %-formatting only if the record is
            # emitted; the resulting message text is the same.
            logger.warning('%s SKIP E: (%s)', dict(item), e)

        return item