Exemplo n.º 1
0
    def process_item(self, item, spider):
        """Store a scraped note and its images, pointing image URLs at local paths.

        Items whose slug is listed in ``BLOCK_LIST`` are returned immediately
        without being stored. Otherwise one ``Image`` row is inserted per image
        URL, every occurrence of that URL in the content is replaced with
        ``../images/<path>``, and a ``Note`` row is inserted with the rewritten
        content.

        Args:
            item: Mapping providing the "slug", "title", "url", "content",
                "images", "likes_count" and "views_count" keys.
            spider: Not used by this method.

        Returns:
            The ``item`` that was passed in; the item itself is not modified.
        """
        if item["slug"] in BLOCK_LIST:
            return item

        print(u"抓取完毕: %s" % item["title"])

        # Replace each remote image URL with its local relative path.
        content = item["content"]
        for img in item["images"]:
            path = get_image_name(img)
            Image.insert(slug=item["slug"], url=img, path=path).execute()
            content = content.replace(img, '../images/%s' % path)

        try:
            Note.insert(
                title=item["title"],
                slug=item["slug"],
                url=item["url"],
                content=content,
                likes_count=int(item["likes_count"]),
                views_count=int(item["views_count"]),
            ).execute()
        except IntegrityError as e:
            # The insert was rejected by a database constraint: log the item
            # and carry on instead of failing. `Logger.warn` was a deprecated
            # alias and is gone in Python 3.13, so call `warning` directly.
            logger.warning('%s SKIP E: (%s)', dict(item), e)

        return item
Exemplo n.º 2
0
def write_summary():
    """Write ``output/SUMMARY.md``: a README link followed by one entry per note.

    Notes are listed in descending order of ``views_count``; each entry links
    to ``markdown/<slug>.md`` and is labelled with the note's title.
    """
    header = '* [Jianshu Hot](markdown/README.md)\n'

    notes_by_views = Note.select().order_by(Note.views_count.desc()).execute()
    entries = [
        ' - [%s](markdown/%s.md)\n' % (note.title, note.slug)
        for note in notes_by_views
    ]

    with open('output/SUMMARY.md', 'w') as summary_file:
        summary_file.write(header + ''.join(entries))
Exemplo n.º 3
0
def write_summary():
    """Generate the summary index at ``output/SUMMARY.md``.

    The file starts with a link to ``markdown/README.md`` and then lists every
    note, most viewed first, as a link to ``markdown/<slug>.md``.
    """
    query = Note.select().order_by(Note.views_count.desc())
    lines = ['* [Jianshu Hot](markdown/README.md)\n']
    for note in query.execute():
        lines.append(' - [%s](markdown/%s.md)\n' % (note.title, note.slug))

    text = ''.join(lines)
    with open('output/SUMMARY.md', 'w') as out:
        out.write(text)
Exemplo n.º 4
0
    def process_item(self, item, spider):
        """Persist a scraped note together with its images.

        A blocked item (slug present in ``BLOCK_LIST``) is passed through
        untouched. For any other item, each image URL gets an ``Image`` row and
        is replaced in the content by ``../images/<path>``; the rewritten
        content is then inserted as a ``Note`` row.

        Args:
            item: Mapping providing the "slug", "title", "url", "content",
                "images", "likes_count" and "views_count" keys.
            spider: Not used by this method.

        Returns:
            The ``item`` that was passed in; the item itself is not modified.
        """
        if item["slug"] in BLOCK_LIST:
            return item

        print(u"抓取完毕: %s" % item["title"])

        # Swap every remote image URL in the content for its local path.
        content = item["content"]
        for img in item["images"]:
            path = get_image_name(img)
            Image.insert(slug=item["slug"], url=img, path=path).execute()
            content = content.replace(img, '../images/%s' % path)

        try:
            Note.insert(title=item["title"],
                        slug=item["slug"],
                        url=item["url"],
                        content=content,
                        likes_count=int(item["likes_count"]),
                        views_count=int(item["views_count"])).execute()
        except IntegrityError as e:
            # A constraint violation is logged and skipped rather than raised.
            # `Logger.warn` is a deprecated alias removed in Python 3.13;
            # `warning` with lazy %-args produces the same message.
            logger.warning('%s SKIP E: (%s)', dict(item), e)

        return item
Exemplo n.º 5
0
def gen_markdown():
    """Write the README and summary, then export every stored note to markdown."""
    write_readme()
    write_summary()

    all_notes = Note.select().execute()
    for note in all_notes:
        export_to_markdown(note)
Exemplo n.º 6
0
def gen_markdown():
    """Produce the full markdown output.

    Calls ``write_readme`` and ``write_summary`` first, then passes each row
    returned by ``Note.select()`` to ``export_to_markdown``.
    """
    write_readme()
    write_summary()

    query = Note.select()
    for record in query.execute():
        export_to_markdown(record)