Example #1
def parse(self, response):
    url = response.url
    print "parse article: [%s]" % url
    title = response.xpath('//title/text()').extract()[0]
    print title
    if title != " 404 NOT FOUND ":
        # parse the article body
        # self.write2file("articles", context[0].decode('utf-8'))
        item = self.parse_page_contents(response)
        self.used_urls.append(url)
        yield self.getUrls(item, response)
    else:
        self.log("article: [%s] does not exist!" % response.url)
    # in either case, schedule the next URL that has not been visited
    # in this run and is not recorded as crawled in MongoDB
    while self.url_pools.qsize() > 0:
        url = self.url_pools.get()[1]
        uri = url.split('/')[-1]
        if url not in self.used_urls and not MongoDBUtil.isUrlScrawled(uri):
            print "crawl next url: " + url
            yield scrapy.Request(url,
                                 cookies=self.cookies,
                                 headers=self.headers)
            break
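
The dequeue-and-request block at the end of `parse` reappears verbatim in other callbacks (see Example #5). A minimal helper sketch that could factor it out, assuming the same `url_pools`, `used_urls`, `cookies`, and `headers` attributes on the spider (the name `next_request` is hypothetical):

def next_request(self):
    # pull URLs off the priority pool until one is neither visited
    # in this run nor already recorded as crawled in MongoDB
    while self.url_pools.qsize() > 0:
        url = self.url_pools.get()[1]
        uri = url.split('/')[-1]
        if url not in self.used_urls and not MongoDBUtil.isUrlScrawled(uri):
            return scrapy.Request(url,
                                  cookies=self.cookies,
                                  headers=self.headers)
    return None

Each callback would then yield the returned request whenever it is not None.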
Example #2
def __init__(self):
    self.source = "www.atatech.org"
    self.user_names = []
    self.used_urls = []  # URLs already handled in this run (needed by parse)
    self.headers = HEADER
    self.cookies = COOKIES
    # priority pool of (priority, url) tuples consumed by the callbacks
    self.url_pools = Queue.PriorityQueue()
    self.url_pools.put((1, "https://www.atatech.org/articles/72373"))
    MongoDBUtil.__init__()
    startUrl = MongoDBUtil.get_start_article_url()
    if startUrl is not None:
        # replace the default seed with the start URL recorded in MongoDB
        startUrl = "https://www.atatech.org/articles/" + startUrl
        self.url_pools.get()
        self.url_pools.put((1, startUrl))
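
The `(priority, url)` tuples stored in `url_pools` imply a priority queue: seed URLs go in with priority 1 and similar-article links with priority 3 (see Example #5), so lower numbers are fetched first. A standalone sketch of that ordering, with hypothetical article URLs:

import Queue

pool = Queue.PriorityQueue()
pool.put((3, "https://www.atatech.org/articles/b"))  # similar-article link
pool.put((1, "https://www.atatech.org/articles/a"))  # seed URL
print pool.get()[1]  # prints .../articles/a: lowest priority number wins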
Example #3
def save2():
    # persist a dummy author/blog pair through the MongoDBUtil helper
    authorItem = AuthorItem()
    authorItem['authorName'] = "thomugo"
    authorItem['authorHomePage'] = "www.baidu.com"
    blogItem = AtablogItem()
    blogItem['id'] = "123"
    blogItem['blog'] = "test"
    blogItem['dir'] = './test'
    blogItem['title'] = "title"
    blogItem['tags'] = ['a', 'b']
    blogItem['mark'] = 2
    blogItem['vote'] = 3
    MongoDBUtil.save_blog(authorItem, blogItem)
Example #4
def persistenceBlog(self, authorItem, blogItem):
    uri = blogItem['id']
    if not os.path.exists(blogItem['dir']):
        os.makedirs(blogItem['dir'])
    fileName = os.path.join(blogItem['dir'], blogItem['title'] + ".md")
    alreadyWritten = os.path.exists(fileName)
    alreadyScrawled = MongoDBUtil.isUrlScrawled(uri)
    if not alreadyWritten:
        # save the crawled article to the local filesystem
        with codecs.open(fileName, 'wb', encoding="utf-8") as md:
            md.write(blogItem['blog'])
    if not alreadyScrawled:
        # save the parsed article to the database
        MongoDBUtil.save_blog(authorItem, blogItem)
Example #5
def parse_similar_articles(self, response):
    print "in parse_similar_articles"
    # extracted table markup (currently unused)
    context = response.xpath('//table').extract()[0].decode('utf-8')
    urls = response.xpath(
        '//a[contains(@href, "/articles")]/@href').extract()
    for url in urls:
        self.log("similar url: " + response.urljoin(url))
        self.url_pools.put((3, response.urljoin(url)))
    # schedule the next URL that has not been visited in this run
    # and is not recorded as crawled in MongoDB
    while self.url_pools.qsize() > 0:
        url = self.url_pools.get()[1]
        uri = url.split('/')[-1]
        if url not in self.used_urls and not MongoDBUtil.isUrlScrawled(uri):
            print "crawl next url: " + url
            yield scrapy.Request(url,
                                 cookies=self.cookies,
                                 headers=self.headers)
            break
    yield response.meta['item']
Example #6
def testGetBlog():
    # fetch a stored blog by its title and write it back out as a markdown file
    blog = MongoDBUtil.get_blog("0、Python与设计模式--前言")
    with codecs.open(u"0、Python与设计模式--前言.md", 'wb', encoding="utf-8") as md:
        md.write(blog['blog'])
Example #7
def getStartUrl():
    # print the last article URL recorded in MongoDB
    print MongoDBUtil.get_last_article_url()
Example #8
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import Queue
import codecs

from AtaBlog.MongoDBUtil import MongoDBUtil
from AtaBlog.items import *

# initialise the MongoDB connection once for the whole script
MongoDBUtil.__init__()

def save1():
    # authorItem = AuthorItem()
    # authorItem['authorName'] = "thomugo"
    # authorItem['authorHomePage'] = "www.baidu.com"
    # author = dict(authorItem)
    # MongoDBUtil.save_author_item(authorItem)
    # MongoDBUtil.authorItemCollection.insert(author)
    # look up an existing author document and attach it to a new blog item
    authorItem = MongoDBUtil.authorItemCollection.find_one({'authorName': "thomugo"})
    blogItem = AtablogItem()
    blogItem['id'] = "123"
    blogItem['blog'] = "test"
    blogItem['dir'] = './test'
    blogItem['title'] = "title"
    blogItem['tags'] = ['a', 'b']
    blogItem['mark'] = 2
    blogItem['vote'] = 3
    blogItem['author'] = authorItem
    # scrapy items behave like dicts, so insert the plain dict into Mongo
    blog = dict(blogItem)
    MongoDBUtil.blogItemCollection.insert(blog)
    # MongoDBUtil.save_blog_item(blogItem)
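
Since `save1` writes through the raw pymongo collection, a read-back check can use the same handle; a minimal sketch assuming the collection from above (`check_save1` is a hypothetical helper):

def check_save1():
    # find_one returns the stored document, or None if nothing matched
    blog = MongoDBUtil.blogItemCollection.find_one({'id': "123"})
    if blog is not None:
        print blog['title']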