Example #1
from scrapy_plus.core.engine import Engine


def main():
    # Build the custom Spider objects
    # baidu_spider = BaiduSpider()
    # douban_spider = DoubanSpider()
    # # Pass the objects to Engine as arguments

    # getattr(object, attribute_name)
    # getattr(module loaded from a file path, class defined in that file)
    # (see the dynamic-loading sketch after this example)

    # Build a dict of the spiders to run
    # spiders = {
    #     BaiduSpider.name : BaiduSpider(),
    #     DoubanSpider.name : DoubanSpider()
    # }

    # # Custom pipelines
    # pipelines = [
    #     BaiduPipeline1(),
    #     BaiduPipeline2(),
    #     DoubanPipeline1(),
    #     DoubanPipeline2(),
    # ]

    # # Custom spider middlewares
    # spider_middlewares = [
    #     SpiderMiddleware1(),
    #     SpiderMiddleware2()
    # ]

    # # Custom downloader middlewares
    # downloader_middlewares = [
    #     DownloaderMiddleware1(),
    #     DownloaderMiddleware2()
    # ]


    # engine = Engine(
    #     spiders = spiders,
    #     pipelines = pipelines,
    #     spider_mids = spider_middlewares,
    #     downloader_mids = downloader_middlewares
    # )


    engine = Engine()
    engine.start()
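
The getattr hint in the comments above points at loading spider classes dynamically from dotted-path strings instead of instantiating them by hand. A minimal sketch of that idea, assuming hypothetical dotted paths modeled on the imports in Example #3 (the helper name load_class is illustrative, not part of scrapy_plus):

import importlib

def load_class(dotted_path):
    # Hypothetical helper: split "spiders.baidu.BaiduSpider" into a module
    # path and a class name, import the module, then fetch the class with
    # getattr -- the getattr(module, class) idea from the comment above.
    module_path, class_name = dotted_path.rsplit('.', 1)
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# Usage (path is an assumption based on Example #3's imports):
# BaiduSpider = load_class('spiders.baidu.BaiduSpider')
# spiders = {BaiduSpider.name: BaiduSpider()}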
Example #2
from scrapy_plus.core.engine import Engine


def main():
    # Build spider objects from the custom BaiduSpider class
    # baidu_spider = BaiduSpider()
    # douban_spider = DoubanSpider()
    #spiders = {"baidu" : baidu_spider, "douban" : douban_spider}
    #spiders = {baidu_spider.name : baidu_spider, douban_spider.name : douban_spider}
    # spiders = {BaiduSpider.name : BaiduSpider(), DoubanSpider.name : DoubanSpider()}

    # Build multiple pipeline objects from the custom Pipeline classes
    # pipelines = [
    #     BaiduPipeline1(),
    #     BaiduPipeline2(),
    #     DoubanPipeline1(),
    #     DoubanPipeline2()
    # ]

    # Build multiple spider middlewares from the custom middleware classes
    # spider_middlewares = [
    #     SpiderMiddleware1(),
    #     SpiderMiddleware2()
    # ]

    # Build multiple downloader middlewares from the custom middleware classes
    # downloader_middlewares = [
    #     DownloaderMiddleware1(),
    #     DownloaderMiddleware2()
    # ]

    # Pass the objects to Engine when creating the framework's engine object
    # engine = Engine(
    #     spiders=spiders,
    #     pipelines=pipelines,
    #     spider_mids = spider_middlewares,
    #     downloader_mids = downloader_middlewares
    # )

    engine = Engine()
    # Call start() to launch the framework
    engine.start()
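
The commented-out call above implies an Engine constructor that accepts the spiders dict, the pipelines, and both middleware lists as keyword arguments. A rough sketch of what that signature could look like, assuming the engine falls back to settings-driven defaults when nothing is passed (an illustration, not the actual scrapy_plus source):

class Engine(object):
    def __init__(self, spiders=None, pipelines=None,
                 spider_mids=None, downloader_mids=None):
        # If no components are passed in, a settings-driven version of the
        # framework would build them from configuration instead -- which is
        # why the later examples can simply call Engine() with no arguments.
        self.spiders = spiders or {}
        self.pipelines = pipelines or []
        self.spider_mids = spider_mids or []
        self.downloader_mids = downloader_mids or []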
Example #3
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
# from spiders.pipelines import BaiduPipline
# from spiders.pipelines import DoubanPipline
# from middlewares.spider_middlewares import TestSpiderMiddleware1,TestSpiderMiddleware2
# from middlewares.downloader_middlewares import TestDownloaderMiddleware1,TestDownloaderMiddleware2

if __name__ == '__main__':
    # baidu_spider = BaiduSpider()
    # douban_spider = DoubanSpider()
    # #
    # spiders = {
    #     BaiduSpider.name:baidu_spider,
    #     DoubanSpider.name:douban_spider
    # }
    # pipelines = [
    #     BaiduPipline(),
    #     DoubanPipline()
    # ]
    # spider_mids = [TestSpiderMiddleware1(),TestSpiderMiddleware2()]
    # downloader_mids = [TestDownloaderMiddleware1(),TestDownloaderMiddleware2()]

    engine = Engine()
    engine.start()
Example #4
from scrapy_plus.core.engine import Engine
from spider_middlewares import TestSpidermiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

# if __name__ == '__main__':
#     # spider = BaiduSpider()
#     # baidu_spider = BaiduSpider()    # instantiate a spider object
#     douban_spider = DoubanSpider()    # instantiate a spider object
#     spiders = {DoubanSpider.name: douban_spider}
#     pipelines = [DoubanPipline()]
#
#     spider_mids = [TestSpidermiddleware1(), TestSpiderMiddleware2()]
#     downloader_mids = [TestDownloaderMiddleware1(), TestDownloaderMiddleware2()]
#
#
#     engine = Engine(spiders, pipelines=pipelines, spider_mids=spider_mids, downloader_mids=downloader_mids)    # pass in the components
#     engine.start()    # start the engine

if __name__ == '__main__':
    # spider = BaiduSpider()
    # # baidu_spider = BaiduSpider()    # instantiate a spider object
    # douban_spider = DoubanSpider()    # instantiate a spider object
    # spiders = {DoubanSpider.name: douban_spider}
    # pipelines = [DoubanPipline()]
    #
    # spider_mids = [TestSpidermiddleware1(), TestSpiderMiddleware2()]
    # downloader_mids = [TestDownloaderMiddleware1(), TestDownloaderMiddleware2()]


    engine = Engine()    # instantiate the engine (components no longer passed in)
    engine.start()    # start the engine
Example #5
# -*- coding: utf-8 -*-

from scrapy_plus.core.engine import Engine

if __name__ == '__main__':
    engine = Engine()
    engine.start()
Example #6
import time

from scrapy_plus.core.engine import Engine


def main():
    # 1. Send requests on a schedule
    engine = Engine()
    while True:
        engine.start()
        time.sleep(3)
Example #7
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline
from spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2
import settings

if __name__ == '__main__':
    # Instantiate the spiders
    # baidu = BaiduSpider()
    # douban = DoubanSpider()
    # spiders = {baidu.name:baidu,douban.name:douban}
    # pipelines = [BaiduPipeline(),DoubanPipeline()]
    # spider_mids = [TestSpiderMiddleware1(),TestSpiderMiddleware2()]
    # downloader_mids = [TestDownloaderMiddleware1(),TestDownloaderMiddleware2()]
    engine = Engine()  # instantiate the engine
    engine.start()  # start the engine
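
Example #7 imports a settings module, which suggests the spiders, pipelines, and middlewares are declared in configuration and loaded by the engine itself (via the importlib/getattr approach sketched after Example #1). A minimal sketch of what such a settings.py might contain; the key names and dotted paths are assumptions based on the commented-out wiring above, not the actual scrapy_plus settings:

# settings.py -- hypothetical configuration
SPIDERS = [
    'spiders.baidu.BaiduSpider',
    'spiders.douban.DoubanSpider',
]

PIPELINES = [
    'pipelines.BaiduPipeline',
    'pipelines.DoubanPipeline',
]

SPIDER_MIDDLEWARES = [
    'spider_middlewares.TestSpiderMiddleware1',
    'spider_middlewares.TestSpiderMiddleware2',
]

DOWNLOADER_MIDDLEWARES = [
    'downloader_middlewares.TestDownloaderMiddleware1',
    'downloader_middlewares.TestDownloaderMiddleware2',
]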