Example #1
    def run(self, args, opts):
        url = args[0] if args else None
        if url:
            # first argument may be a local file
            url = guess_scheme(url)

        spider_loader = self.crawler_process.spider_loader

        spidercls = DefaultSpider
        if opts.spider:
            spidercls = spider_loader.load(opts.spider)
        elif url:
            spidercls = spidercls_for_request(spider_loader, Request(url),
                                              spidercls, log_multiple=True)

        # The crawler is created this way because the Shell manually handles the
        # crawling engine, so the setup done in the crawl() method won't work.
        crawler = self.crawler_process._create_crawler(spidercls)
        # The Shell class needs a persistent engine in the crawler
        crawler.engine = crawler._create_engine()
        crawler.engine.start()

        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url, redirect=not opts.no_redirect)
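For reference, a minimal sketch of what guess_scheme does with the first
argument (the sample inputs are illustrative; the comments follow its
documented behavior of adding a missing scheme):

from scrapy.utils.url import guess_scheme

guess_scheme('http://example.com/')  # scheme already present: returned as-is
guess_scheme('./index.html')         # path-like input: gets a file:// scheme
guess_scheme('example.com')          # anything else: gets an http:// scheme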
Example #2
def get_fetch(log=False):
    settings = Settings()
    settings.set('LOG_ENABLED', log)

    crawler_process = CrawlerProcess(settings)
    crawler = crawler_process.create_crawler()
    crawler_process.start_crawling()

    t = Thread(target=crawler_process.start_reactor)
    t.daemon = True
    t.start()

    shell = Shell(crawler)
    # Dummy code string; presumably set so the shell never drops into an
    # interactive console if start() were ever called on it.
    shell.code = 'adsf'

    import threading
    lock = threading.Lock()

    def fetch(url_or_request):
        # Serialize access to the shared shell across calling threads.
        with lock:
            shell.fetch(url_or_request)
            return shell.vars.get('response')

    return fetch
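Hypothetical usage of the helper above: build the fetch closure once, then
call it synchronously from ordinary, non-reactor code (the URL is
illustrative):

fetch = get_fetch(log=True)
response = fetch('http://example.com/')
print(response.status, len(response.body))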
Example #3
 def run(self, args, opts):
     url = args[0] if args else None
     shell = Shell(self.crawler,
                   update_vars=self.update_vars,
                   code=opts.code)
     self._start_crawler_thread()
     shell.start(url=url)
Example #4
    def run(self, args, opts):
        url = args[0] if args else None
        spiders = self.crawler_process.spiders

        spidercls = DefaultSpider
        if opts.spider:
            spidercls = spiders.load(opts.spider)
        elif url:
            spidercls = spidercls_for_request(spiders,
                                              Request(url),
                                              spidercls,
                                              log_multiple=True)

        # The crawler is created this way because the Shell manually handles the
        # crawling engine, so the setup done in the crawl() method won't work.
        crawler = self.crawler_process._create_logged_crawler(spidercls)
        # The Shell class needs a persistent engine in the crawler
        crawler.engine = crawler._create_engine()
        crawler.engine.start()

        self.crawler_process.start(start_reactor=False)
        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url)
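For reference, a standalone sketch of the spider-selection call the run()
variants above rely on; spider_loader stands in for whatever loader the
running process exposes, and the URL is illustrative:

from scrapy import Request
from scrapy.utils.spider import DefaultSpider, spidercls_for_request

# Pick the spider class whose domains match the request URL, falling back to
# DefaultSpider; log_multiple=True logs a warning when several spiders match.
spidercls = spidercls_for_request(spider_loader, Request('http://example.com/'),
                                  DefaultSpider, log_multiple=True)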
Example #5
 def run(self, args, opts):
     url = args[0] if args else None
     spider = None
     if opts.spider:
         spider = self.crawler.spiders.create(opts.spider)
     shell = Shell(self.crawler, update_vars=self.update_vars, code=opts.code)
     self._start_crawler_thread()
     shell.start(url=url, spider=spider)
Example #6
    def test_inspect_response_text(self):
        response = TextResponse(url='http://example.com/', body=b'''
            {"hello": "world"}
        ''')
        shell = Shell(self.crawler, code='None')
        shell.start(response=response, spider=self.spider)

        self.assertNotIn('sel', shell.vars)
Example #7
    def test_inspect_response_xml(self):
        response = XmlResponse(url='http://example.com/', body=b'''
            <?xml version="1.0" encoding="UTF-8"?>
            <foo>Testing</foo>
        ''')
        shell = Shell(self.crawler, code='None')
        shell.start(response=response, spider=self.spider)

        self.assertIn('sel', shell.vars)
Example #8
    def test_inspect_response_binary(self):
        response = Response(url='http://example.com/', body=b'''
            '{\xcc\xe8\x92\xe6\xb8\xa21\xb2\xe5O6\xc9\x84\xba8
            \xa3\x877\xa8v\xee9p.UJ\xa1m\x8a"H\xb3\xcc\x08\xff
            \x87d\x00i\xce\xb7a\xff\x8c\xd8NX\xae\xc2'
        ''')
        shell = Shell(self.crawler, code='None')
        shell.start(response=response, spider=self.spider)

        self.assertNotIn('sel', shell.vars)
Example #9
    def test_inspect_response_html(self):
        response = HtmlResponse(url='http://example.com/', body=b'''
            <!doctype html>
            <html>
                <p>Testing</p>
            </html>
        ''')
        shell = Shell(self.crawler, code='None')
        shell.start(response=response, spider=self.spider)

        self.assertIn('sel', shell.vars)
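Taken together, these four tests pin down when the shell exposes a 'sel'
variable: only responses Scrapy treats as markup get a selector. A standalone
sketch of the same distinction (bodies and URL are illustrative):

from scrapy.http import HtmlResponse, Response
from scrapy.selector import Selector

html = HtmlResponse(url='http://example.com/', body=b'<p>Testing</p>')
print(Selector(response=html).css('p::text').get())  # 'Testing'

binary = Response(url='http://example.com/', body=b'\x00\x01\x02')
# A plain (non-text) Response cannot back a Selector, so no 'sel' is created.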
Example #10
 def run(self, args, opts):
     url = args[0] if args else None
     shell = Shell(self.crawler, update_vars=self.update_vars, inthread=True,
                   code=opts.code)
     def err(f):
         log.err(f, "Shell error")
         self.exitcode = 1
     d = shell.start(url=url)
     d.addErrback(err)
     d.addBoth(lambda _: self.crawler.stop())
     self.crawler.start()
Example #11
    def run(self, args, opts):
        crawler = self.crawler_process.create_crawler()

        url = args[0] if args else None
        spider = crawler.spiders.create(opts.spider) if opts.spider else None

        self.crawler_process.start_crawling()
        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url, spider=spider)
Example #12
def shell(argv):
    """ Open a url in the scrapy shell """
    parser = argparse.ArgumentParser('ozzy shell',
                                     description=shell.__doc__)
    parser.add_argument('url', help="URL to open in a shell")
    args = parser.parse_args(argv)

    crawler_process = CrawlerProcess(load_settings())
    crawler = crawler_process.create_crawler()
    crawler_process.start_crawling()
    thread = Thread(target=crawler_process.start_reactor)
    thread.daemon = True
    thread.start()
    sh = Shell(crawler)
    sh.start(url=args.url)
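A hypothetical way to exercise the entry point above directly; in practice the
argv would come from whatever follows "ozzy shell" on the command line:

shell(['http://example.com/'])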
Example #13
 def run(self, args, opts):
     url = args[0] if args else None
     # Note: an older Shell signature that takes the update_vars callback
     # directly rather than a crawler.
     shell = Shell(self.update_vars)
     shell.start(url)