Exemplo n.º 1
0
 def setUp(self):
     """Copy the sample spiders into a temporary package and load them.

     The ``test_spiders`` directory under ``module_dir`` is copied to
     ``<tmpdir>/test_spiders_xxx``, the temporary directory is appended to
     ``sys.path`` and a ``SpiderManager`` is created for that package name.
     """
     package_name = 'test_spiders_xxx'
     template_dir = os.path.join(module_dir, 'test_spiders')

     # The path obtained from mktemp() is created explicitly here.
     self.tmpdir = self.mktemp()
     os.mkdir(self.tmpdir)

     self.spiders_dir = os.path.join(self.tmpdir, package_name)
     shutil.copytree(template_dir, self.spiders_dir)

     # The parent of the copied package goes on sys.path so the package
     # can be referred to by its bare name.
     sys.path.append(self.tmpdir)
     self.spiderman = SpiderManager([package_name])
Exemplo n.º 2
0
class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
                         set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
                         ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
                         ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
                         set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
                         [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')),
                         [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
                         ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')
Exemplo n.º 3
0
class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        del sys.modules['test_spiders_xxx']
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
            set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
            ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
            ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
            set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
            [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')),
            [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
            ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')
Exemplo n.º 4
0
 def setUp(self):
     """Copy the sample spiders into a temporary package and load them.

     The ``test_spiders`` directory under ``module_dir`` is copied to
     ``<tmpdir>/test_spiders_xxx``, the temporary directory is appended to
     ``sys.path`` and a ``SpiderManager`` is built from settings whose
     ``SPIDER_MODULES`` lists that package.
     """
     package_name = 'test_spiders_xxx'
     template_dir = os.path.join(module_dir, 'test_spiders')

     # The path obtained from mktemp() is created explicitly here.
     self.tmpdir = self.mktemp()
     os.mkdir(self.tmpdir)

     self.spiders_dir = os.path.join(self.tmpdir, package_name)
     shutil.copytree(template_dir, self.spiders_dir)

     # The parent of the copied package goes on sys.path so the package
     # can be referred to by its bare name.
     sys.path.append(self.tmpdir)
     self.spiderman = SpiderManager.from_settings(
         Settings({'SPIDER_MODULES': [package_name]}))
Exemplo n.º 5
0
 def setUp(self):
     """Prepare a throwaway ``test_spiders_xxx`` package and a manager for it.

     Copies ``<module_dir>/test_spiders`` into a fresh temporary directory,
     appends that directory to ``sys.path`` and creates the
     ``SpiderManager`` from a ``Settings`` object naming the copied package
     in ``SPIDER_MODULES``.
     """
     copied_package = 'test_spiders_xxx'
     source_dir = os.path.join(module_dir, 'test_spiders')

     # Create the directory for the path handed out by mktemp().
     self.tmpdir = self.mktemp()
     os.mkdir(self.tmpdir)

     self.spiders_dir = os.path.join(self.tmpdir, copied_package)
     shutil.copytree(source_dir, self.spiders_dir)

     # sys.path gets the parent directory, not the package directory itself.
     sys.path.append(self.tmpdir)
     manager_settings = Settings({'SPIDER_MODULES': [copied_package]})
     self.spiderman = SpiderManager.from_settings(manager_settings)
Exemplo n.º 6
0
 def test_load_base_spider(self):
     """Loading the ``spider0`` module must register no spiders."""
     spider_modules = ['scrapy.tests.test_spidermanager.test_spiders.spider0']
     self.spiderman = SpiderManager(spider_modules)
     registered = self.spiderman._spiders
     assert len(registered) == 0
Exemplo n.º 7
0
 def test_load_spider_module(self):
     """Loading the ``spider1`` module alone must register exactly one spider."""
     spider_modules = ['scrapy.tests.test_spidermanager.test_spiders.spider1']
     self.spiderman = SpiderManager(spider_modules)
     registered = self.spiderman._spiders
     assert len(registered) == 1
Exemplo n.º 8
0
def discover_spiders():
    """Yield every entry registered by the spider manager of each project.

    For each project returned by ``_discover_spider_projects()`` the
    ``spider_project`` context is entered, a ``SpiderManager`` is built from
    the settings it provides, and the values of the manager's ``_spiders``
    mapping are yielded one by one (presumably spider classes -- confirm
    against ``SpiderManager``).

    Because this is a generator, each project's context stays open while its
    spiders are being consumed.
    """
    for project in _discover_spider_projects():
        with spider_project(project) as settings:
            manager = SpiderManager.from_settings(settings)
            # ``values()`` works on both Python 2 and Python 3 mappings;
            # ``itervalues()`` exists only on Python 2.
            for spider in manager._spiders.values():
                yield spider
Exemplo n.º 9
0
 def test_load_base_spider(self):
     """A manager built from settings naming only ``spider0`` registers no spiders."""
     base_module = 'tests.test_spidermanager.test_spiders.spider0'
     self.spiderman = SpiderManager.from_settings(
         Settings({'SPIDER_MODULES': [base_module]}))
     registered = self.spiderman._spiders
     assert len(registered) == 0
Exemplo n.º 10
0
 def test_load_spider_module(self):
     """``SPIDER_MODULES`` given as a comma-separated string loads both modules.

     The setting is passed as one string, not a list, and two registered
     spiders are expected.
     """
     package = 'tests.test_spidermanager.test_spiders.'
     module_paths = [package + name for name in ('spider1', 'spider2')]
     joined = ','.join(module_paths)
     self.spiderman = SpiderManager.from_settings(
         Settings({'SPIDER_MODULES': joined}))
     registered = self.spiderman._spiders
     assert len(registered) == 2
Exemplo n.º 11
0
 def test_load_base_spider(self):
     """No spiders are registered when only the ``spider0`` module is configured."""
     spider_settings = Settings({
         'SPIDER_MODULES': ['tests.test_spidermanager.test_spiders.spider0'],
     })
     self.spiderman = SpiderManager.from_settings(spider_settings)
     spider_count = len(self.spiderman._spiders)
     assert spider_count == 0
Exemplo n.º 12
0
 def test_load_spider_module(self):
     """Two modules listed in a comma-separated ``SPIDER_MODULES`` string are both loaded."""
     base = 'tests.test_spidermanager.test_spiders.'
     # Build "<base>spider1,<base>spider2" as a single string value.
     comma_separated = ','.join([base + 'spider1', base + 'spider2'])
     spider_settings = Settings({'SPIDER_MODULES': comma_separated})
     self.spiderman = SpiderManager.from_settings(spider_settings)
     spider_count = len(self.spiderman._spiders)
     assert spider_count == 2
Exemplo n.º 13
0
 def setUp(self):
     """Add the search-spiders directory to ``sys.path`` and load its spiders.

     The directory ``<project path>/tests/feature/searchspiders`` is appended
     to ``sys.path`` and a ``SpiderManager`` is created for the
     ``feature.searchspiders`` module.
     """
     path_parts = ('tests', 'feature', 'searchspiders')
     search_spiders_dir = os.path.join(get_projectpath(), *path_parts)
     # NOTE(review): the appended directory is the package directory itself;
     # importing 'feature.searchspiders' presumably relies on the 'tests'
     # directory already being importable -- confirm.
     sys.path.append(search_spiders_dir)
     self.spiderman = SpiderManager(['feature.searchspiders'])
Exemplo n.º 14
0
from inspect import isclass

from twisted.internet import reactor, defer
from scrapy import log
from scrapy.settings import CrawlerSettings
from scrapy.crawler import Crawler
from scrapy.spidermanager import SpiderManager

from oucfeed.crawler import settings, datastore, history
from oucfeed.crawler.uploader import upload


# Module-level setup, executed once at import time.
# Wrap the settings imported from ``oucfeed.crawler`` in a CrawlerSettings object.
crawler_settings = CrawlerSettings(settings)
# Start Scrapy logging, configured from those settings.
log.start_from_settings(crawler_settings)
# Shared manager; init_spider() uses it to create spiders from their names.
spidermanager = SpiderManager.from_settings(crawler_settings)


def setup_output():
    """Set the ``FEED_URI`` override to ``test.js`` and ``FEED_FORMAT`` to ``js``.

    Both values are written into ``crawler_settings.overrides``.
    """
    feed_overrides = (
        ('FEED_URI', 'test.js'),
        ('FEED_FORMAT', 'js'),
    )
    for option, value in feed_overrides:
        crawler_settings.overrides[option] = value


def init_spider(spider):
    """Return a spider object for a spider name, a class, or an instance.

    Args:
        spider: One of
            * a string -- passed to ``spidermanager.create()``;
            * a class -- instantiated with no arguments;
            * anything else -- returned unchanged.

    Returns:
        The created spider, or ``spider`` itself when it is neither a string
        nor a class.
    """
    # ``basestring`` exists only on Python 2 (where it covers str and
    # unicode); fall back to ``str`` so the check also works on Python 3
    # instead of raising NameError for every input.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(spider, string_types):
        spider = spidermanager.create(spider)
    elif isclass(spider):
        spider = spider()
    return spider

Exemplo n.º 15
0
def discover_spiders():
    """Yield every entry registered by the spider manager of each project.

    Iterates over the projects returned by ``_discover_spider_projects()``;
    for each one, enters the ``spider_project`` context, builds a
    ``SpiderManager`` from the settings that context provides, and yields
    the values of the manager's ``_spiders`` mapping (presumably spider
    classes -- confirm against ``SpiderManager``).

    Because this is a generator, each project's context stays open while its
    spiders are being consumed.
    """
    for project in _discover_spider_projects():
        with spider_project(project) as settings:
            manager = SpiderManager.from_settings(settings)
            # ``values()`` works on both Python 2 and Python 3 mappings;
            # ``itervalues()`` exists only on Python 2.
            for spider in manager._spiders.values():
                yield spider