示例#1
0
class EggStorageTest(unittest.TestCase):
    """Tests for ``Environment.get_environment`` with and without an egg file."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """With an egg path, SCRAPY_EGGFILE is set and no settings module is."""
        msg = {'project': 'mybot'}
        slot = 3
        env = self.environ.get_environment(msg, slot, '/path/to/file.egg')
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith('slot3.log'))
        self.assertTrue(env['SCRAPY_EGGFILE'].endswith('/path/to/file.egg'))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_without_eggfile(self):
        """Without an egg, the configured settings module is exported instead."""
        msg = {'project': 'newbot'}
        slot = 3
        env = self.environ.get_environment(msg, slot, None)
        self.assertEqual(env['SCRAPY_PROJECT'], 'newbot')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('newbot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith('slot3.log'))
        self.assertEqual(env['SCRAPY_SETTINGS_MODULE'], 'newbot.settings')
        self.assertNotIn('SCRAPY_EGGFILE', env)
 def setUp(self):
     """Prepare ``self.environ`` on top of a newly created scratch directory."""
     workdir = self.mktemp()
     os.mkdir(workdir)
     # Eggs and logs share the same scratch directory.
     values = {'eggs_dir': workdir, 'logs_dir': workdir}
     config = Config(values=values)
     section, project, module = 'settings', 'newbot', 'newbot.settings'
     config.cp.add_section(section)
     config.cp.set(section, project, module)
     self.environ = Environment(config, initenv={})
示例#3
0
 def test_get_environment_with_logfile(self):
     """Call get_environment() with a ``logs_filename`` template configured."""
     config = Config(values={'items_dir': '', 'logs_dir': '.', 'logs_filename': '{project}-{spider}-{Y}{m}{d}T{H}{M}{S}'})
     msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
     slot = 3
     environ = Environment(config, initenv={})
     now = datetime.datetime.now()
     env = environ.get_environment(msg, slot)
     expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
     # NOTE(review): the second argument of assertTrue() is the failure
     # message, so this only checks that SCRAPY_LOG_FILE is truthy. The
     # expected name also says 'spider' while the message's spider is
     # 'myspider' -- confirm the intended value before turning this into
     # a real comparison.
     self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
 def test_get_environment_with_no_items_dir(self):
     """Empty ``items_dir``/``logs_dir`` must leave feed URI and log file unset."""
     config = Config(values={'items_dir': '', 'logs_dir': ''})
     config.cp.add_section('settings')
     config.cp.set('settings', 'newbot', 'newbot.settings')
     msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
     slot = 3
     environ = Environment(config, initenv={})
     env = environ.get_environment(msg, slot)
     self.assertNotIn('SCRAPY_FEED_URI', env)
     self.assertNotIn('SCRAPY_LOG_FILE', env)
示例#5
0
 def test_get_environment_with_no_items_dir(self):
     """Empty ``items_dir``/``logs_dir`` must leave feed URI and log file unset."""
     config = Config(values={'items_dir': '', 'logs_dir': ''})
     config.cp.add_section('settings')
     config.cp.set('settings', 'newbot', 'newbot.settings')
     msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
     slot = 3
     environ = Environment(config, initenv={})
     env = environ.get_environment(msg, slot)
     self.assertNotIn('SCRAPY_FEED_URI', env)
     self.assertNotIn('SCRAPY_LOG_FILE', env)
 def test_get_environment_with_logfile(self):
     """Call get_environment() with a ``logs_filename`` template configured."""
     config = Config(
         values={
             'items_dir': '',
             'logs_dir': '.',
             'logs_filename': '{project}-{spider}-{job}-{Y}{m}{d}T{H}{M}{S}'
         })
     msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
     slot = 3
     environ = Environment(config, initenv={})
     now = datetime.datetime.now()
     env = environ.get_environment(msg, slot)
     expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
     # NOTE(review): the second argument of assertTrue() is the failure
     # message, so this only checks that SCRAPY_LOG_FILE is truthy. The
     # expected name omits the job and says 'spider' while the message's
     # spider is 'myspider' -- confirm the intended value before turning
     # this into a real comparison.
     self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
示例#7
0
 def setUp(self):
     """Prepare ``self.environ`` on top of a newly created scratch directory."""
     workdir = self.mktemp()
     os.mkdir(workdir)
     # Eggs and logs share the same scratch directory.
     values = {'eggs_dir': workdir, 'logs_dir': workdir}
     config = Config(values=values)
     section, project, module = 'settings', 'newbot', 'newbot.settings'
     config.cp.add_section(section)
     config.cp.set(section, project, module)
     self.environ = Environment(config, initenv={})
示例#8
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job values and the derived file paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
            os.path.join('mybot', 'myspider', 'ID.jl')))
        # 'mybot' has no [settings] entry, so no settings module is expected.
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)
示例#9
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job/concurrency values and file paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertEqual(env['SCRAPY_CONCURRENT_SPIDERS'], '1')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(os.path.join('mybot', 'myspider', 'ID.log')))
        # 'mybot' has no [settings] entry, so no settings module is expected.
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)
示例#10
0
 def setUp(self):
     """Prepare ``self.environ`` on top of a newly created scratch directory."""
     workdir = self.mktemp()
     os.mkdir(workdir)
     # Eggs and logs share the same scratch directory.
     values = {"eggs_dir": workdir, "logs_dir": workdir}
     config = Config(values=values)
     section, project, module = "settings", "newbot", "newbot.settings"
     config.cp.add_section(section)
     config.cp.set(section, project, module)
     self.environ = Environment(config, initenv={})
示例#11
0
def get_application(config):
    """Build and return the ``Scrapyd`` application.

    Reads ``http_port``, ``bind_address``, ``poll_interval`` and ``launcher``
    from *config* (falling back to the defaults below), registers the poller,
    egg storage, scheduler and environment components on the application, and
    attaches the launcher, the polling timer and the web service to it.

    :param config: configuration object providing ``get``/``getint``/``getfloat``.
    :return: the configured application object.
    """
    app = Application('Scrapyd')
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)

    app.setComponent(IPoller, poller)
    app.setComponent(IEggStorage, eggstorage)
    app.setComponent(ISpiderScheduler, scheduler)
    app.setComponent(IEnvironment, environment)

    # The launcher class is pluggable: it is looked up by dotted path.
    laupath = config.get('launcher', 'scrapyd_mongodb.launcher.Launcher')
    laucls = load_object(laupath)
    launcher = laucls(config, app)

    timer = TimerService(poll_interval, poller.poll)
    webservice = TCPServer(http_port,
                           server.Site(Root(config, app)),
                           interface=bind_address)
    # ``format`` must be a plain string: a stray trailing comma previously
    # turned it into a one-element tuple, which cannot be %-formatted with
    # the keyword arguments below.
    log.msg(format=('Scrapyd web console available at '
                    'http://%(bind_address)s:%(http_port)s/'),
            bind_address=bind_address,
            http_port=http_port)

    launcher.setServiceParent(app)
    timer.setServiceParent(app)
    webservice.setServiceParent(app)

    return app
示例#12
0
def application(config):
    """Build and return the ``Scrapyd`` application using environment overrides.

    The HTTP port comes from the ``PORT`` environment variable when present,
    otherwise from the ``http_port`` config value (default 6800). When
    ``DATABASE_URL`` is set it overrides the ``database_url`` option of the
    ``scrapyd`` config section.

    :param config: configuration object providing ``getint`` and a ``cp`` parser.
    :return: the configured application object.
    """
    app = Application("Scrapyd")
    http_port = int(environ.get('PORT', config.getint('http_port', 6800)))
    # Only override the option when the variable exists: an unset variable
    # would hand None to the config parser, and ConfigParser.set() rejects
    # non-string values with TypeError.
    database_url = environ.get('DATABASE_URL')
    if database_url is not None:
        config.cp.set('scrapyd', 'database_url', database_url)

    poller = Psycopg2QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = Psycopg2SpiderScheduler(config)
    environment = Environment(config)

    app.setComponent(IPoller, poller)
    app.setComponent(IEggStorage, eggstorage)
    app.setComponent(ISpiderScheduler, scheduler)
    app.setComponent(IEnvironment, environment)

    launcher = Launcher(config, app)
    # Poll the queues every 5 seconds.
    timer = TimerService(5, poller.poll)
    webservice = TCPServer(http_port, server.Site(Root(config, app)))
    log.msg("Scrapyd web console available at http://localhost:%s/ (HEROKU)" %
            http_port)

    launcher.setServiceParent(app)
    timer.setServiceParent(app)
    webservice.setServiceParent(app)

    return app
示例#13
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job values and the derived file paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].startswith('file://{}'.format(
                os.getcwd())))
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
                os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir``/``logs_dir`` must leave feed URI and log file unset."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)

    def test_get_environment_with_logfile(self):
        """Call get_environment() with a ``logs_filename`` template configured."""
        config = Config(
            values={
                'items_dir': '',
                'logs_dir': '.',
                'logs_filename': '{project}-{spider}-{job}-{Y}{m}{d}T{H}{M}{S}'
            })
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        now = datetime.datetime.now()
        env = environ.get_environment(msg, slot)
        expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
        # NOTE(review): the second argument of assertTrue() is the failure
        # message, so this only checks that SCRAPY_LOG_FILE is truthy. The
        # expected name omits the job and says 'spider' while the message's
        # spider is 'myspider' -- confirm the intended value before turning
        # this into a real comparison.
        self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
示例#14
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job values and the derived file paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].startswith('file://{}'.format(os.getcwd())))
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir``/``logs_dir`` must leave feed URI and log file unset."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)

    def test_get_environment_with_logfile(self):
        """Call get_environment() with a ``logs_filename`` template configured."""
        config = Config(values={'items_dir': '', 'logs_dir': '.', 'logs_filename': '{project}-{spider}-{Y}{m}{d}T{H}{M}{S}'})
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        now = datetime.datetime.now()
        env = environ.get_environment(msg, slot)
        expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
        # NOTE(review): the second argument of assertTrue() is the failure
        # message, so this only checks that SCRAPY_LOG_FILE is truthy. The
        # expected name also says 'spider' while the message's spider is
        # 'myspider' -- confirm the intended value before turning this into
        # a real comparison.
        self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
示例#15
0
def application(config):
    """Assemble the ``Scrapyd`` application from *config* and return it.

    Components (poller, scheduler, environment, job storage, egg storage) are
    registered on the application first; the launcher, polling timer and web
    service are then created and attached as its child services. Job storage,
    egg storage, launcher and web root classes are loaded by dotted path.
    """
    app = Application("Scrapyd")
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '127.0.0.1')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    core_components = (
        (IPoller, poller),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    )
    for interface, component in core_components:
        app.setComponent(interface, component)

    # Pluggable storages: resolve the class, instantiate it, register it.
    jobstorage_cls = load_object(
        config.get('jobstorage', 'scrapyd.jobstorage.MemoryJobStorage'))
    app.setComponent(IJobStorage, jobstorage_cls(config))
    eggstorage_cls = load_object(
        config.get('eggstorage', 'scrapyd.eggstorage.FilesystemEggStorage'))
    app.setComponent(IEggStorage, eggstorage_cls(config))

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    timer = TimerService(poll_interval, poller.poll)

    web_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))
    site = server.Site(create_wrapped_resource(web_cls, config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(
        format="Scrapyd web console available at "
               "http://%(bind_address)s:%(http_port)s/",
        bind_address=bind_address,
        http_port=http_port)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
示例#16
0
文件: app.py 项目: drankinn/scrapyd
def application(config):
    """Assemble the ``Scrapyd`` application from *config* and return it.

    Scheduler, pub/sub and launcher classes are loaded by dotted path from
    the config. All components are registered on the application, then the
    pub/sub, launcher, polling timer and web service are attached to it.
    """
    app = Application("Scrapyd")
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)

    scheduler_cls = load_object(
        config.get('scheduler', 'scrapyd.scheduler.SpiderScheduler'))
    scheduler = scheduler_cls(config, app)

    environment = Environment(config)

    pubsub_cls = load_object(config.get('pubsub', 'scrapyd.pubsub.BasePubSub'))
    pubsub = pubsub_cls(config, app)

    registrations = (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
        (IPubSub, pubsub),
    )
    for interface, component in registrations:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    timer = TimerService(poll_interval, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(
        format="Scrapyd web console available at "
               "http://%(bind_address)s:%(http_port)s/",
        bind_address=bind_address,
        http_port=http_port)

    # Attach services in the same order as before: pubsub first.
    for service in (pubsub, launcher, timer, webservice):
        service.setServiceParent(app)
    return app
示例#17
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job values and the derived file paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].startswith('file://{}'.format(
                os.getcwd())))
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
                os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir``/``logs_dir`` must leave feed URI and log file unset."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)
示例#18
0
def application(config):
    """Assemble the ``Scrapyd`` application from *config* and return it.

    When the ``PORT`` environment variable is present it replaces the
    configured HTTP port and the service binds to ``0.0.0.0``; otherwise the
    ``http_port`` and ``bind_address`` config values (or their defaults) are
    used. Launcher and web root classes are loaded by dotted path.
    """
    app = Application("Scrapyd")
    http_port = config.getint('http_port', 6800)

    port_override = os.environ.get('PORT')
    if port_override is not None:
        # The environment wins over the config file for both port and address.
        http_port = int(port_override)
        bind_address = '0.0.0.0'
    else:
        bind_address = config.get('bind_address', '127.0.0.1')

    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    registrations = (
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    )
    for interface, component in registrations:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    web_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))

    timer = TimerService(poll_interval, poller.poll)
    site = server.Site(web_cls(config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(
        format="Scrapyd web console available at "
               "http://%(bind_address)s:%(http_port)s/",
        bind_address=bind_address,
        http_port=http_port)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
示例#19
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={"eggs_dir": d, "logs_dir": d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section("settings")
        config.cp.set("settings", "newbot", "newbot.settings")
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job values and the derived file paths."""
        msg = {"_project": "mybot", "_spider": "myspider", "_job": "ID"}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env["SCRAPY_PROJECT"], "mybot")
        self.assertEqual(env["SCRAPY_SLOT"], "3")
        self.assertEqual(env["SCRAPY_SPIDER"], "myspider")
        self.assertEqual(env["SCRAPY_JOB"], "ID")
        self.assertTrue(env["SCRAPY_LOG_FILE"].endswith(os.path.join("mybot", "myspider", "ID.log")))
        self.assertTrue(env["SCRAPY_FEED_URI"].endswith(os.path.join("mybot", "myspider", "ID.jl")))
        # 'mybot' has no [settings] entry, so no settings module is expected.
        self.assertNotIn("SCRAPY_SETTINGS_MODULE", env)
示例#20
0
class EnvironmentTest(unittest.TestCase):
    """Tests for the variables produced by ``Environment.get_environment``."""

    def setUp(self):
        """Create an ``Environment`` whose eggs/logs dirs are a fresh temp dir."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only the 'newbot' project gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """Run ``verifyObject`` on the environment against ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Check project/slot/spider/job values and the derived file paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir``/``logs_dir`` must leave feed URI and log file unset."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)