def _spawn_process(self, message, slot):
    """Launch a crawl subprocess for ``message`` and register it under ``slot``.

    Both the message and the environment returned by the ``IEnvironment``
    component are passed through ``stringify_dict`` before use. The child is
    started as ``<sys.executable> -m <self.runner> crawl <crawl args>`` and
    its protocol object is stored in ``self.processes[slot]``.
    ``self._process_finished`` is attached to the protocol's deferred with
    ``addBoth`` and receives ``slot`` as an extra argument.
    """
    msg = stringify_dict(message, keys_only=False)

    command = [sys.executable, '-m', self.runner, 'crawl']
    command.extend(get_crawl_args(msg))

    environment = self.app.getComponent(IEnvironment)
    env = stringify_dict(environment.get_environment(msg, slot),
                         keys_only=False)

    protocol = ScrapyProcessProtocol(
        slot, msg['_project'], msg['_spider'], msg['_job'], env)
    protocol.deferred.addBoth(self._process_finished, slot)

    reactor.spawnProcess(protocol, sys.executable, args=command, env=env)
    self.processes[slot] = protocol
def get_crawl_args_dict(message):
    """Return arguments dictionary to use for output.

    Works on a copy of ``message`` so the caller's mapping is untouched.
    The ``_project`` and ``_spider`` entries are dropped (a ``KeyError`` is
    raised if either is missing) and the optional ``settings`` mapping is
    flattened into the result. Every remaining message entry and every
    setting is passed through ``stringify_dict``; when a setting and a
    message entry share a key, the setting wins.
    """
    msg = message.copy()
    del msg['_project'], msg['_spider']
    settings = msg.pop('settings', {})

    args_dict = dict(stringify_dict(msg, keys_only=False))
    # Settings are applied last so they override same-named message entries.
    args_dict.update(stringify_dict(settings, keys_only=False))
    return args_dict
def _spawn_process(self, message, slot):
    """Start the crawl subprocess described by ``message`` in ``slot``.

    The message is stringified, turned into a
    ``<sys.executable> -m <self.runner> crawl ...`` command line, and run
    with the (stringified) environment supplied by the ``IEnvironment``
    component. The resulting protocol is remembered in
    ``self.processes[slot]`` and ``self._process_finished`` is hooked onto
    its deferred with ``addBoth``, receiving ``slot`` as an extra argument.
    """
    msg = stringify_dict(message, keys_only=False)
    project = msg['_project']

    argv = [sys.executable, '-m', self.runner, 'crawl']
    argv.extend(get_crawl_args(msg))

    env_component = self.app.getComponent(IEnvironment)
    raw_env = env_component.get_environment(msg, slot)
    env = stringify_dict(raw_env, keys_only=False)

    process_protocol = ScrapyProcessProtocol(
        slot, project, msg['_spider'], msg['_job'], env)
    process_protocol.deferred.addBoth(self._process_finished, slot)

    reactor.spawnProcess(process_protocol, sys.executable, args=argv, env=env)
    self.processes[slot] = process_protocol
def _spawn_process(self, message, slot):
    """Start the crawl subprocess described by ``message`` in ``slot``.

    The message is stringified and converted into a
    ``<sys.executable> -m <self.runner> crawl ...`` command line, which is
    run with the (stringified) environment from the ``IEnvironment``
    component. The message must contain ``start_url``, ``currency``,
    ``country`` and ``site_id`` in addition to ``_project``, ``_spider`` and
    ``_job``; a missing key raises ``KeyError``. The protocol is stored in
    ``self.processes[slot]`` and ``self._process_finished`` is attached to
    its deferred with ``addBoth``, receiving ``slot`` as an extra argument.
    """
    msg = stringify_dict(message, keys_only=False)
    project = msg['_project']

    argv = [sys.executable, '-m', self.runner, 'crawl']
    argv.extend(get_crawl_args(msg))

    env_component = self.app.getComponent(IEnvironment)
    env = stringify_dict(env_component.get_environment(msg, slot),
                         keys_only=False)

    # Besides the usual job identifiers, hand the protocol the start URL,
    # currency, country and site id taken from the message.
    process_protocol = ScrapyProcessProtocol(
        slot,
        project,
        msg['_spider'],
        msg['_job'],
        env,
        msg['start_url'],
        msg['currency'],
        msg['country'],
        msg['site_id'],
    )
    process_protocol.deferred.addBoth(self._process_finished, slot)

    reactor.spawnProcess(process_protocol, sys.executable, args=argv, env=env)
    self.processes[slot] = process_protocol
def get_crawl_args(message):
    """Build the argument list for the ``scrapy crawl`` process of a message.

    The list starts with the spider name (``message['_spider']`` passed
    through ``unicode_to_str``). Each remaining message entry, other than
    ``_project`` and ``settings``, is appended as ``-a key=value``, and each
    entry of the optional ``settings`` mapping as ``-s key=value``.
    ``message`` itself is not modified.
    """
    remaining = message.copy()
    crawl_args = [unicode_to_str(remaining.pop('_spider'))]
    del remaining['_project']
    settings = remaining.pop('settings', {})

    # Spider arguments first, then settings overrides.
    for name, value in stringify_dict(remaining, keys_only=False).items():
        crawl_args.extend(['-a', '%s=%s' % (name, value)])
    for name, value in stringify_dict(settings, keys_only=False).items():
        crawl_args.extend(['-s', '%s=%s' % (name, value)])
    return crawl_args
def _make_scrapy_args(arg, args_dict): if not args_dict: return [] args = [] for kv in stringify_dict(args_dict, keys_only=False).items(): args += [arg, "%s=%s" % kv] return args
def test_stringify_dict(self):
    # stringify_dict(..., keys_only=False) must return a new dict that
    # compares equal to the input and contains no text-type keys or values.
    d = {'a': 123, u'b': b'c', u'd': u'e', object(): u'e'}
    d2 = stringify_dict(d, keys_only=False)
    self.assertEqual(d, d2)
    self.assertIsNot(d, d2)  # shouldn't modify in place
    self.assertFalse(any(isinstance(x, six.text_type) for x in d2.keys()))
    self.assertFalse(any(isinstance(x, six.text_type) for x in d2.values()))
def test_stringify_dict(self):
    # stringify_dict(..., keys_only=False) must return a new dict that
    # compares equal to the input and contains no unicode keys or values.
    d = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
    d2 = stringify_dict(d, keys_only=False)
    self.assertEqual(d, d2)
    self.assertFalse(d is d2)  # shouldn't modify in place
    self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()))
    self.assertFalse(any(isinstance(x, unicode) for x in d2.values()))
def test_stringify_dict(self):
    # stringify_dict(..., keys_only=False) must return a new dict that
    # compares equal to the input and contains no text-type keys or values.
    d = {"a": 123, u"b": b"c", u"d": u"e", object(): u"e"}
    d2 = stringify_dict(d, keys_only=False)
    self.assertEqual(d, d2)
    self.assertIsNot(d, d2)  # shouldn't modify in place
    self.assertFalse(any(isinstance(x, six.text_type) for x in d2.keys()))
    self.assertFalse(any(isinstance(x, six.text_type) for x in d2.values()))
def test_stringify_dict_tuples(self):
    # stringify_dict must also accept a list of (key, value) tuples and
    # return a dict equal to dict(tuples) with no text-type keys or values.
    tuples = [('a', 123), (u'b', 'c'), (u'd', u'e'), (object(), u'e')]
    d = dict(tuples)
    d2 = stringify_dict(tuples, keys_only=False)
    self.assertEqual(d, d2)
    self.assertIsNot(d, d2)  # shouldn't modify in place
    self.assertFalse(
        any(isinstance(x, six.text_type) for x in d2.keys()), d2.keys())
    self.assertFalse(any(isinstance(x, six.text_type) for x in d2.values()))
def _job_args_and_env(msg):
    """Extract the job command and environment from ``msg``.

    Returns a ``(cmd, env)`` tuple. ``env`` is ``msg['job_env']`` passed
    through ``stringify_dict``, or an empty mapping when that entry is not a
    dict. ``cmd`` is ``msg['job_cmd']`` when it is a list; any other value
    is wrapped as ``[str(value)]``.
    """
    job_env = msg.get('job_env')
    job_cmd = msg.get('job_cmd')

    environment = job_env if isinstance(job_env, dict) else {}
    # NOTE: a missing 'job_cmd' (None) is wrapped like any other non-list
    # value, i.e. it becomes ['None'].
    command = job_cmd if isinstance(job_cmd, list) else [str(job_cmd)]

    return command, stringify_dict(environment, keys_only=False)
def test_stringify_dict(self):
    # With keys_only=False the result must be a distinct dict that compares
    # equal to the input and holds no text-type keys or values.
    source = {'a': 123, u'b': b'c', u'd': u'e', object(): u'e'}
    converted = stringify_dict(source, keys_only=False)

    self.assertEqual(source, converted)
    # shouldn't modify in place
    self.assertIsNot(source, converted)

    text_keys = [
        key for key in converted.keys() if isinstance(key, six.text_type)]
    self.assertFalse(text_keys)

    text_values = [
        value for value in converted.values()
        if isinstance(value, six.text_type)]
    self.assertFalse(text_values)
def test_stringify_dict_tuples(self):
    # stringify_dict must also accept a list of (key, value) tuples and
    # return a dict equal to dict(tuples) with no unicode keys or values.
    tuples = [('a', 123), (u'b', 'c'), (u'd', u'e'), (object(), u'e')]
    d = dict(tuples)
    d2 = stringify_dict(tuples, keys_only=False)
    self.assertEqual(d, d2)
    self.assertFalse(d is d2)  # shouldn't modify in place
    self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()), d2.keys())
    self.assertFalse(any(isinstance(x, unicode) for x in d2.values()))
def _append_next(self):
    """Refill ``self.spider_requests`` once it has run out of items.

    Subclasses are meant to override this method to add new
    (spider, requests) tuples to ``self.spider_requests``. It can return a
    Deferred.

    This implementation pops one message from ``self._queue``. If a message
    is available, its ``name`` entry is removed and the rest of the message
    is passed through ``stringify_dict`` and expanded into keyword arguments
    for ``self.append_spider_name``.
    """
    msg = yield self._queue.pop()
    if not msg:
        return
    spider_name = msg.pop('name')
    # see #250
    spider_kwargs = stringify_dict(msg)
    self.append_spider_name(spider_name, **spider_kwargs)
def test_stringify_dict_tuples(self):
    # A list of (key, value) tuples must be accepted too: the result has to
    # equal dict(pairs) and contain no text-type keys or values.
    pairs = [('a', 123), (u'b', 'c'), (u'd', u'e'), (object(), u'e')]
    expected = dict(pairs)
    converted = stringify_dict(pairs, keys_only=False)

    self.assertEqual(expected, converted)
    # shouldn't modify in place
    self.assertIsNot(expected, converted)

    text_keys = [
        key for key in converted.keys() if isinstance(key, six.text_type)]
    self.assertFalse(text_keys, converted.keys())

    text_values = [
        value for value in converted.values()
        if isinstance(value, six.text_type)]
    self.assertFalse(text_values)
def test_stringify_dict_keys_only(self):
    # Called without keys_only, stringify_dict must return a distinct dict
    # equal to the input whose keys are never text type; values are not
    # checked here.
    source = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
    converted = stringify_dict(source)

    self.assertEqual(source, converted)
    # shouldn't modify in place
    self.assertIsNot(source, converted)

    text_keys = [
        key for key in converted.keys() if isinstance(key, six.text_type)]
    self.assertFalse(text_keys)
def test_stringify_dict_keys_only(self):
    # Called without keys_only, stringify_dict must return a new dict equal
    # to the input whose keys are never unicode; values are not checked.
    d = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
    d2 = stringify_dict(d)
    self.assertEqual(d, d2)
    self.assertFalse(d is d2)  # shouldn't modify in place
    self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()))
def test_stringify_dict_keys_only(self):
    # Called without keys_only, stringify_dict must return a new dict equal
    # to the input whose keys are never unicode; values are not checked.
    d = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
    d2 = stringify_dict(d)
    self.assertEqual(d, d2)
    self.assertFalse(d is d2)  # shouldn't modify in place
    self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()))