Exemplo n.º 1
0
 def _spawn_process(self, message, slot):
     """Spawn a crawl subprocess for ``message`` and record it under ``slot``.

     The message is stringified and turned into a
     ``<python> -m <runner> crawl ...`` command line. The process
     environment comes from the application's ``IEnvironment`` component
     and is stringified as well before it is handed to the reactor.
     """
     job = stringify_dict(message, keys_only=False)
     command = [sys.executable, '-m', self.runner, 'crawl']
     command.extend(get_crawl_args(job))
     env_component = self.app.getComponent(IEnvironment)
     proc_env = stringify_dict(env_component.get_environment(job, slot),
                               keys_only=False)
     protocol = ScrapyProcessProtocol(
         slot, job['_project'], job['_spider'], job['_job'], proc_env)
     # _process_finished is attached with addBoth and receives the slot as
     # an extra argument.
     protocol.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(protocol, sys.executable, args=command, env=proc_env)
     self.processes[slot] = protocol
Exemplo n.º 2
0
def get_crawl_args_dict(message):
    """Return arguments dictionary to use for output.

    The ``_project`` and ``_spider`` entries are dropped. The remaining
    message entries and the entries of the optional ``settings`` mapping
    are stringified and merged into one flat dict; on a key collision the
    ``settings`` value wins because it is applied last.

    ``message`` itself is left untouched: all removals happen on a shallow
    copy. A ``KeyError`` is raised when ``_project`` or ``_spider`` is
    missing from the message.
    """
    msg = message.copy()
    del msg['_project'], msg['_spider']
    settings = msg.pop('settings', {})
    args_dict = dict(stringify_dict(msg, keys_only=False))
    # Applied second so that settings override plain message entries.
    args_dict.update(stringify_dict(settings, keys_only=False))
    return args_dict
Exemplo n.º 3
0
 def _spawn_process(self, message, slot):
     """Start the crawl subprocess described by ``message`` in ``slot``.

     Builds the ``<python> -m <runner> crawl ...`` command line from the
     stringified message, obtains the environment from the application's
     ``IEnvironment`` component, spawns the process through the reactor and
     stores its protocol in ``self.processes[slot]``.
     """
     job = stringify_dict(message, keys_only=False)
     project_name = job['_project']
     command = [sys.executable, '-m', self.runner, 'crawl']
     command.extend(get_crawl_args(job))
     env_component = self.app.getComponent(IEnvironment)
     proc_env = stringify_dict(env_component.get_environment(job, slot),
                               keys_only=False)
     protocol = ScrapyProcessProtocol(
         slot, project_name, job['_spider'], job['_job'], proc_env)
     # _process_finished is attached with addBoth and receives the slot as
     # an extra argument.
     protocol.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(protocol, sys.executable, args=command, env=proc_env)
     self.processes[slot] = protocol
Exemplo n.º 4
0
 def _spawn_process(self, message, slot):
     """Start the crawl subprocess described by ``message`` in ``slot``.

     Builds the ``<python> -m <runner> crawl ...`` command line from the
     stringified message, obtains the environment from the application's
     ``IEnvironment`` component, spawns the process through the reactor and
     stores its protocol in ``self.processes[slot]``.

     The message must contain ``start_url``, ``currency``, ``country`` and
     ``site_id``; a missing key raises ``KeyError``.
     """
     job = stringify_dict(message, keys_only=False)
     project_name = job['_project']
     command = [sys.executable, '-m', self.runner, 'crawl']
     command.extend(get_crawl_args(job))
     env_component = self.app.getComponent(IEnvironment)
     proc_env = stringify_dict(env_component.get_environment(job, slot),
                               keys_only=False)
     # Besides the usual job identifiers, the protocol also receives the
     # start URL, currency, country and site id taken from the message.
     protocol = ScrapyProcessProtocol(
         slot, project_name, job['_spider'], job['_job'], proc_env,
         job['start_url'], job['currency'], job['country'], job['site_id'])
     protocol.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(protocol, sys.executable, args=command, env=proc_env)
     self.processes[slot] = protocol
Exemplo n.º 5
0
def get_crawl_args(message):
    """Build the command-line arguments for the scrapy crawl process that
    will be started for this message.

    The list starts with the spider name, followed by an ``-a key=value``
    pair for every remaining message entry and an ``-s key=value`` pair for
    every entry of the optional ``settings`` mapping. ``message`` is not
    modified; a shallow copy is consumed instead.
    """
    remaining = message.copy()
    argv = [unicode_to_str(remaining['_spider'])]
    del remaining['_project'], remaining['_spider']
    settings = remaining.pop('settings', {})
    for key, value in stringify_dict(remaining, keys_only=False).items():
        argv.extend(['-a', '%s=%s' % (key, value)])
    for key, value in stringify_dict(settings, keys_only=False).items():
        argv.extend(['-s', '%s=%s' % (key, value)])
    return argv
Exemplo n.º 6
0
def get_crawl_args(message):
    """Build the command-line arguments for the scrapy crawl process that
    will be started for this message.

    The spider name comes first. Every other message entry follows as
    ``-a key=value`` and every entry of the optional ``settings`` mapping
    as ``-s key=value``. Works on a shallow copy, so ``message`` is kept
    intact.
    """
    pending = message.copy()
    spider_name = unicode_to_str(pending['_spider'])
    del pending['_project'], pending['_spider']
    settings = pending.pop('settings', {})
    argv = [spider_name]
    # Spider arguments (-a) are emitted before settings (-s).
    for flag, mapping in (('-a', pending), ('-s', settings)):
        for key, value in stringify_dict(mapping, keys_only=False).items():
            argv += [flag, '%s=%s' % (key, value)]
    return argv
Exemplo n.º 7
0
def _make_scrapy_args(arg, args_dict):
    if not args_dict:
        return []
    args = []
    for kv in stringify_dict(args_dict, keys_only=False).items():
        args += [arg, "%s=%s" % kv]
    return args
Exemplo n.º 8
0
 def test_stringify_dict(self):
     """stringify_dict(keys_only=False) must return an equal but distinct
     dict that holds no text-type keys or values.
     """
     d = {'a': 123, u'b': b'c', u'd': u'e', object(): u'e'}
     d2 = stringify_dict(d, keys_only=False)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     self.assertFalse(any(isinstance(x, six.text_type) for x in d2.keys()))
     self.assertFalse(any(isinstance(x, six.text_type) for x in d2.values()))
Exemplo n.º 9
0
 def test_stringify_dict(self):
     """stringify_dict(keys_only=False) must return an equal but distinct
     dict that holds no unicode keys or values.
     """
     d = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
     d2 = stringify_dict(d, keys_only=False)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()))
     self.assertFalse(any(isinstance(x, unicode) for x in d2.values()))
Exemplo n.º 10
0
def _make_scrapy_args(arg, args_dict):
    if not args_dict:
        return []
    args = []
    for kv in stringify_dict(args_dict, keys_only=False).items():
        args += [arg, "%s=%s" % kv]
    return args
Exemplo n.º 11
0
 def test_stringify_dict(self):
     """stringify_dict(keys_only=False) must return an equal but distinct
     dict that holds no text-type keys or values.
     """
     d = {"a": 123, u"b": b"c", u"d": u"e", object(): u"e"}
     d2 = stringify_dict(d, keys_only=False)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     self.assertFalse(any(isinstance(x, six.text_type) for x in d2.keys()))
     self.assertFalse(any(isinstance(x, six.text_type) for x in d2.values()))
Exemplo n.º 12
0
 def test_stringify_dict_tuples(self):
     """stringify_dict must accept a list of (key, value) tuples and return
     a dict equal to ``dict(tuples)`` without text-type keys or values.
     """
     tuples = [('a', 123), (u'b', 'c'), (u'd', u'e'), (object(), u'e')]
     d = dict(tuples)
     d2 = stringify_dict(tuples, keys_only=False)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     # The resulting keys are passed as the failure message.
     self.assertFalse(any(isinstance(x, six.text_type) for x in d2.keys()), d2.keys())
     self.assertFalse(any(isinstance(x, six.text_type) for x in d2.values()))
Exemplo n.º 13
0
def _job_args_and_env(msg):
    """Return the ``(command, environment)`` pair described by ``msg``.

    ``job_env`` is used only when it is a dict; anything else is replaced
    by an empty dict. A ``job_cmd`` that is not a list is wrapped into a
    one-element list holding its ``str()`` form. The environment is
    returned stringified (keys and values).
    """
    raw_env = msg.get('job_env')
    environment = raw_env if isinstance(raw_env, dict) else {}
    command = msg.get('job_cmd')
    if not isinstance(command, list):
        command = [str(command)]
    return command, stringify_dict(environment, keys_only=False)
Exemplo n.º 14
0
 def test_stringify_dict(self):
     """stringify_dict(keys_only=False) yields an equal, distinct dict that
     contains no text-type keys or values.
     """
     original = {'a': 123, u'b': b'c', u'd': u'e', object(): u'e'}
     converted = stringify_dict(original, keys_only=False)
     self.assertEqual(original, converted)
     # A new dict must come back; the input is not to be modified in place.
     self.assertIsNot(original, converted)
     has_text_key = any(
         isinstance(key, six.text_type) for key in converted.keys())
     self.assertFalse(has_text_key)
     has_text_value = any(
         isinstance(value, six.text_type) for value in converted.values())
     self.assertFalse(has_text_value)
Exemplo n.º 15
0
def _job_args_and_env(msg):
    """Extract the job command list and its stringified environment.

    Falls back to an empty environment when ``job_env`` is not a dict, and
    converts a non-list ``job_cmd`` into ``[str(job_cmd)]``.
    """
    environment = msg.get('job_env')
    if not isinstance(environment, dict):
        environment = {}
    command = msg.get('job_cmd')
    command = command if isinstance(command, list) else [str(command)]
    env_strings = stringify_dict(environment, keys_only=False)
    return command, env_strings
Exemplo n.º 16
0
 def test_stringify_dict_tuples(self):
     """stringify_dict must accept a list of (key, value) tuples and return
     a dict equal to ``dict(tuples)`` without unicode keys or values.
     """
     tuples = [('a', 123), (u'b', 'c'), (u'd', u'e'), (object(), u'e')]
     d = dict(tuples)
     d2 = stringify_dict(tuples, keys_only=False)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     # The resulting keys are passed as the failure message.
     self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()), d2.keys())
     self.assertFalse(any(isinstance(x, unicode) for x in d2.values()))
Exemplo n.º 17
0
 def _append_next(self):
     """Called when there are no more items left in self.spider_requests.
     This method is meant to be overriden in subclasses to add new (spider,
     requests) tuples to self.spider_requests. It can return a Deferred.
     """
     msg = yield self._queue.pop()
     if msg:
         name = msg.pop('name')
         msg = stringify_dict(msg) # see #250
         self.append_spider_name(name, **msg)
Exemplo n.º 18
0
 def test_stringify_dict_tuples(self):
     """stringify_dict accepts a list of (key, value) tuples and yields a
     dict equal to ``dict(tuples)`` with no text-type keys or values.
     """
     pairs = [('a', 123), (u'b', 'c'), (u'd', u'e'), (object(), u'e')]
     expected = dict(pairs)
     converted = stringify_dict(pairs, keys_only=False)
     self.assertEqual(expected, converted)
     # The result must be a separate object from the reference dict.
     self.assertIsNot(expected, converted)
     has_text_key = any(
         isinstance(key, six.text_type) for key in converted.keys())
     # The resulting keys are passed as the failure message.
     self.assertFalse(has_text_key, converted.keys())
     has_text_value = any(
         isinstance(value, six.text_type) for value in converted.values())
     self.assertFalse(has_text_value)
Exemplo n.º 19
0
 def test_stringify_dict_keys_only(self):
     """With default arguments stringify_dict yields an equal, distinct
     dict that contains no text-type keys.
     """
     original = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
     converted = stringify_dict(original)
     self.assertEqual(original, converted)
     # A new dict must come back; the input is not to be modified in place.
     self.assertIsNot(original, converted)
     has_text_key = any(
         isinstance(key, six.text_type) for key in converted.keys())
     self.assertFalse(has_text_key)
Exemplo n.º 20
0
 def test_stringify_dict_keys_only(self):
     """With default arguments stringify_dict must return an equal but
     distinct dict that holds no unicode keys.
     """
     d = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
     d2 = stringify_dict(d)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()))
Exemplo n.º 21
0
 def test_stringify_dict_keys_only(self):
     """With default arguments stringify_dict must return an equal but
     distinct dict that holds no unicode keys.
     """
     d = {'a': 123, u'b': 'c', u'd': u'e', object(): u'e'}
     d2 = stringify_dict(d)
     self.assertEqual(d, d2)
     self.assertIsNot(d, d2)  # shouldn't modify in place
     self.assertFalse(any(isinstance(x, unicode) for x in d2.keys()))