def test_get_func_args(self):
    """Check what ``get_func_args`` reports for several kinds of callables."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    class A(object):
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable(object):
        def __call__(self, a, b, c):
            pass

    instance = A(1, 2, 3)
    callable_obj = Callable()

    # Functions, a class, a bound method and a callable instance are all
    # expected to yield the same three names (no ``self`` in the result).
    for target in (f1, f2, A, instance.method, callable_obj):
        self.assertEqual(get_func_args(target), ['a', 'b', 'c'])

    self.assertEqual(get_func_args(object), [])

    # TODO: how do we fix this to return the actual argument names?
    for builtin in (unicode.split, " ".join):
        self.assertEqual(get_func_args(builtin), [])
def callback_args(f):
    """Wrap callback ``f`` so its extra arguments are read from ``response.meta``.

    Every argument name of ``f`` after the first two is looked up in
    ``response.meta``; the names that are present are forwarded to ``f`` as
    keyword arguments, the others are simply not passed.
    """
    extra_names = get_func_args(f)[2:]

    @wraps(f)
    def wrapper(spider, response):
        meta_kwargs = {}
        for name in extra_names:
            if name in response.meta:
                meta_kwargs[name] = response.meta[name]
        return f(spider, response, **meta_kwargs)

    return wrapper
def wrap_loader_context(function, context):
    """Return ``function`` with ``loader_context`` pre-bound when it accepts one.

    If ``get_func_args(function)`` lists a ``loader_context`` argument, a
    ``partial`` with ``loader_context=context`` already applied is returned.
    Otherwise ``function`` is handed back untouched.
    """
    if 'loader_context' not in get_func_args(function):
        return function
    return partial(function, loader_context=context)
def _compile(self, spider):
    """Resolve the rule's hooks against ``spider`` and count ``process_request`` arguments.

    ``callback``, ``process_links`` and ``process_request`` are each replaced
    by the result of ``_get_method(<hook>, spider)``. The number of arguments
    reported for ``process_request`` is stored in
    ``process_request_argcount``; a ``ScrapyDeprecationWarning`` is issued
    when that number is exactly one.
    """
    for hook in ('callback', 'process_links', 'process_request'):
        setattr(self, hook, _get_method(getattr(self, hook), spider))

    request_args = get_func_args(self.process_request)
    self.process_request_argcount = len(request_args)
    if self.process_request_argcount != 1:
        return

    warnings.warn(
        'Rule.process_request should accept two arguments '
        '(request, response), accepting only one is deprecated',
        category=ScrapyDeprecationWarning,
        stacklevel=2,
    )
def _compile(self, spider):
    """Bind ``callback``, ``process_links`` and ``process_request`` via ``_get_method``.

    Also records in ``process_request_argcount`` how many arguments
    ``get_func_args`` reports for the resolved ``process_request`` and warns
    (``ScrapyDeprecationWarning``) if it takes a single argument.
    """
    self.callback = _get_method(self.callback, spider)
    self.process_links = _get_method(self.process_links, spider)
    self.process_request = _get_method(self.process_request, spider)

    argcount = len(get_func_args(self.process_request))
    self.process_request_argcount = argcount

    takes_single_argument = argcount == 1
    if takes_single_argument:
        deprecation = (
            'Rule.process_request should accept two arguments '
            '(request, response), accepting only one is deprecated'
        )
        warnings.warn(deprecation, category=ScrapyDeprecationWarning, stacklevel=2)
def _add_middleware(self, pipe):
    """Register ``pipe`` and, if present, queue its ``process_item`` method.

    After delegating to the parent ``_add_middleware``, the pipeline's
    ``process_item`` attribute (when truthy) is appended to
    ``self.methods['process_item']``. If the second entry of the underlying
    function's argument list is ``'spider'``, a warning is logged and the
    method is first wrapped with ``_wrap_old_process_item``.
    """
    super(ItemPipelineManager, self)._add_middleware(pipe)

    func = getattr(pipe, 'process_item', None)
    if not func:
        return

    # FIXME: remove in Scrapy 0.11
    fargs = get_func_args(func.im_func)
    uses_old_order = fargs and fargs[1] == 'spider'
    if uses_old_order:
        notice = (
            "Update %s.process_item() method to receive (item, spider) "
            "instead of (spider, item) or they will stop working on "
            "Scrapy 0.11"
        ) % pipe.__class__.__name__
        log.msg(notice, log.WARNING)
        func = self._wrap_old_process_item(func)

    self.methods['process_item'].append(func)
def _add_middleware(self, pipe):
    """Add ``pipe`` to the manager and collect its ``process_item`` method.

    The parent implementation runs first. A pipeline without a truthy
    ``process_item`` attribute contributes nothing further. Otherwise the
    method is appended to ``self.methods['process_item']``, wrapped through
    ``_wrap_old_process_item`` (with a logged warning) when the argument at
    index 1 of its underlying function is named ``'spider'``.
    """
    super(ItemPipelineManager, self)._add_middleware(pipe)
    process_item = getattr(pipe, 'process_item', None)
    if process_item:
        # FIXME: remove in Scrapy 0.11
        arg_names = get_func_args(process_item.im_func)
        if arg_names and arg_names[1] == 'spider':
            pipe_name = pipe.__class__.__name__
            log.msg(
                "Update %s.process_item() method to receive (item, spider) instead of (spider, item) or they will stop working on Scrapy 0.11" % pipe_name,
                log.WARNING)
            process_item = self._wrap_old_process_item(process_item)
        self.methods['process_item'].append(process_item)
def build_storage(builder, uri, *args, feed_options=None, preargs=(), **kwargs):
    """Call ``builder`` with ``uri``, passing ``feed_options`` only if it accepts it.

    ``builder`` is invoked as ``builder(*preargs, uri, *args, **kwargs)``.
    When ``get_func_args(builder)`` lists ``feed_options``, that value is
    added to the keyword arguments; otherwise a ``ScrapyDeprecationWarning``
    naming the builder is emitted and ``feed_options`` is not forwarded.
    """
    if "feed_options" not in get_func_args(builder):
        template = (
            "{} does not support the 'feed_options' keyword argument. Add a "
            "'feed_options' parameter to its signature to remove this "
            "warning. This parameter will become mandatory in a future "
            "version of Scrapy."
        )
        warnings.warn(
            template.format(builder.__qualname__),
            category=ScrapyDeprecationWarning,
        )
    else:
        kwargs["feed_options"] = feed_options
    return builder(*preargs, uri, *args, **kwargs)
def from_crawler(cls, crawler):
    """Instantiate ``cls`` for ``crawler`` and hook it up to the crawler's signals.

    If ``get_func_args(cls)`` reports no arguments, ``cls`` is instantiated
    without arguments and a ``ScrapyDeprecationWarning`` is issued; otherwise
    it receives ``crawler.settings``. The instance's ``open_spider``,
    ``close_spider`` and ``item_scraped`` handlers are then connected to the
    matching signals, and the instance is returned.
    """
    if get_func_args(cls):
        o = cls(crawler.settings)
    else:
        # FIXME: remove for scrapy 0.17
        import warnings
        from scrapy.exceptions import ScrapyDeprecationWarning
        message = "%s must receive a settings object as first constructor argument." % cls.__name__
        warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)
        o = cls()

    crawler.signals.connect(o.open_spider, signals.spider_opened)
    crawler.signals.connect(o.close_spider, signals.spider_closed)
    crawler.signals.connect(o.item_scraped, signals.item_scraped)
    return o
def from_crawler(cls, crawler):
    """Create an instance of ``cls`` and connect its handlers to ``crawler.signals``.

    A constructor for which ``get_func_args`` reports no arguments is called
    bare, with a ``ScrapyDeprecationWarning``; any other constructor is given
    ``crawler.settings``. Returns the connected instance.
    """
    accepts_settings = len(get_func_args(cls)) >= 1
    if accepts_settings:
        o = cls(crawler.settings)
    else:
        # FIXME: remove for scrapy 0.17
        import warnings
        from scrapy.exceptions import ScrapyDeprecationWarning
        warnings.warn(
            "%s must receive a settings object as first constructor argument." % cls.__name__,
            ScrapyDeprecationWarning,
            stacklevel=2)
        o = cls()

    # (handler attribute on the instance, signal attribute on ``signals``)
    wiring = (
        ('open_spider', 'spider_opened'),
        ('close_spider', 'spider_closed'),
        ('item_scraped', 'item_scraped'),
    )
    for handler_name, signal_name in wiring:
        crawler.signals.connect(getattr(o, handler_name), getattr(signals, signal_name))
    return o
def test_get_func_args(self):
    """Exercise ``get_func_args`` on functions, classes, methods, partials and builtins."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    class A(object):
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable(object):
        def __call__(self, a, b, c):
            pass

    obj = A(1, 2, 3)
    callable_obj = Callable()
    first_bound = functools.partial(f1, None)
    b_bound = functools.partial(f1, b=None)
    first_and_b_bound = functools.partial(b_bound, None)

    # (callable, argument names expected from get_func_args)
    expectations = [
        (f1, ['a', 'b', 'c']),
        (f2, ['a', 'b', 'c']),
        (A, ['a', 'b', 'c']),
        (obj.method, ['a', 'b', 'c']),
        (first_bound, ['b', 'c']),
        (b_bound, ['a', 'c']),
        (first_and_b_bound, ['c']),
        (callable_obj, ['a', 'b', 'c']),
        (object, []),
    ]
    for target, expected in expectations:
        self.assertEqual(get_func_args(target), expected)

    if platform.python_implementation() == 'CPython':
        # TODO: how do we fix this to return the actual argument names?
        for builtin in (six.text_type.split, " ".join, operator.itemgetter(2)):
            self.assertEqual(get_func_args(builtin), [])
    else:
        stripself = not six.PY2  # PyPy3 exposes them as methods
        builtin_expectations = [
            (six.text_type.split, ['sep', 'maxsplit']),
            (" ".join, ['list']),
            (operator.itemgetter(2), ['obj']),
        ]
        for builtin, expected in builtin_expectations:
            self.assertEqual(get_func_args(builtin, stripself), expected)
"""
Provide the XmlRpcRequest class, a Request subclass that is more convenient
than a plain Request for generating xml-rpc requests.

See documentation in docs/topics/request-response.rst
"""
from six.moves import xmlrpc_client as xmlrpclib

from scrapy import Request
from scrapy.utils.python import get_func_args


# Argument names reported for ``xmlrpclib.dumps``; used to pick the matching
# keyword arguments out of the XmlRpcRequest constructor call.
DUMPS_ARGS = get_func_args(xmlrpclib.dumps)


class XmlRpcRequest(Request):
    """Request whose body is built with ``xmlrpclib.dumps`` from keyword arguments."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): no call to the base initializer is visible in this
        # portion of the method -- confirm that it follows.
        encoding = kwargs.get('encoding')

        build_body = 'params' in kwargs and 'body' not in kwargs
        if build_body:
            # Move every keyword accepted by ``dumps`` out of ``kwargs``.
            kw = {k: kwargs.pop(k) for k in DUMPS_ARGS if k in kwargs}
            kwargs['body'] = xmlrpclib.dumps(**kw)

        # spec defines that requests must use POST method
        kwargs.setdefault('method', 'POST')

        # xmlrpc queries the same url multiple times
        kwargs.setdefault('dont_filter', True)

        # restore encoding (it may have been popped from kwargs above)
        if encoding is not None:
            kwargs['encoding'] = encoding
""" This module implements the XmlRpcRequest class which is a more convenient class (than Request) to generate xml-rpc requests. See documentation in docs/topics/request-response.rst """ from six.moves import xmlrpc_client as xmlrpclib from scrapy.http.request import Request from scrapy.utils.python import get_func_args DUMPS_ARGS = get_func_args(xmlrpclib.dumps) class XmlRpcRequest(Request): def __init__(self, *args, **kwargs): encoding = kwargs.get('encoding', None) if 'body' not in kwargs and 'params' in kwargs: kw = dict((k, kwargs.pop(k)) for k in DUMPS_ARGS if k in kwargs) kwargs['body'] = xmlrpclib.dumps(**kw) # spec defines that requests must use POST method kwargs.setdefault('method', 'POST') # xmlrpc query multiples times over the same url kwargs.setdefault('dont_filter', True) # restore encoding if encoding is not None:
def test_get_func_args(self):
    """Verify ``get_func_args`` for plain callables, partials and builtin methods."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    class A:
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable:
        def __call__(self, a, b, c):
            pass

    def check(target, expected, **options):
        # Single place where the function under test is called and compared.
        self.assertEqual(get_func_args(target, **options), expected)

    obj = A(1, 2, 3)
    callable_obj = Callable()
    without_a = functools.partial(f1, None)
    without_b = functools.partial(f1, b=None)
    only_c = functools.partial(without_b, None)

    check(f1, ["a", "b", "c"])
    check(f2, ["a", "b", "c"])
    check(A, ["a", "b", "c"])
    check(obj.method, ["a", "b", "c"])
    check(without_a, ["b", "c"])
    check(without_b, ["a", "c"])
    check(only_c, ["c"])
    check(callable_obj, ["a", "b", "c"])
    check(object, [])

    if platform.python_implementation() == "CPython":
        # TODO: how do we fix this to return the actual argument names?
        check(str.split, [])
        check(" ".join, [])
        check(operator.itemgetter(2), [])
    else:
        check(str.split, ["sep", "maxsplit"], stripself=True)
        check(operator.itemgetter(2), ["obj"], stripself=True)

        # The expected name of str.join's argument depends on the version.
        join_arg = "list" if version_info < (3, 6) else "iterable"
        check(" ".join, [join_arg], stripself=True)
def test_get_func_args(self):
    """Check the names returned by ``get_func_args`` for a range of callables."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    class A(object):
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable(object):
        def __call__(self, a, b, c):
            pass

    def expect(target, names, *extra):
        # ``extra`` carries the optional positional ``stripself`` flag.
        self.assertEqual(get_func_args(target, *extra), names)

    instance = A(1, 2, 3)
    callable_instance = Callable()
    p1 = functools.partial(f1, None)
    p2 = functools.partial(f1, b=None)
    p3 = functools.partial(p2, None)

    expect(f1, ['a', 'b', 'c'])
    expect(f2, ['a', 'b', 'c'])
    expect(A, ['a', 'b', 'c'])
    expect(instance.method, ['a', 'b', 'c'])
    expect(p1, ['b', 'c'])
    expect(p2, ['a', 'c'])
    expect(p3, ['c'])
    expect(callable_instance, ['a', 'b', 'c'])
    expect(object, [])

    running_cpython = platform.python_implementation() == 'CPython'
    if not running_cpython:
        stripself = not six.PY2  # PyPy3 exposes them as methods
        expect(six.text_type.split, ['sep', 'maxsplit'], stripself)
        expect(" ".join, ['list'], stripself)
        expect(operator.itemgetter(2), ['obj'], stripself)
    else:
        # TODO: how do we fix this to return the actual argument names?
        expect(six.text_type.split, [])
        expect(" ".join, [])
        expect(operator.itemgetter(2), [])
def get_arguments(self, func, stripself):
    """Store the result of ``get_func_args(func, stripself)`` in ``self._result``."""
    outcome = get_func_args(func, stripself)
    self._result = outcome
def test_get_func_args(self):
    """Verify ``get_func_args`` on functions, keyword-only args, partials and builtins."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    def f3(a, b=None, *, c=None):
        pass

    class A:
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable:
        def __call__(self, a, b, c):
            pass

    obj = A(1, 2, 3)
    callable_obj = Callable()
    drop_a = functools.partial(f1, None)
    drop_b = functools.partial(f1, b=None)
    drop_a_and_b = functools.partial(drop_b, None)

    # (callable, argument names expected from get_func_args)
    cases = (
        (f1, ['a', 'b', 'c']),
        (f2, ['a', 'b', 'c']),
        (f3, ['a', 'b', 'c']),
        (A, ['a', 'b', 'c']),
        (obj.method, ['a', 'b', 'c']),
        (drop_a, ['b', 'c']),
        (drop_b, ['a', 'c']),
        (drop_a_and_b, ['c']),
        (callable_obj, ['a', 'b', 'c']),
        (object, []),
    )
    for target, expected in cases:
        self.assertEqual(get_func_args(target), expected)

    implementation = platform.python_implementation()
    if implementation == 'CPython':
        # TODO: how do we fix this to return the actual argument names?
        for builtin in (str.split, " ".join, operator.itemgetter(2)):
            self.assertEqual(get_func_args(builtin), [])
    elif implementation == 'PyPy':
        self.assertEqual(get_func_args(str.split, stripself=True), ['sep', 'maxsplit'])
        self.assertEqual(get_func_args(operator.itemgetter(2), stripself=True), ['obj'])

        # The expected name of str.join's argument depends on the build date.
        build_date = datetime.strptime(platform.python_build()[1], '%b %d %Y')
        renamed = build_date >= datetime(2020, 4, 7)  # PyPy 3.6-v7.3.1
        join_arg = 'iterable' if renamed else 'list'
        self.assertEqual(get_func_args(" ".join, stripself=True), [join_arg])
def test_get_func_args(self):
    """Check ``get_func_args`` against functions, classes, partials and builtins."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    class A(object):
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable(object):
        def __call__(self, a, b, c):
            pass

    instance = A(1, 2, 3)
    callable_obj = Callable()
    partial_one = functools.partial(f1, None)
    partial_two = functools.partial(f1, b=None)
    partial_three = functools.partial(partial_two, None)

    # (callable, argument names expected from get_func_args)
    expectations = [
        (f1, ['a', 'b', 'c']),
        (f2, ['a', 'b', 'c']),
        (A, ['a', 'b', 'c']),
        (instance.method, ['a', 'b', 'c']),
        (partial_one, ['b', 'c']),
        (partial_two, ['a', 'c']),
        (partial_three, ['c']),
        (callable_obj, ['a', 'b', 'c']),
        (object, []),
    ]
    for target, expected in expectations:
        self.assertEqual(get_func_args(target), expected)

    # TODO: how do we fix this to return the actual argument names?
    for builtin in (unicode.split, " ".join, operator.itemgetter(2)):
        self.assertEqual(get_func_args(builtin), [])
def test_get_func_args(self):
    """Verify the argument names ``get_func_args`` returns for assorted callables."""

    def f1(a, b, c):
        pass

    def f2(a, b=None, c=None):
        pass

    class A(object):
        def __init__(self, a, b, c):
            pass

        def method(self, a, b, c):
            pass

    class Callable(object):
        def __call__(self, a, b, c):
            pass

    def expect(target, names):
        # Single place where the function under test is called and compared.
        self.assertEqual(get_func_args(target), names)

    obj = A(1, 2, 3)
    callable_instance = Callable()
    p1 = functools.partial(f1, None)
    p2 = functools.partial(f1, b=None)
    p3 = functools.partial(p2, None)

    expect(f1, ['a', 'b', 'c'])
    expect(f2, ['a', 'b', 'c'])
    expect(A, ['a', 'b', 'c'])
    expect(obj.method, ['a', 'b', 'c'])
    expect(p1, ['b', 'c'])
    expect(p2, ['a', 'c'])
    expect(p3, ['c'])
    expect(callable_instance, ['a', 'b', 'c'])
    expect(object, [])

    # TODO: how do we fix this to return the actual argument names?
    expect(six.text_type.split, [])
    expect(" ".join, [])
    expect(operator.itemgetter(2), [])