@coroutine
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> @coroutine
        ... def run(reactor):
        ...     xml_url = get_path('ouseful.xml')
        ...     xml_conf = {'url': xml_url, 'xpath': '/rss/channel/item'}
        ...     xml_objconf = Objectify(xml_conf)
        ...     xml_args = (None, xml_objconf)
        ...     html_url = get_path('sciencedaily.html')
        ...     html_conf = {'url': html_url, 'xpath': '/html/head/title'}
        ...     html_objconf = Objectify(html_conf)
        ...     html_args = (None, html_objconf)
        ...     kwargs = {'stream': {}}
        ...
        ...     try:
        ...         xml_stream = yield async_parser(*xml_args, **kwargs)
        ...         html_stream = yield async_parser(*html_args, **kwargs)
        ...         print(next(xml_stream)['title'][:44])
        ...         print(next(html_stream))
        ...     except Exception as e:
        ...         logger.error(e)
        ...         logger.error(traceback.format_exc())
        ...
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Running “Native” Data Wrangling Applications
        Help Page  -- ScienceDaily
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')

        # HTML pages need the lenient parser unless strict mode is requested
        xml = (ext == 'xml') or objconf.strict

        try:
            f = yield io.async_url_open(url)
            tree = yield util.xml2etree(f, xml=xml)
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())

        elements = xpath(tree, objconf.xpath)
        f.close()
        items = map(util.etree2dict, elements)
        stringified = ({kwargs['assign']: encode(i)} for i in items)
        stream = stringified if objconf.stringify else items

    return_value(stream)
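# --- Assumed module preamble (illustrative sketch, not verbatim source) ----
# The parser above leans on names bound at module level. A plausible preamble
# follows; riko's helper locations vary across versions, so treat the exact
# import paths as assumptions. `xpath`, `encode`, and `Objectify` likewise
# come from riko's parser/utility helpers and are omitted here.
import traceback

from os.path import splitext

import pygogo as gogo

from riko.bado import coroutine, return_value, util, io
from riko.utils import get_abspath

logger = gogo.Gogo(__name__, monolog=True).logger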
@coroutine
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred stream

    Examples:
        >>> from six.moves.urllib.request import urlopen
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.utils import get_abspath
        >>> from meza.fntools import Objectify
        >>>
        >>> feed = 'http://feeds.feedburner.com/TechCrunch/'
        >>> url = 'http://query.yahooapis.com/v1/public/yql'
        >>> query = "select * from feed where url='%s'" % feed
        >>> f = urlopen(get_abspath(get_path('yql.xml')))
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['title'])
        ...     conf = {'query': query, 'url': url, 'debug': False}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': {}, 'response': f}
        ...     d = async_parser(None, objconf, **kwargs)
        ...     d.addCallbacks(callback, logger.error)
        ...     d.addCallback(lambda _: f.close())
        ...     return d
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ... finally:
        ...     f.close()
        Bring pizza home
    """
    if skip:
        stream = kwargs['stream']
    else:
        f = kwargs.get('response')

        if not f:
            # no canned response supplied, so fetch the YQL endpoint directly
            params = {'q': objconf.query, 'diagnostics': objconf.debug}
            r = yield treq.get(objconf.url, params=params)
            f = yield treq.content(r)

        tree = yield util.xml2etree(f)
        results = next(tree.getElementsByTagName('results'))
        stream = map(util.etree2dict, results.childNodes)

    return_value(stream)
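# For contrast, a minimal blocking sketch of the same YQL round trip using
# the standard library. fetch_yql is a hypothetical helper, not part of riko;
# it mirrors the treq.get(params=...) call and the <results> extraction above.
from xml.etree import ElementTree

from six.moves.urllib.parse import urlencode
from six.moves.urllib.request import urlopen


def fetch_yql(url, query, debug=False):
    """Return the child elements of YQL's <results> node, one per feed item."""
    # same query parameters async_parser sends via treq.get
    params = urlencode({'q': query, 'diagnostics': debug})
    tree = ElementTree.parse(urlopen('%s?%s' % (url, params)))

    # analogue of tree.getElementsByTagName('results') in the parser above
    results = next(tree.iter('results'))
    return list(results)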
@coroutine
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred Tuple of (stream, skip)

    Examples:
        >>> from six.moves.urllib.request import urlopen
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify, get_abspath
        >>>
        >>> feed = 'http://feeds.feedburner.com/TechCrunch/'
        >>> url = 'http://query.yahooapis.com/v1/public/yql'
        >>> query = "select * from feed where url='%s'" % feed
        >>> f = urlopen(get_abspath(get_path('yql.xml')))
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['title'])
        ...     conf = {'query': query, 'url': url, 'debug': False}
        ...     objconf = Objectify(conf)
        ...     kwargs = {'stream': {}, 'response': f}
        ...     d = async_parser(None, objconf, False, **kwargs)
        ...     d.addCallbacks(callback, logger.error)
        ...     d.addCallback(lambda _: f.close())
        ...     return d
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Bring pizza home
    """
    if skip:
        stream = kwargs['stream']
    else:
        f = kwargs.get('response')

        if not f:
            params = {'q': objconf.query, 'diagnostics': objconf.debug}
            r = yield treq.get(objconf.url, params=params)
            f = yield treq.content(r)

        tree = yield util.xml2etree(f)
        results = next(tree.getElementsByTagName('results'))
        stream = map(util.etree2dict, results.childNodes)

    # older contract: hand the skip flag back alongside the stream
    result = (stream, skip)
    return_value(result)
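# Sketch of consuming the older (stream, skip) contract outside a doctest.
# handle() and the wiring below are illustrative, not riko internals; the
# caller unpacks the tuple to tell pass-through items from parsed ones.
def handle(result):
    stream, skip = result

    if not skip:
        for item in stream:
            print(item.get('title'))

# with objconf and logger bound as above:
#     d = async_parser(None, objconf, False, stream={})
#     d.addCallbacks(handle, logger.error)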