Example #1
def get_string(content, start, end):
    # TODO: convert relative links to absolute
    # TODO: remove the closing tag if using an HTML tag stripped of HTML tags
    # TODO: clean html with Tidy
    content = encode(content)
    start_pos = content.find(encode(start)) if start else 0
    right = content[start_pos + (len(start) if start else 0):]
    end_pos = right[1:].find(encode(end)) + 1 if end else len(right)
    return right[:end_pos] if end_pos > 0 else right
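A minimal usage sketch of get_string, run in the same scope as the snippet above and assuming encode returns the UTF-8 bytes of a str (the byte-oriented find and slicing imply as much); the sample HTML string is illustrative only:

html = '<p>Hello <b>world</b></p>'
# Everything between the start and end markers, i.e. b'world'
print(get_string(html, '<b>', '</b>'))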
Example #2
File: io.py Project: tianhm/riko
def async_url_read(url, timeout=0, **kwargs):
    if url.startswith('http'):
        content = getPage(encode(url), timeout=timeout)
    else:
        content = async_read_file(url, StringTransport(), **kwargs)

    return content
Example #3
File: io.py Project: nerevu/riko
def async_url_read(url, timeout=0, **kwargs):
    if url.startswith('http'):
        content = getPage(encode(url), timeout=timeout)
    else:
        content = async_read_file(url, StringTransport(), **kwargs)

    return content
Example #4
    def writexml(self, stream, *args, **kwargs):
        if self.raw:
            # Raw nodes are written verbatim, without XML escaping
            val = decode(self.nodeValue)
        else:
            # Optionally collapse whitespace, then escape XML special characters
            v = decode(self.nodeValue)
            v = ' '.join(v.split()) if kwargs.get('strip') else v
            val = escape(v)

        # Re-encode the text to bytes before writing it to the stream
        val = encode(val)
        stream.write(val)
Example #5
File: io.py Project: mnjstwins/riko
def async_url_open(url, timeout=0, **kwargs):
    if url.startswith('http'):
        page = NamedTemporaryFile(delete=False)
        new_url = page.name
        yield downloadPage(encode(url), page, timeout=timeout)
    else:
        page, new_url = None, url

    f = yield async_get_file(new_url, StringTransport(), **kwargs)

    if page:
        page.close()
        remove(page.name)

    return_value(f)
Example #6
File: io.py Project: nerevu/riko
def async_url_open(url, timeout=0, **kwargs):
    if url.startswith('http'):
        page = NamedTemporaryFile(delete=False)
        new_url = page.name
        yield downloadPage(encode(url), page, timeout=timeout)
    else:
        page, new_url = None, url

    f = yield async_get_file(new_url, StringTransport(), **kwargs)

    if not hasattr(f, 'name') and url.startswith('file'):
        # For file:// URLs, expose the local path as the file's name
        f.name = url.split('://')[1]

    if page:
        page.close()
        remove(page.name)

    return_value(f)
Example #7
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: content)
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> @coroutine
        ... def run(reactor):
        ...     xml_url = get_path('ouseful.xml')
        ...     xml_conf = {'url': xml_url, 'xpath': '/rss/channel/item'}
        ...     xml_objconf = Objectify(xml_conf)
        ...     xml_args = (None, xml_objconf)
        ...     html_url = get_path('sciencedaily.html')
        ...     html_conf = {'url': html_url, 'xpath': '/html/head/title'}
        ...     html_objconf = Objectify(html_conf)
        ...     html_args = (None, html_objconf)
        ...     kwargs = {'stream': {}}
        ...
        ...     try:
        ...         xml_stream = yield async_parser(*xml_args, **kwargs)
        ...         html_stream = yield async_parser(*html_args, **kwargs)
        ...         print(next(xml_stream)['title'][:44])
        ...         print(next(html_stream))
        ...     except Exception as e:
        ...         logger.error(e)
        ...         logger.error(traceback.format_exc())
        ...
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Running “Native” Data Wrangling Applications
        Help Page -- ScienceDaily
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        ext = splitext(url)[1].lstrip('.')
        xml = (ext == 'xml') or objconf.strict

        try:
            f = yield io.async_url_open(url)
            tree = yield util.xml2etree(f, xml=xml)
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())

        elements = xpath(tree, objconf.xpath)
        f.close()
        items = map(util.etree2dict, elements)
        stringified = ({kwargs['assign']: encode(i)} for i in items)
        stream = stringified if objconf.stringify else items

    return_value(stream)
Example #8
    def writexml(self, stream, *args, **kwargs):
        val = encode(self.data)
        stream.write("<!--%s-->" % val)
Example #9
def parseString(content, *args, **kwargs):
    # Wrap the encoded bytes in a file-like object so parse() can consume them
    f = BytesIO(encode(content))
    return parse(f, *args, **kwargs)
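All of these examples funnel text through encode before handing it to byte-oriented APIs (bytes.find, getPage, BytesIO, stream.write). A minimal sketch of what such a helper typically looks like, stated as an assumption rather than this project's exact implementation:

def encode(text, encoding='utf-8'):
    # Return the UTF-8 bytes of str input; pass bytes and other values through
    return text.encode(encoding) if hasattr(text, 'encode') else text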