Python GenericTranslator示例，cssselect.GenericTranslator Python示例

示例#1

0

显示文件

文件： __main__.py 项目： f43i4n/md-schema

    def parse(self, markdown):
        """Extract the sections described by ``self.schema_dict`` from Markdown.

        The Markdown text is rendered to HTML with ``markdown2``, wrapped in a
        single ``<div>`` so the fragment has one root element, and parsed with
        ``lxml.etree``. Schema entries whose ``'type'`` is ``'titlearea'`` or
        ``'list'`` are both resolved the same way: the entry's CSS
        ``'selector'`` is translated to XPath, the text of the first matching
        element becomes the title, and the text of the first result of that
        XPath extended with ``/following::p[1]`` becomes the description.

        Returns a dict mapping each handled entry's ``'name'`` to
        ``{'title': ..., 'description': ...}``. Entries of any other type are
        ignored. ``IndexError`` propagates when either XPath matches nothing.
        """
        extracted = {}

        rendered = markdown2.markdown(markdown).rstrip()
        root = lxml.etree.fromstring('<div>' + rendered + '</div>')

        for entry in self.schema_dict:
            # Only these two entry kinds are understood, and they are
            # processed identically.
            if entry['type'] not in ('titlearea', 'list'):
                continue

            heading_xpath = cssselect.GenericTranslator().css_to_xpath(
                entry['selector'])
            paragraph_xpath = heading_xpath + '/following::p[1]'

            headings = list(root.xpath(heading_xpath))
            paragraphs = [node.text for node in root.xpath(paragraph_xpath)]

            key = entry['name']
            heading_text = headings[0].text
            extracted[key] = {
                'title': heading_text,
                'description': paragraphs[0],
            }
        return extracted

示例#2

0

显示文件

文件： nagios.py 项目： harlowja/langstroth

def parse_availability(html, service_group):
    """Extract per-service and average availability from a report page.

    The page is parsed as HTML and the ``.dataTitle`` elements are scanned
    for the first one whose text contains both ``service_group`` and
    ``'Service State Breakdowns'``; the ``table.data`` element at the
    corresponding position is then read row by row
    (``tr.dataOdd`` / ``tr.dataEven``).

    :param html: HTML source of the report page.
    :param service_group: text that must appear in the title of the wanted
        table.
    :returns: ``{"services": {...}, "average": {...}}``. ``services`` maps
        each service name to the dict produced by
        ``parse_service_availability``; ``average`` is built from the row
        whose first cell has a ``colspan`` attribute. Both are empty when no
        matching title is found.
    """
    tr = cssselect.GenericTranslator()
    h = lxml.etree.HTML(html)
    table = None
    # Titles are numbered from -1, i.e. title N selects table N - 1.
    # Presumably the first ``.dataTitle`` has no ``table.data`` of its own --
    # TODO confirm.
    # NOTE(review): if that first title ever matched, index -1 would select
    # the *last* table.
    for i, e in enumerate(h.xpath(tr.css_to_xpath('.dataTitle')), -1):
        # lxml reports ``text`` as None when the element starts with a child
        # element; treat that as "no text" instead of raising TypeError.
        title_text = e.text or ''
        if service_group not in title_text:
            continue
        if 'Service State Breakdowns' not in title_text:
            continue
        table = h.xpath(tr.css_to_xpath('table.data'))[i]
        break
    services = {}
    average = {}
    if table is not None:
        for row in table.xpath(tr.css_to_xpath("tr.dataOdd, tr.dataEven")):
            cells = list(row)
            if 'colspan' in cells[0].attrib:
                # Summary row: exactly six cells are expected; only the four
                # state columns are kept, each reduced to the first
                # space-separated token of its text.
                _title, ok, warn, unknown, crit, _undet = cells
                average = {"name": "Average",
                           "ok": ok.text.split(' ')[0],
                           "warning": warn.text.split(' ')[0],
                           "unknown": unknown.text.split(' ')[0],
                           "critical": crit.text.split(' ')[0]}
                continue
            service = parse_service_availability(row)
            services[service['name']] = service

    context = {
        "services": services,
        "average": average}
    return context

示例#3

0

显示文件

文件： nagios.py 项目： harlowja/langstroth

def parse_status(html, service_group):
    """Extract hosts and their services from a status page.

    The page is parsed as HTML and the ``.statusTitle`` elements are scanned
    for the first one whose text contains both ``service_group`` and
    ``'Service Status Details'``; the ``table.status`` element at the
    corresponding position is then read row by row.

    A row whose first cell holds exactly one link starts a new host (built
    by ``parse_hostlink``); the remaining cells of every data row are parsed
    by ``parse_service`` and appended to the most recent host's
    ``'services'`` list.

    :param html: HTML source of the status page.
    :param service_group: text that must appear in the title of the wanted
        table.
    :returns: ``{"hosts": {...}}`` mapping each host's ``'hostname'`` to its
        host dict; empty when no matching title is found.
    """
    tr = cssselect.GenericTranslator()
    h = lxml.etree.HTML(html)
    table = None
    # Titles are numbered from -1, i.e. title N selects table N - 1.
    # Presumably the first ``.statusTitle`` has no ``table.status`` of its
    # own -- TODO confirm.
    for i, e in enumerate(h.xpath(tr.css_to_xpath('.statusTitle')), -1):
        # lxml reports ``text`` as None when the element starts with a child
        # element; treat that as "no text" instead of raising TypeError.
        title_text = e.text or ''
        if service_group not in title_text:
            continue
        if 'Service Status Details' not in title_text:
            continue
        table = h.xpath(tr.css_to_xpath('table.status'))[i]
        break
    hosts = {}
    if table is not None:
        link_xpath = tr.css_to_xpath('a')  # loop-invariant
        # Host that subsequent service rows belong to. It starts as None so
        # a service row seen before any host row is skipped instead of
        # raising UnboundLocalError.
        current_host = None
        for row in table:
            children = list(row)
            # Skip empty rows
            if len(children) <= 1:
                continue
            # Skip header row
            if children[0].tag == 'th':
                continue

            hostname = children[0].xpath(link_xpath)
            if len(hostname) > 1:
                LOG.warning("Too many links found.")
            elif len(hostname) == 1:
                current_host = parse_hostlink(hostname[0])
                hosts[current_host['hostname']] = current_host

            if current_host is None:
                LOG.warning("Service row found before any host; skipping.")
                continue

            service = parse_service(children[1:])
            current_host['services'].append(service)
    context = {"hosts": hosts}
    return context

示例#4

0

显示文件

def translate_css_to_xpath(*sheets):
    """Translate the stylesheet files in ``sheets`` with one shared translator.

    Each path is parsed with ``cssutils.parseFile`` and passed, together
    with a single ``cssselect.GenericTranslator``, to
    ``_translate_stylesheet``. The items it produces are concatenated in
    argument order and returned as one list.
    """
    translator = cssselect.GenericTranslator()
    # Collected in a list so that the entries can "cascade".
    return [
        item
        for path in sheets
        for item in _translate_stylesheet(cssutils.parseFile(path), translator)
    ]

示例#5

0

显示文件

文件： nagios.py 项目： NeCTAR-RC/langstroth

def parse_availability(html, service_group):
    """Extract per-service and average availability from a report page.

    The page is parsed as HTML and the ``.dataTitle`` elements are scanned
    for the first one whose text contains both ``service_group`` and
    ``'Service State Breakdowns'``; the ``table.data`` element at the
    corresponding position is then read row by row
    (``tr.dataOdd`` / ``tr.dataEven``).

    :param html: HTML source of the report page.
    :param service_group: text that must appear in the title of the wanted
        table.
    :returns: ``{'services': {...}, 'average': {...}}``. ``services`` maps
        each service name to the dict produced by
        ``parse_service_availability``; ``average`` is built from the row
        whose first cell has a ``colspan`` attribute, with ``'ok'`` being
        the sum of the parsed OK and warning cells. Both are empty when no
        matching title is found.
    """
    tr = cssselect.GenericTranslator()
    h = lxml.etree.HTML(html)
    table = None
    # Titles are numbered from -1, i.e. title N selects table N - 1.
    # Presumably the first ``.dataTitle`` has no ``table.data`` of its own --
    # TODO confirm.
    # NOTE(review): if that first title ever matched, index -1 would select
    # the *last* table.
    for i, e in enumerate(h.xpath(tr.css_to_xpath('.dataTitle')), -1):
        # lxml reports ``text`` as None when the element starts with a child
        # element; treat that as "no text" instead of raising TypeError.
        title_text = e.text or ''
        if service_group not in title_text:
            continue
        if 'Service State Breakdowns' not in title_text:
            continue
        table = h.xpath(tr.css_to_xpath('table.data'))[i]
        break
    services = {}
    average = {}
    if table is not None:
        for row in table.xpath(tr.css_to_xpath("tr.dataOdd, tr.dataEven")):
            cells = list(row)
            if 'colspan' in cells[0].attrib:
                # Summary row: exactly six cells are expected; the unknown
                # and undetermined columns are not used.
                _title, ok, warn, _unknown, crit, _undet = cells
                ok_value = (parse_percent_string(ok.text)
                            + parse_percent_string(warn.text))
                critical_value = parse_percent_string(crit.text)
                average = {'name': 'Average',
                           'ok': ok_value,
                           'critical': critical_value}
                continue
            service = parse_service_availability(row)
            services[service['name']] = service

    context = {
        'services': services,
        'average': average}
    return context

示例#6

0

显示文件

    def selector_to_xpath(cls, selector, xmlns=None):
        """Convert a CSS selector into an XPath expression.

        Bare class and id selectors (``.foo``, ``#bar``) are first given an
        explicit universal element (``*.foo``, ``*#bar``); presumably this is
        so a namespace prefix can be attached to them. The selector is then
        translated by ``cssselect`` and the resulting path is shortened
        (``descendant-or-self::`` removed, ``/descendant::`` and ``/*/``
        collapsed to ``//``).

        :param selector: CSS selector string.
        :param xmlns: optional single-item dict mapping a namespace prefix to
            its href. Only the prefix is used: it is prepended (as
            ``prefix|``) to every whitespace-separated token except the
            ``>``, ``+`` and ``~`` combinators.
        :returns: the XPath expression as a string.
        """
        # Function-scope import: the file's import block is not visible here.
        import re

        selector = selector.replace(' .', ' *.')
        # startswith() also copes with an empty selector (no IndexError).
        if selector.startswith('.'):
            selector = '*' + selector
            log.debug(selector)

        if '#' in selector:
            # Insert '*' only where the id selector has no element in front
            # of it. A blanket replace would turn "div#id" into the invalid
            # "div*#id" and would also alter '#' inside quoted attribute
            # values.
            selector = re.sub(r'''(?<![\w*\-\])"'])#''', '*#', selector)
            log.debug(selector)

        if xmlns is not None:
            prefix = list(xmlns)[0]
            combinators = ('>', '+', '~')
            # split() without an argument drops the empty tokens produced by
            # repeated spaces, which would otherwise become a bare "prefix|".
            selector = ' '.join(
                token if token in combinators else prefix + '|' + token
                for token in selector.split()
            )
            log.debug(selector)

        path = cssselect.GenericTranslator().css_to_xpath(selector)
        path = path.replace("descendant-or-self::", "")
        path = path.replace("/descendant::", "//")
        path = path.replace('/*/', '//')

        log.debug(' ==> %s', path)

        return path

示例#7

0

显示文件

class By(object):
    """
    Locator strategies and their conversion to XPath.
    """

    ID = "id"
    XPATH = "xpath"
    NAME = "name"
    TAG_NAME = "tag name"
    CLASS_NAME = "class name"
    CSS_SELECTOR = "css selector"

    # One translator shared by every conversion.
    translator = cssselect.GenericTranslator()

    @classmethod
    def convert_selector_to_xpath(cls, by=ID, value=None):
        """Return ``value`` expressed as an XPath for locator strategy ``by``.

        ID, NAME, CLASS_NAME and TAG_NAME locators are first rewritten as a
        CSS selector; CSS selectors are then translated with the shared
        ``translator``. Any other strategy (including XPATH) returns
        ``value`` unchanged.
        """
        if by == cls.ID:
            css = '[id="%s"]' % value
        elif by == cls.TAG_NAME or by == cls.CSS_SELECTOR:
            # A tag name is already a valid CSS selector.
            css = value
        elif by == cls.CLASS_NAME:
            css = ".%s" % value
        elif by == cls.NAME:
            css = '[name="%s"]' % value
        else:
            return value
        return cls.translator.css_to_xpath(css)

示例#8

0

显示文件

def match_selector(rule, tree):
    """Yield ``(element, specificity)`` for every element of ``tree`` that ``rule`` matches.

    The rule's selector list is parsed with ``cssselect``; selectors that
    carry a pseudo-element are skipped, and each remaining selector is
    translated to XPath and evaluated against ``tree``.
    """
    parsed_selectors = cssselect.parse(rule.selector.as_css())
    to_xpath = cssselect.GenericTranslator().selector_to_xpath
    for parsed in parsed_selectors:
        if parsed.pseudo_element:
            continue
        weight = parsed.specificity()
        for matched in tree.xpath(to_xpath(parsed)):
            yield matched, weight

示例#9

0

显示文件

    def get_cinemas_raw_data(self):
        """Return the ``[data-lat]`` elements of the Event Cinemas home page.

        The page is fetched through ``cache.get_url``, parsed with
        ``html.fromstring`` and queried with the XPath equivalent of the CSS
        selector ``[data-lat]``.
        """
        response = cache.get_url("https://www.eventcinemas.com.au/")
        page = html.fromstring(response.content)

        # Elements carrying a data-lat attribute; presumably one per cinema.
        lat_xpath = css.GenericTranslator().css_to_xpath("[data-lat]")
        return page.xpath(lat_xpath)

示例#10

0

显示文件

文件： nagios.py 项目： harlowja/langstroth

def parse_service(service_columns):
    """Build a service dict from the cells of one status-table row.

    The service name is the text of the first link in the first cell; its
    display name is looked up in ``SERVICE_NAMES`` (``None`` when absent).
    The next three cells supply status, last-check time and duration as
    raw cell text.
    """
    link_xpath = cssselect.GenericTranslator().css_to_xpath('a')
    first_link = service_columns[0].xpath(link_xpath)[0]
    name = first_link.text

    summary = {'name': name, 'display_name': SERVICE_NAMES.get(name)}
    summary['status'] = service_columns[1].text
    summary['last_checked'] = service_columns[2].text
    summary['duration'] = service_columns[3].text
    return summary

示例#11

0

显示文件

文件： nagios.py 项目： waipeng/hivemind

def services_in_maintenance():
    """Return the services listed in the second ``table.downtime`` table.

    Fetches ``extinfo.cgi?type=6`` from ``env.nagios`` (authenticated with
    ``env.nagios_auth``) and reads the second ``table.downtime`` element of
    the response. Header rows and the "no services" placeholder row are
    skipped. Each remaining row yields a dict with ``host``, ``service``,
    ``reason`` and ``id`` taken from cells 0, 1, 4 and 9.

    An empty list is returned when the page has fewer than two such tables.
    """
    response = requests.get(env.nagios + "extinfo.cgi?type=6",
                            auth=env.nagios_auth)
    document = lxml.etree.HTML(response.text)
    translator = cssselect.GenericTranslator()
    downtime_tables = document.xpath(translator.css_to_xpath("table.downtime"))
    found = []

    if len(downtime_tables) < 2:
        return found

    for row in downtime_tables[1]:
        cells = row.getchildren()
        first_cell = cells[0]
        if first_cell.tag == "th":
            continue
        if first_cell.text == 'There are no services with scheduled downtime':
            continue
        # Host and service names are the text of the first child element of
        # their cells; reason and id are plain cell text.
        entry = {
            "host": first_cell.getchildren()[0].text,
            "service": cells[1].getchildren()[0].text,
            "reason": cells[4].text,
            "id": cells[9].text,
        }
        found.append(entry)
    return found

示例#12

0

显示文件

文件： kxhtml.py 项目： bibimbop/pptools-openshift

    def check_css(self, myfile):
        """Find unused CSS selectors and classes used without a CSS rule.

        Reads the first ``<style>`` element under ``<head>`` of
        ``myfile.tree``, parses it with tinycss and checks every selector
        against the document. Results are stored on ``self``:

        * ``cssutils_errors`` -- ``"line,column: reason"`` strings for CSS
          parse errors, with the line offset by the ``<style>`` element's
          source line.
        * ``sel_unchecked`` -- selectors cssselect could not translate.
        * ``sel_unused`` -- selectors that match no element.
        * ``classes_undefined`` -- ``[sourceline, class_name]`` pairs for
          classes present on an element but not marked as used by any
          selector.

        Nothing is stored when the document has no ``<head>`` or no
        ``<style>``. As a side effect, elements matched by a class selector
        gain a ``__used_classes`` attribute.

        Known limitations: fails on a few corner cases, such as
        ".tdm > tbody > tr > td:first-child + td" and ".onetable td".
        Regular rules and the regular rules nested in ``@media`` are
        examined; other at-rules are skipped. Selector lists are split on
        "," without regard to quoting or parentheses.
        """
        # Find the CSS style; a document without <head> has none.
        head = myfile.tree.find('head')
        css = head.find('style') if head is not None else None

        if css is None:
            return

        # The CSS can be in a comment or not
        if len(css):
            # Not sure whether that covers all the comment cases. Maybe add
            # all the children
            css_text = etree.tostring(css[0]).decode("utf8")
        else:
            # An empty <style> element has text None.
            css_text = css.text or ''

        stylesheet = tinycss.make_parser().parse_stylesheet(css_text)

        # retrieve the errors
        self.cssutils_errors = [
            "{0},{1}: {2}".format(err.line + css.sourceline - 1, err.column,
                                  err.reason) for err in stylesheet.errors
        ]

        # Collect every individual selector once. Pieces are stripped so
        # that "a , b" does not leave trailing whitespace, which would
        # defeat the class-name regex below.
        css_selectors = set()
        for rule in stylesheet.rules:
            if rule.at_keyword is None:
                regular_rules = [rule]
            elif rule.at_keyword == '@media':
                # Itself is a bunch of regular rules
                regular_rules = [inner for inner in rule.rules
                                 if inner.at_keyword is None]
            else:
                continue

            for regular in regular_rules:
                for piece in regular.selector.as_css().split(','):
                    css_selectors.add(piece.strip())

        # Find the unused/undefined CSS. It is possible 2 rules will
        # match the same class (for instance "p.foo" and ".foo" will
        # match "class=foo"). That is not detected, and both rules
        # will be valid.
        translator = cssselect.GenericTranslator()
        self.sel_unchecked = []
        self.sel_unused = []
        # Sorted so the reported lists have a deterministic order.
        for selector in sorted(css_selectors):

            # Get the selector (eg. "body", "p", ".some_class")
            try:
                sel_xpath = translator.css_to_xpath(selector)
            except (cssselect.xpath.ExpressionError,
                    cssselect.parser.SelectorSyntaxError):
                self.sel_unchecked.append(selector)
                continue

            # Retrieve where it is used in the xhtml
            occurences = etree.XPath(sel_xpath)(myfile.tree)

            if not occurences:
                self.sel_unused.append(selector)
                continue

            # If it's from a class, find the name. It should be the
            # last word starting with a dot (eg. "p.foo", ".foo",
            # "#toc .foo" => "foo")
            #
            # Remove a pseudo selector with rsplit, if there is one.
            m = re.search(r'\.([\w-]+)$', selector.rsplit(":", 1)[0])
            if m is None:
                continue

            # The pattern requires at least one character, so the class
            # name is never empty.
            cl = m.group(1)

            # Mark the class wherever it is used, in each element
            for item in occurences:
                item.attrib['__used_classes'] = item.attrib.get(
                    '__used_classes', '') + ' ' + cl

        # Look for classes that no selector marked as used
        self.classes_undefined = []
        find = etree.XPath("//*[@class]")
        for element in find(myfile.tree):
            classes = set(element.attrib['class'].split())

            used_classes = element.attrib.get('__used_classes', None)
            if used_classes:
                # Subtract content of used_classes from classes
                # leaving classes that were not matched.
                classes -= set(used_classes.split())

            # Finally, create the warning
            for cl in classes:
                self.classes_undefined.append([element.sourceline, cl])

示例#13

0

显示文件

文件： response_queue.py 项目： hongbowang/yascrapy

 def _css(self, css_selector):
     """Evaluate ``css_selector`` by translating it to XPath for ``self._xpath``."""
     translator = cssselect.GenericTranslator()
     return self._xpath(translator.css_to_xpath(css_selector))