def articlecollector(path_articles_xml, outpath_articles, articleids):
    print("\nCollecting articles for '%s' from %s\n..." % (wantedCategory, path_articles_xml))
    title_path = etree.ETXPath("child::" + Ttitle)
    id_path = etree.ETXPath("child::" + Tid)
    text_path = etree.ETXPath("child::" + Trev + "/" + Ttext)
    extracted_count = 0
    start = time.time()
    try:
        with BZ2File(outpath_articles, "w", compresslevel=9) as file, \
                etree.xmlfile(file, encoding="utf-8") as newfile, \
                newfile.element("mediawiki", xmlns=Header):
            context = etree.iterparse(path_articles_xml, events=("end",), tag={Tnamespaces, Tpage})
            for action, elem in context:
                if elem.tag == Tpage and id_path(elem)[0].text in articleids:
                    create_page(elem, title_path, id_path, text_path, articleids, newfile)
                    extracted_count += 1
                elif elem.tag == Tnamespaces:
                    create_namespace(elem, newfile)
                elem.clear()
                while elem.getprevious() is not None:
                    del elem.getparent()[0]
    except FileNotFoundError as e:
        print(e.filename, "not found")
        raise e
    end = time.time()
    printTime(start, end)
    return extracted_count
def test_value(self):
    """ Test object profile values. """
    my_object_resource = object_resource.ObjectResource()
    xml = my_object_resource._get_object_profile('boaty:mcboatface', 'intertubes',
                                                 ['do:thereisnotry'],
                                                 datetime.now(pytz.utc),
                                                 datetime.now(pytz.utc), 'A', 'dr who')
    tree = etree.fromstring(xml)
    label_xpath = etree.ETXPath('//{{{}}}objLabel/text()'.format(api.FEDORA_ACCESS_URI))
    self.assertEqual(label_xpath(tree)[0], 'intertubes')
    state_xpath = etree.ETXPath('//{{{}}}objState/text()'.format(api.FEDORA_ACCESS_URI))
    self.assertEqual(state_xpath(tree)[0], 'A')
    pid_xpath = etree.ETXPath('/{{{}}}objectProfile/@pid'.format(api.FEDORA_ACCESS_URI))
    self.assertEqual(pid_xpath(tree)[0], 'boaty:mcboatface')
    model_xpath = etree.ETXPath('//{{{}}}model/text()'.format(api.FEDORA_ACCESS_URI))
    self.assertIn('do:thereisnotry', model_xpath(tree))
    owner_xpath = etree.ETXPath('//{{{}}}objOwnerId/text()'.format(api.FEDORA_ACCESS_URI))
    self.assertEqual(owner_xpath(tree)[0], 'dr who')
def analyze_wsdl(config_file):
    # execute ws-i tests
    # don't execute Analyzer.sh directly since it needs bash
    os.system(WSI_EXECUTION_COMMAND + config_file)

    # parse result
    e = etree.parse(SOAPLIB_REPORT_FILE).getroot()
    summary = etree.ETXPath('{%s}summary' % e.nsmap['wsi-report'])(e)
    if summary:
        # retrieve overall result of the test
        result = summary[0].get('result')
        if result == 'failed':
            outs = etree.ETXPath('{%s}artifact' % (e.nsmap['wsi-report'], ))(e)
            # filter for the object describing the wsdl test
            desc = [o for o in outs if o.get('type') == 'description'][0]
            # loop over every group test
            for entry in desc.iterchildren():
                # loop over every single test
                for test in entry.iterchildren():
                    # simply print the error if there is one
                    # an html can be generated using files in wsi-test-tools/common/xsl
                    if test.get('result') == 'failed':
                        fail_msg = etree.ETXPath('{%s}failureMessage' % e.nsmap['wsi-report'])(test)
                        fail_det = etree.ETXPath('{%s}failureDetail' % e.nsmap['wsi-report'])(test)
                        if fail_msg:
                            print '\nFAILURE in test %s\n' % test.get('id')
                            print fail_msg[0].text
                        if fail_det:
                            print '\nFAILURE MSG\n'
                            print fail_det[0].text
def parse_file(cls, xml_file):
    print("Parsing: " + xml_file)
    with open(xml_file, 'r') as fd:
        et = etree.parse(fd)
    for table1_group1 in etree.ETXPath(
            './{rptAllCDE}table1/{rptAllCDE}table1_Group1_Collection/{rptAllCDE}table1_Group1'
    )(et.getroot()):
        pvg = None
        for table1_group2 in etree.ETXPath(
                './{rptAllCDE}table1_Group2_Collection/{rptAllCDE}table1_Group2'
        )(table1_group1):
            pvg = NINDSReportParser.parse_pvg(table1_group2)
        NINDSReportParser.parse_cde(table1_group1, pvg)
def print_update(self, doc):
    xpath_expr = "//{{{0}}}job".format(settings.PX_NS)
    xpath_fn = etree.ETXPath(xpath_expr)
    results = xpath_fn(doc)
    jobid = results[0].attrib['id']
    xpath_expr = "//{{{0}}}job/{{{0}}}messages/{{{0}}}message".format(settings.PX_NS)
    xpath_fn = etree.ETXPath(xpath_expr)
    results = xpath_fn(doc)
    print "JOB UPDATE\n\tID {0}\n\n\tMessage(s):".format(jobid)
    for m in results:
        print "\t#{0}. {1} - {2}".format(m.attrib['sequence'], m.attrib['level'], m.text)
    print ""
def update_feed(feedentry, feed_path):
    """Update the feed with the last individual feed entry.

    * return None if nothing has changed
    * add a new entry, delete the last if a new post
    * add a new entry, remove the old entry if post has changed.
    """
    new_entry = False
    feed = helper.parse_feed(feed_path)
    # XPath for finding tagid
    find_entry = etree.ETXPath("//{%s}entry" % ATOMNS)
    find_id = etree.ETXPath("{%s}id/text()" % ATOMNS)
    find_date = etree.ETXPath("{%s}updated/text()" % ATOMNS)
    # We need the information about the new entry
    new_id = find_id(feedentry)[0]
    new_updated = find_date(feedentry)[0]
    # Processing and comparing
    entries = find_entry(feed)
    posts_number = len(entries)
    for entry in entries:
        old_id = find_id(entry)[0]
        old_updated = find_date(entry)[0]
        if old_id == new_id:
            if old_updated == new_updated:
                logging.info("The feed has not changed.")
                return None
            else:
                logging.info("The feed has been updated.")
                # we remove from feed the specific entry
                entry.getparent().remove(entry)
                # Find the first entry element in the feed
                position = feed.getroot().index(feed.find("//{%s}entry" % ATOMNS))
                feed.getroot().insert(position, feedentry.getroot())
                # Change the <updated> date of the feed
                feed.find("//{%s}updated" % ATOMNS).text = new_updated
                return lxml.html.tostring(feed, encoding='utf-8')
        else:
            logging.info("This is a new feed entry.")
            new_entry = True
    if new_entry:
        if posts_number > FEED_MAX_POSTS:
            entries[-1].getparent().remove(entries[-1])
        position = feed.getroot().index(feed.find("//{%s}entry" % ATOMNS))
        feed.getroot().insert(position, feedentry.getroot())
        # Change the <updated> date of the feed
        feed.find("//{%s}updated" % ATOMNS).text = new_updated
        return lxml.html.tostring(feed, encoding='utf-8')
    return None
def extract_articles_inscope(csvwriter, adict):
    t = start_time()
    idexp = etree.ETXPath("child::" + ID)
    titlexp = etree.ETXPath("child::" + TITLE)
    textxp = etree.ETXPath("child::" + REV + "/" + TEXT)
    context = etree.iterparse(
        DATAP + '/dump/enwiki-20180901-pages-articles-multistream.xml',
        events=('end', ),
        tag=PAGE)
    fast_iter(
        context,
        lambda elem: extract_with_xpath(elem, idexp, titlexp, textxp, csvwriter, adict))
    stop_time(t)
def eval_test(tree, strid):
    # registry_test
    path = "//{http://oval.mitre.org/XMLSchema/oval-definitions-5#windows}registry_test[@id='" + strid + "']"
    findall = etree.ETXPath(path)
    if len(findall(tree)) > 0:
        return eval_registry_test(tree, strid)
    # family_test
    path = "//{http://oval.mitre.org/XMLSchema/oval-definitions-5#independent}family_test[@id='" + strid + "']"
    findall = etree.ETXPath(path)
    if len(findall(tree)) > 0:
        return eval_family_test(tree, strid)
    # cannot evaluate
    return -1
def to_entry_dict(entry_index_xml):
    """Convert an XML entry index into a dictionary."""
    # Search paths
    find_href = etree.ETXPath("a/@href")
    find_short_date = etree.ETXPath("time/text()")
    find_created = etree.ETXPath("time/@datetime")
    find_title = etree.ETXPath("a/text()")
    # extract data
    entry_index = {
        'created': find_created(entry_index_xml)[0],
        'iso_short_date': find_short_date(entry_index_xml)[0],
        'path': find_href(entry_index_xml)[0],
        'title': find_title(entry_index_xml)[0],
    }
    return entry_index
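# Hedged usage sketch for to_entry_dict (not from the original project): the
# shape of the index item below is an assumption inferred from the relative
# XPaths above, namely a namespace-free <li> holding a <time> and an <a>.
from lxml import etree

li = etree.fromstring(
    '<li><time datetime="2018-09-01T10:00:00Z">2018-09-01</time>'
    '<a href="/2018/09/hello.html">Hello</a></li>')
print(to_entry_dict(li))
# {'created': '2018-09-01T10:00:00Z', 'iso_short_date': '2018-09-01',
#  'path': '/2018/09/hello.html', 'title': 'Hello'}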
def parse_pvg(cls, elem):
    pvg_code = elem.get(cls.PVG_CODE)
    # dummy element
    if pvg_code is None:
        return
    try:
        CDEPermittedValueGroup.objects.get(code__exact=pvg_code)
        print("PVG %s already exists." % pvg_code)
        return
    except CDEPermittedValueGroup.DoesNotExist:
        pass
    pvg = CDEPermittedValueGroup(code=pvg_code)
    pvg.save()
    print("Created: ", pvg)
    for detail in etree.ETXPath('./{rptAllCDE}Detail_Collection/{rptAllCDE}Detail')(elem):
        pv_code = detail.get(cls.PV_CODE)
        if pv_code is None:
            continue
        pv = CDEPermittedValue(code=detail.get(cls.PV_CODE),
                               value=detail.get(cls.PV_VALUE),
                               desc=detail.get(cls.PV_DESC),
                               pv_group=pvg)
        print("Created:", pv)
        pv.save()
    return pvg
def eval_family_test(tree, strid):
    l_regtest = []
    path = "//{http://oval.mitre.org/XMLSchema/oval-definitions-5#independent}family_test[@id='" + strid + "']"
    findall = etree.ETXPath(path)
    try:
        rtest = findall(tree)[0]
    except:
        return -1
    # state
    ste = rtest.findall(
        './{http://oval.mitre.org/XMLSchema/oval-definitions-5#independent}state')
    try:
        l_state = [get_family_state(tree, ste[0].get('state_ref'))]
    except:
        print "non fatal error in family state for strid: " + strid
    # logical operation
    # TODO: ops are different for family_test than for registry_test
    regop = 'AND'
    try:
        if rtest.get('check') == 'at least one':
            regop = 'OR'
    except:
        print "non fatal error in determining op for strid: " + strid
    try:
        return eval_family_state(l_state[0]['famtext'])
    except:
        return -1
def xmlPath(element):
    '''Return a simple, unambiguous path for an XML element'''
    path = []
    while True:
        parent = element.getparent()
        name = element.tag
        if name.startswith(POM_NS_PREFIX):
            name = name[len(POM_NS_PREFIX):]
        if parent is None:
            path.insert(0, '/%s' % name)
            break
        expr = etree.ETXPath(element.tag)
        children = expr(parent)
        #print 'xmlPath', element.tag, children
        index = children.index(element)
        if len(children) == 1:
            item = '/%s' % name
        else:
            item = '/%s[%d]' % (name, index)
        path.insert(0, item)
        element = parent
    return ''.join(path)
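# Hedged usage sketch for xmlPath; POM_NS_PREFIX is assumed here to be the
# Clark-notation prefix of the Maven POM namespace that the surrounding module
# defines, and the <project> fixture is purely illustrative.
from lxml import etree

POM_NS_PREFIX = '{http://maven.apache.org/POM/4.0.0}'
pom = etree.fromstring(
    '<project xmlns="http://maven.apache.org/POM/4.0.0">'
    '<dependencies><dependency/><dependency/></dependencies></project>')
deps = pom.find(POM_NS_PREFIX + 'dependencies')
print(xmlPath(deps[0]))
# '/project/dependencies/dependency[0]'  (note: indices come from list.index and are 0-based)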
def parseXML(xmlFile):
    '''Parse a local xml file'''
    xml = etree.parse(xmlFile).getroot()
    lines = []
    # get the number of the enclosing book
    # FIXME: make this more elegant
    book_n = 0
    book_xpath = './/{http://www.tei-c.org/ns/1.0}div[@type="textpart" and @subtype="book"]'
    bookFinder = etree.ETXPath(book_xpath)
    book_list = bookFinder(xml)
    if len(book_list) > 0:
        book_n = book_list[0].get('n', 0)
    for l in xml.iter('{http://www.tei-c.org/ns/1.0}l'):
        for note in l.iter('{http://www.tei-c.org/ns/1.0}note'):
            tail = note.tail
            note.clear()
            note.tail = tail
        line_n = l.get('n')
        lines.append(('{}.{}'.format(book_n, line_n), l.xpath('string()').strip()))
    return lines
def locate_node(arch, spec):
    """ Locate a node in a source (parent) architecture.

    Given a complete source (parent) architecture (i.e. the field `arch` in a
    view), and a 'spec' node (a node in an inheriting view that specifies the
    location in the source view of what should be changed), return (if it
    exists) the node in the source view matching the specification.

    :param arch: a parent architecture to modify
    :param spec: a modifying node in an inheriting view
    :return: a node in the source matching the spec
    """
    if spec.tag == 'xpath':
        nodes = etree.ETXPath(spec.get('expr'))(arch)
        return nodes[0] if nodes else None
    elif spec.tag == 'field':
        # Only compare the field name: a field can be only once in a given view
        # at a given level (and for multilevel expressions, we should use xpath
        # inheritance spec anyway).
        for node in arch.iter('field'):
            if node.get('name') == spec.get('name'):
                return node
        return None
    for node in arch.iter(spec.tag):
        if isinstance(node, SKIPPED_ELEMENT_TYPES):
            continue
        if all(node.get(attr) == spec.get(attr)
               for attr in spec.attrib
               if attr not in ('position', 'version')):
            # Version spec should match parent's root element's version
            if spec.get('version') and spec.get('version') != arch.get('version'):
                return None
            return node
    return None
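# Hedged sketch of how locate_node resolves a spec against a parent arch; the
# <form>/<field>/<group> fixtures are assumptions for illustration, not real
# view definitions, and only the 'xpath' and 'field' branches are exercised.
from lxml import etree

arch = etree.fromstring('<form><field name="partner_id"/><group/></form>')
spec = etree.fromstring('<xpath expr="//group" position="inside"/>')
print(locate_node(arch, spec).tag)              # 'group'
spec2 = etree.fromstring('<field name="partner_id" position="after"/>')
print(locate_node(arch, spec2).get('name'))     # 'partner_id'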
def edit_dash_playlist(*args, **kwargs):
    '''
    create dash chunks for every video in the transcoded folder
    '''
    # print args, kwargs
    context = args[0]
    tree = LXML.parse(get_dash_mpd_file_path(context))
    root = tree.getroot()
    # Namespace map
    nsmap = root.nsmap.get(None)
    # Function to find all the BaseURL
    find_baseurl = LXML.ETXPath("//{%s}BaseURL" % nsmap)
    results = find_baseurl(root)
    audio_file = results[-1].text
    # Warning: this is quite dirty! We suppose the last element is the only audio element.
    results[-1].text = "audio/" + results[-1].text
    tree.write(get_dash_mpd_file_path(context))
    # Move audio files into audio directory
    os.makedirs(os.path.join(get_dash_folder(context), "audio"))
    shutil.move(os.path.join(get_dash_folder(context), audio_file),
                os.path.join(get_dash_folder(context), "audio", audio_file))
    # Create .htaccess for apache
    f = open(os.path.join(get_dash_folder(context), "audio", ".htaccess"), "w")
    f.write("AddType audio/mp4 .mp4 \n")
    f.close()
    return context
def save_chart(self):
    """This method uses a pre-defined XML layout and adds the random strings
    generated above and then saves the password chart as a png.

    :return: None
    """
    xml_data = etree.fromstring(layout.get_layout())
    for i in range(36):
        tmp = getattr(self, "label_{}".format(i)).text()
        etree.ETXPath("//{%s}*[@id='label_%s']" %
                      (u"http://www.w3.org/2000/svg", i))(xml_data)[0].text = tmp
    svg = etree.tostring(xml_data)
    file_handle, filename = tempfile.mkstemp()
    try:
        os.write(file_handle, svg)
        subprocess.call(
            # Default location of inkscape x64. Change if inkscape is
            # installed in a separate location
            [
                r"C:\Program Files\Inkscape\inkscape.exe", filename,
                "--export-png", "password_chart.png", "--export-dpi", "96"
            ])
        os.close(file_handle)
    finally:
        os.remove(filename)
def commandFlat(sInFile, sOutFile):
    """ Flattens all RTE segments into a single segment by appending all
    points to the first RTE segment """
    eGpx = etree.parse(sInFile).getroot()
    if eGpx is None:
        raise commandError("NOROOT")
    NS = getNS(eGpx)
    eRtes = eGpx.findall(NS % 'rte')
    if eRtes is None:
        raise commandError("NOSEG")
    for eRte in eRtes[1:]:
        eRtePts = eRte.findall(NS % 'rtept')
        if eRtePts is not None:
            eRtes[0].extend(eRtePts)
        eGpx.remove(eRte)
    eRteName = eRtes[0].find(NS % 'name')
    eRteName.text = os.path.splitext(os.path.basename(sOutFile))[0]
    lLatLons = [getLatLon(ePt) for ePt in eRtePts]
    writeGpxFile(eGpx, lLatLons, sOutFile)
    return len(etree.ETXPath(NS % 'rte')(eGpx))
def delete_all(args):
    """Delete all jobs"""
    doc = resources.get_jobs()
    xpath_expr = "//{{{0}}}job/@id".format(settings.PX_NS)
    xpath_fn = etree.ETXPath(xpath_expr)
    results = xpath_fn(doc)
    for r in results:
        delete_job(r)
def create(fname, ca_ext, data_dir='.', lang='', app_type=None):
    urn, extension = parse_urn(fname)
    tree = etree.parse(path.join(data_dir, fname))
    root = tree.getroot()
    NS = f'{{{root.nsmap[None]}}}'
    if not lang:
        lang = etree.ETXPath(f'//{NS}text/{NS}body/{NS}div/@xml:lang')(root)[0]
    print(f"Language: {lang}")
    # find refsDecl
    refsDecl = to_string(etree.ETXPath(f'//{NS}refsDecl')(root)[0])
    # Catch first cRefPattern
    cRef = etree.ETXPath(f'//{NS}refsDecl/{NS}cRefPattern/@replacementPattern')(root)[0]
    levels = regex.findall(r'tei:([a-zA-Z]+)\[@n=[\'"]\$[0-9]+[\'"]\]', cRef)
    print(f"cRefPattern: {', '.join(levels)}")
    # find apps
    passage = r'\.'.join(r'(\w+)' for _ in range(len(levels)))
    urn_wo_ext = regex.sub('-.+?$', '', urn)
    print(fr'Searching for {urn_wo_ext}-[\w\-]+?:{passage}')
    re_passage = regex.compile(fr'{urn_wo_ext}-[\w\-]+?:{passage}')
    app_xpath = f'//{NS}app[@loc]'
    if app_type:
        app_xpath = f'//{NS}listApp[@type="{app_type}"]' + app_xpath
    apps = etree.ETXPath(app_xpath)(root)
    ca_dict = collect_apps(apps, re_passage, NS)
    ca_string = format_critapp(ca_dict, levels)
    # Create new file
    new_urn = regex.sub(f'{extension}$', f'{ca_ext}', urn)
    new_fname = regex.sub(f'{extension}(?=.xml$)', f'{ca_ext}', fname)
    content = template.format(content=ca_string, id=new_urn,
                              refsDecl=refsDecl, lang=lang)
    # Indentation
    tree = etree.fromstring(content)
    etree.indent(tree)
    with open(path.join(data_dir, new_fname), 'w') as f:
        f.write(etree.tostring(tree, encoding="unicode"))
def import_cisco_cvrf():
    flist = []
    exdb = db.ExistDB()
    validateCollection(exdb, db_cvrf_cisco_collection)
    # -------------------------------------------------------------------------
    # get list of cvrf urls
    # -------------------------------------------------------------------------
    nurl = "http://tools.cisco.com/security/center/cvrfListing.x"
    request = urllib2.Request(nurl)
    rawPage = urllib2.urlopen(request)
    read = rawPage.read()
    #print read
    tree = etree.HTML(read)
    tpath = "//a[contains(@href,'cvrf.xml')]"
    findall = etree.ETXPath(tpath)
    arefs = findall(tree)
    urls = []
    for a in arefs:
        urls.append(a.get('href').replace('\t', '').replace('\n', ''))
    # just for tracking for now, need to get cisco to fix or apply a fix
    # i might ignore if it wasn't for poodle
    badfiles = [
        "/cisco-sa-20040420-tcp-nonios_cvrf.xml",
        "cisco-sa-20120328-msdp_cvrf.xml",
        "cisco-sa-20141015-poodle_cvrf.xml",
    ]
    # -------------------------------------------------------------------------
    # download files if they don't exist
    # -------------------------------------------------------------------------
    for u in urls:
        uname = u.split('/')[-1]
        # if file does not exist, download
        #if (not os.path.isfile(cisco_data_dir+uname) and os.access(".", os.W_OK)):
        if (os.access(".", os.W_OK)):
            try:
                print("downloading " + uname)
                urllib.urlretrieve(u, cisco_data_dir + uname)
                try:
                    fo = open(cisco_data_dir + uname, 'rb')
                    if exdb.load(fo, db_cvrf_cisco_collection + '/' + uname, True):
                        flist.append(uname + ": data import successful")
                    else:
                        flist.append(uname + ": data import failed")
                    fo.close()
                except:
                    flist.append(uname + ": file read failed")
            except:
                flist.append(uname + ": file download failed")
        else:
            flist.append(uname + ": file write failed")
    return flist
def get_abstract():
    with open(DATAP + '/dump/article_ids_reverse.json', "r", encoding="UTF8") as f:
        title_to_id = load(f)
    with open(DATAP + '/dump/articles_inscope.json', "r", encoding="UTF8") as f:
        scope = load(f)
    scope = {int(key): values for key, values in scope.items()}
    f = open(DATAP + '/dump/articles_all_abstracts.csv', "w", encoding="UTF8")
    abstract_xpath = etree.ETXPath("child::abstract")
    url_xpath = etree.ETXPath("child::url")
    context = etree.iterparse(DATAP + '/dump/enwiki-20180901-abstract.xml',
                              events=('end', ),
                              tag="doc")
    fast_iter(
        context,
        lambda elem: extract_with_xpath(elem, abstract_xpath, url_xpath,
                                        title_to_id, scope, f))
    f.close()
def get_job_status(job_id):
    """Return the status of the given job"""
    doc = get_job(job_id)
    if doc is None:
        return ""
    xpath_expr = "//{{{0}}}job".format(settings.PX_NS)
    xpath_fn = etree.ETXPath(xpath_expr)
    results = xpath_fn(doc)
    return results[0].attrib['status']
def entries_as_dict(month_index):
    """Convert index xml list to list of dictionaries."""
    # Search path
    findentrylist = etree.ETXPath("//section[@id='month-index']/ul/li")
    # Extract data
    entries_xml = findentrylist(month_index)
    entries = [to_entry_dict(entry_index_xml) for entry_index_xml in entries_xml]
    return entries
def get_definition_cpe(tree, strid):
    path = "//{http://oval.mitre.org/XMLSchema/oval-definitions-5}definition[@id='" + strid + "']"
    findall = etree.ETXPath(path)
    df = findall(tree)[0]
    try:
        cpath = ".//{http://oval.mitre.org/XMLSchema/oval-definitions-5}reference"
        cpe = df.find(cpath)
        return cpe.get('ref_id')
    except:
        return -1
def get_title(doc):
    """Return the text content of the document's first <h1> title."""
    target = '//{%s}h1[text()]' % HTMLNS
    findtitle = etree.ETXPath(target)
    if not findtitle(doc):
        sys.exit("ERROR: The document has no title")
    title = findtitle(doc)[0]
    titletext = etree.tostring(title, encoding="utf-8", method="text")
    titletext = titletext.strip()
    return titletext.decode('utf-8').strip()
def get_content(doc):
    """Return the full content of an article."""
    findcontent = etree.ETXPath("//{%s}article" % HTMLNS)
    try:
        content = findcontent(doc)[0]
        import lxml.html
        # print(lxml.html.tostring(content))
    except IndexError as e:
        raise IndexError('Ooops. No article.')
    # We want the content without the dates and the title
    findheader = etree.ETXPath("//{%s}header" % HTMLNS)
    try:
        # header = findheader(content)[0]
        # content.remove(header)
        header = findheader(doc)[0]
        # content.remove(header)
    except IndexError as e:
        logging.info('No header inside article: {e}'.format(e=e))
    return content
def last_posts(feed_path):
    """Create a list of dictionaries of the last posts using the Atom feed."""
    entries = []
    feed_root = helper.parse_feed(feed_path)
    # Information we need: title, dates, link
    find_entry = etree.ETXPath("//{%s}entry" % ATOMNS)
    find_title = etree.ETXPath("{%s}title/text()" % ATOMNS)
    find_published = etree.ETXPath("{%s}published/text()" % ATOMNS)
    find_updated = etree.ETXPath("{%s}updated/text()" % ATOMNS)
    # Only the link pointing to the blog post
    find_url = etree.ETXPath("{%s}link[@rel='alternate']/@href" % ATOMNS)
    # Extract all the entries
    feed_entries = find_entry(feed_root)
    # We iterate through them
    for entry in feed_entries:
        entry_data = {'title': find_title(entry)[0],
                      'published': find_published(entry)[0],
                      'updated': find_updated(entry)[0],
                      'url': find_url(entry)[0]}
        entries.append(entry_data)
    return entries
def __init__(self):
    conf_str = ipc.get_conf_str()
    self.xml_root = etree.fromstring(conf_str)
    namespaces = self.xml_root.nsmap
    root_namespace = namespaces[None]
    extra_namespace = namespaces["extra"]
    self.rn = root_namespace
    self.en = extra_namespace
    query = etree.ETXPath("{%s}device/{%s}unit/@{%s}url" % (self.rn, self.rn, self.en))
    self.URL = str(query(self.xml_root)[0])
    self.URL2 = str(query(self.xml_root)[1])
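# Hedged, self-contained illustration of the technique __init__ relies on: a
# Clark-notation ETXPath built from the document's nsmap, spanning a default
# and an "extra" namespace. The config string below is an assumption, not the
# project's real configuration.
from lxml import etree

conf_str = (
    '<config xmlns="urn:conf" xmlns:extra="urn:extra">'
    '  <device><unit extra:url="http://host-1/"/></device>'
    '  <device><unit extra:url="http://host-2/"/></device>'
    '</config>')
root = etree.fromstring(conf_str)
rn, en = root.nsmap[None], root.nsmap["extra"]
query = etree.ETXPath("{%s}device/{%s}unit/@{%s}url" % (rn, rn, en))
print(query(root))  # ['http://host-1/', 'http://host-2/']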
def is_valid_query(query):
    """Function to check for XPath validity.

    Tries to create an etree ETXPath instance from the query. If this fails,
    the XPathSyntaxError is caught and False is returned. Returns True
    otherwise.

    :param query: XPath query
    :type query: string
    :returns: True/False"""
    try:
        etree.ETXPath(query)
        return True
    except etree.XPathSyntaxError:
        return False
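# Hedged usage sketch (assuming `from lxml import etree` in this module): a
# well-formed Clark-notation XPath compiles, a malformed one does not.
print(is_valid_query("//{http://www.w3.org/2005/Atom}entry"))  # True
print(is_valid_query("//entry[@"))                              # False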
def compine_view_by_xpath(view_arch, xpath_arch):
    view_arch_tree = etree.fromstring(view_arch)
    xpath_tree = etree.fromstring(xpath_arch)
    if xpath_tree.tag != 'data':
        raise Exception('The root node of an inheriting view must be a data node')
    for xpath_element in xpath_tree:
        if xpath_element.tag != 'xpath':
            continue
        expr = xpath_element.get('expr', None)
        if expr is None:
            raise Exception('The expr attribute of an xpath node must not be empty')
        nodes = etree.ETXPath(expr)(view_arch_tree)
        node = nodes[0] if nodes else None
        if node is None:
            raise Exception('Could not locate a node in the parent view with the expression ' + expr)
        pos = xpath_element.get('position', 'inside')
        if pos == 'replace':
            if node.getparent() is None:
                raise Exception('You cannot replace the root node of the parent view')
            else:
                for child in xpath_element:
                    node.addprevious(child)
                node.getparent().remove(node)
        elif pos == 'attributes':
            for child in xpath_element.getiterator('attribute'):
                attribute = child.get('name')
                value = child.text or ''
                node.set(attribute, value)
        elif pos == 'inside':
            add_text_inside(node, xpath_element.text)
            for child in xpath_element:
                node.append(child)
        elif pos == 'after':
            # add a sentinel element right after node, insert content of
            # spec before the sentinel, then remove the sentinel element
            sentinel = E.sentinel()
            node.addnext(sentinel)
            add_text_before(sentinel, xpath_element.text)
            for child in xpath_element:
                sentinel.addprevious(child)
            remove_element(sentinel)
        elif pos == 'before':
            add_text_before(node, xpath_element.text)
            for child in xpath_element:
                node.addprevious(child)
        else:
            raise Exception(
                'Unsupported position attribute (' + pos + '); position must be '
                'inside, replace, after, before or attributes')
    return etree.tostring(view_arch_tree, encoding='utf-8')
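# Hedged usage sketch for compine_view_by_xpath, exercising only the 'replace'
# branch so no helper functions beyond lxml are needed; the view strings are
# illustrative assumptions.
parent_arch = '<form><field name="old"/></form>'
inherit_arch = ('<data><xpath expr="//field[@name=\'old\']" position="replace">'
                '<field name="new"/></xpath></data>')
print(compine_view_by_xpath(parent_arch, inherit_arch))
# b'<form><field name="new"/></form>'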