def _newstocks(data, pageNo, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            html = lxml.html.parse(rv.NEW_STOCKS_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                                        ct.PAGES['newstock'], pageNo))
            res = html.xpath('//table[@id=\"NewStockTable\"]/tr')
            if len(res) == 0:
                return data
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = sarr.replace('<font color="red">*</font>', '')
            sarr = '<table>%s</table>' % sarr
            df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0]
            df = df.drop([df.columns[idx] for idx in [12, 13, 14]], axis=1)
            df.columns = rv.NEW_STOCKS_COLS
            df['code'] = df['code'].map(lambda x: str(x).zfill(6))
            df['xcode'] = df['xcode'].map(lambda x: str(x).zfill(6))
            res = html.xpath('//table[@class=\"table2\"]/tr[1]/td[1]/a/text()')
            tag = '下一页' if ct.PY3 else unicode('下一页', 'utf-8')
            hasNext = True if tag in res else False
            data = data.append(df, ignore_index=True)
            pageNo += 1
            if hasNext:
                data = _newstocks(data, pageNo, retry_count, pause)
        except Exception as ex:
            print(ex)
        else:
            return data
def update(self, resp=None):
    if resp is None:
        resp = self.make_request(update_template % (self.nick, self.uuid, self.tableid), "GET")
    if resp.get("type") == "error":
        print "Looks like we've got an error:"
        message = find_subelem(resp, "message")
        print message.text
        return
    tableInfo = find_subelem(resp, "tableInfo")
    if tableInfo is None:
        print "wtf mate"
        print etree.tostring(resp, pretty_print=True)
        raise Exception
    if not self.tableid:
        self.tableid = find_subelem(tableInfo, "tableid").text
    if not self.color:
        if find_subelem(tableInfo, "blackPlayer").text == self.nick:
            self.color = "black"
        else:
            self.color = "white"
    if find_subelem(tableInfo, "board") is not None:
        self.playing = True
        for row in find_subelem(tableInfo, "board"):
            print row.text
    userInfo = find_subelem(resp, "userInfo")
    if userInfo is not None:
        self.ratio = float(find_subelem(userInfo, "ratio").text)
        self.points = int(find_subelem(userInfo, "points").text)
def _get_storage_xml(params, ignore_source=False):
    src_type = params.get('src_type')
    disk = E.disk(type=src_type, device=params.get('type'))
    disk.append(E.driver(name='qemu', type=params['format']))
    disk.append(E.target(dev=params.get('dev'), bus=params['bus']))

    if params.get('address'):
        # ide disk target id is always '0'
        disk.append(E.address(type='drive',
                              controller=params['address']['controller'],
                              bus=params['address']['bus'], target='0',
                              unit=params['address']['unit']))

    if ignore_source:
        return ET.tostring(disk)

    # Working with url paths
    if src_type == 'network':
        output = urlparse.urlparse(params.get('path'))
        host = E.host(name=output.hostname,
                      port=output.port or socket.getservbyname(output.scheme))
        source = E.source(protocol=output.scheme, name=output.path)
        source.append(host)
        disk.append(source)
    else:
        # Fixing source attribute
        source = E.source()
        source.set(DEV_TYPE_SRC_ATTR_MAP[src_type], params.get('path'))
        disk.append(source)

    return ET.tostring(disk)
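A minimal usage sketch for the disk-XML builder above; the params layout and the assumption that DEV_TYPE_SRC_ATTR_MAP maps 'file' to the 'file' attribute are illustrative guesses, not taken from the original module:

# Hypothetical example input; real callers build params from the storage volume lookup.
params = {
    'src_type': 'file', 'type': 'disk', 'format': 'qcow2',
    'dev': 'vda', 'bus': 'virtio',
    'path': '/var/lib/libvirt/images/demo.img',
}
# Assuming DEV_TYPE_SRC_ATTR_MAP['file'] == 'file', this would yield roughly:
# <disk type="file" device="disk"><driver name="qemu" type="qcow2"/>
#   <target dev="vda" bus="virtio"/>
#   <source file="/var/lib/libvirt/images/demo.img"/></disk>
print(_get_storage_xml(params))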
def remove_spaces(data):
    # Spaces need to be consistent with splits and the TOC.
    # "Foo Bar.htm" and "Foo%20Bar.htm" are seen as different.
    def fnsan(fn):
        return fn.replace(" ", "").replace("%20", "")

    # rename actual files (iterate over a snapshot of the keys, since entries
    # are added and removed while looping)
    for file_name in list(data):
        if file_name.endswith(".htm") and file_name != fnsan(file_name):
            data[fnsan(file_name)] = data[file_name]
            del data[file_name]

    # update TOC
    file_name = "toc.ncx"
    root = etree.fromstring(data[file_name])
    for element in root.findall('.//*'):
        if element.get("src"):
            element.set("src", fnsan(element.get("src")))
    data[file_name] = "<?xml version='1.0' encoding='utf-8'?>\n" + etree.tostring(root)

    # update inventory
    file_name = "content.opf"
    root = etree.fromstring(data[file_name])
    for element in root.findall('.//*'):
        if element.get("href"):
            element.set("href", fnsan(element.get("href")))
    data[file_name] = "<?xml version='1.0' encoding='utf-8'?>\n" + etree.tostring(root)
def test_fromstring_kml_document(self):
    """Tests the parsing of a valid KML string"""
    test_kml = b'<kml xmlns="http://www.opengis.net/kml/2.2"/>'
    tree = fromstring(test_kml, schema=Schema('ogckml22.xsd'))
    self.assertEqual(etree.tostring(tree, encoding='ascii'), test_kml)
    tree = fromstring(test_kml)
    self.assertEqual(etree.tostring(tree, encoding='ascii'), test_kml)
def test_get_html(self): # usual output output = self.the_input.get_html() self.assertEqual( etree.tostring(output), """<div>{\'status\': Status(\'queued\'), \'button_enabled\': True, \'rows\': \'10\', \'queue_len\': \'3\', \'mode\': \'\', \'cols\': \'80\', \'STATIC_URL\': \'/dummy-static/\', \'linenumbers\': \'true\', \'queue_msg\': \'\', \'value\': \'print "good evening"\', \'msg\': u\'Submitted. As soon as a response is returned, this message will be replaced by that feedback.\', \'matlab_editor_js\': \'/dummy-static/js/vendor/CodeMirror/octave.js\', \'hidden\': \'\', \'id\': \'prob_1_2\', \'tabsize\': 4}</div>""" ) # test html, that is correct HTML5 html, but is not parsable by XML parser. old_render_template = self.the_input.capa_system.render_template self.the_input.capa_system.render_template = lambda *args: textwrap.dedent(""" <div class='matlabResponse'><div id='mwAudioPlaceHolder'> <audio controls autobuffer autoplay src='data:audio/wav;base64='>Audio is not supported on this browser.</audio> <div>Right click <a href=https://endpoint.mss-mathworks.com/media/filename.wav>here</a> and click \"Save As\" to download the file</div></div> <div style='white-space:pre' class='commandWindowOutput'></div><ul></ul></div> """).replace('\n', '') output = self.the_input.get_html() self.assertEqual( etree.tostring(output), textwrap.dedent(""" <div class='matlabResponse'><div id='mwAudioPlaceHolder'> <audio src='data:audio/wav;base64=' autobuffer="" controls="" autoplay="">Audio is not supported on this browser.</audio> <div>Right click <a href="https://endpoint.mss-mathworks.com/media/filename.wav">here</a> and click \"Save As\" to download the file</div></div> <div style='white-space:pre' class='commandWindowOutput'/><ul/></div> """).replace('\n', '').replace('\'', '\"') ) # check that exception is raised during parsing for html. self.the_input.capa_system.render_template = lambda *args: "<aaa" with self.assertRaises(etree.XMLSyntaxError): self.the_input.get_html() self.the_input.capa_system.render_template = old_render_template
def assertEqualsXML(self, result, expect):
    # Parse the xml, strip white space, and convert back;
    # this allows us to compare whether they are logically equal.
    parser = etree.XMLParser(remove_blank_text=True)
    result = etree.tostring(etree.XML(result, parser))
    expect = etree.tostring(etree.XML(expect, parser))
    self.assertEquals(result, expect)
def test_negotiated_rate_fetching(self):
    """
    Test the rate fetching with negotiated rates. This will fail if your
    shipper number is not eligible for negotiated rates.
    """
    rating_request = RatingService.rating_request_type(
        E.Shipment(
            Helper.get_shipper(self.shipper_number, "US"),
            Helper.get_ship_to("US"),
            Helper.get_ship_from("US"),
            RatingService.service_type(Code='03'),      # UPS Ground
            Helper.get_package("US", package_type_code="00"),
            RatingService.rate_information_type(negotiated=True)
        ),
    )
    response = self.rating_api.request(rating_request)
    self.assertTrue(
        hasattr(response.RatedShipment, 'NegotiatedRates')
    )
    self.assertTrue(
        response.RatedShipment.RatedPackage.TotalCharges.MonetaryValue
    )
    print etree.tostring(response, pretty_print=True)
def extMainText(html, threshold = 0.5, filterMode = False): """ Parses HTML and keeps only main text parts. PARAMETERS: html - Input html text, MUST BE UNICODE! threshold - The density threshold to distinguish major content & others. filterMode - Use normal 'Extract' mode or the other 'Filter' mode. RETURN: final text of page by first gettin ghtml fragments and then use the get_text function """ finalHtml = "" html = _removeControlChars(html) # If we prepare a BeautifulSoup instance manually and pass it to lxml.html.soupparser.convert_tree() # then this func work well as 'import ExtMainText' but will throw strange error for 'import jqhtml.ExtMainText'. root = lxml.html.soupparser.fromstring(html) # densDic returns {'self': (tag density, length of pure text, total length of html tags and text, etree instance), 'child': list of density dics for child entities } densDic = _calcDensity(root) if filterMode: finalHtml = _filterSpam(densDic, threshold) else: maxPart, textLen, maxPartChilds, textLenChilds = _getMainText(densDic, threshold) if textLenChilds > textLen: finalHtml = ''.join(map(lambda tree: etree.tostring(tree, encoding = unicode) if tree != None else '', maxPartChilds)) else: finalHtml = etree.tostring(maxPart, encoding = unicode) if maxPart != None else '' return get_text(finalHtml)
def test_dhcp_xml(self):
    """ Test network dhcp xml """
    dhcp_range = {'start': '192.168.122.100', 'end': '192.168.122.254'}
    host1 = {'mac': '00:16:3e:77:e2:ed',
             'name': 'foo.example.com',
             'ip': '192.168.122.10'}
    host2 = {'mac': '00:16:3e:3e:a9:1a',
             'name': 'bar.example.com',
             'ip': '192.168.122.11'}
    params = {}

    dhcp = nxml._get_dhcp_elem(**params)
    self.assertEqual(None, dhcp)

    params['range'] = dhcp_range
    xml = ET.tostring(nxml._get_dhcp_elem(**params))
    start = xpath_get_text(xml, '/dhcp/range/@start')
    end = xpath_get_text(xml, '/dhcp/range/@end')
    self.assertEqual(dhcp_range['start'], start[0])
    self.assertEqual(dhcp_range['end'], end[0])

    params['hosts'] = [host1, host2]
    xml = ET.tostring(nxml._get_dhcp_elem(**params))
    ip = xpath_get_text(xml, '/dhcp/host/@ip')
    self.assertEqual(ip, [host1['ip'], host2['ip']])
def test_ip_xml(self):
    """ Test network ip xml """
    dhcp_range = {'start': '192.168.122.100', 'end': '192.168.122.254'}
    params = {}

    dhcp = nxml._get_dhcp_elem(**params)
    self.assertEqual(None, dhcp)

    params['net'] = '192.168.122.0/255.255.255.0'
    params['dhcp'] = {'range': dhcp_range}
    xml = ET.tostring(nxml._get_ip_elem(**params))
    start = xpath_get_text(xml, '/ip/dhcp/range/@start')[0]
    end = xpath_get_text(xml, '/ip/dhcp/range/@end')[0]
    self.assertEqual(dhcp_range['start'], start)
    self.assertEqual(dhcp_range['end'], end)

    address = xpath_get_text(xml, '/ip/@address')[0]
    netmask = xpath_get_text(xml, '/ip/@netmask')[0]
    self.assertEqual(address, params['net'].split('/')[0])
    self.assertEqual(netmask, params['net'].split('/')[1])

    # test that _get_ip_xml can accept strings like '192.168.122.0/24',
    # which is the same as '192.168.122.0/255.255.255.0'
    params['net'] = '192.168.122.0/24'
    xml = ET.tostring(nxml._get_ip_elem(**params))
    netmask = xpath_get_text(xml, '/ip/@netmask')[0]
    self.assertEqual(netmask, str(ipaddr.IPNetwork(params['net']).netmask))
def extMainText(html, threshold = 0.5, filterMode = False): """ Parses HTML and keeps only main text parts. PARAMETERS: html - Input html text, MUST BE UNICODE! threshold - The density threshold to distinguish major content & others. filterMode - Use normal 'Extract' mode or the other 'Filter' mode. RETURN: html fragments of main text """ html = _removeControlChars(html) # If we prepare a BeautifulSoup instance manually and pass it to lxml.html.soupparser.convert_tree() # then this func work well as 'import ExtMainText' but will throw strange error for 'import jqhtml.ExtMainText'. root = lxml.html.soupparser.fromstring(html) densDic = _calcDensity(root) if filterMode: return _filterSpam(densDic, threshold) else: maxPart, textLen, maxPartChilds, textLenChilds = _getMainText(densDic, threshold) if textLenChilds > textLen: return ''.join(map(lambda tree: etree.tostring(tree, encoding = unicode) if tree != None else '', maxPartChilds)) else: return etree.tostring(maxPart, encoding = unicode) if maxPart != None else ''
def make_files(job_links,i,j,k,l): for j_link in job_links: try: page = make_connection("http://www.workopolis.com/" + j_link) tree = html.fromstring(page.content) section = tree.xpath('//section[@class="main-content job-view-main-content js-analyticsJobView"]') side_bar = tree.xpath('//section[@class="sidebar-block sidebar-clean"]') #page_cat = tree.xpath('//a[@class="job-view-header-link link"]') #if len(page_cat) > 0: Results are too restrictive if we cut all other categories (Too many miscategorized) # page_cat = etree.tostring(page_cat[0]).decode("utf-8") if len(section) > 0: #and c_link.text in page_cat: page_str = etree.tostring(section[0]).decode("utf-8") page_str += etree.tostring(side_bar[0]).decode("utf-8") page_str = page_str.replace(" ","") if len(page_str) > 0: file3.write("http://www.workopolis.com" + j_link + "\n") file4 = open(str(i) +".html", 'w') file4.write("<a class=\"page-link\" href=\"www.workopolis.com"+j_link+"\"></a>\n") file4.write(page_str) file4.close() i += 1 # Increment sub category count j += 1 # Increment city count l += 1 # Increment category count else: k += 1 # Increment empty count else: k += 1 # Increment empty count except etree.XMLSyntaxError: k += 1 # Increment empty count return [i,j,k,l]
def test_simple_two_tags(self):
    class SomeObject(ComplexModel):
        s = Unicode
        i = Integer

    v = SomeObject(s='s', i=5)

    cloth = E.a(
        E.b1(),
        E.b2(
            E.c1(spyne_id="s"),
            E.c2(),
        ),
        E.e(
            E.g1(),
            E.g2(spyne_id="i"),
            E.g3(),
        ),
    )

    elt = self._run(v, cloth=cloth)
    print etree.tostring(elt, pretty_print=True)

    assert elt[0].tag == 'b1'
    assert elt[1].tag == 'b2'
    assert elt[1][0].tag == 'c1'
    assert elt[1][0].text == 's'
    assert elt[1][1].tag == 'c2'

    assert elt[2].tag == 'e'
    assert elt[2][0].tag == 'g1'
    assert elt[2][1].tag == 'g2'
    assert elt[2][1].text == '5'
    assert elt[2][2].tag == 'g3'
def init(handler, ctxInfo): try: # TODO: handle dependence on sys.argv intelligently for testing/etc imports. mode = ((len(sys.argv) > 1) and sys.argv[1] in ARGS_LIST) and sys.argv[1] or usage() isSetup = False hand = handler(mode, ctxInfo) if ARG_EXECUTE == mode: info = ConfigInfo() hand.execute(info) print hand.toXml(info) elif ARG_SETUP == mode: hand.setup() print hand.toXml(ConfigInfo()) else: raise InternalException("Can't get here, boss (%s)." % mode) except Exception, e: exType, exMsg, exBT = sys.exc_info() # <eai_error> root = et.Element("eai_error"); # <recognized>[true|false]</recognized> knownNode = et.SubElement(root, "recognized") knownNode.text = isinstance(e, AdminManagerException) and "true" or "false" # <type>[exception class]</type> typeNode = et.SubElement(root, "type"); typeNode.text = unicode(exType) # <message>[exception value]</message> msgNode = et.SubElement(root, "message"); msgNode.text = unicode(exMsg) # <stacktrace>[bt]</stacktrace> stackNode = et.SubElement(root, "stacktrace"); stackNode.text = traceback.format_exc() # </eai_error> print et.tostring(root)
def test_payment_create_customer_xml(): gateway = dinero.get_gateway('authorize.net') options = { 'email': '*****@*****.**', 'number': '4' + '1' * 15, 'month': '12', 'year': '2012', } xml = gateway._create_customer_xml(options) should_be = trimmy( """<createCustomerProfileRequest xmlns="AnetApi/xml/v1/schema/AnetApiSchema.xsd"> <merchantAuthentication> <name>{login_id}</name> <transactionKey>{transaction_key}</transactionKey> </merchantAuthentication> <profile> <email>[email protected]</email> <paymentProfiles> <payment> <creditCard> <cardNumber>4111111111111111</cardNumber> <expirationDate>2012-12</expirationDate> </creditCard> </payment> </paymentProfiles> </profile> </createCustomerProfileRequest>""".format( login_id=gateway.login_id, transaction_key=gateway.transaction_key, )) assert etree.tostring(xml) == should_be, "Invalid XML (\n\t%s\n\t%s\n)" % (etree.tostring(xml), should_be)
def bookmarcs(request): #print list(Bookmarc.objects.raw("SELECT id, gen_id FROM rbooks_bookmarc WHERE user_id=%s GROUP BY gen_id ORDER BY add_date DESC", params=[request.user.id])) bookmarcs = Bookmarc.objects.values('id', 'gen_id').filter(user=request.user).order_by('-add_date') gen_ids = {} for bookmarc in bookmarcs: gen_ids[bookmarc.gen_id] = {'bookmarc': bookmarc} for record in Record.objects.using('records').filter(gen_id__in=gen_ids.keys()): doc_tree = etree.XML(record.content) doc_tree = xslt_bib_draw_transformer(doc_tree) gen_ids[record.gen_id]['record']= record gen_ids[record.gen_id]['bib'] = etree.tostring(doc_tree).replace(u'<b/>', u' '), for record in Ebook.objects.using('records').filter(gen_id__in=gen_ids): doc_tree = etree.XML(record.content) doc_tree = xslt_bib_draw_transformer(doc_tree) gen_ids[record.gen_id]['record'] = record gen_ids[record.gen_id]['bib'] = etree.tostring(doc_tree).replace(u'<b/>', u' '), records = [] for bookmarc in bookmarcs: records.append(gen_ids[bookmarc.gen_id]) return render(request, 'rbooks/frontend/bookmarcs.html', { 'records': records })
def populate_summary(self): """ Populates a summary list of vulnerabilities checked by Nexpose to a dict. Must provide a NexposeAPI instance to use. """ if not self.isLoggedIn(): self.log.warn("No Nexpose API instance provided...") return False attributes = { } xml = self.make_xml('VulnerabilityListingRequest', attributes, isroot=True) self.log.debug("Sending VulnerabilityListingRequest:\n%s" % (xml)) result = self.send_command(xml) tree = etree.parse(result) self.log.debug("Result: %s" % (etree.tostring(tree))) data = tree.getroot() if data.attrib['success'] == '1': vulns = data.findall('VulnerabilitySummary') self.vulnxml = etree.tostring(data) for vuln in vulns: self.vulnerabilities[vuln.attrib['id']] = vuln.attrib #del(self.vulnerabilities[vuln.attrib['id']]['id']) else: self.log.warn("VulnerabilityListing failed") return False self.log.debug("Loaded %s Vulnerabilities..." % (len(self.vulnerabilities))) return True
def test_charge_customer_xml(): gateway = dinero.get_gateway('authorize.net') price = 123.45 customer_id = '123456789' card_id = '987654321' options = { 'cvv': '123' } xml = gateway._charge_customer_xml(customer_id, card_id, price, options) should_be = trimmy( """<createCustomerProfileTransactionRequest xmlns="AnetApi/xml/v1/schema/AnetApiSchema.xsd"> <merchantAuthentication> <name>{login_id}</name> <transactionKey>{transaction_key}</transactionKey> </merchantAuthentication> <transaction> <profileTransAuthCapture> <amount>{price}</amount> <customerProfileId>{customer_id}</customerProfileId> <customerPaymentProfileId>{card_id}</customerPaymentProfileId> <cardCode>{cvv}</cardCode> </profileTransAuthCapture> </transaction> </createCustomerProfileTransactionRequest>""".format( login_id=gateway.login_id, transaction_key=gateway.transaction_key, price=price, customer_id=customer_id, card_id=card_id, **options )) assert etree.tostring(xml) == should_be, "Invalid XML (\n\t%s\n\t%s\n)" % (etree.tostring(xml), should_be)
def remove_module(request):
    xpath = request.POST['xpath']

    defaultPrefix = request.session['moduleDefaultPrefix']
    namespace = request.session['moduleNamespaces'][defaultPrefix]
    template_content = request.session['moduleTemplateContent']
    dom = etree.parse(BytesIO(template_content.encode('utf-8')))

    # set the element namespace
    xpath = xpath.replace(defaultPrefix + ":", namespace)
    # add the element to the sequence
    element = dom.find(xpath)

    if '{http://mdcs.ns}_mod_mdcs_' in element.attrib:
        del element.attrib['{http://mdcs.ns}_mod_mdcs_']

    # remove prefix from namespaces
    nsmap = element.nsmap
    for prefix, ns in nsmap.iteritems():
        if ns == 'http://mdcs.ns':
            del nsmap[prefix]
            break

    # create a new element to replace the previous one
    # (can't replace the nsmap directly using lxml)
    element = etree.Element(element.tag, nsmap=nsmap)

    # save the tree in the session
    request.session['moduleTemplateContent'] = etree.tostring(dom)
    print etree.tostring(element)

    return HttpResponse(json.dumps({}), content_type='application/javascript')
def set_target_inner(module, tree, xpath, namespaces, attribute, value): changed = False try: if not is_node(tree, xpath, namespaces): changed = check_or_make_target(module, tree, xpath, namespaces) except Exception as e: missing_namespace = "" # NOTE: This checks only the namespaces defined in root element! # TODO: Implement a more robust check to check for child namespaces' existance if tree.getroot().nsmap and ":" not in xpath: missing_namespace = "XML document has namespace(s) defined, but no namespace prefix(es) used in xpath!\n" module.fail_json(msg="%sXpath %s causes a failure: %s\n -- tree is %s" % (missing_namespace, xpath, e, etree.tostring(tree, pretty_print=True)), exception=traceback.format_exc()) if not is_node(tree, xpath, namespaces): module.fail_json(msg="Xpath %s does not reference a node! tree is %s" % (xpath, etree.tostring(tree, pretty_print=True))) for element in tree.xpath(xpath, namespaces=namespaces): if not attribute: changed = changed or (element.text != value) if element.text != value: element.text = value else: changed = changed or (element.get(attribute) != value) if ":" in attribute: attr_ns, attr_name = attribute.split(":") # attribute = "{{%s}}%s" % (namespaces[attr_ns], attr_name) attribute = "{{{0}}}{1}".format(namespaces[attr_ns], attr_name) if element.get(attribute) != value: element.set(attribute, value) return changed
def _parse_fq_data(url, index, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath('//table[@id=\"FundHoldSharesTable\"]')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            df = pd.read_html(sarr, skiprows=[0, 1])[0]
            if len(df) == 0:
                return pd.DataFrame()
            if index:
                df.columns = ct.HIST_FQ_COLS[0:7]
            else:
                df.columns = ct.HIST_FQ_COLS
            if df['date'].dtypes == np.object:
                df['date'] = df['date'].astype(np.datetime64)
            df = df.drop_duplicates('date')
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
def addServiceItem(self, item_name, description, rate, account):
    root = etree.Element('QBXMLMsgsRq')
    root.set("onError", "continueOnError")

    el_base = etree.SubElement(root, 'ItemServiceAddRq')
    el_base.set("requestID", "")

    el_isa = etree.SubElement(el_base, "ItemServiceAdd")
    el_name = etree.SubElement(el_isa, "Name")
    el_name.text = str(item_name)

    el_sop = etree.SubElement(el_isa, "SalesOrPurchase")
    el_desc = etree.SubElement(el_sop, "Desc")
    el_desc.text = str(description)
    el_price = etree.SubElement(el_sop, "Price")
    el_price.text = "%.2f" % rate

    el_ar = etree.SubElement(el_sop, "AccountRef")
    el_an = etree.SubElement(el_ar, "FullName")
    el_an.text = str(account)

    res = self.__makeQBXMLReq(root)
    xmldoc = self.__submitQBXMLReq(res)

    if self.debug:
        print etree.tostring(xmldoc, pretty_print=True,
                             encoding="utf-8", xml_declaration=True)
def __makeQBXMLReq(self, data):
    """
    Add the session authentication information to the specified qbXML
    document in preparation for submission to Quickbooks.
    """
    root = etree.Element("QBXML")
    tree = etree.ElementTree(root)
    root.addprevious(etree.ProcessingInstruction('qbxml', 'version="6.0"'))

    el_signon = etree.SubElement(root, "SignonMsgsRq")
    el_signon_ticket = etree.SubElement(el_signon, 'SignonTicketRq')

    el_datetime = etree.SubElement(el_signon_ticket, 'ClientDateTime')
    el_datetime.text = self.__getXMLDatetime()

    el_session_ticket = etree.SubElement(el_signon_ticket, 'SessionTicket')
    el_session_ticket.text = self.__session_ticket

    el_lang = etree.SubElement(el_signon_ticket, 'Language')
    el_lang.text = 'English'

    el_app_id = etree.SubElement(el_signon_ticket, 'AppID')
    el_app_id.text = self.app_name_id

    el_ver = etree.SubElement(el_signon_ticket, 'AppVer')
    el_ver.text = self.app_name_ver

    root.append(data)

    if self.debug:
        print etree.tostring(tree, pretty_print=True,
                             encoding="utf-8", xml_declaration=True)

    return tree
def __makeSignInReq(self):
    """
    Generate the XML document that contains the sign-in request for the
    Quickbooks API.
    """
    root = etree.Element("QBXML")
    tree = etree.ElementTree(root)
    root.addprevious(etree.ProcessingInstruction('qbxml', 'version="6.0"'))

    el_signon = etree.SubElement(root, "SignonMsgsRq")
    el_app_cert = etree.SubElement(el_signon, 'SignonAppCertRq')

    el_datetime = etree.SubElement(el_app_cert, 'ClientDateTime')
    el_datetime.text = self.__getXMLDatetime()

    el_app = etree.SubElement(el_app_cert, 'ApplicationLogin')
    el_app.text = self.app_name

    el_ticket = etree.SubElement(el_app_cert, 'ConnectionTicket')
    el_ticket.text = self.conn_ticket

    el_lang = etree.SubElement(el_app_cert, 'Language')
    el_lang.text = 'English'

    el_app_id = etree.SubElement(el_app_cert, 'AppID')
    el_app_id.text = self.app_name_id

    el_ver = etree.SubElement(el_app_cert, 'AppVer')
    el_ver.text = self.app_name_ver

    if self.debug:
        print etree.tostring(tree, pretty_print=True,
                             encoding="utf-8", xml_declaration=True)

    return tree
def test_preprocess_xml(self, content): xml = etree.fromstring(""" <CFRGRANULE> <PART> <APPENDIX> <TAG>Other Text</TAG> <GPH DEEP="453" SPAN="2"> <GID>ABCD.0123</GID> </GPH> </APPENDIX> </PART> </CFRGRANULE>""") content.Macros.return_value = [ ("//GID[./text()='ABCD.0123']/..", """ <HD SOURCE="HD1">Some Title</HD> <GPH DEEP="453" SPAN="2"> <GID>EFGH.0123</GID> </GPH>""")] reg_text.preprocess_xml(xml) should_be = etree.fromstring(""" <CFRGRANULE> <PART> <APPENDIX> <TAG>Other Text</TAG> <HD SOURCE="HD1">Some Title</HD> <GPH DEEP="453" SPAN="2"> <GID>EFGH.0123</GID> </GPH></APPENDIX> </PART> </CFRGRANULE>""") self.assertEqual(etree.tostring(xml), etree.tostring(should_be))
def extract_each(self, filename): with open(filename) as f: data = f.read() try: html = etree.HTML(data) except: return Catalog() course_block = html.xpath('//body//p') course_title = [' '.join( etree.HTML(etree.tostring(e)).xpath('//b/text()') ).strip() for e in course_block] course_desc = [' '.join( etree.HTML(etree.tostring(e)).xpath('//p/text()') ).replace('\n', ' ').replace('\t', ' ').replace('\r', ' ').strip() for e in course_block] course_instructor = [' '.join( etree.HTML(etree.tostring(e)).xpath('//i/text()') ) for e in course_block] d = DataFrame({'course_title': course_title, 'desc': course_desc, 'instructor': course_instructor}) d['course_id'] = d['course_title'].map(lambda x: ' '.join(x.encode('utf-8').split(' '.encode('utf-8'))[:2]).strip()) d['course_title'] = d['course_title'].map(lambda x: ' '.join(x.encode('utf-8').split(' '.encode('utf-8'))[2:]).strip()) d['desc'] = d['desc'].apply(lambda x: x.replace('\n', ' ').replace('\r', ' ').strip('"').strip()) d['school'] = 'uw' d['id'] = filename.strip(self.input_directory).strip('/') return Catalog(data=d)
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            html = lxml.html.parse(ct.TODAY_TICKS_URL % (ct.P_TYPE['http'],
                                                         ct.DOMAINS['vsf'],
                                                         ct.PAGES['t_ticks'],
                                                         symbol, tdate, pageNo))
            res = html.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>' % sarr
            sarr = sarr.replace('--', '0')
            df = pd.read_html(StringIO(sarr), parse_dates=False)[0]
            df.columns = ct.TODAY_TICK_COLUMNS
            df['pchange'] = df['pchange'].map(lambda x: x.replace('%', ''))
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
def test_iframe_youtube(self): html = etree.parse(os.path.join(TEST_DATA_DIR, 'media-video.html')) cnxml = self.call_target(html).getroot() try: elm = cnxml.xpath('//*[@id="test_media_video_youtube"]')[0] except IndexError: transformed_cnxml = etree.tostring(cnxml) self.fail('Failed to pass through media@id and/or ' 'the iframe->iframe tag transform: ' + transformed_cnxml) self.assertEqual(elm.tag, '{http://cnx.rice.edu/cnxml}media') (iframe,) = elm.getchildren() self.assertEqual(iframe.tag, '{http://cnx.rice.edu/cnxml}iframe') self.assertEqual(iframe.attrib['src'], 'http://www.youtube.com/v/k9oSQNTHUZM') try: elm = cnxml.xpath('//*[@id="test_media_video_youtube_2"]')[0] except IndexError: transformed_cnxml = etree.tostring(cnxml) self.fail('Failed to pass through media@id and/or ' 'the iframe->iframe tag transform: ' + transformed_cnxml) self.assertEqual(elm.tag, '{http://cnx.rice.edu/cnxml}media') (iframe,) = elm.getchildren() self.assertEqual(iframe.tag, '{http://cnx.rice.edu/cnxml}iframe') self.assertEqual(iframe.attrib['src'], 'http://www.youtube.com/embed/r-FonWBEb0o')
def _get_report_data(year, quarter, pageNo, dataArr, orderby):
    ct._write_console()
    try:
        # The default sort order returns duplicate and missing rows, so an
        # explicit orderby parameter is added to the request.
        request = Request(ct.REPORT_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                           ct.PAGES['fd'], year, quarter, pageNo,
                                           ct.PAGE_NUM[1], orderby))
        text = urlopen(request, timeout=10).read()
        text = text.decode('GBK')
        text = text.replace('--', '')
        html = lxml.html.parse(StringIO(text))
        res = html.xpath("//table[@class=\"list_table\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = '<table>%s</table>' % sarr
        df = pd.read_html(sarr)[0]
        df = df.drop(11, axis=1)
        df.columns = ct.REPORT_COLS
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
        if len(nextPage) > 0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_report_data(year, quarter, pageNo, dataArr, orderby)
        else:
            return dataArr
    except Exception as e:
        print(e)
def getAdmin(): username = checkPassword() if not username or username != 'admin': return """<meta http-equiv="refresh" content="0;url=/home/login" />""" ######################## CREATE ARTICLE ############################################################################### ###### ADMIN changepass = E.div( E.h2('Change admin password:'******'Current password:'******'name': 'oldpassword', 'type': 'password' })), E.div(E.label('New password:'******'name': 'newpassword1', 'type': 'password' })), E.div(E.label('Confirm new password:'******'name': 'newpassword2', 'type': 'password' })), E.a( { 'class': 'button-link', 'onclick': "changepassword($(this).parent().serialize());" }, 'Change password'))) changeemail = E.div( E.h2('Change admin email address:'), E.form( E.div( E.label('Password:'******'name': 'password', 'id': 'emailpassword', 'type': 'password' })), E.div( E.label('New email:'), E.input({ 'name': 'newemail1', 'id': 'newemail1', 'type': 'text' })), E.div( E.label('Confirm new email:'), E.input({ 'name': 'newemail2', 'id': 'newemail2', 'type': 'text' })), E.a( { 'class': 'button-link', 'onclick': "changeemail($(this).parent().serialize());" }, 'Change email'))) ###### USERS adduser = E.div( E.h2('Add User:'******'id': 'adduserform'}, E.div(E.label('User name:'), E.input({ 'name': 'newusername', 'type': 'text' })), E.div(E.label('Email address:'), E.input({ 'name': 'email', 'type': 'text' })), E.div(E.label('User password:'******'name': 'newpassword1', 'type': 'password' })), E.div(E.label('Confirm user password:'******'name': 'newpassword2', 'type': 'password' })), E.a( { 'class': 'button-link', 'onclick': "adduser($(this).parent().serialize());" }, 'Add user'))) userlist = E.select({'name': 'user'}) vals = db.execute("SELECT name FROM lab_members") if vals == []: userlist.append(E.option('')) else: for val in vals: userlist.append(E.option(val[0])) removeuser = E.div( E.h2('Remove User:'******'User name:'), userlist), E.a( { 'class': 'button-link', 'onclick': "removeuser($(this).parent().serialize());" }, 'Remove user'))) ##### GENES addgene = E.div( E.h2('Add Gene:'), E.form( E.div(E.label('Gene Name:'), E.input({ 'name': 'genename', 'type': 'text' })), E.div( E.label('Wild Type Presence:'), E.select({'name': 'genedefaultstatus'}, E.option('+'), E.option('-'))), E.a( { 'class': 'button-link', 'onclick': "addgene($(this).parent().serialize());" }, 'Add gene'))) genelist = E.select({'name': 'gene'}) vals = db.execute("SELECT name FROM genes") if vals == []: genelist.append(E.option('')) else: for val in vals: genelist.append(E.option(val[0])) removegene = E.div( E.h2('Remove Gene:'), E.form( E.div(E.label('Gene Name:'), genelist), E.a( { 'class': 'button-link', 'onclick': "removegene($(this).parent().serialize());" }, 'Remove gene'))) ##### STRAINS addstrain = E.div( E.h2('Add Strain:'), E.form( E.div(E.label('Strain Name:'), E.input({ 'name': 'strainname', 'type': 'text' })), E.a( { 'class': 'button-link', 'onclick': "addstrain($(this).parent().serialize());" }, 'Add Strain'))) strainlist = E.select({'name': 'strain'}) vals = db.execute("SELECT name FROM strains") if vals == []: strainlist.append(E.option('')) else: for val in vals: strainlist.append(E.option(val[0])) removestrain = E.div( E.h2('Remove strain:'), E.form({'class': 'form'}, E.div(E.label('Strain Name:'), strainlist), E.a( { 'class': 'button-link', 'onclick': "removestrain($(this).parent().serialize());" }, 'Remove strain'))) ### PUT ALL THE PIECES TOGETHER article = E.div( E.div({'id': 'notification'}, ''), 
E.div({'class': 'tabs'}, E.ul(E.li(E.a({'href': '#tab1'}, 'Admin')), E.li(E.a({'href': '#tab2'}, 'Users')), E.li(E.a({'href': '#tab3'}, 'Genes')), E.li(E.a({'href': '#tab4'}, 'Strains'))), E.div( E.div({'id': 'tab1'}, changepass, changeemail), E.div({'id': 'tab2'}, adduser, removeuser), E.div({'id': 'tab3'}, addgene, removegene), E.div({'id': 'tab4'}, addstrain, removestrain), ))) article = etree.tostring(article, pretty_print=True) ####################### STYLE AND JAVASCRIPT ############################################################################# style = """ h2 { margin-bottom: 10px; padding-bottom: 5px; border-bottom: 1px solid #D8D8D8; } input{ webkit-border-radius: 8px; -moz-border-radius: 8px; border-radius: 8px; padding: 4px; } table{ width:500px; margin:10px; } #notification{ display:none; padding:5px; margin:3px; background-color:#FFFF66; webkit-border-radius: 8px; -moz-border-radius: 8px; border-radius: 8px;} label{ display: inline-block; float: left; clear: left; width: 200px; margin-right: 10px; white-space: nowrap;} form .button-link{ margin-left:0px; } form div{margin-bottom:10px;} """ javascript = """ var t; function notify(data){ $('#notification').html(data); $('#notification').show(); var fadefunc="$('#notification').hide('fade', {}, 200);"; t=setTimeout(fadefunc,15000); } function changepassword(fields){ $.post('/home/ajax/changepassword/',{fields:fields}, function(data){ notify(data);});} function changeemail(fields){ alert(fields); $.post('/home/ajax/changeemail/',{fields:fields}, function(data){ notify(data);});} function adduser(fields){ $.post('/admin/adduser/',{fields:fields}, function(data){ notify(data);});} function removeuser(fields){ $.post('/admin/removeuser/',{fields:fields}, function(data){ notify(data);});} function addgene(fields){ $.post('/admin/addgene/',{fields:fields}, function(data){ notify(data);refreshgenes();});} function removegene(fields){ $.post('/admin/removegene/',{fields:fields}, function(data){ notify(data);refreshgenes();});} function refreshgenes(){ $.post('/admin/refreshgenes/', function(data){ $("select[name='gene']").html(data);});} function addstrain(fields){ $.post('/admin/addstrain/',{fields:fields}, function(data){ notify(data);refreshstrains();});} function removestrain(fields){ $.post('/admin/removestrain/',{fields:fields}, function(data){ notify(data);refreshstrains();});} function refreshstrains(){ $.post('/admin/refreshstrains/', function(data){ $("select[name='strain']").html(data);});} $(document).ready(function(){ $( ".tabs" ).tabs(); }); """ resources = "<style type='text/css'>" + style + '</style><script type="text/javascript">' + javascript + '</script>' return glamsTemplate(article, username, resources=resources)
print("Reading source component specification from '" + src_spec_path + "'") src_spec_name = os.path.basename(src_spec_path) xml_parser = etree.XMLParser(remove_blank_text=True) src_spec = etree.parse(src_spec_path, xml_parser).getroot() print("Processing Linux binary '" + src_bin_path + "'") binary_name = os.path.basename(src_bin_path) binary_size = os.path.getsize(src_bin_path) add_provides_memory(src_spec, "binary", "subject_binary", LINUX_VIRTUAL_ADDRESS, binary_name, binary_size, "true", "true") if src_initramfs_path is not None: print("Reading source system policy from '" + src_policy_path + "'") src_policy = etree.parse(src_policy_path, xml_parser).getroot() initramfs_addr = get_initramfs_address(src_policy) if initramfs_addr is None: print("Warning: Manually add mappings for " + src_initramfs_path) else: print("Processing initramfs '" + src_initramfs_path + "'") initramfs_name = os.path.basename(src_initramfs_path) initramfs_size = os.path.getsize(src_initramfs_path) add_provides_memory(src_spec, "modules_initramfs", "subject_initrd", initramfs_addr, initramfs_name, initramfs_size, "false", "false") with open(out_spec_path, 'wb') as out_spec: print("Writing component specification to '" + out_spec_path + "'") out_spec.write(etree.tostring(src_spec, pretty_print=True))
from lxml import etree
import lxml.html
from io import StringIO, BytesIO

broken_html = '<html><head><title>test<body><h1>page title</h1><a href="hupu">aa</a>'

parser = etree.HTMLParser()
tree = etree.parse(StringIO(broken_html), parser)
#tree = lxml.html.fromstring(broken_html)
#ele = tree.xpath('.//div[@class="AfficheAccessory"]')
#for child in ele:
#    child.set("class", "sun")

print(etree.tostring(tree, encoding='utf-8'))
def __init__(self, system, xml, state): """ Instantiate an InputType class. Arguments: - system : ModuleSystem instance which provides OS, rendering, and user context. Specifically, must have a render_template function. - xml : Element tree of this Input element - state : a dictionary with optional keys: * 'value' -- the current value of this input (what the student entered last time) * 'id' -- the id of this input, typically "{problem-location}_{response-num}_{input-num}" * 'status' (answered, unanswered, unsubmitted) * 'input_state' -- dictionary containing any inputtype-specific state that has been preserved * 'feedback' (dictionary containing keys for hints, errors, or other feedback from previous attempt. Specifically 'message', 'hint', 'hintmode'. If 'hintmode' is 'always', the hint is always displayed.) """ self.xml = xml self.tag = xml.tag self.system = system # NOTE: ID should only come from one place. If it comes from multiple, # we use state first, XML second (in case the xml changed, but we have # existing state with an old id). Since we don't make this guarantee, # we can swap this around in the future if there's a more logical # order. self.input_id = state.get('id', xml.get('id')) if self.input_id is None: raise ValueError("input id state is None. xml is {0}".format( etree.tostring(xml))) self.value = state.get('value', '') feedback = state.get('feedback', {}) self.msg = feedback.get('message', '') self.hint = feedback.get('hint', '') self.hintmode = feedback.get('hintmode', None) self.input_state = state.get('input_state', {}) # put hint above msg if it should be displayed if self.hintmode == 'always': self.msg = self.hint + ('<br/>' if self.msg else '') + self.msg self.status = state.get('status', 'unanswered') try: # Pre-parse and propcess all the declared requirements. self.process_requirements() # Call subclass "constructor" -- means they don't have to worry about calling # super().__init__, and are isolated from changes to the input # constructor interface. self.setup() except Exception as err: # Something went wrong: add xml to message, but keep the traceback msg = "Error in xml '{x}': {err} ".format(x=etree.tostring(xml), err=str(err)) raise Exception, msg, sys.exc_info()[2]
def to_string(self, pretty=False, method='xml', encoding='UTF-8'):
    """Serializes the element to a string."""
    return etree.tostring(self._element, method=method, encoding=encoding,
                          pretty_print=pretty)
from lxml import etree

package = etree.Element('Package')
doc = etree.ElementTree(package)

for namedtype, dates in sorted(changedTypes.items()):
    typeBlock = etree.SubElement(package, 'types')
    for date, people in sorted(dates.items()):
        for person, changes in sorted(people.items()):
            typeBlock.append(etree.Comment('{} On {} START'.format(person, date.strftime('%D'))))
            for change in sorted(changes):
                el = etree.Element('members')
                el.text = change
                typeBlock.append(el)
    nameElem = etree.SubElement(typeBlock, 'name')
    nameElem.text = namedtype

ver = etree.Element('version')
ver.text = str(VERSION)
package.append(ver)

with open('./package.xml', 'wb') as f:
    f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='UTF-8'))
def createSBNG_ER_gml(graph): page = etree.Element('sbgn', xmlns="http://sbgn.org/libsbgn/0.2") doc = etree.ElementTree(page) tagDictionary = {} stateDictionary = {} sbgnmap = etree.SubElement(page, 'map', language="entity relationship") gidx = 0 for nodeId in (graph.nodes()): if 'gid' not in graph.node[nodeId]: glyphtag = etree.SubElement(sbgnmap, 'glyph', id='id{0}'.format(nodeId)) glyphtag.set('class', 'entity') etree.SubElement(glyphtag, 'label', text=graph.node[nodeId]['LabelGraphics']['text']) etree.SubElement(glyphtag, 'bbox', x='0', y="0", w="120", h="60") tagDictionary[nodeId] = glyphtag for nodeId in (graph.nodes()): if 'gid' in graph.node[nodeId]: if 'isGroup' in graph.node[nodeId]: glyphtag = tagDictionary[graph.node[nodeId]['gid']] component = etree.SubElement(glyphtag, 'glyph', id="id{0}".format(nodeId)) component.set('class', "state variable") clabel = etree.SubElement( component, 'state', variable=graph.node[nodeId]['LabelGraphics']['text']) etree.SubElement(component, 'bbox', x='0', y="0", w="54", h="15") tagDictionary[nodeId] = component for nodeId in (graph.nodes()): if 'gid' in graph.node[nodeId] and 'isGroup' not in graph.node[nodeId]: glyphtag = tagDictionary[graph.node[nodeId]['gid']] if glyphtag.get('class') == 'entity': component = etree.SubElement(glyphtag, 'glyph', id="id{0}".format(nodeId)) component.set('class', "state variable") clabel = etree.SubElement( component, 'state', variable=graph.node[nodeId]['LabelGraphics']['text']) etree.SubElement(component, 'bbox', x='0', y="0", w="54", h="15") elif glyphtag.get('class') == 'state variable': component = etree.SubElement(sbgnmap, 'glyph', id="id{0}".format(nodeId)) component.set('class', "variable value") clabel = etree.SubElement( component, 'label', text=graph.node[nodeId]['LabelGraphics']['text']) arctag = etree.SubElement(sbgnmap, 'arc') arctag.set('class', 'assignment') arctag.set('source', 'id{0}'.format(nodeId)) arctag.set('target', 'id{0}'.format(graph.node[nodeId]['gid'])) arctag.set('id', 'as{0}'.format(gidx)) etree.SubElement(arctag, 'start', x='0', y='0') etree.SubElement(arctag, 'end', x='0', y='0') etree.SubElement(component, 'bbox', x='0', y="0", w="20", h="20") gidx += 1 idx = 0 for edgeId in (graph.edges()): arctag = etree.SubElement(sbgnmap, 'arc') arctag.set('class', 'assignment') arctag.set('source', 'id{0}'.format(edgeId[0])) arctag.set('target', 'id{0}'.format(edgeId[1])) arctag.set('id', 'a{0}'.format(idx)) etree.SubElement(arctag, 'start', x='0', y='0') etree.SubElement(arctag, 'end', x='0', y='0') idx += 1 return etree.tostring(page, pretty_print=True)
def _handle_exception(cls, exception): code = 500 # default code is_website_request = bool(getattr(request, 'is_frontend', False) and getattr(request, 'website', False)) if not is_website_request: # Don't touch non website requests exception handling return super(Http, cls)._handle_exception(exception) else: try: response = super(Http, cls)._handle_exception(exception) if isinstance(response, Exception): exception = response else: # if parent excplicitely returns a plain response, then we don't touch it return response except Exception as e: if 'werkzeug' in config['dev_mode']: raise e exception = e values = dict( exception=exception, traceback=traceback.format_exc(), ) if isinstance(exception, werkzeug.exceptions.HTTPException): if exception.code is None: # Hand-crafted HTTPException likely coming from abort(), # usually for a redirect response -> return it directly return exception else: code = exception.code if isinstance(exception, odoo.exceptions.AccessError): code = 403 if isinstance(exception, QWebException): values.update(qweb_exception=exception) if type(exception.error) == odoo.exceptions.AccessError: code = 403 values.update( status_message=werkzeug.http.HTTP_STATUS_CODES[code], status_code=code, ) view_id = code if request.website.is_publisher() and isinstance(exception, werkzeug.exceptions.NotFound): view_id = 'page_404' values['path'] = request.httprequest.path[1:] if not request.uid: cls._auth_method_public() with registry(request.env.cr.dbname).cursor() as cr: env = api.Environment(cr, request.uid, request.env.context) if code == 500: logger.error("500 Internal Server Error:\n\n%s", values['traceback']) View = env["ir.ui.view"] if 'qweb_exception' in values: if 'load could not load template' in exception.args: # When t-calling an inexisting template, we don't have reference to # the view that did the t-call. We need to find it. values['views'] = View.search([ ('type', '=', 'qweb'), '|', ('arch_db', 'ilike', 't-call="%s"' % exception.name), ('arch_db', 'ilike', "t-call='%s'" % exception.name) ], order='write_date desc', limit=1) else: try: # exception.name might be int, string exception_template = int(exception.name) except: exception_template = exception.name view = View._view_obj(exception_template) et = etree.fromstring(view.with_context(inherit_branding=False).read_combined(['arch'])['arch']) node = et.find(exception.path.replace('/templates/t/', './')) line = node is not None and etree.tostring(node, encoding='unicode') # line = exception.html # FALSE -> contains branding <div t-att-data="request.browse('ok')"/> if line: # If QWebException occurs in a child view, the parent view is raised values['editable'] = request.uid and request.website.is_publisher() values['views'] = View._views_get(exception_template).filtered( lambda v: line in v.arch ) else: values['views'] = view # Keep only views that we can reset values['views'] = values['views'].filtered( lambda view: view._get_original_view().arch_fs or 'oe_structure' in view.key ) elif code == 403: logger.warn("403 Forbidden:\n\n%s", values['traceback']) try: html = env['ir.ui.view'].render_template('website.%s' % view_id, values) except Exception: html = env['ir.ui.view'].render_template('website.http_error', values) return werkzeug.wrappers.Response(html, status=code, content_type='text/html;charset=utf-8')
def pprint(self):
    print(etree.tostring(self.to_xml(), pretty_print=True).decode())
def _eval_xml(self, node, env): if node.tag in ('field','value'): t = node.get('type','char') f_model = node.get('model') if node.get('search'): f_search = node.get("search") f_use = node.get("use",'id') f_name = node.get("name") idref2 = {} if f_search: idref2 = _get_idref(self, env, f_model, self.idref) q = safe_eval(f_search, idref2) ids = env[f_model].search(q).ids if f_use != 'id': ids = [x[f_use] for x in env[f_model].browse(ids).read([f_use])] _fields = env[f_model]._fields if (f_name in _fields) and _fields[f_name].type == 'many2many': return ids f_val = False if len(ids): f_val = ids[0] if isinstance(f_val, tuple): f_val = f_val[0] return f_val a_eval = node.get('eval') if a_eval: idref2 = _get_idref(self, env, f_model, self.idref) try: return safe_eval(a_eval, idref2) except Exception: logging.getLogger('odoo.tools.convert.init').error( 'Could not eval(%s) for %s in %s', a_eval, node.get('name'), env.context) raise def _process(s): matches = re.finditer(br'[^%]%\((.*?)\)[ds]'.decode('utf-8'), s) done = set() for m in matches: found = m.group()[1:] if found in done: continue done.add(found) id = m.groups()[0] if not id in self.idref: self.idref[id] = self.id_get(id) # So funny story: in Python 3, bytes(n: int) returns a # bytestring of n nuls. In Python 2 it obviously returns the # stringified number, which is what we're expecting here s = s.replace(found, pycompat.text_type(self.idref[id])) s = s.replace('%%', '%') # Quite wierd but it's for (somewhat) backward compatibility sake return s if t == 'xml': _fix_multiple_roots(node) return '<?xml version="1.0"?>\n'\ +_process("".join(etree.tostring(n, encoding='unicode') for n in node)) if t == 'html': return _process("".join(etree.tostring(n, encoding='unicode') for n in node)) data = node.text if node.get('file'): with file_open(node.get('file'), 'rb') as f: data = f.read() if t == 'base64': return base64.b64encode(data) # after that, only text content makes sense data = pycompat.to_text(data) if t == 'file': from ..modules import module path = data.strip() if not module.get_module_resource(self.module, path): raise IOError("No such file or directory: '%s' in %s" % ( path, self.module)) return '%s,%s' % (self.module, path) if t == 'char': return data if t == 'int': d = data.strip() if d == 'None': return None return int(d) if t == 'float': return float(data.strip()) if t in ('list','tuple'): res=[] for n in node.iterchildren(tag='value'): res.append(_eval_xml(self, n, env)) if t=='tuple': return tuple(res) return res elif node.tag == "function": args = [] a_eval = node.get('eval') # FIXME: should probably be exclusive if a_eval: self.idref['ref'] = self.id_get args = safe_eval(a_eval, self.idref) for n in node: return_val = _eval_xml(self, n, env) if return_val is not None: args.append(return_val) model = env[node.get('model')] method = node.get('name') # this one still depends on the old API return odoo.api.call_kw(model, method, args, {}) elif node.tag == "test": return node.text
#!/usr/bin/python
from jnpr.junos import Device
from lxml import etree

if __name__ == '__main__':
    # Hardcoded credentials just for lab ease.
    # The "with" statement opens and closes the NETCONF connection for us,
    # so explicit dev.open()/dev.close() calls are not needed.
    with Device(host='66.129.235.12', port=45002, user='******',
                passwd='Juniper!1') as dev:
        # Launch the RPC from PyEZ, using etree as the parser
        cnf = dev.rpc.get_config(filter_xml=etree.XML(
            '<configuration><interfaces/></configuration>'))
        print(etree.tostring(cnf))
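Under Python 3, etree.tostring() returns bytes, so the print call above shows a b'...' literal. A small variation (an assumption layered on the script above, not part of it) decodes and pretty-prints the reply instead:

# Hypothetical variation: decode the RPC reply before printing it.
print(etree.tostring(cnf, pretty_print=True).decode('utf-8'))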
def _get_iiif_error(parameter, text):
    error = Element('error', nsmap=NSMAP)
    SubElement(error, 'parameter').text = parameter
    SubElement(error, 'text').text = text
    return etree.tostring(error, method='xml')
"mode", self.handler.get_argument('debug', 'text')) self.debug_log_handler.log_data.set("request-id", str(self.handler.request_id)) # if we have 500 but have "noxsl" in args without "debug" in args # apply xsl for debug info anyway if self.handler.xml.apply_xsl or not self.debug_mode: # show 'awesome' debug page try: xsl_file = open(tornado.options.options.debug_xsl) tranform = etree.XSLT(etree.XML(xsl_file.read())) xsl_file.close() log_document = str(tranform(self.debug_log_handler.log_data)) self.handler.set_header('Content-Type', 'text/html; charset=UTF-8') except Exception, e: self.handler.log.exception('XSLT debug file error') self.handler.set_header('Content-Type', 'application/xml; charset=UTF-8') log_document = etree.tostring(self.debug_log_handler.log_data, encoding='UTF-8', xml_declaration=True) else: self.handler.set_header('Content-Type', 'application/xml; charset=UTF-8') log_document = etree.tostring(self.debug_log_handler.log_data, encoding='UTF-8', xml_declaration=True) return log_document
def check(self, **kwargs): '''Performs the checks on the metadata.xml for the package @param xpkg: the pacakge being checked @param checkdir: string, directory path @param checkdirlist: list of checkdir's @param repolevel: integer @returns: boolean ''' xpkg = kwargs.get('xpkg') checkdir = kwargs.get('checkdir') checkdirlist = kwargs.get('checkdirlist').get() self.musedict = {} if self.options.mode in ['manifest']: self.muselist = frozenset(self.musedict) return False # metadata.xml file check if "metadata.xml" not in checkdirlist: self.qatracker.add_error("metadata.missing", xpkg + "/metadata.xml") self.muselist = frozenset(self.musedict) return False # metadata.xml parse check metadata_bad = False # read metadata.xml into memory try: _metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml')) except (ParserError, SyntaxError, EnvironmentError) as e: metadata_bad = True self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e)) del e self.muselist = frozenset(self.musedict) return False indentation_chars = Counter() for l in etree.tostring(_metadata_xml).splitlines(): indentation_chars.update(re.match(rb"\s*", l).group(0)) if len(indentation_chars) > 1: self.qatracker.add_error("metadata.warning", "%s/metadata.xml: %s" % (xpkg, "inconsistent use of tabs and spaces in indentation") ) xml_encoding = _metadata_xml.docinfo.encoding if xml_encoding.upper() != metadata_xml_encoding: self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: " "xml declaration encoding should be '%s', not '%s'" % (xpkg, metadata_xml_encoding, xml_encoding)) if not _metadata_xml.docinfo.doctype: metadata_bad = True self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing")) else: doctype_system = _metadata_xml.docinfo.system_url if doctype_system.replace('http://', 'https://') != metadata_dtd_uri: if doctype_system is None: system_problem = "but it is undefined" else: system_problem = "not '%s'" % doctype_system self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: " "DOCTYPE: SYSTEM should refer to '%s', %s" % (xpkg, metadata_dtd_uri, system_problem)) doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1] if doctype_name != metadata_doctype_name: self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: " "DOCTYPE: name should be '%s', not '%s'" % (xpkg, metadata_doctype_name, doctype_name)) # load USE flags from metadata.xml self.musedict = parse_metadata_use(_metadata_xml) for atom in chain(*self.musedict.values()): if atom is None: continue try: atom = Atom(atom) except InvalidAtom as e: self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: Invalid atom: %s" % (xpkg, e)) else: if atom.cp != xpkg: self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: Atom contains " "unexpected cat/pn: %s" % (xpkg, atom)) # Only carry out if in package directory or check forced if not metadata_bad: validator = etree.XMLSchema(file=self.metadata_xsd) if not validator.validate(_metadata_xml): self._add_validate_errors(xpkg, validator.error_log) self.muselist = frozenset(self.musedict) return False
def _xml_strip(self, xml):
    parser = etree.XMLParser(remove_blank_text=True)
    return etree.tostring(etree.fromstring(xml, parser=parser))
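A short sketch of the idea behind the whitespace-stripping helper above; the sample documents are made up for illustration:

from lxml import etree

# With remove_blank_text=True, ignorable whitespace between elements is dropped,
# so differently formatted but equivalent documents serialize identically.
parser = etree.XMLParser(remove_blank_text=True)
a = etree.tostring(etree.fromstring('<root>\n  <child/>\n</root>', parser=parser))
b = etree.tostring(etree.fromstring('<root><child/></root>', parser=parser))
assert a == b  # both serialize to b'<root><child/></root>'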
import argparse

argparser = argparse.ArgumentParser(
    description='Converts KonText config.xml version 0.7.x to the version 0.8')
argparser.add_argument('conf_file', metavar='CONF_FILE',
                       help='an XML configuration file')
argparser.add_argument('-u', '--update', type=int,
                       help='Perform a single update (identified by a number)')
argparser.add_argument('-p', '--print', action='store_const', const=True,
                       help='Print result instead of writing it to a file')
args = argparser.parse_args()

doc = etree.parse(args.conf_file)
process_document(doc, getattr(args, 'update'))

result_xml = etree.tostring(doc, encoding='utf-8', pretty_print=True)
if getattr(args, 'print'):
    print(result_xml)
else:
    output_path = '%s.new.xml' % args.conf_file.rsplit('.', 1)[0]
    with open(output_path, 'wb') as f:
        f.write(result_xml)
    print('DONE!\nConverted config written to %s\n' % output_path)
def save_graph(self, filename):
    with open(filename, 'wb') as file:
        # the file is opened in binary mode, so the declaration must be written
        # as bytes, matching the bytes returned by etree.tostring()
        file.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
        file.write(etree.tostring(self.root, pretty_print=True))
def _to_xml(self, root):
    """Convert the xml object to an xml string."""
    return etree.tostring(root, encoding='UTF-8', xml_declaration=True)
def create_metadata(metadata, filename):
    # Assumes: from lxml import etree; import numpy as np; from shapely.wkt import loads
    namespaces = dict()
    namespaces['opt'] = 'http://www.opengis.net/opt/2.1'
    namespaces['om'] = 'http://www.opengis.net/om/2.0'
    namespaces['gml'] = 'http://www.opengis.net/gml/3.2'
    namespaces['eop'] = 'http://www.opengis.net/eop/2.1'
    namespaces['sar'] = 'http://www.opengis.net/sar/2.1'
    namespaces['ssp'] = 'http://www.opengis.net/ssp/2.1'

    for key, value in namespaces.items():
        etree.register_namespace(key, value)

    root = etree.Element('{{{}}}EarthObservation'.format(namespaces['ssp']))

    # Time
    if 'startdate' in metadata.keys():
        phenomenon_time = etree.SubElement(root, '{{{}}}phenomenonTime'.format(namespaces['om']))
        time_period = etree.SubElement(phenomenon_time, '{{{}}}TimePeriod'.format(namespaces['gml']))
        begin_position = etree.SubElement(time_period, '{{{}}}beginPosition'.format(namespaces['gml']))
        end_position = etree.SubElement(time_period, '{{{}}}endPosition'.format(namespaces['gml']))
        begin_position.text = metadata['startdate']
        end_position.text = metadata['enddate']

    # Geometry (footprint)
    if 'wkt' in metadata.keys():
        feature_of_interest = etree.SubElement(root, '{{{}}}featureOfInterest'.format(namespaces['om']))
        footprint = etree.SubElement(feature_of_interest, '{{{}}}Footprint'.format(namespaces['ssp']))
        multi_extentOf = etree.SubElement(footprint, '{{{}}}multiExtentOf'.format(namespaces['ssp']))
        multi_surface = etree.SubElement(multi_extentOf, '{{{}}}MultiSurface'.format(namespaces['gml']))
        surface_members = etree.SubElement(multi_surface, '{{{}}}surfaceMembers'.format(namespaces['gml']))
        polygon = etree.SubElement(surface_members, '{{{}}}Polygon'.format(namespaces['gml']))
        exterior = etree.SubElement(polygon, '{{{}}}exterior'.format(namespaces['gml']))
        linear_ring = etree.SubElement(exterior, '{{{}}}LinearRing'.format(namespaces['gml']))
        poslist = etree.SubElement(linear_ring, '{{{}}}posList'.format(namespaces['gml']))

        # WKT is lon/lat; the posList wants lat/lon, hence the t[::-1] flip
        coords = np.asarray([t[::-1] for t in list(loads(metadata['wkt']).exterior.coords)]).tolist()
        pos_list = ''
        for elem in coords:
            pos_list += ' '.join(str(e) for e in elem) + ' '
        poslist.attrib['count'] = str(len(coords))
        poslist.text = pos_list

    if 'product_type' in metadata.keys():
        # Metadata property
        metadata_property = etree.SubElement(root, '{{{}}}metaDataProperty'.format(namespaces['eop']))
        earth_observation_metadata = etree.SubElement(metadata_property, '{{{}}}EarthObservationMetaData'.format(namespaces['eop']))
        identifier = etree.SubElement(earth_observation_metadata, '{{{}}}identifier'.format(namespaces['eop']))
        product_type = etree.SubElement(earth_observation_metadata, '{{{}}}productType'.format(namespaces['eop']))
        identifier.text = metadata['identifier']
        product_type.text = metadata['product_type']

    if 'vs' in metadata.keys():
        if 'product_type' not in metadata.keys():
            metadata_property = etree.SubElement(root, '{{{}}}metaDataProperty'.format(namespaces['eop']))
            earth_observation_metadata = etree.SubElement(metadata_property, '{{{}}}EarthObservationMetaData'.format(namespaces['eop']))
        vendor_specific = etree.SubElement(earth_observation_metadata, '{{{}}}vendorSpecific'.format(namespaces['eop']))
        for k, v in metadata['vs'].items():
            specific_information = etree.SubElement(vendor_specific, '{{{}}}SpecificInformation'.format(namespaces['eop']))
            local_attribute = etree.SubElement(specific_information, '{{{}}}localAttribute'.format(namespaces['eop']))
            local_value = etree.SubElement(specific_information, '{{{}}}localValue'.format(namespaces['eop']))
            local_attribute.text = k
            local_value.text = v

    eop_xml = filename + '.xml'
    with open(eop_xml, 'wb') as file:
        # etree.tostring returns bytes, so write bytes throughout
        file.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
        file.write(etree.tostring(root, pretty_print=True))

    with open(filename + '.properties', 'w') as file:
        if 'title' in metadata.keys():
            file.write('title={}\n'.format(metadata['title']))
        if 'startdate' in metadata.keys() and 'enddate' in metadata.keys():
            file.write('date={}/{}\n'.format(metadata['startdate'], metadata['enddate']))
        if 'wkt' in metadata.keys():
            file.write('geometry={0}\n'.format(metadata['wkt']))
        if 'cat' in metadata.keys():
            temp_expression = []
            for k, v in metadata['cat'].items():
                temp_expression.append(','.join(metadata['cat'][k].values()))
            cat_expression = '|'.join(temp_expression)
            file.write('category={}'.format(cat_expression))

    return filename + '.xml', filename + '.properties'
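A brief usage sketch for create_metadata; the metadata values and the output basename are made up for illustration, and lxml, numpy and shapely are assumed to be installed.

metadata = {
    'identifier': 'EXAMPLE_PRODUCT_001',                       # hypothetical product id
    'product_type': 'OPT_MS_L1',                               # hypothetical product type
    'startdate': '2020-01-01T00:00:00Z',
    'enddate': '2020-01-01T00:01:00Z',
    'wkt': 'POLYGON((10 45, 11 45, 11 46, 10 46, 10 45))',
}
xml_path, properties_path = create_metadata(metadata, 'EXAMPLE_PRODUCT_001')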
def write_to_file(self, file_path):
    self.xml.append(self.choices)
    # etree.tostring returns bytes, so open in binary mode and close the file reliably
    with open(file_path, 'wb') as f:
        f.write(etree.tostring(self.xml, pretty_print=True))
def egress(self, envelope, http_headers, operation, binding_options):
    # Format the request body as pretty printed XML
    xml = etree.tostring(envelope, pretty_print=True, encoding='unicode')
    print(f'\nRequest\n-------\nHeaders:\n{http_headers}\n\nBody:\n{xml}')
def view_graph(self):
    print(etree.tostring(self.root, pretty_print=True))
def html2plaintext(html, body_id=None, encoding='utf-8'):
    """ From an HTML text, convert the HTML to plain text.
    If @param body_id is provided then this is the tag where the
    body (not necessarily <body>) starts.
    """
    ## (c) Fry-IT, www.fry-it.com, 2007
    ## <*****@*****.**>
    ## download here: http://www.peterbe.com/plog/html2plaintext
    # ustr() is assumed to be an external unicode-coercion helper provided elsewhere
    html = ustr(html)
    if not html:
        return ''

    tree = etree.fromstring(html, parser=etree.HTMLParser())

    if body_id is not None:
        source = tree.xpath('//*[@id=%s]' % (body_id,))
    else:
        source = tree.xpath('//body')
    if len(source):
        tree = source[0]

    url_index = []
    i = 0
    for link in tree.findall('.//a'):
        url = link.get('href')
        if url:
            i += 1
            link.tag = 'span'
            link.text = '%s [%s]' % (link.text, i)
            url_index.append(url)

    html = ustr(etree.tostring(tree, encoding=encoding))
    # \r char is converted into &#13;, must remove it
    html = html.replace('&#13;', '')

    html = html.replace('<strong>', '*').replace('</strong>', '*')
    html = html.replace('<b>', '*').replace('</b>', '*')
    html = html.replace('<h3>', '*').replace('</h3>', '*')
    html = html.replace('<h2>', '**').replace('</h2>', '**')
    html = html.replace('<h1>', '**').replace('</h1>', '**')
    html = html.replace('<em>', '/').replace('</em>', '/')
    html = html.replace('<tr>', '\n')
    html = html.replace('</p>', '\n')
    html = re.sub('<br\s*/?>', '\n', html)
    html = re.sub('<.*?>', ' ', html)
    html = html.replace(' ' * 2, ' ')

    # undo the entities that survive tag stripping
    html = html.replace('&gt;', '>')
    html = html.replace('&lt;', '<')
    html = html.replace('&amp;', '&')

    # strip all lines
    html = '\n'.join([x.strip() for x in html.splitlines()])
    html = html.replace('\n' * 2, '\n')

    for i, url in enumerate(url_index):
        if i == 0:
            html += '\n\n'
        html += ustr('[%s] %s\n') % (i + 1, url)

    return html
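A quick usage sketch for html2plaintext, assuming re and lxml.etree are imported; ustr is aliased to str here purely for a standalone test.

ustr = str  # stand-in for the external unicode helper, for this sketch only
sample = '<html><body><h1>Title</h1><p>See <a href="http://example.com">the site</a>.</p></body></html>'
print(html2plaintext(sample))
# prints, roughly:
# **Title**
# See the site [1] .
#
# [1] http://example.com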
def ingress(self, envelope, http_headers, operation):
    # Format the response body as pretty printed XML
    xml = etree.tostring(envelope, pretty_print=True, encoding='unicode')
    print(f'\nResponse\n-------\nHeaders:\n{http_headers}\n\nBody:\n{xml}')
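The egress and ingress hooks above match the method signatures of zeep's Plugin interface, so they presumably belong to a SOAP request/response logging plugin. A minimal sketch of wiring such a plugin into a client; the WSDL URL is a placeholder, not a real endpoint.

from lxml import etree
from zeep import Client, Plugin

class SoapLoggingPlugin(Plugin):
    # compact versions of the egress/ingress hooks shown above
    def egress(self, envelope, http_headers, operation, binding_options):
        print(etree.tostring(envelope, pretty_print=True, encoding='unicode'))
        return envelope, http_headers

    def ingress(self, envelope, http_headers, operation):
        print(etree.tostring(envelope, pretty_print=True, encoding='unicode'))
        return envelope, http_headers

client = Client('https://example.com/service?wsdl', plugins=[SoapLoggingPlugin()])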
def _verify(t, keyspec, sig_path=".//{%s}Signature" % NS['ds'], drop_signature=False):
    """
    Verify the signature(s) in an XML document.

    Throws an XMLSigException on any non-matching signatures.

    :param t: XML as lxml.etree
    :param keyspec: X.509 cert filename, string with fingerprint or X.509 cert as string
    :returns: list of verified reference objects; raises XMLSigException if no signature validated
    """
    if config.debug_write_to_files:
        with open("/tmp/foo-sig.xml", "w") as fd:
            fd.write(etree_to_string(t))

    validated = []
    for sig in t.findall(sig_path):
        try:
            sv = sig.findtext(".//{%s}SignatureValue" % NS['ds'])
            if not sv:
                raise XMLSigException("No SignatureValue")
            log.debug("SignatureValue: {!s}".format(sv))
            this_cert = xmlsec.crypto.from_keyspec(keyspec, signature_element=sig)
            log.debug("key size: {!s} bits".format(this_cert.keysize))

            si = sig.find(".//{%s}SignedInfo" % NS['ds'])
            log.debug("Found signedinfo {!s}".format(etree.tostring(si)))
            cm_alg = _cm_alg(si)
            try:
                sig_digest_alg = _sig_alg(si)
            except AttributeError:
                raise XMLSigException("Failed to validate {!s} because of unsupported hash format".format(etree.tostring(sig)))

            refmap = _process_references(t, sig, verify_mode=True, sig_path=sig_path, drop_signature=drop_signature)
            for ref, obj in refmap.items():
                log.debug("transform %s on %s" % (cm_alg, etree.tostring(si)))
                sic = _transform(cm_alg, si)
                log.debug("SignedInfo C14N: %s" % sic)
                if this_cert.do_digest:
                    digest = xmlsec.crypto._digest(sic, sig_digest_alg)
                    log.debug("SignedInfo digest: %s" % digest)
                    b_digest = b64d(digest)
                    actual = _signed_value(b_digest, this_cert.keysize, True, sig_digest_alg)
                else:
                    actual = sic

                if not this_cert.verify(b64d(sv), actual, sig_digest_alg):
                    raise XMLSigException("Failed to validate {!s} using sig digest {!s} and cm {!s}".format(etree.tostring(sig), sig_digest_alg, cm_alg))
                validated.append(obj)

        except (XMLSigException, ValueError) as ex:
            # we will try the next available signature
            log.error(ex)

    if not validated:
        raise XMLSigException("No valid ds:Signature elements found")

    return validated
def parse_html(self, url_boj, response):
    # Assumes: import re, json; HTML is lxml.etree.HTML (or a compatible parser)
    # Handle encoding issues (left disabled in the original)
    # charset = 'utf-8'
    # try:
    #     try:
    #         search_res = re.search('meta.*?charset="(.*?)"', response.text)
    #         charset = search_res.group(1)
    #     except:
    #         search_res = re.search('meta.*?charset=(.*?)"', response.text)
    #         charset = search_res.group(1)
    # except:
    #     pass
    html_text = response
    results = re.findall('<script.*?data-repeatable>({"data".*?)</script>', html_text)
    order_list = []
    item_list = []
    for res in results:
        json_obj = json.loads(res)
        # Position of the result in the search list (order)
        order = json_obj['order']
        order_list.append(order)

        # contentType: audio (音频), video (视频), science article, expert answer, expert voice answer, Q&A (问答)
        if 'title' in json_obj['data']:
            contentType = json_obj['data']['title']
        elif 'extend_data' in json_obj['data']:
            contentType = json_obj['data']['extend_data']['title']
        elif 'extendData' in json_obj['data']:
            contentType = json_obj['data']['extendData']['title']
        else:
            contentType = '结构卡'
        if '_' in contentType:
            contentType = contentType.split('_')[1]
        contentType = re.sub('(\(.*?\))', '', contentType)
        if '-' in contentType:
            contentType = contentType.split('-')[0].strip()
        if 'hasVoice' in json_obj['data']:
            contentType = '音频'
        if 'video' in json_obj['data'] or ('videoList' in json_obj['data'] and len(json_obj['data']['videoList']) > 0) or 'media' in json_obj['data']:
            contentType = '视频'
        if '<em>' in contentType:
            contentType = '问答'

        # contentStyle: top1, smart aggregation, or the "authoritative" style (handled separately below)
        if 'showLeftText' in json_obj['data']:
            contentStyle = json_obj['data']['showLeftText']
        elif 'extend_data' in json_obj['data'] and 'showLeftText' in json_obj['data']['extend_data']:
            contentStyle = json_obj['data']['extend_data']['showLeftText']
        elif 'extendData' in json_obj['data'] and 'showLeftText' in json_obj['data']['extendData']:
            contentStyle = json_obj['data']['extendData']['showLeftText']
        else:
            contentStyle = 'top1'

        # Doctor details
        # 1. top1, style 1
        if 'info' in json_obj['data']:
            try:
                query = json_obj['data']['unhighTitle']
                name = json_obj['data']['info']['author']['name']
                hospital = json_obj['data']['info']['content'][1]
                jobTitle = json_obj['data']['info']['content'][0]
                origin = json_obj['data']['showurl_area']['logo_name']
                obj = {
                    'keyword': url_boj['keyword'],
                    'order': order,
                    'query': query,
                    'contentType': contentType,
                    'contentStyle': contentStyle,
                    'name': name,
                    'hospital': hospital,
                    'jobTitle': jobTitle,
                    'origin': origin,
                }
                item_list.append(obj)
                print(obj)
            except:
                pass
        # 1. top1, style 2: structured card (e.g. the query "头疼怎么办")
        elif 'tabList' in json_obj['data']:
            if 'imageCount' not in json_obj['data']:
                query = json_obj['data']['sgTitle']
                name = json_obj['data']['tabList'][0]['doctor']['name']
                hospital = json_obj['data']['tabList'][0]['doctor']['hospital']
                jobTitle = json_obj['data']['tabList'][0]['doctor']['level']
                origin = ''
                contentStyle = 'top1'
                obj = {
                    'keyword': url_boj['keyword'],
                    'order': order,
                    'query': query,
                    'contentType': contentType,
                    'contentStyle': contentStyle,
                    'name': name,
                    'hospital': hospital,
                    'jobTitle': jobTitle,
                    'origin': origin,
                }
                item_list.append(obj)
                print(obj)
        # 2. Science articles and expert answers
        elif 'list' in json_obj['data'] or 'extend_data' in json_obj['data']:
            if 'list' in json_obj['data']:
                data = json_obj['data']['list']
            elif 'extend_data' in json_obj['data']:
                data = json_obj['data']['extend_data']['list']
            for item in data:
                query = item['title'].replace('<em>', '').replace('</em>', '').replace('?', '').replace('。', '')
                if 'doctorInfo' in item:
                    name = item['doctorName'] if 'doctorName' in item else item['doctorInfo'].split(' ')[0]
                    hospital = item['hospital'] if 'hospital' in item else item['doctorInfo'].split(' ')[1]
                else:
                    name = item['doctorName'] if 'doctorName' in item else ''
                    hospital = item['hospital'] if 'hospital' in item else ''
                jobTitle = item['doctorTitle'] if 'doctorTitle' in item else ''
                if 'list' in json_obj['data']:
                    origin = item['source'] if 'source' in item else ''
                elif 'extend_data' in json_obj['data']:
                    origin = item['miptitle'] if 'miptitle' in item else ''
                obj = {
                    'keyword': url_boj['keyword'],
                    'order': order,
                    'query': query,
                    'contentType': contentType,
                    'contentStyle': contentStyle,
                    'name': name,
                    'hospital': hospital,
                    'jobTitle': jobTitle,
                    'origin': origin,
                }
                if name == '':
                    continue
                item_list.append(obj)
                print(obj)
        # 3. Video results
        elif 'videoList' in json_obj['data']:
            for item in json_obj['data']['videoList']:
                query = item['title'].replace('<em>', '').replace('</em>', '').replace('?', '').replace('。', '')
                name = item['doctor_name'] if 'doctor_name' in item else ''
                hospital = item['hospital'] if 'hospital' in item else ''
                jobTitle = item['doctor_level'] if 'doctor_level' in item else ''
                origin = item['source'] if 'source' in item else ''
                obj = {
                    'keyword': url_boj['keyword'],
                    'order': order,
                    'query': query,
                    'contentType': contentType,
                    'contentStyle': contentStyle,
                    'name': name,
                    'hospital': hospital,
                    'jobTitle': jobTitle,
                    'origin': origin,
                }
                if name is None:
                    continue
                item_list.append(obj)
                print(obj)
        # 4. Audio results
        elif 'extendData' in json_obj['data']:
            for item in json_obj['data']['extendData']['list']:
                query = item['title'].replace('<em>', '').replace('</em>', '').replace('?', '').replace('。', '')
                name = item['doctorName'] if 'doctorName' in item else ''
                hospital = item['hospital'] if 'hospital' in item else ''
                jobTitle = item['doctorTitle'] if 'doctorTitle' in item else ''
                origin = item['miptitle'] if 'miptitle' in item else ''
                obj = {
                    'keyword': url_boj['keyword'],
                    'order': order,
                    'query': query,
                    'contentType': contentType,
                    'contentStyle': contentStyle,
                    'name': name,
                    'hospital': hospital,
                    'jobTitle': jobTitle,
                    'origin': origin,
                }
                if name is None:
                    continue
                item_list.append(obj)
                print(obj)

    # Results that are not embedded as JS JSON: the "authoritative" style (权威样式)
    html = HTML(html_text)
    # results = html.xpath('//div[@id="results"]/div[@class="c-result result c-clk-recommend"]')
    results = html.xpath('//div[@id="results"]/div[@class="c-result result"]')
    for res in results:
        detail_html_text = etree.tostring(res)
        order_res = re.search('order="(\d+)"', detail_html_text.decode()).group(1)
        if order_res in order_list:
            continue
        detail_html = HTML(detail_html_text.decode())
        order = order_res
        query = detail_html.xpath('string(//span[@class="c-title-text"])').split('_')[0].replace('?', '')
        # contentType = detail_html.xpath('string(//span[@class="c-title-text"])').split('_')[1]
        contentType = '问答'
        contentStyle = '权威样式'
        name = detail_html.xpath('string(//div[@class="c-span11 c-line-clamp1"]//span[1])')
        hospital = detail_html.xpath('string(//div[@class="c-span11 c-line-clamp1"]//span[3])')
        jobTitle = detail_html.xpath('string(//div[@class="c-span11 c-line-clamp1"]//span[2])')
        origin = detail_html.xpath('string(//span[@class="c-color-gray"])')
        obj = {
            'keyword': url_boj['keyword'],
            'order': order,
            'query': query,
            'contentType': contentType,
            'contentStyle': contentStyle,
            'name': name,
            'hospital': hospital,
            'jobTitle': jobTitle,
            'origin': origin,
        }
        if name == '':
            continue
        item_list.append(obj)
        print(obj)
    self.write(url_boj, item_list)
def test_xml_roundtrip(self):
    p1 = Property("tau_m", 20.0, mV)
    element = p1.to_xml()
    xml = etree.tostring(element, pretty_print=True)
    p2 = Property.from_xml(element, Document(mV=mV))
    self.assertEqual(p1, p2)
def sign(t, key_spec, cert_spec=None, reference_uri='', insert_index=0, sig_path=".//{%s}Signature" % NS['ds']):
    """
    Sign an XML document. This means to 'complete' all Signature elements in the XML.

    :param t: XML as lxml.etree
    :param key_spec: private key reference, see xmlsec.crypto.from_keyspec() for syntax
    :param cert_spec: None or public key reference (to add cert to document),
                      see xmlsec.crypto.from_keyspec() for syntax
    :param sig_path: An xpath expression identifying the Signature template element
    :param reference_uri: Envelope signature reference URI
    :param insert_index: Insertion point for the Signature element,
                         Signature is inserted at beginning by default
    :returns: XML as lxml.etree (for convenience, 't' is modified in-place)
    """
    private = xmlsec.crypto.from_keyspec(key_spec, private=True)

    public = None
    if cert_spec is not None:
        public = xmlsec.crypto.from_keyspec(cert_spec)
        if public is None:
            raise XMLSigException("Unable to load public key from '%s'" % cert_spec)
        if public.keysize and private.keysize:  # XXX maybe one set and one not set should also raise exception?
            if public.keysize != private.keysize:
                raise XMLSigException("Public and private key sizes do not match ({!s}, {!s})".format(public.keysize, private.keysize))

    # This might be incorrect for PKCS#11 tokens if we have no public key
    log.debug("Using {!s} bit key".format(private.keysize))

    sig_paths = t.findall(sig_path)
    templates = list(filter(_is_template, sig_paths))
    if not templates:
        tmpl = add_enveloped_signature(t, reference_uri=reference_uri, pos=insert_index)
        templates = [tmpl]

    assert templates, XMLSigException("Failed to both find and add a signing template")

    if config.debug_write_to_files:
        with open("/tmp/sig-ref.xml", "w") as fd:
            fd.write(etree_to_string(root_elt(t)))

    for sig in templates:
        log.debug("processing sig template: %s" % etree.tostring(sig))
        si = sig.find(".//{%s}SignedInfo" % NS['ds'])
        assert si is not None
        cm_alg = _cm_alg(si)
        sig_alg = _sig_alg(si)

        _process_references(t, sig, verify_mode=False, sig_path=sig_path)
        # XXX create signature reference duplicates/overlaps process references unless a c14 is part of transforms
        log.debug("transform %s on %s" % (cm_alg, etree.tostring(si)))
        sic = _transform(cm_alg, si)
        log.debug("SignedInfo C14N: %s" % sic)

        # sign hash digest and insert it into the XML
        if private.do_digest:
            digest = xmlsec.crypto._digest(sic, sig_alg)
            log.debug("SignedInfo digest: %s" % digest)
            b_digest = b64d(digest)
            tbs = _signed_value(b_digest, private.keysize, private.do_padding, sig_alg)
        else:
            tbs = sic

        signed = private.sign(tbs, sig_alg)
        signature = b64e(signed)
        if isinstance(signature, six.binary_type):
            signature = six.text_type(signature, 'utf-8')
        log.debug("SignatureValue: %s" % signature)
        sv = sig.find(".//{%s}SignatureValue" % NS['ds'])
        if sv is None:
            si.addnext(DS.SignatureValue(signature))
        else:
            sv.text = signature

        for cert_src in (public, private):
            if cert_src is not None and cert_src.cert_pem:
                # Insert cert_data as b64-encoded X.509 certificate into XML document
                sv_elt = si.getnext()
                sv_elt.addnext(DS.KeyInfo(DS.X509Data(DS.X509Certificate(pem2b64(cert_src.cert_pem)))))
                break  # add the first we find, no more

    return t
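The sign(), _verify() and _process_references() functions in this section look like internals of a pyXMLSecurity-style xmlsec module. A rough sketch of driving the signing entry point, assuming that package is importable as xmlsec (not the libxmlsec1 binding of the same name) and using placeholder file names.

from lxml import etree
import xmlsec  # assumed: pyXMLSecurity

t = etree.parse('unsigned.xml').getroot()                                # placeholder input document
signed = xmlsec.sign(t, key_spec='private.pem', cert_spec='public.pem')  # placeholder key/cert paths
print(etree.tostring(signed, pretty_print=True).decode('utf-8'))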
def from_ele(maybe_ele):
    if etree.iselement(maybe_ele):
        return etree.tostring(maybe_ele).decode("utf-8")
    else:
        return maybe_ele
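A small sketch of from_ele's pass-through behaviour; the sample element below is made up.

from lxml import etree

elem = etree.fromstring('<rpc-reply><ok/></rpc-reply>')
print(from_ele(elem))             # element serialized to str: '<rpc-reply><ok/></rpc-reply>'
print(from_ele('already a str'))  # non-elements are returned unchanged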
def _process_references(t, sig, verify_mode=True, sig_path=".//{%s}Signature" % NS['ds'], drop_signature=False):
    """
    :returns: in verify mode, a dict mapping each ds:Reference to the dereferenced object it digests;
              in signing mode, None (the DigestValue elements are updated in place)
    """
    verified_objects = {}
    for ref in sig.findall(".//{%s}Reference" % NS['ds']):
        obj = None
        hash_alg = None
        uri = ref.get('URI', None)
        if uri is None or uri == '#' or uri == '':
            ref_obj = _implicit_same_document(t, sig)
            if ref_obj is None:
                raise XMLSigException("Unable to find reference while processing implicit same document reference")
            ct = _remove_child_comments(ref_obj)
            obj = root_elt(ct)
        elif uri.startswith('#'):
            ct = copy.deepcopy(t)
            ref_obj = _get_by_id(ct, uri[1:])
            if ref_obj is None:
                raise XMLSigException("Unable to find reference while processing '%s'" % uri)
            obj = _remove_child_comments(ref_obj)
        else:
            raise XMLSigException("Unknown reference %s" % uri)

        if obj is None:
            raise XMLSigException("Unable to dereference Reference URI='%s'" % uri)

        obj_copy = obj
        if verify_mode:
            obj_copy = copy.deepcopy(obj)
            if drop_signature:
                for sig in obj_copy.findall(sig_path):
                    sig.getparent().remove(sig)

        if config.debug_write_to_files:
            with open("/tmp/foo-pre-transform.xml", "w") as fd:
                fd.write(etree_to_string(obj))

        for tr in ref.findall(".//{%s}Transform" % NS['ds']):
            obj = _transform(_alg(tr), obj, tr=tr, sig_path=sig_path)
            nslist = _find_nslist(tr)
            if nslist is not None:
                r = root_elt(t)
                for nsprefix in nslist:
                    if nsprefix in r.nsmap:
                        obj_copy.nsmap[nsprefix] = r.nsmap[nsprefix]

        if not isinstance(obj, six.string_types):
            if config.debug_write_to_files:
                with open("/tmp/foo-pre-serialize.xml", "w") as fd:
                    fd.write(etree_to_string(obj))
            obj = _transform(constants.TRANSFORM_C14N_INCLUSIVE, obj)

        if config.debug_write_to_files:
            with open("/tmp/foo-obj.xml", "w") as fd:
                if six.PY2:
                    obj = obj.encode('utf-8')
                fd.write(obj)

        hash_alg = _ref_digest(ref)
        log.debug("using hash algorithm %s" % hash_alg)
        digest = xmlsec.crypto._digest(obj, hash_alg)
        log.debug("computed %s digest %s for ref %s" % (hash_alg, digest, uri))
        dv = ref.find(".//{%s}DigestValue" % NS['ds'])

        if verify_mode:
            log.debug("found %s digest %s for ref %s" % (hash_alg, dv.text, uri))
            computed_digest_binary = b64d(digest)
            digest_binary = b64d(dv.text)
            if digest_binary == computed_digest_binary:
                # no point in verifying signature if the digest doesn't match
                verified_objects[ref] = obj_copy
            else:
                log.error("not returning ref %s - digest mismatch" % uri)
        else:
            # signing - lets store the digest
            log.debug("replacing digest in %s" % etree.tostring(dv))
            dv.text = digest

    if verify_mode:
        return verified_objects
    else:
        return None