def add_anchor(html, anchor_link_text=u'¶'):
    """Add an id and an anchor-link to an html header

    For use on markdown headings
    """
    # Parse the header markup; anything that is not well-formed XML is
    # returned untouched rather than raising.
    try:
        h = ElementTree.fromstring(
            py3compat.cast_bytes_py2(html, encoding='utf-8'))
    except Exception:
        return html
    fragment = _convert_header_id(html2text(h))
    h.set('id', fragment)
    link_el = Element("a", {"class": "anchor-link", "href": "#" + fragment})
    # The anchor text may itself be markup (e.g. an <img> tag); fall back
    # to plain text when it does not parse as XML.
    try:
        link_el.append(ElementTree.fromstring(anchor_link_text))
    except Exception:
        link_el.text = anchor_link_text
    h.append(link_el)
    # ElementTree.tostring() yields bytes on Python 3
    # (http://bugs.python.org/issue10942), so decode back to text.
    return py3compat.decode(ElementTree.tostring(h), 'utf-8')
def add_anchor_lower_id(html, anchor_link_text="¶"):
    """Add a slugified id and a matching anchor-link to an html header.

    For use on markdown headings; returns the input unchanged when it
    cannot be parsed as XML.
    """
    # NOTE(review): cElementTree modules were removed in Python 3.9 —
    # presumably this project pins older versions; verify before upgrading.
    from xml.etree.cElementTree import Element
    from defusedxml import cElementTree as ElementTree
    from ipython_genutils import py3compat
    from nbconvert.filters.strings import _convert_header_id, html2text
    try:
        h = ElementTree.fromstring(
            py3compat.cast_bytes_py2(html, encoding="utf-8"))
    except Exception:
        # failed to parse, just return it unmodified
        return html
    # BUG FIX: the id was slugified but the href was not, so the anchor
    # pointed at a fragment that does not exist. Use one slug for both.
    link = slugify(_convert_header_id(html2text(h)))
    h.set("id", link)
    a = Element("a", {"class": "anchor-link", "href": "#" + link})
    try:
        # Test if the anchor link text is HTML (e.g. an image)
        a.append(ElementTree.fromstring(anchor_link_text))
    except Exception:
        # If we fail to parse, assume we've just got regular text
        a.text = anchor_link_text
    h.append(a)
    # Known issue of Python3.x, ElementTree.tostring() returns a byte string
    # instead of a text string. See issue http://bugs.python.org/issue10942
    # Workaround is to make sure the bytes are casted to a string.
    return py3compat.decode(ElementTree.tostring(h), "utf-8")
def __init__(self, Data: str):
    """Parse a timedtext XML document into indexed caption entries."""
    super().__init__()
    self.Tree = ElementTree.fromstring(Data)
    # Index caption entries by start time; entries without a "dur"
    # attribute are skipped, matching the original filtering.
    self.TextElements = {}
    for Node in self.Tree.findall("text"):
        if "dur" not in Node.attrib:
            continue
        Start = float(Node.attrib["start"])
        Duration = float(Node.attrib["dur"])
        self.TextElements[Start] = {
            "start": Start,
            "duration": Duration,
            "end": round(Start + Duration, 2),
            "text": Node.text,
            "markdown": markdownify(Node.text),
        }
    # Overall duration is the latest "end" across all entries.
    self.duration = sorted(
        Entry["end"] for Entry in self.TextElements.values())[-1]
async def get_subtitle(videoId: str, session):
    """Fetch available subtitle tracks for a video.

    Returns a dict mapping language code -> {"url", "ext"} pointing at
    the timedtext API; only the first track per language is kept.
    """
    log.info(f"Downloading subtitle page of {videoId}")
    async with session.get(
            "https://video.google.com/timedtext",
            params={"hl": "en", "type": "list", "v": videoId},
    ) as resp:
        listing: str = await resp.text()
        tracks = ElementTree.fromstring(listing)
        subtitles: dict = {}
        for track in tracks.findall("track"):
            lang = track.attrib["lang_code"]
            # Keep only the first track seen for each language.
            if lang in subtitles:
                continue
            query = urllib.parse.urlencode({
                "lang": lang,
                "v": videoId,
                "fmt": "srv1",
                "name": track.attrib["name"].encode("utf-8"),
            })
            subtitles[lang] = {
                "url": "https://www.youtube.com/api/timedtext?" + query,
                "ext": "srv1",
            }
        return subtitles
def clean(self):
    """Validate the uploaded OPML file and stash its feed outlines."""
    if 'opml_file' not in self.cleaned_data:
        return self.cleaned_data
    cleaned_data = super().clean()
    opml_file = cleaned_data['opml_file']
    if opml_file.size > MAX_OPML_FILE_SIZE:
        raise forms.ValidationError(
            'Your OPML file was too large: %(size)s',
            code="too_large",
            params={'size': opml_file.size})
    if opml_file.content_type not in OPML_CONTENT_TYPES:
        raise forms.ValidationError(
            'The file was not a valid OPML file: %(content_type)s',
            code="invalid",
            params={'content_type': opml_file.content_type})
    contents = opml_file.read()
    try:
        # forbid_dtd guards against entity-expansion attacks on upload
        document = fromstring(contents, forbid_dtd=True)
        feeds = document.findall('./body//outline[@xmlUrl]')
        if not feeds:
            raise forms.ValidationError(
                'No feeds were found in the OPML file', code="no_feeds")
    except DefusedXmlException:
        raise forms.ValidationError(
            'The file was not a valid OPML file', code="invalid")
    except SyntaxError:
        # ElementTree's ParseError subclasses SyntaxError, which is what
        # actually propagates here
        raise forms.ValidationError(
            'The file was not a valid OPML file', code="invalid")
    cleaned_data['feeds'] = feeds
    return cleaned_data
def get_packages_swid(package_list):
    """Parse newline-separated SWID XML documents into package versions.

    :param package_list: newline-separated string of SWID XML documents
    :return: (errors, packages) where errors is a list of parse-error
        strings and packages maps package name -> set of version strings
    """
    packages = defaultdict(set)
    errors = []
    # Raw string: '<\?' in a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python). Compiled once, outside the loop.
    pi_re = re.compile(r'<\?[^>]+\?>')
    for xml_doc in package_list.split("\n"):
        try:
            # remove any <? ... ?> declaration / processing instruction
            xml_doc = pi_re.sub('', xml_doc)
            # use DET (defusedxml) since this is untrusted data
            data = DET.fromstring(xml_doc)
            name, version = data.attrib['name'], data.attrib['version']
            # Drop any release suffix, e.g. "1.2-3" -> "1.2".
            version = version.split("-")[0]
            packages[name].add(version)
        except Exception as e:
            errors.append(str(e))
    return errors, packages
async def __handle_xml_data(self, data):
    """Dispatch one raw XML message from the client.

    Handles policy-file requests and 'msg' packets; for the latter the
    body's action attribute selects the registered XML listeners.
    """
    self.logger.debug('Received XML data: %s', data)
    element_tree = Et.fromstring(data)

    if element_tree.tag == 'policy-file-request':
        await self.send_policy_file()
    elif element_tree.tag == 'msg':
        self.logger.debug('Received valid XML data')
        try:
            body_tag = element_tree[0]
            action = body_tag.get('action')
            packet = XMLPacket(action)
            if packet in self.server.xml_listeners:
                xml_listeners = self.server.xml_listeners[packet]
                for listener in xml_listeners:
                    # Only invoke listeners bound to this client type (or
                    # listeners that accept any client type).
                    if listener.client_type is None or listener.client_type == self.client_type:
                        await listener(self, body_tag)
                self.received_packets.add(packet)
            else:
                # FIX: Logger.warn is deprecated; use warning().
                self.logger.warning('Packet did not contain a valid action attribute!')
        except IndexError:
            self.logger.warning('Received invalid XML data (didn\'t contain a body tag)')
    else:
        self.logger.warning('Received invalid XML data!')
def console_writer(msg, tab=-1):
    """ Internal API: Returns a prettified tree-based output of an XML
    message for debugging purposes. This helper function is used by the
    debug-transform command. """
    tab += 1
    if isinstance(msg, Model):
        msg = fromstring(msg.render(fragment=True))
    print('%s`- %s: %s %s' % (
        ' ' * tab,
        highlight(msg.tag, None, True),
        highlight(msg.text, 'red', False) if msg.text is not None else '',
        highlight(msg.attrib, 'green', True) if msg.attrib.keys() else ''
    ))
    # FIX: Element.getchildren() was removed in Python 3.9; iterating the
    # element directly is the supported equivalent.
    for c in msg:
        print(' %s`- %s: %s %s' % (
            ' ' * tab,
            highlight(c.tag, None, True),
            highlight(c.text, 'red', False) if c.text is not None else '',
            highlight(c.attrib, 'green', True) if c.attrib.keys() else ''
        ))
        for sc in c:
            tab += 1
            console_writer(sc, tab)
            tab -= 1
def main():
    """Report installed and available Drupal core versions as a JSON schema
    blob wrapped in BEGIN/END markers on stdout."""
    # Optional CLI arg: path of the Drupal installation relative to WORKING_DIR.
    path = sys.argv[1] if len(sys.argv) >= 2 else ''
    root = (WORKING_DIR / path).resolve()
    # NOTE(review): version auto-detection is disabled below and replaced by a
    # hard-coded placeholder — presumably temporary; confirm before release.
    #
    # version = find_version(
    #     MODERN_VERSION_FILES,
    #     MODERN_VERSION_REGEX,
    # )
    # if version is None:
    #     version = find_version(
    #         LEGACY_VERSION_FILES,
    #         LEGACY_VERSION_REGEX,
    #     )
    # if version is None:
    #     raise ValueError(
    #         f"Cannot find a supported Drupal installation in {root}"
    #     )
    version = '8.1.1'
    major, _, _ = version.partition('.')
    # Fetch the release feed for this major series (e.g. ".../8.x").
    versions_url = RELEASE_VERSIONS_URL_BASE + f'/{major}.x'
    version_feed = requests.get(versions_url)
    version_feed.raise_for_status()
    xml = ElementTree.fromstring(version_feed.text)
    releases = xml.find('releases')
    versions_available = []
    for release in releases:
        rel_ver = release.find('version')
        rel_tag = release.find('tag')
        rel_dl_url = release.find('download_link')
        rel_dl_sum = release.find('mdhash')
        # A <version_extra> child marks dev/pre-release builds.
        is_dev = release.find('version_extra') is not None
        # Skip releases missing any of the required elements.
        if None in [rel_ver, rel_tag, rel_dl_url, rel_dl_sum]:
            continue
        data = {
            'version': rel_ver.text,
            'tar': rel_dl_url.text,
            'md5': rel_dl_sum.text,
            'dev': is_dev,
        }
        # Record the tag only when it differs from the version string.
        if rel_tag.text != rel_ver.text:
            data['tag'] = rel_tag.text
        versions_available.append(data)
    # NOTE(review): the sort key receives each dict, not a version string —
    # verify semver.parse_version_info accepts these entries.
    versions_available.sort(key=semver.parse_version_info)
    schema_output = json.dumps({
        'dependencies': [{
            'name': 'Drupal',
            'installed': {
                'version': version,
                'series': f'{major}.x',
            },
            'available': versions_available,
            'path': str(root.relative_to(WORKING_DIR)),
            'source': 'drupal-core',
        }]
    })
    # Markers let the caller extract the JSON payload from mixed output.
    print(
        f'BEGIN_DEPENDENCIES_SCHEMA_OUTPUT>{schema_output}<END_DEPENDENCIES_SCHEMA_OUTPUT'
    )
def xmlstring_to_data(string: str) -> Any:
    """Parse an XML string into a dict, unwrapping the outer wrapper key."""
    if not cElementTree:
        raise NotImplementedError('Install defusedxml')
    root = cElementTree.fromstring(string)
    return XML._etree_to_dict(tree=root)[XML.wrapper_key]
def _xml2dict(xml, sanitize=True, prefix=None):
    """Return XML as dict.

    sanitize: strip namespace qualifiers from tag names when True.
    prefix: optional 2-tuple of (attribute_prefix, text_prefix) used to
        prefix attribute keys and the synthetic text "value" key.

    >>> _xml2dict('<?xml version="1.0" ?><root attr="name"><key>1</key></root>')
    {'root': {'key': 1, 'attr': 'name'}}
    """
    from defusedxml import cElementTree as etree  # delayed import

    at = tx = ""
    if prefix:
        at, tx = prefix

    def astype(value):
        # return value as int, float, bool, or str
        if not isinstance(value, str):
            return value
        for t in (int, float, asbool):
            try:
                return t(value)
            except (TypeError, ValueError):
                pass
        return value

    def etree2dict(t):
        # adapted from https://stackoverflow.com/a/10077069/453463
        key = t.tag
        if sanitize:
            # strip "{namespace}" qualifier, keep the local tag name
            key = key.rsplit("}", 1)[-1]
        d = {key: {} if t.attrib else None}
        children = list(t)
        if children:
            # group repeated child tags into lists; single values collapse
            dd = collections.defaultdict(list)
            for dc in map(etree2dict, children):
                for k, v in dc.items():
                    dd[k].append(astype(v))
            d = {
                key: {
                    k: astype(v[0]) if len(v) == 1 else astype(v)
                    for k, v in dd.items()
                }
            }
        if t.attrib:
            # attributes merge into the same dict, optionally prefixed
            d[key].update(
                (at + k, astype(v)) for k, v in t.attrib.items())
        if t.text:
            text = t.text.strip()
            if children or t.attrib:
                # mixed content: stash text under the (prefixed) "value" key
                if text:
                    d[key][tx + "value"] = astype(text)
            else:
                # leaf node: the text IS the value
                d[key] = astype(text)
        return d

    return etree2dict(etree.fromstring(xml))
def handle_xml_to_dict(xml_info, element):
    """Extract PROPERTY name/text pairs from OBJECT nodes whose basetype
    matches `element`, after dropping the first and last framing lines."""
    lines = xml_info.split('\n')
    detail_xml = ''.join(lines[1:-1])
    root = Et.fromstring(detail_xml)
    msg = {}
    for obj in root.iter('OBJECT'):
        if obj.get('basetype') == element:
            for prop in obj.iter('PROPERTY'):
                msg[prop.get('name')] = prop.text
    return msg
def string_to_element(element_as_string, include_namespaces=False):
    """
    :return: an element parsed from a string value, or the element as is
        if already parsed
    """
    # Guard clauses for already-parsed input.
    if element_as_string is None:
        return None
    if isinstance(element_as_string, ElementTree):
        return element_as_string.getroot()
    if isinstance(element_as_string, ElementType):
        return element_as_string

    element_as_string = _xml_content_to_string(element_as_string)
    if not isinstance(element_as_string, str):
        # Let cElementTree handle the error
        return fromstring(element_as_string)
    if not strip_xml_declaration(element_as_string):
        # Same as ElementTree().getroot()
        return None
    if include_namespaces:
        return fromstring(element_as_string)
    return fromstring(strip_namespaces(element_as_string))
def handle_xml_to_json(detail_info, element):
    """Collect one PROPERTY name/text dict per OBJECT whose basetype
    matches `element`, after dropping the first and last framing lines."""
    body_lines = detail_info.split('\n')[1:-1]
    root = Et.fromstring(''.join(body_lines))
    detail_arr = []
    for obj in root.iter('OBJECT'):
        if obj.get('basetype') != element:
            continue
        detail_arr.append(
            {prop.get('name'): prop.text for prop in obj.iter('PROPERTY')})
    return detail_arr
def html2text(element):
    """extract inner text from html

    Analog of jQuery's $(element).text()
    """
    if isinstance(element, py3compat.string_types):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # failed to parse, just return it unmodified
            return element
    # Depth-first concatenation of text, children's text, then tail.
    parts = [element.text or ""]
    parts.extend(html2text(child) for child in element)
    parts.append(element.tail or "")
    return "".join(parts)
def get_stream(url, order=0, page_type='', suborder=False, submenu=False):
    """Resolve a playable stream URL from a channel page or inline JSON.

    url: either a JSON object literal (starts with '{') holding
        resolution -> {'url': ...} entries, or a URL to fetch.
    order: index selecting the channel/resolution entry.
    page_type: 'json', 'xml' or 'm3u' — how to interpret the fetched page.
    suborder: when not False, index into a channel's 'submenu' list.
    submenu: unused here — TODO confirm callers still pass it.
    """
    print("url is: ")
    print(url)
    # cfscrape handles Cloudflare-protected hosts.
    scraper = cfscrape.create_scraper()
    # Build the optional proxy mapping from addon settings.
    if Addon().getSettingBool('enable_proxy') == True:
        if Addon().getSettingInt('proxy_type') == 0:
            proxy_type = 'http'
        elif Addon().getSettingInt('proxy_type') == 1:
            proxy_type = 'socks4'
        elif Addon().getSettingInt('proxy_type') == 2:
            proxy_type = 'socks5'
        proxy = {proxy_type : Addon().getSettingString('proxy_url')}
    else:
        proxy = {}
    if '{' in url:
        # "url" is actually inline JSON of resolution entries, not a URL.
        res_raw = json.loads(url)
        resolutions = []
        for res in res_raw.keys():
            resolutions.append(res_raw[res])
        #print(type(resolutions[int(order)]))
        return resolutions[int(order)]['url']
    else:
        # Device-identifying params expected by the FXML backend.
        page = scraper.get(url, params={"box_mac" : get_mac(), "box_hardware" : "Kodi FXML Helper"}, proxies=proxy)
        raw_page = page.text
        if suborder != False:
            # Channel nested inside a submenu list.
            return json.loads(raw_page)['channels'][int(suborder)]['submenu'][int(order)]['stream_url']
        else:
            print("suborder is: "+str(suborder))
            if page_type == 'json':
                if 'channels' in json.loads(raw_page):
                    return json.loads(raw_page)['channels'][int(order)]['stream_url']
                else:
                    # No channel list: treat payload as resolution entries.
                    res_raw = json.loads(raw_page)
                    resolutions = []
                    for res in res_raw.keys():
                        resolutions.append(res_raw[res])
                    #print(type(resolutions[int(order)]))
                    return resolutions[int(order)]['url']
            elif page_type == 'xml':
                # Nth <channel>'s <stream_url> text.
                return fromstring(raw_page).findall('channel')[int(order)].find('stream_url').text
            elif page_type == "m3u":
                # m3u pages are played directly by their URL.
                return url
            else:
                # Fallback: scrape the page for a stream URL list.
                url = get_page(url)
                print(url)
                return url[0]
def _discover_features(self):
    """Pull and parse the desc.xml so we can query it later."""
    try:
        # Fetch the device description document from the receiver.
        desc_xml = self._session.get(self.unit_desc_url).content
        if not desc_xml:
            # Empty body: device does not expose desc.xml at all.
            logger.error(
                "Unsupported Yamaha device? Failed to fetch {}".format(
                    self.unit_desc_url))
            return
        # Cache the parsed tree for later feature queries.
        self._desc_xml = cElementTree.fromstring(desc_xml)
    except xml.etree.ElementTree.ParseError:
        # Body was fetched but is not valid XML; log it and propagate.
        logger.exception("Invalid XML returned for request %s: %s",
                         self.unit_desc_url, desc_xml)
        raise
    except Exception:
        # Network / HTTP failure fetching the document.
        logger.exception("Failed to fetch %s" % self.unit_desc_url)
        raise
def _discover_features(self):
    """Pull and parse the desc.xml so we can query it later."""
    try:
        # Fetch the device description document from the receiver.
        desc_xml = self._session.get(self.unit_desc_url).content
        if not desc_xml:
            # Empty body: device does not expose desc.xml at all.
            logger.error(
                "Unsupported Yamaha device? Failed to fetch {}".format(
                    self.unit_desc_url
                ))
            return
        # Cache the parsed tree for later feature queries.
        self._desc_xml = cElementTree.fromstring(desc_xml)
    except xml.etree.ElementTree.ParseError:
        # Body was fetched but is not valid XML; log it and propagate.
        logger.exception("Invalid XML returned for request %s: %s",
                         self.unit_desc_url, desc_xml)
        raise
    except Exception:
        # Network / HTTP failure fetching the document.
        logger.exception("Failed to fetch %s" % self.unit_desc_url)
        raise
def _excepthook(exctype, value, tb):
    """Top-level exception hook: log a friendly fatal message for known
    HTTP failures, then the full traceback at info level."""
    if exctype == HTTPError:
        status = value.response.status_code
        if status == 401:
            logger.fatal("Authentication cookie expired. "
                         "Log in with `slipstream login`.")
        elif status == 403:
            logger.fatal("Invalid credentials provided. "
                         "Log in with `slipstream login`.")
        elif 'xml' in value.response.headers['content-type']:
            # Server returned an XML error document; surface its text.
            root = etree.fromstring(value.response.text)
            logger.fatal(root.text)
        else:
            logger.fatal(str(value))
    else:
        logger.fatal(str(value))
    # Full traceback goes to the info log only.
    buf = six.StringIO()
    traceback.print_exception(exctype, value, tb, file=buf)
    logger.info(buf.getvalue())
def parse_conf_xml_dom(xml_content):
    """
    @xml_content: XML DOM from splunkd
    Returns a list of stanza dicts parsed from a splunkd Atom feed:
    one dict per <entry>, with the stanza title, eai:acl metadata and
    each content key (lists preserved, "None" text mapped to None).
    """
    xml_content = xml_content.decode("utf-8")
    # Extract the default and "s" namespaces so element paths can be
    # fully qualified. Assumes both declarations are present in the feed.
    m = re.search(r'xmlns="([^"]+)"', xml_content)
    ns = m.group(1)
    m = re.search(r'xmlns:s="([^"]+)"', xml_content)
    sub_ns = m.group(1)
    entry_path = "./{%s}entry" % ns
    stanza_path = "./{%s}title" % ns
    key_path = "./{%s}content/{%s}dict/{%s}key" % (ns, sub_ns, sub_ns)
    meta_path = "./{%s}dict/{%s}key" % (sub_ns, sub_ns)
    list_path = "./{%s}list/{%s}item" % (sub_ns, sub_ns)

    xml_conf = et.fromstring(xml_content)
    stanza_objs = []
    for entry in xml_conf.iterfind(entry_path):
        # Take the first <title> as the stanza name; skip entries
        # without one (for/else: continue when no break occurred).
        for stanza in entry.iterfind(stanza_path):
            stanza_obj = {"name": stanza.text, "stanza": stanza.text}
            break
        else:
            continue

        for key in entry.iterfind(key_path):
            if key.get("name") == "eai:acl":
                # ACL block becomes a nested dict of its sub-keys.
                meta = {}
                for k in key.iterfind(meta_path):
                    meta[k.get("name")] = k.text
                stanza_obj[key.get("name")] = meta
            elif key.get("name") != "eai:attributes":
                name = key.get("name")
                # Strip the "eai:" prefix from remaining EAI keys.
                if name.startswith("eai:"):
                    name = name[4:]
                # A key may hold a list of items instead of plain text.
                list_vals = [k.text for k in key.iterfind(list_path)]
                if list_vals:
                    stanza_obj[name] = list_vals
                else:
                    stanza_obj[name] = key.text
                    # splunkd serializes null values as the string "None".
                    if key.text == "None":
                        stanza_obj[name] = None
        stanza_objs.append(stanza_obj)
    return stanza_objs
def list_collection(self, collection=None, app=None, owner="nobody"):
    """List collection names.

    :collection: collection name. When ``None``, list every collection
        in the system.
    :return: a list containing the collection names if successful;
        throws KVNotExists if no such collection, or another exception
        if another error happened
    """
    uri = self._get_config_endpoint(app, owner, collection)
    content = self._do_request(uri, method="GET")
    # Qualify the search path with the XML namespace when one is declared.
    match = re.search(r'xmlns="([^"]+)"', content)
    if match:
        ns = match.group(1)
        path = "./{%s}entry/{%s}title" % (ns, ns)
    else:
        path = "./entry/title"
    root = et.fromstring(content)
    return [node.text for node in root.iterfind(path)]
def add_anchor(html, anchor_link_text=u'¶'):
    """Add an id and an anchor-link to an html header

    For use on markdown headings
    """
    # Parse the header; unparseable input is returned as-is.
    try:
        h = ElementTree.fromstring(
            py3compat.cast_bytes_py2(html, encoding='utf-8'))
    except Exception:
        return html
    anchor_id = _convert_header_id(html2text(h))
    h.set('id', anchor_id)
    anchor = Element(
        "a", {"class": "anchor-link", "href": "#" + anchor_id})
    anchor.text = anchor_link_text
    h.append(anchor)
    # ElementTree.tostring() returns bytes on Python 3
    # (http://bugs.python.org/issue10942); decode back to text.
    return py3compat.decode(ElementTree.tostring(h), 'utf-8')
def _list_page(self, prefix, page_token=None, batch_size=1000):
    """Fetch one page of object keys with the given prefix.

    Returns (names, page_token) where page_token is None when the
    listing is complete, otherwise the marker for the next page.
    """
    # We can get at most 1000 keys at a time, so there's no need
    # to bother with streaming.
    query_string = {
        'prefix': prefix,
        'max-keys': str(batch_size)
    }
    if page_token:
        query_string['marker'] = page_token
    resp = self._do_request('GET', '/', query_string=query_string)
    if not XML_CONTENT_RE.match(resp.headers['Content-Type']):
        raise RuntimeError('unexpected content type: %s' %
                           resp.headers['Content-Type'])
    # NOTE(review): body is read from self.conn, not from resp —
    # presumably _do_request leaves the payload on the connection;
    # confirm against the connection class.
    body = self.conn.readall()
    etree = ElementTree.fromstring(body)
    root_xmlns_uri = _tag_xmlns_uri(etree)
    if root_xmlns_uri is None:
        root_xmlns_prefix = ''
    else:
        # Validate the XML namespace
        root_xmlns_prefix = '{%s}' % (root_xmlns_uri, )
        if root_xmlns_prefix != self.xml_ns_prefix:
            log.error('Unexpected server reply to list operation:\n%s',
                      self._dump_response(resp, body=body))
            raise RuntimeError('List response has unknown namespace')
    names = [
        x.findtext(root_xmlns_prefix + 'Key')
        for x in etree.findall(root_xmlns_prefix + 'Contents')
    ]
    # Decide the continuation token for the next page.
    is_truncated = etree.find(root_xmlns_prefix + 'IsTruncated')
    if is_truncated.text == 'false':
        # Listing complete.
        page_token = None
    elif len(names) == 0:
        # Truncated but no keys returned: fall back to NextMarker.
        next_marker = etree.find(root_xmlns_prefix + 'NextMarker')
        page_token = next_marker.text
    else:
        # Continue after the last key we received.
        page_token = names[-1]
    return (names, page_token)
def convert_queue_status(queue_status_output):
    """Convert SGE `qstat -xml` output into a normalized DataFrame.

    :param queue_status_output: XML string; first child holds running
        jobs, second child holds pending jobs
    :return: DataFrame with columns jobid, user, jobname, status
    """
    def leaf_to_dict(leaf):
        # One dict per job entry: child tag -> text.
        return [{sub_child.tag: sub_child.text for sub_child in child}
                for child in leaf]

    tree = ETree.fromstring(queue_status_output)
    df_running_jobs = pandas.DataFrame(leaf_to_dict(leaf=tree[0]))
    df_pending_jobs = pandas.DataFrame(leaf_to_dict(leaf=tree[1]))
    # FIX: DataFrame.append() was removed in pandas 2.0; concat is the
    # supported equivalent (sort=True keeps the column ordering behavior).
    df_merge = pandas.concat([df_running_jobs, df_pending_jobs], sort=True)
    # Map SGE state codes to human-readable status values.
    df_merge.loc[df_merge.state == "r", 'state'] = "running"
    df_merge.loc[df_merge.state == "qw", 'state'] = "pending"
    df_merge.loc[df_merge.state == "Eqw", 'state'] = "error"
    return pandas.DataFrame({
        "jobid": pandas.to_numeric(df_merge.JB_job_number),
        "user": df_merge.JB_owner,
        "jobname": df_merge.JB_name,
        "status": df_merge.state,
    })
def convert_queue_status(queue_status_output):
    """Convert SGE `qstat -xml` output into a normalized DataFrame.

    :param queue_status_output: XML string; first child holds running
        jobs, second child holds pending jobs
    :return: DataFrame with columns jobid, user, jobname, status
    """
    def leaf_to_dict(leaf):
        # One dict per job entry: child tag -> text.
        return [{sub_child.tag: sub_child.text for sub_child in child}
                for child in leaf]

    tree = ETree.fromstring(queue_status_output)
    df_running_jobs = pandas.DataFrame(leaf_to_dict(leaf=tree[0]))
    df_pending_jobs = pandas.DataFrame(leaf_to_dict(leaf=tree[1]))
    # FIX: DataFrame.append() was removed in pandas 2.0; concat is the
    # supported equivalent (sort=True keeps the column ordering behavior).
    df_merge = pandas.concat([df_running_jobs, df_pending_jobs], sort=True)
    # FIX: chained assignment (df.state[mask] = ...) writes to a copy and
    # raises SettingWithCopyWarning; .loc assigns in place reliably.
    df_merge.loc[df_merge.state == 'r', 'state'] = 'running'
    df_merge.loc[df_merge.state == 'qw', 'state'] = 'pending'
    df_merge.loc[df_merge.state == 'Eqw', 'state'] = 'error'
    return pandas.DataFrame({
        'jobid': pandas.to_numeric(df_merge.JB_job_number),
        'user': df_merge.JB_owner,
        'jobname': df_merge.JB_name,
        'status': df_merge.state
    })
def clean(self):
    """Validate the uploaded OPML file and extract its feed outlines."""
    if 'opml_file' not in self.cleaned_data:
        return self.cleaned_data
    cleaned = super().clean()
    uploaded = cleaned['opml_file']
    if uploaded.size > MAX_OPML_FILE_SIZE:
        raise forms.ValidationError(
            'Your OPML file was too large: %(size)s',
            code="too_large", params={'size': uploaded.size})
    if uploaded.content_type not in OPML_CONTENT_TYPES:
        raise forms.ValidationError(
            'The file was not a valid OPML file: %(content_type)s',
            code="invalid",
            params={'content_type': uploaded.content_type})
    file_bytes = uploaded.read()
    try:
        # forbid_dtd guards against entity-expansion attacks on upload
        opml_root = fromstring(file_bytes, forbid_dtd=True)
        feeds = opml_root.findall('./body//outline[@xmlUrl]')
        if not feeds:
            raise forms.ValidationError(
                'No feeds were found in the OPML file', code="no_feeds")
    except DefusedXmlException:
        raise forms.ValidationError(
            'The file was not a valid OPML file', code="invalid")
    except SyntaxError:
        # ElementTree's ParseError subclasses SyntaxError, which is what
        # actually propagates here
        raise forms.ValidationError(
            'The file was not a valid OPML file', code="invalid")
    cleaned['feeds'] = feeds
    return cleaned
def console_writer(msg, tab=-1):
    """ Internal API: Returns a prettified tree-based output of an XML
    message for debugging purposes. This helper function is used by the
    debug-transform command. """
    tab += 1
    if isinstance(msg, Model):
        msg = fromstring(msg.render(fragment=True))
    print('%s`- %s: %s %s' % (' ' * tab,
          highlight(msg.tag, None, True),
          highlight(msg.text, 'red', False) if msg.text is not None else '',
          highlight(msg.attrib, 'green', True) if msg.attrib.keys() else ''))
    # FIX: Element.getchildren() was removed in Python 3.9; iterating the
    # element directly is the supported equivalent.
    for c in msg:
        print(' %s`- %s: %s %s' % (' ' * tab,
              highlight(c.tag, None, True),
              highlight(c.text, 'red', False) if c.text is not None else '',
              highlight(c.attrib, 'green', True) if c.attrib.keys() else ''))
        for sc in c:
            tab += 1
            console_writer(sc, tab)
            tab -= 1
def insertAmps(self, xmlfile):
    """Insert data from amps file into database.

    Args:
        xmlfile (str): XML file containing peak ground motion data.
    """
    _, fname = os.path.split(xmlfile)
    try:
        xmlstr = open(xmlfile, 'r').read()
        # sometimes these records have non-ascii bytes in them
        newxmlstr = re.sub(r'[^\x00-\x7F]+', ' ', xmlstr)
        # newxmlstr = _invalid_xml_remove(xmlstr)
        newxmlstr = newxmlstr.encode('utf-8', errors='xmlcharrefreplace')
        # dET is the hardened (defusedxml) parser: input is untrusted.
        amps = dET.fromstring(newxmlstr)
    except Exception as e:
        raise Exception('Could not parse %s, due to error "%s"' %
                        (xmlfile, str(e)))
    if amps.tag != 'amplitudes':
        raise Exception('%s does not appear to be an amplitude XML '
                        'file.' % xmlfile)
    agency = amps.get('agency')
    record = amps.find('record')
    timing = record.find('timing')
    reference = timing.find('reference')
    # Collect the record time: either a single PGMTime element or the
    # individual year/month/... components.
    has_pgm = False
    time_dict = {}
    for child in reference.iter():
        node_name = child.tag
        if node_name == 'PGMTime':
            has_pgm = True
        elif node_name == 'year':
            time_dict['year'] = int(child.get('value'))
        elif node_name == 'month':
            time_dict['month'] = int(child.get('value'))
        elif node_name == 'day':
            time_dict['day'] = int(child.get('value'))
        elif node_name == 'hour':
            time_dict['hour'] = int(child.get('value'))
        elif node_name == 'minute':
            time_dict['minute'] = int(child.get('value'))
        elif node_name == 'second':
            time_dict['second'] = int(child.get('value'))
        elif node_name == 'msec':
            time_dict['msec'] = int(child.get('value'))
    if has_pgm:
        # PGMTime is an ISO timestamp; take the first 19 chars and
        # treat it as UTC.
        pgmtime_str = reference.find('PGMTime').text
        pgmdate = datetime.strptime(pgmtime_str[0:19], TIMEFMT).\
            replace(tzinfo=timezone.utc)
        pgmtime = int(dt_to_timestamp(pgmdate))
    else:
        if not len(time_dict):
            # No usable time information at all; skip this file.
            print('No time data for file %s' % fname)
            return
        pgmdate = datetime(time_dict['year'],
                           time_dict['month'],
                           time_dict['day'],
                           time_dict['hour'],
                           time_dict['minute'],
                           time_dict['second'])
        pgmtime = dt_to_timestamp(pgmdate)
    # there are often multiple stations per file, but they're
    # all duplicates of each other, so just grab the information
    # from the first one
    station = record.find('station')
    attrib = dict(station.items())
    lat = float(attrib['lat'])
    lon = float(attrib['lon'])
    code = attrib['code']
    name = attrib['name']
    # Network id may appear under different attribute names; fall back
    # to the file-level agency.
    if 'net' in attrib:
        network = attrib['net']
    elif 'netid' in attrib:
        network = attrib['netid']
    else:
        network = agency
    #
    # The station (at this pgmtime +/- 10 seconds) might already exist
    # in the DB; if it does, use it
    #
    self._cursor.execute('BEGIN EXCLUSIVE')
    query = ('SELECT id, timestamp FROM station where network = ? and '
             'code = ? and timestamp > ? and timestamp < ?')
    self._cursor.execute(query, (network, code, pgmtime - 10,
                                 pgmtime + 10))
    #
    # It's possible that the query returned more than one station; pick
    # the one closest to the new station's pgmtime
    #
    rows = self._cursor.fetchall()
    best_sid = None
    best_time = None
    for row in rows:
        dtime = abs(row[1] - pgmtime)
        if best_time is None or dtime < best_time:
            best_time = dtime
            best_sid = row[0]
    inserted_station = False
    if best_sid is None:
        # No close-enough existing station: create one.
        fmt = ('INSERT INTO station '
               '(timestamp, lat, lon, name, code, network) '
               'VALUES (?, ?, ?, ?, ?, ?)')
        self._cursor.execute(fmt, (pgmtime, lat, lon, name, code,
                                   network))
        best_sid = self._cursor.lastrowid
        inserted_station = True
    #
    # If the station is already there, it has at least one channel, too
    #
    existing_channels = {}
    if inserted_station is False:
        chan_query = 'SELECT channel, id FROM channel where station_id = ?'
        self._cursor.execute(chan_query, (best_sid, ))
        rows = self._cursor.fetchall()
        existing_channels = dict(rows)
    # might need these
    insert_channel = ('INSERT INTO channel '
                      '(station_id, channel, loc)'
                      'VALUES (?, ?, ?)')
    insert_pgm = ('INSERT INTO pgm '
                  '(channel_id, imt, value)'
                  'VALUES (?, ?, ?)')
    # loop over components
    channels_inserted = 0
    for channel in record.iter('component'):
        # We don't want channels with qual > 4 (assuming qual is Cosmos
        # table 6 value)
        qual = channel.get('qual')
        if qual:
            try:
                iqual = int(qual)
            except ValueError:
                # qual is something we don't understand
                iqual = 0
        else:
            iqual = 0
        if iqual > 4:
            continue
        loc = channel.get('loc')
        if not loc:
            # Default location code when not supplied.
            loc = '--'
        cname = channel.get('name')
        if cname in existing_channels:
            best_cid = existing_channels[cname]
            inserted_channel = False
        else:
            self._cursor.execute(insert_channel, (best_sid, cname, loc))
            best_cid = self._cursor.lastrowid
            inserted_channel = True
            channels_inserted += 1
        #
        # Similarly, if the channel is already there, we don't want to
        # insert repeated IMTs (and updating them doesn't make a lot of
        # sense)
        #
        existing_pgms = {}
        if inserted_channel is False:
            pgm_query = 'SELECT imt, id FROM pgm where channel_id = ?'
            self._cursor.execute(pgm_query, (best_cid, ))
            rows = self._cursor.fetchall()
            existing_pgms = dict(rows)
        # loop over imts in channel
        pgm_list = []
        for pgm in list(channel):
            imt = pgm.tag
            if imt not in IMTS:
                continue
            try:
                value = float(pgm.get('value'))
            except ValueError:
                #
                # Couldn't interpret the value for some reason
                #
                continue
            if imt == 'sa':
                # Spectral acceleration: encode the period into the IMT
                # name (e.g. sa @ 0.3 -> psa03) and convert from m/s^2
                # to g.
                imt = 'p' + imt + pgm.get('period').replace('.', '')
                value = value / 9.81
            if imt in IMTDICT:
                imt = IMTDICT[imt]
            if imt == 'pga':
                # PGA is also converted from m/s^2 to g.
                value = value / 9.81
            if imt in existing_pgms:
                continue
            pgm_list.append((best_cid, imt, value))
        if len(pgm_list) > 0:
            #
            # Insert the new amps
            #
            self._cursor.executemany(insert_pgm, pgm_list)
        elif inserted_channel:
            #
            # If we didn't insert any amps, but we inserted the channel,
            # delete the channel
            #
            channel_delete = 'DELETE FROM channel WHERE id = ?'
            self._cursor.execute(channel_delete, (best_cid, ))
            channels_inserted -= 1
        # End of pgm loop
    # End of channel loop
    #
    # If we inserted the station but no channels, delete the station
    #
    if channels_inserted == 0 and inserted_station:
        station_delete = 'DELETE FROM station WHERE id = ?'
        self._cursor.execute(station_delete, (best_sid, ))
    self.commit()
    return
# -*- coding: utf-8 -*- import xml.etree.cElementTree as badET import defusedxml.cElementTree as goodET xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>" # unsafe tree = badET.fromstring(xmlString) print(tree) badET.parse("filethatdoesntexist.xml") badET.iterparse("filethatdoesntexist.xml") a = badET.XMLParser() # safe tree = goodET.fromstring(xmlString) print(tree) goodET.parse("filethatdoesntexist.xml") goodET.iterparse("filethatdoesntexist.xml") a = goodET.XMLParser()
def parse_xml_string(xml_content):
    """Parse an XML string and hand the root element to _parse_xml."""
    root = ET.fromstring(xml_content)
    return _parse_xml(root)
async def begin_request(self, parent_ctx: Context, request: HTTPRequest):
    """Handle one XML-RPC HTTP request end to end.

    Validates the HTTP envelope, parses the methodCall document,
    dispatches to the registered method and renders a methodResponse.
    """
    # Check that the HTTP method was "POST"
    if request.method != 'POST':
        raise HTTPError(405, 'POST required for XML-RPC')

    # Check that the content type is correct
    if request.content_type != 'text/xml':
        raise HTTPError(400, 'Wrong content-type for XML-RPC (must be text/xml)')

    # Parse the XML request
    body = await request.body.read()
    try:
        document = cElementTree.fromstring(body.decode('utf-8'))
    except (UnicodeDecodeError, ParseError) as e:
        raise XMLRPCError(-32701, 'Parse error: %s' % e)

    # Find the requested method name.
    # BUG FIX: an Element with no children is falsy, so "if not methodname"
    # rejected every found <methodName>; test for None / empty text instead.
    methodname = document.find('methodName')
    if methodname is None or not methodname.text:
        raise XMLRPCError(-32600, 'Server error: invalid xml-rpc')

    # Find the callable by the method name.
    # BUG FIX: look up by the element's text, not the Element object itself.
    method = self.methods.get(methodname.text)
    if method is None:
        raise XMLRPCError(-32601, 'Server error: method not found')

    # Deserialize the arguments
    param_elements = document.findall('params/param/value')
    try:
        async with threadpool():
            args = [deserialize(element) for element in param_elements]
    except Exception as e:
        raise XMLRPCError(-32602, 'Server error: invalid arguments: %s' % e)

    # Create a request context and call the callable in it
    async with HTTPRequestContext(parent_ctx, self, request) as ctx:
        retval = exception = fault_code = None
        try:
            retval = await method(ctx, *args)
        except XMLRPCError as e:
            exception = e
            fault_code = e.fault_code
        except Exception as e:
            exception = e
            fault_code = -32500
            logger.exception('Error during method invocation')

        # Serialize the return value
        serialized_retval = None
        try:
            serialized_retval = await call_in_executor(serialize, retval)
        except Exception as e:
            exception = e
            fault_code = -32603

        # Finish the request context
        try:
            await ctx.dispatch_event('finished', exception)
        except Exception as e:
            exception = e
            fault_code = -32400
            logger.exception('Error during XML-RPC request context teardown')

        # Generate the methodResponse XML
        if exception is None:
            body = ('<?xml version="1.0"?>\n<methodResponse><params><value>%s'
                    '</value></params></methodResponse>' % serialized_retval)
        else:
            fault = serialize({'faultCode': fault_code,
                               'faultString': str(exception)})
            body = ('<?xml version="1.0"?>\n<methodResponse><fault>%s'
                    '</fault></methodResponse>' % fault)

        ctx.response.content_type = 'text/xml'
def index():
    """Flask endpoint: accept a (doubly nested, escaped) XML dataset via
    POST, validate it, and insert it into the ``DB_TABLE`` table, then
    propagate matching fields into the ``DB_TABLE_CS`` table.

    Returns a small ``<RESPONSE>...</RESPONSE>`` XML string in all cases.
    """
    if request.method == 'GET':
        return "<RESPONSE>Send XML data</RESPONSE>"
    if request.method == 'POST':
        # Database connection
        try:
            conn = psycopg2.connect(database=DB_NAME, user=DB_USER,
                                    password=DB_PASS, host=DB_HOST)
            cursor = conn.cursor()
        except Exception:
            # narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
            # are not swallowed
            return "<RESPONSE>Error connecting to database</RESPONSE>"
        try:
            data = request.data
            if isinstance(data, bytes):
                data = request.data.decode('utf-8')

            # Strip an XML declaration ("<?xml ... ?>") if one is present
            declaration = data.find("?>")
            if declaration != -1:
                data = data[declaration + 2:]

            # Parse XML data: the payload carries an escaped XML document
            # nested inside the outer envelope, hence the double parse.
            dataTag = ET.fromstring(data)
            data = dataTag[0][0][0].text
            data = codecs.getdecoder("unicode_escape")(data)[0]
            dataTag = ET.fromstring(data)
            dataTag = dataTag[0]

            # Validate tag names (they become SQL identifiers below, so this
            # whitelist check is essential)
            for x in dataTag:
                if not re.match(VALID_IDENTIFIER, x.tag):
                    return "<RESPONSE>Invalid XML (Tag name)</RESPONSE>"

            # Validate tag values
            for x in dataTag:
                if x.text is None:
                    x.text = ""
                if not re.match(VALID_VALUE, x.text):
                    return "<RESPONSE>Invalid XML (Tag value)</RESPONSE>"

            # Create table if not exists
            cursor.execute(
                "select * from information_schema.tables where table_name=%s",
                (DB_TABLE, ))
            exists = bool(cursor.rowcount)
            if not exists:
                tags = ",".join(
                    ['"{0}" character varying'.format(x.tag) for x in dataTag])
                tags = tags + ',"timestamp" timestamp with time zone'
                query = 'CREATE TABLE public."{0}" ({1}) WITH (OIDS=FALSE)'.format(
                    DB_TABLE, tags)
                cursor.execute(query)
                conn.commit()

            # Check for duplicates in DB.
            # SECURITY FIX: tag values are now bound as query parameters
            # instead of being formatted into the SQL string; identifiers
            # remain interpolated but were regex-validated above.
            values = tuple(x.text for x in dataTag)
            cond = " AND ".join(['"{0}" = %s'.format(x.tag) for x in dataTag])
            query = 'SELECT * FROM "{0}" WHERE {1} LIMIT 1'.format(
                DB_TABLE, cond)
            cursor.execute(query, values)
            res = cursor.fetchall()
            if len(res) != 0:
                return "<RESPONSE>Dataset already in database</RESPONSE>"

            # Insert in DB (values parameterized as above)
            tags = ",".join(['"{0}"'.format(x.tag) for x in dataTag])
            placeholders = ",".join(["%s"] * len(values))
            query = 'INSERT INTO "{0}"({1}, "timestamp") VALUES({2}, NOW()) '.format(
                DB_TABLE, tags, placeholders)
            cursor.execute(query, values)
            conn.commit()

            # Update CS table: map incoming tags through the UPDATE/SEARCH
            # lookup tables to build the SET and WHERE clauses.
            search = []
            update = []
            search_params = []
            update_params = []
            for x in dataTag:
                match = UPDATE[x.tag]
                if match != "":
                    update.append('"{0}" = %s'.format(match))
                    update_params.append(x.text)
                match = SEARCH[x.tag]
                if match != "":
                    search.append('"{0}" = %s'.format(match))
                    search_params.append(x.text)
            query = 'UPDATE "{0}" SET {1} WHERE {2}'.format(
                DB_TABLE_CS, ",".join(update), " AND ".join(search))
            cursor.execute(query, update_params + search_params)
            conn.commit()
        except Exception:
            return "<RESPONSE>Invalid XML</RESPONSE>"
        return "<RESPONSE>OK</RESPONSE>"
def cs(): if request.method == 'GET': return "<RESPONSE>Send XML data</RESPONSE>" if request.method == 'POST': # Database connection try: conn = psycopg2.connect(database=DB_NAME, user=DB_USER, password=DB_PASS, host=DB_HOST) cursor = conn.cursor() except: return "<RESPONSE>Error connecting to database</RESPONSE>" try: data = request.data if type(data) == bytes: data = request.data.decode('utf-8') # Parse XML data dataTag = ET.fromstring(data) for x in dataTag: if not re.match(VALID_IDENTIFIER, x.tag): return "<RESPONSE>Invalid XML (Tag name)</RESPONSE>" # Validate tag values for x in dataTag: if x.text == None: x.text = "" if not re.match(VALID_VALUE, x.text): return "<RESPONSE>Invalid XML (Tag value)</RESPONSE>" # Create table if not exists cursor.execute( "select * from information_schema.tables where table_name=%s", (DB_TABLE_CS, )) exists = bool(cursor.rowcount) if not exists: tags = ",".join( ['"{0}" character varying'.format(x.tag) for x in dataTag]) tags = tags + ',"timestamp" timestamp with time zone' tags = tags + ',"printed" boolean DEFAULT false' query = 'CREATE TABLE public."{0}" ({1}) WITH (OIDS=FALSE)'.format( DB_TABLE_CS, tags) cursor.execute(query) conn.commit() # Check for duplicates in DB cond = " AND ".join( ["\"{0}\" = '{1}'".format(x.tag, x.text) for x in dataTag]) query = 'SELECT * FROM "{0}" WHERE {1} LIMIT 1'.format( DB_TABLE_CS, cond) cursor.execute(query) res = cursor.fetchall() conn.commit() if len(res) != 0: return "<RESPONSE>Dataset already in database</RESPONSE>" tags = ",".join(['"{0}"'.format(x.tag) for x in dataTag]) values = ",".join(["'{0}'".format(x.text) for x in dataTag]) # Insert in DB query = 'INSERT INTO "{0}"({1}, "timestamp") VALUES({2}, NOW())'.format( DB_TABLE_CS, tags, values) cursor.execute(query) conn.commit() except: return "<RESPONSE>Invalid XML</RESPONSE>" return "<RESPONSE>OK</RESPONSE>"
def get_valores_series( self, codigo_serie: str, data_inicio: str, data_fim: str ) -> pd.DataFrame: """ Solicita uma série temporal ao SGS. Parâmetros: * codigo_serie(int): código da série * data_inicio(str): data de inicio no format dd/mm/yyyy * data_fim(str): data de fim no format dd/mm/yyyy Retorna dataframe contendo os valores da série temporal. """ method = "getValoresSeriesXML" params = OrderedDict( [ ("codigosSeries", codigo_serie), ("dataInicio", data_inicio), ("dataFim", data_fim), ] ) wssg_response = self.requests_wssgs(method, params) if "Value(s) not found" in wssg_response.decode(): msg = ( "Valores não encontrados." " Verifique o código da série e a data de vigência." ) raise ValueError(msg) tree = ET.fromstring(wssg_response) xml_return = list(tree.iter(tag="getValoresSeriesXMLReturn"))[0] serie = ET.fromstring(xml_return.text.encode("ISO-8859-1"))[0] colum_names = [i.tag for i in serie[0]] serie_temporal = [] for item in serie: values = [] for coluna in item: val = coluna.text if coluna.tag.startswith("DATA"): val = parse_data(coluna.text) if coluna.tag.startswith("VALOR"): try: val = float(val) except TypeError: # trata valores nulos val = np.nan values.append(val) serie_temporal.append(values) print(serie_temporal) df = pd.DataFrame(serie_temporal, columns=colum_names) for col in df: if col.startswith("DATA"): df.index = pd.to_datetime(df[col], dayfirst=True) df = df.drop("DATA", axis=1) if "BLOQUEADO" in df.columns: del df["BLOQUEADO"] df = df.rename(columns={"VALOR": codigo_serie}) return df
def __init__(self, xml_doc, logging_channel='CrossDomain'): self._dom = ET.fromstring(xml_doc) self.logger = logging.getLogger(logging_channel)
def parse_stig(stig_location):
    """Parse a DISA STIG XCCDF file and upsert its rules into the database.

    Reads the benchmark at *stig_location*, extracts the STIG name,
    description, version and release info, creates the Stig row if it does
    not exist, then walks every Group/Rule, creating Rule, CVE and CCI rows
    as needed and linking rules to the STIG.

    :param stig_location: filesystem path to the XCCDF XML file
    :raises ParseError: when required benchmark metadata is missing/garbled
    """
    with open(stig_location, 'rb') as stig_file:
        contents = stig_file.read()
    tree = ElementTree.fromstring(contents)

    # parse name
    try:
        stig_name = tree.find('{}title'.format(XCCDF_1_1)).text.strip()
    except AttributeError:
        raise ParseError('title element not present in STIG')

    # parse description, version & release date
    description = get_elem_text(tree.find('{}description'.format(XCCDF_1_1)))
    version = tree.find('{}version'.format(XCCDF_1_1))
    plain_text = tree.find('{}plain-text'.format(XCCDF_1_1))
    if None in (version, plain_text):
        raise ParseError('version info missing from STIG')
    release_info = re.match(
        r'Release:\s+(\d+)\sBenchmark\sDate:\s+(.*)', plain_text.text, re.I)
    if release_info is None:
        raise ParseError('Unable to parse release-info: {}'.format(
            plain_text.text))
    version_text = version.text
    release_num = release_info.group(1)
    release_date = release_info.group(2)

    # try to parse dates correctly because DISA (abbreviated month first,
    # then full month name)
    try:
        release_datetime = datetime.datetime.strptime(release_date,
                                                      '%d %b %Y')
    except ValueError:
        release_datetime = datetime.datetime.strptime(release_date,
                                                      '%d %B %Y')

    stig = stig_models.Stig.query.filter_by(
        name=stig_name, version=version_text, release=release_num).first()
    if not stig:
        stig = stig_models.Stig(
            name=stig_name, description=description, version=version_text,
            release=release_num, release_date=release_datetime)
        db.session.add(stig)
        db.session.commit()

    # parse checks
    groups = tree.findall('{}Group'.format(XCCDF_1_1))
    profiles = tree.findall('{}Profile'.format(XCCDF_1_1))
    for group in groups:
        group_id = group.get('id')
        group_title = get_elem_text(group.find(
            '{}title'.format(XCCDF_1_1)))
        # collect the profiles in which this group is selected
        stig_profiles = []
        for profile in profiles:
            selector = '{}select[@idref=\'{}\'][@selected=\'true\']'.format(
                XCCDF_1_1, group_id)
            if profile.find(selector) is not None:
                stig_profiles.append(profile.get('id'))
        for rule in group.findall('{}Rule'.format(XCCDF_1_1)):
            unparsed_rule_id = rule.get('id')
            # NOTE(review): assumes every rule id matches "<id>r<rev>_rule";
            # a non-matching id would raise AttributeError here.
            full_rule_id = re.match(
                r'(.*)r(\d+)_rule', unparsed_rule_id, re.I)
            rule_id = full_rule_id.group(1)
            rule_revision = full_rule_id.group(2)
            rule_severity = rule.get('severity')
            rule_title = get_elem_text(rule.find(
                '{}title'.format(XCCDF_1_1)))

            # yo dawg we heard you like XML so we put (sometimes) HTML
            # escaped XML in your XML so you can parse XML while you parse
            # XML! - #DISA2012
            # FIX: regex flags are bitmasks and should be combined with "|"
            # (same value as "+" for distinct flags, but the correct idiom).
            metadata = dict(re.findall(
                r'(?:<|<)(?P<tag>.+?)(?:>|>)'
                '(?P<value>.+?)(?:<|<)/(?P=tag)(?:>|>)',
                get_elem_text(rule.find(
                    '{}description'.format(XCCDF_1_1))),
                re.IGNORECASE | re.DOTALL | re.MULTILINE | re.UNICODE
            ))
            metadata.update({
                'mac_profiles': stig_profiles,
                'version': get_elem_text(rule.find('{}version'.format(
                    XCCDF_1_1))),
            })

            # check content: prefer inline text, fall back to a named
            # reference, then to a placeholder
            check_content = rule.find('.//{}check-content'.format(
                XCCDF_1_1))
            if check_content is not None:
                check_content = check_content.text
            else:
                check_content = rule.find('.//{}check-content-ref'.format(
                    XCCDF_1_1))
                if check_content is not None:
                    check_content = check_content.get('name')
                else:
                    check_content = 'No check content given'

            fix_text = rule.find('{}fixtext'.format(XCCDF_1_1))
            if fix_text is not None:
                fix_text = fix_text.text
            else:
                fix_text = 'No fix text given'

            # upsert CVE identifiers referenced by this rule
            cves = [get_elem_text(ident) for ident in rule.findall(
                '{}ident'
                '[@system=\'http://cve.mitre.org\']'.format(
                    XCCDF_1_1)) or []]
            cve_objects = []
            for cve in cves:
                cve_object = stig_models.CVE.query.filter_by(id=cve).first()
                if not cve_object:
                    cve_object = stig_models.CVE(id=cve)
                    db.session.add(cve_object)
                    db.session.commit()
                cve_objects.append(cve_object)

            # upsert CCI identifiers referenced by this rule
            ccis = [get_elem_text(ident) for ident in rule.findall(
                '{}ident'
                '[@system=\'http://iase.disa.mil/cci\']'.format(
                    XCCDF_1_1)) or []]
            cci_objects = []
            for cci in ccis:
                cci_object = Cci.query.filter_by(cci_id=cci).first()
                if not cci_object:
                    # FIX: Logger.warn is a deprecated alias of warning
                    flask.current_app.logger.warning(
                        'Found non-existent CCI {}, creating it...'.format(
                            cci))
                    cci_object = Cci(cci_id=cci)
                    db.session.add(cci_object)
                    db.session.commit()
                cci_objects.append(cci_object)

            # try to insert rule
            rule = stig_models.Rule.query.filter_by(
                full_rule_id=unparsed_rule_id).first()
            if rule:
                flask.current_app.logger.info(
                    'Found existing rule "{}"'.format(unparsed_rule_id))
                if rule not in stig.rules:
                    flask.current_app.logger.info(
                        'Rule "{}" was not in STIG "{}", adding it...'.format(
                            unparsed_rule_id, stig_name))
                    stig.rules.append(rule)
                    db.session.commit()
            else:
                flask.current_app.logger.info(
                    'Creating new rule "{}" in STIG "{}"'.format(
                        unparsed_rule_id, stig_name))
                rule = stig_models.Rule(
                    group_id=group_id,
                    group_title=group_title,
                    full_rule_id=unparsed_rule_id,
                    rule_id=rule_id,
                    rule_revision=rule_revision,
                    rule_severity=stig_models.RuleSeverity(rule_severity),
                    rule_title=rule_title,
                    rule_metadata=metadata,
                    check_content=check_content,
                    fix_text=fix_text,
                    cves=cve_objects,
                    ccis=cci_objects,
                )
                db.session.add(rule)
                stig.rules.append(rule)
                db.session.commit()
def xml_get(self, url): response = self.session.get('%s%s' % (self.endpoint, url), headers={'Accept': 'application/xml'}) response.raise_for_status() return etree.fromstring(response.text)
import xml.etree.cElementTree as badET import defusedxml.cElementTree as goodET xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>" # unsafe tree = badET.fromstring(xmlString) print(tree) badET.parse('filethatdoesntexist.xml') badET.iterparse('filethatdoesntexist.xml') a = badET.XMLParser() # safe tree = goodET.fromstring(xmlString) print(tree) goodET.parse('filethatdoesntexist.xml') goodET.iterparse('filethatdoesntexist.xml') a = goodET.XMLParser()