def get_forecast_from_api():
    """Fetch the weather forecast from api.met.no.

    The request is issued for the coordinates of the weather station at
    Bygdøy, which is used for the weather forecast at Fornebu.  The XML
    response is converted to a dictionary using the BadgerFish convention.

    If the request fails (connection problem, timeout or an HTTP error
    status), a message is printed and the program exits with status 1.

    :return data: the value stored under ``weatherdata -> product -> time``
        in the converted response
    """
    # Define endpoint and parameters
    # (lat and lon are for the weather station at Bygdøy used for weather
    # forecast at Fornebu)
    endpoint = "https://api.met.no/weatherapi/locationforecast/1.9/"
    parameters = {"lat": 59.90, "lon": 10.69, "msl": 15}

    try:
        # Without a timeout requests waits indefinitely for the server.
        r = requests.get(endpoint, parameters, timeout=30)
        # Treat 4xx/5xx answers as failures instead of handing an error
        # page to the XML parser below.
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Not possible to get the weather forecast from api.met.no")
        print(e)
        # Non-zero status so that callers of the program can detect the failure.
        sys.exit(1)

    # Handling the XML response
    bf = BadgerFish()
    # Round trip through JSON so the result consists of plain dicts and lists.
    json_string = json.dumps(bf.data(et.fromstring(r.content)))
    json_dict = json.loads(json_string)

    # Collect the relevant part of the response
    data = json_dict["weatherdata"]["product"]["time"]
    return data
def get_reports():
    """Parse every file found in ``path_r`` into a BadgerFish dictionary.

    Returns:
        list: one converted dictionary per directory entry, in the order
        returned by ``listdir``.
    """
    converter = BadgerFish(dict_type=dict)
    parsed = []
    for entry in listdir(path_r):
        report_path = path_r + '/' + entry
        with open(report_path, "r") as handle:
            content = handle.read()
        parsed.append(converter.data(fromstring(content)))
    return parsed
def convertXmlToDict(xmlContent):
    """Convert an XML string to a dictionary using the BadgerFish convention.

    The converted structure is serialised to JSON and loaded back, so the
    value returned is whatever ``json.loads`` produces for it.
    """
    from collections import OrderedDict
    import xml.etree.ElementTree as ET
    from xmljson import BadgerFish

    # Parse the XML into an element tree, then into a BadgerFish mapping.
    converter = BadgerFish(dict_type=OrderedDict, xml_fromstring=False)
    root = ET.fromstring(xmlContent)
    ordered = converter.data(root)

    # Dump to a JSON string and load it back.
    serialised = json.dumps(ordered)
    return json.loads(serialised)
def xmlToJson(data):
    """Convert the XML held in ``data.text`` to a BadgerFish dictionary.

    BadgerFish notation uses "$" for XML text content and prefixes XML
    attributes with "@".
    """
    # Drop the first default-namespace declaration; otherwise the namespace
    # string is repeated throughout the converted output.
    stripped = re.sub(' xmlns="[^"]+"', '', data.text, count=1)
    root = fromstring(stripped)
    return BadgerFish(dict_type=dict).data(root)
def hpfeedsend(self, eformat):
    """Publish the children of ``self.esm`` to the configured hpfeeds broker.

    Before the hpfeeds client is created, a plain TCP connection to the
    broker is attempted.  This is a workaround for an offline broker, as
    otherwise the hpfeeds lib will loop its connection attempt.

    Args:
        eformat: "xml" publishes each child serialised with
            ``etree.tostring``; "json" publishes its BadgerFish
            representation as a JSON string.  Any other value publishes
            nothing.

    Returns:
        bool: True when the publish loop completed, False when the broker
        was unreachable or hpfeeds raised a ``FeedException``.
    """
    host = self.ECFG["host"]
    port = int(self.ECFG["port"])

    # The context manager closes the probe socket even if connect_ex raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(2)
        result = sock.connect_ex((host, port))

    if result != 0:
        # broker unavailable
        self.logger.warning(
            f"HPFEEDS broker is configured to {self.ECFG['host']}:{self.ECFG['port']} but is currently unavailable. Disabling hpfeeds submission for this round!",
            '2')
        return False

    try:
        if self.ECFG["tlscert"].lower() != "none":
            hpc = hpfeeds.new(host,
                              port,
                              self.ECFG["ident"],
                              self.ECFG["secret"],
                              certfile=self.ECFG["tlscert"],
                              reconnect=False)
            print(
                f'    -> Connecting to {hpc.brokername}/{self.ECFG["host"]}:{self.ECFG["port"]} via TLS'
            )
        else:
            hpc = hpfeeds.new(host,
                              port,
                              self.ECFG["ident"],
                              self.ECFG["secret"],
                              reconnect=False)
            print(
                f'    -> Connecting to {hpc.brokername}/{self.ECFG["host"]}:{self.ECFG["port"]} via none TLS'
            )

        # remove auth header
        etree.strip_elements(self.esm, "Authentication")

        channels = self.ECFG["channels"]
        # Build the converter once, and only when it is actually needed.
        bf = BadgerFish(dict_type=OrderedDict) if eformat == "json" else None

        for event in self.esm:
            if eformat == "xml":
                hpc.publish(channels,
                            etree.tostring(event, pretty_print=False))
            elif eformat == "json":
                hpc.publish(channels, json.dumps(bf.data(event)))

        return True

    except hpfeeds.FeedException as e:
        self.logger.error(f"HPFeeds Error ({e})", '2')
        return False
def xml2json(element, conv='parker'):
    """Return the JSON representation of an XML record.

    Args:
        element: the XML record to convert.
        conv: conversion convention, 'bf' (BadgerFish) or 'parker'.

    Raises:
        ValueError: if ``conv`` is neither 'bf' nor 'parker'.
    """
    # Reject unknown conventions up front.
    if conv != 'bf' and conv != 'parker':
        logging.critical('Invalid XML2JSON Convention: ' + conv)
        raise ValueError('The parameter @conv should be "bf" or "parker" not ' + conv)

    converter_cls = BadgerFish if conv == 'bf' else Parker
    converted = converter_cls(xml_fromstring=str).data(element)
    return json.dumps(converted, indent='  ', ensure_ascii=False)
def get_building_info(self, prop_id):
    """Fetch the building information for a property.

    Args:
        prop_id: the property ID

    Returns:
        Dictionary: BadgerFish output of the XML response.  When the
        status code is not OK, the result of ``raise_for_status()`` is
        returned instead (that call raises for HTTP error statuses).
    """
    url = '{0}/building/{1}'.format(self.domain, prop_id)
    self.logger.debug("Pulling data from {0}".format(url))

    reply = self.session.get(url)
    if reply.status_code == requests.codes.ok:
        tree_root = Et.fromstring(reply.text)
        return BadgerFish(dict_type=dict).data(tree_root)

    return reply.raise_for_status()
def get_account_info(self):
    """Fetch the account information for the current user.

    Returns:
        Dictionary: BadgerFish style representation of the Energy Star
        response.  When the status code is not OK, the result of
        ``raise_for_status()`` is returned instead (that call raises for
        HTTP error statuses).

        e.g.:
            account_info = client.get_account_info()
            account_id = account_info["account"]["id"]["$"]

    Notes:
        BadgerFish: http://www.sklar.com/badgerfish/
    """
    url = self.domain + "/account"
    self.logger.debug("Pulling data from {0}".format(url))

    reply = self.session.get(url)
    if reply.status_code == requests.codes.ok:
        tree_root = Et.fromstring(reply.text)
        return BadgerFish(dict_type=dict).data(tree_root)

    return reply.raise_for_status()
if limit != 0:
    logging.info("input file limit activated %s definitions", str(limit))

# Stop with exit status 8 when the input file is missing.
if not os.path.exists(infile):
    logging.error("input file %s not found", infile)
    sys.exit(8)

# Read the whole XML export and convert it to a BadgerFish mapping.
with open(infile) as fd:
    text = fd.read()
bf = BadgerFish(dict_type=OrderedDict, xml_fromstring=False)
xml_json = bf.data(ET.fromstring(text))

# test purpose : print json structure
# json_print(xml_json, 0)

# Attribute keys passed to get_spectrum_topology for each element type.
attr = {
    "Topology_Container": ["@name", "@model_type", "@model_handle"],
    "Device": ["@name", "@network_address", "@model_type", "@model_handle"],
}

topology = get_spectrum_topology(
    xml_json["SPECTRUM_Export"]["Topology"], attr, 0)
print(topology)
def xmlToJson(data):
    """Parse ``data`` with ``lxml.html`` and return its BadgerFish mapping."""
    converter = BadgerFish(dict_type=OrderedDict)
    return converter.data(lxml.html.fromstring(data))
import pandas as pd
import regex as re
from sklearn.model_selection import train_test_split
import time
from fastprogress import progress_bar
from xmljson import BadgerFish
from xml.etree.ElementTree import fromstring

# Use the HTTP GET method of the requests library to fetch the XML dataset.
# The timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() stops early on an HTTP error instead of passing an
# error page to the XML parser.
response = requests.get(
    'https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml',
    timeout=30)
response.raise_for_status()
r = response.content.decode()

# Initialize the BadgerFish class of the xmljson package to convert XML data
# to dictionary format
bf = BadgerFish(dict_type=dict)

# Convert the XML data to Python dictionary format
xml_dict = dict(bf.data(fromstring(r)))

# NOTE(review): BadgerFish is expected to give a single mapping rather than a
# list when the channel holds exactly one <item>; wrap it so the loops below
# always iterate over items and not over dictionary keys.
items = xml_dict['rss']['channel']['item']
if isinstance(items, dict):
    items = [items]

# Get the title text of every item in the RSS feed
title_list = [item['title']['$'] for item in items]
print(title_list[:10])
print(len(title_list))

# Get the description text corresponding to each title
desc_list = [item['description']['$'] for item in items]
print(desc_list[:10])
print(len(desc_list))
def _get_collection_data(filename): path = Path(filename) assert path.is_file() bf = BadgerFish(dict_type=OrderedDict) return path, bf.data(fromstring(open(path, 'rb').read()))
fname = os.path.join(dir, added[zk])
logging.info("KMZ file is passed as argument to kmz_to_kml function", extra={"verbosity": 1})
kmlfile = kmz_to_kml(fname)

# Generic DOM parse of the KML file; the document element is handed to print_node.
dom = md.parse(kmlfile)
root = dom.documentElement
logging.info("Obtained kml file is generically parsed", extra={"verbosity": 1})
print_node(root)

# Re-parse with ElementTree and serialise the root back to an XML string.
tree = ET.parse(kmlfile)
root1 = tree.getroot()
xmlstr = ET.tostring(root1, encoding='utf8', method='xml')
bf = BadgerFish(dict_type=OrderedDict)
bf_str = BadgerFish(xml_fromstring=False)

# File name without directory and extension, shared by all three outputs.
d_name = os.path.splitext(basename(fname))[0]

# Format 1: BadgerFish conversion of the KML.
# The with-blocks close each output file even if serialisation or the write fails.
t_n = os.path.splitext(config.get("Section2", "path1"))[0] + d_name + ".geojson"
with open(t_n, 'w') as f_file:
    logging.info("kml file is converted to geojson format1", extra={"verbosity": 1})
    f_file.write(dumps(bf_str.data(fromstring(xmlstr))))

# Format 2: xmltodict conversion, pretty-printed.
o = xmltodict.parse(xmlstr)
t_s = os.path.splitext(config.get("Section2", "path2"))[0] + d_name + ".geojson"
with open(t_s, 'w') as g_file:
    rr = json.dumps(o)
    logging.info("kml file is converted to geojson format2", extra={"verbosity": 1})
    g_file.write(json.dumps(o, sort_keys=False, indent=4, separators=(',', ': ')))

# Format 3: the JSON string from above passed through dumps once more.
# NOTE(review): rr is already a JSON string, so if dumps is json.dumps this
# writes a doubly encoded value - confirm that this is intended.
t_k = os.path.splitext(config.get("Section2", "path3"))[0] + d_name + ".geojson"
with open(t_k, 'w') as h_file:
    logging.info("kml file is converted to geojson format3", extra={"verbosity": 1})
    h_file.write(dumps(rr))

conn = MongoClient()
def xml2json(xmlfile):
    """Parse the XML file at ``xmlfile`` and return its BadgerFish mapping.

    Args:
        xmlfile: path of the XML file to read.

    Returns:
        The BadgerFish conversion of the parsed document, built with
        ``OrderedDict`` mappings.
    """
    bf = BadgerFish(dict_type=OrderedDict)
    # Read bytes rather than text: lxml rejects str input that carries an
    # XML encoding declaration, and the parser can then honour the
    # document's own encoding.  The with-block closes the file, which the
    # previous bare open(...).read() did not.
    with open(xmlfile, 'rb') as handle:
        raw = handle.read()
    return bf.data(lxml.etree.fromstring(raw))