def test_create_with_xml(self):
    """
    Test creation of a Root instance by using the create_from_elem()
    factory method.
    """
    elem1 = objectify.parse(TestRoot.xml1).getroot()
    elem2 = objectify.parse(TestRoot.xml2).getroot()
    r1 = Root(elem=elem1)
    r2 = Root(elem=elem2)
    assert r1.value == u'koeï'
    assert r1.remarks == u'Ä cow.'
    assert r1.id == 1
    assert r1.pos_id == 11
    assert r1.user_id == 111
    assert r1.date == "1212555224"
    assert r2.value == u'boom'
    assert r2.remarks == u''
    assert r2.id == 2
    assert r2.pos_id == 22
    assert r2.user_id == 222
    assert r2.date == "1212555925"
    del r1
    del r2
def parse_with_region(person_list_file="../personlist.xml",
                      city_index_file="../cityindex.xml"):
    """
    Augment document while parsing.
    """
    tree = objectify.parse(person_list_file)
    city_index = objectify.parse(city_index_file).getroot().entry
    person_list = tree.getroot()
    region_element = person_list.makeelement("region")
    for person in person_list.person:
        # find city and country of each person
        city = person.address.city
        country = person.address.country
        region = region_element
        for entry in city_index:
            if entry.city == city and entry.country == country:
                region = entry.region
                break
        # insert region tag after city tag
        city.addnext(deepcopy(region))
        # change birth date to April 1st if born in December
        if person.birthday.month == "December":
            birthday = person.birthday
            birthday.day = 1
            birthday.month = "April"
            birthday.month.set("number", "4")
    # return processed tree
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)
    return tree
def send(self):
    log("sending Query for {url}", url=self.url)
    if Query.use_cache:
        if not self.is_cached:
            log("    no cache file for query")
            # create the cache file
            with open(self.cached_filename, 'w+') as f:
                f.write(urllib2.urlopen(self.url).read())
            log("    cache file {filename} created", filename=self.cached_filename)
        self.tree = objectify.parse(self.cached_filename)  # read the cached file
        log("    Query loaded from file {filename}", filename=self.cached_filename)
    else:
        # get directly from URL
        self.tree = objectify.parse(self.url)
        log("    data fetched from URL {url}", url=self.url)
    if Query.translate_tags:
        xml_string = lxml.etree.tostring(self.tree)
        doc = lxml.etree.XML(xml_string)
        for e in doc.xpath('//*[contains(local-name(), "-")]'):
            e.tag = e.tag.replace('-', '_')
        self.tree = objectify.parse(StringIO.StringIO(lxml.etree.tostring(doc)))
    self.root = self.tree.getroot()
    self.sent = True
def parse(self, fileobject, schema=None):
    """Parses a file object.

    This function parses a KML file object, and optionally validates it
    against a provided schema.
    """
    if schema:
        # with validation
        parser = objectify.makeparser(schema=schema.schema, strip_cdata=False)
        return objectify.parse(fileobject, parser=parser)
    else:
        # without validation
        return objectify.parse(fileobject)
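# A minimal usage sketch for the validating parse() above, assuming a schema
# wrapper that exposes a compiled etree.XMLSchema as `.schema`; the class name
# and file names here are hypothetical, not from the original code.
from io import BytesIO
from lxml import etree, objectify

class KmlSchema(object):  # hypothetical stand-in for the schema argument
    def __init__(self, xsd_path):
        self.schema = etree.XMLSchema(file=xsd_path)

# parser = objectify.makeparser(schema=KmlSchema('ogckml22.xsd').schema, strip_cdata=False)
# objectify.parse(BytesIO(b'<kml/>'), parser=parser)  # raises XMLSyntaxError on invalid input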
def handle(self, *args, **options):
    self.verbosity = int(options.get('verbosity', 1))
    self.popit = create_popit_api_object()
    with open(CONSTITUENCIES_JSON_FILE) as f:
        constituencies = json.load(f).values()
    xml = objectify.parse(ALL_MEMBERS_XML_FILE).getroot()
    self.members = [member.attrib for member in xml.findall('member')]
    people_xml = objectify.parse(PEOPLE_XML_FILE).getroot()
    people = people_xml.findall('person')
    self.id_mapping = {p.get('latestname'): p.get('id') for p in people}
    self.update_candidates(constituencies)
def fetch_data():
    stations_tree = objectify.parse(urllib2.urlopen(BICIKELJ_STATIONS_URL)).getroot()
    for node in stations_tree.markers.marker:
        info_tree = objectify.parse(
            urllib2.urlopen(BICIKELJ_INFO_URL % int(node.attrib['number']))).getroot()
        yield {
            'station_id': int(node.attrib['number']),
            'name': node.attrib['name'],
            'address': node.attrib['address'],
            'location': (float(node.attrib['lat']), float(node.attrib['lng'])),
            'available': int(info_tree.available),
            'free': int(info_tree.free),
            'total': int(info_tree.total),
            'open': bool(info_tree.open),
        }, datetime.datetime.utcfromtimestamp(int(info_tree.updated)), timezone.now()
def validate(self):
    dummy, fn = os.path.split(self._xmlFn)
    stdoutWrite('Validating metadata file %s against scheme ...\n' % fn)
    try:
        schema = etree.XMLSchema(file=self._config.configDir + '/' + self._scheme)
        parser = etree.XMLParser(schema=schema)
        objectify.parse(self._xmlFn, parser)
        stdoutWrite('Metadata file is valid.\n')
        return True
    except etree.XMLSyntaxError, err:
        stdoutWrite('Metadata file is invalid, see logfile for details.\n')
        self._config.logger.error('Schema file: %s' % self._scheme)
        self._config.logger.error('Details: %s' % str(err))
        return False
def compare_output_to_known_xml(self, response_stringIO, known_xml_filename):
    """
    Make sure response output is as expected.

    This tests that a response contains the same XML as in the known XML file.
    """
    known_file_path = os.path.join(os.path.dirname(__file__), TEST_XML_DIR,
                                   known_xml_filename)
    # Use objectify and etree to remove whitespace from XML for comparison
    response_tree = objectify.parse(response_stringIO)
    known_tree = objectify.parse(known_file_path)
    response_string = etree.tostring(response_tree)
    known_string = etree.tostring(known_tree)
    assert response_string == known_string
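# Note on the comparison above: tostring() equality only holds when both
# documents serialize byte-for-byte the same. A hedged, more whitespace-tolerant
# variant using plain lxml (remove_blank_text is a standard parser option; the
# file names are placeholders):
from lxml import etree

parser = etree.XMLParser(remove_blank_text=True)  # drop ignorable whitespace
a = etree.tostring(etree.parse('response.xml', parser))
b = etree.tostring(etree.parse('known.xml', parser))
assert a == b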
def validate(self):
    fn = os.path.basename(self._xmlFn)
    self._logger.info('validating metadata file %s against scheme' % fn)
    l.acquire()
    try:
        schema = etree.XMLSchema(file=os.path.join(self._config.configDir, self._scheme))
        parser = etree.XMLParser(schema=schema)
        objectify.parse(self._xmlFn, parser)
        self._logger.info('metadata file is valid')
        ret = True
    except etree.XMLSyntaxError, err:
        stdoutWrite('Metadata file is invalid, see report file for details.\n')
        self._logger.error('Schema file: %s' % self._scheme)
        self._logger.error('Details: %s' % str(err))
        ret = False
    finally:
        # release the lock acquired above so other threads can validate
        l.release()
    return ret
def test_create_from_xml(self):
    """
    Test creation of a PartOfSpeech instance by using the create_from_elem()
    factory method.
    """
    elem1 = objectify.parse(TestPartOfSpeech.xml1).getroot()
    elem2 = objectify.parse(TestPartOfSpeech.xml2).getroot()
    pos1 = PartOfSpeech(elem=elem1)
    pos2 = PartOfSpeech(elem=elem2)
    assert pos1.id == 1 and pos2.id == 2
    assert pos1.shortcut == 'n1' and pos2.shortcut == 'v1sp'
    assert pos1.name == u'Noun, ôrdinary'
    assert pos2.name == 'Verb, 1st person singular, present tense'
    assert len(str(pos1.remarks)) == 43 and pos2.remarks == ''
def main(filename):
    tree = objectify.parse(filename)
    root = tree.getroot()
    for object in root.iterchildren():
        if object.tag == 'package':
            ProcessPackage(object)
def update_moved_functions(filename, is_addon=False):
    xml = objectify.parse(filename)
    doxygen = xml.getroot()
    xmlfunctionsfile = doxygen.compounddef
    if xmlfunctionsfile.find('sectiondef') is not None:
        if len([s for s in xmlfunctionsfile.sectiondef if s.get('kind') == 'func']) > 0:
            file_split = os.path.splitext(xmlfunctionsfile.compoundname.text)
            functionsfile = getfunctionsfile(file_split[0])
            for section in xmlfunctionsfile.sectiondef:
                if section.get('kind') == 'func':
                    for xmlfunction in section.memberdef:
                        for function in missing_functions:
                            if function.name == xmlfunction.name.text:
                                argstring = str(xmlfunction.argsstring.text)
                                params = argstring[argstring.find('(') + 1:argstring.rfind(')')]
                                returns = xmlfunction.type.ref.text if hasattr(xmlfunction.type, 'ref') else xmlfunction.type.text
                                moved_function = functionsfile.function_by_signature(
                                    xmlfunction.name.text, returns, params)
                                moved_function.returns = returns
                                moved_function.description = moved_function.description + '\n\n' + function.description
                                print "moved function: " + function.name
            setfunctionsfile(functionsfile, is_addon)
def main():
    parser = argparse.ArgumentParser(description='Imports RUIAN data into MongoDB')
    parser.add_argument('--ruian_file', help='RUIAN file', required=True)
    parser.add_argument('--db', help='Mongo database', required=True)
    args = parser.parse_args()

    if not os.path.isfile(args.ruian_file):
        logging.error('RUIAN file %s is not a file' % args.ruian_file)
        sys.exit(1)

    # parse command line options
    f = open(args.ruian_file, 'r')
    doc = objectify.parse(f)
    root = doc.getroot()
    data = root.Data

    client = MongoClient('localhost', 27017)  # Mongo client
    db = client[args.db]

    for layer in data.iterchildren():
        # something other than DOM parsing may be needed here
        parse_layer(layer, db)
def parse(self, xmlfile):
    xml = objectify.parse(xmlfile, self.parser)  # , base_url="."
    xml.xinclude()
    if not self.schema.validate(xml):
        err = self.schema.error_log
        raise Exception("XML file didn't pass schema validation:\n%s" % err)
    return xml
def wargame(phenny, input):
    if input.group(2) is not None:
        rest = input.group(2)
        m = re.match(r"^scores\s+(\S+)\s*$", rest)
        if m is not None and len(m.groups()) == 1:
            return wargame_scores(phenny, m.group(1))
        m = re.match(r"^scores\s*$", rest)
        if m is not None:
            return wargame_scores(phenny, "Total")
        m = re.match(r"^help\s*$", rest)
        if m is not None:
            phenny.say("VTLUUG King of the Root - http://wargame.vtluug.org")
            phenny.say("syntax: '.wargame' to see network status and target list")
            phenny.say("syntax: '.wargame scores <target name>' to get current scores for a target")
            return
        else:
            phenny.say("hmm.. I don't know what you mean. try '.wargame help'")
            return
    try:
        req = urlopen(APIURL)
    except HTTPError as e:
        phenny.say("uhoh. try again later, mmkay?")
        return
    root = objectify.parse(req).getroot()
    online = root.attrib.get("online") == "True"
    updated = root.attrib.get("updated")
    servers = []
    for server_e in root.servers.server:
        servers.append(parse_server(server_e))
    phenny.say("wargame network is %s. last updated %s. available targets: %s" % (
        "ONLINE" if online else "OFFLINE", updated,
        ", ".join([s.name for s in servers])))
def import_document(self, document_path):
    if document_path.startswith('http'):
        self.xml = objectify.fromstring(requests.get(document_path, verify=self.verify).content)
    else:
        self.xml = objectify.parse(document_path).getroot()
    self.ns = self.xml.nsmap.get(None, None)
    return self.parse_document()
def read_XML(path):
    # var = ['NORD', 'FRAN', 'SVIZ']
    xml = objectify.parse(open(path))
    root = xml.getroot()
    nord_fran = []
    fran_sviz = []
    sviz_nord = []
    nord_sviz = []
    sviz_fran = []
    fran_nord = []
    diz = OrderedDict()
    for i in range(len(root.getchildren())):
        child = root.getchildren()[i].getchildren()
        if child[3] == "NORD" and child[4] == "FRAN":
            nord_fran.append(float(child[5]))
        elif child[3] == "NORD" and child[4] == "SVIZ":
            nord_sviz.append(float(child[5]))
        elif child[3] == "FRAN" and child[4] == "SVIZ":
            fran_sviz.append(float(child[5]))
        elif child[3] == "FRAN" and child[4] == "NORD":
            fran_nord.append(float(child[5]))
        elif child[3] == "SVIZ" and child[4] == "NORD":
            sviz_nord.append(float(child[5]))
        elif child[3] == "SVIZ" and child[4] == "FRAN":
            sviz_fran.append(float(child[5]))
        else:
            pass
    diz["nord-fran"] = np.nanmean(nord_fran)
    diz["nord-sviz"] = np.nanmean(nord_sviz)
    diz["fran-nord"] = np.nanmean(fran_nord)
    diz["fran-sviz"] = np.nanmean(fran_sviz)
    diz["sviz-fran"] = np.nanmean(sviz_fran)
    diz["sviz-nord"] = np.nanmean(sviz_nord)
    return diz
def createCBP(project_path):
    if os.path.abspath(project_path) == os.path.abspath(templates_path):
        return
    project_name = os.path.basename(project_path)
    cbp = objectify.parse(os.path.join(project_path, project_name + '.cbp'))
    root = cbp.getroot()
    project = root.Project
    for option in project.Option:
        if option.get("title") is not None:
            option.set("title", project_name)

    # add existing files in src/ to the codeblocks project
    for root, dirs, files in os.walk(os.path.join(project_path, 'src')):
        for name in files:
            basefolder = root[len(project_path) + 1:]
            filepath = str(os.path.join(basefolder, name))
            addCBPUnit(project, filepath, basefolder)

    # add addons from addons.make to the cbp
    addAddons(project, project_path)

    cbp_file = open(os.path.join(project_path, project_name + '.cbp'), mode='w')
    cbp_file.write(etree.tostring(cbp, xml_declaration=True, encoding='UTF-8', pretty_print=True))
    cbp_file.close()
def _read_trl(trl_location):
    """Read and import a TRL file."""
    parsed = objectify.parse(gzip.open(trl_location))
    root = parsed.getroot()
    return root
def xmlparser(xml, objectify=True):
    """ Parse xml

    :param xml: XML element
    :type xml: Union[text_type, lxml.etree._Element]
    :rtype: lxml.etree._Element
    :returns: An element object
    :raises: TypeError if element is not in accepted type
    """
    doclose = None
    if isinstance(xml, (etree._Element, ObjectifiedElement, etree._ElementTree)):
        return xml
    elif isinstance(xml, text_type):
        xml = StringIO(xml)
        doclose = True
    elif not isinstance(xml, IOBase):
        raise TypeError("Unsupported type of resource {}".format(type(xml)))
    # objectify=True requests the objectify parser; otherwise fall back to a
    # plain etree parse.
    if objectify:
        parsed = parse(xml, parser=__parser__)
    else:
        parsed = etree.parse(xml).getroot()
    if doclose:
        xml.close()
    return parsed
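# A short usage sketch for xmlparser() above (assumes it is imported from the
# surrounding module; the XML literal is a placeholder):
doc = xmlparser("<TEI><text>hello</text></TEI>")  # str input is wrapped in StringIO first
same = xmlparser(doc)                             # already-parsed input is returned unchanged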
def main(nzb_file):
    m = re.search(r'/([^/]*$)', nzb_file)
    if m:
        filename = m.group(1)
    else:
        filename = nzb_file
    rlsname = filename[:-4]
    nzb = {'category': 'moovee'}
    f = open(nzb_file, 'r')
    date = d.fromtimestamp(int(objectify.parse(f).getroot().file.attrib['date']))
    f2 = open(nzb_file, 'r')
    folder = nzbs.download.get_folder(nzb)
    fgz = gzip.open(os.path.join(folder, filename + '.gz'), 'wb')
    fgz.write(f2.read())
    f2.close()
    fgz.close()
    nzb['_id'] = rlsname
    nzb['rlsname'] = rlsname
    nzb['tags'] = ['#a.b.moovee@EFNet', 'ByHand']
    nzb['stages'] = {'downloaded': True}
    nzb['stage'] = 2
    nzb['date'] = date
    nzb['file'] = filename + '.gz'
    pymongo.Connection().usenet.nzbs.save(nzb)
def check_format_strings():
    """
    This method runs through the ts-files and looks for mismatches between
    format strings in the original text and in the translations.
    """
    is_ok = True
    path = os.path.join(os.path.abspath('..'), 'resources', 'i18n', '*.ts')
    file_list = glob.glob(path)
    for filename in file_list:
        print_quiet('Checking %s' % filename)
        file = open(filename, 'rb')
        tree = objectify.parse(file)
        root = tree.getroot()
        for tag in root.iter('message'):
            location = tag.location.get('filename')
            line = tag.location.get('line')
            org_text = tag.source.text
            translation = tag.translation.text
            if not translation:
                for num in tag.iter('numerusform'):
                    print_verbose('parsed numerusform: location: %s, source: %s, translation: %s' % (
                        location, org_text, num.text))
                    if num and org_text.count('%') != num.text.count('%'):
                        is_ok = False
                        print_quiet(
                            'ERROR: Translation from %s at line %s has a mismatch of format input:\n%s\n%s\n' % (
                                location, line, org_text, num.text))
            else:
                print_verbose('parsed: location: %s, source: %s, translation: %s' % (
                    location, org_text, translation))
                if org_text.count('%') != translation.count('%'):
                    is_ok = False
                    print_quiet(
                        'ERROR: Translation from %s at line %s has a mismatch of format input:\n%s\n%s\n' % (
                            location, line, org_text, translation))
    return is_ok
def parse_worksheet(sheetname, string_dict):
    '''
    Returns:
        a list of class Cells
        a list of shared_formulas
    '''
    parser = etree.XMLParser(ns_clean=True)
    tree = objectify.parse(sheetname, parser)
    root = tree.getroot()

    # A list of cells
    output = []

    # A list of shared formulas
    global shared_formulas
    shared_formulas = []

    rows = get_row("sheetData", root)
    for row in rows:  # Iterate over the rows
        cells = list(row)
        for cell in cells:  # Iterate over the cells in a row
            # Add a cell to the list of cells
            output.append(Cell(cell))
    return output, shared_formulas
def load_adjustment_values():
    """load adjustment values and their default values from XML"""
    # parse XML --------------------------------------------
    thisdir = os.path.split(__file__)[0]
    prst_defs_relpath = (
        'ISO-IEC-29500-1/schemas/dml-geometries/OfficeOpenXML-DrawingMLGeomet'
        'ries/presetShapeDefinitions.xml'
    )
    prst_defs_path = os.path.join(thisdir, prst_defs_relpath)
    presetShapeDefinitions = objectify.parse(prst_defs_path).getroot()
    # load individual records into tuples to return --------
    ns = 'http://schemas.openxmlformats.org/drawingml/2006/main'
    avLst_qn = '{%s}avLst' % ns
    adjustment_values = []
    for shapedef in presetShapeDefinitions.iterchildren():
        prst = shapedef.tag
        try:
            avLst = shapedef[avLst_qn]
        except AttributeError:
            continue
        for idx, gd in enumerate(avLst.gd):
            name = gd.get('name')
            val = int(gd.get('fmla')[4:])  # strip off leading 'val '
            record = (prst, idx + 1, name, val)
            adjustment_values.append(record)
    return adjustment_values
def import_document(self, document_path):
    tree = objectify.parse(document_path)
    xml = tree.getroot()
    debateBody = xml.debate.debateBody
    mainSection = debateBody.debateSection

    self.title = '%s (%s)' % (
        mainSection.heading.text,
        etree.tostring(xml.debate.preface.p, method='text'))

    section = self.make(Section, title=self.title)

    start_date = xml.debate.preface.p.docDate.get('date')
    self.set_resolver_for_date(date_string=start_date)
    self.start_date = datetime.strptime(start_date, '%Y-%m-%d')

    self.visit(mainSection, section)

    return section
def import_document(self, document_path):
    if document_path.startswith('http'):
        self.xml = objectify.fromstring(urlopen(document_path).read())
    else:
        self.xml = objectify.parse(document_path).getroot()
    self.ns = self.xml.nsmap.get(None, None)
    return self.parse_document()
def sml_import(xmlfile, user):
    filename = xmlfile.filename
    tree = objectify.parse(xmlfile).getroot()
    move = parse_move(tree)
    move.source = os.path.abspath(filename)
    move.import_module = __name__

    device = parse_device(tree)
    persistent_device = Device.query.filter_by(serial_number=device.serial_number).scalar()
    if persistent_device:
        if not persistent_device.name:
            flash("update device name to '%s'" % device.name)
            persistent_device.name = device.name
        else:
            assert device.name == persistent_device.name
        device = persistent_device
    else:
        db.session.add(device)

    if Move.query.filter_by(user=user, date_time=move.date_time, device=device).scalar():
        flash("%s at %s already exists" % (move.activity, move.date_time), 'warning')
    else:
        move.user = user
        move.device = device
        db.session.add(move)

        samples = tree.DeviceLog.Samples.iterchildren()
        for sample in parse_samples(samples, move):
            db.session.add(sample)

        postprocess_move(move)
        db.session.commit()
        return move
def field_metadata(self, well_row=0, well_column=0, field_row=0, field_column=0):
    """Get OME-XML metadata of given field.

    Parameters
    ----------
    well_row : int
        Y well coordinate. Same as --V in files.
    well_column : int
        X well coordinate. Same as --U in files.
    field_row : int
        Y field coordinate. Same as --Y in files.
    field_column : int
        X field coordinate. Same as --X in files.

    Returns
    -------
    lxml.objectify.ObjectifiedElement
        lxml object of OME-XML found in slide/chamber/field/metadata.
    """
    def condition(path):
        attrs = attributes(path)
        return (attrs.u == well_column and attrs.v == well_row
                and attrs.x == field_column and attrs.y == field_row)

    field = [f for f in self.fields if condition(f)]
    if field:
        field = field[0]
        filename = _pattern(field, 'metadata', _image, extension='*.ome.xml')
        filename = glob(filename)[0]  # resolve, assume found
        return objectify.parse(filename).getroot()
def extract_operations_from_wsdl(path):
    """
    Extracts operations from Amazon's WSDL file.
    """
    root = objectify.parse(open(path)).getroot()
    wsdlns = "http://schemas.xmlsoap.org/wsdl/"
    return set(root.xpath("//ws:operation/@name", namespaces={"ws": wsdlns}))
def __init__(self, service, plugins):
    # Register a function to listen to the game events.
    self.service = service
    self.service.listen().addCallback(self.self_notify)
    # Implement the path conventions
    self.confdir = os.path.join(self.service.settings['plugins-confdir'], self.name())
    self.libdir = os.path.join(self.service.settings['plugins-libdir'], self.name())
    # Load bots logic
    brain = NLWordMatcherBrain(self)
    # Load configuration and instantiate bots
    self.settings = objectify.parse(open(os.path.join(self.confdir, 'bot.xml'))).getroot()
    self.delays = {}
    self.bots = []
    for node in self.settings.iterchildren():
        if node.tag == "delay":
            self.delays[node.get('type')] = {'base': int(node.get('base')),
                                             'random': int(node.get('random'))}
        elif node.tag == "botinstance":
            bot = Bot(self, brain, int(node.get('player_id')))
            self.bots.append(bot)
    # Store enable_join per game.
    self.enable_join = {}
    # Initialize the pollable using the recommended timeout.
    Pollable.__init__(self, self.service.settings.get('poll-timeout', 30))
parser = argparse.ArgumentParser()
parser.add_argument("--align", "-a", help="Stockholm TreeAligner alignment file", required=True)
parser.add_argument("--outdir", "-o", help="Output directory", required=True)
parser.add_argument("--noshuffle", "-n", help="Do not shuffle extracted aligned sentences",
                    action="store_true")
args = parser.parse_args()

treeparser = etree.XMLParser(remove_comments=True, recover=True)
try:
    align_tree = objectify.parse(args.align, parser=treeparser)
except IOError as e:
    logging.error(
        "Unable to open STA-XML file (--align/-a) - does not exist or no read permissions."
    )

if not os.path.exists(args.outdir):
    logging.error("Specified output directory (" + args.outdir + ") does not exist!")

abs_align = os.path.abspath(args.align)
tree_files = sta_files.get_treebank_files(align_tree, abs_align)
if not tree_files:
    logging.error(
file.close()

good_doctype = re.match(r'(?ims).*DOCTYPE.*\[.*\]\>', xml_string).group()

# Two trees. One with the unresolved entities...
parser = objectify.makeparser(resolve_entities=False)
# tree = objectify.parse(xml, parser=parser)
# root = tree.getroot()

# ...and another with the entities resolved.
# This dummy tree expands the entities that I found and puts them
# in a dictionary (entity) => resolved_entity
# NOTE: `findall' makes the script very slow

# First get the entities declared in the header
temp_xml = ''.join([good_doctype, dummy_map % '<dummy_tag></dummy_tag>'])
expanded_tree = objectify.parse(StringIO.StringIO(temp_xml))
expanded_tree_string = etree.tostring(expanded_tree, pretty_print=True,
                                      xml_declaration=True, encoding="utf-8")
resolved_doctype = re.match(r'(?ims).*DOCTYPE.*\[.*\]\>', expanded_tree_string).group()

doctype_entities = {}
# e.g.:
# <!ENTITY layer-amenity-symbols SYSTEM "layer-amenity-symbols.xml.inc">
for line in StringIO.StringIO(resolved_doctype).readlines():
    entity_kv = re.match(
        r'(?ims)(.*ENTITY.*?(?P<entity_key>\b[a-z09].*?\b) .*\"(?P<entity_value>.*)\").*',
        line)
    # Only consider internal entities
    if (entity_kv is not None) and not (re.search("%", line)):
        doctype_entities[''.join(['&', entity_kv.groupdict()['entity_key']])] = \
            entity_kv.groupdict()['entity_value']
def import_net(input_file_path, parameters=None):
    """
    Import a Petri net from a PNML file

    Parameters
    ----------
    input_file_path
        Input file path
    parameters
        Other parameters of the algorithm
    """
    if parameters is None:
        parameters = {}

    parser = etree.XMLParser(remove_comments=True)
    tree = objectify.parse(input_file_path, parser=parser)
    root = tree.getroot()

    net = petri.petrinet.PetriNet('imported_' + str(time.time()))
    marking = petri.petrinet.Marking()
    fmarking = petri.petrinet.Marking()

    nett = None
    page = None
    finalmarkings = None

    stochastic_information = {}

    for child in root:
        nett = child

    places_dict = {}
    trans_dict = {}

    if nett is not None:
        for child in nett:
            if "page" in child.tag:
                page = child
            if "finalmarkings" in child.tag:
                finalmarkings = child

    if page is None:
        page = nett

    if page is not None:
        for child in page:
            if "place" in child.tag:
                position_X = None
                position_Y = None
                dimension_X = None
                dimension_Y = None
                place_id = child.get("id")
                place_name = place_id
                number = 0
                for child2 in child:
                    if child2.tag.endswith('name'):
                        for child3 in child2:
                            if child3.text:
                                place_name = child3.text
                    if child2.tag.endswith('initialMarking'):
                        for child3 in child2:
                            if child3.tag.endswith("text"):
                                number = int(child3.text)
                    if child2.tag.endswith('graphics'):
                        for child3 in child2:
                            if child3.tag.endswith('position'):
                                position_X = float(child3.get("x"))
                                position_Y = float(child3.get("y"))
                            elif child3.tag.endswith("dimension"):
                                dimension_X = float(child3.get("x"))
                                dimension_Y = float(child3.get("y"))
                places_dict[place_id] = petri.petrinet.PetriNet.Place(place_id)
                places_dict[place_id].properties[constants.PLACE_NAME_TAG] = place_name
                net.places.add(places_dict[place_id])
                if position_X is not None and position_Y is not None and dimension_X is not None and dimension_Y is not None:
                    places_dict[place_id].properties[constants.LAYOUT_INFORMATION_PETRI] = (
                        (position_X, position_Y), (dimension_X, dimension_Y))
                if number > 0:
                    marking[places_dict[place_id]] = number
                del place_name

    if page is not None:
        for child in page:
            if child.tag.endswith("transition"):
                position_X = None
                position_Y = None
                dimension_X = None
                dimension_Y = None
                trans_name = child.get("id")
                trans_label = trans_name
                trans_visible = True
                random_variable = None

                for child2 in child:
                    if child2.tag.endswith("name"):
                        for child3 in child2:
                            if child3.text:
                                if trans_label == trans_name:
                                    trans_label = child3.text
                    if child2.tag.endswith("graphics"):
                        for child3 in child2:
                            if child3.tag.endswith("position"):
                                position_X = float(child3.get("x"))
                                position_Y = float(child3.get("y"))
                            elif child3.tag.endswith("dimension"):
                                dimension_X = float(child3.get("x"))
                                dimension_Y = float(child3.get("y"))
                    if child2.tag.endswith("toolspecific"):
                        tool = child2.get("tool")
                        if "ProM" in tool:
                            activity = child2.get("activity")
                            if "invisible" in activity:
                                trans_visible = False
                        elif "StochasticPetriNet" in tool:
                            distribution_type = None
                            distribution_parameters = None
                            priority = None
                            weight = None
                            for child3 in child2:
                                key = child3.get("key")
                                value = child3.text
                                if key == "distributionType":
                                    distribution_type = value
                                elif key == "distributionParameters":
                                    distribution_parameters = value
                                elif key == "priority":
                                    priority = int(value)
                                elif key == "weight":
                                    weight = float(value)
                            random_variable = RandomVariable()
                            random_variable.read_from_string(distribution_type,
                                                             distribution_parameters)
                            random_variable.set_priority(priority)
                            random_variable.set_weight(weight)
                if not trans_visible:
                    trans_label = None
                # if "INVISIBLE" in trans_label:
                #     trans_label = None

                trans_dict[trans_name] = petri.petrinet.PetriNet.Transition(trans_name, trans_label)
                net.transitions.add(trans_dict[trans_name])
                if random_variable is not None:
                    trans_dict[trans_name].properties[constants.STOCHASTIC_DISTRIBUTION] = random_variable
                if position_X is not None and position_Y is not None and dimension_X is not None and dimension_Y is not None:
                    trans_dict[trans_name].properties[constants.LAYOUT_INFORMATION_PETRI] = (
                        (position_X, position_Y), (dimension_X, dimension_Y))

    if page is not None:
        for child in page:
            if child.tag.endswith("arc"):
                arc_source = child.get("source")
                arc_target = child.get("target")
                arc_weight = 1

                for arc_child in child:
                    if arc_child.tag.endswith("inscription"):
                        for text_arcweight in arc_child:
                            if text_arcweight.tag.endswith("text"):
                                arc_weight = int(text_arcweight.text)

                if arc_source in places_dict and arc_target in trans_dict:
                    petri.utils.add_arc_from_to(places_dict[arc_source], trans_dict[arc_target],
                                                net, weight=arc_weight)
                elif arc_target in places_dict and arc_source in trans_dict:
                    petri.utils.add_arc_from_to(trans_dict[arc_source], places_dict[arc_target],
                                                net, weight=arc_weight)

    if finalmarkings is not None:
        for child in finalmarkings:
            for child2 in child:
                place_id = child2.get("idref")
                for child3 in child2:
                    if child3.tag.endswith("text"):
                        number = int(child3.text)
                        if number > 0:
                            fmarking[places_dict[place_id]] = number

    # generate the final marking in case it has not been found
    if len(fmarking) == 0:
        fmarking = final_marking.discover_final_marking(net)

    return net, marking, fmarking
def step_impl(context):  # noqa: F811
    context.xml = objectify.parse(context.contentfile)
    assert context.xml.getroot().tag == tagset.macroset.tag
from lxml import etree, objectify

E = objectify.ElementMaker(annotate=False)
anno_tree = E.annotation(
    E.folder('VOC2007'),
    E.filename('hi'),
    E.size(E.width(str(100)), E.height(str(100)), E.depth(str(3))),
    E.segmented('0'),
    E.object(
        E.name('1'),
        E.pose('Unspecified'),
        E.truncated('0'),
        E.difficult('0'),
        E.bndbox(E.xmin(1), E.ymin(2), E.xmax(3), E.ymax(4))))
print type(anno_tree)
etree.ElementTree(anno_tree).write("test.xml", pretty_print=True)

anno_tree2 = objectify.parse("test.xml").getroot()
print type(anno_tree2.getchildren()[2])
print type(anno_tree2)
anno_tree_son = E.object(
    E.name('2'),
    E.pose('Unspecified'),
    E.truncated('0'),
    E.difficult('0'),
    E.bndbox(E.xmin(2), E.ymin(3), E.xmax(4), E.ymax(5)))
anno_tree2.append(anno_tree_son)
etree.ElementTree(anno_tree2).write("test2.xml", pretty_print=True)
def main():
    # data reading and writing
    import pandas as pd
    import sys

    df = pd.read_csv('example.csv', header=None)
    df = pd.read_table('example.csv', sep='\s+')  # read_table needs a delimiter; '\s+' is regex for any whitespace
    df = pd.read_csv('example.csv', names=['a', 'b', 'c', 'd', 'message'],
                     index_col=['message', 'a'])  # name the columns and pick index columns; a hierarchical index here
    df = pd.read_csv('example.csv', skiprows=[0, 2, 3])  # skip rows when reading
    sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
    df = pd.read_csv('example.csv', na_values=sentinels)  # values to treat as NA, per column
    pd.options.display.max_rows = 10  # display setting only
    pd.read_csv('example.csv', nrows=5)  # read only the first rows
    chunker = pd.read_csv('example.csv', chunksize=1000)  # iterate over pieces of the file
    tot = pd.Series([])
    for piece in chunker:
        tot = tot.add(piece['key'].value_counts(), fill_value=0)
    df.to_csv(sys.stdout, sep='|', na_rep='NULL')  # write to the console, replacing NaN
    df.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])  # control which index/columns are written

    # csv files
    import csv
    with open('example.csv') as f:
        reader = csv.reader(f, delimiter='|')
        lines = list(reader)
    header, values = lines[0], lines[1:]  # unpack header row and data rows
    data_dict = {h: v for h, v in zip(header, zip(*values))}  # dict comprehension

    class my_dialect(csv.Dialect):  # define a specific reader
        lineterminator = '\n'
        delimiter = ';'
        quotechar = '"'
        quoting = csv.QUOTE_MINIMAL  # quote only fields with special characters

    with open('example.csv') as f:
        reader = csv.reader(f, dialect=my_dialect)  # deploy my_dialect
    with open('mydata.csv', 'w') as f:
        writer = csv.writer(f, dialect=my_dialect)
        writer.writerow(('1', '2', '3'))

    # JSON data
    import json
    result = json.loads(obj)  # transform JSON into Python objects
    asjson = json.dumps(result)  # and back again
    data = pd.read_json('example.json')
    asjson = data.to_json(orient='records')

    # XML and HTML
    html = 'https://www.fdic.gov/bank/individual/failed/banklist.html'
    tables = pd.read_html(html)
    failures = tables[0]
    close_timestamps = pd.to_datetime(failures['Closing Date'])
    close_timestamps.dt.year.value_counts()

    from lxml import objectify
    path = 'performance_nmr.xml'
    parsed = objectify.parse(path)
    root = parsed.getroot()
    data = []
    skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ', 'DESIRED_CHANGE', 'DECIMAL_PLACES']
    for elt in root.INDICATOR:  # a generator yielding each INDICATOR element
        el_data = {}
        for child in elt.getchildren():
            if child.tag in skip_fields:
                continue
            el_data[child.tag] = child.pyval
        data.append(el_data)

    # excel
    writer = pd.ExcelWriter('example.xlsx')
    frame.to_excel(writer, 'sheet1')
    writer.save()
    frame.to_excel('example.xlsx')  # shortcut that avoids ExcelWriter

    # Web API
    import requests
    url = 'https://api.github.com/repos/pandas-dev/pandas/issues'
    resp = requests.get(url)
    data = resp.json()
    data[0]['title']

    # SQL
    import sqlalchemy as sqla
    db = sqla.create_engine('mysql://*****:*****@localhost/test', encoding='utf8')
    pd.read_sql('select * from test', db)
    renamed_attribute = pluralize(renamed_attribute)
    NameTable[nml_attribute] = renamed_attribute

filename = variables['schema_name']

import StringIO
import process_includes

outfile = StringIO.StringIO()
infile = open(filename, 'r')
process_includes.process_include_files(infile, outfile, inpath=filename)
infile.close()
outfile.seek(0)

doc = objectify.parse(outfile)
root = doc.getroot()
queue = [root]
NameTable = {}
traverse_doc(queue, _node_to_python)

# filtering routine; need a better way to extract these, asked on Stack Overflow
import keyword
disallowed_keywords = keyword.kwlist
for kw in disallowed_keywords:
    try:
        NameTable.pop(kw)
    except KeyError:
        pass
def parse(source, **kwargs):
    return objectify.parse(source, parser=FVDLParser, **kwargs)
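# FVDLParser is defined elsewhere in the original project; a hedged sketch of
# how such an objectify parser is typically constructed with standard lxml
# (the exact options the project uses are an assumption, not from the source):
from lxml import objectify

FVDLParser = objectify.makeparser(remove_blank_text=True, huge_tree=True)
# tree = parse('report.fvdl')  # usage: returns an objectify-annotated ElementTree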
def write_gpx(gpxFile, df, row_date="2016-01-01", notes="Exported by rowingdata"):
    if notes is None:
        notes = "Exported by rowingdata"
    f = open(gpxFile, 'w')
    totalseconds = int(df['TimeStamp (sec)'].max() - df['TimeStamp (sec)'].min())
    totalmeters = int(df['cum_dist'].max())
    avghr = int(df[' HRCur (bpm)'].mean())
    if avghr == 0:
        avghr = 1
    maxhr = int(df[' HRCur (bpm)'].max())
    if maxhr == 0:
        maxhr = 1
    avgspm = int(df[' Cadence (stokes/min)'].mean())
    seconds = df['TimeStamp (sec)'].values
    distancemeters = df['cum_dist'].values
    heartrate = df[' HRCur (bpm)'].values.astype(int)
    cadence = np.round(df[' Cadence (stokes/min)'].values).astype(int)
    nr_rows = len(seconds)
    try:
        lat = df[' latitude'].values
    except KeyError:
        lat = np.zeros(nr_rows)
    try:
        lon = df[' longitude'].values
    except KeyError:
        lon = np.zeros(nr_rows)
    haspower = 1
    try:
        power = df[' Power (watts)'].values
    except KeyError:
        haspower = 0
    s = "2000-01-01"
    tt = ps.parse(s)
    # timezero = time.mktime(tt.timetuple())
    timezero = arrow.get(tt).timestamp()
    if seconds[0] < timezero:
        # print("Taking Row_Date ", row_date)
        dateobj = ps.parse(row_date)
        # unixtimes = seconds + time.mktime(dateobj.timetuple())
        unixtimes = seconds + arrow.get(dateobj).timestamp()
    datetimestring = row_date
    lap_begin(f, datetimestring, totalmeters, avghr, maxhr, avgspm, totalseconds)
    ts = datetime.datetime.fromtimestamp(unixtimes[0]).isoformat()
    s = '<time>{ts}</time></metadata><trk><name>Export by rowingdata</name><trkseg>'.format(ts=ts)
    f.write(s)
    for i in range(nr_rows):
        s = '  <trkpt lat="{lat}" lon="{lon}">\n'.format(lat=lat[i], lon=lon[i])
        f.write(s)
        # s = datetime.datetime.fromtimestamp(unixtimes[i]).isoformat()
        s = arrow.get(unixtimes[i]).isoformat()
        f.write('    <time>{s}</time>\n'.format(s=s))
        f.write('  </trkpt>\n')
    f.write('</trkseg>')
    f.write('</trk>')
    f.write('</gpx>')
    f.close()

    file = open(gpxFile, 'r')
    some_xml_string = file.read()
    file.close()

    try:
        xsd_file = six.moves.urllib.request.urlopen("http://www.topografix.com/GPX/1/1/gpx.xsd")
        output = open('gpx.xsd', 'w')
        if pythonversion <= 2:
            output.write(xsd_file.read().replace('\n', ''))
        else:
            output.write(xsd_file.read().decode('utf-8').replace('\n', ''))
        output.close()
        xsd_filename = "gpx.xsd"
        # Run some tests
        try:
            tree = objectify.parse(gpxFile)
            try:
                schema = etree.XMLSchema(file=xsd_filename)
                parser = objectify.makeparser(schema=schema)
                objectify.fromstring(some_xml_string, parser)
            except XMLSyntaxError:
                pass
        except:
            pass
    except six.moves.urllib.error.URLError:
        pass
    return 1
def serialize_class(filename, is_addon=False):
    xml = objectify.parse(filename)
    doxygen = xml.getroot()
    clazz = doxygen_compound.parse(filename).compounddef  # doxygen.compounddef
    documentation_class = getclass(clazz.compoundname)
    current_variables_list = []
    current_methods_list = []

    inheritsfrom = []
    # if clazz.find('derivedcompoundref') != None:
    #     inheritsfrom = clazz.derivedcompoundref

    documentation_class.detailed_inline_description = ""
    for p in clazz.briefdescription.get_para():
        documentation_class.detailed_inline_description = \
            documentation_class.detailed_inline_description + serialize_doxygen_paragraph(p)
    documentation_class.detailed_inline_description = \
        documentation_class.detailed_inline_description + "\n"
    for p in clazz.detaileddescription.get_para():
        documentation_class.detailed_inline_description = \
            documentation_class.detailed_inline_description + serialize_doxygen_paragraph(p)

    for section in clazz.sectiondef:
        for member in section.memberdef:
            # TODO: access, virtual, pure virtual
            if member.kind == 'enum':
                pass
            else:
                if member.kind == 'variable':
                    var = documentation_class.var_by_name(member.name)
                    if not var:
                        var = DocsVar(0)
                        var.name = member.name
                        var.access = member.prot
                        var.version_started = currentversion
                        var.version_deprecated = ""
                    var.constant = member.mutable == "no"
                    var.static = member.static != "no"
                    var.clazz = documentation_class.name
                    var.type = ""
                    try:
                        for e in member.type_.content_:
                            if type(e.value) == doxygen_compound.refTextType:
                                var.type = var.type + e.value.valueOf_
                            else:
                                var.type = var.type + e.value + " "
                    except:
                        pass
                    current_variables_list.append(var)
                if member.kind == 'function' and member.name.find("OF_DEPRECATED_MSG") == -1:
                    argstring = str(member.argsstring)
                    params = argstring[argstring.find('(') + 1:argstring.rfind(')')]
                    returns = ""
                    try:
                        for e in member.type_.content_:
                            if type(e.value) == doxygen_compound.refTextType:
                                returns = returns + e.value.valueOf_
                            else:
                                returns = returns + e.value
                    except:
                        pass
                    returns = "" if returns is None else returns
                    method = documentation_class.function_by_signature(member.name, returns, params)
                    method.static = member.static != "no"
                    method.clazz = documentation_class.name
                    method.access = member.prot
                    method.returns = returns
                    # method.description = method.description.replace("~~~~{.brush: cpp}", "~~~~{.cpp}").replace('</pre>', "~~~~")
                    method.description = method.description.replace('<p>', '').replace('</p>', '') \
                        .replace('<code>', '').replace('</code>', '').replace('<pre>', '')
                    if method.new:
                        print "new method " + method.name + " in " + method.clazz
                        method.version_started = currentversion
                    method.inlined_description = ""
                    for p in member.briefdescription.get_para():
                        method.inlined_description = method.inlined_description + serialize_doxygen_paragraph(p)
                    method.inlined_description = method.inlined_description + "\n"
                    for p in member.detaileddescription.get_para():
                        method.inlined_description = method.inlined_description + serialize_doxygen_paragraph(p)
                    current_methods_list.append(method)

    deprecated_methods = []
    for method in documentation_class.function_list:
        if method.name.find("OF_DEPRECATED_MSG") != -1:
            deprecated_methods.append(method)
    for method in deprecated_methods:
        documentation_class.function_list.remove(method)

    class_name_printed = False
    for method in documentation_class.function_list:
        if method not in current_methods_list:
            if method.description.strip("\n ") != "":
                if not class_name_printed:
                    print "\n\n\n\n"
                    print "========================================"
                    print "class " + documentation_class.name
                    class_name_printed = True
                print "\n\n\n\n"
                print "removing method " + method.returns + " " + method.name + "(" + method.parameters + ")"
                print "with description:"
                print method.description
    documentation_class.function_list = current_methods_list

    for var in documentation_class.var_list:
        if var not in current_variables_list:
            if var.description.strip("\n ") != "":
                if not class_name_printed:
                    print "\n\n\n\n"
                    print "========================================"
                    print "class " + documentation_class.name
                    class_name_printed = True
                print "removing " + var.name
                print "with description:"
                print var.description
    documentation_class.var_list = current_variables_list

    documentation_class.function_list.sort(key=lambda function: function.name)
    documentation_class.var_list.sort(key=lambda variable: variable.name)
    setclass(documentation_class, is_addon)
def parse_options_data(table):
    rows = table.findall('.//tr')
    header = _unpack(rows[0], kind='th')
    data = [_unpack(r) for r in rows[1:]]
    return TextParser(data, names=header).get_chunk()

put_data = parse_options_data(puts)
put_data[:10]

# XML
from lxml import objectify

path = 'pydata-book/datasets/mta_perf/Performance_MNR.xml'
parsed = objectify.parse(open(path))
root = parsed.getroot()

data = []
skip_fields = [
    'PARENT_SEQ', 'INDICATOR_SEQ', 'DESIRED_CHANGE', 'DECIMAL_PLACES'
]

for elt in root.INDICATOR:
    el_data = {}
    for child in elt.getchildren():
        if child.tag in skip_fields:
            continue
        el_data[child.tag] = child.pyval
    data.append(el_data)

perf = pd.DataFrame(data)
perf

from StringIO import StringIO
        vectors[j] = 1
        j = j + 1
    return vectors


def calc_acc(predictions):
    correctly_classified = 0
    j = 0
    for i in predictions:
        if i == test_labels[j]:
            correctly_classified = correctly_classified + 1
        j = j + 1
    acc = (correctly_classified / len(predictions)) * 100
    return acc


# Creating the xml object/tree
data_file = objectify.parse(open(args.data))
data_label_file = objectify.parse(open(args.datalabel))

# To access the root element
root_data_file = data_file.getroot()
root_data_label_file = data_label_file.getroot()

data = []
data_labels = []

print "Reading in the training corpus:"
for i in tqdm(root_data_file.getchildren()):
    data.append(' '.join(e for e in i.itertext()))

print "Reading in the training label file:"
for row in tqdm(root_data_label_file.getchildren()):
def do_import(self, bible_name=None):
    """
    Loads a Bible from file.
    """
    log.debug('Starting OpenSong import from "%s"' % self.filename)
    if not isinstance(self.filename, str):
        self.filename = str(self.filename, 'utf8')
    import_file = None
    success = True
    try:
        # NOTE: We don't need to do any of the normal encoding detection here, because lxml does its own encoding
        # detection, and the two mechanisms together interfere with each other.
        import_file = open(self.filename, 'rb')
        opensong = objectify.parse(import_file)
        bible = opensong.getroot()
        # Check that we're not trying to import a Zefania XML bible, it is sometimes referred to as 'OpenSong'
        if bible.tag.upper() == 'XMLBIBLE':
            critical_error_message_box(
                message=translate('BiblesPlugin.OpenSongImport',
                                  'Incorrect Bible file type supplied. This looks like a Zefania XML bible, '
                                  'please use the Zefania import option.'))
            return False
        # No language info in the opensong format, so ask the user
        language_id = self.get_language(bible_name)
        if not language_id:
            log.error('Importing books from "%s" failed' % self.filename)
            return False
        for book in bible.b:
            if self.stop_import_flag:
                break
            book_ref_id = self.get_book_ref_id_by_name(str(book.attrib['n']), len(bible.b), language_id)
            if not book_ref_id:
                log.error('Importing books from "%s" failed' % self.filename)
                return False
            book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
            db_book = self.create_book(book.attrib['n'], book_ref_id, book_details['testament_id'])
            chapter_number = 0
            for chapter in book.c:
                if self.stop_import_flag:
                    break
                number = chapter.attrib['n']
                if number:
                    chapter_number = int(number.split()[-1])
                else:
                    chapter_number += 1
                verse_number = 0
                for verse in chapter.v:
                    if self.stop_import_flag:
                        break
                    number = verse.attrib['n']
                    if number:
                        try:
                            number = int(number)
                        except ValueError:
                            verse_parts = number.split('-')
                            if len(verse_parts) > 1:
                                number = int(verse_parts[0])
                        except TypeError:
                            log.warning('Illegal verse number: %s', str(verse.attrib['n']))
                        verse_number = number
                    else:
                        verse_number += 1
                    self.create_verse(db_book.id, chapter_number, verse_number, self.get_text(verse))
                self.wizard.increment_progress_bar(
                    translate('BiblesPlugin.Opensong', 'Importing %(bookname)s %(chapter)s...') %
                    {'bookname': db_book.name, 'chapter': chapter_number})
            self.session.commit()
        self.application.process_events()
    except etree.XMLSyntaxError as inst:
        trace_error_handler(log)
        critical_error_message_box(
            message=translate('BiblesPlugin.OpenSongImport',
                              'Incorrect Bible file type supplied. OpenSong Bibles may be '
                              'compressed. You must decompress them before import.'))
        log.exception(inst)
        success = False
    except (IOError, AttributeError):
        log.exception('Loading Bible from OpenSong file failed')
        success = False
    finally:
        if import_file:
            import_file.close()
    if self.stop_import_flag:
        return False
    else:
        return success
    type=str,
    help='Path to which the predictions file will be written.',
    required=True)

args = parser.parse_args()

input_file_path = args.testfile
output_file_path = args.outputpath
qualified_name_of_output_file = output_file_path + "/predictions.txt"

# Reading in the input file.
for filename in os.listdir(input_file_path):
    if filename.endswith('.xml'):
        fullname = os.path.join(input_file_path, filename)
        test_file = objectify.parse(fullname)
        root_test_file = test_file.getroot()

test_articles = []
test_articles_id = []

for i in root_test_file.getchildren():
    test_articles.append(' '.join(e for e in i.itertext()))
    test_articles_id.append(i.attrib['id'])

# Loading the TDM model.
ngram_model = load('TDM.joblib')

# Loading the Classifier.
lr_clf = load('LR.joblib')
def load_xml(file):
    tree = objectify.parse(file)
    root = tree.getroot()
    return root
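# A hedged usage sketch for load_xml() above; the sample document is invented.
# objectify exposes child elements as attributes and coerces text via .pyval:
from io import BytesIO

root = load_xml(BytesIO(b'<config><retries>3</retries><host>db1</host></config>'))
assert root.retries.pyval == 3   # <retries> text parsed as an int
assert str(root.host) == 'db1'   # element text via str()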
def upload_ovf(self, catalog_name, file_name, item_name=None,
               description='', chunk_size=DEFAULT_CHUNK_SIZE,
               callback=None):
    catalog = self.get_catalog(catalog_name)
    if item_name is None:
        item_name = os.path.basename(file_name)
    tempdir = tempfile.mkdtemp(dir='.')
    total_bytes = 0
    try:
        ova = tarfile.open(file_name)
        ova.extractall(path=tempdir)
        ova.close()
        ovf_file = None
        files = os.listdir(tempdir)
        for f in files:
            fn, ex = os.path.splitext(f)
            if ex == '.ovf':
                ovf_file = os.path.join(tempdir, f)
                break
        if ovf_file is not None:
            stat_info = os.stat(ovf_file)
            total_bytes += stat_info.st_size
            ovf = objectify.parse(ovf_file)
            files = []
            ns = '{http://schemas.dmtf.org/ovf/envelope/1}'
            for f in ovf.getroot().References.File:
                source_file = {
                    'href': f.get(ns + 'href'),
                    'name': f.get(ns + 'id'),
                    'size': f.get(ns + 'size')
                }
                files.append(source_file)
            if item_name is None:
                item_name = os.path.basename(file_name)
            params = E.UploadVAppTemplateParams(name=item_name)
            params.append(E.Description(description))
            catalog_item = self.client.post_resource(
                catalog.get('href') + '/action/upload', params,
                EntityType.UPLOAD_VAPP_TEMPLATE_PARAMS.value)
            entity = self.client.get_resource(catalog_item.Entity.get('href'))
            file_href = entity.Files.File.Link.get('href')
            self.client.put_resource(file_href, ovf, 'text/xml')
            while True:
                time.sleep(5)
                entity = self.client.get_resource(catalog_item.Entity.get('href'))
                if len(entity.Files.File) > 1:
                    break
            for source_file in files:
                for target_file in entity.Files.File:
                    if source_file.get('href') == target_file.get('name'):
                        file_path = os.path.join(tempdir, source_file.get('href'))
                        total_bytes += self.upload_file(
                            file_path, target_file.Link.get('href'),
                            chunk_size=chunk_size, callback=callback)
        shutil.rmtree(tempdir)
    except Exception as e:
        print((traceback.format_exc()))
        shutil.rmtree(tempdir)
        raise e
    return total_bytes
def __init__(self, mtdfile):
    # parse
    # with open(mtdfile) as f:
    et = objectify.parse(mtdfile)
    self.root = et.getroot()
def gen_vcproj(path, mksources):
    projfd = open(path)
    header = projfd.readline()
    proj = objectify.parse(projfd)
    projfd.close()
import requests
from lxml import objectify
from rdflib import ConjunctiveGraph, URIRef, Literal, Namespace
from rdflib.namespace import DCTERMS, FOAF, RDF, XSD

# r = requests.get('https://www.iana.org/assignments/media-types/media-types.xml',
#                  headers={'Accept': 'text/xml'})
# with open('mediatypes.xml', 'w') as f:
#     f.write(r.text)

g = ConjunctiveGraph()
MT = Namespace("https://w3id.org/mediatype/")
g.bind("mt", MT)
g.bind("dcterms", DCTERMS)
g.bind("foaf", FOAF)

for register in objectify.parse("mediatypes.xml").getroot().getchildren():
    if register.tag == "{http://www.iana.org/assignments}registry":
        c = register.getchildren()
        category = c[0]
        for record in c:
            if record.tag == "{http://www.iana.org/assignments}record":
                if hasattr(record, "file"):
                    me = URIRef(MT + record.file)
                    g.add((me, RDF.type, DCTERMS.FileFormat))
                    g.add((me, DCTERMS.title, Literal(record.name)))
                    for x in record.xref:
                        if x.get("data") is not None:
                            if x.get("type") == "rfc":
                                g.add((
                                    me,
                                    DCTERMS.source,
def create_imaging_json(xml_source_file):
    # Make empty dict
    source_data = {}

    # Get datetime
    try:
        datetime_str, _, _ = get_datetime_from_xml(xml_source_file)
    except:
        print('No xml or cannot read.')
        sys.stdout.flush()
        return
    date = datetime_str.split('-')[0]
    time = datetime_str.split('-')[1]
    source_data['date'] = str(date)
    source_data['time'] = str(time)

    # Get rest of data
    tree = objectify.parse(xml_source_file)
    source = tree.getroot()
    statevalues = source.findall('PVStateShard')[0].findall('PVStateValue')
    for statevalue in statevalues:
        key = statevalue.get('key')
        if key == 'micronsPerPixel':
            indices = statevalue.findall('IndexedValue')
            for index in indices:
                axis = index.get('index')
                if axis == 'XAxis':
                    source_data['x_voxel_size'] = float(index.get('value'))
                elif axis == 'YAxis':
                    source_data['y_voxel_size'] = float(index.get('value'))
                elif axis == 'ZAxis':
                    source_data['z_voxel_size'] = float(index.get('value'))
        if key == 'laserPower':
            # I think this is the maximum power if set to vary by z depth - WRONG
            indices = statevalue.findall('IndexedValue')
            laser_power_overall = int(float(indices[0].get('value')))
        if key == 'pmtGain':
            indices = statevalue.findall('IndexedValue')
            for index in indices:
                index_num = index.get('index')
                if index_num == '0':
                    source_data['PMT_red'] = int(float(index.get('value')))
                if index_num == '1':
                    source_data['PMT_green'] = int(float(index.get('value')))
        if key == 'pixelsPerLine':
            source_data['x_dim'] = int(float(statevalue.get('value')))
        if key == 'linesPerFrame':
            source_data['y_dim'] = int(float(statevalue.get('value')))
    sequence = source.findall('Sequence')[0]
    last_frame = sequence.findall('Frame')[-1]
    source_data['z_dim'] = int(last_frame.get('index'))

    # Need this try block since sometimes first 1 or 2 frames don't have laser info...
    try:
        # Get laser power of first and last frames
        last_frame = sequence.findall('Frame')[-1]
        source_data['laser_power_max'] = int(
            last_frame.findall('PVStateShard')[0].findall('PVStateValue')
            [1].findall('IndexedValue')[0].get('value'))
        first_frame = sequence.findall('Frame')[0]
        source_data['laser_power_min'] = int(
            first_frame.findall('PVStateShard')[0].findall('PVStateValue')
            [1].findall('IndexedValue')[0].get('value'))
    except:
        try:
            first_frame = sequence.findall('Frame')[2]
            source_data['laser_power_min'] = int(
                first_frame.findall('PVStateShard')[0].findall('PVStateValue')
                [1].findall('IndexedValue')[0].get('value'))
            print('Took min laser data from frame 3, not frame 1, due to bruker metadata error.')
            # Apparently sometimes the metadata will only include the
            # laser value at the very beginning
        except:
            source_data['laser_power_min'] = laser_power_overall
            source_data['laser_power_max'] = laser_power_overall
            print('Used overall laser power.')

    # Save data
    with open(os.path.join(os.path.split(xml_source_file)[0], 'scan.json'), 'w') as f:
        json.dump(source_data, f, indent=4)
# Nowcast file header
filepath_Nowcast = '../../data/raw/Nowcast' + today + '.csv'
if not os.path.isfile(filepath_Nowcast):
    fileNowcast = open(filepath_Nowcast, 'a', newline='')
    csvWriter = csv.writer(fileNowcast, delimiter=',')
    csvWriter.writerow(['Time', 'Forecast', 'Latitude', 'Longitude', 'Name'])
    fileNowcast.close()

# loop every 30 mins
while day == datetime.now().day:
    timeNow = datetime.now()
    timestamp = datetime.now().strftime("%H:%M")

    # collect Nowcast data
    with open(filepath_Nowcast, 'a', newline='') as fileNowcast:
        request = urllib.request.Request(urlNowcast)
        parsed = objectify.parse(urlopen(request))
        root = parsed.getroot()
        timestamp = root.item.forecastIssue.get('time')
        csvWriter = csv.writer(fileNowcast, delimiter=',')
        for area in root.iter('area'):
            csvWriter.writerow([timestamp, area.get('forecast'),
                                area.get("lat"), area.get("lon"), area.get("name")])

    # wait 30 minutes
    time.sleep(1800)

# completed a day
print(datetime.now())
def __init__(self, path, xmlFile):
    self.path = path
    self.project = {}
    self.xmlFile = xmlFile
    xmltree = objectify.parse(xmlFile)
    self.root = xmltree.getroot()
class Infinite(object):
    """
    classdocs
    """

    def __init__(self, xml):
        """
        Constructs a Parser object for Infinite Result .xml
        """
        self.xml = objectify.fromstring(etree.tostring(xml))
        self.barcode = self.xml.xpath('/MeasurementResultData/Plate/BC/text()')
        self.sections = self.xml.xpath('/MeasurementResultData/Section')
        self.wc = XMLWalker(self.xml)
        xp = "//Section/Data?run:int=@Cycle" + "/Well?well=@Pos/Single?value:int=text()"
        xp = "//Section/Data?run:int=@Cycle/Well?well=@Pos/Multiple?location:well1=@MRW_Position&value=text()"
        pr = AbsorbanceMultiRead(self.sections[0])
        print pr.result


file1 = 'E_PTT_BeamLocation_A12345678W_20140729_225738.xml'
file2 = 'result.xml'
root1 = objectify.parse(file1)
root2 = objectify.parse(file2)
inf = Infinite(root1)
from lxml import objectify
import pandas as pd

path = 'seaborn-data/Performance_MNR.xml'
# parse the XML file
parsed = objectify.parse(open(path))
# get a reference to the root node of the XML file
root = parsed.getroot()
print(root)

data = []
skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ', 'DESIRED_CHANGE', 'DECIMAL_PLACES']
for elt in root.INDICATOR:
    el_data = {}
    for child in elt.getchildren():
        if child.tag in skip_fields:
            continue
        el_data[child.tag] = child.pyval
    data.append(el_data)

perf = pd.DataFrame(data)
perf.head()  # first five rows
print(perf)
def make_model_from_file(self, path):
    obj = objectify.parse(path)
    root = obj.getroot()
    self._make_model(root)
from lxml import objectify
from joblib import load
import argparse
from tqdm import tqdm

parser = argparse.ArgumentParser(
    description='Utility program to convert supplied xml file to txt file for usage with the given programs.')
parser.add_argument('-xp', '--xmlpath', metavar='', type=str,
                    help='Path to the training file (.xml).', required=True)
parser.add_argument('-tp', '--txtpath', metavar='', type=str,
                    help='Path to the folder where the converted training file (.txt) will be saved.',
                    required=True)
parser.add_argument('-m', '--mode', metavar='', type=int,
                    help='Mode 1 = Convert data file; Mode 2 = Convert label file.',
                    choices=[1, 2], required=True)
parser.add_argument('-tetr', '--testortrain', metavar='', type=int,
                    help='tetr 1 = Convert train file; tetr 2 = Convert test file.',
                    choices=[1, 2], required=True)
args = parser.parse_args()

if args.mode == 1:
    xml_data_file = objectify.parse(open(args.xmlpath, encoding="utf-8"))
    xml_data_file_root = xml_data_file.getroot()
    articles = []
    print("Reading in the data file:")
    for i in tqdm(xml_data_file_root.getchildren()):
        articles.append((i.attrib['id'], ' '.join(e for e in i.itertext()).replace('\n', ' ')))
    if args.testortrain == 1:
        f = open(args.txtpath + "/train.txt", 'w')
    else:
        f = open(args.txtpath + "/test.txt", 'w')
    print("Generating the converted text file...")
    for i in articles:
import os
from lxml import etree
from lxml import objectify

file_name = "countries.xml"
file = open(file_name, 'rb')
tree = objectify.parse(file)
file.close()

country = tree.findall("country")
print("List of countries in xml file:\n")
for c in country:
    print(c.get("name"))

root = tree.getroot()
tmp = etree.SubElement(root, 'country')
tmp.set("name", "Serbia")
rank = etree.SubElement(tmp, 'rank')
rank._setText('99')

country = tree.findall("country")
print("\nList of countries in xml file after addition:\n")
for c in country:
    print(c.get("name"))

# pass the file name directly: lxml writes bytes, which fails on a text-mode file object
tree.write('new_xml.xml', encoding='utf-8')
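# An alternative sketch for the append step above using the objectify E-factory;
# these are standard lxml.objectify calls, but the original script does not use
# them, so treat this as an illustration only.
E = objectify.E
root.append(E.country(E.rank(99), name='Serbia'))    # typed child, no _setText needed
objectify.deannotate(root, cleanup_namespaces=True)  # strip py-type annotations
etree.ElementTree(root).write('new_xml2.xml', encoding='utf-8')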
                    default='N')

# Setting the 'columns' environment variable to a value greater than 80 (default)
# in order to avoid assertion errors from argparse for long input strings.
os.environ["COLUMNS"] = "81"

args = parser.parse_args()

# Check to see whether the files exist or not.
exists_train_file = os.path.isfile(args.train)
exists_train_label_file = os.path.isfile(args.trainlabel)

if exists_train_file and exists_train_label_file:
    # Checking file extensions
    if args.train.endswith('.xml') and args.trainlabel.endswith('.xml'):
        # Creating the xml object/tree
        training_file = objectify.parse(open(args.train, encoding="utf-8"))
        training_label_file = objectify.parse(open(args.trainlabel))

        # To access the root element
        root_data_file = training_file.getroot()
        root_data_label_file = training_label_file.getroot()

        training_data = []
        training_labels = []

        print("Reading in the training corpus:")
        for i in tqdm(root_data_file.getchildren()):
            training_data.append(' '.join(e for e in i.itertext()))

        print("Reading in the training label file:")
        for row in tqdm(root_data_label_file.getchildren()):
def __init__(self, tcx_file):
    tree = objectify.parse(tcx_file)
    self.root = tree.getroot()
    self.activity = self.root.Activities.Activity
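# A hedged sketch of walking the objectified TCX tree held by the class above.
# Lap/Track/Trackpoint/Time is the standard Garmin TCX layout; whether the
# original class exposes such a helper is an assumption.
def trackpoint_times(activity):
    """Yield the <Time> text of every trackpoint in every lap."""
    for lap in activity.Lap:
        for point in lap.Track.Trackpoint:
            yield str(point.Time)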
def add_results(self, result_set, is_build):
    try:
        tree = objectify.parse(self.__file_target__)
        root = tree.getroot()
        for result in root.iter('result'):
            if result.get('name'):
                result_set.name = result.get('name')
            if result.get('source'):
                result_set.source = result.get('source')
            if result.get('args'):
                result_set.args = result.get('args')
            if is_build:
                result_set.builds[result.get('source')] = result.get('args')
            else:
                result_set.imports[result.get('source')] = result.get('args')
            for weakness in result.iter('weakness'):
                if not weakness.get('id') in result_set.weaknesses:
                    weakness_obj = Weakness.from_xml(weakness, result_set)
                    result_set[weakness_obj.name] = weakness_obj
                weakness_obj = result_set[weakness.get('id')]
                for suite in weakness.iter('suite'):
                    if not suite.get('dir') in weakness_obj.suites:
                        suite_obj = Suite.from_xml(suite, weakness_obj)
                        weakness_obj[suite_obj.directory] = suite_obj
                    suite_obj = weakness_obj[suite.get('dir')]
                    for flaw in suite.iter('flaw'):
                        filename = flaw.get('file')
                        function = flaw.get('function')
                        line = int(flaw.get('line'))
                        if filename not in suite_obj.files:
                            file = File.from_xml(flaw, suite_obj)
                            suite_obj[filename] = file
                        if function not in suite_obj[filename].functions:
                            function_obj = Function.from_xml(flaw, suite_obj[filename])
                            suite_obj[filename][function] = function_obj
                        if line not in suite_obj[filename][function].lines:
                            line_obj = Line.from_xml(flaw, suite_obj[filename][function])
                            suite_obj[filename][function][line] = line_obj
                        flaw_obj = Flaw.from_xml(flaw, suite_obj[filename][function][line],
                                                 result_set.source)
                        if flaw_obj not in suite_obj[filename][function][line].get_Flaws():
                            suite_obj[filename][function][line].add_Flaw(flaw_obj)
                    for bug in suite.iter('bug'):
                        filename = bug.get('filename')
                        function = bug.get('function')
                        line = int(bug.get('line'))
                        if filename not in suite_obj.files:
                            file = File.from_xml(bug, filename)
                            file.suite = suite_obj
                            suite_obj[filename] = file
                        if function not in suite_obj[filename].functions:
                            function_obj = Function.from_xml(bug, function)
                            function_obj.file = suite_obj[filename]
                            suite_obj[filename][function] = function_obj
                        if line not in suite_obj[filename][function].lines:
                            line_obj = Line.from_xml(bug, line)
                            line_obj.function = suite_obj[filename][function]
                            suite_obj[filename][function][line] = line_obj
                        bug_obj = Bug.from_xml(bug, result_set.source)
                        bug_obj.line = suite_obj[filename][function][line]
                        suite_obj[filename][function][line].add_Bug(bug_obj)
    except lxml.etree.XMLSyntaxError:
        logging.error('Syntax error reading XML [%s]' % self.__file_target__)
        return