def __init__(self, f):
    """Parse the XML document *f* and wrap each child of its
    <artifacts> element in an Artifact instance."""
    self._artifacts = []
    document_root = ElementTree().parse(f)
    self._artifacts.extend(
        Artifact(child) for child in document_root.find('artifacts'))
def dump(self, output):
    """Serialize self._resources as an indented <resources>/<node> XML
    tree onto *output* (Python 2: uses the `print >>` statement).

    Keys in self._resources_order are tuples of path components; every
    prefix of a key becomes one nested <node>.  Leaf values are looked
    up in self._dictionary under the '/'-joined key.
    """
    # tempo maps a key prefix (tuple) to the Element created for it;
    # the empty tuple maps to the root element.
    tempo = {}
    root = Element('resources')
    root.tail = '\n'
    tempo[()] = root
    for key in self._resources_order:
        for i in range(1, len(key) + 1):
            if key[0:i] not in tempo:
                parent = tempo[key[0:i - 1]]
                value = self._resources.get(key[0:i], None)
                if value is None:
                    # Intermediate path component: bare <node name="...">.
                    elem = SubElement(parent, 'node', name=key[i - 1])
                else:
                    # Leaf: resolve the value via the '/'-joined full key.
                    fullKey = key[0]
                    for j in range(1, i):
                        fullKey += '/' + key[j]
                    newValue = self._dictionary[fullKey]
                    elem = SubElement(parent, 'node', name=key[i - 1],
                                      value=newValue)
                # Indent children one SPACES unit per nesting level.
                parent.text = elem.tail = '\n' + i * SPACES
                tempo[key[0:i]] = elem
    fix_it(root)  # post-processing hook defined elsewhere in this module
    print >> output, '<?xml version="1.0" encoding="UTF-8"?>'
    ElementTree(root).write(output, 'ascii')
def write(pattern, fpath):
    """Render *pattern* to an XML file at *fpath* and pretty-print it
    in place with the external `tidy` tool.

    NOTE(review): fpath is interpolated into a shell command line with
    only double quotes around it; a path containing quotes or shell
    metacharacters would break or inject.  Acceptable only for trusted
    paths -- consider subprocess with an argument list.
    """
    import os
    from elementtree.ElementTree import Element, ElementTree
    root = Element('xml')
    toelem(pattern, root)  # project helper: serializes *pattern* into root
    ElementTree(root).write(fpath)
    os.system('tidy -xml -iqm \"%s\"' % fpath)
def __call__(self):
    """Export the task tree for self.toplevel issues to self.oname as XML.

    Walks the top-level issues first (`part_of` tree), then drains
    self.pending -- issues pulled in via `depends_on`/`needs` links --
    skipping any already in self.processed.  self.VERBOSE gates the
    debug chatter.
    """
    self.processed = {}
    self.pending = {}
    tree = Element("project", id="SW-Tasks")
    SubElement(tree, "label").text = ("Task(s) %s." % self.toplevel)
    SubElement(tree, "import-resources", file="workers.xml")
    if self.VERBOSE > 1:
        self.debug("Writing `part_of` tree ...")
    for issue_id in self.toplevel:
        if self.VERBOSE > 2:
            self.debug(
                "Issue %d is top-level. Processing it now ..." % issue_id)
        issue = self.db.issue.getnode(issue_id)
        self.process_issue(issue, tree)
    if self.VERBOSE > 1:
        self.debug("\nWriting issues due to `depends_on` or `needs` ...")
    while self.pending:
        (issue_id, issue) = self.pending.popitem()
        if issue_id not in self.processed:
            if self.VERBOSE > 2:
                self.debug("Adding %s ... " % issue_id)
            self.process_issue(issue, tree)
        elif self.VERBOSE > 3:
            self.debug("%s already included." % issue_id)
    # FIX: the output handle was previously left open (and named `file`,
    # shadowing the builtin); a context manager guarantees flush/close
    # before we report completion.
    with open(self.oname, "w") as out:
        ElementTree(tree).write(out, encoding="utf-8")
    if self.VERBOSE > 1:
        self.debug("Done.")
def requestMonitorId(self, monitorTag):
    """Return the id text of the first monitor carrying *monitorTag*.

    Issues a getMonitors query against the monitoring HTTP API using the
    instance's url/apiKey/output/version fields (Python 2: urllib2,
    StringIO).
    """
    req = urllib2.Request(str('{0}/?apikey={1}&output={2}' +
                              '&version={3}&action=getMonitors&tag={4}')
                          .format(self.url, self.apiKey, self.output,
                                  self.version, monitorTag))
    res = urllib2.urlopen(req)
    xml = res.read()
    root = ElementTree(file=StringIO.StringIO(xml)).getroot()
    # NOTE(review): if no monitor matches the tag, find() returns None and
    # this raises AttributeError -- presumably only known tags are passed.
    return root.find('./monitor/id').text
def loadBlissLexicon(fname):
    """Load a Bliss lexicon and return sorted (orth, phon) pairs,
    skipping special [bracketed] entries."""
    from elementtree.ElementTree import ElementTree
    tree = ElementTree(file=gOpenIn(fname))
    pron_map = pronunciationsFromXmlLexicon(tree)
    pairs = []
    for orth in pron_map:
        if orth.startswith('[') and orth.endswith(']'):
            continue  # skip non-word markers such as [SILENCE]
        for phon in pron_map[orth]:
            pairs.append((orth, phon))
    return sorted(pairs)
def install_xpi(self, filename):
    """Install a Firefox XPI into this profile's extensions directory.

    Extracts the archive into extensions/<basename>, reads the add-on's
    em:id from install.rdf, and renames the directory to that id (the
    layout Firefox expects).
    """
    extract_path = os.path.join(self.profiledir, 'extensions',
                                os.path.basename(filename))
    os.makedirs(extract_path)
    # FIX: close the archive deterministically -- the handle previously
    # leaked, which also blocks the rename below on Windows.
    with zipfile.ZipFile(filename, 'r') as z:
        z.extractall(extract_path)
    doc = ElementTree(file=os.path.join(extract_path, 'install.rdf'))
    eid = doc.find('.//{http://www.mozilla.org/2004/em-rdf#}id').text
    os.rename(extract_path,
              os.path.join(os.path.dirname(extract_path), eid))
def tostring(node, *args, **kwds): if 'pretty' in kwds or 'pretty_print' in kwds: if 'pretty' in kwds: del kwds['pretty'] if 'pretty_print' in kwds: del kwds['pretty_print'] indent(node) kwds.pop('pretty', None) kwds.pop('pretty_print', None) oss = StringIO() ElementTree(node).write(oss, *args, **kwds) return oss.getvalue()
def createXmlFile(filePath, rootElement, version='1.0', encoding=ENCODING_IN):
    """ Create an xml file """
    doc = ElementTree(rootElement)
    outfile = open(filePath, 'w')
    # Hand-written declaration: *version*/*encoding* only affect this
    # header; the body below is always written with ENCODING_IN.
    outfile.write('<?xml version="' + version + '" encoding="' + encoding + '" ?>')
    # NOTE(review): _write/_root are private pre-1.3 elementtree APIs that
    # bypass ElementTree.write(); fragile against library upgrades.
    doc._write(outfile, doc._root, ENCODING_IN, {})
    outfile.close()
def process(self, lang):
    """Load the '<lang>.xml' resource file and walk every child of a
    <resources> root; other root tags are ignored."""
    assert len(lang) == 2, 'Language name must be two letters long'
    doc = ElementTree(file='%s.xml' % lang)
    root = doc.getroot()
    if root.tag != 'resources':
        return
    for child in root:
        self.walk(child, (child.get('name'),), lang)
def gettemp():
    """Collect tempprobe objects for every TemperatureProbe element in
    `omreport chassis temps` XML output (Python 2: popen2)."""
    cmd = ["omreport", "chassis", "temps", "-fmt", "xml"]
    (om_read, om_write) = popen2.popen2(cmd)
    root = ElementTree().parse(om_read)
    return [tempprobe(node)
            for node in root.getiterator()
            if node.tag == "TemperatureProbe"]
def listMonitors(self):
    """Return [(id, tag, name), ...] for every monitor known to the
    monitoring HTTP API (Python 2: urllib2, StringIO)."""
    url = str('{0}/?apikey={1}&output={2}' +
              '&version={3}&action=getMonitors').format(
        self.url, self.apiKey, self.output, self.version)
    response = urllib2.urlopen(urllib2.Request(url))
    document = response.read()
    root = ElementTree(file=StringIO.StringIO(document)).getroot()
    monitors = []
    for monitor in root:
        monitors.append((monitor.find('id').text,
                         monitor.find('tag').text,
                         monitor.find('name').text))
    return monitors
def xml_to_dict(fPath):
    '''
    Converts study data from (ref man generated) XML to a dictionary matching study IDs (keys) to title/abstract tuples (values). For example: dict[n] might map to a tuple [t_n, a_n] where t_n is the title of the nth paper and a_n is the abstract
    '''
    ref_ids_to_abs = {}
    num_no_abs = 0  # records lacking an abstract (tallied, never reported)
    tree = ElementTree(file=fPath)
    for record in tree.findall('.//record'):
        pubmed_id = None
        # NOTE(review): eval() on document text works for numeric ids but
        # executes arbitrary XML content; int() would be safer.
        refmanid = eval(record.findtext('.//rec-number'))
        # attempt to grab the pubmed id
        pubmed_id = ""
        try:
            # The pubmed UI number is embedded in a "-"-separated notes field.
            pubmed = record.findtext('.//notes/style')
            pubmed = pubmed.split("-")
            for i in range(len(pubmed)):
                if "UI" in pubmed[i]:
                    pubmed_str = pubmed[i + 1].strip()
                    pubmed_id = eval("".join(
                        [x for x in pubmed_str if x in string.digits]))
        except Exception, ex:
            print "problem getting pmid ..."
            print ex
        ab_text = record.findtext('.//abstract/style')
        if ab_text is None:
            num_no_abs += 1
        title_text = record.findtext('.//titles/title/style')
        # Also grab keywords
        keywords = [
            keyword.text.strip().lower()
            for keyword in record.findall(".//keywords/keyword/style")
        ]
        # and authors
        authors = [
            author.text for author in record.findall(
                ".//contributors/authors/author/style")
        ]
        # journal
        journal = record.findtext(".//periodical/abbr-1/style")
        ref_ids_to_abs[refmanid] = {"title":title_text, "abstract":ab_text, "journal":journal,\
            "keywords":keywords, "pmid":pubmed_id, "authors":authors}
    # NOTE(review): ref_ids_to_abs is built but never returned -- confirm
    # whether a trailing `return ref_ids_to_abs` was lost.
def PrintStats():
    """Looks at the XML output and dumps render time."""
    try:
        from elementtree.ElementTree import ElementTree
    except:
        # elementtree is an optional dependency here; stats are best-effort.
        print "Unable to load ElementTree, skipping statistics."
    else:
        doc = ElementTree(file='stats.xml')
        for timer in doc.findall('//timer'):
            # The overall wall-clock timer is tagged "totaltime"; its first
            # child holds the seconds value.
            if "totaltime" == timer.get("name"):
                print "Render time was %s seconds" % timer[0].text
                break
def gencix(major, minor):
    """Generate the Perl stdlib CIX file for ActivePerl <major>.<minor>.

    Scans the installed library tree with ci2.py, merges the scan output
    with built-ins parsed from perlfunc.pod, strips __local__ variables,
    prettifies and writes perl-<major>.<minor>.cix.  Python 2.
    """
    # First generate first pass at the CILE over all of the lib tree
    cixfile = "activeperl-%d.%d.cix" % (major, minor)
    command = "python ../../../ci2.py scan -n -r -p -l Perl -T /tmp/ActivePerl-%d.%d/perl/lib -i \"*.pm\"> %s" % (
        major, minor, cixfile)
    retval = os.system(command)
    if retval != 0:
        print "Error scanning ActivePerl library"
        sys.exit(retval)
    #
    # Grab the output of that scan
    root = parse(cixfile).getroot()
    newroot = Element("codeintel", version="2.0")
    # NOTE: `cixfile` is rebound here from a filename to the <file> Element
    # that accumulates all scanned blobs.
    cixfile = SubElement(newroot, "file", lang="Perl",
                         mtime=str(int(time.time())),
                         path=os.path.basename('perl.cix'))
    for file in root.getiterator('file'):
        print >> sys.stderr, "Processing", file.get('path')
        for blob in file:
            if blob.get("src"):
                # Don't want the src string.
                del blob.attrib["src"]
            cixfile.append(blob)
    cix = genPerlStdCIX(
        cixfile,
        "/tmp/ActivePerl-%d.%d/perl/lib/pod/perlfunc.pod" % (major, minor))
    # Map each element to its parent so locals can be detached below
    # (elementtree has no parent pointers).
    parent_map = dict((c, p) for p in cixfile.getiterator() for c in p)
    for variable in newroot.getiterator('variable'):
        attributes = variable.get('attributes')
        if attributes and '__local__' in variable.get('attributes'):
            parent_map[variable].remove(variable)
    # Generate the CIX.
    print >> sys.stderr, "Prettying"
    prettify(newroot)
    tree = ElementTree(newroot)
    #fname = '../../../lib/codeintel2/stdlibs/perl-%d.%d.cix' % (major, minor)
    fname = 'perl-%d.%d.cix' % (major, minor)
    #os.system('p4 edit %s' % fname)
    stream = open(fname, "w")
    print >> sys.stderr, "Writing"
    stream.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    tree.write(stream)
    stream.close()
def getpdisks(controller="0"):
    """List pdisk objects for every DCStorageObject element reported by
    `omreport storage pdisk` for *controller* (Python 2: popen2)."""
    cmd = ["omreport", "storage", "pdisk",
           "controller=" + controller, "-fmt", "xml"]
    (om_read, om_write) = popen2.popen2(cmd)
    root = ElementTree().parse(om_read)
    return [pdisk(node)
            for node in root.getiterator()
            if node.tag == "DCStorageObject"]
def dump(self, output, lang):
    """Write the localized resource tree for *lang* to *output* as XML.

    Every key prefix becomes a <node>.  Leaf values prefer the *lang*
    translation; missing translations fall back to English with
    toBeTranslated='true', and entries with no English string at all are
    marked obsolete='true'.  Python 2 (`print >>`).
    """
    tempo = {}  # key prefix (tuple) -> Element already created for it
    root = Element('resources')
    root.tail = '\n'
    tempo[()] = root
    for key in self._resources_order:
        for i in range(1, len(key) + 1):
            if key[0:i] not in tempo:
                parent = tempo[key[0:i - 1]]
                value = self._resources.get(key[0:i], None)
                if value is None:
                    # Intermediate path component: bare <node name="...">.
                    elem = SubElement(parent, 'node', name=key[i - 1])
                else:
                    localized = value.get(lang, None)
                    english = value.get('en', None)
                    if english is None:
                        print >> sys.stderr, 'English file does not have the string for', key[
                            0:i]
                        # FIX: diagnostic previously read "obosolete".
                        print >> sys.stderr, ' entry is marked as obsolete.'
                        elem = SubElement(parent, 'node', name=key[i - 1],
                                          value=localized, obsolete='true')
                    elif localized is not None:
                        elem = SubElement(parent, 'node', name=key[i - 1],
                                          value=localized)
                    else:
                        # No translation yet: ship English, flagged for work.
                        elem = SubElement(parent, 'node', name=key[i - 1],
                                          value=english,
                                          toBeTranslated='true')
                parent.text = elem.tail = '\n' + i * SPACES
                tempo[key[0:i]] = elem
    fix_it(root)  # post-processing hook defined elsewhere in this module
    print >> output, '<?xml version="1.0" encoding="UTF-8"?>'
    ElementTree(root).write(output, 'utf-8')
def getpower():
    """Parse `omreport chassis pwrsupplies` XML output.

    Returns [(status, redunstatus), pwrsupplies] where the first tuple
    describes the Redundancy element (empty strings when absent) and
    pwrsupplies is a list of powersupply objects.  Python 2 (popen2).
    """
    cmd = ["omreport", "chassis", "pwrsupplies", "-fmt", "xml"]
    (omstdin, omstdout) = popen2.popen2(cmd)
    tree = ElementTree()
    root = tree.parse(omstdin)
    status = ""
    # FIX: redunstatus was only bound inside the Redundancy branch, so the
    # return below raised NameError whenever no Redundancy element existed.
    redunstatus = ""
    pwrsupplies = []
    for element in root.getiterator():
        if element.tag == "Redundancy":
            status = element.get("status")
            redunstatus = element.findtext("RedunStatus")
        if element.tag == "PowerSupply":
            pwrsupplies.append(powersupply(element))
    return [(status, redunstatus), pwrsupplies]
def GetElementsFromXML(self, filename):
    'Extracts a dictionary of elements from the gcc_xml file.'
    tree = ElementTree()
    try:
        tree.parse(filename)
    except ExpatError:
        # FIX: call-style raise -- the old `raise E, msg` form is a
        # syntax error on Python 3.
        raise InvalidXMLError('Not a XML file: %s' % filename)
    root = tree.getroot()
    if root.tag != 'GCC_XML':
        raise InvalidXMLError('Not a valid GCC_XML file')
    # Build a dictionary of id -> (element, None); the None slot is for
    # later annotation by callers.  FIX: iterate the root directly --
    # getchildren() was deprecated and removed in Python 3.9.
    elements = {}
    for element in root:
        element_id = element.get('id')
        if element_id:
            elements[element_id] = element, None
    return elements
def currency_data(
        self,
        xml_url='http://www.ecb.europa.eu/stats/eurofxref/eurofxref-hist-90d.xml'
):
    """Returns the most recent currency data with tuples."""
    now = datetime.now()
    today = u'-'.join(
        [unicode(now.year), unicode(now.month), unicode(now.day)])
    # Skip the network round-trip when we already refreshed today.
    if today == self.updated_date():
        return
    etree90 = ElementTree()
    try:
        data90 = urllib2.urlopen(xml_url)
        # NOTE(review): unbound-method call style; equivalent to
        # etree90.parse(data90).  root90[2] is presumably the Cube
        # container of the ECB feed -- confirm against the feed schema.
        root90 = ElementTree.parse(etree90, data90)
        DATA90 = root90[2]
        DATA_list = []
        for DATA in DATA90:
            # One entry per day: (date, {currency: rate}).
            daily_data_list = []
            for daily_data in DATA:
                daily_data_tuple = (daily_data.get('currency'),
                                    daily_data.get('rate'))
                daily_data_list.append(daily_data_tuple)
            ddl = (DATA.get('time'), dict(daily_data_list))
            DATA_list.append(ddl)
        date = DATA_list[0][0]
        try:
            if self.date != date:
                self.currencies = DATA_list
                self.date = date
                # Collect any currency codes we have not seen before.
                for date in self.currencies:
                    for key in date[1].keys():
                        if key not in self.codes:
                            self.codes.append(key)
                self.amount_of_days = range(1, len(self.currencies) + 1)
        except AttributeError:
            # First run: self.date does not exist yet.
            self.currencies = DATA_list
    except:
        # NOTE(review): bare except silently swallows network/parse errors,
        # leaving stale data in place; should at least be logged.
        pass
def test():
    """Smoke-test for the HTML-builder helpers: assembles a small
    document and pretty-prints it to stdout."""
    import sys
    doc = HTML(HEAD('ankjwajhsjasa', META('blabal')))
    table = TABLE()
    # NOTE(review): if these builders follow ElementTree semantics,
    # table.append(...) returns None and BODY receives None here --
    # confirm the builder API actually returns the appended child.
    body = BODY(
        table.append(
            TBODY('ahasa', TR(
                TD('blabla'),
                TD('blabla'),
                TD('blabla'),
            ))))
    p1, p2 = P('blabla'), P('bli')
    p2.text += 'dhsdhshkds'
    doc += body
    doc += (p1, p2)
    tree = ElementTree(doc)
    write_pretty(tree, sys.stdout)
def __init__(self, lstData, GeoType, strPath, strFilename, strLayername): dctWriteKML = {'Point': self.writePoint, 'Polyline': self.writeLine, 'Polygon': self.writePolygon} #Create new element tree with a root of KML... objRoot = Element("{http://earth.google.com/kml/2.1}kml") objTree = ElementTree(element=objRoot) elemDoc = SubElement(objRoot, 'Document') elemDocName = SubElement(elemDoc, 'name') #According the KML spec, default Polystyle stuff goes here... elemDocName.text = strLayername #Add a document name element here...populate from supplied parameters for objRow in lstData: elemPlace = SubElement(elemDoc, 'Placemark') elemName =SubElement(elemPlace,'name') elemName.text = objRow['Name'] #Add support for the description tag... elemDesc = SubElement(elemPlace, 'description') elemDesc.text = objRow['Description'] elemGeo = dctWriteKML.get(GeoType, self.errHandler)(objRow['Geometry'], elemPlace) elemPlace.append(elemGeo) self.Write(objTree, strPath, strFilename)
def output_workers(self, file=sys.stdout):
    """Emit a <resources-list> XML document describing workers.

    Creates a "weekend" timetable plus one timetable per weekday (each
    marking that day off), then one <resource> per user with
    use-timetable references derived from the user's weekly hours.
    Python 2 (byte-string .decode).  NOTE: the `file` parameter shadows
    the builtin and its sys.stdout default is bound at def time.
    """
    week = []
    tree = Element("resources-list", type="worker")
    tt = SubElement(tree, "timetable", id="weekend")
    SubElement(tt, "dayoff", type="weekday").text = "saturday"
    SubElement(tt, "dayoff", type="weekday").text = "sunday"
    for day in ("monday", "tuesday", "wednesday", "thursday", "friday"):
        tt = SubElement(tree, "timetable", id=day)
        SubElement(tt, "dayoff", type="weekday").text = day
        week.append(day)
    stati = [self.db.user_status.lookup(i)
             for i in ("valid", "obsolete", "system")]
    for uid in self.db.user.filter(None, dict(status=stati)):
        # Prefer the current dynamic user record, fall back to the last one.
        dyn = self.get_user_dynamic(self.db, uid, self.now)
        if not dyn:
            dyn = self.last_user_dynamic(self.db, uid)
        user = self.db.user.getnode(uid)
        if not user.nickname and not user.username:
            continue
        r = SubElement \
            ( tree
            , "resource"
            , id = (user.nickname or user.username).decode ("utf-8")
            , fullname = (user.realname or user.username).decode ("utf-8")
            )
        SubElement(r, "use-timetable", idref="weekend")
        wh = 38.5  # default full-time weekly hours
        if dyn:
            wh = self.weekly_hours(dyn) or 38.5
        # Convert weekly hours to a count of working days assuming
        # 7.75 h/day; the "+ 7.75 * 4 - 1" term rounds up.
        wh *= 4
        wh += 7.75 * 4 - 1
        wh = int(wh)
        wh = int(wh / (7.75 * 4))
        # Reference the per-day timetables (each marks its day off) for
        # every weekday beyond the computed working-day count.
        for i in range(wh, 5):
            SubElement(r, "use-timetable", idref=week[i])
    ElementTree(tree).write(file, encoding="utf-8")
import sys
from elementtree.ElementTree import ElementTree

# Minimal CLI: print the text of every element in the XML document
# argv[1] matching the path expression argv[2] (Python 2 print).
mydoc = ElementTree(file=sys.argv[1])
for e in mydoc.findall(sys.argv[2]):
    # Elements with no text content print as "None".
    print e.text
def xml_to_dict(fpath):
    '''
    Converts study data from (ref man generated) XML to a dictionary matching study IDs (keys) to title/abstract tuples (values). For example: dict[n] might map to a tuple [t_n, a_n] where t_n is the title of the nth paper and a_n is the abstract
    '''
    ref_ids_to_abs = {}
    parsing_errors = []  # accumulated per-record error strings
    num_no_abs = 0  # records lacking an abstract
    tree = ElementTree(file=fpath)
    num_failed = 0
    for record in tree.findall('.//record'):
        pubmed_id, refmanid = None, None
        refman_version = record.findtext('.//source-app')
        path_str = None
        ### here we check the RefMan version, and change
        # the xml path accordingly. this fixes issue #7
        if refman_version == 'Reference Manager 12.0':
            path_str = './/rec-number/style'
            journal_path_str = './/periodical/full-title/style'
        elif refman_version == 'Reference Manager 11.0':
            path_str = './/rec-number'
            journal_path_str = './/periodical/abbr-1/style'
        # NOTE(review): journal_path_str is only bound for RefMan 11/12;
        # for other versions path_str stays None, findtext below fails,
        # and the record is skipped via the except + refmanid-None guard.
        try:
            refmanid = int(record.findtext(path_str))
        except:
            error = "Unable to parse record '%s' in '%s'" % (
                record, os.path.basename(fpath))
            #print "failed to parse refman document"
            parsing_errors.append(error)
        if refmanid is not None:
            # attempt to grab the pubmed id
            pubmed_id = ""
            try:
                pubmed = record.findtext('.//notes/style')
                pubmed = pubmed.split("-")
                for i in range(len(pubmed)):
                    if "UI" in pubmed[i]:
                        pubmed_str = pubmed[i + 1].strip()
                        pubmed_id = int("".join(
                            [x for x in pubmed_str if x in string.digits]))
            except Exception, ex:
                error = "Problem getting pmid from '%s' in '%s'" % (
                    record, os.path.basename(fpath))
                parsing_errors.append(error)
                #print "problem getting pmid ..."
                #print ex
                #print("\n")
            ab_text = record.findtext('.//abstract/style')
            if ab_text is None:
                num_no_abs += 1
            title_text = record.findtext('.//titles/title/style')
            # Also grab keywords
            keywords = [
                keyword.text.strip().lower()
                for keyword in record.findall(".//keywords/keyword/style")
            ]
            # and authors
            authors = [
                author.text for author in record.findall(
                    ".//contributors/authors/author/style")
            ]
            # journal
            journal = record.findtext(journal_path_str)
            ref_ids_to_abs[refmanid] = {"title":title_text, "abstract":ab_text, "journal":journal,\
                "keywords":keywords, "pmid":pubmed_id, "authors":authors}
    # NOTE(review): neither ref_ids_to_abs nor parsing_errors is returned
    # in this chunk -- confirm whether the return was lost.
from elementtree.ElementTree import Element, ElementTree, SubElement

# Preprocess the raw PyCon schedule XML: wrap <description> bodies in
# CDATA (they contain markup) before parsing, then begin building the
# output document with the conference metadata.
_src_f = open("pycon_src.xml")
_content = _src_f.read()
_src_f.close()
_content = _content.replace("<description>", "<description>\n<![CDATA[\n")
_content = _content.replace("</description>", "\n\n\n]]>\n</description>")
# NOTE(review): the next two replacements are no-ops as written; they look
# like entity unescapes ("&lt;" -> "<", "&gt;" -> ">") that were mangled at
# some point -- confirm against the original script.
_content = _content.replace("<", "<")
_content = _content.replace(">", ">")
_tmp_f = open("pycon_src_tmp.xml", "wb")
_tmp_f.write(_content)
_tmp_f.close()
_src_xml = ElementTree(file="pycon_src_tmp.xml")
_root = Element("xml")
_conference = SubElement(_root, "conference")
# add pyconfr main values
SubElement(_conference, "title").text = "PyCONFR 2010"
SubElement(_conference, "subtitle").text = \
    "Rendez-vous annuel des utilisateurs de Python organisee par l'Association Francophone Python"
SubElement(_conference, "venue").text = "Cyberbase de la Cite des Sciences"
SubElement(_conference, "city").text = "Paris"
SubElement(_conference, "start").text = "2010-10-28"
SubElement(_conference, "end").text = "2010-10-29"
SubElement(_conference, "days").text = "2"
SubElement(_conference, "day_change").text = "08:00"
def main(options, args):
    """Driver: build lexicon, fragmentizer and LM artifacts per *options*.

    Steps (each gated by its option): load the reference lexicon, load
    and strip the g2p model, extend the lexicon with fragments, emit the
    LM token list, generate/count LM events, and dump fragmentations and
    modified LM training text.
    """
    # 1. load reference lexicon
    print('loading reference lexicon ...')
    lexicon = loadBlissLexicon(options.lexicon)
    knownWords = set([orth for orth, phon in lexicon])

    # 2. load model for fragmentizing unknown words
    if options.subliminal_lexicon:
        print('loading subliminal lexicon ...')
        subliminalLexicon = loadBlissLexicon(options.subliminal_lexicon)
    else:
        subliminalLexicon = None
    if options.subliminal_g2p:
        print('loading subliminal g2p model ...')
        subliminalG2p = pickle.load(open(options.subliminal_g2p))
    else:
        subliminalG2p = None
    if options.g2pModel:
        print('loading g2p model ...')
        model = pickle.load(open(options.g2pModel))
        oldSize, newSize = model.strip()
        print('stripped number of multigrams from %d to %d' % (oldSize, newSize))
        fragmentizer = Fragmentizer(model)
        if subliminalLexicon:
            fragmentizer.addSupervised(subliminalLexicon)
        if subliminalG2p:
            fragmentizer.addSupervised(subliminalG2p)
        graphones = model.sequitur.symbols()
        graphones.remove(model.sequitur.symbol(model.sequitur.term))
    else:
        model = fragmentizer = graphones = None

    # 3. add fragments to lexicon
    if options.write_lexicon:
        print('creating extended lexicon ...')
        xmlLexicon = ElementTree(file=options.lexicon)
        if options.model_type == 'phonemes':
            changeSyntaticToPhonetic(xmlLexicon)
        else:
            addGraphonesToLexicon(xmlLexicon, graphones)
        xmlLexicon.write(gOpenOut(options.write_lexicon), defaultEncoding)

    # 4. determine set of LM tokens
    vocabulary = mGramCounts.ClosedVocablary()
    vocabulary.add(['<s>', '</s>'])
    if options.model_type == 'flat-hybrid':
        vocabulary.add(filter(isLmToken, knownWords), soft=True)
    if graphones:
        vocabulary.add(starmap(lmToken, graphones))
    vocabulary.sort()
    if options.write_tokens:
        f = gOpenOut(options.write_tokens, defaultEncoding)
        if options.model_type == 'phonemes':
            phonemes = set(p for orth, phon in lexicon for p in phon)
            phonemes.add('#1')
            if 'si' in phonemes:
                phonemes.remove('si')
            for p in sorted(phonemes):
                print(p, file=f)
        else:
            for w in vocabulary:
                if w is not None:
                    print(w, file=f)

    # 5./6. set-up LM event generator
    if options.write_counts or options.write_events:
        order = options.order - 1
        if options.model_type == 'flat-hybrid':
            events = HybridEventGenerator(knownWords, fragmentizer, order)
            if options.range_type == 'fragments':
                events.setFragmentRange()
            elif options.range_type == 'words':
                events.setTrueWordRange()
            else:
                # FIX: this was `assert ValueError(...)`, which asserts a
                # truthy exception instance and therefore never fires; an
                # unknown range_type must actually raise.
                raise ValueError(options.range_type)
        elif options.model_type == 'fragments':
            events = OovEventGenerator(knownWords, fragmentizer, order)
        elif options.model_type == 'phonemes':
            events = PhonemeEventGenerator(lexicon, order)

    # 5. create modified LM training corpus counts
    if options.write_events:
        print('creating sequence model events ...')
        f = gOpenOut(options.write_events, defaultEncoding)
        for event, count in events(gOpenIn(options.text, defaultEncoding)):
            print(repr(event), '\t', count, file=f)

    # 6. count LM events
    if options.write_counts:
        print('creating sequence model counts ...')
        counts = mGramCounts.SimpleMultifileStorage()
        counts.addIter(events(gOpenIn(options.text, defaultEncoding)))
        mGramCounts.TextStorage.write(
            gOpenOut(options.write_counts, defaultEncoding), counts)

    # 7. dump list of OOV words and their corresponding fragmentation
    if options.write_fragments:
        print('dumping fragments ...')
        f = gOpenOut(options.write_fragments, defaultEncoding)
        events = OovFragmentGenerator(knownWords, fragmentizer)
        fragments = events(gOpenIn(options.text, defaultEncoding))
        for event in list(fragments.keys()):
            print(event, '\t', ' '.join(fragments[event]), file=f)

    # 8. dump modified LM training text
    if options.write_lm_text:
        print('dumping modified LM training text ...')
        f = gOpenOut(options.write_lm_text, defaultEncoding)
        events = OovFragmentGenerator(knownWords, fragmentizer)
        for line in gOpenIn(options.text, defaultEncoding):
            words = line.split()
            modWords = events.modifyLmText(words)
            print(" ".join(modWords), file=f)
<author> <name>John Doe</name> </author> <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id> <entry> <title type="xhtml">Atom-Powered <br/> Robots Run Amok</title> <link href="http://example.org/2003/12/13/atom03"/> <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> <updated>2003-12-13T18:30:02Z</updated> <summary>Some text.</summary> </entry> </feed>""" etree = ElementTree(file=StringIO.StringIO(content)) feed = XML(content) print etree print feed #print len(feed) #print feed[0] #print feed.keys() ATOM = "http://www.w3.org/2005/Atom" entry = etree.getiterator('{%s}entry'%ATOM)[0] new_lin = SubElement(entry, '{%s}link'%ATOM) new_lin.set('rel', 'source') new_lin.set('href', 'http://somthing.org')
def xmlToDict(fPath, stopPath=None, splitTxt=False, get_pubmed=False):
    '''
    Converts study data from (ref man generated) XML to a dictionary matching study IDs (keys) to title/abstract tuples (values). For example: dict[n] might map to a tuple [t_n, a_n] where t_n is the title of the nth paper and a_n is the abstract
    '''
    refIDToAbs = {}
    numNoPubmeds = 0
    numNoAbs = 0  # Keep track of how many studies have no abstracts.
    tree = ElementTree(file=fPath)
    for record in tree.findall('.//record'):
        pubmed_id = None
        # NOTE(review): eval() on document text; int() would be safer.
        refmanid = eval(record.findall('.//rec-number')[0].text)
        try:
            # The pubmed UI number is embedded in a "-"-separated notes field.
            pubmed = record.findall('.//notes/style')[0].text
            pubmed = pubmed.split("-")
            for i in range(len(pubmed)):
                if "UI" in pubmed[i]:
                    pubmed_str = pubmed[i+1].strip()
                    pubmed_id = eval("".join(
                        [x for x in pubmed_str if x in string.digits]))
                    #pubmed_id = eval(pubmed[i+1].replace("PT", "").replace("IN", ""))
                    #print pubmed
                    break
        except Exception, ex:
            print ex
        if pubmed_id is None:
            #if not "Cochrane" in pubmed[2]:
            #    pdb.set_trace()
            numNoPubmeds += 1
            print "%s has no pubmed id" % refmanid
        abstract = record.findall('.//abstract/style')
        abText = ""
        try:
            if abstract and splitTxt:
                # Tokenize, lowercase and stop-filter the abstract.
                abText = (abstract[0].text).split(" ")
                abText = [string.lower(s) for s in abText]
                abText = cleanUpTxt(abText, stopListPath=stopPath)
            elif abstract:
                abText = abstract[0].text
            else:
                numNoAbs += 1
        except:
            # NOTE(review): debugger breakpoint left in library code.
            pdb.set_trace()
        title = ""
        if splitTxt:
            title = cleanUpTxt(string.lower(
                record.findall('.//titles/title/style')[0].text).split(" "),
                stopListPath=stopPath)
        else:
            try:
                title = record.findall('.//titles/title/style')[0].text
            except:
                pdb.set_trace()
        # Also grab keywords
        keywords = [keyword.text.strip().lower()
                    for keyword in record.findall(".//keywords/keyword/style")]
        # NOTE(review): `or True` makes this condition unconditional --
        # likely a debugging leftover.
        if pubmed_id is not None or True:
            refIDToAbs[refmanid] = [title, abText, keywords, pubmed_id]
    # NOTE(review): no return statement visible in this chunk; callers
    # presumably expect refIDToAbs.
def genPerlStdCIX(filename, stream):
    """Parse perlfunc.pod and write CIX data for Perl's built-ins.

    Reads the POD (from Perforce via `p4 print`, or a local
    perlfunc.pod), chops the "Alphabetical Listing" section into
    per-function =item blocks, renders each into calltip signatures plus
    a short description, and emits them as <scope> elements under a
    synthetic "*" blob, finally writing the whole tree to *stream*.

    NOTE(review): the multi-line string literals below appear to have had
    their newlines collapsed to spaces upstream; the line-oriented
    parsing (splitlines) depends on the original layout -- restore it
    from version control before trusting the output.  Also note the
    call site elsewhere in this file passes (Element, path) rather than
    (path, stream); the two appear to come from different revisions.
    """
    log.debug("genPerlStdCIX(filename=%r, stream=%r)", filename, stream)
    root = Element("codeintel", version="2.0")
    cixfile = SubElement(root, "file", lang="Perl",
                         mtime=str(int(time.time())),
                         path=os.path.basename(filename))
    # Process Perl's built-ins out of perlfunc.pod.
    if 1:
        # Fetch the POD from Perforce.
        p4path = "//depot/main/Apps/Gecko/src/Core/pod/perlfunc.pod"
        cmd = "p4 print -q %s" % p4path
        i, o, e = os.popen3(cmd)
        lines = o.read().splitlines(0)
        i.close()
        o.close()
        retval = e.close()
        if retval:
            raise Error("error running: %s" % cmd)
    else:
        # Dead branch: read a local copy instead.
        lines = open("perlfunc.pod", 'r').read().splitlines(0)
    # Parse the "Alphabetical Listing of Perl Functions" into a list of
    # 'blocks' where each block is one command-"=item" block.
    start = lines.index("=head2 Alphabetical Listing of Perl Functions")
    blocks = []
    block = None
    level = 0

    def parseItem(line):
        # "=item NAME..." -> (NAME, full signature text after "=item").
        sig = line.split(None, 1)[1]
        name = re.split("[ \t\n(/]", sig, 1)[0]
        return name, sig

    for i, line in enumerate(lines[start:]):
        if line.startswith("=over"):
            level += 1
        if line.startswith("=back"):
            level -= 1
            if level == 0:  # done the 'Alphabetical Listing' section
                if block:
                    blocks.append(block)
                break
        if level > 1:
            # Nested =over content belongs to the current block's body.
            if block:
                block["lines"].append(line)
        elif block is None and not line.startswith("=item"):
            continue
        elif block is None and line.startswith("=item"):
            block = {}
            name, sig = parseItem(line)
            block = {"name": name, "sigs": [sig], "lines": []}
        elif line.startswith("=item"):
            name, sig = parseItem(line)
            if name == block["name"]:
                # Consecutive =items for the same name are extra signatures.
                block["sigs"].append(sig)
            else:
                blocks.append(block)
                block = {"name": name, "sigs": [sig], "lines": []}
        else:
            if not block["lines"] and not line.strip():
                pass  # drop leading empty lines
            elif not line.strip() and block["lines"] and \
                    not block["lines"][-1].strip():
                pass  # collapse multiple blank lines
            else:
                block["lines"].append(line)
    # pprint(blocks)
    # Process the blocks into a list of command info dicts.

    def podrender(pod):
        # Convert POD inline markup (F<>, I<>, C<>, L<>) to plain text.
        rendered = pod
        rendered = re.sub("F<(.*?)>", r"\1", rendered)
        rendered = re.sub("I<(.*?)>", r"*\1*", rendered)

        def quoteifspaced(match):
            if ' ' in match.group(1):
                return "'%s'" % match.group(1)
            else:
                return match.group(1)
        rendered = re.sub("C<(.*?)>", quoteifspaced, rendered)

        def linkrepl(match):
            content = match.group(1)
            if content.startswith("/"):
                content = content[1:]
            if "/" in content:
                page, section = content.split("/", 1)
                content = "%s in '%s'" % (section, page)
            else:
                content = "'%s'" % content
            return content
        rendered = re.sub("L<(.*?)>", linkrepl, rendered)
        return rendered

    # These perl built-ins are grouped in perlfunc.pod.
    commands = []
    WIDTH = 60  # desc field width
    syscalls = """ getpwnam getgrnam gethostbyname getnetbyname getprotobyname getpwuid getgrgid getservbyname gethostbyaddr getnetbyaddr getprotobynumber getservbyport getpwent getgrent gethostent getnetent getprotoent getservent setpwent setgrent sethostent setnetent setprotoent setservent endpwent endgrent endhostent endnetent endprotoent endservent """.split()
    calltip_skips = "sub use require".split()
    for block in blocks:
        name, sigs, lines = block["name"], block["sigs"], block["lines"]
        if name == "-X":  # template for -r, -w, -f, ...
            # Expand the -X meta-entry into one command per file-test op.
            pattern = re.compile(r"^ (-\w)\t(.*)$")
            tlines = [line for line in lines if pattern.match(line)]
            for tline in tlines:
                tname, tdesc = pattern.match(tline).groups()
                tsigs = [s.replace("-X", tname) for s in sigs]
                command = {
                    "name": tname,
                    "sigs": tsigs,
                    "desc": textwrap.fill(tdesc, WIDTH)
                }
                commands.append(command)
        elif name in ("m", "q", "qq", "qr", "qx", "qw", "s", "tr", "y"):
            # Quote-like operators get hand-written sig/desc text.
            operators = {
                "m": """\ m/PATTERN/cgimosx /PATTERN/cgimosx Searches a string for a pattern match, and in scalar context returns true if it succeeds, false if it fails. """,
                "q": """\ q/STRING/ 'STRING' A single-quoted, literal string. """,
                "qq": """\ qq/STRING/ "STRING" A double-quoted, interpolated string. """,
                "qr": """\ qr/STRING/imosx Quotes (and possibly compiles) STRING as a regular expression. """,
                "qx": """\ qx/STRING/ `STRING` A string which is (possibly) interpolated and then executed as a system command. """,
                "qw": """\ qw/STRING/ Evaluates to a list of the words extracted out of STRING, using embedded whitespace as the word delimiters. """,
                "s": """\ s/PATTERN/REPLACEMENT/egimosx Searches a string for a pattern, and if found, replaces that pattern with the replacement text and returns the number of substitutions made. Otherwise it returns the empty string. """,
                "tr": """\ tr/SEARCHLIST/REPLACEMENTLIST/cds y/SEARCHLIST/REPLACEMENTLIST/cds Transliterates all occurrences of the characters found in the search list with the corresponding character in the replacement list. It returns the number of characters replaced or deleted. """,
                "y": """\ tr/SEARCHLIST/REPLACEMENTLIST/cds y/SEARCHLIST/REPLACEMENTLIST/cds Transliterates all occurrences of the characters found in the search list with the corresponding character in the replacement list. It returns the number of characters replaced or deleted. """,
            }
            # Split the hand-written text into signature lines (before the
            # first blank line) and description lines (after it).
            sigs = []
            desclines = None
            for line in operators[name].splitlines(0):
                if desclines is not None:
                    desclines.append(line.strip())
                elif not line.strip():
                    desclines = []
                else:
                    sigs.append(line.strip())
            command = {
                "name": name,
                "sigs": sigs,
                "desc": textwrap.fill(' '.join(desclines), WIDTH)
            }
            commands.append(command)
        elif name in syscalls:
            desc = "Performs the same function as the '%s' system call." % name
            desc = textwrap.fill(desc, WIDTH)
            # Append the list-context (and, where known, scalar-context)
            # return-value shapes for the getter families.
            getterListContext = {
                "getpw": "\n"
                         " ($name,$passwd,$uid,$gid,$quota,$comment,\n"
                         " $gcos,$dir,$shell,$expire) = %s",
                "getgr": "\n ($name,$passwd,$gid,$members) = %s",
                "gethost": "\n ($name,$aliases,$addrtype,$length,@addrs) = %s",
                "getnet": "\n ($name,$aliases,$addrtype,$net) = %s",
                "getproto": "\n ($name,$aliases,$proto) = %s",
                "getserv": "\n ($name,$aliases,$port,$proto) = %s",
            }
            getterScalarContext = {
                "getgrent": "$name = %s",
                "getgrgid": "$name = %s",
                "getgrnam": "$gid = %s",
                "gethostbyaddr": "$name = %s",
                "gethostbyname": "$addr = %s",
                "gethostent": "$name = %s",
                "getnetbyaddr": "$name = %s",
                "getnetbyname": "$net = %s",
                "getnetent": "$name = %s",
                "getprotobyname": "$num = %s",
                "getprotobynumber": "$name = %s",
                "getprotoent": "$name = %s",
                "getpwent": "$name = %s",
                "getpwnam": "$uid = %s",
                "getpwuid": "$name = %s",
                "getservbyname": "$num = %s",
                "getservbyport": "$name = %s",
                "getservent": "$name = %s",
            }
            for prefix, template in getterListContext.items():
                if name.startswith(prefix):
                    desc += template % sigs[0]
            if name in getterScalarContext:
                desc += "\nin list context or:\n " \
                        + getterScalarContext[name] % sigs[0]
            command = {"name": name, "desc": desc, "sigs": sigs}
            commands.append(command)
        elif name == "shmread":
            desc = """\ Reads the System V shared memory segment ID starting at position POS for size SIZE by attaching to it, copying out, and detaching from it. """
            desc = ' '.join([ln.strip() for ln in desc.splitlines(0)])
            command = {
                "name": name,
                "sigs": sigs,
                "desc": textwrap.fill(desc, WIDTH)
            }
            commands.append(command)
        elif name == "shmwrite":
            desc = """\ Writes the System V shared memory segment ID starting at position POS for size SIZE by attaching to it, copying in, and detaching from it. """
            desc = ' '.join([ln.strip() for ln in desc.splitlines(0)])
            command = {
                "name": name,
                "sigs": sigs,
                "desc": textwrap.fill(desc, WIDTH)
            }
            commands.append(command)
        elif name in calltip_skips:
            continue  # just drop the sub calltip: annoying
        else:
            # Parsing the description from the full description:
            # Pull out the first sentence up to a maximum of three lines
            # and one paragraph. If the first *two* sentences fit on the
            # first line, then use both.
            desc = ""
            sentencePat = re.compile(r"([^\.]+(?:\. |\.$))")
            if name in ("dbmclose", "dbmopen"):
                # Skip the first paragraph: "[This function...superceded by"
                lines = lines[lines.index('') + 1:]
            elif name == "do":
                # Skip the first sentence: "Not really a function."
                end = sentencePat.match(lines[0]).span()[1]
                lines[0] = lines[0][end:].lstrip()
            for i, line in enumerate(lines):
                if not line.strip():
                    break
                sentences = sentencePat.findall(line)
                if not sentences:
                    desc += line + ' '
                    continue
                elif i == 0 and len(sentences) > 1:
                    desc += ' '.join([s.strip() for s in sentences[:2]])
                else:
                    desc += sentences[0].strip()
                break
            command = {
                "name": name,
                "sigs": sigs,
                "desc": textwrap.fill(podrender(desc), WIDTH)
            }
            commands.append(command)
    # for command in commands:
    #     print
    #     print banner(command["name"], '-')
    #     print '\n'.join(command["sigs"])
    #     print
    #     print command["desc"]
    # Generate the CIX for each function.
    module_elt = SubElement(cixfile, "scope", ilk="blob", name="*")  # "built-ins" module
    for command in commands:
        name, sigs, desc = command["name"], command["sigs"], command["desc"]
        func_elt = SubElement(module_elt, "scope", ilk="function", name=name)
        if sigs:
            func_elt.set("signature", '\n'.join(sigs))
        if desc:
            # Keep only the first three lines of the wrapped description.
            doclines = desc.split('\n')[:3]
            # doclines = parseDocSummary(doclines)
            doc = '\n'.join(doclines)
            func_elt.set("doc", doc)
    # Generate the CIX.
    prettify(root)
    tree = ElementTree(root)
    stream.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    tree.write(stream)