def __init__(self, path):
    """
    Build the chapter from the html file at path.

    self.unit - the unit to which this Chapter belongs (e.g., 'Pathways &
                Advance Engineering'); taken from the name of the directory
                containing path
    self.data - TabData instances for each topic

    Raises Exception when no x:ExcelWorkbook element is found in the html.
    """
    self.data = []
    html = utils.getHtml(path)
    self.unit = os.path.basename(os.path.dirname(path))
    self.num, self.chapter = self.getChapterInfo(os.path.basename(path))

    workbook_match = RegExUtils.getTagPattern('x:ExcelWorkbook').search(html)
    if not workbook_match:
        raise Exception("could not get TABS data from file (%s)" % path)
    print('found data')

    # strip x prefix from all elements
    record = XmlRecord(xml=workbook_match.group(0).replace('x:', ''))
    record.xpath_delimiter = '/'
    worksheets = record.selectNodes(
        record.dom, "ExcelWorkbook/ExcelWorksheets/ExcelWorksheet")
    print('creating %d tabs' % len(worksheets))

    for worksheet in worksheets:
        tab = TabData(worksheet, self.unit)
        # we ignore the 'Cover sheet'
        if tab.name.lower() == 'cover sheet':
            continue
        # tabs are numbered from 1 in the order they are kept
        tab.num = len(self) + 1
        self.append(tab)
def __init__(self, xml, payload_constructor=None):
    """
    Parse a record from an xml string and cache its header and payload values.

    xml - the record, as a string
    payload_constructor - optional; self.default_payload_constructor is used
        when this is not supplied (or is falsy)

    If the payload cannot produce an author display string, self.authors is
    set to the placeholder 'no authors found' and a message is printed.
    """
    self.payload_constructor = (payload_constructor
                                or self.default_payload_constructor)
    XmlRecord.__init__(self, xml=xml)  # xml is a string

    # values from the record header
    self.pid = self.get_header_field("PID")
    self.pub_date = self.get_header_field("keyDateYMD")
    self.ark = self.get_header_field("ark")
    self.doi = self.get_header_field("doi")

    # get values from payload
    self.payload = self.get_payload()
    self.title = self.payload.get_title()
    self.pub_type = self.payload.get_genre()
    self.journal = self.payload.get_journal()
    self.collaboration = self.payload.get_collaboration()
    try:
        self.authors = self.payload.get_authors_display()
    except Exception:
        # best effort only; KeyboardInterrupt / SystemExit are deliberately
        # NOT swallowed here
        print('could not get get_authors_display for %s' % self.pid)
        self.authors = 'no authors found'
    self.ncar_authors = self.payload.get_ncar_authors()

    # these fields must be populated externally (e.g., by Reporter)
    self.num_yellowstone_authors = ''
    self.yellowstone_authors = ''
    self.sum_author_charges = ''
    self.other_ncar_authors = ''
def __init__(self):
    """
    Build a <wgbh_lexicon/> document from the terms of a LexiconWorkSheet.

    Every term that has a category contributes a chain of nested elements
    under the document root: one "category" element followed by one
    "segment" element per entry of term.segments. A child already present
    under the current parent (as reported by self.findChild) is reused
    rather than duplicated. The element for the leaf segment (as decided by
    self._is_leaf_segment) receives the term's id and prettyTerm.
    """
    XmlRecord.__init__(self, xml="<wgbh_lexicon/>")
    self.doc.setAttribute("timestamp", time.asctime())
    self.terms = LexiconWorkSheet()
    for i, term in enumerate(self.terms):
        # some terms have no category, skip these!
        if term.category:
            # progress report every 100th term
            if i % 100 == 0:
                print '%d/%d - %s' % (i, len(self.terms), term.prettyTerm)
            xpath = term.xpath  # not used below
            parent = self.doc
            # NOTE(review): term_parts is the same object as term.segments,
            # so the insert below presumably also changes the term itself --
            # confirm that _is_leaf_segment expects the category at index 0
            term_parts = term.segments
            term_parts.insert(0, term.category)
            for j, part in enumerate(term_parts):
                isLeaf = self._is_leaf_segment(j, term)
                isCategory = j == 0
                # category names are looked up in lexicon_set_map; a falsy
                # lookup result falls back to the raw part (and/or idiom)
                itemText = isCategory and lexicon_set_map[part] or part
                child = self.findChild(parent, itemText)
                nodeName = isCategory and "category" or "segment"
                if not child:
                    child = self.addElement(parent, nodeName)
                    # print 'added %s' % nodeName
                    child.setAttribute("text", itemText)
                if isLeaf:
                    child.setAttribute("id", term.id)
                    child.setAttribute("term", term.prettyTerm)
                    # XmlUtils.setText (child, term.prettyTerm)
                # descend: the next part is attached below this element
                parent = child
        # hard stop once the term index passes 10000
        if i > 10000:
            break
def __init__(self, url):
    """
    Fetch the page at url and load its "Archive Browse" table as xml.

    The first TABLE that follows the "Archive Browse" heading is cut out of
    the html, cleaned up with the webcatUtils helpers so that it can be parsed
    as xml, and used to initialize this XmlRecord. The elements returned by
    self.getElements(self.doc) are kept in self.nodeElements.

    Raises Exception when the heading, or a TABLE after it, is not found.
    """
    self.url = url
    pagedata = urllib.urlopen(url)
    try:
        html = pagedata.read()
    finally:
        # release the connection even when the read fails
        pagedata.close()

    marker = html.find("<FONT SIZE=4 FACE=arial>Archive Browse</FONT>")
    if marker < 0:
        raise Exception("browse html not found")

    tablePat = re.compile("<TABLE[^>]*?>(.*?)</TABLE>", re.S)
    m = tablePat.search(html[marker:])
    if not m:
        raise Exception("browse TABLE not found")

    ## following are manipulations required to convert HTML into XML
    tableXml = webcatUtils.stripComments(m.group())
    # some attributes have no value or unquoted value
    tableXml = webcatUtils.fixAttributes(tableXml)
    # bold tags are interleaved with "A" tags!
    tableXml = webcatUtils.removeBoldTags(tableXml)
    # Font tags just make processing difficult
    tableXml = webcatUtils.removeFontTags(tableXml)

    XmlRecord.__init__(self, xml=tableXml)
    self.nodeElements = self.getElements(self.doc)
def __init__(self, path):
    """
    Load the record at path and cache its identifying fields.

    Sets self.path, self.filename (base name of the path) and, via
    self._get_field, self.accessionNum, self.recordID and self.url.
    """
    XmlRecord.__init__(self, path=path)
    self.path = path
    self.filename = os.path.basename(self.path)
    # each attribute is named after the field it is read from
    for field in ("accessionNum", "recordID", "url"):
        setattr(self, field, self._get_field(field))
def __init__(self, data, exc_info=None, preprocessor=None):
    """
    Wrap the outcome of a web service request.

    data - response object; its read() result is decoded as utf-8
    exc_info - when truthy the request is treated as failed: self.error
        becomes ServiceError(exc_info) and data is not read
    preprocessor - optional callable applied to the response text before it
        is parsed

    Afterwards self.doc is the parsed XmlRecord (or None), and self.error is
    None, a ServiceError, or the text of a DDSWebService:error element.
    """
    self.data = data
    self.error = None
    self.doc = None

    if exc_info:
        self.error = ServiceError(exc_info)
        return

    try:
        responseText = unicode(data.read(), 'utf-8')  # experimental 12/2/2010
        if preprocessor:
            responseText = preprocessor(responseText)
        self.doc = XmlRecord(xml=responseText)
        webResponseErrorNode = self.doc.selectSingleNode(
            self.doc.dom, 'DDSWebService:error')
        if webResponseErrorNode:
            self.error = XmlUtils.getText(webResponseErrorNode)
    except Exception:
        # record read/decode/parse problems on the response instead of
        # raising; KeyboardInterrupt / SystemExit are no longer swallowed
        self.error = ServiceError([
            "ServiceResponse: Could not parse XML",
            sys.exc_info()[1]
        ])
def __init__(self):
    """
    Create an <opml> document with a populated <head> and an empty <body>.

    The head holds a <title> (self.title) and a <concept> element carrying
    the fixed attributes listed below; self.head and self.body reference the
    two top-level children.
    """
    XmlRecord.__init__(self, xml="<opml></opml>")
    self.title = "Subject - Math"
    self.nameSpaceUri = "http://ns.nsdl.org/ncs/fields"
    self.schemaUri = "http://ns.nsdl.org/ncs/msp2/1.00/schemas/fields/mathSubject.xsd"
    self.version = "2.0"
    self.defaultNamespace = "http://ns.nsdl.org/ncs/fields"

    # namespace / schema declarations on the root element
    self.setSchemaLocation(self.schemaUri, self.nameSpaceUri)
    self.setSchemaNamespace()
    self.doc.setAttribute("xmlns:" + self.schema_instance_namespace,
                          self.defaultNamespace)

    self.head = self.addElement(self.doc, "head")
    self.setText(self.addElement(self.head, "title"), self.title)

    concept_attributes = (
        ("language", "en-us"),
        ("metaFormat", "osm"),
        ("metaVersion", "1.0.0"),
        ("text", "Mathematics Subject"),
        ("audience", "cataloger"),
        ("path", "/record/coverage/location/@state"),
        ("deftn", "mathematical topics the resource addresses"),
        ("collapseExpand", "true"),
    )
    concept = self.addElement(self.head, "concept")
    for attr_name, attr_value in concept_attributes:
        concept.setAttribute(attr_name, attr_value)

    self.body = self.addElement(self.doc, "body")
def __init__(self, path=None, xml=None):
    """
    Read an existing record, or make a new one when no source is given.

    path, xml - passed on to XmlRecord.__init__ when at least one of them is
        truthy; otherwise self.makeRecord() is called instead
    """
    self.fields_list = self.field_specs.keys()
    if not (path or xml):
        self.makeRecord()
        return
    XmlRecord.__init__(self, path=path, xml=xml)
def __init__(self, instance, xmlFormat):
    """
    Load the framework config file for xmlFormat.

    The file location comes from
    instance._get_framework_config_path(xmlFormat + ".xml").
    """
    self.instance = instance
    self.xmlFormat = xmlFormat
    config_name = xmlFormat + ".xml"
    config_path = instance._get_framework_config_path(config_name)
    XmlRecord.__init__(self, path=config_path)
def getResponseDoc(self, params=None, opts=None):
    """
    returns response as XmlRecord

    Returns None when the service answers with a DDSWebService:error whose
    code is 'noRecordsMatch'.

    Raises SimpleClientError
      - with the error element's text, for any other DDSWebService:error
      - with a "DDSClient: Could not parse XML" message, for any other
        failure while fetching or parsing the response
    """
    try:
        data = self.getData(params, opts)
        responseDoc = XmlRecord(xml=data)
        webResponseErrorNode = responseDoc.selectSingleNode(
            responseDoc.dom, 'DDSWebService:error')
        if webResponseErrorNode:
            errorCode = webResponseErrorNode.getAttribute('code')
            if errorCode == 'noRecordsMatch':
                return None
            print('errorCode %s' % errorCode)
            raise SimpleClientError(XmlUtils.getText(webResponseErrorNode))
    except SimpleClientError:
        # an error reported by the service is not a parse failure; let it
        # through without the "Could not parse XML" prefix
        raise
    except Exception as msg:
        raise SimpleClientError("DDSClient: Could not parse XML: %s" % msg)
    return responseDoc
def __init__(self, path=None, xml=None):
    """
    Load the record and expose its elements as instance attributes.

    Every name in self.field_list starts out as None; each element returned
    by self.getElements(self.doc) then sets the attribute named after its tag
    to the element's text (and prints the assignment).
    """
    XmlRecord.__init__(self, path, xml)
    for field in self.field_list:
        setattr(self, field, None)
    for child in self.getElements(self.doc):
        tag = child.tagName
        text = self.getText(child)
        setattr(self, tag, text)
        print('set %s to %s' % (tag, text))
def __init__(self, path, NSES=None):
    """
    Load the record at path and derive its group, band and id values.

    NSES - stored as self.NSES; not otherwise used in this constructor
    """
    self.NSES = NSES
    self.path = path
    XmlRecord.__init__(self, path=path)
    self.group = self._get_group()
    self.band = self._get_band()
    nses_id = self._get_nses_id()
    self.nses_id = nses_id
    # numId is computed from the nses_id by getNumId
    self.numId = getNumId(nses_id)
def write(self, path, verbose=False):
    """
    require a path so we don't tromp the template

    When self.dowrites is false nothing is written; the target path is
    only reported.
    """
    if not self.dowrites:
        print("WOULD have written to " + path)
        return
    XmlRecord.write(self, path, verbose)
def __init__(self):
    """
    Create an empty <td-lexicon/> document and fill it from a
    LexiconWorkSheet (term/id maps, node map, then the document itself).
    """
    self.docId = "td-lexicon"
    root_xml = "<%s/>" % self.docId
    XmlRecord.__init__(self, xml=root_xml)
    self.lexiconData = LexiconWorkSheet()
    term_map, id_map = self.makeTermAndIdMap()
    self.termMap = term_map
    self.idMap = id_map
    self.nodeMap = NodeMap()
    self.processNodes()
    self.makeDoc()
def initializeFromBaseMappings(self):
    """
    Fill this mapping from output/dr_2_recId_mappings.xml: each mapping
    element's drNumber attribute becomes a key whose value is the element's
    recordID attribute.
    """
    base = XmlRecord(path="output/dr_2_recId_mappings.xml")
    for node in base.selectNodes(base.dom, 'dr_2_recId_mappings:mapping'):
        key = node.getAttribute('drNumber')
        value = node.getAttribute('recordID')
        self[key] = value
    print('%d base mappings found' % len(self))
def initializeFromBaseMappingsBOG(self):
    """
    Fill this mapping from input/accessionNumberMappings.xml: each mapping
    element's drNumber attribute becomes a key whose value is the element's
    queryString attribute.
    """
    base = XmlRecord(path="input/accessionNumberMappings.xml")
    for node in base.selectNodes(base.dom, 'accessionNumberMappings:mapping'):
        key = node.getAttribute('drNumber')
        value = node.getAttribute('queryString')
        self[key] = value
    print('%d base mappings found' % len(self))
def asXml(self):
    """
    Return an XmlRecord rooted at <accessionNumberMappings>, stamped with the
    current time, holding one <mapping> child per key of this mapping (each
    filled in by self.populateMappingElement).
    """
    rec = XmlRecord(xml="<accessionNumberMappings />")
    rec.doc.setAttribute("date", time.asctime())
    for drNum in self.keys():
        self.populateMappingElement(rec.addElement(rec.doc, "mapping"), drNum)
    return rec
def write(self, path=None):
    """
    Write the record with XmlRecord.write, then report where it went.

    The reported location is path when given, otherwise self.path; nothing
    is printed when both are None.
    """
    XmlRecord.write(self, path)
    writePath = path if path is not None else self.path
    if writePath is None:
        return
    print("xml written to %s" % writePath)
def __init__(self, xml):
    """
    Parse a savedResource record from xml and cache three of its values:
    self.savedXmlFormat, self.id and self.collection.
    """
    XmlRecord.__init__(self, xml=xml)
    # attribute name -> path of the text it is read from
    for attr, xpath in (
            ("savedXmlFormat", 'savedResource/savedXmlFormat'),
            ("id", 'savedResource/id'),
            ("collection", 'savedResource/ddsRepoInfo/collectionKey')):
        setattr(self, attr, self.getTextAtPath(xpath))
def __init__(self, path):
    """
    Load the ead document at path.

    self.archdesc - ArchDesc built from the ead/archdesc element
    self.collections - result of self._get_collections()
    self.itemMap - every item of every collection, keyed by item.id
    """
    XmlRecord.__init__(self, path=path)
    self.archdesc = ArchDesc(
        self.selectSingleNode(self.dom, 'ead/archdesc'), self)
    self.collections = self._get_collections()

    item_map = self.itemMap = UserDict()
    for collection in self.collections:
        for entry in collection.getItems():
            item_map[entry.id] = entry
def __init__(self, collection):
    """
    Read the meta-metadata file for collection and hold one RecordInfo per
    collectionInfo:rec node found in it.

    The file is <self.baseDir>/<collection>.xml.
    """
    UserList.__init__(self)
    self.collection = collection
    self.dataPath = os.path.join(self.baseDir, collection + '.xml')
    print("%s %s" % ("DATA_PATH: ", self.dataPath))
    self.rec = XmlRecord(path=self.dataPath)
    nodes = self.rec.selectNodes(self.rec.dom, "collectionInfo:rec")
    print("%d recs read from meta-metadata" % len(nodes))
    # build every RecordInfo first, then add them in document order
    for info in [RecordInfo(node) for node in nodes]:
        self.append(info)
def __init__(self, element):
    """
    Wrap a collection element (re-parsed from element.toxml()) and cache
    selected text values as attributes.
    """
    XmlRecord.__init__(self, xml=element.toxml())
    # attribute name -> path of the text it is read from
    for attr, xpath in (
            ("searchKey", "collection:searchKey"),
            ("recordId", "collection:recordId"),
            ("xmlFormat",
             "collection:additionalMetadata:dlese_collect:formatOfRecords"),
            ("numRecords",
             "collection:additionalMetadata:dlese_collect:numRecords"),
            ("name", "collection:renderingGuidelines:label")):
        setattr(self, attr, self.getTextAtPath(xpath))
def __init__(self):
    """
    Load the document at self.data_path and build one CollectionInfo for
    each ncsCollections/collection node; they are kept, in document order,
    in self.collectionInfos.
    """
    XmlRecord.__init__(self, path=self.data_path)
    nodes = self.selectNodes(self.dom, "ncsCollections/collection")
    print('%d collections found' % len(nodes))
    self.collectionInfos = []
    self.collectionInfos.extend(CollectionInfo(node) for node in nodes)
def __init__(self):
    """
    Load output/FINAL-accessionNumberMappings.xml into this mapping: each
    mapping element's drNumber attribute becomes a key whose value is the
    element's queryString attribute.
    """
    UserDict.__init__(self)
    source = XmlRecord('output/FINAL-accessionNumberMappings.xml')
    nodes = source.selectNodes(source.dom, 'accessionNumberMappings:mapping')
    print('%d mappings found' % len(nodes))
    for node in nodes:
        key = node.getAttribute("drNumber")
        value = node.getAttribute("queryString")
        self[key] = value
def populateXml(self, xmlData):
    """
    Copy label/value rows from xmlData into this record.

    xmlData is parsed as xml; for every element returned by
    getElements(doc) the first TD child supplies the field label (a trailing
    ":" is dropped) and the B child of the second TD supplies the value. Each
    pair is added under self.dom as a child named
    self.normalizeTagName(label).

    Rows whose label is empty are skipped, since they cannot name a child
    element.
    """
    dataRec = XmlRecord(xml=xmlData)
    for dataElement in dataRec.getElements(dataRec.doc):
        cells = XmlUtils.getChildElements(dataElement, "TD")
        name = XmlUtils.getText(cells[0]).strip()
        # endswith is safe on an empty label, where name[-1] would raise
        if name.endswith(":"):
            name = name[:-1]
        if not name:
            continue
        value = XmlUtils.getText(XmlUtils.getChild("B", cells[1])).strip()
        XmlUtils.addChild(self.dom, self.normalizeTagName(name), value)
def writeTopicRecords(self):
    """
    Write one <AsnDocuments/> file per topic into self.topicCache.

    Each file is named <topic>.xml, carries the topic as an attribute of the
    root, and has the topic's entries appended as children of the root.
    """
    for topic in self.keys():
        entries = self[topic]
        print("%s - %d" % (topic, len(entries)))
        rec = XmlRecord(xml="<AsnDocuments/>")
        rec.doc.setAttribute("topic", topic)
        for entry in entries:
            rec.doc.appendChild(entry)
        target = os.path.join(self.topicCache, topic + '.xml')
        rec.write(target)
        print("%s %s" % ('wrote to', target))
def writeXml(self, path=None):
    """
    write record info file to disk as xml

    path - destination; "not-fy10-records.xml" is used when it is not given
    """
    if not path:
        path = "not-fy10-records.xml"
    rec = XmlRecord(xml="<not-fy10-records/>")
    root = rec.doc
    root.setAttribute("date", time.asctime(time.localtime()))
    for info in self:
        root.appendChild(info.asElement())
    rec.write(path)
    print("%s %s" % ('wrote to ', path))
def asXml(self):
    """
    Return an XmlRecord whose root is named self.rootElementName, stamped
    with the current time, holding one <mapping> child per key of this
    mapping (each filled in by self.populateMappingElement).
    """
    from JloXml import XmlRecord, XmlUtils
    import time

    rec = XmlRecord(xml="<%s />" % self.rootElementName)
    rec.doc.setAttribute("date", time.asctime())
    for drNum in self.keys():
        self.populateMappingElement(rec.addElement(rec.doc, "mapping"), drNum)
    return rec
def __init__(self):
    """
    Load the ListTerms response stored at titles_listing.

    self.terms - one Term per DDSWebService/ListTerms/terms/term element
    self.multiTerms - the terms whose docCount is greater than 1
    """
    XmlRecord.__init__(self, path=titles_listing)
    nodes = self.selectNodes(self.dom, 'DDSWebService/ListTerms/terms/term')
    print('%d termElements found' % len(nodes))
    self.terms = [Term(node) for node in nodes]
    self.multiTerms = [term for term in self.terms if term.docCount > 1]
    print('%d multiTerms found' % len(self.multiTerms))
def __init__(self, path):
    """
    Load the record at path and derive the values built from it.

    Sets self.id (from self._make_id), self.filename (id + ".xml"),
    self.timeStamp, self.ndrHandle (text at ndrMetadataInfo/ndrHandle),
    self.ncs_item and self.dcs_data.
    """
    XmlRecord.__init__(self, path=path)
    record_id = self._make_id()
    self.id = record_id
    self.filename = record_id + ".xml"
    self.timeStamp = self._get_time_stamp()
    self.ndrHandle = self.getTextAtPath("ndrMetadataInfo/ndrHandle")
    self.ncs_item = self._make_ncs_item()
    self.dcs_data = self._make_dcs_data_record()