def process_document(self, session, doc):
    """Run ``self.parser`` over a document and return the resulting SaxRecord.

    The document's raw data is handed to the parser through
    ``self.inputSource``; ``self.contentHandler`` collects the result.

    If parsing raises, the exception is always re-raised.  Before that,
    when ``self.keepError`` is true, ``self.errorPath`` is set to a
    '/'-joined path built from the handler's ``pathLines`` (the handler is
    left as it was); otherwise the handler is reset.

    On success the record is given the handler's ``elementHash``, a
    ``byteCount`` equal to ``len()`` of the raw data, and whatever
    ``self._copyData`` copies across from the document.
    """
    raw = doc.get_raw(session)
    self.inputSource.setByteStream(cStringIO.StringIO(raw))
    handler = self.contentHandler
    handler.reinit()
    try:
        self.parser.parse(self.inputSource)
    except:
        # Bare except is intentional: nothing is swallowed, see the
        # unconditional re-raise at the end of this handler.
        if not self.keepError:
            handler.reinit()
        else:
            # Build one path step per entry in pathLines.
            steps = []
            for idx in handler.pathLines:
                event = handler.currentText[idx]
                # Name runs from offset 2 up to (not including) the
                # character just before the first '{'.
                stop = event.index('{') - 1
                steps.append("%s[@SAXID='%s']" % (event[2:stop], idx))
            self.errorPath = '/'.join(steps)
        raise
    rec = SaxRecord(handler.currentText, raw,
                    wordCount=handler.recordWordCount)
    rec.elementHash = handler.elementHash
    rec.byteCount = len(raw)
    self._copyData(doc, rec)
    # Reset the shared handler so it is clean for the next document.
    handler.reinit()
    return rec
def create_record(self, session, rec=None):
    """Give a record a newly generated identifier and store it.

    If a handler is registered under 'info:srw/operation/1/create' in
    ``self.permissionHandlers``, the session must have a user and that
    user must pass the handler's ``hasPermission`` check; otherwise a
    PermissionException is raised.

    When ``rec`` is None an empty placeholder SaxRecord is created with
    the new identifier.  The record's ``recordStore`` is set to
    ``self.id`` before it is passed to ``self.store_record``.

    Returns the stored record.  ObjectAlreadyExistsException (and any
    other error from ``store_record``) propagates to the caller.
    """
    handler = self.permissionHandlers.get('info:srw/operation/1/create', None)
    if handler:
        if not session.user:
            raise PermissionException("Authenticated user required to "
                                      "create an object in %s" % self.id)
        if not handler.hasPermission(session, session.user):
            raise PermissionException("Permission required to create an "
                                      "object in %s" % self.id)

    new_id = self.generate_id(session)
    if rec is not None:
        rec.id = new_id
    else:
        # Create a placeholder
        rec = SaxRecord([], "", new_id)
    rec.recordStore = self.id

    try:
        self.store_record(session, rec)
    except ObjectAlreadyExistsException:
        # Back out id change: only a long identifier is undone by
        # decrementing self.currentId.
        if type(new_id) == long:
            self.currentId -= 1
        raise
    return rec
def process_document(self, session, doc):
    """Parse a document via ``self.parser`` and wrap the result in a SaxRecord.

    The raw data from ``doc.get_raw`` is fed to the parser through
    ``self.inputSource``, with ``self.contentHandler`` gathering the
    output.

    A parsing failure is never swallowed.  With ``self.keepError`` set,
    ``self.errorPath`` records a '/'-separated path derived from the
    handler's ``pathLines`` and the handler is left alone; without it the
    handler is reset.  Either way the original exception is re-raised.

    Returns the new record, carrying the handler's ``elementHash``, the
    length of the raw data as ``byteCount`` and the data copied by
    ``self._copyData``.
    """
    source_text = doc.get_raw(session)
    self.inputSource.setByteStream(cStringIO.StringIO(source_text))
    collector = self.contentHandler
    collector.reinit()

    def path_step(line_no):
        # One path component for the handler line at index line_no; the
        # name is sliced from offset 2 to just short of the character
        # preceding the first '{'.
        event = collector.currentText[line_no]
        name = event[2:event.index('{') - 1]
        return "%s[@SAXID='%s']" % (name, line_no)

    try:
        self.parser.parse(self.inputSource)
    except:
        # Splat.  Record or reset, then re-raise whatever was caught.
        if self.keepError:
            self.errorPath = '/'.join(
                [path_step(line_no) for line_no in collector.pathLines])
        else:
            collector.reinit()
        raise

    rec = SaxRecord(collector.currentText, source_text,
                    wordCount=collector.recordWordCount)
    rec.elementHash = collector.elementHash
    rec.byteCount = len(source_text)
    self._copyData(doc, rec)
    # Leave the handler clean for the next call.
    collector.reinit()
    return rec
def create_record(self, session, rec=None):
    """Store a record with its identifier cleared and return it.

    A supplied record has its ``id`` reset to None; when no record is
    given, an empty placeholder SaxRecord with a None identifier is
    created instead.  The record is then passed to ``self.store_record``.
    """
    if rec is not None:
        rec.id = None
    else:
        # Nothing supplied: store an empty placeholder.
        rec = SaxRecord([], "", None)
    self.store_record(session, rec)
    return rec
def create_record(self, session, rec=None):
    """Assign a newly generated identifier to a record and store it.

    Args:
        session: current session; ``session.user`` is consulted when a
            permission handler is configured.
        rec: record to store.  When None, an empty placeholder SaxRecord
            is created with the new identifier.

    Returns:
        The stored record, with ``id`` set to the generated identifier
        and ``recordStore`` set to ``self.id``.

    Raises:
        PermissionException: a handler is registered under
            'info:srw/operation/1/create' and the session has no user, or
            the handler's ``hasPermission`` check fails.
        ObjectAlreadyExistsException: re-raised from ``self.store_record``.
    """
    handler = self.permissionHandlers.get('info:srw/operation/1/create', None)
    if handler:
        if not session.user:
            raise PermissionException("Authenticated user required to create an object in %s" % self.id)
        if not handler.hasPermission(session, session.user):
            raise PermissionException("Permission required to create an object in %s" % self.id)

    new_id = self.generate_id(session)
    # Identity test: '== None' would call a record's own __eq__ and could
    # mistake a real record for a missing one.
    if rec is None:
        # Create a placeholder
        rec = SaxRecord([], "", new_id)
    else:
        rec.id = new_id
    rec.recordStore = self.id

    try:
        self.store_record(session, rec)
    except ObjectAlreadyExistsException:
        # Back out id change: only a long identifier is undone by
        # decrementing self.currentId.
        if isinstance(new_id, long):
            self.currentId -= 1
        raise
    return rec
def create_record(self, session, rec=None):
    """Store a record with its identifier cleared and return it.

    Args:
        session: passed through to ``self.store_record``.
        rec: record to store; its ``id`` is reset to None.  When None, an
            empty placeholder SaxRecord with a None identifier is created.

    Returns:
        The record that was passed to ``self.store_record``.
    """
    # Identity test: '== None' would call a record's own __eq__ and could
    # mistake a real record for a missing one.
    if rec is None:
        rec = SaxRecord([], "", None)
    else:
        rec.id = None
    self.store_record(session, rec)
    return rec
def process_document(self, session, doc):
    """Rebuild a SaxRecord from a document's serialised form.

    The raw data is decoded as UTF-8 and split on ``nonTextToken``.  If
    the final piece starts with "9", it is removed and everything after
    its first two characters is unpickled to become the record's
    ``elementHash``; otherwise ``elementHash`` is an empty dict.  The
    remaining pieces are passed to SaxRecord.
    """
    text = unicode(doc.get_raw(session), 'utf-8')
    events = text.split(nonTextToken)
    element_hash = {}
    # NOTE(review): an empty final piece (empty data, or data ending with
    # the token) raises IndexError on the [0] lookup below.
    if events[-1][0] == "9":
        trailer = events.pop()
        # NOTE(review): pickle.loads must only ever see trusted data --
        # confirm documents reaching this method cannot be attacker-supplied.
        element_hash = pickle.loads(str(trailer[2:]))
    rec = SaxRecord(events)
    rec.elementHash = element_hash
    return rec
def find_documents(self, session, cache=0):
    """Generate a StringDocument for every component found in ``self.stream``.

    Each source in ``self.sources`` processes the record held in
    ``self.stream``.  Every item of every result is serialised and
    wrapped in a ``c3component`` element (``c3:component`` when
    namespaces are involved) whose attributes carry the repr of the
    parent record and, where available, the event id or XPath of the
    component.

    Args:
        session: passed to the sources and to record serialisation.
        cache: 0 yields each document as it is built; 1 is unsupported;
            any other value appends the documents to ``self.documents``
            as the generator is consumed.

    Raises:
        NotImplementedError: ``cache`` is 1 (raised on first iteration,
            since this is a generator).
        ValueError: an item is neither a list, a string nor an lxml
            element.
    """
    # Should extract records by xpath or span and store as X/SGML
    if cache == 1:
        # nothing to offset into
        raise NotImplementedError
    rec = self.stream
    # NOTE(review): the local-name part of this pattern has no '+' and the
    # digit ranges start at 1, so only prefixed tags with a one-character
    # local name match.  Left unchanged because it decides which wrapper
    # element is emitted -- confirm the intent before widening it.
    hasNsRe = re.compile('<([a-zA-Z1-9_-]+:[a-zA-Z1-9_-])[ >]')
    for src in self.sources:
        raw = src.process_record(session, rec)
        for xp in raw:
            for r in xp:
                if isinstance(r, list):
                    # List item: serialise it through a temporary SaxRecord.
                    tempRec = SaxRecord(r)
                    docstr = tempRec.get_xml(session)
                    hasNs = hasNsRe.search(docstr)
                    # Event id is whatever follows the last space of the
                    # final list entry.
                    saxid = r[-1][r[-1].rfind(' ') + 1:]
                    if hasNs:
                        docstr = ('<c3:component xmlns:c3="http://www.cheshire3.org/schemas/component/" '
                                  'parent="%r" event="%s">%s</c3:component>'
                                  % (rec, saxid, docstr))
                    else:
                        docstr = ('<c3component parent="%r" event="%s">%s</c3component>'
                                  % (rec, saxid, docstr))
                elif isinstance(r, str):
                    # Plain string: escape it and wrap it in a <data> element.
                    docstr = ('<c3component parent="%r"><data>%s</data></c3component>'
                              % (rec, escape(r)))
                elif r.__class__ == etree._Element:
                    # Lxml Record
                    docstr = etree.tostring(r)
                    tree = r.getroottree()
                    path = tree.getpath(r)
                    if r.nsmap:
                        namespaceList = []
                        for (pref, ns) in r.nsmap.items():
                            if pref is None:
                                # lxml keys the default namespace by None;
                                # it is declared without a prefix.
                                namespaceList.append('xmlns="%s"' % ns)
                            else:
                                namespaceList.append('xmlns:%s="%s"' % (pref, ns))
                        namespaces = " ".join(namespaceList)
                        # The closing tag carries the c3: prefix so that it
                        # matches the opening tag.
                        docstr = ('<c3:component xmlns:c3="http://www.cheshire3.org/schemas/component/" '
                                  '%s parent="%r" xpath="%s">%s</c3:component>'
                                  % (namespaces, rec, path, docstr))
                    else:
                        docstr = ('<c3component parent="%r" xpath="%s">%s</c3component>'
                                  % (rec, path, docstr))
                else:
                    raise ValueError("Unknown Record Type")
                doc = StringDocument(docstr)
                if cache == 0:
                    yield doc
                else:
                    self.documents.append(doc)
def process_document(self, session, doc):
    """Copy a document's data into a record of the matching type.

    Args:
        session: passed to ``doc.get_raw``.
        doc: document whose raw data becomes the record's content.

    Returns:
        A SaxRecord when the raw data is a list, otherwise a DomRecord;
        in both cases after ``self._copyData`` has copied across from the
        document.
    """
    # Simply copy data into a record of appropriate type
    data = doc.get_raw(session)
    # isinstance replaces a call to 'typeof', which is not a Python
    # builtin and raised NameError on every call.
    if isinstance(data, list):
        rec = SaxRecord(data)
    else:
        rec = DomRecord(data)
    self._copyData(doc, rec)
    return rec
def process_document(self, session, doc):
    """Wrap a document's raw data in a record without parsing it.

    List data becomes a SaxRecord and anything else a DomRecord.  The
    record is passed through ``self._copyData`` together with the
    document before being returned.
    """
    payload = doc.get_raw(session)
    # Pick the record class from the shape of the data.
    record_class = SaxRecord if isinstance(payload, list) else DomRecord
    rec = record_class(payload)
    self._copyData(doc, rec)
    return rec
def find_documents(self, session, cache=0):
    """Generate a StringDocument for every component found in ``self.stream``.

    Each source in ``self.sources`` processes the record held in
    ``self.stream``.  Every item of every result is serialised and
    wrapped in a ``c3component`` element (``c3:component`` when
    namespaces are involved) whose attributes carry the repr of the
    parent record and, where available, the event id or XPath of the
    component.

    Args:
        session: passed to the sources and to record serialisation.
        cache: 0 yields each document as it is built; 1 is unsupported;
            any other value appends the documents to ``self.documents``
            as the generator is consumed.

    Raises:
        NotImplementedError: ``cache`` is 1 (raised on first iteration,
            since this is a generator).
        ValueError: an item is neither a list, a string nor an lxml
            element.
    """
    # Should extract records by xpath or span and store as X/SGML
    if cache == 1:
        # nothing to offset into
        raise NotImplementedError
    rec = self.stream
    # NOTE(review): the local-name part of this pattern has no '+' and the
    # digit ranges start at 1, so only prefixed tags with a one-character
    # local name match.  Left unchanged because it decides which wrapper
    # element is emitted -- confirm the intent before widening it.
    hasNsRe = re.compile('<([a-zA-Z1-9_-]+:[a-zA-Z1-9_-])[ >]')
    for src in self.sources:
        raw = src.process_record(session, rec)
        for xp in raw:
            for r in xp:
                if isinstance(r, list):
                    # List item: serialise it through a temporary SaxRecord.
                    tempRec = SaxRecord(r)
                    docstr = tempRec.get_xml(session)
                    hasNs = hasNsRe.search(docstr)
                    # Event id is whatever follows the last space of the
                    # final list entry.
                    saxid = r[-1][r[-1].rfind(' ') + 1:]
                    if hasNs:
                        docstr = "<c3:component xmlns:c3=\"http://www.cheshire3.org/schemas/component/\" parent=\"%r\" event=\"%s\">%s</c3:component>" % (rec, saxid, docstr)
                    else:
                        docstr = "<c3component parent=\"%r\" event=\"%s\">%s</c3component>" % (rec, saxid, docstr)
                elif isinstance(r, str):
                    # Plain string: escape it and wrap it in a <data> element.
                    docstr = "<c3component parent=\"%r\"><data>%s</data></c3component>" % (rec, escape(r))
                elif r.__class__ == etree._Element:
                    # Lxml Record
                    docstr = etree.tostring(r)
                    tree = r.getroottree()
                    path = tree.getpath(r)
                    if r.nsmap:
                        namespaceList = []
                        for (pref, ns) in r.nsmap.items():
                            if pref is None:
                                # lxml keys the default namespace by None;
                                # it is declared without a prefix.
                                namespaceList.append("xmlns=\"%s\"" % ns)
                            else:
                                namespaceList.append("xmlns:%s=\"%s\"" % (pref, ns))
                        namespaces = " ".join(namespaceList)
                        # The closing tag carries the c3: prefix so that it
                        # matches the opening tag.
                        docstr = """<c3:component xmlns:c3="http://www.cheshire3.org/schemas/component/" %s parent="%r" xpath="%s">%s</c3:component>""" % (namespaces, rec, path, docstr)
                    else:
                        docstr = """<c3component parent="%r" xpath="%s">%s</c3component>""" % (rec, path, docstr)
                else:
                    raise ValueError("Unknown Record Type")
                doc = StringDocument(docstr)
                if cache == 0:
                    yield doc
                else:
                    self.documents.append(doc)