def startElementNS(self, name, qname, attrs):
    """Check the namespace of a root element, then hand it to the base class.

    Only the ``rss``, ``feed``/``entry`` and ``Channel`` element names get
    special treatment; every element, recognised or not, is finally passed
    to ``validatorBase.startElementNS``.

    Args:
        name: local name of the element being opened.
        qname: namespace URI of the element; falsy when it has none.
        attrs: attributes of the element, forwarded unchanged.

    Namespaces are appended to ``validatorBase.defaultNamespaces``, which is
    reached through the class rather than the instance.
    NOTE(review): that presumably makes the list shared by every validator
    -- confirm against the class definition.
    """
    if name == 'rss':
        # An rss root that carries a namespace is reported as invalid, but
        # the namespace is still registered as a default one.
        if qname:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({
                "parent": "root",
                "element": name,
                "namespace": qname,
            }))
            validatorBase.defaultNamespaces.append(qname)

    elif name in ('feed', 'entry'):
        if qname == pie_namespace:
            # Obsolete namespace: warn, but still treat the document as Atom.
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))
            validatorBase.defaultNamespaces.append(pie_namespace)
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
        elif not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
        else:
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
            validatorBase.defaultNamespaces.append(atom_namespace)
            # "!=" replaces the obsolescent "<>" spelling (same semantics on
            # Python 2, and the only spelling Python 3 accepts).
            if qname != atom_namespace:
                from logging import InvalidNamespace
                self.log(InvalidNamespace({
                    "parent": "root",
                    "element": name,
                    "namespace": qname,
                }))
                validatorBase.defaultNamespaces.append(qname)

    elif name == 'Channel':
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
        elif qname != rss11_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({
                "parent": "root",
                "element": name,
                "namespace": qname,
            }))
        else:
            # Only a correctly namespaced Channel sets the feed type.
            validatorBase.defaultNamespaces.append(qname)
            from logging import TYPE_RSS1
            self.setFeedType(TYPE_RSS1)

    validatorBase.startElementNS(self, name, qname, attrs)
def unknown_starttag(self, name, qname, attrs):
    """Log an unrecognised root-level element and return a handler for it.

    Exactly one event is logged, chosen in this order:

    * ``ObsoleteNamespace`` when ``qname`` is one of the two namespaces
      listed below;
    * ``InvalidNamespace`` when the element is named ``feed``;
    * ``UndefinedElement`` (with parent ``"root"``) otherwise.

    Returns:
        Whatever ``validators.any(self, name, qname, attrs)`` returns.
    """
    from logging import ObsoleteNamespace, InvalidNamespace, UndefinedElement

    obsolete_namespaces = (
        'http://example.com/newformat#',
        'http://purl.org/atom/ns#',
    )

    if qname in obsolete_namespaces:
        event = ObsoleteNamespace({"element": name, "namespace": qname})
    elif name == 'feed':
        event = InvalidNamespace({"element": name, "namespace": qname})
    else:
        event = UndefinedElement({"parent": "root", "element": name})
    self.log(event)

    # Aliased so the project's ``any`` does not shadow the builtin here.
    from validators import any as any_handler
    return any_handler(self, name, qname, attrs)
def startElementNS(self, name, qname, attrs):
    """Check the namespace of a root element, then hand it to the base class.

    Special handling exists for the ``rss``, ``feed``/``entry``,
    ``Channel``, ``kml``, ``OpenSearchDescription`` and ``XRDS`` element
    names. Depending on the branch this logs namespace problems, appends
    namespaces to ``self.dispatcher.defaultNamespaces``, sets the feed type
    and, for kml / OpenSearchDescription / XRDS, replaces a missing or
    wrong namespace with the expected one. Every element is finally passed
    to ``validatorBase.startElementNS`` with the possibly substituted
    namespace.

    Args:
        name: local name of the element being opened.
        qname: namespace URI of the element; falsy when it has none.
        attrs: attributes of the element, forwarded unchanged.
    """
    if name == 'rss':
        # An rss root that carries a namespace is reported as invalid, but
        # the namespace is still registered as a default one.
        if qname:
            from logging import InvalidNamespace
            self.log(InvalidNamespace(
                {"parent": "root", "element": name, "namespace": qname}))
            self.dispatcher.defaultNamespaces.append(qname)

    elif name in ('feed', 'entry'):
        # Warn about explicit 'atom' / 'xhtml' prefixes, in that order.
        # "in" replaces the deprecated has_key().
        for prefix in ('atom', 'xhtml'):
            if prefix in self.namespace:
                from logging import AvoidNamespacePrefix
                self.log(AvoidNamespacePrefix({'prefix': prefix}))

        if qname == pie_namespace:
            # Obsolete namespace: warn, but still treat the document as Atom.
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))
            self.dispatcher.defaultNamespaces.append(pie_namespace)
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
        elif not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
        else:
            if name == 'feed':
                from logging import TYPE_ATOM
                self.setFeedType(TYPE_ATOM)
            else:
                from logging import TYPE_ATOM_ENTRY
                self.setFeedType(TYPE_ATOM_ENTRY)
            self.dispatcher.defaultNamespaces.append(atom_namespace)
            # "!=" replaces the obsolescent "<>" spelling.
            if qname != atom_namespace:
                from logging import InvalidNamespace
                self.log(InvalidNamespace(
                    {"parent": "root", "element": name, "namespace": qname}))
                self.dispatcher.defaultNamespaces.append(qname)

    elif name == 'Channel':
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
        elif qname != rss11_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace(
                {"parent": "root", "element": name, "namespace": qname}))
        else:
            # Only a correctly namespaced Channel sets the feed type.
            self.dispatcher.defaultNamespaces.append(qname)
            from logging import TYPE_RSS1
            self.setFeedType(TYPE_RSS1)

    elif name == 'kml':
        from logging import TYPE_KML20, TYPE_KML21, TYPE_KML22
        # The namespace is registered exactly as declared (even when it is
        # missing or invalid), before any substitution below.
        self.dispatcher.defaultNamespaces.append(qname)
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
            qname = kml20_namespace
            feed_type = TYPE_KML20
        elif qname == kml20_namespace:
            feed_type = TYPE_KML20
        elif qname == kml21_namespace:
            feed_type = TYPE_KML21
        elif qname == kml22_namespace:
            feed_type = TYPE_KML22
        else:
            # Any other namespace is invalid; continue as KML 2.2. (The
            # original re-tested all three namespaces here, a condition that
            # is always true once the branches above have failed.)
            from logging import InvalidNamespace
            self.log(InvalidNamespace({"element": name, "namespace": qname}))
            qname = kml22_namespace
            feed_type = TYPE_KML22
        self.setFeedType(feed_type)

    elif name == 'OpenSearchDescription':
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
            qname = opensearch_namespace
        elif qname != opensearch_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({"element": name, "namespace": qname}))
            self.dispatcher.defaultNamespaces.append(qname)
            qname = opensearch_namespace

    elif name == 'XRDS':
        # The feed type is set regardless of the namespace check's outcome.
        from logging import TYPE_XRD
        self.setFeedType(TYPE_XRD)
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
            qname = xrds_namespace
        elif qname != xrds_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({"element": name, "namespace": qname}))
            self.dispatcher.defaultNamespaces.append(qname)
            qname = xrds_namespace

    validatorBase.startElementNS(self, name, qname, attrs)
def startElementNS(self, name, qname, attrs):
    """Pick the handler for an opening element and push it.

    Steps, in order:

    1. If an ``xml:lang`` attribute is present, store it on ``self.xmlLang``
       and, when non-empty, validate it with ``iso639_validate``.
    2. An element without a namespace, in a feed whose type is known and is
       not ``TYPE_RSS2``, is logged as ``UndeterminableVocabulary`` and
       given the placeholder namespace ``"null"``.
    3. A namespace listed in ``self.dispatcher.defaultNamespaces`` is
       treated as no namespace.
    4. A namespace whose ``near_miss`` form is a key of
       ``nearly_namespaces`` is dropped and the element is renamed to
       ``<prefix>_<name>``.
    5. Attributes are checked for undeclared prefixes and for bad
       characters.
    6. The handler is resolved: ``unknown_starttag`` for elements that
       still have a namespace, otherwise the matching ``do_*`` method,
       falling back to ``extension.Questionable`` or an ``eater``.
    7. The handler is pushed and the child name recorded in
       ``self.children``.

    Args:
        name: local name of the element being opened.
        qname: namespace URI of the element; falsy when it has none.
        attrs: attributes of the element.
            NOTE(review): assumed to be a SAX namespace-aware attributes
            object keyed by ``(namespace, localname)`` tuples -- confirm.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    # "in" replaces the deprecated has_key().
    if xml_lang_key in attrs:
        self.xmlLang = attrs.getValue(xml_lang_key)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
        from logging import UndeterminableVocabulary
        self.log(UndeterminableVocabulary({
            "parent": self.name,
            "element": name,
            "namespace": '""',
        }))
        qname = "null"
    if qname in self.dispatcher.defaultNamespaces:
        qname = None

    nm_qname = near_miss(qname)
    if nm_qname in nearly_namespaces:
        prefix = nearly_namespaces[nm_qname]
        qname, name = None, prefix + "_" + name
        # First itunes element seen: flag it, if this handler supports it.
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
        if ':' in attr and not namespace:
            from logging import MissingNamespace
            self.log(MissingNamespace({
                "parent": self.name,
                "element": attr,
            }))

    if qname == 'http://purl.org/atom/ns#':
        from logging import ObsoleteNamespace
        self.log(ObsoleteNamespace({"element": "feed"}))

    # One BadCharacters event is logged per offending character.
    for key, value in attrs.items():
        for char in value:
            if 0x80 <= ord(char) <= 0x9F or char == u'\ufffd':
                from validators import BadCharacters
                self.log(BadCharacters({
                    "parent": name,
                    "element": key[-1],
                }))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
        self.child = name
    else:
        try:
            self.child = name
            if name.startswith('dc_'):
                # handle "Qualified" Dublin Core
                handler = getattr(
                    self, "do_" + name.replace("-", "_").split('.')[0])()
            else:
                handler = getattr(self, "do_" + name.replace("-", "_"))()
        except AttributeError:
            # Raised when no do_* method exists. An AttributeError raised
            # inside the do_* call itself lands here too, as before.
            if ':' in name:
                from logging import MissingNamespace
                self.log(MissingNamespace({
                    "parent": self.name,
                    "element": name,
                }))
                handler = eater()
            elif name.startswith('xhtml_'):
                from logging import MisplacedXHTMLContent
                self.log(MisplacedXHTMLContent({
                    "parent": ':'.join(self.name.split("_", 1)),
                    "element": name,
                }))
                handler = eater()
            else:
                try:
                    from extension import Questionable

                    # requalify the name with the default namespace
                    qname = name
                    from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                    if self.getFeedType() in [TYPE_APP_CATEGORIES,
                                              TYPE_APP_SERVICE]:
                        if qname.startswith('app_'):
                            qname = qname[4:]

                    if '_' not in name and '_' in self.name:
                        if ('http://www.w3.org/2005/Atom'
                                in self.dispatcher.defaultNamespaces):
                            qname = 'atom_' + qname

                    # is this element questionable?
                    handler = getattr(
                        Questionable(), "do_" + qname.replace("-", "_"))()
                    from logging import QuestionableUsage
                    self.log(QuestionableUsage({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": qname,
                    }))
                except AttributeError:
                    from logging import UndefinedElement
                    self.log(UndefinedElement({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": name,
                    }))
                    handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append the name, even if it already exists (we need this
    # to check for too many hour elements in skipHours, and it doesn't hurt
    # anything else)
    self.children.append(self.child)
def startElementNS(self, name, qname, attrs):
    """Check the namespace of a root element, then hand it to the base class.

    Special handling exists for the ``rss``, ``feed``/``entry``,
    ``Channel``, ``OpenSearchDescription`` and ``XRDS`` element names.
    Depending on the branch this logs namespace problems, appends
    namespaces to ``self.dispatcher.defaultNamespaces``, sets the feed type
    and, for OpenSearchDescription / XRDS, replaces a missing or wrong
    namespace with the expected one. Every element is finally passed to
    ``validatorBase.startElementNS`` with the possibly substituted
    namespace.

    Args:
        name: local name of the element being opened.
        qname: namespace URI of the element; falsy when it has none.
        attrs: attributes of the element, forwarded unchanged.
    """
    if name == 'rss':
        # An rss root that carries a namespace is reported as invalid, but
        # the namespace is still registered as a default one.
        if qname:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({
                "parent": "root",
                "element": name,
                "namespace": qname,
            }))
            self.dispatcher.defaultNamespaces.append(qname)

    elif name in ('feed', 'entry'):
        if qname == pie_namespace:
            # Obsolete namespace: warn, but still treat the document as Atom.
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))
            self.dispatcher.defaultNamespaces.append(pie_namespace)
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
        elif not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
        else:
            if name == 'feed':
                from logging import TYPE_ATOM
                self.setFeedType(TYPE_ATOM)
            else:
                from logging import TYPE_ATOM_ENTRY
                self.setFeedType(TYPE_ATOM_ENTRY)
            self.dispatcher.defaultNamespaces.append(atom_namespace)
            # "!=" replaces the obsolescent "<>" spelling (same semantics on
            # Python 2, and the only spelling Python 3 accepts).
            if qname != atom_namespace:
                from logging import InvalidNamespace
                self.log(InvalidNamespace({
                    "parent": "root",
                    "element": name,
                    "namespace": qname,
                }))
                self.dispatcher.defaultNamespaces.append(qname)

    elif name == 'Channel':
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
        elif qname != rss11_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({
                "parent": "root",
                "element": name,
                "namespace": qname,
            }))
        else:
            # Only a correctly namespaced Channel sets the feed type.
            self.dispatcher.defaultNamespaces.append(qname)
            from logging import TYPE_RSS1
            self.setFeedType(TYPE_RSS1)

    elif name == 'OpenSearchDescription':
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
            qname = opensearch_namespace
        elif qname != opensearch_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({
                "element": name,
                "namespace": qname,
            }))
            self.dispatcher.defaultNamespaces.append(qname)
            qname = opensearch_namespace

    elif name == 'XRDS':
        # The feed type is set regardless of the namespace check's outcome.
        from logging import TYPE_XRD
        self.setFeedType(TYPE_XRD)
        if not qname:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": "root", "element": name}))
            qname = xrds_namespace
        elif qname != xrds_namespace:
            from logging import InvalidNamespace
            self.log(InvalidNamespace({
                "element": name,
                "namespace": qname,
            }))
            self.dispatcher.defaultNamespaces.append(qname)
            qname = xrds_namespace

    validatorBase.startElementNS(self, name, qname, attrs)