예제 #1
0
  def characters(self, string):
    """Append character data to self.value, logging suspicious characters.

    Calls self.textOK() when `string` contains any non-whitespace, then
    scans the string and logs a BadCharacters event for:

    * a character in 0x80-0xBF directly preceded by one in 0xC2-0xC3, but
      only when the whole string can be encoded as ISO-8859-1 and decoded
      again as UTF-8 (i.e. it looks like UTF-8 that was read as Latin-1);
    * a character in 0x80-0x9F, or U+FFFD.

    Each event carries an offset=(line, column) counted from the start of
    `string`; the column resets and the line advances on LF and on CR.
    """
    if string.strip():
      self.textOK()

    # Whether the whole string survives the latin-1 -> utf-8 round trip.
    # The answer does not depend on the loop position, so it is computed at
    # most once (None means "not checked yet").
    double_encoded = None

    line = column = 0
    pc = ' '
    for c in string:
      code = ord(c)

      # latin characters double encoded as utf-8
      if 0x80 <= code <= 0xBF and 0xC2 <= ord(pc) <= 0xC3:
        if double_encoded is None:
          try:
            string.encode('iso-8859-1').decode('utf-8')
          except UnicodeError:
            # Not a valid round trip: the pair is just ordinary text.
            double_encoded = False
          else:
            double_encoded = True
        if double_encoded:
          from validators import BadCharacters
          self.log(BadCharacters({"parent":self.parent.name, "element":self.name}), offset=(line,max(1,column-1)))
      pc = c

      # win1252
      if 0x80 <= code <= 0x9F or c == u'\ufffd':
        from validators import BadCharacters
        self.log(BadCharacters({"parent":self.parent.name, "element":self.name}), offset=(line,column))

      column = column + 1
      if code in (10, 13):
        column = 0
        line = line + 1

    self.value = self.value + string
예제 #2
0
    def startElementNS(self, name, qname, attrs):
        """Report namespace and character problems for an element, then eat it.

        Logs NotInANamespace when the element has no namespace, the feed type
        is TYPE_RSS2 and this handler's own name contains an underscore;
        logs MissingNamespace for an element or attribute name that contains
        a colon; logs BadCharacters once per suspicious character found in an
        attribute value.  Finally pushes a new eater() for the element.
        """
        feed_type = self.getFeedType()

        # RSS 2.0 arbitrary restriction on extensions
        if (not qname) and feed_type and (feed_type == TYPE_RSS2):
            if '_' in self.name:
                from logging import NotInANamespace
                details = {
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }
                self.log(NotInANamespace(details))

        # ensure element is "namespace well formed"
        if ':' in name:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": self.name, "element": name}))

        for attr_key in attrs.keys():
            namespace, attr = attr_key

            # ensure all attribute namespaces are properly defined
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                problem = {"parent": self.name, "element": attr}
                self.log(MissingNamespace(problem))

            # one BadCharacters event per offending character in the value
            for ch in attrs.get(attr_key):
                codepoint = ord(ch)
                if 0x80 <= codepoint <= 0x9F or ch == u'\ufffd':
                    from validators import BadCharacters
                    self.log(BadCharacters({"parent": name, "element": attr}))

        # eat children
        child_handler = eater()
        self.push(child_handler, name, attrs)
예제 #3
0
 def characters(self, string):
   """Check character data, then delegate to validatorBase.characters.

   Logs BadCharacters once for every character in 0x80-0x9F or equal to
   U+FFFD.  When self.type is 'xhtml', logs MissingXhtmlDiv if `string`
   has non-whitespace content while self.value is still empty or
   whitespace-only.
   """
   for ch in string:
     codepoint = ord(ch)
     if not (0x80 <= codepoint <= 0x9F or ch == u'\ufffd'):
       continue
     from validators import BadCharacters
     self.log(BadCharacters({"parent":self.parent.name, "element":self.name}))

   if self.type == 'xhtml':
     # text arriving before any accumulated non-blank value
     if string.strip() and not self.value.strip():
       self.log(MissingXhtmlDiv({"parent":self.parent.name, "element":self.name}))

   validatorBase.characters(self, string)
예제 #4
0
 def characters(self, string):
     """Log BadCharacters for every suspicious character in `string`.

     A character is suspicious when its code point lies in 0x80-0x9F or
     when it is U+FFFD.  One event is logged per such character; nothing
     is returned.
     """
     for ch in string:
         codepoint = ord(ch)
         if not (0x80 <= codepoint <= 0x9F or ch == u'\ufffd'):
             continue
         # imported only when an offending character is actually found
         from validators import BadCharacters
         context = {
             "parent": self.parent.name,
             "element": self.name
         }
         self.log(BadCharacters(context))
예제 #5
0
 def handle_charref(self, name):
     """Log BadCharacters for a numeric character reference to a bad code point.

     `name` is the reference body without the surrounding "&#" and ";":
     decimal digits, or a hex marker followed by hex digits.  The hex marker
     is accepted in either case ("x41" or "X41"); treating only lowercase
     "x" as hex made int() raise ValueError on an uppercase marker.

     An event is logged when the referenced value lies in 0x80-0x9F or is
     0xFFFD.  Raises ValueError if `name` is not a valid number.
     """
     if name.startswith(('x', 'X')):
         value = int(name[1:], 16)
     else:
         value = int(name)
     if 0x80 <= value <= 0x9F or value == 0xfffd:
         self.log(
             BadCharacters({
                 "parent": self.element.parent.name,
                 "element": self.element.name,
                 "value": "&#" + name + ";"
             }))
예제 #6
0
    def startElementNS(self, name, qname, attrs):
        """Pick a handler for a child element start event and push it.

        Steps, in order:

        1. If an xml:lang attribute is present, store it in self.xmlLang and,
           when non-empty, pass it to iso639_validate.
        2. If the element has no namespace and the feed type is known and is
           not TYPE_RSS2, log UndeterminableVocabulary and use "null" as the
           namespace.
        3. A namespace listed in self.dispatcher.defaultNamespaces is treated
           as no namespace.
        4. If near_miss(qname) is a key of nearly_namespaces, the element is
           renamed to "<prefix>_<name>" with no namespace; for the 'itunes'
           prefix, self.setItunes(True) is called when available and neither
           this handler nor its parent has itunes set.
        5. Log MissingNamespace for attributes whose name contains a colon but
           which have no namespace, ObsoleteNamespace for the
           'http://purl.org/atom/ns#' namespace, and BadCharacters once per
           suspicious character in any attribute value.
        6. Select the handler: self.unknown_starttag for a still-namespaced
           element, otherwise the matching do_* method, falling back to
           Questionable's do_* method or to eater() with an appropriate log
           event.
        7. Push the handler and append self.child to self.children.

        Membership tests use the `in` operator rather than has_key(), which
        only exists on Python 2 mappings.
        """
        xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
        if xml_lang_key in attrs:
            self.xmlLang = attrs.getValue(xml_lang_key)
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            from logging import UndeterminableVocabulary
            self.log(
                UndeterminableVocabulary({
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }))
            qname = "null"
        if qname in self.dispatcher.defaultNamespaces: qname = None

        nm_qname = near_miss(qname)
        if nm_qname in nearly_namespaces:
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'): self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": attr
                    }))

        if qname == 'http://purl.org/atom/ns#':
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))

        # one BadCharacters event per offending character in any attribute
        # value; key[-1] is the attribute's local name
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
                    from validators import BadCharacters
                    self.log(
                        BadCharacters({
                            "parent": name,
                            "element": key[-1]
                        }))

        if qname:
            # still namespaced after the mapping above: not a known vocabulary
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
            self.child = name
        else:
            try:
                self.child = name
                if name.startswith('dc_'):
                    # handle "Qualified" Dublin Core
                    handler = getattr(
                        self, "do_" + name.replace("-", "_").split('.')[0])()
                else:
                    handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                # no do_* method for this element (or one was raised inside it)
                if name.find(':') != -1:
                    from logging import MissingNamespace
                    self.log(
                        MissingNamespace({
                            "parent": self.name,
                            "element": name
                        }))
                    handler = eater()
                elif name.startswith('xhtml_'):
                    from logging import MisplacedXHTMLContent
                    self.log(
                        MisplacedXHTMLContent({
                            "parent":
                            ':'.join(self.name.split("_", 1)),
                            "element":
                            name
                        }))
                    handler = eater()
                else:
                    try:
                        from extension import Questionable

                        # requalify the name with the default namespace
                        qname = name
                        from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        if self.getFeedType() in [
                                TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        ]:
                            if qname.startswith('app_'): qname = qname[4:]

                        if name.find('_') < 0 and self.name.find('_') >= 0:
                            if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces:
                                qname = 'atom_' + qname

                        # is this element questionable?
                        handler = getattr(Questionable(),
                                          "do_" + qname.replace("-", "_"))()
                        from logging import QuestionableUsage
                        self.log(
                            QuestionableUsage({
                                "parent":
                                ':'.join(self.name.split("_", 1)),
                                "element":
                                qname
                            }))

                    except AttributeError:
                        from logging import UndefinedElement
                        self.log(
                            UndefinedElement({
                                "parent":
                                ':'.join(self.name.split("_", 1)),
                                "element":
                                name
                            }))
                        handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(self.child)
예제 #7
0
    def startElementNS(self, name, qname, attrs):
        """Pick a handler for a child element start event and push it.

        Steps, in order:

        1. If an xml:lang attribute is present, store it in self.xmlLang and,
           when non-empty, pass it to iso639_validate.
        2. If the element has no namespace and the feed type is known and is
           not TYPE_RSS2, log UndeterminableVocabulary and use "null" as the
           namespace.
        3. A namespace listed in self.defaultNamespaces is treated as no
           namespace.
        4. If near_miss(qname) is a key of nearly_namespaces, the element is
           renamed to "<prefix>_<name>" with no namespace; for the 'itunes'
           prefix, self.setItunes(True) is called when available and neither
           this handler nor its parent has itunes set.
        5. Log MissingNamespace for attributes whose name contains a colon but
           which have no namespace, and BadCharacters once per character in
           0x80-0x9F found in any attribute value.
        6. Select the handler: self.unknown_starttag for a still-namespaced
           element, otherwise the matching do_* method, falling back to
           eater() with a MissingNamespace or UndefinedElement event.
        7. Push the handler and append the (possibly renamed) element name to
           self.children.

        Membership tests use the `in` operator rather than has_key(), which
        only exists on Python 2 mappings.
        """
        xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
        if xml_lang_key in attrs:
            self.xmlLang = attrs.getValue(xml_lang_key)
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            from logging import UndeterminableVocabulary
            self.log(
                UndeterminableVocabulary({
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }))
            qname = "null"
        if qname in self.defaultNamespaces: qname = None

        nm_qname = near_miss(qname)
        if nm_qname in nearly_namespaces:
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'): self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": attr
                    }))

        # one BadCharacters event per offending character in any attribute
        # value; key[-1] is the attribute's local name
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F:
                    from validators import BadCharacters
                    self.log(
                        BadCharacters({
                            "parent": name,
                            "element": key[-1]
                        }))

        if qname:
            # still namespaced after the mapping above: not a known vocabulary
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
        else:
            try:
                self.child = name
                handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                # no do_* method for this element (or one was raised inside it)
                if name.find(':') != -1:
                    from logging import MissingNamespace
                    self.log(
                        MissingNamespace({
                            "parent": self.name,
                            "element": name
                        }))
                    handler = eater()
                else:
                    # qname is always falsy on this path, so the former
                    # "elif not qname" test was always true and the
                    # unknown_starttag fallback after it could never run.
                    from logging import UndefinedElement
                    self.log(
                        UndefinedElement({
                            "parent":
                            ':'.join(self.name.split("_", 1)),
                            "element":
                            name
                        }))
                    handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(name)