def _defaults(self, routekeys, reserved_keys, kargs):
    """Creates default set with values stringified

    Put together our list of defaults, stringify non-None values
    and add in our action/id default if they use it and didn't
    specify it.

    defaultkeys is a list of the currently assumed default keys
    routekeys is a list of the keys found in the route path
    reserved_keys is a list of keys that are not

    """
    defaults = {}
    # Add in a controller/action default if they don't exist
    if "controller" not in routekeys and "controller" not in kargs \
       and not self.explicit:
        kargs["controller"] = "content"
    if "action" not in routekeys and "action" not in kargs \
       and not self.explicit:
        kargs["action"] = "index"
    defaultkeys = frozenset([key for key in kargs.keys()
                             if key not in reserved_keys])
    for key in defaultkeys:
        if kargs[key] is not None:
            defaults[key] = self.make_unicode(kargs[key])
        else:
            defaults[key] = None
    if "action" in routekeys and not defaults.has_key("action") \
       and not self.explicit:
        defaults["action"] = "index"
    if "id" in routekeys and not defaults.has_key("id") \
       and not self.explicit:
        defaults["id"] = None
    newdefaultkeys = frozenset([key for key in defaults.keys()
                                if key not in reserved_keys])

    return (defaults, newdefaultkeys)
def _defaults(self, routekeys, reserved_keys, kargs):
    """Creates default set with values stringified

    Put together our list of defaults, stringify non-None values
    and add in our action/id default if they use it and didn't
    specify it

    defaultkeys is a list of the currently assumed default keys
    routekeys is a list of the keys found in the route path
    reserved_keys is a list of keys that are not

    """
    defaults = {}
    # Add in a controller/action default if they don't exist
    if 'controller' not in routekeys and 'controller' not in kargs \
       and not self.explicit:
        kargs['controller'] = 'content'
    if 'action' not in routekeys and 'action' not in kargs \
       and not self.explicit:
        kargs['action'] = 'index'
    defaultkeys = frozenset([key for key in kargs.keys()
                             if key not in reserved_keys])
    for key in defaultkeys:
        if kargs[key] != None:
            defaults[key] = unicode(kargs[key])
        else:
            defaults[key] = None
    if 'action' in routekeys and not defaults.has_key('action') \
       and not self.explicit:
        defaults['action'] = 'index'
    if 'id' in routekeys and not defaults.has_key('id') \
       and not self.explicit:
        defaults['id'] = None
    newdefaultkeys = frozenset([key for key in defaults.keys()
                                if key not in reserved_keys])

    return (defaults, newdefaultkeys)
def _setup_route(self):
    # Build our routelist, and the keys used in the route
    self.routelist = routelist = self._pathkeys(self.routepath)
    routekeys = frozenset([key['name'] for key in routelist
                           if isinstance(key, dict)])
    self.dotkeys = frozenset([key['name'] for key in routelist
                              if isinstance(key, dict) and
                              key['type'] == '.'])

    if not self.minimization:
        self.make_full_route()

    # Build a req list with all the regexp requirements for our args
    self.req_regs = {}
    for key, val in self.reqs.iteritems():
        self.req_regs[key] = re.compile('^' + val + '$')
    # Update our defaults and set new default keys if needed. defaults
    # needs to be saved
    (self.defaults, defaultkeys) = self._defaults(routekeys,
                                                  self.reserved_keys,
                                                  self._kargs.copy())
    # Save the maximum keys we could utilize
    self.maxkeys = defaultkeys | routekeys

    # Populate our minimum keys, and save a copy of our backward keys for
    # quicker generation later
    (self.minkeys, self.routebackwards) = self._minkeys(routelist[:])

    # Populate our hardcoded keys, these are ones that are set and don't
    # exist in the route
    self.hardcoded = frozenset([key for key in self.maxkeys
                                if key not in routekeys
                                and self.defaults[key] is not None])

    # Cache our default keys
    self._default_keys = frozenset(self.defaults.keys())
def reorder_steps(self, id_list):
    """
    Reorders steps based on order provided in id_list.

    Raises ValueError if id_list contains invalid ids, or does not
    contain ids of all corresponding steps.
    """
    if type(id_list) != type([]):
        raise ValueError("id_list must be a list")

    L = map(int, id_list)                            # cast all to int()
    S = dict((x.id, x) for x in self.step_set.all()) # map of ids to Step object
    sk = S.keys()

    if L == sk:
        return  # order is the same. nothing to do.

    if len(L) != len(S) or frozenset(L) != frozenset(sk):
        raise ValueError("id_list does not match list of current steps")

    # set position of all steps to temp value
    self.step_set.update(position=(models.F('position') + 1) * -1)

    # update positions of steps
    for i, v in enumerate(L):
        S[v].position = i
        S[v].save()
def _minkeys(self, routelist):
    """Utility function to walk the route backwards

    Will also determine the minimum keys we can handle to generate
    a working route.

    routelist is a list of the '/' split route path
    defaults is a dict of all the defaults provided for the route

    """
    minkeys = []
    backcheck = routelist[:]

    # If we don't honor minimization, we need all the keys in the
    # route path
    if not self.minimization:
        for part in backcheck:
            if isinstance(part, dict):
                minkeys.append(part['name'])
        return (frozenset(minkeys), backcheck)

    gaps = False
    backcheck.reverse()
    for part in backcheck:
        if not isinstance(part, dict) and part not in self.done_chars:
            gaps = True
            continue
        elif not isinstance(part, dict):
            continue
        key = part['name']
        if self.defaults.has_key(key) and not gaps:
            continue
        minkeys.append(key)
        gaps = True
    return (frozenset(minkeys), backcheck)
def _get_permissions(self):
    try:
        return self._permissions
    except AttributeError:
        # Permissions haven't been computed yet
        pass

    if not self.user:
        self._permissions = frozenset()
    else:
        self._permissions = frozenset(
            [p.permission_name for p in self.user.permissions])
    return self._permissions
def _get_groups(self):
    try:
        return self._groups
    except AttributeError:
        # Groups haven't been computed yet
        pass

    if not self.user:
        self._groups = frozenset()
    else:
        self._groups = frozenset(
            [g.group_name for g in self.user.groups])
    return self._groups
def _get_group_ids(self):
    """Get set of group IDs of this identity."""
    try:
        return self._group_ids
    except AttributeError:
        # Groups haven't been computed yet
        pass

    if not self.user:
        self._group_ids = frozenset()
    else:
        self._group_ids = frozenset([g.id for g in self.user.groups])
    return self._group_ids
def _get_permissions(self):
    """Get set of permission names of this identity."""
    try:
        return self._permissions
    except AttributeError:
        # Permissions haven't been computed yet
        pass

    if not self.user:
        self._permissions = frozenset()
    else:
        self._permissions = frozenset(
            [p.permission_name for p in self.user.permissions])
    return self._permissions
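# The accessors above all follow the same lazy-caching pattern: compute a
# frozenset once, store it on the instance, and short-circuit on later calls.
# A minimal standalone sketch of that pattern (not taken from any of the
# projects above; CachedIdentity and the plain user dict are hypothetical
# stand-ins for the real identity model):
class CachedIdentity(object):
    def __init__(self, user=None):
        self.user = user

    def _get_permissions(self):
        try:
            return self._permissions
        except AttributeError:
            # Not computed yet: fall through and build the frozenset once.
            pass
        if not self.user:
            self._permissions = frozenset()
        else:
            self._permissions = frozenset(self.user.get('permissions', ()))
        return self._permissions

    permissions = property(_get_permissions)


ident = CachedIdentity(user={'permissions': ['read', 'write']})
print(sorted(ident.permissions))                 # ['read', 'write']
print(ident.permissions is ident.permissions)    # True: same cached frozenset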
def _get_group_ids(self):
    '''Get set of group IDs of this identity.'''
    try:
        return self._group_ids
    except AttributeError: # pylint: disable-msg=W0704
        # :W0704: Groups haven't been computed yet
        pass

    if not self.groups:
        self._group_ids = frozenset()
    else:
        self._group_ids = frozenset(
            [g.id for g in self._user.approved_memberships])
    return self._group_ids
def _get_permissions(self):
    try:
        return self._permissions
    except AttributeError:
        # Permissions haven't been computed yet
        pass

    if not self.user:
        self._permissions = frozenset()
    else:
        box = hub.getConnection()
        box.start(isolation=dejavu.storage.isolation.READ_COMMITTED)
        self._permissions = frozenset(
            [p.permission_name for p in self.user.permissions])
        box.flush_all()
    return self._permissions
def _get_groups(self):
    try:
        return self._groups
    except AttributeError:
        # Groups haven't been computed yet
        pass

    if not self.user:
        self._groups = frozenset()
    else:
        box = hub.getConnection()
        box.start(isolation=dejavu.storage.isolation.READ_COMMITTED)
        self._groups = frozenset(
            [g.group_name for g in self.user.groups])
        box.flush_all()
    return self._groups
def _get_group_ids(self):
    '''Get set of group IDs of this identity.'''
    try:
        return self._group_ids
    except AttributeError:
        # Groups haven't been computed yet
        pass

    if not self.user:
        self._group_ids = frozenset()
    else:
        ### TG: Difference.  Our model has a many::many for people:groups
        # And an association proxy that links them together
        self._group_ids = frozenset(
            [g.id for g in self.user.approved_memberships])
    return self._group_ids
def get_wa_runners(lines = []): '''Parses predefined file for information about available Wak / Waf runners. We are looking into file named wa_runners.conf. If one is not found, or none of the entries refer to existing paths, we scan parent folder (.../project/tests/../) for filenames matching the following set ("wak.py", "waf-light", "waf") @param lines: (Default: []) A list of strings representing the contents of the config file. If list is nonempty, we bypass reading the file and parse the lines instead. @return: A tuple of form: ( list of runner labels (often the file name) in order as presented in the file, dictionary that maps the label to absolute path ) ''' print("\nDetecting Wak / Waf runners...") values = {} if not lines: try: lines = open('wa_runners.conf').readlines() except: pass for line in lines: if line.strip() and not line.strip().startswith("#"): k, v = line.split("=", 1) k = k.strip() v = os.path.abspath( os.path.expanduser(v.strip()) ) if os.path.exists(v): print("Detected %s at %s" % (k, v)) values[k] = v else: print("Skipping %s. Path not found: %s" % (k, v)) if not len(values.keys()): # you had your chance to specify the runners by hand, it seems we are forced to guess... # we are supposed to be in %PROJECTFOLDER%/tests. wak.py or waf are likely in %PROJECT FOLDER% matches = frozenset(('wak.py', 'waf-light', 'waf')).intersection( frozenset(os.listdir("../")) ) if not matches: raise Exception("No Wak / Waf runners found. Please, insure that wa_runners.conf file has entries proper for your project.") for match in matches: values[match] = os.path.abspath(os.path.normpath(os.path.join( '../' , match ))) print("%s Wak / Waf runners found.\n" % len(values.keys())) return sorted(values.keys()), values
def generate(self, _ignore_req_list=False, _append_slash=False, **kargs): """Generate a URL from ourself given a set of keyword arguments Toss an exception if this set of keywords would cause a gap in the url. """ # Verify that our args pass any regexp requirements if not _ignore_req_list: for key in self.reqs.keys(): val = kargs.get(key) if val and not self.req_regs[key].match(self.make_unicode(val)): return False # Verify that if we have a method arg, its in the method accept list. # Also, method will be changed to _method for route generation meth = as_unicode(kargs.get('method'), self.encoding) if meth: if self.conditions and 'method' in self.conditions \ and meth.upper() not in self.conditions['method']: return False kargs.pop('method') if self.minimization: url = self.generate_minimized(kargs) else: url = self.generate_non_minimized(kargs) if url is False: return url if not url.startswith('/') and not self.static: url = '/' + url extras = frozenset(kargs.keys()) - self.maxkeys if extras: if _append_slash and not url.endswith('/'): url += '/' fragments = [] # don't assume the 'extras' set preserves order: iterate # through the ordered kargs instead for key in kargs: if key not in extras: continue if key == 'action' or key == 'controller': continue val = kargs[key] if isinstance(val, (tuple, list)): for value in val: value = as_unicode(value, self.encoding) fragments.append((key, _str_encode(value, self.encoding))) else: val = as_unicode(val, self.encoding) fragments.append((key, _str_encode(val, self.encoding))) if fragments: url += '?' url += urlparse.urlencode(fragments) elif _append_slash and not url.endswith('/'): url += '/' return url
def get_dfa_state(states): states = self.epsilon_closure(states) frozenstates = frozenset(states) if frozenstates in set_to_state: return set_to_state[frozenstates] # already created this state if states == self.start_states: assert not set_to_state final = bool( filter(None, [state in self.final_states for state in states])) name = ", ".join([self.names[state] for state in states]) if name_precedence is not None: name_index = len(name_precedence) unmergeable = False for state in states: #print state if state in self.unmergeable_states: new_name = self.names[state] if name_precedence is not None: try: index = name_precedence.index(new_name) except ValueError: index = name_index #print new_name, index, name_precedence if index < name_index: name_index = index name = new_name else: name = new_name unmergeable = True result = set_to_state[frozenstates] = fda.add_state( name, final, unmergeable) stack.append((result, states)) return result
def _mergePage(self, page2, page2transformation = None): newResources = DictionaryObject() rename = {} originalResources = self['/Resources'].getObject() page2Resources = page2['/Resources'].getObject() for res in ('/ExtGState', '/Font', '/XObject', '/ColorSpace', '/Pattern', '/Shading', '/Properties'): new, newrename = PageObject._mergeResources(originalResources, page2Resources, res) if new: newResources[NameObject(res)] = new rename.update(newrename) newResources[NameObject('/ProcSet')] = ArrayObject(frozenset(originalResources.get('/ProcSet', ArrayObject()).getObject()).union(frozenset(page2Resources.get('/ProcSet', ArrayObject()).getObject()))) newContentArray = ArrayObject() originalContent = self.getContents() if originalContent is not None: newContentArray.append(PageObject._pushPopGS(originalContent, self.pdf)) page2Content = page2.getContents() if page2Content is not None: if page2transformation is not None: page2Content = page2transformation(page2Content) page2Content = PageObject._contentStreamRename(page2Content, rename, self.pdf) page2Content = PageObject._pushPopGS(page2Content, self.pdf) newContentArray.append(page2Content) self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf) self[NameObject('/Resources')] = newResources
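# The /ProcSet merge in _mergePage relies on frozenset.union to combine the
# two procedure-set arrays without duplicates. A standalone sketch of just
# that step, with plain lists standing in for pyPdf's ArrayObject values:
original_procset = ['/PDF', '/Text']
page2_procset = ['/PDF', '/ImageB']
merged_procset = frozenset(original_procset).union(frozenset(page2_procset))
print(sorted(merged_procset))    # ['/ImageB', '/PDF', '/Text']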
def __init__(self, visit_key, user=None, username=None, password=None): if user: self._user = user self._groups = frozenset( [g['name'] for g in user['approved_memberships']] ) self.visit_key = visit_key if visit_key: # Set the cookie to the user's tg_visit key before requesting # authentication. That way we link the two together. session_id = visit_key else: session_id = None debug = config.get('jsonfas.debug', False) super(JsonFasIdentity, self).__init__(self.fas_url, useragent=self.useragent, debug=debug, username=username, password=password, session_id=session_id, cache_session=self.cache_session) if self.debug: import inspect caller = inspect.getouterframes(inspect.currentframe())[1][3] log.debug('JsonFasIdentity.__init__ caller: %s' % caller) response.simple_cookie[self.cookie_name] = visit_key # Send a request so that we associate the visit_cookie with the user self.send_request('', auth=True) log.debug('Leaving JsonFasIdentity.__init__')
def _get_user(self): '''Get user instance for this identity.''' visit = self.visit_key if not visit: # No visit, no user self._user = None else: if not (self.username and self.password): # Unless we were given the user_name and password to login on # this request, a CSRF token is required if (not '_csrf_token' in cherrypy.request.params or cherrypy.request.params['_csrf_token'] != hash_constructor(self.visit_key).hexdigest()): self.log.info("Bad _csrf_token") if '_csrf_token' in cherrypy.request.params: self.log.info("visit: %s token: %s" % (self.visit_key, cherrypy.request.params['_csrf_token'])) else: self.log.info('No _csrf_token present') cherrypy.request.fas_identity_failure_reason = 'bad_csrf' self._user = None # pylint: disable-msg=W0704 try: return self._user except AttributeError: # User hasn't already been set # Attempt to load the user. After this code executes, there # *will* be a _user attribute, even if the value is None. self._user = self.__retrieve_user() self._groups = frozenset( [g['name'] for g in self._user.approved_memberships] ) # pylint: enable-msg=W0704 return self._user
def generateImpliedEndTags(self, exclude=None):
    name = self.openElements[-1].name
    # XXX td, th and tr are not actually needed
    if name in frozenset(("dd", "dt", "li", "option", "optgroup", "p",
                          "rp", "rt")) and name != exclude:
        self.openElements.pop()
        # XXX This is not entirely what the specification says. We should
        # investigate it more closely.
        self.generateImpliedEndTags(exclude)
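# Design note, not html5lib code: the frozenset literal above is rebuilt on
# every call (and again on each recursion). A common variant hoists it into a
# module-level constant and loops instead of recursing. A hedged standalone
# sketch of that variant, operating on a plain list of tag names:
IMPLIED_END_TAGS = frozenset(("dd", "dt", "li", "option", "optgroup",
                              "p", "rp", "rt"))

def generate_implied_end_tags(open_names, exclude=None):
    """Pop trailing names that imply their own end tag (standalone demo)."""
    while open_names and open_names[-1] in IMPLIED_END_TAGS \
            and open_names[-1] != exclude:
        open_names.pop()
    return open_names

print(generate_implied_end_tags(['body', 'ul', 'li', 'p']))  # ['body', 'ul']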
def append(self, left, right, expression):
    if self.is_joined(left, right):
        raise ValueError('Already joined')
    sources = frozenset([f.source for f in iter_fields(expression)])
    if (len(sources) != 2) or (left not in sources) or (right not in sources):
        raise ValueError(
            'Only expressions of fields of joining sources are allowed')
    self.lefts.append(left)
    self.rights.append(right)
    self.expressions.append(expression)
def closeTagOpenState(self): if (self.contentModelFlag in (contentModelFlags["RCDATA"], contentModelFlags["CDATA"])): if self.currentToken: charStack = [] # So far we know that "</" has been consumed. We now need to know # whether the next few characters match the name of last emitted # start tag which also happens to be the currentToken. We also need # to have the character directly after the characters that could # match the start tag name. for x in xrange(len(self.currentToken["name"]) + 1): charStack.append(self.stream.char()) # Make sure we don't get hit by EOF if charStack[-1] == EOF: break # Since this is just for checking. We put the characters back on # the stack. self.stream.unget(charStack) if self.currentToken \ and self.currentToken["name"].lower() == "".join(charStack[:-1]).lower() \ and charStack[-1] in (spaceCharacters | frozenset((u">", u"/", u"<", EOF))): # Because the characters are correct we can safely switch to # PCDATA mode now. This also means we don't have to do it when # emitting the end tag token. self.contentModelFlag = contentModelFlags["PCDATA"] else: self.tokenQueue.append({"type": "Characters", "data": u"</"}) self.state = self.states["data"] # Need to return here since we don't want the rest of the # method to be walked through. return True data = self.stream.char() if data in asciiLetters: self.currentToken = {"type":"EndTag", "name":data, "data":[]} self.state = self.states["tagName"] elif data == u">": self.tokenQueue.append({"type": "ParseError", "data": "expected-closing-tag-but-got-right-bracket"}) self.state = self.states["data"] elif data == EOF: self.tokenQueue.append({"type": "ParseError", "data": "expected-closing-tag-but-got-eof"}) self.tokenQueue.append({"type": "Characters", "data": u"</"}) self.state = self.states["data"] else: # XXX data can be _'_... self.tokenQueue.append({"type": "ParseError", "data": "expected-closing-tag-but-got-char", "datavars": {"data": data}}) self.stream.unget(data) self.state = self.states["bogusComment"] return True
def __init__(self, preserve=None, noescape=None):
    """Initialize the filter.

    :param preserve: a set or sequence of tag names for which
                     white-space should be preserved
    :param noescape: a set or sequence of tag names for which text
                     content should not be escaped

    The `noescape` set is expected to refer to elements that cannot
    contain further child elements (such as ``<style>`` or ``<script>``
    in HTML documents).
    """
    if preserve is None:
        preserve = []
    self.preserve = frozenset(preserve)
    if noescape is None:
        noescape = []
    self.noescape = frozenset(noescape)
def _get_groups(self):
    '''Return the groups that a user is a member of.'''
    try:
        return self._groups
    except AttributeError:
        # User and groups haven't been returned.  Since the json call
        # returns both user and groups, this is set at user creation time.
        self._groups = frozenset()
    return self._groups
def checkBrowsingContext(self, token, tagName, attrName, attrValue):
    if not attrValue:
        return
    if attrValue[0] != '_':
        return
    attrValue = attrValue.lower()
    if attrValue in frozenset(('_self', '_parent', '_top', '_blank')):
        return
    yield {"type": tokenTypes["ParseError"],
           "data": "invalid-browsing-context",
           "datavars": {"tagName": tagName,
                        "attributeName": attrName}}
def freezedicts(obj):
    """Recursively iterate over ``obj``, supporting dicts, tuples
    and lists, and freeze ``dicts`` such that ``obj`` can
    be used with hash().
    """
    if isinstance(obj, (list, tuple)):
        return type(obj)([freezedicts(sub) for sub in obj])
    if isinstance(obj, dict):
        return frozenset(six.iteritems(obj))
    return obj
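# Usage sketch for freezedicts (assumes the ``six`` dependency used above is
# installed): nested dicts become frozensets of their items, so equal payloads
# compare and hash identically regardless of key order.
payload_a = [{'event': 'click', 'x': 1}, {'event': 'move', 'x': 2}]
payload_b = [{'x': 1, 'event': 'click'}, {'event': 'move', 'x': 2}]
print(freezedicts(payload_a) == freezedicts(payload_b))    # True
print(hash(tuple(freezedicts(payload_a))) ==
      hash(tuple(freezedicts(payload_b))))                 # True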
def checkStartTagUnknownAttributes(self, token):
    # check for recognized attribute names
    name = token.get("name").lower()
    allowedAttributes = globalAttributes | allowedAttributeMap.get(
        name, frozenset(()))
    for attrName, attrValue in token.get("data", []):
        if attrName.lower() not in allowedAttributes:
            yield {"type": tokenTypes["ParseError"],
                   "data": "unknown-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
def pkey(self, cl, newpkey=None): """This method gets or sets the primary key of a class. Composite primary keys are represented as frozensets. Note that this raises an exception if the table does not have a primary key. If newpkey is set and is not a dictionary then set that value as the primary key of the class. If it is a dictionary then replace the _pkeys dictionary with a copy of it. """ # First see if the caller is supplying a dictionary if isinstance(newpkey, dict): # make sure that all classes have a namespace self._pkeys = dict([ ('.' in cl and cl or 'public.' + cl, pkey) for cl, pkey in newpkey.iteritems()]) return self._pkeys qcl = self._add_schema(cl) # build fully qualified class name # Check if the caller is supplying a new primary key for the class if newpkey: self._pkeys[qcl] = newpkey return newpkey # Get all the primary keys at once if qcl not in self._pkeys: # if not found, check again in case it was added after we started self._pkeys = {} if self.server_version >= 80200: # the ANY syntax works correctly only with PostgreSQL >= 8.2 any_indkey = "= ANY (pg_index.indkey)" else: any_indkey = "IN (%s)" % ', '.join( ['pg_index.indkey[%d]' % i for i in range(16)]) for r in self.db.query( "SELECT pg_namespace.nspname, pg_class.relname," " pg_attribute.attname FROM pg_class" " JOIN pg_namespace" " ON pg_namespace.oid = pg_class.relnamespace" " AND pg_namespace.nspname NOT LIKE 'pg_%'" " JOIN pg_attribute ON pg_attribute.attrelid = pg_class.oid" " AND pg_attribute.attisdropped = 'f'" " JOIN pg_index ON pg_index.indrelid = pg_class.oid" " AND pg_index.indisprimary = 't'" " AND pg_attribute.attnum " + any_indkey).getresult(): cl, pkey = _join_parts(r[:2]), r[2] self._pkeys.setdefault(cl, []).append(pkey) # (only) for composite primary keys, the values will be frozensets for cl, pkey in self._pkeys.iteritems(): self._pkeys[cl] = len(pkey) > 1 and frozenset(pkey) or pkey[0] self._do_debug(self._pkeys) # will raise an exception if primary key doesn't exist return self._pkeys[qcl]
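# pkey() above returns a plain column name for single-column primary keys and
# a frozenset of column names for composite keys, so calling code has to
# handle both shapes. A standalone sketch of that normalisation (no database
# needed; the sample values mimic what the method stores in self._pkeys):
def pkey_columns(pkey_value):
    """Return the primary-key columns as a list, whatever shape pkey() gave."""
    if isinstance(pkey_value, frozenset):
        return sorted(pkey_value)        # composite primary key
    return [pkey_value]                  # single-column primary key

print(pkey_columns('id'))                                # ['id']
print(pkey_columns(frozenset(['order_id', 'line'])))     # ['line', 'order_id']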
def checkBoolean(self, token, tagName, attrName, attrValue):
    enumeratedValues = frozenset((attrName, ''))
    if attrValue not in enumeratedValues:
        yield {"type": tokenTypes["ParseError"],
               "data": "invalid-boolean-value",
               "datavars": {"tagName": tagName,
                            "attributeName": attrName,
                            "enumeratedValues": tuple(enumeratedValues)}}
        yield {"type": tokenTypes["ParseError"],
               "data": "invalid-attribute-value",
               "datavars": {"tagName": tagName,
                            "attributeName": attrName}}
def _get_groups(self):
    '''Return the groups that a user is a member of.'''
    try:
        return self._groups
    except AttributeError: # pylint: disable-msg=W0704
        # :W0704: Groups haven't been computed yet
        pass

    if not self.user:
        # User and groups haven't been returned.  Since the json call
        # computes both user and groups, this will now be set.
        self._groups = frozenset()
    return self._groups
"expected-eof-but-got-end-tag": _(u"Unexpected end tag (%(name)s)" u". Expected end of file."), "eof-in-table": _(u"Unexpected end of file. Expected table content."), "eof-in-select": _(u"Unexpected end of file. Expected select content."), "eof-in-frameset": _(u"Unexpected end of file. Expected frameset content."), "XXX-undefined-error": (u"Undefined error (this sucks and should be fixed)"), } contentModelFlags = {"PCDATA": 0, "RCDATA": 1, "CDATA": 2, "PLAINTEXT": 3} scopingElements = frozenset(("applet", "button", "caption", "html", "marquee", "object", "table", "td", "th")) formattingElements = frozenset(("a", "b", "big", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) specialElements = frozenset(( "address", "area", "article", "aside", "base", "basefont", "bgsound", "blockquote", "body", "br",
# FIXME: this should all be confirmed against what a DTD says
# (probably in a test; this may not match the DTD exactly, but we
# should document just how it differs).

# Data taken from http://www.w3.org/TR/html401/index/elements.html
# and http://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements
# for html5_tags.

try:
    frozenset
except NameError:
    from sets import Set as frozenset

empty_tags = frozenset([
    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
    'img', 'input', 'isindex', 'link', 'meta', 'param'])

deprecated_tags = frozenset([
    'applet', 'basefont', 'center', 'dir', 'font', 'isindex',
    'menu', 's', 'strike', 'u'])

# archive actually takes a space-separated list of URIs
link_attrs = frozenset([
    'action', 'archive', 'background', 'cite', 'classid', 'codebase',
class InequalityIndex(SeededIndex): __slots__ = ('last_cases', 'last_out') dispatch_function = staticmethod(dispatch_by_inequalities) def __init__(self): self.clear() def count_for(self, cases): """Get the total count of outgoing branches, given incoming cases""" casemap = self.casemap_for(cases) return len(casemap), sum([len(x) for x in casemap.values()]) def clear(self): """Reset index to empty""" self.allSeeds = {} # set of all seeds self.criteria = {} # criterion -> applicable seeds self.last_cases = None self.last_out = None def __setitem__(self, criterion, case): """Register 'case' under each of the criterion's seeds""" self.criteria[case] = criterion for (lo, hi) in criterion.ranges: self.allSeeds[lo] = self.allSeeds[hi] = None def addSeed(self, seed): raise NotImplementedError def casemap_for(self, cases): """Return a mapping from seeds->caselists for the given cases""" if cases is self.last_cases or cases == self.last_cases: return self.last_out tmp = {} out = {} get = self.criteria.get all = Inequality('..', [(Min, Max)]) have_ineq = False for case in cases: for (lo, hi) in get(case, all).ranges: if lo not in tmp: tmp[lo] = [], [], [] if lo == hi: tmp[lo][2].append(case) else: have_ineq = True if hi not in tmp: tmp[hi] = [], [], [] tmp[lo][0].append(case) if hi is not Max: tmp[hi][1].append(case) if have_ineq: keys = list(tmp.keys()) keys.sort() current = frozenset(tmp.get(Min, [[]])[0]) hi = Min for val in keys: add, remove, eq = tmp[val] lo, hi = hi, val out[lo, hi] = current current = current.difference(remove) out[val, val] = current.union(eq) current = current.union(add) else: out[Min, Max] = [] # default for val, (add, remove, eq) in list(tmp.items()): out[val, val] = eq self.last_out = out self.last_cases = cases return out
class HTMLSerializer(XHTMLSerializer): """Produces HTML text from an event stream. >>> from libs.genshi.builder import tag >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) >>> print ''.join(HTMLSerializer()(elem.generate())) <div><a href="foo"></a><br><hr noshade></div> """ _NOESCAPE_ELEMS = frozenset([ QName('script'), QName('http://www.w3.org/1999/xhtml}script'), QName('style'), QName('http://www.w3.org/1999/xhtml}style') ]) def __init__(self, doctype=None, strip_whitespace=True): """Initialize the HTML serializer. :param doctype: a ``(name, pubid, sysid)`` tuple that represents the DOCTYPE declaration that should be included at the top of the generated output :param strip_whitespace: whether extraneous whitespace should be stripped from the output """ super(HTMLSerializer, self).__init__(doctype, False) self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append( WhitespaceFilter(self._PRESERVE_SPACE, self._NOESCAPE_ELEMS)) self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml')) def __call__(self, stream): boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS noescape_elems = self._NOESCAPE_ELEMS have_doctype = False noescape = False stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: if kind is START or kind is EMPTY: tag, attrib = data buf = ['<', tag] for attr, value in attrib: if attr in boolean_attrs: if value: buf += [' ', attr] else: buf += [' ', attr, '="', escape(value), '"'] buf.append('>') if kind is EMPTY: if tag not in empty_elems: buf.append('</%s>' % tag) yield Markup(u''.join(buf)) if tag in noescape_elems: noescape = True elif kind is END: yield Markup('</%s>' % data) noescape = False elif kind is TEXT: if noescape: yield data else: yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) elif kind is DOCTYPE and not have_doctype: name, pubid, sysid = data buf = ['<!DOCTYPE %s'] if pubid: buf.append(' PUBLIC "%s"') elif sysid: buf.append(' SYSTEM') if sysid: buf.append(' "%s"') buf.append('>\n') yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is PI: yield Markup('<?%s %s?>' % data)
class XMLSerializer(object): """Produces XML text from an event stream. >>> from libs.genshi.builder import tag >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) >>> print ''.join(XMLSerializer()(elem.generate())) <div><a href="foo"/><br/><hr noshade="True"/></div> """ _PRESERVE_SPACE = frozenset() def __init__(self, doctype=None, strip_whitespace=True, namespace_prefixes=None): """Initialize the XML serializer. :param doctype: a ``(name, pubid, sysid)`` tuple that represents the DOCTYPE declaration that should be included at the top of the generated output, or the name of a DOCTYPE as defined in `DocType.get` :param strip_whitespace: whether extraneous whitespace should be stripped from the output :note: Changed in 0.4.2: The `doctype` parameter can now be a string. """ self.preamble = [] if doctype: if isinstance(doctype, basestring): doctype = DocType.get(doctype) self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) def __call__(self, stream): have_decl = have_doctype = False in_cdata = False stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: if kind is START or kind is EMPTY: tag, attrib = data buf = ['<', tag] for attr, value in attrib: buf += [' ', attr, '="', escape(value), '"'] buf.append(kind is EMPTY and '/>' or '>') yield Markup(u''.join(buf)) elif kind is END: yield Markup('</%s>' % data) elif kind is TEXT: if in_cdata: yield data else: yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) elif kind is XML_DECL and not have_decl: version, encoding, standalone = data buf = ['<?xml version="%s"' % version] if encoding: buf.append(' encoding="%s"' % encoding) if standalone != -1: standalone = standalone and 'yes' or 'no' buf.append(' standalone="%s"' % standalone) buf.append('?>\n') yield Markup(u''.join(buf)) have_decl = True elif kind is DOCTYPE and not have_doctype: name, pubid, sysid = data buf = ['<!DOCTYPE %s'] if pubid: buf.append(' PUBLIC "%s"') elif sysid: buf.append(' SYSTEM') if sysid: buf.append(' "%s"') buf.append('>\n') yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is START_CDATA: yield Markup('<![CDATA[') in_cdata = True elif kind is END_CDATA: yield Markup(']]>') in_cdata = False elif kind is PI: yield Markup('<?%s %s?>' % data)
def _get_permissions(self):
    '''Get set of permission names of this identity.'''
    ### TG difference: No permissions in FAS
    return frozenset()
# This software consists of voluntary contributions made by many # individuals. For exact contribution history, see the revision # history and logs, available at http://projects.edgewall.com/trac/. import htmlentitydefs from HTMLParser import HTMLParser, HTMLParseError import re try: frozenset except NameError: from sets import ImmutableSet as frozenset from StringIO import StringIO __all__ = ['escape', 'unescape', 'html'] _EMPTY_TAGS = frozenset(['br', 'hr', 'img', 'input']) _BOOLEAN_ATTRS = frozenset([ 'selected', 'checked', 'compact', 'declare', 'defer', 'disabled', 'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap' ]) class Markup(unicode): """Marks a string as being safe for inclusion in XML output without needing to be escaped. Strings are normally automatically escaped when added to the HDF. `Markup`-strings are however an exception. Use with care. (since Trac 0.9.3) """
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
WHITESPACE_AROUND_OPERATOR_REGEX = \
    re.compile('([^\w\s]*)\s*(\t| )\s*([^\w\s]*)')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
    re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')
LAMBDA_REGEX = re.compile(r'\blambda\b')

WHITESPACE = ' \t'

BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>',
                              '%=', '^=', '&=', '|=', '==', '/=', '//=',
                              '<=', '>=', '<<=', '>>=', '%', '^', '&', '|',
                              '=', '/', '//', '<', '>', '<<'])
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
                         tokenize.DEDENT, tokenize.NEWLINE])
E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) -
                    frozenset(['False', 'None', 'True']))
BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')


##############################################################################
# Plugins (check functions) for physical lines
##############################################################################
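# The operator tables above are combined with ordinary frozenset algebra
# (a union for OPERATORS, a difference for E225NOT_KEYWORDS). A tiny
# standalone illustration of the same idiom with made-up contents:
binary_ops = frozenset(['==', '!=', '<='])
unary_ops = frozenset(['+', '-', '**'])
all_ops = binary_ops | unary_ops            # the union is still a frozenset
print('**' in all_ops)                      # True
print(sorted(all_ops - binary_ops))         # ['**', '+', '-']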
def match(self, url, environ=None, sub_domains=False, sub_domains_ignore=None, domain_match=''): """Match a url to our regexp. While the regexp might match, this operation isn't guaranteed as there's other factors that can cause a match to fail even though the regexp succeeds (Default that was relied on wasn't given, requirement regexp doesn't pass, etc.). Therefore the calling function shouldn't assume this will return a valid dict, the other possible return is False if a match doesn't work out. """ # Static routes don't match, they generate only if self.static: return False match = self.regmatch.match(url) if not match: return False sub_domain = None if sub_domains and environ and 'HTTP_HOST' in environ: host = environ['HTTP_HOST'].split(':')[0] sub_match = re.compile('^(.+?)\.%s$' % domain_match) subdomain = re.sub(sub_match, r'\1', host) if subdomain not in sub_domains_ignore and host != subdomain: sub_domain = subdomain if self.conditions: if 'method' in self.conditions and environ and \ environ['REQUEST_METHOD'] not in self.conditions['method']: return False # Check sub-domains? use_sd = self.conditions.get('sub_domain') if use_sd and not sub_domain: return False elif not use_sd and 'sub_domain' in self.conditions and sub_domain: return False if isinstance(use_sd, list) and sub_domain not in use_sd: return False matchdict = match.groupdict() result = {} extras = self._default_keys - frozenset(matchdict.keys()) for key, val in matchdict.iteritems(): if key != 'path_info' and self.encoding: # change back into python unicode objects from the URL # representation try: val = as_unicode(val, self.encoding, self.decode_errors) except UnicodeDecodeError: return False if not val and key in self.defaults and self.defaults[key]: result[key] = self.defaults[key] else: result[key] = val for key in extras: result[key] = self.defaults[key] # Add the sub-domain if there is one if sub_domains: result['sub_domain'] = sub_domain # If there's a function, call it with environ and expire if it # returns False if self.conditions and 'function' in self.conditions and \ not self.conditions['function'](environ, result): return False return result
def buildnextreg(self, path, clist, include_names=True): """Recursively build our regexp given a path, and a controller list. Returns the regular expression string, and two booleans that can be ignored as they're only used internally by buildnextreg. """ if path: part = path[0] else: part = '' reg = '' # noreqs will remember whether the remainder has either a string # match, or a non-defaulted regexp match on a key, allblank remembers # if the rest could possible be completely empty (rest, noreqs, allblank) = ('', True, True) if len(path[1:]) > 0: self.prior = part (rest, noreqs, allblank) = self.buildnextreg(path[1:], clist, include_names) if isinstance(part, dict) and part['type'] in (':', '.'): var = part['name'] typ = part['type'] partreg = '' # First we plug in the proper part matcher if self.reqs.has_key(var): if include_names: partreg = '(?P<%s>%s)' % (var, self.reqs[var]) else: partreg = '(?:%s)' % self.reqs[var] if typ == '.': partreg = '(?:\.%s)??' % partreg elif var == 'controller': if include_names: partreg = '(?P<%s>%s)' % (var, '|'.join( map(re.escape, clist))) else: partreg = '(?:%s)' % '|'.join(map(re.escape, clist)) elif self.prior in ['/', '#']: if include_names: partreg = '(?P<' + var + '>[^' + self.prior + ']+?)' else: partreg = '(?:[^' + self.prior + ']+?)' else: if not rest: if typ == '.': exclude_chars = '/.' else: exclude_chars = '/' if include_names: partreg = '(?P<%s>[^%s]+?)' % (var, exclude_chars) else: partreg = '(?:[^%s]+?)' % exclude_chars if typ == '.': partreg = '(?:\.%s)??' % partreg else: end = ''.join(self.done_chars) rem = rest if rem[0] == '\\' and len(rem) > 1: rem = rem[1] elif rem.startswith('(\\') and len(rem) > 2: rem = rem[2] else: rem = end rem = frozenset(rem) | frozenset(['/']) if include_names: partreg = '(?P<%s>[^%s]+?)' % (var, ''.join(rem)) else: partreg = '(?:[^%s]+?)' % ''.join(rem) if self.reqs.has_key(var): noreqs = False if not self.defaults.has_key(var): allblank = False noreqs = False # Now we determine if its optional, or required. This changes # depending on what is in the rest of the match. If noreqs is # true, then its possible the entire thing is optional as there's # no reqs or string matches. if noreqs: # The rest is optional, but now we have an optional with a # regexp. Wrap to ensure that if we match anything, we match # our regexp first. It's still possible we could be completely # blank as we have a default if self.reqs.has_key(var) and self.defaults.has_key(var): reg = '(' + partreg + rest + ')?' # Or we have a regexp match with no default, so now being # completely blank form here on out isn't possible elif self.reqs.has_key(var): allblank = False reg = partreg + rest # If the character before this is a special char, it has to be # followed by this elif self.defaults.has_key(var) and \ self.prior in (',', ';', '.'): reg = partreg + rest # Or we have a default with no regexp, don't touch the allblank elif self.defaults.has_key(var): reg = partreg + '?' + rest # Or we have a key with no default, and no reqs. Not possible # to be all blank from here else: allblank = False reg = partreg + rest # In this case, we have something dangling that might need to be # matched else: # If they can all be blank, and we have a default here, we know # its safe to make everything from here optional. Since # something else in the chain does have req's though, we have # to make the partreg here required to continue matching if allblank and self.defaults.has_key(var): reg = '(' + partreg + rest + ')?' 
# Same as before, but they can't all be blank, so we have to # require it all to ensure our matches line up right else: reg = partreg + rest elif isinstance(part, dict) and part['type'] == '*': var = part['name'] if noreqs: if include_names: reg = '(?P<%s>.*)' % var + rest else: reg = '(?:.*)' + rest if not self.defaults.has_key(var): allblank = False noreqs = False else: if allblank and self.defaults.has_key(var): if include_names: reg = '(?P<%s>.*)' % var + rest else: reg = '(?:.*)' + rest elif self.defaults.has_key(var): if include_names: reg = '(?P<%s>.*)' % var + rest else: reg = '(?:.*)' + rest else: if include_names: reg = '(?P<%s>.*)' % var + rest else: reg = '(?:.*)' + rest allblank = False noreqs = False elif part and part[-1] in self.done_chars: if allblank: reg = re.escape(part[:-1]) + '(' + re.escape(part[-1]) + rest reg += ')?' else: allblank = False reg = re.escape(part) + rest # We have a normal string here, this is a req, and it prevents us from # being all blank else: noreqs = False allblank = False reg = re.escape(part) + rest return (reg, noreqs, allblank)
def __init__(self, routepath, **kargs): """Initialize a route, with a given routepath for matching/generation The set of keyword args will be used as defaults. Usage:: >>> from routes.base import Route >>> newroute = Route(':controller/:action/:id') >>> sorted(newroute.defaults.items()) [('action', 'index'), ('id', None)] >>> newroute = Route('date/:year/:month/:day', ... controller="blog", action="view") >>> newroute = Route('archives/:page', controller="blog", ... action="by_page", requirements = { 'page':'\d{1,2}' }) >>> newroute.reqs {'page': '\\\d{1,2}'} .. Note:: Route is generally not called directly, a Mapper instance connect method should be used to add routes. """ self.routepath = routepath self.sub_domains = False self.prior = None self.redirect = False self.name = None self.minimization = kargs.pop('_minimize', True) self.encoding = kargs.pop('_encoding', 'utf-8') self.reqs = kargs.get('requirements', {}) self.decode_errors = 'replace' # Don't bother forming stuff we don't need if its a static route self.static = kargs.pop('_static', False) self.filter = kargs.pop('_filter', None) self.absolute = kargs.pop('_absolute', False) # Pull out the member/collection name if present, this applies only to # map.resource self.member_name = kargs.pop('_member_name', None) self.collection_name = kargs.pop('_collection_name', None) self.parent_resource = kargs.pop('_parent_resource', None) # Pull out route conditions self.conditions = kargs.pop('conditions', None) # Determine if explicit behavior should be used self.explicit = kargs.pop('_explicit', False) # reserved keys that don't count reserved_keys = ['requirements'] # special chars to indicate a natural split in the URL self.done_chars = ('/', ',', ';', '.', '#') # Since static need to be generated exactly, treat them as # non-minimized if self.static: self.external = '://' in self.routepath self.minimization = False # Strip preceding '/' if present, and not minimizing if routepath.startswith('/') and self.minimization: routepath = routepath[1:] # Build our routelist, and the keys used in the route self.routelist = routelist = self._pathkeys(routepath) routekeys = frozenset([key['name'] for key in routelist \ if isinstance(key, dict)]) if not self.minimization: self.make_full_route() # Build a req list with all the regexp requirements for our args self.req_regs = {} for key, val in self.reqs.iteritems(): self.req_regs[key] = re.compile('^' + val + '$') # Update our defaults and set new default keys if needed. defaults # needs to be saved (self.defaults, defaultkeys) = self._defaults(routekeys, reserved_keys, kargs) # Save the maximum keys we could utilize self.maxkeys = defaultkeys | routekeys # Populate our minimum keys, and save a copy of our backward keys for # quicker generation later (self.minkeys, self.routebackwards) = self._minkeys(routelist[:]) # Populate our hardcoded keys, these are ones that are set and don't # exist in the route self.hardcoded = frozenset([key for key in self.maxkeys \ if key not in routekeys and self.defaults[key] is not None]) # Cache our default keys self._default_keys = frozenset(self.defaults.keys())
def validateAttributeValueDir(self, token, tagName, attrName, attrValue):
    for t in self.checkEnumeratedValue(token, tagName, attrName, attrValue,
                                       frozenset(('ltr', 'rtl'))) or []:
        yield t
def _powerset(iterator):
    first = frozenset([iterator.next()])
    yield first
    for s in _powerset(iterator):
        yield s
        yield s | first
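# Usage sketch for _powerset: it yields every non-empty subset of the items
# the iterator produces (the empty set itself is never yielded). This relies
# on Python 2 iterator.next(), matching the generator above.
subsets = list(_powerset(iter(['a', 'b', 'c'])))
print(len(subsets))                              # 7 == 2**3 - 1
print(frozenset(['a', 'b', 'c']) in subsets)     # True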
"unexpected-end-tag-after-frameset": _(u"Unexpected end tag (%(name)s)" u" in the after frameset phase. Ignored."), "expected-eof-but-got-char": _(u"Unexpected non-space characters. Expected end of file."), "expected-eof-but-got-start-tag": _(u"Unexpected start tag (%(name)s)" u". Expected end of file."), "expected-eof-but-got-end-tag": _(u"Unexpected end tag (%(name)s)" u". Expected end of file."), } contentModelFlags = {"PCDATA": 0, "RCDATA": 1, "CDATA": 2, "PLAINTEXT": 3} scopingElements = frozenset( ("button", "caption", "html", "marquee", "object", "table", "td", "th")) formattingElements = frozenset(("a", "b", "big", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) specialElements = frozenset( ("address", "area", "base", "basefont", "bgsound", "blockquote", "body", "br", "center", "col", "colgroup", "dd", "dir", "div", "dl", "dt", "embed", "fieldset", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "iframe", "image", "img", "input", "isindex", "li", "link", "listing", "menu", "meta", "noembed", "noframes", "noscript", "ol", "optgroup", "option", "p", "param", "plaintext", "pre", "script", "select", "spacer", "style", "tbody", "textarea", "tfoot", "thead", "title", "tr", "ul", "wbr")) spaceCharacters = frozenset((u"\t", u"\n", u"\u000B", u"\u000C", u" ", u"\r"))
class HTMLSanitizer(HTMLParser): safe_tags = frozenset([ 'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var' ]) safe_attrs = frozenset([ 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'axis', 'border', 'bgcolor', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width' ]) ignore_tags = frozenset(['html', 'body']) uri_attrs = frozenset( ['action', 'background', 'dynsrc', 'href', 'lowsrc', 'src']) safe_schemes = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) def __init__(self, out): HTMLParser.__init__(self) self.out = out self.waiting_for = None def handle_starttag(self, tag, attrs): if self.waiting_for: return if tag in self.ignore_tags: return if tag not in self.safe_tags: self.waiting_for = tag return self.out.write('<' + tag) def _get_scheme(text): if ':' not in text: return None chars = [char for char in text.split(':', 1)[0] if char.isalnum()] return ''.join(chars).lower() for attrname, attrval in attrs: if attrname not in self.safe_attrs: continue elif attrname in self.uri_attrs: # Don't allow URI schemes such as "javascript:" if _get_scheme(attrval) not in self.safe_schemes: continue elif attrname == 'style': # Remove dangerous CSS declarations from inline styles decls = [] for decl in filter(None, attrval.split(';')): is_evil = False if 'expression' in decl: is_evil = True for m in re.finditer(r'url\s*\(([^)]+)', decl): if _get_scheme(m.group(1)) not in self.safe_schemes: is_evil = True break if not is_evil: decls.append(decl.strip()) if not decls: continue attrval = '; '.join(decls) self.out.write(' ' + attrname + '="' + escape(attrval) + '"') if tag in _EMPTY_TAGS: self.out.write(' />') else: self.out.write('>') def handle_entityref(self, name): if not self.waiting_for: self.out.write('&%s;' % name) def handle_data(self, data): if not self.waiting_for: self.out.write(escape(data, quotes=False)) def handle_endtag(self, tag): if tag in self.ignore_tags: return if self.waiting_for: if self.waiting_for == tag: self.waiting_for = None return if tag not in _EMPTY_TAGS: self.out.write('</' + tag + '>')
_(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-scheme": _(u"Unregistered URI scheme: '%(attributeName)s' attribute on <%(tagName)s>." ), "invalid-rel": _(u"Invalid link relation: '%(attributeName)s' attribute on <%(tagName)s>." ), "invalid-mime-type": _(u"Invalid MIME type: '%(attributeName)s' attribute on <%(tagName)s>."), }) globalAttributes = frozenset( ('class', 'contenteditable', 'contextmenu', 'dir', 'draggable', 'id', 'irrelevant', 'lang', 'ref', 'tabindex', 'template', 'title', 'onabort', 'onbeforeunload', 'onblur', 'onchange', 'onclick', 'oncontextmenu', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmessage', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onresize', 'onscroll', 'onselect', 'onsubmit', 'onunload')) # XXX lang in HTML only, xml:lang in XHTML only # XXX validate ref, template allowedAttributeMap = { 'html': frozenset(('xmlns', )), 'head': frozenset(()), 'title': frozenset(()), 'base':
def generate(self, *args, **kargs): """Generate a route from a set of keywords Returns the url text, or None if no URL could be generated. .. code-block:: python m.generate(controller='content',action='view',id=10) """ # Generate ourself if we haven't already if not self._created_gens: self._create_gens() if self.append_slash: kargs['_append_slash'] = True if not self.explicit: if 'controller' not in kargs: kargs['controller'] = 'content' if 'action' not in kargs: kargs['action'] = 'index' controller = kargs.get('controller', None) action = kargs.get('action', None) # If the URL didn't depend on the SCRIPT_NAME, we'll cache it # keyed by just by kargs; otherwise we need to cache it with # both SCRIPT_NAME and kargs: cache_key = unicode(args).encode('utf8') + \ unicode(kargs).encode('utf8') if self.urlcache is not None: if self.environ: cache_key_script_name = '%s:%s' % (self.environ.get( 'SCRIPT_NAME', ''), cache_key) else: cache_key_script_name = cache_key # Check the url cache to see if it exists, use it if it does for key in [cache_key, cache_key_script_name]: if key in self.urlcache: return self.urlcache[key] actionlist = self._gendict.get(controller) or self._gendict.get( '*', {}) if not actionlist and not args: return None (keylist, sortcache) = actionlist.get(action) or \ actionlist.get('*', (None, {})) if not keylist and not args: return None keys = frozenset(kargs.keys()) cacheset = False cachekey = unicode(keys) cachelist = sortcache.get(cachekey) if args: keylist = args elif cachelist: keylist = cachelist else: cacheset = True newlist = [] for route in keylist: if len(route.minkeys - keys) == 0: newlist.append(route) keylist = newlist def keysort(a, b): """Sorts two sets of sets, to order them ideally for matching.""" am = a.minkeys a = a.maxkeys b = b.maxkeys lendiffa = len(keys ^ a) lendiffb = len(keys ^ b) # If they both match, don't switch them if lendiffa == 0 and lendiffb == 0: return 0 # First, if a matches exactly, use it if lendiffa == 0: return -1 # Or b matches exactly, use it if lendiffb == 0: return 1 # Neither matches exactly, return the one with the most in # common if cmp(lendiffa, lendiffb) != 0: return cmp(lendiffa, lendiffb) # Neither matches exactly, but if they both have just as much # in common if len(keys & b) == len(keys & a): # Then we return the shortest of the two return cmp(len(a), len(b)) # Otherwise, we return the one that has the most in common else: return cmp(len(keys & b), len(keys & a)) keylist.sort(keysort) if cacheset: sortcache[cachekey] = keylist # Iterate through the keylist of sorted routes (or a single route if # it was passed in explicitly for hardcoded named routes) for route in keylist: fail = False for key in route.hardcoded: kval = kargs.get(key) if not kval: continue if isinstance(kval, str): kval = kval.decode(self.encoding) else: kval = unicode(kval) if kval != route.defaults[key]: fail = True break if fail: continue path = route.generate(**kargs) if path: if self.prefix: path = self.prefix + path external_static = route.static and route.external if self.environ and self.environ.get('SCRIPT_NAME', '') != ''\ and not route.absolute and not external_static: path = self.environ['SCRIPT_NAME'] + path key = cache_key_script_name else: key = cache_key if self.urlcache is not None: self.urlcache[key] = str(path) return str(path) else: continue return None
from html5lib.constants import scopingElements, tableInsertModeElements, namespaces try: frozenset except NameError: # Import from the sets module for python 2.3 from sets import Set as set from sets import ImmutableSet as frozenset # The scope markers are inserted when entering object elements, # marquees, table cells, and table captions, and are used to prevent formatting # from "leaking" into tables, object elements, and marquees. Marker = None listElementsMap = { None: (frozenset(scopingElements), False), "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), (namespaces["html"], "ul")])), False), "table": (frozenset([(namespaces["html"], "html"), (namespaces["html"], "table")]), False), "select": (frozenset([(namespaces["html"], "optgroup"), (namespaces["html"], "option")]), True) } class Node(object): def __init__(self, name):
""" # Last synced with Rails copy at Revision 6057 on Feb 9th, 2007. import sys if sys.version < '2.4': from sets import ImmutableSet as frozenset from javascript import * from javascript import options_for_javascript from form_tag import form from tags import tag, camelize from urls import get_url CALLBACKS = frozenset(['uninitialized', 'loading', 'loaded', 'interactive', 'complete', 'failure', 'success'] + \ [str(x) for x in range(100,599)]) AJAX_OPTIONS = frozenset(['before', 'after', 'condition', 'url', 'asynchronous', 'method', 'insertion', 'position', 'form', 'with', 'with_', 'update', 'script'] + \ list(CALLBACKS)) def link_to_remote(name, options=None, **html_options): """ Links to a remote function. Returns a link to a remote action defined ``dict(url=url())`` (using the url() format) that's called in the background using XMLHttpRequest. The result of that request can then be inserted into a DOM object whose id can be specified with the ``update`` keyword.
class XHTMLSerializer(XMLSerializer): """Produces XHTML text from an event stream. >>> from libs.genshi.builder import tag >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) >>> print ''.join(XHTMLSerializer()(elem.generate())) <div><a href="foo"></a><br /><hr noshade="noshade" /></div> """ _EMPTY_ELEMS = frozenset([ 'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param' ]) _BOOLEAN_ATTRS = frozenset([ 'selected', 'checked', 'compact', 'declare', 'defer', 'disabled', 'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap' ]) _PRESERVE_SPACE = frozenset([ QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') ]) def __init__(self, doctype=None, strip_whitespace=True, namespace_prefixes=None): super(XHTMLSerializer, self).__init__(doctype, False) self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) namespace_prefixes = namespace_prefixes or {} namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) def __call__(self, stream): boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS have_doctype = False in_cdata = False stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: if kind is START or kind is EMPTY: tag, attrib = data buf = ['<', tag] for attr, value in attrib: if attr in boolean_attrs: value = attr buf += [' ', attr, '="', escape(value), '"'] if kind is EMPTY: if tag in empty_elems: buf.append(' />') else: buf.append('></%s>' % tag) else: buf.append('>') yield Markup(u''.join(buf)) elif kind is END: yield Markup('</%s>' % data) elif kind is TEXT: if in_cdata: yield data else: yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) elif kind is DOCTYPE and not have_doctype: name, pubid, sysid = data buf = ['<!DOCTYPE %s'] if pubid: buf.append(' PUBLIC "%s"') elif sysid: buf.append(' SYSTEM') if sysid: buf.append(' "%s"') buf.append('>\n') yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is START_CDATA: yield Markup('<![CDATA[') in_cdata = True elif kind is END_CDATA: yield Markup(']]>') in_cdata = False elif kind is PI: yield Markup('<?%s %s?>' % data)
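# Hedged doctest-style sketch of the two EMPTY-element branches above: void elements
# get the " />" form, other childless elements are expanded to a start/end pair, and
# boolean attributes are repeated as their own value.  The standard genshi.builder
# import path is assumed and the output is shown approximately.
from genshi.builder import tag
elem = tag.div(tag.textarea(), tag.br, tag.option(selected=True))
print ''.join(XHTMLSerializer()(elem.generate()))
# <div><textarea></textarea><br /><option selected="selected"></option></div>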
class Translator(object): """Can extract and translate localizable strings from markup streams and templates. For example, assume the following template: >>> from genshi.template import MarkupTemplate >>> >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> ... <head> ... <title>Example</title> ... </head> ... <body> ... <h1>Example</h1> ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> ... </body> ... </html>''', filename='example.html') For demonstration, we define a dummy ``gettext``-style function with a hard-coded translation table, and pass that to the `Translator` initializer: >>> def pseudo_gettext(string): ... return { ... 'Example': 'Beispiel', ... 'Hello, %(name)s': 'Hallo, %(name)s' ... }[string] >>> >>> translator = Translator(pseudo_gettext) Next, the translator needs to be prepended to any already defined filters on the template: >>> tmpl.filters.insert(0, translator) When generating the template output, our hard-coded translations should be applied as expected: >>> print tmpl.generate(username='Hans', _=pseudo_gettext) <html> <head> <title>Beispiel</title> </head> <body> <h1>Beispiel</h1> <p>Hallo, Hans</p> </body> </html> Note that elements defining ``xml:lang`` attributes that do not contain variable expressions are ignored by this filter. That can be used to exclude specific parts of a template from being extracted and translated. """ IGNORE_TAGS = frozenset([ QName('script'), QName('http://www.w3.org/1999/xhtml}script'), QName('style'), QName('http://www.w3.org/1999/xhtml}style') ]) INCLUDE_ATTRS = frozenset( ['abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title']) def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS, include_attrs=INCLUDE_ATTRS, extract_text=True): """Initialize the translator. :param translate: the translation function, for example ``gettext`` or ``ugettext``. :param ignore_tags: a set of tag names that should not be localized :param include_attrs: a set of attribute names that should be localized :param extract_text: whether the content of text nodes should be extracted, or only text in explicit ``gettext`` function calls """ self.translate = translate self.ignore_tags = ignore_tags self.include_attrs = include_attrs self.extract_text = extract_text def __call__(self, stream, ctxt=None, search_text=True): """Translate any localizable strings in the given stream. This function shouldn't be called directly. Instead, an instance of the `Translator` class should be registered as a filter with the `Template` or the `TemplateLoader`, or applied as a regular stream filter. If used as a template filter, it should be inserted in front of all the default filters.
:param stream: the markup event stream :param ctxt: the template context (not used) :param search_text: whether text nodes should be translated (used internally) :return: the localized stream """ ignore_tags = self.ignore_tags include_attrs = self.include_attrs translate = self.translate if not self.extract_text: search_text = False skip = 0 xml_lang = XML_NAMESPACE['lang'] for kind, data, pos in stream: # skip chunks that should not be localized if skip: if kind is START: skip += 1 elif kind is END: skip -= 1 yield kind, data, pos continue # handle different events that can be localized if kind is START: tag, attrs = data if tag in self.ignore_tags or \ isinstance(attrs.get(xml_lang), basestring): skip += 1 yield kind, data, pos continue new_attrs = [] changed = False for name, value in attrs: newval = value if search_text and isinstance(value, basestring): if name in include_attrs: newval = self.translate(value) else: newval = list( self(_ensure(value), ctxt, search_text=False)) if newval != value: value = newval changed = True new_attrs.append((name, value)) if changed: attrs = new_attrs yield kind, (tag, attrs), pos elif search_text and kind is TEXT: text = data.strip() if text: data = data.replace(text, translate(text)) yield kind, data, pos elif kind is SUB: subkind, substream = data new_substream = list(self(substream, ctxt)) yield kind, (subkind, new_substream), pos else: yield kind, data, pos GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', 'ugettext', 'ungettext') def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, search_text=True): """Extract localizable strings from the given template stream. For every string found, this function yields a ``(lineno, function, message)`` tuple, where: * ``lineno`` is the number of the line on which the string was found, * ``function`` is the name of the ``gettext`` function used (if the string was extracted from embedded Python code), and * ``message`` is the string itself (a ``unicode`` object, or a tuple of ``unicode`` objects for functions with multiple string arguments). >>> from genshi.template import MarkupTemplate >>> >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> ... <head> ... <title>Example</title> ... </head> ... <body> ... <h1>Example</h1> ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> ... </body> ... </html>''', filename='example.html') >>> >>> for lineno, funcname, message in Translator().extract(tmpl.stream): ... print "%d, %r, %r" % (lineno, funcname, message) 3, None, u'Example' 6, None, u'Example' 7, '_', u'Hello, %(name)s' 8, 'ngettext', (u'You have %d item', u'You have %d items') :param stream: the event stream to extract strings from; can be a regular stream or a template stream :param gettext_functions: a sequence of function names that should be treated as gettext-style localization functions :param search_text: whether the content of text nodes should be extracted (used internally) :note: Changed in 0.4.1: For a function with multiple string arguments (such as ``ngettext``), a single item with a tuple of strings is yielded, instead an item for each string argument. 
""" tagname = None if not self.extract_text: search_text = False skip = 0 xml_lang = XML_NAMESPACE['lang'] for kind, data, pos in stream: if skip: if kind is START: skip += 1 if kind is END: skip -= 1 if kind is START and not skip: tag, attrs = data if tag in self.ignore_tags or \ isinstance(attrs.get(xml_lang), basestring): skip += 1 continue for name, value in attrs: if search_text and isinstance(value, basestring): if name in self.include_attrs: text = value.strip() if text: yield pos[1], None, text else: for lineno, funcname, text in self.extract( _ensure(value), gettext_functions, search_text=False): yield lineno, funcname, text elif not skip and search_text and kind is TEXT: text = data.strip() if text and filter(None, [ch.isalpha() for ch in text]): yield pos[1], None, text elif kind is EXPR or kind is EXEC: consts = dict([(n, chr(i) + '\x00') for i, n in enumerate(data.code.co_consts)]) gettext_locs = [ consts[n] for n in gettext_functions if n in consts ] ops = [ _LOAD_CONST, '(', '|'.join(gettext_locs), ')', _CALL_FUNCTION, '.\x00', '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)' ] for loc, opcodes in re.findall(''.join(ops), data.code.co_code): funcname = data.code.co_consts[ord(loc[0])] strings = [] opcodes = iter(opcodes) for opcode in opcodes: if opcode == _BINARY_ADD: arg = strings.pop() strings[-1] += arg else: arg = data.code.co_consts[ord(opcodes.next())] opcodes.next() # skip second byte if not isinstance(arg, basestring): break strings.append(unicode(arg)) if len(strings) == 1: strings = strings[0] else: strings = tuple(strings) yield pos[1], funcname, strings elif kind is SUB: subkind, substream = data messages = self.extract(substream, gettext_functions, search_text=search_text and not skip) for lineno, funcname, text in messages: yield lineno, funcname, text
class DB(object): """Wrapper class for the _pg connection type.""" def __init__(self, *args, **kw): """Create a new connection. You can pass either the connection parameters or an existing _pg or pgdb connection. This allows you to use the methods of the classic pg interface with a DB-API 2 pgdb connection. """ if not args and len(kw) == 1: db = kw.get('db') elif not kw and len(args) == 1: db = args[0] else: db = None if db: if isinstance(db, DB): db = db.db else: try: db = db._cnx except AttributeError: pass if not db or not hasattr(db, 'db') or not hasattr(db, 'query'): db = connect(*args, **kw) self._closeable = True else: self._closeable = False self.db = db self.dbname = db.db self._regtypes = False self._attnames = {} self._pkeys = {} self._privileges = {} self._args = args, kw self.debug = None # For debugging scripts, this can be set # * to a string format specification (e.g. in CGI set to "%s<BR>"), # * to a file object to write debug statements or # * to a callable object which takes a string argument # * to any other true value to just print debug statements def __getattr__(self, name): # All undefined members are same as in underlying pg connection: if self.db: return getattr(self.db, name) else: raise _int_error('Connection is not valid') # Context manager methods def __enter__(self): """Enter the runtime context. This will start a transaction.""" self.begin() return self def __exit__(self, et, ev, tb): """Exit the runtime context. This will end the transaction.""" if et is None and ev is None and tb is None: self.commit() else: self.rollback() # Auxiliary methods def _do_debug(self, s): """Print a debug message.""" if self.debug: if isinstance(self.debug, basestring): print self.debug % s elif isinstance(self.debug, file): self.debug.write(s + '\n') elif callable(self.debug): self.debug(s) else: print s def _quote_text(self, d): """Quote text value.""" if not isinstance(d, basestring): d = str(d) return "'%s'" % self.escape_string(d) _bool_true = frozenset('t true 1 y yes on'.split()) def _quote_bool(self, d): """Quote boolean value.""" if isinstance(d, basestring): if not d: return 'NULL' d = d.lower() in self._bool_true else: d = bool(d) return ("'f'", "'t'")[d] _date_literals = frozenset( 'current_date current_time' ' current_timestamp localtime localtimestamp'.split()) def _quote_date(self, d): """Quote date value.""" if not d: return 'NULL' if isinstance(d, basestring) and d.lower() in self._date_literals: return d return self._quote_text(d) def _quote_num(self, d): """Quote numeric value.""" if not d and d != 0: return 'NULL' return str(d) def _quote_money(self, d): """Quote money value.""" if d is None or d == '': return 'NULL' if not isinstance(d, basestring): d = str(d) return d _quote_funcs = dict( # quote methods for each type text=_quote_text, bool=_quote_bool, date=_quote_date, int=_quote_num, num=_quote_num, float=_quote_num, money=_quote_money) def _quote(self, d, t): """Return quotes if needed.""" if d is None: return 'NULL' try: quote_func = self._quote_funcs[t] except KeyError: quote_func = self._quote_funcs['text'] return quote_func(self, d) def _split_schema(self, cl): """Return schema and name of object separately. This auxiliary function splits off the namespace (schema) belonging to the class with the name cl. If the class name is not qualified, the function is able to determine the schema of the class, taking into account the current search path. """ s = _split_parts(cl) if len(s) > 1: # name already qualified?
# should be database.schema.table or schema.table if len(s) > 3: raise _prg_error('Too many dots in class name %s' % cl) schema, cl = s[-2:] else: cl = s[0] # determine search path q = 'SELECT current_schemas(TRUE)' schemas = self.db.query(q).getresult()[0][0][1:-1].split(',') if schemas: # non-empty path # search schema for this object in the current search path q = ' UNION '.join([ "SELECT %d::integer AS n, '%s'::name AS nspname" % s for s in enumerate(schemas) ]) q = ("SELECT nspname FROM pg_class" " JOIN pg_namespace" " ON pg_class.relnamespace = pg_namespace.oid" " JOIN (%s) AS p USING (nspname)" " WHERE pg_class.relname = '%s'" " ORDER BY n LIMIT 1" % (q, cl)) schema = self.db.query(q).getresult() if schema: # schema found schema = schema[0][0] else: # object not found in current search path schema = 'public' else: # empty path schema = 'public' return schema, cl def _add_schema(self, cl): """Ensure that the class name is prefixed with a schema name.""" return _join_parts(self._split_schema(cl)) # Public methods # escape_string and escape_bytea exist as methods, # so we define unescape_bytea as a method as well unescape_bytea = staticmethod(unescape_bytea) def close(self): """Close the database connection.""" # Wraps shared library function so we can track state. if self._closeable: if self.db: self.db.close() self.db = None else: raise _int_error('Connection already closed') def reset(self): """Reset connection with current parameters. All derived queries and large objects derived from this connection will not be usable after this call. """ if self.db: self.db.reset() else: raise _int_error('Connection already closed') def reopen(self): """Reopen connection to the database. Used in case we need another connection to the same database. Note that we can still reopen a database that we have closed. """ # There is no such shared library function. if self._closeable: db = connect(*self._args[0], **self._args[1]) if self.db: self.db.close() self.db = db def begin(self, mode=None): """Begin a transaction.""" qstr = 'BEGIN' if mode: qstr += ' ' + mode return self.query(qstr) start = begin def commit(self): """Commit the current transaction.""" return self.query('COMMIT') end = commit def rollback(self, name=None): """Rollback the current transaction.""" qstr = 'ROLLBACK' if name: qstr += ' TO ' + name return self.query(qstr) def savepoint(self, name=None): """Define a new savepoint within the current transaction.""" qstr = 'SAVEPOINT' if name: qstr += ' ' + name return self.query(qstr) def release(self, name): """Destroy a previously defined savepoint.""" return self.query('RELEASE ' + name) def query(self, qstr, *args): """Executes a SQL command string. This method simply sends a SQL query to the database. If the query is an insert statement that inserted exactly one row into a table that has OIDs, the return value is the OID of the newly inserted row. If the query is an update or delete statement, or an insert statement that did not insert exactly one row in a table with OIDs, then the numer of rows affected is returned as a string. If it is a statement that returns rows as a result (usually a select statement, but maybe also an "insert/update ... returning" statement), this method returns a pgqueryobject that can be accessed via getresult() or dictresult() or simply printed. Otherwise, it returns `None`. The query can contain numbered parameters of the form $1 in place of any data constant. 
Arguments given after the query string will be substituted for the corresponding numbered parameter. Parameter values can also be given as a single list or tuple argument. Note that the query string must not be passed as a unicode value, but you can pass arguments as unicode values if they can be decoded using the current client encoding. """ # Wraps shared library function for debugging. if not self.db: raise _int_error('Connection is not valid') self._do_debug(qstr) return self.db.query(qstr, args) def pkey(self, cl, newpkey=None): """This method gets or sets the primary key of a class. Composite primary keys are represented as frozensets. Note that this raises an exception if the table does not have a primary key. If newpkey is set and is not a dictionary then set that value as the primary key of the class. If it is a dictionary then replace the _pkeys dictionary with a copy of it. """ # First see if the caller is supplying a dictionary if isinstance(newpkey, dict): # make sure that all classes have a namespace self._pkeys = dict([('.' in cl and cl or 'public.' + cl, pkey) for cl, pkey in newpkey.iteritems()]) return self._pkeys qcl = self._add_schema(cl) # build fully qualified class name # Check if the caller is supplying a new primary key for the class if newpkey: self._pkeys[qcl] = newpkey return newpkey # Get all the primary keys at once if qcl not in self._pkeys: # if not found, check again in case it was added after we started self._pkeys = {} if self.server_version >= 80200: # the ANY syntax works correctly only with PostgreSQL >= 8.2 any_indkey = "= ANY (pg_index.indkey)" else: any_indkey = "IN (%s)" % ', '.join( ['pg_index.indkey[%d]' % i for i in range(16)]) for r in self.db.query( "SELECT pg_namespace.nspname, pg_class.relname," " pg_attribute.attname FROM pg_class" " JOIN pg_namespace" " ON pg_namespace.oid = pg_class.relnamespace" " AND pg_namespace.nspname NOT LIKE 'pg_%'" " JOIN pg_attribute ON pg_attribute.attrelid = pg_class.oid" " AND pg_attribute.attisdropped = 'f'" " JOIN pg_index ON pg_index.indrelid = pg_class.oid" " AND pg_index.indisprimary = 't'" " AND pg_attribute.attnum " + any_indkey).getresult(): cl, pkey = _join_parts(r[:2]), r[2] self._pkeys.setdefault(cl, []).append(pkey) # (only) for composite primary keys, the values will be frozensets for cl, pkey in self._pkeys.iteritems(): self._pkeys[cl] = len(pkey) > 1 and frozenset(pkey) or pkey[0] self._do_debug(self._pkeys) # will raise an exception if primary key doesn't exist return self._pkeys[qcl] def get_databases(self): """Get list of databases in the system.""" return [ s[0] for s in self.db.query( 'SELECT datname FROM pg_database').getresult() ] def get_relations(self, kinds=None): """Get list of relations in connected database of specified kinds. If kinds is None or empty, all kinds of relations are returned. Otherwise kinds can be a string or sequence of type letters specifying which kind of relations you want to list. 
""" where = kinds and "pg_class.relkind IN (%s) AND" % ','.join( ["'%s'" % x for x in kinds]) or '' return map( _join_parts, self.db.query( "SELECT pg_namespace.nspname, pg_class.relname " "FROM pg_class " "JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace " "WHERE %s pg_class.relname !~ '^Inv' AND " "pg_class.relname !~ '^pg_' " "ORDER BY 1, 2" % where).getresult()) def get_tables(self): """Return list of tables in connected database.""" return self.get_relations('r') def get_attnames(self, cl, newattnames=None): """Given the name of a table, digs out the set of attribute names. Returns a dictionary of attribute names (the names are the keys, the values are the names of the attributes' types). If the optional newattnames exists, it must be a dictionary and will become the new attribute names dictionary. By default, only a limited number of simple types will be returned. You can get the regular types after calling use_regtypes(True). """ if isinstance(newattnames, dict): self._attnames = newattnames return elif newattnames: raise _prg_error('If supplied, newattnames must be a dictionary') cl = self._split_schema(cl) # split into schema and class qcl = _join_parts(cl) # build fully qualified name # May as well cache them: if qcl in self._attnames: return self._attnames[qcl] if qcl not in self.get_relations('rv'): raise _prg_error('Class %s does not exist' % qcl) q = "SELECT pg_attribute.attname, pg_type.typname" if self._regtypes: q += "::regtype" q += (" FROM pg_class" " JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid" " JOIN pg_attribute ON pg_attribute.attrelid = pg_class.oid" " JOIN pg_type ON pg_type.oid = pg_attribute.atttypid" " WHERE pg_namespace.nspname = '%s' AND pg_class.relname = '%s'" " AND (pg_attribute.attnum > 0 OR pg_attribute.attname = 'oid')" " AND pg_attribute.attisdropped = 'f'") % cl q = self.db.query(q).getresult() if self._regtypes: t = dict(q) else: t = {} for att, typ in q: if typ.startswith('bool'): typ = 'bool' elif typ.startswith('abstime'): typ = 'date' elif typ.startswith('date'): typ = 'date' elif typ.startswith('interval'): typ = 'date' elif typ.startswith('timestamp'): typ = 'date' elif typ.startswith('oid'): typ = 'int' elif typ.startswith('int'): typ = 'int' elif typ.startswith('float'): typ = 'float' elif typ.startswith('numeric'): typ = 'num' elif typ.startswith('money'): typ = 'money' else: typ = 'text' t[att] = typ self._attnames[qcl] = t # cache it return self._attnames[qcl] def use_regtypes(self, regtypes=None): """Use regular type names instead of simplified type names.""" if regtypes is None: return self._regtypes else: regtypes = bool(regtypes) if regtypes != self._regtypes: self._regtypes = regtypes self._attnames.clear() return regtypes def has_table_privilege(self, cl, privilege='select'): """Check whether current user has specified table privilege.""" qcl = self._add_schema(cl) privilege = privilege.lower() try: return self._privileges[(qcl, privilege)] except KeyError: q = "SELECT has_table_privilege('%s', '%s')" % (qcl, privilege) ret = self.db.query(q).getresult()[0][0] == 't' self._privileges[(qcl, privilege)] = ret return ret def get(self, cl, arg, keyname=None): """Get a tuple from a database table or view. This method is the basic mechanism to get a single row. The keyname that the key specifies a unique row. If keyname is not specified then the primary key for the table is used. 
If arg is a dictionary then the value for the key is taken from it and it is modified to include the new values, replacing existing values where necessary. For a composite key, keyname can also be a sequence of key names. The OID is also put into the dictionary if the table has one, but in order to allow the caller to work with multiple tables, it is munged as oid(schema.table). """ if cl.endswith('*'): # scan descendant tables? cl = cl[:-1].rstrip() # need parent table name # build qualified class name qcl = self._add_schema(cl) # To allow users to work with multiple tables, # we munge the name of the "oid" the key qoid = _oid_key(qcl) if not keyname: # use the primary key by default try: keyname = self.pkey(qcl) except KeyError: raise _prg_error('Class %s has no primary key' % qcl) # We want the oid for later updates if that isn't the key if keyname == 'oid': if isinstance(arg, dict): if qoid not in arg: raise _db_error('%s not in arg' % qoid) else: arg = {qoid: arg} where = 'oid = %s' % arg[qoid] attnames = '*' else: attnames = self.get_attnames(qcl) if isinstance(keyname, basestring): keyname = (keyname, ) if not isinstance(arg, dict): if len(keyname) > 1: raise _prg_error('Composite key needs dict as arg') arg = dict([(k, arg) for k in keyname]) where = ' AND '.join([ '%s = %s' % (k, self._quote(arg[k], attnames[k])) for k in keyname ]) attnames = ', '.join(attnames) q = 'SELECT %s FROM %s WHERE %s LIMIT 1' % (attnames, qcl, where) self._do_debug(q) res = self.db.query(q).dictresult() if not res: raise _db_error('No such record in %s where %s' % (qcl, where)) for att, value in res[0].iteritems(): arg[att == 'oid' and qoid or att] = value return arg def insert(self, cl, d=None, **kw): """Insert a tuple into a database table. This method inserts a row into a table. If a dictionary is supplied it starts with that. Otherwise it uses a blank dictionary. Either way the dictionary is updated from the keywords. The dictionary is then, if possible, reloaded with the values actually inserted in order to pick up values modified by rules, triggers, etc. Note: The method currently doesn't support insert into views although PostgreSQL does. """ qcl = self._add_schema(cl) qoid = _oid_key(qcl) if d is None: d = {} d.update(kw) attnames = self.get_attnames(qcl) names, values = [], [] for n in attnames: if n != 'oid' and n in d: names.append('"%s"' % n) values.append(self._quote(d[n], attnames[n])) names, values = ', '.join(names), ', '.join(values) selectable = self.has_table_privilege(qcl) if selectable and self.server_version >= 80200: ret = ' RETURNING %s*' % ('oid' in attnames and 'oid, ' or '') else: ret = '' q = 'INSERT INTO %s (%s) VALUES (%s)%s' % (qcl, names, values, ret) self._do_debug(q) res = self.db.query(q) if ret: res = res.dictresult() for att, value in res[0].iteritems(): d[att == 'oid' and qoid or att] = value elif isinstance(res, int): d[qoid] = res if selectable: self.get(qcl, d, 'oid') elif selectable: if qoid in d: self.get(qcl, d, 'oid') else: try: self.get(qcl, d) except ProgrammingError: pass # table has no primary key return d def update(self, cl, d=None, **kw): """Update an existing row in a database table. Similar to insert but updates an existing row. The update is based on the OID value as munged by get or passed as keyword, or on the primary key of the table. The dictionary is modified, if possible, to reflect any changes caused by the update due to triggers, rules, default values, etc. 
""" # Update always works on the oid which get returns if available, # otherwise use the primary key. Fail if neither. # Note that we only accept oid key from named args for safety qcl = self._add_schema(cl) qoid = _oid_key(qcl) if 'oid' in kw: kw[qoid] = kw['oid'] del kw['oid'] if d is None: d = {} d.update(kw) attnames = self.get_attnames(qcl) if qoid in d: where = 'oid = %s' % d[qoid] keyname = () else: try: keyname = self.pkey(qcl) except KeyError: raise _prg_error('Class %s has no primary key' % qcl) if isinstance(keyname, basestring): keyname = (keyname, ) try: where = ' AND '.join([ '%s = %s' % (k, self._quote(d[k], attnames[k])) for k in keyname ]) except KeyError: raise _prg_error('Update needs primary key or oid.') values = [] for n in attnames: if n in d and n not in keyname: values.append('%s = %s' % (n, self._quote(d[n], attnames[n]))) if not values: return d values = ', '.join(values) selectable = self.has_table_privilege(qcl) if selectable and self.server_version >= 880200: ret = ' RETURNING %s*' % ('oid' in attnames and 'oid, ' or '') else: ret = '' q = 'UPDATE %s SET %s WHERE %s%s' % (qcl, values, where, ret) self._do_debug(q) res = self.db.query(q) if ret: res = res.dictresult()[0] for att, value in res.iteritems(): d[att == 'oid' and qoid or att] = value else: if selectable: if qoid in d: self.get(qcl, d, 'oid') else: self.get(qcl, d) return d def clear(self, cl, a=None): """Clear all the attributes to values determined by the types. Numeric types are set to 0, Booleans are set to 'f', and everything else is set to the empty string. If the array argument is present, it is used as the array and any entries matching attribute names are cleared with everything else left unchanged. """ # At some point we will need a way to get defaults from a table. qcl = self._add_schema(cl) if a is None: a = {} # empty if argument is not present attnames = self.get_attnames(qcl) for n, t in attnames.iteritems(): if n == 'oid': continue if t in ('int', 'integer', 'smallint', 'bigint', 'float', 'real', 'double precision', 'num', 'numeric', 'money'): a[n] = 0 elif t in ('bool', 'boolean'): a[n] = 'f' else: a[n] = '' return a def delete(self, cl, d=None, **kw): """Delete an existing row in a database table. This method deletes the row from a table. It deletes based on the OID value as munged by get or passed as keyword, or on the primary key of the table. The return value is the number of deleted rows (i.e. 0 if the row did not exist and 1 if the row was deleted). """ # Like update, delete works on the oid. # One day we will be testing that the record to be deleted # isn't referenced somewhere (or else PostgreSQL will). 
# Note that we only accept oid key from named args for safety qcl = self._add_schema(cl) qoid = _oid_key(qcl) if 'oid' in kw: kw[qoid] = kw['oid'] del kw['oid'] if d is None: d = {} d.update(kw) if qoid in d: where = 'oid = %s' % d[qoid] else: try: keyname = self.pkey(qcl) except KeyError: raise _prg_error('Class %s has no primary key' % qcl) if isinstance(keyname, basestring): keyname = (keyname, ) attnames = self.get_attnames(qcl) try: where = ' AND '.join([ '%s = %s' % (k, self._quote(d[k], attnames[k])) for k in keyname ]) except KeyError: raise _prg_error('Delete needs primary key or oid.') q = 'DELETE FROM %s WHERE %s' % (qcl, where) self._do_debug(q) return int(self.db.query(q)) def notification_handler(self, event, callback, arg_dict={}, timeout=None): """Get notification handler that will run the given callback.""" return NotificationHandler(self.db, event, callback, arg_dict, timeout)
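# Hedged usage sketch for the DB wrapper above.  Connection parameters, table and
# column names are illustrative; the calls mirror the documented methods.
import sys
db = DB(dbname='testdb')
db.debug = lambda sql: sys.stderr.write(sql + '\n')   # echo queries via _do_debug
row = db.insert('person', name='John Doe')            # reloaded with defaults/oid
row = db.get('person', row)                           # fetch via primary key (or oid)
row['name'] = 'Jane Doe'
db.update('person', row)                              # uses the munged oid if present
db.delete('person', row)                              # returns the number of deleted rows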
class HTMLParser(html.HTMLParser, object): """Parser for HTML input based on the Python `HTMLParser` module. This class provides the same interface for generating stream events as `XMLParser`, and attempts to automatically balance tags. The parsing is initiated by iterating over the parser object: >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>')) >>> for kind, data, pos in parser: ... print kind, data START (QName(u'ul'), Attrs([(QName(u'compact'), u'compact')])) START (QName(u'li'), Attrs()) TEXT Foo END li END ul """ _EMPTY_ELEMS = frozenset([ 'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param' ]) def __init__(self, source, filename=None, encoding='utf-8'): """Initialize the parser for the given HTML input. :param source: the HTML text as a file-like object :param filename: the name of the file, if known :param encoding: encoding of the file; ignored if the input is unicode """ html.HTMLParser.__init__(self) self.source = source self.filename = filename self.encoding = encoding self._queue = [] self._open_tags = [] def parse(self): """Generator that parses the HTML source, yielding markup events. :return: a markup event stream :raises ParseError: if the HTML text is not well formed """ def _generate(): try: bufsize = 4 * 1024 # 4K done = False while 1: while not done and len(self._queue) == 0: data = self.source.read(bufsize) if data == '': # end of data self.close() done = True else: self.feed(data) for kind, data, pos in self._queue: yield kind, data, pos self._queue = [] if done: open_tags = self._open_tags open_tags.reverse() for tag in open_tags: yield END, QName(tag), pos break except html.HTMLParseError, e: msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) raise ParseError(msg, self.filename, e.lineno, e.offset) return Stream(_generate()).filter(_coalesce)
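# Hedged example of the automatic tag balancing mentioned in the class docstring:
# the unclosed <b> and <p> are closed when the input is exhausted (output shown
# approximately).
from StringIO import StringIO
parser = HTMLParser(StringIO('<p>Some <b>bold text'))
for kind, data, pos in parser:
    print kind, data
# START (QName(u'p'), Attrs())
# TEXT Some 
# START (QName(u'b'), Attrs())
# TEXT bold text
# END b
# END p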
def generate(self, _ignore_req_list=False, _append_slash=False, **kargs): """Generate a URL from ourself given a set of keyword arguments Toss an exception if this set of keywords would cause a gap in the url. """ # Verify that our args pass any regexp requirements if not _ignore_req_list: for key in self.reqs.keys(): val = kargs.get(key) if val and not self.req_regs[key].match( self.make_unicode(val)): return False # Verify that if we have a method arg, its in the method accept list. # Also, method will be changed to _method for route generation meth = as_unicode(kargs.get('method'), self.encoding) if meth: if self.conditions and 'method' in self.conditions \ and meth.upper() not in self.conditions['method']: return False kargs.pop('method') if self.minimization: url = self.generate_minimized(kargs) else: url = self.generate_non_minimized(kargs) if url is False: return url if not url.startswith('/') and not self.static: url = '/' + url extras = frozenset(kargs.keys()) - self.maxkeys if extras: if _append_slash and not url.endswith('/'): url += '/' fragments = [] # don't assume the 'extras' set preserves order: iterate # through the ordered kargs instead for key in kargs: if key not in extras: continue if key == 'action' or key == 'controller': continue val = kargs[key] if isinstance(val, (tuple, list)): for value in val: value = as_unicode(value, self.encoding) fragments.append( (key, _str_encode(value, self.encoding))) else: val = as_unicode(val, self.encoding) fragments.append((key, _str_encode(val, self.encoding))) if fragments: url += '?' url += urllib.urlencode(fragments) elif _append_slash and not url.endswith('/'): url += '/' return url
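# Hedged sketch of the behaviour implemented above: keyword arguments that are not
# consumed by the route path or its defaults end up in the query string.  The mapper
# setup uses the newer {part} path syntax and is illustrative only.
from routes import Mapper
map = Mapper()
map.connect('archives', '/archives/{year}/{month}',
            controller='archives', action='view')
map.generate(controller='archives', action='view', year=2007, month=10, page=2)
# expected to yield something like '/archives/2007/10?page=2'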
def validateAttributeValueDraggable(self, token, tagName, attrName, attrValue): for t in self.checkEnumeratedValue(token, tagName, attrName, attrValue, frozenset(('true', 'false'))) or []: yield t
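# A hedged sketch of a sibling validator following the same pattern, assuming
# checkEnumeratedValue() yields error tokens for values outside the given set; the
# attribute and its allowed keywords here are illustrative only.
def validateAttributeValueContenteditable(self, token, tagName, attrName, attrValue):
    for t in self.checkEnumeratedValue(token, tagName, attrName, attrValue,
                                       frozenset(('true', 'false', ''))) or []:
        yield t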
class HTMLSanitizer(object): """A filter that removes potentially dangerous HTML tags and attributes from the stream. >>> from genshi import HTML >>> html = HTML('<div><script>alert(document.cookie)</script></div>') >>> print html | HTMLSanitizer() <div/> The default set of safe tags and attributes can be modified when the filter is instantiated. For example, to allow inline ``style`` attributes, the following instantation would work: >>> html = HTML('<div style="background: #000"></div>') >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) >>> print html | sanitizer <div style="background: #000"/> Note that even in this case, the filter *does* attempt to remove dangerous constructs from style attributes: >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>') >>> print html | sanitizer <div style="color: #000"/> This handles HTML entities, unicode escapes in CSS and Javascript text, as well as a lot of other things. However, the style tag is still excluded by default because it is very hard for such sanitizing to be completely safe, especially considering how much error recovery current web browsers perform. :warn: Note that this special processing of CSS is currently only applied to style attributes, **not** style elements. """ SAFE_TAGS = frozenset([ 'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var' ]) SAFE_ATTRS = frozenset([ 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width' ]) SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) URI_ATTRS = frozenset( ['action', 'background', 'dynsrc', 'href', 'lowsrc', 'src']) def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS, safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS): """Create the sanitizer. The exact set of allowed elements and attributes can be configured. :param safe_tags: a set of tag names that are considered safe :param safe_attrs: a set of attribute names that are considered safe :param safe_schemes: a set of URI schemes that are considered safe :param uri_attrs: a set of names of attributes that contain URIs """ self.safe_tags = safe_tags "The set of tag names that are considered safe." self.safe_attrs = safe_attrs "The set of attribute names that are considered safe." self.uri_attrs = uri_attrs "The set of names of attributes that may contain URIs." 
self.safe_schemes = safe_schemes "The set of URI schemes that are considered safe." def __call__(self, stream): """Apply the filter to the given stream. :param stream: the markup event stream to filter """ waiting_for = None for kind, data, pos in stream: if kind is START: if waiting_for: continue tag, attrs = data if tag not in self.safe_tags: waiting_for = tag continue new_attrs = [] for attr, value in attrs: value = stripentities(value) if attr not in self.safe_attrs: continue elif attr in self.uri_attrs: # Don't allow URI schemes such as "javascript:" if not self.is_safe_uri(value): continue elif attr == 'style': # Remove dangerous CSS declarations from inline styles decls = self.sanitize_css(value) if not decls: continue value = '; '.join(decls) new_attrs.append((attr, value)) yield kind, (tag, Attrs(new_attrs)), pos elif kind is END: tag = data if waiting_for: if waiting_for == tag: waiting_for = None else: yield kind, data, pos elif kind is not COMMENT: if not waiting_for: yield kind, data, pos def is_safe_uri(self, uri): """Determine whether the given URI is to be considered safe for inclusion in the output. The default implementation checks whether the scheme of the URI is in the set of allowed URIs (`safe_schemes`). >>> sanitizer = HTMLSanitizer() >>> sanitizer.is_safe_uri('http://example.org/') True >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)') False :param uri: the URI to check :return: `True` if the URI can be considered safe, `False` otherwise :rtype: `bool` """ if ':' not in uri: return True # This is a relative URI chars = [char for char in uri.split(':', 1)[0] if char.isalnum()] return ''.join(chars).lower() in self.safe_schemes def sanitize_css(self, text): """Remove potentially dangerous property declarations from CSS code. In particular, properties using the CSS ``url()`` function with a scheme that is not considered safe are removed: >>> sanitizer = HTMLSanitizer() >>> sanitizer.sanitize_css(u''' ... background: url(javascript:alert("foo")); ... color: #000; ... ''') [u'color: #000'] Also, the proprietary Internet Explorer function ``expression()`` is always stripped: >>> sanitizer.sanitize_css(u''' ... background: #fff; ... color: #000; ... width: e/**/xpression(alert("foo")); ... ''') [u'background: #fff', u'color: #000'] :param text: the CSS text; this is expected to be `unicode` and to not contain any character or numeric references :return: a list of declarations that are considered safe :rtype: `list` """ decls = [] text = self._strip_css_comments(self._replace_unicode_escapes(text)) for decl in filter(None, text.split(';')): decl = decl.strip() if not decl: continue is_evil = False if 'expression' in decl: is_evil = True for match in re.finditer(r'url\s*\(([^)]+)', decl): if not self.is_safe_uri(match.group(1)): is_evil = True break if not is_evil: decls.append(decl.strip()) return decls _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub def _replace_unicode_escapes(self, text): def _repl(match): return unichr(int(match.group(1), 16)) return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text)) _CSS_COMMENTS = re.compile(r'/\*.*?\*/').sub def _strip_css_comments(self, text): return self._CSS_COMMENTS('', text)
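# Hedged usage sketch: widening the scheme whitelist by passing a custom set, as
# described in the constructor docstring (the added scheme and URL are just examples;
# output shown approximately).
from genshi import HTML
sanitizer = HTMLSanitizer(safe_schemes=HTMLSanitizer.SAFE_SCHEMES | frozenset(['irc']))
print HTML('<a href="irc://irc.example.org/genshi">chat</a>') | sanitizer
# <a href="irc://irc.example.org/genshi">chat</a>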
def __hash__(self): rval = getattr(self, '_hash', None) if rval is None: rval = self._hash = hash(frozenset(self.iteritems())) return rval
def optimize(self): all_chars = self.get_all_chars() # find mergeable non_final = frozenset( set(range(self.num_states)) - self.final_states - self.unmergeable_states) final = frozenset(self.final_states - self.unmergeable_states) state_to_set = {} equivalence_sets = set() if non_final: equivalence_sets.add(non_final) if final: equivalence_sets.add(final) for state in range(self.num_states): if state in final: state_to_set[state] = final elif state in self.unmergeable_states: singleset = frozenset([state]) state_to_set[state] = singleset equivalence_sets.add(singleset) else: state_to_set[state] = non_final assert len(equivalence_sets) <= self.num_states while len(equivalence_sets) < self.num_states: new_equivalence_sets = set() changed = False for equivalent in equivalence_sets: for char in all_chars: targets = {} for state in equivalent: if (state, char) in self: nextstate = self[state, char] target = frozenset(state_to_set[nextstate]) else: nextstate = None target = None targets.setdefault(target, set()).add(state) if len(targets) != 1: for target, newequivalent in targets.iteritems(): newequivalent = frozenset(newequivalent) new_equivalence_sets.add(newequivalent) for state in newequivalent: state_to_set[state] = newequivalent changed = True break else: new_equivalence_sets.add(equivalent) if not changed: break equivalence_sets = new_equivalence_sets if len(equivalence_sets) == self.num_states: return False # merging the states newnames = [] newtransitions = {} newnum_states = len(equivalence_sets) newstates = list(equivalence_sets) newstate_to_index = {} newfinal_states = set() newunmergeable_states = set() for i, newstate in enumerate(newstates): newstate_to_index[newstate] = i # bring startstate into first slot startstateindex = newstate_to_index[state_to_set[0]] newstates[0], newstates[startstateindex] = newstates[ startstateindex], newstates[0] newstate_to_index[newstates[0]] = 0 newstate_to_index[newstates[startstateindex]] = startstateindex for i, newstate in enumerate(newstates): name = ", ".join([self.names[s] for s in newstate]) for state in newstate: if state in self.unmergeable_states: newunmergeable_states.add(i) name = self.names[state] if state in self.final_states: newfinal_states.add(i) newnames.append(name) for (state, char), nextstate in self.transitions.iteritems(): newstate = newstate_to_index[state_to_set[state]] newnextstate = newstate_to_index[state_to_set[nextstate]] newtransitions[newstate, char] = newnextstate self.names = newnames self.transitions = newtransitions self.num_states = newnum_states self.final_states = newfinal_states self.unmergeable_states = newunmergeable_states return True
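# A hedged, self-contained toy illustrating the equivalence-class refinement that
# optimize() performs (the real method additionally honours unmergeable states, keeps
# the start state in slot 0 and rewrites names/transitions).  States 1 and 2 below
# behave identically, so they end up in one class and could be merged.
transitions = {(0, 'a'): 1, (0, 'b'): 2, (1, 'x'): 3, (2, 'x'): 3}
num_states, final = 4, frozenset([3])
alphabet = sorted(set(c for (_, c) in transitions))

partition = set([frozenset(range(num_states)) - final, final])
state_to_set = dict((s, b) for b in partition for s in b)
while True:
    new_partition = set()
    for block in partition:
        groups = {}
        for s in block:
            # states are grouped by which class each character leads them to
            sig = tuple(state_to_set.get(transitions.get((s, c))) for c in alphabet)
            groups.setdefault(sig, set()).add(s)
        new_partition.update(frozenset(g) for g in groups.values())
    if new_partition == partition:
        break
    partition = new_partition
    state_to_set = dict((s, b) for b in partition for s in b)
print sorted(sorted(b) for b in partition)   # [[0], [1, 2], [3]]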
"invalid-uri": _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-http-or-ftp-uri": _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-scheme": _(u"Unregistered URI scheme: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-rel": _(u"Invalid link relation: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-mime-type": _(u"Invalid MIME type: '%(attributeName)s' attribute on <%(tagName)s>."), }) globalAttributes = frozenset(('class', 'contenteditable', 'contextmenu', 'dir', 'draggable', 'id', 'irrelevant', 'lang', 'ref', 'tabindex', 'template', 'title', 'onabort', 'onbeforeunload', 'onblur', 'onchange', 'onclick', 'oncontextmenu', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmessage', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onresize', 'onscroll', 'onselect', 'onsubmit', 'onunload')) # XXX lang in HTML only, xml:lang in XHTML only # XXX validate ref, template allowedAttributeMap = { 'html': frozenset(('xmlns',)), 'head': frozenset(()), 'title': frozenset(()), 'base': frozenset(('href', 'target')), 'link': frozenset(('href', 'rel', 'media', 'hreflang', 'type')), 'meta': frozenset(('name', 'http-equiv', 'content', 'charset')), # XXX charset in HTML only 'style': frozenset(('media', 'type', 'scoped')), 'body': frozenset(()),