def RobotKiller(next, logger, reqdata, environ):
    """Answer 403 "web-robot" to bad robots on robot-filtered requests.

    Requests for which is_bad_robot_request() is false are handed
    straight to the next handler. Otherwise the User-Agent header is
    checked: an empty one is rejected only when the
    'no-ua-is-bad-robot' setting is true, one specific hard-coded
    agent string is always rejected, and any agent containing one of
    the configured 'bad-robots' substrings is rejected.
    """
    if not is_bad_robot_request(environ, reqdata):
        return next(logger, reqdata, environ)

    cfg = get_cfg(environ)
    agent = environ.get('HTTP_USER_AGENT', '')

    if not agent:
        # Under some configurations, a missing user agent is itself
        # treated as a bad robot; otherwise it is let through.
        if cfg['no-ua-is-bad-robot']:
            return httputil.genError("web-robot", 403)
        return next(logger, reqdata, environ)

    if agent == 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; MyIE2; Maxthon)':
        return httputil.genError("web-robot", 403)

    # The 403 is somewhat arbitrary, yet generally truthful: the
    # resource is not available to this client in this form because
    # its access is forbidden. (This was once 'file-not-available'.)
    if any(marker in agent for marker in cfg['bad-robots']):
        return httputil.genError("web-robot", 403)

    # Nothing matched; the request is fine.
    return next(logger, reqdata, environ)
def doPOSTMangling(reqdata, qdict, webserv):
    """Fold the POST parameters in qdict into reqdata.

    POST URLs look just like regular URLs (a POST cannot be
    redirected to a GET without trouble), so the view travels in the
    POST data and is decoded here.

    Returns None when the POST is acceptable, in which case reqdata
    has had its 'view' updated and, if any accepted parameters were
    present, a 'view-dict' entry added. Otherwise returns the error
    response produced by httputil.genError(). Note that reqdata['view']
    may already have been modified when an error is returned.
    """
    # The POST data overrides whatever view reqdata started with.
    if 'view' in qdict:
        reqdata['view'] = qdict['view']

    # A view in POST is never allowed to be 'normal'; that can only
    # come from a hand-crafted POST. The replacement value is
    # arbitrary but distinct and likely to fail spectacularly later.
    # It is not rejected on the spot because other things might be
    # handling POST queries that are intercepted later.
    # (FIXME: examine later whether such POSTs can be useful.)
    if reqdata['view'] == "normal":
        reqdata['view'] = ":post:claimed-normal"

    # Out of zone POSTs are refused before anything more is done.
    if 'query' not in reqdata:
        return httputil.genError("out-of-zone")

    view = reqdata['view']

    # Checked separately for clarity, since there is currently no way
    # of logging messages here. FIXME: fix this.
    if not webserv.view_exists(view):
        return httputil.genError("sec-error", 403)

    # Only a few views support POST; reject the rest immediately.
    if not webserv.view_cmd_allowed(view, "POST"):
        return httputil.genError("out-of-zone", 405)

    # Collect the parameters this view accepts over POST. 'view'
    # itself is tolerated but not copied; anything else unexpected
    # fails the whole request.
    accepted_names = webserv.view_param_list(view, "POST")
    accepted = {}
    for name in qdict.keys():
        if name in accepted_names:
            accepted[name] = qdict[name]
        elif name != "view":
            return httputil.genError('sec-error', 403)

    # Missing parameters are left for the HTML view code to fix up.
    if accepted:
        reqdata['view-dict'] = accepted
    return None
def HostKiller(next, logger, reqdata, environ):
    """Reject requests whose host fails valid_host() with a 403.

    Two values are checked in order: the host from
    httputil.hostFromEnv(), which is only checked when it is
    non-empty, and the host from getHost(), which is always checked.
    Rejections are logged through environ['dwiki.logger']. A request
    that passes both checks goes to the next handler.
    """
    uri_host = httputil.hostFromEnv(environ)
    if uri_host and not valid_host(uri_host):
        msg = "rejected invalid Host: value from request URI: %s" % repr(uri_host)
        environ['dwiki.logger'].warn(msg)
        return httputil.genError("sec-error", 403)

    header_host = getHost(environ)
    if not valid_host(header_host):
        msg = "rejected invalid Host: value from Host: header: %s" % repr(header_host)
        environ['dwiki.logger'].warn(msg)
        return httputil.genError("sec-error", 403)

    return next(logger, reqdata, environ)
def IpKiller(next, logger, reqdata, environ):
    """Deny requests from banned remote IPs with a 403 "disallowed".

    The remote address comes from REMOTE_ADDR and is matched against
    the 'banned-ips' setting with httputil.matchIP(). A request with
    no remote address at all is denied as well.
    """
    cfg = get_cfg(environ)
    banned = cfg['banned-ips']
    remote = environ.get('REMOTE_ADDR', '')

    # Only a known, unbanned address is allowed to continue.
    if remote and not httputil.matchIP(remote, banned):
        return next(logger, reqdata, environ)

    logger.warn("banned IP denied access")
    return httputil.genError("disallowed", 403)
def RobotKiller2(next, logger, reqdata, environ):
    """Answer 403 "web-robot" to any request from a banned robot.

    A request is banned when its User-Agent contains one of the
    substrings in the 'banned-robots' setting. Requests without a
    User-Agent are passed on without consulting the configuration.
    """
    agent = environ.get('HTTP_USER_AGENT', '')
    if agent:
        banned = get_cfg(environ)['banned-robots']
        if any(marker in agent for marker in banned):
            return httputil.genError("web-robot", 403)
    return next(logger, reqdata, environ)
def StaticServ(next, logger, reqdata, environ):
    """Serve the request as static content when it maps to a static path.

    When staticserv.getStaticPath() returns None the request is not
    static and goes to the next handler. Otherwise it is answered
    here: a POST is logged and refused with a 403, anything else is
    handed to staticserv.doStatic().
    """
    cfg = get_cfg(environ)
    if staticserv.getStaticPath(cfg, reqdata) is None:
        return next(logger, reqdata, environ)

    # From here on we are serving static stuff, one way or another.
    if environ['REQUEST_METHOD'] != 'POST':
        return staticserv.doStatic(cfg, reqdata, get_static(environ))

    logger.warn("POST request to a static URL")
    return httputil.genError("sec-error", 403)
def InsaneKiller(next, logger, reqdata, environ):
    """Refuse requests with malformed URLs or a bogus POST Referer.

    Three checks run in order; the first to fail ends the request:
      * a REQUEST_URI containing '#'          -> "out-of-zone"
      * a full path containing '//' or '/../' -> "out-of-zone"
      * a POST whose Referer contains ', '    -> "web-robot", 403
    Anything else is passed to the next handler.
    """
    # 'http://..../..' request URIs are legitimate, at least for
    # HTTP/1.1 requests, so they are accepted all the time; only a
    # '#' is refused here.
    request_uri = environ.get("REQUEST_URI", '')
    if request_uri and '#' in request_uri:
        return httputil.genError('out-of-zone')

    # The advantage of the full path is that it is post-decode.
    fullpath = reqdata['request-fullpath']
    if fullpath and any(bad in fullpath for bad in ('//', '/../')):
        return httputil.genError('out-of-zone')

    # Referer values containing ', ' are illegal and the sign of
    # certain spambots, so they are failed on POSTs.
    referer = environ.get('HTTP_REFERER', None)
    if referer and environ['REQUEST_METHOD'] == 'POST' and ', ' in referer:
        # "web-robot" is sort of accurate. Sort of.
        environ['dwiki.logger'].warn("rejected POST with bogus HTTP Referer")
        return httputil.genError("web-robot", 403)

    return next(logger, reqdata, environ)
def IpCommentKiller(next, logger, reqdata, environ):
    """Deny the 'writecomment' view to banned remote IPs.

    Requests for any other view are passed on untouched. For
    'writecomment', REMOTE_ADDR is matched against the
    'banned-comment-ips' setting with httputil.matchIP(); a match, or
    no remote address at all, gets a 403 "disallowed".
    """
    if reqdata['view'] != 'writecomment':
        return next(logger, reqdata, environ)

    cfg = get_cfg(environ)
    banned = cfg['banned-comment-ips']
    remote = environ.get('REMOTE_ADDR', '')

    # Only a known, unbanned address may go on to write a comment.
    if remote and not httputil.matchIP(remote, banned):
        return next(logger, reqdata, environ)

    logger.warn("banned comment IP denied access")
    return httputil.genError("disallowed", 403)
def __call__(self, environ, start_response):
    """Dispatch a request to the do_<METHOD> handler for its HTTP method.

    GET, HEAD and POST are handed to self.do_GET / do_HEAD / do_POST
    along with the logger taken from environ['dwiki.logger']. Any
    other method gets a 405 'not-supported' response sent through
    sendResponse(). Either way the result is returned wrapped in a
    one-element list.
    """
    method = environ['REQUEST_METHOD']
    logger = environ['dwiki.logger']

    if method in ('GET', 'HEAD', 'POST'):
        handler = getattr(self, "do_"+method)
        return [handler(logger, environ, start_response)]

    # 5xx series errors are theoretically things that can be retried
    # later, which makes 501 the wrong code here; 405 implies
    # 'permanent failure'.
    resp = httputil.genError('not-supported', 405)
    return [sendResponse(resp, start_response)]
def doStatic(cfg, reqdata, staticstore):
    """Build the response for a static file request.

    Returns an htmlresp.Response carrying the file contents, or an
    error response from httputil.genError() when the request is not
    in the 'normal' view, has an empty static path, or names
    something that is not a displayable file.
    """
    # Static files are only served from the normal view; in other
    # views we pretend they don't exist.
    if reqdata['view'] != 'normal':
        return httputil.genError("out-of-zone")

    # Directories are not served, so the empty path that indicates
    # our root is an immediate exit.
    path = getStaticPath(cfg, reqdata)
    if not path:
        return httputil.genError("file-not-available")

    page = staticstore.get(path)
    # Flush immediately: only one call per request is ever made to
    # the staticstore.
    staticstore.flush()

    servable = page and page.type == "file" and page.displayable()
    if not servable:
        return httputil.genError("file-not-available")

    resp = htmlresp.Response()
    contents = page.contents()
    content_type = guessContentType(path)
    resp.arbitrary(contents, content_type)
    if page.timestamp() > 0:
        resp.setLastModified(page.timestamp())
        resp.setTimeReliable()
    resp.setContentLength()
    return resp
def genPostReqdata(logger, environ):
    """Build the request data for a POST request.

    Returns a (reqdata, resp) tuple. When reading the POST body
    raises derrors.ReqErr, the error is logged and the result is
    (None, <403 sec-error response>). Otherwise reqdata comes from
    gather_reqdata() and resp is whatever doPOSTMangling() returned
    for it.
    """
    # The POST body is fetched before establishing the request
    # parameters, because this is slightly faster if something is
    # broken in the POST.
    try:
        post_params = getPostBody(environ)
    except derrors.ReqErr as err:
        logger.warn("security: "+str(err))
        return (None, httputil.genError('sec-error', 403))
    else:
        # The reqdata fixups that depend on the state of the POST
        # data are all done out of line.
        reqdata = gather_reqdata(environ)
        resp = doPOSTMangling(reqdata, post_params, environ['dwiki.web'])
        return (reqdata, resp)
def ReqKiller(next, logger, reqdata, environ):
    """Refuse any request whose reqdata carries a ':kill:request' key.

    Such requests get a 403 "sec-error"; all others are passed to the
    next handler.
    """
    if ':kill:request' not in reqdata:
        return next(logger, reqdata, environ)
    return httputil.genError("sec-error", 403)
def doDwikiRequest(logger, reqdata, environ):
    """Produce the response for a request served by the HTML view.

    The request is first vetted: it must have a 'query', its view
    must exist, and the view must allow the request's HTTP command.
    A root URL given without its trailing slash is redirected to the
    slashed form. Everything else is rendered through the view
    returned by webserv.viewFactory(); a derrors.WikiErr raised while
    doing so is logged and turned into a 500 "internal-error".

    modelserv.finish() is called once the rendering attempt is over,
    but not on the early error and redirect returns.
    """
    # Recover things from the environment.
    cfg = environ['dwiki.cfg']
    modelserv = environ['dwiki.model']
    webserv = environ['dwiki.web']

    # If it is not a request for something we handle out of line, and
    # it is not rooted under our root URL, it dies right now.
    if 'query' not in reqdata:
        return httputil.genError("out-of-zone")

    # A view that doesn't exist at all fails now. Logging this is
    # somewhat questionable.
    view = reqdata['view']
    if not webserv.view_exists(view):
        logger.warn("nonexistent view '%s' in HTTP command '%s'" %
                    (view, reqdata['http-command']))
        return httputil.genError("out-of-zone")

    # Enforce only-accessible-by-POST views. Since we never generate
    # URLs to such zones, they must be being set up by some
    # overly-nasty outsider, so we are abrupt.
    if not webserv.view_cmd_allowed(view, reqdata['http-command']):
        logger.warn("view '%s' not allowed in HTTP command '%s'" %
                    (view, reqdata['http-command']))
        return httputil.genError("sec-error", 403)

    # Picky compliance issue: rooturl is a directory, not a URL
    # prefix. The non-directory version is redirected to the
    # directory version (and will probably be re-redirected to the
    # actual front page).
    if not reqdata['query']:
        fullpath = reqdata['request-fullpath']
        if fullpath and fullpath[-1] != '/':
            return httputil.redirToSlashedDir(fullpath, reqdata)

    # Okay, it's something that we serve out of the HTML view.
    try:
        ctx = context.HTMLContext(cfg, modelserv, webserv, reqdata)
        cachestore = environ['dwiki.cache']
        if cachestore:
            ctx.setvar(':_cachestore', cachestore)

        # Try to load authentication if there's a cookie present.
        if 'cookie' in reqdata:
            htmlauth.setLoginFromCookie(ctx, reqdata['cookie'])

        resp = webserv.viewFactory(ctx).respond()

        # Reportable errors hit during the request are logged, but
        # they do not replace the response with an error page.
        if ctx.errors:
            for problem in ctx.errors:
                logger.warn(problem)
    except derrors.WikiErr as err:
        logger.error(err)
        resp = httputil.genError("internal-error", 500)

    modelserv.finish()
    return resp