def __init__(self):
    """Set up the lookup tables, parser, cache and template loader.

    Relies on the module-level ``base_dir`` (location of
    ``languages.txt``) and ``template_dir`` (template search path).
    """
    # Filled on demand, one reader per language code.
    self.reader = {}
    # Dots are escaped so that only a literal "<lang>.wikipedia.org" host
    # matches; group 1 is the language, group 2 the article path.
    self.base_re = re.compile(r'http://([^.]+)\.wikipedia\.org/wiki/(.+)')
    self.__load_data()
    self.parser = Parser()
    self.cache = Cache()
    self.all_pages = None
    # Left empty here; populated the first time a mime type is needed.
    self.mime_types = {}

    # Language index
    languages_file = os.path.join(base_dir, 'languages.txt')
    self.all_languages = load_languages(languages_file)

    # Template
    self.template = TemplateLookup(directories=[template_dir])
class Handler:
    """Build HTTP responses for browsing locally stored Wikipedia data.

    Every ``serve_*`` method receives ``req``, the request object, and
    answers through its ``send_response`` / ``send_header`` /
    ``end_headers`` methods and its ``wfile`` stream (presumably a
    ``BaseHTTPRequestHandler`` -- confirm against the caller).
    """

    def __init__(self):
        """Set up the lookup tables, parser, cache and template loader.

        Relies on the module-level ``base_dir`` (location of
        ``languages.txt``) and ``template_dir`` (template search path).
        """
        # Filled on demand by __load_reader, one reader per language code.
        self.reader = {}
        # Dots are escaped so that only a literal "<lang>.wikipedia.org"
        # host matches; group 1 is the language, group 2 the article path.
        self.base_re = re.compile(
            r'http://([^.]+)\.wikipedia\.org/wiki/(.+)')
        self.__load_data()
        self.parser = Parser()
        self.cache = Cache()
        self.all_pages = None
        # Left empty here; __get_mime fills it on first use.
        self.mime_types = {}

        # Language index
        languages_file = os.path.join(base_dir, 'languages.txt')
        self.all_languages = load_languages(languages_file)

        # Template
        self.template = TemplateLookup(directories=[template_dir])

    def __redirect(self, req, target):
        """Send a 307 redirect to ``target`` and return True."""
        req.send_response(307)
        req.send_header('location', target)
        req.end_headers()
        return True

    def __response(self, req, data, mime="text/html", code=200):
        """Send ``data`` as the response body and return True.

        ``mime`` becomes the content-type header and ``code`` the HTTP
        status.
        """
        req.send_response(code)
        req.send_header('content-type', mime)
        req.end_headers()
        req.wfile.write(data)
        return True

    def __response_object(self, req, obj):
        """Send a 200 response whose body is ``pickle.dumps(obj)``.

        Returns True.
        """
        req.send_response(200)
        req.send_header('content-type', "text/plain")
        req.end_headers()
        req.wfile.write(pickle.dumps(obj))
        return True

    def __load_mime(self):
        """Fill ``self.mime_types`` with an extension -> mime type map.

        Reads ``/etc/mime.types`` when that file exists and falls back to
        the module-level ``mime_types`` table otherwise.  The empty
        extension always maps to ``text/plain``, which __get_mime uses as
        the default.
        """
        if not os.path.exists('/etc/mime.types'):
            # Work on a copy so that adding the default entry below does
            # not modify the shared module-level table.
            self.mime_types = dict(mime_types)
        else:
            self.mime_types = {}
            with open('/etc/mime.types') as mime_file:
                for line in mime_file:
                    fields = line.split()
                    # Skip blank lines and comment lines.
                    if not fields or fields[0].startswith('#'):
                        continue
                    mime = fields[0]
                    for ext in fields[1:]:
                        self.mime_types[ext] = mime
        self.mime_types[''] = 'text/plain'

    def __get_mime(self, fname):
        """Return the mime type for ``fname`` based on its extension.

        The extension is the text after the last dot (the whole name when
        there is no dot).  Unknown extensions get the default stored
        under the empty key.
        """
        if not self.mime_types:
            self.__load_mime()
        ext = fname.rsplit('.', 1)[-1]
        return self.mime_types.get(ext, self.mime_types[''])

    def __get_main_page(self, lang):
        """Return the local URL of the main page for language ``lang``.

        The page name is taken from the reader's ``base`` info when it
        matches ``self.base_re``; otherwise ``Wikipedia`` is used.
        """
        reader = self.__load_reader(lang)
        base = reader.read_info('base')
        match = self.base_re.match(base)
        if match:
            main_page = match.group(2)
        else:
            main_page = 'Wikipedia'
        return '/%s/article/%s' % (lang, main_page)

    def __load_data(self):
        """Scan ``data_dir`` and index the found data sets by code.

        Sets ``self.data`` (code -> item) and ``self.languages`` (codes
        in scan order).
        """
        locator = Locator()
        self.data = {}
        self.languages = []
        for item in locator.scan(data_dir):
            self.data[item['code']] = item
            self.languages.append(item['code'])

    def __filter_article(self, article):
        """Normalise an article path taken from a URL.

        Trailing slashes are removed first, then surrounding whitespace.
        Returns the cleaned name, or None when ``article`` is None or
        nothing is left after cleaning.
        """
        if article is None:
            return None
        # rstrip() copes with '' and with names made only of slashes,
        # where indexing article[-1] in a loop would raise IndexError.
        article = article.rstrip('/').strip()
        return article or None

    def __load_reader(self, lang):
        """Return the reader for ``lang``, creating it on first use.

        A newly created reader is also registered with the parser,
        together with its namespaces.
        """
        reader = self.reader.get(lang)
        if reader is None:
            reader = Reader(self.data[lang]['datafile'])
            self.reader[lang] = reader
            self.parser.add_namespace(lang, reader.get_namespaces())
            self.parser.add_reader(reader, lang)
        return reader

    def serve_meta(self, req, id):
        """Send metadata identified by ``id``.

        ``"languages"`` yields a pickled list of one-element tuples, one
        per available language code; any other id yields an empty 404.
        """
        if id == "languages":
            obj = [(lang,) for lang in self.languages]
            self.__response_object(req, obj)
        else:
            self.__response(req, "", code=404)

    def serve_article(self, req, lang, article):
        """Send the rendered article ``article`` of language ``lang``.

        Falls back to the unavailable page when there is no data for
        ``lang``, redirects to the main page when no article name is
        given, and to the not-found page when the reader has no such
        article.
        """
        if lang not in self.languages:
            return self.serve_unavailable(req, lang, article)
        article = self.__filter_article(article)
        if article is None:
            return self.__redirect(req, self.__get_main_page(lang))

        reader = self.__load_reader(lang)
        res = reader.read(article)
        if res is None:
            return self.serve_not_found(req, lang, article)

        title, wiki = res
        self.parser.set_url_base('/%s/' % lang)
        # Parsed output is cached under the raw wiki text.
        content = self.cache.get(wiki)
        if content is None:
            content = self.parser.parse(wiki, lang)
            self.cache.store(wiki, content)

        template = self.template.get_template('article.tpl')
        html = template.render(
            title=title,
            content=content,
            lang=lang
        )
        return self.__response(req, html)

    def serve_unavailable(self, req, lang, article):
        """Show the message that no data exists for language ``lang``.

        Redirects to ``/`` when ``lang`` is not in the language index.
        """
        if self.all_languages.get(lang, None) is None:
            return self.__redirect(req, '/')
        # The filter returns None for an empty name; render '' then.
        article = (self.__filter_article(article) or '').replace('_', ' ')
        template = self.template.get_template('unavailable.tpl')
        html = template.render(
            article=article,
            lang=lang,
            language=self.all_languages[lang]
        )
        self.__response(req, html)

    def serve_not_found(self, req, lang, article):
        """Show the article-not-found message with a 404 status."""
        # The filter returns None for an empty name; render '' then.
        article = (self.__filter_article(article) or '').replace('_', ' ')
        template = self.template.get_template('not_found.tpl')
        html = template.render(
            article=article,
            lang=lang
        )
        return self.__response(req, html, code=404)

    def serve_misc(self, req, lang, item):
        """Serve other things.

        Redirects to ``/`` for a language without data and to the main
        page when ``item`` is None; otherwise sends an empty HTML body.
        """
        if lang not in self.languages:
            return self.__redirect(req, '/')
        if item is None:
            return self.__redirect(req, self.__get_main_page(lang))
        return self.__response(req, "", 'text/html')

    def serve_index(self, req):
        """Show the index page listing every available data set."""
        template = self.template.get_template("index.tpl")
        html = template.render(
            languages=self.data.values()
        )
        self.__response(req, html)

    def serve_static(self, req, path):
        """Send the file ``path`` from the module-level ``resource_dir``.

        Answers 404 when the resolved path is not a regular file or lies
        outside ``resource_dir``.
        """
        mime = self.__get_mime(path)
        root = os.path.abspath(resource_dir)
        fname = os.path.abspath(os.path.join(root, path))
        # ``path`` comes from the request URL: refuse anything that
        # escapes the resource directory ('..' segments, absolute paths).
        inside = fname.startswith(root.rstrip(os.sep) + os.sep)
        if not inside or not os.path.isfile(fname):
            return self.__response(req, "Not found", code=404)
        # Binary mode keeps images and other non-text resources intact.
        with open(fname, 'rb') as static_file:
            content = static_file.read()
        req.send_response(200)
        req.send_header('Content-type', mime)
        req.end_headers()
        req.wfile.write(content)

    def serve_search(self, req, lang, keywords=None):
        """Search the articles of ``lang`` and show the result page.

        Redirects to ``/`` for a language without data and to the main
        page when no keywords are given.
        """
        if lang not in self.languages:
            return self.__redirect(req, '/')
        keywords = self.__filter_article(keywords)
        # Check for None before touching the string: ``keywords``
        # defaults to None and the filter returns None for empty input.
        if keywords is not None:
            keywords = keywords.replace('_', ' ')
        if keywords is None or keywords.strip() == "":
            return self.__redirect(req, self.__get_main_page(lang))

        reader = self.__load_reader(lang)
        finder = Finder(reader)
        data = []
        for item in finder.find(keywords):
            data_id = int(item[0])
            rank = item[1]
            percent = item[2]
            res = reader.read_title(data_id)
            if not res:
                # FIXME (kept from the original): hits whose title cannot
                # be read are skipped silently.
                continue
            title = res[0]
            data.append([data_id, title, percent, rank])

        template = self.template.get_template('search_result.tpl')
        html = template.render(
            keywords=keywords,
            lang=lang,
            result=data
        )
        return self.__response(req, html)

    def serve_special(self, req, lang, type):
        """Show special pages.

        Only ``type == 'all'`` is handled; anything else redirects to
        ``/``.
        """
        if type == 'all':
            return self.__serve_special_all(req, lang)
        return self.__redirect(req, '/')

    def __serve_special_all(self, req, lang):
        """Show the list of all available pages of language ``lang``.

        The optional ``start`` and ``end`` query parameters of
        ``req.path`` are passed to ``AllPages.get_pages`` as integers;
        missing or malformed values are passed as None.
        """
        if lang not in self.languages:
            # No data for this language, so there is no main page to
            # redirect to either.
            return self.__redirect(req, '/')
        reader = self.__load_reader(lang)
        if self.all_pages is None or self.all_pages.lang != lang:
            self.all_pages = AllPages(lang, reader)

        # Parse URL
        bounds = {'start': None, 'end': None}
        query = urlparse.urlparse(req.path)[4]
        for pair in query.split('&'):
            if not pair:
                continue
            # partition() tolerates "key" and "key=a=b" alike.
            key, _, val = pair.partition('=')
            key = urllib.unquote(key)
            if key not in bounds:
                continue
            try:
                bounds[key] = int(urllib.unquote(val))
            except ValueError:
                # Non-numeric bound: keep whatever was set before.
                pass
        pages = self.all_pages.get_pages(bounds['start'], bounds['end'])

        # Render page
        template = self.template.get_template('all_pages.tpl')
        html = template.render(
            pages=pages,
            lang=self.all_languages[lang],
            code=lang
        )
        return self.__response(req, html)