def main():
    """Command-line entry point for processing a dump file.

    Reads one positional argument, ``dump_file``, from the command line
    (the module docstring is used as the usage text), registers
    ``wikipediaLinkHook`` as the internal link hook for namespace ``None``,
    and then hands the dump file to ``parse_json``.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(usage=__doc__)
    cli.add_argument('dump_file')
    # Parse first: argparse exits here on bad input, before any hook is set.
    dump_path = cli.parse_args().dump_file

    registerInternalLinkHook(None, wikipediaLinkHook)
    parse_json(dump_path)
def main():
    """Command-line entry point for extracting wiki content for a query.

    Reads one positional argument, ``query``, from the command line (the
    module docstring is used as the usage text), registers
    ``wikipediaLinkHook`` as the internal link hook for namespace ``None``,
    then drives an ``EntWikiExtractor``: ``parse_query`` followed by
    ``parse_wiki``.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(usage=__doc__)
    cli.add_argument('query')
    # Parse first: argparse exits here on bad input, before any hook is set.
    query = cli.parse_args().query

    registerInternalLinkHook(None, wikipediaLinkHook)

    wiki_extractor = EntWikiExtractor()
    wiki_extractor.parse_query(query)
    wiki_extractor.parse_wiki()
def _wikipedia_user_link(path_prefix, body):
    """Build the ``<a>`` tag shared by the user and user-talk link hooks.

    ``path_prefix`` is the URL namespace segment (``'User'`` or
    ``'User_talk'``). ``body`` is the link body, ``"name"`` or
    ``"name|label"``; when no label is given the name is used as the label.
    """
    (article, pipe, text) = body.partition('|')
    title = article.strip()
    # Only the first character is upper-cased. str.capitalize() would also
    # lowercase the rest ("Jimbo Wales" -> "Jimbo wales") and so link to a
    # different page, because the remainder of a title is case-sensitive.
    href = (title[:1].upper() + title[1:]).replace(' ', '_')
    text = (text or article).strip()
    return '<a href="http://en.wikipedia.org/wiki/%s:%s">%s</a>' % (
        path_prefix, href, text)


def wikipediaUserHook(parser_env, namespace, body):
    """Internal link hook: render a link to an English Wikipedia user page.

    Registered below for the 'User' namespace. ``parser_env`` and
    ``namespace`` are accepted for the hook signature but not used.
    """
    return _wikipedia_user_link('User', body)


def wikipediaUserTalkHook(parser_env, namespace, body):
    """Internal link hook: render a link to a Wikipedia user talk page.

    Registered below for the 'User talk' namespace. ``parser_env`` and
    ``namespace`` are accepted for the hook signature but not used.
    """
    return _wikipedia_user_link('User_talk', body)


registerInternalLinkHook('*', wikipediaLinkHook)
registerInternalLinkHook('User talk', wikipediaUserTalkHook)
registerInternalLinkHook('User', wikipediaUserHook)

USER_AGENT = "website:Wikum:v1.0.0 (by /u/smileyamers)"

THREAD_CALL = 'http://disqus.com/api/3.0/threads/list.json?api_key=%s&forum=%s&thread=link:%s'
COMMENTS_CALL = 'https://disqus.com/api/3.0/threads/listPosts.json?api_key=%s&thread=%s'


def get_article(url, source, num):
    # NOTE(review): only the opening statements of this function were visible
    # in the reviewed chunk; they are reproduced unchanged.
    article = Article.objects.filter(url=url)
    if article.count() == 0:
        if source.source_name == "The Atlantic":
            # Drop the query string from the URL for this source.
            url = url.strip().split('?')[0]
def markdown(s):
    """Parse the wiki markup string ``s`` with ``wikimarkup``.

    Registers ``link_hook`` as the internal link hook for namespace ``None``
    (this happens on every call) and returns the result of
    ``wikimarkup.parse`` with the table of contents enabled.
    """
    wikimarkup.registerInternalLinkHook(None, link_hook)
    rendered = wikimarkup.parse(s, showToc=True)
    return rendered
# NOTE(review): this line is whitespace-collapsed and begins mid-function.
# The first four statements (wrapping `body` in a styled <div> and
# `return text`) are the tail of a hook whose `def` line is outside the
# visible chunk.
#
# colorHook: splits `body` at the first '|' into (color, text) and returns the
# text wrapped in a <span> whose inline style sets that color. Both values are
# interpolated into the HTML as-is (no escaping happens in this function).
#
# passThroughHook: returns `body` unchanged.
#
# The trailing calls register the link hooks ('*', 'user talk', 'user',
# 'file') and the template hooks; 'reply to', 'replyto', 'u' and 're' all map
# to userHook.
text = '<div style="background: #CCFFCC; font-size:87%; padding:0.2em 0.3em; text-align:center;">' text += body text += '</div>' return text def colorHook(parser_env, namespace, body): (color, pipe, text) = body.partition('|') return '<span style="color: %s">%s</span>' % (color, text) def passThroughHook(parser_env, namespace, body): return body registerInternalLinkHook('*', linkHook) registerInternalLinkHook('user talk', userTalkHook) registerInternalLinkHook('user', userHook) registerInternalLinkHook('file', fileHook) registerInternalTemplateHook('ping', pingTempHook) registerInternalTemplateHook('reply to', userHook) registerInternalTemplateHook('replyto', userHook) registerInternalTemplateHook('u', userHook) registerInternalTemplateHook('re', userHook) registerInternalTemplateHook('color', colorHook) registerInternalTemplateHook('cot', cotHook) registerInternalTemplateHook('tq', quoteHook) registerInternalTemplateHook('archivetop', archiveHook) registerInternalTemplateHook('quote box', quoteBoxHook) registerInternalTemplateHook('highlight round', highlightHook)