示例#1
0
文件: main.py 项目: jamespullar/cs253
 def post(self, path):
     """Save the submitted content as a new page and render it.

     Reads the ``content`` request parameter, stores it in a new
     ``PageContent`` entity titled ``path`` and renders
     ``wikipage.html`` with the stored content.

     Args:
         path: Page title taken from the URL. The template receives it
             without its first character (presumably the leading
             slash -- confirm against the route definition).
     """
     # Build and persist the entity in one go.
     new_page = PageContent(title=path, content=self.request.get('content'))
     new_page.put()

     display_path = path[1:]
     self.render('wikipage.html',
                 logged_in=True,
                 path=display_path,
                 content=new_page.content)
示例#2
0
    def get(self, site_id=None, count=None):
        """Serve the newest pages of a site as JSON, or the API help page.

        When ``site_id`` or ``count`` is missing, or ``count`` is greater
        than 10, the ``api_page.html`` template is rendered instead of
        JSON. The error message is only set when ``count`` exceeds 10.

        Args:
            site_id: Value compared against ``PageContent.site_id``.
            count: Number of pages to return, converted with ``int``;
                must not exceed 10.

        Raises:
            ValueError: If ``count`` is non-empty but cannot be converted
                with ``int``.
        """
        # Parse the count once instead of re-converting it in every check.
        requested = int(count) if count else None
        over_limit = requested is not None and requested > 10

        if (not site_id) or requested is None or over_limit:
            template_attrs = {
                "fetch_config": fetch_config,
                "title": "API",
                "error_msg": (
                    "*count* should smaller than or equal to 10."
                    if over_limit else None
                ),
            }
            api_template = jinja_environment.get_template('api_page.html')
            self.response.write(api_template.render(template_attrs))
            return

        pages = PageContent.query(PageContent.site_id == site_id)\
            .order(-PageContent.create_at).fetch(requested)
        payload = {
            # A real list (not a lazy ``map`` object) so that json.dumps
            # can serialize it on Python 3 as well as Python 2.
            "pages": [page.to_json() for page in pages],
            "count": requested,
            "site_id": site_id,
        }

        self.response.write(json.dumps(payload, ensure_ascii=False))
示例#3
0
文件: main.py 项目: jamespullar/cs253
    def post(self, path):
        """Save the submitted content for the page titled ``path``.

        Updates the existing ``PageContent`` entity when one is found,
        otherwise creates a new one, then redirects to the page title.

        Args:
            path: Page title taken from the URL; also used as the
                redirect target through ``page.title``.
        """
        content = self.request.get('content')
        page = PageContent.all().filter("title =", path).get()

        if page is None:
            # The query returns None when no page has this title yet;
            # assigning to ``page.content`` would raise AttributeError,
            # so create the entity instead.
            page = PageContent(title=path, content=content)
        else:
            page.content = content
        page.put()

        self.redirect(page.title)
示例#4
0
def defer_fetch(url, site_id, is_index=False):
    """Fetch ``url`` and either enqueue its article links or store the page.

    Args:
        url: Address to fetch.
        site_id: Key into ``fetch_config``; also selects the parsing path.
        is_index: When true, ``url`` is treated as an index page: the URLs
            returned by ``get_news_urls`` are each queued as a
            ``/start_fetch`` task. Otherwise ``url`` is treated as a single
            page and skipped when ``is_exsiting(url)`` is truthy.
    """
    logging.info('fetching...%s' % url)

    site_config = fetch_config[site_id]

    if is_index:
        index_response = urlfetch.fetch(url)
        # Re-encode the body from the site's configured encoding to UTF-8.
        index_body = index_response.content.decode(site_config["encoding"]).encode('utf-8')
        for article_url in get_news_urls(site_id, index_body):
            taskqueue.add(url='/start_fetch', params={'url': article_url, 'site_id': site_id})
        return

    if is_exsiting(url):
        return

    # contents includes: title, content
    if site_id in ('jwc',):
        # NOTE(review): the parsed result is not stored or used any further
        # in this branch -- confirm whether it should be persisted as well.
        page_response = urlfetch.fetch(url)
        parse_page(page_response.content)
        return

    # Example output of the readability parser API:
    # http://www.readability.com/api/content/v1/parser?token=16208e14fab764c70989011f1f26fc8c71b85451&url=http://news.scu.edu.cn/news2012/cdzx/webinfo/2013/03/1343288895583976.htm

    # Encode first so that urlencode below does not raise when the url
    # contains Chinese characters; the url variable is unicode by default.
    # NOTE(review): the API token is hard-coded here -- consider moving it
    # to configuration.
    encoded_url = url.encode(site_config['encoding'])
    payload = urllib.urlencode({"url": encoded_url, "token": "16208e14fab764c70989011f1f26fc8c71b85451"})
    parser_response = urlfetch.fetch(
        "http://www.readability.com/api/content/v1/parser",
        payload=payload,
        method=urlfetch.POST,
        headers={'Content-Type': 'application/x-www-form-urlencoded'}
    )
    contents = json.loads(parser_response.content)
    try:
        stored_page = PageContent(url=url, site_id=site_id, title=contents['title'], content=unescape(contents['content']))
        stored_page.put()
    except KeyError as e:  # the readability parse went wrong
        logging.error("Error: %s" % e)
        logging.error("url: %s" % url)
        logging.error("payload: %s" % payload)
示例#5
0
文件: main.py 项目: jamespullar/cs253
    def get(self, path):
        """Render the edit view of a wiki page.

        Requests without a ``user_id`` cookie, and requests whose ``path``
        is just ``'/'``, are redirected to ``'/'``. Otherwise
        ``wikipage.html`` is rendered in edit mode, pre-filled with the
        stored content when a page titled ``path`` exists.

        Args:
            path: Page title taken from the URL. The template receives it
                without its first character.
        """
        # Redirect back to home if no page title is in the url.
        if path == '/':
            self.redirect('/')
            # Stop here; otherwise the handler would go on to query the
            # datastore and render or redirect a second time.
            return

        user_id = self.request.cookies.get('user_id')
        if not user_id:
            self.redirect('/')
            return

        # Check if page in db
        page = PageContent.all().filter("title =", path).get()

        if page:
            self.render('wikipage.html', path=path[1:], logged_in=True, edit=True, display_edit="none", content=page.content)
        else:
            self.render('wikipage.html', path=path[1:], logged_in=True, edit=True, display_edit="none")
示例#6
0
文件: main.py 项目: jamespullar/cs253
    def get(self, path):
        """Show a wiki page, or the edit form when the page is not found.

        Requests without a ``user_id`` cookie are redirected to ``'/'``.
        The page is looked up in memcache first and in the datastore when
        that lookup yields nothing.

        Args:
            path: Page title taken from the URL. The template receives it
                without its first character.
        """
        if not self.request.cookies.get('user_id'):
            self.redirect('/')
            return

        # NOTE(review): the cache is read under ``path[1:]`` but written
        # under ``page.title`` below; if titles are stored as the full
        # ``path`` these keys never match -- confirm the intended key, and
        # add invalidation on edit before making the cache actually hit.
        page = memcache.get(path[1:]) or PageContent.all().filter("title =", path).get()

        if not page:
            # Nothing found anywhere: offer the edit form instead.
            self.render('wikipage.html', path=path[1:], logged_in=True, edit=True, display_edit="none")
            return

        memcache.add(page.title, page)
        self.render('wikipage.html', logged_in=True, path=path[1:], content=page.content)
示例#7
0
    def get(self, site_id=None, count=None):
        """Serve the newest pages of a site as JSON, or the API help page.

        When ``site_id`` or ``count`` is missing, or ``count`` is greater
        than 10, the ``api_page.html`` template is rendered instead of
        JSON. The error message is only set when ``count`` exceeds 10.

        Args:
            site_id: Value compared against ``PageContent.site_id``.
            count: Number of pages to return, converted with ``int``;
                must not exceed 10.

        Raises:
            ValueError: If ``count`` is non-empty but cannot be converted
                with ``int``.
        """
        # Parse the count once instead of re-converting it in every check.
        requested = int(count) if count else None
        over_limit = requested is not None and requested > 10

        if (not site_id) or requested is None or over_limit:
            template_attrs = {
                "fetch_config": fetch_config,
                "title": "API",
                "error_msg": (
                    "*count* should smaller than or equal to 10."
                    if over_limit else None
                ),
            }
            api_template = jinja_environment.get_template('api_page.html')
            self.response.write(api_template.render(template_attrs))
            return

        pages = PageContent.query(PageContent.site_id == site_id)\
            .order(-PageContent.create_at).fetch(requested)
        payload = {
            # A real list (not a lazy ``map`` object) so that json.dumps
            # can serialize it on Python 3 as well as Python 2.
            "pages": [page.to_json() for page in pages],
            "count": requested,
            "site_id": site_id,
        }

        self.response.write(json.dumps(payload, ensure_ascii=False))
示例#8
0
def is_exsiting(url):
    """Look up a stored page by its URL.

    Args:
        url: Value compared against ``PageContent.url``.

    Returns:
        Whatever ``get()`` yields for the query -- presumably the first
        matching ``PageContent`` or ``None``; callers use it as a truth
        value.
    """
    matching_pages = PageContent.query(PageContent.url == url)
    return matching_pages.get()