示例#1
0
    def post(self):
        """Fill in missing item fields from fetched pages and post them back.

        The request body is JSON holding 'items', 'origin', 'callbackurl'
        and an optional 'header' that is handed to the fetcher. Every item
        with a 'url' is fetched and analysed; 'title', 'published',
        'content' and 'img' are only filled in when the item does not
        already carry a truthy value. The enriched items are then posted to
        'callbackurl' and the outcome message is logged and written to the
        response as plain text.
        """
        data = json.loads(self.request.body)

        items = data['items']
        origin = data['origin']
        header = data.get('header')

        for item in items:
            source_url = item.get('url')
            if not source_url:
                continue

            outcome = ContentFetcher(source_url, header=header,
                                     tried=2).fetch()
            final_url = outcome.get('url')
            body = outcome.get('content')
            if not body:
                logging.error('Failed to get content from %s.' % (source_url, ))
                continue

            # Record the url reported by the fetcher, which is also the one
            # handed to the analyser below.
            item['url'] = final_url
            try:
                page = pageanalyst.analyse(
                    final_url, body,
                    editorFormat=globalconfig.getEditorFormat(),
                    monitorTitle=item.get('title'))

                if not item.get('title'):
                    if page.get('title'):
                        item['title'] = page['title']

                if not item.get('published'):
                    if page.get('published') \
                            and not page['published'].endswith('0000'):
                        # A value ending in '0000' has no hour/minute, so it
                        # is not precise enough to be used.
                        item['published'] = page['published']
                        zone = origin.get('timezone')
                        if zone:
                            item['published'] = dateutil.adjustDate14(
                                item['published'], zone)

                if not item.get('content'):
                    if page.get('content'):
                        item['content'] = page['content']

                if not item.get('img'):
                    if page.get('images'):
                        item['img'] = page['images'][0]
            except Exception:
                # Best effort: a failing analysis must not stop the batch.
                logging.exception('Error happens when analyse %s.' % (final_url, ))

        payload = {
            'origin': origin,
            'items': items,
        }

        self.response.headers['Content-Type'] = 'text/plain'
        callbackurl = data['callbackurl']
        delivered = networkutil.postData(callbackurl, payload,
                                         trycount=_CALLBACK_TRYCOUNT,
                                         timeout=_URL_TIMEOUT)

        if delivered:
            template = 'Push items back for %s to %s.'
        else:
            template = 'Failed to push items back for %s to %s.'
        message = template % (origin, callbackurl)
        logging.info(message)
        self.response.out.write(message)
示例#2
0
 def get(self):
     """Render the analysed page for the ``url`` request parameter.

     The parameter is first tried as base64 text whose decoded characters
     were swapped pairwise; if decoding raises TypeError it is used as
     given. The analysed page is looked up in memcache and, on a miss,
     fetched and analysed. A page is cached and shown only when it has
     'content' or 'images'; otherwise the client is permanently redirected
     to the url itself.

     When no ``url`` is supplied the home template is rendered with
     ``page`` set to None. Previously this path raised UnboundLocalError
     because ``contentGot`` was never assigned.
     """
     url = self.request.get('url')
     if not url:
         # Nothing to analyse and nothing sensible to redirect to.
         # NOTE(review): assumes home.html tolerates page=None -- confirm.
         self.render({'page': None}, 'home.html')
         return

     try:
         url = base64.b64decode(url)
         # Undo the pairwise character swap; an odd trailing character
         # stays where it is.
         length = len(url)
         pieces = [url[i + 1] + url[i] for i in range(0, length - 1, 2)]
         if length % 2 != 0:
             pieces.append(url[-1])
         url = ''.join(pieces)
     except TypeError:
         # Decoding failed: keep the parameter unchanged.
         pass

     key = stringutil.calculateHash([url])
     page = memcache.get(key)
     contentGot = bool(page)
     if not page:
         tried = 2 # original note: the max try count is 3
         fetcher = ContentFetcher(url, tried=tried)
         content = fetcher.fetch().get('content')
         if content:
             editorFormat = globalconfig.getEditorFormat()
             page = pageanalyst.analyse(url, content, editorFormat=editorFormat)
             if page:
                 page['url'] = url
                 # Only cache and show pages that carry something useful.
                 if page.get('content') or page.get('images'):
                     memcache.set(key, page)
                     contentGot = True

     if not contentGot:
         # Nothing usable was extracted: send the client to the page itself.
         self.redirect(url, permanent=True)
         return

     # Rewrite image urls so they go through the local '/image/' endpoint.
     for image in page.get('images') or []:
         image['url'] = '/image/?url=' + urllib.quote(image['url'].encode('utf-8'))

     templateValues = {
         'page': page,
     }
     self.render(templateValues, 'home.html')
示例#3
0
 def post(self):
     """Fetch and analyse the submitted url and render the test page.

     Reads 'url', 'title', 'fortest' and 'httpheader' from the request.
     'httpheader', when present, is parsed as JSON and passed to the
     fetcher. The page is analysed only when the fetch returned content.
     The fetch details, the analysis result and the element result are all
     handed to the 'test.html' template.
     """
     request = self.request
     url = request.get('url')
     title = request.get('title')
     fortest = bool(request.get('fortest'))
     httpheader = request.get('httpheader')
     header = json.loads(httpheader) if httpheader else None

     # Without a url there is nothing to fetch; the empty result makes
     # every lookup below yield None.
     fetched = {}
     if url:
         # original note: the max try count is 3
         fetched = ContentFetcher(url, header=header, tried=2).fetch()
     body = fetched.get('content')

     page = None
     elementResult = {}
     if body:
         page = pageanalyst.analyse(
             url, body,
             editorFormat=globalconfig.getEditorFormat(),
             monitorTitle=title,
             fortest=fortest,
             elementResult=elementResult)

     # Show the parsed header in readable form; otherwise echo the raw
     # request value.
     if header:
         httpheader = jsonutil.getReadableString(header)

     self.render({
         'url': url,
         'title': title,
         'fortest': fortest,
         'httpheader': httpheader,
         'encoding': fetched.get('encoding'),
         'encodingSrc': fetched.get('encoding.src'),
         'oldContent': fetched.get('content.old'),
         'content': fetched.get('content'),
         'pagestr': jsonutil.getReadableString(page),
         'page': page,
         'elementResult': elementResult,
     }, 'test.html')