Example #1
def bootstrap_feedpages():
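    # Copy each relational FeedPage row into the MFeedPage Mongo collection,
    # skipping feeds that already have a document.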
    print "Mongo DB feed_pages: %s" % MFeedPage.objects().count()
    # db.feed_pages.drop()
    print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()

    print "FeedPages: %s" % FeedPage.objects.count()
    pprint(db.feed_pages.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    i = 0
    for feed in feeds:
        i += 1
        print "%s/%s: %s" % (i, feed_count, feed,)
        sys.stdout.flush()
        
        if not MFeedPage.objects(feed_id=feed.pk):
            feed_page = FeedPage.objects.filter(feed=feed).values()
            if feed_page:
                del feed_page[0]['id']
                feed_page[0]['feed_id'] = feed.pk
                try:
                    MFeedPage(**feed_page[0]).save()
                except:
                    print '\n\n!\n\n'
                    continue
        

    print "\nMongo DB feed_pages: %s" % MFeedPage.objects().count()
Example #2
def bootstrap_feedpages():
    print "Mongo DB feed_pages: %s" % MFeedPage.objects().count()
    # db.feed_pages.drop()
    print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()

    print "FeedPages: %s" % FeedPage.objects.count()
    pprint(db.feed_pages.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    i = 0
    for feed in feeds:
        i += 1
        print "%s/%s: %s" % (
            i,
            feed_count,
            feed,
        )
        sys.stdout.flush()

        if not MFeedPage.objects(feed_id=feed.pk):
            feed_page = FeedPage.objects.filter(feed=feed).values()
            if feed_page:
                del feed_page[0]['id']
                feed_page[0]['feed_id'] = feed.pk
                try:
                    MFeedPage(**feed_page[0]).save()
                except:
                    print '\n\n!\n\n'
                    continue

    print "\nMongo DB feed_pages: %s" % MFeedPage.objects().count()
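For context, the two bootstrap examples above migrate FeedPage rows from the relational database into a MongoEngine collection. The sketch below shows what a minimal MFeedPage document compatible with that usage could look like; the field names and the get_data helper are inferred from the calls in these examples and are assumptions, not the actual NewsBlur model definition.

import mongoengine as mongo

class MFeedPage(mongo.Document):
    # One document per feed, holding the fetched page HTML.
    feed_id = mongo.IntField(primary_key=True)
    page_data = mongo.BinaryField()

    meta = {
        'collection': 'feed_pages',  # assumed name, matching db.feed_pages above
        'allow_inheritance': False,
    }

    @classmethod
    def get_data(cls, feed_id):
        # Hypothetical helper mirroring the MFeedPage.get_data(feed_id=...) calls
        # in the later examples: return the stored page, or None if absent.
        page = cls.objects(feed_id=feed_id).first()
        return page and page.page_data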
Example #3
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     if self.page_data:
         content = self.page_data
     elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
         key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
         compressed_content = key.get_contents_as_string()
         stream = StringIO(compressed_content)
         gz = gzip.GzipFile(fileobj=stream)
         try:
             content = gz.read()
         except IOError:
             content = None
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if not url:
         try:
             content = requests.get(self.feed.feed_link).content
             url = self._url_from_html(content)
         except (AttributeError, SocketError, requests.ConnectionError,
                 requests.models.MissingSchema, requests.sessions.InvalidSchema,
                 requests.sessions.TooManyRedirects,
                 requests.models.InvalidURL,
                 requests.models.ChunkedEncodingError,
                 requests.models.ContentDecodingError,
                 LocationParseError, OpenSSLError, PyAsn1Error), e:
             logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
Example #4
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     if self.page_data:
         content = self.page_data
     elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
         key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
         compressed_content = key.get_contents_as_string()
         stream = StringIO(compressed_content)
         gz = gzip.GzipFile(fileobj=stream)
         try:
             content = gz.read()
         except IOError:
             content = None
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if not url:
         try:
             content = requests.get(self.cleaned_feed_link).content
             url = self._url_from_html(content)
         except (AttributeError, SocketError, requests.ConnectionError,
                 requests.models.MissingSchema, requests.sessions.InvalidSchema,
                 requests.sessions.TooManyRedirects,
                 requests.models.InvalidURL,
                 requests.models.ChunkedEncodingError,
                 requests.models.ContentDecodingError,
                 httplib.IncompleteRead,
                 LocationParseError, OpenSSLError, PyAsn1Error), e:
             logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
Example #5
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     if self.page_data:
         content = self.page_data
     # Deleted By Xinyan Lu : No S3 storage
     # elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
     #     key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
     #     compressed_content = key.get_contents_as_string()
     #     stream = StringIO(compressed_content)
     #     gz = gzip.GzipFile(fileobj=stream)
     #     try:
     #         content = gz.read()
     #     except IOError:
     #         content = None
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     # Modified By Xinyan Lu : content may be None
     if content:
         url = self._url_from_html(content)
     else:
         url = None
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
Example #6
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
Example #7
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
Example #8
def load_feed_page(request, feed_id):
    if not feed_id:
        raise Http404
        
    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."
    
    return HttpResponse(data, mimetype='text/html')
Example #9
def load_feed_page(request, feed_id):
    if not feed_id:
        raise Http404
        
    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."
    
    return HttpResponse(data, mimetype='text/html')
Example #10
def load_feed_page(request):
    feed_id = int(request.GET.get('feed_id', 0))
    if feed_id == 0:
        raise Http404
        
    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."
    
    return HttpResponse(data, mimetype='text/html')
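The load_feed_page variants are plain Django views, so they only become reachable through a URLconf. A hypothetical urls.py entry for the query-parameter variant above (not from the source; the pattern, name, and import path are assumptions):

from django.conf.urls import url

from apps.reader.views import load_feed_page  # assumed import path

urlpatterns = [
    # e.g. /reader/page/?feed_id=42
    url(r'^reader/page/?$', load_feed_page, name='load-feed-page'),
]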
Example #11
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     content = None
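     # Try the cached page_data first, then the original-page node server,
     # then S3, and finally the MFeedPage Mongo collection.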
     if self.page_data:
         content = self.page_data
     elif settings.BACKED_BY_AWS.get('pages_on_node'):
         domain = Site.objects.get_current().domain
         url = "https://%s/original_page/%s" % (
             domain,
             self.feed.pk,
         )
         try:
             page_response = requests.get(url)
             if page_response.status_code == 200:
                 content = page_response.content
         except requests.ConnectionError:
             pass
     elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
         key = settings.S3_CONN.Bucket(
             settings.S3_PAGES_BUCKET_NAME).Object(
                 key=self.feed.s3_pages_key)
         compressed_content = key.get()["Body"].read()
         stream = BytesIO(compressed_content)
         gz = gzip.GzipFile(fileobj=stream)
         try:
             content = gz.read()
         except IOError:
             pass
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if not url:
         try:
             content = requests.get(self.cleaned_feed_link,
                                    timeout=10).content
             url = self._url_from_html(content)
         except (AttributeError, SocketError, requests.ConnectionError,
                 requests.models.MissingSchema,
                 requests.sessions.InvalidSchema,
                 requests.sessions.TooManyRedirects,
                 requests.models.InvalidURL,
                 requests.models.ChunkedEncodingError,
                 requests.models.ContentDecodingError,
                 http.client.IncompleteRead, requests.adapters.ReadTimeout,
                 LocationParseError, OpenSSLError, PyAsn1Error,
                 ValueError) as e:
             logging.debug(
                 " ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
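As a side note, on Python 3 the BytesIO/GzipFile dance in the S3 branch above can be replaced with the stdlib shortcut gzip.decompress; a minimal equivalent sketch (not from the source):

import gzip

# Equivalent to gzip.GzipFile(fileobj=BytesIO(compressed_content)).read()
try:
    content = gzip.decompress(compressed_content)
except (OSError, EOFError):  # invalid or truncated gzip data
    content = None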
Example #12
def load_feed_page(request):
    feed_id = None
    try:
        feed_id = int(request.REQUEST.get('feed_id', 0))
    except ValueError:
        feed_id_matches = re.search(r'(\d+)', request.REQUEST['feed_id'])
        if feed_id_matches: feed_id = int(feed_id_matches.group(1))
    if not feed_id:
        raise Http404
        
    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."
    
    return HttpResponse(data, mimetype='text/html')
Example #13
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     if self.page_data:
         content = self.page_data
     elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
         key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
         compressed_content = key.get_contents_as_string()
         stream = StringIO(compressed_content)
         gz = gzip.GzipFile(fileobj=stream)
         content = gz.read()
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url
Example #14
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     if self.page_data:
         content = self.page_data
     elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
         key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
         compressed_content = key.get_contents_as_string()
         stream = StringIO(compressed_content)
         gz = gzip.GzipFile(fileobj=stream)
         content = gz.read()
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if url:
         image, image_file = self.get_image_from_url(url)
     return image, image_file, url