Exemplo n.º 1
0
    def handle(self, *args, **options):
        """Rebuild the keyword index for every Article via rq workers.

        Clears the cached Redis article store, then enqueues one indexing
        job per article; the actual reindexing happens asynchronously in
        the rq workers (``rqinfo`` shows progress).
        """
        # print() form works on both Python 2 and 3 for a single argument
        print("in process")
        # drop the cached article store so the index is rebuilt from scratch
        redis_conn.delete("RedisStore:article")

        for article in Article.objects.all():
            q.enqueue(article.make_keyword_index)

        print("workers are working on reindexing, type`rqinfo` to check progress")
Exemplo n.º 2
0
def add_article(request):
    """Create (or fetch) an Article for the posted URL and attach it to the user.

    Raises ParseError when no ``url`` POST parameter is supplied.  A newly
    created user/article link is post-processed asynchronously by an rq
    worker.  Returns an empty dict.
    """
    target_url = request.POST.get("url") or None
    if target_url is None:
        raise ParseError("require url parameter")

    article, _ = Article.objects.get_or_create(original_url=target_url)
    post, is_new = UserPostArticle.objects.get_or_create(
        article=article, user=request.user)
    # hand the heavy processing off to an rq worker
    if is_new:
        q.enqueue(post.defer_process)

    return {}
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Scrape the dbanotes front page and post every title link as the
        first user in the database; new posts are processed by rq workers."""
        machine = User.objects.filter().first()

        # fetch http://news.dbanotes.net/ and walk its title anchors
        page = pq(url="http://news.dbanotes.net/")
        for anchor in page(".title a"):
            link = pq(anchor).attr("href")
            article, _ = Article.objects.get_or_create(original_url=link)
            post, is_new = UserPostArticle.objects.get_or_create(
                article=article, user=machine)
            # heavy lifting happens asynchronously in an rq worker
            if is_new:
                q.enqueue(post.defer_process)
Exemplo n.º 4
0
    def handle(self, *args, **options):
        """Crawl http://news.dbanotes.net/ title links and store each one as
        an Article posted by the first user; rq workers do the processing."""
        machine = User.objects.filter().first()

        # load the dbanotes front page
        doc = pq(url="http://news.dbanotes.net/")
        for node in doc(".title a"):
            href = pq(node).attr("href")
            article, _ = Article.objects.get_or_create(original_url=href)
            upa, created = UserPostArticle.objects.get_or_create(article=article,
                                                                 user=machine)
            if created:
                # only brand-new posts need the deferred rq processing
                q.enqueue(upa.defer_process)
Exemplo n.º 5
0
 def defer_process(self):
     """Download, extract and persist the linked article's readable content.

     Intended to run inside an rq worker.  Articles already marked
     ``finished`` are skipped; otherwise the page is fetched, its
     title/body extracted with readability, the article is flagged
     primary+finished, and image fetching is enqueued as a follow-up job.
     """
     from readability.readability import Document
     article  = self.article
     if not article.finished:
         header = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',}
         # SECURITY: verify=False disables TLS certificate verification;
         # preserved for compatibility with existing behavior, but review.
         response = requests.get(article.original_url, headers=header, verify=False)
         # guess the real encoding from the raw bytes before reading .text
         response.encoding = chardet.detect(response.content)["encoding"]
         html = response.text
         doc = Document(html)  # parse once instead of twice
         article.content = doc.summary()
         article.title = doc.short_title()
         article.primary = True # TODO
         article.finished = True
         redis_conn.sadd(Article.ALL_PRIMARY_IDS_KEY, article.id)
         article.save()
         q.enqueue(article._catch_image)
Exemplo n.º 6
0
    def handle(self, *args, **options):
        """Import a Pocket HTML export as Articles for the first user.

        ``options["pocket_path"]`` points at the exported HTML file; each
        anchor's href becomes an Article, and every newly created
        user/article link is post-processed asynchronously by rq.
        """
        # print() form works on both Python 2 and 3 for a single argument
        print("in process")
        file_path = options["pocket_path"]
        # no need to pre-initialize soup: the with-block always binds it or raises
        with open(file_path, 'r') as pocket_file:
            soup = BeautifulSoup(pocket_file.read())

        user = User.objects.filter().first()
        for link in soup.find_all('a'):
            url = link.get('href')

            article, _ = Article.objects.get_or_create(original_url=url)
            upa, created = UserPostArticle.objects.get_or_create(article=article, user=user)
            # post process in rq worker
            if created:
                q.enqueue(upa.defer_process)

        print("done")
Exemplo n.º 7
0
    def handle(self, *args, **options):
        """Load a Pocket HTML export and create an Article per saved link.

        The export path comes from ``options["pocket_path"]``; new
        user/article pairings are handed to rq workers for processing.
        """
        # print() form works on both Python 2 and 3 for a single argument
        print("in process")
        file_path = options["pocket_path"]
        # no need to pre-initialize soup: the with-block always binds it or raises
        with open(file_path, 'r') as pocket_file:
            soup = BeautifulSoup(pocket_file.read())

        user = User.objects.filter().first()
        for link in soup.find_all('a'):
            url = link.get('href')

            article, _ = Article.objects.get_or_create(original_url=url)
            upa, created = UserPostArticle.objects.get_or_create(
                article=article, user=user)
            # post process in rq worker
            if created:
                q.enqueue(upa.defer_process)

        print("done")
Exemplo n.º 8
0
 def defer_process(self):
     """Fetch the article page and extract/persist its readable content.

     Runs inside an rq worker.  Does nothing when the article is already
     ``finished``; otherwise downloads the page, extracts title and body
     via readability, marks the article primary+finished, records its id
     in the Redis primary-id set, and enqueues image fetching.
     """
     from readability.readability import Document
     article = self.article
     if not article.finished:
         header = {
             'user-agent':
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',
         }
         # SECURITY: verify=False disables TLS certificate verification;
         # preserved for compatibility with existing behavior, but review.
         response = requests.get(article.original_url,
                                 headers=header,
                                 verify=False)
         # guess the real encoding from the raw bytes before reading .text
         response.encoding = chardet.detect(response.content)["encoding"]
         html = response.text
         doc = Document(html)  # parse once instead of twice
         article.content = doc.summary()
         article.title = doc.short_title()
         article.primary = True  # TODO
         article.finished = True
         redis_conn.sadd(Article.ALL_PRIMARY_IDS_KEY, article.id)
         article.save()
         q.enqueue(article._catch_image)