Пример #1
0
    def test_get_post_lifeqna(self, mock_get):
        """get_latest reports the category and newest id of the lifeqna board.

        The board page body is registered on ``mock_get`` from a local
        fixture file (``mock_get`` is presumably a requests-mock mocker
        injected by a decorator outside this block -- confirm).
        """
        # Read the fixture inside a context manager so the file handle is
        # closed deterministically rather than left to garbage collection.
        with open('news/fixtures/lifeqna.html', 'r') as fixture:
            page_content = fixture.read()

        mock_get.get('http://berlinreport.com/bbs/board.php?bo_table=lifeqna',
                     content=page_content)

        category, latest_id = get_latest(
            'http://berlinreport.com/bbs/board.php?bo_table=lifeqna&page=1')

        self.assertEqual(category, 'lifeqna')
        self.assertEqual(latest_id, 207141)
Пример #2
0
    def get_posts(self,
                  publisher_name=None,
                  url=None,
                  sleep_time=0,
                  howmany=100,
                  single=True):
        category, latest_id = get_latest(url)

        post_id = latest_id
        print 'Start at url [%s]' % url

        while post_id > 1:
            print post_id
            posts = Post.objects.filter(post_id=post_id, table_category=category)
            post_url = "http://berlinreport.com/bbs/board.php?bo_table=%s&wr_id=%d"\
                       % (category, post_id)
            post_id = post_id - 1

            if posts.exists():
                # print 'duplicated %s %s' % (post_url, posts[0].subject)
                print 'd',
                continue

            bp = BerlinParser(url=post_url)
            item = bp.parse_post(publisher_name)

            if item['subject']:
                item['table_category'] = category
                links = item.pop('links')
                emails = item.pop('emails')
                images = item.pop('images')
                post = Post.objects.create(**item)
                post.update_relates(links, emails, images)

                print "%s %s %s" % (post.subject, post.member, post.created_at)
                time.sleep(sleep_time)
            else:
                links = item.pop('links')
                emails = item.pop('emails')
                images = item.pop('images')
                item['subject'] = item['post_id']
                post = Post.objects.create(**item)

                print 'skip %s' % post_url

            if single or (howmany < 0):
                break

            howmany = howmany - 1