Example #1
0
    def _get_licaishi_viewpoint_list_in_one_page(self, soup, user_id):
        """Parse one page of a user's viewpoint list on 新浪理财师 (Sina licaishi).

        Args:
            soup: BeautifulSoup document of the user's profile page.
            user_id: id of the user being scraped; copied onto each Article.

        Returns:
            list of Article objects parsed from the page; empty if the page
            could not be parsed at all.
        """
        archiveList = []

        try:
            # The page <title> is "<name>的个人主页_新浪理财师"; strip the fixed
            # suffix to recover the user's display name.
            name = soup.find("title").get_text()
            name = name.replace("的个人主页_新浪理财师", "").strip()

            statusAll = soup.find("div", {"class": "s_left"})
            statusList = statusAll.findAll("div", {"class": "s_widget w_vp"})
            for status in statusList:

                try:

                    archive = Article()
                    archive.user_id = user_id
                    archive.user_name = name

                    # Each sub-element is optional on the page, so every
                    # lookup is guarded before dereferencing.
                    h2_title = status.find("h2", {"class": "w_vp_h2"})
                    if h2_title:
                        archive.title = h2_title.get_text().strip()
                        a_href = h2_title.find("a")
                        if a_href:
                            # hrefs on the page are site-relative.
                            archive.href = self.site + a_href["href"]

                    p_detail = status.find("p", {"class": "w_vp_p"})
                    if p_detail:
                        archive.detail = p_detail.get_text().strip()

                    div_time = status.find("span", {"class": "w_vp_de"})
                    if div_time:
                        archive.publish_time = regularization_time(div_time.get_text().strip())

                    a_device = status.find("a", {"class": "w_vp_fra"})
                    if a_device:
                        archive.device = a_device.get_text().strip()

                    div_watch_count = status.find("div", {"class": "w_vp_ort"})
                    if div_watch_count:
                        # Displayed as "<n>人阅读" ("<n> readers").
                        watch_count = div_watch_count.get_text().strip().replace("人阅读", "")
                        archive.watch_count = int(watch_count)

                    archiveList.append(archive)

                # Best-effort per entry: one malformed status must not abort
                # the rest of the page. (`except ... as` is valid on both
                # Python 2.6+ and Python 3, unlike the old comma form.)
                except Exception as e:
                    print(e)
        except Exception as e:
            print(e)

        # BUG FIX: the list was built but never returned, so callers
        # always received None.
        return archiveList
Example #2
0
    def _get_archive_list_in_one_page(self, soup, id):
        """Parse one page of a user's status/archive list on 雪球 (Xueqiu).

        Args:
            soup: BeautifulSoup document of the user's status-list page.
            id: id of the user being scraped; copied onto each Article.
                (Name shadows the builtin but is kept for caller
                compatibility.)

        Returns:
            list of Article objects parsed from the page; empty if the page
            could not be parsed at all.
        """
        archiveList = []

        try:
            # Page <title> looks like "<name> - 雪球"; strip the decorations
            # to recover the user's display name.
            name = soup.find('title').get_text()
            name = name.replace('-', '').replace('雪球', '').strip()

            statusAll = soup.find('ul', {'class': 'status-list'})
            statusList = statusAll.findAll('li')
            for status in statusList:

                try:

                    archive = Article()
                    archive.user_id = id
                    archive.user_name = name
                    archive.detail = status.find('div', {'class': 'detail'}).get_text()
                    infos = status.find('div', {'class': 'infos'})
                    archive.publish_time = regularization_time(infos.find('a', {'class': 'time'}).get_text())
                    # Device label is rendered as "来自<device>" ("from <device>").
                    archive.device = infos.find('span').get_text().replace('来自', '')

                    # Title/href exist only for long-form posts. Narrowed from
                    # a bare `except:` so real bugs (and KeyboardInterrupt)
                    # are no longer silently swallowed.
                    try:
                        titleH4 = status.find('h4')
                        archive.title = titleH4.find('a').get_text()
                        archive.href = self.site + titleH4.find('a')['href']
                    except (AttributeError, KeyError, TypeError):
                        print('arctive has no title and href')

                    # Counters render as e.g. "转发(3)" / "赞助(0)" / "评论(5)".
                    # NOTE(review): kept as strings — the sibling licaishi
                    # parser stores int; converting here could raise on an
                    # empty count. Confirm downstream expectations before
                    # unifying.
                    ops = status.find('div', {'class': 'ops'})
                    repost = ops.find('a', {'class': 'repost second'}).get_text()
                    donate = ops.find('a', {'class': 'donate'}).get_text()
                    comment = ops.find('a', {'class': 'statusComment last'}).get_text()
                    archive.repost_count = repost.replace('转发', '').replace('(', '').replace(')', '')
                    archive.donate_count = donate.replace('赞助', '').replace('(', '').replace(')', '')
                    archive.comment_count = comment.replace('评论', '').replace('(', '').replace(')', '')

                    archiveList.append(archive)

                # Best-effort per entry: one malformed status must not abort
                # the rest of the page. (`except ... as` is valid on both
                # Python 2.6+ and Python 3, unlike the old comma form.)
                except Exception as e:
                    print(e)
        except Exception as e:
            print(e)

        # BUG FIX: the list was built but never returned, so callers
        # always received None.
        return archiveList