Пример #1
0
 def getAlbumComments(self):
     """Save this album's comments to ``comments.markdown`` under ``self.path``.

     A ``Comment`` helper is built for the album and its ``work()`` result
     is written below a bold comments heading. No file is created when
     ``work()`` returns nothing.
     """
     comment = Comment(self.userID, self.spider, self.albumID, 'album',
                       self.ownerID)
     content = comment.work()
     # Write only when there is comment text: an empty result must not
     # produce a heading-only file, and a non-empty one must not be dropped.
     if content:
         # NOTE(review): encoded bytes are written to a text-mode file, which
         # relies on Python 2 file semantics -- confirm the target runtime.
         with open(self.path + '/comments.markdown', 'w') as f:
             f.write((u'**评论: **\n\n').encode('utf-8'))
             f.write(content)
Пример #2
0
    def saveBlog(self):
        """Download the blog page and store it, with comments, in ``self.filename``.

        The page behind ``self.url`` is fetched through ``self.spider`` and
        kept in ``self.content``. The ``blogContent`` div is extracted, its
        paragraph and line-break tags are replaced by newlines, and the
        result is written after a header built from ``self.summary``
        (``title``, ``createTime``, ``category``). When
        ``self.summary['commentCount']`` is non-zero, the output of a
        ``Comment`` helper is appended. A confirmation line is printed at
        the end.
        """
        # Fetch the HTML; the elements needed can be located on the Renren
        # page with the Elements panel of Chrome's developer tools.
        self.content = self.spider.getContent(self.url)
        # NOTE(review): find() yields None when no such div exists, and the
        # decode() call below would then raise AttributeError.
        blogDiv = BeautifulSoup(self.content).find(
            'div', id='blogContent', class_='blogDetail-content')

        # Turn <p>, <br>, </p> and <br/> into newlines for readability.
        tagPattern = r'<p>|<br>|</p>|<br/>'
        blogText = re.sub(tagPattern, r'\n', blogDiv.decode())

        with open(self.filename, 'wb+') as out:
            header = u''.join([
                u'*** 日志标题: ***', self.summary['title'], '\n\n',
                u'*** 创建时间: ***', self.summary['createTime'], '\n\n',
                u'*** 所属分类: ***', self.summary['category'], '\n\n',
                Config.GAP,
            ])
            out.write(header.encode('utf-8'))
            out.write(blogText.encode('utf-8'))

            hasComments = int(self.summary['commentCount'])
            if hasComments:
                out.write(Config.GAP.encode('utf-8'))
                out.write((u'*** 评论: ***\n\n').encode('utf-8'))
                comments = Comment(self.spider, self.userID, self.blogID,
                                   'blog', self.ownerID)
                out.write(comments.work())

        message = self.filename + ' saves successfully'
        print(message)
Пример #3
0
 def getAlbumComments(self):
     """Save this album's comments to ``comments.markdown`` under ``self.path``.

     Runs a ``Comment`` helper for the album and writes whatever its
     ``work()`` call returns beneath a bold comments heading. The file is
     left untouched when ``work()`` returns nothing.
     """
     comment = Comment(self.userID, self.spider, self.albumID, 'album',
                       self.ownerID)
     content = comment.work()
     # Only a non-empty result is worth storing; testing for the empty
     # string here would create a heading-only file and skip real comments.
     if content:
         # NOTE(review): encoded bytes go into a text-mode file, which
         # relies on Python 2 file semantics -- confirm the target runtime.
         with open(self.path + '/comments.markdown', 'w') as f:
             f.write((u'**评论: **\n\n').encode('utf-8'))
             f.write(content)
Пример #4
0
 def savePhotoComment(self):
     """Write one Markdown section per photo to ``photo details.markdown``.

     For every entry in ``self.photos`` the section holds the photo's id,
     its title (newlines flattened to spaces), its date and an image link
     to ``<id>.jpg``. When the entry's ``commentCount`` is non-zero, the
     output of a ``Comment`` helper is appended. Each section ends with
     ``config.gap``.
     """
     detailPath = self.path + "/photo details.markdown"
     with open(detailPath, "w") as out:
         for photo in self.photos:
             header = u"".join([
                 u"**ID: ", str(photo["id"]), "**\n\n",
                 u"**名称: ", photo["title"].replace("\n", " "), "**\n\n",
                 u"**时间: ", photo["date"], "**\n\n",
             ])
             out.write(header.encode("utf-8"))

             # The image file is named after the photo id.
             photoId = str(photo["id"])
             imageLink = "".join(["![", photoId, "](", photoId, ".jpg)\n\n"])
             out.write(imageLink.encode("utf-8"))

             hasComments = int(photo["commentCount"])
             if hasComments:
                 comment = Comment(self.userID, self.spider, photo["id"],
                                   "photo", photo["owner"])
                 out.write((u"**评论: **\n\n").encode("utf-8"))
                 out.write(comment.work())

             out.write(config.gap)
Пример #5
0
 def saveBlog(self):
     """Write the blog held in ``self.content`` to ``self.filename``.

     The ``blogContent`` div is extracted from the HTML and written after a
     header built from ``self.summary`` (``title``, ``createTime``,
     ``category``). When ``self.summary['commentCount']`` is non-zero, the
     output of a ``Comment`` helper follows. A confirmation line is printed
     once the file is closed.
     """
     # NOTE(review): find() yields None when no such div exists, and the
     # encode() call on it below would then raise AttributeError.
     blogDiv = BeautifulSoup(self.content).find(
         'div', id='blogContent', class_='blogDetail-content')

     with open(self.filename, 'w+') as out:
         header = u''.join([
             u'###日志标题: ', self.summary['title'], '\n\n',
             u'#####创建时间: ', self.summary['createTime'], '\n\n',
             u'#####所属分类: ', self.summary['category'], '\n\n',
             config.gap,
         ])
         out.write(header.encode('utf-8'))
         out.write(blogDiv.encode('utf-8'))

         hasComments = int(self.summary['commentCount'])
         if hasComments:
             out.write(config.gap)
             out.write((u'#####评论:\n\n').encode('utf-8'))
             comments = Comment(self.userID, self.spider, self.blogID,
                                'blog', self.ownerID)
             out.write(comments.work())

     # A single parenthesised expression prints the same text whether
     # ``print`` is the Python 2 statement or the function.
     print(self.filename + ' save success')
Пример #6
0
 def savePhotoComment(self):
     """Dump per-photo details and comments into ``photo details.markdown``.

     Each item of ``self.photos`` contributes its id, its title with
     newlines replaced by spaces, its date and an image link pointing at
     ``<id>.jpg``. Items whose ``commentCount`` is non-zero also get the
     output of a ``Comment`` helper. ``config.gap`` closes every item.
     """
     with open(self.path + '/photo details.markdown', 'w') as sink:
         for entry in self.photos:
             titleOneLine = [
                 u'**ID: ', str(entry['id']), '**\n\n',
                 u'**名称: ', entry['title'].replace('\n', ' '), '**\n\n',
                 u'**时间: ', entry['date'], '**\n\n',
             ]
             sink.write(u''.join(titleOneLine).encode('utf-8'))

             # Link to the image file, which is named after the photo id.
             stem = str(entry['id'])
             link = ''.join(['![', stem, '](', stem, '.jpg)\n\n'])
             sink.write(link.encode('utf-8'))

             if int(entry['commentCount']):
                 comment = Comment(self.userID, self.spider, entry['id'],
                                   'photo', entry['owner'])
                 sink.write((u'**评论: **\n\n').encode('utf-8'))
                 sink.write(comment.work())

             sink.write(config.gap)
Пример #7
0
 def saveBlog(self):
     """Save the blog in ``self.content`` and its comments to ``self.filename``.

     Output order: a header with the title, creation time and category
     taken from ``self.summary``, then ``config.gap``, then the
     ``blogContent`` div encoded as UTF-8. If
     ``self.summary['commentCount']`` is non-zero, another gap, a comments
     heading and the result of ``Comment(...).work()`` are appended.
     Prints a confirmation line afterwards.
     """
     parsed = BeautifulSoup(self.content)
     # NOTE(review): find() yields None when the div is missing; encode()
     # on it below would then raise AttributeError.
     article = parsed.find('div',
                           id='blogContent',
                           class_='blogDetail-content')

     with open(self.filename, 'w+') as sink:
         headerParts = [
             u'###日志标题: ', self.summary['title'], '\n\n',
             u'#####创建时间: ', self.summary['createTime'], '\n\n',
             u'#####所属分类: ', self.summary['category'], '\n\n',
             config.gap,
         ]
         sink.write(u''.join(headerParts).encode('utf-8'))
         sink.write(article.encode('utf-8'))

         if int(self.summary['commentCount']):
             sink.write(config.gap)
             sink.write((u'#####评论:\n\n').encode('utf-8'))
             comments = Comment(self.userID, self.spider, self.blogID,
                                'blog', self.ownerID)
             sink.write(comments.work())

     # Parenthesising the single expression keeps the printed text the
     # same under the Python 2 print statement.
     print(self.filename + ' save success')
Пример #8
0
    def saveContent(self):
        """Write every entry of ``self.status`` to the owner's ``status.markdown``.

        Stores ``len(self.status)`` in ``self.statusCount``, writes it as
        the first line of ``config.PATH/<ownerID>/status.markdown``, then
        writes one record per status: id, post time, comment count,
        content, original author and original content. Statuses with a
        non-zero ``comment_count`` also get the output of a ``Comment``
        helper. ``config.gap`` follows the count line and every record. A
        timestamped confirmation is printed at the end.
        """
        self.statusCount = len(self.status)
        target = config.PATH + '/' + self.ownerID + '/status.markdown'

        with open(target, 'w') as out:
            out.write('quantity of status:' + str(self.statusCount) + '\n')
            out.write(config.gap)

            for entry in self.status:
                # The content field is written as stored; an earlier
                # variant ran it through BeautifulSoup(...).getText().
                record = u''.join([
                    u'**ID号:** ', str(entry['id']), '\n',
                    u'**发表时间:** ', entry['dtime'], '\n',
                    u'**评论数:** ', str(entry['comment_count']), '\n\n',
                    u'**内容:** ', entry['content'], '\n\n',
                    u'**原作者:** ', entry['rootDoingUserName'], '\n\n',
                    u'**原内容:** ', entry['rootContent'], '\n\n',
                ])
                out.write(record.encode('utf-8'))

                hasComments = int(entry['comment_count'])
                if hasComments:
                    out.write((u'**评论:**\n\n').encode('utf-8'))
                    comments = Comment(self.userID, self.spider, entry['id'],
                                       'status', self.ownerID)
                    out.write(comments.work())

                out.write(config.gap)

        # Same text as printing the two items separated by a comma: the
        # timestamp, one space, then the message.
        print(str(datetime.datetime.now()) + ' ' + ': status save successfully')
Пример #9
0
    def savePhotoComment(self):
        """Write details and comments for each photo to ``photo_detail.markdown``.

        The file is opened in binary mode, so every piece is encoded to
        UTF-8 before writing. Each entry of ``self.photos`` is framed by
        ``Config.GAP`` and lists the photo id, its name (newlines replaced
        by spaces), its time and the ``<id>.jpg`` file name. Entries with a
        non-zero ``commentCount`` also get the output of a ``Comment``
        helper.
        """
        with open(self.path + '/photo_detail.markdown', 'wb') as out:
            for photo in self.photos:
                separator = Config.GAP.encode('utf-8')
                out.write(separator)

                details = ''.join([
                    '***Photo ID: ', str(photo['id']), '***\n\n',
                    '***Photo Name: ', photo['title'].replace('\n', ' '),
                    '***\n\n',
                    '*** Photo Time: ', photo['date'], '***\n\n',
                ])
                out.write(details.encode('utf-8'))  # encode as UTF-8

                photoFile = ''.join(
                    ['Photo File Name: ', str(photo['id']), '.jpg\n\n'])
                out.write(photoFile.encode('utf-8'))

                if int(photo['commentCount']):
                    comment = Comment(self.spider, self.userID, photo['id'],
                                      'photo', photo['owner'])
                    # The u prefix only marks the literal as unicode text;
                    # the explicit encode() produces the UTF-8 bytes.
                    heading = u'***评论: ***\n\n'
                    out.write(heading.encode('utf-8'))
                    out.write(comment.work())

                out.write(separator)