def getAlbumComments(self):
    """Fetch this album's comments and save them to ``comments.markdown``.

    A ``Comment`` is built for the album (``self.albumID``, kind ``'album'``)
    and its ``work()`` result is written, below a bold comments heading, to
    ``<self.path>/comments.markdown``.  No file is created when ``work()``
    returns nothing.
    """
    comment = Comment(self.userID, self.spider, self.albumID, 'album',
                      self.ownerID)
    content = comment.work()
    # Only write when there is something to save: an empty result must not
    # leave a heading-only file behind, and a non-empty one must be persisted.
    if content:
        with open(self.path + '/comments.markdown', 'w') as f:
            f.write((u'**评论: **\n\n').encode('utf-8'))
            f.write(content)
def saveBlog(self):
    """Download the blog page and save it, plus any comments, to ``self.filename``.

    The page is fetched from ``self.url`` through ``self.spider.getContent``
    and kept on ``self.content``.  The ``blogContent`` div is extracted, its
    paragraph and line-break tags are turned into newlines, and the result is
    written after a header built from ``self.summary`` (title, creation time,
    category).  When ``self.summary['commentCount']`` is non-zero the output
    of ``Comment(...).work()`` is appended under a comments heading.
    """
    # Grab the part of the HTML we need; the element can be located with the
    # "Elements" panel of the Chrome developer tools on the blog page.
    self.content = self.spider.getContent(self.url)
    page = BeautifulSoup(self.content)
    body_tag = page.find('div', id='blogContent', class_='blogDetail-content')

    # Replace the <p>, <br>, </p> and <br/> tags with newlines so the saved
    # text is easier to read.
    tag_pattern = r'<p>|<br>|</p>|<br/>'
    body_text = re.sub(tag_pattern, r'\n', body_tag.decode())

    with open(self.filename, 'wb+') as out:
        header = (
            u'*** 日志标题: ***' + self.summary['title'] + '\n\n'
            + u'*** 创建时间: ***' + self.summary['createTime'] + '\n\n'
            + u'*** 所属分类: ***' + self.summary['category'] + '\n\n'
            + Config.GAP
        )
        out.write(header.encode('utf-8'))
        out.write(body_text.encode('utf-8'))

        if int(self.summary['commentCount']):
            out.write(Config.GAP.encode('utf-8'))
            out.write((u'*** 评论: ***\n\n').encode('utf-8'))
            comments = Comment(self.spider, self.userID, self.blogID, 'blog',
                               self.ownerID)
            # Written as-is to the binary file; presumably work() already
            # returns encoded bytes -- confirm against Comment.
            out.write(comments.work())

    print(self.filename + ' saves successfully')
def getAlbumComments(self):
    """Save the comments of this album to ``<self.path>/comments.markdown``.

    Builds a ``Comment`` for ``self.albumID`` (kind ``'album'``), calls its
    ``work()`` method and, if that yields any content, writes a bold comments
    heading followed by that content.  When there is no content the file is
    not created at all.
    """
    comment = Comment(self.userID, self.spider, self.albumID, 'album',
                      self.ownerID)
    content = comment.work()
    # Skip albums without comments; writing is only meaningful when work()
    # produced something, otherwise the file would hold just the heading.
    if content:
        with open(self.path + '/comments.markdown', 'w') as f:
            f.write((u'**评论: **\n\n').encode('utf-8'))
            f.write(content)
def savePhotoComment(self):
    """Write details and comments of every photo to ``photo details.markdown``.

    For each entry of ``self.photos`` the file receives the photo's ID, title
    (newlines replaced by spaces) and date in bold, a markdown image link to
    ``<id>.jpg``, the output of ``Comment(...).work()`` under a comments
    heading when ``commentCount`` is non-zero, and finally ``config.gap``.
    """
    with open(self.path + "/photo details.markdown", "w") as out:
        for photo in self.photos:
            photo_id = str(photo["id"])
            details = (
                u"**ID: " + photo_id + "**\n\n"
                + u"**名称: " + photo["title"].replace("\n", " ") + "**\n\n"
                + u"**时间: " + photo["date"] + "**\n\n"
            )
            out.write(details.encode("utf-8"))

            # Image link; the picture is referenced by its ID-based file name.
            image_link = "![" + photo_id + "](" + photo_id + ".jpg)\n\n"
            out.write(image_link.encode("utf-8"))

            if int(photo["commentCount"]):
                comment = Comment(self.userID, self.spider, photo["id"],
                                  "photo", photo["owner"])
                out.write((u"**评论: **\n\n").encode("utf-8"))
                out.write(comment.work())

            # Separator after each photo's section.
            out.write(config.gap)
def saveBlog(self):
    """Save the blog held in ``self.content`` to ``self.filename`` as markdown.

    The ``blogContent`` div is extracted from the already-downloaded HTML and
    written after a header built from ``self.summary`` (title, creation time,
    category) and ``config.gap``.  When ``self.summary['commentCount']`` is
    non-zero, a comments heading and the output of ``Comment(...).work()``
    are appended.  A confirmation line is printed at the end.
    """
    page = BeautifulSoup(self.content)
    body_tag = page.find('div', id='blogContent', class_='blogDetail-content')

    with open(self.filename, 'w+') as out:
        header = (
            u'###日志标题: ' + self.summary['title'] + '\n\n'
            + u'#####创建时间: ' + self.summary['createTime'] + '\n\n'
            + u'#####所属分类: ' + self.summary['category'] + '\n\n'
            + config.gap
        )
        out.write(header.encode('utf-8'))
        out.write(body_tag.encode('utf-8'))

        if int(self.summary['commentCount']):
            out.write(config.gap)
            out.write((u'#####评论:\n\n').encode('utf-8'))
            comments = Comment(self.userID, self.spider, self.blogID, 'blog',
                               self.ownerID)
            out.write(comments.work())

    # Single parenthesised expression: prints the same text on Python 2.
    print(self.filename + ' save success')
def savePhotoComment(self):
    """Dump per-photo details and comments into ``photo details.markdown``.

    Each item of ``self.photos`` produces: bold ID / title / date lines (the
    title has its newlines replaced by spaces), a markdown image reference to
    ``<id>.jpg``, an optional comments section obtained from
    ``Comment(...).work()`` when ``commentCount`` is non-zero, and a trailing
    ``config.gap`` separator.
    """
    target = self.path + '/photo details.markdown'
    with open(target, 'w') as out:
        for photo in self.photos:
            name = str(photo['id'])
            summary = (
                u'**ID: ' + name + '**\n\n'
                + u'**名称: ' + photo['title'].replace('\n', ' ') + '**\n\n'
                + u'**时间: ' + photo['date'] + '**\n\n'
            )
            out.write(summary.encode('utf-8'))
            out.write(('![' + name + '](' + name + '.jpg)\n\n').encode('utf-8'))

            has_comments = int(photo['commentCount'])
            if has_comments:
                comment = Comment(self.userID, self.spider, photo['id'],
                                  'photo', photo['owner'])
                out.write((u'**评论: **\n\n').encode('utf-8'))
                out.write(comment.work())

            # One separator closes every photo's section.
            out.write(config.gap)
def saveBlog(self):
    """Write the blog in ``self.content`` (and its comments) to ``self.filename``.

    Parses ``self.content``, pulls out the ``blogContent`` div and writes it
    below a header made of the title, creation time and category found in
    ``self.summary``, followed by ``config.gap``.  If the summary reports a
    non-zero ``commentCount``, the result of ``Comment(...).work()`` is added
    under a comments heading.  Prints a confirmation once the file is closed.
    """
    parsed = BeautifulSoup(self.content)
    blog_div = parsed.find('div', id='blogContent',
                           class_='blogDetail-content')

    with open(self.filename, 'w+') as out:
        heading = (
            u'###日志标题: ' + self.summary['title'] + '\n\n'
            + u'#####创建时间: ' + self.summary['createTime'] + '\n\n'
            + u'#####所属分类: ' + self.summary['category'] + '\n\n'
            + config.gap
        )
        out.write(heading.encode('utf-8'))
        out.write(blog_div.encode('utf-8'))

        comment_total = int(self.summary['commentCount'])
        if comment_total:
            out.write(config.gap)
            out.write((u'#####评论:\n\n').encode('utf-8'))
            comments = Comment(self.userID, self.spider, self.blogID, 'blog',
                               self.ownerID)
            out.write(comments.work())

    # Parenthesised single expression keeps the Python 2 output unchanged.
    print(self.filename + ' save success')
def saveContent(self):
    """Write every status in ``self.status`` to ``status.markdown``.

    Stores ``len(self.status)`` on ``self.statusCount`` and writes it as the
    first line of ``<config.PATH>/<self.ownerID>/status.markdown``.  Each
    status then contributes its ID, publish time, comment count, content,
    original author and original content; when ``comment_count`` is non-zero
    the output of ``Comment(...).work()`` follows under a comments heading.
    ``config.gap`` separates the entries.  A timestamped confirmation is
    printed at the end.
    """
    self.statusCount = len(self.status)
    target = config.PATH + '/' + self.ownerID + '/status.markdown'

    with open(target, 'w') as out:
        out.write('quantity of status:' + str(self.statusCount) + '\n')
        out.write(config.gap)

        for entry in self.status:
            record = (
                u'**ID号:** ' + str(entry['id']) + '\n'
                + u'**发表时间:** ' + entry['dtime'] + '\n'
                + u'**评论数:** ' + str(entry['comment_count']) + '\n\n'
                + u'**内容:** ' + entry['content'] + '\n\n'
                + u'**原作者:** ' + entry['rootDoingUserName'] + '\n\n'
                + u'**原内容:** ' + entry['rootContent'] + '\n\n'
            )
            out.write(record.encode('utf-8'))

            if int(entry['comment_count']):
                out.write((u'**评论:**\n\n').encode('utf-8'))
                comments = Comment(self.userID, self.spider, entry['id'],
                                   'status', self.ownerID)
                out.write(comments.work())

            # Separator after each status entry.
            out.write(config.gap)

    # Timestamp and message joined by a single space, as a two-item print
    # statement would emit them.
    print('%s %s' % (datetime.datetime.now(), ': status save successfully'))
def savePhotoComment(self):
    """Write details and comments of all photos to ``photo_detail.markdown``.

    The file ``<self.path>/photo_detail.markdown`` is opened in binary mode.
    Every entry of ``self.photos`` is preceded by ``Config.GAP`` and lists the
    photo ID, name (newlines replaced by spaces), time and ``<id>.jpg`` file
    name; when ``commentCount`` is non-zero, a comments heading and the
    output of ``Comment(...).work()`` follow.  A final ``Config.GAP`` closes
    the file.
    """
    separator = Config.GAP.encode('utf-8')
    with open(self.path + '/photo_detail.markdown', 'wb') as out:
        for photo in self.photos:
            out.write(separator)

            photo_id = str(photo['id'])
            details = (
                '***Photo ID: ' + photo_id + '***\n\n'
                + '***Photo Name: ' + photo['title'].replace('\n', ' ')
                + '***\n\n'
                + '*** Photo Time: ' + photo['date'] + '***\n\n'
            )
            # Everything goes to disk as UTF-8 encoded bytes.
            out.write(details.encode('utf-8'))
            out.write(('Photo File Name: ' + photo_id + '.jpg\n\n')
                      .encode('utf-8'))

            if int(photo['commentCount']):
                comment = Comment(self.spider, self.userID, photo['id'],
                                  'photo', photo['owner'])
                # The u prefix is redundant here because the literal is
                # encoded explicitly right away.
                out.write((u'***评论: ***\n\n').encode('utf-8'))
                # Presumably work() returns bytes, since the file is binary
                # -- confirm against Comment.
                out.write(comment.work())

        # NOTE(review): closing separator assumed to follow the loop (each
        # photo already starts with one) -- confirm intended placement.
        out.write(separator)