# NOTE(review): the original file's line structure was collapsed onto one
# physical line; it is reformatted here. These functions take `self` and look
# like methods of a MongoStore class whose header (and the start of the
# enclosing open() method) lies outside this chunk — confirm against the
# full file before re-indenting into the class.
self.db = self.conn.xitek  # select the "xitek" database on the open connection


def _to_json(obj):
    """Serialize *obj* to a JSON string, using __dict__ for custom objects.

    The ``default`` hook also covers custom objects nested inside *obj*,
    so a whole object graph becomes plain dicts/lists after a round trip.
    """
    return json.dumps(obj, default=lambda o: o.__dict__)


# Save a thread (topic) document to the db.
def saveThread(self, thread):
    """Persist *thread* into the ``threads`` collection as a plain document."""
    # Round-trip through JSON to turn the object graph into dicts/lists.
    # NOTE(review): Collection.insert() was removed in PyMongo 4.0 —
    # switch to insert_one() when the driver is upgraded.
    self.db.threads.insert(json.loads(_to_json(thread)))


def saveForum(self, forum):
    """Persist *forum* into the ``forums`` collection as a plain document."""
    j = _to_json(forum)
    print(j)  # debug trace kept from the original code
    self.db.forums.insert(json.loads(j))


def savePost(self, post):
    """Persist *post* into the ``posts`` collection as a plain document."""
    self.db.posts.insert(json.loads(_to_json(post)))


if __name__ == "__main__":
    # Smoke test: store one forum and one post.
    ms = MongoStore()
    ms.open()

    forum = ForumInfo()
    forum.forumId = 100
    forum.forumName = "测试论坛"
    ms.saveForum(forum)

    post = PostInfo()
    post.threadId = 1
    post.content = "hello"
    post.postId = "2"
    ms.savePost(post)
def parsePage(self, pageData, pageNum):
    """Parse one thread page: extract every post and the known page count.

    Args:
        pageData: raw HTML of a thread page.
        pageNum: 1-based number of the page being parsed.

    Returns:
        A tuple ``(retList, maxPage)`` where ``retList`` is a list of
        PostInfo objects (user, post id, content, post date) and ``maxPage``
        is the largest page number seen in the pagination links —
        ``pageNum`` itself when no later page link exists, i.e. this is
        the last (or only) page.
    """
    soup = BeautifulSoup(pageData, "html.parser")

    # Each post on the page is rendered as <table id="pidNNNN">.
    tablelist = soup.find_all("table", id=re.compile('^pid'))

    # Compile once, outside the per-post loop (was recompiled per post).
    # Author cell sample:
    #   <td class="pls" ...><a ...><b>username</b></a> ... 注册: 2011年12月<br/>
    userPattern = re.compile(r"<b>(.*?)</b>.*?注册: (.*?)<br/>", re.S)
    # Message cell: <td ... id="postmessage_NNNN" ...>content</td> — the id
    # suffix carries the post id.
    msgPattern = re.compile(r"<td .*?postmessage_(.*?)\".*?>(.*?)</td>", re.S)

    retList = []
    for t in tablelist:
        # Author info lives in the first <td class="pls">.
        u = t.find("td", attrs={"class": "pls"})
        if u is None:
            continue  # unexpected post-table markup — skip instead of crashing
        v = userPattern.findall(u.prettify())
        if not v:
            continue

        postInfo = PostInfo()
        postInfo.threadId = self.threadId
        postInfo.uname = v[0][0].strip()
        # (v[0][1] is the registration date; not stored at the moment)

        # Post body cell; regex pulls both the post id and the content.
        m = t.find("td", id=re.compile('^postmessage_'))
        if m is None:
            continue
        v = msgPattern.findall(m.prettify())
        if not v:
            continue
        postInfo.postId = v[0][0].strip()
        postInfo.content = v[0][1].strip()
        postInfo._id = postInfo.postId  # document key = post id

        # Post date: 3rd <tr>, 1st <td>. Index 2 because the post table
        # nests another table whose rows shift the interesting <tr> down.
        td = t.find_all("tr")[2].find("td")
        postInfo.postDate = td.get_text().strip()

        retList.append(postInfo)

    # Pagination: the page holds two <span class="alln"> regions, e.g.
    #   <span class=alln><div class="pg">
    #     <a href="thread-1482195-1-1-1.html">1</a><strong>2</strong>
    #   </div></span>
    # find() takes the first one. Each link's href is "thread-<tid>-<page>-..."
    # so field 2 of the "-" split is a page number; the largest one is the
    # known page count. BUGFIX: single-page threads have no pagination bar at
    # all (find() returns None) — fall back to the current page instead of
    # raising AttributeError.
    maxPage = pageNum
    pageSpan = soup.find("span", attrs={"class": "alln"})
    if pageSpan is not None:
        for a in pageSpan.find_all("a"):
            href = a.get('href')
            if not href:
                continue  # anchor without href — nothing to parse
            sp = href.split("-")
            if int(sp[2]) > maxPage:
                maxPage = int(sp[2])
    return (retList, maxPage)