Пример #1
0
def main():
	"""Download the Google+ community page and pass its post spans to updateURLs.

	Logs in through SessionGoogle with the credentials held in CONSTANT,
	parses the community page with BeautifulSoup and forwards every
	``span`` with class ``uv PL`` found in the page body to updateURLs.
	"""
	community_url = "https://plus.google.com/u/0/communities/102497715636887179986"

	# Authenticated session used for the page download
	session = SessionGoogle(CONSTANT.LOGIN_URL, CONSTANT.AUTH_URL, CONSTANT.UNAME, CONSTANT.PASSWORD)
	response = session.get(community_url)
	page_body = BeautifulSoup(response.text).body

	post_spans = page_body.find_all('span', attrs={'class': 'uv PL'})

	# Looked up alongside the posts, but not used anywhere else in this function
	category_spans = page_body.find_all('span', attrs={'class': 'Wt Hm Ve tr'})

	updateURLs(post_spans)
Пример #2
0
def main():
	"""Scrape every queued post URL and store posts, comments and users in MongoDB.

	Expects exactly one command-line argument, ``test`` or ``live``, which
	selects ``test_database`` or ``live_database`` on the MongoDB server at
	localhost:27017. Any other invocation prints a message and returns
	before a session is opened or a database is touched.

	A post whose scraping or storing raises an exception is reported and
	skipped; the remaining posts are still processed.
	"""
	if len(sys.argv) != 2:
		print('Usage: python dataStore.py test/live')
		return

	id_dbase = sys.argv[1]
	if id_dbase not in ('test', 'live'):
		# Stop here: falling through with an unrecognised name would end up
		# selecting the live database below.
		print(' Invalid database!!! Please enter either test or live')
		return

	dataSession = SessionGoogle(CONSTANT.LOGIN_URL, CONSTANT.AUTH_URL, CONSTANT.UNAME, CONSTANT.PASSWORD)

	# Read the URLs to process, dropping empty entries
	allUrls = [url for url in readUrlsFromFile() if url]
	print(allUrls)

	# Create a post object for every URL
	latestPosts = [Post(url) for url in allUrls]
	print(len(latestPosts))

	client = MongoClient('localhost', 27017) #Create a mongodb client
	if id_dbase == 'test':
		dbase = client.test_database
	else:
		dbase = client.live_database

	for post in latestPosts:
		# Retrieve and store the information for each post
		try:
			postPage = BeautifulSoup(dataSession.get(post.url).text)
			post.date = getPostDate(postPage)
			post.category = getCategory(postPage)
			post.plusOned = getPlusOned(postPage)
			post.numberOfComments = getNumberOfComments(postPage)

			# getUser is queried once; index 0 is used as the id, index 1 as the name
			userInfo = getUser(postPage)
			post.user = userInfo[1]
			post.userId = userInfo[0]
			post.userURL = 'https://plus.google.com/' + post.userId + '/about/'
			post.content = getPostContent(postPage)

			all_contents = getPostComments(postPage)
			allusers = getUserURL(postPage)
			comments = []
			for index, contents in enumerate(all_contents):
				if index == 0:
					# To skip the post content as post content is provided by 0th index
					continue
				newComment = Comment(post.url, contents.text)
				# allusers is offset by one because it has no entry for the post itself
				newComment.user = allusers[index - 1][1]
				newComment.userId = allusers[index - 1][0]
				comments.append(newComment)

			insertUserInDb(dbase, post.user, post.userId, id_dbase)
			loadPostsInDatabase(dbase, post, id_dbase)
			# Insert comments only after inserting users
			for comment in comments:
				insertUserInDb(dbase, comment.user, comment.userId, id_dbase)
				loadCommentsInDatabase(dbase, comment, id_dbase)
		except Exception as exc:
			# Best-effort per post: report the failure with its cause and move on.
			# Catching Exception (not a bare except) lets Ctrl-C / SystemExit abort the run.
			print('Exeption for :  %s (%r)' % (post.url, exc))
			continue
		# Runs after each post that was stored without error.
		# NOTE(review): confirm whether this was meant to run once after the loop instead.
		clearFile()