def main():
    """Fetch the Google+ community page and hand its post spans to updateURLs.

    Opens a SessionGoogle using the login/auth URLs and credentials held in
    CONSTANT, downloads the community page, parses it with BeautifulSoup and
    passes every ``<span class="uv PL">`` found in the body to ``updateURLs``.
    """
    # Create a session for data collection
    dataSession = SessionGoogle(CONSTANT.LOGIN_URL, CONSTANT.AUTH_URL,
                                CONSTANT.UNAME, CONSTANT.PASSWORD)

    response = dataSession.get(
        "https://plus.google.com/u/0/communities/102497715636887179986")
    communityPage = BeautifulSoup(response.text)

    # Only the 'uv PL' spans are consumed; the former category lookup
    # ('Wt Hm Ve tr') produced a value that was never used.
    all_contents = communityPage.body.find_all('span', attrs={'class': 'uv PL'})
    updateURLs(all_contents)
def _database_id_from_argv(argv):
    """Return the database id ('test' or 'live') named by argv, else None.

    Prints the usage line when the argument count is wrong, and the
    invalid-database message when the single argument is neither id.
    """
    if len(argv) != 2:
        print('Usage: python dataStore.py test/live')
        return None
    if argv[1] not in ('test', 'live'):
        print(' Invalid database!!! Please enter either test or live')
        return None
    return argv[1]


def _build_comments(post_url, contents, authors):
    """Build a Comment for every entry of contents after the first one.

    Entry 0 of ``contents`` is the post content itself and is skipped, so
    entry k (k >= 1) is paired with ``authors[k - 1]``. Each author entry is
    indexed as ``[0]`` -> userId and ``[1]`` -> user.
    """
    comments = []
    for index, content in enumerate(contents):
        if index == 0:
            # To skip the post content as post content is provided by 0th index
            continue
        newComment = Comment(post_url, content.text)
        author = authors[index - 1]
        newComment.user = author[1]
        newComment.userId = author[0]
        comments.append(newComment)
    return comments


def main():
    """Scrape every queued post URL and store its post, users and comments.

    Expects exactly one command-line argument, ``test`` or ``live``, which
    selects ``client.test_database`` or ``client.live_database`` on the
    MongoDB server at localhost:27017. Any other invocation prints a message
    and returns without touching the network or the database.

    For each URL returned by ``readUrlsFromFile`` the page is fetched through
    a SessionGoogle, the post fields are filled in from the parsed page, and
    the post author, the post, and then each comment author and comment are
    written via ``insertUserInDb`` / ``loadPostsInDatabase`` /
    ``loadCommentsInDatabase``. A failure on one post is reported and the
    loop moves on to the next post.
    """
    id_dbase = _database_id_from_argv(sys.argv)
    if id_dbase is None:
        # An unrecognised argument used to print a warning and then carry on
        # with the live database; stop instead.
        return

    dataSession = SessionGoogle(CONSTANT.LOGIN_URL, CONSTANT.AUTH_URL,
                                CONSTANT.UNAME, CONSTANT.PASSWORD)

    # Create a list of Posts, dropping empty entries
    allUrls = [url for url in readUrlsFromFile() if url]
    print(allUrls)

    # Create post object for all posts
    latestPosts = [Post(url) for url in allUrls]
    print(len(latestPosts))

    client = MongoClient('localhost', 27017)  # Create a mongodb client
    try:
        if id_dbase == 'test':
            dbase = client.test_database
        else:
            dbase = client.live_database

        for post in latestPosts:
            # Retrieve information for each post
            try:
                postPage = BeautifulSoup(dataSession.get(post.url).text)
                post.date = getPostDate(postPage)
                post.category = getCategory(postPage)
                post.plusOned = getPlusOned(postPage)
                post.numberOfComments = getNumberOfComments(postPage)

                # getUser is indexed as [0] -> userId, [1] -> user; call once.
                postUser = getUser(postPage)
                post.user = postUser[1]
                post.userId = postUser[0]
                post.userURL = ('https://plus.google.com/' + post.userId
                                + '/about/')
                post.content = getPostContent(postPage)

                comments = _build_comments(post.url,
                                           getPostComments(postPage),
                                           getUserURL(postPage))

                # Users go in before the records that refer to them.
                insertUserInDb(dbase, post.user, post.userId, id_dbase)
                loadPostsInDatabase(dbase, post, id_dbase)

                # Insert comments only after inserting users
                for comment in comments:
                    insertUserInDb(dbase, comment.user, comment.userId,
                                   id_dbase)
                    loadCommentsInDatabase(dbase, comment, id_dbase)
            except Exception as err:
                # Best effort per post: report and move on. Exception (not a
                # bare except) lets Ctrl-C and SystemExit stop the run.
                print('Exeption for :  %s (%r)' % (post.url, err))
    finally:
        client.close()

    # NOTE(review): the original indentation was lost; clearFile() is assumed
    # to run once after all posts have been processed - confirm.
    clearFile()