Exemplo n.º 1
0
    def put_article_in_db(self, story_url):
        """Scrape the article at ``story_url`` and store it if it is new.

        Nothing is done when ``self.db.in_set`` reports that a document with
        this URL already exists. Otherwise a ``NewsArticle`` is built from the
        URL, its metadata is gathered into a dict, and that dict is passed to
        ``self.db.CollectionSubmitOne``.

        Any ``Exception`` raised along the way is caught and printed rather
        than propagated, so a failing article does not interrupt the caller.

        :param story_url: URL of the story to scrape.
        :returns: ``None`` in every case.
        """
        try:
            if self.db.in_set({'url': story_url}):
                return

            current_article = NewsArticle(story_url)

            # The getters are called in their original order because
            # NewsArticle is not visible here and they may have side effects.
            # Type notes are carried over from the original author's comments.
            article_published = current_article.date_made()  # datetime object
            article_title = current_article.get_title()  # string

            # NOTE(review): the return value is discarded; presumably this
            # call prepares the article for the getters below -- confirm
            # against NewsArticle.
            current_article.goodArticle()

            article_key_words = current_article.getKeywords()  # list of strings
            article_videos = current_article.get_videos()  # list of video URLs
            article_summary = current_article.getSummary()  # string
            article_authors = current_article.getAuthors()  # list of strings
            article_thumbnaillink = current_article.thumbnail_url()  # image URL

            # The stored URL and the domain come from the article object,
            # not from the story_url argument.
            article_url = current_article.get_url()
            res = get_tld(article_url, as_object=True)

            new_entry = {
                'title': article_title,
                'sum': article_summary,
                'auth': article_authors,
                'thumb': article_thumbnaillink,
                'pub': article_published,
                'keywords': article_key_words,
                'vids': article_videos,
                'likes': 0,
                'dislikes': 0,
                'comments': [],
                'url': article_url,
                # Stored as naive UTC. The earlier datetime.now() assignment
                # was always overwritten by this value, so it was removed.
                'creationtime': datetime.datetime.utcnow(),
                # Both keys intentionally carry the same domain value, as in
                # the original document layout.
                'publisher': res.domain,
                'companycreator': res.domain,
            }

            self.db.CollectionSubmitOne(new_entry)

        except Exception as e:
            # Best-effort: report the failure and carry on. Single-argument
            # print(...) emits the same text under the Python 2 print
            # statement and the Python 3 print function.
            print("------")
            print("its f****d emma")
            print(e)
            print("------")
Exemplo n.º 2
0
	def put_article_in_db(self):
		"""Scrape the hot submissions of every subreddit in ``self.sublist``.

		For each name in ``self.sublist`` up to 30 hot submissions are
		fetched through ``self.reddit``. A submission whose URL is already
		known to ``self.db.in_set`` is only counted and reported. Every
		other URL is scraped with ``NewsArticle`` and the resulting document
		is passed to ``self.db.CollectionSubmitOne``.

		Any ``Exception`` is caught and printed rather than propagated.

		NOTE(review): the first failure ends the whole run, because the
		``try`` wraps both loops -- confirm whether per-article isolation
		is wanted.

		:returns: ``None`` in every case.
		"""
		counter = 0
		# Created on first use and then shared by all articles. Previously a
		# new MongoClient was opened for every article and never closed.
		mydb = None
		try:
			for x in self.sublist:
				submissions = self.reddit.get_subreddit(x).get_hot(limit=30)
				for submission in submissions:
					# Non-ASCII characters are dropped from the URL.
					story_url = submission.url.encode('ascii', 'ignore')

					if self.db.in_set({'url': story_url}):
						# The count is printed before it is incremented, so
						# the first duplicate is reported as 0.
						print("Already have it " + str(counter))
						counter = counter + 1
						continue

					print(str(story_url))
					current_article = NewsArticle(story_url)

					# The getters are called in their original order because
					# NewsArticle is not visible here and they may have side
					# effects. Type notes come from the original comments.
					article_published = current_article.date_made()  # datetime object
					article_title = current_article.get_title()  # string

					# NOTE(review): the return value is discarded; presumably
					# this call prepares the article for the getters below --
					# confirm against NewsArticle.
					current_article.goodArticle()

					article_key_words = current_article.getKeywords()  # list of strings
					article_videos = current_article.get_videos()  # list of video URLs
					article_summary = current_article.getSummary()  # string
					article_authors = current_article.getAuthors()  # list of strings
					article_thumbnaillink = current_article.thumbnail_url()  # image URL

					if mydb is None:
						mydb = pymongo.MongoClient()
					res = get_tld(story_url, as_object=True)

					new_entry = {
						'title': article_title,
						'sum': article_summary,
						'author': article_authors,
						'thumb': article_thumbnaillink,
						'pub': article_published,
						'keywords': article_key_words,
						'vids': article_videos,
						'likes': 0,
						'dislikes': 0,
						'comments': [],
						'url': story_url,
						'_id': uuid.uuid4().hex,
						# NOTE(review): count-then-insert is not atomic, so
						# concurrent writers could get the same postnum.
						'postnum': mydb.lyket.articles.count(),
						# Stored as naive UTC. The earlier datetime.now()
						# assignment was always overwritten by this value,
						# so it was removed.
						'creationtime': datetime.datetime.utcnow(),
						# Both keys intentionally carry the same domain value.
						'publisher': res.domain,
						'companycreator': res.domain,
					}

					self.db.CollectionSubmitOne(new_entry)
					print("Done with article " + str(mydb.lyket.articles.count()))

		except Exception as e:
			# Best-effort: report the failure and stop. Single-argument
			# print(...) emits the same text under the Python 2 print
			# statement and the Python 3 print function.
			print("------")
			print("its f****d emma")
			print(e)
			print("------")
		finally:
			# Release the connection opened above, if one was opened.
			if mydb is not None:
				mydb.close()
Exemplo n.º 3
0
	def put_article_in_db(self,story_url):
		"""Scrape the article at ``story_url`` and store it if it is new.

		Nothing is done when ``self.db.in_set`` reports that a document with
		this URL already exists. Otherwise a ``NewsArticle`` is built from
		the URL, its metadata is gathered into a dict, and that dict is
		passed to ``self.db.CollectionSubmitOne``.

		Any ``Exception`` raised along the way is caught and printed rather
		than propagated, so a failing article does not interrupt the caller.

		:param story_url: URL of the story to scrape.
		:returns: ``None`` in every case.
		"""
		try:
			if self.db.in_set({'url': story_url}):
				return

			current_article = NewsArticle(story_url)

			# The getters are called in their original order because
			# NewsArticle is not visible here and they may have side effects.
			# Type notes are carried over from the original author's comments.
			article_published = current_article.date_made()  # datetime object
			article_title = current_article.get_title()  # string

			# NOTE(review): the return value is discarded; presumably this
			# call prepares the article for the getters below -- confirm
			# against NewsArticle.
			current_article.goodArticle()

			article_key_words = current_article.getKeywords()  # list of strings
			article_videos = current_article.get_videos()  # list of video URLs
			article_summary = current_article.getSummary()  # string
			article_authors = current_article.getAuthors()  # list of strings
			article_thumbnaillink = current_article.thumbnail_url()  # image URL

			# The stored URL and the domain come from the article object,
			# not from the story_url argument.
			article_url = current_article.get_url()
			res = get_tld(article_url, as_object=True)

			new_entry = {
				'title': article_title,
				'sum': article_summary,
				'auth': article_authors,
				'thumb': article_thumbnaillink,
				'pub': article_published,
				'keywords': article_key_words,
				'vids': article_videos,
				'likes': 0,
				'dislikes': 0,
				'comments': [],
				'url': article_url,
				# Stored as naive UTC. The earlier datetime.now() assignment
				# was always overwritten by this value, so it was removed.
				'creationtime': datetime.datetime.utcnow(),
				# Both keys intentionally carry the same domain value, as in
				# the original document layout.
				'publisher': res.domain,
				'companycreator': res.domain,
			}

			self.db.CollectionSubmitOne(new_entry)

		except Exception as e:
			# Best-effort: report the failure and carry on. Single-argument
			# print(...) emits the same text under the Python 2 print
			# statement and the Python 3 print function.
			print("------")
			print("its f****d emma")
			print(e)
			print("------")