def testStoryExists(self):
    """Check that storyExists() is true for a saved story and false otherwise.

    Creates the test database, adds one fake story, then asserts that
    storyExists() returns a truthy value for that story's id and a falsy
    value for an id that was never added.
    """
    story = self._getFakeStory()
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        db.addStory(story)
        # storyExists() is queried with the string form of the id
        story_id = str(story['stories_id'])
        self.assertTrue(db.storyExists(story_id))
        # an arbitrary id that was never added to this database
        self.assertFalse(db.storyExists('43223535'))
    finally:
        # drop the test database even when an assertion above fails,
        # so a failing run does not leave it behind
        db.deleteDatabase(self.TEST_DB_NAME)
def testAddStory(self):
    """Check that addStory() accepts a new story and rejects a duplicate.

    The first addStory() call must return a truthy value, a second call
    with the same story must return a falsy one, and the stored document
    must carry the stringified story id as '_id' and a
    'story_sentences_count' of 2.
    """
    story = self._getFakeStory()
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        # first insert of the story is expected to succeed
        worked = db.addStory(story)
        self.assertTrue(worked)
        # inserting the very same story again is expected to be refused
        worked = db.addStory(story)
        self.assertFalse(worked)
        story_id = str(story['stories_id'])
        saved_story = db.getStory(story_id)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(saved_story['_id'], story_id)
        self.assertEqual(saved_story['story_sentences_count'], 2)
    finally:
        # drop the test database even when an assertion above fails
        db.deleteDatabase(self.TEST_DB_NAME)
def testGetMaxStoryId(self):
    """Check that getMaxStoryId() is 0 when empty and the largest id after adds.

    Two fake stories are given the ids "1000" and "2000". Before any
    story is added getMaxStoryId() must equal 0; after both are added it
    must equal 2000.
    """
    story1 = self._getFakeStory()
    story1['stories_id'] = "1000"
    story2 = self._getFakeStory()
    # the second id belongs on story2 (it was previously written to
    # story1 again, leaving story2 with whatever id the fake story had)
    story2['stories_id'] = "2000"
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        # store the example views in the underlying db handle; presumably
        # getMaxStoryId() relies on one of them -- TODO confirm
        db._db.save(mediacloud.examples.getAllExampleViews())
        self.assertEqual(db.getMaxStoryId(), 0)
        db.addStory(story1)
        db.addStory(story2)
        self.assertEqual(db.getMaxStoryId(), 2000)
    finally:
        # drop the test database even when an assertion above fails
        db.deleteDatabase(self.TEST_DB_NAME)
To Install: >>> import nltk >>> nltk.download() [ select d for Download ] [ enter "stopwords" as the identifier ] [ enter "punkt" as the identifier ] ''' config = ConfigParser.ConfigParser() config.read('mc-client.config') # set up a connection to a local DB db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') ) # connect to MC and fetch some articles mc = MediaCloud( config.get('api','user'), config.get('api','pass') ) results = mc.recentStories() print "Fetched "+str(len(results))+" stories" # set up my callback function that adds readability score to the story pub.subscribe(mediacloud.examples.addFleshKincaidGradeLevelToStory, StoryDatabase.EVENT_PRE_STORY_SAVE) # save all the stories in the db (this will fire the callback above) saved = 0 for story in results: worked = db.addStory(story) if worked: saved = saved + 1 print "Saved "+str(saved)+" stories"
# setup the mediacloud connection mc = MediaCloud( config.get('api','user'), config.get('api','pass') ) # Must first seed database with latest Story ID, or else it will start at the beginning (2005) max_story_id = articles_db.getMaxStoryId() results = mc.storiesSince( max_story_id, STORIES_TO_FETCH, fetch_raw_text = True ) log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")") # set up a callback function that adds twitter username occurrences to the story pub.subscribe(mediacloud.examples.addTwitterReferencesToStory, StoryDatabase.EVENT_PRE_STORY_SAVE) # save all the stories in the db saved = 0 for story in results: print 'new story', worked = articles_db.addStory(story) if worked: saved = saved + 1 else: log.warning(" unable to save story "+str(story['stories_id'])) if story['first_raw_download_file']: text = story['first_raw_download_file'] elif story['story_text']: text = story['story_text'] else: text = '' try: if not mediacloud.examples.isEnglish(text): print 'i', continue