Пример #1
0
 def testGetMaxStoryId(self):
     """Check that getMaxStoryId() tracks the highest story id saved.

     After the documents returned by getAllExampleViews() are saved into a
     fresh test database, the max id must be 0; once stories with ids "1000"
     and "2000" have been added it must be 2000.
     """
     story1 = self._getFakeStory()
     story1['stories_id'] = "1000"
     story2 = self._getFakeStory()
     # This id belongs on story2; assigning it to story1 again would discard
     # the "1000" id and leave story2 with the fake-story helper's own id.
     story2['stories_id'] = "2000"
     db = StoryDatabase()
     db.createDatabase(self.TEST_DB_NAME)
     try:
         db._db.save(mediacloud.examples.getAllExampleViews())
         self.assertEqual(db.getMaxStoryId(), 0)
         db.addStory(story1)
         db.addStory(story2)
         self.assertEqual(db.getMaxStoryId(), 2000)
     finally:
         # Drop the test database even when an assertion above fails, so a
         # failed run does not leave it behind for later tests.
         db.deleteDatabase(self.TEST_DB_NAME)
# Read the client settings (database location and API credentials).
config = ConfigParser.ConfigParser()
config.read('mc-client.config')

# Send log output to a file and write a separator line for this run.
logging.basicConfig(level=logging.DEBUG, filename='mc-realtime.log')
log = logging.getLogger('mc-realtime')
log.info("---------------------------------------------------------------------------")

# Connect to the local 'mediacloud' story database.
db = StoryDatabase(
    'mediacloud',
    config.get('db','host'),
    config.get('db','port')
)

# Connect to the MediaCloud API with the configured credentials.
mc = MediaCloud(
    config.get('api','user'),
    config.get('api','pass')
)

# Ask the API for stories after the highest id already stored locally.
max_story_id = db.getMaxStoryId()
results = mc.storiesSince(max_story_id, STORIES_TO_FETCH)
log.info("".join(["Fetched ", str(len(results)), " stories (after ", str(max_story_id), ")"]))

# Subscribe the example callbacks to the pre-save event, in this order:
# word count, language guess, then reading grade level.
for _callback in (
    mediacloud.examples.addWordCountToStory,
    mediacloud.examples.addIsEnglishToStory,
    mediacloud.examples.addFleshKincaidGradeLevelToStory,
):
    pub.subscribe(_callback, StoryDatabase.EVENT_PRE_STORY_SAVE)

# Counter for the stories saved to the db.
saved = 0
for story in results:
log.info("---------------------------------------------------------------------------")

# setup a connection to a local DB of articles
#db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') )
articles_db = StoryDatabase('articles', config.get('db','host'), config.get('db','port') )

# setup a connection to a local DB of twitter accounts
#accounts_db = StoryDatabase('accounts', config.get('db','host'), config.get('db','port') )
server = couchdb.Server()
accounts_db = server['accounts']

# setup the mediacloud connection
mc = MediaCloud( config.get('api','user'), config.get('api','pass') )

# Must first seed database with latest Story ID, or else it will start at the beginning (2005)
max_story_id = articles_db.getMaxStoryId()
results = mc.storiesSince( max_story_id, STORIES_TO_FETCH, fetch_raw_text = True )
log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")")

# set up a callback function that adds twitter username occurrences to the story
pub.subscribe(mediacloud.examples.addTwitterReferencesToStory, StoryDatabase.EVENT_PRE_STORY_SAVE)

# save all the stories in the db
saved = 0
for story in results:
    print 'new story',
    worked = articles_db.addStory(story)
    if worked:
      saved = saved + 1
    else:
      log.warning("  unable to save story "+str(story['stories_id']))
Пример #4
0
 def testCreateMaxIdView(self):
     """Check that a freshly created database reports a max story id of 0.

     The documents returned by getAllExampleViews() are saved into a new
     test database before getMaxStoryId() is queried.
     """
     db = StoryDatabase()
     db.createDatabase(self.TEST_DB_NAME)
     try:
         db._db.save(mediacloud.examples.getAllExampleViews())
         self.assertEqual(db.getMaxStoryId(), 0)
     finally:
         # Drop the test database even when the assertion fails, so a failed
         # run does not leave it behind for later tests.
         db.deleteDatabase(self.TEST_DB_NAME)