def setUp(self):
    """Prepare a ContainerMgr whose chunk and db managers are mocks."""
    manager = ContainerMgr(MagicMock(), 'foobar')
    manager.chunkmgr = MagicMock()
    manager.dbmgr = MagicMock()
    # TODO: srsly?
    manager.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
    self.mgr = manager

    # Work on a copy of the chunk sample so the test can use it freely.
    self.chunk = chunk_samples.chunk_sample.copy()
    # NOTE: this assignment writes to the sample held by chunk_samples
    # itself, not to a copy.
    chunk_samples.chunk_container_sample["size"] = 10
def test_constructor_from_db(self):
    """Check that the constructed container's current_chunk is a str.

    The constructor arguments are read from ``self.container``, which is
    not assigned in this method and must be set before the test runs.
    """
    cc = ChunkContainer(ContainerMgr(MagicMock(), 'foobar'),
                        self.container.get('size'),
                        self.container.get('start_date'),
                        self.container.get('chunks'),
                        self.container.get('current_chunk'))
    # assertEqual instead of assertEquals: the latter is a deprecated
    # alias that no longer exists in Python 3.12+.
    self.assertEqual(type(cc.current_chunk), str)
def test_constructor_bad_size(self):
    """ChunkContainer built from the bad-size sample raises AssertionError."""
    self.container = chunk_samples.chunk_container_sample_bad_size
    sample = self.container
    # The arguments are evaluated before entering the assertRaises block so
    # that only the ChunkContainer call itself is under test.
    ctor_args = (
        ContainerMgr(MagicMock(), 'foobar'),
        sample.get('size'),
        sample.get('start_date'),
        sample.get('chunks'),
        sample.get('current_chunk'),
    )
    with self.assertRaises(AssertionError):
        ChunkContainer(*ctor_args)
class ChunkContainerInjector(Injector):
    """Routes time-based messages into chunk containers via a ContainerMgr.

    The container that was last worked on is kept in
    ``current_chunk_container``; loading and saving containers is delegated
    to ``container_mgr``.
    """

    def __init__(self, conn, db_name):
        """Create the ContainerMgr from ``conn`` and ``db_name``."""
        self.container_mgr = ContainerMgr(conn, db_name)
        # last modified object
        self.current_chunk_container = None

    def to_db(self, message):
        """Update current_chunk_container with ``message``.

        Messages that are not time based are ignored. When the message does
        not fit the current container, the container is swapped (and stored
        in the db if necessary) before the message is applied.
        """
        # doesnt make sense to have a non-time based msg in a time-based
        # container
        if not message.is_time_based():
            return

        if not self.current_chunk_container:
            self.current_chunk_container = self.pick_container_from_msg_date(
                message)

        if not self.current_chunk_container.tweet_fits(message):
            # store the current container and get a new one
            self._refresh_current_container(message)

        if self.current_chunk_container.current_chunk_isfull():
            self.container_mgr.refresh_current_chunk(
                self.current_chunk_container)

        self.current_chunk_container.update(message)

    def pick_container_from_msg_date(self, message):
        """Return the container associated with the message's creation time."""
        container_key = self._get_associated_container_key(message)
        return self.container_mgr.load_obj_from_id(container_key)

    def _refresh_current_container(self, message):
        """Replace the current container with the one ``message`` belongs to.

        The outgoing container is saved first, but only when its
        ``changed_since_retrieval`` flag is set.
        """
        # TODO: shouldn't the container be in charge of deciding whether to be
        # stored in the db or not?
        if self.current_chunk_container.changed_since_retrieval:
            msg = "saving chunk in db because key {0} doesnt match tweet {1} with date {2}"
            logger.info(
                msg.format(self.current_chunk_container.start_date,
                           message.get_id(), message.get_creation_time()))
            self.container_mgr.save_in_db(self.current_chunk_container)
        self.current_chunk_container = self.pick_container_from_msg_date(
            message)

    def _get_associated_container_key(self, message):
        """Return the key (start datetime) of the container for ``message``.

        The container size is read from ``self.container_mgr.size`` and is
        the number of minutes delimiting a container. It must be a positive
        value, at most 60, that divides 60 evenly.

        The key is the message creation time rounded down to a multiple of
        that size within the hour, with seconds and microseconds dropped.

        Raises:
            ValueError: if the configured size is not valid.
        """
        size = self.container_mgr.size
        # Non-positive sizes must be rejected explicitly: 0 would raise
        # ZeroDivisionError in the modulo below, and a negative divisor of 60
        # (e.g. -5) would otherwise pass the check and yield a wrong key.
        if size <= 0 or size > 60 or 60 % size:
            raise ValueError(
                "chunk_container_size of size {0} is not valid".format(size))

        creation_time = message.get_creation_time()
        delta = timedelta(minutes=creation_time.minute % size)
        floored = creation_time - delta
        # Rebuild the datetime from its fields down to the minute so that
        # seconds and microseconds are reset.
        return datetime(floored.year, floored.month, floored.day,
                        floored.hour, floored.minute)

    def last_to_db(self):
        """Save the current container, if there is one."""
        if self.current_chunk_container:
            self.container_mgr.save_in_db(self.current_chunk_container)
class TestContainerMgr(unittest.TestCase):
    """Tests for ContainerMgr using mocked chunk and db managers."""

    def setUp(self):
        """Build a ContainerMgr and replace its collaborators with mocks."""
        self.mgr = ContainerMgr(MagicMock(), 'foobar')
        self.mgr.chunkmgr = MagicMock()
        self.mgr.dbmgr = MagicMock()
        # TODO: srsly?
        self.mgr.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
        self.chunk = chunk_samples.chunk_sample.copy()
        # NOTE: writes to the sample held by chunk_samples, not to a copy.
        chunk_samples.chunk_container_sample["size"] = 10

    def test_save_in_db(self):
        """The chunk is saved only when the container has a current chunk."""
        container = MagicMock()
        container.default.return_value = 'foocontainer'

        container.current_chunk = False
        self.mgr.save_in_db(container)
        # The original ``assert_has_calls([])`` passes no matter what was
        # called; check explicitly that the chunk manager was left alone.
        self.assertFalse(self.mgr.chunkmgr.save_in_db.called)
        self.mgr.dbmgr.update_obj.assert_called_once_with('foocontainer')

        container.current_chunk = True
        self.mgr.save_in_db(container)
        self.mgr.chunkmgr.save_in_db.assert_called_once_with(True)
        self.mgr.dbmgr.update_obj.assert_called_with('foocontainer')

    def test_get_empty_obj(self):
        """get_empty_obj returns an unchanged container with no chunks."""
        c = self.mgr.get_empty_obj(3, datetime(2013, 10, 4, 9, 8))
        # assertEqual is used throughout: assertEquals is a deprecated alias
        # that no longer exists in Python 3.12+.
        self.assertEqual(c.size, 3)
        self.assertEqual(c.start_date, datetime(2013, 10, 4, 9, 8))
        self.assertEqual(c.changed_since_retrieval, False)
        self.assertEqual(c.chunks, {})
        self.assertEqual(type(c.current_chunk), tuple)
        self.assertEqual(c.current_chunk[0], None)
        self.assertEqual(type(c.current_chunk[1]), Chunk)

    def test_get_obj_from_db(self):
        """get_obj turns the container sample into a ChunkContainer."""
        c = self.mgr.get_obj(chunk_samples.chunk_container_sample)
        self.assertEqual(type(c), ChunkContainer)
        self.assertEqual(c.size, 10)
        self.assertEqual(c.start_date, datetime(2013, 8, 16, 9, 48))

    def test_get_chunk(self):
        """get_chunk exposes the expected term counts for the chunk sample."""
        c = self.mgr.get_chunk(self.chunk)
        self.assertEqual(
            c.terms, {
                "de": 25,
                "y": 14,
                "http": 14,
                "co": 14,
                "es": 14,
                "por": 6,
                "s": 6,
                "o": 6,
                "n": 6,
            })

    def test_get_top_occurrences(self):
        """get_top_occurrences over three sample chunks, asking for 4."""
        chunks = (chunk_samples.chunk_sample.copy(),
                  chunk_samples.chunk_sample_small1.copy(),
                  chunk_samples.chunk_sample_small2.copy())
        chunks = [Chunk(**chunk) for chunk in chunks]
        r = self.mgr.get_top_occurrences(chunks, 4)
        self.assertEqual(
            r, {
                'user_mentions': [(4, 'Fulendstambulen'), (4, 'el_fary'),
                                  (2, 'Los40_Spain'), (2, 'williamlevybra')],
                'hashtags': [(4, '10CosasQueOdio'), (3, 'nature'),
                             (1, 'PutaVidaTete')],
                'terms': [(25, 'de'), (20, 'pollo'), (15, 'froyo'),
                          (14, 'co')],
            })
def test_default(self):
    """default() of a container built from the with-chunks sample.

    The expected value is the plain dict compared against below.
    """
    self.container = chunk_samples.chunk_container_with_chunks.copy()
    cc = ChunkContainer(ContainerMgr(MagicMock(), 'foobar'),
                        self.container.get('size'),
                        self.container.get('start_date'),
                        self.container.get('chunks'),
                        self.container.get('current_chunk'))
    # assertEqual instead of assertEquals: the latter is a deprecated
    # alias that no longer exists in Python 3.12+.
    self.assertEqual(
        cc.default(), {
            'chunks': ['1', '2'],
            'chunk_size': 100,
            'current_chunk': '52499970e138235994c416a3',
            'start_date': 1376646480,
            'size': 10,
        })
class ChunkContainerInjector(Injector):
    """Routes time-based messages into chunk containers via a ContainerMgr.

    The container that was last worked on is kept in
    ``current_chunk_container``; loading and saving containers is delegated
    to ``container_mgr``.
    """

    def __init__(self, conn, db_name):
        """Create the ContainerMgr from ``conn`` and ``db_name``."""
        self.container_mgr = ContainerMgr(conn, db_name)
        # last modified object
        self.current_chunk_container = None

    def to_db(self, message):
        """Update current_chunk_container with ``message``.

        Messages that are not time based are ignored. When the message does
        not fit the current container, the container is swapped (and stored
        in the db if necessary) before the message is applied.
        """
        # doesnt make sense to have a non-time based msg in a time-based
        # container
        if not message.is_time_based():
            return

        if not self.current_chunk_container:
            self.current_chunk_container = self.pick_container_from_msg_date(
                message)

        if not self.current_chunk_container.tweet_fits(message):
            # store the current container and get a new one
            self._refresh_current_container(message)

        if self.current_chunk_container.current_chunk_isfull():
            self.container_mgr.refresh_current_chunk(
                self.current_chunk_container)

        self.current_chunk_container.update(message)

    def pick_container_from_msg_date(self, message):
        """Return the container associated with the message's creation time."""
        container_key = self._get_associated_container_key(message)
        return self.container_mgr.load_obj_from_id(container_key)

    def _refresh_current_container(self, message):
        """Replace the current container with the one ``message`` belongs to.

        The outgoing container is saved first, but only when its
        ``changed_since_retrieval`` flag is set.
        """
        # TODO: shouldn't the container be in charge of deciding whether to be
        # stored in the db or not?
        if self.current_chunk_container.changed_since_retrieval:
            msg = "saving chunk in db because key {0} doesnt match tweet {1} with date {2}"
            logger.info(
                msg.format(self.current_chunk_container.start_date,
                           message.get_id(), message.get_creation_time()))
            self.container_mgr.save_in_db(self.current_chunk_container)
        self.current_chunk_container = self.pick_container_from_msg_date(
            message)

    def _get_associated_container_key(self, message):
        """Return the key (start datetime) of the container for ``message``.

        The container size is read from ``self.container_mgr.size`` and is
        the number of minutes delimiting a container. It must be a positive
        value, at most 60, that divides 60 evenly.

        The key is the message creation time rounded down to a multiple of
        that size within the hour, with seconds and microseconds dropped.

        Raises:
            ValueError: if the configured size is not valid.
        """
        size = self.container_mgr.size
        # Non-positive sizes must be rejected explicitly: 0 would raise
        # ZeroDivisionError in the modulo below, and a negative divisor of 60
        # (e.g. -5) would otherwise pass the check and yield a wrong key.
        if size <= 0 or size > 60 or 60 % size:
            raise ValueError(
                "chunk_container_size of size {0} is not valid".format(size))

        creation_time = message.get_creation_time()
        delta = timedelta(minutes=creation_time.minute % size)
        floored = creation_time - delta
        # Rebuild the datetime from its fields down to the minute so that
        # seconds and microseconds are reset.
        return datetime(floored.year, floored.month, floored.day,
                        floored.hour, floored.minute)

    def last_to_db(self):
        """Save the current container, if there is one."""
        if self.current_chunk_container:
            self.container_mgr.save_in_db(self.current_chunk_container)
class TestContainerMgr(unittest.TestCase):
    """Tests for ContainerMgr using mocked chunk and db managers."""

    def setUp(self):
        """Build a ContainerMgr and replace its collaborators with mocks."""
        self.mgr = ContainerMgr(MagicMock(), 'foobar')
        self.mgr.chunkmgr = MagicMock()
        self.mgr.dbmgr = MagicMock()
        # TODO: srsly?
        self.mgr.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
        self.chunk = chunk_samples.chunk_sample.copy()
        # NOTE: writes to the sample held by chunk_samples, not to a copy.
        chunk_samples.chunk_container_sample["size"] = 10

    def test_save_in_db(self):
        """The chunk is saved only when the container has a current chunk."""
        container = MagicMock()
        container.default.return_value = 'foocontainer'

        container.current_chunk = False
        self.mgr.save_in_db(container)
        # The original ``assert_has_calls([])`` passes no matter what was
        # called; check explicitly that the chunk manager was left alone.
        self.assertFalse(self.mgr.chunkmgr.save_in_db.called)
        self.mgr.dbmgr.update_obj.assert_called_once_with('foocontainer')

        container.current_chunk = True
        self.mgr.save_in_db(container)
        self.mgr.chunkmgr.save_in_db.assert_called_once_with(True)
        self.mgr.dbmgr.update_obj.assert_called_with('foocontainer')

    def test_get_empty_obj(self):
        """get_empty_obj returns an unchanged container with no chunks."""
        c = self.mgr.get_empty_obj(3, datetime(2013, 10, 4, 9, 8))
        # assertEqual is used throughout: assertEquals is a deprecated alias
        # that no longer exists in Python 3.12+.
        self.assertEqual(c.size, 3)
        self.assertEqual(c.start_date, datetime(2013, 10, 4, 9, 8))
        self.assertEqual(c.changed_since_retrieval, False)
        self.assertEqual(c.chunks, {})
        self.assertEqual(type(c.current_chunk), tuple)
        self.assertEqual(c.current_chunk[0], None)
        self.assertEqual(type(c.current_chunk[1]), Chunk)

    def test_get_obj_from_db(self):
        """get_obj turns the container sample into a ChunkContainer."""
        c = self.mgr.get_obj(chunk_samples.chunk_container_sample)
        self.assertEqual(type(c), ChunkContainer)
        self.assertEqual(c.size, 10)
        self.assertEqual(c.start_date, datetime(2013, 8, 16, 9, 48))

    def test_get_chunk(self):
        """get_chunk exposes the expected term counts for the chunk sample."""
        c = self.mgr.get_chunk(self.chunk)
        self.assertEqual(
            c.terms, {
                "de": 25,
                "y": 14,
                "http": 14,
                "co": 14,
                "es": 14,
                "por": 6,
                "s": 6,
                "o": 6,
                "n": 6,
            })

    def test_get_top_occurrences(self):
        """get_top_occurrences over three sample chunks, asking for 4."""
        chunks = (chunk_samples.chunk_sample.copy(),
                  chunk_samples.chunk_sample_small1.copy(),
                  chunk_samples.chunk_sample_small2.copy())
        chunks = [Chunk(**chunk) for chunk in chunks]
        r = self.mgr.get_top_occurrences(chunks, 4)
        self.assertEqual(
            r, {
                'user_mentions': [(4, 'Fulendstambulen'), (4, 'el_fary'),
                                  (2, 'Los40_Spain'), (2, 'williamlevybra')],
                'hashtags': [(4, '10CosasQueOdio'), (3, 'nature'),
                             (1, 'PutaVidaTete')],
                'terms': [(25, 'de'), (20, 'pollo'), (15, 'froyo'),
                          (14, 'co')],
            })
def __init__(self, conn, db_name):
    """Set up the container manager and start with no current container.

    ``conn`` and ``db_name`` are passed straight to ContainerMgr.
    """
    manager = ContainerMgr(conn, db_name)
    self.container_mgr = manager
    # Holds the last modified container; nothing has been touched yet.
    self.current_chunk_container = None