Пример #1
0
 def setUp(self):
     '''Build a ContainerMgr with mocked collaborators and load the sample data.'''
     manager = self.mgr = ContainerMgr(MagicMock(), 'foobar')
     # Swap both collaborators for mocks so no real backend is used.
     manager.chunkmgr = MagicMock()
     manager.dbmgr = MagicMock()
     # TODO: srsly? (original author's note on stubbing through chunkmgr.dbmgr)
     manager.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
     self.chunk = chunk_samples.chunk_sample.copy()
     # NOTE: this writes into the shared module-level sample, not a copy.
     chunk_samples.chunk_container_sample["size"] = 10
Пример #2
0
 def test_constructor_from_db(self):
     '''A ChunkContainer built from self.container's fields has a str current_chunk.'''
     cc = ChunkContainer(ContainerMgr(MagicMock(), 'foobar'),
                         self.container.get('size'),
                         self.container.get('start_date'),
                         self.container.get('chunks'),
                         self.container.get('current_chunk'))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(type(cc.current_chunk), str)
Пример #3
0
 def setUp(self):
   '''Build a ContainerMgr with mocked collaborators and load the sample data.'''
   manager = self.mgr = ContainerMgr(MagicMock(), 'foobar')
   # Swap both collaborators for mocks so no real backend is used.
   manager.chunkmgr = MagicMock()
   manager.dbmgr = MagicMock()
   # TODO: srsly? (original author's note on stubbing through chunkmgr.dbmgr)
   manager.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
   self.chunk = chunk_samples.chunk_sample.copy()
   # NOTE: this writes into the shared module-level sample, not a copy.
   chunk_samples.chunk_container_sample["size"] = 10
Пример #4
0
 def test_constructor_bad_size(self):
     '''Constructing a ChunkContainer from the bad-size sample raises AssertionError.'''
     sample = self.container = chunk_samples.chunk_container_sample_bad_size
     # Build the arguments before entering the context manager so that only
     # the ChunkContainer call itself is guarded, as in the call form.
     ctor_args = (
         ContainerMgr(MagicMock(), 'foobar'),
         sample.get('size'),
         sample.get('start_date'),
         sample.get('chunks'),
         sample.get('current_chunk'),
     )
     with self.assertRaises(AssertionError):
         ChunkContainer(*ctor_args)
Пример #5
0
class ChunkContainerInjector(Injector):
    '''Injector that routes time-based messages into chunk containers.

    The container currently being filled is kept in
    ``current_chunk_container``; loading and saving is delegated to a
    ``ContainerMgr``.
    '''

    def __init__(self, conn, db_name):
        '''Create the ContainerMgr from ``conn`` and ``db_name``.'''
        self.container_mgr = ContainerMgr(conn, db_name)
        # last modified object
        self.current_chunk_container = None

    def to_db(self, message):
        '''Update current_chunk_container with ``message``.

        Messages that are not time based are ignored. If the message does
        not fit the current container, that container is saved (when it
        changed) and replaced by the one matching the message date. A full
        current chunk is refreshed before the message is added.
        '''
        # doesnt make sense to have a non-time based msg in a time-based container
        if not message.is_time_based():
            return
        if not self.current_chunk_container:
            self.current_chunk_container = self.pick_container_from_msg_date(
                message)
        if not self.current_chunk_container.tweet_fits(message):
            # store the current container and get a new one
            self._refresh_current_container(message)
        if self.current_chunk_container.current_chunk_isfull():
            self.container_mgr.refresh_current_chunk(
                self.current_chunk_container)
        self.current_chunk_container.update(message)

    def pick_container_from_msg_date(self, message):
        '''Return the container associated with the creation time of ``message``.'''
        container_key = self._get_associated_container_key(message)
        return self.container_mgr.load_obj_from_id(container_key)

    def _refresh_current_container(self, message):
        '''Save the current container if it changed, then switch to the one for ``message``.'''
        # TODO: shouldn't the container be in charge of deciding whether to be
        # stored in the db or not?
        if self.current_chunk_container.changed_since_retrieval:
            msg = "saving chunk in db because key {0} doesnt match tweet {1} with date {2}"
            logger.info(
                msg.format(self.current_chunk_container.start_date,
                           message.get_id(), message.get_creation_time()))
            self.container_mgr.save_in_db(self.current_chunk_container)
        self.current_chunk_container = self.pick_container_from_msg_date(
            message)

    def _get_associated_container_key(self, message):
        '''Return the key (start datetime) of the container ``message`` belongs to.

        The creation time of the message is rounded down to a multiple of
        ``self.container_mgr.size`` minutes; seconds and smaller units are
        dropped. The size is the number of minutes delimiting a container
        and must be a positive divisor of 60.

        Raises:
            ValueError: if the container size is not a positive divisor of 60.
        '''
        def reset_seconds(date):
            # Rebuilds the datetime from its fields down to the minute only.
            return datetime(date.year, date.month, date.day, date.hour,
                            date.minute)

        size = self.container_mgr.size
        # ``size <= 0`` is checked first: a zero size would otherwise fail
        # with ZeroDivisionError and a negative divisor would be accepted.
        if size <= 0 or size > 60 or 60 % size:
            raise ValueError(
                "chunk_container_size of size {0} is not valid".format(size))
        creation_time = message.get_creation_time()
        delta = timedelta(minutes=creation_time.minute % size)
        return reset_seconds(creation_time - delta)

    def last_to_db(self):
        '''Save the current container, if there is one.'''
        if self.current_chunk_container:
            self.container_mgr.save_in_db(self.current_chunk_container)
Пример #6
0
class TestContainerMgr(unittest.TestCase):
  '''Unit tests for ContainerMgr with its chunk and db managers mocked.'''

  def setUp(self):
    '''Create a ContainerMgr whose chunkmgr/dbmgr are MagicMocks and load samples.'''
    self.mgr = ContainerMgr(MagicMock(), 'foobar')
    self.mgr.chunkmgr = MagicMock()
    self.mgr.dbmgr = MagicMock()
    # TODO: srsly?
    self.mgr.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
    self.chunk = chunk_samples.chunk_sample.copy()
    # NOTE: this writes into the shared module-level sample, not a copy.
    chunk_samples.chunk_container_sample["size"] = 10

  def test_save_in_db(self):
    '''save_in_db always updates the container; the chunk only when current_chunk is truthy.'''
    container = MagicMock()
    container.default.return_value = 'foocontainer'
    container.current_chunk = False
    self.mgr.save_in_db(container)
    # assert_has_calls([]) passes for any call history, so check the
    # ``called`` flag to really verify the chunk manager was left alone.
    self.assertFalse(self.mgr.chunkmgr.save_in_db.called)
    self.mgr.dbmgr.update_obj.assert_called_once_with('foocontainer')
    container.current_chunk = True
    self.mgr.save_in_db(container)
    self.mgr.chunkmgr.save_in_db.assert_called_once_with(True)
    self.mgr.dbmgr.update_obj.assert_called_with('foocontainer')

  def test_get_empty_obj(self):
    '''get_empty_obj returns an unchanged container with no chunks and a fresh current chunk.'''
    c = self.mgr.get_empty_obj(3, datetime(2013, 10, 4, 9, 8))
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(c.size, 3)
    self.assertEqual(c.start_date, datetime(2013, 10, 4, 9, 8))
    self.assertEqual(c.changed_since_retrieval, False)
    self.assertEqual(c.chunks, {})
    self.assertIs(type(c.current_chunk), tuple)
    self.assertEqual(c.current_chunk[0], None)
    self.assertIs(type(c.current_chunk[1]), Chunk)

  def test_get_obj_from_db(self):
    '''get_obj turns the container sample into a ChunkContainer.'''
    c = self.mgr.get_obj(chunk_samples.chunk_container_sample)
    self.assertIs(type(c), ChunkContainer)
    self.assertEqual(c.size, 10)
    self.assertEqual(c.start_date, datetime(2013, 8, 16, 9, 48))

  def test_get_chunk(self):
    '''get_chunk exposes the term counts of the chunk sample.'''
    c = self.mgr.get_chunk(self.chunk)
    self.assertEqual(c.terms, {"de" : 25, "y" : 14, "http" : 14, "co" : 14, "es" : 14, "por" : 6, "s" : 6, "o" : 6, "n" : 6})

  def test_get_top_occurrences(self):
    '''get_top_occurrences merges three chunks and keeps at most four entries per category.'''
    chunks = (chunk_samples.chunk_sample.copy(),
              chunk_samples.chunk_sample_small1.copy(),
              chunk_samples.chunk_sample_small2.copy())
    chunks = [Chunk(**chunk) for chunk in chunks]
    r = self.mgr.get_top_occurrences(chunks, 4)
    self.assertEqual(r, {'user_mentions': [(4, 'Fulendstambulen'), (4, 'el_fary'), (2, 'Los40_Spain'), (2, 'williamlevybra')],
                         'hashtags': [(4, '10CosasQueOdio'), (3, 'nature'), (1, 'PutaVidaTete')],
                         'terms': [(25, 'de'), (20, 'pollo'), (15, 'froyo'), (14, 'co')]})
Пример #7
0
 def test_default(self):
     '''default() returns the expected dict for the container-with-chunks sample.'''
     self.container = chunk_samples.chunk_container_with_chunks.copy()
     cc = ChunkContainer(ContainerMgr(MagicMock(), 'foobar'),
                         self.container.get('size'),
                         self.container.get('start_date'),
                         self.container.get('chunks'),
                         self.container.get('current_chunk'))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         cc.default(), {
             'chunks': ['1', '2'],
             'chunk_size': 100,
             'current_chunk': '52499970e138235994c416a3',
             'start_date': 1376646480,
             'size': 10
         })
Пример #8
0
class ChunkContainerInjector(Injector):
  '''Injector that routes time-based messages into chunk containers.

  The container currently being filled is kept in
  ``current_chunk_container``; loading and saving is delegated to a
  ``ContainerMgr``.
  '''

  def __init__(self, conn, db_name):
    '''Create the ContainerMgr from ``conn`` and ``db_name``.'''
    self.container_mgr = ContainerMgr(conn, db_name)
    # last modified object
    self.current_chunk_container = None

  def to_db(self, message):
    '''Update current_chunk_container with ``message``.

    Messages that are not time based are ignored. If the message does not
    fit the current container, that container is saved (when it changed)
    and replaced by the one matching the message date. A full current
    chunk is refreshed before the message is added.
    '''
    # doesnt make sense to have a non-time based msg in a time-based container
    if not message.is_time_based():
      return
    if not self.current_chunk_container:
      self.current_chunk_container = self.pick_container_from_msg_date(message)
    if not self.current_chunk_container.tweet_fits(message):
      # store the current container and get a new one
      self._refresh_current_container(message)
    if self.current_chunk_container.current_chunk_isfull():
      self.container_mgr.refresh_current_chunk(self.current_chunk_container)
    self.current_chunk_container.update(message)

  def pick_container_from_msg_date(self, message):
    '''Return the container associated with the creation time of ``message``.'''
    container_key = self._get_associated_container_key(message)
    return self.container_mgr.load_obj_from_id(container_key)

  def _refresh_current_container(self, message):
    '''Save the current container if it changed, then switch to the one for ``message``.'''
    # TODO: shouldn't the container be in charge of deciding whether to be
    # stored in the db or not?
    if self.current_chunk_container.changed_since_retrieval:
      msg = "saving chunk in db because key {0} doesnt match tweet {1} with date {2}"
      logger.info(msg.format(self.current_chunk_container.start_date,
                             message.get_id(),
                             message.get_creation_time()))
      self.container_mgr.save_in_db(self.current_chunk_container)
    self.current_chunk_container = self.pick_container_from_msg_date(message)

  def _get_associated_container_key(self, message):
    '''Return the key (start datetime) of the container ``message`` belongs to.

    The creation time of the message is rounded down to a multiple of
    ``self.container_mgr.size`` minutes; seconds and smaller units are
    dropped. The size is the number of minutes delimiting a container and
    must be a positive divisor of 60.

    Raises:
      ValueError: if the container size is not a positive divisor of 60.
    '''
    def reset_seconds(date):
      # Rebuilds the datetime from its fields down to the minute only.
      return datetime(date.year, date.month, date.day, date.hour, date.minute)

    size = self.container_mgr.size
    # ``size <= 0`` is checked first: a zero size would otherwise fail with
    # ZeroDivisionError and a negative divisor would be accepted.
    if size <= 0 or size > 60 or 60 % size:
      raise ValueError(
          "chunk_container_size of size {0} is not valid".format(size))
    creation_time = message.get_creation_time()
    delta = timedelta(minutes=creation_time.minute % size)
    return reset_seconds(creation_time - delta)

  def last_to_db(self):
    '''Save the current container, if there is one.'''
    if self.current_chunk_container:
      self.container_mgr.save_in_db(self.current_chunk_container)
Пример #9
0
class TestContainerMgr(unittest.TestCase):
    '''Unit tests for ContainerMgr with its chunk and db managers mocked.'''

    def setUp(self):
        '''Create a ContainerMgr whose chunkmgr/dbmgr are MagicMocks and load samples.'''
        self.mgr = ContainerMgr(MagicMock(), 'foobar')
        self.mgr.chunkmgr = MagicMock()
        self.mgr.dbmgr = MagicMock()
        # TODO: srsly?
        self.mgr.chunkmgr.dbmgr.save_chunk.return_value = 'chunkid'
        self.chunk = chunk_samples.chunk_sample.copy()
        # NOTE: this writes into the shared module-level sample, not a copy.
        chunk_samples.chunk_container_sample["size"] = 10

    def test_save_in_db(self):
        '''save_in_db always updates the container; the chunk only when current_chunk is truthy.'''
        container = MagicMock()
        container.default.return_value = 'foocontainer'
        container.current_chunk = False
        self.mgr.save_in_db(container)
        # assert_has_calls([]) passes for any call history, so check the
        # ``called`` flag to really verify the chunk manager was left alone.
        self.assertFalse(self.mgr.chunkmgr.save_in_db.called)
        self.mgr.dbmgr.update_obj.assert_called_once_with('foocontainer')
        container.current_chunk = True
        self.mgr.save_in_db(container)
        self.mgr.chunkmgr.save_in_db.assert_called_once_with(True)
        self.mgr.dbmgr.update_obj.assert_called_with('foocontainer')

    def test_get_empty_obj(self):
        '''get_empty_obj returns an unchanged container with no chunks and a fresh current chunk.'''
        c = self.mgr.get_empty_obj(3, datetime(2013, 10, 4, 9, 8))
        # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
        self.assertEqual(c.size, 3)
        self.assertEqual(c.start_date, datetime(2013, 10, 4, 9, 8))
        self.assertEqual(c.changed_since_retrieval, False)
        self.assertEqual(c.chunks, {})
        self.assertIs(type(c.current_chunk), tuple)
        self.assertEqual(c.current_chunk[0], None)
        self.assertIs(type(c.current_chunk[1]), Chunk)

    def test_get_obj_from_db(self):
        '''get_obj turns the container sample into a ChunkContainer.'''
        c = self.mgr.get_obj(chunk_samples.chunk_container_sample)
        self.assertIs(type(c), ChunkContainer)
        self.assertEqual(c.size, 10)
        self.assertEqual(c.start_date, datetime(2013, 8, 16, 9, 48))

    def test_get_chunk(self):
        '''get_chunk exposes the term counts of the chunk sample.'''
        c = self.mgr.get_chunk(self.chunk)
        self.assertEqual(
            c.terms, {
                "de": 25,
                "y": 14,
                "http": 14,
                "co": 14,
                "es": 14,
                "por": 6,
                "s": 6,
                "o": 6,
                "n": 6
            })

    def test_get_top_occurrences(self):
        '''get_top_occurrences merges three chunks and keeps at most four entries per category.'''
        chunks = (chunk_samples.chunk_sample.copy(),
                  chunk_samples.chunk_sample_small1.copy(),
                  chunk_samples.chunk_sample_small2.copy())
        chunks = [Chunk(**chunk) for chunk in chunks]
        r = self.mgr.get_top_occurrences(chunks, 4)
        self.assertEqual(
            r, {
                'user_mentions': [(4, 'Fulendstambulen'), (4, 'el_fary'),
                                  (2, 'Los40_Spain'), (2, 'williamlevybra')],
                'hashtags': [(4, '10CosasQueOdio'), (3, 'nature'),
                             (1, 'PutaVidaTete')],
                'terms': [(25, 'de'), (20, 'pollo'), (15, 'froyo'), (14, 'co')]
            })
Пример #10
0
 def __init__(self, conn, db_name):
   '''Create the ContainerMgr for ``conn``/``db_name``; no container is selected yet.'''
   manager = ContainerMgr(conn, db_name)
   self.container_mgr = manager
   # Holds the last modified container; nothing has been touched so far.
   self.current_chunk_container = None
Пример #11
0
 def __init__(self, conn, db_name):
     '''Create the ContainerMgr for ``conn``/``db_name``; no container is selected yet.'''
     manager = ContainerMgr(conn, db_name)
     self.container_mgr = manager
     # Holds the last modified container; nothing has been touched so far.
     self.current_chunk_container = None