Exemplo n.º 1
0
 def get_summary(self, **args):
     """Build the chat-summary text for a channel.

     Keyword Args:
         channel_id: Passed to ``self.get_messages`` when not in test mode.
         channel_name: Passed to the summarizer's ``set_channel``; also logged.
         user_id: Only logged.
         user_name: Accepted for caller compatibility; not used here.
         params: Passed to ``self.get_messages``; also logged.
         summ: Summarizer object handed to
             ``SpacyTsSummarizer.set_summarizer``. Optional: when it is
             missing or falsy, the gensim backend is tried instead.

     Returns:
         The summary text prefixed with ``*Chat Summary:*``. The summary part
         is empty when no backend listed in ``SUMMS`` could be selected.
     """
     channel_id = args.get('channel_id')
     channel_name = args.get('channel_name')
     user_id = args.get('user_id')
     params = args.get('params')
     # Read 'summ' as tolerantly as the other keys: the gensim path does not
     # need it, so a missing key must not abort the request.
     summ_object = args.get('summ')
     request_id = uuid.uuid1()

     if self.test:
         # Test mode reads canned messages from disk instead of the channel.
         with io.open(TEST_JSON, encoding='utf-8') as iot:
             msgs = json.load(iot)[u'messages']
     else:
         msgs = self.get_messages(channel_id, params)

     summ_impl = None
     if summ_object and "spacy" in SUMMS:
         self.logger.info(u'Using spacy')
         summ_impl = SpacyTsSummarizer()
         summ_impl.set_summarizer(summ_object)
     elif "gensim" in SUMMS:
         self.logger.info(u'Using gensim')
         summ_impl = TextRankTsSummarizer()

     summary = u''
     if summ_impl is not None:
         summ_impl.set_channel(channel_name)
         summary = summ_impl.summarize(msgs)
     else:
         # ``warn`` is a deprecated alias of ``warning``.
         self.logger.warning(u'No summarizer was set!')

     self.logger.info(u'Summary request %s user_id: %s', request_id,
                      user_id)
     self.logger.info(u'Summary request %s channel_name: %s', request_id,
                      channel_name)
     self.logger.info(u'Summary request %s parameters: %s', request_id,
                      params)
     self.logger.debug(u'Summary request %s messages: %s', request_id, msgs)
     self.logger.info(u'Summary request %s summary:\n %s', request_id,
                      summary)
     return u"*Chat Summary:* \n " + summary + "\n \n"
Exemplo n.º 2
0
 def test_spacy_summarization(self):
     """Summarize the shared test messages once per interval spec.

     Runs only when "spacy" is listed in ``SUMMS``; otherwise the test is
     reported as skipped.
     """
     if "spacy" not in SUMMS:
         # Report a skip rather than a silent pass so a missing backend is
         # visible in the test results.
         self.skipTest("spacy summarizer is not available")

     range_specs = [
         {"minutes": 60, "size": 2, "txt": u"Summary for first 60 minutes:\n"},
         {"hours": 12, "size": 1, "txt": u"Summary for last 12 hours:\n"},
     ]
     lsa_summ = lsa.LsaSummarizer()
     summ = SpacyTsSummarizer()
     for rs in range_specs:
         # The summarizer and channel are (re)applied before every run, as in
         # the original test.
         summ.set_summarizer(lsa_summ)
         summ.set_channel("elasticsearch")
         logger.debug("Testing spacy summarizer")
         sumry = summ.summarize(TestSummarize.test_msgs, range_spec=rs)
         logger.debug("Summary is %s, length %s", sumry, len(sumry))
         # assertGreater reports both operands on failure.
         self.assertGreater(len(sumry), 1)
Exemplo n.º 3
0
    def get_summary(self, **args):
        """Build the chat-summary text for a channel.

        Keyword Args:
            channel_id: Passed to ``self.get_messages`` when not in test mode.
            channel_name: Passed to the summarizer's ``set_channel``; also
                logged.
            user_id: Only logged.
            user_name: Accepted for caller compatibility; not used here.
            params: Passed to ``self.get_messages``; also logged.
            summ: Summarizer object handed to
                ``SpacyTsSummarizer.set_summarizer``. Optional: when it is
                missing or falsy, the gensim backend is tried instead.

        Returns:
            The summary text prefixed with ``*Chat Summary:*``. The summary
            part is empty when no backend listed in ``SUMMS`` could be
            selected.
        """
        channel_id = args.get('channel_id')
        channel_name = args.get('channel_name')
        user_id = args.get('user_id')
        params = args.get('params')
        # Read 'summ' as tolerantly as the other keys: the gensim path does
        # not need it, so a missing key must not abort the request.
        summ_object = args.get('summ')
        request_id = uuid.uuid1()

        if self.test:
            # Test mode reads canned messages from disk instead of the channel.
            with io.open(TEST_JSON, encoding='utf-8') as iot:
                msgs = json.load(iot)[u'messages']
        else:
            msgs = self.get_messages(channel_id, params)

        summ_impl = None
        if summ_object and "spacy" in SUMMS:
            self.logger.info(u'Using spacy')
            summ_impl = SpacyTsSummarizer()
            summ_impl.set_summarizer(summ_object)
        elif "gensim" in SUMMS:
            self.logger.info(u'Using gensim')
            summ_impl = TextRankTsSummarizer()

        summary = u''
        if summ_impl is not None:
            summ_impl.set_channel(channel_name)
            summary = summ_impl.summarize(msgs)
        else:
            # ``warn`` is a deprecated alias of ``warning``.
            self.logger.warning(u'No summarizer was set!')

        self.logger.info(u'Summary request %s user_id: %s', request_id, user_id)
        self.logger.info(u'Summary request %s channel_name: %s', request_id, channel_name)
        self.logger.info(u'Summary request %s parameters: %s', request_id, params)
        self.logger.debug(u'Summary request %s messages: %s', request_id, msgs)
        self.logger.info(u'Summary request %s summary:\n %s', request_id, summary)
        return u"*Chat Summary:* \n " + summary + "\n \n"
Exemplo n.º 4
0
 def test_spacy_summarization(self):
     """Summarize the shared test messages once per interval spec.

     Runs only when "spacy" is listed in ``SUMMS``; otherwise the test is
     reported as skipped.
     """
     if "spacy" not in SUMMS:
         # Report a skip rather than a silent pass so a missing backend is
         # visible in the test results.
         self.skipTest("spacy summarizer is not available")

     range_specs = [{
         'minutes': 60,
         'size': 2,
         'txt': u'Summary for first 60 minutes:\n',
     }, {
         'hours': 12,
         'size': 1,
         'txt': u'Summary for last 12 hours:\n',
     }]
     lsa_summ = lsa.LsaSummarizer()
     summ = SpacyTsSummarizer()
     for rs in range_specs:
         # The summarizer and channel are (re)applied before every run, as in
         # the original test.
         summ.set_summarizer(lsa_summ)
         summ.set_channel('elasticsearch')
         logger.debug("Testing spacy summarizer")
         sumry = summ.summarize(TestSummarize.test_msgs, range_spec=rs)
         logger.debug("Summary is %s, length %s", sumry, len(sumry))
         # assertGreater reports both operands on failure.
         self.assertGreater(len(sumry), 1)
Exemplo n.º 5
0
class TestSummarize(unittest.TestCase):
    """Tests that run SpacyTsSummarizer over sample messages via ``@given``."""

    # Message fixture exposed on the class (accessed as TestSummarize.test_msgs).
    test_msgs = test_json_msgs
    # A single summarizer instance, configured with an LSA summarizer at class
    # creation time and reached by the tests through TestSummarize.summ.
    summ = SpacyTsSummarizer()
    summ.set_summarizer(lsa.LsaSummarizer())

    @given(lists(elements=sampled_from(test_json_msgs), min_size=3),
           integers(min_value=1, max_value=20),
           settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds1_days(self, smp_msgs, days):
        """Summarize sampled messages with a 'days' range spec.

        Args:
            smp_msgs: List of at least 3 entries sampled from test_json_msgs.
            days: Integer from 1 to 20 used as the 'days' value of the spec.
        """
        logger.info("Input is %s", smp_msgs)
        range_spec = {
            'days': days,
            'size': 3,
            'txt': u'Summary for first {} days:\n'.format(days),
        }
        TestSummarize.summ.set_channel('elasticsearch')
        sumry = TestSummarize.summ.summarize(smp_msgs, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound is enforced.
        # TODO: the upper bounds (len(sumry) <= 3, len(sumry) <= len(smp_msgs))
        # and the check that each summary entry corresponds to an input
        # message are not asserted yet.
        self.assertGreaterEqual(len(sumry), 1)

    @given(lists(elements=sampled_from(test_json_msgs_c2), min_size=12),
           integers(min_value=1, max_value=20),
           settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds2_days(self, smp_msgs, days):
        """Summarize sampled messages with a 'days' range spec.

        Args:
            smp_msgs: List of at least 12 entries sampled from
                test_json_msgs_c2.
            days: Integer from 1 to 20 used as the 'days' value of the spec.
        """
        logger.info("Input is %s", smp_msgs)
        range_spec = {
            'days': days,
            'size': 3,
            'txt': u'Summary for first {} days:\n'.format(days),
        }
        TestSummarize.summ.set_channel('elasticsearch')
        sumry = TestSummarize.summ.summarize(smp_msgs, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound is enforced.
        # TODO: the upper bounds (len(sumry) <= 3, len(sumry) <= len(smp_msgs))
        # and the check that each summary entry corresponds to an input
        # message are not asserted yet.
        self.assertGreaterEqual(len(sumry), 1)

    @given(integers(min_value=1, max_value=1000),
           integers(min_value=1, max_value=20),
           settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds3_days(self, sampsize, days):
        """Summarize a random tail of one channel with a 'days' range spec.

        Args:
            sampsize: Integer from 1 to 1000 supplied by ``@given``; the test
                body does not use it.
            days: Integer from 1 to 20 used as the 'days' value of the spec.
        """
        channel, ssamp = random.choice(test_json_msgs_c3)
        # NOTE(review): randint raises ValueError when ssamp has fewer than 3
        # entries, and the segment is picked with the random module rather
        # than from the @given arguments.
        samp = ssamp[random.randint(1, len(ssamp) - 2):]
        logger.info("Input is segment is %s", samp)
        range_spec = {
            'days': days,
            'size': 3,
            'txt': u'Summary for first {} days:\n'.format(days),
        }
        TestSummarize.summ.set_channel(channel)
        sumry = TestSummarize.summ.summarize(samp, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound is enforced.
        # TODO: the upper bounds (len(sumry) <= 3, len(sumry) <= len(samp))
        # and the check that each summary entry corresponds to an input
        # message are not asserted yet.
        self.assertGreaterEqual(len(sumry), 1)

    @given(lists(elements=sampled_from(test_json_msgs), min_size=1),
           integers(min_value=1, max_value=24),
           settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds1_hours(self, smp_msgs, hours):
        """Summarize sampled messages with an 'hours' range spec.

        Args:
            smp_msgs: Non-empty list of entries sampled from test_json_msgs.
            hours: Integer from 1 to 24 used as the 'hours' value of the spec.
        """
        logger.info("Input is %s", smp_msgs)
        range_spec = {
            'hours': hours,
            'size': 3,
            'txt': u'Summary for first {} hours:\n'.format(hours),
        }
        TestSummarize.summ.set_channel('elasticsearch')
        sumry = TestSummarize.summ.summarize(smp_msgs, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound is enforced.
        # TODO: the upper bounds (len(sumry) <= 3, len(sumry) <= len(smp_msgs))
        # and the check that each summary entry corresponds to an input
        # message are not asserted yet.
        self.assertGreaterEqual(len(sumry), 1)

    @given(lists(elements=sampled_from(test_json_msgs_c2), min_size=1),
           integers(min_value=1, max_value=24),
           settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds2_hours(self, smp_msgs, hours):
        """Summarize sampled messages with an 'hours' range spec.

        Args:
            smp_msgs: Non-empty list of entries sampled from
                test_json_msgs_c2.
            hours: Integer from 1 to 24 used as the 'hours' value of the spec.
        """
        logger.info("Input is %s", smp_msgs)
        range_spec = {
            'hours': hours,
            'size': 3,
            'txt': u'Summary for first {} hours:\n'.format(hours),
        }
        TestSummarize.summ.set_channel('elasticsearch')
        sumry = TestSummarize.summ.summarize(smp_msgs, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound is enforced.
        # TODO: the upper bounds (len(sumry) <= 3, len(sumry) <= len(smp_msgs))
        # and the check that each summary entry corresponds to an input
        # message are not asserted yet.
        self.assertGreaterEqual(len(sumry), 1)

    @given(integers(min_value=2, max_value=1000),
           integers(min_value=1, max_value=24),
           settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds3_hours(self, sampsize, hours):
        """Summarize a random tail of one channel with an 'hours' range spec.

        Args:
            sampsize: Integer from 2 to 1000 supplied by ``@given``; the test
                body does not use it.
            hours: Integer from 1 to 24 used as the 'hours' value of the spec.
        """
        channel, ssamp = random.choice(test_json_msgs_c3)
        # NOTE(review): randint raises ValueError when ssamp has fewer than 3
        # entries, and the segment is picked with the random module rather
        # than from the @given arguments.
        samp = ssamp[random.randint(1, len(ssamp) - 2):]
        TestSummarize.summ.set_channel(channel)
        logger.info("Input is segment is %s", samp)
        range_spec = {
            'hours': hours,
            'size': 3,
            'txt': u'Summary for first {} hours:\n'.format(hours),
        }
        sumry = TestSummarize.summ.summarize(samp, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound (at least one entry) is enforced here.
        self.assertGreaterEqual(len(sumry), 1)