def get_summary(self, **args):
    """Build the chat-summary reply for one channel.

    Recognised keyword arguments (all optional, missing ones are
    treated as ``None``):

    * ``channel_id``   -- passed to ``self.get_messages`` to fetch messages.
    * ``channel_name`` -- handed to the summarizer via ``set_channel``.
    * ``user_id``      -- only logged.
    * ``user_name``    -- accepted for caller compatibility; not used here.
    * ``params``       -- passed to ``self.get_messages`` and logged.
    * ``summ``         -- summarizer object installed into the spacy
      implementation when "spacy" is listed in ``SUMMS``.

    When ``self.test`` is truthy the messages are read from the
    ``TEST_JSON`` file instead of calling ``self.get_messages``.

    Returns the summary text wrapped as
    ``u"*Chat Summary:* \\n " + summary + "\\n \\n"``; the summary part is
    empty when no summarizer implementation could be selected.
    """
    channel_id = args.get('channel_id')
    channel_name = args.get('channel_name')
    user_id = args.get('user_id')
    params = args.get('params')
    # Read optionally, like every other keyword, instead of raising
    # KeyError: a missing summarizer simply falls through to gensim.
    summ_object = args.get('summ')

    # Identifier used to correlate the log lines of this request.
    request_id = uuid.uuid1()

    if self.test:
        with io.open(TEST_JSON, encoding='utf-8') as iot:
            msgs = json.load(iot)[u'messages']
    else:
        msgs = self.get_messages(channel_id, params)

    summ_impl = None
    summary = u''
    if summ_object and "spacy" in SUMMS:
        self.logger.info(u'Using spacy')
        summ_impl = SpacyTsSummarizer()
        summ_impl.set_summarizer(summ_object)
    elif "gensim" in SUMMS:
        self.logger.info(u'Using gensim')
        summ_impl = TextRankTsSummarizer()

    if summ_impl:
        summ_impl.set_channel(channel_name)
        summary = summ_impl.summarize(msgs)
    else:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        self.logger.warning(u'No summarizer was set!')

    self.logger.info(u'Summary request %s user_id: %s', request_id, user_id)
    self.logger.info(u'Summary request %s channel_name: %s', request_id, channel_name)
    self.logger.info(u'Summary request %s parameters: %s', request_id, params)
    self.logger.debug(u'Summary request %s messages: %s', request_id, msgs)
    self.logger.info(u'Summary request %s summary:\n %s', request_id, summary)

    return u"*Chat Summary:* \n " + summary + "\n \n"
def test_spacy_summarization(self):
    """Summarize the fixture messages once per interval spec.

    Each range spec (60 minutes / 12 hours) is passed to
    ``SpacyTsSummarizer.summarize`` as ``range_spec`` and the result must
    have a length greater than 1. The test is reported as skipped, rather
    than silently passing, when "spacy" is not listed in ``SUMMS``.
    """
    if "spacy" not in SUMMS:
        self.skipTest("spacy summarizer is not enabled in SUMMS")

    range_specs = [
        {"minutes": 60, "size": 2, "txt": u"Summary for first 60 minutes:\n"},
        {"hours": 12, "size": 1, "txt": u"Summary for last 12 hours:\n"},
    ]
    lsa_summ = lsa.LsaSummarizer()
    summ = SpacyTsSummarizer()
    for rs in range_specs:
        # Re-applied on every iteration, as in the original test.
        summ.set_summarizer(lsa_summ)
        summ.set_channel("elasticsearch")
        logger.debug("Testing spacy summarizer")
        sumry = summ.summarize(TestSummarize.test_msgs, range_spec=rs)
        logger.debug("Summary is %s, length %s", sumry, len(sumry))
        # assertGreater reports both operands on failure.
        self.assertGreater(len(sumry), 1)
def test_spacy_summarization(self):
    """Run the spacy summarizer over the fixture for each interval spec.

    For every range spec below, ``summarize`` is called on
    ``TestSummarize.test_msgs`` with that spec as ``range_spec`` and the
    returned summary must have a length greater than 1. If "spacy" is not
    listed in ``SUMMS`` the test is skipped explicitly instead of
    passing without checking anything.
    """
    if "spacy" not in SUMMS:
        self.skipTest('spacy summarizer is not enabled in SUMMS')

    interval_specs = [{
        'minutes': 60,
        'size': 2,
        'txt': u'Summary for first 60 minutes:\n'
    }, {
        'hours': 12,
        'size': 1,
        'txt': u'Summary for last 12 hours:\n'
    }]
    lsa_summ = lsa.LsaSummarizer()
    summ = SpacyTsSummarizer()
    for rs in interval_specs:
        # Summarizer and channel are set again before each run, exactly
        # as the original loop did.
        summ.set_summarizer(lsa_summ)
        summ.set_channel('elasticsearch')
        logger.debug("Testing spacy summarizer")
        sumry = summ.summarize(TestSummarize.test_msgs, range_spec=rs)
        logger.debug("Summary is %s, length %s", sumry, len(sumry))
        # Dedicated assertion so a failure shows the actual length.
        self.assertGreater(len(sumry), 1)
class TestSummarize(unittest.TestCase):
    """Hypothesis-driven smoke tests for ``SpacyTsSummarizer.summarize``.

    Every test feeds a sample of fixture messages plus a day- or
    hour-based ``range_spec`` to the shared summarizer and checks only
    that the returned summary is non-empty.
    """

    test_msgs = test_json_msgs
    # A single summarizer is created at class-definition time and shared
    # by all tests in this class.
    summ = SpacyTsSummarizer()
    summ.set_summarizer(lsa.LsaSummarizer())

    @staticmethod
    def _range_spec(unit, amount):
        """Return the range spec dict for *amount* of *unit* ('days'/'hours')."""
        return {
            unit: amount,
            'size': 3,
            'txt': u'Summary for first {} {}:\n'.format(amount, unit)
        }

    @staticmethod
    def _random_c3_segment():
        """Pick a random (channel, message-tail) pair from ``test_json_msgs_c3``.

        NOTE(review): this uses ``random`` rather than a hypothesis
        strategy, so failing examples are not reproducible/shrinkable by
        hypothesis. ``random.randint(1, len(ssamp) - 2)`` also raises
        ValueError if a sample has fewer than 3 messages.
        """
        channel, ssamp = random.choice(test_json_msgs_c3)
        return channel, ssamp[random.randint(1, len(ssamp) - 2):]

    def _assert_summary_produced(self, channel, msgs, range_spec):
        """Summarize *msgs* for *channel* and assert the result is non-empty."""
        TestSummarize.summ.set_channel(channel)
        sumry = TestSummarize.summ.summarize(msgs, range_spec=range_spec)
        logger.debug("Summary is %s", sumry)
        # Only the lower bound is enforced.
        # TODO: stricter checks were commented out in every original test
        # (reason not recorded here):
        #   len(sumry) <= 3            (the requested 'size')
        #   len(sumry) <= len(msgs)
        #   each summary entry corresponds to an input message
        # NOTE(review): if summarize() returns text rather than a list,
        # len() counts characters and these bounds need rethinking.
        self.assertGreaterEqual(len(sumry), 1)

    @given(lists(elements=sampled_from(test_json_msgs), min_size=3), integers(min_value=1, max_value=20), settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds1_days(self, smp_msgs, days):
        """Summarize a sample of data set 1 over an N-day interval."""
        logger.info("Input is %s", smp_msgs)
        self._assert_summary_produced(
            'elasticsearch', smp_msgs, self._range_spec('days', days))

    @given(lists(elements=sampled_from(test_json_msgs_c2), min_size=12), integers(min_value=1, max_value=20), settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds2_days(self, smp_msgs, days):
        """Summarize a sample of data set 2 over an N-day interval."""
        logger.info("Input is %s", smp_msgs)
        self._assert_summary_produced(
            'elasticsearch', smp_msgs, self._range_spec('days', days))

    @given(integers(min_value=1, max_value=1000), integers(min_value=1, max_value=20), settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds3_days(self, sampsize, days):
        """Summarize a random segment of data set 3 over an N-day interval.

        ``sampsize`` is drawn by hypothesis but not used by the test body.
        """
        channel, samp = self._random_c3_segment()
        logger.info("Input is segment is %s", samp)
        self._assert_summary_produced(
            channel, samp, self._range_spec('days', days))

    @given(lists(elements=sampled_from(test_json_msgs), min_size=1), integers(min_value=1, max_value=24), settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds1_hours(self, smp_msgs, hours):
        """Summarize a sample of data set 1 over an N-hour interval."""
        logger.info("Input is %s", smp_msgs)
        self._assert_summary_produced(
            'elasticsearch', smp_msgs, self._range_spec('hours', hours))

    @given(lists(elements=sampled_from(test_json_msgs_c2), min_size=1), integers(min_value=1, max_value=24), settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds2_hours(self, smp_msgs, hours):
        """Summarize a sample of data set 2 over an N-hour interval."""
        logger.info("Input is %s", smp_msgs)
        self._assert_summary_produced(
            'elasticsearch', smp_msgs, self._range_spec('hours', hours))

    @given(integers(min_value=2, max_value=1000), integers(min_value=1, max_value=24), settings=hs.Settings(timeout=1000))
    def test_text_rank_summarization_ds3_hours(self, sampsize, hours):
        """Summarize a random segment of data set 3 over an N-hour interval.

        ``sampsize`` is drawn by hypothesis but not used by the test body.
        """
        channel, samp = self._random_c3_segment()
        logger.info("Input is segment is %s", samp)
        self._assert_summary_produced(
            channel, samp, self._range_spec('hours', hours))