def test_generate_summary(self):
    """Check Summarizer.generate_summary against three fixture dataframes.

    Covers the simple, stack-lines and multi-cluster fixtures held on the
    test case, asserting on the 'Size', 'Text' and 'ClassLines' columns of
    each generated summary.
    """
    # Simple fixture: three summary rows, each expected text present, and
    # at least one empty 'ClassLines' entry.
    simple_summary = Summarizer(
        self.simple_dataframe, self.config).generate_summary()
    self.assertEqual(len(simple_summary), 3)
    self.assertIn(1, simple_summary['Size'].values)
    simple_texts = simple_summary['Text'].values
    for expected_text in ('test', 'test2', 'test3'):
        self.assertIn(expected_text, simple_texts)
    self.assertIn('', simple_summary['ClassLines'].values)

    # Stack-lines fixture: each class file name shows up in 'ClassLines',
    # and at least one 'Text' entry is empty.
    stack_summary = Summarizer(
        self.stack_lines_dataframe, self.config).generate_summary()
    stack_class_lines = stack_summary['ClassLines'].values
    for expected_class in (
            'some.class.java', 'some.class2.java', 'some.class3.java'):
        self.assertIn(expected_class, stack_class_lines)
    self.assertIn('', stack_summary['Text'].values)

    # Multi-cluster fixture: two summary rows, with sizes 2 and 1 present.
    cluster_summary = Summarizer(
        self.multi_cluster_dataframe, self.config).generate_summary()
    self.assertEqual(len(cluster_summary), 2)
    cluster_sizes = cluster_summary['Size'].values
    for expected_size in (2, 1):
        self.assertIn(expected_size, cluster_sizes)
def run_classification_summary(df, classifier_config):
    """Run every classifier over ``df`` and return the resulting summary.

    The error-code matcher runs first, then the k-means clusterer, and
    finally the summarizer; all three are constructed from the same
    dataframe and configuration.

    Args:
      df: pandas dataframe holding the error information to classify and
        summarize.
      classifier_config: config_pb2 proto specified by the configuration
        file.

    Returns:
      The pandas dataframe produced by ``Summarizer.generate_summary``,
      summarizing what the classifiers found in ``df``.
    """
    # Classification passes. Their return values are discarded, so they
    # presumably record results on ``df`` itself -- TODO confirm.
    matcher = ErrorCodeMatcher(df, classifier_config)
    matcher.match_informative_errors()

    clusterer = KMeansClusterer(df, classifier_config)
    clusterer.cluster_errors()

    # Summarization pass over the same dataframe.
    summary_builder = Summarizer(df, classifier_config)
    summary = summary_builder.generate_summary()
    return summary