def test_convert_economic_state_score_into_integer(self):
    """Check that the converted ``score`` column has a float64 dtype.

    Rows with NA in the economic status score column are dropped first,
    then the score is converted using the watcher type's score map.
    """
    eliminated = parser.eliminate_rows_with_na_in_economic_status(
        self.data, self.watcher_type.iloc_economic_status_score)
    converted = parser.convert_economic_state_score_into_integer(
        eliminated,
        self.watcher_type.iloc_economic_status_score,
        self.watcher_type.score_map)
    # The ``np.float`` alias (an alias of the builtin ``float``) was removed
    # in NumPy 1.24; ``np.float64`` compares equal to the same dtype and
    # works on every NumPy version.
    self.assertEqual(converted.score.dropna().dtype, np.float64)
def __parse_data(data_to_parse: pd.DataFrame, watcher_type: WatcherType):
    """Run ``data_to_parse`` through the parser steps in a fixed order.

    Each step receives the result of the previous one; column positions and
    the score map are read from ``watcher_type``.

    Args:
        data_to_parse: Raw frame handed to the first parser step.
        watcher_type: Supplies ``iloc_economic_status_score``,
            ``iloc_is_tokyo_flag``, ``iloc_field``, ``iloc_reason_sentence``
            and ``score_map``.

    Returns:
        The value returned by the final parser step
        (``parser.clean_sentence_reason``).
    """
    # Drop unusable rows and normalise raw text before deriving columns.
    frame = parser.eliminate_rows_with_na_in_economic_status(
        data_to_parse, watcher_type.iloc_economic_status_score)
    frame = parser.eliminate_newline_code(frame)

    # Derived columns: Tokyo flag, field and region.
    frame = parser.build_is_tokyo_flag(frame, watcher_type.iloc_is_tokyo_flag)
    frame = parser.make_field_column(frame, watcher_type.iloc_field)
    frame = parser.make_region_column(frame)
    frame = parser.clean_field_column(frame)

    # Score conversion uses the watcher-specific score map.
    frame = parser.convert_economic_state_score_into_integer(
        frame,
        watcher_type.iloc_economic_status_score,
        watcher_type.score_map)

    # Reason-sentence handling comes last.
    frame = parser.eliminate_rows_without_sentence(
        frame, watcher_type.iloc_reason_sentence)
    frame = parser.clean_sentence_reason(
        frame, watcher_type.iloc_reason_sentence)
    return frame
def test_eliminate_rows_without_sentence(self):
    """Every reason sentence left after elimination is longer than 1.

    The fixture is first stripped of rows with NA scores and has its score
    converted, then ``eliminate_rows_without_sentence`` is applied.
    """
    watcher = self.watcher_type

    without_na = parser.eliminate_rows_with_na_in_economic_status(
        self.data, watcher.iloc_economic_status_score)
    scored = parser.convert_economic_state_score_into_integer(
        without_na, watcher.iloc_economic_status_score, watcher.score_map)
    with_sentences = parser.eliminate_rows_without_sentence(
        scored, watcher.iloc_reason_sentence)

    # Length of each entry in the reason-sentence column.
    sentence_lengths = with_sentences.iloc[
        :, watcher.iloc_reason_sentence].apply(len)
    self.assertTrue((sentence_lengths > 1).all())
def test_clean_sentence_reason(self):
    """No cleaned reason sentence starts with the center dot ``・``.

    The fixture goes through NA-score elimination, score conversion and
    empty-sentence elimination before ``clean_sentence_reason`` is applied.
    """
    watcher = self.watcher_type

    without_na = parser.eliminate_rows_with_na_in_economic_status(
        self.data, watcher.iloc_economic_status_score)
    scored = parser.convert_economic_state_score_into_integer(
        without_na, watcher.iloc_economic_status_score, watcher.score_map)
    with_sentences = parser.eliminate_rows_without_sentence(
        scored, watcher.iloc_reason_sentence)
    cleaned = parser.clean_sentence_reason(
        with_sentences, watcher.iloc_reason_sentence)

    # ``find`` returning 0 means the center dot is the first character.
    starts_with_dot = cleaned.reason_sentence.str.find('・') == 0
    self.assertFalse(starts_with_dot.any())