def test_key_insertion_deletion(self):
    """Check metadata index bookkeeping on key insertion and deletion.

    Inserting metadata on utterances, conversations and speakers must
    register each key in the corpus meta index; deleting a key through a
    single utterance must leave it in place, while
    ``Corpus.delete_metadata`` must remove it from both the index and the
    utterances.
    """
    # (utterance id, text, speaker id) for the three-utterance fixture.
    fixture_rows = [
        ("0", "hello world", "alice"),
        ("1", "my name is bob", "bob"),
        ("2", "this is a test", "charlie"),
    ]
    corpus = Corpus(utterances=[
        Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
        for utt_id, text, speaker_id in fixture_rows
    ])

    # Insert one metadata key per object type (two utterances share 'foo').
    corpus.get_utterance("0").meta['foo'] = 'bar'
    corpus.get_utterance("1").meta['foo'] = 'bar2'
    corpus.get_utterance("2").meta['hey'] = 'jude'
    corpus.get_conversation(None).meta['convo_meta'] = 1
    corpus.get_speaker("alice").meta['surname'] = 1.0

    # Each index entry is expected to be a list holding the stringified
    # type of the stored value.
    index = corpus.meta_index
    self.assertEqual(index.utterances_index['foo'], [str(str)])
    self.assertEqual(index.conversations_index['convo_meta'], [str(int)])
    self.assertEqual(index.speakers_index['surname'], [str(float)])

    # test that deleting an attribute from an individual utterance fails to remove it
    last_utterance_meta = corpus.get_utterance("2").meta
    del last_utterance_meta['hey']
    # A plain lookup is the check: a KeyError here would fail the test.
    corpus.get_utterance("2").meta['hey']

    # test that delete_metadata works
    corpus.delete_metadata('utterance', 'foo')
    with self.assertRaises(KeyError):
        corpus.meta_index.utterances_index['foo']
    with self.assertRaises(KeyError):
        corpus.get_utterance("0").meta["foo"]
def transform(self, corpus: Corpus) -> Corpus:
    """Run the parent transform, then optionally swap in the processed text.

    When ``self.replace_text`` is set, every utterance accepted by
    ``self.input_filter`` has its ``text`` replaced by the value stored in
    its ``self.output_field`` metadata (presumably written by the parent
    ``transform`` -- confirm against the base class). If
    ``self.save_original`` is set, the previous text is kept under
    ``self.output_field``; otherwise that metadata field is deleted from
    the corpus's utterances.

    :param corpus: the corpus to process; it is modified in place.
    :return: the same corpus object.
    """
    super().transform(corpus)

    if not self.replace_text:
        return corpus

    field_name = self.output_field
    keep_original = self.save_original

    def is_selected(utterance):
        # Second argument of the filter is always passed as None here.
        return self.input_filter(utterance, None)

    for utterance in corpus.iter_utterances(is_selected):
        # Read the processed text first: the next step may overwrite the
        # same metadata field with the original text.
        replacement = utterance.retrieve_meta(field_name)
        if keep_original:
            utterance.add_meta(field_name, utterance.text)
        utterance.text = replacement

    if not keep_original:
        # The field only held the processed text, which now lives in
        # ``text``, so it is removed from the utterance metadata.
        corpus.delete_metadata('utterance', field_name)

    return corpus