Пример #1
0
    def test_overlap_diff_metadata(self):
        """
        Merge two corpora that share utterance "2" (same text and user) but
        attach different metadata to it.

        Expected outcome: for keys present in both corpora the value from the
        second corpus is used, and the merged utterance ends up with three
        metadata entries.
        """
        first_meta = {
            'hey': 'jude',
            'the': 'beatles'
        }
        second_meta = {
            'hey': 'jude',
            'the': 'ringo',
            'let it': 'be'
        }

        first_utterances = [
            Utterance(id="0", text="hello world", user=User(id="alice")),
            Utterance(id="1", text="my name is bob", user=User(id="bob")),
            Utterance(id="2", text="this is a test", user=User(id="charlie"), meta=first_meta),
        ]
        corpus1 = Corpus(utterances=first_utterances)

        second_utterances = [
            Utterance(id="2", text="this is a test", user=User(id="charlie"), meta=second_meta),
            Utterance(id="4", text="this is a sentence", user=User(id="echo")),
            Utterance(id="5", text="goodbye", user=User(id="foxtrot")),
        ]
        corpus2 = Corpus(utterances=second_utterances)

        merged = corpus1.merge(corpus2)

        # Utterance "2" appears in both inputs, so 3 + 3 collapses to 5.
        utterance_count = len(list(merged.iter_utterances()))
        self.assertEqual(utterance_count, 5)
        user_count = len(list(merged.iter_users()))
        self.assertEqual(user_count, 5)

        # The shared utterance carries the combined metadata; the
        # conflicting key 'the' takes the second corpus' value.
        shared_meta = merged.get_utterance("2").meta
        self.assertEqual(len(shared_meta), 3)
        self.assertEqual(shared_meta['the'], 'ringo')
Пример #2
0
    def test_no_overlap(self):
        """
        Basic merge of two corpora whose utterance ids are disjoint.

        The merged corpus should hold all six utterances and six speakers,
        while each input corpus still reports its own three utterances.
        """
        # (utterance id, text, speaker id) for each corpus, in insertion order.
        first_rows = [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
        second_rows = [
            ("3", "i like pie", "delta"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ]

        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=body, speaker=Speaker(id=speaker_id))
            for utt_id, body, speaker_id in first_rows
        ])
        corpus2 = Corpus(utterances=[
            Utterance(id=utt_id, text=body, speaker=Speaker(id=speaker_id))
            for utt_id, body, speaker_id in second_rows
        ])

        merged = corpus1.merge(corpus2)

        merged_utterance_count = len(list(merged.iter_utterances()))
        self.assertEqual(merged_utterance_count, 6)
        merged_speaker_count = len(list(merged.iter_speakers()))
        self.assertEqual(merged_speaker_count, 6)

        # The inputs are checked after the merge as well.
        first_count = len(list(corpus1.iter_utterances()))
        self.assertEqual(first_count, 3)
        second_count = len(list(corpus2.iter_utterances()))
        self.assertEqual(second_count, 3)
Пример #3
0
    def test_with_overlap(self):
        """
        Basic merge where utterance "2" appears in both corpora with the same
        text and user (and no metadata).

        The duplicate should be counted once in the merged corpus: five
        utterances and five users, with each input corpus still reporting
        three utterances.
        """
        # (utterance id, text, user name) for each corpus, in insertion order.
        first_rows = [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
        second_rows = [
            ("2", "this is a test", "charlie"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ]

        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=body, user=User(name=user_name))
            for utt_id, body, user_name in first_rows
        ])
        corpus2 = Corpus(utterances=[
            Utterance(id=utt_id, text=body, user=User(name=user_name))
            for utt_id, body, user_name in second_rows
        ])

        merged = corpus1.merge(corpus2)

        merged_utterance_count = len(list(merged.iter_utterances()))
        self.assertEqual(merged_utterance_count, 5)
        merged_user_count = len(list(merged.iter_users()))
        self.assertEqual(merged_user_count, 5)

        # The inputs are checked after the merge as well.
        first_count = len(list(corpus1.iter_utterances()))
        self.assertEqual(first_count, 3)
        second_count = len(list(corpus2.iter_utterances()))
        self.assertEqual(second_count, 3)
Пример #4
0
 def setUp(self) -> None:
     """
     Build the corpus fixture: a conversation tree rooted at "0" plus a
     separate single utterance "other" that is its own root.

     Reply structure of conversation "0" (siblings are drawn left to right
     in increasing timestamp order):
                0
         1      2      3
       4 5 6   7 8     9
     10                11

     Every utterance gets its own Speaker instance with id "alice". After
     the corpus is built, 'hey' is set on conversation "0"'s metadata and
     'foo' on the corpus-level metadata.
     """
     # (utterance id, reply_to, root, timestamp), in insertion order.
     # Note that "2" is inserted before "1"; that order is kept as-is.
     utterance_specs = (
         ("0", None, "0", 0),
         ("2", "0", "0", 2),
         ("1", "0", "0", 1),
         ("3", "0", "0", 3),
         ("4", "1", "0", 4),
         ("5", "1", "0", 5),
         ("6", "1", "0", 6),
         ("7", "2", "0", 4),
         ("8", "2", "0", 5),
         ("9", "3", "0", 4),
         ("10", "4", "0", 5),
         ("11", "9", "0", 10),
         ("other", None, "other", 99),
     )

     self.corpus = Corpus(utterances=[
         Utterance(id=utt_id,
                   reply_to=parent_id,
                   root=root_id,
                   speaker=Speaker(id="alice"),
                   timestamp=stamp)
         for utt_id, parent_id, root_id, stamp in utterance_specs
     ])

     root_conversation = self.corpus.get_conversation("0")
     root_conversation.meta['hey'] = 'jude'
     self.corpus.meta['foo'] = 'bar'