Example #1
    def test_score_order(self):
        a = Sentence(u"Call History .".split())
        b = Sentence(u"Historial de llamadas .".split())
        score1 = self.score(a, b)
        a = Sentence(u"Replace the cover .".split())
        b = Sentence(u"Vuelva a ingresar un nuevo código de bloqueo .".split())
        score2 = self.score(a, b)
        self.assertLess(score1, score2)
    def test_sample_values(self):
        A, B = sentences([u'A', u'B']), sentences([u'Y', u'Z'])
        samples = list(_misaligned_samples(A, B, [(0, 0), (1, 1)]))
        s0 = SentencePair(Sentence([u'A']), Sentence([u'Z']))
        s1 = SentencePair(Sentence([u'B']), Sentence([u'Y']))
        self.assertEqual(2, len(samples))
        for sample in samples:
            self.assertIn(sample, [s0, s1])
    def test_sample_values(self):
        A, B = sentences([u'A', u'B']), sentences([u'Y', u'Z'])
        samples = list(_aligned_samples(A, B, [(0, 1), (1, 0)]))
        # Note alignments swapped so A -> Z and B -> Y
        s0 = SentencePair(Sentence([u'A']), Sentence([u'Z']), aligned=True)
        s1 = SentencePair(Sentence([u'B']), Sentence([u'Y']), aligned=True)
        self.assertEqual(2, len(samples))
        self.assertEqual([s0, s1], samples)
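
A minimal sketch of the SentencePair usage the two sample tests above rely on; the yalign.datatypes import path is an assumption inferred from the identifiers in these tests.

from yalign.datatypes import Sentence, SentencePair   # assumed import path

pair = SentencePair(Sentence([u"House"]), Sentence([u"Casa"]), aligned=True)
# aligned=True marks a gold (correct) pair; the _misaligned_samples test above
# simply omits the flag.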
Example #4
    def test_generates_numbers(self):
        a = Sentence(u"house you".split())
        b = Sentence(u"casa usted".split())
        x = self.score(a, b)
        self.assertIsInstance(x, (int, float))
        a = Sentence(u"Valar Morghulis".split())
        b = Sentence(u"Dracarys".split())
        x = self.score(a, b)
        self.assertIsInstance(x, (int, float))
Example #5
    def test_number_of_word_pair_scores_better_than_all_mismatchs(self):
        a = Sentence(u"house µa µb µc µd".split())
        b = Sentence(u"casa  µ1 µ2 µ3 µ4".split())
        s1 = self.score.problem.number_of_word_pair_scores(SentencePair(a, b))

        c = Sentence(u"µx µa µb µc µd".split())
        d = Sentence(u"µ5 µ1 µ2 µ3 µ4".split())
        s2 = self.score.problem.number_of_word_pair_scores(SentencePair(c, d))

        self.assertGreater(s1, s2)
Example #6
def tokenize(text, language="en"):
    """
    Returns a Sentence with Words (ie, a list of unicode objects)
    """
    if not isinstance(text, unicode):
        raise ValueError("Can only tokenize unicode strings")
    return Sentence(_tokenizers[language].tokenize(text), text=text)
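
A hedged usage sketch for tokenize() above; the yalign.input_conversion import path is an assumption, and the exact token split depends on the language tokenizer.

from yalign.input_conversion import tokenize   # assumed import path

sentence = tokenize(u"Replace the cover.", language="en")
# `sentence` is a Sentence (a list of unicode tokens), roughly
# [u"Replace", u"the", u"cover", u"."]; passing a non-unicode str raises
# ValueError, as the isinstance check above shows.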
Example #7
    def test_save_load_and_align(self):
        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result_before_save = self.model.align(doc1, doc2)

        # Save
        tmp_folder = tempfile.mkdtemp()
        self.model.save(tmp_folder)

        # Load
        new_model = YalignModel.load(tmp_folder)
        result_after_load = new_model.align(doc1, doc2)
        self.assertEqual(result_before_save, result_after_load)
        self.assertEqual(self.model.threshold, new_model.threshold)
        self.assertEqual(self.model.document_pair_aligner.penalty,
                         new_model.document_pair_aligner.penalty)
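
A sketch of the save/load round trip this test exercises, assuming a pre-trained model is passed in and that YalignModel.load() is a classmethod, as the test implies.

import tempfile

from yalign.yalignmodel import YalignModel   # assumed import path


def align_roundtrip(model, doc1, doc2):
    """Persist `model`, reload it and align the same documents with both copies."""
    folder = tempfile.mkdtemp()
    model.save(folder)                    # keeps threshold, aligner penalty, etc.
    restored = YalignModel.load(folder)
    return model.align(doc1, doc2), restored.align(doc1, doc2)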
Example #8
def srt_to_document(text, lang="en"):
    """ Convert a string of srt into a list of Sentences. """
    text = UnicodeDammit(text).markup
    d = []
    for m in SRT_REGEX.finditer(text):
        sent = m.group(1)
        sent = SRT_PRE_IGNORE.sub("", sent)
        sent = Sentence(x for x in tokenize(sent, lang)
                        if x not in SRT_POST_IGNORE)
        d.append(sent)
    return d
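
A hedged usage sketch for srt_to_document(); the subtitle blob is illustrative and the exact tokens depend on the SRT_*_IGNORE patterns above.

srt_text = ("1\n"
            "00:00:01,000 --> 00:00:03,500\n"
            "Call History.\n"
            "\n"
            "2\n"
            "00:00:04,000 --> 00:00:06,500\n"
            "Replace the cover.\n")

document = srt_to_document(srt_text, lang="en")
# `document` is a list with one Sentence per subtitle block, roughly
# [Sentence([u"Call", u"History", u"."]),
#  Sentence([u"Replace", u"the", u"cover", u"."])]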
Example #9
def _sentence_from_csv_elem(elem, label, labels):
    words = elem[labels[label]].decode("utf-8").split()
    sentence = Sentence(words)
    sentence.check_is_tokenized()
    return sentence
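
An illustrative call, assuming `elem` is a raw csv row of UTF-8 byte strings and `labels` maps column names to indexes, as the indexing and decode("utf-8") above suggest.

labels = {"source": 0, "target": 1}   # column name -> index (assumed layout)
row = ["Call History .", "Historial de llamadas ."]

source = _sentence_from_csv_elem(row, "source", labels)
target = _sentence_from_csv_elem(row, "target", labels)
# each result is a Sentence of unicode words; check_is_tokenized() guards
# against untokenized input.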
Example #10
def _document(lines):
    doc = [Sentence(line.split()) for line in lines]
    for sentence in doc:
        sentence.check_is_tokenized()
    return doc
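
A minimal sketch of the input _document() expects: an iterable of already tokenized unicode lines, tokens separated by single spaces.

doc = _document([u"Call History .", u"Replace the cover ."])
# doc[0] == Sentence([u"Call", u"History", u"."]) and so on; each Sentence has
# already passed check_is_tokenized().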
Example #11
def _sentence_from_csv_elem(elem, label, labels):
    words = elem[labels[label]].decode("utf-8").split()
    sentence = Sentence(words)
    sentence.check_is_tokenized()
    return sentence
def sentences(xs):
    return [Sentence([unicode(x)]) for x in xs]
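
This is the helper behind the sample tests near the top of the page; a quick sketch of what it produces.

A, B = sentences(["A", "B"]), sentences(["Y", "Z"])
# A == [Sentence([u"A"]), Sentence([u"B"])]: one single-word Sentence per item,
# the shape expected by the _aligned_samples/_misaligned_samples tests above.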
Example #13
    def test_reasonable_alignment(self):
        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result = self.model.align(doc1, doc2)
        result = [(list(x), list(y)) for x, y in result]
        self.assertIn((list(doc1[0]), list(doc2[0])), result)
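
A sketch of how the alignment result above can be unpacked, assuming model.align() yields (Sentence, Sentence) pairs as the list comprehension in the test implies.

def aligned_token_pairs(model, doc1, doc2):
    """Return the alignment as plain (token list, token list) tuples."""
    return [(list(a), list(b)) for a, b in model.align(doc1, doc2)]

# For the documents above the expected entry is ([u"House"], [u"Casa"]);
# the test only checks that this pair is present in the result.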