Python TeproAlgo.getNumeralRewritingOperName примеры использования

Язык программирования: Python

Пространство имен/Пакет: TeproAlgo

Класс/Тип: TeproAlgo

Метод/Функция: getNumeralRewritingOperName

Примеров на hotexamples.com: 4

Python TeproAlgo.getNumeralRewritingOperName — найдено 4 примера. Это лучшие примеры кода на Python для TeproAlgo.TeproAlgo.getNumeralRewritingOperName, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

getSentenceSplittingOperName(10)

getAvailableOperations(9)

getLemmatizationOperName(8)

getPOSTaggingOperName(8)

getTokenizationOperName(8)

getDependencyParsingOperName(5)

getBiomedicalNamedEntityRecognitionOperName(4)

getHyphenationOperName(4)

getNamedEntityRecognitionOperName(4)

getNumeralRewritingOperName(4)

getPhoneticTranscriptionOperName(4)

getChunkingOperName(3)

getAbbreviationRewritingOperName(3)

getStressIdentificationOperName(3)

getTextNormOperName(2)

getDiacRestorationOperName(2)

reconfigureWithStrictRequirements(1)

canPerform(1)

getOperationsForAlgo(1)

getDefaultAlgoForOper(1)

getAvailableAlgorithms(1)

getAlgorithmsForOper(1)

resolveDependencies(1)

Пример #1

Показать файл

Файл: test_platform.py Проект: Nenma/teprolin-preprocessing

def test_TTS():
    """Run the TTS-related operations on ``text5`` and check selected tokens.

    Sentence splitting, tokenization, POS tagging and lemmatization are
    first configured to use ``TeproAlgo.algoTTL``; hyphenation, stress
    identification, phonetic transcription, abbreviation rewriting and
    numeral rewriting are then requested through ``tepro.pcExec``.
    """
    for getOperName in (
            TeproAlgo.getSentenceSplittingOperName,
            TeproAlgo.getTokenizationOperName,
            TeproAlgo.getPOSTaggingOperName,
            TeproAlgo.getLemmatizationOperName):
        tepro.configure(getOperName(), TeproAlgo.algoTTL)

    requested = [
        TeproAlgo.getHyphenationOperName(),
        TeproAlgo.getStressIdentificationOperName(),
        TeproAlgo.getPhoneticTranscriptionOperName(),
        TeproAlgo.getAbbreviationRewritingOperName(),
        TeproAlgo.getNumeralRewritingOperName(),
    ]
    dto = tepro.pcExec(text5, requested)

    # The whole input is expected to come back as exactly one sentence.
    assert dto.getNumberOfSentences() == 1

    # All remaining checks look at tokens of that single sentence.
    tokens = dto.getSentenceTokens(0)

    assert tokens[3].getExpansion() == 'o sută douăzeci și trei'
    assert tokens[0].getSyllables() == "a-'ceas-ta"
    assert tokens[0].getPhonetical() == "a ch e@ a s t a"
    assert tokens[11].getSyllables() == "'vir-gu-lă"
    assert tokens[11].getPhonetical() == "v i r g u l @"
    assert tokens[14].getExpansion() == \
        'patruzeci și cinci virgulă șase sute treizeci și unu'

Пример #2

Показать файл

    def copyFrom(self, fromTok, align: list, oper: str):
        """Transfer the annotation belonging to 'oper' from fromTok
        into the matching field of this token.

        align holds 0-based 2-place tuples (i, j), i being an index
        from the source and j an index from the target. It is only
        consulted when the dependency head is copied.
        An 'oper' that matches none of the known operations leaves
        this token untouched."""

        if oper == TeproAlgo.getPOSTaggingOperName():
            self.setMSD(fromTok.getMSD())
            self.setCTAG(fromTok.getCTAG())
            return

        if oper == TeproAlgo.getLemmatizationOperName():
            self.setLemma(fromTok.getLemma())
            return

        if oper == TeproAlgo.getChunkingOperName():
            self.setChunk(fromTok.getChunk())
            return

        if oper == TeproAlgo.getDependencyParsingOperName():
            self.setDepRel(fromTok.getDepRel())
            srcHead = fromTok.getHead()

            if srcHead == 0:
                self.setHead(0)
                return

            # align indexes are 0-based, hence the +1 on both sides;
            # the mapped head stays 0 when no pair matches.
            tgtHead = next(
                (j + 1 for (i, j) in align if i + 1 == srcHead), 0)

            # A token must not become its own head (that would be a cycle).
            if tgtHead != self._id:
                self.setHead(tgtHead)

            return

        if oper == TeproAlgo.getAbbreviationRewritingOperName():
            self.setExpansion(fromTok.getExpansion())
            return

        if oper == TeproAlgo.getHyphenationOperName():
            self.setSyllables(fromTok.getSyllables())
            return

        if oper == TeproAlgo.getPhoneticTranscriptionOperName():
            self.setPhonetical(fromTok.getPhonetical())
            return

        if oper == TeproAlgo.getNumeralRewritingOperName():
            self.setExpansion(fromTok.getExpansion())
            return

        if oper == TeproAlgo.getStressIdentificationOperName():
            self.setSyllables(fromTok.getSyllables())
            return

        if oper == TeproAlgo.getNamedEntityRecognitionOperName():
            self.setNER(fromTok.getNER())
            return

        if oper == TeproAlgo.getBiomedicalNamedEntityRecognitionOperName():
            self.setBioNER(fromTok.getBioNER())

Пример #3

Показать файл

Файл: TTSOps.py Проект: Nenma/teprolin-preprocessing

    def _runApp(self, dto, opNotDone):
        """Fill syllables, phonetic transcription and expansion on the
        tokens of dto for the operations still listed in opNotDone.

        Each sentence is rebuilt from its word forms (joined with a
        single space) and passed to self._getSentenceAnnotation. The
        result is applied only if it has as many entries as the sentence
        has tokens, and only to tokens whose word form equals entry[0].
        entry[1] feeds setSyllables, entry[2] setPhonetical and entry[3]
        setExpansion; a '_' value is never copied.
        Returns the same dto object."""

        if TeproAlgo.getTokenizationOperName() in opNotDone:
            # Tokenization is required for MLPLAServer to work
            return dto

        for sentIdx in range(dto.getNumberOfSentences()):
            sentence = dto.getSentenceTokens(sentIdx)
            surface = " ".join([tok.getWordForm() for tok in sentence])
            annotated = self._getSentenceAnnotation(surface)

            if len(annotated) != len(sentence):
                # Lengths differ: tokens cannot be paired, skip the sentence.
                continue

            for token, entry in zip(sentence, annotated):
                if token.getWordForm() != entry[0]:
                    # Word forms differ: do not trust this entry.
                    continue

                if entry[1] != '_' and (
                        TeproAlgo.getHyphenationOperName() in opNotDone
                        or TeproAlgo.getStressIdentificationOperName() in opNotDone):
                    token.setSyllables(entry[1])

                if entry[2] != '_' and \
                        TeproAlgo.getPhoneticTranscriptionOperName() in opNotDone:
                    token.setPhonetical(entry[2])

                if entry[3] != '_' and (
                        TeproAlgo.getNumeralRewritingOperName() in opNotDone
                        or TeproAlgo.getAbbreviationRewritingOperName() in opNotDone):
                    token.setExpansion(entry[3])

        return dto

Пример #4

Показать файл

Файл: PyTEPRO.py Проект: racai-ai/TEPROLIN

def main():
    """Demonstrate and smoke-test the Teprolin object.

    Creates a Teprolin instance, switches the base operations between
    TeproAlgo.algoTTL and TeproAlgo.algoUDPipe via configure(), runs
    pcFull / pcExec / pcDiac / pcLemma on a series of sample texts and
    prints the results (dumpConllX, getText, JSON dump).
    Takes no arguments and returns nothing.
    """
    # How to use the Teprolin Python 3 object:
    # 1. Create the object
    tepro = Teprolin()

    # 0.9 Test NER auto-configuration
    text = "Intel Celeron N4020"
    dto = tepro.pcFull(text)
    dto.dumpConllX()

    # 1.0 Check new TTSOps
    text = "Aceasta este propoziția 123 de test și nu-ți dă cu virgulă ca în 45.631."
    tepro.configure(TeproAlgo.getSentenceSplittingOperName(),
                    TeproAlgo.algoTTL)
    tepro.configure(TeproAlgo.getTokenizationOperName(), TeproAlgo.algoTTL)
    tepro.configure(TeproAlgo.getPOSTaggingOperName(), TeproAlgo.algoTTL)
    tepro.configure(TeproAlgo.getLemmatizationOperName(), TeproAlgo.algoTTL)
    dto = tepro.pcExec(
        text, [TeproAlgo.getHyphenationOperName(), TeproAlgo.getPhoneticTranscriptionOperName(),
        TeproAlgo.getNumeralRewritingOperName()])
    dto.dumpConllX()

    # NOTE(review): the value returned by getStats is discarded here —
    # presumably the call is only exercised for errors; confirm.
    tepro.getStats(Teprolin.statsTokens, Teprolin.statsDay, 2)

    # 1.1 Test the UDPipe flow
    tepro.configure(
        TeproAlgo.getSentenceSplittingOperName(), TeproAlgo.algoUDPipe)
    tepro.configure(TeproAlgo.getTokenizationOperName(), TeproAlgo.algoUDPipe)
    tepro.configure(TeproAlgo.getPOSTaggingOperName(), TeproAlgo.algoUDPipe)
    tepro.configure(TeproAlgo.getLemmatizationOperName(), TeproAlgo.algoUDPipe)

    # NOTE(review): the backslash continuations below sit inside the string
    # literal, so the indentation of the continued lines becomes part of the
    # text — presumably unintended for this sample; confirm.
    text = "Diabetul zaharat este un sindrom caracterizat prin valori crescute ale concentrației glucozei \
        in sange (hiperglicemie) si dezechilibrarea metabolismului. \
        Daca l-ai luat, te-ai imbolnavit destul de grav."
    dto = tepro.pcExec(
        text, [TeproAlgo.getDependencyParsingOperName()])
    dto.dumpConllX()

    # 2. Optionally, configure the operation execution
    # Example configuration call
    # (this switches the four base operations back from UDPipe to TTL)
    tepro.configure(TeproAlgo.getSentenceSplittingOperName(),
                    TeproAlgo.algoTTL)
    tepro.configure(TeproAlgo.getTokenizationOperName(), TeproAlgo.algoTTL)
    tepro.configure(TeproAlgo.getPOSTaggingOperName(), TeproAlgo.algoTTL)
    tepro.configure(TeproAlgo.getLemmatizationOperName(), TeproAlgo.algoTTL)

    # 2.1 Test biomedical NER
    text = "Diabetul zaharat este un sindrom caracterizat prin valori crescute ale concentrației glucozei în sânge (hiperglicemie) și dezechilibrarea metabolismului."
    dto = tepro.pcExec(
        text, [TeproAlgo.getBiomedicalNamedEntityRecognitionOperName()])
    dto.dumpConllX()

    # 2.2 Test NER
    text = "Instanta suprema reia astazi judecarea. In dosar, se judeca Liviu Dragnea cu Ministerul Justitiei, condus de Tudorel Toader."
    dto = tepro.pcExec(text, [TeproAlgo.getNamedEntityRecognitionOperName()])
    dto.dumpConllX()

    # 2.3 Test for some bugs
    text = "Am aflat aprope ca euro si dolarul sunt cele mai bune."
    dto = tepro.pcFull(text)
    dto.dumpConllX()

    text = "Stia ca demonstratia o sa fie un succes."
    dto = tepro.pcFull(text)
    dto.dumpConllX()

    # 2.4 Test for a crash
    # (two long multi-paragraph texts containing embedded "\n\n" breaks)
    text = "Președintele Klaus Iohannis a anunțat că nu promulgă legea bugetului pe 2019 și sesizează Curtea Constituțională. " + \
        "„Este bugetul rușinii naționale”, a spus șeful statului care a acuzat PSD că e incapabil să guverneze pentru România, singura preocupare fiind Liviu Dragnea.\n\n" + \
        "„Un lucru este clar, Guvernarea PSD a eșuat. În spitale, probleme peste probleme Educația este subfinanțată. " + \
        "România este bulversată mai ales după OUG 114, dată în mare taină la finalul anului trecut. " + \
        "Despre justiție, întreaga guvernare pesedistă a fost un asalt asupra statului de drept din România. PSD e incapabil să conducă România. " + \
        "PSD nu guvernează pentru români, PSD guvernează pentru Dragnea”, a spus Iohannis.\n\n" + \
        "Referindu-se la bugetul pe 2019, șeful statului a spus că acesta este „nerealist și supraevaluat”, calificându-l drept unul al „rușinii naționale”.\n\n" + \
        "Președintele a acuzat PSD că nu are bani de investiții, dar are bani pentru partid. " + \
        "„150 de milioane va primi PSD din finanțarea partidelor, din 270 de milioane propuse pentru finanțarea partidelor. " + \
        "PSD și-a tras bani de 20 de ori mai mult decât anul trecut (președinția a precizat ulterior că această comparație a fost făcută cu 2016-n.r.). " + \
        "Pentru asta au bani”, a spus Iohannis.\n"
    dto = tepro.pcFull(text)
    dto.dumpConllX()

    text = "HotNews.ro transmite LIVETEXT cele mai importante declarații din cadrul audierilor\n\n" + \
        "Ora 17,00: Andres Ritter, candidatul Germaniei a vorbit despre necesitatea înființării Parchetului European în contextul fraudelor și corupției, " + \
        "care slăbesc credibilitatea UE în ochii contribuabililor. În opinia sa, abordarea la nivel național nu a fost suficientă, este necesară o " + \
        "abordare unitară la nivelul UE\n\n" + \
        "Ora 16:40 S-a stabilit ordinea audierilor, prin tragere la sorți: " + \
        "Primul va fi audiat candidatul Germaniei, Andrés Ritter (54 de ani), urmat de candidatul Franței, " + \
        "Jean-François Bohnert (58 de ani) și de Laura Codruța Kovesi.\n"
    dto = tepro.pcFull(text)
    dto.dumpConllX()

    # NOTE(review): the runs of spaces, the "\t" escapes and the "\n\n" breaks
    # look like a deliberate irregular-whitespace sample; the indentation of
    # the continued lines is also part of the string — confirm intent.
    text = "La 7      minute de centrul Brasovului,  imobilul\tpropus \
        spre vanzare se adreseaza\t\tcelor care isi doresc un spatiu \
        generos de locuit.\n\nAmplasarea constructiei  si\t\tgarajul reusesc sa exploateze \
        la maxim lotul de teren de 670 mp, ce are o deschidere de 15 ml.\n"

    # 3. Call one of the already created 'processing chains' ('pc' for short)
    # or call the generic pcExec method.
    # Example 1: using a canned processing chain ('pc'), e.g. diacritics insertion.
    dto = tepro.pcDiac(text)
    print(dto.getText())

    # Example 2: using another canned pc, e.g. lemmatization.
    # Objects json cannot serialize natively are dumped through their __dict__.
    dto = tepro.pcLemma(text)
    print(json.dumps(dto.jsonDict(), default=lambda x: x.__dict__))

    # Example 3: requesting specific operations, e.g. hyphenation and phonetic transcription.
    # TEPROLIN will figure out what else has to run such that these two operations are applied.
    dto = tepro.pcExec(text, [TeproAlgo.getHyphenationOperName(
    ), TeproAlgo.getPhoneticTranscriptionOperName()])
    dto.dumpConllX()

    tepro.getStats(Teprolin.statsTokens, Teprolin.statsMonth, 5)