Пример #1
0
def process_paragraph(sent, without_output=False):
    """Annotate a paragraph and join it with its newline-free parse.

    The text is run through ``nlp`` twice: once unchanged and once with every
    newline replaced by a space. The unchanged parse is converted with
    ``doc_to_format`` and the result is combined with the newline-free parse
    by ``join_clean_raw``.

    Args:
        sent: Paragraph text.
        without_output: When true, the last element of every joined entry is
            sliced off before returning.

    Returns:
        The value returned by ``join_clean_raw``, or a list of its entries
        with their last element removed when ``without_output`` is true.
    """
    raw_doc = nlp(sent)
    clean_doc = nlp(sent.replace('\n', ' '))

    joined = join_clean_raw(clean_doc, doc_to_format(raw_doc))
    if not without_output:
        return joined
    return [entry[:-1] for entry in joined]
Пример #2
0
def parse_sentence(sent):
    """Parse a sentence with ``nlp`` and describe each token as a dict.

    Newlines in ``sent`` are replaced by spaces before parsing.

    Args:
        sent: Sentence text.

    Returns:
        A list with one dict per token, in document order, holding:
        ``'word'`` (token text), ``'id'`` (token position in the parse),
        ``'word_properties'`` (``{'lemma': ...}``) and ``'pos_tag'`` (the
        coarse POS under ``'tag'`` merged with whatever ``render_pos``
        returns for that POS).
    """
    parsed = nlp(sent.replace('\n', ' '))
    # Morphology ('morpho') and dependency fields are currently disabled
    # and therefore not part of the emitted records.
    return [
        {
            'word': token.text,
            'id': position,
            'word_properties': {'lemma': token.lemma_},
            'pos_tag': {'tag': token.pos_, **render_pos(token.pos_)},
        }
        for position, token in enumerate(parsed)
    ]
Пример #3
0
def test_03():
    """Print the FuncaoTextualSentenceParser output for every sample sentence."""
    for raw_sentence in sents:
        parser = FuncaoTextualSentenceParser(raw_sentence, nlp(raw_sentence))
        print(parser)
        print()
Пример #4
0
    def annotate(self, pars):
        """Annotate every sentence of every paragraph and classify the text.

        Each paragraph has its newlines replaced by spaces and is stripped,
        then split into sentences with ``nlp``. Every sentence is passed to
        ``self.stj.annotate``; the short labels it returns are pooled across
        all paragraphs and classified once, after the last paragraph.

        Args:
            pars: Iterable of paragraph strings.

        Returns:
            dict with two keys:
            ``'text_classification'``: result of
            ``ClasificacaoTextual.classify`` applied to the transformed
            pooled labels, or ``None`` when ``pars`` is empty.
            ``'paragraphs'``: one list per paragraph, each holding
            ``{'sent_id', 'original', 'annotation'}`` dicts per sentence.
        """
        labels = []
        paragraphs = []
        for p in pars:
            text = p.replace('\n', ' ').strip()
            paragraph = []
            for i, s in enumerate(nlp(text).sents):
                short_labels, ann = self.stj.annotate(s)
                labels += short_labels
                paragraph.append({
                    'sent_id': i,
                    'original': s.text,
                    'annotation': ann,
                })
            paragraphs.append(paragraph)

        # Classify once over the complete label sequence. Previously this ran
        # inside the paragraph loop (only the last result was kept) and left
        # the classification unbound when ``pars`` was empty.
        # NOTE(review): assumes seq_transform/classify have no side effects
        # that callers relied on being repeated per paragraph - confirm.
        ct = None
        if paragraphs:
            labels_transform = ClasificacaoTextual.seq_transform(labels)
            ct = ClasificacaoTextual.classify(labels_transform)

        return {
            'text_classification': ct,
            'paragraphs': paragraphs,
        }
Пример #5
0
def test_02():
    """Parse all sample sentences and print the parser output for the first."""
    parsed = list(map(nlp, sents))
    first = 0
    print(FuncaoTextualSentenceParser(sents[first], parsed[first]))
Пример #6
0
def test_03():
    """Print the connectors ParseConnectors finds in one fixed sentence."""
    sentence = 'Entendeu-se por isso a culpa.'
    doc = nlp(sentence)
    # A. connectors
    connectors = ParseConnectors().parse(doc)
    print('[connectors]\n', connectors)
Пример #7
0
def test_ssj():
    """Annotate the first four sample sentences and print both results."""
    for text in sents[:4]:
        doc = nlp(text)
        # A fresh SentenceToJson is built for every sentence.
        short_labels, annotation = SentenceToJson().annotate(doc)
        print(short_labels)
        print(annotation)
        print('======')
Пример #8
0
def parse_sentences_list(sents_l):
    """Split a text into sentences and parse each sentence.

    Args:
        sents_l: The text as a single string (despite the name, not a list).
            Newlines are replaced by spaces before sentence splitting.

    Returns:
        A list of dicts, one per sentence in order, with ``'sent_id'``
        (sentence index), ``'original'`` (sentence text) and ``'parsed'``
        (result of ``parse_sentence`` on that text).
    """
    doc = nlp(sents_l.replace('\n', ' '))
    # Collect every sentence text before parsing any of them.
    sentence_texts = [span.text for span in doc.sents]

    records = []
    for index, sentence in enumerate(sentence_texts):
        records.append({
            'sent_id': index,
            'original': sentence,
            'parsed': parse_sentence(sentence),
        })
    return records
Пример #9
0
def test_01():
    """Print what CausalityParser extracts from each sample sentence."""
    docs = list(map(nlp, sents))
    parser = CausalityParser()
    for doc in docs:
        extracted = parser.parse_causality(doc)

        print('sent:', doc)
        for item in extracted:
            print(item)

        print()
    def __init__(self, paragraph):
        """Split a paragraph into sentences and POS-tag every token.

        Args:
            paragraph: Paragraph text; newlines are replaced by spaces
                before it is parsed with ``nlp``.

        Sets ``self.paragraph`` (newline-free text), ``self.lines`` (list of
        sentence spans) and ``self.tokenized_lines`` (per sentence, a list of
        ``(token text, POS)`` tuples), plus the character and preposition
        tables below.
        """
        self.last_symbol_line = "!,.:;?)}]"
        self.first_symbol_line = "({["
        # The list used to contain 'pelo' twice; the duplicate is replaced by
        # the missing plural form 'pelos'.
        self.prep_ll = ['para', 'por', 'pelo', 'pela', 'pelos', 'pelas']
        self.consonants = 'bcdfghjklmnpqrstvwxyz'

        self.paragraph = paragraph.replace('\n', ' ')  # (a) and (b)
        # ``Doc.sents`` is a one-shot generator. Materialize it so that
        # ``self.lines`` is still usable after the tokenizing pass below.
        self.lines = list(nlp(self.paragraph).sents)

        # (d)
        self.tokenized_lines = [
            [(token.text, token.pos_) for token in line]
            for line in self.lines
        ]
Пример #11
0
    def annotate(self, pars):
        """Compute the textual function per paragraph and a table for the text.

        Each paragraph has its newlines replaced by spaces and is stripped,
        then split into sentences with ``nlp``; every sentence is parsed again
        on its own stripped text. ``FuncaoTextual`` is run once per paragraph
        (for its ``'textual_function'``) and once over all sentences of all
        paragraphs (for its ``'table'``).

        Args:
            pars: Iterable of paragraph strings.

        Returns:
            dict with ``'tabela'`` (the ``'table'`` entry of the whole-text
            parse) and ``'paragraphs'`` (a list of
            ``{'id_par', 'funcao_textual'}`` dicts, one per paragraph).
        """
        paragraphs = []
        sentences_all = []
        for i_p, p in enumerate(pars):
            text_rec = p.replace('\n', ' ').strip()
            sentences = [
                nlp(sent.text.strip()) for sent in nlp(text_rec).sents
            ]
            sentences_all += sentences

            # Only the textual function is needed per paragraph; the
            # per-paragraph table was never used and is no longer read.
            text_func = FuncaoTextual(sentences).parse()['textual_function']
            paragraphs.append({'id_par': i_p, 'funcao_textual': text_func})

        # The returned table is built from every sentence of every paragraph.
        table = FuncaoTextual(sentences_all).parse()['table']

        return {'tabela': table, 'paragraphs': paragraphs}
Пример #12
0
def test_01():
    """Run the three parsers over every sample sentence and print the results."""
    connectors = ParseConnectors()
    verbs = ParseVerbs()
    denominacao = ParseDenominacao()

    for text in sents:
        doc = nlp(text)
        # Order matters for the output: A. connectors, B. verbs,
        # C. denominacao.
        for parser in (connectors, verbs, denominacao):
            print(parser.parse(doc))
Пример #13
0
def vocabulary_of_paragraph(text, exclude_list=None):
    """Return the distinct content words of a text, upper-cased.

    The text is parsed with ``nlp``. Tokens tagged ``NOUN``, ``VERB``,
    ``ADJ`` or ``PROPN`` are kept, upper-cased, and dropped when their
    lower-cased form is in ``stopwords_pt`` or in ``exclude_list`` (compared
    case-insensitively). Duplicates are removed, keeping first occurrences
    in document order.

    Args:
        text: Text to analyse.
        exclude_list: Optional iterable of words to leave out. Defaults to
            no exclusions.

    Returns:
        List of upper-cased words in order of first appearance.
    """
    doc = nlp(text)
    # A set gives constant-time exclusion checks; ``None`` replaces the
    # former mutable ``[]`` default.
    excluded = {word.lower() for word in (exclude_list or ())}
    content_pos = ('NOUN', 'VERB', 'ADJ', 'PROPN')

    final_vocabulary = []
    seen = set()
    for token in doc:
        if token.pos_ not in content_pos:
            continue
        word = token.orth_.upper()
        lowered = word.lower()
        if lowered in stopwords_pt or lowered in excluded:
            continue
        # The seen-set replaces the quadratic ``word in vocabulary[:i]`` scan.
        if word not in seen:
            seen.add(word)
            final_vocabulary.append(word)

    return final_vocabulary
Пример #14
0
def test_02():
    """Print connector, verb and denomination results for two fixed sentences."""
    ss = ['Por isso ele não foi à escola.', 'Entendeu-se por isso a culpa.']
    # (header, parser) pairs in print order: A. connectors, B. verbs,
    # C. denominacao.
    stages = (
        ('[connectors]\n', ParseConnectors()),
        ('[verbs]\n', ParseVerbs()),
        ('[den]\n', ParseDenominacao()),
    )

    for text in ss:
        doc = nlp(text)
        print(doc)
        for header, parser in stages:
            print(header, parser.parse(doc))
Пример #15
0
def test_01():
    """Parse the externally defined ``text`` as one document and print the result."""
    analysis = FuncaoTextual([nlp(text)]).parse()
    print(analysis)