Example #1
def load_config():
    """
    Loads the configuration
    :return:
    """
    #print(environment)

    config = ConfigParser.ConfigParser()
    here = os.path.abspath(os.path.dirname(__file__))
    config_file = os.path.join(here, '../' + environment + '.ini')
    config.read(config_file)

    # Global configuration parameters
    nltk.data.path.append(config.get('nltk', 'data_dir'))
    nlpnet.set_data_dir(config.get('nlpnet', 'data_dir'))

    # Logging
    logging.config.fileConfig(config_file)

    # Cache configurations
    cache_opts = {
        'cache.regions': config.get('lbsociam', 'cache.regions'),
        'cache.type': config.get('lbsociam', 'cache.type'),
        'cache.short_term.expire': config.get('lbsociam', 'cache.short_term.expire'),
        'cache.default_term.expire': config.get('lbsociam', 'cache.default_term.expire'),
        'cache.long_term.expire': config.get('lbsociam', 'cache.long_term.expire')
    }

    cache = CacheManager(**parse_cache_config_options(cache_opts))

    return config
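For reference, a minimal sketch of the <environment>.ini file this function expects; the section and key names are taken from the config.get calls above, and every value is a placeholder:

[nltk]
data_dir = /path/to/nltk_data

[nlpnet]
data_dir = /path/to/nlpnet-data

[lbsociam]
cache.regions = short_term, default_term, long_term
cache.type = memory
cache.short_term.expire = 300
cache.default_term.expire = 3600
cache.long_term.expire = 86400

; logging.config.fileConfig(config_file) additionally requires the standard
; [loggers], [handlers] and [formatters] sections in this same file.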
Example #2
    def get_dependencies(self, dependency_string):
        """
        Returns dependency_string with sentence dependencies included.
        """

        nlpnet.set_data_dir(self.get_data_dir_path())
        dependency_parser = nlpnet.DependencyParser()

        return dependency_parser.parse(dependency_string)
Example #3
    def get_dependencies(self, dependency_string):
        """
        Returns dependency_string with sentence dependencies included.
        """

        nlpnet.set_data_dir(self.get_data_dir_path())
        dependency_parser = nlpnet.DependencyParser()

        return dependency_parser.parse(dependency_string)
Example #4
    def tokenize(self, tokenize_string):
        """
        Returns the tokenized version of tokenize_string, which is just
        a normal English sentence.
        """

        # Setting up the nlpnet parser
        nlpnet.set_data_dir(self.get_data_dir_path())
        pos_parser = nlpnet.POSTagger()

        return pos_parser.tag(tokenize_string)
Example #5
    def tokenize(self, tokenize_string):
        """
        Returns the tokenized version of tokenize_string, which is just
        a normal English sentence.
        """

        # Setting up the nlpnet parser
        nlpnet.set_data_dir(self.get_data_dir_path())
        pos_parser = nlpnet.POSTagger()

        return pos_parser.tag(tokenize_string)
Example #6
 def __init__(self, parent, *args, **kwargs):
     wx.Panel.__init__(self, parent=parent, *args, **kwargs)
     self._init_grid()
     
     self.Bind(wx.EVT_BUTTON, self.on_run, self.button_run)
     self.Bind(wx.EVT_BUTTON, self.on_load, self.button_load)
     self.Bind(wx.EVT_BUTTON, self.on_save, self.button_save)
     self.Bind(wx.EVT_SPINCTRL, self.on_change_font, self.font_spin)
     
     nlpnet.set_data_dir('data')
     self.pos_tagger = nlpnet.POSTagger(language='pt')
     self.srl_tagger = nlpnet.SRLTagger(language='pt')
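The panel above keeps both a POS and an SRL tagger. A standalone sketch of what those taggers return (the sentence and the 'data' path are placeholders; the result structure matches what the other examples in this listing access):

import nlpnet

nlpnet.set_data_dir('data')
pos_tagger = nlpnet.POSTagger(language='pt')
srl_tagger = nlpnet.SRLTagger(language='pt')

sentence = u'O rato roeu a roupa do rei de Roma'
print(pos_tagger.tag(sentence))    # [[(token, tag), ...]], one inner list per sentence
srl_sent = srl_tagger.tag(sentence)[0]
print(srl_sent.tokens)             # the tokenized sentence
print(srl_sent.arg_structures)     # [(predicate, {label: [tokens, ...]}), ...]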
Example #7
def load_config():
    """
    Loads the configuration
    :return:
    """
    #print(environment)

    config = ConfigParser.ConfigParser()
    here = os.path.abspath(os.path.dirname(__file__))
    config_file = os.path.join(here, '../' + environment + '.ini')
    config.read(config_file)

    # Global configuration parameters
    nltk.data.path.append(config.get('nltk', 'data_dir'))
    nlpnet.set_data_dir(config.get('nlpnet', 'data_dir'))

    # Logging
    logging.config.fileConfig(config_file)

    # Cache configurations
    cache_opts = {
        'cache.regions':
        config.get('lbsociam', 'cache.regions'),
        'cache.type':
        config.get('lbsociam', 'cache.type'),
        'cache.short_term.expire':
        config.get('lbsociam', 'cache.short_term.expire'),
        'cache.default_term.expire':
        config.get('lbsociam', 'cache.default_term.expire'),
        'cache.long_term.expire':
        config.get('lbsociam', 'cache.long_term.expire')
    }

    cache = CacheManager(**parse_cache_config_options(cache_opts))

    return config
Example #8
# -*- coding: utf-8; -*-
import nlpnet
import pandas as pd

nlpnet.set_data_dir('/Users/danielfalci/Downloads/srl-pt')
tagger = nlpnet.SRLTagger()


def getByPredicate(predicate, result):
    # when nothing is found
    if len(result.arg_structures) == 0:
        return {}, []
    for este in result.arg_structures:
        if este[0] == predicate:
            return este[1], result.tokens
    # when the predicate is not found
    return {}, []


def handleTag(tag):
    if tag.startswith('V:') or (tag.startswith('A') and ':' in tag):
        quantidade = int(tag[tag.find(':') + 1:])
        tagFinal = tag[:tag.find(':')]
        if quantidade == 1:
            return [u'(' + tagFinal + u'*' + tagFinal + u')']
        else:
            temp = []
            for i in xrange(0, quantidade):
                if i == 0:
                    temp.append(u'(' + tagFinal + u'*')
                elif i < quantidade - 1:
Example #9
import time
import os
import nlpnet  # needed for nlpnet.set_data_dir below

# import nltk, sys
# from alpes_core.textProcess import stemming
# from nltk.corpus import floresta
# from nltk.probability import FreqDist
# from nltk import word_tokenize as wt

#############################################################################################################
### POINTS TO THE DATA USED FOR THE SYNTACTIC CLASSIFICATION
### USES THE NLPNET POS TAGGER
### COPY THE NLPNET-DATA FOLDER TO THE SERVER
### SET THE CORRECT PATH INSIDE THE SERVER!!!
#nlpnet.set_data_dir('/home/panceri/nlpnet-data/pos-pt')
nlpnet.set_data_dir(
    os.path.join(os.path.dirname(__file__), '../../../nlpnet-data'))

#############################################################################################################

#############################################################################################################

#############################################################################################################
## Execution logic of the Alpes processing core
## Applies text pre-processing techniques to help compare and search for similar texts
## Techniques implemented:
## 1 - Case folding
## 2 - Replacement of accented characters with unaccented ones
## 3 - Removal of punctuation
## 4 - Removal of stopwords
## 5 - Stemming
Example #10
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
 parser.add_argument('type', help='Format of the embeddings. See the description below.', 
                     choices=['plain', 'senna', 'gensim', 'word2embeddings'])
 parser.add_argument('embeddings', help='File containing the actual embeddings')
 parser.add_argument('-v', help='Vocabulary file, if applicable. '\
                     'In SENNA, it is hash/words.lst', dest='vocabulary')
 parser.add_argument('-o', help='Directory to save the output', default='.',
                     dest='output_dir')
 parser.add_argument('--task', help='Task for which the embeddings will be used. '\
                     'It determines the name of the embeddings file. If not given, '\
                     'it will be nlpnet-embeddings.npy.', dest='task', default=None, 
                     choices=['pos', 'srl', 'srl_boundary',
                              'srl_classify', 'srl_predicates'])
 args = parser.parse_args()
 
 nlpnet.set_data_dir(args.output_dir)
 output_vocabulary = nlpnet.config.FILES['vocabulary']
 if args.task is None:
     output_embeddings = os.path.join(args.output_dir, 'nlpnet-embeddings.npy')
 else:
     key = 'type_features_%s' % args.task
     output_embeddings = nlpnet.config.FILES[key]
 
 nlpnet.utils.set_logger(logging.INFO)
 logger = logging.getLogger('Logger')
 logger.info('Loading data...')
 if args.type == 'senna':
     words = read_senna_vocabulary(args.vocabulary)
     matrix = read_plain_embeddings(args.embeddings)
 elif args.type == 'plain':
     words = read_plain_vocabulary(args.vocabulary)
Example #11
                argument = ' '.join(arg_structure[label])
                line = '\t%s: %s' % (label, argument)
                print(line.encode('utf-8'))
        print()


if __name__ == '__main__':
    
    parser = argparse.ArgumentParser()
    parser.add_argument('task', help='Task for which the network should be used.',
                        type=str, choices=['srl', 'pos', 'dependency'])
    parser.add_argument('--data', help='Directory containing trained models (default: current)', type=str,
                        default='.')
    parser.add_argument('-v', help='Verbose mode', action='store_true', dest='verbose')
    parser.add_argument('-t', action='store_true', dest='disable_tokenizer',
                        help='Disable built-in tokenizer. Tokens are assumed to be separated by whitespace.')
    parser.add_argument('--lang', dest='lang', default='en',
                        help='Language (used to determine which tokenizer to run. Ignored if -t is provided)', 
                        choices=['en', 'pt'])
    parser.add_argument('--no-repeat', dest='no_repeat', action='store_true',
                        help='Forces the classification step to avoid repeated argument labels (SRL only)')
    args = parser.parse_args()
    
    logging_level = logging.DEBUG if args.verbose else logging.WARNING
    utils.set_logger(logging_level)
    logger = logging.getLogger("Logger")
    nlpnet.set_data_dir(args.data)
    
    interactive_running(args)
    
Example #12
import nlpnet
nlpnet.set_data_dir(
    '/media/mateus/Data/Main/Projects/ufpb/fact-check/classificator/nlpnet_data/pos-pt'
)
tagger = nlpnet.POSTagger()

while True:
    text = input()
    print(tagger.tag(text))
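POSTagger.tag returns one list of (token, tag) pairs per sentence, so a session with the loop above looks roughly like this (the exact tags depend on the trained pt model):

O menino comprou um carro
[[('O', 'ART'), ('menino', 'N'), ('comprou', 'V'), ('um', 'ART'), ('carro', 'N')]]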
Example #13
from numpy import array
from nltk.probability import FreqDist
#from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.cluster import KMeansClusterer, GAAClusterer, euclidean_distance
from cStringIO import StringIO
import sys
import nlpnet
import codecs
import os
import re
import math
import utils

nlpnet.set_data_dir(os.path.join(os.getcwd(), "nlpnet-data"))
stop_words = stopwords.words('portuguese')
tokenizer = RegexpTokenizer(r'\w+')


class Tadano_Summarizer(object):
    def __init__(self, name, opinions_path, aspect_manager):
        self.__name = name
        self.__aspect_manager = aspect_manager
        self.__sentence_list = {}
        self.__clusters = {}
        self.__aspect_list = {
            key: 0
            for key in aspect_manager.get_aspects_reviews(name)
        }
        self.__read_files(opinions_path)
Example #14
import os
import re
import nlpnet
import logging


pln_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
nlpnet.set_data_dir(os.path.join(pln_dir, u'pos-pt'))
tagger = nlpnet.POSTagger()


def tag_text(text):
    """Add tags to passed text.

    Args:
        text (str): Text to be tagged.

    Return:
        str: Tagged text.

    Example:
        >>> tag_text('Bom dia')
        'Bom/ADJ dia/N'
    """
    if text.replace(' ', ''):
        try:
            tags = tagger.tag(text)[0]
            tagged_text = ' '.join(['{}/{}'.format(x,y) for (x, y) in tags])
            return tagged_text
        except Exception:
            logging.exception(u'Error tagging text: "%s"', text)
Example #15
import os
import nlpnet  # needed for nlpnet.set_data_dir below

# import nltk, sys
# from alpes_core.textProcess import stemming
# from nltk.corpus import floresta
# from nltk.probability import FreqDist
# from nltk import word_tokenize as wt


#############################################################################################################
### POINTS TO THE DATA USED FOR THE SYNTACTIC CLASSIFICATION
### USES THE NLPNET POS TAGGER
### COPY THE NLPNET-DATA FOLDER TO THE SERVER
### SET THE CORRECT PATH INSIDE THE SERVER!!!
#nlpnet.set_data_dir('/home/panceri/nlpnet-data/pos-pt')
nlpnet.set_data_dir(os.path.join(os.path.dirname(__file__),'../../../nlpnet-data'))

#############################################################################################################

#############################################################################################################

#############################################################################################################
## Execution logic of the Alpes processing core
## Applies text pre-processing techniques to help compare and search for similar texts
## Techniques implemented:
## 1 - Case folding
## 2 - Replacement of accented characters with unaccented ones
## 3 - Removal of punctuation
## 4 - Removal of stopwords
## 5 - Stemming
Example #16
import re
import nlpnet

filenames = ['como', 'direta', 'existe', 'o_que', 'por_que', 'posso', 'qual']
nlpnet.set_data_dir('/home/jpegx100/develop/lpln/pos-pt')
tagger = nlpnet.POSTagger()

for filename in filenames:
    tagged_questions = list()

    with open('./no_stopwords/{}_no_st.txt'.format(filename), 'r') as arq:
        text = arq.read()
        questions = text.split('\n')
        for quest in questions:
            # type_id = re.search(r'_ (.*?) _', quest)
            # no_type_id = re.sub(r'_ .*? _', 'TYPE_ID', quest)
            no_type_id = quest

            tagged_question = list()
            words = no_type_id.split()

            while words:
                word = words.pop(0)
                if word == '_':
                    tagged_question.append('_')
                    continue
                if word.startswith('*'):
                    markeds = [word]
                    while words and not markeds[-1].endswith('*'):
                        markeds.append(words.pop(0))
Example #17
import re
import string
LANGUAGE = 'portuguese'

# used for: tagger for Portuguese
import nlpnet
# used for: tagger for English, tokenizer, stopwords lists, stemmer
import nltk
from nltk.stem import RSLPStemmer, PorterStemmer

# NLP variables
stemmer = dict(portuguese=RSLPStemmer(), english=PorterStemmer())
nlpnet.set_data_dir("language/portuguese")
nlpnet_POSTagger = nlpnet.POSTagger()

# lists of negation words
negation_words = {
    'portuguese': [
        "sem", "jamais", "nada", "nem", "nenhum", "ninguém", "nunca", "não",
        "tampouco", "nao", "ñ", "ninguem", "longe", "evitar", "impedir",
        "perder", "tirar"
    ],
    'english': [
        "never", "neither", "nobody", "no", "none", "nor", "nothing",
        "nowhere", "not", 'n\'t'
    ]
}

LANGUAGE_DIR = 'language'

stopwords = []
Example #18
 def make_pos(self, path='./data/tweentsentbr/resources/pos-pt'):
     nlpnet.set_data_dir(path)
     self.tagger = nlpnet.POSTagger()
Example #19
# -*- coding: utf-8 -*-
# Part-of-Speech-Tagging
"""
Created on Mon Dec 11 13:30:28 2017

@author: d7-02
"""

import nlpnet
nlpnet.set_data_dir('dependency')

#parser = nlpnet.DependencyParser('dependency', language='en')
#tagger = nlpnet.POSTagger('/path/to/pos-model/', language='pt')

tagger = nlpnet.POSTagger()
print(tagger.tag(u"I want to book a flight from Delhi to Pune on Sunday"))
#parsed_text = parser.parse(u'I want to book a flight from Delhi to Pune on Sunday')
#sent = parsed_text[0]
#print(sent.to_conll())
Example #20
# <-----> Auxiliary Modules <----->
# <------------------------------->

# clustering algorithm used in the implementation of RF method
from sklearn.cluster import AgglomerativeClustering

# used to obtain the best alignment in polynomial time (Hungarian method)
from scipy.optimize import linear_sum_assignment

# <----------------------->
# <-----> Variables <----->
# <----------------------->

# NLP variables
stemmer = dict(portuguese=RSLPStemmer(), english=PorterStemmer())
nlpnet.set_data_dir("nlpnet_data/")
nlpnet_POSTagger = nlpnet.POSTagger()

# lists of negation words
negation_words = {
    'portuguese':
    ["jamais", "nada", "nem", "nenhum", "ninguém", "nunca", "não", "tampouco"],
    'english': [
        "never", "neither", "nobody", "no", "none", "nor", "nothing",
        "nowhere", "not", 'n\'t'
    ]
}

# represents the summary
contrastive_pairs = [(0, 0), (1, 1)]
Example #21
    def use_nlpnet(self, base_string, test_string, pattern_arg):
        """
        Main interface method from the NLPNET class to the rest of
        the program.
        """

        # Setting up the nlpnet parser
        nlpnet.set_data_dir(self.get_data_dir_path())
        dependency_parser = nlpnet.DependencyParser()
        pos_parser = nlpnet.POSTagger()

        # Getting the passed patterns
        patterns = pattern_arg

        # Parsing the base_string
        base_parse = dependency_parser.parse(base_string)
        base_blob = TextBlob(base_string)
        base_sentences = base_blob.sentences
        base_sentence_info = []

        for index in range(0, len(base_parse)):
            # Grabbing sentence information
            raw_data = str(base_sentences[index])
            pos_sentence = pos_parser.tag(str(base_sentences[index]))
            subject, verb, object, prepositional_phrases = self.identify_sentence_parts_nlpnet(base_parse[index].tokens, base_parse[index].labels)

            """
            # Displaying information for debugging purposes
            #print "***BASE***"
            #print "Raw Sentence     : " + raw_data
            #print "POS Sentence    : " + str( pos_sentence )
            #print "[ Tokens ]       : " + str( base_parse[ index ].tokens )
            #print "[ Labels ]       : " + str( base_parse[ index ].labels )
            #print "[ Subject ]     : " + subject
            #print "[ Verb ]        : " + verb
            #print "[ Object ]      : " + object
            #print "[ Prep Phrases ] : " + str( prepositional_phrases )
            """

            # Deciding whether the sentence/pattern should be added
            add_sentence = True
            for sentence in base_sentence_info:
                if sentence != []:
                    if sentence[len(sentence) - 1] == raw_data:
                        add_sentence = False

                        break

            # If the sentence should be added to the possible patterns, add it
            if add_sentence:
                base_sentence_info.append([subject, verb, object, [], raw_data])

        # Parsing the test_string
        test_parse = dependency_parser.parse(test_string)
        test_blob = TextBlob(test_string)
        test_sentences = test_blob.sentences
        test_sentence_info = []

        for index in range(0, len(test_parse)):
            # Grabbing sentence information
            raw_data = str(test_sentences[index])
            pos_sentence = pos_parser.tag(str(test_sentences[index]))
            subject, verb, object, prepositional_phrases = self.identify_sentence_parts_nlpnet(test_parse[index].tokens, test_parse[index].labels)

            """
            #print "***TEST***"
            #print "Raw Sentence     : " + raw_data
            #print "POS Sentence    : " + str( pos_sentence )
            #print "[ Tokens ]       : " + str( test_parse[ index ].tokens )
            #print "[ Labels ]       : " + str( test_parse[ index ].labels )
            #print "[ Subject ]     : " + subject
            #print "[ Verb ]        : " + verb
            #print "[ Object ]      : " + object
            #print "[ Prep Phrases ] : " + str( prepositional_phrases )
            """

            # Deciding whether the sentence/pattern should be added
            add_sentence = True
            for sentence in test_sentence_info:
                if sentence != []:
                    if sentence[len(sentence) - 1] == raw_data:
                        add_sentence = False

                        break

            # If the sentence should be added to the possible patterns, add it
            if add_sentence:
                test_sentence_info.append([subject, verb, object, [], raw_data])

        # Returning the patterns found in the text
        return self.identify_common_patterns(base_sentence_info, test_sentence_info, patterns)
Example #22
# coding=utf-8

"""
Semantic module.
    It provides:
    - Synonyms
    - Antonyms
    - Semantic Role Labeling (SRL)
"""
import nltk
import nlpnet
import nlpnet.config


nlpnet.set_data_dir('nlpnet')  # TODO: replace 'nlpnet' with the actual data directory


__all__ = ['synsets', 'antonyms', 'srl']


def synsets(token):
    """
    Get the words that are synonyms of the given token.
    :param token: a single token string
    :return: list
    """
    synset = []

    return synset

def antonyms(token):
Example #23
'''
Created on 17/12/2014

@author: Roque Lopez
'''
from __future__ import unicode_literals
from nltk.tag import brill
import unicodedata
import itertools
import nlpnet
import utils
import codecs
import os
import re

nlpnet.set_data_dir(str("../resource//nlpnet_data/"))


class Ganesan_Summarizer(object):
    '''
    Class that implements Ganesan method
    '''
    def __init__(self, name, opinions_path, aspect_manager):
        self.__name = name
        self.__aspect_manager = aspect_manager
        self.__data = {}
        self.__aspect_frequency = {}
        self.__tagger = nlpnet.POSTagger()
        self.__read_files(opinions_path)

    def __read_files(self, opinions_path):
Example #24
    def use_nlpnet(self, base_string, test_string, pattern_arg):
        """
        Main interface method from the NLPNET class to the rest of
        the program.
        """

        # Setting up the nlpnet parser
        nlpnet.set_data_dir(self.get_data_dir_path())
        dependency_parser = nlpnet.DependencyParser()
        pos_parser = nlpnet.POSTagger()

        # Getting the passed patterns
        patterns = pattern_arg

        # Parsing the base_string
        base_parse = dependency_parser.parse(base_string)
        base_blob = TextBlob(base_string)
        base_sentences = base_blob.sentences
        base_sentence_info = []

        for index in range(0, len(base_parse)):
            # Grabbing sentence information
            raw_data = str(base_sentences[index])
            pos_sentence = pos_parser.tag(str(base_sentences[index]))
            subject, verb, object, prepositional_phrases = self.identify_sentence_parts_nlpnet(
                base_parse[index].tokens, base_parse[index].labels)
            """
            # Displaying information for debugging purposes
            #print "***BASE***"
            #print "Raw Sentence     : " + raw_data
            #print "POS Sentence    : " + str( pos_sentence )
            #print "[ Tokens ]       : " + str( base_parse[ index ].tokens )
            #print "[ Labels ]       : " + str( base_parse[ index ].labels )
            #print "[ Subject ]     : " + subject
            #print "[ Verb ]        : " + verb
            #print "[ Object ]      : " + object
            #print "[ Prep Phrases ] : " + str( prepositional_phrases )
            """

            # Deciding whether the sentence/pattern should be added
            add_sentence = True
            for sentence in base_sentence_info:
                if sentence != []:
                    if sentence[len(sentence) - 1] == raw_data:
                        add_sentence = False

                        break

            # If the sentence should be added to the possible patterns, add it
            if add_sentence:
                base_sentence_info.append(
                    [subject, verb, object, [], raw_data])

        # Parsing the test_string
        test_parse = dependency_parser.parse(test_string)
        test_blob = TextBlob(test_string)
        test_sentences = test_blob.sentences
        test_sentence_info = []

        for index in range(0, len(test_parse)):
            # Grabbing sentence information
            raw_data = str(test_sentences[index])
            pos_sentence = pos_parser.tag(str(test_sentences[index]))
            subject, verb, object, prepositional_phrases = self.identify_sentence_parts_nlpnet(
                test_parse[index].tokens, test_parse[index].labels)
            """
            #print "***TEST***"
            #print "Raw Sentence     : " + raw_data
            #print "POS Sentence    : " + str( pos_sentence )
            #print "[ Tokens ]       : " + str( test_parse[ index ].tokens )
            #print "[ Labels ]       : " + str( test_parse[ index ].labels )
            #print "[ Subject ]     : " + subject
            #print "[ Verb ]        : " + verb
            #print "[ Object ]      : " + object
            #print "[ Prep Phrases ] : " + str( prepositional_phrases )
            """

            # Deciding whether the sentence/pattern should be added
            add_sentence = True
            for sentence in test_sentence_info:
                if sentence != []:
                    if sentence[len(sentence) - 1] == raw_data:
                        add_sentence = False

                        break

            # If the sentence should be added to the possible patterns, add it
            if add_sentence:
                test_sentence_info.append(
                    [subject, verb, object, [], raw_data])

        # Returning the patterns found in the text
        return self.identify_common_patterns(base_sentence_info,
                                             test_sentence_info, patterns)
Example #25
'''
Created on 17/12/2014

@author: Roque Lopez
'''
from __future__ import unicode_literals
from nltk.tag import brill
import unicodedata
import itertools
import nlpnet
from uteis import utils_opizer
import codecs
import os
import re

file_path = __file__[:-(len(__name__ + ".py"))]
nlpnet.set_data_dir(file_path + "resource/nlpnet_data/")


class Ganesan_Summarizer(object):
    '''
    Class that implements Ganesan method
    '''
    def __init__(self, review_data, review_key, id_key, aspect_manager):
        self.__name = 'produto'
        self.__aspect_manager = aspect_manager
        self.__data = {}
        self.__aspect_frequency = {}
        self.__tagger = nlpnet.POSTagger()
        self.__read_files(review_data, review_key, id_key)

    def __read_files(self, review_data, review_key, id_key):
Example #26
# PCP,

import matplotlib.pyplot as plt
import nlpnet
nlpnet.set_data_dir('pos-pt/')
nlpnet_POSTagger = nlpnet.POSTagger()

FUND, MEDIO = 0, 1
filenames = list()
filenames.append([('ENSINO_FUNDAMENTAL_amostras_corpus/part' + str(i) +
                   '_ENSINO_FUNDAMENTAL_historia_e_geografia.txt')
                  for i in range(171)])
filenames.append(list())
for i in range(70):
    filenames[MEDIO].append('ENSINO_MEDIO_amostras_corpus/part' + str(i) +
                            '_ENSINO_MEDIO_ciencias_humanas.txt')
for i in range(127):
    filenames[MEDIO].append('ENSINO_MEDIO_amostras_corpus/part' + str(i) +
                            '_ENSINO_MEDIO_ciencias_humanas_II.txt')

disconsidered_text_tags = [
    '<title>', '</title>', '<subtitle>', '</subtitle>', '<imagem>', '<figura>',
    '<tabela>', '<gráfico>', '[Figura]'
]

tag_list = [
    'ADJ', 'ADV', 'ADV-KS', 'ADV-KS-REL', 'ART', 'CUR', 'IN', 'KC', 'KS', 'N',
    'NPROP', 'NUM', 'PCP', 'PDEN', 'PREP', 'PREP+ADV', 'PREP+ART',
    'PREP+PROADJ', 'PREP+PRO-KS', 'PREP+PRO-KS-REL', 'PREP+PROPESS',
    'PREP+PROSUB', 'PROADJ', 'PRO-KS', 'PRO-KS-REL', 'PROPESS', 'PROSUB', 'PU',
    'V', 'VAUX'
Example #27
    def load_tagger(self):
        if not self._data_dir:
            self._data_dir = config['NLPNET_DATA_DIR']

        nlpnet.set_data_dir(self._data_dir)
        self._tagger = nlpnet.POSTagger()
Example #28
                argument = ' '.join(arg_structure[label])
                line = '\t%s: %s' % (label, argument)
                print(line.encode('utf-8'))
        print


if __name__ == '__main__':
    
    parser = argparse.ArgumentParser()
    parser.add_argument('task', help='Task for which the network should be used.',
                        type=str, choices=['srl', 'pos', 'dependency'])
    parser.add_argument('--data', help='Directory containing trained models (default: current)', type=str,
                        default='.')
    parser.add_argument('-v', help='Verbose mode', action='store_true', dest='verbose')
    parser.add_argument('-t', action='store_true', dest='disable_tokenizer',
                        help='Disable built-in tokenizer. Tokens are assumed to be separated by whitespace.')
    parser.add_argument('--lang', dest='lang', default='en',
                        help='Language (used to determine which tokenizer to run. Ignored if -t is provided)', 
                        choices=['en', 'pt'])
    parser.add_argument('--no-repeat', dest='no_repeat', action='store_true',
                        help='Forces the classification step to avoid repeated argument labels (SRL only)')
    args = parser.parse_args()
    
    logging_level = logging.DEBUG if args.verbose else logging.WARNING
    utils.set_logger(logging_level)
    logger = logging.getLogger("Logger")
    nlpnet.set_data_dir(args.data)
    
    interactive_running(args)
    
Example #29
""" Este script eh um script chamador da biblioteca de NLP.
https://github.com/erickrf/nlpnet."""
import sys
import getopt
try:
    from configparser import ConfigParser
except ImportError:
    from ConfigParser import ConfigParser  # ver. < 3.0
import nlpnet


CONFIG = ConfigParser()
CONFIG.read('setup.ini')
nlpnet.set_data_dir(CONFIG.get('attributes', 'setdatadir'))

TEXT = ''
METHOD = ''

try:
    OPTS, ARGS = getopt.getopt(sys.argv[1:], "ht:m:", ["text=", "method="])
except getopt.GetoptError:
    sys.exit(1)
for opt, arg in OPTS:
    if opt == '-h':
        print('nlpnet2go.py -t <"text to be analyzed"> -m <method [pos] OR [srl]>')
        print('Eg.: python nlpnet2go.py -t "teste do edward" -m pos')
        sys.exit()
    elif opt in ("-t", "--text"):
        TEXT = arg
    elif opt in ("-m", "--method"):
        METHOD = arg
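The listing cuts the script off before METHOD is used; a hypothetical continuation (not part of the original snippet) that dispatches to the two taggers advertised by the -m option could look like:

if METHOD == 'pos':
    print(nlpnet.POSTagger().tag(TEXT))
elif METHOD == 'srl':
    for sent in nlpnet.SRLTagger().tag(TEXT):
        print(sent.arg_structures)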
Example #30
        'type',
        help='Format of the embeddings. See the description below.',
        choices=[
            'plain', 'senna', 'gensim', 'word2embeddings', 'single', 'polyglot'
        ])
    parser.add_argument('embeddings',
                        help='File containing the actual embeddings')
    parser.add_argument('-v', help='Vocabulary file, if applicable. '\
                        'In SENNA, it is hash/words.lst', dest='vocabulary')
    parser.add_argument('-o',
                        help='Directory to save the output',
                        default='.',
                        dest='output_dir')
    args = parser.parse_args()

    nlpnet.set_data_dir(args.output_dir)
    output_vocabulary = nlpnet.config.FILES['vocabulary']
    output_embeddings = nlpnet.config.FILES['type_features']

    nlpnet.utils.set_logger(logging.INFO)
    logger = logging.getLogger('Logger')
    logger.info('Loading data...')
    if args.type == 'senna':
        words = read_senna_vocabulary(args.vocabulary)
        matrix = read_plain_embeddings(args.embeddings)
    elif args.type == 'plain':
        words = read_plain_vocabulary(args.vocabulary)
        matrix = read_plain_embeddings(args.embeddings)
    elif args.type == 'gensim':
        matrix, words = read_gensim_embeddings(args.embeddings)
    elif args.type == 'word2embeddings':
Example #31
    def __init__(self, nlpnet_model_dir=''):

        if nlpnet_model_dir != '':
            nlpnet.set_data_dir(nlpnet_model_dir)
            self.tagger = nlpnet.POSTagger()
Example #32
 def __init__(self):
     self.cmudict = cmudict.dict()
     nlpnet.set_data_dir("dependency")
     self.tagger = nlpnet.taggers.DependencyParser(language="en")
     pass
Example #33
    def load_tagger(self):
        if not self._data_dir:
            self._data_dir = config['NLPNET_DATA_DIR']

        nlpnet.set_data_dir(self._data_dir)
        self._tagger = nlpnet.POSTagger()