Пример #1
0
    def form_valid(self, form, **kwargs):
        """Tag the submitted text sentence by sentence and store the result.

        Called once valid form data has been POSTed. The text is split into
        sentences, each sentence is tagged twice (with the tagger taken from
        ``settings.ETIQUETADOR`` and with a ``Visl`` instance), and the
        collected results are saved under the ``'context'`` session key.

        Args:
            form: The validated form; ``form.cleaned_data['text']`` is read.
            **kwargs: Accepted but not used by this method.

        Returns:
            Whatever the parent class' ``form_valid(form)`` returns.
        """
        tagger = settings.ETIQUETADOR
        submitted_text = form.cleaned_data['text']

        # Split the text into sentences with the Portuguese Punkt model.
        splitter = nltk.data.load('tokenizers/punkt/portuguese.pickle')
        sentence_list = splitter.tokenize(submitted_text)

        # Collect the tagged sentences; both taggers run per sentence, in
        # this order, and a new Visl object is built for every sentence.
        tagged = []
        tagged_visl = []
        for current in sentence_list:
            tagged.append(tagger.tag(nltk.word_tokenize(current)))
            tagged_visl.append(Visl().tag(current))

        self.request.session['context'] = {
            'text': submitted_text,
            'sentences': sentence_list,
            'tagged_sentences': tagged,
            'tagged_sentences_visl': tagged_visl,
            'show_sentences': True,
        }

        return super(NlpView, self).form_valid(form)
Пример #2
0
def collect_email_data(sender, **kwargs):
    """Create an ``EmailData`` record derived from an email.

    For an email without a parent, the raw message is tagged with ``Visl``,
    converted to XML and stored with type ``EMAIL_DATA.TAGS``.

    For an email with a parent, the parent's raw message has every
    Portuguese stopword replaced by ``"*"``; that masked text is paired with
    this email's raw message, converted by ``questions_to_aiml`` and stored
    on the parent with type ``EMAIL_DATA.AIML``.

    Args:
        sender: Not used by this function.
        **kwargs: Must contain ``"instance"``, the email object to process.
            NOTE(review): the signature looks like a Django signal receiver
            (e.g. ``post_save``) -- confirm where it is connected.

    Raises:
        KeyError: If ``"instance"`` is missing from ``kwargs``.
    """
    email = kwargs["instance"]

    if not email.parent:
        tagged_sentence = Visl().tag(email.raw_message)
        xml = tagged_sentence_to_xml(tagged_sentence)
        EmailData.objects.create(
            email=email, data_type=EMAIL_DATA.TAGS, data=xml)
        return

    # Split on single spaces only, so the original spacing is reproduced
    # exactly when the words are joined back together below.
    words = email.parent.raw_message.split(" ")

    # Load the stopword list once; previously it was re-read for every word
    # and searched linearly. Matching stays exact and case-sensitive.
    portuguese_stopwords = frozenset(stopwords.words('portuguese'))
    masked_words = [
        "*" if word in portuguese_stopwords else word for word in words
    ]

    aiml = questions_to_aiml([(" ".join(masked_words), email.raw_message)])
    EmailData.objects.create(
        email=email.parent, data_type=EMAIL_DATA.AIML, data=aiml)