def form_valid(self, form, **kwargs):
    """Tag the submitted text sentence by sentence and stash the result in the session.

    Called when valid form data has been POSTed. The text from
    ``form.cleaned_data['text']`` is split into sentences with NLTK's
    Portuguese punkt tokenizer; each sentence is then tagged twice, once
    with the tagger configured as ``settings.ETIQUETADOR`` and once with
    ``Visl``. Everything is stored under ``self.request.session['context']``.

    Returns whatever the parent class's ``form_valid`` returns (expected to
    be an HttpResponse).
    """
    tagger = settings.ETIQUETADOR
    submitted_text = form.cleaned_data['text']

    # Split the text into sentences.
    splitter = nltk.data.load('tokenizers/punkt/portuguese.pickle')
    sentence_list = splitter.tokenize(submitted_text)

    # Tag every sentence with both taggers, keeping the two result lists
    # aligned with ``sentence_list``.
    tagged_by_etiquetador = []
    tagged_by_visl = []
    for current in sentence_list:
        tagged_by_etiquetador.append(tagger.tag(nltk.word_tokenize(current)))
        tagged_by_visl.append(Visl().tag(current))

    # Store the tagged sentences so they can be read back from the session.
    self.request.session['context'] = {
        'text': submitted_text,
        'sentences': sentence_list,
        'tagged_sentences': tagged_by_etiquetador,
        'tagged_sentences_visl': tagged_by_visl,
        'show_sentences': True,
    }
    return super(NlpView, self).form_valid(form)
def collect_email_data(sender, **kwargs):
    """Derive and persist an ``EmailData`` record for an email instance.

    The email is read from ``kwargs["instance"]`` (presumably supplied by a
    Django model signal -- confirm against where this handler is connected).

    * Email without a parent: its ``raw_message`` is tagged with ``Visl``,
      converted to XML and stored as ``EMAIL_DATA.TAGS`` data for that email.
    * Email with a parent: the parent's ``raw_message`` is split on single
      spaces, every Portuguese stopword is replaced by ``"*"``, and the
      masked text is paired with this email's ``raw_message`` to build AIML,
      which is stored as ``EMAIL_DATA.AIML`` data on the *parent* email.

    ``sender`` is accepted but not used. Returns ``None``.
    """
    email = kwargs["instance"]

    if not email.parent:
        tagged_sentence = Visl().tag(email.raw_message)
        EmailData.objects.create(
            email=email,
            data_type=EMAIL_DATA.TAGS,
            data=tagged_sentence_to_xml(tagged_sentence),
        )
        return

    parent_words = email.parent.raw_message.split(" ")

    # Load the stopword list once instead of once per word, and use a set so
    # each membership test is O(1) rather than a scan of the whole list.
    portuguese_stopwords = frozenset(stopwords.words('portuguese'))
    masked_words = [
        "*" if word in portuguese_stopwords else word
        for word in parent_words
    ]

    aiml = questions_to_aiml([(" ".join(masked_words), email.raw_message)])
    EmailData.objects.create(
        email=email.parent,
        data_type=EMAIL_DATA.AIML,
        data=aiml,
    )