Exemplo n.º 1
0
def loadMetrics(filenames):
    """Build the feature table for a collection of text files.

    Each path in ``filenames`` is opened as UTF-8, read in full and passed
    to ``countTags`` together with a single shared nlpnet ``POSTagger``.
    Every result becomes one row of a DataFrame whose columns are
    ``Pausality`` and ``Emotivity``. The frame returned by
    ``loadMetricsCSV('var/metrics_csv/')`` is then joined column-wise and
    the ``Id`` column is removed from the combined frame.

    Args:
        filenames: iterable of paths to the text files to process.

    Returns:
        pandas.DataFrame holding the computed columns followed by the
        columns loaded from the .csv files, without ``Id``.
    """
    # One tagger instance is created up front and reused for every file.
    pos_tagger = POSTagger(r'var/nlpnet', language='pt')

    def _features_of(path):
        # Read the whole file and compute its feature row.
        with open(path, encoding='utf8') as handle:
            return countTags(handle.read(), pos_tagger)

    rows = [_features_of(path) for path in filenames]
    computed = pd.DataFrame(rows, columns=['Pausality', 'Emotivity'])

    # Features that were extracted earlier and saved as .csv files.
    precomputed = loadMetricsCSV('var/metrics_csv/')

    # Join the two frames side by side (axis=1), then drop the 'Id' column
    # (presumably supplied by the .csv data -- confirm against loadMetricsCSV).
    combined = pd.concat([computed, precomputed], axis=1)
    return combined.drop('Id', axis=1)
Exemplo n.º 2
0
def vectorize(text, tagger=None):
    """Turn a single text into a one-row DataFrame of POS-tag features.

    Args:
        text: the text to analyse; passed unchanged to ``countTags``.
        tagger: tagger handed to ``countTags``. When omitted (``None``), a
            Portuguese ``POSTagger`` is loaded from ``var/nlpnet``.

    Returns:
        pandas.DataFrame with a single row (the value returned by
        ``countTags``) and one column per tag label listed below.
    """
    # Identity test: `== None` would dispatch to the tagger's __eq__.
    if tagger is None:
        tagger = POSTagger(r'var/nlpnet', language='pt')

    # Column labels in a fixed, explicit order (a plain list instead of the
    # keys of a throwaway dict, so the order never depends on dict ordering).
    # NOTE(review): the order presumably has to match the order of the values
    # returned by countTags -- confirm against its implementation.
    labels = [
        'ADJ',
        'ADV',
        'ADV-KS',
        'ART',
        'CUR',
        'IN',
        'KC',
        'KS',
        'N',
        'NPROP',
        'NUM',
        'PCP',
        'PDEN',
        'PREP',
        'PROADJ',
        'PRO-KS',
        'PROPESS',
        'PROSUB',
        'V',
        'PU',
    ]
    freqs = countTags(text, tagger)

    # Wrap the single result in a list so it becomes one row.
    return pd.DataFrame([freqs], columns=labels)
Exemplo n.º 3
0
def loadPos(filenames):
    """Compute POS-tag features for every file in ``filenames``.

    Each file is opened as UTF-8, read in full and passed to ``countTags``
    together with a single shared nlpnet ``POSTagger``; the value returned
    for a file becomes one row of the resulting DataFrame.

    Args:
        filenames: iterable of paths to the text files to process.

    Returns:
        pandas.DataFrame with one row per file and one column per tag
        label listed in ``tag_columns``.
    """
    # One tagger instance is created up front and reused for every file.
    nlpnet_tagger = POSTagger(r'var/nlpnet', language='pt')

    # Column labels, in the order used for the resulting frame.
    tag_columns = [
        'ADJ', 'ADV', 'ADV-KS', 'ART', 'CUR', 'IN', 'KC', 'KS', 'N', 'NPROP',
        'NUM', 'PCP', 'PDEN', 'PREP', 'PROADJ', 'PRO-KS', 'PROPESS', 'PROSUB',
        'V', 'PU',
    ]

    def _count_file(path):
        # Read the whole file and let countTags compute its row.
        with open(path, encoding='utf8') as source:
            return countTags(source.read(), nlpnet_tagger)

    return pd.DataFrame(
        [_count_file(path) for path in filenames],
        columns=tag_columns,
    )
Exemplo n.º 4
0
from django.http import JsonResponse
from django.shortcuts import render
from django.views.decorators.http import require_http_methods
from django.views.decorators.csrf import csrf_exempt

from preprocess import bow, pos
from sklearn.svm import LinearSVC
from sklearn.externals import joblib

# Suppress FutureWarning messages raised while importing nlpnet (the original
# note attributes them to type conversion). The filter only applies inside
# the catch_warnings() block, so warnings elsewhere are unaffected.
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    from nlpnet import POSTagger
# Module-level Portuguese tagger, loaded once at import time from var/nlpnet.
tagger = POSTagger(r'var/nlpnet', language='pt')

# Persisted artifacts, loaded once at import time via joblib.
# NOTE(review): joblib.load unpickles its input -- only point it at trusted files.
vocabulary = joblib.load('var/vocabulary.pkl')
# Writes the vocabulary size to stdout every time this module is imported.
print(len(vocabulary))
# NOTE(review): file names suggest LinearSVC models for unigram-binary and POS
# features respectively -- confirm against the training code.
uni_clf = joblib.load('var/linearsvc_unigram-binary.pkl')
pos_clf = joblib.load('var/linearsvc_pos.pkl')


@csrf_exempt
@require_http_methods(['POST'])
def check(request):
    text = request.POST['text']
    model = request.POST['model']
    data = {}

    # vectorizing received text
    if (model == 'pos'):