Python loadSlangs示例

编程语言: Python

命名空间/包名称: preprocessing

方法/功能: loadSlangs

hotexamples.com的示例: 3

Python loadSlangs - 共找到3个示例。以下是从开源项目中提取的、评价最高的 preprocessing.loadSlangs 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： svm.py 项目： ziany/uranus

import numpy as np
import nltk # for pos tags 

import features
import polarity
import ngramGenerator
import preprocessing


# Model hyper-parameters. They are not used in the visible part of this
# script; presumably they configure an SVM further down -- TODO confirm.
KERNEL_FUNCTION = 'linear'
C_PARAMETER = 0.6

# Load the lookup resources used by the rest of the script.
# A single-argument print(...) prints the same text on Python 2 and Python 3.
print("Initializing dictionnaries")
stopWords = preprocessing.getStopWordList('../resources/stopWords.txt')
slangs = preprocessing.loadSlangs('../resources/internetSlangs.txt')
afinn = polarity.loadAfinn('../resources/afinn.txt')
#sentiWordnet=polarity.loadSentiWordnet('../resources/sentiWordnetBig.csv')
emoticonDict = features.createEmoticonDictionary("../resources/emoticon.txt")

# Per-class word lists; 3000 is the size argument handed to mostFreqList.
print("Bulding Bag of words ...")
positive = ngramGenerator.mostFreqList('../data/used/positive1.csv', 3000)
negative = ngramGenerator.mostFreqList('../data/used/negative1.csv', 3000)
neutral = ngramGenerator.mostFreqList('../data/used/neutral1.csv', 3000)


# Drop from `positive` every word that also occurs in `negative` or `neutral`.
# The list is rebuilt instead of calling positive.remove(w) inside a loop over
# `positive`: removing during iteration shifts the remaining items left, so the
# element right after each removed one was never examined and could survive.
# Slice assignment keeps the same list object, matching the original in-place
# mutation.
positive[:] = [w for w in positive if w not in negative and w not in neutral]

for w in negative:

示例#2

显示文件

import numpy as np

import features
import polarity
import ngramGenerator
import preprocessing

# User input for model parameters
N_NEIGHBORS = 10  # number of neighbors for KNN
KERNEL_FUNCTION = 'linear'  # kernel function for SVM
C_PARAMETER = 0.2
UNIGRAM_SIZE = 3000  # size argument handed to mostFreqList below

# Load the lookup resources used by the rest of the script.
# A single-argument print(...) prints the same text on Python 2 and Python 3.
print("Initializing dictionnaries")
stopWords = preprocessing.getStopWordList('../resources/stopWords.txt')
slangs = preprocessing.loadSlangs('../resources/internetSlangs.txt')
afinn = polarity.loadAfinn('../resources/afinn.txt')
emoticonDict = features.createEmoticonDictionary("../resources/emoticon.txt")

print("Bulding unigram vector")
positive = ngramGenerator.mostFreqList('../data/used/positive1.csv',
                                       UNIGRAM_SIZE)  # add as needed
negative = ngramGenerator.mostFreqList('../data/used/negative1.csv',
                                       UNIGRAM_SIZE)
neutral = ngramGenerator.mostFreqList('../data/used/neutral1.csv',
                                      UNIGRAM_SIZE)

# Drop from `positive` every word that also occurs in `negative` or `neutral`.
# The list is rebuilt instead of calling positive.remove(w) inside a loop over
# `positive`: removing during iteration shifts the remaining items left, so the
# element right after each removed one was never examined and could survive.
# Slice assignment keeps the same list object, matching the original in-place
# mutation.
positive[:] = [w for w in positive if w not in negative and w not in neutral]

示例#3

显示文件

文件： main.py 项目： gipsylobos/clasificacion-de-tweets


def predecir(tweet, model):
    """Test a new tweet against an already-built model.

    The tweet is turned into a feature vector by ``mapTweet`` (using the
    module-level ``afinn``, ``emoticonDict``, ``positive``, ``negative``,
    ``neutral`` and ``slangs`` objects), passed through the module-level
    ``scaler`` and ``normalizer``, and then handed to ``model.predict``.

    Args:
        tweet: The tweet to classify, forwarded unchanged to ``mapTweet``.
        model: An object exposing ``predict``; it receives a one-row batch.

    Returns:
        The result of ``model.predict`` converted with ``.tolist()``.
    """
    raw_vector = mapTweet(tweet, afinn, emoticonDict, positive, negative,
                          neutral, slangs)
    # Both transformers are fed a one-row batch, hence the wrapping list.
    scaled_batch = scaler.transform([raw_vector])
    normalized_batch = normalizer.transform(scaled_batch)
    sample = normalized_batch[0].tolist()
    prediction = model.predict([sample])
    return prediction.tolist()


# Pre-processing of the resource files
stopWords = preprocessing.getStopWordList(
    abs_file_url('resources/stopWords.txt')
)
slangs = preprocessing.loadSlangs(
    abs_file_url('resources/internetSlangs.txt')
)
afinn = polarity.loadAfinn(
    abs_file_url('resources/afinn.txt')
)
emoticonDict = features.createEmoticonDictionary(
    abs_file_url('resources/emoticon.txt')
)

# Build the vectors of the most frequent words found in positive, negative
# and neutral tweets
positive = ngramGenerator.mostFreqList(
    abs_file_url('data/used/positive1.csv'), 3000
)
negative = ngramGenerator.mostFreqList(
    abs_file_url('data/used/negative1.csv'), 3000
)
neutral = ngramGenerator.mostFreqList(
    abs_file_url('data/used/neutral1.csv'), 3000
)

# Normalize the size of the unigram lists, in case any of them holds fewer
# than 3000 entries: start from the length of the shortest one
min_len = min(len(words) for words in (positive, negative, neutral))