Python TextProcessing примеры использования

Язык программирования: Python

Пространство имен/Пакет: TextProcessing

Класс/Тип: TextProcessing

Примеров на hotexamples.com: 10

Python TextProcessing — найдено 10 примеров. Это лучшие примеры кода на Python для TextProcessing.TextProcessing, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

TextProcessing(8)

clean_text(1)

extractFeature(1)

get_text(1)

languageDetection(1)

process_comment(1)

read_data(1)

Пример #1

Показать файл

Файл: Extractor.py Проект: micjagga/scikic

    def getScore(self, text, typeList=('ope', 'con', 'ext', 'agr', 'neu')):
        """Predict the requested big5 trait scores for a piece of text.

        Args:
            text: a string of text used to predict big5 traits.
            typeList: iterable of big5 trait codes to predict. Codes that are
                not in ``fullList`` are skipped silently.

        Returns:
            An OrderedDict (not a JSON string) mapping each accepted trait
            code, in the order given by typeList, to the first element of
            that trait model's prediction,
            e.g. {"ope": 4.2, "neu": 2.31, "con": 3.09}.
        """
        # Hack to make this work from any directory.
        # NOTE(review): presumably needed so modules referenced by the pickled
        # models can be imported - confirm.
        import sys
        module_dir = os.path.dirname(__file__)
        # Register the directory only once; inserting unconditionally made
        # sys.path grow by one entry on every call.
        if module_dir not in sys.path:
            sys.path.insert(1, module_dir)
        model_dir = os.path.join(module_dir, 'model/')

        data = OrderedDict()

        # The feature vector is extracted once and shared by every trait model.
        tp = TextProcessing()
        X = np.array(tp.extractFeature(text))

        for t in typeList:
            if t not in fullList:
                continue
            # NOTE: joblib.load unpickles the file, so only trusted model
            # files must ever be placed in the model directory.
            model = joblib.load(
                os.path.join(model_dir, 'Predictor_' + t + '.pkl'))
            y_pred = model.test(X)
            data[t] = y_pred[0]

        return data

Пример #2

Показать файл

Файл: Extractor.py Проект: scikic/scikic

    def getScore(self, text, typeList=('ope', 'con', 'ext', 'agr', 'neu')):
        """Predict the requested big5 trait scores for a piece of text.

        Args:
            text: a string of text used to predict big5 traits.
            typeList: iterable of big5 trait codes to predict. Codes that are
                not in ``fullList`` are skipped silently.

        Returns:
            An OrderedDict (not a JSON string) mapping each accepted trait
            code, in the order given by typeList, to the first element of
            that trait model's prediction,
            e.g. {"ope": 4.2, "neu": 2.31, "con": 3.09}.
        """
        # Hack to make this work from any directory.
        # NOTE(review): presumably needed so modules referenced by the pickled
        # models can be imported - confirm.
        import sys
        module_dir = os.path.dirname(__file__)
        # Register the directory only once; inserting unconditionally made
        # sys.path grow by one entry on every call.
        if module_dir not in sys.path:
            sys.path.insert(1, module_dir)
        model_dir = os.path.join(module_dir, 'model/')

        data = OrderedDict()

        # The feature vector is extracted once and shared by every trait model.
        tp = TextProcessing()
        X = np.array(tp.extractFeature(text))

        for t in typeList:
            if t not in fullList:
                continue
            # NOTE: joblib.load unpickles the file, so only trusted model
            # files must ever be placed in the model directory.
            model = joblib.load(
                os.path.join(model_dir, 'Predictor_' + t + '.pkl'))
            y_pred = model.test(X)
            data[t] = y_pred[0]

        return data

Пример #3

Показать файл

    def evolve(self):
        """Run the generational search and return the best individual's decoding.

        Builds a Population of ``self.population_size`` individuals, evolves it
        for at most ``self.number_of_generations`` rounds (stopping early once
        the first individual's fitness equals the value returned by
        ``find_max_fitness``), then re-scores the final population and returns
        ``decode_text`` of the top-ranked individual applied to the encoded
        text object's ``text`` attribute.

        Reads ``self.encoded_text``, ``self.population_size``,
        ``self.number_of_generations`` and ``self.cross_chance``; the
        reference text is loaded from "Attachment/global_text.txt".
        """
        my_text_object_global = TextProcessing(
            text="", text_file_address="Attachment/global_text.txt")
        # my_text_object_encoded = TextProcessing(text="", text_file_address="Attachment/encoded_text.txt")
        my_text_object_encoded = TextProcessing(text=self.encoded_text)
        # Keep the text attribute as it is before clean_text() is called; it
        # is what finally gets decoded.
        text = my_text_object_encoded.text
        global_text = my_text_object_global.clean_text()
        encoded_text = my_text_object_encoded.clean_text()
        generation = Population(self.population_size, True)

        # NOTE(review): the argument order here is (encoded, global) while
        # calculate_fitness below is called with (global, encoded) - confirm
        # that both signatures really differ.
        max_fitness = generation.find_max_fitness(encoded_text, global_text)
        for j in range(self.number_of_generations):
            # Early stop: the individual at index 0 already has max fitness.
            # On the first pass no fitness has been computed in this method
            # yet, so the value is whatever earlier calls left on it.
            if generation.get_individuals()[0].fitness != max_fitness:

                # print("Generation number: ", j)
                for chromosome in generation.get_individuals():
                    chromosome.calculate_fitness(global_text, encoded_text)

                # Rank best-first. Presumably get_individuals() returns the
                # population's own list so this in-place sort persists -
                # TODO confirm.
                generation.get_individuals().sort(key=lambda x: x.fitness,
                                                  reverse=True)

                # Elitism: the top 10% are carried over unchanged.
                new_generation = generation.get_individuals(
                )[:int(self.population_size * 0.1)]
                # Up to 70% of population_size crossover attempts, each of
                # which adds two children when it fires.
                for i in range(int(self.population_size * 0.7)):
                    if random.uniform(0, 1) < self.cross_chance:
                        # Parents are drawn (with replacement) from the
                        # growing new_generation list, so children appended
                        # earlier in this round can be selected too.
                        # NOTE(review): if that slice holds fewer than two
                        # unequal individuals this loop cannot terminate, and
                        # an empty slice makes random.choices raise - confirm
                        # population_size is always large enough.
                        while True:
                            parent1, parent2 = random.choices(
                                new_generation[:int(self.population_size *
                                                    0.8)],
                                k=2)
                            if parent1 != parent2:
                                break
                        child1, child2 = self.__crossover(parent1, parent2)
                        child1 = self.mutation(child1)
                        child2 = self.mutation(child2)
                        new_generation.append(child1)
                        new_generation.append(child2)

                generation.save_individuals(new_generation)
                # print("Report Best Fitness: ", generation.get_individuals()[0].fitness)
                # print("Population number is:", len(generation.get_individuals()))
            else:
                break
        # Final scoring uses get_text() of both text objects rather than the
        # cleaned strings used inside the loop.
        for chromosome in generation.get_individuals():
            chromosome.calculate_fitness(my_text_object_global.get_text(),
                                         my_text_object_encoded.get_text())
        generation.get_individuals().sort(key=lambda x: x.fitness,
                                          reverse=True)

        return generation.get_individuals()[0].decode_text(text)

Пример #4

Показать файл

def main():
    """Train SVM and NN classifiers on the YouTube comment CSVs and print reports.

    Reads the five data files, splits comments and labels 70/30 with a fixed
    random_state, and fits the feature extractor on the training split only.
    For each classifier it then prints a classification report, a confusion
    matrix and the predicted label for one processed sample comment. Finally
    it prints the size of the fitted vocabulary.
    """
    tp = TextProcessing()
    tp.read_data([
        "data/Youtube01-Psy.csv", "data/Youtube02-KatyPerry.csv",
        "data/Youtube03-LMFAO.csv", "data/Youtube04-Eminem.csv",
        "data/Youtube05-Shakira.csv"
    ])

    print("Number of comments: %d" % len(tp.comments))
    training_data, test_data, training_labels, test_labels = train_test_split(
        tp.comments, tp.labels, train_size=0.7, random_state=7)
    fe = FeatureExtraction()
    fe.fit(training_data)

    # Extract every feature set once and share it between both classifiers
    # instead of re-extracting it for each report line.
    # NOTE(review): assumes fe.extract() has no side effects once fitted and
    # returns a reusable (non-iterator) object - confirm.
    training_features = fe.extract(training_data)
    test_features = fe.extract(test_data)
    comment = tp.process_comment(
        "Like comment and http://www.google.com subscribe")
    comment_features = fe.extract([comment])

    # Classifiers are instantiated lazily, in order, so the NN is only built
    # after the SVM section has finished, as before.
    for title, classifier_cls in (("SVM results:", SVMClassifier),
                                  ("\nNN results:", NNClassifier)):
        clf = classifier_cls()
        clf.train(training_features, training_labels)
        # Predict once and reuse the result for both the report and the matrix.
        predictions = clf.predict(test_features)
        print(title)
        print(classification_report(test_labels, predictions))
        print(confusion_matrix(test_labels, predictions))
        print("Test comment: %s" % comment)
        print("Predicted label: %s" % clf.predict(comment_features))

    print("Number of features: %d" % len(fe.vectorizer.vocabulary_))

Пример #5

Показать файл

Файл: text_processing.py Проект: karim199260/a3

from TextProcessing import TextProcessing

# Module-level client shared by the functions below.
# NOTE(review): the constructor argument is a hard-coded literal, presumably
# an API key - confirm, and if so load it from configuration or the
# environment instead of keeping it in source control.
client = TextProcessing("xslgc26CVzxMaUUF1QhmQASVgjGdY7Tj")


def get_sentiment(text):
    """Return the 'body' attribute of the sentiment response for text.

    The text is UTF-8 encoded and stripped of surrounding whitespace before
    it is passed to client.sentiment with language='english'.
    """
    payload = text.encode('utf-8').strip()
    response = client.sentiment(payload, language='english')
    attributes = vars(response)
    return attributes['body']


def mean(l):
    """Return the arithmetic mean of l as a float, or NaN when l is empty."""
    if len(l) > 0:
        return float(sum(l)) / len(l)
    return float('nan')


def get_mean_sentiment(tweets):
    pos = []
    neg = []
    neutral = []
    for tweet in tweets:
        pos.append(float(tweet.pos))
        neg.append(float(tweet.neg))
        neutral.append(float(tweet.neutral))
    pos = mean(pos)
    neg = mean(neg)
    neutral = mean(neutral)

    if neutral > 0.5:
        label = 'neutral'
        prob = neutral
    else:

Пример #6

Показать файл

    '../data/ynet_cars_500_talkbacks.json',
    '../data/ynet_dating_500_talkbacks.json',
    '../data/ynet_digital_500_talkbacks.json',
    '../data/ynet_economy_500_talkbacks.json',
    '../data/ynet_education_500_talkbacks.json',
    '../data/ynet_health_500_talkbacks.json',
    '../data/ynet_national_500_talkbacks.json',
    '../data/ynet_parents_500_talkbacks.json',
    '../data/ynet_politics_500_talkbacks.json',
]

for file in files:
    print "file", file
    for line in open(file, 'rb'):
        line_dict = json.loads(line)
        processed_list = TextProcessing(
            line_dict['title_text']).cleanTextHebrew(exclude_punct=False)
        data.append(processed_list)

word2idf = json.loads(open(TFIDF_FILE, 'rb').read())
vocabulary = word2idf.keys()

file = open(OUTPUT_FILE, 'wb')
for d_list in data:
    for d in d_list:
        if d not in ILLEGAL_PUNCT:
            if d not in LEGAL_PUNCT and d not in vocabulary:
                file.write(UNKNOWN)
            else:
                file.write(d.encode('utf8'))
            file.write(' ')
    file.write('\n')

Пример #7

Показать файл

Файл: main.py Проект: soheil647/Artificial_Intelligent

from DataExtractor import DataExtractor
from TextProcessing import TextProcessing
from TextClassifier import TextClassifier
import pandas as pd

# Load the training data with its targets, and the test data.
train, target = DataExtractor('data.csv').data_producer('train')
test = DataExtractor('test.csv').data_producer('test')

text_classifier = TextClassifier()

# Split the training data into a training part and a validation part.
training_split, validation_split = text_classifier.split_validation_data(
    train, target)
x_train, y_train = training_split
x_validation, y_validation = validation_split

# Clean the three text collections in the same order as before:
# train, validation, test.
cleaned_x_train, cleaned_x_validation, cleaned_x_test = (
    TextProcessing(texts).clean_text()
    for texts in (x_train, x_validation, test))

text_classifier.fit(cleaned_x_train, y_train)

text_classifier.evaluate(cleaned_x_validation, y_validation)

text_classifier.confusion(y_validation, cleaned_x_validation)

# Predict the test set and write the predictions to output.csv with an
# explicit 'index' column.
result = text_classifier.predict(cleaned_x_test)
output_frame = pd.DataFrame(result, columns=['category'])
output_frame.to_csv('output.csv', index=True, index_label='index')

Пример #8

Показать файл

 def getFreqDict(self, text):
     """Return the result of TextProcessing(text).getFreqDict() for text."""
     processor = TextProcessing(text)
     return processor.getFreqDict()

Пример #9

Показать файл

Файл: Extractor.py Проект: micjagga/scikic

 def isEnglish(self, s):
     '''Return True when the detected language code of s is 'en'.

     Args:
         s: the input text to check.
     '''
     tp = TextProcessing()
     # The comparison already yields the boolean; no if/else needed.
     return tp.languageDetection(s) == 'en'

Пример #10

Показать файл

Файл: Extractor.py Проект: scikic/scikic

 def isEnglish(self, s):
     '''Return True when the detected language code of s is 'en'.

     Args:
         s: the input text to check.
     '''
     tp = TextProcessing()
     # The comparison already yields the boolean; no if/else needed.
     return tp.languageDetection(s) == 'en'