Example #1
import os
from turkishnlp import detector


def kelime_düzelt(dizi):
    nesne = detector.TurkishNLP()
    # Download the language data only if the local word file is missing.
    if not os.path.exists("words.pkl"):
        nesne.download()
    nesne.create_word_set()
    # Correct every entry (the original looped over range(len(dizi) - 1),
    # which silently skipped the last element).
    for i in range(len(dizi)):
        kelime_kontrol = nesne.list_words(dizi[i])
        düzgün_kelime = nesne.auto_correct(kelime_kontrol)
        dizi[i] = " ".join(düzgün_kelime)
    return dizi
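A hypothetical usage sketch of kelime_düzelt; the sample sentences are invented for illustration and are not from the original code:

# Hypothetical usage; the input list below is made up.
cümleler = ["buğün hava çok güzl", "yarın toplanti var"]
düzeltilmiş = kelime_düzelt(cümleler)
print(düzeltilmiş)  # a list of the same length, each entry auto-corrected word by word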
Example #2
def _auto_correct(job):
    global df
    df_str = df["corrected"].astype(str)
    obj = detector.TurkishNLP()
    #obj.download()
    obj.create_word_set()
    length = df.shape[0]
    for index in range(length):
        _print_progress_bar(index,
                            length,
                            job=job,
                            prefix=f"{job} Progress:",
                            length=50)
        if " " in df_str[index]:
            words = df_str[index].split()
            words = obj.auto_correct(words)
            words = " ".join(words)
            df["corrected"][index] = words
Example #3
    def _get_spell_checker(self, lang):
        # No language code given: hand it straight to LanguageTool, uncached.
        if lang is None:
            return language_tool_python.LanguageTool(lang)

        _exist = lang in self.spell_checkers

        # These languages are served by LanguageTool; Portuguese needs the
        # regional 'pt-PT' code.
        if not _exist and lang in ['ru', 'en', 'es', 'fr', 'pt']:
            local_lang = lang if lang != 'pt' else 'pt-PT'
            self.spell_checkers[lang] = language_tool_python.LanguageTool(
                local_lang)

        # Turkish is served by turkishnlp instead of LanguageTool.
        elif not _exist and lang == 'tr':
            obj = detector.TurkishNLP()
            obj.download()
            obj.create_word_set()
            self.spell_checkers[lang] = obj

        # Any other language code raises KeyError here.
        return self.spell_checkers[lang]
Example #4
      classes.append(folderName)

allBlogs = np.array(allBlogs)  #Creating rank 1 array
df_allBlogs = pd.DataFrame({'Blog': allBlogs, 'Class': classes})  #Creating a DataFrame with labeled axes (rows and columns).
df_allBlogs = df_allBlogs[['Blog', 'Class']]
print(df_allBlogs)

#NOTE: convert ASCII-typed words such as "soyle" into their Turkish-character forms ("söyle").

nltk.download('punkt')
tokenizer = nltk.data.load('tokenizers/punkt/PY3/turkish.pickle')
nltk.download('stopwords')
turkishStopWords = set(stopwords.words('turkish'))
stemmer = TurkishStemmer()
#Typos
turkishNLPObject = detector.TurkishNLP()
turkishNLPObject.download()
turkishNLPObject.create_word_set()

number=0
numberTokenized=0
numberTypoAndStopWords=0
numberAllAndStemmed=0

def preprocessBlogs(blog):
  #Converting to lowercase characters and removing leading and trailing whitespaces.
  blog = blog.lower()
  blog = blog.strip()
  global number
  global numberTokenized
  global numberTypoAndStopWords
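The original preprocessBlogs is truncated here. The sketch below is a hypothetical illustration of how the objects created above (tokenizer, turkishStopWords, stemmer, turkishNLPObject) are typically combined; it is not the author's implementation.

#Hypothetical sketch only; not the original preprocessBlogs.
def preprocess_sketch(blog):
  blog = blog.lower().strip()
  words = []
  for sentence in tokenizer.tokenize(blog):  #punkt sentence splitting
    words.extend(nltk.word_tokenize(sentence))  #word tokenization
  words = [w for w in words if w not in turkishStopWords]  #drop stop words
  words = turkishNLPObject.auto_correct(words)  #fix typos
  words = [stemmer.stemWord(w) for w in words]  #stemming
  return " ".join(words)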
Example #5
    print("""
      
    
                  TWITTER SENTIMENT ANALYSIS
    
    
    Coded by FFH
    """)
    target = input("Enter target name:")
    asagi_cek = int(input("Number of page downs:"))
    liste = getTweets(target, asagi_cek)
    df = tweets_to_df(liste)
    df['sentiment'] = np.array(
        [analyze_sentiment(tweet) for tweet in df['Tweets']])
    pos, neg, notr = 0, 0, 0
    for i in df['sentiment']:
        if (i == 1):
            pos += 1
        elif (i == 0):
            notr += 1
        else:
            neg += 1

    dialog(pos, neg, notr)


nesne = detector.TurkishNLP()
nesne.download()
nesne.create_word_set()
menu()
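getTweets, tweets_to_df, analyze_sentiment and dialog are not shown in this example. The sketch below is only an assumption about analyze_sentiment, mapping a TextBlob-style polarity score onto the 1 / 0 / -1 labels counted in the loop above (a Turkish-capable sentiment model would be needed in practice):

# Assumed sketch of analyze_sentiment; not the original author's code.
from textblob import TextBlob

def analyze_sentiment(tweet):
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity > 0:
        return 1   # positive
    elif polarity == 0:
        return 0   # neutral
    return -1      # negative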
Example #6
import glob
import re
import os
from snowballstemmer import TurkishStemmer
from stop_words import get_stop_words
from sklearn.model_selection import train_test_split
import turkishnlp
from turkishnlp import detector
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

obj = detector.TurkishNLP()  # downloading the TurkishNLP data files
# obj.download()
obj.create_word_set()

turkStem = TurkishStemmer()
stop_words = get_stop_words('turkish')  # stop_words array


def read_txt(folder_name):  # reading and data preprocessing steps
    file_list = glob.glob(os.path.join(os.getcwd(), str(folder_name), "*.txt"))

    cumle_list = []  # array holding the contents of the 35/35/35 files
    tip_list = []  # array holding the p, n, t labels, 105 in total

    for file_path in file_list:
        with open(file_path) as f_input:
Example #7
def addComment(request,id):
    title = get_object_or_404(Title, id=id)

    comment_control = title.comment.filter(comment_author = request.user).count()

    obj = detector.TurkishNLP()
    #obj.download()
    obj.create_word_set() # we use this to build the word set.

    if request.method == "POST":
        comment_content = obj.list_words(request.POST.get("comment_content"))
        comment_author = request.user # later this author is assigned to the user making the comment

        if comment_control == 0:

            control = obj.is_turkish(comment_content) # to check the ratio of correctly spelled Turkish words.

            if control > 0.10:

                model = load_model("model.h5")

                corrected_words = obj.auto_correct(comment_content) # H2. Here the "list_words" method splits the incoming text string into its words with the help of a regex.
                corrected_string = " ".join(corrected_words) # H3. Python's "join" method can be used to merge the words back together.

                texts = [corrected_string]

                tokens = tokenizer.texts_to_sequences(texts)
                tokens_pad = pad_sequences(tokens, maxlen=max_tokens)

                sentiment = model.predict(tokens_pad)

                if (sentiment>=0.85):
                    sentiment = "Pozitif"
                    messages.success(request,"Pozitif")
                elif (0.40 <= sentiment < 0.85):
                    sentiment = "Nötr"
                    messages.success(request,"Nötr")
                else:
                    sentiment = "Negatif"
                    messages.success(request,"Negatif")

                newComment = Comment(comment_author = comment_author, comment_content = corrected_string, sentiment = sentiment )

                newComment.title = title

                newComment.save()

                total_comments = Comment.objects.filter(comment_author = comment_author).count()
                positive_count = Comment.objects.filter(comment_author = comment_author, sentiment = "Pozitif").count()
                negative_count = Comment.objects.filter(comment_author = comment_author, sentiment = "Negatif").count()
                notr_count = Comment.objects.filter(comment_author = comment_author, sentiment = "Nötr").count()

                positive_count_title = Comment.objects.filter(title = title, sentiment = "Pozitif").count()
                negative_count_title = Comment.objects.filter(title = title, sentiment = "Negatif").count()
                notr_count_title = Comment.objects.filter(title = title, sentiment = "Nötr").count()

                if SentimentTotalSystem.objects.filter(title = title).exists():
                    for y in SentimentTotalSystem.objects.filter(title = title):
                        y.positive_count = positive_count_title
                        y.negative_count = negative_count_title
                        y.notr_count = notr_count_title

                        y.save()
                else:
                    new_sentiment_total = SentimentTotalSystem(title =  title, positive_count= positive_count_title, negative_count = negative_count_title, notr_count = notr_count_title)
                    
                    new_sentiment_total.save()
                    
                if PersonalAwardSystem.objects.filter(user = comment_author).exists():
                    for x in PersonalAwardSystem.objects.filter(user = comment_author):
                        x.total_comment = total_comments
                        x.positive_count = positive_count
                        x.negative_count = negative_count
                        x.notr_count = notr_count

                        if x.total_comment>=5:
                            x.is_gold = True
                            x.is_silver = False
                            x.is_bronze = False
                        elif x.total_comment>=3:
                            x.is_gold = False
                            x.is_silver = True
                            x.is_bronze = False
                        elif x.total_comment>=1:
                            x.is_gold = False
                            x.is_silver = False
                            x.is_bronze = True
                        
                        x.save()
                else:
                    new_total = PersonalAwardSystem(user = comment_author, total_comment = total_comments, positive_count = positive_count, negative_count = negative_count, notr_count = notr_count)
                    
                    if total_comments>=5:
                        new_total.is_gold = True
                        new_total.is_silver = False
                        new_total.is_bronze = False
                    elif total_comments>=3:
                        new_total.is_gold = False
                        new_total.is_silver = True
                        new_total.is_bronze = False
                    elif total_comments>=1:
                        new_total.is_gold = False
                        new_total.is_silver = False
                        new_total.is_bronze = True
                    
                    new_total.save()


            else:
                messages.success(request,"Yorumunuz Yazım Hatalarından Dolayı Göderilemedi. Lütfen Yorumunuzu Tekrar Belirtiniz!")
        else:
            messages.success(request,"Yorum Bulunmaktadır Yorum Yapamazsınız")
    return redirect("/title/titles/" + str(id))
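A minimal standalone sketch of just the turkishnlp gate used in the view above; the 0.10 threshold and the list_words / is_turkish / auto_correct calls come from the view, everything else is illustrative:

# Standalone sketch of the spell-checking gate from addComment; illustrative only.
from turkishnlp import detector

obj = detector.TurkishNLP()
# obj.download()  # only needed on the first run
obj.create_word_set()

def correct_if_turkish(text, threshold=0.10):
    words = obj.list_words(text)            # split the raw text into words
    if obj.is_turkish(words) <= threshold:  # spelling ratio too low: reject
        return None
    return " ".join(obj.auto_correct(words))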
Example #8
# A simple console version of the word game. The functions may change.

# After running it once, delete the obj.download() line

from turkishnlp import detector

obj = detector.TurkishNLP()
obj.download()
obj.create_word_set()

my_array = []
i = 0
toplampuan = 0

baslangicKelimesi = "başarı"

print("\nBu oyun verilen kelimenin son harfi ile başlayan bir kelime yazan kullanıcıya puan kazandırmayı hedefler. \n"
      "Başlagıç kelimesi -başarı-\n"
      "kelime giriniz...")

def puan_kazandir():

    bonus = 0

    global toplampuan
    kelimeUzunlugu = len(kelime)
    puan = kelimeUzunlugu * 4

    if(obj.is_vowel_harmonic(kelime)):
     print("%10 büyük ünlü uyumu bonusu ! ")  # "10% major vowel harmony bonus!"