def kelime_düzelt(dizi):
    """Spell-correct every string in *dizi* in place using TurkishNLP.

    Args:
        dizi: list of strings; each element is split into words,
            auto-corrected, and re-joined with single spaces.

    Returns:
        The same list object, with its elements corrected.
    """
    nesne = detector.TurkishNLP()
    # Download the word data only when the cached words.pkl is missing.
    varmı = True
    try:
        # `with` closes the probe handle; the original leaked it.
        with open("words.pkl", "r"):
            pass
    except FileNotFoundError:
        varmı = False
    if not varmı:
        nesne.download()
    nesne.create_word_set()
    # range(len(dizi)) -- the original used len(dizi) - 1, which silently
    # left the last element uncorrected.
    for i in range(len(dizi)):
        kelime_kontrol = nesne.list_words(dizi[i])
        düzgün_kelime = nesne.auto_correct(kelime_kontrol)
        dizi[i] = " ".join(düzgün_kelime)
    return dizi
def _auto_correct(job):
    """Auto-correct every multi-word entry of the global ``df["corrected"]``.

    Iterates the whole frame, reporting progress through
    ``_print_progress_bar``, and writes corrected text back with
    ``DataFrame.at`` — the original chained assignment
    (``df["corrected"][index] = words``) raises SettingWithCopyWarning in
    pandas and can silently fail to update the frame.

    Args:
        job: label shown in the progress-bar prefix.
    """
    global df
    df_str = df["corrected"].astype(str)
    obj = detector.TurkishNLP()
    # obj.download()  # one-time download of the TurkishNLP word data
    obj.create_word_set()
    length = df.shape[0]
    for index in range(length):
        _print_progress_bar(index, length, job=job, prefix=f"{job} Progress:", length=50)
        text = df_str[index]
        # Single-token entries are left untouched, as before.
        # NOTE(review): label-based lookup -- assumes a default RangeIndex,
        # same assumption the original code made.
        if " " in text:
            corrected = " ".join(obj.auto_correct(text.split()))
            df.at[index, "corrected"] = corrected
def _get_spell_checker(self, lang):
    """Return a spell checker for *lang*, creating and caching it on first use.

    Supported: 'ru', 'en', 'es', 'fr', 'pt' (LanguageTool) and 'tr'
    (TurkishNLP). Created checkers are cached in ``self.spell_checkers``.
    """
    if lang is None:
        # NOTE(review): passes None straight to LanguageTool -- presumably
        # relies on the library's default-language behavior; confirm that is
        # intended. This branch also bypasses the cache, so a new tool is
        # built on every call.
        return language_tool_python.LanguageTool(lang)
    _exist = lang in self.spell_checkers
    if not _exist and lang in ['ru', 'en', 'es', 'fr', 'pt']:
        # LanguageTool needs the regional code for Portuguese.
        local_lang = lang if lang != 'pt' else 'pt-PT'
        self.spell_checkers[lang] = language_tool_python.LanguageTool(
            local_lang)
    elif not _exist and lang == 'tr':
        # Turkish goes through TurkishNLP instead of LanguageTool.
        obj = detector.TurkishNLP()
        obj.download()
        obj.create_word_set()
        self.spell_checkers[lang] = obj
    # NOTE(review): an unsupported, uncached lang falls through to here and
    # raises KeyError -- verify callers only pass supported codes.
    return self.spell_checkers[lang]
# NOTE(review): this chunk starts mid-scope -- `classes.append(folderName)`
# almost certainly belongs to a folder loop that begins before this excerpt.
classes.append(folderName)
allBlogs = np.array(allBlogs) # Creating rank 1 array
df_allBlogs = pd.DataFrame({'Blog': allBlogs, 'Class': classes}) # Creating data structure with labeled axes.(data, rows and columns)
df_allBlogs = df_allBlogs[['Blog', 'Class']]
print(df_allBlogs)
# NOTE: convert English-keyboard spellings to Turkish characters, e.g. 'soyle' -> 'söyle'.
nltk.download('punkt')
tokenizer = nltk.data.load('tokenizers/punkt/PY3/turkish.pickle')
nltk.download('stopwords')
turkishStopWords = set(stopwords.words('turkish'))
stemmer = TurkishStemmer()
# Typos: TurkishNLP object used for spell-correction.
turkishNLPObject = detector.TurkishNLP()
turkishNLPObject.download()
turkishNLPObject.create_word_set()
# Counters mutated by preprocessBlogs (declared global there).
number=0
numberTokenized=0
numberTypoAndStopWords=0
numberAllAndStemmed=0
def preprocessBlogs(blog):
    # NOTE(review): function is truncated in this excerpt -- its body
    # continues beyond the visible source.
    #Converting to lowercase characters and removing leading and trailing whitespaces.
    blog = blog.lower()
    blog = blog.strip()
    global number
    global numberTokenized
    global numberTypoAndStopWords
print(""" TWITTER SENTIMENT ANALYSIS Coded by FFH """) target = input("Enter target name:") asagi_cek = int(input("Number of page downs:")) liste = getTweets(target, asagi_cek) df = tweets_to_df(liste) df['sentiment'] = np.array( [analyze_sentiment(tweet) for tweet in df['Tweets']]) pos, neg, notr = 0, 0, 0 for i in df['sentiment']: if (i == 1): pos += 1 elif (i == 0): notr += 1 else: neg += 1 dialog(pos, neg, notr) nesne = detector.TurkishNLP() nesne.download() nesne.create_word_set() menu()
import glob
import re
import os
from snowballstemmer import TurkishStemmer
from stop_words import get_stop_words
from sklearn.model_selection import train_test_split
import turkishnlp
from turkishnlp import detector
# NOTE(review): duplicate import of train_test_split (already imported above).
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
obj = detector.TurkishNLP()
# Downloading the TurkishNLP data files (one-time).
# obj.download()
obj.create_word_set()
turkStem = TurkishStemmer()
stop_words = get_stop_words('turkish') # stop_words array
def read_txt(folder_name): # read and data-preprocessing operations
    # NOTE(review): function is truncated in this excerpt -- the body of the
    # `with` block continues beyond the visible source.
    file_list = glob.glob(os.path.join(os.getcwd(), str(folder_name), "*.txt"))
    cumle_list = [] # array holding the contents of the 35/35/35 files
    tip_list = [] # array holding the p/n/t labels, 105 in total
    for file_path in file_list:
        with open(file_path) as f_input:
def addComment(request,id):
    """Django view: add a spell-checked, sentiment-classified comment to a Title.

    Flow: verify the user has not commented on this title yet, check the text
    is plausibly Turkish, auto-correct it, classify sentiment with a Keras
    model, save the Comment, then refresh the per-title sentiment totals and
    the commenter's award tier. Always redirects back to the title page.
    """
    title = get_object_or_404(Title, id=id)
    # Number of prior comments by this user on this title (0 or more).
    comment_control = title.comment.filter(comment_author = request.user).count()
    obj = detector.TurkishNLP()
    #obj.download()
    obj.create_word_set() # used to build the word set.
    if request.method == "POST":
        # list_words splits the posted text into a list of words.
        comment_content = obj.list_words(request.POST.get("comment_content"))
        comment_author = request.user # later assign this author to the person making the comment
        if comment_control == 0:
            control = obj.is_turkish(comment_content) # to check the ratio of correctly-spelled Turkish words.
            if control > 0.10:
                model = load_model("model.h5")
                corrected_words = obj.auto_correct(comment_content) # H2. Here "list_words" splits the incoming text into words with the help of a regex.
                corrected_string = " ".join(corrected_words) # H3. Python's "join" method can be used to merge the words back together.
                texts = [corrected_string]
                # NOTE(review): `tokenizer` and `max_tokens` are module-level
                # globals defined elsewhere in this file -- confirm they match
                # the ones model.h5 was trained with.
                tokens = tokenizer.texts_to_sequences(texts)
                tokens_pad = pad_sequences(tokens, maxlen=max_tokens)
                # NOTE(review): predict() returns an array; the comparisons
                # below rely on it holding a single element.
                sentiment = model.predict(tokens_pad)
                if (sentiment>=0.85):
                    sentiment = "Pozitif"
                    messages.success(request,"Pozitif")
                elif (0.40 <= sentiment < 0.85):
                    sentiment = "Nötr"
                    messages.success(request,"Nötr")
                else:
                    sentiment = "Negatif"
                    messages.success(request,"Negatif")
                newComment = Comment(comment_author = comment_author, comment_content = corrected_string, sentiment = sentiment )
                newComment.title = title
                newComment.save()
                # Recount this user's comments by sentiment (includes the new one).
                total_comments = Comment.objects.filter(comment_author = comment_author).count()
                positive_count = Comment.objects.filter(comment_author = comment_author, sentiment = "Pozitif").count()
                negative_count = Comment.objects.filter(comment_author = comment_author, sentiment = "Negatif").count()
                notr_count = Comment.objects.filter(comment_author = comment_author, sentiment = "Nötr").count()
                # Per-title sentiment tallies.
                positive_count_title = Comment.objects.filter(title = title, sentiment = "Pozitif").count()
                negative_count_title = Comment.objects.filter(title = title, sentiment = "Negatif").count()
                notr_count_title = Comment.objects.filter(title = title, sentiment = "Nötr").count()
                # Update (or create) the title's aggregated sentiment record.
                if SentimentTotalSystem.objects.filter(title = title).exists():
                    for y in SentimentTotalSystem.objects.filter(title = title):
                        y.positive_count = positive_count_title
                        y.negative_count = negative_count_title
                        y.notr_count = notr_count_title
                        y.save()
                else:
                    new_sentiment_total = SentimentTotalSystem(title = title, positive_count= positive_count_title, negative_count = negative_count_title, notr_count = notr_count_title)
                    new_sentiment_total.save()
                # Update (or create) the commenter's award record:
                # gold >= 5 comments, silver >= 3, bronze >= 1.
                if PersonalAwardSystem.objects.filter(user = comment_author).exists():
                    for x in PersonalAwardSystem.objects.filter(user = comment_author):
                        x.total_comment = total_comments
                        x.positive_count = positive_count
                        x.negative_count = negative_count
                        x.notr_count = notr_count
                        if x.total_comment>=5:
                            x.is_gold = True
                            x.is_silver = False
                            x.is_bronze = False
                        elif x.total_comment>=3:
                            x.is_gold = False
                            x.is_silver = True
                            x.is_bronze = False
                        elif x.total_comment>=1:
                            x.is_gold = False
                            x.is_silver = False
                            x.is_bronze = True
                        x.save()
                else:
                    new_total = PersonalAwardSystem(user = comment_author, total_comment = total_comments, positive_count = positive_count, negative_count = negative_count, notr_count = notr_count)
                    if total_comments>=5:
                        new_total.is_gold = True
                        new_total.is_silver = False
                        new_total.is_bronze = False
                    elif total_comments>=3:
                        new_total.is_gold = False
                        new_total.is_silver = True
                        new_total.is_bronze = False
                    elif total_comments>=1:
                        new_total.is_gold = False
                        new_total.is_silver = False
                        new_total.is_bronze = True
                    new_total.save()
            else:
                # Rejected: too many spelling errors to accept the comment.
                messages.success(request,"Yorumunuz Yazım Hatalarından Dolayı Göderilemedi. Lütfen Yorumunuzu Tekrar Belirtiniz!")
        else:
            # User already commented on this title once.
            messages.success(request,"Yorum Bulunmaktadır Yorum Yapamazsınız")
    return redirect("/title/titles/" + str(id))
# Kelime oyununun basit konsol versiyonudur. Fonksiyonlar değişiklik gösterebilir. # 1 kere çalıştırdıktan sonra obj.download() satırını siliniz from turkishnlp import detector obj = detector.TurkishNLP() obj.download() obj.create_word_set() my_array = [] i = 0 toplampuan = 0 baslangicKelimesi = "başarı" print("\nBu oyun verilen kelimenin son harfi ile başlayan bir kelime yazan kullanıcıya puan kazandırmayı hedefler. \n" "Başlagıç kelimesi -başarı-\n" "kelime giriniz...") def puan_kazandir(): bonus = 0 global toplampuan kelimeUzunlugu = len(kelime) puan = kelimeUzunlugu * 4 if(obj.is_vowel_harmonic(kelime)): print("%10 büyük ünlü uyumu bonusu ! ")