def beta_money_Detection(txt):
    """Grade a text from its beta value against a fixed [lower, upper] band.

    The beta value is obtained from ``betafunc.Beta(txt)`` and compared with
    hard-coded limits (0.057 .. 0.19) using a margin ``std`` of 0.025.

    Args:
        txt: Text passed unchanged to ``betafunc.Beta``.

    Returns:
        5.0 if beta lies at least 2.5 margins below the upper limit,
        2.5 if beta lies inside the band with one margin of headroom on
        both sides, otherwise 0.0.
    """
    beta_value = betafunc.Beta(txt)
    upper_limit = 0.19
    lower_limit = 0.057
    std = 0.025

    # Strongest grade: beta sits well below the upper limit.
    if beta_value + std * 2.5 < upper_limit:
        return 5.0

    # Both bounds must hold. The previous ``or`` joined two overlapping
    # half-lines (beta <= ~0.165 or beta >= ~0.082), which is true for every
    # real number, so the 0.0 result below could never be produced.
    if beta_value + std <= upper_limit and beta_value - std >= lower_limit:
        return 2.5

    return 0.0
def beta_for_imgntn(text):
    """Grade a text from its beta value against a fixed (lower, upper) band.

    The beta value is obtained from ``betafunc.Beta(text)`` and compared with
    hard-coded limits (0.48 .. 1.25) using a margin ``std`` of 0.17.

    Args:
        text: Text passed unchanged to ``betafunc.Beta``.

    Returns:
        5.0 if beta is greater than 0.8,
        2.5 if beta lies strictly inside the band with one margin of
        headroom on both sides, otherwise 0.0.
    """
    beta_value = betafunc.Beta(text)
    lower_limit = 0.48
    upper_limit = 1.25
    std = 0.17

    if beta_value > 0.8:
        return 5.0

    # Both bounds must hold. With the previous ``or`` this test was always
    # true: this point is only reached when beta <= 0.8, and then
    # ``beta + std < upper_limit`` holds on its own, so 0.0 was unreachable.
    if beta_value - std > lower_limit and beta_value + std < upper_limit:
        return 2.5

    return 0.0
def betaPublicisticDetection(text):
    """Grade a text using its beta value, prefix count and first tag value.

    Inputs are taken from ``betafunc.Beta``,
    ``ScienceDetectionFuncs.numPrefixCheck`` and ``tags_counter.counter``.

    Args:
        text: Text passed unchanged to each of the helper functions.

    Returns:
        0.0 unless ``tags_counter.counter(text)[0] < 0.025`` and beta < 0.5;
        when that gate passes, 5.0 if the prefix value is below 70,
        otherwise 2.5.
    """
    beta = betafunc.Beta(text)
    # The average sentence length used to be computed here but was never
    # read, so that call has been dropped.
    prefix = ScienceDetectionFuncs.numPrefixCheck(text)
    text_vector = tags_counter.counter(text)

    if not (text_vector[0] < 0.025 and beta < 0.5):
        return 0.0

    return 5.0 if prefix < 70 else 2.5
def publicisticDetection(text):
    """Grade a text using beta, prefix value and same-neighbour count.

    Inputs are taken from ``betafunc.Beta``,
    ``ScienceDetectionFuncs.numPrefixCheck`` and
    ``same_neighbors_counter.NN_same_neighbors``.

    Args:
        text: Text passed unchanged to each of the helper functions.

    Returns:
        0.0 unless beta > 0.22 and the prefix value is below 90;
        when that gate passes, 5.0 if the same-neighbour value is at
        least 400, otherwise 2.5.
    """
    beta = betafunc.Beta(text)
    prefix = ScienceDetectionFuncs.numPrefixCheck(text)
    same_neighbors = same_neighbors_counter.NN_same_neighbors(text)

    first_gate = beta > 0.22 and prefix < 90
    if not first_gate:
        return 0.0

    # Past the first gate the text most likely shows no signs of the
    # business or scientific styles.
    if same_neighbors >= 400:
        # The text now shows signs of a publicistic text.
        return 5.0

    return 2.5
def scienceParamDetection(txt):
    """Grade a text using its prefix value, beta value and two tag values.

    Inputs are taken from ``numPrefixCheck``, ``tags_counter.counter`` and
    ``betafunc.Beta``.

    Args:
        txt: Text passed unchanged to each of the helper functions.

    Returns:
        0 unless the prefix value exceeds 100 and beta exceeds 0.22;
        when that gate passes, 5 if element 5 or element 4 of the tag
        vector is below its threshold, otherwise 2.5.
        NOTE: the 0 and 5 results are ints while 2.5 is a float.
    """
    prefix = numPrefixCheck(txt)
    tags_vector = tags_counter.counter(txt)
    beta = betafunc.Beta(txt)

    if not (prefix > 100 and beta > 0.22):
        return 0

    # Thresholds kept as exact ratios so the comparisons stay bit-identical.
    tag5_limit = 1936908127739503 / 4611686018427387904  # roughly 0.00042
    tag4_limit = 5404319552844595 / 72057594037927936  # roughly 0.075

    if tags_vector[5] < tag5_limit or tags_vector[4] < tag4_limit:
        return 5

    return 2.5
# Per-file statistics pass over the entries of `iner_list_of_dir`.
# NOTE(review): `itr`, `folder`, `iner_list_of_dir`, `maximus`, `freqall`,
# `amount_of_words`, `beta`, `average_word_length` and `average_sent_length`
# are not defined in this fragment; they must be initialised by the
# surrounding code.

# Name of the file that produced the current maximum for each feature slot.
maxFilename = ["", "", "", "", "", "", ""]
# Running sums indexed by `itr`; no division happens in this fragment.
average_prefix = [0, 0, 0, 0]
average_m_prefix = [0, 0, 0, 0]
for file in iner_list_of_dir:
    # Only files whose name ends with "utf.txt" are read.
    if (file.endswith("utf.txt")):
        with open(folder + "/" + file, "r", encoding="utf-8") as currfile:
            text = currfile.read()
            # Per-file feature vector, reused below for maxima and totals.
            freq = tags_counter.counter(text)
            average_m_prefix[
                itr] += MoneyDetectionFuncs.anti_mutex_prefix_Detection(
                    text)
            average_prefix[itr] += ScienceDetectionFuncs.numPrefixCheck(
                text)
            #std_list.append(average_prefix[itr])
            amount_of_words[itr] += len(nltk.tokenize.word_tokenize(text))
            beta += betafunc.Beta(text)
            average_word_length += average_length_of_word.average_word_length(
                text)
            average_sent_length += average_length_of_word.average_sent_length(
                text)
            # Walk the feature slots by index; `feature` itself is unused and
            # `itr2` is the manual position counter.
            itr2 = 0
            for feature in maximus:
                # Remember the largest value seen per slot and its file.
                if (maximus[itr2] < freq[itr2]):
                    maximus[itr2] = freq[itr2]
                    maxFilename[itr2] = file
                itr2 += 1
            # Progress output: running sums so far, then this file's vector.
            print(average_prefix[itr])
            print(average_m_prefix[itr])
            #print(beta)
            print(freq)
            # Element-wise accumulation of this file's vector into the total
            # for slot `itr`; zip stops at the shorter of the two sequences.
            freqall[itr] = [x + y for x, y in zip(freq, freqall[itr])]