def beta_money_Detection(txt):
    """Grade the beta value of ``txt`` against a fixed band of thresholds.

    The beta value comes from ``betafunc.Beta(txt)`` and is compared with
    ``lower_limit`` / ``upper_limit`` using ``std`` as a safety margin.

    Args:
        txt: Text handed unchanged to ``betafunc.Beta``.

    Returns:
        float: ``5.0`` when beta is more than 2.5 margins below the upper
        limit; ``2.5`` when beta lies inside the band shrunk by one margin on
        each side; ``0.0`` otherwise.
    """
    beta_value = betafunc.Beta(txt)
    upper_limit = 0.19
    lower_limit = 0.057
    std = 0.025

    if beta_value + std * 2.5 < upper_limit:
        return 5.0
    # Both bounds have to hold at once.  Joining them with ``or`` made the
    # test true for every number (beta <= 0.165 or beta >= 0.082), which left
    # the 0.0 result unreachable.
    if beta_value + std <= upper_limit and beta_value - std >= lower_limit:
        return 2.5
    return 0.0
示例#2
0
def beta_for_imgntn(text):
    """Grade the beta value of ``text`` against fixed thresholds.

    The beta value comes from ``betafunc.Beta(text)``.

    Args:
        text: Text handed unchanged to ``betafunc.Beta``.

    Returns:
        float: ``5.0`` when beta exceeds 0.8; ``2.5`` when beta lies inside
        the ``lower_limit`` .. ``upper_limit`` band shrunk by ``std`` on each
        side; ``0.0`` otherwise.
    """
    beta_value = betafunc.Beta(text)
    lower_limit = 0.48
    upper_limit = 1.25
    std = 0.17

    if beta_value > 0.8:
        return 5.0
    # Both bounds have to hold at once.  Joining them with ``or`` made the
    # test true for every number (beta > 0.65 or beta < 1.08), which left the
    # 0.0 result unreachable.
    if beta_value - std > lower_limit and beta_value + std < upper_limit:
        return 2.5
    return 0.0
def betaPublicisticDetection(text):
    """Grade ``text`` from its beta value, first tag frequency and prefix count.

    Inputs are taken from ``betafunc.Beta``,
    ``ScienceDetectionFuncs.numPrefixCheck`` and ``tags_counter.counter``.

    Args:
        text: Text handed unchanged to each of the helpers above.

    Returns:
        float: ``0.0`` unless the first element of the tag vector is below
        0.025 and beta is below 0.5; in that case ``5.0`` when the prefix
        value is below 70, otherwise ``2.5``.
    """
    # The average sentence length used to be computed here as well, but the
    # result was never read, so that call has been dropped.
    beta = betafunc.Beta(text)
    prefix = ScienceDetectionFuncs.numPrefixCheck(text)
    text_vector = tags_counter.counter(text)

    if not (text_vector[0] < 0.025 and beta < 0.5):
        return 0.0
    return 5.0 if prefix < 70 else 2.5
def publicisticDetection(text):
    """Grade ``text`` for features of the publicistic style.

    Inputs are taken from ``betafunc.Beta``,
    ``ScienceDetectionFuncs.numPrefixCheck`` and
    ``same_neighbors_counter.NN_same_neighbors``.

    Args:
        text: Text handed unchanged to each of the helpers above.

    Returns:
        float: ``0.0`` unless beta is above 0.22 and the prefix value is
        below 90; in that case ``5.0`` when the same-neighbours value is at
        least 400, otherwise ``2.5``.
    """
    beta_value = betafunc.Beta(text)
    prefix_score = ScienceDetectionFuncs.numPrefixCheck(text)
    same_neighbors = same_neighbors_counter.NN_same_neighbors(text)

    if not (beta_value > 0.22 and prefix_score < 90):
        return 0.0

    # Past this gate the text most likely shows no features of the official
    # (business) or scientific styles.
    if same_neighbors >= 400:
        # The text shows features of a publicistic text.
        return 5.0
    return 2.5
示例#5
0
def scienceParamDetection(txt):
    """Grade ``txt`` for features of the scientific style.

    Inputs are taken from ``numPrefixCheck``, ``tags_counter.counter`` and
    ``betafunc.Beta``.

    Args:
        txt: Text handed unchanged to each of the helpers above.

    Returns:
        ``0`` unless the prefix value is above 100 and beta is above 0.22;
        in that case ``5`` when element 5 or element 4 of the tag vector is
        below its threshold, otherwise ``2.5``.  The int/float mix of the
        results is kept as callers have always seen it.
    """
    prefix = numPrefixCheck(txt)
    tags_vector = tags_counter.counter(txt)
    beta = betafunc.Beta(txt)

    if not (prefix > 100 and beta > 0.22):
        # Nothing science-like about this text.
        return 0

    # Past this gate the text can hardly be reworked into the official or
    # publicistic style.  The thresholds are kept as exact integer ratios.
    if (tags_vector[5] < 1936908127739503 / 4611686018427387904
            or tags_vector[4] < 5404319552844595 / 72057594037927936):
        # Shows similarity to a scientific text.
        return 5
    # Possibly closer to a fiction text.
    return 2.5
 # NOTE(review): this is a fragment; `itr`, `folder`, `iner_list_of_dir`,
 # `maximus`, `freqall`, `amount_of_words`, `beta`, `average_word_length` and
 # `average_sent_length` are defined outside the visible code.

 # For each feature index: name of the file whose tag frequency last raised
 # the matching entry of `maximus` (filled in by the loop below).
 maxFilename = ["", "", "", "", "", "", ""]
 # Running totals, one slot per `itr` value.  Only sums are accumulated here;
 # presumably they are divided into averages later - TODO confirm.
 average_prefix = [0, 0, 0, 0]
 average_m_prefix = [0, 0, 0, 0]
 for file in iner_list_of_dir:
     # Only files whose name ends with "utf.txt" are processed.
     if (file.endswith("utf.txt")):
         with open(folder + "/" + file, "r", encoding="utf-8") as currfile:
             text = currfile.read()
             # Tag-frequency vector of the current file.
             freq = tags_counter.counter(text)
             # Add this file's detector results to the totals for slot `itr`.
             average_m_prefix[
                 itr] += MoneyDetectionFuncs.anti_mutex_prefix_Detection(
                     text)
             average_prefix[itr] += ScienceDetectionFuncs.numPrefixCheck(
                 text)
             #std_list.append(average_prefix[itr])
             # Word count of the file, as tokenized by NLTK.
             amount_of_words[itr] += len(nltk.tokenize.word_tokenize(text))
             # Scalar running totals over the processed files.
             beta += betafunc.Beta(text)
             average_word_length += average_length_of_word.average_word_length(
                 text)
             average_sent_length += average_length_of_word.average_sent_length(
                 text)
             # Track the per-feature maximum and the file it came from.
             # `feature` itself is unused; `itr2` is the manual index.
             itr2 = 0
             for feature in maximus:
                 if (maximus[itr2] < freq[itr2]):
                     maximus[itr2] = freq[itr2]
                     maxFilename[itr2] = file
                 itr2 += 1
             # Progress output: totals so far and this file's tag vector.
             print(average_prefix[itr])
             print(average_m_prefix[itr])
             #print(beta)
             print(freq)
             # Element-wise add this file's tag vector to the total for `itr`.
             freqall[itr] = [x + y for x, y in zip(freq, freqall[itr])]