def get_idf_weighting_list(doc, sentence_count, N, lang):
    """Return one smoothed-IDF weight per sentence of *doc*.

    Each weight is ``1 + log((1 + N) / (1 + c))`` where ``c`` is the count of
    the stripped sentence in *sentence_count* (a mapping built over N docs).
    Raises KeyError if a stripped sentence is missing from *sentence_count*.
    """
    return [
        1 + math.log((1.0 + N) / (1.0 + sentence_count[sentence.strip()]))
        for sentence in doc_to_sentence(doc, lang)
    ]
def get_senetence_frequencies(doc, word, lang):
    """Count the sentences of *doc* that contain *word*.

    Note: this is a substring test (``word in sentence``), not a token match,
    so "cat" also matches "category".
    """
    return sum(1 for sentence in doc_to_sentence(doc, lang) if word in sentence)
def get_sentence_length_weighting_list(doc, lang):
    """Return a length-based weight per sentence, normalized to sum to 1.

    Each sentence's raw weight is its whitespace-token count times a global
    scale from ``get_sentence_count()``; weights are then divided by their sum.

    Fix: ``get_sentence_count()`` takes no arguments, so it is loop-invariant —
    call it once instead of once per sentence.

    NOTE(review): if every sentence has zero tokens the total is 0 and the
    final division raises ZeroDivisionError, exactly as the original did —
    confirm callers never pass such input. An empty document returns [].
    """
    sentences = doc_to_sentence(doc, lang)
    scale = get_sentence_count()  # hoisted: identical value for every sentence
    raw = [scale * len(sentence.split()) for sentence in sentences]
    total_tokens = float(sum(raw))
    return [w / total_tokens for w in raw]
def extract_digits(doc, lang):
    """Extract every maximal run of digits in *doc* as an int, in order."""
    numbers = []
    for sentence in doc_to_sentence(doc, lang):
        numbers.extend(int(run) for run in re.findall(r'\d+', sentence))
    return numbers
def get_sentence_frequency_list(doc, lang):
    """Return, for each sentence, its relative frequency within *doc*.

    Fix: the original called ``sentences.count(sent)`` inside a loop over the
    same list — an accidental O(n^2). A single Counter pass is O(n) and yields
    identical values. An empty document returns [] (no division occurs).
    """
    from collections import Counter  # local import: file-level import block not in view

    sentences = doc_to_sentence(doc, lang)
    length = len(sentences)
    counts = Counter(sentences)
    return [counts[sent] / length for sent in sentences]
def sentence_count_web_domain(documents, lang):
    """Build a mapping: stripped sentence -> total occurrences across *documents*."""
    counts = {}
    for document in documents:
        for sentence in doc_to_sentence(document, lang):
            key = sentence.strip()
            counts[key] = counts.get(key, 0) + 1
    return counts
def get_inter_doc_word_idf_weighting_list(doc, word_count, N, lang):
    """Return one weight per sentence: the sum of smoothed word-IDF scores.

    For each word, the score is ``1 + log((1 + N) / (1 + df))`` where ``df``
    is that word's document frequency from *word_count* (built over N docs).
    Raises KeyError if a word is missing from *word_count*.
    """
    weights = []
    for sentence in doc_to_sentence(doc, lang):
        words = sentence_to_word(sentence, lang)
        weights.append(
            sum(1 + math.log((1.0 + N) / (1.0 + word_count[word])) for word in words)
        )
    return weights
def word_count_over_docs(documents, lang):
    """Compute document frequency: word -> number of documents containing it.

    Each word is counted at most once per document. Tokenization is only
    defined for ``'en'`` and ``'si'``; any other language code contributes no
    words (this mirrors the original if/elif branching, where ``words`` stayed
    empty for unknown codes).

    Fixes: the original tested membership against a growing *list* — O(n) per
    word, O(n^2) per document — and duplicated the tokenization call per
    language. A set gives O(1) membership; the ordered list preserves the
    original first-occurrence key insertion order of the result dict.
    """
    word_count = {}
    for doc in documents:
        seen = set()        # fast membership test
        doc_words = []      # first-occurrence order, matches original insertion order
        for sentence in doc_to_sentence(doc, lang):
            words = sentence_to_word(sentence, lang) if lang in ('en', 'si') else []
            for word in words:
                if word not in seen:
                    seen.add(word)
                    doc_words.append(word)
        for word in doc_words:
            word_count[word] = word_count.get(word, 0) + 1
    return word_count
def get_embeddig_list(doc, lang='en'):
    """Split *doc* into sentences and return their embeddings via sent_embedding."""
    return sent_embedding(doc_to_sentence(doc, lang), lang)