def get_corpus(name):
    """Load a corpus file and parse each non-blank line as one document.

    Args:
        name: Path to the corpus file, one document per line.

    Returns:
        list: ``parse_document(line)`` for every non-empty line, in file order.
    """
    # The 'with' statement closes the file on exit; the original also called
    # corpus_file.close() explicitly, which was redundant dead code.
    with open(name, 'r') as corpus_file:
        return [parse_document(doc) for doc in corpus_file if doc.strip()]
for status, chunk in itertools.groupby( flattened_chunks, lambda (word, pos, chunk): chunk != 'O') ] valid_chunks = [ ' '.join(word.lower() for word, tag, chunk in wtc_group if word.lower() not in stopword_list) for status, wtc_group in valid_chunks_tagged if status ] all_chunks.append(valid_chunks) return all_chunks sentences = parse_document(toy_text) valid_chunks = get_chunks(sentences) print valid_chunks def get_tfidf_weighted_keyphrases(sentences, grammar=r'NP: {<DT>? <JJ>* <NN.*>+}', top_n=10): valid_chunks = get_chunks(sentences, grammar=grammar) dictionary = corpora.Dictionary(valid_chunks) corpus = [dictionary.doc2bow(chunk) for chunk in valid_chunks] tfidf = models.TfidfModel(corpus) corpus_tfidf = tfidf[corpus]
to control their body temperature. Their pillar-like legs can carry their great weight. African elephants have larger ears and concave backs while Asian elephants have smaller ears and convex or level backs. """ from gensim.summarization import summarize, keywords def text_summarization_gensim(text, summary_ratio=0.5): summary = summarize(text, split=True, ratio=summary_ratio) for sentence in summary: print(sentence) docs = parse_document(toy_text) text = ' '.join(docs) text_summarization_gensim(text, summary_ratio=0.4) sentences = parse_document(toy_text) norm_sentences = normalize_corpus(sentences,lemmatize=False) total_sentences = len(norm_sentences) print('Total Sentences in Document:', total_sentences) num_sentences = 3 num_topics = 2 vec, dt_matrix = build_feature_matrix(sentences,
lambda (word,pos,chunk): chunk != 'O')] valid_chunks = [' '.join(word.lower() for word, tag, chunk in wtc_group if word.lower() not in stopword_list) for status, wtc_group in valid_chunks_tagged if status] all_chunks.append(valid_chunks) return all_chunks sentences = parse_document(toy_text) valid_chunks = get_chunks(sentences) print valid_chunks def get_tfidf_weighted_keyphrases(sentences, grammar=r'NP: {<DT>? <JJ>* <NN.*>+}', top_n=10): valid_chunks = get_chunks(sentences, grammar=grammar) dictionary = corpora.Dictionary(valid_chunks) corpus = [dictionary.doc2bow(chunk) for chunk in valid_chunks] tfidf = models.TfidfModel(corpus) corpus_tfidf = tfidf[corpus]
for moving objects and digging. Elephants' large ear flaps help to control their body temperature. Their pillar-like legs can carry their great weight. African elephants have larger ears and concave backs while Asian elephants have smaller ears and convex or level backs. """ def text_summarization_gensim(text, summary_ratio=0.5): summary = summarize(text, split=True, ratio=summary_ratio) for sentence in summary: print(sentence) # Using Gensim Summarization Method docs = parse_document(document1) text = ' '.join(docs) text_summarization_gensim(text, summary_ratio=0.3) sentences = parse_document(document1) norm_sentences = normalize_corpus(sentences, lemmatize=False) total_sentences = len(norm_sentences) print('Total Sentences in Document:', total_sentences) num_sentences = 3 num_topics = 1 vec, dt_matrix = build_feature_matrix(sentences, feature_type='frequency') td_matrix = dt_matrix.transpose()
top_sentence_indices.sort() s = '' for index in top_sentence_indices: s = s + ' ' + sentences[index] print(sentences[index]) return s path = r'../../data/raw/OpinosisDataset1.0_0/topics/' allFiles = glob.glob(path + "/*.data") reviews = list() for file_ in allFiles: with open(file_, "r") as f: review = f.read() DOCUMENT = review sentences = parse_document(DOCUMENT) norm_sentences = normalize_corpus(sentences, lemmatize=True) print("Total Sentences:", len(norm_sentences)) filename_search = re.search(r'[^\\/:*?"<>|\r\n]+$', file_) filename = filename_search.group() myfile = open(r'../../data/processed/lsa/' + filename, 'w') myfile.writelines( lsa_text_summarizer(norm_sentences, num_sentences=2, num_topics=5, feature_type='frequency', sv_threshold=0.5)) myfile = open(r'../../data/processed/textrank_cosine/' + filename, 'w') myfile.writelines( textrank_text_summarizer(norm_sentences, num_sentences=2,
to control their body temperature. Their pillar-like legs can carry their great weight. African elephants have larger ears and concave backs while Asian elephants have smaller ears and convex or level backs. """ from gensim.summarization import summarize, keywords def text_summarization_gensim(text, summary_ratio=0.5): summary = summarize(text, split=True, ratio=summary_ratio) for sentence in summary: print sentence docs = parse_document(toy_text) text = ' '.join(docs) text_summarization_gensim(text, summary_ratio=0.4) sentences = parse_document(toy_text) norm_sentences = normalize_corpus(sentences,lemmatize=False) total_sentences = len(norm_sentences) print 'Total Sentences in Document:', total_sentences num_sentences = 3 num_topics = 2