Пример #1
0
def Random(rsc_file, dst_file, count):
    language = "chinese"
    parser = PlaintextParser.from_file(rsc_file,
                                       Tokenizer(language),
                                       encoding='utf-8')
    stemmer = Stemmer(language)  # 语言容器

    summarizer = RandomSummarizer(stemmer)  # LSA算法
    summarizer.stop_words = get_stop_words(language)
    with open(dst_file, 'w', encoding='utf-8') as f:
        for sentence in summarizer(parser.document, count):
            f.write(str(sentence))
            f.write('\n')
            print(sentence)
import os


#create folder
def createFolder(directory):
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        print('Error: Creating directory. ' + directory)


LANGUAGE = "bangla"
SENTENCES_COUNT = 2

if __name__ == "__main__":

    createFolder('Dataset/NCTB/RandomSummary/')
    for i in range(1, 140):
        serial_no = str(i)
        path = "Dataset/NCTB/Source/" + serial_no + ".txt"
        parser = PlaintextParser.from_file(path, Tokenizer(LANGUAGE))
        stemmer = Stemmer(LANGUAGE)
        summarizer = Summarizer(stemmer)
        summarizer.stop_words = get_stop_words(LANGUAGE)
        summary = ""
        for sentence in summarizer(parser.document, SENTENCES_COUNT):
            summary = summary + " " + str(sentence)
        fi = open('Dataset/NCTB/RandomSummary/' + serial_no + '.txt', '+w')
        fi.write(summary)