def Random(rsc_file, dst_file, count):
    """Summarize ``rsc_file`` with ``RandomSummarizer`` and save the result.

    The source is read as UTF-8 text and tokenized with the "chinese"
    tokenizer. Every sentence returned by the summarizer is written to
    ``dst_file`` on its own line and also printed to stdout.

    Args:
        rsc_file: Path of the plain-text document to summarize.
        dst_file: Path of the output file; opened in 'w' mode as UTF-8,
            so existing content is overwritten.
        count: Sentence count handed to the summarizer.
    """
    lang = "chinese"
    source = PlaintextParser.from_file(
        rsc_file, Tokenizer(lang), encoding='utf-8'
    )

    # The summarizer is a RandomSummarizer configured with the stemmer and
    # stop words for the chosen language.
    random_summarizer = RandomSummarizer(Stemmer(lang))
    random_summarizer.stop_words = get_stop_words(lang)

    with open(dst_file, 'w', encoding='utf-8') as out:
        for picked in random_summarizer(source.document, count):
            out.write(str(picked) + '\n')
            print(picked)
import os


# create folder
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it is absent.

    Errors are reported on stdout instead of being raised, so the caller
    keeps running even when the directory could not be created.

    Args:
        directory: Path of the folder to create.
    """
    try:
        # exist_ok=True removes the check-then-create race of calling
        # os.path.exists() before os.makedirs().
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print('Error: Creating directory. ' + directory)


LANGUAGE = "bangla"
SENTENCES_COUNT = 2

if __name__ == "__main__":
    createFolder('Dataset/NCTB/RandomSummary/')

    # Language resources do not depend on the document, so build them once.
    stemmer = Stemmer(LANGUAGE)
    stop_words = get_stop_words(LANGUAGE)

    # Source documents are numbered 1.txt through 139.txt.
    for i in range(1, 140):
        serial_no = str(i)
        path = "Dataset/NCTB/Source/" + serial_no + ".txt"
        parser = PlaintextParser.from_file(path, Tokenizer(LANGUAGE))

        summarizer = Summarizer(stemmer)
        summarizer.stop_words = stop_words

        # Each sentence is prefixed with a single space, so the summary
        # keeps the leading space the original concatenation produced.
        summary = "".join(
            " " + str(sentence)
            for sentence in summarizer(parser.document, SENTENCES_COUNT)
        )

        # The context manager guarantees the file is flushed and closed;
        # UTF-8 is set explicitly because the summaries are Bangla text and
        # the platform default encoding may be unable to encode them.
        out_path = 'Dataset/NCTB/RandomSummary/' + serial_no + '.txt'
        with open(out_path, 'w', encoding='utf-8') as fi:
            fi.write(summary)