def exercise_gutenberg():
    """Explore the Gutenberg corpus and print per-file ratios.

    Prints the corpus file ids, the token count of 'austen-emma.txt' and a
    concordance for "surprize". Then prints one line per file holding:
    characters per word, words per sentence, words per distinct lowercased
    word, and the file id.
    """
    # List the files available in the Gutenberg corpus.
    print(gutenberg.fileids())

    # Pick one text, Jane Austen's "Emma", and report its length in tokens.
    emma_tokens = gutenberg.words("austen-emma.txt")
    print(len(emma_tokens))

    # Wrap the tokens in an nltk.Text to get the concordance view.
    nltk.Text(emma_tokens).concordance("surprize")

    for file_id in gutenberg.fileids():
        raw_text = gutenberg.raw(file_id)
        tokens = gutenberg.words(file_id)
        sentences = gutenberg.sents(file_id)

        char_total = len(raw_text)          # characters in the file
        token_total = len(tokens)           # word tokens in the file
        sentence_total = len(sentences)     # sentences in the file
        # Distinct word forms, ignoring case.
        vocab_total = len(set(token.lower() for token in tokens))

        # Average word length, average sentence length, average number of
        # occurrences per distinct word, file name.
        stats = (
            char_total / token_total,
            token_total / sentence_total,
            token_total / vocab_total,
            file_id,
        )
        print("%s %s %s %s" % stats)
def gutenberg():
    """Print truncated per-file ratios for every Gutenberg corpus file.

    Each output line holds: characters per word, words per sentence, words
    per distinct lowercased word (all truncated with int()), and the file id.
    """
    # Imported locally under an alias; the function itself is named
    # ``gutenberg``.
    from nltk.corpus import gutenberg as corpus

    for file_id in corpus.fileids():
        char_total = len(corpus.raw(file_id))
        token_total = len(corpus.words(file_id))
        sentence_total = len(corpus.sents(file_id))

        vocabulary = set()
        for token in corpus.words(file_id):
            vocabulary.add(token.lower())
        vocab_total = len(vocabulary)

        print("%d %d %d %s" % (
            int(char_total / token_total),
            int(token_total / sentence_total),
            int(token_total / vocab_total),
            file_id,
        ))
def fun02():
    """Print one line of truncated ratios per Gutenberg corpus file.

    Columns: average word length, average sentence length, average number of
    times each vocabulary item appears in the text, file id.
    """
    for fileid in gutenberg.fileids():
        tokens_in_file = len(gutenberg.words(fileid))

        # Same call order as before: raw text first, sentences after words.
        chars_in_file = len(gutenberg.raw(fileid))
        sents_in_file = len(gutenberg.sents(fileid))
        distinct_in_file = len(
            set(token.lower() for token in gutenberg.words(fileid))
        )

        avg_word_len = int(chars_in_file / tokens_in_file)
        avg_sent_len = int(tokens_in_file / sents_in_file)
        avg_repeats = int(tokens_in_file / distinct_in_file)

        # The two chained print statements produced a single output line.
        print("%d %d %d %s" % (avg_word_len, avg_sent_len, avg_repeats, fileid))
def for_print():
    """Display three statistics for each text of the Gutenberg corpus.

    For every file id, prints characters per word, words per sentence and
    words per distinct lowercased word (each truncated with int()), followed
    by the file id.

    :return: None
    """
    for fileid in gutenberg.fileids():
        totals = {
            "chars": len(gutenberg.raw(fileid)),
            "words": len(gutenberg.words(fileid)),
            "sents": len(gutenberg.sents(fileid)),
        }
        lowered = [token.lower() for token in gutenberg.words(fileid)]
        totals["vocab"] = len(set(lowered))

        line = "%d %d %d %s" % (
            int(totals["chars"] / totals["words"]),
            int(totals["words"] / totals["sents"]),
            int(totals["words"] / totals["vocab"]),
            fileid,
        )
        print(line)
def page57():
    """Print summary ratios for each file of the Gutenberg corpus.

    One line per file: characters per word, words per sentence, words per
    distinct lowercased word (all truncated with int()), then the file id.
    """
    from nltk.corpus import gutenberg

    def truncated_ratio(numerator, denominator):
        # int() truncation of the quotient, exactly as the inline expressions did.
        return int(numerator / denominator)

    for fileid in gutenberg.fileids():
        n_chars = len(gutenberg.raw(fileid))
        n_words = len(gutenberg.words(fileid))
        n_sents = len(gutenberg.sents(fileid))
        n_vocab = len(set(w.lower() for w in gutenberg.words(fileid)))

        # The two comma-chained print statements formed a single line.
        print("%d %d %d %s" % (
            truncated_ratio(n_chars, n_words),
            truncated_ratio(n_words, n_sents),
            truncated_ratio(n_words, n_vocab),
            fileid,
        ))
def generateSentence():
    """Assemble a short run of tokens from a randomly chosen corpus.

    A corpus (brown, gutenberg, webtext or movie_reviews) is picked at
    random, a random start offset is chosen, and the token stream is scanned
    forward to the next "." token. The tokens after that period are then
    concatenated: punctuation tokens are appended directly, every other token
    is preceded by a single space.

    Returns:
        The assembled string. It covers at most ``lengthOfTweet - 1`` tokens
        (``lengthOfTweet`` is drawn from 0..20), so it may be empty.
    """
    readers = (brown, gutenberg, webtext, movie_reviews)
    text = readers[random.randint(0, 3)].words()
    total = len(text)

    lengthOfTweet = random.randint(0, 20)
    startingWord = random.randint(0, total - 40)
    punctuation = [".", ",", '"', ";", ":", "?", "!", ")", "(", "*", "[", "]",
                   "‘", "“", "#"]

    # Find the first period at or after the random offset. The scan stops at
    # the end of the corpus; the previous upper bound of
    # ``startingWord + len(text)`` indexed past the last token. When no
    # period is found the index stays 0, as before.
    period_index = 0
    position = startingWord
    while position < total:
        if text[position] == ".":
            period_index = position
            break
        position += 1

    # Clamp the window so that a period near the end of the corpus cannot
    # push the reads out of range.
    stop = min(period_index + lengthOfTweet, total)
    pieces = []
    position = period_index + 1
    while position < stop:
        token = text[position]
        if token in punctuation:
            pieces.append(token)
        else:
            pieces.append(' ' + token)
        position += 1
    return ''.join(pieces)
def main():
    """Plot frequency distributions for three corpora on log-log axes.

    For the Gutenberg corpus, the inaugural corpus and the Reuters 'yen'
    category, two curves are plotted: one for all tokens and one for the
    tokens returned by ``exclude_stopwords``.
    """
    def plot_pair(tokens, all_color, filtered_color):
        # Same order as the unrolled code: filter, build both
        # distributions, then plot full before filtered.
        filtered_tokens = exclude_stopwords(tokens)
        all_dist = get_frequency_distribution(tokens)
        filtered_dist = get_frequency_distribution(filtered_tokens)
        pylab.plot(all_dist, color=all_color)
        pylab.plot(filtered_dist, color=filtered_color)

    plot_pair(gutenberg.words(), 'red', 'orange')
    plot_pair(inaugural.words(), 'black', 'gray')
    plot_pair(reuters.words(categories='yen'), 'blue', 'green')

    for set_scale in (pylab.xscale, pylab.yscale):
        set_scale('log')
    pylab.show()
def ex2():
    """Print the token count and the lowercased type count of 'austen-persuasion.txt'."""
    from nltk.corpus import gutenberg

    persuasion = gutenberg.words("austen-persuasion.txt")
    token_count = len(persuasion)

    # Word types are counted case-insensitively.
    distinct_forms = set()
    for token in persuasion:
        distinct_forms.add(token.lower())
    type_count = len(distinct_forms)

    print("#-word tokens= %s" % token_count)
    print("#-word types= %s" % type_count)
def ex17():
    """Print the first 50 keys of a frequency distribution over Macbeth content words.

    A token is counted when its lowercase form is not in the stopword list,
    it is longer than three characters and it is purely alphabetic.
    NOTE(review): slicing ``fd.keys()`` presumes keys() returns a list;
    confirm against the NLTK/Python version this runs on.
    """
    from nltk.corpus import gutenberg

    macbeth_tokens = gutenberg.words("shakespeare-macbeth.txt")
    stop_set = set(nltk.corpus.stopwords.words())

    def is_content_word(token):
        if token.lower() in stop_set:
            return False
        return len(token) > 3 and token.isalpha()

    content_words = [token for token in macbeth_tokens if is_content_word(token)]
    freq = nltk.FreqDist(content_words)
    print(freq.keys()[0:50])
def find_word_probability(CORPUS):
    """Count, for each word of a Gutenberg text, which words follow it.

    Args:
        CORPUS: file id passed to ``gutenberg.words``.

    Returns:
        A ConditionalFreqDist whose conditions are the preceding word (None
        for the first token) and whose samples are the words that follow it.
    """
    def predecessor_pairs():
        # Yield (previous word, word) pairs; the first token has no
        # predecessor, so it is paired with None.
        previous = None
        for current in gutenberg.words(CORPUS):
            yield previous, current
            previous = current

    return ConditionalFreqDist(predecessor_pairs())
def fun01():
    """Print the Gutenberg file ids, the length of "Emma" and a concordance.

    Output, in order: the list of corpus file ids, the number of tokens in
    'austen-emma.txt', and the concordance lines for "surprize".
    """
    print(gutenberg.fileids())

    # Emma by Jane Austen.
    emma = gutenberg.words('austen-emma.txt')

    # How many tokens it contains.
    print(len(emma))

    # concordance() writes its matches to stdout itself and returns None, so
    # wrapping the call in print only added a stray "None" line.
    Text(emma).concordance("surprize")
def exercise2():
    """Print token and word-type counts for 'austen-persuasion.txt'.

    Output is framed by blank lines and also includes the full set of
    distinct tokens (case-sensitive).
    """
    print("")
    print("Exercise 2")

    tokens = gutenberg.words('austen-persuasion.txt')
    token_line = "Number of word tokens in the text austen-persuasion.txt: %d"
    print(token_line % len(tokens))

    # Case-sensitive set of distinct tokens, used for the count and the dump.
    word_types = set(tokens)
    type_line = "Number of word-types in the text austen-persuasion.txt: %d"
    print(type_line % len(word_types))
    print(word_types)

    print("")
def main():
    """Load the aspell dictionaries, write the sorted corpus words and print anagrams.

    The words come from the Brown and Gutenberg corpora concatenated; the
    sorted words are written to 'sorted_words.txt' by the WordLoader.
    """
    word_loader = WordLoader()
    for dictionary in ("en_GB", "en_US"):
        word_loader.load_valid_words_from_aspell(dictionary)

    corpus_words = brown.words() + gutenberg.words()
    word_loader.write_sorted_words(corpus_words, 'sorted_words.txt')

    print_anagrams(word_loader.sorted_words, corpus_words)
def searchText():
    """Run token-pattern searches over Moby Dick, the NPS chat corpus and Brown.

    Each ``findall`` call takes a pattern in which ``<...>`` delimits one
    token; nothing is returned from this function.
    """
    moby_text = nltk.Text(gutenberg.words('melville-moby_dick.txt'))
    moby_text.findall(r"<a> (<.*>) <man>")

    chat_text = nltk.Text(nps_chat.words())
    for chat_pattern in (r"<.*> <.*> <bro>", r"<l.*>{3,}"):
        chat_text.findall(chat_pattern)

    brown_tokens = brown.words(categories=['hobbies', 'learned'])
    nltk.Text(brown_tokens).findall(r"<\w*> <and> <other> <\w*s>")
def structure():
    """Take the [1:20] slice of the raw, word and sentence views of one text.

    The slices are evaluated and discarded; the function returns None.
    """
    file_id = "burgess-busterbrown.txt"
    # Same order as before: raw characters, then word tokens, then sentences.
    for accessor in (gutenberg.raw, gutenberg.words, gutenberg.sents):
        view = accessor(file_id)
        view[1:20]
def gutenberg():
    """Print basic facts and per-file ratios for the Gutenberg corpus.

    Output, in order: the token count of 'austen-emma.txt', the list of
    corpus file ids, then one line per file with characters per word, words
    per sentence, words per distinct lowercased word (each truncated with
    int()) and the file id.
    """
    # This function's own name rebinds the module-level name ``gutenberg``,
    # so a bare ``gutenberg.fileids()`` in here would look the attribute up
    # on the function and raise AttributeError. Reach the corpus reader
    # through the nltk package instead.
    corpus = nltk.corpus.gutenberg

    emma = corpus.words('austen-emma.txt')
    print(len(emma))
    print(corpus.fileids())

    # Leftovers from an interactive session: the results below are computed
    # but never shown.
    macbeth_sentences = corpus.sents('shakespeare-macbeth.txt')
    macbeth_sentences[1037]
    longest_len = max(len(s) for s in macbeth_sentences)
    [s for s in macbeth_sentences if len(s) == longest_len]

    for fileid in corpus.fileids():
        num_chars = len(corpus.raw(fileid))
        num_words = len(corpus.words(fileid))
        num_sents = len(corpus.sents(fileid))
        num_vocab = len(set(w.lower() for w in corpus.words(fileid)))
        print("%d %d %d %s" % (
            int(num_chars / num_words),
            int(num_words / num_sents),
            int(num_words / num_vocab),
            fileid,
        ))
def main():
    """Predict the language of 'austen-persuasion.txt' and print the non-zero scores.

    The sample's frequency distribution is compared with the rankings from
    ``udhr_rankings``; each (language, value) pair with a non-zero value is
    printed with the language name dot-padded to 32 columns.
    """
    sample_rankings = FreqDist(gutenberg.words('austen-persuasion.txt'))
    reference_rankings = udhr_rankings(debug=True)
    scored_languages = predict_language(
        sample_rankings, reference_rankings, debug=True
    )

    print("")
    row_template = '{:.<32}{}'
    for language, value in scored_languages:
        if value == 0:
            continue
        print(row_template.format(language, value))
def ex18():
    """Print the first 50 distinct Macbeth bigrams that contain no stopwords.

    A bigram is kept when neither word is in the stopword list (compared
    case-sensitively) and both words are purely alphabetic. The bigram object
    returned by ``nltk.bigrams`` is printed first.
    NOTE(review): the final slice takes keys in whatever order
    ``FreqDist.keys()`` yields them; confirm that order is the intended one.
    """
    from nltk.corpus import gutenberg

    macbeth_tokens = gutenberg.words("shakespeare-macbeth.txt")
    stop_set = set(nltk.corpus.stopwords.words())

    pairs = nltk.bigrams(macbeth_tokens)
    print(pairs)

    kept_pairs = [
        (first, second)
        for first, second in pairs
        if first not in stop_set
        and second not in stop_set
        and first.isalpha()
        and second.isalpha()
    ]

    # Each bigram is counted under a "first:second" key. Both halves are
    # alphabetic, so the colon is an unambiguous separator.
    freq = nltk.FreqDist([first + ":" + second for first, second in kept_pairs])
    decoded = [(key.split(":")[0], key.split(":")[1]) for key in freq.keys()]
    print(decoded[0:50])
def exercise_unusual_words():
    """Print the sorted words of 'austen-sense.txt' that are missing from the word list.

    Only purely alphabetic tokens are considered, and both sides are
    compared in lowercase.
    """
    tokens = gutenberg.words("austen-sense.txt")

    # Vocabulary of the text: drop non-alphabetic tokens (numbers,
    # punctuation) and lowercase the rest.
    text_vocab = set()
    for token in tokens:
        if token.isalpha():
            text_vocab.add(token.lower())

    # Vocabulary of the reference word list.
    english_vocab = set(entry.lower() for entry in words.words())

    # Words of the text that the word list does not know (uncommon or
    # misspelt words).
    print(sorted(text_vocab - english_vocab))
def searchTokenText():
    """Run token-pattern searches over Moby Dick, the NPS chat corpus and Brown.

    ``Text.findall`` prints the matches it finds and returns None, so the
    calls are made bare. The earlier ``print moby.findall(...)`` form added a
    stray "None" line after each result, and was inconsistent with the last
    call in this function, which was already unwrapped.
    """
    from nltk.corpus import gutenberg, nps_chat

    moby = nltk.Text(gutenberg.words('melville-moby_dick.txt'))
    moby.findall(r"<a> (<.*>) <man>")

    chat = nltk.Text(nps_chat.words())
    chat.findall(r"<.*> <.*> <bro>")
    chat.findall(r"<l.*>{3,}")

    from nltk.corpus import brown

    hobbies_learned = nltk.Text(brown.words(categories=['hobbies', 'learned']))
    hobbies_learned.findall(r"<\w*> <and> <other> <\w*s>")
def getCorrectWord(word):
    """Resolve a word that may have been hyphenated across a line break.

    The "-\\n" sequences are removed from ``word`` and the result is looked
    up among the tokens of 'austen-emma.txt'.

    Args:
        word: the raw word, possibly containing "-\\n".

    Returns:
        The matching corpus token when the joined form occurs in the text,
        otherwise whatever ``removeBreak(word)`` returns.
    """
    # Named ``joined`` rather than ``str`` so the builtin is not shadowed.
    joined = word.replace('-\n', '')

    for token in gutenberg.words('austen-emma.txt'):
        if token == joined:
            return token

    # The old ``isExist`` flag was never set to True, so this fallback was
    # always taken when the loop found nothing; it is now unconditional.
    return removeBreak(word)
def find_phrases(regexp):
    """Collect the matches of a token pattern across every Gutenberg file.

    Args:
        regexp: pattern handed to ``TokenSearcher.findall``.

    Returns:
        A list of all hits. In each hit the first element is replaced by
        'looking at' and the last by 'me' when its lowercase form is in
        ``wrong_vbs``.
    """
    collected = []
    for file_id in gutenberg.fileids():
        file_text = nltk.Text(gutenberg.words(file_id))
        hits = nltk.text.TokenSearcher(file_text).findall(regexp)
        for hit in hits:
            # The two checks run in sequence, so the second one sees the
            # result of the first when a hit has a single element.
            if hit[0].lower() in wrong_vbs:
                hit[0] = 'looking at'
            if hit[-1].lower() in wrong_vbs:
                hit[-1] = 'me'
        collected += hits
    return collected
def nltk_test_1():
    """Print token statistics and the ten most frequent words of 'austen-persuasion.txt'.

    Tokens are counted case-insensitively. Output: total number of samples,
    number of bins, then ten "word count" lines in descending count order.
    """
    counts = FreqDist()
    # Increment the counter of every token's lowercase form.
    for token in gutenberg.words('austen-persuasion.txt'):
        counts[token.lower()] += 1

    print(counts.N())  # total number of samples
    print(counts.B())  # number of bins or unique samples

    # Rank (word, count) pairs by count, highest first; ties keep key order.
    pairs = [(token, counts[token]) for token in counts.keys()]
    ranked = sorted(pairs, key=lambda pair: pair[1], reverse=True)

    for token, count in ranked[:10]:
        print("%s %s" % (token, count))
def load_data():
    """Build one (volume, weights) pair per corpus file.

    Sets the module globals ``words`` (the distinct tokens that are neither
    stop words nor punctuation) and ``N`` (their number) before any file is
    converted by ``volumize``.

    Returns:
        A list of ``(x, x.w)`` tuples, one per file, where ``x`` is the
        result of ``volumize`` on that file's frequency distribution.
    """
    global N, words

    freqs = []
    for fileid in corpus.fileids():
        freqs.append(FreqDist(corpus.words(fileid)))

    vocabulary = set()
    for dist in freqs:
        for word in dist.keys():
            if word in ENGLISH_STOP_WORDS or word in punctuation:
                continue
            vocabulary.add(word)

    words = list(vocabulary)
    N = len(words)

    data = []
    for dist in freqs:
        volume = volumize(dist)
        data.append((volume, volume.w))
    return data
def load_data():
    """Build one (Vol, weights) pair per corpus file from relative word frequencies.

    Sets the module globals ``words`` (the distinct tokens that are neither
    stop words nor punctuation) and ``N`` (their number). For every file a
    ``Vol(1, 1, N, 0.0)`` is created and slot ``i`` of its ``w`` is set to
    the file's relative frequency of ``words[i]``.

    Returns:
        A list of ``(V, V.w)`` tuples, one per file.
    """
    global N, words

    freqs = []
    for fileid in corpus.fileids():
        freqs.append(FreqDist(corpus.words(fileid)))

    kept = set()
    for dist in freqs:
        for candidate in dist.keys():
            if candidate not in ENGLISH_STOP_WORDS and candidate not in punctuation:
                kept.add(candidate)
    words = list(kept)
    N = len(words)

    def to_volume(dist):
        volume = Vol(1, 1, N, 0.0)
        for slot, term in enumerate(words):
            volume.w[slot] = dist.freq(term)
        return volume

    data = []
    for dist in freqs:
        volume = to_volume(dist)
        data.append((volume, volume.w))
    return data
def extract_meaningful_info(self):
    """Extract distinctive words, links and tool names from ``self.text``.

    ``self.text`` is iterated line by line and split on whitespace. The
    resulting tokens are filtered and stored on the instance:

    * ``self.words``: tokens not among the first 3000 Gutenberg tokens
    * ``self.technical``: tokens found in ``dict_tech``
    * ``self.links``: for each distinctive token containing 'http', its
      '/'-separated parts after the scheme (empty results dropped)
    * ``self.associated_website``: distinct first parts of non-GitHub links
    * ``self.associated_github``: distinct "owner/repo" strings of GitHub links
    * ``self.tools``: distinct distinctive tokens for which ``isupper_`` is
      true and which contain no 'http'
    * ``self.summarize``: the first 50 tokens

    Returns:
        The list stored in ``self.words``.
    """
    tokens = []
    for line in self.text:
        tokens.extend(line.split())

    # Build the reference vocabulary once, as a set. It used to be rebuilt
    # as a list inside the comprehension, i.e. once per token.
    common_words = set(gutenberg.words()[0:3000]) if tokens else set()
    words_spec = [token for token in tokens if token not in common_words]
    tech_words = [token for token in tokens if token in dict_tech]

    # 'https' always contains 'http', so one substring test is enough.
    # A list (not a lazy filter object) is required because the links are
    # iterated twice below and also stored on the instance.
    link_parts = [token.split('/')[2:] for token in words_spec if 'http' in token]
    link_parts = [parts for parts in link_parts if parts]

    associated_website = [
        parts[0] for parts in link_parts if 'github' not in parts[0].lower()
    ]
    # A GitHub link needs host, owner and repository; shorter links (e.g. a
    # bare profile URL) are skipped instead of raising IndexError.
    associated_github = [
        parts[1] + '/' + parts[2]
        for parts in link_parts
        if 'github' in parts[0].lower() and len(parts) >= 3
    ]

    self.words = words_spec
    self.links = link_parts
    self.associated_github = list(set(associated_github))
    self.associated_website = list(set(associated_website))
    self.tools = list(set(
        word for word in words_spec if isupper_(word) and 'http' not in word
    ))
    self.summarize = tokens[0:50]
    self.technical = tech_words
    return words_spec
def nltk_test_3():
    """Generate up to 20 words by a random walk over the bigrams of 'austen-persuasion.txt'.

    Prints the total number of counted bigrams, the number of distinct
    conditions (preceding words), and then the generated words on one line.
    The walk starts at 'therefore' and stops early if the current word has
    no recorded follower.
    """
    tokens = list(gutenberg.words('austen-persuasion.txt'))

    # Condition every word on the word before it. The distribution used to
    # be conditioned on word *length*, so looking it up by a word always
    # gave an empty distribution and the random choice could not work.
    cfd = ConditionalFreqDist(zip(tokens, tokens[1:]))

    print(cfd.N())
    # The conditions are now words rather than a handful of lengths, so
    # their count is reported instead of the full list.
    print(len(cfd.conditions()))

    # Start predicting at the given word.
    word = 'therefore'
    generated = []
    for _ in range(20):
        generated.append(word)
        # All distinct words seen after the current word; pick one at random.
        followers = list(cfd[word])
        if not followers:
            break
        word = choice(followers)
    print(" ".join(generated))
def __init__(self, blackboard, min_words=4, max_words=8):
    """Set up the expert and build a bigram model from six Gutenberg texts.

    Args:
        blackboard: passed to the parent initialiser and kept on the instance.
        min_words: stored as ``self.min``.
        max_words: stored as ``self.max``.

    After construction ``self.poems`` holds the lowercased tokens (tokens
    that are a single ``string.punctuation`` character are removed),
    ``self.poem_bigrams`` the list of their bigrams, and ``self.cfd`` a
    ConditionalFreqDist built from those bigrams.
    """
    super(NGramExpert, self).__init__(blackboard, "NGram Expert")
    self.blackboard = blackboard
    self.min = min_words
    self.max = max_words

    source_files = (
        'blake-poems.txt',
        'whitman-leaves.txt',
        'shakespeare-macbeth.txt',
        'shakespeare-hamlet.txt',
        'shakespeare-caesar.txt',
        'milton-paradise.txt',
    )
    tokens = []
    for file_id in source_files:
        tokens.extend(gutenberg.words(file_id))

    exclude = set(string.punctuation)
    self.poems = [w.lower() for w in tokens if w not in exclude]

    # Materialise the bigrams. On NLTK 3 ``nltk.bigrams`` returns a one-shot
    # generator, which building the distribution would exhaust, leaving
    # ``self.poem_bigrams`` empty for any later use.
    self.poem_bigrams = list(nltk.bigrams(self.poems))
    self.cfd = nltk.ConditionalFreqDist(self.poem_bigrams)
def nltk_test_2():
    """Plot word frequency against frequency rank for the whole Gutenberg corpus.

    Tokens are counted case-insensitively across every file, the counts are
    sorted in descending order and drawn on log-log axes with rank on the
    x-axis and frequency on the y-axis.
    """
    # Count each token in each text of the Gutenberg collection.
    fd = FreqDist()
    for fileid in gutenberg.fileids():
        for word in gutenberg.words(fileid):
            fd[word.lower()] += 1

    # Frequencies in descending order; the rank of a frequency is its
    # 1-based position in that order.
    freqs = sorted(fd.values(), reverse=True)
    ranks = list(range(1, len(freqs) + 1))

    # loglog(x, y) puts ranks on the x-axis and frequencies on the y-axis;
    # the axis labels used to be the other way round.
    plt.loglog(ranks, freqs)
    plt.xlabel('rank(r)', fontsize=14, fontweight='bold')
    plt.ylabel('frequency(f)', fontsize=14, fontweight='bold')
    plt.grid(True)
    plt.show()
def _word_lengths(reader):
    """Return the length of every token in every file of a corpus reader."""
    lengths = []
    for file_id in reader.fileids():
        lengths.extend(len(word) for word in reader.words(file_id))
    return lengths


def main():
    """Write the word lengths of five corpora to 'wordlens.txt' as columns.

    The file starts with a header line; every following line holds one value
    per corpus in the order GENESIS, INAUGURAL, WEBTEXT, BROWN, GUTENBERG.
    Each cell is followed by a comma, and a corpus that has run out of words
    contributes an empty cell.
    """
    # Collect the lengths in the same corpus order as before.
    gutenberg_word_lens = _word_lengths(gutenberg)
    brown_word_lens = _word_lengths(brown)
    web_word_lens = _word_lengths(webtext)
    inaugural_word_lens = _word_lengths(inaugural)
    genesis_word_lens = _word_lengths(genesis)

    columns = [genesis_word_lens, inaugural_word_lens, web_word_lens,
               brown_word_lens, gutenberg_word_lens]
    longest = max(columns, key=len)

    # Nothing here prints, so stdout is left alone. The old
    # ``sys.stdout = f`` was never undone and left stdout bound to a closed
    # file once the ``with`` block ended.
    with open("wordlens.txt", 'w') as f:
        f.write("GENESIS, INAUGURAL, WEBTEXT, BROWN, GUTENBERG\n")
        for row, _ in enumerate(longest):
            cells = [
                str(column[row]) if row < len(column) else ""
                for column in columns
            ]
            # Every cell, including the last, is followed by a comma.
            f.write(",".join(cells) + ",\n")
# author: ‘CHENG CHI FUNG' # student_id: '12219691' import nltk from nltk.corpus import gutenberg from nltk.corpus import brown from nltk.corpus import wordnet as wn nltk.download('gutenberg') nltk.download('brown') nltk.download('wordnet') # load the words from corpus gutenberg words = gutenberg.words('austen-sense.txt') # load the sentences from corpus gutenberg sents = gutenberg.sents('austen-sense.txt') # sentences is a list of words # raw = each character # load the words from corpus romance romance_words = brown.words(categories='romance') # import the data from corpus hobbies and romance hobbies_words = brown.words(categories='hobbies') def q1(): print('q1: {:}'.format('')) # 1. Print the number of word tokens # YOUR CODE print(len(words))
def get_corpus(text_name):
    """Return the word tokens of one Gutenberg corpus file.

    Args:
        text_name: file id handed to ``gutenberg.words``.

    Returns:
        Whatever ``gutenberg.words(text_name)`` returns.
    """
    corpus_words = gutenberg.words(text_name)
    return corpus_words
import nltk
from nltk.corpus import gutenberg
from nltk.corpus import brown

# Show the file identifiers of the Gutenberg corpus.
print('Printing file identifiers of Project Gutenberg for books: \n', gutenberg.fileids())

# NOTE(review): the variable is named ``emma`` but holds the tokens of
# 'austen-persuasion.txt'.
emma = gutenberg.words('austen-persuasion.txt')
print('\nChoosing "Persuasion" of Jane Austen and printing it\'s length: ', len(emma))

# To apply concordance as in main.py, the tokens have to be wrapped in nltk.Text:
# emma = nltk.Text(gutenberg.words('austen-emma.txt'))
# print('\nConcordance on other texts, other than from nltk.book (with word "surprize"): \n')
# emma.concordance('surprize')

# Display size information about each text by looping over all file ids:
# characters, sentences, word tokens and distinct lowercased tokens.
print('FileId | num_chars | num_sents | num_words | num_vocab')
for fileId in gutenberg.fileids():
    num_chars = len(gutenberg.raw(fileId))
    num_sents = len(gutenberg.sents(fileId))
    num_words = len(gutenberg.words(fileId))
    num_vocab = len(set(w.lower() for w in gutenberg.words(fileId)))
    print(fileId, num_chars, num_sents, num_words, num_vocab)

print('Categories of Brown Corpus: \n', brown.categories())

# Compare genres in their usage of modal verbs.
print('Comparing news-genres in their usage of modal verbs:')
news_text = brown.words(categories='news')
# Frequency distribution over the lowercased tokens of the 'news' category.
fdist = nltk.FreqDist(w.lower() for w in news_text)
#%% from nltk.corpus import gutenberg from nltk.probability import * allwords = gutenberg.words('shakespeare-hamlet.txt') # A frequency distribution for the outcomes ''' sx = all characters in allwords, and store into a list with lower case [sx.lower() for sx in allwords if sx.isalpha()]''' fd2 = FreqDist([sx.lower() for sx in allwords if sx.isalpha()]) # get all unique values print(fd2.B()) # get all values print(fd2.N()) # output the front 20 values as a form of table fd2.tabulate(20) fd2.plot(20) # cumulative = add up fd2.plot(20, cumulative=True) # %%
#!/usr/bin/python3 # coding: utf-8 import nltk from nltk.corpus import gutenberg # 导入 gutenberg 集 ################################################################## ## FreqDist 跟踪分布中的采样频率 (sample frequencies) from nltk import FreqDist # 导入 FreqDist 类 fd = FreqDist( gutenberg.words('austen-persuasion.txt')) # 频率分布实例化, 统计文本中的 Token print( fd ) # <FreqDist with 51156 samples and 2621613 outcomes>; 可以得到 51156 个 不重复值, 2621613 个 token print(type(fd)) # <class 'nltk.probability.FreqDist'> print(fd['the']) # 3120; 查看 word 出现次数; 默认 FreqDist 是一个字典 print(fd.N()) # 98171; 是单词, 不是字母, 有重复的 print(fd.B() ) # 6132; number of bins or unique samples; 唯一单词, bins 表示相同的会在一个 bin 中 print(len(fd.keys()), type(fd.keys())) # 6132 <class 'dict_keys'> print(fd.keys()) # fd.B() 只是输出个数, 这个是把所有词汇表输出 print(fd.max()) # 频率最高的一个词 print(fd.freq('the')) # 0.03178127960395636; 出现频率 3120 / 98171 print(fd.hapaxes()) # ['[', 'Persuasion', 'Jane', ...] 只出现一次的罕用词 # 出现频率最高的大多是一些"虚词", 出现频率极低的(hapaxes)又只能靠上下文来理解; 文本中出现频率最高和最低的那些词往往并不能反映这个文本的特征 for idx, word in enumerate(fd): # 可以用 enumerate 来遍历, 是按出现顺序排的 if idx == 5: break print(idx, word) # 0 [; 1 Persuasion; 2 by; 3 Jane; 4 Austen ################################################################## ## 统计词的长度频率 fdist = FreqDist(len(w) for w in gutenberg.words('austen-persuasion.txt')) print(fdist) # <FreqDist with 16 samples and 98171 outcomes> print(
import nltk from nltk.corpus import gutenberg emma = gutenberg.words("austen-emma.txt") """ # 各ファイルの単語平均長、文平均長、各単語の出現回数を算出 for fileid in gutenberg.fileids(): # 総文字数 num_chars = len(gutenberg.raw(fileid)) # 総単語数 num_words = len(gutenberg.words(fileid)) # 総文数 num_sents = len(gutenberg.sents(fileid)) # 異なり文字数 num_vocab = len(set([w.lower() for w in gutenberg.words(fileid)])) print(int(num_chars / num_words), int(num_words / num_sents), int(num_words / num_vocab), fileid) """ """ # 法助動詞の数をカウント from nltk.corpus import brown news_text = brown.words(categories='news') fdist = nltk.FreqDist(news_text) modals = ['can', 'could', 'may', 'might', 'will', 'must'] for m in modals: print('{0} : {1}'.format(m, fdist[m])) """ """ # 法助動詞の数でジャンル予測 from nltk.corpus import brown cfd = nltk.ConditionalFreqDist( (genre, word) for genre in brown.categories()
# URL: <http://www.nltk.org/> # For license information, see LICENSE.TXT from nltk.corpus import gutenberg, genesis, inaugural,\ nps_chat, webtext, treebank, wordnet from nltk.text import Text from nltk.probability import FreqDist from nltk.util import bigrams from nltk.misc import babelize_shell print "*** Introductory Examples for the NLTK Book ***" print "Loading text1, ..., text9 and sent1, ..., sent9" print "Type the name of the text or sentence to view it." print "Type: 'texts()' or 'sents()' to list the materials." text1 = Text(gutenberg.words('melville-moby_dick.txt')) print "text1:", text1.name text2 = Text(gutenberg.words('austen-sense.txt')) print "text2:", text2.name text3 = Text(genesis.words('english-kjv.txt'), name="The Book of Genesis") print "text3:", text3.name text4 = Text(inaugural.words(), name="Inaugural Address Corpus") print "text4:", text4.name text5 = Text(nps_chat.words(), name="Chat Corpus") print "text5:", text5.name text6 = Text(webtext.words('grail.txt'),
import nltk # Notes: See fileids in Gutenberg corpus | Corpus is a large body of text. Gutenberg collection contains 25,000 free electronic books nltk.corpus.gutenberg.fileids() emma = nltk.corpus.gutenberg.words( 'austen-emma.txt' ) # Notes: Pick out the text Emma from Jane Austen and name it emma. Count the number of words in it. emma = nltk.Text(nltk.corpus.gutenberg.words( 'austen-emma.txt')) # Notes: Use the concordance function on emma. print( "Number of words in emma file in gutenberg corpus: ", len(emma) ) # Notes: Library/Package --> Package --> Object --> Method/Function == NLTK --> corpus --> gutenberg --> words from nltk.corpus import gutenberg # Notes: Avoid using long statements by using 'the import statement' for fileid in gutenberg.fileids( ): # Notes: Loop over all fileids in Gutenberg corpus and print following statistics: avg chars/word; avg words/sentence; a lexical diversity score; fileids. char_count = len(gutenberg.raw(fileid)) word_count = len(gutenberg.words(fileid)) sent_count = len(gutenberg.sents(fileid)) vocab_count = len(set(w.lower() for w in gutenberg.words(fileid))) print(round(char_count / word_count), round(word_count / sent_count), round(word_count / vocab_count), fileid) macbeth_sentences = gutenberg.sents( 'shakespeare-macbeth.txt' ) # Notes: Dispalay the longest sentence from the macbeth text and its length print("\n112th Macbeth sentence: ", macbeth_sentences[111]) print("\nNumber of sentences in Macbeth: ", len(macbeth_sentences)) longest_length = max(len(s) for s in macbeth_sentences) longest_sentence = [ sentence for sentence in macbeth_sentences if len(sentence) == longest_length ] print("\nLength of longest sentence in Macbeth: ", longest_length)
------------------------- main ------------------------- ''' if __name__ == '__main__': scores_lst = [] weights = [11, 33, 50, 0.04, 4] sig_list = [] fileids = gutenberg.fileids() print('\n\nCalculating Table of Signatures...') print('\n{:>25} {:>12} {:>12} {:>12} {:>12} {:>12}\n'.format( 'File Name:', 'word_len:', 'lex_div:', 'hap_rat:', 'sent_len:', 'sent_comp:')) for fid in fileids: # compute features, make a list of features words = gutenberg.words(fid) sents = gutenberg.sents(fid) sig = compute_signature(words, sents, fid) sig_list.append(sig) print( '{:>25} {:>12.4f} {:>12.4f} {:>12.4f} {:>12.4f} {:>12.4f}'.format( sig[0], sig[1], sig[2], sig[3], sig[4], sig[5])) write_signatures(sig_list, 'out.txt') n_files = int(input('Enter Number of Mystery Files: ')) m_sig_list = [] for f in range(n_files): filename = input('Enter Name of Mystery File: ') raw_text = read_text(filename) m_words = gutenberg.words(filename)