def get_chunk_of_corpus(corpus):
    """
    Returns a randomly selected piece of a poem from a text file.

    A random number of leading lines is skipped and the remainder of the
    verse the reader landed in is discarded. Lines are then collected from
    the following verse(s) -- verses being runs of lines separated by
    whitespace-only lines -- until at least 3 lines are gathered, taking at
    most 6 lines in one go. If the chunk stops in the middle of a verse it is
    extended line by line until a line ends with '.', '!', '?', ':' or ';',
    or the verse ends. Finally the last line is adjusted: a trailing letter
    gets a '.' appended and a trailing ',', ':' or ';' is replaced by '.'.

    :param corpus: path of a UTF-8 encoded text file
    :return: the collected lines joined with newlines, or an empty string
        if the end of the file is reached before any line is collected
    """
    sent_end = ('.', '!', '?', ':', ';')
    punctuation = (',', ':', ';')
    lines = []
    test = ''

    # NOTE(review): helpers.file_length presumably returns the number of
    # lines in the file -- confirm. The upper bound is clamped so that short
    # files do not make random.randint raise ValueError on an empty range.
    upper = max(1, helpers.file_length(corpus) - 10)
    random_place = random.randint(1, upper)

    # The context manager guarantees the file is closed, even on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line
        for _ in range(random_place):
            f.readline()

        while len(lines) < 3:
            # A fresh line is read at the start of every pass, so after a
            # verse that was too short the verse directly following a single
            # blank line is skipped as a whole by the loop below.
            test = f.readline()
            # Go to the end of the current verse ('' means end of file)
            while test != '' and not test.isspace():
                test = f.readline()
            # Go to the beginning of the next verse
            while test.isspace():
                test = f.readline()
            # Collect the verse, but never more than 6 lines in total
            while test != '' and not test.isspace() and len(lines) < 6:
                lines.append(test.strip())
                test = f.readline()
            if test == '':
                # End of file: stop instead of padding with empty strings.
                break

        # The chunk was cut inside a verse: continue up to the sentence end.
        while (test != '' and not test.isspace()
               and not lines[-1].endswith(sent_end)):
            lines.append(test.strip())
            test = f.readline()

    if not lines:
        return ''

    # Insert a dot to the end
    last = lines[-1]
    if last[-1].isalpha():
        last = last + '.'
    elif last[-1] in punctuation:
        last = last[:-1] + '.'
    lines[-1] = last

    return '\n'.join(lines)
def get_chunk_of_corpus(corpus):
    """
    Returns a randomly selected piece of a poem from a text file.

    A random number of leading lines is skipped and the remainder of the
    verse the reader landed in is discarded. Lines are then collected from
    the following verse(s) -- verses being runs of lines separated by
    whitespace-only lines -- until at least 3 lines are gathered, taking at
    most 6 lines in one go. If the chunk stops in the middle of a verse it is
    extended line by line until a line ends with '.', '!', '?', ':' or ';',
    or the verse ends. Finally the last line is adjusted: a trailing letter
    gets a '.' appended and a trailing ',', ':' or ';' is replaced by '.'.

    :param corpus: path of a UTF-8 encoded text file
    :return: the collected lines joined with newlines, or an empty string
        if the end of the file is reached before any line is collected
    """
    sent_end = ('.', '!', '?', ':', ';')
    punctuation = (',', ':', ';')
    lines = []
    test = ''

    # NOTE(review): helpers.file_length presumably returns the number of
    # lines in the file -- confirm. The upper bound is clamped so that short
    # files do not make random.randint raise ValueError on an empty range.
    upper = max(1, helpers.file_length(corpus) - 10)
    random_place = random.randint(1, upper)

    # The context manager guarantees the file is closed, even on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line
        for _ in range(random_place):
            f.readline()

        while len(lines) < 3:
            # A fresh line is read at the start of every pass, so after a
            # verse that was too short the verse directly following a single
            # blank line is skipped as a whole by the loop below.
            test = f.readline()
            # Go to the end of the current verse ('' means end of file)
            while test != '' and not test.isspace():
                test = f.readline()
            # Go to the beginning of the next verse
            while test.isspace():
                test = f.readline()
            # Collect the verse, but never more than 6 lines in total
            while test != '' and not test.isspace() and len(lines) < 6:
                lines.append(test.strip())
                test = f.readline()
            if test == '':
                # End of file: stop instead of padding with empty strings.
                break

        # The chunk was cut inside a verse: continue up to the sentence end.
        while (test != '' and not test.isspace()
               and not lines[-1].endswith(sent_end)):
            lines.append(test.strip())
            test = f.readline()

    if not lines:
        return ''

    # Insert a dot to the end
    last = lines[-1]
    if last[-1].isalpha():
        last = last + '.'
    elif last[-1] in punctuation:
        last = last[:-1] + '.'
    lines[-1] = last

    return '\n'.join(lines)
def get_verse_of_corpus(corpus, num_words, num_lines):
    """
    Reads some text from a file and formats it. The formatted text has the
    given number of lines and the given number of words per line.

    Reading starts after a randomly chosen number of skipped lines. Words
    (whitespace-separated tokens) are taken in file order and regrouped
    regardless of the original line breaks. If the end of the file is
    reached first, the text read so far is returned, so the last line may be
    shorter and fewer lines than requested may be produced.

    >>> verse = get_verse_of_corpus('../apparatus/poetry/english_poems.txt', 4, 4)
    >>> len(verse.split()) == 16
    True

    :param corpus: path of a UTF-8 encoded text file
    :param num_words: number of words per line
    :param num_lines: number of lines
    :return: random text with the given number of lines and given number of
        words per line, every line terminated by a newline character
    """
    # NOTE(review): helpers.file_length presumably returns the number of
    # lines in the file -- confirm. The upper bound is clamped so that short
    # files do not make random.randint raise ValueError on an empty range.
    upper = max(1, helpers.file_length(corpus) - 10)
    random_place = random.randint(1, upper)

    lines = []
    words = []  # words of the source line currently being consumed
    pos = 0     # index of the next unused word in ``words``
    at_eof = False

    # The context manager guarantees the file is closed, even on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line
        for _ in range(random_place):
            f.readline()

        while len(lines) < num_lines and not at_eof:
            line = []
            while len(line) < num_words:
                if pos == len(words):
                    # Current source line is used up: fetch the next one.
                    raw = f.readline()
                    if raw == '':
                        # End of file: stop here rather than re-using the
                        # words of the previous source line.
                        at_eof = True
                        break
                    words = raw.split()
                    pos = 0
                    continue
                # Take as many words as still fit on the output line.
                take = min(num_words - len(line), len(words) - pos)
                line.extend(words[pos:pos + take])
                pos += take
            # Keep a partial last line, but no empty one caused by the EOF.
            if line or not at_eof:
                lines.append(line)

    return ''.join(' '.join(line) + '\n' for line in lines)
def get_verse_of_corpus(corpus, num_words, num_lines):
    """
    Reads some text from a file and formats it. The formatted text has the
    given number of lines and the given number of words per line.

    Reading starts after a randomly chosen number of skipped lines. Words
    (whitespace-separated tokens) are taken in file order and regrouped
    regardless of the original line breaks. If the end of the file is
    reached first, the text read so far is returned, so the last line may be
    shorter and fewer lines than requested may be produced.

    >>> verse = get_verse_of_corpus('../apparatus/poetry/english_poems.txt', 4, 4)
    >>> len(verse.split()) == 16
    True

    :param corpus: path of a UTF-8 encoded text file
    :param num_words: number of words per line
    :param num_lines: number of lines
    :return: random text with the given number of lines and given number of
        words per line, every line terminated by a newline character
    """
    # NOTE(review): helpers.file_length presumably returns the number of
    # lines in the file -- confirm. The upper bound is clamped so that short
    # files do not make random.randint raise ValueError on an empty range.
    upper = max(1, helpers.file_length(corpus) - 10)
    random_place = random.randint(1, upper)

    lines = []
    words = []  # words of the source line currently being consumed
    pos = 0     # index of the next unused word in ``words``
    at_eof = False

    # The context manager guarantees the file is closed, even on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line
        for _ in range(random_place):
            f.readline()

        while len(lines) < num_lines and not at_eof:
            line = []
            while len(line) < num_words:
                if pos == len(words):
                    # Current source line is used up: fetch the next one.
                    raw = f.readline()
                    if raw == '':
                        # End of file: stop here rather than re-using the
                        # words of the previous source line.
                        at_eof = True
                        break
                    words = raw.split()
                    pos = 0
                    continue
                # Take as many words as still fit on the output line.
                take = min(num_words - len(line), len(words) - pos)
                line.extend(words[pos:pos + take])
                pos += take
            # Keep a partial last line, but no empty one caused by the EOF.
            if line or not at_eof:
                lines.append(line)

    return ''.join(' '.join(line) + '\n' for line in lines)