def __init__(self, max_files=1000000, file_lines=FILE_LINES, batch_size=32, shuffle=True):
    'Initialization'
    self.raw_dir = RAW_DIR
    self.cache_dir = CACHE_DIR
    # Collect up to max_files names from the raw directory.
    files = []
    for file in os.listdir(self.raw_dir):
        files.append(file)
        if len(files) >= max_files:
            break
    self.raw_files = [os.path.join(self.raw_dir, f) for f in files]
    self.cache_files = [os.path.join(self.cache_dir, f + '.pk') for f in files]
    # range() does this directly; the old element-by-element fill was redundant.
    self.indexes = list(range(len(self.raw_files)))
    self.file_lines = file_lines
    self.batch_size = batch_size
    self.syll_mgr = syllables.syllables()
    self.n_classes = self.syll_mgr.get_size()
    self.shuffle = shuffle
    self.on_epoch_end()
    self.cmudict = cmudict.CMUDict()
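# The constructor above ends with a call to on_epoch_end(), which is not part
# of this excerpt. A minimal sketch, assuming the standard Keras Sequence
# pattern (np is numpy; this is an assumption, not the confirmed method body):
#
#     def on_epoch_end(self):
#         'Reshuffle sample indexes between epochs'
#         if self.shuffle:
#             np.random.shuffle(self.indexes)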
def __init__(self, dir=DIR, file_lines=FILE_LINES, num_lines=LINES, batch_size=32, shuffle=True):
    'Initialization'
    self.dir = dir
    self.list_IDs = [os.path.join(dir, f) for f in os.listdir(dir)]
    self.indexes = list(range(len(self.list_IDs)))
    self.file_lines = file_lines
    self.num_lines = num_lines
    self.batch_size = batch_size
    self.syll_mgr = syllables.syllables()
    self.n_classes = self.syll_mgr.get_size()
    self.shuffle = shuffle
    self.on_epoch_end()
    self.cmudict = cmudict.CMUDict()
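# Generators like the two above are normally handed straight to Keras for
# training. A hypothetical usage sketch (the class name DataGenerator and the
# model object are assumptions, not names from this excerpt):
#
#     train_gen = DataGenerator(batch_size=32, shuffle=True)
#     model.fit(train_gen, epochs=10)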
import sys

import cmudict
import tokens

# Note: `from sets import Set` is Python 2 only; set is a builtin in Python 3,
# which the print() calls below already assume.
cd = cmudict.CMUDict()  # renamed so it no longer shadows the cmudict module
wordset = set()
for line in sys.stdin:
    # Only the first tab-separated field holds text; tokenize that, not the raw line.
    text = line.split("\t")[0]
    words = tokens.tokenize(text)
    words = tokens.fixtokens(words)
    words = tokens.hyphen(words, cd.syll_dict)
    for word in words:
        wordset.add(word)
for word in sorted(wordset):
    print(word)
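# This script is a plain stdin-to-stdout filter. A hypothetical invocation
# (the file names are assumptions) to build a sorted vocabulary from a
# tab-separated corpus:
#
#     python3 wordlist.py < corpus.tsv > words.txt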
            next = list(sentence)
            next.append(l)
            yield next

    recurse(syll_list, poss_array, 0)
    #print(poss_array)

    # Collect every candidate sentence, tracking the smallest word count seen.
    sentences = []
    min_len = len(syll_list) + 1
    for sentence in walktree(poss_array, []):
        sentence = list(sentence)
        if len(sentence) < min_len:
            min_len = len(sentence)
        sentences.append(" ".join(sentence))

    # Keep only the deduplicated sentences that use the fewest words.
    out = []
    for s in set(sentences):
        if len(s.split(' ')) == min_len:
            out.append(s)
    return out


if __name__ == "__main__":
    decoder = Decoder(cmudict.CMUDict())
    for x in decoder.decode_sentence(['DH AH', 'S AH N', 'L IH T']):
        print(x)
    print(' ')
    for x in decoder.decode_sentence(['DH AH', 'S AH N', 'L IH T', 'AA',
                                      'N IH NG', 'HH IY', 'V IH NG', 'OW',
                                      'V ER', 'HH EH D']):
        print(x)
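# Note on the demo above: each input element is an ARPAbet syllable
# ('DH AH' = "the", 'S AH N' = "sun", 'L IH T' = "lit"), and decode_sentence()
# keeps only candidate word sequences of minimal length, so every printed
# line is a shortest decoding of the given syllables.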
# haiku gen 1 column
import sys

import snlp
import cmudict

nsyll = 5
cd = cmudict.CMUDict()

# TODO: looking good except for sentences with hyphenated words;
# they don't get clipped, they just keep going??

num_lines = 0
num_pass = 0
num_nonword = 0
outf_5 = open("haiku_5.txt", "w")
outf_7 = open("haiku_7.txt", "w")
num_5 = 0
num_7 = 0
lastline = ''
for line in sys.stdin:
    # Skip consecutive duplicate input lines.
    if line == lastline:
        continue
    lastline = line
    line = line[:-1].lower()
    if line[0] != '(':
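# The rest of the loop is truncated in this excerpt. A plausible sketch of the
# per-line routing it performs (an assumption: count_syllables() is a
# hypothetical helper, not a confirmed API of this cmudict wrapper):
#
#     sylls = sum(count_syllables(cd, w) for w in line.split())
#     if sylls == 5:
#         outf_5.write(line + '\n')
#         num_5 += 1
#     elif sylls == 7:
#         outf_7.write(line + '\n')
#         num_7 += 1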
            continue
        if len(stress) == len(meter):
            fail = self.distance(stress, meter)
        elif len(stress) + 1 == len(meter):  # was `== meter`, an int/str comparison that could never be true
            # Stress pattern is one syllable short: try padding at either end.
            fail = self.distance('?' + stress, meter)
            if fail > 1:
                fail = self.distance(stress + '?', meter)
        if fail < 2:
            poss.append(name)
    return poss


if __name__ == "__main__":
    data = [["short!", ['1', '10']],
            ["long!", ['11001000000']],
            ["swap", ['01', '10', '0', '1', '0101']],
            ["correct", ['0101010101']],
            ["one", ['0111010101']],
            ["two!", ['0111010001']],
            ["first missing", ['101010101']],
            ["last missing", ['010101010']]]
    m = meter(cmudict.CMUDict(), {"meter": "01"}, "/dev/null")  # renamed so the instance no longer shadows the meter class
    print("Strict meter:")
    for test in data:
        print("{0} -> {1}".format(test[0], m.meter_strict(test[1])))
    print("Broken meter:")
    for test in data:
        print("{0} -> {1}".format(test[0], m.meter_loose(test[1])))
    print(m.possibles(['a', 'word'], {'a': 0, 'word': 0, 'word(2)': 0}))
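# Running this module directly (e.g. `python3 meter.py`; the file name is an
# assumption) exercises the self-test above: each stress-pattern test case is
# matched against the repeating "01" meter by both the strict and the loose
# matcher, and possibles() is probed with a small word/stress map.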