Пример #1
0
 def __init__(self,
              max_files=1000000,
              file_lines=FILE_LINES,
              batch_size=32,
              shuffle=True):
     """Build the raw/cache file lists, the index list and batch settings.

     Args:
         max_files: listing of RAW_DIR stops once this many entries have
             been collected. The limit is tested after each append, so a
             non-empty directory always contributes at least one entry.
         file_lines: stored unchanged as ``self.file_lines``.
         batch_size: stored unchanged as ``self.batch_size``.
         shuffle: stored unchanged as ``self.shuffle``.
     """
     self.raw_dir = RAW_DIR
     self.cache_dir = CACHE_DIR

     # Collect directory entries in listing order, up to the limit.
     names = []
     for count, name in enumerate(os.listdir(self.raw_dir), 1):
         names.append(name)
         if count >= max_files:
             break

     # Every raw file has a cache counterpart with the same name plus '.pk'.
     self.raw_files = [self.raw_dir + '/' + name for name in names]
     self.cache_files = [
         self.cache_dir + '/' + name + '.pk' for name in names
     ]

     # One integer position per raw file: 0, 1, ..., len - 1.
     self.indexes = list(range(len(self.raw_files)))

     self.file_lines = file_lines
     self.batch_size = batch_size
     self.shuffle = shuffle

     self.syll_mgr = syllables.syllables()
     self.n_classes = self.syll_mgr.get_size()

     # on_epoch_end is defined elsewhere in the class; it is invoked only
     # after the index list and the shuffle flag are in place.
     self.on_epoch_end()
     self.cmudict = cmudict.CMUDict()
Пример #2
0
 def __init__(self,
              dir=DIR,
              file_lines=FILE_LINES,
              num_lines=LINES,
              batch_size=32,
              shuffle=True):
     """Record every entry of ``dir`` and set up indexing/batch settings.

     Args:
         dir: directory whose entries become ``self.list_IDs``, each
             stored as ``dir + '/' + entry``.
         file_lines: stored unchanged as ``self.file_lines``.
         num_lines: stored unchanged as ``self.num_lines``.
         batch_size: stored unchanged as ``self.batch_size``.
         shuffle: stored unchanged as ``self.shuffle``.
     """
     self.dir = dir
     self.list_IDs = [dir + '/' + name for name in os.listdir(dir)]

     # One integer position per listed entry: 0, 1, ..., len - 1.
     self.indexes = list(range(len(self.list_IDs)))

     self.file_lines = file_lines
     self.num_lines = num_lines
     self.batch_size = batch_size
     self.shuffle = shuffle

     self.syll_mgr = syllables.syllables()
     self.n_classes = self.syll_mgr.get_size()

     # on_epoch_end is defined elsewhere in the class; it is invoked only
     # after the index list and the shuffle flag are in place.
     self.on_epoch_end()
     self.cmudict = cmudict.CMUDict()
Пример #3
0
from sets import Set
import sys

import cmudict
import tokens

# Print the sorted set of distinct tokens found in the first tab-separated
# column of every line read from stdin.

# NOTE: this rebinds the name `cmudict` from the imported module to a
# CMUDict instance; the module is no longer reachable by that name below.
cmudict = cmudict.CMUDict()

# The built-in set type is used instead of the deprecated sets.Set class.
wordset = set()
for line in sys.stdin:
    # Only the first tab-separated column is tokenized. Previously this
    # column was extracted but the whole line was tokenized instead, so
    # any later columns leaked into the word list.
    text = line.split("\t")[0]
    words = tokens.tokenize(text)
    words = tokens.fixtokens(words)
    words = tokens.hyphen(words, cmudict.syll_dict)
    wordset.update(words)

for word in sorted(wordset):
    print(word)
Пример #4
0
                        next = list(sentence)
                        next.append(l)
                        yield next

        recurse(syll_list, poss_array, 0)
        #print(poss_array)
        sentences = []
        min_len = len(syll_list) + 1
        for sentence in walktree(poss_array, []):
            sentence = list(sentence)
            if len(sentence) < min_len:
                min_len = len(sentence)
            sentences.append(" ".join(sentence))
        out = []
        for s in set(sentences):
            if len(s.split(' ')) == min_len:
                out.append(s)
        return out


if __name__ == "__main__":
    # Manual demo: decode two hard-coded syllable sequences and print each
    # result, with a line holding a single space between the two runs.
    decoder = Decoder(cmudict.CMUDict())

    short_sylls = ['DH AH', 'S AH N', 'L IH T']
    for candidate in decoder.decode_sentence(short_sylls):
        print(candidate)

    print(' ')

    long_sylls = [
        'DH AH', 'S AH N', 'L IH T', 'AA', 'N IH NG', 'HH IY', 'V IH NG',
        'OW', 'V ER', 'HH EH D',
    ]
    for candidate in decoder.decode_sentence(long_sylls):
        print(candidate)
Пример #5
0
# haiku gen 1 column

import sys

import snlp
import cmudict

# Module-level state used by the stdin loop that follows.
nsyll = 5

cd = cmudict.CMUDict()

# TODO: results look good except for sentences containing hyphenated
# words, which are not clipped and just keep going (cause unknown).

# Running tallies, all starting at zero.
num_lines, num_pass, num_nonword = 0, 0, 0
num_5, num_7 = 0, 0

# Output files, opened for writing.
# NOTE(review): no close() for these handles is visible in this part of
# the file - confirm they are closed further down.
outf_5 = open("haiku_5.txt", "w")
outf_7 = open("haiku_7.txt", "w")

# Previous raw input line; the loop below skips a line equal to it.
lastline = ''
for line in sys.stdin:
    if line == lastline:
        continue
    lastline = line
    line = line[:-1].lower()
    if line[0] != '(':
Пример #6
0
                continue
            if len(stress) == len(meter):
                fail = self.distance(stress, meter)
            elif len(stress) + 1 == meter:
                fail = self.distance('?' + stress, meter)
                if fail > 1:
                    fail = self.distance(stress + '?', meter)
            if fail < 2:
                poss.append(name)
        return poss


if __name__ == "__main__":
    # Labelled fixtures: each entry is [label, list of stress strings].
    data = [
        ["short!", ['1', '10']],
        ["long!", ['11001000000']],
        ["swap", ['01', '10', '0', '1', '0101']],
        ["correct", ['0101010101']],
        ["one", ['0111010101']],
        ["two!", ['0111010001']],
        ["first missing", ['101010101']],
        ["last missing", ['010101010']],
    ]

    # NOTE: this rebinds the name `meter` to the object returned by the
    # call, so the original callable is not reachable by that name below.
    # NOTE(review): "/def/null" may be a typo for "/dev/null" - confirm
    # what the third argument is used for before changing it.
    meter = meter(cmudict.CMUDict(), {"meter": "01"}, "/def/null")

    print("Strict meter:")
    for label, stresses in data:
        print("{0} -> {1}".format(label, meter.meter_strict(stresses)))

    print("Broken meter:")
    for label, stresses in data:
        print("{0} -> {1}".format(label, meter.meter_loose(stresses)))

    print(meter.possibles(['a', 'word'], {'a': 0, 'word': 0, 'word(2)': 0}))