def do_training(self, speech_corpus, text_corpus):
    if self.trained:
        print('NNDurationPredictor already trained')
        return
    print('Training of NNDurationPredictor itself not supported within Ossian -- ')
    print('use Merlin to train on the prepared data')
    if not os.path.isdir(self.model_dir):
        os.makedirs(self.model_dir)

    ## TODO: refactor to share the block below and write_merlin_config between
    ## NNDurationPredictor and NNAcousticPredictor

    ### Write merlin training list:
    utts_to_use = []
    for utterance in speech_corpus:
        if utterance.has_external_data(self.input_label_filetype):
            utts_to_use.append(utterance.get("utterance_name"))
    writelist(utts_to_use, os.path.join(self.model_dir, 'filelist.txt'))
    n_utts = len(utts_to_use)
    self.write_merlin_config(n_utts=n_utts)
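## Note: writelist (from Ossian's utilities) writes one item per line, so the
## filelist.txt consumed by Merlin looks like (hypothetical utterance names):
##
##   utt_0001
##   utt_0002
##   ...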
def process_utterance(self, utt, make_label=True):
    utt_data = []
    utt_questions = defaultdict(int)  ## {}
    nodelist = utt.xpath(self.target_nodes)
    if nodelist == []:
        print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' % (
            self.config["target_nodes"]))
    for node in nodelist:
        node_data, node_questions = self.get_node_context_label(node)
        utt_data.append(node_data)
        ##utt_questions.update(node_questions)
        ## Sum the dictionaries' values:
        for question in node_questions:
            utt_questions[question] += node_questions[question]
    if make_label:
        label_file = utt.get_filename(self.output_filetype)
        if self.binary_output:
            utt_data = [line.split(' ') for line in utt_data]
            ## In case of string data being present, following line will give:
            ## ValueError: could not convert string to float: a
            utt_data = numpy.array(utt_data, dtype='float')
            put_speech(utt_data, label_file)
        else:
            writelist(utt_data, label_file, uni=True)
    return (utt_data, utt_questions)  ## for writing utterance-level labels
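## A minimal sketch (an assumption, simplified from Ossian's utilities) of the
## two writers used above: writelist is assumed to dump one item per line as
## text, put_speech to dump a numpy matrix as raw float32:
##
##   def writelist(seq, fname, uni=False):
##       ...  # write '\n'.join(seq), utf-8 encoded if uni=True
##
##   def put_speech(data, fname):
##       data.astype('float32').tofile(fname)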
def load(self):
    self.target_nodes = self.config.get('target_nodes', '//utt')
    self.input_attribute = self.config.get('input_attribute', 'norm_text')
    self.merge_clitics = self.config.get('merge_clitics', 'True')  ## string, not bool

    ## check tools exist:
    corenlp_location = os.path.join(self.voice_resources.path[c.BIN], '..',
                                    'corenlp-python', 'corenlp')
    assert os.path.isdir(corenlp_location)
    sys.path.append(corenlp_location)
    from corenlp import StanfordCoreNLP
    corenlp_dir = os.path.join(corenlp_location, '..',
                               'stanford-corenlp-full-2014-06-16')

    ## Each document is to be treated as one sentence, with no sentence
    ## splitting at all. Write a config for this if necessary:
    corenlp_conf_name = 'no_sentence_split.properties'
    corenlp_conf_file = os.path.join(corenlp_location, corenlp_conf_name)
    if not os.path.isfile(corenlp_conf_file):
        data = ['annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref',
                'ssplit.isOneSentence = true']
        writelist(data, corenlp_conf_file)

    print('Loading stanford corenlp modules from %s ...' % (corenlp_dir))
    print('Takes a while (~20-30 seconds)...')
    self.models = StanfordCoreNLP(corenlp_dir, properties=corenlp_conf_name)
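## The properties file written above (when absent) contains exactly:
##
##   annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref
##   ssplit.isOneSentence = true
##
## i.e. CoreNLP runs its full pipeline but treats each document as one sentence.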
def process_utterance(self, utt, make_label=True):
    utt_data = []
    utt_questions = defaultdict(int)
    nodelist = utt.xpath(self.config["target_nodes"])
    if nodelist == []:
        print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' % (
            self.config["target_nodes"]))
    for node in nodelist:
        self.htk_state_xpath = None  ## make sure this is None.
        self.start_time_xpath = None
        self.end_time_xpath = None  ## for phone!:--
        node_data, node_questions = self.get_node_context_label(node)
        statelist = node.xpath('.//' + self.state_tag)
        assert statelist != []
        for (i, state) in enumerate(statelist):
            state_ix = i + 2
            state_node_data = "%s[%s]" % (node_data, state_ix)
            start_time = state.attrib.get(self.start_attribute, '_NA_')  ## no time at runtime!
            end_time = state.attrib.get(self.end_attribute, '_NA_')
            if not (start_time == "_NA_" or end_time == "_NA_"):
                start_time = string.ljust(str(ms_to_htk(start_time)), 10)
                end_time = string.ljust(str(ms_to_htk(end_time)), 10)
                state_node_data = "%s %s %s" % (start_time, end_time, state_node_data)
            utt_data.append(state_node_data)
        ##utt_questions.update(node_questions)
        ## Sum the dictionaries' values:
        for question in node_questions:
            utt_questions[question] += node_questions[question]
    if make_label:
        label_file = utt.get_filename(self.config["output_filetype"])
        writelist(utt_data, label_file, uni=True)
    return (utt_data, utt_questions)  ## for writing utterance-level labels
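## ms_to_htk is defined elsewhere in the repo; a minimal sketch of the assumed
## conversion (HTK label times are integer multiples of 100ns, so milliseconds
## scale by 10,000):
##
##   def ms_to_htk(ms_time):
##       return int(round(float(ms_time) * 10000))
##
## At training time this yields aligned state-level label lines such as
## (hypothetical times and context):
##
##   3050000    3200000    <full-context-label>[2]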
def make_simple_continuous_questions(self, outfile):
    cont_qlist = []  ## write continuous questions about numerical features
    key_list = []
    for (number, name) in enumerate(self.mapped_feature_names):
        ## NB: special regex to handle decimal point! --
        cont_qlist.append("CQS %s {*/%s:([\d\.]+)/*}" % (name, number))
        key_list.append("/%s:\t%s" % (number, name))
    writelist(cont_qlist, outfile + '.cont', uni=True)
    key_file = outfile + ".key"
    writelist(key_list, key_file, uni=True)
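## The .key file pairs each feature's field number with its name, one
## tab-separated mapping per line, e.g. (hypothetical feature names):
##
##   /0:    segment
##   /1:    syllable_stress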
def process_utterance(self, utt):
    # print('!!! in MappedFeatureDumper::process_utterance')
    utt_data = []
    nodelist = utt.xpath(self.target_nodes)
    if nodelist == []:
        print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' % (
            self.config["target_nodes"]))
    for node in nodelist:
        node_data = self.get_node_context_label(node)
        utt_data.append(node_data)
    label_file = utt.get_filename(self.output_filetype)
    writelist(utt_data, label_file, uni=True)
def do_training(self, corpus, text_corpus):
    dict_location = os.path.join(self.voice_resources.path[c.LANG],
                                 'labelled_corpora', self.dictionary)

    ## phoneset
    phonetable_files = glob.glob(os.path.join(dict_location, '*.table'))
    if phonetable_files == []:
        sys.exit('Cannot find any phone table files at %s' % (
            os.path.join(dict_location, '*.table')))
    phonetable_file = phonetable_files[0]  ## take first
    shutil.copy(phonetable_file, self.phoneset_fname)
    ## load phoneset now for converting lexicon:
    self.phoneset = LookupTable(self.phoneset_fname, is_phoneset=True)

    ## letter pronunciations
    letter_file = os.path.join(dict_location, 'letter.names')
    assert os.path.isfile(letter_file)
    shutil.copy(letter_file, self.letter_fname)
    self.load_letternames()  # populate self.letternames

    ## lexicon -- exclude cmudict extensions (glob doesn't support {} for .{out,scm}):
    dict_files = [f for f in glob.glob(os.path.join(dict_location, '*'))
                  if f.endswith('.out')]  ## or f.endswith('.scm')
    assert dict_files != [], 'No lexicon files found at %s' % (dict_location)
    self.convert_lexicon(dict_files)

    ## onsets
    self.count_onsets_and_codas()
    onset_strings = [' '.join(onset) for onset in self.onsets.keys()]
    writelist(onset_strings, self.onsets_fname)

    ## G2P
    train_file = os.path.join(self.get_training_dir(), 'train_data.txt')
    self.make_sequitur_train_data(train_file)
    self.train_sequitur_g2p(train_file)

    ## save it also globally for posterity:-
    if os.path.isdir(self.component_path):
        shutil.rmtree(self.component_path)
    shutil.copytree(self.model_dir, self.component_path)
def do_training(self, speech_corpus, text_corpus):
    """
    "Training" an extractor involves writing a config, and establishing the
    location of resources etc. Also write a description of .cmp files in
    terms of streams and stream widths, to be used by alignment and
    acoustic model training.

    Does the following:
    1. writes the config file acoustic_feats.cfg
    2. writes the delta window files used when building .cmp files

    :param speech_corpus: list of Utterances
    :param text_corpus: list of texts
    :return: None
    """
    self.acoustic_feats = self.get_location() + "/acoustic_feats.cfg"
    self.tool = self.voice_resources.get_path(c.BIN)
    for toolname in ['analysis', 'synth']:
        path = os.path.join(self.tool, toolname)
        assert os.path.isfile(path), '%s does not exist' % (path)
        assert os.access(path, os.X_OK), '%s is not executable' % (path)

    self.fftl, self.apsize = get_world_fft_and_apdim(self.sample_rate)

    # make acoustic modelling config
    self.feats = ['mgc', 'lf0', 'bap']
    self.stream_sizes = [str(self.order + 1), '1', str(self.apsize)]
    weights = ['1', '0', '0']
    msd = ['0', '1', '0']
    floor_scale = ["0.01" for x in range(len(self.feats))]
    streams = []

    # modifications for MSD streams
    cur_stream_index = 1
    for i in range(len(self.feats)):
        if msd[i] == "1":
            self.stream_sizes[i] += " 1 1"
            weights[i] += " " + weights[i] + " " + weights[i]
            floor_scale[i] += " " + floor_scale[i] + " " + floor_scale[i]
            msd[i] = "1 1 1"
            streams.append(str(cur_stream_index) + "-" + str(cur_stream_index + 2))
            cur_stream_index += 3
        else:
            streams.append(str(cur_stream_index))
            cur_stream_index += 1

    # save these for acoustic model training with cmps
    htk_feats = open(self.acoustic_feats, "w")
    htk_feats.write("STREAMS=\"%s\"\n" % " ".join(streams))
    htk_feats.write("STREAM_NAMES=\"%s\"\n" % " ".join(self.feats))
    htk_feats.write("SHORT_STREAM_NAMES=\"1 2 5\"\n")  # % " ".join([str(i+1) for i in range(len(self.feats))]))
    # htk_feats.write("STATIC_STREAM_SIZES=\"%s\"\n" % " ".join(self.stream_sizes))
    htk_feats.write("MSD_STREAM_INFO=\"%s\"\n" % " ".join(msd))
    htk_feats.write("STREAM_WEIGHTS=\"%s\"\n" % " ".join(weights))
    htk_feats.write("VFLOORSCALESTR=\"Vector %d %s\"\n" % (
        len(" ".join(self.stream_sizes).split()), " ".join(floor_scale)))
    htk_feats.close()

    ## Make delta window coefficients in config file into separate files:
    training_dir = self.get_training_dir()
    self.winfiles = []
    for window in ['static_window', 'delta_window', 'delta_delta_window']:
        fname = os.path.join(training_dir, window + '.win')
        data = self.coding_config[window]
        length = len(data.strip().split())
        data = '%s %s' % (length, data)
        writelist([data], fname)
        self.winfiles.append(fname)
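## For concreteness (a sketch derived from the code above, not from a real
## run): assuming self.order = 59 and self.apsize = 5, only lf0 is MSD and so
## expands to streams 2-4, giving an acoustic_feats.cfg of:
##
##   STREAMS="1 2-4 5"
##   STREAM_NAMES="mgc lf0 bap"
##   SHORT_STREAM_NAMES="1 2 5"
##   MSD_STREAM_INFO="0 1 1 1 0"
##   STREAM_WEIGHTS="1 0 0 0 0"
##   VFLOORSCALESTR="Vector 5 0.01 0.01 0.01 0.01 0.01"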
def process_utterance(self, utt):
    ## If there is no waveform attached to the utt, don't do anything:
    if not utt.has_attribute("waveform"):
        return

    ## Add some data to the utt structure recording the structure of the
    ## associated acoustic features we've produced. Do this first, in case
    ## we use existing features.
    self.stream_sizes[1] = '1'  ## otherwise '1 1 1' for F0  TODO: fix this nicely!
    utt.add_acoustic_stream_info(self.feats, self.stream_sizes)

    ## If a feature file already exists, skip:
    if utt.has_external_data(self.output_filetype):
        ## TODO: check description against existing feats?
        return

    ## else extract features
    infile = utt.get("waveform")
    outfile = utt.get_filename(self.output_filetype)

    ## strip suffix .cmp:-
    assert outfile.endswith('.' + self.output_filetype)
    chars_to_strip = len(self.output_filetype) + 1
    outstem = outfile[:-chars_to_strip]

    rate = self.rate
    sample_rate = self.rate
    alpha = self.alpha
    order = self.order
    fftl = self.fftl
    apsize = self.apsize
    frameshift_ms = self.frameshift_ms
    script_dir = self.voice_resources.path[c.SCRIPT]

    ## 1) remove wave header, downsample etc. with sox:
    comm = "sox -t wav " + infile
    comm += " -c 1 -e signed-integer "
    comm += " -r %s" % (rate)
    comm += " -b 16 "
    comm += " " + outstem + ".wav"
    comm += " dither"  ## added for hi and rj data blizz 2014
    success = os.system(comm)
    if success != 0:
        print('sox failed on utterance ' + utt.get("utterance_name"))
        return

    ## 2) WORLD analysis:
    comm = "%s/analysis %s.wav %s.f0.double %s.sp.double %s.bap.double > %s.log" % (
        self.tool, outstem, outstem, outstem, outstem, outstem)
    success = os.system(comm)  # This command is very slow
    if success != 0:
        print('world analysis failed on utterance ' + utt.get("utterance_name"))
        return

    if self.resynthesise_training_data:
        ## resynthesis to test
        comm = "%s/synth %s %s %s.f0.double %s.sp.double %s.bap.double %s.resyn.wav > %s.log" % (
            self.tool, fftl, rate, outstem, outstem, outstem, outstem, outstem)
        success = os.system(comm)
        if success != 0:
            print('world synthesis failed on utterance ' + utt.get("utterance_name"))
            return

    ## 3) convert WORLD spectrum to mel cepstra:
    comm = "%s/x2x +df %s.sp.double | %s/sopr -R -m 32768.0 | %s/mcep -a %s -m %s -l %s -j 0 -f 0.0 -q 3 > %s.mgc" % (
        self.tool, outstem, self.tool, self.tool, alpha, order, fftl, outstem)  ## -e 1.0E-8
    success = os.system(comm)  # This command is very slow
    if success != 0:
        print('conversion of world spectrum to mel cepstra failed on utterance ' + utt.get("utterance_name"))
        return

    ## 4) double -> float / ascii conversions:
    for stream in ['bap']:
        comm = "%s/x2x +df %s.%s.double > %s.%s" % (
            self.tool, outstem, stream, outstem, stream)
        success = os.system(comm)
        if success != 0:
            print('double -> float conversion (stream: ' + stream + ') failed on utterance ' + utt.get("utterance_name"))
            return

    for stream in ['f0']:
        comm = "%s/x2x +da %s.%s.double > %s.%s.txt" % (
            self.tool, outstem, stream, outstem, stream)
        success = os.system(comm)
        if success != 0:
            print('double -> ascii conversion (stream: ' + stream + ') failed on utterance ' + utt.get("utterance_name"))
            return

    ## 5) F0 conversion:
    f0 = [float(val) for val in readlist(outstem + '.f0.txt')]
    log_f0 = []
    for val in f0:
        if val == 0.0:
            log_f0.append('-1.0E10')
        else:
            log_f0.append(math.log(val))
    writelist(log_f0, outstem + '.f0.log')
    comm = "%s/x2x +af %s.f0.log > %s.lf0" % (self.tool, outstem, outstem)
    success = os.system(comm)
    if success != 0:
        print('writing log f0 failed on utterance ' + utt.get("utterance_name"))
        return

    ## 6) add mcep/ap/f0 deltas:
    for (stream, dimen) in [('mgc', order + 1), ('bap', apsize), ('lf0', 1)]:
        comm = "perl %s/window.pl %s " % (script_dir, dimen)
        comm += "%s.%s %s > %s.%s.delta" % (outstem, stream,
                                            ' '.join(self.winfiles), outstem, stream)
        success = os.system(comm)  # This command is very slow
        if success != 0:
            print('delta (' + stream + ') extraction failed on utterance ' + utt.get("utterance_name"))
            return

    ### combined streams:--
    ap = get_speech(outstem + '.bap.delta', apsize * len(self.winfiles))
    mgc = get_speech(outstem + '.mgc.delta', (order + 1) * len(self.winfiles))
    lf0 = get_speech(outstem + '.lf0.delta', 1 * len(self.winfiles))
    cmp = numpy.hstack([mgc, lf0, ap])
    put_speech(cmp, outfile)

    ## 7) add header
    floats_per_frame = (order + 2 + apsize) * len(self.winfiles)  ## +2 for energy and F0
    add_htk_header(outfile, floats_per_frame, frameshift_ms)

    ## 8) tidy:
    self.extensions_to_keep = ['.' + self.output_filetype, '.f0.txt']  ## TODO: make configurable?
    self.extensions_to_keep.append('.resyn.wav')
    self.extensions_to_keep.extend(['.mgc', '.bap', '.lf0'])
    keepfiles = [outstem + ending for ending in self.extensions_to_keep]
    for junk in glob.glob(outstem + '.*'):
        if junk not in keepfiles:
            os.remove(junk)
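## A quick check of the assembled .cmp layout (assumed values, not from a real
## run): with order = 59, apsize = 5 and three window files, the hstack gives
## 60*3 + 1*3 + 5*3 = 198 floats per frame, matching the header value
## (order + 2 + apsize) * len(self.winfiles) = (59 + 2 + 5) * 3 = 198.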
def format_question_set(self, raw_questions, outfile):
    """
    Take raw_questions: list of (number, name, value) triplets, ...
    Write formatted questions to outfile, and human-readable key to outfile.key
    Additionally, write question file including continuous questions (CQS)
    for DNN training.

    The question sets are saved as follows:
    1. qlist is saved to outfile
    2. cont_qlist is saved to outfile + ".cont"
    3. key_list is saved to outfile + ".key"
    4. values_list is saved to outfile + ".values"

    :param raw_questions: list of questions, e.g.:
        [(0, 'segment', u'_CJKUNIFIEDIDEOGRAPHEIGHTZEROCE_'),
         (0, 'segment', u'_CJKUNIFIEDIDEOGRAPHFIVEDFFOUR_'),
         (0, 'segment', u'_CJKUNIFIEDIDEOGRAPHEIGHTFOURSEVENFIVE_'), ...]
    :param outfile: path to save to
    :return: None
    """
    ## Collect the values seen for each (number, name) feature:
    unique_questions = {}
    for (number, name, value) in raw_questions:
        if (number, name) not in unique_questions:
            unique_questions[(number, name)] = []
        if value not in unique_questions[(number, name)]:
            unique_questions[(number, name)].append(value)

    qlist = []
    cont_qlist = []   ## write continuous questions about numerical features
    key_list = []     ## To make human-readable key to the feature set
    values_list = []  ## To make reference list of all values taken by a feature
    for ((number, name), values) in sorted(unique_questions.items()):
        values.sort()
        key_list.append((number, name))
        NA_present = False
        if '_NA_' in values:
            values.remove('_NA_')
            NA_present = True
        if all_entries_of_type(values, str) or all_entries_of_type(values, unicode):
            ## For strings, make single question for each item, no groups:
            for value in values:
                qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
                cont_qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
            values_list.append((number, name, 'CATEGORICAL', ' '.join(values)))
        elif all_entries_of_type(values, int):
            ## For integers, make single question for each item, and also groups
            ## based on single splits of the range.
            ## Aug 2014: modified -- just use split points -- questions based on
            ## single values are too arbitrary.
            # for value in values:
            #     qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
            values_list.append((number, name, 'NUMERIC', 'MAX:' + str(max(values))))
            cont_qlist.append("CQS %s {*/%s:(\d+)/*}" % (name, number))
            qlist.extend([""])  ## for formatting of final file
            for split_point_ix in range(1, len(values)):
                split_point = values[split_point_ix]
                wildcard_values = make_htk_wildcards(split_point)
                formatted_sublist = ["/%s:%s/" % (number, value)
                                     for value in wildcard_values]
                formatted_sublist = "*,*".join(formatted_sublist)
                qlist.append("QS %s_<_%s {*%s*}" % (name, split_point, formatted_sublist))
        elif all_entries_of_type(values, float):
            ## floats -- only make CQS
            values_list.append((number, name, 'NUMERIC', 'MAX:' + str(max(values))))
            ## NB: special regex to handle decimal point! --
            cont_qlist.append("CQS %s {*/%s:([\d\.]+)/*}" % (name, number))
        else:
            print("Feature values of mixed type / not string or int:")
            print(values)
            sys.exit(1)
        if NA_present:
            qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))
            cont_qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))
        qlist.extend(["", "", ""])       ## for formatting of final file
        cont_qlist.extend(["", "", ""])  ## for formatting of final file

    writelist(qlist, outfile, uni=True)
    writelist(cont_qlist, outfile + '.cont', uni=True)

    key_list = ["/%s:\t%s" % (number, name) for (number, name) in key_list]
    key_file = outfile + ".key"
    writelist(key_list, key_file, uni=True)

    values_list = ["%s\t%s\t%s\t%s" % (number, name, feat_type, values)
                   for (number, name, feat_type, values) in values_list]
    values_file = outfile + ".values"
    writelist(values_list, values_file, uni=True)
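## For illustration (hypothetical features, not from a real run): a
## categorical feature (2, 'vowel') with values {a, i} and an integer feature
## (5, 'syls_from_start') would yield question lines like:
##
##   QS vowel_is_a {*/2:a/*}
##   QS vowel_is_i {*/2:i/*}
##   CQS syls_from_start {*/5:(\d+)/*}
##
## plus one "QS syls_from_start_<_N {...}" split-point question per observed
## integer value, with the wildcard patterns supplied by make_htk_wildcards.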
def format_question_set(self, raw_questions, outfile):
    """
    Take raw_questions: list of (number, name, value) triplets, ...
    Write formatted questions to outfile, and human-readable key to outfile.key
    Additionally, write question file including continuous questions (CQS)
    for DNN training.
    """
    print(raw_questions)
    unique_questions = {}
    for (number, name, value) in raw_questions:
        if (number, name) not in unique_questions:
            unique_questions[(number, name)] = []
        if value not in unique_questions[(number, name)]:
            unique_questions[(number, name)].append(value)

    qlist = []
    cont_qlist = []   ## write continuous questions about numerical features
    key_list = []     ## To make human-readable key to the feature set
    values_list = []  ## To make reference list of all values taken by a feature
    for ((number, name), values) in sorted(unique_questions.items()):
        values.sort()
        key_list.append((number, name))
        NA_present = False
        if '_NA_' in values:
            values.remove('_NA_')
            NA_present = True
        if all_entries_of_type(values, str) or all_entries_of_type(values, unicode):
            ## For strings, make single question for each item, no groups:
            for value in values:
                qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
                cont_qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
            values_list.append((number, name, 'CATEGORICAL', ' '.join(values)))
        elif all_entries_of_type(values, int):
            ## For integers, make single question for each item, and also groups
            ## based on single splits of the range.
            ## Aug 2014: modified -- just use split points -- questions based on
            ## single values are too arbitrary.
            # for value in values:
            #     qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
            values_list.append((number, name, 'NUMERIC', 'MAX:' + str(max(values))))
            cont_qlist.append("CQS %s {*/%s:(\d+)/*}" % (name, number))
            qlist.extend([""])  ## for formatting of final file
            for split_point_ix in range(1, len(values)):
                split_point = values[split_point_ix]
                wildcard_values = make_htk_wildcards(split_point)
                formatted_sublist = ["/%s:%s/" % (number, value)
                                     for value in wildcard_values]
                formatted_sublist = "*,*".join(formatted_sublist)
                qlist.append("QS %s_<_%s {*%s*}" % (name, split_point, formatted_sublist))
        elif all_entries_of_type(values, float):
            ## floats -- only make CQS
            values_list.append((number, name, 'NUMERIC', 'MAX:' + str(max(values))))
            ## NB: special regex to handle decimal point! --
            cont_qlist.append("CQS %s {*/%s:([\d\.]+)/*}" % (name, number))
        else:
            print("Feature values of mixed type / not string or int:")
            print(values)
            sys.exit(1)
        if NA_present:
            qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))
            cont_qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))
        qlist.extend(["", "", ""])       ## for formatting of final file
        cont_qlist.extend(["", "", ""])  ## for formatting of final file

    writelist(qlist, outfile, uni=True)
    writelist(cont_qlist, outfile + '.cont', uni=True)

    key_list = ["/%s:\t%s" % (number, name) for (number, name) in key_list]
    key_file = outfile + ".key"
    writelist(key_list, key_file, uni=True)

    values_list = ["%s\t%s\t%s\t%s" % (number, name, feat_type, values)
                   for (number, name, feat_type, values) in values_list]
    values_file = outfile + ".values"
    writelist(values_list, values_file, uni=True)

    if self.processor_name != 'labelmaker':
        return

    ## Extra hand-crafted phonetic question groups for Devanagari phones:
    transliterate = {
        "क": "k", "ख": "kh", "ग": "g", "घ": "gh", "ङ": "N1",
        "च": "c", "छ": "ch", "ज": "j", "झ": "jh", "ञ": "N2",
        "ट": "T", "ठ": "Th", "ड": "D", "ढ": "Dh", "ण": "N3",
        "त": "t", "थ": "th", "द": "d", "ध": "dh", "न": "n",
        "प": "p", "फ": "ph", "ब": "b", "भ": "bh", "म": "m",
        "य": "y", "र": "r", "ल": "l", "ळ": "L", "व": "v",
        "श": "s1", "ष": "s2", "स": "s", "ह": "h",
        "ं": "M", "ः": "H",
        "अ": "a", "आ": "A", "इ": "i", "ई": "I", "उ": "u", "ऊ": "U",
        "ऋ": "R", "ॠ": "RR", "ऌ": "l1", "ॡ": "l2",
        "ए": "e", "ऐ": "ai", "ओ": "o", "औ": "au", "लँ": "ln"
    }
    l = []
    ## short vowels:
    l.append([transliterate[i] for i in 'अ इ उ ऋ ऌ'.split(' ')])
    ## long vowels:
    l.append([transliterate[i] for i in 'आ ई ऊ ॠ ॡ ए ओ'.split(' ')])
    ## all consonants:
    l.append([transliterate[i] for i in
              'क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह'.split(' ')])
    ## stops:
    l.append([transliterate[i] for i in
              'क ख ग घ च छ ज झ ट ठ ड ढ त थ द ध प फ ब भ'.split(' ')])
    # l.append([transliterate[i] + 'x' for i in 'क ख ग घ च छ ज झ ट ठ ड ढ त थ द ध प फ ब भ'.split(' ')])
    ## voicing / aspiration pairs within each place of articulation:
    l += [['k', 'g'], ['kh', 'gh'], ['k', 'kh'], ['g', 'gh']]
    l += [['c', 'j'], ['ch', 'jh'], ['c', 'ch'], ['j', 'jh']]
    l += [['T', 'D'], ['Th', 'Dh'], ['T', 'Th'], ['D', 'Dh']]
    l += [['t', 'd'], ['th', 'dh'], ['t', 'th'], ['d', 'dh']]
    l += [['p', 'b'], ['ph', 'bh'], ['p', 'ph'], ['b', 'bh']]
    l += [['N1', 'N2', 'N3', 'n', 'm']]  ## nasals
    l += [['h', 'H']]
    l += [['a', 'A']]
    l += [['i', 'I', 'y']]
    l += [['u', 'U', 'v']]
    l += [['R', 'RR', 'r']]
    l += [['l1', 'l2', 'l']]

    ## Append one QS per (field position, group) pair to the question file:
    i = 0
    s = '\n'
    for p in range(0, 5):
        for j in l:
            s += 'QS q' + str(i) + ' {'
            for k in range(0, len(j)):
                if k != 0:
                    s += ','
                s += '*/' + str(p) + ':' + j[k] + '/*'
            s += '}\n'
            i += 1
    print(s)
    f = open(outfile, 'a')
    f.write(s)
    f.close()
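## For illustration, the first line appended above (p=0, the short-vowel
## group) is:
##
##   QS q0 {*/0:a/*,*/0:i/*,*/0:u/*,*/0:R/*,*/0:l1/*}
##
## and the same group questions are repeated for each field position /0: to
## /4: (presumably the five phones of quinphone context).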