all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')  # empty buffer, filled below if no cache is found
map_file_to_start_end = {}
mfcc_file_name = 'tmp_allen_mfcc_' + str(int(input_n_frames)) + '.npy'
map_mfcc_file_name = ('tmp_allen_map_file_to_start_end_'
                      + str(int(input_n_frames)) + '.pickle')
try:
    print("loading concat MFCC from pickled file")
    with open(mfcc_file_name, 'rb') as concat_mfcc:
        all_mfcc = np.load(concat_mfcc)
    with open(map_mfcc_file_name, 'rb') as map_mfcc:
        map_file_to_start_end = pickle.load(map_mfcc)
except:  # no usable cache: rebuild the concatenated MFCC and the index map
    for ind, mfcc_file in enumerate(list_of_mfcc_files):
        start = all_mfcc.shape[0]
        x = htkmfc.open(mfcc_file).getall()
        if input_n_frames > 1:
            x = padding(input_n_frames, x)  # stack adjacent frames
        all_mfcc = np.append(all_mfcc, x, axis=0)
        map_file_to_start_end[mfcc_file] = (start, all_mfcc.shape[0])
        print("did", mfcc_file, "ind", ind)
    with open(mfcc_file_name, 'wb') as concat_mfcc:
        np.save(concat_mfcc, all_mfcc)
    with open(map_mfcc_file_name, 'wb') as map_mfcc:
        pickle.dump(map_file_to_start_end, map_mfcc)
tmp_likelihoods = likelihoods_computer(all_mfcc)
depth_1_likelihoods = depth_1_computer(all_mfcc)
depth_2_likelihoods = depth_2_computer(all_mfcc)
# depth_3_likelihoods = depth_1_computer(all_mfcc)  # TODO
print(map_states_to_phones)
print(dbn_phones_to_states)
columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]]
                     for i in range(tmp_likelihoods.shape[1])]
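# The snippet above (and the variants below) relies on a padding(n_frames, x)
# helper that is not shown in this section. The sketch below is only an
# assumption about what such a frame-stacking helper looks like: it turns a
# (T, d) feature matrix into a (T, n_frames * d) matrix by concatenating each
# frame with its neighbours, replicating the edge frames; the real helper in
# the codebase may differ.
def padding_sketch(n_frames, x):
    """Hypothetical frame stacking: each row gets its n_frames neighbours."""
    T, d = x.shape
    half = n_frames // 2
    # replicate the first/last frames so every frame has enough neighbours
    padded = np.concatenate([np.tile(x[0], (half, 1)),
                             x,
                             np.tile(x[-1], (half, 1))], axis=0)
    # concatenate the n_frames shifted views feature-wise
    return np.concatenate([padded[i:i + T] for i in range(n_frames)], axis=1)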
def process(ofname, iscpfname, ihmmfname,
            ilmfname=None, iwdnetfname=None, unibifname=None,
            idbnfname=None, idbndictstuple=None):

    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods, gmms_)  # TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname is not None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # i.e. for a GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on the full test dataset

    if iwdnetfname is not None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf)  # parse wordnet
    elif ilmfname is not None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf)  # bigram LM in matrix format
            else:
                transitions = parse_lm(transitions, ilmf)  # bigram LM in ARPA-MIT format
    elif unibifname is not None:
        # our own unigram and bigram counts, c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf,
                                                 unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)

    transitions = penalty_scale(transitions,
                                insertion_penalty=INSERTION_PENALTY,
                                scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

    if dbn is not None:
        input_n_frames_mfcc = dbn.rbm_layers[0].n_visible / 39  # TODO generalize
        print "this is a DBN with", input_n_frames_mfcc, "MFCC frames"
        input_n_frames_arti = dbn.rbm_layers[1].n_visible / 59  # 60 # TODO generalize
        print "this is a DBN with", input_n_frames_arti, "articulatory frames"
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try:  # TODO remove?
            print "loading concat MFCC from pickled file"
            with open(input_file_name) as concat:
                all_input = np.load(concat)
            with open(map_input_file_name) as map_input:
                map_file_to_start_end = cPickle.load(map_input)
        except:  # no usable cache: rebuild the concatenated inputs
            print "concatenating MFCC and articulatory files"
            # TODO parallelize + use np.concatenate
            all_input = np.ndarray((0, dbn.rbm_layers[0].n_visible
                                    + dbn.rbm_layers[1].n_visible),
                                   dtype='float32')
            map_file_to_start_end = {}
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    start = all_input.shape[0]
                    # get the single-frame (unstacked) signals
                    x_mfcc = htkmfc.open(cline).getall()
                    with open(cline[:-4] + '_ema.npy') as ema:
                        x_arti = np.load(ema)[:, 2:]
                    # compute deltas and delta-deltas for the articulatory features
                    _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(x_mfcc, x_arti)
                    # add the adjacent frames
                    if input_n_frames_mfcc > 1:
                        x_mfcc = padding(input_n_frames_mfcc, x_mfcc)
                    if input_n_frames_arti > 1:
                        x_arti = padding(input_n_frames_arti, x_arti)
                    # do feature transformations if any
                    # TODO with mocha_timit_params.json params
                    # concatenate MFCC and articulatory frames
                    x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1)
                    all_input = np.append(all_input, x_mfcc_arti, axis=0)
                    map_file_to_start_end[cline] = (start, all_input.shape[0])
            with open(input_file_name, 'w') as concat:
                np.save(concat, all_input)
            with open(map_input_file_name, 'w') as map_input:
                cPickle.dump(map_file_to_start_end, map_input)
    else:  # GMM
        all_mfcc = np.ndarray((0, 39), dtype='float32')  # TODO generalize

    print "computing likelihoods"
    if dbn is not None:  # TODO clean
        tmp_likelihoods = likelihoods_computer(all_input)
        #mean_dbns = np.mean(tmp_likelihoods, 0)
        #tmp_likelihoods *= (mean_gmms / mean_dbns)
        print tmp_likelihoods
        print tmp_likelihoods.shape
        # reorder the DBN output columns into the decoder's state order
        columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]]
                             for i in xrange(tmp_likelihoods.shape[1])]
        print columns_remapping
        likelihoods = (tmp_likelihoods[:, columns_remapping],
                       map_file_to_start_end)
        print likelihoods[0]
        print likelihoods[0].shape
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print "computing viterbi paths"
    list_mlf_string = []
    with open(iscpfname) as iscpf:
        il = InnerLoop(likelihoods, map_states_to_phones, transitions,
                       using_bigram=(ilmfname is not None
                                     or iwdnetfname is not None
                                     or unibifname is not None))
        p = Pool(cpu_count())
        list_mlf_string = p.map(il, iscpf)
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        for line in list_mlf_string:
            of.write(line)
if dbn is not None:  # DBN branch (the else below handles the plain GMM case)
    all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')
    map_file_to_start_end = {}
    mfcc_file_name = 'tmp_allen_mfcc_' + str(int(input_n_frames)) + '.npy'
    map_mfcc_file_name = ('tmp_allen_map_file_to_start_end_'
                          + str(int(input_n_frames)) + '.pickle')
    try:
        print "loading concat MFCC from pickled file"
        with open(mfcc_file_name) as concat_mfcc:
            all_mfcc = np.load(concat_mfcc)
        with open(map_mfcc_file_name) as map_mfcc:
            map_file_to_start_end = cPickle.load(map_mfcc)
    except:  # no usable cache: rebuild the concatenated MFCC and the index map
        for ind, mfcc_file in enumerate(list_of_mfcc_files):
            start = all_mfcc.shape[0]
            x = htkmfc.open(mfcc_file).getall()
            if input_n_frames > 1:
                x = padding(input_n_frames, x)  # stack adjacent frames
            all_mfcc = np.append(all_mfcc, x, axis=0)
            map_file_to_start_end[mfcc_file] = (start, all_mfcc.shape[0])
            print "did", mfcc_file, "ind", ind
        with open(mfcc_file_name, 'w') as concat_mfcc:
            np.save(concat_mfcc, all_mfcc)
        with open(map_mfcc_file_name, 'w') as map_mfcc:
            cPickle.dump(map_file_to_start_end, map_mfcc)
    tmp_likelihoods = likelihoods_computer(all_mfcc)
    # reorder the DBN output columns into the decoder's state order
    columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]]
                         for i in xrange(tmp_likelihoods.shape[1])]
    likelihoods = (tmp_likelihoods[:, columns_remapping], map_file_to_start_end)
else:
    all_mfcc = np.ndarray((0, 39), dtype='float32')
    map_file_to_start_end = {}
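# The columns_remapping step above reorders the DBN's output columns so that
# column i holds the likelihood of the state the HMM decoder calls state i.
# The function below is a tiny self-contained illustration of that same
# fancy-indexing step with toy numbers and hypothetical two-phone mappings;
# it is not part of the pipeline.
def _columns_remapping_demo_sketch():
    """Toy, hypothetical mappings; returns the reordered likelihoods."""
    decoder_state_to_phone = {0: 'a', 1: 'b'}   # stand-in for map_states_to_phones
    dbn_phone_to_column = {'a': 1, 'b': 0}      # stand-in for dbn_phones_to_states
    toy_likelihoods = np.array([[0.1, 0.9],
                                [0.8, 0.2]])    # rows = frames, columns = DBN order
    remap = [dbn_phone_to_column[decoder_state_to_phone[i]]
             for i in range(toy_likelihoods.shape[1])]
    return toy_likelihoods[:, remap]            # columns now follow decoder order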