# Map each entry of unique_labels to its integer position in that sequence.
for idx, label in enumerate(unique_labels):
    label2idx[label] = int(idx)

# Directory holding the list files of the current split.
split_dir = DATA_FOLDER+'/split'+TOTAL_SPLIT+'/'+CURRRENT_SPLIT

# Integer label of every utterance: the second whitespace-separated column of
# each line in the utt2label file, translated through label2idx.
with open(split_dir+'/'+args.utt2label) as list_file:
    lines = list_file.readlines()
utt_label = [label2idx[line.rstrip().split()[1]] for line in lines]

# Second whitespace-separated column of each wav.scp line.
with open(split_dir+'/wav.scp') as list_file:
    lines = list_file.readlines()
wav_list = [line.rstrip().split()[1] for line in lines]

# The second value returned by feat_extract is not used here.
feat, _, utt_shape, tffilename = ft.feat_extract(wav_list,FEAT_TYPE,N_FFT,HOP,VAD,CMVN,EXCLUDE_SHORT,False,DIM,WIN_LENGTH)

if FIXED_LEN > 0:
    # Force every feature array to exactly FIXED_LEN rows along axis 0.
    for utt_idx, utt_feat in enumerate(feat):
        n_frames = utt_feat.shape[0]
        if n_frames == 0:
            # Repeating an empty array can never reach FIXED_LEN rows; the
            # previous doubling loop would spin forever on this input.
            raise ValueError('feature %d has no frames; cannot pad it to FIXED_LEN=%d'
                             % (utt_idx, FIXED_LEN))
        if n_frames <= FIXED_LEN:
            # Too short (or already exact): repeat the array end-to-end and
            # keep the first FIXED_LEN rows.
            repeats = -(-FIXED_LEN // n_frames)  # ceil(FIXED_LEN / n_frames)
            utt_feat = np.concatenate([utt_feat] * repeats, axis=0)[0:FIXED_LEN,:]
        else:
            # Too long: take a random window of FIXED_LEN rows.  randint's
            # upper bound is exclusive, so +1 keeps the last valid start
            # position (n_frames - FIXED_LEN) reachable.
            rstart = np.random.randint(0, n_frames - FIXED_LEN + 1)
            rend = rstart + FIXED_LEN
            utt_feat = utt_feat[rstart:rend,:]
        feat[utt_idx] = utt_feat
        # Keep the recorded shape in sync with the resized feature.
        utt_shape[utt_idx] = np.array(utt_feat.shape)

# NOTE(review): the original indentation of this assignment was lost; it is
# kept unconditional so TFRECORDS_NAME is always defined -- confirm that it was
# not meant to live only inside the FIXED_LEN > 0 branch.
TFRECORDS_NAME = SAVE_FOLDER+'/'+DATA_FOLDER.split('/')[-1] + '_' + tffilename + '_fixed'+str(FIXED_LEN) + '.' + CURRRENT_SPLIT + '.tfrecords'
# Build the mapping from the first column of each line to the integer in its
# second column.  `lines` must already hold the mapping file's lines here.
lang2idx = {}
for line in lines:
    cols = line.rstrip().split()
    lang, idx = cols[0], cols[1]
    lang2idx[lang] = int(idx)

# Read wav.scp: first column goes to devid, second column to wav_list.
wav_list = []
devid = []
with open(DATA_FOLDER + '/wav.scp') as scp_file:
    lines = scp_file.readlines()
for line in lines:
    cols = line.rstrip().split()
    devid.append(cols[0])
    wav_list.append(cols[1])

#feature extraction
# The second value returned by feat_extract is not used here.
feat, _, utt_shape, tffilename = ft.feat_extract(wav_list, FEAT_TYPE, N_FFT, HOP, VAD, CMVN, 0)

#init placeholder
# The builtin int replaces np.int, an alias that NumPy deprecated in 1.20 and
# removed in 1.24.
test_feat_batch = tf.placeholder(tf.float32, [None, None, int(INPUT_DIM)], name="test_feat_batch")
test_label_batch = tf.placeholder(tf.int32, [None], name="test_label_batch")
test_shape_batch = tf.placeholder(tf.int32, [None, 2], name="test_shape_batch")

#init model
# NOTE(review): test_label_batch is passed as both the 2nd and 3rd argument --
# confirm this is intended.
emnet_validation = nn_model.nn(test_feat_batch, test_label_batch, test_label_batch, test_shape_batch, SOFTMAX_NUM, False, INPUT_DIM, is_batchnorm)
tf.get_variable_scope().reuse_variables()
sess = tf.InteractiveSession()
saver = tf.train.Saver()
# Remaining command-line options.
RESUME_STARTPOINT = args.resume_startpoint
NN_MODEL = args.model_name
EMBEDDING_LAYER = args.embedding_layer

# The literal string 'False' becomes the boolean False; any other value is
# left untouched.
VAD = False if VAD == 'False' else VAD
CMVN = False if CMVN == 'False' else CMVN
is_batchnorm = True

use_segments = bool(args.segments_format)

# With a single split the lists are read from DATA_FOLDER itself, otherwise
# from the sub-folder of the current split.
if int(TOTAL_SPLIT) == 1:
    data_dir = DATA_FOLDER
else:
    data_dir = DATA_FOLDER+'/split'+TOTAL_SPLIT+'/'+CURRRENT_SPLIT

# Extractor arguments shared by both input formats.
extract_args = (FEAT_TYPE, N_FFT, HOP, VAD, CMVN, EXCLUDE_SHORT)

if use_segments:
    # Segment format: features are extracted from seg_wavlist, with
    # seg_windows forwarded to the extractor.
    (wavlist, utt_label, seg_wavlist,
     seg_segid, seg_uttid, seg_windows) = kd.read_data_list(data_dir, utt2spk=False, segments=True)
    feat, _, utt_shape, tffilename = ft.feat_extract(seg_wavlist, *extract_args, seg_windows=seg_windows)
else:
    # Plain format: features are extracted from wavlist.
    wavlist, utt_label, spk_label = kd.read_data_list(data_dir, utt2spk=True)
    feat, _, utt_shape, tffilename = ft.feat_extract(wavlist, *extract_args)

SAVER_FOLDERNAME = 'saver/'+NN_MODEL+'_'+tffilename

# The model module is imported by the name given on the command line.
nn_model = __import__(NN_MODEL)

# Graph inputs: a rank-3 float tensor whose last dimension is FEAT_DIM, a
# rank-1 int tensor and an int tensor with two columns.
x = tf.placeholder(tf.float32, [None, None, FEAT_DIM])
y = tf.placeholder(tf.int32, [None])
s = tf.placeholder(tf.int32, [None, 2])