help='The speekers gender list') """ This file is used to generate tfrecords for gender-sensitive PIT speech seperation. Every tfrecords file contains: inputs: [mix_speech_abs, max_speech_phase], shape:T*(fft_len*2) labels: [spker1_speech_abs, apker2_speech_abs], shape:T*(fft_len*2) gender: [spker1_gender, spker2_gender], shape:1*2 """ args = parser.parse_args() wavdir = args.wavdir tfdir = args.tfdir namelist = args.namelist mkdir_p(tfdir) if args.gender_list is not '': apply_gender_info=True; gender_dict = {} fid = open(args.gender_list, 'r') lines = fid.readlines() fid.close() for line in lines: spk = line.strip('\n').split(' ')[0] gender = line.strip('\n').split(' ')[1] if gender.lower() == 'm': gender_dict[spk] = 1; else: gender_dict[spk] = 0 def make_sequence_example(inputs, labels, genders):
# Positional arguments: input wav directory, list directory, output directory.
parser.add_argument('wav_dir', help='The parent dit of mix,s1,s2')
# NOTE(review): the help text below is identical to wav_dir's and looks like a
# copy-paste; confirm what list_dir is expected to contain and reword it.
parser.add_argument('list_dir', help='The parent dit of mix,s1,s2')
parser.add_argument('tfrecord_dir', help='TFRecords file dir')
# Optional arguments: gender list file and STFT / audio parameters.
parser.add_argument('--gender_list', default='', type=str,
                    help='The speekers gender list')
parser.add_argument('--sample_rate', default=8000, type=int,
                    help='sample rate of audio')
parser.add_argument('--window_size', default=256, type=int,
                    help='window size for STFT')
# The help text previously repeated "window size for STFT" for this option.
parser.add_argument('--window_shift', default=128, type=int,
                    help='window shift for STFT')

# Parse the command line and unpack the values used by the rest of the script.
args = parser.parse_args()
wav_dir = args.wav_dir
tfrecord_dir = args.tfrecord_dir
process_num = 8
list_dir = args.list_dir
# Make sure the output directory exists before anything is written to it.
mkdir_p(tfrecord_dir)
sample_rate = args.sample_rate
window_size = args.window_size
window_shift = args.window_shift

# Compare by value: `is not ''` tests object identity, which only works by
# accident of string interning and raises a SyntaxWarning on Python >= 3.8.
if args.gender_list != '':
    apply_gender_info = True
    gender_dict = {}
    # The context manager closes the file even if reading fails.
    with open(args.gender_list, 'r') as fid:
        lines = fid.readlines()
    # Each line is "<speaker> <gender>", separated by single spaces.
    for line in lines:
        fields = line.strip('\n').split(' ')
        spk = fields[0]
        gender = fields[1]
        # Male speakers ('m' / 'M') map to 1.
        if gender.lower() == 'm':
            gender_dict[spk] = 1