def make_sre_data(data_config, save_loc):
    save_loc = abspath(save_loc)
    data_loc = join_path(save_loc, DATA_DIR)
    make_directory(data_loc)

    train_data = get_train_data(data_config)
    make_kaldi_data_dir(train_data, join_path(data_loc, 'train_data'))
    print('Made {:d} files for training.'.format(train_data.shape[0]))

    sre_dev_enroll, sre_dev_test, sre_unlabelled = make_sre18_dev_data(data_config)
    print('Made {:d} enroll, {:d} test and {:d} unlabeled from sre2018 dev files.'
          .format(sre_dev_enroll.shape[0], sre_dev_test.shape[0], sre_unlabelled.shape[0]))
    make_kaldi_data_dir(sre_dev_enroll, join_path(data_loc, 'sre_dev_enroll'))
    make_kaldi_data_dir(sre_dev_test, join_path(data_loc, 'sre_dev_test'))
    make_kaldi_data_dir(sre_unlabelled, join_path(data_loc, 'sre_unlabelled'))

    sre_eval_enroll, sre_eval_test = make_sre18_eval_data(data_config)
    make_kaldi_data_dir(sre_eval_enroll, join_path(data_loc, 'sre_eval_enroll'))
    make_kaldi_data_dir(sre_eval_test, join_path(data_loc, 'sre_eval_test'))
    print('Made {:d} enroll and {:d} test from sre2018 eval files.'.format(
        sre_eval_enroll.shape[0], sre_eval_test.shape[0]))

    print('Saving data lists..')
    save_object(join_path(data_loc, 'train_data.pkl'), train_data)
    save_object(join_path(data_loc, 'sre_unlabelled.pkl'), sre_unlabelled)
    save_object(join_path(data_loc, 'sre_dev.pkl'), (sre_dev_enroll, sre_dev_test))
    save_object(join_path(data_loc, 'sre_eval.pkl'), (sre_eval_enroll, sre_eval_test))
    print('Data lists saved at: {}'.format(data_loc))

    return train_data, sre_unlabelled, sre_dev_enroll, sre_dev_test, sre_eval_enroll, sre_eval_test
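# A minimal sketch of reloading the saved lists later, assuming save_object
# writes a standard pickle (its definition is not shown in this excerpt):
#
#   import pickle
#   with open(join_path(data_loc, 'sre_dev.pkl'), 'rb') as f:
#       sre_dev_enroll, sre_dev_test = pickle.load(f)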
def make_kaldi_data_dir(args_list, data_loc):
    make_directory(data_loc)
    # run_command('cd {} && mv * .backup/'.format(data_loc))
    args_list = sort_by_index(args_list)
    make_wav_scp(args_list[:, 0], args_list[:, 4], join_path(data_loc, 'wav.scp'))
    make_spk_to_utt(args_list[:, 0], args_list[:, 3], join_path(data_loc, 'spk2utt'))
    make_utt_to_spk(args_list[:, 0], args_list[:, 3], join_path(data_loc, 'utt2spk'))
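# The three writers above are not defined in this excerpt. Below is a minimal
# sketch of what they would look like for the standard Kaldi file formats; the
# project's real implementations may differ, so treat these as assumptions.
# Kaldi expects one entry per line: wav.scp maps utt-id -> wav path/command,
# utt2spk maps utt-id -> spk-id, and spk2utt maps spk-id -> all its utt-ids.

def _sketch_make_wav_scp(utt_ids, wav_locs, file_path):
    # wav.scp: "<utt-id> <wav-path-or-command>"
    with open(file_path, 'w') as f:
        for utt, wav in zip(utt_ids, wav_locs):
            f.write('{} {}\n'.format(utt, wav))


def _sketch_make_utt_to_spk(utt_ids, spk_ids, file_path):
    # utt2spk: "<utt-id> <spk-id>"
    with open(file_path, 'w') as f:
        for utt, spk in zip(utt_ids, spk_ids):
            f.write('{} {}\n'.format(utt, spk))


def _sketch_make_spk_to_utt(utt_ids, spk_ids, file_path):
    # spk2utt: "<spk-id> <utt-id-1> <utt-id-2> ..."
    spk2utt = {}
    for utt, spk in zip(utt_ids, spk_ids):
        spk2utt.setdefault(spk, []).append(utt)
    with open(file_path, 'w') as f:
        for spk in sorted(spk2utt):
            f.write('{} {}\n'.format(spk, ' '.join(spk2utt[spk])))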
def get_log_path(iteration, model_tag, save_loc, worker_id=None, operation='train'):
    log_loc = join_path(save_loc, '{}/{}'.format(LOGS_DIR, model_tag))
    make_directory(log_loc)
    if worker_id is not None:
        log_path = '{}.{}.{}.log'.format(operation, iteration, worker_id)
    else:
        log_path = '{}.{}.log'.format(operation, iteration)
    return join_path(log_loc, log_path)
def get_model_path(iteration, model_tag, save_loc, worker_id=None):
    model_loc = join_path(save_loc, '{}/{}'.format(MODELS_DIR, model_tag))
    if worker_id is not None:
        model_path = 'iteration_{}_{}'.format(iteration, worker_id)
    else:
        model_path = 'iteration_{}'.format(iteration)
    model_path = join_path(model_loc, model_path)
    make_directory(model_path)
    return model_path
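# Example of the paths these two helpers produce (hypothetical argument
# values; LOGS_DIR and MODELS_DIR come from constants.app_constants):
#
#   get_log_path(3, 'HGRU', '../save', worker_id=2)
#   -> ../save/<LOGS_DIR>/HGRU/train.3.2.log
#   get_model_path(3, 'HGRU', '../save')
#   -> ../save/<MODELS_DIR>/HGRU/iteration_3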
                    default=NUM_FEATURES, help='Dimension of input features.')
parser.add_argument('--num-jobs', type=int, default=NUM_CPU_WORKERS, help='Number of CPU Workers')
parser.add_argument('--save', default=SAVE_LOC, help='Save location.')
parser.add_argument('--worker-id', type=int, default=0, help='Worker Id')
args = parser.parse_args()

print_script_args(sys.argv)
print('Started at: {}\n'.format(get_time_stamp()))

save_loc = abspath(args.save)
tmp_loc = join_path(save_loc, '{}/{}'.format(TMP_DIR, args.model_tag))
make_directory(tmp_loc)

egs_scp = join_path(save_loc, '{}/egs.{}.scp'.format(EGS_DIR, args.egs_index))
read_scp = join_path(tmp_loc, 'read_egs.{}.'.format(args.worker_id) + '{}.scp')

initial_path = get_model_path(args.iteration - 1, args.model_tag, save_loc)
model_path = get_model_path(args.iteration, args.model_tag, save_loc, args.worker_id)


def get_batch(items):
    batch_list, batch_id = items
    labels = []
    scp_file = read_scp.format(batch_id)
    with open(scp_file, 'w') as f:
        for utt, ark, l in batch_list:
            # Write one scp entry per utterance and collect its label. The
            # original loop body is truncated in this excerpt; this completion
            # is an assumption modeled on the extract-time get_batch below.
            f.write('{} {}\n'.format(utt, ark))
            labels.append(l)
                    default=8000, help='Sampling Rate')
parser.add_argument('--save', default='../save', help='Save Location')
args = parser.parse_args()


def make_feats(mfcc, split, save_loc):
    data_loc = join_path(join_path(save_loc, DATA_DIR), split)
    mfcc.extract_with_vad_and_normalization(data_loc, split)


if __name__ == '__main__':
    args.save = abspath(args.save)
    mfcc_loc = join_path(args.save, MFCC_DIR)
    vad_loc = join_path(args.save, VAD_DIR)
    make_directory(mfcc_loc)
    make_directory(vad_loc)

    mfcc_ = MFCC(fs=args.sample_rate, fl=20, fh=3700, frame_len_ms=25,
                 n_ceps=args.num_features, n_jobs=args.num_jobs, save_loc=args.save)

    for split_ in ['train_data', 'sre_unlabelled', 'sre_dev_enroll', 'sre_dev_test',
                   'sre_eval_enroll', 'sre_eval_test']:
        print('Making features for {}..'.format(split_))
        make_feats(mfcc_, split_, args.save)
        print('Finished making features for {}..'.format(split_))
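# Expected layout under --save after this script runs, assuming MFCC_DIR,
# VAD_DIR and DATA_DIR keep their meanings from constants.app_constants and
# extract_with_vad_and_normalization writes into the mfcc/vad directories:
#
#   save/
#     data/<split>/{wav.scp, utt2spk, spk2utt}  # written by make_sre_data
#     mfcc/                                     # per-split MFCC features
#     vad/                                      # per-split voice-activity decisions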
                    help='Dimension of input features')
parser.add_argument('--ps', help='Parameter Server(s)')
parser.add_argument('--save', default='../save', help='Save Location')
parser.add_argument('--steps', type=int, default=500000, help='Total global steps')
parser.add_argument('--type', default='ps', help='Instance Type')
parser.add_argument('--task-index', type=int, default=0, help='Task Index')
parser.add_argument('--workers', help='Worker Nodes')
args = parser.parse_args()

save_loc = abspath(args.save)
egs_loc = join(save_loc, EGS_DIR)

model_loc = join(save_loc, MODELS_DIR)
make_directory(model_loc)
model_loc = join(model_loc, args.model_tag)
make_directory(model_loc)
early_stop_file = join(model_loc, 'early_stop')

tmp_loc = join(save_loc, TMP_DIR)
make_directory(tmp_loc)

ps = args.ps.split(',')
workers = args.workers.split(',')
num_workers = len(workers)

use_gpu(args.gpu)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
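# A typical continuation for the ps/worker lists above (not part of this
# excerpt): build a tf.train.ClusterSpec and start a tf.train.Server. This is
# a sketch under the assumption that the script follows the standard TF 1.x
# between-graph replication pattern implied by --type and --task-index:

cluster = tf.train.ClusterSpec({'ps': ps, 'worker': workers})
server = tf.train.Server(cluster, job_name=args.type,
                         task_index=args.task_index, config=config)
if args.type == 'ps':
    server.join()  # parameter servers block here and serve variables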
from os.path import abspath, join as join_path

from constants.app_constants import LOGS_DIR
from services.common import make_directory
from services.distributed import append_ps_and_workers, assign_nodes, \
    make_parameter_servers, make_workers, submit_jobs

import argparse as ap

parser = ap.ArgumentParser()
parser.add_argument('--cmd', default='python -u async_train.py',
                    help='Python script command. Eg: python -u train.py --epochs 10')
parser.add_argument('--model-tag', default='HGRU', help='Model Tag')
parser.add_argument('--num-ps', type=int, default=1, help='Number of Parameter Servers')
parser.add_argument('--num-workers', type=int, default=3, help='Number of Workers')
parser.add_argument('--save', default='../save', help='Save Location')
parser.add_argument('--start-port', type=int, default=7770, help='Starting port value')
args = parser.parse_args()

save_loc = abspath(args.save)
log_loc = join_path(save_loc, LOGS_DIR)
make_directory(log_loc)

print('Making {} Parameter Server(s) and {} Worker Nodes...'.format(args.num_ps, args.num_workers))
ps, workers = assign_nodes(args.num_ps, args.num_workers, args.start_port)
cmd = append_ps_and_workers(args.cmd, ps, workers)
ps_list = make_parameter_servers(ps, cmd, args.model_tag, log_loc)
workers_list = make_workers(workers, cmd, args.model_tag, log_loc)
jobs_list = ps_list + workers_list

print('Submitting jobs to parameter server(s) and worker nodes...')
submit_jobs(jobs_list)
print('Finished.')
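# Example invocation of this launcher (hypothetical script name and values):
#
#   python submit_distributed.py --num-ps 1 --num-workers 3 --start-port 7770 \
#       --cmd 'python -u async_train.py' --model-tag HGRU --save ../save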
                    help='Number of MFCC coefficients')
parser.add_argument('--num-classes', type=int, default=3769, help='Number of output classes')
parser.add_argument('--num-workers', type=int, default=10, help='Number of Workers')
parser.add_argument('--save', default='../save', help='Save Location')
parser.add_argument('--worker-id', type=int, default=0, help='Worker Id')
args = parser.parse_args()

save_loc = abspath(args.save)
embedding_loc = join_path(save_loc, '{}/{}'.format(EMB_DIR, args.model_tag))
make_directory(embedding_loc)
tmp_loc = join_path(save_loc, '{}/{}'.format(TMP_DIR, args.model_tag))
make_directory(tmp_loc)

read_scp = join_path(tmp_loc, 'extract_read.{}'.format(args.worker_id) + '.{}.scp')


def get_batch(items):
    utt, ark, batch_id = items
    scp_file = read_scp.format(batch_id)
    with open(scp_file, 'w') as scp:
        # Write a single-utterance scp entry; a trailing newline is added so
        # downstream Kaldi-style readers always see a complete line.
        scp.write('{} {}\n'.format(utt, ark))