def compute_presoftmax_prior_scale(dir, alidir, num_jobs, run_opts,
                                   presoftmax_prior_scale_power=-0.25):
    """Accumulate per-pdf counts from alignments and write the prior scale.

    Steps, in order:
      1. Run ali-to-post piped into post-to-tacc for JOB=1..num_jobs,
         writing {dir}/pdf_counts.JOB for each job.
      2. Run vector-sum over {dir}/pdf_counts.* into {dir}/pdf_counts.
      3. Delete the per-job {dir}/pdf_counts.* files.
      4. Pass the first row read from {dir}/pdf_counts to
         smooth_presoftmax_prior_scale_vector (with smooth=0.01).
      5. Write the result to {dir}/presoftmax_prior_scale.vec and link it
         from {dir}/configs/presoftmax_prior_scale.vec.

    NOTE(review): this file contains a later definition with the same name
    (it calls common_lib.execute_command instead of common_lib.run_job).
    If both live in one module, the later one rebinds the name and this one
    is never called -- confirm which variant is intended.

    Args:
        dir: experiment directory; log/, pdf_counts and configs/ paths are
            built underneath it.
        alidir: directory providing ali.JOB.gz and final.mdl.
        num_jobs: upper bound of the JOB=1:{num_jobs} range.
        run_opts: object whose `command` attribute prefixes both commands.
        presoftmax_prior_scale_power: forwarded unchanged to
            smooth_presoftmax_prior_scale_vector.

    Returns:
        None. Results are written to disk.
    """
    # getting the raw pdf count
    #
    # The pipe between the two programs is written as '\\|' so the command
    # text contains a literal backslash-pipe. The unescaped form '\|' is an
    # unrecognized escape sequence in a non-raw literal and triggers a
    # warning on current Python versions. Presumably the backslash keeps
    # the pipe away from the invoking shell so the job wrapper sees it --
    # TODO confirm.
    acc_command = (
        '{command} JOB=1:{num_jobs} {dir}/log/acc_pdf.JOB.log '
        'ali-to-post "ark:gunzip -c {alidir}/ali.JOB.gz|" ark:- \\| '
        'post-to-tacc --per-pdf=true {alidir}/final.mdl ark:- '
        '{dir}/pdf_counts.JOB'
    ).format(command=run_opts.command,
             num_jobs=num_jobs,
             dir=dir,
             alidir=alidir)
    common_lib.run_job(acc_command)

    sum_command = (
        '{command} {dir}/log/sum_pdf_counts.log '
        'vector-sum --binary=false {dir}/pdf_counts.* {dir}/pdf_counts'
    ).format(command=run_opts.command, dir=dir)
    common_lib.run_job(sum_command)

    # The pattern requires a '.' after "pdf_counts", so the summed
    # {dir}/pdf_counts file itself does not match and is kept.
    for count_file in glob.glob('{0}/pdf_counts.*'.format(dir)):
        os.remove(count_file)

    pdf_counts = common_lib.read_kaldi_matrix('{0}/pdf_counts'.format(dir))[0]
    scaled_counts = smooth_presoftmax_prior_scale_vector(
        pdf_counts,
        presoftmax_prior_scale_power=presoftmax_prior_scale_power,
        smooth=0.01)

    output_file = "{0}/presoftmax_prior_scale.vec".format(dir)
    common_lib.write_kaldi_matrix(output_file, [scaled_counts])
    # The link target is relative: from inside {dir}/configs it points one
    # directory up, at the file written just above.
    common_lib.force_symlink(
        "../presoftmax_prior_scale.vec",
        "{0}/configs/presoftmax_prior_scale.vec".format(dir))
def compute_presoftmax_prior_scale(dir, alidir, num_jobs, run_opts,
                                   presoftmax_prior_scale_power=-0.25):
    """Accumulate per-pdf counts from alignments and write the prior scale.

    Steps, in order:
      1. Run ali-to-post piped into post-to-tacc for JOB=1..num_jobs,
         writing {dir}/pdf_counts.JOB for each job.
      2. Run vector-sum over {dir}/pdf_counts.* into {dir}/pdf_counts.
      3. Delete the per-job {dir}/pdf_counts.* files.
      4. Pass the first row read from {dir}/pdf_counts to
         smooth_presoftmax_prior_scale_vector (with smooth=0.01).
      5. Write the result to {dir}/presoftmax_prior_scale.vec and link it
         from {dir}/configs/presoftmax_prior_scale.vec.

    NOTE(review): this file contains an earlier definition with the same
    name (it calls common_lib.run_job instead of
    common_lib.execute_command). If both live in one module, this later
    definition is the one that takes effect -- confirm the earlier one can
    be removed.

    Args:
        dir: experiment directory; log/, pdf_counts and configs/ paths are
            built underneath it.
        alidir: directory providing ali.JOB.gz and final.mdl.
        num_jobs: upper bound of the JOB=1:{num_jobs} range.
        run_opts: object whose `command` attribute prefixes both commands.
        presoftmax_prior_scale_power: forwarded unchanged to
            smooth_presoftmax_prior_scale_vector.

    Returns:
        None. Results are written to disk.
    """
    # getting the raw pdf count
    #
    # The pipe between the two programs is written as '\\|' so the command
    # text contains a literal backslash-pipe. The unescaped form '\|' is an
    # unrecognized escape sequence in a non-raw literal and triggers a
    # warning on current Python versions. Presumably the backslash keeps
    # the pipe away from the invoking shell so the job wrapper sees it --
    # TODO confirm.
    acc_command = (
        '{command} JOB=1:{num_jobs} {dir}/log/acc_pdf.JOB.log '
        'ali-to-post "ark:gunzip -c {alidir}/ali.JOB.gz|" ark:- \\| '
        'post-to-tacc --per-pdf=true {alidir}/final.mdl ark:- '
        '{dir}/pdf_counts.JOB'
    ).format(command=run_opts.command,
             num_jobs=num_jobs,
             dir=dir,
             alidir=alidir)
    common_lib.execute_command(acc_command)

    sum_command = (
        '{command} {dir}/log/sum_pdf_counts.log '
        'vector-sum --binary=false {dir}/pdf_counts.* {dir}/pdf_counts'
    ).format(command=run_opts.command, dir=dir)
    common_lib.execute_command(sum_command)

    # The pattern requires a '.' after "pdf_counts", so the summed
    # {dir}/pdf_counts file itself does not match and is kept.
    for count_file in glob.glob('{0}/pdf_counts.*'.format(dir)):
        os.remove(count_file)

    pdf_counts = common_lib.read_kaldi_matrix('{0}/pdf_counts'.format(dir))[0]
    scaled_counts = smooth_presoftmax_prior_scale_vector(
        pdf_counts,
        presoftmax_prior_scale_power=presoftmax_prior_scale_power,
        smooth=0.01)

    output_file = "{0}/presoftmax_prior_scale.vec".format(dir)
    common_lib.write_kaldi_matrix(output_file, [scaled_counts])
    # The link target is relative: from inside {dir}/configs it points one
    # directory up, at the file written just above.
    common_lib.force_symlink(
        "../presoftmax_prior_scale.vec",
        "{0}/configs/presoftmax_prior_scale.vec".format(dir))
def compute_presoftmax_prior_scale_targets(dir, counts_path,
                                           presoftmax_prior_scale_power=-0.25):
    """Write the presoftmax prior scale from an existing counts file.

    Reads counts_path with common_lib.read_kaldi_matrix, passes the first
    row to smooth_presoftmax_prior_scale_vector (with smooth=0.01), writes
    the result as a one-row matrix to {dir}/presoftmax_prior_scale.vec and
    links it from {dir}/configs/presoftmax_prior_scale.vec.

    Args:
        dir: experiment directory that receives the output file and whose
            configs/ subdirectory receives the symlink.
        counts_path: path of the counts file to read.
        presoftmax_prior_scale_power: forwarded unchanged to
            smooth_presoftmax_prior_scale_vector.

    Returns:
        None. Results are written to disk.
    """
    # total num of frames per target already prepared
    count_rows = common_lib.read_kaldi_matrix(counts_path)
    prior_scale = smooth_presoftmax_prior_scale_vector(
        count_rows[0],
        presoftmax_prior_scale_power=presoftmax_prior_scale_power,
        smooth=0.01)

    vec_path = "{0}/presoftmax_prior_scale.vec".format(dir)
    common_lib.write_kaldi_matrix(vec_path, [prior_scale])

    # Relative target: seen from {dir}/configs it resolves to vec_path.
    link_path = "{0}/configs/presoftmax_prior_scale.vec".format(dir)
    common_lib.force_symlink("../presoftmax_prior_scale.vec", link_path)