Пример #1
0
    def extract_with_vad_and_normalization(self, data_loc, split, threshold=5.5, mean_scale=0.5, cmvn_window=300,
                                           var_norm=False):
        """Extract features, compute VAD, then write normalized voiced-only features.

        Steps, in order:

        1. ``self.extract(data_loc, split)``.
        2. ``VAD(...).compute(data_loc, split)``.
        3. The feature and VAD script files found under ``data_loc`` are split
           into ``self.n_jobs`` shards written to the temporary directory.
        4. A Kaldi job array runs ``apply-cmvn-sliding | select-voiced-frames |
           copy-feats`` once per shard.
        5. The per-shard outputs are concatenated into
           ``{data_loc}/voiced_feats.scp`` and ``{data_loc}/utt2num_frames``.

        Args:
            data_loc: Location joined with ``FEATS_SCP_FILE`` / ``VAD_SCP_FILE``
                to find the script files; also receives the merged outputs.
            split: Split name, forwarded to ``extract`` and ``VAD.compute`` and
                embedded in the per-shard output file names.
            threshold: First positional argument passed to ``VAD``.
            mean_scale: Second positional argument passed to ``VAD``.
            cmvn_window: Value substituted into ``--cmn-window``.
            var_norm: When truthy ``--norm-vars=true`` is used, else ``false``.
        """
        tmp_loc = join_path(self.save_loc, TMP_DIR)

        feats_scp = join_path(data_loc, FEATS_SCP_FILE)
        vad_scp = join_path(data_loc, VAD_SCP_FILE)

        print('MFCC: Extracting features...')
        self.extract(data_loc, split)

        print('MFCC: Computing VAD...')
        vad = VAD(threshold, mean_scale, n_jobs=self.n_jobs, save_loc=self.save_loc)
        vad.compute(data_loc, split)

        print('MFCC: Normalizing features and selecting voiced frames..')
        feats_scp_dict = spaced_file_to_dict(feats_scp)
        vad_scp_dict = spaced_file_to_dict(vad_scp)

        # One shard of utterance keys per job; shard files are numbered from 1
        # to line up with the JOB=1:{nj} range used in the queued command.
        splits = np.array_split(list(feats_scp_dict.keys()), self.n_jobs)
        for job, keys in enumerate(splits, start=1):
            feat_path = join_path(tmp_loc, 'feats.{}.scp'.format(job))
            vad_path = join_path(tmp_loc, 'vad.{}.scp'.format(job))
            # Context managers guarantee both files are closed even if a key
            # is missing from the VAD dictionary or a write fails.
            with open(feat_path, 'w') as split_feat_scp, open(vad_path, 'w') as split_vad_scp:
                for key in keys:
                    split_feat_scp.write('{} {}\n'.format(key, feats_scp_dict[key]))
                    split_vad_scp.write('{} {}\n'.format(key, vad_scp_dict[key]))

        # The backslash before each pipe is written as an explicit '\\' so the
        # command text still contains a literal "\|" without relying on an
        # invalid escape sequence.
        command = ('JOB=1:{nj} {mfcc_loc}/log/voiced_feats.JOB.log '
                   'apply-cmvn-sliding --norm-vars={var_norm} --center=true --cmn-window={window} scp:{tmp_loc}/feats.JOB.scp ark:- \\| '
                   'select-voiced-frames ark:- scp,ns,cs:{tmp_loc}/vad.JOB.scp ark:- \\| '
                   'copy-feats --compress=true --write-num-frames=ark,t:{mfcc_loc}/log/utt2num_frames.{name}.JOB ark:- '
                   'ark,scp:{mfcc_loc}/voiced_feats.{name}.JOB.ark,{mfcc_loc}/voiced_feats.{name}.JOB.scp || exit 1;'
                   .format(mfcc_loc=self.mfcc_loc, tmp_loc=tmp_loc,
                           var_norm='true' if var_norm else 'false',
                           nj=self.n_jobs, window=cmvn_window, name=split))
        Kaldi().queue(command)

        # Merge the per-job script files into a single voiced_feats.scp.
        run_command('for n in $(seq {nj}); do \n'
                    '   cat {mfcc_loc}/voiced_feats.{name}.$n.scp || exit 1;\n'
                    'done > {data_loc}/voiced_feats.scp || exit 1'.format(mfcc_loc=self.mfcc_loc, data_loc=data_loc,
                                                                          nj=self.n_jobs, name=split))

        # Merge the per-job frame counts into a single utt2num_frames.
        run_command('for n in $(seq {nj}); do \n'
                    '   cat {mfcc_loc}/log/utt2num_frames.{name}.$n || exit 1;\n'
                    'done > {data_loc}/utt2num_frames || exit 1'.format(mfcc_loc=self.mfcc_loc, data_loc=data_loc,
                                                                        nj=self.n_jobs, name=split))
Пример #2
0
def watch_job(job_id, wait=3):
    """Block until the queue status command stops listing ``job_id``.

    Sleeps ``wait`` seconds, then runs ``QUEUE_JOB_STATUS_CMD`` for the job
    every three seconds. The job is considered finished once the command's
    output contains fewer than two newline-separated lines.

    Args:
        job_id: Identifier substituted into ``QUEUE_JOB_STATUS_CMD``.
        wait: Seconds to sleep before the first poll.
    """
    time.sleep(wait)
    completed = False
    while not completed:
        output, _ = run_command(QUEUE_JOB_STATUS_CMD.format(job_id))
        # The command output may arrive as raw bytes; splitting bytes on a
        # str separator raises TypeError, so normalize to text first.
        if isinstance(output, bytes):
            output = output.decode('utf-8')
        completed = len(output.split('\n')) < 2
        # NOTE(review): this pause also runs after the final poll; kept so the
        # function's timing is unchanged for callers.
        time.sleep(3)
Пример #3
0
def gpu_stat(total_nodes=5, slots_per_node=2):
    """Return the free GPU slots as a list of ``(node, gpu_id)`` pairs.

    Runs ``QUEUE_GPU_USAGE_CMD`` and counts how many times each node name
    appears in its output (one name per line). Each node
    ``compute-0-<i>.local`` for ``i`` in ``range(total_nodes)`` contributes
    ``slots_per_node - used`` entries:

    * a node with no used slots yields ids ``0 .. slots_per_node - 1``;
    * a partially used node yields ``-1`` for each remaining slot, because
      the output does not say which ids are taken;
    * a node using ``slots_per_node`` or more slots yields nothing.

    Args:
        total_nodes: Number of nodes to enumerate.
        slots_per_node: Number of GPU slots assumed per node.

    Returns:
        list[tuple[str, int]]: ``(node_name, gpu_id)`` for every free slot.
    """
    output, _ = run_command(QUEUE_GPU_USAGE_CMD)
    # Accept either raw bytes or already-decoded text from run_command.
    if isinstance(output, bytes):
        output = output.decode("utf-8")
    # splitlines() keeps the last node even when the output has no trailing
    # newline; blank lines are skipped so they never count as a node.
    used_slots = Counter(line.strip() for line in output.splitlines() if line.strip())

    free = []
    for i in range(total_nodes):
        node = 'compute-0-{}.local'.format(i)
        # Counter returns 0 for unseen keys (it never raises KeyError), so a
        # fully free node has to be detected by an explicit zero check.
        count = used_slots[node]
        if count == 0:
            free_gpu = list(range(slots_per_node))
        else:
            free_gpu = [-1] * (slots_per_node - count)
        free.extend((node, gpu_id) for gpu_id in free_gpu)
    return free
Пример #4
0
def delete_jobs(pattern):
    """Run ``QUEUE_DELETE_CMD`` formatted with ``pattern`` and return its output.

    Args:
        pattern: Value substituted into ``QUEUE_DELETE_CMD``.

    Returns:
        The first element of the pair returned by ``run_command``.
    """
    command = QUEUE_DELETE_CMD.format(pattern)
    result, _unused = run_command(command)
    return result
Пример #5
0
def submit_job(job):
    """Run the submission command and return the job id from its output.

    The id is taken to be the third space-separated token of the command
    output (presumably a scheduler message of the form
    ``Your job <id> ...`` — confirm against the queue system in use).

    Args:
        job: Command passed unchanged to ``run_command``.

    Returns:
        str: The third space-separated token of the output.

    Raises:
        IndexError: If the output has fewer than three space-separated tokens.
    """
    output, _ = run_command(job)
    # The command output may arrive as raw bytes; splitting bytes on a str
    # separator raises TypeError, so normalize to text first.
    if isinstance(output, bytes):
        output = output.decode('utf-8')
    return output.split(' ')[2]