Example #1
def compute_mfcc_feats(input_path, output_path,
                       compress, compression_method, write_num_frames, **kwargs):

    mfcc_args = MFCC.filter_args(**kwargs)
    mfcc = MFCC(**mfcc_args)

    if mfcc.input_step == 'wave':
        input_args = AR.filter_args(**kwargs)
        reader = AR(input_path, **input_args)
    else:
        input_args = DRF.filter_args(**kwargs)
        reader = DRF.create(input_path, **input_args)

    writer = DWF.create(output_path, scp_sep=' ',
                        compress=compress,
                        compression_method=compression_method)

    if write_num_frames is not None:
        f_num_frames = open(write_num_frames, 'w')
    
    for data in reader:
        if mfcc.input_step == 'wave':
            key, x, fs = data
        else:
            key, x = data
        logging.info('Extracting MFCC for %s' % (key))
        t1 = time.time()
        y = mfcc.compute(x)
        dt = (time.time() - t1)*1000
        rtf = mfcc.frame_shift*y.shape[0]/dt
        logging.info('Extracted MFCC for %s num-frames=%d elapsed-time=%.2f ms. real-time-factor=%.2f' %
                     (key, y.shape[0], dt, rtf))
        writer.write([key], [y])
        
        if write_num_frames is not None:
            f_num_frames.write('%s %d\n' % (key, y.shape[0]))

        mfcc.reset()
            
    if write_num_frames is not None:
        f_num_frames.close()
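All of these examples share the same filter_args idiom: each component class (MFCC, AR, DRF) picks only the keyword arguments it understands out of a single shared **kwargs dict. Hyperion's actual implementation of this classmethod is not shown in the snippets; the sketch below only illustrates the pattern, and the valid_args tuple and option names are placeholders.

# Minimal sketch of the filter_args idiom used by MFCC, AR and DRF above.
# Not hyperion's implementation; valid_args and the option names are
# hypothetical and chosen for illustration only.
class FeatureComponent(object):

    # options this component knows how to consume (placeholder names)
    valid_args = ('sample_frequency', 'frame_length', 'frame_shift', 'num_filters')

    def __init__(self, sample_frequency=16000, frame_length=25,
                 frame_shift=10, num_filters=23):
        self.sample_frequency = sample_frequency
        self.frame_length = frame_length
        self.frame_shift = frame_shift
        self.num_filters = num_filters

    @classmethod
    def filter_args(cls, **kwargs):
        # keep only the keyword arguments this class consumes, so one flat
        # kwargs dict can be shared safely by several components
        return {k: kwargs[k] for k in cls.valid_args if k in kwargs}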
Example #2
def compute_mfcc_feats(input_path, output_path, compress, compression_method,
                       write_num_frames, use_gpu, nn_model_path, chunk_size,
                       context, **kwargs):

    #open device
    if use_gpu and torch.cuda.is_available():
        logging.info('CUDA_VISIBLE_DEVICES=%s' %
                     os.environ.get('CUDA_VISIBLE_DEVICES', ''))
        logging.info('init gpu device')
        device = torch.device('cuda', 0)
        torch.tensor([0]).to(device)
    else:
        logging.info('init cpu device')
        device = torch.device('cpu')

    mfcc_args = MFCC.filter_args(**kwargs)
    mfcc = MFCC(**mfcc_args)
    # PUT YOUR NNET MODEL HERE!!!!
    enhancer = CAN(num_channels=45)
    enhancer.load_state_dict(
        torch.load(nn_model_path, map_location=device)['state_dict'])
    enhancer.to(device)
    enhancer.eval()

    if mfcc.input_step == 'wave':
        input_args = AR.filter_args(**kwargs)
        reader = AR(input_path, **input_args)
    else:
        input_args = DRF.filter_args(**kwargs)
        reader = DRF.create(input_path, **input_args)

    writer = DWF.create(output_path,
                        scp_sep=' ',
                        compress=compress,
                        compression_method=compression_method)

    if write_num_frames is not None:
        f_num_frames = open(write_num_frames, 'w')

    for data in reader:
        if mfcc.input_step == 'wave':
            key, x, fs = data
        else:
            key, x = data
        logging.info('Extracting filter-banks for %s' % (key))
        t1 = time.time()
        y = mfcc.compute(x)

        # apply the enhancement network to the filter banks
        logging.info('Running enhancement network')
        y = apply_nnet(y, enhancer, chunk_size, context, device)

        dt = (time.time() - t1) * 1000
        rtf = mfcc.frame_shift * y.shape[0] / dt
        logging.info(
            'Extracted filter-banks for %s num-frames=%d elapsed-time=%.2f ms. real-time-factor=%.2f'
            % (key, y.shape[0], dt, rtf))
        writer.write([key], [y])

        if write_num_frames is not None:
            f_num_frames.write('%s %d\n' % (key, y.shape[0]))

        mfcc.reset()

    if write_num_frames is not None:
        f_num_frames.close()
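Examples #2 and #3 call an apply_nnet helper that is not included in these snippets. The sketch below is one plausible implementation under explicit assumptions: the enhancer maps a (1, num_frames, num_feats) float tensor to a same-shaped output, chunk_size frames are processed at a time, and context extra frames are taken on each side of every chunk to reduce edge effects. The real CAN/CGN models may expect a different tensor layout, so treat this only as a sketch.

import numpy as np
import torch


def apply_nnet(y, model, chunk_size, context, device):
    # Hypothetical sketch: run the enhancement model on chunks of the feature
    # matrix y (num_frames x num_feats), padding each chunk with `context`
    # frames on both sides and keeping only the central part of the output.
    num_frames = y.shape[0]
    if chunk_size is None or chunk_size <= 0:
        chunk_size = num_frames  # assume non-positive chunk_size means whole utterance
    out = np.zeros_like(y)
    with torch.no_grad():
        for start in range(0, num_frames, chunk_size):
            stop = min(start + chunk_size, num_frames)
            c_start = max(start - context, 0)
            c_stop = min(stop + context, num_frames)
            x = torch.tensor(y[c_start:c_stop], dtype=torch.float32, device=device)
            z = model(x[None, ...])[0]  # assumed (batch, frames, feats) interface
            out[start:stop] = z[start - c_start:stop - c_start].cpu().numpy()
    return out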
Example #3
def compute_mfcc_feats(input_path, output_path,
                       compress, compression_method, write_num_frames, 
                       use_gpu, nn_model_path, chunk_size, context,
                       **kwargs):

    #open device
    if use_gpu and torch.cuda.is_available():
        os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
        max_tries = 100
        for g in range(max_tries):
            try:
                gpu_ids = find_free_gpus()
                os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids
                logging.info('CUDA_VISIBLE_DEVICES=%s' % os.environ['CUDA_VISIBLE_DEVICES'])
                logging.info('init gpu device')
                device = torch.device('cuda', 0)
                torch.tensor([0]).to(device)
                break
            except Exception:
                if g < max_tries-1:
                    logging.info('failed to init GPU, trying again')
                    time.sleep(10)
                else:
                    logging.info('failed to init GPU, using CPU')
                    device = torch.device('cpu')

    else:
        logging.info('init cpu device')
        device = torch.device('cpu')

    mfcc_args1 = MFCC.filter_args(**kwargs)
    mfcc_args2 = copy.deepcopy(mfcc_args1)
    mfcc_args1['output_step'] = 'logfb'
    mfcc_args2['input_step'] = 'logfb'
    logging.debug('kwargs: %s', kwargs)
    logging.debug('mfcc_args1: %s', mfcc_args1)
    logging.debug('mfcc_args2: %s', mfcc_args2)
    mfcc1 = MFCC(**mfcc_args1)
    mfcc2 = MFCC(**mfcc_args2)   

    mvn = MVN(norm_var=False, left_context=150, right_context=150)
    
    # PUT YOUR NNET MODEL HERE!!!!
    enhancer = CGN()
    #enhancer.load_state_dict(torch.load(nn_model_path, map_location=device)['state_dict'])
    enhancer.load_state_dict(torch.load(nn_model_path, map_location=device))
    enhancer.to(device)
    enhancer.eval()

    if mfcc1.input_step == 'wave':
        input_args = AR.filter_args(**kwargs)
        reader = AR(input_path, **input_args)
    else:
        input_args = DRF.filter_args(**kwargs)
        reader = DRF.create(input_path, **input_args)

    writer = DWF.create(output_path, scp_sep=' ',
                        compress=compress,
                        compression_method=compression_method)

    if write_num_frames is not None:
        f_num_frames = open(write_num_frames, 'w')
    
    for data in reader:
        if mfcc1.input_step == 'wave':
            key, x, fs = data
        else:
            key, x = data
        logging.info('Extracting filter-banks for %s' % (key))
        t1 = time.time()
        y = mfcc1.compute(x)

        # separate logE and filterbanks
        logE = y[:,0]
        y = y[:,1:]

        #estimate log energy from filterbanks
        logEy1 = logsumexp(y, axis=-1)

        # apply MVN and the enhancement network to the filter banks
        logging.info('Running enhancement network')
        y = mvn.normalize(y)
        y = apply_nnet(y, enhancer, chunk_size, context, device)

        # rescale logE based on the enhanced filter banks
        logEy2 = logsumexp(y, axis=-1)
        logE = logE + (logEy2 - logEy1)

        # concatenate logE and filterbanks
        y = np.concatenate((logE[:,None], y), axis=-1)

        #apply DCT
        logging.info('Applying DCT')
        y = mfcc2.compute(y)

        dt = (time.time() - t1)*1000
        rtf = mfcc1.frame_shift*y.shape[0]/dt
        logging.info('Extracted filter-banks for %s num-frames=%d elapsed-time=%.2f ms. real-time-factor=%.2f' %
                     (key, y.shape[0], dt, rtf))
        writer.write([key], [y])
        
        if write_num_frames is not None:
            f_num_frames.write('%s %d\n' % (key, y.shape[0]))

        mfcc1.reset()
            
    if write_num_frames is not None:
        f_num_frames.close()
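The logE correction in Example #3 rests on a simple identity: logsumexp over the log filter banks estimates the frame log energy, so adding logsumexp(enhanced) - logsumexp(original) to the original logE rescales the frame energy by the same overall gain the enhancer applied. A small numpy check (assuming the unqualified logsumexp in the snippet is scipy.special.logsumexp):

import numpy as np
from scipy.special import logsumexp

# Toy check: if the enhancer multiplies the linear filter-bank energies of a
# frame by a gain g, then logsumexp(enhanced) - logsumexp(original) = log(g),
# so the corrected logE corresponds to the original energy scaled by g.
fb = np.log(np.random.rand(1, 24) + 1e-3)   # one frame of log filter banks
g = 2.5                                      # hypothetical gain applied by the enhancer
fb_enh = fb + np.log(g)                      # uniform gain in the log domain
logE = np.array([0.7])                       # original frame log energy
logE_new = logE + (logsumexp(fb_enh, axis=-1) - logsumexp(fb, axis=-1))
print(np.allclose(logE_new, logE + np.log(g)))  # True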
Example #4
(Example #4 adds the command-line entry point for the script shown in Example #3.)

if __name__ == "__main__":
    
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        fromfile_prefix_chars='@',
        description='Compute filter-bank features and enhance with pytorch model')

    parser.add_argument('--input', dest='input_path', required=True)
    parser.add_argument('--output', dest='output_path', required=True)
    parser.add_argument('--write-num-frames', dest='write_num_frames', default=None)
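    # --- Hypothetical continuation: the original snippet is truncated above. ---
    # The option names and defaults below are assumptions, not the original file.
    # The real script would also register the MFCC, reader and writer options on
    # the parser so that they reach **kwargs; that part is not sketched here.
    parser.add_argument('--compress', dest='compress', default=False, action='store_true')
    parser.add_argument('--compression-method', dest='compression_method', default='auto')
    parser.add_argument('--use-gpu', dest='use_gpu', default=False, action='store_true')
    parser.add_argument('--nn-model-path', dest='nn_model_path', required=True)
    parser.add_argument('--chunk-size', dest='chunk_size', type=int, default=0)
    parser.add_argument('--context', dest='context', type=int, default=0)

    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    compute_mfcc_feats(**vars(args))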