def predict_new(args):
    """Load a trained model from the workspace and run inference on
    freshly simulated data, printing output shapes.
    """
    workspace = args.workspace

    # Deserialize the trained model.
    model_path = os.path.join(workspace, "models", "main", args.model_name)
    model = serializations.load(model_path)

    # Fabricate new input of shape (n_clips, n_time, n_in).
    new_x = np.random.normal(size=(3, 10, 128))

    # Clip-level classification probabilities, shape (n_clips, n_out).
    [y] = model.predict(new_x)
    print("y.shape: %s" % (y.shape, ))

    # Collect intermediate classification & attention activations.
    nodes = [
        model.find_layer('cla').output_,
        model.find_layer('att').output_,
    ]
    forward_fn = model.get_observe_forward_func(nodes)  # Forward function.
    [cla, att] = model.run_function(forward_fn,
                                    new_x,
                                    batch_size=None,
                                    tr_phase=0.)

    # Both activations have shape (n_clips, n_time, n_out).
    print("classification.shape: %s" % (cla.shape, ))
    print("attention.shape: %s" % (att.shape, ))
def recognize(args):
    """Run audio tagging (AT) and sound event detection (SED) inference on
    the test fold, dump predictions to pickle files and print statistics.

    Args:
      args: namespace with `n_events`, `snr` and `model_name` attributes.
    """
    workspace = cfg.workspace
    events = cfg.events
    n_events = args.n_events
    snr = args.snr
    md_na = args.model_name
    lb_to_ix = cfg.lb_to_ix
    n_out = len(cfg.events)
    te_fold = cfg.te_fold

    # Load trained model for this (n_events, fold, snr) configuration.
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr, md_na)
    md = serializations.load(md_path)

    # Load data.
    # NOTE(review): `is_scale` is not defined in this function — presumably a
    # module-level global; confirm.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     snr=snr,
                                     is_scale=is_scale)

    x = te_x
    at_gts = te_at_y
    sed_gts = te_sed_y
    na_list = te_na_list

    # Clip-level audio tagging predictions.
    [at_pds] = md.predict(x)  # (N, 16)

    # Frame-level segmentation masks from the 'detect' layer.
    observe_nodes = [md.find_layer('detect').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, x, batch_size=500,
                                  tr_phase=0.)  # (n_clips, n_time, n_out)
    seg_masks = np.transpose(seg_masks, (0, 2, 1))[:, :, :, np.newaxis]

    # Dump to pickle.
    out_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr,
                           os.path.splitext(md_na)[0])
    pp_data.create_folder(out_dir)
    out_at_path = os.path.join(out_dir, "at_probs.p")
    out_seg_masks_path = os.path.join(out_dir, "seg_masks.p")

    # Use `with` so the file handles are closed deterministically
    # (the original left them open).
    with open(out_at_path, 'wb') as f:
        cPickle.dump(at_pds, f, protocol=cPickle.HIGHEST_PROTOCOL)
    with open(out_seg_masks_path, 'wb') as f:
        cPickle.dump(seg_masks, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Print stats.
    sed_pds = np.mean(seg_masks, axis=-1)  # (N, n_out, n_time)
    sed_pds = np.transpose(sed_pds, (0, 2, 1))  # (N, n_time, n_out)
    print_stats(at_pds, at_gts, sed_pds, sed_gts)
# ----- Example #3 -----
def detect_cv():
    """Run event detection on one development cross-validation fold and
    print the fold's evaluation score.

    NOTE(review): `type`, `fold`, `md_path`, `agg_num`, `hop` and `thres`
    are not defined in this function — presumably module-level globals;
    confirm. (`type` also shadows the builtin.)
    """
    # init paths for the selected scene ('home' or 'resi')
    if type=='home':
        fe_fd = cfg.dev_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        id_to_lb = cfg.id_to_lb_home
        tr_txt = cfg.dev_evaluation_fd + '/home_fold' + str(fold) + '_train.txt'
        te_txt = cfg.dev_evaluation_fd + '/home_fold' + str(fold) + '_evaluate.txt'
        meta_fd = cfg.dev_meta_home_fd
    if type=='resi':
        fe_fd = cfg.dev_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        id_to_lb = cfg.id_to_lb_resi
        tr_txt = cfg.dev_evaluation_fd + '/residential_area_fold' + str(fold) + '_train.txt'
        te_txt = cfg.dev_evaluation_fd + '/residential_area_fold' + str(fold) + '_evaluate.txt'
        meta_fd = cfg.dev_meta_resi_fd

    n_out = len( labels )

    # load model
    md = serializations.load( md_path )

    # get wav names to be detected
    te_names = pp_dev_data.GetWavNamesFromTxt( te_txt )

    # do recognize for each test audio
    names = os.listdir( fe_fd )
    names = sorted( names )
    y_pred_list = []

    # detect and write out to txt
    pp_dev_data.CreateFolder( cfg.dev_results_fd )
    file_list = []
    for na in names:
        # Only process files in this fold's evaluation split (matched on the
        # 4-character id prefix of the feature file name).
        if na[0:4] in te_names:
            print na
            gt_file = meta_fd + '/' + na[0:4] + '.ann'
            out_file = cfg.dev_results_fd + '/'+na[0:4]+'_detect.ann'

            # Load 2-D feature matrix and aggregate frames into 3-D blocks.
            X = cPickle.load( open( fe_fd+'/'+na, 'rb' ) )
            X = mat_2d_to_3d( X, agg_num, hop )
            y_pred = md.predict( X )

            # Threshold predictions into labeled segments, write to txt.
            y_pred_list.append( y_pred )
            out_list = pp_dev_data.OutMatToList( y_pred, thres, id_to_lb )
            pp_dev_data.PrintListToTxt( out_list, out_file )

            # Pair reference & estimated annotation files for scoring.
            file_list.append( { 'reference_file': gt_file, 'estimated_file': out_file } )

    # print results for this fold
    pp_dev_data.PrintScore( file_list, labels )
# ----- Example #4 -----
def recognize(md_path, te_fe_fd, te_csv_file, n_concat, hop, scaler):
    """Recognize each test scene and print accuracy statistics.

    Args:
      md_path: string. Path of model.
      te_fe_fd: string. Folder path containing testing features.
      te_csv_file: string. Path of test csv file.
      n_concat: integer. Number of frames to concatenate.
      hop: integer. Number of frames to hop.
      scaler: None | scaler object.
    """
    # Load model
    md = serializations.load(md_path)

    # Recognize and get statistics
    n_labels = len(cfg.labels)
    confuse_mat = np.zeros((n_labels, n_labels))  # confusion matrix
    frame_based_accs = []

    # Get test file names (tab-separated "<path>\t<label>" rows).
    with open(te_csv_file, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    # Predict for each scene
    for li in lis:
        # Load data; strip folder prefix and 4-char extension from the name.
        [na, lb] = li[0].split('\t')
        na = na.split('/')[1][0:-4]
        path = te_fe_fd + '/' + na + '.f'
        x = cPickle.load(open(path, 'rb'))
        if scaler:
            x = scaler.transform(x)
        x = mat_2d_to_3d(x, n_concat, hop)

        # Predict: clip label is the mode of the per-block argmax predictions.
        p_y_preds = md.predict(x)[0]  # (n_block,label)
        pred_ids = np.argmax(p_y_preds, axis=-1)  # (n_block,)
        pred_id = int(get_mode_value(pred_ids))
        gt_id = cfg.lb_to_id[lb]

        # Statistics
        confuse_mat[gt_id, pred_id] += 1
        n_correct_frames = list(pred_ids).count(gt_id)
        frame_based_accs += [float(n_correct_frames) / len(pred_ids)]

    # Clip accuracy = trace of confusion matrix / total count.
    clip_based_acc = np.sum(np.diag(
        np.diag(confuse_mat))) / np.sum(confuse_mat)
    frame_based_acc = np.mean(frame_based_accs)

    print 'event_acc:', clip_based_acc
    print 'frame_acc:', frame_based_acc
    print confuse_mat
def recognize(md_path, te_fe_fd, te_csv_file, n_concat, hop, scaler):
    """Recognize each test scene and print accuracy statistics.

    NOTE(review): near-identical duplicate of the other `recognize`
    definition in this file; consider deduplicating.

    Args:
      md_path: string. Path of model.
      te_fe_fd: string. Folder path containing testing features.
      te_csv_file: string. Path of test csv file.
      n_concat: integer. Number of frames to concatenate.
      hop: integer. Number of frames to hop.
      scaler: None | scaler object.
    """
    # Load model
    md = serializations.load(md_path)

    # Recognize and get statistics
    n_labels = len(cfg.labels)
    confuse_mat = np.zeros((n_labels, n_labels))      # confusion matrix
    frame_based_accs = []

    # Get test file names (tab-separated "<path>\t<label>" rows).
    with open(te_csv_file, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    # Predict for each scene
    for li in lis:
        # Load data; strip folder prefix and 4-char extension from the name.
        [na, lb] = li[0].split('\t')
        na = na.split('/')[1][0:-4]
        path = te_fe_fd + '/' + na + '.f'
        x = cPickle.load(open(path, 'rb'))
        if scaler:
            x = scaler.transform(x)
        x = mat_2d_to_3d(x, n_concat, hop)

        # Predict: clip label is the mode of the per-block argmax predictions.
        p_y_preds = md.predict(x)[0]        # (n_block,label)
        pred_ids = np.argmax(p_y_preds, axis=-1)     # (n_block,)
        pred_id = int(get_mode_value(pred_ids))
        gt_id = cfg.lb_to_id[lb]

        # Statistics
        confuse_mat[gt_id, pred_id] += 1
        n_correct_frames = list(pred_ids).count(gt_id)
        frame_based_accs += [float(n_correct_frames) / len(pred_ids)]

    # Clip accuracy = trace of confusion matrix / total count.
    clip_based_acc = np.sum(np.diag(np.diag(confuse_mat))) / np.sum(confuse_mat)
    frame_based_acc = np.mean(frame_based_accs)

    print 'event_acc:', clip_based_acc
    print 'frame_acc:', frame_based_acc
    print confuse_mat
# ----- Example #6 -----
def plot_seg_masks(args):
    """Plot learned segmentation masks of 0 dB test mixtures, next to their
    log-mel spectrograms.

    Args:
      args: namespace with `workspace` and `model_name` attributes.
    """
    # Fix: `workspace` was read below but never assigned. Sibling functions
    # take it from args, so do the same here — confirm against the caller.
    workspace = args.workspace

    # Load data.
    te_pack_path = os.path.join(workspace, "packed_features", "logmel",
                                "testing.h5")
    scaler_path = os.path.join(workspace, "scalers", "logmel",
                               "training.scaler")

    with h5py.File(te_pack_path, 'r') as hf:
        te_na_list = list(hf.get('na_list'))
        te_x = np.array(hf.get('x'))
        te_y = np.array(hf.get('y'))

    te_x_unscaled = te_x  # unscaled x for plot.
    # Close the scaler file handle deterministically.
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    te_x = pp_data.do_scaler_on_x3d(te_x, scaler)

    # Load model.
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           args.model_name)
    md = serializations.load(md_path)

    # Observe function for the 'seg_masks' layer output.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, te_x, batch_size=50, tr_phase=0.)
    print("Segmentation masks: %s" % (seg_masks.shape, ))

    # Plot segmentation masks for each 0 dB mixture.
    for i1 in xrange(len(seg_masks)):
        na = te_na_list[i1]
        if ".mix_0db.wav" in na:
            print(na)
            gt_y = te_y[i1].astype(np.float32)
            print(gt_y)
            print("Ground truth: %s" % cfg.events[np.argmax(gt_y)])

            # One subplot per event class plus a background ('bg') mask.
            events_ex = cfg.events + ['bg']
            fig, axs = plt.subplots(3, 2, sharex=True)
            axs[0, 0].matshow(te_x_unscaled[i1].T,
                              origin='lower',
                              aspect='auto')
            axs[0, 0].set_title("log Mel spectrogram")
            # Python 2 integer division lays masks out on a 2x2 grid.
            for i2 in xrange(0, 4):
                axs[i2 / 2 + 1, i2 % 2].matshow(seg_masks[i1, i2].T,
                                                origin='lower',
                                                aspect='auto',
                                                vmin=0,
                                                vmax=1)
                axs[i2 / 2 + 1, i2 % 2].set_title(events_ex[i2])
            plt.show()
def plot_hotmap(args):
    """Plot the 'hotmap' layer activations of a trained model for each test
    clip, alongside the input log-mel spectrogram.

    NOTE(review): `is_scale` is not defined in this function — presumably a
    module-level global; confirm. The `i2 / 4` subplot indexing relies on
    Python 2 integer division.
    """
    workspace = cfg.workspace
    events = cfg.events
    md_na = args.model_name
    n_events = args.n_events
    te_fold = cfg.te_fold

    # Load train/test features and labels for this n_events configuration.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     is_scale=is_scale)

    # Load trained model.
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, md_na)
    md = serializations.load(md_path)

    x = te_x
    y = te_at_y

    # Observe the 'hotmap' layer output for all test clips.
    observe_nodes = [md.find_layer('hotmap').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [a4] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.)
    print a4.shape

    for i1 in xrange(len(a4)):
        # if te_na_list[i1] == 'CR_lounge_220110_0731.s2700_chunk48':
        print(y[i1])

        # print np.mean(a4[i1], axis=(1,2))

        # 5x4 grid: spectrogram on top, then one hotmap per event (16 events).
        fig, axs = plt.subplots(5, 4, sharex=True)
        axs[0, 0].matshow(x[i1].T, origin='lower', aspect='auto')
        for i2 in xrange(16):
            axs[i2 / 4 + 1, i2 % 4].matshow(a4[i1, i2].T,
                                            origin='lower',
                                            aspect='auto',
                                            vmin=0,
                                            vmax=1)
            axs[i2 / 4 + 1, i2 % 4].set_title(events[i2])
        plt.show()
def detect():
    # init paths
    if type == 'home':
        fe_fd = cfg.eva_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        id_to_lb = cfg.id_to_lb_home
    if type == 'resi':
        fe_fd = cfg.eva_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        id_to_lb = cfg.id_to_lb_resi

    n_out = len(labels)

    # load model
    md = serializations.load(md_path)

    # do recognize for each test audio
    names = os.listdir(fe_fd)
    names = sorted(names)
    pp_dev_data.CreateFolder(cfg.eva_results_fd)
    pp_dev_data.CreateFolder(cfg.eva_results_fd + '/' + type)

    # detect and write out for all audios
    for na in names:
        X = cPickle.load(open(fe_fd + '/' + na, 'rb'))
        X = mat_2d_to_3d(X, agg_num, hop)
        y_pred = md.predict(X)
        outlist = pp_dev_data.OutMatToList(y_pred, thres, id_to_lb)

        full_na = type + '/audio/' + na[0:4] + '.wav'
        out_txt_path = cfg.eva_results_fd + '/' + type + '/' + na[
            0:4] + '_detect.ann'
        f = open(out_txt_path, 'w')
        for li in outlist:
            f.write(full_na + '\t' + str(li['event_onset']) + '\t' +
                    str(li['event_offset']) + '\t' + li['event_label'] + '\n')
        print 'Write out detection result to', out_txt_path, 'successfully!'

    f.close()
def detect():
    # init paths
    if type=='home':
        fe_fd = cfg.eva_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        id_to_lb = cfg.id_to_lb_home
    if type=='resi':
        fe_fd = cfg.eva_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        id_to_lb = cfg.id_to_lb_resi
    
    n_out = len( labels )
    
    # load model
    md = serializations.load( md_path )
    
    # do recognize for each test audio
    names = os.listdir( fe_fd )
    names = sorted( names )
    pp_dev_data.CreateFolder( cfg.eva_results_fd )
    pp_dev_data.CreateFolder( cfg.eva_results_fd+'/'+type )
    
    # detect and write out for all audios
    for na in names:
        X = cPickle.load( open( fe_fd+'/'+na, 'rb' ) )
        X = mat_2d_to_3d( X, agg_num, hop )
        y_pred = md.predict( X )
        outlist = pp_dev_data.OutMatToList( y_pred, thres, id_to_lb )
        
        full_na = type + '/audio/' + na[0:4] + '.wav'
        out_txt_path = cfg.eva_results_fd+'/'+type+'/'+na[0:4]+'_detect.ann'
        f = open( out_txt_path, 'w')
        for li in outlist:
            f.write( full_na + '\t' + str(li['event_onset']) + '\t' + str(li['event_offset']) + '\t' + li['event_label'] + '\n' )
        print 'Write out detection result to', out_txt_path, 'successfully!'
    
    f.close()
# ----- Example #10 -----
import config as cfg
from main_dnn import mul
from main_rnn import get_last
from mir_eval.separation import bss_eval_sources

# Hyper-parameters for inference.
# NOTE(review): `pp_data`, `serializations` and `np` are not in the visible
# import block — presumably imported elsewhere in the file; confirm.
n_freq = 513
agg_num = 3  # This value should be the same as the training phase!
hop = 1  # hop must be 1
n_hid = 500

# load data: mixture (2-D and 3-D) plus both ground-truth channels.
te_X2d_mix, te_X3d_mix, te_y2d_chn0, te_y2d_chn1, te_y3d_chn0, te_y3d_chn1 = pp_data.LoadData(
    cfg.fe_fft_fd, agg_num, hop, [cfg.te_list[0]])

# load model
md = serializations.load('Md/md100.p')

# Predict magnitude spectrograms of both channels from the mixture magnitude.
[out_chn0, out_chn1] = md.predict(np.abs(te_X3d_mix))

# recover wav: predicted channels are rebuilt from magnitudes plus the
# mixture spectrogram (presumably its phase — verify recover_wav_from_abs);
# ground-truth channels from their complex spectrograms.
s_out_chn0 = pp_data.recover_wav_from_abs(out_chn0, te_X2d_mix)
s_out_chn1 = pp_data.recover_wav_from_abs(out_chn1, te_X2d_mix)
s_gt_chn0 = pp_data.recover_wav_from_cmplx(te_y2d_chn0)
s_gt_chn1 = pp_data.recover_wav_from_cmplx(te_y2d_chn1)

# write out wavs at a 16 kHz sample rate
pp_data.write_wav(s_out_chn0, 16000.,
                  cfg.results_fd + '/' + 'recover_chn0.wav')
pp_data.write_wav(s_out_chn1, 16000.,
                  cfg.results_fd + '/' + 'recover_chn1.wav')
# ----- Example #11 -----
'''
SUMMARY:  plot 1-st autoencoder learned weights
AUTHOR:   Qiuqiang Kong
Created:  2016.10.06
Modified: -
--------------------------------------
'''
from hat import serializations
import matplotlib.pyplot as plt

# load weights of 1-st layer
md = serializations.load('Results/md_ae1.p')
W = md.find_layer('a1').W_

# plot autoencoder learned weights: each column of W is one hidden unit's
# weight vector, reshaped to a 28x28 image (presumably MNIST-sized input —
# confirm against the training script).
num_to_plot = 10
for i1 in range(num_to_plot):
    w = W[:, i1].reshape((28, 28))
    plt.matshow(w)
    plt.show()
# ----- Example #12 -----
from hat import serializations
import prepare_dev_data as pp_dev_data
import config as cfg
from evaluation import *
import pickle
import cPickle
import os

# hyper-params
agg_num = 100  # should be same as training phase
hop = 1
eva_fe_fd = cfg.eva_fe_mel_fd
thres = 0.2

# load model
# NOTE(review): `serializations` and `mat_2d_to_3d` are not in the visible
# import block — presumably imported elsewhere in the file; confirm.
md = serializations.load(cfg.dev_md_fd + '/md100.p')

# evaluate for each test feature
names = os.listdir(cfg.eva_wav_fd)
names = sorted(names)
results = []
if not os.path.exists(cfg.eva_results_fd): os.makedirs(cfg.eva_results_fd)

for na in names:
    print na
    # load data: 2-D feature matrix aggregated into 3-D blocks
    te_fe = eva_fe_fd + '/' + na[0:-4] + '.f'
    X = cPickle.load(open(te_fe, 'rb'))
    X3d = mat_2d_to_3d(X, agg_num, hop)

    # detect
    # NOTE(review): the script appears truncated here — `X3d` and `results`
    # are never used in the visible code; the detection step is missing.
def recognize_on_test_data():
    """Score each test clip for presence (bird detection) and write a CSV of
    `<name>,<score>` rows.

    NOTE(review): `pad_trunc_seqs`, `pp_data`, `np` and `cPickle` come from
    module scope. The `if i1%1==0` guard is always true, so `score` is
    always bound before the final `f.write`; if the modulus were changed,
    `score` could be stale or unbound — confirm the intent.
    """
    # test_fe_fd = cfg.test_denoise_fe_enhance_mel_fd
    test_fe_fd = cfg.test_denoise_fe_enhance_pool_fft_fd

    # load data
    md = serializations.load( cfg.wbl_dev_md_fd+'/cnn_fft/md3000_iters.p' )

    names = os.listdir( test_fe_fd )
    names = sorted(names)
    i1 = 0

    f = open(cfg.scrap_fd + "/test_bird_result.csv", 'w')

    for na in names:
        # Write the row separator before every row except the first.
        if i1!=0:
            f.write("\n")

        if i1%1==0:

            path = test_fe_fd + "/" + na
            X = cPickle.load( open( path, 'rb' ) )
            [n_chunks, n_freq] = X.shape
            #X = pp_data.wipe_click2d( X )


            # Add a singleton batch axis; amplify the test features.
            X = X.reshape( (1, n_chunks, n_freq) )
            X *=10000   # amplitude test data, which is useful


            # Pad/truncate to half the configured duration; mask marks valid frames.
            n_pad = int( cfg.n_duration/2 )
            X, mask = pad_trunc_seqs( X, n_pad, 'post' )

            mask = pp_data.cut_test_fe_tail( mask )
            X *= mask[:,:,None]

            # Classifier output and frame-level detector output, both masked
            # to the valid frames.
            [out3d, detect3d] = md.predict( [X, mask], batch_size=100 )
            out3d *= mask[:,:,None]
            detect3d *= mask[:,:,None]

            # Attention-style pooling: normalize detector values into a
            # distribution over frames, then average the classifier with it.
            uni_mu = detect3d[0,:,0] / ( np.sum( detect3d[0,:,0] ) + 1e-8 )
            score = np.sum( out3d[0,:,0] * uni_mu )
            if score < 0.00001: score=0



            print i1, na, score, np.sum(out3d[0,:,0]*detect3d[0,:,0]), np.sum(detect3d[0,:,0])

            # # Plot for debug!
            # fig, axs = plt.subplots(4, sharex=True)
            # axs[0].matshow( np.log(X[0,:,:].T), origin='lower', aspect='auto' )
            # axs[0].set_title('mel spectrogram')
            # 
            # axs[1].stem( detect3d[0,:,0] )
            # axs[1].set_ylim([0,1])
            # axs[1].set_title('detector')
            # 
            # 
            # axs[2].stem( out3d[0] )
            # axs[2].set_ylim([0,1])
            # axs[2].set_title('classifier')
            # 
            # 
            # axs[3].stem( detect3d[0,:,0]*out3d[0,:,0] )
            # axs[3].set_ylim([0,1])
            # axs[3].set_title(score)
            # plt.show()


        f.write(na[0:-2] + "," + str(score))
        i1 += 1

    f.close()
# ----- Example #14 -----
def recognize(args):
    """Run audio tagging (AT) and sound event detection (SED) inference on
    all 0 dB test mixtures, then dump predictions and ground truths to a
    single pickle file.

    Args:
      args: namespace with `workspace` and `model_name` attributes.
    """
    workspace = args.workspace
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           args.model_name)
    t1 = time.time()

    # Load scaler (close the handle deterministically).
    scaler_path = os.path.join(workspace, "scalers", "logmel",
                               "training.scaler")
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)

    # Load model.
    md = serializations.load(md_path)

    # Observe function for per-frame segmentation masks.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)

    audio_dir = os.path.join(workspace, "mixed_audio", "testing")
    names = os.listdir(audio_dir)

    at_pd_ary = []
    at_gt_ary = []
    sed_pd_ary = []
    sed_gt_ary = []

    # For all audio clips.
    for na in names:
        if '.mix_0db.wav' in na:
            logging.info(na)

            # Load audio.
            bare_na = os.path.splitext(os.path.splitext(na)[0])[0]
            audio_path = os.path.join(audio_dir, na)
            (bg_audio, event_audio, fs) = pp_data.read_audio_stereo(audio_path)
            mixed_audio = bg_audio + event_audio

            # Load yaml metadata for this mixture.
            yaml_path = os.path.join(audio_dir, "%s.yaml" % bare_na)
            with open(yaml_path, 'r') as f:
                # NOTE: yaml.load without an explicit Loader is unsafe on
                # untrusted input; kept as-is because these files are
                # produced by this project.
                data = yaml.load(f)
            event_type = data['event_type']

            # Calculate feature.
            x = pp_data.calc_feat(mixed_audio)
            x3d = pp_data.do_scaler_on_x3d(x[np.newaxis, ...], scaler)

            # Ground truth one-hot AT target.
            gt_y = [0, 0, 0, 0]
            gt_y[cfg.lb_to_ix[event_type]] = 1
            at_gt_ary.append(gt_y)

            # Audio tagging (AT) prediction.
            [pred_y] = md.predict(x3d)  # (1, n_events+1)
            pred_y = pred_y[0]  # (n_events+1,)
            at_pd_ary.append(pred_y)

            # Sound event detection (SED) prediction.
            [masks] = md.run_function(
                f_forward, x3d, batch_size=10,
                tr_phase=0.)  # (1, n_events+1, n_time, n_freq)
            masks = masks[0]  # (n_events+1, n_time, n_freq)
            sed_pd = np.mean(masks, axis=-1).T  # (n_time, n_events+1)
            sed_pd_ary.append(sed_pd)

            # Frame-level SED ground truth from the annotated event segment.
            sed_gt = np.zeros_like(sed_pd)
            [bgn_sec, fin_sec] = data['event_segment']
            bgn_fr = int(bgn_sec * cfg.sample_rate /
                         float(cfg.n_window - cfg.n_overlap))
            fin_fr = int(fin_sec * cfg.sample_rate /
                         float(cfg.n_window - cfg.n_overlap))
            sed_gt[bgn_fr:fin_fr, cfg.lb_to_ix[event_type]] = 1
            sed_gt_ary.append(sed_gt)

    at_pd_ary = np.array(at_pd_ary)
    at_gt_ary = np.array(at_gt_ary)
    sed_pd_ary = np.array(sed_pd_ary)
    sed_gt_ary = np.array(sed_gt_ary)

    # Write out AT and SED presence probabilites.
    logging.info("at_pd_ary.shape: %s" % (at_pd_ary.shape, ))
    logging.info("at_gt_ary.shape: %s" % (at_gt_ary.shape, ))
    logging.info("sed_pd_ary.shape: %s" % (sed_pd_ary.shape, ))
    logging.info("sed_gt_ary.shape: %s" % (sed_gt_ary.shape, ))
    # Renamed from `dict`, which shadowed the builtin.
    out_dict = {}
    out_dict['at_pd_ary'] = at_pd_ary
    out_dict['at_gt_ary'] = at_gt_ary
    out_dict['sed_pd_ary'] = sed_pd_ary
    out_dict['sed_gt_ary'] = sed_gt_ary
    out_path = os.path.join(workspace, "_tmp", "_at_sed_dict.p")
    pp_data.create_folder(os.path.dirname(out_path))
    # Close the pickle file handle deterministically (was leaked before).
    with open(out_path, 'wb') as f:
        cPickle.dump(out_dict, f, protocol=cPickle.HIGHEST_PROTOCOL)
    logging.info("Recognize time: %s" % (time.time() - t1, ))
from hat.metrics import prec_recall_fvalue
import hat.backend as K
import config as cfg
import prepare_dev_data as pp_dev_data
import prepare_eva_data as pp_eva_data
import csv
import cPickle

# hyper-params
agg_num = 11
hop = 15
fold = 1
n_labels = len( cfg.labels )

# load model
# NOTE(review): `serializations` is not in the visible import block —
# presumably imported elsewhere in the file; confirm.
md = serializations.load( cfg.eva_md_fd + '/md10.p' )

# prepare data
te_X = pp_eva_data.GetAllData( cfg.eva_fe_mel_fd, cfg.eva_csv_path, agg_num, hop )

# do recognize and evaluation
thres = 0.4     # thres, tune to prec=recall
n_labels = len( cfg.labels )

# Open the Task 4 results file and read the evaluation csv rows.
pp_dev_data.CreateFolder( cfg.eva_results_fd )
txt_out_path = cfg.eva_results_fd+'/task4_results.txt'
fwrite = open( txt_out_path, 'w')
with open( cfg.eva_csv_path, 'rb') as f:
    reader = csv.reader(f)
    lis = list(reader)
# ----- Example #16 -----
def jsc_separation(args):
    """Joint separation-classification (JSC) source separation.

    For each test mixture: compute segmentation masks on the log-mel
    spectrogram, project them back to the linear-frequency STFT domain via a
    pseudo-inverse mel filterbank, then either plot the masks
    (``args.plot_only``) or synthesize and write separated waveforms.
    """
    workspace = args.workspace

    # Load the feature scaler fitted on training data.
    scaler_path = os.path.join(workspace, "scalers", "logmel",
                               "training.scaler")
    scaler = pickle.load(open(scaler_path, 'rb'))

    # Load the trained model.
    md_path = os.path.join(workspace, "models", "main", args.model_name)
    md = serializations.load(md_path)

    out_dir = os.path.join(workspace, "separated_wavs", "jsc_separation")
    pp_data.create_folder(out_dir)

    # Forward function that exposes the 'seg_masks' layer output.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)

    audio_dir = os.path.join(os.path.join(workspace, "mixed_audio", "testing"))
    names = os.listdir(audio_dir)

    # STFT parameters; recover_scaler compensates the Hamming-window energy
    # when resynthesizing waveforms.
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    ham_win = np.hamming(n_window)
    recover_scaler = np.sqrt((ham_win**2).sum())

    # Mel filterbank and its (pseudo-)inverse for mel -> linear projection.
    melW = librosa.filters.mel(sr=fs,
                               n_fft=n_window,
                               n_mels=64,
                               fmin=0.,
                               fmax=fs / 2)
    inverse_melW = get_inverse_W(melW)

    for na in names:
        if ".mix" in na:
            # Read yaml metadata for this mixture.
            # NOTE(review): yaml.load without an explicit Loader is unsafe on
            # untrusted input; these files are produced by this project.
            bare_name = os.path.splitext(os.path.splitext(na)[0])[0]
            yaml_path = os.path.join(audio_dir, "%s.yaml" % bare_name)
            with open(yaml_path, 'r') as f:
                data = yaml.load(f)
            event_type = data['event_type']
            print(na, event_type)

            # Read audio: background + event channels; sum them to the mixture.
            audio_path = os.path.join(audio_dir, na)
            (bg_audio, event_audio, _) = pp_data.read_audio_stereo(audio_path)
            mixed_audio = bg_audio + event_audio

            # Complex spectrograms of background, event and mixture.
            [f, t, bg_spec] = signal.spectral.spectrogram(x=bg_audio,
                                                          window=ham_win,
                                                          nperseg=n_window,
                                                          noverlap=n_overlap,
                                                          detrend=False,
                                                          return_onesided=True,
                                                          scaling='density',
                                                          mode='complex')

            [f, t,
             event_spec] = signal.spectral.spectrogram(x=event_audio,
                                                       window=ham_win,
                                                       nperseg=n_window,
                                                       noverlap=n_overlap,
                                                       detrend=False,
                                                       return_onesided=True,
                                                       scaling='density',
                                                       mode='complex')

            [f, t,
             mixed_spec] = signal.spectral.spectrogram(x=mixed_audio,
                                                       window=ham_win,
                                                       nperseg=n_window,
                                                       noverlap=n_overlap,
                                                       detrend=False,
                                                       return_onesided=True,
                                                       scaling='density',
                                                       mode='complex')

            # Transpose to (n_time, n_freq).
            bg_spec = bg_spec.T
            event_spec = event_spec.T
            mixed_spec = mixed_spec.T

            # Log Mel spectrogram of the mixture, scaled for the model.
            mixed_x = pp_data.calc_feat(mixed_audio)
            x3d = pp_data.do_scaler_on_x3d(mixed_x[np.newaxis, ...], scaler)

            # Segmentation masks in mel domain, projected to linear frequency.
            [mel_masks] = md.run_function(f_forward,
                                          x3d,
                                          batch_size=10,
                                          tr_phase=0.)
            mel_masks = mel_masks[0]  # (n_time, 64)
            spec_masks = np.dot(mel_masks, inverse_melW)  # (n_time, 513)

            if args.plot_only:
                # Compare learned masks against an ideal binary mask (IBM)
                # computed from the ground-truth event/background ratio.
                mixed_mel_spec = np.dot(np.abs(mixed_spec), melW.T)
                bg_mel_spec = np.dot(np.abs(bg_spec), melW.T)
                event_mel_spec = np.dot(np.abs(event_spec), melW.T)
                ratio = 1.7  # 5 dB
                event_mask = (np.sign(event_mel_spec /
                                      (bg_mel_spec * ratio) - 1) + 1) / 2

                fig, axs = plt.subplots(3, 2, sharex=True)
                axs[0, 0].matshow(np.log(mixed_mel_spec.T),
                                  origin='lower',
                                  aspect='auto')
                axs[0, 1].matshow(event_mask.T, origin='lower', aspect='auto')
                axs[1, 0].matshow(spec_masks[0].T,
                                  origin='lower',
                                  aspect='auto',
                                  vmin=0.,
                                  vmax=1.)
                axs[1, 1].matshow(spec_masks[1].T,
                                  origin='lower',
                                  aspect='auto',
                                  vmin=0.,
                                  vmax=1.)
                axs[2, 0].matshow(spec_masks[2].T,
                                  origin='lower',
                                  aspect='auto',
                                  vmin=0.,
                                  vmax=1.)
                axs[2, 1].matshow(spec_masks[3].T,
                                  origin='lower',
                                  aspect='auto',
                                  vmin=0.,
                                  vmax=1.)
                axs[0, 0].set_title('log Mel of mixture')
                axs[0, 1].set_title('IBM of event')
                axs[1, 0].set_title('babycry')
                axs[1, 1].set_title('glassbreak')
                axs[2, 0].set_title('gunshot')
                axs[2, 1].set_title('bg')

                plt.show()

            else:
                # Apply each mask to the mixture magnitude spectrogram.
                separated_specs = spec_masks * np.abs(mixed_spec)[None, :, :]

                # Write out all events and bg.
                enlarged_events = cfg.events + ['bg']
                for i1 in xrange(4):
                    s = spectrogram_to_wave.recover_wav(
                        separated_specs[i1],
                        mixed_spec,
                        n_overlap=n_overlap,
                        winfunc=np.hamming,
                        wav_len=len(mixed_audio))
                    s *= recover_scaler
                    pp_data.write_audio(
                        os.path.join(
                            out_dir, "%s.sep_%s.wav" %
                            (bare_name, enlarged_events[i1])), s, fs)

                # Write out the mask matching the annotated event type.
                s = spectrogram_to_wave.recover_wav(
                    separated_specs[cfg.lb_to_ix[event_type]],
                    mixed_spec,
                    n_overlap=n_overlap,
                    winfunc=np.hamming,
                    wav_len=len(mixed_audio))
                s *= recover_scaler
                pp_data.write_audio(
                    os.path.join(out_dir, "%s.sep_event.wav" % bare_name), s,
                    fs)

                # Write out origin mix for reference.
                pp_data.write_audio(
                    os.path.join(out_dir, "%s.sep_mix.wav" % bare_name),
                    mixed_audio, fs)
# ----- Example #17 -----
from hat.metrics import prec_recall_fvalue
import hat.backend as K
import config as cfg
import prepare_dev_data as pp_dev_data
import prepare_eva_data as pp_eva_data
import csv
import cPickle

# Hyper-parameters for framing the features: presumably each example
# aggregates `agg_num` consecutive frames with a hop of `hop` frames —
# TODO confirm against the feature extractor.
agg_num = 11
hop = 15
fold = 1
n_labels = len(cfg.labels)

# Load the trained model snapshot (epoch 10) from the evaluation model
# folder.  NOTE(review): this unpickles a file — only load model files
# from trusted sources.
md = serializations.load(cfg.eva_md_fd + '/md10.p')

# Build the full evaluation feature set from the mel-feature folder and
# the evaluation csv.
te_X = pp_eva_data.GetAllData(cfg.eva_fe_mel_fd, cfg.eva_csv_path, agg_num,
                              hop)

# Decision threshold on the per-label outputs, tuned so that precision
# roughly equals recall.
thres = 0.4  # thres, tune to prec=recall
n_labels = len(cfg.labels)  # NOTE(review): duplicate of the assignment above

# Prepare the results folder and the task-4 submission text file.
# NOTE(review): `fwrite` is not closed anywhere in the visible span —
# presumably closed further down; confirm.
pp_dev_data.CreateFolder(cfg.eva_results_fd)
txt_out_path = cfg.eva_results_fd + '/task4_results.txt'
fwrite = open(txt_out_path, 'w')
# Read the evaluation csv into a list of rows ('rb' is the Python 2 csv idiom).
with open(cfg.eva_csv_path, 'rb') as f:
    reader = csv.reader(f)
    lis = list(reader)
def recognize0():
    # load data
    dict = cPickle.load( open( cfg.scrap_fd+'/denoise_enhance_pool_fft_all0.p', 'rb' ) )
    tr_X, tr_mask, tr_y, tr_na_list, te_X, te_mask, te_y, te_na_list = dict['tr_X'], dict['tr_mask'], dict['tr_y'], dict['tr_na_list'], dict['te_X'], dict['te_mask'], dict['te_y'], dict['te_na_list']
    
    tr_X = pp_data.wipe_click( tr_X, tr_na_list )
    te_X = pp_data.wipe_click( te_X, te_na_list )
    
    print tr_X.shape, tr_y.shape, te_X.shape, te_y.shape
    

    x = te_X
    mask = te_mask
    y = te_y
    na_list = te_na_list
    [n_songs, n_chunks,  n_freq] = x.shape
    #K = 10
    K = n_songs
    
    x= x[0:K]
    mask = mask[0:K]

    for epoch in np.arange(1000,5100,1000):
    
        md = serializations.load( cfg.wbl_dev_md_fd+'/cnn_fft/md'+str(epoch)+'_iters.p' )
        
        [out3d, detect3d] = md.predict( [x, mask], batch_size=100 )     # shape: (K, n_chunks, n_out)    
        out3d *= mask[:,:,None]
        detect3d *= mask[:,:,None]
    
        score_ary = []
        gt_ary = []
    
        for i1 in xrange(K):
            uni_mu = detect3d[i1,:,0] / np.sum( detect3d[i1,:,0] )
            score = np.sum( out3d[i1,:,0] * uni_mu )
            
            score_ary.append( score )
            gt_ary.append( y[i1] )
            
            # plot, deubg, DO NOT DELETE!
            # print i1, y[i1], na_list[i1], score, np.sum(out3d[i1,:,0]*detect3d[i1,:,0]), np.sum(detect3d[i1,:,0])
            # 
            # fig, axs = plt.subplots(4, sharex=True)
            # axs[0].matshow( np.log(x[i1,:,:].T), origin='lower', aspect='auto' )
            # axs[0].set_title('mel spectrogram')
            # 
            # axs[1].stem( detect3d[i1,:,0] )
            # axs[1].set_ylim([0,1])
            # axs[1].set_title('detector')
            # 
            # 
            # axs[2].stem( out3d[i1] )
            # axs[2].set_ylim([0,1])
            # axs[2].set_title('classifier')
            # 
            # 
            # axs[3].stem( detect3d[i1,:,0]*out3d[i1,:,0] )
            # axs[3].set_ylim([0,1])
            # axs[3].set_title('overall')
            # plt.show()
    
        acc_ary, auc = pp_data.get_auc( score_ary, gt_ary )
        plt.plot( np.arange( 0, 1+1e-6, 0.1 ), acc_ary, alpha=epoch/float(5000), color='r' )
        plt.axis( [0,1,0,1] )
        print auc
    
    plt.show()
import prepare_dev_data as pp_dev_data
import config as cfg
from evaluation import *
import pickle
import cPickle
import os

# Hyper-parameters: presumably each example aggregates `agg_num` frames
# with a hop of `hop` — TODO confirm against the feature extractor.
agg_num = 100
hop = 1
te_fe_fd = cfg.dev_te_fe_mel_fd
test_noise = '-6'   # noise-condition tag in the file name; can be '0_', '6_', '-6'
thres = 0.2         # detection threshold on the model output

# Load the trained model snapshot (epoch 100).
md = serializations.load( cfg.dev_md_fd + '/md100.p' )

# Evaluate every annotated development file matching the noise condition.
names = os.listdir(cfg.dev_ann_fd)
names = sorted(names)
results = []
if not os.path.exists( cfg.dev_results_fd ): os.makedirs( cfg.dev_results_fd )

for na in names:
    # Characters 10:12 of the annotation file name encode the noise condition.
    if na[10:12]==test_noise:
        print na
        # load data
        ann_path = cfg.dev_ann_fd + '/' + na
        gt_list = pp_dev_data.ReadAnn( ann_path )        # ground truth list
        te_fe = te_fe_fd + '/' + na[0:-4] + '.f'
        # NOTE(review): the handle passed to cPickle.load is never closed;
        # the loop body continues past the visible span of this fragment.
        X = cPickle.load( open(te_fe, 'rb') )
# ---- Example 20 ----
import os
from hat.models import Model
from hat.layers.core import InputLayer, Dense, Dropout
from hat.callbacks import SaveModel, Validation
from hat.preprocessing import sparse_to_categorical
from hat.optimizers import SGD, Adam
from hat import serializations

# Layer sizes: 784 -> 500 -> 500 -> 10 (presumably MNIST-style digits).
n_in = 784
n_hid = 500
n_out = 10

# Build a two-hidden-layer MLP: each hidden block is a ReLU Dense layer
# followed by 20% dropout; the output is a softmax over the classes.
lay_in = InputLayer(in_shape=(n_in, ))
hidden = lay_in
for _ in range(2):
    hidden = Dense(n_out=n_hid, act='relu')(hidden)
    hidden = Dropout(p_drop=0.2)(hidden)
lay_out = Dense(n_out=n_out, act='softmax')(hidden)

# Assemble and compile the model, then print an architecture summary.
md = Model(in_layers=[lay_in], out_layers=[lay_out])
md.compile()
md.summary()

# Round-trip the model through serialization: save it to disk ...
md_path = 'model.p'
serializations.save(md=md, path=md_path)

# ... and load it back.
md_load = serializations.load(md_path)