# Example #1
# 0
def spectrograms(input_type,
                 data_list,
                 labelFile,
                 savePath,
                 fft_size,
                 win_size,
                 hop_size,
                 duration,
                 data_window=100,
                 window_shift=10,
                 augment=True,
                 save=True,
                 minimum_length=1):
    """Compute one spectrogram per listed audio file and optionally save them.

    Args:
        input_type: Spectrogram type; forwarded to ``compute_spectrogram`` and
            ``augment_data`` and used in the progress message.
        data_list: Path of a text file; every line (stripped) is passed to
            ``compute_spectrogram`` as the file to process.
        labelFile: Path of a text file holding one label per line, in the
            same order as ``data_list``.
        savePath: Directory that receives the ``spec.npz`` archive.
        fft_size, win_size, hop_size, duration, minimum_length: Forwarded
            unchanged to ``compute_spectrogram``.
        data_window, window_shift: Forwarded unchanged to ``augment_data``.
        augment: Forwarded to ``compute_spectrogram``; when true every
            spectrogram/label pair is additionally expanded with
            ``augment_data``.
        save: When true the results are written to ``savePath + '/spec.npz'``
            under the keys ``spectrograms`` and ``labels``.

    Returns:
        None. With ``save=False`` the computed data is discarded.

    Raises:
        AssertionError: If augmentation is requested and the number of labels
            differs from the number of spectrograms.
    """
    from audio import compute_spectrogram

    print('Computing the ' + input_type + ' spectrograms !!')
    with open(data_list, 'r') as f:
        specs = [
            compute_spectrogram(input_type, file.strip(), fft_size, win_size,
                                hop_size, duration, augment, minimum_length)
            for file in f
        ]

    # Labels are kept as raw stripped lines and stored next to the data.
    with open(labelFile, 'r') as f:
        labels = [line.strip() for line in f]

    if augment:
        assert (len(labels) == len(specs))
        print(
            'Now performing augmentation using sliding window mechanism on original spectrogram .... '
        )

        new_data = list()
        new_labels = list()
        for spec, label in zip(specs, labels):
            d, l = augment_data(spec, label, data_window, input_type,
                                window_shift)
            # Flatten: each call yields several windows for one input.
            new_data.extend(d)
            new_labels.extend(l)

        specs = new_data
        labels = new_labels

    if save:
        from helper import makeDirectory
        makeDirectory(savePath)
        outfile = savePath + '/spec'
        # np.savez appends '.npz' to a string path and manages the file
        # itself.  Opening `outfile` beforehand only left an empty, unused
        # 'spec' file beside the real 'spec.npz' archive.
        np.savez(outfile, spectrograms=specs, labels=labels)
        print('Finished computing spectrogram and saved inside: ', savePath)
# Example #2
# 0
def run_prediction(model_path,
                   featType,
                   dataType,
                   protocal,
                   inputPath,
                   mean_std_file,
                   outBase,
                   batch_size=100,
                   activation='elu',
                   init_type='xavier',
                   targets=2,
                   fftSize=256,
                   architecture=2,
                   duration=1,
                   padding=True,
                   n_model=None,
                   inputType='mag_spec',
                   augment=True):
    """Extract features or scores for one data split and write them to disk.

    The split is loaded from ``inputPath + dataType + '/'``, normalised with
    the ``'global_mv'`` mode using ``mean_std_file``, and handed to
    ``getFeatures`` together with the remaining model settings.

    ``featType`` selects the output:
      * ``'bottleneck'`` -- saved via ``saveFeatures`` under
        ``outBase + '/features/' + dataType``.
      * ``'scores'`` -- written via ``write_scores_to_file`` to
        ``outBase + '/predictions/<dataType>_prediction.txt'``.
      * anything else -- only a warning is printed (after ``getFeatures``
        has already run).

    ``protocal`` is accepted but not used in this function.
    """
    print('outBase in run_prediction is: ', outBase)

    split_dir = inputPath + dataType + '/'
    samples, raw_labels = dataset.load_data(split_dir)

    # Global mean/variance normalisation.
    samples = dataset.normalise_data(samples, mean_std_file, 'global_mv')
    target_labels = dataset.get_labels_according_to_targets(
        raw_labels, targets)

    featureList = getFeatures(featType, inputType, samples, target_labels,
                              batch_size, model_path, n_model, activation,
                              init_type, targets, fftSize, padding,
                              architecture, duration, augment)

    if featType == 'bottleneck':
        feature_dir = outBase + '/features/'
        makeDirectory(feature_dir)
        saveFeatures(featureList, feature_dir + dataType)
        return

    if featType == 'scores':
        prediction_dir = outBase + '/predictions/'
        makeDirectory(prediction_dir)
        write_scores_to_file(featureList,
                             outfile=prediction_dir + str(dataType) +
                             '_prediction.txt')
        return

    print('PLEASE CHOSE CORRECT PARAM !!')
def trainCNN_on_Bulbul_architecture():
    """Train the CNN in ``architectures`` over a dropout x learning-rate grid.

    For every duration in ``trainingSize`` the train and dev splits are
    loaded from the ``mag_spec`` spectrogram directory and normalised with
    the ``'global_mv'`` mode (the mean/std file is computed from the training
    data when it does not exist yet).  One model is trained per
    (dropout, learning rate) pair and the validation loss/accuracy curves
    are plotted into that model's directory.

    NOTE(review): ``trainP`` and ``devP`` are not defined inside this
    function; presumably they are module-level label sources -- confirm.
    """
    # Network set-up (architecture ids are defined in model.py).
    activation = 'elu'
    init_type = 'xavier'
    architectures = [2]
    targets = 2
    fftSize = 256
    specType = 'mag_spec'
    padding = True

    # Training schedule.
    batch_size = 32
    epochs = 200
    trainingSize = [1]  # in seconds
    learning_rates = [0.0005, 0.0003, 0.0001, 0.005]

    # Regularisation switches forwarded to model.train.
    use_lr_decay = False
    wDecayFlag = False
    lossPenalty = 0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"  # not referenced below

    # Optimiser settings.
    optimizer_type = 'adam'
    b1, b2 = 0.9, 0.999
    epsilon = 0.1
    momentum = 0.95

    # Dropout settings; dropout2 is the searched grid.
    dropout1 = 0.6
    dropout3 = 0.5
    dropout2 = [0.5, 0.6]
    lambdas = [0.0005, 0.001]  # not referenced below

    experiment_root = '/homes/bc305/myphd/stage2/deeplearning.experiment1/'
    spectrogramPath = experiment_root + 'spectrograms/'
    tensorboardPath = experiment_root + 'CNN3/tensorflow_log_dir/'
    modelPath = experiment_root + 'CNN3/models/'
    run_name = '/birdsArch_max2000epochsTEMP/'

    architecture = architectures[0]

    for duration in trainingSize:
        print('Now loading the data !!')
        outPath = ''.join([
            spectrogramPath, specType, '/',
            str(fftSize), 'FFT/',
            str(duration), 'sec/'
        ])
        train_dir = outPath + 'train/'
        mean_std_file = train_dir + 'mean_std.npz'

        # Training split.
        t_data = dataset.load_data(train_dir)
        t_labels = dataset.get_labels_according_to_targets(trainP, targets)

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(t_data, mean_std_file)

        print('Shape of labels: ', t_labels.shape)
        t_data = dataset.normalise_data(t_data, mean_std_file, 'global_mv')

        # Dev split, normalised with the same statistics file.
        v_data = dataset.load_data(outPath + 'dev/')
        v_labels = dataset.get_labels_according_to_targets(devP, targets)
        v_data = dataset.normalise_data(v_data, mean_std_file, 'global_mv')

        for dropout in dropout2:
            for lr in learning_rates:
                hyp_str = ''.join([
                    '_cnnModel', str(architecture), '_keepProb_0.6_',
                    str(dropout), str(dropout3), 'lr', str(lr)
                ])

                log_dir = tensorboardPath + run_name + hyp_str
                model_save_path = modelPath + run_name + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path

                makeDirectory(model_save_path)
                print('Training model with ' + str(duration) +
                      ' sec data and cnnModel' + str(architecture))

                tLoss, vLoss, tAcc, vAcc = model.train(
                    architecture, fftSize, padding, duration,
                    t_data, t_labels, v_data, v_labels,
                    activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout1, dropout, dropout3,
                    model_save_path, log_dir, logfile,
                    wDecayFlag, lossPenalty, applyBatchNorm, init_type,
                    epochs, batch_size, targets)

                # Only the validation curves are plotted.
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc',
                             figDirectory + '/v_ls_acc.png')
def trainCNN_on_trainData():
    """Train architecture ``architectures[0]`` over a learning-rate grid.

    For each entry of ``inputTypes`` the training split is loaded and
    normalised with the ``'global_mv'`` mode (the mean/std file is computed
    from the training data when missing).  The validation data comes from
    ``dataset.get_random_data`` on the dev directory, called with
    ``batch_size`` and ``valPercentage``.  One model is trained per
    (dropout, learning rate) pair; nothing is plotted here.
    """
    # Network set-up.
    activation = 'mfm'
    init_type = 'xavier'
    architectures = [1]
    targets = 2
    padding = True
    duration = 1
    fftSize = 512
    inputTypes = ['mag_spec']

    # Training schedule.
    batch_size = 32
    epochs = 120
    use_lr_decay = True
    learning_rates = [0.0008, 0.0006, 0.0004, 0.0001]

    # Regularisation switches forwarded to model.train.
    wDecayFlag = False
    applyBatchNorm = False
    penalty = 0.001
    lossPenalty = 0.001  # not referenced below
    deviceId = "/gpu:0"  # not referenced below

    # Optimiser settings.
    optimizer_type = 'adam'
    b1, b2 = 0.9, 0.999
    epsilon = 0.1
    momentum = 0.95

    # Dropout settings; drops is the searched grid.
    dropout1 = 0.1
    dropout2 = 0.2
    drops = [0.4]
    lambdas = [0.0005, 0.001]  # not referenced below
    trainingSize = [1]  # not referenced below

    augment = True
    trainPercentage = 1.0  # not referenced below
    valPercentage = 0.8  # passed to dataset.get_random_data

    experiment_root = '/homes/bc305/myphd/stage2/deeplearning.experiment1/'
    if augment:
        spectrogramPath = experiment_root + 'spectrograms_augmented/1sec_shift/'
    else:
        spectrogramPath = experiment_root + 'spectrograms/'

    tensorboardPath = experiment_root + 'CNN3/tensorflow_log_dir/'
    modelPath = experiment_root + 'CNN3/models_augmented/'
    run_name = '/model1_max120epochs_32batch_with_0.85Decay/'

    architecture = architectures[0]

    for specType in inputTypes:
        outPath = ''.join([
            spectrogramPath, specType, '/',
            str(fftSize), 'FFT/',
            str(duration), 'sec/'
        ])
        train_dir = outPath + 'train/'
        mean_std_file = train_dir + 'mean_std.npz'

        # Training split.
        t_data, t_labels = dataset.load_data(train_dir)
        t_labels = dataset.get_labels_according_to_targets(t_labels, targets)
        assert (len(t_data) == len(t_labels))

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(t_data, mean_std_file)

        t_data = dataset.normalise_data(t_data, mean_std_file, 'global_mv')

        # Validation data drawn from the dev directory.
        v_data, v_labels = dataset.get_random_data(outPath + 'dev/',
                                                   batch_size, valPercentage)
        v_labels = dataset.get_labels_according_to_targets(v_labels, targets)
        assert (len(v_data) == len(v_labels))

        v_data = dataset.normalise_data(v_data, mean_std_file, 'global_mv')

        print('Training model on ', specType)

        for dropout in drops:
            for lr in learning_rates:
                hyp_str = ''.join([
                    'keep_', str(dropout1), '_', str(dropout2), '_',
                    str(dropout), '_', str(specType), '_Lr', str(lr)
                ])

                run_suffix = run_name + str(specType) + '/' + hyp_str
                log_dir = tensorboardPath + run_suffix
                model_save_path = modelPath + run_suffix
                logfile = model_save_path + '/training.log'

                makeDirectory(model_save_path)

                tLoss, vLoss, tAcc, vAcc = model.train(
                    specType, architecture, fftSize, padding, duration,
                    t_data, t_labels, v_data, v_labels,
                    activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout1, dropout2, dropout,
                    model_save_path, log_dir, logfile,
                    wDecayFlag, penalty, applyBatchNorm, init_type,
                    epochs, batch_size, targets, augment)
def trainCNN_on_trainData():
    """Train architecture ``architectures[0]`` on each spectrogram type.

    For every entry of ``inputTypes`` the FFT size is chosen (512 for
    ``mel_spec``/``cqt_spec``, otherwise 256), the train and dev splits are
    loaded, and both are normalised twice in sequence: first with the
    ``'utterance'`` mode, then with ``'global_mv'``.  The mean/std file is
    computed from the training data when missing.  One model is trained per
    (dropout, penalty) pair with the same dropout value passed for all three
    dropout arguments, and the validation curves are plotted afterwards.
    """
    # Network set-up.
    activation = 'mfm'
    init_type = 'xavier'
    architectures = [1]
    targets = 2
    padding = True
    duration = 1
    inputTypes = ['mel_spec', 'mag_spec', 'cqt_spec']

    # Training schedule.
    batch_size = 32
    epochs = 2000
    lr = 0.0001
    use_lr_decay = False

    # Regularisation switches forwarded to model.train.
    wDecayFlag = False
    applyBatchNorm = False
    lambdas = [0.001]  # searched penalty values
    lossPenalty = 0.001  # not referenced below
    deviceId = "/gpu:0"  # not referenced below

    # Optimiser settings.
    optimizer_type = 'adam'
    b1, b2 = 0.9, 0.999
    epsilon = 0.1
    momentum = 0.95

    drops = [1.0]  # searched dropout values
    trainingSize = [1]  # not referenced below

    augment = True
    trainPercentage = 0.8  # forwarded to model.train
    valPercentage = 0.3  # forwarded to model.train

    experiment_root = '/homes/bc305/myphd/stage2/deeplearning.experiment1/'
    if augment:
        spectrogramPath = experiment_root + 'spectrograms_augmented/1sec_shift/'
    else:
        spectrogramPath = experiment_root + 'spectrograms/'

    tensorboardPath = experiment_root + 'CNN3/tensorflow_log_dir/'
    modelPath = experiment_root + 'CNN3/models_augmented/'
    run_name = '/model1_max2000epochs_1/'

    architecture = architectures[0]

    for specType in inputTypes:
        fftSize = 512 if specType in ('mel_spec', 'cqt_spec') else 256

        print('Now loading the data with FFT size: ', fftSize)
        outPath = ''.join([
            spectrogramPath, specType, '/',
            str(fftSize), 'FFT/',
            str(duration), 'sec/'
        ])
        train_dir = outPath + 'train/'
        mean_std_file = train_dir + 'mean_std.npz'

        # Training split.
        t_data, t_labels = dataset.load_data(train_dir)
        t_labels = dataset.get_labels_according_to_targets(t_labels, targets)
        assert (len(t_data) == len(t_labels))

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(t_data, mean_std_file)

        # Both normalisation modes are applied, in this order.
        for norm_mode in ('utterance', 'global_mv'):
            t_data = dataset.normalise_data(t_data, mean_std_file, norm_mode)

        # Dev split.
        v_data, v_labels = dataset.load_data(outPath + 'dev/')
        v_labels = dataset.get_labels_according_to_targets(v_labels, targets)
        assert (len(v_data) == len(v_labels))

        for norm_mode in ('utterance', 'global_mv'):
            v_data = dataset.normalise_data(v_data, mean_std_file, norm_mode)

        print('Training model on ', specType)

        for dropout in drops:
            for penalty in lambdas:
                hyp_str = ''.join([
                    'arch', str(architecture), '_keep', str(dropout), '_',
                    str(specType), '_targets', str(targets)
                ])

                log_dir = tensorboardPath + run_name + hyp_str
                model_save_path = modelPath + run_name + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path

                makeDirectory(model_save_path)

                tLoss, vLoss, tAcc, vAcc = model.train(
                    specType, architecture, fftSize, padding, duration,
                    t_data, t_labels, v_data, v_labels,
                    activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout, dropout, dropout,
                    model_save_path, log_dir, logfile,
                    wDecayFlag, penalty, applyBatchNorm, init_type,
                    epochs, batch_size, targets, augment,
                    trainPercentage, valPercentage)

                # Only the validation curves are plotted.
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc',
                             figDirectory + '/v_ls_acc.png')
def trainCNN_on_handcrafted_features():
    """Train architecture ``architectures[0]`` on each feature type listed.

    For every entry of ``inputTypes`` the train and dev splits are loaded
    from ``<spectrogramPath>/<type>/`` and normalised with the
    ``'global_mv'`` mode (the mean/std file is computed from the training
    data when missing).  One model is trained per (dropout, learning rate)
    pair; nothing is plotted here.
    """
    # Network set-up.
    activation = 'mfm'
    init_type = 'xavier'
    architectures = [1]
    targets = 2
    padding = True
    duration = 1
    fftSize = 512
    inputTypes = ['RFCC', 'LFCC', 'SCMC', 'IMFCC']

    # Training schedule.
    batch_size = 32
    epochs = 200
    use_lr_decay = True
    learning_rates = [0.0022, 0.0008, 0.005, 0.0004]

    # Regularisation switches forwarded to model.train.
    wDecayFlag = False
    applyBatchNorm = False
    penalty = 0.001
    lossPenalty = 0.001  # not referenced below
    deviceId = "/gpu:0"  # not referenced below

    # Optimiser settings.
    optimizer_type = 'adam'
    b1, b2 = 0.9, 0.999
    epsilon = 0.1
    momentum = 0.95

    # Dropout settings; drops is the searched grid.
    dropout1 = 0.5
    dropout2 = 0.5
    drops = [0.5]
    lambdas = [0.0005, 0.001]  # not referenced below
    trainingSize = [1]  # not referenced below

    augment = True
    trainPercentage = 1.0  # not referenced below
    valPercentage = 1.0  # not referenced below

    experiment_root = '/homes/bc305/myphd/stage2/deeplearning.experiment1/'
    if augment:
        spectrogramPath = experiment_root + 'features_1sec_shift/'
    else:
        spectrogramPath = experiment_root + 'features/'

    tensorboardPath = experiment_root + 'CNN3/tensorflow_log_dir/'
    modelPath = experiment_root + 'CNN3/models_augmented/'
    run_name = '/model1_120max_handcrafted_with_0.85Decay/'

    architecture = architectures[0]

    for specType in inputTypes:
        outPath = spectrogramPath + specType + '/'
        train_dir = outPath + 'train/'
        mean_std_file = train_dir + 'mean_std.npz'

        # Training split.
        t_data, t_labels = dataset.load_data(train_dir)
        t_labels = dataset.get_labels_according_to_targets(t_labels, targets)
        assert (len(t_data) == len(t_labels))

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(t_data, mean_std_file)

        t_data = dataset.normalise_data(t_data, mean_std_file, 'global_mv')

        # Dev split, loaded in full.
        v_data, v_labels = dataset.load_data(outPath + 'dev/')
        v_labels = dataset.get_labels_according_to_targets(v_labels, targets)
        assert (len(v_data) == len(v_labels))

        v_data = dataset.normalise_data(v_data, mean_std_file, 'global_mv')

        print('Training model on ', specType)

        for dropout in drops:
            for lr in learning_rates:
                hyp_str = ''.join([
                    'keep_', str(dropout1), '_', str(dropout2), '_',
                    str(dropout), '_', str(specType), '_lr_', str(lr)
                ])
                print('Hyper-parameter string is: ', hyp_str)

                run_suffix = run_name + str(specType) + 'drops' + '/' + hyp_str
                log_dir = tensorboardPath + run_suffix
                model_save_path = modelPath + run_suffix
                logfile = model_save_path + '/training.log'

                makeDirectory(model_save_path)

                tLoss, vLoss, tAcc, vAcc = model.train(
                    specType, architecture, fftSize, padding, duration,
                    t_data, t_labels, v_data, v_labels,
                    activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout1, dropout2, dropout,
                    model_save_path, log_dir, logfile,
                    wDecayFlag, penalty, applyBatchNorm, init_type,
                    epochs, batch_size, targets, augment)
def trainCNN_on_Sparrow_architecture():
    """Train the CNN in ``architectures`` over a dropout x learning-rate grid.

    For every duration in ``trainingSize`` the train and dev splits are
    loaded from the relative ``../../spectrograms/`` tree.  The global
    mean/std file is recomputed from the training data on every call (no
    existence check), and both splits are normalised with the
    ``'global_mv'`` mode.  ``model.train`` receives the duration as the
    string ``'<duration>sec'``.  After each run the validation curves are
    plotted into the model directory.

    NOTE(review): ``trainP`` and ``devP`` are not defined inside this
    function; presumably they are module-level label sources -- confirm.
    """
    # Network set-up (architecture ids are defined in model.py).
    activation = 'elu'
    init_type = 'xavier'
    architectures = [3]
    targets = 2
    fftSize = 256
    specType = 'mag_spec'
    padding = False

    # Training schedule.
    batch_size = 32
    epochs = 1000
    trainingSize = [1]  # in seconds
    learning_rates = [0.0001, 0.00008]

    # Regularisation switches forwarded to model.train.
    use_lr_decay = False
    wDecayFlag = False
    lossPenalty = 0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"  # not referenced below

    # Optimiser settings.
    optimizer_type = 'adam'
    b1, b2 = 0.9, 0.999
    epsilon = 0.1
    momentum = 0.95

    # Dropout settings; dropouts is the searched grid.
    dropout1 = 1.0
    dropout2 = 1.0
    dropouts = [0.5, 0.4, 0.3, 0.2, 0.1]
    lambdas = [0.0005, 0.001]  # not referenced below

    architecture = architectures[0]

    for duration in trainingSize:
        print('Now loading the data !!')
        outPath = ''.join([
            '../../spectrograms/', specType, '/',
            str(fftSize), 'FFT/',
            str(duration), 'sec/'
        ])
        train_dir = outPath + 'train/'
        mean_std_file = train_dir + 'mean_std.npz'

        # Training split; statistics are always recomputed here.
        t_data = dataset.load_data(train_dir)
        t_labels = dataset.get_labels_according_to_targets(trainP, targets)
        dataset.compute_global_norm(t_data, mean_std_file)

        print('Shape of labels: ', t_labels.shape)
        t_data = dataset.normalise_data(t_data, mean_std_file, 'global_mv')

        # Dev split, normalised with the same statistics file.
        v_data = dataset.load_data(outPath + 'dev/')
        v_labels = dataset.get_labels_according_to_targets(devP, targets)
        v_data = dataset.normalise_data(v_data, mean_std_file, 'global_mv')

        # model.train is given the duration as a labelled string.
        trainSize = str(duration) + 'sec'

        for dropout in dropouts:
            for lr in learning_rates:
                hyp_str = ''.join([
                    'sparrow', '_keep_', str(dropout), '_', 'lr', str(lr),
                    '_', str(activation), '_', 'fft', str(fftSize)
                ])

                log_dir = '../tensorflow_log_dir/sparrowArch/' + hyp_str
                model_save_path = '../models/sparrowArch/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path

                makeDirectory(model_save_path)
                print('Training model with ' + str(duration) +
                      ' sec data and cnnModel' + str(architecture))

                tLoss, vLoss, tAcc, vAcc = model.train(
                    architecture, fftSize, padding, trainSize,
                    t_data, t_labels, v_data, v_labels,
                    activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout, dropout1, dropout2,
                    model_save_path, log_dir, logfile,
                    wDecayFlag, lossPenalty, applyBatchNorm, init_type,
                    epochs, batch_size, targets)

                # Only the validation curves are plotted.
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc',
                             figDirectory + '/v_ls_acc.png')
# Example #8
# 0
def trainCNN_on_trainData():
    """Train the CNN in ``architectures`` over a two-dimensional dropout grid.

    For every duration in ``trainingSize`` the train and dev splits are
    loaded from the ``mag_spec`` spectrogram directory and normalised with
    the ``'global_mv'`` mode (the mean/std file is computed from the training
    data when missing).  One model is trained per
    (``dropout1``, ``dropout2``) combination with a fixed learning rate, and
    the validation curves are plotted into that model's directory.

    NOTE(review): ``trainP`` and ``devP`` are not defined inside this
    function; presumably they are module-level label sources -- confirm.
    """
    # Network set-up (architecture ids are defined in model.py).
    activation = 'mfm'
    init_type = 'xavier'
    architectures = [5]
    targets = 2
    fftSize = 2048
    specType = 'mag_spec'
    padding = True

    # Training schedule.
    batch_size = 32
    epochs = 2000
    trainingSize = [4]  # in seconds
    lr = 0.0001

    # Regularisation switches forwarded to model.train.
    use_lr_decay = False
    wDecayFlag = False
    lossPenalty = 0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"  # not referenced below

    # Optimiser settings.
    optimizer_type = 'adam'
    b1, b2 = 0.9, 0.999
    epsilon = 0.1
    momentum = 0.95

    # Dropout settings; dropout1 and dropout2 form the searched grid.
    dropout1 = [0.2, 0.1]
    dropout2 = [0.4, 0.2]
    dropout3 = 1.0

    experiment_root = '/homes/bc305/myphd/stage2/deeplearning.experiment1/'
    spectrogramPath = experiment_root + 'spectrograms/'
    tensorboardPath = experiment_root + 'CNN3/tensorflow_log_dir/'
    modelPath = experiment_root + 'CNN3/models/'
    run_name = '/rusCNN_max2000epochs/'

    architecture = architectures[0]

    for duration in trainingSize:
        print('Now loading the data !!')
        outPath = ''.join([
            spectrogramPath, specType, '/',
            str(fftSize), 'FFT/',
            str(duration), 'sec/'
        ])
        train_dir = outPath + 'train/'
        mean_std_file = train_dir + 'mean_std.npz'

        # Training split.
        t_data = dataset.load_data(train_dir)
        t_labels = dataset.get_labels_according_to_targets(trainP, targets)

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(t_data, mean_std_file)

        print('Shape of labels: ', t_labels.shape)
        t_data = dataset.normalise_data(t_data, mean_std_file, 'global_mv')

        # Dev split, normalised with the same statistics file.
        v_data = dataset.load_data(outPath + 'dev/')
        v_labels = dataset.get_labels_according_to_targets(devP, targets)
        v_data = dataset.normalise_data(v_data, mean_std_file, 'global_mv')

        for dropout in dropout1:
            for drop in dropout2:
                hyp_str = ''.join([
                    'arch', str(architecture), '_keep_', str(dropout), '_',
                    str(drop), '_', str(duration), 'sec'
                ])

                log_dir = tensorboardPath + run_name + hyp_str
                model_save_path = modelPath + run_name + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path

                makeDirectory(model_save_path)
                print('Training model with ' + str(duration) +
                      ' sec data and cnnModel' + str(architecture))

                tLoss, vLoss, tAcc, vAcc = model.train(
                    architecture, fftSize, padding, duration,
                    t_data, t_labels, v_data, v_labels,
                    activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout, drop, dropout3,
                    model_save_path, log_dir, logfile,
                    wDecayFlag, lossPenalty, applyBatchNorm, init_type,
                    epochs, batch_size, targets)

                # Only the validation curves are plotted.
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc',
                             figDirectory + '/v_ls_acc.png')
# Example #9
# 0
def other_features(input_type,
                   data_list,
                   labelFile,
                   savePath,
                   fft_size,
                   win_size,
                   hop_size,
                   duration,
                   data_window=100,
                   window_shift=10,
                   augment=True,
                   save=True,
                   minimum_length=1):
    """Load precomputed features or compute spectrograms, then optionally save.

    TODO: merge this function with ``spectrograms``; it can replace it.

    Args:
        input_type: ``'others'`` loads precomputed features from an archive;
            any other value is treated as a spectrogram type and forwarded
            to ``compute_spectrogram``.  Also forwarded to ``augment_data``.
        data_list: For ``'others'``, path of an archive holding a
            ``features`` entry.  Otherwise, path of a text file whose
            stripped lines are passed to ``compute_spectrogram``.
        labelFile: Path of a text file holding one label per line, in the
            same order as the data.
        savePath: Directory that receives the ``spec.npz`` archive.
        fft_size, win_size, hop_size, duration, minimum_length: Forwarded
            unchanged to ``compute_spectrogram`` (unused for ``'others'``).
        data_window, window_shift: Forwarded unchanged to ``augment_data``.
        augment: Forwarded to ``compute_spectrogram``; when true every
            data/label pair is additionally expanded with ``augment_data``.
        save: When true the results are written to ``savePath + '/spec.npz'``
            under the keys ``spectrograms`` and ``labels``.

    Returns:
        None. With ``save=False`` the computed data is discarded.

    Raises:
        AssertionError: If augmentation is requested and the number of labels
            differs from the number of data items.
    """
    from audio import compute_spectrogram

    if input_type == 'others':
        # data_list is the feature archive itself in this mode.
        print('Loading the features..')
        # Close the archive once the array has been read out of it.
        with np.load(data_list) as archive:
            specs = archive['features']
        print('Length is: ', len(specs))

        # Make these features unified across the time dimension.
        specs = [update_feature_matrix(matrix) for matrix in specs]

    else:
        print('Computing the ' + input_type + ' spectrograms !!')
        with open(data_list, 'r') as f:
            specs = [
                compute_spectrogram(input_type, file.strip(), fft_size,
                                    win_size, hop_size, duration, augment,
                                    minimum_length) for file in f
            ]

    # Labels are kept as raw stripped lines and stored next to the data.
    with open(labelFile, 'r') as f:
        labels = [line.strip() for line in f]

    if augment:
        assert (len(labels) == len(specs))
        print(
            'Now performing augmentation using sliding window mechanism on original specs/features .... '
        )

        new_data = list()
        new_labels = list()
        for spec, label in zip(specs, labels):
            d, l = augment_data(spec, label, data_window, input_type,
                                window_shift)
            # Flatten: each call yields several windows for one input.
            new_data.extend(d)
            new_labels.extend(l)

        specs = new_data
        labels = new_labels

    if save:
        from helper import makeDirectory
        makeDirectory(savePath)
        outfile = savePath + '/spec'
        # np.savez appends '.npz' to a string path and manages the file
        # itself.  Opening `outfile` beforehand only left an empty, unused
        # 'spec' file beside the real 'spec.npz' archive.
        np.savez(outfile, spectrograms=specs, labels=labels)
        print('Finished computing features/spectrograms and saved inside: ',
              savePath)