def SVM_Classification_BeehiveSTATE(X_flat_train, y_train, X_flat_test, y_test, kerneloption='rbf'):

    print('\n')
    printb('Starting classification with SVM:')
    print('\n')
    printb('Classifying beehive state into: Active or Missing Queen')

    # train:
    CLF = svm.SVC(kernel=kerneloption, probability=True)
    CLF.fit(X_flat_train, y_train)
    y_pred_train = CLF.predict(X_flat_train)
    y_pred_proba_train = CLF.predict_proba(X_flat_train)
    
    Train_GroundT = y_train
    Train_Preds = y_pred_train
    Train_Preds_Proba = y_pred_proba_train[:,1]
    
    # test:
    y_pred_test = CLF.predict(X_flat_test)
    y_pred_proba_test = CLF.predict_proba(X_flat_test)
    Test_GroundT= y_test
    Test_Preds = y_pred_test
    Test_Preds_Proba = y_pred_proba_test[:,1]

    return CLF, Test_GroundT, Train_GroundT, Test_Preds, Train_Preds, Test_Preds_Proba, Train_Preds_Proba 
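# A minimal usage sketch (not from the source): calls the function above on synthetic
# flattened feature maps, then scores the returned test predictions with scikit-learn
# metrics. The 120-feature shape and the 0/1 label encoding are assumptions.
import numpy as np
from sklearn import metrics

rng = np.random.RandomState(0)
X_flat_tr, X_flat_te = rng.rand(40, 120), rng.rand(10, 120)
y_tr, y_te = np.array([0, 1] * 20), np.array([0, 1] * 5)

CLF, gt_te, gt_tr, pred_te, pred_tr, proba_te, proba_tr = SVM_Classification_BeehiveSTATE(
    X_flat_tr, y_tr, X_flat_te, y_te, kerneloption='rbf')
print('test accuracy:', metrics.accuracy_score(gt_te, pred_te))
print('test AUC:', metrics.roc_auc_score(gt_te, proba_te))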
Example #2
def BalanceData_online(y_set, x_set, sample_ids_set):
    
    ## balances already processed data (X and Y, just before classifier) by replicating samples of the least represented class.
    # input: y_set - binary labels of set, x_set - feature_maps of set, sample_ids_set - sample names in set, ( all have the same order!)
    # output: X, Y and sample_ids with replicated samples concatenated 
    
 
    printb('Balancing training data:')
    print('will randomly replicate samples from the least represented class')
    
    x2concatenate = x_set
    y2concatenate = y_set
    sample_ids2concatenate = sample_ids_set
    
    dict_items_replicate = get_items2replicate(y_set,sample_ids_set )
    #print("dict_items_replicate: ",dict_items_replicate)
    
    for i in range(len(sample_ids_set)):
        if sample_ids_set[i] in dict_items_replicate.keys() :
            
            sample_ids2concatenate =np.concatenate([sample_ids2concatenate, [sample_ids_set[i]]*dict_items_replicate[sample_ids_set[i]]])
            y2concatenate = np.concatenate([y2concatenate, [y_set[i]]*dict_items_replicate[sample_ids_set[i]]])
            x2concatenate = np.concatenate([x2concatenate, [x_set[i]]*dict_items_replicate[sample_ids_set[i]]])
            
    return y2concatenate, x2concatenate, sample_ids2concatenate
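# A minimal usage sketch (not from the source): toy arrays with an imbalanced label
# distribution. It runs inside the project, where get_items2replicate (used above)
# decides how many times each minority-class sample is replicated.
import numpy as np

y_toy = np.array([0, 0, 0, 1])
x_toy = np.random.rand(4, 120)
ids_toy = np.array(['s0', 's1', 's2', 's3'])
y_bal, x_bal, ids_bal = BalanceData_online(y_toy, x_toy, ids_toy)
print(y_bal.shape, x_bal.shape, ids_bal.shape)  # minority-class samples now appear more than once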
Example #3
def train_and_evaluate_model(model, X_train, Y_train, X_test, Y_test, y_test,
                             nb_epoch, batch_size, model_filename):
    # y_test is labels, Y_test is categorical labels

    print('Train...')
    target_names = ['missing_queen', 'active']
    stopping = EarlyStopping(monitor='val_accuracy', patience=50)
    checkpointer = ModelCheckpoint(filepath=model_filename,
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   monitor='val_accuracy',
                                   mode='max')
    results = model.fit(X_train,
                        Y_train,
                        batch_size=batch_size,
                        epochs=nb_epoch,  # Keras 2 argument name; `nb_epoch` was removed after Keras 1
                        callbacks=[stopping, checkpointer],
                        verbose=2,
                        validation_data=(X_test, Y_test))
    # prediction
    best_model = load_model(model_filename)
    probabilities = best_model.predict(X_test, batch_size=batch_size)
    predictions = probabilities.argmax(axis=-1)
    best_acc = accuracy_score(y_test, predictions)
    print('Accuracy score (best model): {}'.format(best_acc))
    #print("classification report: ", classification_report(y_test, predictions))
    report = classification_report(y_test,
                                   predictions,
                                   target_names=target_names,
                                   output_dict=True)
    cnf_matrix = confusion_matrix(y_test, predictions)
    return results, best_acc, report, cnf_matrix
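# A minimal usage sketch (not from the source): shows the relation assumed by the
# comment above between the integer labels (y_*) and the one-hot labels (Y_*).
# `build_model` and the file name are hypothetical placeholders.
from tensorflow.keras.utils import to_categorical

# Y_train = to_categorical(y_train, num_classes=2)
# Y_test = to_categorical(y_test, num_classes=2)
# results, best_acc, report, cnf = train_and_evaluate_model(
#     build_model(), X_train, Y_train, X_test, Y_test, y_test,
#     nb_epoch=100, batch_size=32, model_filename='best_model.h5')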
Example #4
def get_features_from_samples(path_audio_samples, sample_ids, raw_feature, normalization, high_level_features ): 
    #normalization = NO, z_norm, min_max
    ## function to extract features 
    #high_level_features = 0 or 1 
    #file_path = os.path.isfile(path_save_audio_labels+'matrix.mat'+'.csv') 
    n_samples_set = len(sample_ids) # 4
    feature_Maps = []
    if raw_feature== 'MFCCs20': 
         path_working= path_working_MFCC20
    elif  raw_feature=='TTBOX': 
         path_working= path_working_TTBox
    elif raw_feature=='stft':
         path_working= path_working_stft
    else: 
        path_working= path_working_cqt
    
    for sample in sample_ids:
        # raw feature extraction:
        print("sample: ",sample)
        x = raw_feature_fromSample( path_audio_samples+sample, raw_feature ) # x.shape: (4, 20, 2584)
       # print("x.shape: ",x.shape)
        # save the x feature matrix to a .mat file
     
        b = csr_matrix(x)
        savemat(path_working + sample[:-4] + '.mat', {'b': b})

        ## normalization here: if we want the results for Conv1D we use normalization
        if normalization != 'NO':
            x_norm = featureMap_normalization_block_level(x, normalizationType=normalization)
        else:
            x_norm = x
        
        if high_level_features:
            # high level feature extraction:
            if 'MFCCs' in raw_feature:
                X = compute_statistics_overMFCCs(x_norm, 'yes') # X.shape: (4 , 120)
            else: 
                X = compute_statistics_overSpectogram(x_norm)
                
            feature_map=X
        else:
            feature_map=x_norm
        
        
        feature_Maps.append(feature_map)
        
    return feature_Maps
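# A minimal usage sketch (not from the source): the sample ids, the paths and the helper
# functions (raw_feature_fromSample, the path_working_* globals, the statistics helpers)
# are assumed to be defined elsewhere in the project.
# sample_ids = ['CF003 - Active - Day - (223)__segment0.wav',
#               'CF003 - Active - Day - (223)__segment1.wav']
# feature_maps = get_features_from_samples(path_audio_samples, sample_ids,
#                                          raw_feature='MFCCs20',
#                                          normalization='z_norm',
#                                          high_level_features=1)
# X_flat = np.vstack(feature_maps)  # one row of summary statistics per sample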
Example #5
def uniform_block_size(undersized_block, block_size_samples, method='repeat'):

    lengthTofill = block_size_samples - undersized_block.size
    if method == 'zero_padding':
        new_block = np.pad(undersized_block, (0, lengthTofill),
                           'constant',
                           constant_values=(0))

    elif method == 'mean_padding':
        new_block = np.pad(undersized_block, (0, lengthTofill), 'mean')

    elif method == 'repeat':
        # 'repeat' pads by mirroring the end of the block (np.pad 'reflect' mode)
        new_block = np.pad(undersized_block, (0, lengthTofill), 'reflect')
    else:
        print(
            'methods to choose are: \'zero_padding\' ,\'mean_padding\' and \'repeat\' '
        )
        new_block = 0

    return new_block
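# A minimal usage sketch (not from the source): pad a 5-sample block up to 8 samples.
import numpy as np

short_block = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(uniform_block_size(short_block, 8, method='zero_padding'))  # [1. 2. 3. 4. 5. 0. 0. 0.]
print(uniform_block_size(short_block, 8, method='repeat'))        # mirrored tail: [1. 2. 3. 4. 5. 4. 3. 2.]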
Example #6
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    # normalize before plotting so the displayed image matches the printed values and cell text
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=25)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
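# A minimal usage sketch (not from the source): plot a toy 2x2 confusion matrix.
import itertools
import numpy as np
import matplotlib.pyplot as plt

cm_toy = np.array([[50, 5], [8, 37]])
plt.figure()
plot_confusion_matrix(cm_toy, classes=['active', 'missing_queen'],
                      normalize=True, title='Normalized confusion matrix')
plt.show()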
Example #7
def read_beeNotBee_annotations_saves_labels(audiofilename,
                                            block_name,
                                            blockStart,
                                            blockfinish,
                                            annotations_path,
                                            threshold=0):

    ## function: reads the corresponding annotation file (.lab) and assigns a label to one block/sample; the caller appends the label to a CSV file.
    ##
    ## inputs:
    ## audiofilename = name of the audio file (no path), block_name = name of the sample/segment, blockStart = time point in seconds where the block starts, blockfinish = time point in seconds where the block ends, annotations_path = path to the annotations folder (where the .lab files are), threshold = threshold value in seconds.
    ##
    ## outputs:
    ## labels_th = 2-element list: [0] = the label (bee / nobee) for the block and threshold considered; [1] = label strength, a value that reflects the proportion of nobee time with respect to the whole block.

    # threshold gives the minimum duration of the nobee intervals we want to consider.
    # threshold=0 uses every event as nobee whatever its duration
    # threshold=0.5 disregards intervals shorter than half a second.

    block_length = blockfinish - blockStart

    if audiofilename.startswith('#'):
        annotation_filename = audiofilename[1:-4] + '.lab'
    else:
        annotation_filename = audiofilename[0:-4] + '.lab'

    try:
        with open(annotations_path + os.sep + annotation_filename, 'r') as f:
            # EXAMPLE FILE:

            # CF003 - Active - Day - (223)
            # 0	8.0	bee
            # 8.01	15.05	nobee
            # 15.06	300.0	bee
            # .
            #

            # all files end with a dot followed by an empty line.

            print(annotations_path + os.sep + annotation_filename)
            lines = f.read().split('\n')

            labels_th = ['bee', 0.0]
            label2assign = 'bee'
            label_strength = 0
            intersected_s = 0

            for line in lines:
                if line == annotation_filename[0:-4] or line == '.' or line == '':
                    # ignore the title line, the final '.', and empty lines in the file
                    continue

                #print(line)
                parsed_line = line.split('\t')

                assert (len(parsed_line) == 3), (
                    'expected 3 fields in each line, got: ' +
                    str(len(parsed_line)))

                tp0 = float(parsed_line[0])
                tp1 = float(parsed_line[1])
                annotation_label = parsed_line[2]
                if blockfinish < tp0:  # no need to read further nobee intervals since annotation line is already after block finishes
                    break

                if annotation_label == 'nobee':

                    if tp1 - tp0 >= threshold:  # only progress if nobee interval is longer than defined threshold.

                        if tp0 > blockStart and tp0 <= blockfinish and tp1 >= blockfinish:

                            intersected_s = intersected_s + (blockfinish - tp0)
                            # |____________########|########
                            # bs          tp0      bf      tp1

                        elif tp1 >= blockStart and tp1 < blockfinish and tp0 <= blockStart:

                            intersected_s = intersected_s + (tp1 - blockStart)
                            # #####|########_____|
                            # tp0  bs     tp1    bf

                        elif tp1 >= blockStart and tp1 <= blockfinish and tp0 >= blockStart and tp0 <= blockfinish:

                            intersected_s = intersected_s + (tp1 - tp0)
                            # |_____########_____|
                            # bs   tp0    tp1    bf

                        elif tp0 <= blockStart and tp1 >= blockfinish:

                            intersected_s = intersected_s + (blockfinish -
                                                             blockStart)
                            #  ####|############|####
                            # tp0  bs           bf  tp1

                    if intersected_s > 0:
                        label2assign = 'nobee'
                    label_strength = intersected_s / block_length  # proportion of nobee length in the block

                    labels_th = [label2assign, round(label_strength, 3)]  # if label_strength == 0 --> bee segment

            assert (blockfinish <= tp1), (
                'the end of the requested block falls outside the file: block ending: '
                + str(blockfinish) + ' end of file at: ' + str(tp1))

    except FileNotFoundError as e:
        print(e, '-- Annotation file does not exist! Labelling block as unknown')
        #print(annotation_filename=audiofilename[0:-4]+'.lab')

        label2assign = 'unknown'
        label_strength = -1

        labels_th = [label2assign, label_strength]

    except Exception as e1:
        print('unknown exception: ' + str(e1))
        #quit

    return labels_th
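# A worked example (not from the source), assuming a .lab file with the lines shown in
# the comment above and a 60 s block starting at 0 s:
#   the nobee interval 8.01-15.05 s overlaps the block by 15.05 - 8.01 = 7.04 s
#   label_strength = 7.04 / 60 = 0.117 (rounded), so the block is labelled 'nobee'
# labels_th = read_beeNotBee_annotations_saves_labels(
#     'CF003 - Active - Day - (223).wav', 'CF003 - Active - Day - (223)__segment0',
#     blockStart=0, blockfinish=60, annotations_path='annotations', threshold=0)
# print(labels_th)  # expected: ['nobee', 0.117]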
Example #8
def load_audioFiles_saves_segments(path_audioFiles,
                                   path_save_audio_labels,
                                   block_size,
                                   thresholds,
                                   annotations_path,
                                   read_beeNotBee_annotations='yes',
                                   save_audioSegments='yes'):

    audiofilenames_list = [
        os.path.basename(x) for x in glob.glob(path_audioFiles + '*.mp3')
    ]
    audiofilenames_list.extend(
        [os.path.basename(x) for x in glob.glob(path_audioFiles + '*.wav')])

    printb("Number of audiofiles in folder: " + str(len(audiofilenames_list)))
    # print("audiofilenames_list ",audiofilenames_list)

    fi = 0
    for file_name in audiofilenames_list:
        fi = fi + 1
        # print('\n')
        # printb('Processing '+ file_name+'          :::file number:  '+str(fi)+' --------->of '+str(len(audiofilenames_list)))

        offset = 0
        block_id = 0

        while 1:

            # READ ONE BLOCK OF THE AUDIO FILE
            try:
                ## Read one block of 60 seconds
                block, sr = librosa.core.load(path_audioFiles + file_name,
                                              offset=offset,
                                              duration=block_size)
            # print(block.shape , sr)
            # print('-----------------Reading segment '+str(block_id))
            except ValueError as e:
                if 'Input signal length' in str(e):
                    block = np.arange(0)
            except FileNotFoundError as e1:
                print(e1, ' but continuing anyway')

            ##print("test")
            if block.shape[0] > 0:  # when the total length is a multiple of block_size the last block has 0 length; this bypasses those cases.

                block_name = file_name[0:-4] + '__segment' + str(block_id)
                ## print(block_name)

                # READ BEE NOT_BEE ANNOTATIONS:
                if read_beeNotBee_annotations == 'yes':
                    # print('---------------------Will read BeeNotbee anotations and create labels for segment'+str(block_id))
                    blockStart = offset
                    ##print("blockStart: ",blockStart)
                    blockfinish = offset + block_size
                    ##print("blockfinish: ",blockfinish)

                    for th in thresholds:
                        #print("th::::::::::", th)
                        label_file_exists = os.path.isfile(
                            path_save_audio_labels + 'labels_BeeNotBee_th' +
                            str(th) + '.csv')
                        with open(path_save_audio_labels +
                                  'labels_BeeNotBee_th' + str(th) + '.csv',
                                  'a',
                                  newline='') as label_file:
                            writer = csv.DictWriter(label_file,
                                                    fieldnames=[
                                                        'sample_name',
                                                        'segment_start',
                                                        'segment_finish',
                                                        'label_strength',
                                                        'label'
                                                    ],
                                                    delimiter=',')
                            if not label_file_exists:
                                writer.writeheader()
                        ##  print("start read_beeNotBee_annotation_saves_labels")
                            label_block_th = read_beeNotBee_annotations_saves_labels(
                                file_name, block_name, blockStart, blockfinish,
                                annotations_path, th)
                            # print("label_block_th : ", label_block_th)
                            writer.writerow({
                                'sample_name': block_name,
                                'segment_start': blockStart,
                                'segment_finish': blockfinish,
                                'label_strength': label_block_th[1],
                                'label': label_block_th[0]
                            })
                        # print('-----------------Wrote label for th '+ str(th)+' seconds of segment'+str(block_id)  )

                # MAKE BLOCK OF THE SAME SIZE:
                if block.shape[0] < block_size * sr:
                    block = uniform_block_size(block, block_size * sr,
                                               'repeat')
                # print('-----------------Uniformizing block length of segment'+str(block_id)  )

                # Save audio segment:
                if save_audioSegments == 'yes' and (
                        not os.path.exists(path_save_audio_labels +
                                           block_name + '.wav')
                ):  #saves only if option is chosen and if block file doesn't already exist.
                    # note: librosa.output.write_wav was removed in librosa 0.8; soundfile.write is the usual replacement
                    librosa.output.write_wav(
                        path_save_audio_labels + block_name + '.wav', block,
                        sr)
                    #print( '-----------------Saved wav file for segment '+str(block_id))

            else:
                #print('----------------- no more segments for this file--------------------------------------')
                # print('\n')
                break
            offset += block_size
            block_id += 1
    printb(
        '______________________________No more audioFiles___________________________________________________'
    )

    return
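# A minimal usage sketch (not from the source): the paths are hypothetical placeholders.
# load_audioFiles_saves_segments(path_audioFiles='raw_audio/',
#                                path_save_audio_labels='segments/',
#                                block_size=60,
#                                thresholds=[0, 0.5],
#                                annotations_path='annotations',
#                                read_beeNotBee_annotations='yes',
#                                save_audioSegments='yes')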
def report_SVM_beehiveState_results(summary_filename, path_results, thresholds, CLF, Test_GroundT, Train_GroundT, Test_Preds, Train_Preds, Test_Preds_Proba, Train_Preds_Proba, classification_idSTRING, testFilenames,  chunk_size, save='yes'):

    

    if not os.path.isfile(summary_filename):
        with open(summary_filename, 'w') as csvfile:
            wtr = csv.writer(csvfile, delimiter=',')
            wtr.writerow(['ExperienceParameters', 'AccuracyTRAIN', 'AUC_TRAIN', 'gtACTIVEpACTIVE_TRAIN', 'gtMQUEENpACTIVE_TRAIN', 'gtACTIVEpMQUEEN_TRAIN', 'gtMQUEENpMQUEEN_TRAIN','ShannonEnthropy_TRAIN','AccuracyTEST', 'AUC_TEST','ConfusionMatrixTEST_gtACTIVEpACTIVE_gtMQUEENpACTIVE_gtACTIVEpMQUEEN_gtMQUEENpMQUEEN', 'PrecisionTEST_on_MQUEEN', 'RecallTEST_on_MQUEEN', 'PrecisionTEST_on_ACTIVE', 'RecallTEST_on_ACTIVE', 'gtACTIVEpACTIVE_TEST', 'gtMQUEENpACTIVE_TEST', 'gtACTIVEpMQUEEN_TEST', 'gtMQUEENpMQUEEN_TEST', 'ShannonEnthropyTEST','accuracyTEST_on_balancedDatasets'])
    
        csvfile.close()
    # work on copies of the predictions and ground-truth labels
    PRED_TEST=Test_Preds[:]
    PRED_TRAIN=Train_Preds[:]
    PRED_TEST_PROBA=Test_Preds_Proba[:]
    PRED_TRAIN_PROBA=Train_Preds_Proba[:]
    GT=Test_GroundT[:]
    GT_TRAIN=Train_GroundT[:]
   
    
    #Evaluate classifier:
            
    
    print("Classification report for classifier %s:\n%s\n"
      % (CLF, metrics.classification_report(GT, PRED_TEST)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(GT, PRED_TEST))
    print('\n')
    
    # Save classification results:

    if save == 'yes':
          
        with open(path_results+classification_idSTRING+'.csv', 'w') as csvfile:
            wtr = csv.writer(csvfile, delimiter=';')
            
            
            wtr.writerow( ["Classification report for classifier %s:\n%s\n"
              % (CLF, metrics.classification_report(GT, PRED_TEST))])
            wtr.writerow( ["Confusion matrix:\n%s" % metrics.confusion_matrix(GT, PRED_TEST)])
            wtr.writerow( [" Accuracy: \n%s" % metrics.accuracy_score(GT, PRED_TEST)])
            wtr.writerow( [" Area under Curve: \n%s" % metrics.roc_auc_score(GT, PRED_TEST_PROBA, average='macro', sample_weight=None)])
            wtr.writerow( ["Predictions: \n"])
            for p in range(len(PRED_TEST)):
                wtr.writerow([testFilenames[p] + '    GT: ' + str(GT[p]) + '   PRED_TEST: ' + str(PRED_TEST[p])])
                
    # append results to summary file:  
    with open(summary_filename, 'a') as summaryFile:
        writer=csv.writer(summaryFile, delimiter=',')
        
        
        #compute parameters to show:
        accuracy=metrics.accuracy_score(GT, PRED_TEST)
        AccuracyTRAIN=metrics.accuracy_score(GT_TRAIN, PRED_TRAIN)
        try:
            # sklearn's confusion_matrix(GT, PRED).ravel() returns (C[0,0], C[0,1], C[1,0], C[1,1]);
            # with ACTIVE encoded as 0 and MQUEEN as 1 (as the fallback cases below assume), that is
            # (gtACTIVEpACTIVE, gtACTIVEpMQUEEN, gtMQUEENpACTIVE, gtMQUEENpMQUEEN). Cast to Python ints
            # so that the ZeroDivisionError handling further down actually triggers on empty classes.
            gtACTIVEpACTIVE, gtACTIVEpMQUEEN, gtMQUEENpACTIVE, gtMQUEENpMQUEEN = (
                int(v) for v in metrics.confusion_matrix(GT, PRED_TEST).ravel())
        except ValueError as e:
            if sum(PRED_TEST)==len(PRED_TEST) and sum(GT)==len(PRED_TEST) :
                gtACTIVEpACTIVE=0 
                gtMQUEENpACTIVE=0
                gtACTIVEpMQUEEN=0
                gtMQUEENpMQUEEN =len(PRED_TEST)
            
            elif sum(PRED_TEST)==0 and sum(GT)==0 :
                gtACTIVEpACTIVE=len(PRED_TEST)
                gtMQUEENpACTIVE=0
                gtACTIVEpMQUEEN=0
                gtMQUEENpMQUEEN =0
            
            elif sum(PRED_TEST)==len(PRED_TEST) and sum(GT)==0 :
                gtACTIVEpACTIVE=0
                gtMQUEENpACTIVE=0
                gtACTIVEpMQUEEN=len(PRED_TEST)
                gtMQUEENpMQUEEN =0
                
            elif sum(PRED_TEST)==0 and sum(GT)==len(GT) :
                gtACTIVEpACTIVE=0
                gtMQUEENpACTIVE=len(PRED_TEST)
                gtACTIVEpMQUEEN=0
                gtMQUEENpMQUEEN =0
                
                
                
        try:
            # same unpacking order and int cast as above, for the training confusion matrix
            gtACTIVEpACTIVE_TRAIN, gtACTIVEpMQUEEN_TRAIN, gtMQUEENpACTIVE_TRAIN, gtMQUEENpMQUEEN_TRAIN = (
                int(v) for v in metrics.confusion_matrix(GT_TRAIN, PRED_TRAIN).ravel())
        except ValueError as e:
            if sum(PRED_TRAIN)==len(PRED_TRAIN) and sum(GT_TRAIN)==len(PRED_TRAIN) :
                gtACTIVEpACTIVE_TRAIN=0 
                gtMQUEENpACTIVE_TRAIN=0
                gtACTIVEpMQUEEN_TRAIN=0
                gtMQUEENpMQUEEN_TRAIN = len(PRED_TRAIN)
            
            elif sum(PRED_TRAIN)==0 and sum(GT_TRAIN)==0 :
                gtACTIVEpACTIVE_TRAIN=len(PRED_TRAIN)
                gtMQUEENpACTIVE_TRAIN=0
                gtACTIVEpMQUEEN_TRAIN=0
                gtMQUEENpMQUEEN_TRAIN =0
            
            elif sum(PRED_TRAIN)==len(PRED_TRAIN) and sum(GT_TRAIN)==0 :
                gtACTIVEpACTIVE_TRAIN=0
                gtMQUEENpACTIVE_TRAIN=0
                gtACTIVEpMQUEEN_TRAIN=len(PRED_TRAIN)
                gtMQUEENpMQUEEN_TRAIN =0
                
            elif sum(PRED_TRAIN)==0 and sum(GT_TRAIN)==len(GT_TRAIN) :
                gtACTIVEpACTIVE_TRAIN=0
                gtMQUEENpACTIVE_TRAIN=len(PRED_TRAIN)
                gtACTIVEpMQUEEN_TRAIN=0
                gtMQUEENpMQUEEN_TRAIN =0  
                
        try:
            total_TRAIN = gtACTIVEpACTIVE_TRAIN + gtMQUEENpACTIVE_TRAIN + gtACTIVEpMQUEEN_TRAIN + gtMQUEENpMQUEEN_TRAIN
            p_ACTIVE_TRAIN = (gtACTIVEpACTIVE_TRAIN + gtACTIVEpMQUEEN_TRAIN) / total_TRAIN
            p_MQUEEN_TRAIN = (gtMQUEENpACTIVE_TRAIN + gtMQUEENpMQUEEN_TRAIN) / total_TRAIN
            # Shannon entropy (natural log) of the ground-truth class distribution in the training set
            ShannonEnthropy_TRAIN = -(p_ACTIVE_TRAIN * log(p_ACTIVE_TRAIN) + p_MQUEEN_TRAIN * log(p_MQUEEN_TRAIN))
        except Exception as e:
            ShannonEnthropy_TRAIN = 0
                
        try:
            Precision_on_MQUEEN=gtMQUEENpMQUEEN/(gtMQUEENpMQUEEN+gtACTIVEpMQUEEN)
        except ZeroDivisionError as e:
            Precision_on_MQUEEN=0
        try:
            Recall_on_MQUEEN=gtMQUEENpMQUEEN/(gtMQUEENpMQUEEN+gtMQUEENpACTIVE)
        except ZeroDivisionError as e:
            Recall_on_MQUEEN=0
        try:
            Precision_on_ACTIVE=gtACTIVEpACTIVE/(gtACTIVEpACTIVE+gtMQUEENpACTIVE)
        except ZeroDivisionError as e:
            Precision_on_ACTIVE=0
        try:
            Recall_on_ACTIVE=gtACTIVEpACTIVE/(gtACTIVEpACTIVE+gtACTIVEpMQUEEN)        
        except ZeroDivisionError as e:
            Recall_on_ACTIVE=0
            
        try:
            total = gtACTIVEpACTIVE + gtMQUEENpACTIVE + gtACTIVEpMQUEEN + gtMQUEENpMQUEEN
            p_ACTIVE = (gtACTIVEpACTIVE + gtACTIVEpMQUEEN) / total
            p_MQUEEN = (gtMQUEENpACTIVE + gtMQUEENpMQUEEN) / total
            # Shannon entropy (natural log) of the ground-truth class distribution in the test set
            ShannonEnthropy = -(p_ACTIVE * log(p_ACTIVE) + p_MQUEEN * log(p_MQUEEN))
        except Exception as e:
            ShannonEnthropy = 0
            
        if ShannonEnthropy>0.9:
            accuracy_on_balancedDatasets=accuracy
        else: 
            accuracy_on_balancedDatasets=0
        
        try:
            AUC_TRAIN=metrics.roc_auc_score(GT_TRAIN, PRED_TRAIN_PROBA, average='macro', sample_weight=None)
        except Exception as e:
            AUC_TRAIN='error'

        try: 
            AUC_TEST=metrics.roc_auc_score(GT, PRED_TEST_PROBA, average='macro', sample_weight=None)
            
        except Exception as e:
            AUC_TEST='error'
            
            
        writer.writerow([classification_idSTRING, AccuracyTRAIN, AUC_TRAIN, gtACTIVEpACTIVE_TRAIN, gtMQUEENpACTIVE_TRAIN, gtACTIVEpMQUEEN_TRAIN, gtMQUEENpMQUEEN_TRAIN , ShannonEnthropy_TRAIN, accuracy , AUC_TEST ,metrics.confusion_matrix(GT, PRED_TEST),Precision_on_MQUEEN , Recall_on_MQUEEN, Precision_on_ACTIVE, Recall_on_ACTIVE, gtACTIVEpACTIVE, gtMQUEENpACTIVE, gtACTIVEpMQUEEN,gtMQUEENpMQUEEN, ShannonEnthropy,accuracy_on_balancedDatasets])
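# A minimal usage sketch (not from the source): feed the outputs of
# SVM_Classification_BeehiveSTATE into the reporter. The paths, id string and
# test_ids are hypothetical placeholders.
# CLF, gt_te, gt_tr, pred_te, pred_tr, proba_te, proba_tr = SVM_Classification_BeehiveSTATE(
#     X_flat_train, y_train, X_flat_test, y_test)
# report_SVM_beehiveState_results('results/summary.csv', 'results/', thresholds=[0],
#                                 CLF=CLF, Test_GroundT=gt_te, Train_GroundT=gt_tr,
#                                 Test_Preds=pred_te, Train_Preds=pred_tr,
#                                 Test_Preds_Proba=proba_te, Train_Preds_Proba=proba_tr,
#                                 classification_idSTRING='SVM_rbf_MFCCs20',
#                                 testFilenames=test_ids, chunk_size=60, save='yes')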
def extract_long_term_features_fromMEL(Mel_spectrograms, n_slices):

    # n_slices should divide the number of frames evenly; np.array_split tolerates an uneven
    # split, but then the slices have different lengths. One way to correct it:
    # total_frames = Mel_spectrograms[0].shape[-1]  # total number of frames
    # slice_size = total_frames // n_slices
    # corrected_n_slices = total_frames / int(slice_size)
    
    long_term_features = []
    
    for sample in Mel_spectrograms:
        print(sample.shape)
        sliced_sample = np.array_split(sample, n_slices, 1 )
        print(len(sliced_sample))
        print(sliced_sample[0].shape)
        stacked_averaged_slices = []
        for i in range(len(sliced_sample)):
                       
            stacked_averaged_slices.append(np.mean(sliced_sample[i],1))
            print(stacked_averaged_slices[i].shape)
            #pdb.set_trace()
            print(len(stacked_averaged_slices))
        long_term_features.append(stacked_averaged_slices)    
            
    print(len(long_term_features))
    print(len(long_term_features[0]))
    
    print(long_term_features[0][0].shape)
    return long_term_features
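# A minimal usage sketch (not from the source): two random Mel spectrograms with
# 128 Mel bands and 400 frames each, split into 10 slices of time-averaged frames.
import numpy as np

mels_toy = [np.random.rand(128, 400) for _ in range(2)]
ltf = extract_long_term_features_fromMEL(mels_toy, n_slices=10)
print(len(ltf), len(ltf[0]), ltf[0][0].shape)  # 2 samples, 10 slices, (128,) per slice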
path_save_audio_stft = path + 'dataset_BeeNoBee_2_second' + str(block_size) + 'sec' + '\\stft_matrix.mat' + os.sep


nbits = 16
MAX_VAL = pow(2, nbits - 1) * 1.0
target_names=['missing_queen', 'active']

#-----------------------------------STFT+CNN -----------------------------------#

ruche1,Y1,labels1, sample_ids1, ruche2,Y2,labels2, sample_ids2, ruche3,Y3,labels3, sample_ids3, ruche4,Y4,labels4, sample_ids4=get_list_samples_name_('b', path_save_audio_stft)
# save the model history in a list after fitting so we can plot later 
model_history=[]
val_accuracy=[]
for i in range(4):
    fold= i+1
    print("Training on Fold :", fold)
    x_train, x_test, y_train, y_test,sample_ids_train, sample_ids_test=cross_validation_4folds(fold, ruche1,Y1, ruche2,Y2, ruche3,Y3, ruche4,Y4 , sample_ids1 , sample_ids2 , sample_ids3 , sample_ids4) 
    print(len(x_train), len(x_test), len(y_train), len(y_test))
    # Read a sparse matrix 
    x_train2=[]
    x_test2=[]
    for l in range(len(x_train)):
        x_train2.append(x_train[l].todense())
    for l in range(len(x_test)):
        x_test2.append(x_test[l].todense())
        
    X_train= np.array(x_train2)
    x_Test= np.array(x_test2)
    y_train=np.array(y_train)
    y_test= np.array(y_test)
    X_test, y_test = constrainedsplit(y_train, x_Test, y_test, 0.7)
Example #12
def fit_and_evaluate(train_x, val_x, train_y, val_y, EPOCHS=50, BATCH_SIZE=145):
    model = None
    model = deep_model((20, 44, 1))
    results = model.fit(train_x, train_y, epochs=EPOCHS, batch_size=BATCH_SIZE,
                        callbacks=[early_stopping, model_checkpoint], verbose=1,
                        validation_split=0.1)
    print("Val Score :", model.evaluate(val_x, val_y))
    return results
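# A minimal sketch (not from the source) of the globals fit_and_evaluate assumes:
# `deep_model` builds the CNN elsewhere in the project, and the two callbacks could
# look like this (the file name and monitored metric are assumptions).
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint('best_cnn.h5', monitor='val_loss', save_best_only=True)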
Example #13
def StoreExcel(dic):
    checklis = []

    if os.path.isfile('./MFCC/Name_Label_MFCCfeature.xlsx'):
        xls = load_workbook('./MFCC/Name_Label_MFCCfeature.xlsx')
        print("file existed ")
        try:
            df = pd.read_excel('./MFCC/Name_Label_MFCCfeature.xlsx',
                               delimiter="\t")
            # take the first column (Name)
            check = df.iloc[:, 0]
            for i in check:
                if i in checklis:
                    continue
                else:
                    checklis.append(i)
        except Exception as e:
            print("An error found but ignored", e)
    else:
        xls = openpyxl.Workbook()
    # the following writes the data into the Excel file

    sheet = xls.get_sheet_by_name('Sheet')  # get the default worksheet
    x, y = 1, 1

    sheet.cell(row=x, column=y, value='Name')
    sheet.cell(row=x, column=y + 1, value='Label')

    x, y = 2, 1

    # find the first empty row to start inserting values
    tmp = 'A' + str(x)
    while True:
        if sheet[tmp].value is not None:
            x += 1
            tmp = 'A' + str(x)
            #print(ws[tmp].value)
        else:
            break

    for name in dic:
        if name[0:-4] in checklis:
            print('this file has already been stored, skipping it')
            continue  # this file was already saved, skip it

        currentname = name[0:-4]

        # determine the hive state from the file name
        if 'NO' in currentname or 'Missing' in currentname:
            currentlabel = 'Missing Queen'
        else:
            currentlabel = 'Active'

        # write into the Excel cells
        # store the name & state
        y = 1
        sheet.cell(row=x, column=y, value=currentname)
        sheet.cell(row=x, column=y + 1, value=currentlabel)
        y = 3
        # store the features

        for feature in dic[name]:
            sheet.cell(row=x, column=y, value=feature)
            y += 1
        x += 1

    # MFCC feature column headers (1-120)
    x, y = 1, 3
    for count in range(1, 121):
        tmps = count
        sheet.cell(row=x, column=y, value=tmps)
        y += 1

    Excelname = 'Name_Label_MFCCfeature.xlsx'
    xls.save('./MFCC/' + Excelname)
    print(Excelname + " has been saved")
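# A minimal usage sketch (not from the source): a toy dictionary mapping one segment
# name to 120 MFCC summary features (random numbers here). Assumes the ./MFCC/ folder
# already exists.
import numpy as np

toy_dic = {'Hive1_Missing_Queen__segment0.wav': np.random.rand(120).tolist()}
StoreExcel(toy_dic)  # appends one row to ./MFCC/Name_Label_MFCCfeature.xlsx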
Example #14
def load_audioFiles_saves_segments(audiofilenames_list,
                                   path_audioFiles,
                                   path_save_audio_labels,
                                   block_size,
                                   save_audioSegments='yes'):
    printb("Number of audiofiles in folder: " + str(len(audiofilenames_list)))
    fi = 0  # current file index, starting at 0
    # How it works: load the files one by one; for each audio file the start time (offset) begins at 0 and each
    # segment lasts `duration` (block_size = 60 s). Inside the while loop the segment is written out as a wav
    # (so that part of the signal is consumed; the remaining signal starts at offset + block_size) and its info
    # is stored in the csv; if there is still data left, the loop runs again. When no signal remains, the file
    # has been fully exported, so we break out of the loop and move on to the next audio file.

    for file_name in audiofilenames_list:
        fi = fi + 1
        print('\n')
        printb('Processing ' + file_name + '       :::file number:  ' +
               str(fi) + ' ----->of ' + str(len(audiofilenames_list)))

        offset = 0
        block_id = 0
        # before processing, check whether this file was already processed; if so, skip it
        exitfile = fileisexitornot(path_save_audio_labels)
        # use the name of the first cut segment to decide whether this audio file still needs processing
        first_block_name = file_name[0:-4] + '__segment' + str(
            block_id) + '.wav'
        if first_block_name in exitfile:
            print('this file has been used')
            continue

        while 1:

            # READ ONE BLOCK OF THE AUDIO FILE
            try:  # try/except for error handling
                # load the audio (file path, offset: start reading at this time, duration: how long to read);
                # returns the audio samples and sr = sampling rate
                block, sr = librosa.core.load(path_audioFiles + file_name,
                                              offset=offset,
                                              duration=block_size)
                print('-----Reading segment ' + str(block_id))
            except ValueError as e:
                if 'Input signal length' in str(e):
                    block = np.arange(0)
            except FileNotFoundError as e1:
                print(e1, ' but continuing anyway')

            if block.shape[0] > 0:
                #when total length = multiple of blocksize, results that last block is 0-lenght, this if bypasses those cases.
                block_name = file_name[0:-4] + '__segment' + str(block_id)
                print(block_name)

                #testing
                # create state_labels.csv here and append the segment name and its state label
                states = ['active', 'missing queen', 'swarm']

                label_file_exists = os.path.isfile(path_save_audio_labels +
                                                   'state_labels' +
                                                   '.csv')  # check whether the file already exists
                with open(path_save_audio_labels + 'state_labels' + '.csv',
                          'a',
                          newline='') as label_file:  # open the label file for appending
                    writer = csv.DictWriter(
                        label_file,
                        fieldnames=['sample_name', 'label'],
                        delimiter=',')  # define the columns sample_name, label
                    if not label_file_exists:  # if the file did not exist yet
                        writer.writeheader()  # write the header row

                    csvreader = csv.DictReader(label_file)
                    try:
                        for row in csvreader:
                            print(row['sample_name'])
                    except Exception:
                        pass
                    #270
                    label_state = read_HiveState_fromSampleName(
                        block_name, states)
                    writer.writerow({
                        'sample_name': block_name,
                        'label': label_state
                    })
                # READ BEE NOT_BEE ANNOTATIONS: store the .lab annotation info in the csv

                # MAKE BLOCK OF THE SAME SIZE:
                if block.shape[0] < block_size * sr:
                    pass

                # Save audio segment:
                if save_audioSegments == 'yes' and (
                        not os.path.exists(path_save_audio_labels +
                                           block_name + '.wav')
                ):  #saves only if option is chosen and if block file doesn't already exist.
                    # write the audio samples out as a .wav file
                    librosa.output.write_wav(
                        path_save_audio_labels + block_name + '.wav', block,
                        sr)
                    print('-----Saved wav file for segment ' + str(block_id))

            else:
                print('-----no more segments for this file-----')
                print('\n')
                break
            offset += block_size
            block_id += 1
    printb('_____No more audioFiles_____')

    return