示例#1
0
def wavLCollection2datXy(wavLabelCollection, fs=None, featExtFun=None):
    """
    Build a data object from a collection of labelled wav files.

    Every wav file is loaded with ``wav2waveform``, handed to ``featExtFun``
    and the flattened result is added, together with the file's label,
    to a ``myML.dataXy_names`` object.

    Parameters
    ----------
    wavLabelCollection : iterable of pairs
        pair[0] : path to wav file
        pair[1] : label of that wav file
    fs : optional
        forwarded to ``wav2waveform`` as ``fs``. It is rebound on every
        iteration to the second value returned by ``wav2waveform``, so
        from the second file on the value returned for the previous file
        is the one that gets forwarded.
    featExtFun : callable
        called with the loaded waveform; its result must offer
        ``flatten()``. The default ``None`` is not callable, so a function
        has to be supplied whenever the collection is not empty.

    Return
    ------
    > datO: myML.dataXy_names
        object that received one ``addInstances`` call per wav file
    """
    dataset = myML.dataXy_names()  # empty container, filled in the loop

    for wavPath, wavLabel in wavLabelCollection:
        # fs is reassigned here on purpose: same behaviour as always
        waveForm, fs = wav2waveform(wavPath, fs=fs)
        featRow = featExtFun(waveForm).flatten()
        # addInstances is given a leading axis of length one
        dataset.addInstances(np.expand_dims(featRow, axis=0), [wavLabel])

    return dataset
示例#2
0
def wavAnn2annSecs_dataXy_names(wavF, annF, featExtFun=None):
    """
    Extract one feature vector per annotated section of a wav file.

    The sections are obtained from ``auf.getAnnWavSec(wavF, annF)``; for
    each of them the ``"waveform"`` entry is passed to ``featExtFun`` and
    the flattened result is stored with the section's ``"label"`` entry.
    Meant to be used with feature extraction 'split'
    (call type classification).

    Parameters
    ----------
    wavF: str
        path to wav file
    annF: str
        path to the annotations file
    featExtFun: callable
        feature extraction function, called with the section waveform.
        The default ``None`` is not callable and fails as soon as there
        is a section to process.

    Returns
    -------
    datO: myML.dataXy_names
        classification features, one ``addInstances`` call per section
    """
    # the second returned value (sampling rate) is not needed here
    sections, _fs = auf.getAnnWavSec(wavF, annF)

    dataset = myML.dataXy_names()
    for section in sections:
        sectionLabel = section["label"]
        featVec = featExtFun(section["waveform"])
        instance = np.expand_dims(featVec.flatten(), axis=0)
        dataset.addInstances(instance, [np.array(sectionLabel)])

    return dataset
示例#3
0
def wavAnnCollection2annSecs_dataXy_names(wavAnnColl, featExtFun=None):
    """
    Gather the annotated-section features of a collection of wav files.

    Each (wav, annotations) pair is processed with
    ``wavAnn2annSecs_dataXy_names`` and the resulting ``X`` and
    ``y_names`` are appended to a single data object.
    Meant to be used with feature extraction 'split'
    (call type classification).

    Parameters
    ----------
    < wavAnnColl : sliceable collection of (wavF, annF) pairs
    < featExtFun : feature extraction function, forwarded unchanged to
                   wavAnn2annSecs_dataXy_names

    Return
    ------
    > datXy_names : features object (myML.dataXy_names)
    """
    collected = myML.dataXy_names()

    # iterate over a slice of the collection, as the code always did
    for wavF, annF in wavAnnColl[:]:
        fileData = wavAnn2annSecs_dataXy_names(wavF,
                                               annF,
                                               featExtFun=featExtFun)
        collected.addInstances(fileData.X, fileData.y_names)

    return collected
示例#4
0
def extractFeaturesWDataAugmentation(sampSpace,
                                     feExFun,
                                     n_instances=10,
                                     **ensSettings):
    """Extract features per label and top up with artificial samples.

    For every label in ``sampSpace`` the original waveforms are processed
    with ``waveformsLi2DatXy_names`` and the remainder up to
    ``n_instances`` is requested from
    ``waveformsLi2aritificial_DatXy_names``.

    Parameters
    ----------
    sampSpace: dict
        label -> list of waveforms (samples space)
    feExFun: callable
        feature extraction function
    n_instances: int
        number of instances aimed at for each label
    ensSettings: dict
        kwargs for the generation of artificial samples
        see exT.generateData_ensembleSettings(n_artificial_samples=1)

    Returns
    -------
    myML.dataXy_names
        original and artificial instances of all labels
    """
    balanced = myML.dataXy_names()  # data object

    for call, waveforms in sampSpace.items():
        # features of the real samples
        realData = waveformsLi2DatXy_names(waveforms,
                                           call,
                                           feExFun,
                                           nInstances=n_instances)
        balanced.addInstances(realData.X, realData.y_names)

        # artificial samples cover what is missing to reach n_instances
        n_missing = n_instances - realData.m_instances
        artData = waveformsLi2aritificial_DatXy_names(waveforms,
                                                      call,
                                                      feExFun,
                                                      n_instances=n_missing,
                                                      **ensSettings)
        balanced.addInstances(artData.X, artData.y_names)

    return balanced
示例#5
0
def wavAnnCollection2datXy(WavAnnCollection,
                           feExFun=None,
                           labelsHierarchy="default"):
    """
    Extract features and labels from a wav-ann collection.

    Parameters
    ----------
    WavAnnCollection: list of tuples
        [(<path to wavF>, <path to annF>), ...]
    feExFun: callable
        feature extraction function, forwarded to getXy_fromWavFAnnF
    labelsHierarchy: list
        labels in hierarchical order for setting the label of the
        instances (WALKING). The string "default" stands for ["c"].

    Return
    ------
    > datO : myML.dataXy_names holding the X and label names returned by
             getXy_fromWavFAnnF for every pair of the collection
    """
    hierarchy = ["c"] if labelsHierarchy == "default" else labelsHierarchy

    collected = myML.dataXy_names()  # empty data object

    for wavF, annF in WavAnnCollection:
        features, labelNames = getXy_fromWavFAnnF(wavF, annF, feExFun,
                                                  hierarchy)
        collected.addInstances(features, labelNames)

    return collected
示例#6
0
def waveformsLi2aritificial_DatXy_names(waveformsLi, label, feExFun,
                                        n_instances, **ensemble_settings):
    """Generate artificial samples from same-label waveforms.

    The waveforms are visited cyclically; for each visit the first
    element returned by ``eff.generateWaveformEnsemble`` is passed to
    ``feExFun`` and the flattened result is stored with ``label``.

    Parameters
    ---------
    waveformsLi: list
        waveforms, all sharing the same label
    label:
        label attached to every generated instance
    feExFun: callable
        feature extraction function
    n_instances: int
        total number of artificial samples (instances) to generate
    ensemble_settings: dict
        kwargs for the generation of artificial samples
        see exT.generateData_ensembleSettings(n_artificial_samples=1)

    Returns
    -------
    myML.dataXy_names
        data object with the artificial instances
    """
    n_available = len(waveformsLi)
    # wrap around the list until the requested amount is reached
    cyclicIdx = np.arange(n_instances) % n_available
    artificial = myML.dataXy_names()  # empty data object

    for idx in cyclicIdx:
        ensemble = eff.generateWaveformEnsemble(waveformsLi[idx],
                                                **ensemble_settings)
        featVec = feExFun(ensemble[0])  # only the first member is used
        instance = np.expand_dims(featVec.flatten(), axis=0)
        artificial.addInstances(instance, [np.array(label)])

    return artificial
示例#7
0
def wavFAnnF2sections_wavsEnsemble_datXy_names(wavF,
                                               annF,
                                               featExtFun=None,
                                               wavPreprocessingT=None,
                                               ensembleSettings=None):
    """
    Compute ensemble features for each annotated section of a wav file.

    Every annotated section is preprocessed, expanded into an ensemble
    with ``eff.generateWaveformEnsemble`` and each ensemble member is
    turned into one instance labelled with the section's label.
    Meant to be used with feature extraction 'split'.

    Parameters:
    ----------
    wavF: str
        path to wave file
    annF: str
        path to the annotations file
    featExtFun: callable
        feature extraction function. Anything that is not callable is
        handed to ``wavFeatureExtraction(...).featExtrFun()`` to obtain
        the function actually used.
    wavPreprocessingT : callable
        called as ``wavPreprocessingT(waveform, fs)`` before ensemble
        generation; when not callable the waveform is left untouched
    ensembleSettings: dict
        kwargs for ensemble generation; ``None`` selects
        ``effectName="addWhiteNoise"`` with
        ``generate_data_grid=np.ones(1)``

    Return:
    ------
        > datXy_names : data object
    """

    def _keepWaveform(waveform, fs):
        """Identity preprocessing: return the waveform as it came."""
        return waveform

    ### resolve defaults
    if not callable(featExtFun):  # dictionary or None (default parameters)
        featExtFun = wavFeatureExtraction(featExtFun).featExtrFun()
    if not callable(wavPreprocessingT):
        wavPreprocessingT = _keepWaveform
    if ensembleSettings is None:
        ensembleSettings = {
            "effectName": "addWhiteNoise",
            "generate_data_grid": np.ones(1),
        }

    ### annotated sections of the recording
    sections, fs = auf.getAnnWavSec(wavF, annF)

    dataset = myML.dataXy_names()
    for section in sections:
        sectionLabel = section["label"]
        prepared = wavPreprocessingT(section["waveform"], fs)
        # presumably a 2-D array with one waveform per row -- confirm
        Y = eff.generateWaveformEnsemble(prepared, **ensembleSettings)
        for memberIdx in range(len(Y)):
            featVec = featExtFun(Y[memberIdx, :])
            instance = np.expand_dims(featVec.flatten(), axis=0)
            dataset.addInstances(instance, [np.array(sectionLabel)])

    return dataset
def test_dataXy_filter():
    """Check that dataXy_names keeps X and that a None filter is a no-op."""
    # random 4 x 4 feature matrix with entries in [1, 5)
    features = np.random.randint(1, 5, (4, 4))
    # upper bound of randint is exclusive, so every label here is 0
    labels = np.random.randint(0, 1, (4, ))
    dataset = myML.dataXy_names(features, labels)
    # loading must leave the matrix untouched
    np.testing.assert_array_equal(features, dataset.X)

    # filtering with None must hand back the same matrix
    filteredX, _filteredLabels = dataset.filterInstances(None)
    np.testing.assert_array_equal(features, filteredX)
示例#9
0
def waveformsLi2DatXy_names(waveformsLi, label, feExFun, nInstances):
    """Extract features from a list of waveforms sharing one label.

    Parameters
    ----------
    waveformsLi: list
        waveforms to process
    label:
        label attached to every instance
    feExFun: callable
        feature extraction function; its result must offer ``flatten()``
    nInstances: int
        when the list is longer than this, it is sliced with
        ``[:nInstances]``; otherwise the whole list is used

    Returns
    -------
    myML.dataXy_names
        data object with one ``addInstances`` call per processed waveform
    """
    # a stop of None keeps the whole list
    stop = nInstances if len(waveformsLi) > nInstances else None

    dataset = myML.dataXy_names()  # empty data object
    for wf in waveformsLi[:stop]:
        featVec = feExFun(wf).flatten()
        dataset.addInstances(np.expand_dims(featVec, axis=0),
                             [np.array(label)])
    return dataset
示例#10
0
def wavAnnCollection2Xy_ensemble_datXy_names(wavAnnColl,
                                             featExtFun,
                                             wavPreprocessingT=None,
                                             ensembleSettings=None):
    """
    Gather the ensemble features of a collection of annotated wav files.

    Each (wav, annotations) pair is processed with
    ``wavFAnnF2sections_wavsEnsemble_datXy_names`` and the resulting
    ``X`` and ``y_names`` are appended to a single data object.

    Parameters
    ----------
    wavAnnColl: sliceable collection of (wavF, annF) pairs
    featExtFun: feature extraction function, forwarded unchanged
    wavPreprocessingT: forwarded unchanged
    ensembleSettings: forwarded unchanged

    Return
    ------
    > myML.dataXy_names with the instances of all files
    """
    collected = myML.dataXy_names()  # empty data object

    # iterate over a slice of the collection, as the code always did
    for wavF, annF in wavAnnColl[:]:
        fileData = wavFAnnF2sections_wavsEnsemble_datXy_names(
            wavF,
            annF,
            featExtFun=featExtFun,
            wavPreprocessingT=wavPreprocessingT,
            ensembleSettings=ensembleSettings,
        )
        collected.addInstances(fileData.X, fileData.y_names)

    return collected
示例#11
0
def get_DataXy_fromWavFannF(wavF, annF, feExFun, labelsHierarchy):
    """
    Extract features and their labels (ground truth) from a wav file and
    its annotations file and wrap them in a dataXy_names instance.

    Parameters
    ----------
    wavF: str
        path to wav file, loaded with ``wav2waveform``
    annF: str
        path to annotations file, read by ``auf.annotationsFi2instances``
    feExFun: callable
        feature extraction function, called with the waveform
    labelsHierarchy: list
        forwarded to ``auf.annotationsFi2instances``

    Return
    ------
    > myML.dataXy_names built from the features and the label names
    """
    waveForm, fs = wav2waveform(wavF)
    # number of samples over the value returned as fs
    duration = len(waveForm) / fs

    features = feExFun(waveForm)
    # one label name is requested per element along the first axis
    labelNames = auf.annotationsFi2instances(
        annF,
        len(features),
        duration,
        labelsHierarchy=labelsHierarchy,
    )
    return myML.dataXy_names(features, labelNames)
示例#12
0
# In[18]:


# path to the csv file listing the training wav files (relative to pDir)
train_collection = os.path.join(pDir, 'data/groupB_paths2files.csv')
## load only the two columns needed: path to the wav file and its call label
df = pd.read_csv(train_collection, usecols=['path_to_file', 'call'])
# array of rows taken from the two columns read above
wavColl = df.values


# ## Extract features

# In[7]:


# empty data object that receives the instances of the collection
datO = myML.dataXy_names() 
# feExFun and fs are defined outside this cell
datO_new = fex.wavLCollection2datXy( wavColl, featExtFun=feExFun, fs=fs )
datO.addInstances(datO_new.X, datO_new.y_names )

## label transformer, built from the second entry of every row
call_labels = [l[1] for l in wavColl]
lt = myML.labelTransformer(call_labels)

# feature matrix, label names and the labels converted by the transformer
X = datO.X
y_names = datO.y_names
y = lt.nom2num(y_names)


# In[8]: