def wavLCollection2datXy(wavLabelCollection, fs=None, featExtFun=None):
    """
    Build the data object of a collection of labelled wavs
    (one instance per wav file, e.g. for call type classification).

    Every wav is loaded, passed through `featExtFun`, flattened into a
    single feature row and added to the data object with its label.

    Parameters
    ----------
    wavLabelCollection : list of tuples
        tu[0] : path to wav file
        tu[1] : wav label
    fs : optional
        forwarded to `wav2waveform` as `fs`. The value returned when loading
        one file is forwarded when loading the next one.
    featExtFun : callable
        feature extraction function, called with the waveform. Although the
        default is None, a callable has to be given for the function to work.

    Return
    ------
    > datO : myML.dataXy_names
        data object holding one flattened feature row per wav
    """
    datO = myML.dataXy_names()  # start from an empty data object

    for wavPath, label in wavLabelCollection:
        # fs is rebound here, so later files are loaded with the fs
        # returned for the previous file
        waveForm, fs = wav2waveform(wavPath, fs=fs)
        featureRow = featExtFun(waveForm).flatten()
        # addInstances receives a (1, n_features) matrix and a one-label list
        datO.addInstances(np.expand_dims(featureRow, axis=0), [label])

    return datO
def WSD2predictAnnotations(wavF, annWSD1, feExtFun, lt, WSD2_clf, outF,
                           readSections, keepSections='default', dt=0):
    """Generate annotations using the WSD2

    Reads the predicted sections from WSD1 and predicts the finer structure
    of the calls with a clf trained with a smaller nTextWS. The predictions
    of every section of interest are appended to the output file.

    Parameters
    ----------
    wavF : str
        wavefile name
    annWSD1 : str
        annotations from WSD1, loaded with `annT.anns2array`
    feExtFun : callable
        feature extraction function
    lt : LabelEncoder
        label transformation object
    WSD2_clf : estimator
        model for estimating predictions
    outF : str
        name of the output annotations; an existing file is removed first
    readSections : list like object
        array with the ann sections from WSD1 we want to reinterpret
    keepSections : list like object
        array with the ann sections we want to print, 'default' means ['c']
    dt : float
        time buffer for reading around the desired annotation section

    Returns
    -------
    outF : str
        value returned by the last call to `annT.save_TLannotations`, or the
        given `outF` when no section was in `readSections`
    """
    # Compare by value, not identity: `is 'default'` relies on string
    # interning. The isinstance guard keeps array-like arguments from being
    # compared elementwise.
    if isinstance(keepSections, str) and keepSections == 'default':
        keepSections = ['c']

    # best effort clean-up, so that the appends below start from scratch
    try:
        os.remove(outF)
    except OSError:
        pass

    waveform, fs = sT.wav2waveform(wavF)  # load waveform
    A = annT.anns2array(annWSD1)  # load annotations

    for t0i, t0f, l0 in A:  # for each ann section
        if l0 not in readSections:  # not a section of interest
            continue
        thisWaveform = auf.getWavSec(waveform, fs, t0i - dt, t0f + dt)
        ## predict annotations
        T, L = predictAnnotations(thisWaveform, fs, feExtFun, lt, WSD2_clf)
        newT = T + t0i - dt  # relative to the original ann sections
        # flattened membership mask, same result as the deprecated np.in1d
        mask = np.isin(L, keepSections).ravel()
        outF = annT.save_TLannotations(newT[mask, :], L[mask], outF,
                                       opening_mode='a')
    return outF
def WSD2predictionsTLanns(wavF, annWSD1, feExtFun, lt, WSD2_clf, readSections, dt=0):
    """Predict the WSD2 annotations and return them as arrays

    Reads the predicted sections from WSD1 and predicts the finer structure
    of the calls with a clf trained with a smaller nTextWS. Unlike writing
    to a file, the predictions of all the sections are stacked and returned.

    Parameters
    ----------
    wavF : str
        wavefile name
    annWSD1 : str
        annotations from WSD1, loaded with `annT.anns2array`
    feExtFun : callable
        feature extraction function
    lt : LabelEncoder
        label transformation object
    WSD2_clf : estimator
        model for estimating predictions
    readSections : list like object
        array with the ann sections from WSD1 we want to reinterpret
    dt : float
        time buffer for reading around the desired annotation section

    Returns
    -------
    newT : vertically stacked times of the predicted sections, shifted so
        they are relative to the original recording
    newL : horizontally stacked labels of the predicted sections

    Notes
    -----
    When no annotation label is in `readSections` nothing is collected and
    the stacking at the end fails (NumPy raises ValueError on an empty
    sequence).
    """
    waveform, fs = sT.wav2waveform(wavF)  # load waveform
    annotations = annT.anns2array(annWSD1)  # load annotations

    timeChunks, labelChunks = [], []
    for secStart, secEnd, secLabel in annotations[:]:
        if secLabel not in readSections:
            continue  # only the sections of interest are re-predicted
        sectionWaveform = auf.getWavSec(waveform, fs, secStart - dt, secEnd + dt)
        secT, secL = predictAnnotations(sectionWaveform, fs, feExtFun, lt, WSD2_clf)
        # shift the times so they refer to the whole recording
        timeChunks.append(secT + secStart - dt)
        labelChunks.append(secL)

    newL = np.hstack(labelChunks)
    newT = np.vstack(timeChunks)
    return newT, newL
def TLpredictAnnotationSections(wavF, annF, clf, featExtFun, lt, printProbs=False,
                                readSections=None, printreadSectionsC=True):
    """generates annotations predicting audio section classes

    Parameters
    ----------
    wavF : str
        path to the wav file
    annF : str
        path to the file with the annotation section to predict
    clf : estimator
    featExtFun : callable
        feature extraction function, called with the waveform of a section
    lt : labelTransformer
        its `num2nom` maps the output of `clf.predict` to label names
    printProbs : bool
        not used by this function
    readSections : list of str
        regions in the annF for which we predict,
        None means all the labels found in annF
    printreadSectionsC : bool
        not used by this function

    Returns
    -------
    Tp : times of the sections that were predicted
    Lp : predicted labels, one per row of Tp
    """
    ## load annotations
    waveform, fs = sT.wav2waveform(wavF)
    T, L0 = annT.anns2TLndarrays(annF)

    ## set of annotation-sections to predict
    if readSections is None:
        readSections = np.array(list(set(L0)))

    ## filter for sections of interest
    IO_sections = np.isin(L0, readSections)
    Tp = T[IO_sections]
    L = L0[IO_sections]

    ## predict each annotation section
    predictions = []
    for section in Tp:
        waveformSec = auf.getWavSec(waveform, fs, *section)  # load waveform section
        M0 = featExtFun(waveformSec)  # extract features
        M = np.expand_dims(M0.flatten(), axis=0)
        predictions.append(lt.num2nom(clf.predict(M))[0])  # predict

    if predictions and L.dtype.kind in ('U', 'S'):
        # A container made with zeros_like(L) inherits the fixed string width
        # of the annotation labels and silently truncates longer predictions.
        # Let numpy size the strings and never go below the original width.
        predicted = np.array(predictions, dtype=L.dtype.kind)
        Lp = predicted.astype(np.promote_types(L.dtype, predicted.dtype))
    else:
        Lp = np.zeros_like(L)
        for i, prediction in enumerate(predictions):
            Lp[i] = prediction

    return Tp, Lp
def get_DataXy_fromWavFannF(wavF, annF, feExFun, labelsHierarchy):
    """
    Extract the features of a wav file and the labels (ground truth) of its
    annotations, and return them as a dataXy_names instance.

    Parameters
    ----------
    wavF : str
        path to the wav file
    annF : str
        path to the annotations file
    feExFun : callable
        feature extraction function, called with the whole waveform
    labelsHierarchy : list
        forwarded to `auf.annotationsFi2instances`

    Returns
    -------
    datO : myML.dataXy_names
        built from the feature matrix and the label of each instance
    """
    waveForm, fs = wav2waveform(wavF)
    duration = len(waveForm) / fs  # number of samples over sampling rate
    features = feExFun(waveForm)
    # one label per feature instance: len(features) instances over `duration`
    instanceLabels = auf.annotationsFi2instances(
        annF, len(features), duration, labelsHierarchy=labelsHierarchy
    )
    return myML.dataXy_names(features, instanceLabels)
def predictSoundSections(wavF, clf, lt, feExFun, outF='default', annSections='default'):
    """
    Predict and generate the annotations of the given wav file.

    The waveform is loaded and handed over to
    `predictSectionsFromWaveform_genAnnotations`, which does the work.

    Parameters
    ----------
    wavF : str
        path to wav file
    clf : estimator
        classifier object
    lt : label transformer object
    feExFun : callable
        feature extraction
    outF : str
        annotations out file name. With 'default' the file goes into
        `outDir` and is named after the wav base name with '.wav' replaced
        by '-predictions.txt'. `outDir` is not a parameter, it is taken from
        the enclosing scope.
    annSections : array
        sections to print, forwarded as is ('default' is resolved by the
        callee)

    Returns
    -------
    the value returned by `predictSectionsFromWaveform_genAnnotations`
    """
    if outF == 'default':
        predictionsName = os.path.basename(wavF).replace('.wav', '-predictions.txt')
        outF = os.path.join(outDir, predictionsName)

    waveForm, fs = sT.wav2waveform(wavF)
    return predictSectionsFromWaveform_genAnnotations(
        waveForm, fs, clf, lt, feExFun, outF=outF, annSections=annSections
    )
def predictAnnotationSections(wavF, annF, clf, featExtFun, lt, outFile=None, sep='\t',
                              printProbs=False, header='', readSections=None,
                              printreadSectionsC=True):
    """predicts annotations for call types sections

    Writes one line per annotation section with the fields
    start, end, annotated label and predicted label.

    Parameters
    ----------
    wavF : str
        path to the wav file
    annF : str
        path to the annotations file
    clf : estimator
    featExtFun : callable
        feature extraction function, called with the waveform of a section
    lt : labelTransformer
        its `num2nom` maps the output of `clf.predict` to label names
    outFile : str
        output file, default = annF without extension + '-sectionPredictions.txt'.
        An existing file is removed first.
    sep : str
        field separator of the output lines
    printProbs : bool
        not used by this function
    header : str
        not used by this function
    readSections : list of str
        regions in the annF for which we predict,
        None means all the labels found in annF
    printreadSectionsC : bool
        if True, the sections that are not in readSections are also written,
        with the annotated label in the place of the prediction

    Returns
    -------
    outFile : str
        path of the written file. The file is created even when no line is
        written.

    See also
    --------
    TLpredictAnnotationSections

    TODO: recode to use TLpredictAnnotationSections
    """
    if outFile is None:
        outFile = os.path.splitext(annF)[0] + '-sectionPredictions.txt'

    try:  # remove file if exists
        os.remove(outFile)
    except OSError:
        pass

    ## load files
    waveform, fs = sT.wav2waveform(wavF)
    T, L = annT.anns2TLndarrays(annF)

    # `== None` is elementwise (and ambiguous) when an array is given
    if readSections is None:
        readSections = set(L)

    def _row(*fields):
        # same text as "{}" formatting each field, joined with `sep`
        return sep.join("{}".format(field) for field in fields) + "\n"

    ## for each annotation section; the file is opened only once
    with open(outFile, 'a') as f:
        for i, label in enumerate(L):
            if label in readSections:
                waveformSec = auf.getWavSec(waveform, fs, *T[i])
                ## predict
                try:
                    M0 = featExtFun(waveformSec)  # extract features
                    M = np.expand_dims(M0.flatten(), axis=0)
                    y_pred = lt.num2nom(clf.predict(M))  # predict label
                except AssertionError:
                    # fall back to the annotated label when an assertion
                    # fails while extracting features or predicting
                    y_pred = [label]
                ## write
                f.write(_row(T[i, 0], T[i, 1], label, y_pred[0]))
            elif printreadSectionsC:
                f.write(_row(T[i, 0], T[i, 1], label, label))

    return outFile