def fileRegression(inputFile, modelName, modelType):
    # Load classifier:

    if not os.path.isfile(inputFile):
        print "fileClassification: wav file not found!"
        return (-1, -1, -1)

    regressionModels = glob.glob(modelName + "_*")
    regressionModels2 = []
    for r in regressionModels:
        if r[-5::] != "MEANS":
            regressionModels2.append(r)
    regressionModels = regressionModels2
    regressionNames = []
    for r in regressionModels:
        regressionNames.append(r[r.rfind("_") + 1::])

    # FEATURE EXTRACTION
    # LOAD ONLY THE FIRST MODEL (for mtWin, etc)
    if modelType == 'svm' or modelType == "svm_rbf":
        [_, _, _, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadSVModel(regressionModels[0], True)
    elif modelType == 'randomforest':
        [_, _, _, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadRandomForestModel(regressionModels[0], True)

    [Fs, x] = audioBasicIO.readAudioFile(
        inputFile)  # read audio file and convert to mono
    x = audioBasicIO.stereo2mono(x)
    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    MidTermFeatures = MidTermFeatures.mean(
        axis=1)  # long term averaging of mid-term statistics
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)

    # REGRESSION
    R = []
    for ir, r in enumerate(regressionModels):
        if not os.path.isfile(r):
            print "fileClassification: input modelName not found!"
            return (-1, -1, -1)
        if modelType == 'svm' or modelType == "svm_rbf":
            [Model, MEAN, STD, mtWin, mtStep, stWin, stStep,
             computeBEAT] = loadSVModel(r, True)
        elif modelType == 'randomforest':
            [Model, MEAN, STD, mtWin, mtStep, stWin, stStep,
             computeBEAT] = loadRandomForestModel(r, True)
        curFV = (MidTermFeatures - MEAN) / STD  # normalization
        R.append(regressionWrapper(Model, modelType, curFV))  # classification
    return R, regressionNames
def hmmSegmentation(wavFileName, hmmModelName, PLOT=False, gtFileName=""):
    [Fs, x] = audioBasicIO.readAudioFile(wavFileName)  # read audio data

    try:
        fo = open(hmmModelName, "rb")
    except IOError:
        print "didn't find file"
        return

    try:
        hmm = cPickle.load(fo)
        classesAll = cPickle.load(fo)
        mtWin = cPickle.load(fo)
        mtStep = cPickle.load(fo)
    except:
        fo.close()
    fo.close()

    #Features = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.050*Fs);    # feature extraction
    [Features, _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs,
                                           round(Fs * 0.050),
                                           round(Fs * 0.050))
    flagsInd = hmm.predict(Features.T)  # apply model
    #for i in range(len(flagsInd)):
    #    if classesAll[flagsInd[i]]=="silence":
    #        flagsInd[i]=classesAll.index("speech")

    # plot results
    if os.path.isfile(gtFileName):
        [segStart, segEnd, segLabels] = readSegmentGT(gtFileName)
        flagsGT, classNamesGT = segs2flags(segStart, segEnd, segLabels, mtStep)
        flagsGTNew = []
        for j, fl in enumerate(flagsGT):  # "align" labels with GT
            if classNamesGT[flagsGT[j]] in classesAll:
                flagsGTNew.append(classesAll.index(classNamesGT[flagsGT[j]]))
            else:
                flagsGTNew.append(-1)
        CM = numpy.zeros((len(classNamesGT), len(classNamesGT)))
        flagsIndGT = numpy.array(flagsGTNew)
        for i in range(min(flagsInd.shape[0], flagsIndGT.shape[0])):
            CM[int(flagsIndGT[i]), int(flagsInd[i])] += 1
    else:
        flagsIndGT = numpy.array([])
    acc = plotSegmentationResults(flagsInd, flagsIndGT, classesAll, mtStep,
                                  not PLOT)
    if acc >= 0:
        print "Overall Accuracy: {0:.2f}".format(acc)
        return (flagsInd, classNamesGT, acc, CM)
    else:
        return (flagsInd, classesAll, -1, -1)
def trainHMM_fromFile(wavFile, gtFile, hmmModelName, mtWin, mtStep):
    '''
    This function trains a HMM model for segmentation-classification using a single annotated audio file
    ARGUMENTS:
     - wavFile:        the path of the audio filename
     - gtFile:         the path of the ground truth filename
                       (a csv file of the form <segment start in seconds>,<segment end in seconds>,<segment label> in each row
     - hmmModelName:   the name of the HMM model to be stored
     - mtWin:          mid-term window size
     - mtStep:         mid-term window step
    RETURNS:
     - hmm:            an object to the resulting HMM
     - classNames:     a list of classNames

    After training, hmm, classNames, along with the mtWin and mtStep values are stored in the hmmModelName file
    '''

    [segStart, segEnd,
     segLabels] = readSegmentGT(gtFile)  # read ground truth data
    flags, classNames = segs2flags(
        segStart, segEnd, segLabels,
        mtStep)  # convert to fix-sized sequence of flags

    [Fs, x] = audioBasicIO.readAudioFile(wavFile)  # read audio data
    #F = aF.stFeatureExtraction(x, Fs, 0.050*Fs, 0.050*Fs);
    [F, _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs,
                                    round(Fs * 0.050),
                                    round(Fs * 0.050))  # feature extraction
    startprob, transmat, means, cov = trainHMM_computeStatistics(
        F, flags)  # compute HMM statistics (priors, transition matrix, etc)

    hmm = hmmlearn.hmm.GaussianHMM(startprob.shape[0], "diag")  # hmm training

    hmm.startprob_ = startprob
    hmm.transmat_ = transmat
    hmm.means_ = means
    hmm.covars_ = cov

    fo = open(hmmModelName, "wb")  # output to file
    cPickle.dump(hmm, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    fo.close()

    return hmm, classNames
def speakerDiarization(fileName,
                       numOfSpeakers,
                       mtSize=2.0,
                       mtStep=0.2,
                       stWin=0.05,
                       LDAdim=35,
                       PLOT=False):
    '''
    ARGUMENTS:
        - fileName:        the name of the WAV file to be analyzed
        - numOfSpeakers    the number of speakers (clusters) in the recording (<=0 for unknown)
        - mtSize (opt)     mid-term window size
        - mtStep (opt)     mid-term window step
        - stWin  (opt)     short-term window size
        - LDAdim (opt)     LDA dimension (0 for no LDA)
        - PLOT     (opt)   0 for not plotting the results 1 for plottingy
    '''
    [Fs, x] = audioBasicIO.readAudioFile(fileName)
    print fileName
    x = audioBasicIO.stereo2mono(x)
    Duration = len(x) / Fs

    [
        Classifier1, MEAN1, STD1, classNames1, mtWin1, mtStep1, stWin1,
        stStep1, computeBEAT1
    ] = aT.loadKNNModel(os.path.join("data", "knnSpeakerAll"))
    [
        Classifier2, MEAN2, STD2, classNames2, mtWin2, mtStep2, stWin2,
        stStep2, computeBEAT2
    ] = aT.loadKNNModel(os.path.join("data", "knnSpeakerFemaleMale"))

    [MidTermFeatures,
     ShortTermFeatures] = aF.mtFeatureExtraction(x, Fs,
                                                 mtSize * Fs, mtStep * Fs,
                                                 round(Fs * stWin),
                                                 round(Fs * stWin * 0.5))

    MidTermFeatures2 = numpy.zeros(
        (MidTermFeatures.shape[0] + len(classNames1) + len(classNames2),
         MidTermFeatures.shape[1]))

    for i in range(MidTermFeatures.shape[1]):
        curF1 = (MidTermFeatures[:, i] - MEAN1) / STD1
        curF2 = (MidTermFeatures[:, i] - MEAN2) / STD2
        [Result, P1] = aT.classifierWrapper(Classifier1, "knn", curF1)
        [Result, P2] = aT.classifierWrapper(Classifier2, "knn", curF2)
        MidTermFeatures2[0:MidTermFeatures.shape[0], i] = MidTermFeatures[:, i]
        MidTermFeatures2[MidTermFeatures.shape[0]:MidTermFeatures.shape[0] +
                         len(classNames1), i] = P1 + 0.0001
        MidTermFeatures2[MidTermFeatures.shape[0] + len(classNames1)::,
                         i] = P2 + 0.0001

    MidTermFeatures = MidTermFeatures2  # TODO
    # SELECT FEATURES:
    #iFeaturesSelect = [8,9,10,11,12,13,14,15,16,17,18,19,20];                                                                                         # SET 0A
    #iFeaturesSelect = [8,9,10,11,12,13,14,15,16,17,18,19,20, 99,100];                                                                                 # SET 0B
    #iFeaturesSelect = [8,9,10,11,12,13,14,15,16,17,18,19,20, 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,
    #   97,98, 99,100];     # SET 0C

    iFeaturesSelect = [
        8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 41, 42, 43, 44, 45,
        46, 47, 48, 49, 50, 51, 52, 53
    ]  # SET 1A
    #iFeaturesSelect = [8,9,10,11,12,13,14,15,16,17,18,19,20,41,42,43,44,45,46,47,48,49,50,51,52,53, 99,100];                                          # SET 1B
    #iFeaturesSelect = [8,9,10,11,12,13,14,15,16,17,18,19,20,41,42,43,44,45,46,47,48,49,50,51,52,53, 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98, 99,100];     # SET 1C

    #iFeaturesSelect = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53];             # SET 2A
    #iFeaturesSelect = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53, 99,100];     # SET 2B
    #iFeaturesSelect = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53, 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98, 99,100];     # SET 2C

    #iFeaturesSelect = range(100);                                                                                                    # SET 3
    #MidTermFeatures += numpy.random.rand(MidTermFeatures.shape[0], MidTermFeatures.shape[1]) * 0.000000010

    MidTermFeatures = MidTermFeatures[iFeaturesSelect, :]

    (MidTermFeaturesNorm, MEAN,
     STD) = aT.normalizeFeatures([MidTermFeatures.T])
    MidTermFeaturesNorm = MidTermFeaturesNorm[0].T
    numOfWindows = MidTermFeatures.shape[1]

    # remove outliers:
    DistancesAll = numpy.sum(distance.squareform(
        distance.pdist(MidTermFeaturesNorm.T)),
                             axis=0)
    MDistancesAll = numpy.mean(DistancesAll)
    iNonOutLiers = numpy.nonzero(DistancesAll < 1.2 * MDistancesAll)[0]

    # TODO: Combine energy threshold for outlier removal:
    #EnergyMin = numpy.min(MidTermFeatures[1,:])
    #EnergyMean = numpy.mean(MidTermFeatures[1,:])
    #Thres = (1.5*EnergyMin + 0.5*EnergyMean) / 2.0
    #iNonOutLiers = numpy.nonzero(MidTermFeatures[1,:] > Thres)[0]
    #print iNonOutLiers

    perOutLier = (100.0 *
                  (numOfWindows - iNonOutLiers.shape[0])) / numOfWindows
    MidTermFeaturesNormOr = MidTermFeaturesNorm
    MidTermFeaturesNorm = MidTermFeaturesNorm[:, iNonOutLiers]

    # LDA dimensionality reduction:
    if LDAdim > 0:
        #[mtFeaturesToReduce, _] = aF.mtFeatureExtraction(x, Fs, mtSize * Fs, stWin * Fs, round(Fs*stWin), round(Fs*stWin));
        # extract mid-term features with minimum step:
        mtWinRatio = int(round(mtSize / stWin))
        mtStepRatio = int(round(stWin / stWin))
        mtFeaturesToReduce = []
        numOfFeatures = len(ShortTermFeatures)
        numOfStatistics = 2
        #for i in range(numOfStatistics * numOfFeatures + 1):
        for i in range(numOfStatistics * numOfFeatures):
            mtFeaturesToReduce.append([])

        for i in range(numOfFeatures):  # for each of the short-term features:
            curPos = 0
            N = len(ShortTermFeatures[i])
            while (curPos < N):
                N1 = curPos
                N2 = curPos + mtWinRatio
                if N2 > N:
                    N2 = N
                curStFeatures = ShortTermFeatures[i][N1:N2]
                mtFeaturesToReduce[i].append(numpy.mean(curStFeatures))
                mtFeaturesToReduce[i + numOfFeatures].append(
                    numpy.std(curStFeatures))
                curPos += mtStepRatio
        mtFeaturesToReduce = numpy.array(mtFeaturesToReduce)
        mtFeaturesToReduce2 = numpy.zeros(
            (mtFeaturesToReduce.shape[0] + len(classNames1) + len(classNames2),
             mtFeaturesToReduce.shape[1]))
        for i in range(mtFeaturesToReduce.shape[1]):
            curF1 = (mtFeaturesToReduce[:, i] - MEAN1) / STD1
            curF2 = (mtFeaturesToReduce[:, i] - MEAN2) / STD2
            [Result, P1] = aT.classifierWrapper(Classifier1, "knn", curF1)
            [Result, P2] = aT.classifierWrapper(Classifier2, "knn", curF2)
            mtFeaturesToReduce2[0:mtFeaturesToReduce.shape[0],
                                i] = mtFeaturesToReduce[:, i]
            mtFeaturesToReduce2[
                mtFeaturesToReduce.shape[0]:mtFeaturesToReduce.shape[0] +
                len(classNames1), i] = P1 + 0.0001
            mtFeaturesToReduce2[mtFeaturesToReduce.shape[0] +
                                len(classNames1)::, i] = P2 + 0.0001
        mtFeaturesToReduce = mtFeaturesToReduce2
        mtFeaturesToReduce = mtFeaturesToReduce[iFeaturesSelect, :]
        #mtFeaturesToReduce += numpy.random.rand(mtFeaturesToReduce.shape[0], mtFeaturesToReduce.shape[1]) * 0.0000010
        (mtFeaturesToReduce, MEAN,
         STD) = aT.normalizeFeatures([mtFeaturesToReduce.T])
        mtFeaturesToReduce = mtFeaturesToReduce[0].T
        #DistancesAll = numpy.sum(distance.squareform(distance.pdist(mtFeaturesToReduce.T)), axis=0)
        #MDistancesAll = numpy.mean(DistancesAll)
        #iNonOutLiers2 = numpy.nonzero(DistancesAll < 3.0*MDistancesAll)[0]
        #mtFeaturesToReduce = mtFeaturesToReduce[:, iNonOutLiers2]
        Labels = numpy.zeros((mtFeaturesToReduce.shape[1], ))
        LDAstep = 1.0
        LDAstepRatio = LDAstep / stWin
        #print LDAstep, LDAstepRatio
        for i in range(Labels.shape[0]):
            Labels[i] = int(i * stWin / LDAstepRatio)
        clf = sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
            n_components=LDAdim)
        clf.fit(mtFeaturesToReduce.T, Labels)
        MidTermFeaturesNorm = (clf.transform(MidTermFeaturesNorm.T)).T

    if numOfSpeakers <= 0:
        sRange = range(2, 10)
    else:
        sRange = [numOfSpeakers]
    clsAll = []
    silAll = []
    centersAll = []

    for iSpeakers in sRange:
        k_means = sklearn.cluster.KMeans(n_clusters=iSpeakers)
        k_means.fit(MidTermFeaturesNorm.T)
        cls = k_means.labels_
        means = k_means.cluster_centers_

        # Y = distance.squareform(distance.pdist(MidTermFeaturesNorm.T))
        clsAll.append(cls)
        centersAll.append(means)
        silA = []
        silB = []
        for c in range(iSpeakers
                       ):  # for each speaker (i.e. for each extracted cluster)
            clusterPerCent = numpy.nonzero(cls == c)[0].shape[0] / float(
                len(cls))
            if clusterPerCent < 0.020:
                silA.append(0.0)
                silB.append(0.0)
            else:
                MidTermFeaturesNormTemp = MidTermFeaturesNorm[:, cls ==
                                                              c]  # get subset of feature vectors
                Yt = distance.pdist(
                    MidTermFeaturesNormTemp.T
                )  # compute average distance between samples that belong to the cluster (a values)
                silA.append(numpy.mean(Yt) * clusterPerCent)
                silBs = []
                for c2 in range(
                        iSpeakers
                ):  # compute distances from samples of other clusters
                    if c2 != c:
                        clusterPerCent2 = numpy.nonzero(
                            cls == c2)[0].shape[0] / float(len(cls))
                        MidTermFeaturesNormTemp2 = MidTermFeaturesNorm[:,
                                                                       cls ==
                                                                       c2]
                        Yt = distance.cdist(MidTermFeaturesNormTemp.T,
                                            MidTermFeaturesNormTemp2.T)
                        silBs.append(
                            numpy.mean(Yt) *
                            (clusterPerCent + clusterPerCent2) / 2.0)
                silBs = numpy.array(silBs)
                silB.append(
                    min(silBs)
                )  # ... and keep the minimum value (i.e. the distance from the "nearest" cluster)
        silA = numpy.array(silA)
        silB = numpy.array(silB)
        sil = []
        for c in range(iSpeakers):  # for each cluster (speaker)
            sil.append((silB[c] - silA[c]) /
                       (max(silB[c], silA[c]) + 0.00001))  # compute silhouette

        silAll.append(numpy.mean(sil))  # keep the AVERAGE SILLOUETTE

    #silAll = silAll * (1.0/(numpy.power(numpy.array(sRange),0.5)))
    imax = numpy.argmax(silAll)  # position of the maximum sillouette value
    nSpeakersFinal = sRange[imax]  # optimal number of clusters

    # generate the final set of cluster labels
    # (important: need to retrieve the outlier windows: this is achieved by giving them the value of their nearest non-outlier window)
    cls = numpy.zeros((numOfWindows, ))
    for i in range(numOfWindows):
        j = numpy.argmin(numpy.abs(i - iNonOutLiers))
        cls[i] = clsAll[imax][j]

    # Post-process method 1: hmm smoothing
    for i in range(1):
        startprob, transmat, means, cov = trainHMM_computeStatistics(
            MidTermFeaturesNormOr, cls)
        hmm = hmmlearn.hmm.GaussianHMM(startprob.shape[0],
                                       "diag")  # hmm training
        hmm.startprob_ = startprob
        hmm.transmat_ = transmat
        hmm.means_ = means
        hmm.covars_ = cov
        cls = hmm.predict(MidTermFeaturesNormOr.T)

    # Post-process method 2: median filtering:
    cls = scipy.signal.medfilt(cls, 13)
    cls = scipy.signal.medfilt(cls, 11)

    sil = silAll[imax]  # final sillouette
    classNames = ["speaker{0:d}".format(c) for c in range(nSpeakersFinal)]

    # load ground-truth if available
    gtFile = fileName.replace('.wav', '.segments')
    # open for annotated file
    if os.path.isfile(gtFile):  # if groundturh exists
        [segStart, segEnd, segLabels] = readSegmentGT(gtFile)  # read GT data
        flagsGT, classNamesGT = segs2flags(segStart, segEnd, segLabels,
                                           mtStep)  # convert to flags

    #     if PLOT:
    # fig = plt.figure()
    #     if numOfSpeakers>0:
    #         ax1 = fig.add_subplot(111)
    #     else:
    #         ax1 = fig.add_subplot(211)
    #     ax1.set_yticks(numpy.array(range(len(classNames))))
    #     ax1.axis((0, Duration, -1, len(classNames)))
    #     ax1.set_yticklabels(classNames)
    #     ax1.plot(numpy.array(range(len(cls)))*mtStep+mtStep/2.0, cls)

    if os.path.isfile(gtFile):
        # if PLOT:
        #     ax1.plot(numpy.array(range(len(flagsGT)))*mtStep+mtStep/2.0, flagsGT, 'r')
        purityClusterMean, puritySpeakerMean = evaluateSpeakerDiarization(
            cls, flagsGT)
        print "{0:.1f}\t{1:.1f}".format(100 * purityClusterMean,
                                        100 * puritySpeakerMean)
        # if PLOT:
        #     plt.title("Cluster purity: {0:.1f}% - Speaker purity: {1:.1f}%".format(100*purityClusterMean, 100*puritySpeakerMean) )
    # if PLOT:
    #     plt.xlabel("time (seconds)")
    #     #print sRange, silAll
    #     if numOfSpeakers<=0:
    #         plt.subplot(212)
    #         plt.plot(sRange, silAll)
    #         plt.xlabel("number of clusters");
    #         plt.ylabel("average clustering's sillouette");
    #     plt.show()
    return cls
def mtFileClassification(inputFile,
                         modelName,
                         modelType,
                         plotResults=False,
                         gtFile=""):
    '''
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used, i.e. a pre-trained classifier.
    ARGUMENTS:
        - inputFile:        path of the input WAV file
        - modelName:        name of the classification model
        - modelType:        svm or knn depending on the classifier type
        - plotResults:      True if results are to be plotted using matplotlib along with a set of statistics

    RETURNS:
          - segs:           a sequence of segment's endpoints: segs[i] is the endpoint of the i-th segment (in seconds)
          - classes:        a sequence of class flags: class[i] is the class ID of the i-th segment
    '''

    if not os.path.isfile(modelName):
        print "mtFileClassificationError: input modelType not found!"
        return (-1, -1, -1, -1)
    # Load classifier:
    if (modelType == 'svm') or (modelType == 'svm_rbf'):
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadKNNModel(modelName)
    elif modelType == 'randomforest':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadRandomForestModel(modelName)
    elif modelType == 'gradientboosting':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadGradientBoostingModel(modelName)
    elif modelType == 'extratrees':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadExtraTreesModel(modelName)

    if computeBEAT:
        print "Model " + modelName + " contains long-term music features (beat etc) and cannot be used in segmentation"
        return (-1, -1, -1, -1)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # load input file
    if Fs == -1:  # could not read file
        return (-1, -1, -1, -1)
    x = audioBasicIO.stereo2mono(x)  # convert stereo (if) to mono
    Duration = len(x) / Fs
    # mid-term feature extraction:
    [MidTermFeatures, _] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    flags = []
    Ps = []
    flagsInd = []
    for i in range(
            MidTermFeatures.shape[1]
    ):  # for each feature vector (i.e. for each fix-sized segment):
        curFV = (MidTermFeatures[:, i] -
                 MEAN) / STD  # normalize current feature vector
        [Result, P] = aT.classifierWrapper(Classifier, modelType,
                                           curFV)  # classify vector
        flagsInd.append(Result)
        flags.append(classNames[int(Result)])  # update class label matrix
        Ps.append(numpy.max(P))  # update probability matrix
    flagsInd = numpy.array(flagsInd)

    # 1-window smoothing
    for i in range(1, len(flagsInd) - 1):
        if flagsInd[i - 1] == flagsInd[i + 1]:
            flagsInd[i] = flagsInd[i + 1]
    (segs, classes) = flags2segs(
        flags, mtStep)  # convert fix-sized flags to segments and classes
    segs[-1] = len(x) / float(Fs)

    # Load grount-truth:
    if os.path.isfile(gtFile):
        [segStartGT, segEndGT, segLabelsGT] = readSegmentGT(gtFile)
        flagsGT, classNamesGT = segs2flags(segStartGT, segEndGT, segLabelsGT,
                                           mtStep)
        flagsIndGT = []
        for j, fl in enumerate(flagsGT):  # "align" labels with GT
            if classNamesGT[flagsGT[j]] in classNames:
                flagsIndGT.append(classNames.index(classNamesGT[flagsGT[j]]))
            else:
                flagsIndGT.append(-1)
        flagsIndGT = numpy.array(flagsIndGT)
        CM = numpy.zeros((len(classNamesGT), len(classNamesGT)))
        for i in range(min(flagsInd.shape[0], flagsIndGT.shape[0])):
            CM[int(flagsIndGT[i]), int(flagsInd[i])] += 1
    else:
        CM = []
        flagsIndGT = numpy.array([])
    acc = plotSegmentationResults(flagsInd, flagsIndGT, classNames, mtStep,
                                  not plotResults)
    if acc >= 0:
        print "Overall Accuracy: {0:.3f}".format(acc)
        return (flagsInd, classNamesGT, acc, CM)
    else:
        return (flagsInd, classNames, acc, CM)
def trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep):
    '''
    This function trains a HMM model for segmentation-classification using a where WAV files and .segment (ground-truth files) are stored
    ARGUMENTS:
     - dirPath:        the path of the data diretory
     - hmmModelName:    the name of the HMM model to be stored
     - mtWin:        mid-term window size
     - mtStep:        mid-term window step
    RETURNS:
     - hmm:            an object to the resulting HMM
     - classNames:        a list of classNames

    After training, hmm, classNames, along with the mtWin and mtStep values are stored in the hmmModelName file
    '''

    flagsAll = numpy.array([])
    classesAll = []
    for i, f in enumerate(glob.glob(dirPath + os.sep +
                                    '*.wav')):  # for each WAV file
        wavFile = f
        gtFile = f.replace('.wav', '.segments')  # open for annotated file
        if not os.path.isfile(
                gtFile
        ):  # if current WAV file does not have annotation -> skip
            continue
        [segStart, segEnd, segLabels] = readSegmentGT(gtFile)  # read GT data
        flags, classNames = segs2flags(segStart, segEnd, segLabels,
                                       mtStep)  # convert to flags
        for c in classNames:  # update classnames:
            if c not in classesAll:
                classesAll.append(c)
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)  # read audio data
        [F,
         _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs,
                                     round(Fs * 0.050),
                                     round(Fs * 0.050))  # feature extraction

        lenF = F.shape[1]
        lenL = len(flags)
        MIN = min(lenF, lenL)
        F = F[:, 0:MIN]
        flags = flags[0:MIN]

        flagsNew = []
        for j, fl in enumerate(flags):  # append features and labels
            flagsNew.append(classesAll.index(classNames[flags[j]]))

        flagsAll = numpy.append(flagsAll, numpy.array(flagsNew))

        if i == 0:
            Fall = F
        else:
            Fall = numpy.concatenate((Fall, F), axis=1)
    startprob, transmat, means, cov = trainHMM_computeStatistics(
        Fall, flagsAll)  # compute HMM statistics
    hmm = hmmlearn.hmm.GaussianHMM(startprob.shape[0], "diag")  # train HMM
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat
    hmm.means_ = means
    hmm.covars_ = cov

    fo = open(hmmModelName, "wb")  # save HMM model
    cPickle.dump(hmm, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(classesAll, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    fo.close()

    return hmm, classesAll
def fileClassification(inputFile, modelName, modelType):
    # Load classifier:

    if not os.path.isfile(modelName):
        print "fileClassification: input modelName not found!"
        return (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print "fileClassification: wav file not found!"
        return (-1, -1, -1)

    if (modelType) == 'svm' or (modelType == 'svm_rbf'):
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = loadSVModel(modelName)
    elif modelType == 'knn':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = loadKNNModel(modelName)
    elif modelType == 'randomforest':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = loadRandomForestModel(modelName)
    elif modelType == 'gradientboosting':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = loadGradientBoostingModel(modelName)
    elif modelType == 'extratrees':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = loadExtraTreesModel(modelName)

    [Fs, x] = audioBasicIO.readAudioFile(
        inputFile)  # read audio file and convert to mono
    x = audioBasicIO.stereo2mono(x)

    if isinstance(x, int):  # audio file IO problem
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mtWin:
        return (-1, -1, -1)

    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    MidTermFeatures = MidTermFeatures.mean(
        axis=1)  # long term averaging of mid-term statistics
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)
    curFV = (MidTermFeatures - MEAN) / STD  # normalization

    [Result, P] = classifierWrapper(Classifier, modelType,
                                    curFV)  # classification
    return Result, P, classNames