Example #1
    def __init__(self):
        self.p = Printer(1)
        self.param = Params()
        self.m = Math()
        self.am = AudioManager()
        self.paths = Paths()
        self.trainingDesc, self.testingDesc = self.scanForAudioFiles()
Example #2
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle, lTag=self.paths.tag1, sTag=self.paths.tag2)

        self.data = self.pickle.LoadData()
Example #3
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle, sTag=self.paths.tag1)

        self.data.raw = self.am.readAudio(self.paths.file)
Example #4
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()

        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()
Example #5
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag4,
                             sTag=self.paths.tag5)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.encoded = self.pickle.LoadEncoded()
Example #6
class Decoder:
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag4,
                             sTag=self.paths.tag5)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.encoded = self.pickle.LoadEncoded()

    def separateBins(self):
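        # Split the concatenated bit string into per-frame fields:
        # 5 gain bits, 7 pitch bits and 34 LSP bits (46 bits per frame).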
        binary = self.encoded.binaries
        innerL = [5, 7, 34]
        gain = []
        pitch = []
        lsp = []
        for i, s in enumerate(range(0, len(binary), sum(innerL))):
            separated = []
            eLast = s
            for e in innerL:
                separated.append(binary[eLast:eLast + e])
                eLast = eLast + e
            gain.append(separated[0])
            pitch.append(separated[1])
            lsp.append(separated[2])
        return gain, pitch, lsp

    def debinariseLsp(self, bLsp):
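        # Dequantise each frame's LSP bits segment by segment and undo the
        # coder's scaling (coefficients were divided by 2*pi before quantisation).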
        frames = self.setupLspFrames()
        qLsp = []
        bcLsp = []
        for i, b in enumerate(bLsp):
            idx = 0
            qlsp = []
            bclsp = []
            for j, f in enumerate(frames):
                bc = b[idx:idx + f.bits]
                qb = self.cu.debinarise([bc], f)[0] / 0.5 * np.pi
                idx = idx + f.bits
                qlsp.append(qb)
                bclsp.append(bc)

            qLsp.append(qlsp)
            bcLsp.append(bclsp)

        return qLsp, bcLsp

    def debinariseGain(self, bGain, maxGain):
        gainFrame = CodeFrame(self.cc.gainSegments)
        gainFrame.scale(maxGain)
        qGain = self.cu.debinarise(bGain, gainFrame)
        return qGain

    def debinarisePitch(self, bPitch):
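        # Convert each 7-bit pitch field back to an integer; voiced (non-zero)
        # values get the coder's offset of 19 added back, zero stays unvoiced.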
        qPitch = []
        for i, b in enumerate(bPitch):
            q = int(b, 2)
            q = q + (0 if q == 0 else 19)
            qPitch.append(q)
        return qPitch

    def lspToLpc(self, lsp):
        lsp = np.array(lsp)
        return self.cu.lsf_to_lpc(lsp)

    def setupLspFrames(self):
        frames = []
        for s in self.cc.lspSegments:
            frames.append(CodeFrame(s))
        return frames

    def Save(self, qlpc, qpitch, qgain):
        self.decoded = DecodedData()
        self.decoded.gain = qgain
        self.decoded.pitch = qpitch
        self.decoded.lpc = qlpc
        self.pickle.SaveDecoded(self.decoded)

    def run(self, save=1):
        maxGain = self.encoded.maxGain
        bGain, bPitch, bLsp = self.separateBins()
        qGain = self.debinariseGain(bGain, maxGain)
        qPitch = self.debinarisePitch(bPitch)
        qLsp, bcLsp = self.debinariseLsp(bLsp)

        qLpc = self.lspToLpc(qLsp)
        qLpc = self.cu.removeLpcPrefix(qLpc)
        self.Save(qLpc, qPitch, qGain)

        for step in range(len(bGain)):

            if (step in self.pc.stepInto5 or step in self.pc.stepIntoAll):

                self.p.prnt(2, ' ', 1)
                self.p.prnt(
                    2, '    bitcount  -> ' + str(len(self.encoded.binaries)),
                    1)

                self.p.prnt(
                    2, '    gain  -> ' + str(bGain[step]) + "    -  " +
                    str(qGain[step]), 1)

                self.p.prnt(
                    2, '    pitch -> ' + str(bPitch[step]) + "  -  " +
                    str(qPitch[step]), 1)
                self.p.prnt(2, '    lsp   -> ' + str(bLsp[step]), 1)
                self.p.prnt(2, ' ', 1)
                for i, lspCoef in enumerate(qLsp[step]):
                    self.p.prnt(
                        2, '       lsp ' + str(i) + ' -> ' +
                        str(bcLsp[step][i]) + "  -  " + str(lspCoef), 1)
                self.p.prnt(2, ' ', 1)
                for i, lpcCoef in enumerate(qLpc[step]):
                    self.p.prnt(2,
                                '       lpc ' + str(i) + ' -> ' + str(lpcCoef),
                                1)


#Decoder().run()
Example #7
class Coder:
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag3,
                             sTag=self.paths.tag4)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.data = self.pickle.LoadData()

    def binariseGain(self):
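        # Quantise the gain track with a CodeFrame scaled to its maximum value;
        # maxGain is returned so the decoder can rebuild the same scale.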
        gainFrame = CodeFrame(self.cc.gainSegments)
        maxGain = np.max(self.data.gain)
        gainFrame.scale(maxGain)
        binary, quanta, indice = self.cu.binarise(self.data.gain, gainFrame)
        return binary, maxGain

    def binarisePitch(self):
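        # 7-bit pitch code per frame: voiced pitches are shifted down by
        # (param.pi - 1) to fit the range, unvoiced frames stay 0.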
        bits = 7
        binary = []
        dig = []
        for i, p in enumerate(self.data.pitch):
            shifted = (p - self.param.pi + 1 if p != 0 else p).astype(np.uint8)
            dig.append(shifted[0, 0])
            binary.append(np.binary_repr(shifted[0, 0], width=bits))
        return binary

    def lpcToLsp(self, lpc):
        return self.cu.lpc_to_lsf(lpc)

    def setupLspFrames(self):
        frames = []
        for s in self.cc.lspSegments:
            frames.append(CodeFrame(s))
        return frames

    def binariseLsp(self, LSP):
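        # Quantise each LSP coefficient with its own CodeFrame after scaling it by 0.5/pi.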
        frames = self.setupLspFrames()
        lspBinaries = []
        for j, lsp in enumerate(LSP):
            lspBin = []
            for i, coef in enumerate(lsp):
                c = [coef * 0.5 / np.pi]
                binary, quanta, indice = self.cu.binarise(c, frames[i])
                lspBin.append(binary[0])

            lspBinaries.append(lspBin)
        return lspBinaries

    def comoposeBinaries(self, gain, pitch, lsp):
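        # Concatenate the gain, pitch and LSP bit strings of each frame into one per-frame bit string.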
        allBins = []
        for i in range(len(gain)):
            blsp = ''
            for ls in lsp[i]:
                blsp = blsp + ls
            oneBin = gain[i] + pitch[i] + blsp
            allBins.append(oneBin)
        return allBins

    def zipBinaries(self, coded, maxGain):
        encoded = EncodedData()
        encoded.maxGain = maxGain
        binary = ''
        for c in coded:
            binary = binary + c
        encoded.binaries = binary
        return encoded

    def run(self, save=1):
        bGain, maxGain = self.binariseGain()
        bPitch = self.binarisePitch()
        LPC1 = self.cu.prefixLpcWith1(self.data.lpc)
        LSP = self.lpcToLsp(LPC1)
        bLSP = self.binariseLsp(LSP)
        coded = self.comoposeBinaries(bGain, bPitch, bLSP)
        encoded = self.zipBinaries(coded, maxGain)
        self.pickle.SaveEncoded(encoded)

        for step in range(len(bGain)):
            if (step in self.pc.stepInto4 or step in self.pc.stepIntoAll):

                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In Forth Cycle"), 1)
                self.p.prnt(2, ' ', 1)
                self.p.prnt(2, '    gain max  -> ' + str(maxGain), 1)
                self.p.prnt(
                    2, '    gain  -> ' + str(self.data.gain[step, 0]) +
                    "  ==  " + str(bGain[step]), 1)
                self.p.prnt(
                    2, '    pitch -> ' + str(self.data.pitch[step, 0]) +
                    "  ==  " + str(bPitch[step]), 1)
                self.p.prnt(2, ' ', 1)
                for i, c in enumerate(self.data.lpc[step]):
                    self.p.prnt(
                        2, '    lpc ' + str(i) + ' -> ' + str(round(c, 4)) +
                        "\t" + bLSP[step][i], 1)
                self.p.prnt(2, ' ', 1)
                for i, c in enumerate(LSP[step]):
                    self.p.prnt(
                        2, '    lsp ' + str(i) + ' -> ' +
                        str(round(LSP[step][i], 4)) + "\t" + bLSP[step][i], 1)
                self.p.prnt(2, ' ', 1)
                self.p.prnt(2, '    lsp   -> ' + coded[step], 1)

                self.p.prnt(2, ' ', 1)
Example #8
class Preprocess:
    def __init__(self):
        self.p = Printer(1)
        self.param = Params()
        self.m = Math()
        self.am = AudioManager()
        self.paths = Paths()
        self.trainingDesc, self.testingDesc = self.scanForAudioFiles()

    def scanForAudioFiles(self):
        trainPaths = self.am.scanDirectory(self.paths.folderTrain)
        testPaths = self.am.scanDirectory(self.paths.folderTest)
        return trainPaths, testPaths

    def readAudioFile(self, desc):
        path = desc[2]
        raw = self.am.readAudio(path)
        return raw

    def getSignalEnergy(self, raw):
        energy = []
        stp = self.param.step
        for step, idx in enumerate(range(0, len(raw), stp)):
            e = np.sum(raw[idx:idx + stp]**2)
            energy.append(e)
        return energy

    def getSpeech(self, raw, energy):
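        # Energy-based voice activity detection: open a span when the energy passes
        # the first activation threshold, confirm it with the second one and close it
        # when the energy drops below the deactivation level; the spans are then
        # joined and filtered further down.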
        rawAbs = abs(raw)
        stp = self.param.step

        whiteNoiseRef = 100
        activationScale = [50, 1000, 100]
        dectivationScale = [100]

        activated = [0, 0, 0]

        lastActivated = 0
        spans = []
        span = []
        spanMaxRef = []
        maxRef = 0

        maxRaw = []
        for i, e in enumerate(energy):
            mx = max(rawAbs[i * stp:(i + 1) * stp])
            maxRaw.append(mx)

        for i, e in enumerate(energy):
            wait = 0
            # passed the minimum activation
            if e >= whiteNoiseRef * activationScale[0] and activated[0] == 0:
                activated[0] = 1
                lastActivated = i
            # below the deactivation level
            elif (e < whiteNoiseRef * dectivationScale[0]
                  and i - lastActivated > wait and activated[0] == 1):
                if activated[0] == 1 and activated[1] == 1:
                    span = [lastActivated * stp, i * stp]
                    spans.append(span)
                    spanMaxRef.append(maxRef)
                activated = [0, 0, 0]
                maxRef = maxRaw[i]
            # passed the second activation
            if activated[0] == 1 and e >= whiteNoiseRef * activationScale[1]:
                activated[1] = 1
                maxRef = max([maxRef, maxRaw[i]])

        # join spans, which are close
        joinedSpans = []
        joinedSpanMaxRef = []
        join = []
        jmaxRef = 0
        maxG = 1000
        for i, s in enumerate(spans):

            if i == 0:
                join = [s[0], s[1]]
                jmaxRef = spanMaxRef[i]
            elif s[0] - join[1] < maxG:
                join[1] = s[1]
                jmaxRef = max([jmaxRef, spanMaxRef[i]])
            else:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)
                jmaxRef = spanMaxRef[i]
                join = s
            if i == len(spans) - 1 and len(join) == 2:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)

        # remove short spans
        minL = 1500
        longEnoughSpans = []
        longEnoughMaxRef = []
        for i, s in enumerate(joinedSpans):
            if s[1] - s[0] > minL:
                longEnoughSpans.append(s)
                longEnoughMaxRef.append(joinedSpanMaxRef[i])

        # the most probable span
        bestSpan = [longEnoughSpans[np.argmax(longEnoughMaxRef)]]
        speech = []
        speechIdx = []
        for s in bestSpan:
            speech.append(raw[s[0]:s[1]])
            speechIdx.append(s[0])
        return speech, speechIdx

    def extractSpeech(self, desc, visu=False):
        raw = self.readAudioFile(desc)
        energy = self.getSignalEnergy(raw)
        speech, speechIdx = self.getSpeech(raw, energy)
        if (visu):
            title = str(" content: " + desc[1][0]) + " | orator: " + str(
                desc[1][2]) + " |  version: " + str(desc[1][1])
            self.p.plotSpeech(raw, speech, speechIdx, title)
        return speech

    def getDistanceMap(self, sR, sT):
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            for t in range(T):
                tMin = (max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R)))
                tMax = (min(r * (2 * T / R), (r + R) * (T / 2 / R)))
                if tMin <= t and t <= tMax:
                    D[r, t] = np.sqrt((sR[r] - sT[t])**2)
                else:
                    D[r, t] = np.inf
        return D

    def processSpeech(self, raw):
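        # Frame the speech signal (a short last frame is padded by repeating itself)
        # and keep the first p autocorrelation coefficients of every frame.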
        stp = self.param.step
        wndw = self.param.window
        p = self.param.p

        tramasAC = []

        for step, idx in enumerate(range(0, len(raw), stp)):
            trama = raw[idx:idx + wndw]

            if len(trama) < wndw:
                expTrama = trama
                for i in range(0,
                               np.ceil((wndw / len(trama)) - 1).astype(int)):
                    expTrama = np.hstack([expTrama, trama])
                expTrama = expTrama[0:wndw]
                trama = expTrama

            tAC = self.m.autocorrelation(trama)
            ptAC = tAC[:p]
            tramasAC = np.vstack([ptAC] if step == 0 else [tramasAC, ptAC])
#            if step == 20:
#                self.p.plot([ (tAC, 'all', 'b*-',  0), (ptAC, 'order p='+str(p), 'y',  0) ], 0, 'Autocorrelation of segment');
#                title = str( " trama: " + str(step))
#                self.p.plot([ (raw, 'speech', 'r',  0), (trama, 'segment ', 'b',  idx) ], 0, title)

        return tramasAC

    def getExpandedDistanceMap(self, D):
        eD = np.zeros(np.array(D.shape) + 1) + np.inf
        eD[1:, 1:] = D
        eD[0, 0] = 0
        return eD

    def getDistanceMapOfAc(self, sR, sT):
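        # Local-distance matrix between two autocorrelation sequences;
        # cells outside the allowed warping band are set to infinity.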
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            for t in range(T):
                tMin = (max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R)))
                tMax = (min(r * (2 * T / R), (r + R) * (T / 2 / R)))
                if not (tMin <= t + 1 and t - 1 <= tMax):
                    D[r, t] = np.inf
                else:
                    D[r, t] = (sum((sT[t] - sR[r])**2)**(0.5))
        return D

    def stepOne(self, dist, position, arround):
        dim = arround.shape
        minVal = 0  # local cost when no move is possible (avoids an unbound name below)
        if 2 < sum(dim):
            dirs = np.array([])
            if 1 < dim[0] and 1 < dim[1]:
                dirs = np.array([[1, 0], [0, 1], [1, 1]])
            elif 1 < dim[0]:
                dirs = np.array([[1, 0]])
            elif 1 < dim[1]:
                dirs = np.array([[0, 1]])

            minDir = dirs[0]
            minVal = arround[minDir[0], minDir[1]]

            for d in dirs:
                thisVal = arround[d[0], d[1]]
                if thisVal <= minVal:
                    minDir = d
                    minVal = thisVal

            dist = dist + minVal
            position = position + minDir

        return position, dist, minVal

    def getDistanceRoute(self, expD):
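        # Greedy walk from the top-left to the bottom-right corner of the expanded
        # distance map, accumulating local costs; the step-normalised total serves
        # as the global distance.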
        target = expD.shape
        Route = np.zeros(expD.shape)
        expDRoute = np.array(expD)

        baseline = 0.5
        pos = np.array([0, 0])
        dist = 0
        Route[pos[0], pos[1]] = baseline
        step = 0

        # walk until both indices reach the bottom-right corner of the expanded map
        while ((target[0] - 1) - pos[0] + (target[1] - 1) - pos[1]) != 0:
            around = expD[pos[0]:pos[0] + 2, pos[1]:pos[1] + 2]
            pos, dist, delta = self.stepOne(dist, pos, around)
            step = step + 1
            Route[pos[0], pos[1]] = baseline + delta
            expDRoute[pos[0], pos[1]] = expDRoute[pos[0], pos[1]] + 3
        globalDist = np.inf
        if 0 < step:
            globalDist = dist / step

        return globalDist, Route, expDRoute

    def getDistance(self, sR, sT):
        D = self.getDistanceMapOfAc(sR, sT)
        expD = self.getExpandedDistanceMap(D)
        globalDist, route, expdRoute = self.getDistanceRoute(expD)
        return globalDist, expD, route, expdRoute

    def processAll(self, descs):
        ACs = []
        for d in descs:
            speech = self.extractSpeech(d, False)[0]
            speechAC = self.processSpeech(speech)
            ACs.append(speechAC)
        return ACs

    def compareAC(self, speechA_AC, speechB_AC):
        globalDistance, expD, route, expdRoute = self.getDistance(
            speechA_AC, speechB_AC)
        return globalDistance, expD, route, expdRoute

    def compare(self, descA, descB, visu=False, speechAlreadyProcessed=False):
        speechA = self.extractSpeech(descA, visu)[0]
        speechB = self.extractSpeech(descB, visu)[0]
        speechA_AC = self.processSpeech(speechA)
        speechB_AC = self.processSpeech(speechB)
        globalDistance, expD, route, expdRoute = self.compareAC(
            speechA_AC, speechB_AC)
        return globalDistance, expD, route, expdRoute

    def compare1toN(self, one, many, visu=False):
        dA = one
        for dK in many:
            globalDistance, expD, route, expdRoute = self.compare(
                dA, dK, visu)
            if (visu):
                self.p.imShow(
                    expD, "expD of " + str(dA[1]) + " v " + str(dK[1]) +
                    "      dist=" + str(round(globalDistance, 3)))

                self.p.imShow(
                    expdRoute, "expdRoute of " + str(dA[1]) + " v " +
                    str(dK[1]) + "      dist=" + str(round(globalDistance, 3)))

                self.p.imShow(
                    route, "route of " + str(dA[1]) + " v " + str(dK[1]) +
                    "      dist=" + str(round(globalDistance, 3)))

    def compareTestToTrain(self, test, train, visu=False):

        testACs = self.processAll(test)
        trainACs = self.processAll(train)

        rows = len(test)
        cols = len(train)
        scoreMap = np.zeros([rows, cols])
        matchMap = np.zeros([rows, cols])
        matchScoreMap = np.zeros([rows * 3, cols]) - np.inf

        #        print("compareTestToTrain 1 - ", testACs)

        matchCount = 0
        testCount = 0

        print("compareTestToTrain  - ", len(test[0]), len(train[0]))
        for i, iTest in enumerate(test):
            iexp = i * 3

            iTestAC = testACs[i]
            scores = []
            for j, jTrain in enumerate(train):
                jTrainAC = trainACs[j]
                globalDistance, expD, route, expdRoute = self.compareAC(
                    iTestAC, jTrainAC)

                scoreMap[i, j] = globalDistance
                matchScoreMap[iexp, j] = globalDistance

                scores.append(globalDistance)
                isSame = iTest[1][0] == jTrain[1][0]
                matchScoreMap[iexp + 1, j] = 1 if isSame else np.inf


#                print("   -> ", i, j, "   -  ", iTest[1][0]," v ", jTrain[1][0], "  \t",round(globalDistance, 3) )

            lowestScoreIdx = np.argmin(scores)

            isMatch = iTest[1][0] == train[lowestScoreIdx][1][0]

            matchCount = matchCount + (1 if isMatch else 0)
            testCount = testCount + 1

            print("   -> ", i, lowestScoreIdx, "   -  ", iTest[1][0], " v ",
                  train[lowestScoreIdx][1][0], "  \t",
                  round(scores[lowestScoreIdx], 3), "  \t", isMatch)

            matchMap[i, lowestScoreIdx] = 1 * (1 if isMatch else -1)
            matchScoreMap[iexp + 1,
                          lowestScoreIdx] = 1 * (2 if isMatch else -0.5)

        matchRatio = (matchCount / testCount) if testCount != 0 else 0

        self.p.imShow(scoreMap, "scoreMap ")

        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(matchScoreMap, "matchScoreMap ")
        print(" matchRatio ", matchRatio, "    ", matchCount, testCount)

    def compareAll(self, data):

        numOfSamples = len(data)

        confusionMap = np.ones([numOfSamples, numOfSamples])
        #        matchMap = np.ones([numOfSamples, numOfSamples])
        matchMap = np.zeros([numOfSamples, numOfSamples])

        xMap = np.zeros([numOfSamples, numOfSamples])

        ACs = self.processAll(data)

        for i, di in enumerate(data):
            iAC = ACs[i]
            iValue = di[1][0]

            for j, dj in enumerate(data):
                if i > j - 1:
                    jValue = dj[1][0]
                    jAC = ACs[j]
                    globalDistance, expD, route, expdRoute = self.compareAC(
                        iAC, jAC)
                    confusionMap[i, j] = globalDistance
                    confusionMap[j, i] = globalDistance
                    matchMap[i,
                             j] = (jValue == iValue) and (globalDistance < 0.8)
                    xMap[i, j] = (globalDistance < 0.5)
                    print("   -> ", i, j, "   -  ", di[1][0], " v ", dj[1][0],
                          "  \t", round(globalDistance, 3))

        self.p.imShow(confusionMap, "confusionMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(xMap, "xMap ")

    def run(self):
        #        self.compare1toN(self.testingDesc[21], [self.trainingDesc[30]], True)
        #        self.compare1toN(self.trainingDesc[1], [self.trainingDesc[0]], True)

        #        self.compareAll(self.trainingDesc[:40])
        self.compareTestToTrain(self.testingDesc[:], self.trainingDesc[:])
Example #9
class LpcProcessing:
    
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle, lTag=self.paths.tag1, sTag=self.paths.tag2)

        self.data = self.pickle.LoadData()
    
    
    def calculateLpcCoefs(self, data):
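        # LPC coefficients from the autocorrelation sequence: solve the
        # Toeplitz normal equations by direct matrix inversion.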
        toep = scipy.linalg.toeplitz(data[0:-1])
        a = np.linalg.inv(toep).dot(-data[1:])
        return a
    
    
    def calculateGain(self, s, Rs0, lpcCoef):
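        # Prediction gain G = sqrt((1 + a . R[1:p]) * Rs0), where R is the frame
        # autocorrelation and Rs0 the frame energy.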
        Rs = self.m.autocorrelation(s)[1:self.param.p]
        G = np.sqrt((1 + lpcCoef.dot(Rs))*Rs0)        
        return G
    
    def voicedPreprocesing(self,data):
        # High pass filter
        dLen = len(data) 
        spp = data[1:dLen] - self.param.u * data[0:dLen-1] 
        spp = np.insert(spp, 0, spp[0] )
        return spp
 
    
    def run(self, save=1):
        stp =  self.param.step
 

        for step, idx in enumerate(range(0,len(self.data.raw),stp)):
            pitch = self.data.pitch[step]
            trama  = self.data.raw[idx:idx+self.param.pf]            
            h = np.zeros(self.param.pf)

            tramaHp = trama
            if pitch:
                tramaHp = self.voicedPreprocesing(trama)
            ham = np.hamming(len(tramaHp))
            tramaHpHam = tramaHp*ham
            tramaHpHamAc = self.m.autocorrelation(tramaHpHam)
            tramaHpHamAcP=tramaHpHamAc[0:self.param.p]
            lpcCoefs = self.calculateLpcCoefs(tramaHpHamAcP)
            energy = self.data.power[step]*self.param.step

            G = self.calculateGain(trama, energy, lpcCoefs)

            for it, t in enumerate(trama): 
                if it<self.param.p-1:
                    h[it] = 0
                elif it==self.param.p:
                    h[it] = G
                else:
                    for ic, c in enumerate(lpcCoefs):
                        h[it] -=  c*h[it-ic-1]
                        
            hShift = np.append( h[self.param.p-1:], np.zeros(self.param.p-1))
            hShiftFft = np.fft.fft(hShift)
            trFft = np.fft.fft(tramaHp)

            
            hShiftAc = self.m.autocorrelation(hShift)

            self.data.gain.append(G)    
            if step==0:
                self.data.lpc = lpcCoefs
            else:             
                self.data.lpc = np.vstack([self.data.lpc, lpcCoefs])
            
            
            if (step in self.pc.stepInto2 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step)+"------------------ start", 1)
                self.p.prnt(4, str("In Second Cycle"), 1)
                self.p.prnt(6, "Current voice pitch: " +str(self.data.pitch[step]), 1)
                self.p.plot([(self.data.raw, 'speech', 'y', 0),(trama, 'trama', 'r',  idx)])

                ptrFft = 20*np.log10(trFft[0:int(len(trFft)/2)])
                phShiftFft = 20*np.log10(hShiftFft[0:int(len(hShiftFft)/2)])
                self.p.plot([(tramaHp, 'trama - high pass', 'b',  0)])
                self.p.plot([(tramaHpHam, 'trama - hamming', 'b',  0)])
                self.p.plot([(tramaHpHamAc, 'trama - auto correlation', 'b',  0),(tramaHpHamAc[0:self.param.p], 'trama - auto correlation (p='+str(self.param.p)+")", 'r',  0)])
                self.p.plot([(h, 'h', 'm',  0)])
                self.p.plot([(hShift, 'hShift', 'm',  0)])
                self.p.plot([(ptrFft, 'trFft dB', 'b',  0),(phShiftFft, 'phShiftFft dB', 'r',  0)],0)
                self.p.plot([(hShiftAc[:30], 'hShiftAc 30', 'r',  0),(tramaHpHamAc[:30], 'tramaHpHamAc 30', 'b',  0)],0)

                self.p.plot([(hShiftAc[:self.param.p], 'hShiftAc p', 'r',  0),(tramaHpHamAc[:self.param.p], 'tramaHpHamAc p', 'b',  0)],0)

                self.p.prnt(2, str(step)+"------------------ end", 1)

                if self.pc.stop2:
                    input("   ...")
                    
        self.data.pitch = np.mat(self.data.pitch).T
        self.data.gain = np.mat(self.data.gain).T
        

        
        if save:
            self.pickle.SaveData(self.data)
Example #10
class Analysis:
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()

        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()

    def loadAll(self):
        coded = self.pickle.LoadEncoded(self.paths.tag4)
        decoded = self.pickle.LoadDecoded(self.paths.tag5)
        data = self.pickle.LoadData(self.paths.tag3)
        return decoded, data, coded

    def SynthAll(self):
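        # Synthesise speech from both the original and the decoded parameter sets
        # and keep their sample-wise difference as the error signal.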
        snO = self.S.synth(self.cLpc.o, self.cP.o, self.cG.o)
        snD = self.S.synth(self.cLpc.d, self.cP.d, self.cG.d)
        snE = []
        for i, sno in enumerate(snO):
            snE.append(snO[i] - snD[i])

        rw = self.am.readAudio(self.paths.file)
        return ComaparedData(snO, snD, snE, rw)

    def organize(self):
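        # Line up original and decoded pitch, gain and LPC values frame by frame
        # and keep the differences as error tracks.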
        oPitch = []
        dPitch = []
        ePitch = []
        oGain = []
        dGain = []
        eGain = []
        oLpc = self.original.lpc
        dLpc = self.decoded.lpc

        for i in range(len(self.original.lpc)):
            op = int(self.original.pitch[i][0, 0])
            dp = self.decoded.pitch[i]
            ep = op - dp
            oPitch.append(op)
            dPitch.append(dp)
            ePitch.append(ep)
            og = round(self.original.gain[i][0, 0], 3)
            dg = self.decoded.gain[i]
            eg = og - dg
            oGain.append(og)
            dGain.append(dg)
            eGain.append(eg)

        cP = ComaparedData(oPitch, dPitch, ePitch)
        cG = ComaparedData(oGain, dGain, eGain)
        cLpc = ComaparedData(oLpc, dLpc, None)
        return cP, cG, cLpc

    def compareVisu(self):

        for i in range(len(self.cP.o)):
            if (i in self.pc.stepInto6 or i in self.pc.stepIntoAll):
                self.p.prnt(2, str(i) + "------------------ start", 1)
                self.p.prnt(4, str("In Sixth Cycle"), 1)
                self.p.prnt(2, '  ', 1)
                self.p.prnt(
                    2, '    gain   ' + str(i) + "  " + " ->    " +
                    str(round(self.cG.o[i], 0)) + "\t" +
                    str(int(self.cG.d[i])) + "\t" + str(int(self.cG.e[i])), 1)
                self.p.prnt(
                    2, '    pitch  ' + str(i) + "  " + " ->    " +
                    str(round(self.cP.o[i], 0)) + "\t" +
                    str(int(self.cP.d[i])) + "\t" + str(int(self.cP.e[i])), 1)

                for j in range(len(self.cLpc.o[i])):
                    self.p.prnt(
                        2, '     lpc   ' + str(i) + " " + str(j) + " ->   " +
                        str(round(self.cLpc.o[i][j], 3)) + "\t" +
                        str(round(self.cLpc.d[i][j], 3)), 1)

                start = i * self.param.step
                end = start + self.param.step
                tag = str(i) + "th "
                self.p.plot([
                    (self.cSn.raw[start:end], ' raw audio', 'k', 0),
                    (self.cSn.o[start:end], tag + ' original synth', 'b', 0),
                    (self.cSn.d[start:end], tag + '  decoded synth', 'g', 0),
                    (self.cSn.e[start:end], tag + '    error synth', 'r', 0)
                ])

        self.p.plot([(self.cSn.raw, ' raw audio', 'k', 0),
                     (self.cSn.o, ' original synth', 'b', 0),
                     (self.cSn.d, '  decoded synth', 'g', 0),
                     (self.cSn.e, '    error synth', 'r', 0)])
        self.p.plot([(self.cP.o, ' original pitch', 'b', 0),
                     (self.cP.d, 'decoded pitch', 'g*', 0),
                     (self.cP.e, 'error', 'r--', 0)])
        self.p.plot([(self.cG.o, ' original gain', 'b', 0),
                     (self.cG.d, 'decoded gain', 'g*', 0),
                     (self.cG.e, 'error', 'r--', 0)])
        originalFileSize = self.pickle.getFileSize(self.paths.file) * 8
        codedFileSize = len(self.coded.binaries)
        self.p.prnt(
            2, '  ---------------------------------------------------   ', 1)
        self.p.prnt(
            2, '    original file size    ->    ' + str(originalFileSize) +
            ' bits', 1)
        self.p.prnt(
            2,
            '       coded file size    ->    ' + str(codedFileSize) + ' bits',
            1)
        self.p.prnt(
            2, '           compression    ->    ' +
            str(round((codedFileSize / originalFileSize) * 100, 3)) + ' %', 1)

    def compareAudio(self):
        self.p.prnt(4, "", 1)

        self.p.prnt(4, str("Listen"), 1)
        self.p.prnt(4, "      1.  original file", 1)
        self.p.prnt(4, "      2.  sytesized before coding", 1)
        self.p.prnt(4, "      3.  sytesized after decoding", 1)

        self.am.playOriginalAudio(self.paths.file)
        self.am.playSyntesizedAudio(self.cSn.o)
        self.am.playSyntesizedAudio(self.cSn.d)
Example #11
class PreProcessing:
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle, sTag=self.paths.tag1)

        self.data.raw = self.am.readAudio(self.paths.file)

    def filterPitch(self, p):
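        # Smooth the pitch track: zero out isolated voiced frames and fill
        # isolated unvoiced gaps with the mean of their neighbours.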
        for i, v in enumerate(p):
            if i == 0 or i == len(p) - 1:
                continue
            if p[i - 1] == 0 and v != 0 and p[i + 1] == 0:
                p[i] = 0
            elif p[i - 1] != 0 and v == 0 and p[i + 1] != 0:
                p[i] = np.mean([p[i - 1], p[i + 1]])
        return p

    def run(self, save=1):
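        # Per-frame analysis: frame power plus an autocorrelation-based pitch
        # estimate (0 marks an unvoiced frame).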

        stp = self.param.step
        for step, idx in enumerate(range(0, len(self.data.raw), stp)):

            trama = self.data.raw[idx:idx + self.param.pf]
            tramaAC = self.m.autocorrelation(trama)
            power = np.sum(self.data.raw[idx:idx + stp]**2) / self.param.step
            pitch = 0
            if (max(tramaAC[self.param.pi:self.param.pfN]) >
                    self.param.threshold):
                pitch = np.argmax(
                    tramaAC[self.param.pi:self.param.pfN]) + self.param.pi

            self.data.pitch.append(pitch)
            self.data.power.append(power)

            if (step in self.pc.stepInto1 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In First Cycle"), 1)
                self.p.plot([(self.data.raw, 'speech', 'c', 0),
                             (trama, 'trama', 'r', idx)])
                self.p.plot([(trama, 'trama', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])

                self.p.prnt(2, str(step) + "------------------ end", 1)
                if self.pc.stop1:
                    input("   ...")

        self.data.pitch = self.filterPitch(self.data.pitch)

        if save:
            self.pickle.SaveData(self.data)
Example #12
class Synthesizer:
    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag2,
                             sTag=self.paths.tag3)
        self.data = self.pickle.LoadData()

    def gNoise(self, length, val=1):
        return np.array([np.random.normal() for i in range(int(length))]) * val

    def linearPredictor(self, lpcCoefs, Sni, SniPrev):
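        # All-pole (LPC) synthesis filter: every output sample adds the prediction
        # -sum(a_k * s[n-k]) over the previous lpcLen samples, with SniPrev carrying
        # the state over from the previous frame.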
        lpcLen = len(lpcCoefs)
        SniPrev = SniPrev[len(SniPrev) - lpcLen:]
        SniPrefixed = np.append(SniPrev, Sni)
        for i, n in enumerate(range(lpcLen, len(SniPrefixed))):
            SniLastN = SniPrefixed[i:n]
            pred = -np.dot(lpcCoefs, SniLastN[::-1])
            SniPrefixed[n] += pred
        Sni = SniPrefixed[lpcLen:]
        return Sni

    def glutealPulse(self, pitch, gain=1):
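        # One voiced-excitation period: a polynomial glottal-like pulse over the
        # first two thirds of the pitch period, zeros for the remainder.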
        N0 = int(pitch)
        Nop = int(N0 * 2 / 3)
        pulse = np.zeros(N0)
        for n in range(Nop):
            pulse[n] = ((2 * Nop - 1) * n - 3 * n**2) / (Nop**2 - 3 * Nop + 2)
        return pulse * gain

    def synth(self, LPC, Pitch, Gain):
        stp = self.param.step
        Sn = np.array([])

        SniPrev = np.zeros(stp)
        stepInset = 0
        for step, pitch in enumerate(Pitch[:]):

            lpcCoefs = LPC[step]
            gain = float(Gain[step])
            stepLeftover = stp - stepInset
            Sni = np.array([])
            if pitch == 0:
                G = float(np.sqrt(1 / stp) * gain)
                Sni = np.append(Sni, self.gNoise(stepLeftover, G))
                stepInset = 0
            else:
                G = float(np.sqrt(pitch / stp) * gain)
                innerJumps = int(np.ceil(stepLeftover / pitch))  # pitch != 0 in this branch
                spannedStep = 0
                Snisub = np.array([])
                for ij in range(innerJumps):
                    Snisubi = self.glutealPulse(pitch, G)
                    Snisub = np.append(Snisub, Snisubi)
                    spannedStep += pitch
                Sni = np.append(Sni, Snisub)
                stepInset = spannedStep - stepLeftover

            Sni = self.linearPredictor(lpcCoefs, Sni, SniPrev)
            SniPrev = Sni
            Sn = np.append(Sn, np.array(Sni))

        return Sn

    def run(self, save=1):
        stp = self.param.step
        Sn = np.array([])

        SniPrev = np.zeros(self.param.step)
        stepInset = 0
        for step, pitch in enumerate(self.data.pitch[:]):

            lpcCoefs = self.data.lpc[step]
            gain = float(self.data.gain[step])
            stepLeftover = stp - stepInset
            Sni = np.array([])
            if pitch == 0:
                G = float(np.sqrt(1 / self.param.step) * gain)
                Sni = np.append(Sni, self.gNoise(stepLeftover, G))
                stepInset = 0
            else:
                G = float(np.sqrt(pitch / self.param.step) * gain)
                innerJumps = int(np.ceil(stepLeftover / pitch))  # pitch != 0 in this branch
                spannedStep = 0
                Snisub = np.array([])
                for ij in range(innerJumps):
                    Snisubi = self.glutealPulse(pitch, G)
                    Snisub = np.append(Snisub, Snisubi)
                    spannedStep += pitch
                Sni = np.append(Sni, Snisub)
                stepInset = spannedStep - stepLeftover

            Sni = self.linearPredictor(lpcCoefs, Sni, SniPrev)

            SniPrev = Sni

            Sn = np.append(Sn, np.array(Sni))

            if (step in self.pc.stepInto3 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In Third Cycle"), 1)
                self.p.plot([(Sn, 'Sn', 'b', 0),
                             (Sni, 'Sni', 'r', len(Sn) - len(Sni)),
                             (self.data.raw[:len(Sn)] * 1, 'raw', 'c', 0)],
                            0)
                prev = 3
                since = len(Sn) - len(Sni) * prev
                since = 0 if since < 0 else since
                self.p.plot(
                    [(self.data.raw[since:len(Sn)] * 2, 'raw*2', 'c', since),
                     (Sn[since:len(Sn) - len(Sni)], 'Sn', 'b', since),
                     (Sni, 'Sni', 'r', len(Sn) - len(Sni))], 0)

                if self.pc.stop3:
                    input("   ...")

        self.syntesized = Sn

        if save:
            self.pickle.SaveData(self.data)
            self.pickle.save('syntesized', self.syntesized)