majorVotesSoft = np.genfromtxt(sSoftVoteFile, delimiter=',') else: (majorVotesHard, majorVotesSoft) = computeMajorVotes(sHardVoteFile, sSoftVoteFile, sResultDir) mProb = majorVotesSoft[:, 1:].copy() print mProb.sum(1) assert np.all(abs(mProb.sum(1) - config.CLASSIFIERS) < 1E-2) mProb /= mProb.sum(1)[:, None] kaldiOut = KaldiWriteOut(output_scp, output_ark) kaldiOut.open() rIdx = 0 for i, uId in enumerate(uttIDs): kaldiOut.write(uId, mProb[rIdx:(rIdx + uttLens[i]), :]) rIdx += uttLens[i] kaldiOut.close() # write dummy weights (identity matrix) with open(sWeightFile, 'wb') as fOut: w = np.eye(mProb.shape[1]) (sz1, sz2) = w.shape fOut.write('<affinetransform> %d %d\n[\n' % (sz2, sz1)) for c in xrange(0, sz2): fOut.write(' '.join([str(w[r, c]) for r in xrange(0, sz1)]) + '\n') fOut.write(']\n') # bias fOut.write('[ ') fOut.write(' '.join(['0.0' for i in xrange(0, sz1)]))
if os.path.exists(sHardVoteFile) and os.path.exists(sSoftVoteFile): majorVotesHard = np.genfromtxt(sHardVoteFile, delimiter=',') majorVotesSoft = np.genfromtxt(sSoftVoteFile, delimiter=',') else: (majorVotesHard, majorVotesSoft) = computeMajorVotes(sHardVoteFile, sSoftVoteFile, sResultDir) mProb = majorVotesSoft[:, 1:].copy() print mProb.sum(1) assert np.all(abs(mProb.sum(1) - config.CLASSIFIERS) < 1E-2) mProb /= mProb.sum(1)[:, None] kaldiOut = KaldiWriteOut(output_scp, output_ark) kaldiOut.open() rIdx = 0 for i, uId in enumerate(uttIDs): kaldiOut.write(uId, mProb[rIdx:(rIdx + uttLens[i]), :]) rIdx += uttLens[i] kaldiOut.close() # write dummy weights (identity matrix) with open(sWeightFile, 'wb') as fOut: w = np.eye(mProb.shape[1]) (sz1, sz2) = w.shape fOut.write('<affinetransform> %d %d\n[\n' % (sz2, sz1)) for c in xrange(0, sz2): fOut.write(' '.join([str(w[r, c]) for r in xrange(0, sz1)]) + '\n') fOut.write(']\n') # bias fOut.write('[ ') fOut.write(' '.join(['0.0' for i in xrange(0, sz1)]))
# Stream features utterance-by-utterance, accumulating them into ~300 MB
# batches before running the (expensive) representation extraction, then
# slice the batched output back into per-utterance matrices for Kaldi.
# NOTE(review): the original chunk contained a truncated duplicate of this
# whole loop; it has been collapsed into a single copy here.
while featMat is not None:
    if batchSz < 0:
        # ~300 MB worth of float32 rows; floor division keeps batchSz an
        # integer under Python 3 semantics as well (identical in Python 2).
        batchSz = 300 * 1024 * 1024 // (4 * featMat.shape[1])
    if featMatBatch is None:
        featMatBatch = featMat
    else:
        featMatBatch = np.vstack([featMatBatch, featMat])
    uttIDBatch.append(uttID)
    uttIDLength.append(featMat.shape[0])
    if featMatBatch.shape[0] >= batchSz:
        featOut = extractRepresentation(featMatBatch, sDataDir,
                                        sDeeplearnPath, sModelFile)
        # Write each utterance's slice of the batched output.
        rIdx = 0
        for i, uId in enumerate(uttIDBatch):
            kaldiOut.write(uId, featOut[rIdx:(rIdx + uttIDLength[i]), :])
            rIdx += uttIDLength[i]
        featMatBatch = None
        uttIDBatch = []
        uttIDLength = []
    uttID, featMat = kaldiIn.next()

# Flush the final (partial) batch.  Guard against featMatBatch being None,
# which happens when the last utterance exactly filled a batch -- the
# original unconditional .shape access would raise AttributeError then.
if featMatBatch is not None and featMatBatch.shape[0] > 0:
    featOut = extractRepresentation(featMatBatch, sDataDir,
                                    sDeeplearnPath, sModelFile)
    rIdx = 0
    for i, uId in enumerate(uttIDBatch):
        kaldiOut.write(uId, featOut[rIdx:(rIdx + uttIDLength[i]), :])
        rIdx += uttIDLength[i]
kaldiIn.close()