def predict(k=5, sample_size=1000):
    mfcc_size = 10
    step = sample_size / mfcc_size
    predictions = {'music': [], 'speech': [], 'both': []}

    for at, ar in predictions.items():
        file = 'data/query/%s.mp3' % at
        logging.info('Load file %s' % file)
        mfcc = sm.load_mfcc(file)
        for j in range(0, int(len(mfcc) / step)):
            sample = mfcc[j * step:(j + 1) * step]
            r = sm.clf.predict(sample, k)
            logging.debug((j, r))
            predictions[at] = np.r_[predictions[at], r]
    return predictions
def predict(k=5,sample_size=1000):
    mfcc_size =10
    step = sample_size/mfcc_size
    predictions = {'music' : [],
                   'speech': [],
                   'both'  : []}

    for at,ar in predictions.items():
        file = 'data/query/%s.mp3' % at 
        logging.info('Load file %s' % file)
        mfcc = sm.load_mfcc(file)
        for j in range(0,int(len(mfcc)/step)): 
            sample = mfcc[j*step : (j+1)*step]
            r = sm.clf.predict(sample,k)
            logging.debug((j,r))
            predictions[at] = np.r_[predictions[at], r]
    return predictions
Пример #3
0
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import division
import numpy as np
import os
import logging
import speech_music as sm

SAMPLE_IN_MS = 1000
MFCC_SIZE_IN_MS = 10
K = 5
step = SAMPLE_IN_MS / MFCC_SIZE_IN_MS

for i in ['music', 'speech', 'both']:
    mfcc = sm.load_mfcc('data/query/%s.mp3' % i)
    for j in range(0, int(len(mfcc) / step)):
        sample = mfcc[j * step:(j + 1) * step]
        r = sm.clf.predict(sample, k=K)
        res = 'speech'
        nb_music = len(np.flatnonzero(r == 1))
        nb_speech = len(np.flatnonzero(r == 0))
        if nb_music / nb_speech > 1:
            res = 'music'
        print '%s | sample %i | %s | m[%03d] p[%03d]' % (i, j, res, nb_music,
                                                         nb_speech)
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import division
import numpy as np 
import os 
import logging 
import speech_music as sm 

SAMPLE_IN_MS = 1000
MFCC_SIZE_IN_MS =10
K=5
step = SAMPLE_IN_MS/MFCC_SIZE_IN_MS

for i in ['music','speech','both']:
    mfcc = sm.load_mfcc('data/query/%s.mp3' % i ) 
    for j in range(0,int(len(mfcc)/step)): 
        sample = mfcc[j*step : (j+1)*step]
        r = sm.clf.predict(sample,k=K)
        res = 'speech'
        nb_music  = len(np.flatnonzero(r == 1)) 
        nb_speech = len(np.flatnonzero(r == 0)) 
        if nb_music / nb_speech > 1 :
            res = 'music'
        print '%s | sample %i | %s | m[%03d] p[%03d]' % (i,j,res,nb_music,nb_speech)