Пример #1
0
    def __init__(self, set_id=None, set_name=None, conf=conf_default):
        """Bind this object to a new or an existing DataSet record.

        Args:
            set_id: Identifier of an existing DataSet; it is wrapped in
                ``ObjectId`` and matched against ``_id``. When ``None``, a
                new DataSet is created and saved instead.
            set_name: Name stored on a newly created DataSet (only used when
                ``set_id`` is ``None``).
            conf: Configuration stored on a newly created DataSet (only used
                when ``set_id`` is ``None``).
        """
        self.connection = conn.get()
        self.connection.register([DataSet])

        # Identity comparison: None is a singleton and `==` could be
        # intercepted by a custom __eq__ on the argument.
        if set_id is None:
            self.train_set = self.connection.DataSet()
            self.train_set.config = conf
            self.train_set.name = set_name

            self.train_set.save()
        else:
            # NOTE(review): presumably `one` yields None when no document
            # matches, leaving train_set unset -- confirm against the driver.
            self.train_set = self.connection.DataSet.one({"_id": ObjectId(set_id)})

        # Working directories are derived from the module-level
        # training_folder and this object's generate_name().
        self.folder_path = training_folder + '/' + self.generate_name()
        self.validation_path = self.folder_path + '/validation/'
Пример #2
0
def train(tracks, training_audio_path, conf, set_name, conn_str):
    """Fit one GMM per sliding MFCC window of every track and store the results.

    For each ``(label, filename)`` pair the audio file is run through the
    feature engine, its ``'mfcc'`` output is cut into overlapping windows, and
    a GMM is fitted on every window. The fitted parameters of all valid
    windows are saved on a ``TrackData`` record.

    Args:
        tracks: Iterable of ``(label, filename)`` pairs.
        training_audio_path: Directory prefix; each file is read from
            ``training_audio_path + '/' + filename``.
        conf: Mapping providing ``num_components``, ``em_epsilon``,
            ``em_iter``, ``cv_type``, ``audio_freq``, ``mfcc_step_size``,
            ``mfcc_block_size``, ``sample_length`` and ``sample_step_size``.
        set_name: Value stored in the ``set`` field of every saved record.
        conn_str: Not used by this function; the connection comes from
            ``conn.get()``.

    Returns:
        List with the ``_id`` of each saved ``TrackData`` record, in the order
        the tracks were processed.
    """
    logger.log('begin training sequence on %s ' % training_audio_path)

    # set up all model parameters
    num_components = conf['num_components']
    em_epsilon = conf['em_epsilon']
    em_iter = conf['em_iter']
    cv_type = conf['cv_type']

    audio_freq = conf['audio_freq']
    mfcc_step_size = conf['mfcc_step_size']
    mfcc_block_size = conf['mfcc_block_size']

    # Window sizes are used as range() arguments and slice bounds, so they
    # must be integers. Floor division plus int() keeps the integer-division
    # result while avoiding floats under true division or float config values.
    frames_per_second = int(audio_freq // mfcc_step_size)
    audio_block_size = int(frames_per_second * conf['sample_length'])
    audio_step_size = int(frames_per_second * conf['sample_step_size'])

    # set up Yaafe
    afp, engine = get_engine(audio_freq, mfcc_block_size, mfcc_step_size)

    connection = conn.get()
    connection.register([TrackData])
    result = []

    for label, filename in tracks:
        logger.log('begin processing %s.' % filename)
        afp.processFile(engine, training_audio_path + '/' + filename)

        # NOTE(review): assumes the 'mfcc' output is a 2-D array with one row
        # per frame -- confirm against the engine configuration.
        mfcc = engine.readAllOutputs()['mfcc']
        num_samples = mfcc.shape[0]
        track_gmms = []

        track = connection.TrackData()
        track.label = label
        track.set = set_name

        for index in range(0, num_samples - audio_block_size, audio_step_size):
            # record=True collects warnings raised while fitting instead of
            # letting them be printed for every window.
            with warnings.catch_warnings(record=True):
                mfcc_data = mfcc[index:index + audio_block_size]

                classifier = GMM(n_components=num_components, cvtype=cv_type)
                classifier.fit(mfcc_data, thresh=em_epsilon, n_iter=em_iter)

                means = classifier._get_means().tolist()

                if cv_type == 'diag':
                    # Each covariance entry is passed through np.diag so the
                    # stored form is a nested list like the other branch.
                    covars = [np.diag(diag).tolist()
                              for diag in classifier._get_covars()]
                else:
                    covars = classifier._get_covars().tolist()
                weights = classifier._get_weights().tolist()

                # A NaN weight marks a failed fit; that window is dropped.
                if any(math.isnan(weight) for weight in weights):
                    logger.warn("Warning: invalid GMM entry, skipping at label: %s, index %s" % (label, index))
                else:
                    track_gmms.append([means, covars, weights])

        track.data = track_gmms
        track.save()

        result.append(track._id)

    return result