Exemplo n.º 1
0
def feature_extraction(params):
    """Extract low-level features for every track in the extraction filelist.

    Dispatches to the feature extractor named in
    params['exp_gtzan_selframes_norm_feats']['feature_extractor'] and runs it
    in parallel via from_filelist, writing the result to an h5 file in the
    scratch directory.

    Raises:
        ValueError: if the configured feature extractor is not recognized.
    """
    features_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['low_level_feature_filename']
    extraction_workers = params['exp_gtzan_selframes']['extraction_workers']

    #Extract all low level features -- frame selection should be done just before training.
    #See training for further details.
    print('Extracting low level features...')

    extractor_name = params['exp_gtzan_selframes_norm_feats']['feature_extractor']

    if extractor_name == 'random_projection':
        #Deserialize the projection matrix once here so the parallel workers
        #receive the matrix itself (through the params dict), not a path.
        #dill (pickle) requires a binary-mode file handle.
        with open(params['exp_gtzan_selframes_norm_feats']['rp_projection_matrix'], 'rb') as pm_file:
            params['random_projection']['projection_matrix'] = dill.load(pm_file)
        extraction_fcn = random_projection.get_rp_features
    elif extractor_name == 'gtzan':
        extraction_fcn = gtzan_features.get_gtzan_features
    elif extractor_name == 'identity':
        extraction_fcn = identity_features.get_identity_features
    elif extractor_name == 'stft':
        extraction_fcn = stft_features.get_stft_features
    elif extractor_name == 'pca':
        extraction_fcn = pca_features.get_pca_features
    elif extractor_name == 'cqt':
        extraction_fcn = cqt_features.get_cqt_features
    else:
        #Previously an unknown extractor left extraction_params undefined and
        #crashed with a NameError below; fail early with a clear message.
        raise ValueError('unknown feature extractor: %s' % extractor_name)

    extraction_params = { '_parallel_fe' : { 'extraction_fcn' : extraction_fcn } }
    extraction_params.update(params)

    from_filelist(params['exp_gtzan_selframes']['feature_extraction_filelist'], 100, 3, extraction_workers, 
        _parallel_fe, extraction_params, features_filename, out_format='h5', inscript=params['general']['in_script'])
def extract_features(filelist, fe_params, output_filename, feature_set='marsyas'):
    """Run parallel feature extraction over a filelist and store it as h5.

    feature_set selects the extraction function: 'marsyas' or 'mel'.
    Raises ValueError for any other value.
    """
    extractors = {
        'marsyas': get_gtzan_features,
        'mel': get_stft_features,
    }
    if feature_set not in extractors:
        raise ValueError('feature set not supported.')

    fe_params.update({'_parallel_fe': {'extraction_fcn': extractors[feature_set]}})

    pf = from_filelist(filelist, 100, 5, 8, _parallel_fe,
                    fe_params, output_filename, out_format='h5', inscript=True)
Exemplo n.º 3
0
def test(params):
    """Scale + frame-select the test split, run the trained model, dump predictions.

    Loads the StandardScaler either from a per-fold "smart" path or from the
    absolute path given in the params file, applies it while selecting frames
    in parallel, then classifies with the serialized model.
    """
    features_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['low_level_feature_filename']

    test_filelist = params['exp_gtzan_selframes']['test_filelist']
    test_features_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['test_features_filename']
    feature_scaler_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['feature_scaler_filename']
    extraction_workers = params['exp_gtzan_selframes']['extraction_workers']

    load_absolute_ss = True

    if params['exp_gtzan_selframes_norm_feats']['smart_feature_scaler_loading']:
        #build the path to the standard scaler from the fold prefix of the
        #test filelist name (e.g. "f1_test.txt" -> fold "f1")
        fold = params['exp_gtzan_selframes']['test_filelist'].split('/')[-1].split('_')[0]
        ss_filename = params['general']['scratch_directory'] + params['general']['dataset_name'] + '_' + fold + '.ss.dill'

        if os.path.isfile(ss_filename):
            print('Standard Scaler found! Loading from file %s...' % (ss_filename))
            #dill requires a binary-mode handle; the with-block also closes it.
            with open(ss_filename, 'rb') as ss_file:
                ss = dill.load(ss_file)
            load_absolute_ss = False
        else:
            print('Standard Scaler not found: %s. Trying to load Standard Scaler from the parameter file.' % (ss_filename))

    if load_absolute_ss:
        with open(feature_scaler_filename, 'rb') as ss_file:
            ss = dill.load(ss_file)

    #select frames!
    select_frames_params = {'_select_frames' : {'ss': ss, 'pf_filename': features_filename} }
    select_frames_params.update(params)
    print('Scaling test data and selecting frames...')
    #Run for its side effect: writes the selected frames to
    #test_features_filename (the return value was previously discarded anyway).
    from_filelist(test_filelist, 100, 3, extraction_workers, _select_frames, select_frames_params,
        test_features_filename, out_format='h5', inscript=params['general']['in_script'])

    print('Testing model...')

    model_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes']['model_file']

    tester = SklearnLike(params)

    #attnet classifiers aggregate frame predictions by voting.
    voting_model = params['exp_gtzan_selframes']['classifier'] == 'attnet'

    #predict() takes the path of the h5 feature file written above.
    test_feats = test_features_filename

    if voting_model:
        preds = tester.predict(model_filename, test_feats, test_filelist, voting_model=True)
    else:
        preds = tester.predict(model_filename, test_feats, test_filelist)

    #pickle needs binary mode ('wb'); text mode crashes under Python 3.
    with open(params['exp_gtzan_selframes']['frame_predictions'], 'wb') as out_file:
        dill.dump(preds, out_file)

    with open(params['exp_gtzan_selframes']['final_prediction'] + '.frames.dill', 'wb') as out_file:
        dill.dump(preds, out_file)
Exemplo n.º 4
0
def select_textures(pf_filename, fold_filename, output_filename, ss=None, **selector_arguments):
    """Select texture frames for every file in a fold, in parallel.

    Args:
        pf_filename: path to a dill-serialized feature object (pf).
        fold_filename: filelist of tracks to process.
        output_filename: destination h5 file for the selected textures.
        ss: optional pre-fitted StandardScaler; fitted on pf.estimator_output
            when None.
        **selector_arguments: per-section overrides merged into the defaults
            below (known sections are dict-updated, unknown sections added).

    Returns:
        The result of from_filelist over the fold.
    """
    default_selector_args = {
        '_select_frames' : {
            'ss' : None,            #Don't change this
            'pf' : None,            #Don't change this
            'selector' : 'kmeansc',
            'n_frames' : 5,
            'select_subset': False
        },
        'kmeansc_selector' : {
            'sort_centroids_by_common' : True
        }
    }

    #Merge caller overrides: update known sections in place, add unknown ones.
    for section, overrides in selector_arguments.items():
        if section in default_selector_args:
            print('updating ' + section)
            default_selector_args[section].update(overrides)
        else:
            default_selector_args[section] = overrides

    #dill (pickle) requires a binary-mode handle; with-block closes it.
    with open(pf_filename, 'rb') as pf_file:
        pf = dill.load(pf_file)

    if ss is None:
        ss = StandardScaler()
        ss.fit(pf.estimator_output)

    default_selector_args['_select_frames']['ss'] = ss

    #h5 arrays cannot be pickled. Thus, before passing an object with a h5
    #to a (process-based) parallel job, it must be explicitly closed.
    #see function _select_textures (the parallel job) to check how to
    #get around this.
    if pf.has_h5_storage():
        pf.h5file.close()

    default_selector_args['_select_frames']['pf'] = pf

    textures = from_filelist(fold_filename, 100, 10, 8, _select_textures,
                       default_selector_args, output_filename, out_format='h5', inscript=True)

    return textures
Exemplo n.º 5
0
def extract_features(filelist, fe_params, output_filename, centerk, feature_set='marsyas'):
    """Run parallel feature extraction over a filelist and store it as h5.

    Args:
        filelist: path to the list of audio files to process.
        fe_params: parameter dict forwarded to the parallel workers (mutated).
        output_filename: destination h5 file.
        centerk: forwarded to the extraction function via '_parallel_fe'.
        feature_set: one of 'marsyas', 'mel', 'ae', 'rp'.

    Raises:
        ValueError: if feature_set is not one of the supported names.
    """
    extractors = {
        'marsyas': get_gtzan_features,
        'mel': get_stft_features,
        'ae': get_identity_features,
        'rp': get_rp_features,
    }
    if feature_set not in extractors:
        raise ValueError('feature set not supported.')

    if feature_set == 'rp':
        #Replace the on-disk path with the deserialized projection matrix so
        #workers get the matrix itself. dill needs a binary-mode handle.
        with open(fe_params['random_projection']['projection_matrix'], 'rb') as pm_file:
            fe_params['random_projection']['projection_matrix'] = dill.load(pm_file)

    fe_params.update({'_parallel_fe': {'extraction_fcn': extractors[feature_set], 'centerk': centerk}})

    pf = from_filelist(filelist, 100, 5, 8, _parallel_fe,
                    fe_params, output_filename, out_format='h5', inscript=True)
Exemplo n.º 6
0
def train_model(params):
    """Fit the StandardScaler, select training frames, and train the classifier.

    Loads the low-level features, fits (or smart-loads) a StandardScaler on
    the training tracks, runs parallel frame selection, then trains the
    classifier configured in params['exp_gtzan_selframes']['classifier'] and
    serializes the resulting model.

    Raises:
        ValueError: if the configured classifier name is not recognized.
    """
    features_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['low_level_feature_filename']

    train_filelist = params['exp_gtzan_selframes']['train_filelist']
    train_features_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['train_features_filename']
    feature_scaler_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes_norm_feats']['feature_scaler_filename']
    extraction_workers = params['exp_gtzan_selframes']['extraction_workers']

    #dill (pickle) requires a binary-mode handle; with-block closes it.
    with open(features_filename, 'rb') as feat_file:
        pf = dill.load(feat_file)

    #First it is necessary to go through all train examples to determine the features' means and std_devs.
    train_files, _ = parse_filelist(train_filelist)
    ss = StandardScaler()

    fit_ss = True

    if params['exp_gtzan_selframes_norm_feats']['smart_feature_scaler_loading']:
        #build the path to the standard scaler from the fold prefix of the
        #train filelist name (e.g. "f1_train.txt" -> fold "f1")
        fold = params['exp_gtzan_selframes']['train_filelist'].split('/')[-1].split('_')[0]
        print (params['exp_gtzan_selframes']['train_filelist'])
        ss_filename = params['general']['scratch_directory'] + params['general']['dataset_name'] + '_' + fold + '.ss.dill'

        if os.path.isfile(ss_filename):
            print('Standard Scaler found! Loading from file %s...' % (ss_filename))
            with open(ss_filename, 'rb') as ss_file:
                ss = dill.load(ss_file)
            fit_ss = False
        else:
            print('Standard Scaler not found: %s.' % (ss_filename))

    if fit_ss:
        print('Fitting standard scaler on train data...')
        #partial_fit keeps memory bounded: one track's frames at a time.
        for k in train_files:
            ss.partial_fit(pf.estimator_output[pf.get_single_track_idxs(k),:])

        #dump standard scaler. This will be used again to scale the test data.
        if params['exp_gtzan_selframes_norm_feats']['smart_feature_scaler_loading']:
            outf = ss_filename
        else:
            outf = feature_scaler_filename
        #pickle needs binary mode ('wb'); text mode crashes under Python 3.
        with open(outf, 'wb') as ss_out:
            dill.dump(ss, ss_out)

        #h5 storage cannot be pickled into the parallel workers below.
        if hasattr(pf, 'h5file'):
            pf.close_h5()

    #select frames!
    select_frames_params = {'_select_frames' : {'ss': ss, 'pf_filename': features_filename} }
    select_frames_params.update(params)

    print('Scaling training data and selecting frames...')

    t0 = time.time()

    train_feats = from_filelist(train_filelist, 100, 3, extraction_workers, _select_frames, select_frames_params,
        train_features_filename, out_format='h5', inscript=params['general']['in_script'])
    print('Frame selection took %.2f seconds'% (time.time()-t0))

    #reload the h5-backed features written by the parallel job above
    print('loading train features h5 file...')
    with open(train_features_filename, 'rb') as train_file:
        train_feats = dill.load(train_file)

    #check whether the group file exists and hack it into the params dict. This is a quick hack. I should refactor this later on.
    f, e = os.path.splitext(train_filelist)
    gf = f + '_groups' + e
    print('checking for %s...' % gf)
    if os.path.isfile(gf):
        print('Found a groups file for %s. Adding it to the grid search parameters.' % train_filelist)
        params['single_split_gs']['groups_file'] = gf
    else:
        #was print('%s not found!', gf) -- comma printed a 2-tuple instead of
        #interpolating the path
        print('%s not found!' % gf)

    print (params['single_split_gs'])

    print('Training model...')
    classifiers = {
        'svm_anova': SvmAnova,
        'random_forest': RandomForest,
        'knn': KNN,
        'attnet': AttentionNetClassifier,
    }
    classifier_name = params['exp_gtzan_selframes']['classifier']
    if classifier_name not in classifiers:
        #Previously this only printed a warning and then crashed with an
        #UnboundLocalError on model.fit; fail with the real error instead.
        raise ValueError('please set the classifier to one of svm_anova, random_forest, knn or attnet')
    model = classifiers[classifier_name](params)

    model = model.fit(train_feats, train_filelist)

    model_filename = params['general']['scratch_directory'] + params['exp_gtzan_selframes']['model_file']

    with open(model_filename, 'wb') as model_file:
        dill.dump(model, model_file)

    #release the h5 handle held by the reloaded train features
    if hasattr(train_feats, 'h5file'):
        train_feats.close_h5()