def get_voxforge_total_files(vox_forge_dir, max=3000):
    """Collect a random, size-capped sample of VoxForge files per language.

    For each supported language, the files found by
    ``extract_vox_files_by_dir`` in the language sub-directory of
    ``vox_forge_dir`` are shuffled and truncated to at most ``max`` entries.

    Args:
        vox_forge_dir: Root directory containing one sub-directory per
            language code (``it``, ``fr``, ``de``, ``en``, ``es``).
        max: Upper bound on the number of files kept per language. The name
            shadows the builtin but is kept for backward compatibility with
            keyword callers.

    Returns:
        A list of ``(file, lang_class)`` tuples, where ``lang_class`` is the
        value stored under the language name in the first mapping returned
        by ``load_classes_info()``.

    Raises:
        KeyError: If a language name is missing from that mapping.
    """
    CLASSES_DICT, _ = load_classes_info()
    dirs = {
        'Italian': os.path.join(vox_forge_dir, 'it'),
        'French': os.path.join(vox_forge_dir, 'fr'),
        'German': os.path.join(vox_forge_dir, 'de'),
        'English': os.path.join(vox_forge_dir, 'en'),
        'Spanish': os.path.join(vox_forge_dir, 'es'),
    }

    available_data = []
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for lang, path in dirs.items():
        # Copy into a fresh list so the in-place shuffle never mutates an
        # object owned by the helper and also works for non-list iterables.
        files = list(extract_vox_files_by_dir(path))
        random.shuffle(files)
        files = files[:max]
        # Count first, language second: the original passed the arguments
        # in the opposite order, producing a misleading message.
        # A parenthesised single argument prints identically on Python 2/3.
        print("Available {} files for language {}".format(len(files), lang))
        lang_class = CLASSES_DICT[lang]
        available_data.extend((f, lang_class) for f in files)
    return available_data
# Script entry point: read the run configuration from the command line and
# load the training features into a single stacked matrix.
if __name__ == "__main__":

    # Forwarded as `limit=` to load_train_features() below.
    LIMIT = 9000
    # NOTE(review): not referenced in the visible part of this script;
    # presumably consumed further down -- confirm.
    DICTIONARY_SIZE = 600
    sofia_path = None  # previously taken from sys.argv[1]
    # VoxForge features are enabled only when the first command-line argument
    # is exactly "use_voxforge"; any other value (or no argument) disables them.
    if len(sys.argv) > 1:
        VOX_FEATURES = sys.argv[1] == "use_voxforge"
    else:
        VOX_FEATURES = False

    print "Using Vox features", VOX_FEATURES
    print "LIMIT", LIMIT

    print "Checking class names"
    # Only the second value returned by load_classes_info() is kept here.
    _, REVERSE_CLASSES = load_classes_info()

    print "Loading train"
    # Each entry is unpacked as a 3-tuple whose last element holds the features
    # (the disabled loop below names the fields filename, lbl, features).
    X_train_transformed = load_train_features("bow_train_features.pcl", extract_mfcc_features, limit=LIMIT)
    # Stack the per-entry feature arrays row-wise, skipping entries whose
    # features are None.
    X_train = np.vstack([f for (_, _, f) in X_train_transformed if f is not None])
    # X_train = np.reshape(X_train, ( len(X_train_transformed), num_features ) )
    print X_train.shape
    # Earlier incremental stacking loop, disabled in favour of the single
    # np.vstack call above:
    # for filename, lbl, features in X_train_transformed:
    #    if features is None:
    #        print "Train",filename, "is none"
    #        continue
    #    if X_total is not None:
    #        X_total =  np.vstack([ X_total, features ])
    #    else:
    #        X_total = features
    # X_total refers to the same array object as X_train (no copy is made).
    X_total = X_train