def main(argv):
    """Train an SVM on per-song summary features and log hold-out results.

    For every genre in ``genres`` the training set is requested from the
    database controller, one value per entry of ``required_features`` is
    pulled for each song, and the resulting vectors are used to fit a
    degree-2 polynomial ``svm.SVC`` under hold-out validation
    (``validationPercent=0.2``).  The overall hit rate and a per-genre
    breakdown of correct / mistaken predictions are written to the debug log.

    Args:
        argv: Command-line arguments; not read by this function.

    Returns:
        None.
    """
    tools.defaultLog()

    # packsize = 1024 --> about 42 sample packs per second
    packsize = 1024

    dbControl = database.Controller()

    required_features = [
        "Feature_Centroid_Avg",
        "Feature_Centroid_SD",
        "Feature_Rolloff_Avg",
        "Feature_Rolloff_SD",
        "Feature_Flux",
        "Feature_Spec_Flux_Avg",
        "Feature_Spec_Flux_SD"
    ]

    genres = [
        'dubstep',
        'house',
        'trance'
    ]

    training_data = dbControl.getTrainingSet(genres, 40)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(training_data)

    # Flatten {genre: [song ids]} into (song_id, genre) pairs, in genre order.
    ids_and_genres = [
        (song_id, genre)
        for genre in genres
        for song_id in training_data[genre]
    ]

    samples = []
    classes = []
    # iterating over all pulled songs
    for id_and_genre in ids_and_genres:
        log.debug("Fetching " + str(id_and_genre))
        song_id, genre = id_and_genre

        # Only the first pulled entry of each feature is used for the vector.
        features = [
            dbControl.pullFeatureForSong(feat, song_id, packsize)[0].value
            for feat in required_features
        ]

        samples.append(np.array(features))
        classes.append(genre)

    machine = svm.SVC(C=1.0, kernel='poly', degree=2, shrinking=True, verbose=False)
    # machine = mlp.Classifier(
    #         layers=(mlp.Layer("Sigmoid", units=4),
    #                 mlp.Layer("Linear", units=len(genres))),
    #         learning_rate=0.02,
    #         n_stable=200)

    training_indexes = []
    # hitRates[expected_genre] = {"total": n, "correct": n, <wrong genre>: n, ...}
    hitRates = {}

    def onTrain(index):
        """Remember a sample index selected for training."""
        training_indexes.append(index)

    def onValidate(index):
        """Predict one held-out sample, tally the outcome, return whether it matched."""
        # predict() is given a one-row 2-D array and returns one label per row.
        predicted = machine.predict(np.array([samples[index]]))[0]
        expected = classes[index]

        is_valid = (predicted == expected)

        if expected not in hitRates:
            hitRates[expected] = {"total": 0, "correct": 0}

        tally = hitRates[expected]
        tally["total"] += 1

        if is_valid:
            tally["correct"] += 1
        else:
            # Count which wrong genre was predicted for this expected genre.
            tally[predicted] = tally.get(predicted, 0) + 1

        log.debug(str(is_valid) + "  \t got " + str(predicted) + " expected " + str(expected))
        return is_valid

    def onDoneTraining():
        """Fit the classifier on every sample collected through onTrain."""
        X = np.array([samples[i] for i in training_indexes])
        Y = np.array([classes[i] for i in training_indexes])

        log.debug("Fitting ...")
        machine.fit(X, Y)

    holdOut = validation.HoldOutValidation(len(samples), onTrain, onValidate, onDoneTraining, validationPercent=0.2)

    # the divide should be the number of genres. An equal amount of samples must be trained from each genre
    hitRate = holdOut.performValidation(shuffle=False, divide=len(genres))
    log.debug("hitRate = " + str(hitRate))

    # .items() works on both Python 2 and 3; the dicts are not mutated here.
    for genre, v in hitRates.items():
        log.debug(str(genre).upper() + ": " + str(v["correct"]) + "/" + str(v["total"]))
        for hitg, amount in v.items():
            if hitg not in ["correct", "total"]:
                log.debug("\t\t" + str(hitg) + "= " + str(amount))

    # NOTE(review): nothing visible in this function writes the classifier to
    # this path -- confirm whether the save step is missing or lives elsewhere.
    pickled_save_path = "/home/damian/Music-Genre-Classification/Classifiers/SVM_Latest.pickled"

    # set some meta parameters
    machine.required_features = required_features
    machine.pack_size = packsize
    machine.genres = genres
def main(argv):
    """Train an SVM on centroid/rolloff packs and log 8-fold cross-validation results.

    For every song of the requested genres, the "Feature_Centroid" and
    "Feature_Rolloff" series are pulled and packs 2000..2199 of each are
    turned into ``(centroid, rolloff)`` samples labelled with the song's
    genre.  An ``svm.SVC`` is then evaluated with
    ``validation.CrossValidation(..., folds=8)`` and the hit rate is written
    to the debug log.

    Songs whose feature series do not reach pack 2199 are logged and skipped
    instead of aborting the whole run with an ``IndexError``.

    Args:
        argv: Command-line arguments; not read by this function.

    Returns:
        None.
    """
    tools.defaultLog()

    packsize = 512

    dbControl = database.Controller()

    genres = ["house", "dubstep", "trance"]
    training_data = dbControl.getTrainingSet(genres, 10)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(training_data)

    # Flatten {genre: [song ids]} into (song_id, genre) pairs, in genre order.
    ids_and_genres = [
        (song_id, genre)
        for genre in genres
        for song_id in training_data[genre]
    ]

    # take 200 samples from the middle-ish
    window_start = 2000
    window_end = 2200

    samples = []
    classes = []
    # iterating over all pulled songs
    for id_and_genre in ids_and_genres:
        log.debug("Fetching " + str(id_and_genre))
        song_id, genre = id_and_genre

        # each pull is indexed per pack below
        feature_data_centroid = dbControl.pullFeatureForSong("Feature_Centroid", song_id, packsize)
        feature_data_rolloff = dbControl.pullFeatureForSong("Feature_Rolloff", song_id, packsize)

        # Build the song's window separately so a short song contributes
        # nothing rather than a partial window.
        song_samples = []
        try:
            for i in range(window_start, window_end):
                centroid = float(feature_data_centroid[i].value)
                rolloff = int(feature_data_rolloff[i].value)
                song_samples.append((centroid, rolloff))
        except IndexError:
            log.debug("Skipping " + str(id_and_genre) + ": fewer than "
                      + str(window_end) + " feature packs available")
            continue

        samples.extend(song_samples)
        classes.extend([genre] * len(song_samples))

    machine = svm.SVC(C=1.0)

    training_indexes = []

    def onTrain(index):
        """Remember a sample index selected for training."""
        training_indexes.append(index)

    def onValidate(index):
        """Predict one held-out sample and return whether it matched its genre."""
        # predict() expects a 2-D (n_samples, n_features) input, so the single
        # sample is wrapped in a list to form a one-row batch.
        predicted = machine.predict([samples[index]])[0]
        expected = classes[index]
        is_valid = predicted == expected
        # str() keeps the concatenation safe whatever label type predict() returns.
        log.debug(str(is_valid) + "  \t got " + str(predicted) + " expected " + str(expected))
        return is_valid

    def onDoneTraining():
        """Fit the classifier on every sample collected through onTrain."""
        chosen_samples = [samples[i] for i in training_indexes]
        chosen_classes = [classes[i] for i in training_indexes]

        machine.fit(chosen_samples, chosen_classes)
        log.debug("Fitting.")

    eightFold = validation.CrossValidation(len(samples), onTrain, onValidate, onDoneTraining, folds=8)
    hitRate = eightFold.performValidation(shuffle=True)
    log.debug("hitRate = " + str(hitRate))