Exemplo n.º 1
0
def crossValidate(data, meta, folds, topology, iterations, weights, graph=None):
    """Run k-fold cross validation of an MLP and return the mean fold error.

    For every (training, validation) index pair yielded by
    ``crossValidateIndices`` a fresh ``MLP`` is configured, trained on the
    training rows and scored on the validation rows with
    ``mlp.validateModel``.

    Args:
        data: pandas DataFrame holding both the feature and the label columns.
        meta: dataset metadata; ``meta.categoricalLabelColumns`` names the
            label (output) columns.
        folds: number of folds; must be greater than 1.
        topology: stored unchanged on ``mlp.topology``.
        iterations: stored on ``mlp.trainingIterations`` (logged as epochs).
        weights: stored on ``mlp.initalWeightsMultiplier``.
        graph: optional file name. When truthy, learning is tracked and
            plotted to this name after each fold.

    Returns:
        The sum of the per-fold errors divided by ``folds``.

    Raises:
        ValueError: if ``folds`` is not greater than 1.
    """
    if folds <= 1:
        # ValueError is a subclass of Exception, so existing handlers still work.
        raise ValueError("Cross validation folds must be > 1")

    totalRows = data.shape[0]
    labelColumns = meta.categoricalLabelColumns
    averageError = 0.0

    # list(...) keeps `items` a concrete list on Python 3 as well as Python 2.
    splits = crossValidateIndices(items=list(range(totalRows)), k=folds, randomize=True)
    for counter, (training, validation) in enumerate(splits):
        # setup training and validation matrices
        # The fold indices are row positions (taken from range(totalRows)), so
        # select positionally with .iloc; the label-based .ix indexer was
        # deprecated and later removed from pandas.
        train = data.iloc[training].reset_index(drop=True)
        validate = data.iloc[validation].reset_index(drop=True)
        trainingFeatures = train.drop(labelColumns, axis=1)  # remove output columns
        validationFeatures = validate.drop(labelColumns, axis=1)  # remove output columns
        trainingLabels = train[labelColumns]  # use only output columns
        validationLabels = validate[labelColumns]  # use only output columns

        # setup MLP and start training
        li(
            "Fold {2}/{3} - Training with {1}/{4} rows ({0} epochs)".format(
                iterations, trainingFeatures.shape[0], counter + 1, folds, totalRows
            )
        )
        mlp = MLP()
        mlp.trainingIterations = iterations
        mlp.initalWeightsMultiplier = weights
        # Training data is handed over as numpy arrays (the original author
        # notes they are faster for the training phase) ...
        mlp.features = trainingFeatures.values
        mlp.labels = trainingLabels.values
        # ... while validation data is passed as pandas DataFrames.
        mlp.validationFeatures = validationFeatures
        mlp.validationLabels = validationLabels
        mlp.meta = meta
        mlp.topology = topology
        if graph:
            mlp.trackLearning = True
        mlp.setupHiddenLayers()
        mlp.train()

        if graph:
            # NOTE(review): every fold plots to the same name, so earlier
            # plots are presumably overwritten -- confirm this is intended.
            li("Plotting Learning to file '{0}'".format(graph))
            mlp.plotLearning(graph)

        # validate model
        li(
            "Fold {0}/{1} - Testing with {2}/{3} rows".format(
                counter + 1, folds, validationFeatures.shape[0], totalRows
            )
        )
        error = mlp.validateModel(printToScreen=True)
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError
Exemplo n.º 2
0
def crossValidate(data, meta, folds, topology, iterations, weights, graph=None):
    """Perform k-fold cross validation and return the average fold error.

    The rows of ``data`` are split by ``crossValidateIndices`` into ``folds``
    (training, validation) index pairs. For each pair a new ``MLP`` is set up,
    trained on the training rows and evaluated on the validation rows through
    ``mlp.validateModel``.

    Args:
        data: pandas DataFrame containing feature and label columns.
        meta: dataset metadata; its ``categoricalLabelColumns`` attribute
            lists the label (output) columns.
        folds: number of folds; values of 1 or less are rejected.
        topology: value assigned to ``mlp.topology``.
        iterations: value assigned to ``mlp.trainingIterations``.
        weights: value assigned to ``mlp.initalWeightsMultiplier``.
        graph: optional file name; when truthy the MLP tracks its learning
            and ``mlp.plotLearning(graph)`` is called after training.

    Returns:
        Accumulated fold error divided by ``folds``.

    Raises:
        ValueError: when ``folds <= 1``.
    """
    if folds <= 1:
        # ValueError derives from Exception, so callers catching Exception are unaffected.
        raise ValueError("Cross validation folds must be > 1")

    rowCount     = data.shape[0]
    outputCols   = meta.categoricalLabelColumns
    averageError = 0.0

    # Build the row positions as a real list so the argument has the same
    # type on Python 2 and Python 3.
    positions = list(range(rowCount))
    for counter, (training, validation) in enumerate(crossValidateIndices(items=positions, k=folds, randomize=True)):
        # setup training and validation matrices
        # .iloc selects by position, which is what the fold indices are; the
        # former .ix indexer is label-based on integer indexes and has been
        # removed from pandas.
        train               = data.iloc[training].reset_index(drop=True)
        validate            = data.iloc[validation].reset_index(drop=True)
        trainingFeatures    = train.drop(outputCols, axis=1)  # remove output columns
        validationFeatures  = validate.drop(outputCols, axis=1)  # remove output columns
        trainingLabels      = train[outputCols]  # use only output columns
        validationLabels    = validate[outputCols]  # use only output columns

        # setup MLP and start training
        li("Fold {2}/{3} - Training with {1}/{4} rows ({0} epochs)".format(iterations, trainingFeatures.shape[0], counter + 1, folds, rowCount))
        mlp                          = MLP()
        mlp.trainingIterations       = iterations
        mlp.initalWeightsMultiplier  = weights
        mlp.features                 = trainingFeatures.values  # numpy arrays for the training phase
        mlp.labels                   = trainingLabels.values
        mlp.validationFeatures       = validationFeatures  # for validation, send in pandas dataframes.
        mlp.validationLabels         = validationLabels
        mlp.meta                     = meta
        mlp.topology                 = topology
        if graph:
            mlp.trackLearning        = True
        mlp.setupHiddenLayers()
        mlp.train()

        if graph:
            # NOTE(review): the same file name is used for every fold -- confirm
            # that overwriting earlier plots is acceptable.
            li("Plotting Learning to file '{0}'".format(graph))
            mlp.plotLearning(graph)

        # validate model
        li("Fold {0}/{1} - Testing with {2}/{3} rows".format(counter + 1, folds, validationFeatures.shape[0], rowCount))
        error = mlp.validateModel(printToScreen=True)
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError
Exemplo n.º 3
0
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError


# Script entry point: load the dataset, report its size and resolve the label
# columns. `args`, `DataLoader`, `li` and `setLabels` are defined elsewhere.
if __name__ == "__main__":

    # Use the dataset path from `args`, or prompt for it when it is missing.
    if not args["data"]:
        data = raw_input("Path to dataset: ")
    else:
        data = args["data"]

    loader = DataLoader(data)
    li("Loading Dataset")
    # `data` is rebound here: from the path string to the loaded dataset.
    data, meta = loader.load()
    li(meta)
    li("Dataset has Rows: {0}, Columns: {1}".format(len(data), len(meta.names())))

    # `("info")` is just the string "info", not a tuple, so the former `in`
    # test was a substring check (true for "", "i", "nf", ...) and raised
    # TypeError for None. Compare for equality instead.
    if args["command"] == "info":
        sys.exit(0)

    # Label columns come from `args`, or interactively when not supplied.
    if not args["labels"]:
        labels = raw_input("Which columns to use as labels? [{0}-{1}]: ".format(1, len(meta.names())))
    else:
        labels = args["labels"]
    setLabels(data, meta, labels)
    if len(meta.labelColumns) < 1:
        raise Exception("Specify atleast 1 label column")
    li("Label Columns: {0}".format(meta.labelColumns))
Exemplo n.º 4
0
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError


# Script entry point: loads the dataset, logs its dimensions and determines
# which columns are labels. `args`, `DataLoader`, `li` and `setLabels` are
# provided elsewhere in the file.
if __name__ == '__main__':

    # Dataset path: taken from `args` when present, otherwise asked for.
    if not args['data']:
        data = raw_input('Path to dataset: ')
    else:
        data = args['data']

    loader = DataLoader(data)
    li("Loading Dataset")
    # From here on `data` is the loaded dataset, no longer the path.
    data, meta = loader.load()
    li(meta)
    li("Dataset has Rows: {0}, Columns: {1}".format(len(data), len(meta.names())))

    # The original `in ("info")` tested membership in the *string* "info"
    # (parentheses alone do not make a tuple), i.e. a substring match that
    # also accepted "" or "in" and failed with TypeError on None. An
    # equality test expresses the intended check.
    if args['command'] == "info":
        sys.exit(0)

    # Label columns: taken from `args` when present, otherwise asked for.
    if not args['labels']:
        labels = raw_input('Which columns to use as labels? [{0}-{1}]: '.format(1, len(meta.names())))
    else:
        labels = args['labels']
    setLabels(data, meta, labels)
    if len(meta.labelColumns) < 1:
        raise Exception("Specify atleast 1 label column")
    li("Label Columns: {0}".format(meta.labelColumns))