def crossValidate(data, meta, folds, topology, iterations, weights, graph=None):
    "k-fold cross validation"
    if folds <= 1:
        raise Exception("Cross validation folds must be > 1")

    averageError = 0.0
    for counter, (training, validation) in enumerate(crossValidateIndices(items=range(data.shape[0]), k=folds, randomize=True)):
        # set up training and validation matrices
        train = data.iloc[training].reset_index(drop=True)
        validate = data.iloc[validation].reset_index(drop=True)
        trainingFeatures = train.drop(meta.categoricalLabelColumns, axis=1)       # remove output columns
        validationFeatures = validate.drop(meta.categoricalLabelColumns, axis=1)  # remove output columns
        trainingLabels = train[meta.categoricalLabelColumns]                      # use only output columns
        validationLabels = validate[meta.categoricalLabelColumns]                 # use only output columns

        # set up MLP and start training
        li("Fold {2}/{3} - Training with {1}/{4} rows ({0} epochs)".format(iterations, trainingFeatures.shape[0], counter + 1, folds, data.shape[0]))
        mlp = MLP()
        mlp.trainingIterations = iterations
        mlp.initalWeightsMultiplier = weights
        mlp.features = trainingFeatures.values  # convert from pandas dataframes to numpy arrays; they are faster for the computationally intensive training phase
        mlp.labels = trainingLabels.values
        mlp.validationFeatures = validationFeatures  # for validation, pass in the pandas dataframes
        mlp.validationLabels = validationLabels
        mlp.meta = meta
        mlp.topology = topology
        if graph:
            mlp.trackLearning = True
        mlp.setupHiddenLayers()
        mlp.train()
        if graph:
            li("Plotting Learning to file '{0}'".format(graph))
            mlp.plotLearning(graph)

        # validate model
        li("Fold {0}/{1} - Testing with {2}/{3} rows".format(counter + 1, folds, validationFeatures.shape[0], data.shape[0]))
        error = mlp.validateModel(printToScreen=True)
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError
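
# crossValidateIndices is referenced above but not defined in this section. If it is not
# defined elsewhere in the module, the sketch below shows one plausible implementation,
# assuming it yields k (training, validation) pairs of row-index lists; this is an
# illustration, not the project's actual helper, which may differ.
import random

def crossValidateIndices(items, k, randomize=False):
    """Split item indices into k folds and yield (training, validation) index lists."""
    items = list(items)
    if randomize:
        random.shuffle(items)
    slices = [items[i::k] for i in range(k)]  # k roughly equal, interleaved folds
    for i in range(k):
        validation = slices[i]
        training = [item for j, fold in enumerate(slices) if j != i for item in fold]
        yield training, validation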
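
# setLabels is likewise referenced but not defined in this section. If it is not defined
# elsewhere in the module, a rough, hypothetical sketch follows: it assumes the label
# argument is a 1-based column spec such as "5", "3,5" or "4-6" (matching the
# "[1-{N}]" prompt in the main block below) and resolves it to column names on the
# metadata object. The real implementation may differ.
def setLabels(data, meta, labels):
    columns = []
    for part in str(labels).split(','):
        part = part.strip()
        if '-' in part:
            start, end = part.split('-', 1)
            columns.extend(range(int(start), int(end) + 1))
        elif part:
            columns.append(int(part))
    names = meta.names()
    meta.labelColumns = [names[i - 1] for i in columns]  # convert 1-based indices to column names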

if __name__ == '__main__':
    if not args['data']:
        data = raw_input('Path to dataset: ')
    else:
        data = args['data']

    loader = DataLoader(data)
    li("Loading Dataset")
    data, meta = loader.load()
    li(meta)
    li("Dataset has Rows: {0}, Columns: {1}".format(len(data), len(meta.names())))

    if args['command'] in ("info",):
        sys.exit(0)

    if not args['labels']:
        labels = raw_input('Which columns to use as labels? [{0}-{1}]: '.format(1, len(meta.names())))
    else:
        labels = args['labels']
    setLabels(data, meta, labels)
    if len(meta.labelColumns) < 1:
        raise Exception("Specify at least 1 label column")
    li("Label Columns: {0}".format(meta.labelColumns))