def exportWeights(self, _features, _file):
    """Export the learned weights to *_file* as CSV, one row per weight vector.

    _features: ordered list of feature names; a feature absent from a
        weight vector is written as 0.
    _file: destination path handed to CSV.save().
    """
    out = CSV()
    # When class labels exist, every row is prefixed with its class pair.
    if self.classes:
        out.header = ['class0', 'class1'] + _features
    else:
        out.header = _features
    for idx in range(len(self.weights)):
        vector = self.weights[idx]
        # Look up each requested feature, defaulting missing ones to 0.
        values = [vector[name] if name in vector else 0 for name in _features]
        cells = [str(v) for v in values]
        if self.classes:
            out.data.append(','.join(self.classes[idx] + cells))
        else:
            out.data.append(','.join(cells))
    out.save(_file)
def run(self, _training, _model, _batchSize, _resultFile):
    """Train/evaluate *_model* on incrementally growing training subsets.

    The training data is shuffled once (fixed seed, so runs are
    reproducible), then evaluated with 10-fold cross validation on the
    first batch, the first two batches, and so on. Each subset is written
    to self.resultFolder and the aggregated results are saved to
    *_resultFile*.

    _training: path of the training CSV file.
    _model: model passed through to Experiment.regression().
    _batchSize: number of rows added per subset.
    _resultFile: path where the aggregated ResultMatrix is written.
    """
    data = CSV(_training)          # renamed from `csv` (shadowed stdlib module name)
    data.randomize(1000)           # deterministic shuffle
    data.removeIndices()
    R = ResultMatrix()
    # Floor division instead of int(a/b): exact for large row counts.
    for i in range(len(data.data) // _batchSize):
        subset = CSV()
        subset.header = data.header
        subset.data = data.data[0:(i + 1) * _batchSize]
        # Renamed from `file` to avoid shadowing the builtin.
        subsetFile = self.resultFolder + "subset_" + str(i) + ".csv"
        subset.save(subsetFile)
        # 10-fold cross validation on the current subset.
        header, result = Experiment(subsetFile).regression([_model], 10)
        R.add(header, result)
    R.save(_resultFile)
# perform a 10-fold cross validation e = Experiment(training, "example_feature_reduction") e.regression([model], 10) CSV(e.path("cv_0.csv")).save(e.path("subset_0.csv")) xTicks = ["None"] # obtain a feature ranking M = CSV(e.path("features_0.csv")).toMatrix() M.normalizeRows() M.sortByMean() # sequentially remove the least important feature from the training data and retrain the model subset = e.path("subset.csv") for i in range(len(M.header) - 1): key = M.header[-1] M.header = M.header[0:-1] csv.removeColumnWithKey(key) csv.save(subset) e = Experiment(subset, "example_feature_reduction") e.regression([model], 10) CSV(e.path("cv_0.csv")).save(e.path("subset_" + str(i + 1) + ".csv")) xTicks.append(key) # files = [e.path("subset_" + str(i) + ".csv") for i in range(len(xTicks))] ResultVisualizer().boxplots(files, "r2", xTicks, xlabel='Sequentially Removed Features', ylabel='R2',