def multinomial(mi):
    """Train and then test a single ``Multinomial`` model.

    Sets the class attribute ``MessageFeatures.test_fold`` to -1, builds
    ``Multinomial(mi)``, and calls ``train()`` followed by ``test()`` on it.
    Nothing is returned.

    Args:
        mi: Passed unchanged to the ``Multinomial`` constructor.
    """
    # NOTE(review): -1 presumably means "no fold is held out" — confirm
    # against how MessageFeatures interprets test_fold.
    MessageFeatures.test_fold = -1

    classifier = Multinomial(mi)
    classifier.train()
    classifier.test()
def kfcvm(mi, k=10):
    """Run one train/test round of a ``Multinomial`` model per fold index.

    For each fold index in ``range(k)`` the class attributes
    ``MessageFeatures.test_fold`` (the current index) and
    ``MessageFeatures.folds`` (``k``) are set, a fresh ``Multinomial(mi)`` is
    built, and ``train()`` then ``test_marked()`` are called on it.

    Args:
        mi: Passed unchanged to the ``Multinomial`` constructor.
        k: Number of fold indices to iterate over; defaults to 10.
    """
    # NOTE(review): these accumulators are never read in the visible body;
    # presumably per-fold results were meant to be tallied here — confirm
    # whether the aggregation step is missing before removing them.
    correct, tested = [], []
    tot = cor = 0

    for fold in range(k):
        # Fold selection is communicated through MessageFeatures class
        # attributes (an earlier mark_test_set(mi, k, i) call was disabled).
        MessageFeatures.test_fold = fold
        MessageFeatures.folds = k

        model = Multinomial(mi)
        model.train()
        model.test_marked()
from LogisticRegression import LogisticRegression
from Multinomial import Multinomial


def loadData(file):
    """Load a header-less CSV, zero-fill missing values, and drop duplicate rows.

    Args:
        file: CSV location handed straight to ``pd.read_csv``.

    Returns:
        A DataFrame in which missing values have been replaced by 0 and,
        after that replacement, duplicate rows have been removed.
    """
    frame = pd.read_csv(file, header=None)
    frame = frame.fillna(0)
    return frame.drop_duplicates()


if __name__ == '__main__':
    # Part 1: validate logistic regression on each tabular data set, printing
    # a banner before every run.
    regression_runs = (
        ("1 Spambase Logistic Regression", './data/spambase.csv'),
        ("1 Breast Cancer Logistic Regression", './data/breastcancer.csv'),
        ("1 Diabetes Logistic Regression", './data/diabetes.csv'),
    )
    for banner, csv_path in regression_runs:
        print(banner)
        LogisticRegression(loadData(csv_path)).validate()

    # Part 2: both runs below are validated on the same four 20NG files.
    newsgroup_files = (
        './data/20NG_data/train_data.csv',
        './data/20NG_data/train_label.csv',
        './data/20NG_data/test_data.csv',
        './data/20NG_data/test_label.csv',
    )

    # NOTE(review): the run labelled "Multivariate Bernoulli" is obtained by
    # passing True to Multinomial — presumably a mode flag; confirm.
    print("2 Multivariate Bernoulli")
    Multinomial(True).validate(*newsgroup_files)

    print("2 Multinomial")
    Multinomial().validate(*newsgroup_files)
dataSet = dataSet.drop_duplicates() return dataSet if __name__ == '__main__': spambaseFileLocation = 'spambase.csv' spambaseDataSet = importData(spambaseFileLocation) breastCancerFileLocation = 'breastcancer.csv' breastCancerDataSet = importData(breastCancerFileLocation) diabetesFileLocation = 'diabetes.csv' diabetesDataSet = importData(diabetesFileLocation) print('Spambase Dataset - Logistic Regression') spambaseLogisticRegression = LogisticRegression(spambaseDataSet, 0.75, 0.00001) spambaseLogisticRegression.validate() print('Breast Cancer Dataset - Logistic Regression') breastCancerLogisticRegression = LogisticRegression( breastCancerDataSet, 0.75, 0.00001) breastCancerLogisticRegression.validate() print('Pima Indian Diabetes Dataset - Logistic Regression') diabetesLogisticRegression = LogisticRegression(diabetesDataSet, 0.1, 0.0000001) diabetesLogisticRegression.validate() multivariateBernoulli = MultivariateBernoulli() multivariateBernoulli.run('train.data', 'train.label', 'test.data', 'test.label') multinomial = Multinomial() multinomial.run('train.data', 'train.label', 'test.data', 'test.label')