def main():
    """Run logistic regression on the Yelp dataset, then exit.

    Everything after the ``sys.exit()`` call is currently unreachable. It is
    the feature-count sweep: for each dataset in ``lids`` the data vectors are
    rebuilt ``numTries`` times with an increasing ``thresholdIG``, logistic
    regression and naive Bayes are timed on each build, and one
    (numFeatures, accuracy, runTime) table is printed per classifier.
    """
    # Module-level threshold assigned in the sweep below; presumably read by
    # buildDataVectors when selecting features -- TODO confirm.
    global thresholdIG
    import sys  # local import: sys.exit() does not depend on the site module

    #builds the data vectors for both datasets..
    #liFeatures,trainVectors, testVectors = buildDataVectors(DataSet.Three, FeatureSelection.DocFrequency)
    #liFeatures,trainVectors, testVectors = buildDataVectors(DataSet.Two, FeatureSelection.InformationGain)
    #print testVectors[0], len(testVectors)
    #logisticRegression.runLogisticRegression(liFeatures, trainVectors, testVectors)
    handleYelpDataset()
    # NOTE(review): this unconditional exit turns the sweep below into dead
    # code; remove these two lines to re-enable it.
    sys.exit()

    numTries = 10
    # number of features -> (runtime in seconds, accuracy)
    logResults = {}
    naiveResults = {}
    randomResults = {}  # unused so far; presumably for the random forest TODO below

    # Thresholds 0, 0.05, 0.10, ... built by repeated addition.
    liThresholds = []
    sum1 = 0
    for _ in range(numTries):
        liThresholds.append(sum1)
        sum1 += 0.05  #remove 5% extra terms each time..
        #print sum1

    #lids = [DataSet.One, DataSet.Two]
    lids = [DataSet.Two]
    liLenFeatures = []
    for ds in lids:
        # Single-argument print(...) emits the same text as the Python 2
        # print statement and also parses under Python 3.
        print("=" * 40)
        if ds == DataSet.One:
            print("Using data set one.")
        else:
            print("Using data set two.")
        print("=" * 40)

        for threshold in liThresholds:  #total 10 different number of features..
            thresholdIG = threshold
            liFeatures, trainVectors, testVectors = buildDataVectors(
                ds, FeatureSelection.InformationGain)
            # NOTE(review): results are keyed by feature count, so two
            # thresholds yielding the same count overwrite each other and the
            # count is listed (and printed) twice.
            numFeatures = len(liFeatures)
            liLenFeatures.append(numFeatures)

            #perform logistic regression..
            start_time = time.time()
            lAccuracy = logisticRegression.runLogisticRegression(
                liFeatures, trainVectors, testVectors)
            logResults[numFeatures] = (time.time() - start_time, lAccuracy)

            #perform naive Bayes...
            start_time = time.time()
            lAccuracy = naiveBayes.runNaiveBayes(
                liFeatures, trainVectors, testVectors)
            naiveResults[numFeatures] = (time.time() - start_time, lAccuracy)

            #perform random forest...
            #TODO

    liLenFeatures.sort()
    tables = (
        ("Printing results with Logistic Regression", logResults),
        ("Printing results with Naive Bayes", naiveResults),
    )
    for title, results in tables:
        print("-" * 40)
        print(title)
        print("-" * 40)
        print("(numFeatures)\t(accuracy)\t(runTime)")
        for numFeatures in liLenFeatures:
            runTime, accuracy = results[numFeatures]
            # Reproduces the spacing of `print k, "\t", acc, "\t", time`:
            # a space before each tab and none after it.
            print("%s \t%s \t%s" % (numFeatures, accuracy, runTime))

# NOTE(review): the collapsed source line ended here with a bare ''' token. It
# may delimit a triple-quoted region that starts or ends outside this view, so
# it is kept inside this comment -- confirm and restore it if needed.
def main():
    """Run logistic regression on the Yelp dataset, then exit.

    Everything after the ``sys.exit()`` call is currently unreachable. It is
    the feature-count sweep: for each dataset in ``lids`` the data vectors are
    rebuilt ``numTries`` times with an increasing ``thresholdIG``, logistic
    regression and naive Bayes are timed on each build, and one
    (numFeatures, accuracy, runTime) table is printed per classifier.
    """
    # Module-level threshold assigned in the sweep below; presumably read by
    # buildDataVectors when selecting features -- TODO confirm.
    global thresholdIG
    import sys  # local import: sys.exit() does not depend on the site module

    #builds the data vectors for both datasets..
    #liFeatures,trainVectors, testVectors = buildDataVectors(DataSet.Three, FeatureSelection.DocFrequency)
    #liFeatures,trainVectors, testVectors = buildDataVectors(DataSet.Two, FeatureSelection.InformationGain)
    #print testVectors[0], len(testVectors)
    #logisticRegression.runLogisticRegression(liFeatures, trainVectors, testVectors)
    handleYelpDataset()
    # NOTE(review): this unconditional exit turns the sweep below into dead
    # code; remove these two lines to re-enable it.
    sys.exit()

    numTries = 10
    # number of features -> (runtime in seconds, accuracy)
    logResults = {}
    naiveResults = {}
    randomResults = {}  # unused so far; presumably for the random forest TODO below

    # Thresholds 0, 0.05, 0.10, ... built by repeated addition.
    liThresholds = []
    sum1 = 0
    for _ in range(numTries):
        liThresholds.append(sum1)
        sum1 += 0.05  #remove 5% extra terms each time..
        #print sum1

    #lids = [DataSet.One, DataSet.Two]
    lids = [DataSet.Two]
    liLenFeatures = []
    for ds in lids:
        # Single-argument print(...) emits the same text as the Python 2
        # print statement and also parses under Python 3.
        print("=" * 40)
        if ds == DataSet.One:
            print("Using data set one.")
        else:
            print("Using data set two.")
        print("=" * 40)

        for threshold in liThresholds:  #total 10 different number of features..
            thresholdIG = threshold
            liFeatures, trainVectors, testVectors = buildDataVectors(
                ds, FeatureSelection.InformationGain)
            # NOTE(review): results are keyed by feature count, so two
            # thresholds yielding the same count overwrite each other and the
            # count is listed (and printed) twice.
            numFeatures = len(liFeatures)
            liLenFeatures.append(numFeatures)

            #perform logistic regression..
            start_time = time.time()
            lAccuracy = logisticRegression.runLogisticRegression(
                liFeatures, trainVectors, testVectors)
            logResults[numFeatures] = (time.time() - start_time, lAccuracy)

            #perform naive Bayes...
            start_time = time.time()
            lAccuracy = naiveBayes.runNaiveBayes(
                liFeatures, trainVectors, testVectors)
            naiveResults[numFeatures] = (time.time() - start_time, lAccuracy)

            #perform random forest...
            #TODO

    liLenFeatures.sort()
    tables = (
        ("Printing results with Logistic Regression", logResults),
        ("Printing results with Naive Bayes", naiveResults),
    )
    for title, results in tables:
        print("-" * 40)
        print(title)
        print("-" * 40)
        print("(numFeatures)\t(accuracy)\t(runTime)")
        for numFeatures in liLenFeatures:
            runTime, accuracy = results[numFeatures]
            # Reproduces the spacing of `print k, "\t", acc, "\t", time`:
            # a space before each tab and none after it.
            print("%s \t%s \t%s" % (numFeatures, accuracy, runTime))

# NOTE(review): the collapsed source line ended here with a bare ''' token. It
# may delimit a triple-quoted region that starts or ends outside this view, so
# it is kept inside this comment -- confirm and restore it if needed.
def handleYelpDataset():
    """Build the Yelp data vectors and run logistic regression on them.

    Calls ``buildDataVectors`` with ``DataSet.Yelp`` and
    ``FeatureSelection.InformationGain``, prints a progress message, and hands
    the resulting features and train/test vectors to
    ``logisticRegression.runLogisticRegression``. The value returned by that
    call is discarded; this function returns nothing.
    """
    liFeatures, trainVectors, testVectors = buildDataVectors(
        DataSet.Yelp, FeatureSelection.InformationGain)
    # Single-argument print(...) emits the same text as the Python 2 print
    # statement and also parses under Python 3.
    print("running log reg now..")
    logisticRegression.runLogisticRegression(
        liFeatures, trainVectors, testVectors)
def handleYelpDataset():
    """Build the Yelp data vectors and run logistic regression on them.

    Calls ``buildDataVectors`` with ``DataSet.Yelp`` and
    ``FeatureSelection.InformationGain``, prints a progress message, and hands
    the resulting features and train/test vectors to
    ``logisticRegression.runLogisticRegression``. The value returned by that
    call is discarded; this function returns nothing.
    """
    liFeatures, trainVectors, testVectors = buildDataVectors(
        DataSet.Yelp, FeatureSelection.InformationGain)
    # Single-argument print(...) emits the same text as the Python 2 print
    # statement and also parses under Python 3.
    print("running log reg now..")
    logisticRegression.runLogisticRegression(
        liFeatures, trainVectors, testVectors)