示例#1
0
def main(argv):
    """Train a logistic-regression model and evaluate it on a test file.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    The function prints the usage text and exits with status 2 when the
    options cannot be parsed or when either file option is missing. ``-h``
    prints the short usage line and exits with status 0.
    """
    setpath()
    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile> \n')
        sys.exit(2)

    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Validate what was actually parsed instead of counting sys.argv entries:
    # the old length check ignored the ``argv`` parameter, rejected the valid
    # long form (``--train=a --test=b``), made ``-h`` unreachable, and still
    # let a missing option through as an unbound local.
    if trainfile is None or testfile is None:
        print('\nusage: run.py -t <trainfile> -e <testfile> \n')
        sys.exit(2)

    # Imported locally, after setpath(), as in the original code.
    # NOTE(review): presumably setpath() makes file_reader importable - confirm.
    from file_reader import FileReader

    # Read the training file, then the test file, into row matrices.
    training_Set = FileReader(trainfile).getRows()
    testing_Set = FileReader(testfile).getRows()
    test_Result(logistic_Regression(training_Set), testing_Set)
示例#2
0
def main(argv):
    """Run the logistic-regression pipeline: train on one file, test on another.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    Unparseable options, or a missing train/test file, print the usage text
    and exit with status 2. ``-h`` prints the short usage line and exits
    with status 0.
    """
    setpath()
    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile> \n')
        sys.exit(2)

    # Start from None so a missing option is detected explicitly rather than
    # surfacing later as an UnboundLocalError.
    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # The previous ``len(sys.argv) < 5`` test looked at the global argument
    # list rather than ``argv``; it refused ``--train=a --test=b`` and fired
    # before ``-h`` could ever be handled. Checking the parsed values fixes
    # all three problems.
    if trainfile is None or testfile is None:
        print('\nusage: run.py -t <trainfile> -e <testfile> \n')
        sys.exit(2)

    # Local import kept after setpath(), mirroring the original ordering.
    # NOTE(review): presumably setpath() extends sys.path - confirm.
    from file_reader import FileReader

    # Read the training rows first, then the test rows.
    training_Set = FileReader(trainfile).getRows()
    testing_Set = FileReader(testfile).getRows()
    test_Result(logistic_Regression(training_Set), testing_Set)
示例#3
0
def main(argv):
    """Train a naive Bayes classifier and report its accuracy on a test file.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    For every test row the last column is compared with the classifier's
    prediction. Misclassified rows are listed, then the error count, the
    accuracy (in percent) and a 2x2 confusion matrix are printed. Rows whose
    actual label is ``'1'`` count as "Yes"; the matrix treats every other
    label as "No".

    Unparseable options, or a missing train/test file, print the usage text
    and exit with status 2. ``-h`` prints the short usage line and exits
    with status 0.
    """
    setpath()
    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)

    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Check the parsed options rather than len(sys.argv): the old check
    # ignored ``argv``, rejected ``--train=a --test=b``, made ``-h``
    # unreachable, and did not catch a missing option.
    if trainfile is None or testfile is None:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)

    # Local imports kept after setpath(), as in the original code.
    # NOTE(review): presumably setpath() makes these modules importable.
    from file_reader import FileReader
    from naive_bayes import NaiveBayes

    nb = NaiveBayes(trainfile)
    # A single reader for the test file; the original also built a second,
    # never-used FileReader(testfile).
    testData = FileReader(testfile).getRows()
    total = len(testData)

    num_errors = 0
    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase
    for idx, row in enumerate(testData):
        actual = row[-1]
        if actual != nb.binary_classify(row):
            # Integer count, so the report reads "3" rather than "3.0".
            num_errors += 1
            print("Error on row: %s" % str(idx + 1))
            if actual == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif actual == '0':
            true_negative += 1
        else:
            true_positive += 1

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')

    if total:
        # 100.0 keeps the division floating-point regardless of the count type.
        print("\n\nThe Accuracy is " +
              str((total - num_errors) * 100.0 / total) + "%")
    else:
        # Avoid a ZeroDivisionError when the test file has no rows.
        print("\n\nThe Accuracy is undefined: the test file has no rows")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
示例#4
0
def main(argv):
    """Evaluate a naive Bayes classifier trained from one file on another.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    The last column of each test row is taken as the actual label and
    compared with ``NaiveBayes.binary_classify``. The function prints each
    misclassified row number, the error count, the accuracy in percent and
    a 2x2 confusion matrix. An actual label of ``'1'`` is reported as "Yes";
    any other label is reported as "No".

    Unparseable options, or a missing train/test file, print the usage text
    and exit with status 2. ``-h`` prints the short usage line and exits
    with status 0.
    """
    setpath()
    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)

    # None marks "option not supplied" so it can be validated below instead
    # of failing later with an UnboundLocalError.
    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Replaces the old ``len(sys.argv) < 5`` test, which ignored ``argv``,
    # rejected the long ``--train=a --test=b`` form and pre-empted ``-h``.
    if trainfile is None or testfile is None:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)

    # Local imports stay after setpath(), matching the original ordering.
    # NOTE(review): presumably setpath() extends sys.path - confirm.
    from file_reader import FileReader
    from naive_bayes import NaiveBayes

    nb = NaiveBayes(trainfile)
    # The original also constructed an extra FileReader(testfile) that was
    # never used; only the reader that supplies the rows is kept.
    testData = FileReader(testfile).getRows()
    row_count = len(testData)

    num_errors = 0
    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase
    for idx, row in enumerate(testData):
        expected = row[-1]
        prediction = nb.binary_classify(row)
        if expected != prediction:
            # Counted as an integer so the report prints "3", not "3.0".
            num_errors += 1
            print("Error on row: %s" % str(idx + 1))
            if expected == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif expected == '0':
            true_negative += 1
        else:
            true_positive += 1

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')

    if row_count:
        # Multiply by 100.0 so the percentage is always a float.
        accuracy = (row_count - num_errors) * 100.0 / row_count
        print("\n\nThe Accuracy is " + str(accuracy) + "%")
    else:
        # An empty test file would otherwise raise ZeroDivisionError.
        print("\n\nThe Accuracy is undefined: the test file has no rows")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
示例#5
0
    def calcCounts(self):
        """Tally label and per-feature value frequencies from ``self._file``.

        Rows are read through ``FileReader``. The last column of each row is
        used as the class label and the remaining columns as features. The
        number of feature columns, taken from the first row, is stored in
        ``self._num_features``.

        Returns:
            A ``(label_count, feature_count)`` tuple where

            * ``label_count[label]`` is the number of rows carrying that
              label and ``label_count['total']`` is the number of rows;
            * ``feature_count[label][str(i)][value]`` is how often feature
              column ``i`` held ``value`` among rows with ``label``, and
              ``feature_count[label][str(i)]['total']`` is the number of
              such rows. Values observed only under other labels are
              present with a count of 0.

            A file without rows yields ``({'total': 0}, {})`` and sets
            ``self._num_features`` to 0 instead of raising ``IndexError``.
        """
        # NOTE (original author): missing labels not yet added.
        rows = FileReader(self._file).getRows()
        if len(rows) == 0:
            self._num_features = 0
            return ({'total': 0}, {})

        self._num_features = len(rows[0]) - 1
        # Feature columns are keyed by their index rendered as a string.
        feature_keys = [str(i) for i in range(self._num_features)]

        label_count = {'total': 0}
        feature_count = {}

        for row in rows:
            label = row[-1]
            label_count['total'] += 1
            label_count[label] = label_count.get(label, 0) + 1

            per_feature = feature_count.get(label)
            if per_feature is None:
                # First row with this label: create an empty tally for
                # every feature column.
                per_feature = {key: {'total': 0} for key in feature_keys}
                feature_count[label] = per_feature

            for i, key in enumerate(feature_keys):
                tally = per_feature[key]
                tally[row[i]] = tally.get(row[i], 0) + 1
                tally['total'] += 1

        # Zero-fill so every label exposes every value observed for a
        # feature under any label. The union of values does not depend on
        # the label, so it is built once per feature rather than once per
        # (label, feature) pair. 'total' is part of the union but already
        # exists in every tally, so it is never overwritten.
        for key in feature_keys:
            observed = set()
            for per_feature in feature_count.values():
                observed.update(per_feature[key])
            for per_feature in feature_count.values():
                tally = per_feature[key]
                for value in observed:
                    tally.setdefault(value, 0)

        return (label_count, feature_count)
示例#6
0
    def calcCounts(self):
        """Count labels and per-label feature values in the training file.

        Every row returned by ``FileReader(self._file).getRows()`` is
        processed. ``row[-1]`` is the class label and the columns before it
        are the features. ``self._num_features`` is set to the number of
        feature columns in the first row.

        Returns:
            ``(label_count, feature_count)``:

            * ``label_count`` maps each label to its row count, plus the
              key ``'total'`` holding the overall row count;
            * ``feature_count[label][str(i)]`` maps each value of feature
              column ``i`` to its frequency among rows with ``label``, plus
              the key ``'total'`` holding the number of those rows. A value
              that only occurs under another label is listed with 0.

            When the file has no rows the result is ``({'total': 0}, {})``
            and ``self._num_features`` is 0; previously this raised
            ``IndexError`` on ``rows[0]``.
        """
        # NOTE (original author): missing labels not yet added.
        rows = FileReader(self._file).getRows()
        if len(rows) == 0:
            self._num_features = 0
            return ({'total': 0}, {})

        self._num_features = len(rows[0]) - 1
        # String form of each column index, used as the feature key.
        columns = [str(i) for i in range(self._num_features)]

        label_count = {'total': 0}
        feature_count = {}

        for row in rows:
            label = row[-1]
            label_count['total'] += 1
            label_count[label] = label_count.get(label, 0) + 1

            by_column = feature_count.get(label)
            if by_column is None:
                # New label: start an empty tally for each feature column.
                by_column = {column: {'total': 0} for column in columns}
                feature_count[label] = by_column

            for i, column in enumerate(columns):
                counts = by_column[column]
                counts[row[i]] = counts.get(row[i], 0) + 1
                counts['total'] += 1

        # Give every label an explicit 0 for values it never produced. The
        # set of values seen for a column is the same whichever label is
        # being filled, so it is collected once per column instead of being
        # rebuilt for each label. The 'total' key is in the set too but is
        # already present everywhere, so setdefault leaves it alone.
        for column in columns:
            seen_values = set()
            for by_column in feature_count.values():
                seen_values.update(by_column[column])
            for by_column in feature_count.values():
                counts = by_column[column]
                for value in seen_values:
                    counts.setdefault(value, 0)

        return (label_count, feature_count)
示例#7
0
def main(argv):
    """Build a decision tree from a training file and score it on a test file.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``,
            ``-d/--maxDepth <maxDepth>`` (an integer) and ``-h``.

    The function prints the feature names, the tree height, the actual and
    predicted label of every test row, a confusion matrix, the error count
    and the accuracy as a fraction in [0, 1].

    Unparseable options, a non-integer depth, or a missing required option
    print the usage text and exit with status 2. ``-h`` prints the short
    usage line and exits with status 0.
    """
    setpath()
    try:
        opts, _ = getopt.getopt(argv, "ht:e:d:",
                                ["train=", "test=", "maxDepth="])
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile> -d <maxDepth>\n')
        sys.exit(2)

    trainfile = None
    testfile = None
    maxDepth = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile> -d <maxDepth>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg
        elif opt in ("-d", "--maxDepth"):
            try:
                maxDepth = int(arg)
            except ValueError:
                # A non-numeric depth is a usage error, not a traceback.
                print('\nusage: run.py -t <trainfile> -e <testfile> '
                      '-d <maxDepth>\n')
                sys.exit(2)

    # Validate the parsed options instead of len(sys.argv): the old check
    # ignored ``argv``, rejected the long ``--opt=value`` forms, made ``-h``
    # unreachable and did not guarantee that all three values were bound.
    if trainfile is None or testfile is None or maxDepth is None:
        print('\nusage: run.py -t <trainfile> -e <testfile> -d <maxDepth>\n')
        sys.exit(2)

    # Local imports kept after setpath(), as in the original code.
    # NOTE(review): presumably setpath() makes these modules importable.
    from file_reader import FileReader
    from decision_tree_builder import DecisionTreeBuilder

    fr = FileReader(trainfile)
    # getRows() returns a dataMatrix;
    dtb = DecisionTreeBuilder(fr.getRows())
    print('Features: {}'.format(fr.featureNames))
    dtb.build(maxDepth)
    print('Tree Building Complete and Successful')
    print('Height of the tree is {}'.format(dtb.decisionTree.height()))

    # Testing section

    # Zero-initialised square confusion matrix, one row and one column per
    # class label.
    label_total = len(fr.getClassLabels())
    confusion_matrix = [[0] * label_total for _ in range(label_total)]

    # Read the test file.
    dataMatrix_testFile = FileReader(testfile).getRows()
    Total_records = float(len(dataMatrix_testFile))
    Error_Count = 0

    # Testing phase
    for row in dataMatrix_testFile:
        actual_classLabel = row[-1]
        predicted_classLabel = dtb.predict(row)
        print('\tActual Label is {}, and Predicted Label is {}'.format(
            actual_classLabel, predicted_classLabel))
        # The matrix is only filled when the following line is enabled; it
        # requires class labels that are the integers 1..k (the original
        # author notes it is meant for the zoo data set):
        # confusion_matrix[int(row[len(row)-1])-1][int(predicted_classLabel)-1] += 1
        if actual_classLabel != predicted_classLabel:
            Error_Count += 1

    print('\n\n------------------Confusion Matrix----------')
    for matrix_row in confusion_matrix:
        print(matrix_row)

    print('\n\n--------------Error Count----------------')
    print(Error_Count)
    print('\n\n--------------Accuracy----------------')
    if Total_records:
        print((Total_records - Error_Count) / Total_records)
    else:
        # An empty test file would otherwise raise ZeroDivisionError.
        print('undefined: the test file has no rows')