# Configure the tree-based nearest-neighbour searchers. Euclidean distance is
# the only distance function used for them.
kdtree_euclid.setDistanceFunction(EuclideanDistance())

ball = BallTree()
ball.setDistanceFunction(EuclideanDistance())

cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())

# Register the searchers in the same order as before: KD-tree, ball, cover.
for nn_search in (kdtree_euclid, ball, cover):
    tree_algorithms.append(nn_search)

# The class attribute is the last attribute of the dataset.
data.setClassIndex(data.numAttributes() - 1)
# Sweep K over the odd values 1, 3, ..., 29 and, for each registered search
# structure, time both the IBk build and a 10-fold cross-validation on `data`.
for num in range(1, 30, 2):
    file.write(str(num))
    for algoknn in tree_algorithms:
        # Log header identifying the (K, search algorithm) pair.
        log.write("".join([
            "---------------------------------\nK: ",
            str(num),
            ", Search Algorithm: ",
            algoknn.__class__.__name__,
            "\n",
        ]))

        # Fresh IBk classifier using this search structure and K.
        algo = IBk()
        algo.setNearestNeighbourSearchAlgorithm(algoknn)
        algo.setKNN(num)

        # Time the classifier build on the full dataset.
        x = time.time()
        algo.buildClassifier(data)
        build_seconds = time.time() - x
        log.write("Time to build classifier: " + str(build_seconds) + "\n")

        # Prediction-output plumbing handed to the evaluation call.
        evaluation = Evaluation(data)
        buffer = StringBuffer()  # receives the plain-text predictions
        output = PlainText()
        output.setHeader(data)
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # no class distribution output

        # Time the cross-validation. Evaluating directly with
        # evaluateModel(algo, data, ...) was the previous alternative.
        x = time.time()
        evaluation.crossValidateModel(
            algo, data, 10, rand, [output, attRange, outputDistribution])
        eval_seconds = time.time() - x
        log.write("Time to evaluate model: " + str(eval_seconds) + "\n")
        log.write(evaluation.toSummaryString())
# Configure the ball-tree and cover-tree searchers. Euclidean distance is the
# only distance function used for them.
ball.setDistanceFunction(EuclideanDistance())

cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())

# Register both searchers, ball tree first.
for nn_search in (ball, cover):
    tree_algorithms.append(nn_search)

# The class attribute is the last attribute of the dataset.
data.setClassIndex(data.numAttributes() - 1)
# Sweep K over the odd values 1, 3, ..., 29 and, for each registered search
# structure, time both the IBk build and a 10-fold cross-validation on `data`,
# then log the evaluation summary.
for num in range(1, 30, 2):
    file.write(str(num))
    for algoknn in tree_algorithms:
        # Log header identifying the (K, search algorithm) pair.
        log.write("---------------------------------\nK: " + str(num) +
                  ", Search Algorithm: " + algoknn.__class__.__name__ + "\n")
        algo = IBk()
        algo.setNearestNeighbourSearchAlgorithm(algoknn)
        algo.setKNN(num)

        # Time the classifier build on the full dataset.
        x = time.time()
        algo.buildClassifier(data)
        log.write("Time to build classifier: " + str(time.time() - x) + "\n")

        evaluation = Evaluation(data)
        output = PlainText()  # plain text output for predictions
        output.setHeader(data)
        buffer = StringBuffer()  # buffer to use
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution

        # Time the cross-validation. Evaluating directly with
        # evaluateModel(algo, data, ...) was the previous alternative.
        x = time.time()
        evaluation.crossValidateModel(algo, data, 10, rand,
                                      [output, attRange, outputDistribution])
        log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
        # Report the results. Without this line the cross-validation outcome
        # is computed and then thrown away, leaving only timings in the log.
        log.write(evaluation.toSummaryString())
# Build the cover-tree searcher. Euclidean distance is the only distance
# function used for it.
cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())

# Register the already-configured ball tree first, then the cover tree.
for nn_search in (ball, cover):
    tree_algorithms_instance.append(nn_search)

# Training-set-size sweep: grow the training set from p['knn.initial']
# instances up to (but excluding) the full training set size, and evaluate
# IBk with each search structure on both the test set and the full training
# set.
knn_full_size = fulltrainset.numInstances()
# Floor division keeps the step an integer. The clamp to 1 stops range() from
# raising ValueError on a zero step when more data points are requested than
# there are instances.
knn_step = max(1, knn_full_size // int(p['knn.numdatapoints']))

for num in range(int(p['knn.initial']), knn_full_size, knn_step):
    filelimit.write(str(num))
    trainset = Instances(fulltrainset, 0, num)  # first `num` instances
    trainset.setClassIndex(trainset.numAttributes() - 1)

    for algoknn in tree_algorithms_instance:
        log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full training set size: " + str(fulltrainset.numInstances()) + "\n")
        for dataset in [testset, fulltrainset]:
            # Fresh classifier per evaluated dataset, always trained on the
            # current training subset.
            algo = IBk()
            algo.setNearestNeighbourSearchAlgorithm(algoknn)
            algo.setKNN(int(p['knn.K']))
            algo.buildClassifier(trainset)

            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution

            # Time either a 10-fold cross-validation on `dataset` or a direct
            # evaluation of the trained model on it, depending on the
            # `crossvalidate` flag.
            x = time.time()
            if int(crossvalidate):
                evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution])
            else:
                evaluation.evaluateModel(algo, dataset, [output, attRange, outputDistribution])
            log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
            log.write(evaluation.toSummaryString())
for num in range(int(p['knn.initial']), fulltrainset.numInstances(),
                 (fulltrainset.numInstances() / int(p['knn.numdatapoints']))):
    filelimit.write(str(num))
    trainset = Instances(fulltrainset, 0, num)  # create training set
    trainset.setClassIndex(trainset.numAttributes() - 1)

    for algoknn in tree_algorithms_instance:
        log.write("---------------------------------\nTraining Set Size: " +
                  str(trainset.numInstances()) + ", Test Set Size: " +
                  str(testset.numInstances()) + ", Full training set size: " +
                  str(fulltrainset.numInstances()) + "\n")
        for dataset in [testset, fulltrainset]:
            algo = IBk()
            algo.setNearestNeighbourSearchAlgorithm(algoknn)
            algo.setKNN(int(p['knn.K']))
            algo.buildClassifier(trainset)
            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            x = time.time()
            if (int(crossvalidate)):
                evaluation.crossValidateModel(
                    algo, dataset, 10, rand,
                    [output, attRange, outputDistribution])
            else:
                evaluation.evaluateModel(