Exemplo n.º 1
0
def get_accuracy(data_file, data_format, knn_output, alg, k, d, N):
  """Score a k-NN graph against a brute-force reference graph.

  The reference graph is obtained from ``run_sklearn.get_knn_graph`` with the
  ``'brute'`` algorithm. The score is
  ``1 - (reference - candidate).nnz / (N * N)``.

  Args:
    data_file: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
    data_format: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
    knn_output: For ``alg == "chen"``, the path of a text file with one edge
      per line; the first two whitespace-separated tokens are the 1-based
      column index and the 1-based row index, and any further tokens are
      ignored. For ``alg == "sklearn"``, the candidate graph itself, used
      as-is (presumably an (N, N) scipy sparse matrix -- confirm against the
      caller).
    alg: Either ``"chen"`` or ``"sklearn"``.
    k: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
    d: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
    N: Number of rows and columns of the graph matrix.

  Returns:
    The score described above, as a float.

  Raises:
    ValueError: If ``alg`` is neither ``"chen"`` nor ``"sklearn"``.
  """
  # Reject unknown algorithms up front, before the costly brute-force run,
  # instead of failing later on an unbound local.
  if alg not in ("chen", "sklearn"):
    raise ValueError(
        "unsupported alg %r; expected 'chen' or 'sklearn'" % (alg,))

  # Single-argument parenthesised prints behave the same on Python 2 and 3.
  print("getting ground truth")
  ground_truth = run_sklearn.get_knn_graph(data_file, data_format, k, d, N, 'brute')
  print("finished getting ground truth")

  if alg == "chen":
    print("creating numpy knn graph from chen's output")
    # Fill a sparse matrix directly rather than allocating a dense N x N
    # array first; assigning 1 keeps repeated edges idempotent.
    knn_graph = sparse.lil_matrix((N, N), dtype=int)
    print("created graph; opening file to fill graph")
    with open(knn_output, 'r') as f:
      for line in f:
        fields = line.split()
        if not fields:
          # Blank lines (e.g. a trailing newline) carry no edge.
          continue
        # The file is 1-based and lists the column index first.
        col = int(fields[0]) - 1
        row = int(fields[1]) - 1
        knn_graph[row, col] = 1
    print("finished reading file, converting to sparse matrix")
    knn_sparse = knn_graph.tocsr()
    print("finished converting to sparse matrix")
  else:
    knn_sparse = knn_output

  print("getting n_correct")
  n_incorrect = (ground_truth - knn_sparse).nnz
  # Adding 0.0 forces float division on Python 2 as well.
  accuracy = 1 - n_incorrect / (N * N + 0.0)
  return accuracy
Exemplo n.º 2
0
def get_accuracy(data_file, data_format, knn_output, alg, k, d, N):
    """Score a k-NN graph against a brute-force reference graph.

    The reference graph is obtained from ``run_sklearn.get_knn_graph`` with
    the ``"brute"`` algorithm. The score is
    ``1 - (reference - candidate).nnz / (N * N)``.

    Args:
        data_file: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
        data_format: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
        knn_output: For ``alg == "chen"``, the path of a text file with one
            edge per line; the first two whitespace-separated tokens are the
            1-based column index and the 1-based row index, and any further
            tokens are ignored. For ``alg == "sklearn"``, the candidate graph
            itself, used as-is (presumably an (N, N) scipy sparse matrix --
            confirm against the caller).
        alg: Either ``"chen"`` or ``"sklearn"``.
        k: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
        d: Forwarded unchanged to ``run_sklearn.get_knn_graph``.
        N: Number of rows and columns of the graph matrix.

    Returns:
        The score described above, as a float.

    Raises:
        ValueError: If ``alg`` is neither ``"chen"`` nor ``"sklearn"``.
    """
    # Reject unknown algorithms up front, before the costly brute-force run,
    # instead of failing later on an unbound local.
    if alg not in ("chen", "sklearn"):
        raise ValueError(
            "unsupported alg %r; expected 'chen' or 'sklearn'" % (alg,)
        )

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print("getting ground truth")
    ground_truth = run_sklearn.get_knn_graph(data_file, data_format, k, d, N, "brute")
    print("finished getting ground truth")

    if alg == "chen":
        print("creating numpy knn graph from chen's output")
        # Fill a sparse matrix directly rather than allocating a dense N x N
        # array first; assigning 1 keeps repeated edges idempotent.
        knn_graph = sparse.lil_matrix((N, N), dtype=int)
        print("created graph; opening file to fill graph")
        with open(knn_output, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no edge.
                    continue
                # The file is 1-based and lists the column index first.
                col = int(fields[0]) - 1
                row = int(fields[1]) - 1
                knn_graph[row, col] = 1
        print("finished reading file, converting to sparse matrix")
        knn_sparse = knn_graph.tocsr()
        print("finished converting to sparse matrix")
    else:
        knn_sparse = knn_output

    print("getting n_correct")
    n_incorrect = (ground_truth - knn_sparse).nnz
    # Adding 0.0 forces float division on Python 2 as well.
    accuracy = 1 - n_incorrect / (N * N + 0.0)
    return accuracy
Exemplo n.º 3
0
import sys
import run_sklearn

# Benchmark table. Each entry is laid out as [data_file, data_format, d, N, k],
# matching the argument names of run_sklearn.get_knn_graph
# (presumably d = feature dimension, N = number of points, k = neighbours;
# confirm against run_sklearn).
poker = ["/scratch/02234/kmcardle/data/poker.t", 'libsvm', 10, 1000000, 8]
rna = ["/scratch/02234/kmcardle/data/cod-rna.t", 'libsvm', 8, 271617, 8]
cadata = ["/scratch/02234/kmcardle/data/cadata", 'libsvm', 8, 20640, 8]
covtype = [
    "/scratch/02234/kmcardle/data/covtype.libsvm.binary",
    'libsvm',
    54,
    581012,
    8,
]
year = [
    "/scratch/02234/kmcardle/data/YearPredictionMSD",
    'libsvm',
    90,
    463715,
    8,
]
aloi = ["/scratch/02234/kmcardle/data/aloi", 'libsvm', 128, 108000, 8]

datasets = [poker, rna, cadata, covtype, year, aloi]

# Neighbour-search algorithms to run on every dataset, in this order.
algs = ['kd_tree', 'ball_tree', 'brute']

for dataset in datasets:
    data_file, data_format, d, N, k = dataset
    # "%s %s" reproduces the single space a two-item print statement inserts.
    print("%s %s" % (data_file, ' -----------------------'))
    for alg in algs:
        print("%s %s" % (alg, ": +++++++++++++"))
        # Signature: get_knn_graph(data_file, data_format, k, d, N, alg)
        run_sklearn.get_knn_graph(data_file, data_format, k, d, N, alg)
import sys
import run_sklearn

# Benchmark table. Each entry is laid out as [data_file, data_format, d, N, k],
# matching the argument names of run_sklearn.get_knn_graph
# (presumably d = feature dimension, N = number of points, k = neighbours;
# confirm against run_sklearn).
poker = ["/scratch/02234/kmcardle/data/poker.t", "libsvm", 10, 1000000, 8]
rna = ["/scratch/02234/kmcardle/data/cod-rna.t", "libsvm", 8, 271617, 8]
cadata = ["/scratch/02234/kmcardle/data/cadata", "libsvm", 8, 20640, 8]
covtype = [
    "/scratch/02234/kmcardle/data/covtype.libsvm.binary",
    "libsvm",
    54,
    581012,
    8,
]
year = [
    "/scratch/02234/kmcardle/data/YearPredictionMSD",
    "libsvm",
    90,
    463715,
    8,
]
aloi = ["/scratch/02234/kmcardle/data/aloi", "libsvm", 128, 108000, 8]

datasets = [poker, rna, cadata, covtype, year, aloi]

# Neighbour-search algorithms to run on every dataset, in this order.
algs = ["kd_tree", "ball_tree", "brute"]

for dataset in datasets:
    data_file, data_format, d, N, k = dataset
    # "%s %s" reproduces the single space a two-item print statement inserts.
    print("%s %s" % (data_file, " -----------------------"))
    for alg in algs:
        print("%s %s" % (alg, ": +++++++++++++"))
        # Signature: get_knn_graph(data_file, data_format, k, d, N, alg)
        run_sklearn.get_knn_graph(data_file, data_format, k, d, N, alg)