def test_data_size_vs_diff(dm, given_dict, infer_dict):
    """Plot the estimate-vs-actual gap for growing prefixes of the data.

    Loads the data through ``dm.read_data(normalize_data=False)`` and then,
    for roughly ten evenly spaced prefix sizes of ``dm.data`` (the last one
    being the whole data set when its size is a multiple of the step),
    builds a fresh ``DataModel``/``Experimenter`` pair and records
    ``abs(estimation - actual)``.  Each step is printed and the collected
    points are shown in a plot via ``plt`` (presumably matplotlib.pyplot --
    confirm against the file's imports).

    Args:
        dm: Data model object; must provide ``read_data(normalize_data=...)``
            and a sliceable ``data`` attribute supporting ``len()``.
        given_dict: Forwarded unchanged to ``Experimenter.get_actual_result``
            and ``Experimenter.generic_get_estimated_result``.
        infer_dict: Forwarded unchanged to the same two calls.

    Returns:
        None.  Results are printed and plotted.
    """
    # Read all data from the data model.
    dm.read_data(normalize_data=False)

    # Alternative attribute sets left over from the original code:
    #   [U_UNIVERSITY_CODE, PROGRAM_CODE, UNIVERSITY, MAJOR_CODE, TERM]
    #   [MAJOR_CODE, PROGRAM_CODE, TERM]
    attr_list = [U_UNIVERSITY_CODE, PROGRAM_CODE, UNIVERSITY]

    data_size = len(dm.data)

    # Aim for 10 steps; clamp to 1 so that fewer than 10 rows does not
    # produce a zero step (which range() rejects with ValueError).
    step_size = max(1, data_size // 10)

    size = []
    accuracy = []

    # The upper bound is data_size + 1 so the largest prefix can be the
    # complete data set instead of always stopping one step short.
    for i in range(step_size, data_size + 1, step_size):
        dm_test = DataModel("")
        dm_test.set_data(dm.data[:i])
        exp_test = Experimenter(dm_test, attr_list)
        actual = exp_test.get_actual_result(given_dict, infer_dict)
        estimation = exp_test.generic_get_estimated_result(given_dict, infer_dict)
        size.append(i)
        accuracy.append(abs(estimation - actual))
        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print("Step:%d--->Actual:%f--->Estimate:%f" % (i, actual, estimation))
        print("-------------------------------------------------------------")

    plt.figure()
    plt.plot(size, accuracy)
    plt.title("Data Size vs Accuracy")
    plt.show()
Пример #2
0
 def perform_datasize_vs_efficiency(self, given_dict, infer_dict, max_datasize=None, steps=10):
     """Collect average query timings for increasing data sizes.

     For each of ``steps`` evenly spaced sizes up to ``max_datasize`` a new
     ``DataModel``/``Experimenter`` pair is built on the first
     ``cur_datasize`` rows and ``time_n_queries`` is run on it.  When a
     requested size exceeds the available rows, the rows are repeated
     cyclically to pad the sample.

     Args:
         given_dict: Forwarded unchanged to ``Experimenter.time_n_queries``.
         infer_dict: Forwarded unchanged to ``Experimenter.time_n_queries``.
         max_datasize: Largest sample size to use; defaults to
             ``len(self.dm.data)``.
         steps: Number of sample sizes to evaluate.

     Returns:
         A tuple ``(sizes, est_times, acc_times)`` of equal-length lists:
         the sample sizes and the arithmetic means of the two sequences
         returned by ``time_n_queries`` (presumably timings of the estimated
         and the actual computation -- confirm against Experimenter).

     Raises:
         ValueError: If a non-empty sample is requested but ``self.dm.data``
             is empty, since padding could never reach the requested size.
     """
     base_data = self.dm.data
     if max_datasize is None:
         max_datasize = len(base_data)
     # Floor division keeps the step an int on Python 3 as well; it is used
     # as a slice bound below.
     data_step = max_datasize // steps

     # Pad a private copy.  Extending self.dm.data directly would grow the
     # shared data model permanently as a side effect of running this.
     data = list(base_data)

     sizes, est_times, acc_times = [], [], []
     for i in range(steps):
         cur_datasize = (i + 1) * data_step
         if len(data) < cur_datasize:
             if len(base_data) == 0:
                 # Repeating an empty list never grows it; fail instead of
                 # looping forever.
                 raise ValueError(
                     "cannot build a sample of %d rows from an empty data set"
                     % cur_datasize)
             while len(data) < cur_datasize:
                 data.extend(base_data)
         cur_data = data[:cur_datasize]
         cur_dm = DataModel("")
         cur_dm.set_data(cur_data)
         cur_exp = Experimenter(cur_dm, self.attr_list)
         (cur_est, cur_acc) = cur_exp.time_n_queries(given_dict, infer_dict)
         sizes.append(cur_datasize)
         est_times.append(float(sum(cur_est)) / len(cur_est))
         acc_times.append(float(sum(cur_acc)) / len(cur_acc))
     return (sizes, est_times, acc_times)
Пример #3
0
 def perform_datasize_vs_accuracy(self, given_dict, infer_dict, max_datasize=None, steps=10):
     """Measure the estimate-vs-actual gap for increasing data sizes.

     For each of ``steps`` evenly spaced sizes up to ``max_datasize`` a new
     ``DataModel``/``Experimenter`` pair is built on the first
     ``cur_datasize`` rows, and the absolute difference between
     ``generic_get_estimated_result`` and ``get_actual_result`` is recorded.
     When a requested size exceeds the available rows, the rows are
     repeated cyclically to pad the sample.

     Args:
         given_dict: Forwarded unchanged to both Experimenter calls.
         infer_dict: Forwarded unchanged to both Experimenter calls.
         max_datasize: Largest sample size to use; defaults to
             ``len(self.dm.data)``.
         steps: Number of sample sizes to evaluate.

     Returns:
         A tuple ``(size, accuracy)`` of equal-length lists: the sample
         sizes and ``abs(estimation - actual)`` for each size.

     Raises:
         ValueError: If a non-empty sample is requested but ``self.dm.data``
             is empty, since padding could never reach the requested size.
     """
     base_data = self.dm.data
     if max_datasize is None:
         max_datasize = len(base_data)
     # Floor division keeps the step an int on Python 3 as well; it is used
     # as a slice bound below.
     data_step = max_datasize // steps

     # Pad a private copy.  Extending self.dm.data directly would grow the
     # shared data model permanently as a side effect of running this.
     data = list(base_data)

     size = []
     accuracy = []
     for i in range(steps):
         cur_datasize = (i + 1) * data_step
         if len(data) < cur_datasize:
             if len(base_data) == 0:
                 # Repeating an empty list never grows it; fail instead of
                 # looping forever.
                 raise ValueError(
                     "cannot build a sample of %d rows from an empty data set"
                     % cur_datasize)
             while len(data) < cur_datasize:
                 data.extend(base_data)
         cur_data = data[:cur_datasize]
         cur_dm = DataModel("")
         cur_dm.set_data(cur_data)
         cur_exp = Experimenter(cur_dm, self.attr_list)
         actual = cur_exp.get_actual_result(given_dict, infer_dict)
         estimation = cur_exp.generic_get_estimated_result(given_dict, infer_dict)
         size.append(cur_datasize)
         accuracy.append(abs(estimation - actual))
     return (size, accuracy)