def test_compare_structure_base(self):
    """The base comparison with the original testing and training data."""
    split = 543
    holdout = self.all_struct.contents[:split]
    training = self.all_struct.contents[split:]
    # compare_structure returns the (possibly transformed) pair used for scoring.
    holdout, training = compare.compare_structure(holdout, training)
    accuracy = compare.predict_accuracy(holdout, training)
    self.assertEqual(accuracy, DEFAULT_VALUE)
def _randomize(nbs, percent_for_testing, threshold):
    """Randomized training and testing data on the given input.

    With no threshold, runs a single structure comparison over the random
    split; otherwise delegates to the per-threshold evaluation.
    """
    split = nbs.get_training_testing(percent_for_testing)
    test_data = split['test']
    train_data = split['train']
    # Guard clause: threshold sweep takes its own path.
    if threshold is not None:
        return _get_threshold_data(test_data, train_data, nbs, threshold)
    prediction, test = nbs_comparison.compare_structure(test_data, train_data)
    return [(0, (prediction, test, _get_word_info(test_data)))]
def _eval_threshold(test_data, train_data, nbs, curr_threshold, results):
    """Evaluate the comparison of the data at the given threshold.

    Trims both data sets via ``_remove_columns`` starting at the index
    derived from ``curr_threshold``, runs the structure comparison, and
    appends ``(curr_threshold, (prediction, test, word_info))`` to
    ``results`` (mutated in place; nothing is returned).
    """
    start_index = _make_start_index(nbs, curr_threshold)
    curr_test_data = _remove_columns(test_data, nbs, start_index)
    curr_train_data = _remove_columns(train_data, nbs, start_index)
    prediction, test = nbs_comparison.compare_structure(curr_test_data, curr_train_data)
    curr_word_info = _get_word_info(curr_test_data)
    # NOTE(review): an earlier queue-based variant (results.put) was removed;
    # results is treated as a plain list here.
    results.append((curr_threshold, (prediction, test, curr_word_info)))
def _cross_validation(nbs, chunks, threshold):
    """Perform cross validation on the data input.

    Holds out each chunk in turn as the test set and trains on the rest.
    Returns a list with one entry per fold; each entry is a list of
    ``(threshold, (prediction, test, word_info))`` tuples (a single
    0-keyed tuple when ``threshold`` is ``None``).
    """
    results = []
    cross_validation_chunks = nbs.get_cross_validation_chunks(chunks)
    for i in range(chunks):
        print('chunk: ' + str(i))
        test_data = cross_validation_chunks[i]
        # Training data is every chunk except the held-out fold i.
        train_data = [row
                      for x, chunk in enumerate(cross_validation_chunks)
                      if x != i
                      for row in chunk]
        if threshold is None:
            prediction, test = nbs_comparison.compare_structure(test_data, train_data)
            results.append([(0, (prediction, test, _get_word_info(test_data)))])
        else:
            results.append(_get_threshold_data(test_data, train_data, nbs, threshold))
    return results