Пример #1
0
def subspace_main(schema, sigma_k=1.0, outliers=None, epsilon=None, filenames=None, neighbor_k=3, data_size=None,
                  used_attrs=None, alpha=0.2, trainRatio=0.2, axis='epsilon'):
    """Run HolisticRepair on the error table and score it for the subspace setting.

    Loads the pickled ``dcs`` object from ``result/DCs.pkl`` (presumably the
    denial constraints -- confirm against the writer of that file), reads the
    error and origin tables, runs ``HolisticRepair`` on the error table,
    prints the number of distinct ``tid1`` values found in
    ``repair.violations`` and evaluates the outcome with
    ``repair_accuracy_for_subspace``.

    Args:
        schema: Schema passed to ``read_table`` and ``Instance``. It is
            replaced by ``used_attrs + ['class']`` when ``used_attrs`` is given.
        filenames: Optional mapping that may hold ``'error'`` and ``'origin'``
            paths. ``None`` or missing keys fall back to the
            ``../dataset/letter`` paths.
        used_attrs: Optional list of attribute names; see ``schema``.
        sigma_k, outliers, epsilon, neighbor_k, data_size, alpha, trainRatio,
            axis: Accepted but not referenced anywhere in this function.

    Returns:
        A 7-tuple ``(jaccard, precision, recall, f1, accuracy, error_count,
        size)``: the six values returned by ``repair_accuracy_for_subspace``
        followed by ``origin_instance.size()``.
    """
    # The signature advertises filenames=None, so the default must not crash
    # on the .get() lookups below.
    if filenames is None:
        filenames = {}

    # NOTE(review): pickle files are normally opened in binary mode ('rb');
    # the mode is kept as-is until it is confirmed how the file was written.
    with open('result/DCs.pkl', 'r') as f:
        dcs = pickle.load(f)

    if used_attrs is not None:
        schema = used_attrs + ['class']

    error_table_filename = filenames.get('error', '../dataset/letter/data_error')
    error_table = read_table(error_table_filename, schema)
    error_instance = Instance(schema)
    # NOTE(review): this references error_table.instance as it is before the
    # repair runs; if the repair mutates that object in place, error_instance
    # would change too -- confirm against HolisticRepair.
    error_instance.data = error_table.instance

    repair = HolisticRepair(error_table, dcs, threshold=float('inf'))
    try:
        repair.repair()
    except Exception:
        # Best effort: report the failure and evaluate whatever state the
        # table was left in.
        import traceback
        traceback.print_exc()

    repaired_instance = Instance(schema)
    repaired_instance.data = error_table.get_data()

    origin_table_filename = filenames.get('origin', '../dataset/letter/data_origin')
    origin_table = read_table(origin_table_filename, schema)
    origin_instance = Instance(schema)
    origin_instance.data = origin_table.instance

    # Distinct first-tuple ids among the recorded violations.
    violated_tuples = set(violation.tid1 for violation in repair.violations)
    print(len(violated_tuples))

    jaccard, precision, recall, f1, accuracy, error_count = repair_accuracy_for_subspace(
        origin_instance, error_instance, repaired_instance)
    return jaccard, precision, recall, f1, accuracy, error_count, origin_instance.size()
Пример #2
0
def main(schema, sigma_k=1.0, outliers=None, epsilon=None, filenames=None, neighbor_k=3, data_size=None,
         used_attrs=None):
    """Run HolisticRepair on the error table and score the repaired result.

    Loads the pickled ``dcs`` object from ``result/DCs-gps.pkl`` (presumably
    the denial constraints -- confirm against the writer of that file), reads
    the error and origin tables, runs ``HolisticRepair`` on the error table
    and evaluates the outcome with ``repair_accuracy``.

    Args:
        schema: Schema passed to ``read_table`` and ``Instance``. It is
            replaced by ``used_attrs + ['class']`` when ``used_attrs`` is given.
        filenames: Optional mapping that may hold ``'error'`` and ``'origin'``
            paths. ``None`` or missing keys fall back to the
            ``../dataset/wisconsin`` paths.
        used_attrs: Optional list of attribute names; see ``schema``.
        sigma_k, outliers, epsilon, neighbor_k, data_size: Accepted but not
            referenced anywhere in this function.

    Returns:
        A 6-tuple ``(rms, precision, recall, accuracy, repair_distance,
        size)``: the five values returned by ``repair_accuracy`` followed by
        ``repaired_instance.size()``.
    """
    # The signature advertises filenames=None, so the default must not crash
    # on the .get() lookups below.
    if filenames is None:
        filenames = {}

    # NOTE(review): pickle files are normally opened in binary mode ('rb');
    # the mode is kept as-is until it is confirmed how the file was written.
    with open('result/DCs-gps.pkl', 'r') as f:
        dcs = pickle.load(f)

    if used_attrs is not None:
        schema = used_attrs + ['class']

    error_table_filename = filenames.get('error', '../dataset/wisconsin/data_error')
    error_table = read_table(error_table_filename, schema)
    error_instance = Instance(schema)
    # NOTE(review): this references error_table.instance as it is before the
    # repair runs; if the repair mutates that object in place, error_instance
    # would change too -- confirm against HolisticRepair.
    error_instance.data = error_table.instance

    repair = HolisticRepair(error_table, dcs, threshold=float('inf'))
    try:
        repair.repair()
    except Exception:
        # Best effort: report the failure and evaluate whatever state the
        # table was left in.
        import traceback
        traceback.print_exc()

    repaired_instance = Instance(schema)
    repaired_instance.data = error_table.get_data()

    origin_table_filename = filenames.get('origin', '../dataset/wisconsin/data_origin')
    origin_table = read_table(origin_table_filename, schema)
    origin_instance = Instance(schema)
    origin_instance.data = origin_table.instance

    rms, precision, recall, accuracy, repair_distance = repair_accuracy(
        origin_instance, error_instance, repaired_instance)
    return rms, precision, recall, accuracy, repair_distance, repaired_instance.size()