示例#1
0
 def test_rdp_0(self):
     """Run rdp.rdp on five points that all share y == 5.

     Asserts that the first returned array equals [0, 4] and the second
     equals [[0, 3]].
     """
     flat_line = np.array([[1, 5], [2, 5], [3, 5], [4, 5], [5, 5]])
     kept, dropped = rdp.rdp(flat_line)
     np.testing.assert_array_equal(kept, np.array([0, 4]))
     np.testing.assert_array_equal(dropped, np.array([[0, 3]]))
示例#2
0
def main(args):
    # Entry point: loads the trace in args.i, reduces it with rdp.rdp and runs
    # the kneedle detection plus post-processing filters on the reduced points.
    # Attributes read from args in this part: i, r, c, t, m and a.

    # define clustering methods
    # (maps each Clustering member to the matching linkage function; indexed
    # with args.c further down)
    cmethod = {Clustering.single: clustering.single_linkage, Clustering.complete: clustering.complete_linkage, Clustering.average: clustering.average_linkage}
    # get the expected file from the input file
    # ("<input stem>_expected.csv" in the same directory as args.i)
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')

    expected = None

    # The expected file is optional: a missing file yields an empty list.
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            # QUOTE_NONNUMERIC makes the reader convert unquoted fields to float
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    else:
        expected = []

    points = np.genfromtxt(args.i, delimiter=',')

    ## Knee detection code ##

    # args.r is forwarded to rdp.rdp as its second positional argument
    points_reduced, points_removed = rdp.rdp(points, args.r)
    knees = kneedle.auto_knees(points_reduced)
    # Successive filters; each one receives the output of the previous one.
    t_k = pp.filter_worst_knees(points_reduced, knees)
    t_k = pp.filter_corner_knees(points_reduced, t_k)
    filtered_knees = pp.filter_clustring(points_reduced, t_k, cmethod[args.c], args.t, args.m)
    # args.a selects between pp.add_points_even and a plain rdp.mapping of the
    # filtered knees (presumably back to indexes of the raw points -- confirm
    # against rdp.mapping).
    if args.a:
        knees = pp.add_points_even(points, points_reduced, filtered_knees, points_removed)
    else:
        knees = rdp.mapping(filtered_knees, points_reduced, points_removed)
    
    ##########################
    """
示例#3
0
 def test_rdp_mapping_two(self):
     """Map reduced indexes 0, 1, 2 for a flat-then-descending curve.

     The curve is flat for its first three points and then descends; the
     mapping result is expected to equal [0, 2, 5].
     """
     curve = np.array([[0, 3], [1, 3], [2, 3], [3, 2], [4, 1], [5, 0]])
     kept, dropped = rdp.rdp(curve)
     mapped = rdp.mapping(np.array([0, 1, 2]), kept, dropped)
     np.testing.assert_array_equal(mapped, np.array([0, 2, 5]))
示例#4
0
 def test_rdp_mapping_line(self):
     """Map reduced indexes 0 and 1 for five points that share y == 5.

     The mapping result is expected to equal [0, 4].
     """
     flat_line = np.array([[1, 5], [2, 5], [3, 5], [4, 5], [5, 5]])
     kept, dropped = rdp.rdp(flat_line)
     mapped = rdp.mapping(np.array([0, 1]), kept, dropped)
     np.testing.assert_array_equal(mapped, np.array([0, 4]))
示例#5
0
 def test_rdp_1(self):
     """Run rdp.rdp on a curve that steps from y == 5 up to y == 6.

     Asserts that the first returned array equals [0, 1, 2, 4] and the
     second equals [[0, 0], [1, 0], [2, 1]].
     """
     step = np.array([[1, 5], [2, 5], [3, 6], [4, 6], [5, 6]])
     kept, dropped = rdp.rdp(step)
     np.testing.assert_array_equal(kept, np.array([0, 1, 2, 4]))
     np.testing.assert_array_equal(dropped, np.array([[0, 0], [1, 0], [2, 1]]))
示例#6
0
def main(args):
    """Profile one rdp call on a CSV trace and plot the points it keeps.

    Reads ``args.i`` as comma-separated data, profiles ``rdp(points, args.r)``
    with cProfile (statistics sorted by cumulative time are printed), logs the
    space saving and plots the raw trace together with the mapped points.

    Args:
        args: parsed command-line namespace; ``i`` and ``r`` are read here.
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # Only the rdp call itself sits between enable() and disable().
    profiler = cProfile.Profile()
    profiler.enable()
    points_reduced, removed = rdp(points, args.r)
    profiler.disable()
    pstats.Stats(profiler).sort_stats('cumtime').print_stats()

    kept = len(points_reduced)
    space_saving = round((1.0 - (kept / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', kept, space_saving)

    # Every reduced index is pushed through mapping() before plotting.
    indexes = mapping(np.arange(0, kept), points_reduced, removed)

    plt.plot(points[:, 0], points[:, 1])

    selected = points[indexes]
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)
    plt.show()
示例#7
0
def main(args):
    """Reduce a trace, post-process the knee candidates, add points and plot.

    Loads ``args.i``, reduces it with ``rdp(points, args.r)``, treats every
    reduced index from 1 upwards as a candidate, runs ``postprocessing`` and
    ``add_points_even``, logs the values returned by ``accuracy_trace`` and
    shows two knee plots.

    Args:
        args: parsed command-line namespace; ``i`` and ``r`` are read here and
            the whole namespace is handed to ``postprocessing``.
    """
    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, removed = rdp(points, args.r)

    candidates = np.arange(1, len(points_reduced))
    # NOTE: the mapped candidates are computed but not used below.
    raw_knees = mapping(candidates, points_reduced, removed)
    rdp_knees = postprocessing(points_reduced, candidates, args)

    logger.info('Add curvature points...')
    # Size is taken before the call so the log reports how many points were added.
    size_before = len(rdp_knees)
    filtered_knees = add_points_even(points, points_reduced, rdp_knees, removed, plt)
    added = len(filtered_knees) - size_before
    logger.info(f'Add curvature points ({added})')
    plot_knees(plt, points, filtered_knees, 'Knees (Add points)')

    # Performance evaluation of the final knees.
    avg_x, avg_y, avg_slope, avg_coefficients, cost = accuracy_trace(points, filtered_knees)
    logger.info('Performance %s %s %s %s %s', avg_x, avg_y, avg_slope, avg_coefficients, cost)

    plot_knees(plt, points, filtered_knees, 'Knees (Final)')
    plt.show()
示例#8
0
 def test_rdp_mapping_four(self):
     """Map reduced indexes 0..4 for an eleven-point rise/plateau/fall curve.

     The mapping result is expected to equal [0, 2, 4, 6, 10].
     """
     curve = np.array([
         [2, 0], [3, 1], [4, 2], [5, 2], [6, 2], [7, 3],
         [8, 4], [9, 3], [10, 2], [11, 1], [12, 0],
     ])
     kept, dropped = rdp.rdp(curve)
     mapped = rdp.mapping(np.array([0, 1, 2, 3, 4]), kept, dropped)
     np.testing.assert_array_equal(mapped, np.array([0, 2, 4, 6, 10]))
示例#9
0
def kneedle_novel(points, args):
    """Detect knees with kneedle on the RDP-reduced curve and map them back.

    Args:
        points: array indexed with the first value returned by ``rdp.rdp``.
        args: namespace; ``r`` is forwarded to ``rdp.rdp``, ``c`` to
            ``pp.filter_corner_knees`` (as ``t``) and ``t``, ``k`` to
            ``pp.filter_clustring``.

    Returns:
        Whatever ``rdp.mapping`` returns for the filtered knees.
    """
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    # Detection followed by three filters, each fed with the previous output.
    detected = kneedle.auto_knees(points_reduced, p=kneedle.PeakDetection.All)
    without_worst = pp.filter_worst_knees(points_reduced, detected)
    without_corners = pp.filter_corner_knees(points_reduced, without_worst, t=args.c)
    clustered = pp.filter_clustring(
        points_reduced, without_corners, clustering.average_linkage, args.t, args.k)

    return rdp.mapping(clustered, reduced, removed)
示例#10
0
def main(args):
    """Score knee detection (RDP followed by the z-method) over a directory of traces.

    Every file in ``args.p`` whose name matches the pattern selected by
    ``args.tr`` is loaded, reduced with ``rdp.rdp``, searched for knees with
    ``zmethod.knees`` and scored with ``evaluation.mcc`` against the optional
    ``<trace>_expected.csv`` file that sits next to it.  One score per trace
    is written to ``eval_rdp_metric_output.csv`` inside ``args.p``.

    Args:
        args: parsed command-line namespace.  The attributes read here are
            ``p`` (trace directory), ``tr`` (a ``Trace`` member), ``r``, ``c``
            and ``d`` (forwarded to ``rdp.rdp`` as ``t``, ``cost`` and
            ``distance``) and ``x``, ``y``, ``z`` (forwarded to
            ``zmethod.knees`` as ``dx``, ``dy`` and ``dz``).
    """
    path = os.path.expanduser(args.p)

    # Pick the file-name pattern for the requested trace kind.
    if args.tr is Trace.all:
        pattern = r'w[0-9]*-(lru|arc)\.csv'
    elif args.tr is Trace.arc:
        pattern = r'w[0-9]*-arc\.csv'
    else:
        pattern = r'w[0-9]*-lru\.csv'
    files = [f for f in os.listdir(path) if re.match(pattern, f)]

    scores = []

    for name in tqdm(files):
        # os.path.join keeps this working when args.p has no trailing separator.
        points = np.genfromtxt(os.path.join(path, name), delimiter=',')

        # The expected file is optional; a missing one yields an empty array.
        stem = os.path.splitext(name)[0]
        expected_file = os.path.join(os.path.normpath(path), f'{stem}_expected.csv')
        expected = []
        if os.path.exists(expected_file):
            # newline='' is what the csv module asks for on files it reads.
            with open(expected_file, 'r', newline='') as f:
                expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))
        expected = np.array(expected)

        # Maximum of column 0 and [max, min] of column 1 of the raw points.
        x_max = max(points[:, 0])
        y_range = [max(points[:, 1]), min(points[:, 1])]

        # run rdp
        reduced, removed = rdp.rdp(points, t=args.r, cost=args.c, distance=args.d)
        points_reduced = points[reduced]

        ## Knee detection code ##
        knees = zmethod.knees(points_reduced, dx=args.x, dy=args.y, dz=args.z,
                              x_max=x_max, y_range=y_range)
        knees = knees[knees > 0]
        knees = rdp.mapping(knees, reduced, removed)

        # A trace without any detected knee scores 0.0.
        if len(knees) > 0:
            cm = evaluation.cm(points, knees, expected)
            mcc = evaluation.mcc(cm)
        else:
            mcc = 0.0
        scores.append(mcc)

    # Output the results, one score per row.
    output = os.path.join(os.path.normpath(path), 'eval_rdp_metric_output.csv')

    # newline='' stops the csv writer from emitting blank rows on Windows.
    with open(output, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows([s] for s in scores)
示例#11
0
def compute_knee_points(r, t):
    """Return the RDP reduction for ``r`` and the knees post-processed with ``t``.

    The result of ``rdp(points, r)`` is stored in the module-level
    ``points_cache`` keyed by ``r`` and reused on later calls.

    Args:
        r: second argument for ``rdp``; also the cache key.
        t: forwarded to ``postprocessing`` as keyword ``t``.

    Returns:
        Tuple ``(points_reduced, removed, filtered_knees)``.
    """
    if r not in points_cache:
        # First request for this r: compute and remember the reduction.
        points_reduced, removed = rdp(points, r)
        points_cache[r] = (points_reduced, removed)
    else:
        points_reduced, removed = points_cache[r]

    # Every reduced index from 1 upwards is a knee candidate.
    candidates = np.arange(1, len(points_reduced))
    filtered_knees = postprocessing(points_reduced, candidates, t=t)

    return points_reduced, removed, filtered_knees
示例#12
0
def kneedle_novel(points, args):
    """Detect knees with kneedle on the RDP-reduced curve and map them back.

    Args:
        points: array indexed with the first value returned by ``rdp.rdp``.
        args: namespace; ``r`` is forwarded to ``rdp.rdp``, ``c`` to
            ``pp.filter_corner_knees`` (as ``t``) and ``t``, ``k`` to
            ``pp.filter_clusters``.

    Returns:
        Whatever ``rdp.mapping`` returns for the filtered knees.
    """
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    # Detection followed by three filters, each fed with the previous output.
    detected = kneedle.auto_knees(points_reduced, p=kneedle.PeakDetection.All)
    without_worst = pp.filter_worst_knees(points_reduced, detected)
    without_corners = pp.filter_corner_knees(points_reduced, without_worst, t=args.c)
    clustered = pp.filter_clusters(
        points_reduced, without_corners, clustering.average_linkage, args.t, args.k)

    return rdp.mapping(clustered, reduced, removed)
示例#13
0
def main(args):
    """Plot a trace, its RDP reduction and a convex hull of one of the two.

    Args:
        args: parsed command-line namespace.  ``i`` is the input CSV; ``r``,
            ``c`` and ``d`` go to ``rdp.rdp``; ``ch`` (a ``ConvexHull``
            member) picks the hull routine; ``s`` (a ``ConvexHullSource``
            member) picks whether the hull is built on the raw or on the
            reduced points.
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # A one-dimensional input holds only y values; pair them with 0..n-1 as x.
    if points.ndim == 1:
        points = np.array([np.arange(0, len(points)), points]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)

    kept = len(reduced)
    space_saving = round((1.0 - (kept / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', kept, space_saving)

    hull_imp = {
        ConvexHull.hull: ch.graham_scan,
        ConvexHull.upper: ch.graham_scan_upper,
        ConvexHull.lower: ch.graham_scan_lower,
    }

    selected = points[reduced]

    # The hull routine returns indexes into whichever array it was given.
    hull_source = points if args.s is ConvexHullSource.raw else selected
    hull = hull_imp[args.ch](hull_source)
    hull_points = hull_source[hull]

    logger.info(hull)

    # Raw trace, then the reduced points, then the hull outline on top.
    plt.plot(points[:, 0], points[:, 1])
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)

    hull_x = hull_points[:, 0]
    hull_y = hull_points[:, 1]
    plt.plot(hull_x, hull_y, 'o', mec='r', color='none', lw=1, markersize=10)
    plt.fill(hull_x, hull_y, edgecolor='r', fill=False)

    plt.show()
示例#14
0
def main(args):
    """Select corner knees on the RDP-reduced trace and plot them on the raw one.

    Args:
        args: parsed command-line namespace.  ``i`` is the input CSV; ``r``,
            ``c`` and ``d`` go to ``rdp.rdp``; ``t1`` goes to
            ``pp.select_corner_knees`` and ``t2`` to
            ``pp.filter_clusters_corners`` (both as ``t``).
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # A one-dimensional input holds only y values; pair them with 0..n-1 as x.
    if points.ndim == 1:
        points = np.array([np.arange(0, len(points)), points]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)

    kept = len(reduced)
    space_saving = round((1.0 - (kept / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', kept, space_saving)

    points_reduced = points[reduced]

    # Every reduced index from 1 upwards starts out as a candidate.
    candidates = np.arange(1, len(points_reduced))
    logger.info(f'Knees {len(candidates)}')

    # Keep only the candidates that select_corner_knees accepts.
    corners = pp.select_corner_knees(points_reduced, candidates, t=args.t1)
    logger.info(f'Knees {len(corners)}')

    # Cluster the remaining candidates together.
    clustered = pp.filter_clusters_corners(
        points_reduced, corners, clustering.average_linkage, t=args.t2)
    logger.info(f'Knees {len(clustered)}')

    plt.plot(points[:, 0], points[:, 1])

    # Map the knees to the original space before indexing the raw points.
    knees = rdp.mapping(clustered, reduced, removed)
    knee_points = points[knees]
    plt.plot(knee_points[:, 0], knee_points[:, 1],
             marker='o', markersize=3, linestyle='None')
    plt.show()
示例#15
0
def main(args):
    """Plot a trace together with the points kept by rdp.rdp.

    Args:
        args: parsed command-line namespace.  ``i`` is the input CSV; ``r``,
            ``c`` and ``d`` go to ``rdp.rdp`` (the last two as ``cost`` and
            ``distance``).
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # A one-dimensional input holds only y values; pair them with 0..n-1 as x.
    if points.ndim == 1:
        points = np.array([np.arange(0, len(points)), points]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)

    kept = len(reduced)
    space_saving = round((1.0 - (kept / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', kept, space_saving)

    # Raw trace first, the retained points drawn on top with markers.
    plt.plot(points[:, 0], points[:, 1])

    selected = points[reduced]
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)
    plt.show()
示例#16
0
文件: main.py 项目: Yifei-Liu/knee
def main(args):
    """Compare several knee-detection methods on one RDP-reduced trace.

    Loads ``args.i`` (and the optional ``<input>_expected.csv`` next to it),
    reduces the trace with ``rdp.rdp``, runs every detector, post-processes
    the results, logs one evaluation row per method and finally calls
    ``plot_knees_ranking``.

    Args:
        args: parsed command-line namespace.  ``i`` is the input CSV, ``r``
            goes to ``rdp.rdp``, ``c`` (a ``Clustering`` member) selects the
            linkage function and ``t``, ``m`` go to ``pp.filter_clustring``.
    """
    # Locate the optional "<input>_expected.csv" next to the input file.
    base_dir = os.path.normpath(os.path.dirname(args.i))
    stem = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(base_dir, f'{stem}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))

    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, points_removed = rdp.rdp(points, args.r)

    space_saving = round(
        (1.0 - (len(points_reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)',
                len(points_reduced), space_saving)

    names = [
        'kneedle', 'kneedke(Rec)', 'l-method', 'dfdt', 'menger', 'curvature',
        'Tyler (RDP)', 'Tyler', 'RDP'
    ]
    # One detector for each of the first seven names; the last two names
    # ('Tyler' and 'RDP') are filled in separately below.
    methods = [
        kneedle.auto_knees, kneedle.multi_knee, lmethod.multi_knee,
        dfdt.multi_knee, menger.multi_knee, curvature.multi_knee, ps.knees
    ]
    knees = []
    knees_raw = []

    # Detectors that run on the reduced points.
    for detect in methods:
        found = detect(points_reduced)
        knees.append(found)
        knees_raw.append(rdp.mapping(found, points_reduced, points_removed))

    # 'Tyler': ps.knees on the raw points, stored unmapped in both lists.
    tyler = ps.knees(points)
    knees.append(tyler)
    knees_raw.append(tyler)

    # 'RDP': every reduced index from 1 upwards.
    rdp_candidates = np.arange(1, len(points_reduced))
    knees.append(rdp_candidates)
    knees_raw.append(
        rdp.mapping(rdp_candidates, points_reduced, points_removed))

    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }

    # Cluster and select points; 'Tyler' bypasses the filters.
    filtered_knees_raw = []
    rankings = []
    for k, n in zip(knees, names):
        k = k[k != 0]  # index 0 is never kept as a knee
        if n != 'Tyler':
            worst_removed = pp.filter_worst_knees(points_reduced, k)
            filtered_knees = pp.filter_clustring(
                points_reduced, worst_removed, cmethod[args.c], args.t, args.m)
            rankings.append(
                ranking.slope_ranking(points_reduced, filtered_knees))
            filtered_knees_raw.append(
                rdp.mapping(filtered_knees, points_reduced, points_removed))
        else:
            filtered_knees_raw.append(k)
            # Every 'Tyler' knee gets the constant rank 1.0.
            rankings.append(np.full(len(k), 1.0))

    logger.info('Model          MSE(knees)   MSE(exp)   Cost(tr)   Cost(kn)')
    logger.info('----------------------------------------------------------')
    for k, n in zip(filtered_knees_raw, names):
        # Without expected knees both MSE columns are reported as NaN.
        if len(expected) == 0:
            error_mse = math.nan
            error_mse_exp = math.nan
        else:
            error_mse = evaluation.mse(points, k, expected,
                                       evaluation.Strategy.knees)
            error_mse_exp = evaluation.mse(points, k, expected,
                                           evaluation.Strategy.expected)
        *_, cost_trace = evaluation.accuracy_trace(points, k)
        *_, cost_knee = evaluation.accuracy_knee(points, k)
        logger.info(
            f'{n:<13}| {error_mse:10.2E} {error_mse_exp:10.2E} {cost_trace:10.2E} {cost_knee:10.2E}'
        )

    plot_knees_ranking(points, filtered_knees_raw, names, rankings, expected)
示例#17
0
def main(args):
    """Detect knees on an RDP-reduced trace, evaluate them and optionally save/plot.

    Loads ``args.i`` (and the optional ``<input>_expected.csv`` next to it),
    reduces the trace with ``rdp.rdp``, filters the candidate knees, logs the
    RMSPE and MCC values and, on request, writes the knee points to
    ``<input>_output.csv`` and shows a plot.

    Args:
        args: parsed command-line namespace.  Attributes read here: ``i``
            (input CSV), ``r`` (to ``rdp.rdp``), ``c`` (to
            ``pp.filter_corner_knees`` as ``t``), ``t`` and ``k`` (to
            ``pp.filter_clusters``), ``a`` (use ``pp.add_points_even``
            instead of ``rdp.mapping``), ``o`` (write the output CSV) and
            ``g`` (show the plot).
    """
    # Locate the optional "<input>_expected.csv" next to the input file.
    base_dir = os.path.normpath(os.path.dirname(args.i))
    stem = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(base_dir, f'{stem}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        # newline='' is what the csv module asks for on files it reads.
        with open(expected_file, 'r', newline='') as f:
            expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))
    expected = np.array(expected)
    points = np.genfromtxt(args.i, delimiter=',')

    ## Knee detection code ##
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]
    # Every reduced index from 1 upwards starts out as a candidate.
    knees = np.arange(1, len(reduced))
    t_k = pp.filter_worst_knees(points_reduced, knees)
    t_k = pp.filter_corner_knees(points_reduced, t_k, t=args.c)
    filtered_knees = pp.filter_clusters(
        points_reduced, t_k, clustering.average_linkage, args.t, args.k)

    # Either add even points or only map the filtered knees.
    if args.a:
        knees = pp.add_points_even(points, reduced, filtered_knees, removed)
    else:
        knees = rdp.mapping(filtered_knees, reduced, removed)

    rmspe_k = evaluation.rmspe(points, knees, expected, evaluation.Strategy.knees)
    rmspe_e = evaluation.rmspe(points, knees, expected, evaluation.Strategy.expected)
    cm = evaluation.cm(points, knees, expected, t=0.01)
    mcc = evaluation.mcc(cm)

    logger.info('RMSE(knees)  RMSE(exp)  MCC')
    logger.info('-------------------------------------------')
    logger.info(f'{rmspe_k:10.2E} {rmspe_e:10.2E}  {mcc:10.2E}')

    # Store the output next to the input file.
    if args.o:
        output = os.path.join(base_dir, f'{stem}_output.csv')
        dataset = points[knees]

        # newline='' stops the csv writer from emitting blank rows on Windows.
        with open(output, 'w', newline='') as f:
            csv.writer(f).writerows(dataset)

    # Display the result.
    if args.g:
        x = points[:, 0]
        y = points[:, 1]
        plt.plot(x, y)
        plt.plot(x[knees], y[knees], 'r+')
        plt.show()
示例#18
0
def main(args):
    """Log the pairwise correlations between four evaluation metrics.

    For every combination of five fixed ``r`` values (passed to ``rdp.rdp``)
    and five fixed ``t`` values (passed to ``pp.filter_clustring``) the knees
    are detected and evaluated with two RMSPE strategies and two cost
    functions.  The correlation matrix of the four metric columns is computed
    with ``np.corrcoef`` and its six off-diagonal pairs are logged.

    Args:
        args: parsed command-line namespace; only ``i`` (input CSV) is read.
    """
    # Locate the optional "<input>_expected.csv" next to the input file.
    base_dir = os.path.normpath(os.path.dirname(args.i))
    stem = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(base_dir, f'{stem}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))
    expected = np.array(expected)
    points = np.genfromtxt(args.i, delimiter=',')

    rs = [0.75, 0.80, 0.85, 0.90, 0.95]
    ts = [0.01, 0.02, 0.03, 0.04, 0.05]

    evaluations = []

    for r in rs:
        ## Knee detection code ##
        points_reduced, points_removed = rdp.rdp(points, r)
        candidates = np.arange(1, len(points_reduced))
        candidates = pp.filter_worst_knees(points_reduced, candidates)
        candidates = pp.filter_corner_knees(points_reduced, candidates)

        for t in ts:
            ## Clustering ##
            clustered = pp.filter_clustring(
                points_reduced, candidates, clustering.average_linkage, t,
                ClusterRanking.left)
            final_knees = pp.add_points_even(
                points, points_reduced, clustered, points_removed)

            ## Evaluation ##
            error_rmspe = evaluation.rmspe(
                points, final_knees, expected, evaluation.Strategy.knees)
            error_rmspe_exp = evaluation.rmspe(
                points, final_knees, expected, evaluation.Strategy.expected)
            *_, cost_trace = evaluation.accuracy_trace(points, final_knees)
            *_, cost_knee = evaluation.accuracy_knee(points, final_knees)

            # Column order: rmspe, rmspe_exp, cost_trace, cost_knee.
            evaluations.append(
                [error_rmspe, error_rmspe_exp, cost_trace, cost_knee])

    ## Compute the Correlation ##
    # Transposed so that every metric becomes one row (variable) for corrcoef.
    rho = np.corrcoef(np.array(evaluations).T)

    # Upper-triangle pairs, named <row metric>_<column metric>.
    rmspe_rmspe_exp, rmspe_cost_trace, rmspe_cost_knee = rho[0, 1], rho[0, 2], rho[0, 3]
    rmspe_exp_cost_trace, rmspe_exp_cost_knee = rho[1, 2], rho[1, 3]
    cost_trace_cost_knee = rho[2, 3]

    logger.info(
        f'{rmspe_rmspe_exp}, {rmspe_cost_trace}, {rmspe_cost_knee}, {rmspe_exp_cost_trace}, {rmspe_exp_cost_knee}, {cost_trace_cost_knee}'
    )
示例#19
0
def main(args):
    """Detect knees with the z-method on an RDP-reduced trace and evaluate them.

    Loads ``args.i`` (and the optional ``<input>_expected.csv`` next to it),
    reduces the trace with ``rdp.rdp``, runs ``zmethod.knees`` on the reduced
    points, logs the RMSPE and MCC values and, on request, writes the knee
    points to ``<input>_output.csv`` and shows a plot.

    Args:
        args: parsed command-line namespace.  Attributes read here: ``i``
            (input CSV), ``r`` (to ``rdp.rdp``), ``x``, ``y``, ``z`` (to
            ``zmethod.knees`` as ``dx``, ``dy``, ``dz``), ``a`` (use
            ``pp.add_points_even`` instead of ``rdp.mapping``), ``o`` (write
            the output CSV) and ``g`` (show the plot).
    """
    # Locate the optional "<input>_expected.csv" next to the input file.
    base_dir = os.path.normpath(os.path.dirname(args.i))
    stem = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(base_dir, f'{stem}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        # newline='' is what the csv module asks for on files it reads.
        with open(expected_file, 'r', newline='') as f:
            expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))
    expected = np.array(expected)
    points = np.genfromtxt(args.i, delimiter=',')

    # Maximum of column 0 and [max, min] of column 1 of the raw points.
    x_max = max(points[:, 0])
    y_range = [max(points[:, 1]), min(points[:, 1])]

    # run rdp
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    ## Knee detection code ##
    knees = zmethod.knees(points_reduced,
                          dx=args.x,
                          dy=args.y,
                          dz=args.z,
                          x_max=x_max,
                          y_range=y_range)
    # Index 0 is never kept as a knee.
    knees = knees[knees > 0]

    # Either add even points or only map the knees.
    if args.a:
        knees = pp.add_points_even(points, reduced, knees, removed)
    else:
        knees = rdp.mapping(knees, reduced, removed)

    rmspe_k = evaluation.rmspe(points, knees, expected,
                               evaluation.Strategy.knees)
    rmspe_e = evaluation.rmspe(points, knees, expected,
                               evaluation.Strategy.expected)
    cm = evaluation.cm(points, knees, expected, t=0.01)
    mcc = evaluation.mcc(cm)

    logger.info('RMSE(knees)  RMSE(exp)  MCC')
    logger.info('-------------------------------------------')
    logger.info(f'{rmspe_k:10.2E} {rmspe_e:10.2E} {mcc:10.2E}')

    # Store the output next to the input file.
    if args.o:
        output = os.path.join(base_dir, f'{stem}_output.csv')
        dataset = points[knees]

        # newline='' stops the csv writer from emitting blank rows on Windows.
        with open(output, 'w', newline='') as f:
            csv.writer(f).writerows(dataset)

    # Display the result.
    if args.g:
        x = points[:, 0]
        y = points[:, 1]
        plt.plot(x, y)
        plt.plot(x[knees], y[knees], 'r+')
        plt.show()