Exemplo n.º 1
0
 def test_mse_2(self):
     """Check ``evaluation.mse`` with one knee and the default strategy.

     A single knee (index 1) on a three-point diagonal is compared against
     two expected points; the call is expected to yield 0.5.
     """
     curve = np.array([[0, 0], [1, 1], [2, 2]])
     knee_indexes = np.array([1])
     reference = np.array([[1, 1], [2, 2]])
     self.assertAlmostEqual(
         evaluation.mse(curve, knee_indexes, reference), 1 / 2)
Exemplo n.º 2
0
 def test_mse_1(self):
     """Check ``evaluation.mse`` with ``Strategy.worst``.

     Every point of a three-point diagonal is marked as a knee and compared
     against two expected points; the call is expected to yield 1/3.
     """
     curve = np.array([[0, 0], [1, 1], [2, 2]])
     knee_indexes = np.array([0, 1, 2])
     reference = np.array([[1, 1], [2, 2]])
     error = evaluation.mse(curve, knee_indexes, reference,
                            evaluation.Strategy.worst)
     self.assertAlmostEqual(error, 1 / 3)
Exemplo n.º 3
0
def main(args):
    """Run every knee-detection method on one trace and report the results.

    The points are read from ``args.i`` (comma-delimited), reduced with RDP,
    and fed to each detection method. The candidates of every method are
    filtered, clustered and ranked, an error/cost table is logged, and the
    final knees are plotted together with the expected ones.

    Args:
        args: Parsed command-line namespace. This function reads:
            ``i`` -- path of the input trace; an optional
            ``<name>_expected.csv`` next to it supplies the expected knees.
            ``r`` -- second argument forwarded to ``rdp.rdp``.
            ``c`` -- ``Clustering`` member selecting the linkage function.
            ``t``, ``m`` -- forwarded to ``pp.filter_clustring``
            (NOTE(review): presumably clustering threshold and ranking
            mode -- confirm against ``pp``).
    """
    # The optional ground truth lives next to the input file and is named
    # after it: <input stem>_expected.csv
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(dirname),
                                 f'{filename}_expected.csv')

    # No expected file simply means the MSE columns are reported as NaN.
    expected = []
    if os.path.exists(expected_file):
        # newline='' is what the csv module requires for file objects so
        # that line endings are interpreted by the reader itself.
        with open(expected_file, 'r', newline='') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)

    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, points_removed = rdp.rdp(points, args.r)

    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0,
                         2)
    logger.info('Number of data points after RDP: %s(%s %%)',
                len(points_reduced), space_saving)

    # `names` has two more entries than `methods`: the trailing 'Tyler' and
    # 'RDP' candidates are produced by hand further below.
    names = [
        'kneedle', 'kneedke(Rec)', 'l-method', 'dfdt', 'menger', 'curvature',
        'Tyler (RDP)', 'Tyler', 'RDP'
    ]
    methods = [
        kneedle.auto_knees, kneedle.multi_knee, lmethod.multi_knee,
        dfdt.multi_knee, menger.multi_knee, curvature.multi_knee, ps.knees
    ]
    knees = []
    # Candidates mapped back to the raw points; only consumed by the
    # optional debug plot below.
    knees_raw = []

    # Elbow methods (all of them operate on the RDP-reduced points)
    for method in methods:
        tmp = method(points_reduced)
        knees.append(tmp)
        raw_indexes = rdp.mapping(tmp, points_reduced, points_removed)
        knees_raw.append(raw_indexes)

    # Tyler: runs on the raw points, so its indexes need no mapping
    candidates = ps.knees(points)
    knees.append(candidates)
    knees_raw.append(candidates)

    # RDP: every reduced point except the first one is a candidate
    candidates = np.arange(1, len(points_reduced))
    knees.append(candidates)
    raw_indexes = rdp.mapping(candidates, points_reduced, points_removed)
    knees_raw.append(raw_indexes)

    #plot_knees(points, knees_raw, names)

    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }

    # Cluster and select points
    filtered_knees_raw = []
    rankings = []
    for k, n in zip(knees, names):
        # remove 0 index in the knees:
        k = k[k != 0]
        if n == 'Tyler':
            # These candidates already index the raw points: keep them
            # unfiltered and give every one the same rank.
            filtered_knees_raw.append(k)
            ranks = np.full(len(k), 1.0)
            #rankings.append(ranking.slope_ranking(points, k))
            rankings.append(ranks)
        else:
            t_k = pp.filter_worst_knees(points_reduced, k)
            filtered_knees = pp.filter_clustring(points_reduced, t_k,
                                                 cmethod[args.c], args.t,
                                                 args.m)
            rankings.append(
                ranking.slope_ranking(points_reduced, filtered_knees))
            raw_indexes = rdp.mapping(filtered_knees, points_reduced,
                                      points_removed)
            filtered_knees_raw.append(raw_indexes)

    logger.info('Model          MSE(knees)   MSE(exp)   Cost(tr)   Cost(kn)')
    logger.info('----------------------------------------------------------')
    for k, n in zip(filtered_knees_raw, names):
        if len(expected) > 0:
            error_mse = evaluation.mse(points, k, expected,
                                       evaluation.Strategy.knees)
            error_mse_exp = evaluation.mse(points, k, expected,
                                           evaluation.Strategy.expected)
        else:
            # Without expected knees there is nothing to compare against.
            error_mse = math.nan
            error_mse_exp = math.nan
        # Only the last element (the cost) of each 5-tuple is reported.
        _, _, _, _, cost_trace = evaluation.accuracy_trace(points, k)
        _, _, _, _, cost_knee = evaluation.accuracy_knee(points, k)
        logger.info(
            f'{n:<13}| {error_mse:10.2E} {error_mse_exp:10.2E} {cost_trace:10.2E} {cost_knee:10.2E}'
        )

    plot_knees_ranking(points, filtered_knees_raw, names, rankings, expected)