Example #1
def main(args):
    # define clustering methods
    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }
    # get the expected file from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')

    expected = None

    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    else:
        expected = []

    points = np.genfromtxt(args.i, delimiter=',')

    ## Knee detection code ##

    points_reduced, points_removed = rdp.rdp(points, args.r)
    knees = kneedle.auto_knees(points_reduced)
    t_k = pp.filter_worst_knees(points_reduced, knees)
    t_k = pp.filter_corner_knees(points_reduced, t_k)
    filtered_knees = pp.filter_clustring(points_reduced, t_k, cmethod[args.c], args.t, args.m)
    if args.a:
        knees = pp.add_points_even(points, points_reduced, filtered_knees, points_removed)
    else:
        knees = rdp.mapping(filtered_knees, points_reduced, points_removed)
    
    ##########################
    """
Example #2
def kneedle_novel(points, args):
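    # simplify the trace with RDP; keep the reduction info needed to map knees back later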
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]
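    # detect candidate knees on the reduced trace, considering all peaks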
    knees = kneedle.auto_knees(points_reduced, p=kneedle.PeakDetection.All)
    knees = pp.filter_worst_knees(points_reduced, knees)
    knees = pp.filter_corner_knees(points_reduced, knees, t=args.c)
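    # merge nearby candidates by clustering and map the survivors back to the original trace indexes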
    knees = pp.filter_clustring(points_reduced, knees,
                                clustering.average_linkage, args.t, args.k)
    knees = rdp.mapping(knees, reduced, removed)
    return knees
Example #3
def postprocessing(points, knees, args):
    logger.info('Post Processing')
    logger.info('Knees: %s', knees)
    logger.info('Initial #Knees: %s', len(knees))
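    # drop clearly bad knee candidates before clustering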
    knees = filter_worst_knees(points, knees)
    logger.info('Worst Knees: %s', len(knees))
    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }
    current_knees = filter_clustring(points, knees, cmethod[args.c], args.t)
    logger.info('Clustering Knees: %s', len(current_knees))

    return current_knees
Example #4
def postprocessing(points,
                   knees,
                   c=Clustering.average,
                   t=0.1,
                   m=ClusterRanking.left):
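    # discard low-quality and corner knee candidates first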
    knees = filter_worst_knees(points, knees)
    knees = filter_corner_knees(points, knees)
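    # map the Clustering enum to the corresponding linkage function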
    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }
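    # cluster the remaining knees and keep one representative per cluster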
    current_knees = filter_clustring(points, knees, cmethod[c], t, m)
    return current_knees
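A minimal usage sketch for this variant, assuming the same module names as in the surrounding examples; the input file name and the RDP r value are illustrative only:

points = np.genfromtxt('trace.csv', delimiter=',')      # hypothetical input trace
points_reduced, points_removed = rdp.rdp(points, 0.9)   # illustrative r value
candidates = kneedle.auto_knees(points_reduced)
final_knees = postprocessing(points_reduced, candidates, c=Clustering.average, t=0.1)
final_knees = rdp.mapping(final_knees, points_reduced, points_removed)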
Example #5
def postprocessing(points, knees, args):
    logger.info('Post Processing')
    #logger.info('Knees: %s', knees)
    logger.info('Initial #Knees: %s', len(knees))
    wknees = filter_worst_knees(points, knees)

    #plot_knees(plt, points, knees, 'Worst Knees')
    logger.info('After Worst #Knees: %s', len(wknees))
    
    knees = filter_corner_knees(points, wknees)
    #plot_knees(plt, points, knees, 'Corner Knees')

    #diff = np.setdiff1d(wknees, knees)
    #plot_knees(plt, points, diff, 'Diff Knees')

    logger.info('After Corner #Knees: %s', len(knees))
    #logger.info('Worst Knees: %s', len(knees))
    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }
    current_knees = filter_clustring(points, knees, cmethod[args.c], args.t, args.m)
    logger.info('Clustering Knees: %s', len(current_knees))

    return current_knees
Example #6
File: main.py  Project: Yifei-Liu/knee
def main(args):
    # get the expected file from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(dirname),
                                 f'{filename}_expected.csv')

    expected = None

    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    else:
        expected = []

    points = np.genfromtxt(args.i, delimiter=',')
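    # reduce the raw trace with RDP; the removed-point info is used to map knees back later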
    points_reduced, points_removed = rdp.rdp(points, args.r)

    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0,
                         2)
    logger.info('Number of data points after RDP: %s(%s %%)',
                len(points_reduced), space_saving)

    names = [
        'kneedle', 'kneedle(Rec)', 'l-method', 'dfdt', 'menger', 'curvature',
        'Tyler (RDP)', 'Tyler', 'RDP'
    ]
    methods = [
        kneedle.auto_knees, kneedle.multi_knee, lmethod.multi_knee,
        dfdt.multi_knee, menger.multi_knee, curvature.multi_knee, ps.knees
    ]
    knees = []
    knees_raw = []

    # Elbow methods
    for m, n in zip(methods, names):
        tmp = m(points_reduced)
        knees.append(tmp)
        raw_indexes = rdp.mapping(tmp, points_reduced, points_removed)
        knees_raw.append(raw_indexes)

    # Tyler
    candidates = ps.knees(points)
    knees.append(candidates)
    knees_raw.append(candidates)

    # RDP
    candidates = np.arange(1, len(points_reduced))
    knees.append(candidates)
    raw_indexes = rdp.mapping(candidates, points_reduced, points_removed)
    knees_raw.append(raw_indexes)

    #plot_knees(points, knees_raw, names)

    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }

    # Cluster and select points
    filtered_knees_raw = []
    rankings = []
    for k, n in zip(knees, names):
        # remove 0 index in the knees:
        k = k[k != 0]
        if n == 'Tyler':
            filtered_knees_raw.append(k)
            ranks = np.full(len(k), 1.0)
            #rankings.append(ranking.slope_ranking(points, k))
            rankings.append(ranks)
        else:
            t_k = pp.filter_worst_knees(points_reduced, k)
            filtered_knees = pp.filter_clustring(points_reduced, t_k,
                                                 cmethod[args.c], args.t,
                                                 args.m)
            rankings.append(
                ranking.slope_ranking(points_reduced, filtered_knees))
            raw_indexes = rdp.mapping(filtered_knees, points_reduced,
                                      points_removed)
            filtered_knees_raw.append(raw_indexes)

    logger.info(f'Model          MSE(knees)   MSE(exp)   Cost(tr)   Cost(kn)')
    logger.info(f'----------------------------------------------------------')
    for k, n in zip(filtered_knees_raw, names):
        if len(expected) > 0:
            error_mse = evaluation.mse(points, k, expected,
                                       evaluation.Strategy.knees)
            error_mse_exp = evaluation.mse(points, k, expected,
                                           evaluation.Strategy.expected)
        else:
            error_mse = math.nan
            error_mse_exp = math.nan
        _, _, _, _, cost_trace = evaluation.accuracy_trace(points, k)
        _, _, _, _, cost_knee = evaluation.accuracy_knee(points, k)
        logger.info(
            f'{n:<13}| {error_mse:10.2E} {error_mse_exp:10.2E} {cost_trace:10.2E} {cost_knee:10.2E}'
        )

    plot_knees_ranking(points, filtered_knees_raw, names, rankings, expected)
Example #7
def main(args):
    # get the expected file from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(dirname),
                                 f'{filename}_expected.csv')

    expected = None

    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    else:
        expected = []
    expected = np.array(expected)
    points = np.genfromtxt(args.i, delimiter=',')

    rs = [0.75, 0.80, 0.85, 0.90, 0.95]
    ts = [0.01, 0.02, 0.03, 0.04, 0.05]

    evaluations = []

    for r in rs:
        ## Knee detection code ##
        points_reduced, points_removed = rdp.rdp(points, r)
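        # treat every point kept by RDP (except the first) as a knee candidate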
        knees = np.arange(1, len(points_reduced))
        t_k = pp.filter_worst_knees(points_reduced, knees)
        t_k = pp.filter_corner_knees(points_reduced, t_k)
        for t in ts:
            ## Clustering ##
            filtered_knees = pp.filter_clustring(points_reduced, t_k,
                                                 clustering.average_linkage, t,
                                                 ClusterRanking.left)
            final_knees = pp.add_points_even(points, points_reduced,
                                             filtered_knees, points_removed)

            ## Evaluation ##
            error_rmspe = evaluation.rmspe(points, final_knees, expected,
                                           evaluation.Strategy.knees)
            error_rmspe_exp = evaluation.rmspe(points, final_knees, expected,
                                               evaluation.Strategy.expected)

            _, _, _, _, cost_trace = evaluation.accuracy_trace(
                points, final_knees)
            _, _, _, _, cost_knee = evaluation.accuracy_knee(
                points, final_knees)

            evaluations.append(
                [error_rmspe, error_rmspe_exp, cost_trace, cost_knee])

    ## Compute the Correlation ##
    evaluations = np.array(evaluations)
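    # Pearson correlations between the four metrics: RMSPE(knees), RMSPE(expected), Cost(trace), Cost(knee)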
    rho = np.corrcoef(evaluations.T)
    rmspe_rmspe_exp = rho[0, 1]
    rmspe_cost_trace = rho[0, 2]
    rmspe_cost_knee = rho[0, 3]

    rmspe_exp_cost_trace = rho[1, 2]
    rmspe_exp_cost_knee = rho[1, 3]

    cost_trace_cost_knee = rho[2, 3]

    #logger.info(f'{rho}')
    logger.info(
        f'{rmspe_rmspe_exp}, {rmspe_cost_trace}, {rmspe_cost_knee}, {rmspe_exp_cost_trace}, {rmspe_exp_cost_knee}, {cost_trace_cost_knee}'
    )