Пример #1
0
def instability_average(dataset_id):
    """Return one averaged instability score per technique for a dataset.

    Reads the corner-travel (CT) and relative-position-change (RPC) metric
    tables through ``Parser``. For every technique key found in the RPC data
    and every revision ``k`` (half the number of RPC columns), the row-wise
    quantity ``max(r_k, b_k) - b_k`` is averaged over non-NaN rows for both
    tables, and the two averages are combined with equal weight.

    :param dataset_id: identifier forwarded to the ``Parser`` readers.
    :return: dict mapping technique id to the mean of its per-revision scores.
    """
    ct_frames = Parser.read_ct_metric(dataset_id)
    rpc_frames = Parser.read_rpc_metric(dataset_id)

    def mean_excess(frame, revision):
        # Average amount by which column r_<revision> exceeds b_<revision>
        # (rows where it does not exceed contribute 0); NaN rows are dropped.
        r_name = 'r_' + str(revision)
        b_name = 'b_' + str(revision)
        excess = frame[[r_name, b_name]].max(axis=1) - frame[b_name]
        return excess.dropna().mean()

    averages = {}
    for technique_id in sorted(rpc_frames):
        per_revision = []
        # Columns come in (r, b) pairs, so half the column count is the
        # number of revisions.
        for revision in range(int(len(rpc_frames[technique_id].columns) / 2)):
            rpc_part = mean_excess(rpc_frames[technique_id], revision)
            ct_part = mean_excess(ct_frames[technique_id], revision)
            per_revision.append((rpc_part + ct_part) / 2)

        averages[technique_id] = np.mean(per_revision)

    return averages
Пример #2
0
def make_rpc_matrix(dataset_ids):
    """Build a technique-by-dataset matrix of mean RPC excess values.

    For each dataset the RPC metric table is read through ``Parser``. For
    every technique (sorted by id) and every revision ``k`` the row-wise
    quantity ``max(r_k, b_k) - b_k`` is averaged over non-NaN rows (0 when
    no rows remain), and the per-revision averages are averaged again.

    :param dataset_ids: iterable of dataset identifiers.
    :return: tuple ``(matrix, technique_acronyms)`` where ``matrix`` has one
        row per technique and one column per dataset, and
        ``technique_acronyms`` holds the ``Globals.acronyms`` labels of the
        techniques of the first dataset. For an empty ``dataset_ids`` an
        empty array and an empty list are returned.
    """
    # Initialised up front so an empty ``dataset_ids`` no longer raises
    # UnboundLocalError at the return statement.
    technique_acronyms = []
    all_means = []
    for dataset_id in dataset_ids:
        rpc_df = Parser.read_rpc_metric(dataset_id)
        technique_list = sorted(rpc_df)

        # Row labels are resolved once; all datasets are expected to share
        # the same technique set, otherwise the matrix would be ragged.
        if not technique_acronyms:
            technique_acronyms = [Globals.acronyms[t] for t in technique_list]

        dataset_means = []
        for technique_id in technique_list:
            df = rpc_df[technique_id]
            technique_means = []
            # Columns come in (r, b) pairs: one pair per revision.
            for revision in range(len(df.columns) // 2):
                r_col = 'r_' + str(revision)
                b_col = 'b_' + str(revision)

                diff = (df[[r_col, b_col]].max(axis=1) - df[b_col]).dropna()
                # An all-NaN revision counts as 0 rather than NaN.
                technique_means.append(diff.mean() if len(diff) > 0 else 0)

            dataset_means.append(np.mean(technique_means))
        all_means.append(np.array(dataset_means))

    # Transpose so each row is a technique and each column a dataset.
    return np.array(all_means).transpose(), technique_acronyms
Пример #3
0
def plot_mean_boxplot_with_pearson(dataset_id):
    """Plot per-revision mean stability scores together with Pearson r values.

    For every technique in the module-level ``technique_list`` and every pair
    of consecutive revisions, three terms are derived from ``delta_vis``,
    ``delta_data`` and the unavoidable movement and averaged; the regression
    r value of ``delta_vis`` against ``delta_data`` is recorded as well
    (values that are not positive are stored as 0).

    :param dataset_id: dataset identifier; also appended to the plot title.
    """
    boxplot_values = []
    pearson_values = []
    for technique_id in technique_list:
        # Progress output: acronyms are printed on one line as work proceeds.
        print(Globals.acronyms[technique_id], end=' ', flush=True)
        history = Parser.parse_rectangles(technique_id, dataset_id)

        step_means = []
        step_pearson = []
        for step in range(len(history) - 1):
            before = history[step]
            after = history[step + 1]
            delta_vis = DeltaMetrics.compute_delta_vis(before, after)
            delta_data = DeltaMetrics.compute_delta_data(before, after)
            un_mov = UnavoidableMovement.compute_unavoidable_movement(
                before, after)

            ratio_term = (1 - delta_vis) / (1 - delta_data)
            diff_term = 1 - abs(delta_vis - delta_data)
            unavoidable_term = 1 - (delta_vis - un_mov)
            step_means.append(
                (ratio_term + diff_term + unavoidable_term) / 3)

            # Third element of the linregress result is the r value.
            r_value = stats.linregress(delta_data, delta_vis)[2]
            step_pearson.append(r_value if r_value > 0 else 0)

        boxplot_values.append(step_means)
        pearson_values.append(step_pearson)

    TimeBoxplot.plot_with_pearson(boxplot_values,
                                  technique_list,
                                  pearson_values,
                                  title='Mean with Pearson - ' + dataset_id)
def plot_time_boxplot(dataset_id):
    """Draw a time boxplot of ``compute_shneiderman`` values for a dataset.

    One series is collected per technique in the module-level
    ``technique_list``; each entry is the value computed for a pair of
    consecutive revisions.

    :param dataset_id: dataset identifier; also appended to the plot title.
    """
    data = []
    for technique_id in technique_list:
        history = Parser.parse_rectangles(technique_id, dataset_id)
        data.append([
            compute_shneiderman(history[step], history[step + 1])
            for step in range(len(history) - 1)
        ])

    TimeBoxplot.plot(data, technique_list, title="Shneiderman - " + dataset_id)
def plot_time_boxplot(dataset_id):
    """Draw a time boxplot of relative-position-change values for a dataset.

    One series is collected per technique in the module-level
    ``technique_list``; each entry is the result of
    ``relative_position_change_wrapper`` for a pair of consecutive revisions.
    The technique acronym is printed once its series is complete.

    :param dataset_id: dataset identifier; also appended to the plot title.
    """
    data = []
    for technique_id in technique_list:
        history = Parser.parse_rectangles(technique_id, dataset_id)
        data.append([
            relative_position_change_wrapper(history[step], history[step + 1])
            for step in range(len(history) - 1)
        ])
        # Progress output, all acronyms on one line.
        print(Globals.acronyms[technique_id], end=' ', flush=True)

    TimeBoxplot.plot(data, technique_list, title="RPC - " + dataset_id)
Пример #6
0
    def Load_Graph(self):
        """Parse the file in ``self.__file`` and store the resulting graph.

        A ``P.Parser`` is created for the file and initialised; every command
        it yields is handed to a ``GP.Graph_Processor`` wrapping a fresh
        ``nx.DiGraph``. Once the parser returns ``None`` the processor
        resolves its edges and the graph it exposes is kept in
        ``self.__Graph``.
        """
        cmd_processor = P.Parser(self.__file)
        cmd_processor.init_command_processor()
        graph_processor = GP.Graph_Processor(nx.DiGraph())

        # ``None`` marks the end of the command stream; identity comparison
        # avoids depending on how the token container implements ``!=``.
        tokens = cmd_processor.get_next_command()
        while tokens is not None:
            graph_processor.process_tokens(tokens)
            tokens = cmd_processor.get_next_command()

        graph_processor.resolve_edges()

        self.__Graph = graph_processor.get_Graph()
Пример #7
0
def plot_ar_matrix(dataset_ids):
    """Plot the mean aspect ratio per technique and dataset as matrices.

    For each dataset and each technique in the module-level
    ``technique_list`` the aspect ratios of the revisions are pooled and
    averaged. The resulting technique-by-dataset matrix is drawn four times,
    once for every combination of ``shared_cm`` and ``cell_text``.

    :param dataset_ids: sequence of dataset identifiers (matrix columns).
    """
    rows = []
    for dataset_id in dataset_ids:
        row = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            pooled = np.array([])
            # NOTE(review): the last revision is not included here, while the
            # loop only uses a single revision per step - confirm intended.
            for step in range(len(history) - 1):
                pooled = np.append(pooled,
                                   compute_aspect_ratios(history[step]).values)

            mean_ratio = pooled.mean()
            row.append(mean_ratio)
            print(Globals.acronyms[technique_id], dataset_id, mean_ratio)
        rows.append(row)

    # Rows become techniques, columns become datasets.
    matrix = np.array(rows).transpose()

    # Same matrix, every combination of colour-map sharing and cell labels.
    for cell_text in (True, False):
        for shared_cm in (False, True):
            MatrixPlot.plot(matrix,
                            dataset_ids,
                            technique_list,
                            shared_cm=shared_cm,
                            cell_text=cell_text,
                            title='Aspect ratio')
Пример #8
0
def pearson_matrix(dataset_ids):
    """Plot the regression r value per technique and dataset as matrices.

    For each dataset and each technique in the module-level
    ``technique_list`` all ``delta_data`` and ``delta_vis`` values of
    consecutive revision pairs are pooled and passed to
    ``stats.linregress``; the r value fills one matrix cell. The matrix is
    drawn four times, once for every combination of ``shared_cm`` and
    ``cell_text``.

    :param dataset_ids: sequence of dataset identifiers (matrix columns).
    """
    rows = []
    for dataset_id in dataset_ids:
        row = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            # Pool the delta values of every revision pair of this dataset.
            pooled_data = np.array([])
            pooled_vis = np.array([])
            for step in range(len(history) - 1):
                before = history[step]
                after = history[step + 1]
                pooled_data = np.append(
                    pooled_data, DeltaMetrics.compute_delta_data(before, after))
                pooled_vis = np.append(
                    pooled_vis, DeltaMetrics.compute_delta_vis(before, after))

            # Only the correlation coefficient of the regression is kept.
            _, _, r_value, _, _ = stats.linregress(pooled_data, pooled_vis)

            row.append(r_value)
            print(Globals.acronyms[technique_id], dataset_id, r_value)
        rows.append(row)

    # Rows become techniques, columns become datasets.
    matrix = np.array(rows).transpose()

    # Same matrix, every combination of colour-map sharing and cell labels.
    for cell_text in (True, False):
        for shared_cm in (False, True):
            MatrixPlot.plot(matrix, dataset_ids, technique_list,
                            shared_cm=shared_cm,
                            cell_text=cell_text,
                            title='Pearson')
Пример #9
0
def delta_ratio_boxplots(dataset_id):
    """Draw time boxplots of ``(1 - delta_vis) / (1 - delta_data)``.

    One series is collected per technique in the module-level
    ``technique_list``, with one entry per pair of consecutive revisions.
    The data is plotted twice: in the given order and median-sorted.

    :param dataset_id: dataset identifier; also appended to the plot title.
    """
    data = []
    for technique_id in technique_list:
        history = Parser.parse_rectangles(technique_id, dataset_id)
        per_step = []
        for step in range(len(history) - 1):
            before = history[step]
            after = history[step + 1]
            vis_change = compute_delta_vis(before, after)
            data_change = compute_delta_data(before, after)
            per_step.append((1 - vis_change) / (1 - data_change))
        data.append(per_step)

    plot_title = "Delta Ratio - " + dataset_id
    TimeBoxplot.plot(data, technique_list, title=plot_title)
    TimeBoxplot.plot(data, technique_list, median_sorted=True, title=plot_title)
def plot_matrix(dataset_ids):
    """Plot the location drift per technique and dataset as a matrix.

    Each cell is the value ``compute_location_drift`` returns for the parsed
    rectangle history of one technique (from the module-level
    ``technique_list``) on one dataset. Every value is also printed.

    :param dataset_ids: sequence of dataset identifiers (matrix columns).
    """
    per_dataset = []
    for dataset_id in dataset_ids:
        drifts = []
        for technique_id in technique_list:
            drift = compute_location_drift(
                Parser.parse_rectangles(technique_id, dataset_id))
            drifts.append(drift)
            print(Globals.acronyms[technique_id], dataset_id, drift)
        per_dataset.append(drifts)

    # Rows become techniques, columns become datasets.
    drift_matrix = np.array(per_dataset).transpose()

    MatrixPlot.plot(drift_matrix,
                    dataset_ids,
                    technique_list,
                    cell_text=True,
                    title='Location Drift')
Пример #11
0
def plot_time_boxplot(dataset_id):
    """Draw time boxplots of the aspect ratios of every revision.

    One series is collected per technique in the module-level
    ``technique_list``; each entry is the list form of what
    ``compute_aspect_ratios`` returns for one revision. The data is plotted
    twice: in the given order and median-sorted.

    :param dataset_id: dataset identifier; also appended to the plot title.
    """
    data = []
    for technique_id in technique_list:
        history = Parser.parse_rectangles(technique_id, dataset_id)
        data.append([
            compute_aspect_ratios(history[revision]).tolist()
            for revision in range(len(history))
        ])

    plot_title = "Aspect Ratios - " + dataset_id
    TimeBoxplot.plot(data, technique_list, title=plot_title)
    TimeBoxplot.plot(data,
                     technique_list,
                     median_sorted=True,
                     title=plot_title)
Пример #12
0
def plot_mean_boxplot(
        dataset_id,
        metrics='VIS'):  # Default case was what was used at VIS18 paper
    """Draw time boxplots of a combined stability score per revision pair.

    For every technique in the module-level ``technique_list`` and every pair
    of consecutive revisions a score is computed:

    * ``'VIS'``: mean of the delta ratio, ``1 - |delta_vis - delta_data|``
      and ``1 - (delta_vis - unavoidable movement)``.
    * ``'SIBGRAPI'``: mean of the delta ratio and the
      ``ShneidermanWattenberg.compute_shneiderman`` value.

    The data is plotted twice: in the given order and median-sorted.

    :param dataset_id: dataset identifier; also appended to the plot title.
    :param metrics: ``'VIS'`` (default) or ``'SIBGRAPI'``.
    :raises ValueError: if ``metrics`` is neither of the supported values.
    """
    # Fail early: previously an unknown value left ``mean`` unassigned and
    # surfaced as an UnboundLocalError deep inside the loop (or went
    # unnoticed when there were no revision pairs).
    if metrics not in ('VIS', 'SIBGRAPI'):
        raise ValueError(
            "metrics must be 'VIS' or 'SIBGRAPI', got " + repr(metrics))

    data = []
    for technique_id in technique_list:
        # Progress output, all acronyms on one line.
        print(Globals.acronyms[technique_id], end=' ', flush=True)
        technique_data = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        for revision in range(len(history) - 1):
            current = history[revision]
            following = history[revision + 1]

            # Both metric sets are built on the same delta ratio.
            delta_vis = DeltaMetrics.compute_delta_vis(current, following)
            delta_data = DeltaMetrics.compute_delta_data(current, following)
            ratios = (1 - delta_vis) / (1 - delta_data)

            if metrics == 'VIS':
                un_mov = UnavoidableMovement.compute_unavoidable_movement(
                    current, following)
                diffs = 1 - abs(delta_vis - delta_data)
                unavoidable = 1 - (delta_vis - un_mov)
                mean = (ratios + diffs + unavoidable) / 3
            else:  # 'SIBGRAPI'
                shn = ShneidermanWattenberg.compute_shneiderman(
                    current, following)
                mean = (ratios + shn) / 2

            technique_data.append(mean)
        data.append(technique_data)

    TimeBoxplot.plot(data, technique_list, title='Mean - ' + dataset_id)

    TimeBoxplot.plot(data,
                     technique_list,
                     median_sorted=True,
                     title='Mean - ' + dataset_id)
def plot(dataset_id):
    """Save a grid of centroid-trail plots, one subplot per technique.

    For every technique in the module-level ``technique_list`` the centroid
    sequences returned by ``compute_centroids`` are drawn as black line
    segments between consecutive positions. Segment opacity grows with the
    segment length (relative to the frame diagonal) and shrinks with the
    square root of the trail length. The figure is written to
    ``Globals.plot_subdir + dataset_id + '.png'``.

    :param dataset_id: dataset identifier; used in the title and file name.
    """
    n_rows = math.ceil(len(technique_list) / 5)
    fig, _ = plt.subplots(n_rows, 5)
    fig.suptitle("Centroid Trail - " + dataset_id)

    # Assuming we are plotting the treemap in a 1000x1000 pixel frame
    frame_diagonal = math.sqrt(1000**2 + 1000**2)

    for index, technique_id in enumerate(technique_list):
        ax = fig.axes[index]
        ax.set_title(Globals.acronyms[technique_id])
        centroids = compute_centroids(
            Parser.parse_rectangles(technique_id, dataset_id))

        segments = []
        segment_colors = []
        for _key, trail in centroids.items():
            damping = math.sqrt(len(trail))
            for step in range(len(trail) - 1):
                start = trail[step]
                end = trail[step + 1]
                segments.append((start, end))
                # Opacity: normalised segment length, damped by trail length.
                alpha = math.sqrt((start[0] - end[0])**2 +
                                  (start[1] - end[1])**2) / frame_diagonal
                alpha = alpha / damping
                segment_colors.append((0, 0, 0, alpha))

        ax.add_collection(
            mc.LineCollection(segments, colors=segment_colors, linewidths=1))
        ax.set_xlim(0, 1000)
        ax.set_ylim(0, 1000)
        # Hide all ticks and tick labels on both axes.
        ax.tick_params(axis='x',
                       which='both',
                       bottom=False,
                       top=False,
                       labelbottom=False)
        ax.tick_params(axis='y',
                       which='both',
                       left=False,
                       right=False,
                       labelleft=False)
        ax.set_aspect('equal', adjustable='box')

    fig.savefig(Globals.plot_subdir + dataset_id + '.png', dpi=500)
Пример #14
0
def ar_average(dataset_id):
    """Return one averaged aspect-ratio score per technique for a dataset.

    Reads the aspect-ratio tables through ``Parser``. For every technique and
    every revision ``k`` (half the number of columns) the plain mean of
    column ``ar_k`` and its average weighted by column ``w_k`` are combined
    with equal weight; the per-revision results are then averaged.

    :param dataset_id: identifier forwarded to ``Parser.read_aspect_ratios``.
    :return: dict mapping technique id to its averaged score.
    """
    frames = Parser.read_aspect_ratios(dataset_id)
    averages = {}

    for technique_id in sorted(frames):
        frame = frames[technique_id]
        per_revision = []
        # Columns come in (w, ar) pairs: one pair per revision.
        for revision in range(int(len(frame.columns) / 2)):
            ratios = frame['ar_' + str(revision)]
            weights = frame['w_' + str(revision)]

            plain = ratios.mean(axis=0)
            weighted = np.average(ratios.dropna(), weights=weights.dropna())

            per_revision.append((plain + weighted) / 2)

        averages[technique_id] = np.mean(per_revision)

    return averages
Пример #15
0
def delta_ratio_matrix(dataset_ids):
    """Plot the mean delta ratio per technique and dataset as matrices.

    For each dataset and each technique in the module-level
    ``technique_list`` the values ``(1 - delta_vis) / (1 - delta_data)`` of
    all consecutive revision pairs are pooled and averaged. The resulting
    technique-by-dataset matrix is drawn four times, once for every
    combination of ``shared_cm`` and ``cell_text``.

    :param dataset_ids: sequence of dataset identifiers (matrix columns).
    """
    rows = []
    for dataset_id in dataset_ids:
        row = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            pooled = np.array([])
            for step in range(len(history) - 1):
                before = history[step]
                after = history[step + 1]
                vis_change = compute_delta_vis(before, after)
                data_change = compute_delta_data(before, after)
                step_ratio = (1 - vis_change) / (1 - data_change)
                pooled = np.append(pooled, step_ratio.values)

            mean_ratio = pooled.mean()
            row.append(mean_ratio)
            print(Globals.acronyms[technique_id], dataset_id, mean_ratio)
        rows.append(row)

    # Rows become techniques, columns become datasets.
    matrix = np.array(rows).transpose()

    # Same matrix, every combination of colour-map sharing and cell labels.
    for cell_text in (True, False):
        for shared_cm in (False, True):
            MatrixPlot.plot(matrix, dataset_ids, technique_list,
                            shared_cm=shared_cm,
                            cell_text=cell_text,
                            title='Delta ratio')
Пример #16
0
def make_ar_matrices(dataset_ids):
    """Build technique-by-dataset matrices of mean aspect ratios.

    For each dataset the aspect-ratio tables are read through ``Parser``.
    For every technique (sorted by id) and every revision ``k`` the plain
    mean of column ``ar_k`` and its average weighted by column ``w_k`` are
    computed; both are then averaged over the revisions.

    :param dataset_ids: iterable of dataset identifiers.
    :return: tuple ``(weighted, unweighted, technique_acronyms)``; both
        matrices have one row per technique and one column per dataset, and
        ``technique_acronyms`` holds the ``Globals.acronyms`` labels of the
        techniques of the first dataset. For an empty ``dataset_ids`` two
        empty arrays and an empty list are returned.
    """
    # Initialised up front so an empty ``dataset_ids`` no longer raises
    # UnboundLocalError at the return statement.
    technique_acronyms = []
    weighted_means = []
    unweighted_means = []
    for dataset_id in dataset_ids:
        ar_df = Parser.read_aspect_ratios(dataset_id)
        technique_list = sorted(ar_df)

        # Row labels are resolved once; all datasets are expected to share
        # the same technique set, otherwise the matrices would be ragged.
        if not technique_acronyms:
            technique_acronyms = [Globals.acronyms[t] for t in technique_list]

        weighted_dataset_means = []
        unweighted_dataset_means = []
        for technique_id in technique_list:
            df = ar_df[technique_id]
            weighted_technique_means = []
            unweighted_technique_means = []
            # Columns come in (w, ar) pairs: one pair per revision.
            for revision in range(len(df.columns) // 2):
                ratios = df['ar_' + str(revision)]
                weights = df['w_' + str(revision)]

                unweighted_technique_means.append(ratios.mean(axis=0))
                weighted_technique_means.append(
                    np.average(ratios.dropna(), weights=weights.dropna()))

            weighted_dataset_means.append(np.mean(weighted_technique_means))
            unweighted_dataset_means.append(
                np.mean(unweighted_technique_means))
        weighted_means.append(np.array(weighted_dataset_means))
        unweighted_means.append(np.array(unweighted_dataset_means))

    # Transpose matrices so each row is a technique and each column a dataset.
    return (np.array(weighted_means).transpose(),
            np.array(unweighted_means).transpose(),
            technique_acronyms)
def plot_matrix(dataset_ids):
    """Plot the mean ``compute_shneiderman`` value per technique and dataset.

    For each dataset and each technique in the module-level
    ``technique_list`` the values computed for all consecutive revision
    pairs are pooled and averaged; the technique-by-dataset matrix is drawn
    with cell labels. Every mean is also printed.

    :param dataset_ids: sequence of dataset identifiers (matrix columns).
    """
    per_dataset = []
    for dataset_id in dataset_ids:
        row = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            pooled = np.array([])
            for step in range(len(history) - 1):
                step_values = compute_shneiderman(history[step],
                                                  history[step + 1])
                pooled = np.append(pooled, step_values.values)

            mean_value = pooled.mean()
            row.append(mean_value)
            print(Globals.acronyms[technique_id], dataset_id, mean_value)
        per_dataset.append(row)

    # Rows become techniques, columns become datasets.
    sw_matrix = np.array(per_dataset).transpose()

    MatrixPlot.plot(sw_matrix,
                    dataset_ids,
                    technique_list,
                    cell_text=True,
                    title='Shneiderman-Wattenberg')
Пример #18
0
def compute_and_cache_metrics(dataset_id):
    """Compute the metric tables of a dataset and cache them as CSV files.

    ``Parser.parse_dataset`` returns a dict with one list of dataframes per
    technique. For each technique up to three files are written below
    ``metric_results/`` (suffixes ``-ar.csv``, ``-ct.csv``, ``-rpc.csv``);
    a file that already exists is left untouched and its metric is not
    recomputed. Missing values are replaced by 0 before writing.

    :param dataset_id: dataset identifier; part of every cache file name.
    :return: None
    """
    frames_by_technique = Parser.parse_dataset(dataset_id)

    real_cols = ['rx', 'ry', 'rw', 'rh']
    base_cols = ['bx', 'by', 'bw', 'bh']

    def join_on_index(left, right):
        # Outer join on the index keeps rows missing from either side.
        return pd.merge(left,
                        right,
                        how='outer',
                        left_index=True,
                        right_index=True)

    for technique, df_list in frames_by_technique.items():
        print(technique, end='')
        cache_prefix = 'metric_results/' + technique + '-' + dataset_id

        # Aspect ratios and relative weights, one column pair per revision.
        ar_cache_path = cache_prefix + '-ar.csv'
        if not os.path.isfile(ar_cache_path):
            ar_df = pd.DataFrame()
            for revision, df in enumerate(df_list):
                weight = compute_relative_weight(df, revision)
                ar = compute_aspect_ratio(df, revision)
                ar_df = join_on_index(join_on_index(ar_df, weight), ar)
            ar_df = ar_df.fillna(0)
            ar_df.to_csv(ar_cache_path, index_label='id')

        # Corner travel between consecutive revisions (real and baseline).
        ct_cache_path = cache_prefix + '-ct.csv'
        if not os.path.isfile(ct_cache_path):
            ct_df = pd.DataFrame()
            for revision in range(len(df_list) - 1):
                current = df_list[revision]
                following = df_list[revision + 1]
                # Rows without an x coordinate are dropped before comparing.
                r0 = current[real_cols].dropna(axis=0, subset=['rx'])
                r1 = following[real_cols].dropna(axis=0, subset=['rx'])
                b1 = following[base_cols].dropna(axis=0, subset=['bx'])
                ct_df = join_on_index(
                    ct_df, corner_travel_values(r0, r1, b1, revision))
            ct_df = ct_df.fillna(0)
            ct_df.to_csv(ct_cache_path, index_label='id')

        # Relative position change between consecutive revisions.
        rpc_cache_path = cache_prefix + '-rpc.csv'
        if not os.path.isfile(rpc_cache_path):
            rpc_df = pd.DataFrame()
            for revision in range(len(df_list) - 1):
                current = df_list[revision]
                following = df_list[revision + 1]

                real = relative_position_change_wrapper(
                    current[real_cols], following[real_cols])
                baseline = relative_position_change_wrapper(
                    current[real_cols], following[base_cols])

                pair = pd.DataFrame({
                    'r_' + str(revision): real,
                    'b_' + str(revision): baseline
                })
                # Descending column order puts 'r_<n>' before 'b_<n>'.
                pair = pair.sort_index(axis=1, ascending=False)
                rpc_df = join_on_index(rpc_df, pair)
            rpc_df = rpc_df.fillna(0)
            rpc_df.to_csv(rpc_cache_path, index_label='id')

        print(' done.')
Пример #19
0
import networkx as nx
import Util.Parser as P
import Util.Graph_Processor as GP

# Debug script: walk every command of the sample design file and print each
# distinct command name the first time it is seen.
CmdProcessor = P.Parser("/home/sghorui/Trace_Path/sample_design_file")

CmdProcessor.init_command_processor()

#tok = P.Tokenizer("/home/sghorui/Trace_Path/sample_design_file")

#Processor = GP.Graph_Processor(nx.DiGraph())

# Keys already printed; the stored value (1) is only a marker.
D = {}

tokens = CmdProcessor.get_next_command()

# ``None`` marks the end of the command stream.
while tokens is not None:

    if "MED_" in tokens[0]:

        # Names containing "MED_" are grouped by their first 8 characters,
        # and the whole token list is printed for the first of each group.
        if tokens[0][0:8] not in D:
            print(tokens)
            D[tokens[0][0:8]] = 1

    elif tokens[0] not in D:
        print(tokens[0])
        D[tokens[0]] = 1
    #Processor.process_tokens(tokens)
    tokens = CmdProcessor.get_next_command()

#print(Processor.Graph.nodes(data = True))
Пример #20
0
from scipy.stats import stats

from Util import Globals, Parser
from Visualizations import TimeBoxplot
from StabilityMetrics import DeltaMetrics, UnavoidableMovement, ShneidermanWattenberg

# Techniques iterated by the plotting code below, as listed by the project
# Parser with the ``sibgrapi`` flag enabled.
technique_list = Parser.list_techniques(sibgrapi=True)


def plot_mean_boxplot(
        dataset_id,
        metrics='VIS'):  # Default case was what was used at VIS18 paper
    """Compute a combined stability score per technique and revision pair.

    For every technique in ``technique_list`` the rectangle history of
    ``dataset_id`` is parsed; when ``metrics == 'VIS'`` the delta ratio,
    ``1 - |delta_vis - delta_data|`` and
    ``1 - (delta_vis - unavoidable movement)`` of each pair of consecutive
    revisions are averaged.

    :param dataset_id: dataset identifier passed to ``Parser.parse_rectangles``.
    :param metrics: metric set selector; only ``'VIS'`` is handled in the
        code visible here.
    """
    data = []
    for i, technique_id in enumerate(technique_list):
        # Progress output, all acronyms on one line.
        print(Globals.acronyms[technique_id], end=' ', flush=True)
        technique_data = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        # Iterate over pairs of consecutive revisions.
        for revision in range(len(history) - 1):
            if metrics == 'VIS':
                delta_vis = DeltaMetrics.compute_delta_vis(
                    history[revision], history[revision + 1])
                delta_data = DeltaMetrics.compute_delta_data(
                    history[revision], history[revision + 1])
                un_mov = UnavoidableMovement.compute_unavoidable_movement(
                    history[revision], history[revision + 1])

                ratios = (1 - delta_vis) / (1 - delta_data)
                diffs = 1 - abs(delta_vis - delta_data)
                unavoidable = 1 - (delta_vis - un_mov)

                # Equal-weight average of the three terms.
                mean = (ratios + diffs + unavoidable) / 3
                # NOTE(review): this excerpt ends here - ``mean`` is never
                # stored in ``technique_data`` and nothing is plotted or
                # returned; the function appears to be truncated.
Пример #21
0
from Visualization import KDE
from Visualization import Matrix
from Visualization import Scatter
from Visualization import SimpleTrails
from Visualization import DirectedTrails
from Visualization import TrailHeatmap

# Command-line dispatch: the first argument selects the action, the second
# one is the dataset id.
# NOTE(review): ``sys``, ``Metrics`` and ``Parser`` are not imported in the
# lines visible here - presumably imported above this excerpt.
action = sys.argv[1]

if action == 'cache-metrics':
    # Compute the metric tables of the dataset and cache them.
    dataset_id = sys.argv[2]
    Metrics.compute_and_cache_metrics(dataset_id)

elif action == 'kde-ct':
    # KDE plots of the corner-travel metric, once per value of the last flag.
    dataset_id = sys.argv[2]
    ct_values = Parser.read_ct_metric(dataset_id)
    KDE.plot_real_vs_baseline(ct_values, dataset_id, 'ct', True)
    print('---')
    KDE.plot_real_vs_baseline(ct_values, dataset_id, 'ct', False)
    print('---')

elif action == 'kde-rpc':
    # Same as 'kde-ct', for the relative-position-change metric.
    dataset_id = sys.argv[2]
    rpc_values = Parser.read_rpc_metric(dataset_id)
    KDE.plot_real_vs_baseline(rpc_values, dataset_id, 'rpc', True)
    print('---')
    KDE.plot_real_vs_baseline(rpc_values, dataset_id, 'rpc', False)
    print('---')

elif action == 'boxplots':
    # NOTE(review): the excerpt ends after reading the dataset id; the rest
    # of this branch is not visible.
    dataset_id = sys.argv[2]
Пример #22
0
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from matplotlib.colors import Normalize
from matplotlib import cm

from StabilityMetrics import DeltaMetrics
from Util import Globals, Parser
from Visualizations import MatrixPlot

# Use this dummy technique list for now
# technique_list = ['SliceAndDice', 'SquarifiedTreeMap']
technique_list = Parser.list_techniques()


def scatter(dataset_id):
    fig, axs = plt.subplots(int(len(technique_list)/2), 2, sharex=True, sharey=True, figsize=(20, 44))

    xlim = 0
    for i, technique_id in enumerate(technique_list):
        # print(Globals.acronyms[technique_id])
        ax = fig.axes[i]

        history = Parser.parse_rectangles(technique_id, dataset_id)
        # Compute all delta_vis and delta_data values for a dataset (1 pair per cell)
        all_delta_data = np.array([])
        all_delta_vis = np.array([])
        for revision in range(len(history) - 1):
Пример #23
0
def scatter(dataset_id):
    """Save a grid of delta_data / delta_vis scatter plots, one per technique.

    For every technique in the module-level ``technique_list`` all
    ``delta_data`` and ``delta_vis`` values of consecutive revision pairs are
    pooled, a linear regression is fitted and drawn, and the points are
    coloured by a Gaussian KDE estimate (via ``make_colors``, defined
    elsewhere in the project). Above 10000 points all points are drawn in
    black first and a random sample of 10000 is coloured on top. The figure
    is written to ``Globals.plot_subdir + 'scatter-' + dataset_id + '.png'``.

    :param dataset_id: dataset identifier; used in the output file name.
    """
    # Two columns; round the row count up so that an odd number of techniques
    # still gets one axis each (int(n / 2) left the last one without an axis
    # and fig.axes[i] raised IndexError).
    n_rows = (len(technique_list) + 1) // 2
    fig, _ = plt.subplots(n_rows, 2, sharex=True, sharey=True, figsize=(20, 44))

    xlim = 0
    for i, technique_id in enumerate(technique_list):
        ax = fig.axes[i]

        history = Parser.parse_rectangles(technique_id, dataset_id)
        # Compute all delta_vis and delta_data values for a dataset (1 pair per cell)
        all_delta_data = np.array([])
        all_delta_vis = np.array([])
        for revision in range(len(history) - 1):
            delta_data = DeltaMetrics.compute_delta_data(history[revision], history[revision + 1])
            all_delta_data = np.append(all_delta_data, delta_data)

            delta_vis = DeltaMetrics.compute_delta_vis(history[revision], history[revision + 1])
            all_delta_vis = np.append(all_delta_vis, delta_vis)

        # Compute linear regression and draw regression line
        slope, intercept, r_value, p_value, std_err = stats.linregress(all_delta_data, all_delta_vis)

        # The shared x limit is taken from the first technique that yields a
        # non-zero 99.99th percentile.
        if xlim == 0:
            xlim = np.percentile(all_delta_data, 99.99)  # Remove outliers

        # If there are too many points to handle, first we draw all of them in black (alpha),
        # then subsample the space, perform kde, and draw the colored subsample
        # over the original points
        sample_size = 10000
        if len(all_delta_data) > sample_size:
            ax.scatter(all_delta_data, all_delta_vis, color='k', s=1, alpha=.25)

            indices = np.random.choice(len(all_delta_vis), sample_size)
            matrix = np.vstack([all_delta_data[indices], all_delta_vis[indices]])
            point_alpha = .05
        else:
            matrix = np.vstack([all_delta_data, all_delta_vis])
            point_alpha = .25

        dens = stats.gaussian_kde(matrix)
        colours = make_colors(dens(matrix), 'inferno')
        ax.scatter(matrix[0], matrix[1], color=colours, s=3, alpha=point_alpha)

        line = np.poly1d([slope, intercept])(all_delta_data)
        ax.plot(all_delta_data, line, 'r-', lw=.5)

        print(Globals.acronyms[technique_id], r_value)

        title = Globals.acronyms[technique_id]
        title += r"  $\alpha = $" + "{0:.2f}".format(intercept)
        title += r"  $\beta = $" + "{0:.2f}".format(slope)
        title += r"  $r = $" + "{0:.3f}".format(r_value)
        title += r"  $s_e = $" + "{0:.3f}".format(std_err)
        ax.set_title(title)

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        # Booleans, not the strings 'off': a non-empty string is truthy and
        # would switch these ticks on in current matplotlib.
        ax.tick_params(axis='x', which='both', top=False)
        ax.tick_params(axis='y', which='both', right=False)
        ax.set_xlim(xmin=0, xmax=xlim)
        ax.set_ylim(ymin=0)

    fig.savefig(Globals.plot_subdir + 'scatter-' + dataset_id + '.png', dpi=500)