Exemplo n.º 1
0
def plots_from_samples(samples, x_attribute, ylabel="Number of txs", xlabel=None, log_axis=None, version=0.15,
                       comparative=False, save_fig=False, legend=None, legend_loc=1, font_size=20):
    """
    Generates plots from utxo/tx samples extracted from utxo_dump.

    The CDF of every sample set is computed with ``get_cdf`` and all the resulting curves are drawn in a single
    chart by ``plot_distribution`` (or in one chart per ``(log_axis, save_fig)`` pair when both are lists).

    :param samples: Samples to be plotted (from get_samples). When ``comparative`` is True, a list of sample sets.
    :type samples: list
    :param x_attribute: Attribute to plot (must be a key in the dictionary of the dumped data).
    :type x_attribute: str or list
    :param ylabel: Label for the y axis of the chart
    :type ylabel: str or list
    :param xlabel: Label on the x axis. If not set, ``x_attribute`` (as a list) is used instead.
    :type xlabel: str
    :param log_axis: Determines which axes are plotted using logarithmic scale (accepted values are False, "x", "y"
    or "xy"). A list of such values (together with a list in ``save_fig``) produces one chart per pair.
    :type log_axis: str or list
    :param version: Bitcoin core version, used to decide the folder in which to store the data.
    :type version: float, str or list
    :param comparative: Whether we are running a comparative analysis.
    :type comparative: bool
    :param save_fig: Figure's filename or False (to show the interactive plot)
    :type save_fig: str or list
    :param legend: List of strings with legend entries or None (if no legend is needed)
    :type legend: str list
    :param legend_loc: Indicates the location of the legend (if present)
    :type legend_loc: int
    :param font_size: Title, xlabel and ylabel font size
    :type font_size: int
    :return: None
    :rtype: None
    """

    def in_version_folder(name):
        # A falsy name (e.g. the default False) means "no file": it must reach plot_distribution untouched
        # instead of being concatenated to the folder, which used to raise a TypeError.
        if not name:
            return name
        return str(version) + '/' + name

    if not isinstance(x_attribute, (list, np.ndarray)):
        x_attribute = [x_attribute]

    # In comparative analysis samples are passed as list of lists of samples.
    if not comparative:
        samples = [samples]

    title = ""
    if not xlabel:
        xlabel = x_attribute

    # The whole set of curves is generated once per entry in x_attribute (the attribute itself is not used to
    # select data here, since the samples are already extracted).
    xs, ys = [], []
    for _ in x_attribute:
        for s in samples:
            xc, yc = get_cdf(s, normalize=True)
            xs.append(xc)
            ys.append(yc)

    if isinstance(log_axis, list) and isinstance(save_fig, list):
        # If both the normal axis and the logx axis charts want to be displayed, we can take advantage of the same
        # parsing to speed up the process.
        for lx, sf in zip(log_axis, save_fig):
            plot_distribution(xs, ys, title, xlabel, ylabel, lx, in_version_folder(sf), legend, legend_loc,
                              font_size)
    else:
        # Otherwise we just print one chart.
        plot_distribution(xs, ys, title, xlabel, ylabel, log_axis, in_version_folder(save_fig), legend, legend_loc,
                          font_size)
Exemplo n.º 2
0
def tx_based_analysis_with_filters(tx_fin_name):
    """
    Runs a filtered analysis over a transaction dump. The only filter currently applied keeps the entries whose
    "coinbase" field is truthy, and the CDF of their height is plotted.

    :param tx_fin_name: Input file path which contains the chainstate transaction dump.
    :type tx_fin_name: str
    :return: None
    :rtype: None
    """

    # One (x label, output file name) pair per chart to be generated.
    charts = [('Height', 'tx_height_coinbase')]

    coinbase_only = [lambda x: x["coinbase"]]
    filtered = get_filtered_samples('height',
                                    fin_name=tx_fin_name,
                                    filtr=coinbase_only)

    # The CDF is computed once and shared by every chart.
    cdf_x, cdf_y = get_cdf(filtered, normalize=True)

    for axis_label, fig_name in charts:
        plots_from_samples(xs=cdf_x,
                           ys=cdf_y,
                           xlabel=axis_label,
                           save_fig=fig_name,
                           ylabel="Number of txs")
Exemplo n.º 3
0
def tx_based_analysis(tx_fin_name):
    """
    Runs the transaction based analysis over a transaction dump of the chainstate: one CDF chart per numeric
    attribute plus a pie chart for the "coinbase" attribute.

    :param tx_fin_name: Input file path which contains the chainstate transaction dump.
    :type tx_fin_name: str
    :return: None
    :rtype: None
    """

    # (attribute, x label, log axis, output name). A list in the last two fields is forwarded as is to
    # plots_from_samples.
    cdf_charts = [
        ('height', 'Height', False, "tx_height"),
        ('total_len', 'Total length (bytes)', [False, 'x'],
         ["tx_total_len", "tx_total_len_logx"]),
        ('total_value', 'Total value', 'x', "tx_total_value_logx"),
        ('num_utxos', 'Number of UTXOs per tx', [False, 'x'],
         ["tx_num_utxos", "tx_num_utxos_logx"]),
    ]

    # (labels, output name, groups, colors) for every pie chart built from the pie attribute.
    pie_attribute = 'coinbase'
    pie_charts = [
        (['Coinbase', 'No-coinbase'], 'tx_coinbase', [[1], [0]],
         ["#165873", "#428C5C"]),
    ]

    # All the attributes are loaded in a single call; the pie attribute is then split from the rest.
    wanted = [chart[0] for chart in cdf_charts] + [pie_attribute]
    samples = get_samples(wanted, fin_name=tx_fin_name)
    coinbase_samples = samples.pop(pie_attribute)

    for key, axis_label, log_scale, fig_name in cdf_charts:
        cdf_x, cdf_y = get_cdf(samples[key], normalize=True)
        plots_from_samples(xs=cdf_x,
                           ys=cdf_y,
                           xlabel=axis_label,
                           log_axis=log_scale,
                           save_fig=fig_name,
                           ylabel="Number of txs")

    for slice_labels, fig_name, slice_groups, slice_colors in pie_charts:
        plot_pie_chart_from_samples(samples=coinbase_samples,
                                    save_fig=fig_name,
                                    labels=slice_labels,
                                    title="",
                                    groups=slice_groups,
                                    colors=slice_colors,
                                    labels_out=True)
def comparative_data_analysis(tx_fin_name, utxo_fin_name):
    """
    Compares a transaction dump against an utxo dump, drawing for each pair of attributes the CDF of both data
    sets in the same chart.

    :param tx_fin_name: Input file path which contains the chainstate transaction dump.
    :type tx_fin_name: str
    :param utxo_fin_name: Input file path which contains the chainstate utxo dump.
    :type utxo_fin_name: str
    :return: None
    :rtype: None
    """

    # (tx attribute, utxo attribute, x label, output name, legend location) for each comparative chart.
    comparisons = [
        ('total_value', 'amount', 'Amount (Satoshi)', 'tx_utxo_amount', 1),
        ('height', 'tx_height', 'Height', 'tx_utxo_height', 2),
    ]

    tx_samples = get_samples([entry[0] for entry in comparisons], tx_fin_name)
    utxo_samples = get_samples([entry[1] for entry in comparisons],
                               utxo_fin_name)

    for tx_key, utxo_key, axis_label, fig_name, loc in comparisons:
        tx_x, tx_y = get_cdf(tx_samples[tx_key], normalize=True)
        utxo_x, utxo_y = get_cdf(utxo_samples[utxo_key], normalize=True)

        # Transaction curve first, utxo curve second (same order as the legend entries).
        plots_from_samples(xs=[tx_x, utxo_x],
                           ys=[tx_y, utxo_y],
                           xlabel=axis_label,
                           save_fig=fig_name,
                           legend=['Tx.', 'UTXO'],
                           legend_loc=loc,
                           ylabel="Number of registers")
Exemplo n.º 5
0
def plot_from_file(x_attribute, y="tx", xlabel=False, log_axis=False, save_fig=False, legend=None,
                   legend_loc=1, font_size=20):
    """
    Generates plots from utxo/tx data extracted from utxo_dump.

    The data is read from ``parsed_txs.txt`` or ``parsed_utxos.txt`` (inside ``CFG.data_path``), one serialized
    entry per line. The CDF of the chosen attribute is then computed and plotted.

    :param x_attribute: Attribute to plot (must be a key in the dictionary of the dumped data).
    :type x_attribute: str
    :param y: Either "tx" or "utxo"
    :type y: str
    :param xlabel: Label on the x axis. If not set, ``x_attribute`` is used instead.
    :type xlabel: str
    :param log_axis: Determines which axes are plotted using logarithmic scale (accepted values are False, "x", "y"
    or "xy").
    :type log_axis: str
    :param save_fig: Figure's filename or False (to show the interactive plot)
    :type save_fig: str
    :param legend: List of strings with legend entries or None (if no legend is needed)
    :type legend: str list
    :param legend_loc: Indicates the location of the legend (if present)
    :type legend_loc: int
    :param font_size: Title, xlabel and ylabel font size
    :type font_size: int
    :return: None
    :rtype: None
    :raise ValueError: if y is neither "tx" nor "utxo".
    """

    if y == "tx":
        fin_name = 'parsed_txs.txt'
        ylabel = "Number of tx."
    elif y == "utxo":
        fin_name = 'parsed_utxos.txt'
        ylabel = "Number of UTXOs"
    else:
        raise ValueError('Unrecognized y value')

    samples = []
    # The context manager guarantees the file is closed even if parsing a line or looking up the attribute fails.
    with open(CFG.data_path + fin_name, 'r') as fin:
        for line in fin:
            # Strip the line ending instead of blindly dropping the last character, which corrupted the last
            # entry of files that do not end with a newline. Blank lines carry no data and are skipped.
            line = line.strip()
            if not line:
                continue
            data = loads(line)
            samples.append(data[x_attribute])

    xs, ys = get_cdf(samples, normalize=True)
    title = ""
    if not xlabel:
        xlabel = x_attribute

    plot_distribution(xs, ys, title, xlabel, ylabel, log_axis, save_fig, legend, legend_loc, font_size)
Exemplo n.º 6
0
def dust_analysis_all_fees(utxo_fin_name):
    """
    Runs the dust analysis considering all fee rates, that is, until every sample is considered dust (the plotted
    cdf goes up to 1).

    :param utxo_fin_name: Input file path which contains the chainstate utxo dump.
    :type utxo_fin_name: str
    :return: None
    :rtype: None
    """

    # (attributes drawn together, x label, log axis, output name, legend entries) for each chart.
    charts = [
        (["dust", "non_profitable", "non_profitable_est"],
         'Dust/non_prof_min/non_prof_est value',
         'x',
         "dust_utxos_all",
         ["Dust", "Non-profitable min.", "Non-profitable est."]),
    ]

    for attributes, axis_label, log_scale, fig_name, legend_entries in charts:
        samples = get_samples(attributes, fin_name=utxo_fin_name)

        # One curve per attribute, all of them in the same chart.
        cdf_xs, cdf_ys = [], []
        for key in attributes:
            cdf_x, cdf_y = get_cdf(samples[key], normalize=True)
            cdf_xs.append(cdf_x)
            cdf_ys.append(cdf_y)

        plots_from_samples(xs=cdf_xs,
                           ys=cdf_ys,
                           xlabel=axis_label,
                           log_axis=log_scale,
                           save_fig=fig_name,
                           ylabel="Number of UTXOs",
                           legend=legend_entries,
                           legend_loc=4)
def compare_attribute(fin_names, x_attribute, out_name, xlabel='', legend=''):
    """
    Compares a fixed attribute across different files, drawing one CDF curve per file in the same chart. Useful
    to follow the evolution of a parameter throughout different snapshots.

    :param fin_names: List of file names to load data from.
    :type fin_names: list str
    :param x_attribute: Attribute to be compared.
    :type x_attribute: str
    :param out_name: Name of the generated chart.
    :type out_name: str
    :param xlabel: Label of the x axis of the resulting chart.
    :type xlabel: str
    :param legend: Legend to be included in the chart.
    :type legend: str or list
    :return: None
    :rtype: None
    """

    # Every file is loaded first; the loaded data is then consumed in file order.
    pending = [get_samples(x_attribute, fin) for fin in fin_names]

    cdf_xs, cdf_ys = [], []
    while pending:
        # Each data set is removed from the list as soon as its CDF is requested.
        cdf_x, cdf_y = get_cdf(pending.pop(0).values(), normalize=True)
        cdf_xs.append(cdf_x)
        cdf_ys.append(cdf_y)

    plots_from_samples(xs=cdf_xs,
                       ys=cdf_ys,
                       xlabel=xlabel,
                       save_fig=out_name,
                       legend=legend,
                       log_axis='x',
                       ylabel="Number of UTXOs",
                       legend_loc=2)
Exemplo n.º 8
0
def utxo_based_analysis_with_filters(utxo_fin_name):
    """
    Performs an utxo data analysis using different filters, to obtain for example the amount of SegWit outputs.

    Four charts of the 'tx_height' attribute are generated. Each chart draws one curve per legend entry, and the
    filtered samples are consumed in order, so the filters must be listed in the same order as the legend entries
    of the consecutive charts (assumes get_filtered_samples returns one sample set per filter, in filter order;
    verify against its implementation).

    :param utxo_fin_name: Input file path which contains the chainstate utxo dump.
    :type utxo_fin_name: str
    :return: None
    :rtype: None
    """

    x_attribute = 'tx_height'
    xlabel = 'Block height'
    out_names = [
        'utxo_height_out_type', 'utxo_height_amount', 'segwit_upper_bound',
        'utxo_height_1_satoshi'
    ]

    filters = [
        # utxo_height_out_type (7 curves)
        lambda x: x["out_type"] == 0,
        lambda x: x["out_type"] == 1,
        lambda x: x["out_type"] in [2, 3, 4, 5],
        lambda x: x["non_std_type"] == "P2WPKH",
        lambda x: x["non_std_type"] == "P2WSH",
        lambda x: x["non_std_type"] is not False and "multisig" in x["non_std_type"],
        lambda x: x["non_std_type"] is False,
        # utxo_height_amount (7 curves)
        lambda x: x["amount"] == 1,
        lambda x: 1 < x["amount"] <= 10**1,
        lambda x: 10 < x["amount"] <= 10**2,
        lambda x: 10**2 < x["amount"] <= 10**4,
        lambda x: 10**4 < x["amount"] <= 10**6,
        lambda x: 10**6 < x["amount"] <= 10**8,
        lambda x: x["amount"] > 10**8,
        # segwit_upper_bound (1 curve)
        lambda x: x["out_type"] == 1,
        # utxo_height_1_satoshi (1 curve)
        lambda x: x["amount"] == 1
    ]

    # Raw strings keep the backslashes of the legend markup literal; a plain '\l' is an invalid escape sequence.
    legends = [
        ['P2PKH', 'P2SH', 'P2PK', 'P2WPKH', 'P2WSH', 'Multisig', 'Other'],
        [
            r'$=1$', r'$1 < x \leq 10$', r'$10 < x \leq 10^2$',
            r'$10^2 < x \leq 10^4$', r'$10^4 < x \leq 10^6$',
            r'$10^6 < x \leq 10^8$', r'$10^8 < x$'
        ],
        ['P2SH'],
        ['Amount = 1']
    ]
    legend_loc = 2

    samples = get_filtered_samples(x_attribute,
                                   fin_name=utxo_fin_name,
                                   filtr=filters)

    for out, legend in zip(out_names, legends):
        xs = []
        ys = []
        # Take as many sample sets as curves the chart has (one per legend entry).
        for _ in legend:
            x, y = get_cdf(samples.pop(0), normalize=True)
            xs.append(x)
            ys.append(y)

        plots_from_samples(xs=xs,
                           ys=ys,
                           xlabel=xlabel,
                           save_fig=out,
                           legend=legend,
                           legend_loc=legend_loc,
                           ylabel="Number of UTXOs")
Exemplo n.º 9
0
def utxo_based_analysis(utxo_fin_name):
    """
    Runs the utxo based analysis over an utxo dump of the chainstate: one CDF chart per attribute, two pie charts
    built from the 'out_type' attribute and, finally, the analysis of the non-standard outputs.

    :param utxo_fin_name: Input file path which contains the chainstate utxo dump.
    :type utxo_fin_name: str
    :return: None
    :rtype: None
    """

    # (attribute, x label, log axis, output name). A list in the last two fields is forwarded as is to
    # plots_from_samples.
    cdf_charts = [
        ('tx_height', 'Tx. height', False, "utxo_tx_height"),
        ('amount', 'Amount', 'x', "utxo_amount_logx"),
        ('index', 'UTXO index', [False, 'x'],
         ["utxo_index", "utxo_index_logx"]),
        ('out_type', 'Out type', [False, 'x'],
         ["utxo_out_type", "utxo_out_type_logx"]),
        ('utxo_data_len', 'UTXO data length', [False, 'x'],
         ["utxo_data_len", "utxo_data_len_logx"]),
        ('register_len', 'Register length', [False, 'x'],
         ['utxo_register_len', 'utxo_register_len_logx']),
    ]

    # (attribute, labels, output name, groups) for each pie chart.
    pie_charts = [
        ('out_type', ['C-even', 'C-odd', 'U-even', 'U-odd'], "utxo_pk_types",
         [[2], [3], [4], [5]]),
        ('out_type', ['P2PKH', 'P2PK', 'P2SH', 'Other'], "utxo_types",
         [[0], [2, 3, 4, 5], [1]]),
    ]
    pie_palette = ("#165873", "#428C5C", "#4EA64B", "#ADD96C")

    special_attribute = 'non_std_type'

    # The pie chart attributes are already part of the regular ones, so they are not requested again to the
    # sampling function.
    wanted = [chart[0] for chart in cdf_charts] + [special_attribute]
    samples = get_samples(wanted, fin_name=utxo_fin_name)
    non_std_samples = samples.pop(special_attribute)

    for key, axis_label, log_scale, fig_name in cdf_charts:
        cdf_x, cdf_y = get_cdf(samples[key], normalize=True)
        plots_from_samples(xs=cdf_x,
                           ys=cdf_y,
                           xlabel=axis_label,
                           log_axis=log_scale,
                           save_fig=fig_name,
                           ylabel="Number of UTXOs")

    for key, slice_labels, fig_name, slice_groups in pie_charts:
        # A fresh color list is handed to every chart.
        plot_pie_chart_from_samples(samples=samples[key],
                                    save_fig=fig_name,
                                    labels=slice_labels,
                                    title="",
                                    groups=slice_groups,
                                    colors=list(pie_palette),
                                    labels_out=True)

    # Special case: non-standard
    non_std_outs_analysis(non_std_samples)
Exemplo n.º 10
0
def plots_from_file(x_attribute,
                    y=["tx"],
                    xlabel=False,
                    log_axis=False,
                    version=[0.15],
                    save_fig=False,
                    legend=None,
                    legend_loc=1,
                    font_size=20,
                    filtr=[lambda x: True]):
    """
    Generates plots from utxo/tx data extracted from utxo_dump.

    ``x_attribute``, ``y``, ``version`` and ``filtr`` are handled in parallel: the i-th curve of the chart is the
    CDF of the samples obtained from the i-th entry of each of them. Single values are treated as one-element lists.

    :param x_attribute: Attribute to plot (must be a key in the dictionary of the dumped data).
    :type x_attribute: str or list
    :param y: Either "tx" or "utxo". The first entry decides the label of the y axis.
    :type y: str or list
    :param xlabel: Label on the x axis. If not set, ``x_attribute`` (as a list) is used instead.
    :type xlabel: str
    :param log_axis: Determines which axes are plotted using logarithmic scale (accepted values are False, "x", "y"
    or "xy").
    :type log_axis: str
    :param version: Bitcoin core version, used to decide the folder in which to store the data.
    :type version: float or list
    :param save_fig: Figure's filename or False (to show the interactive plot)
    :type save_fig: str
    :param legend: List of strings with legend entries or None (if no legend is needed)
    :type legend: str list
    :param legend_loc: Indicates the location of the legend (if present)
    :type legend_loc: int
    :param font_size: Title, xlabel and ylabel font size
    :type font_size: int
    :param filtr: Function to filter samples (returns a boolean value for a given sample)
    :type filtr: function or list of functions
    :return: None
    :rtype: None
    :raise ValueError: if the first entry of y is neither "tx" nor "utxo".
    """

    def as_list(value):
        # Lists and numpy arrays are used as they are, anything else becomes a one-element list.
        if isinstance(value, (list, np.ndarray)):
            return value
        return [value]

    # NOTE: the list defaults of the signature are shared between calls, so they must never be modified in place.
    x_attribute = as_list(x_attribute)
    y = as_list(y)
    version = as_list(version)
    filtr = as_list(filtr)

    assert len(x_attribute) == len(y) == len(version) == len(filtr), \
        "There is a mismatch on the list length of some of the parameters"

    # Reject unknown sources before doing any work. Otherwise ylabel was left undefined and the function crashed
    # with an UnboundLocalError only after all the samples had been loaded.
    if y[0] == "tx":
        ylabel = "Number of tx."
    elif y[0] == "utxo":
        ylabel = "Number of UTXOs"
    else:
        raise ValueError('Unrecognized y value')

    title = ""
    if not xlabel:
        xlabel = x_attribute

    xs, ys = [], []
    for attribute, source, ver, sample_filter in zip(x_attribute, y, version, filtr):
        samples = get_samples(attribute,
                              y=source,
                              version=ver,
                              filtr=sample_filter)
        xc, yc = get_cdf(samples, normalize=True)
        xs.append(xc)
        ys.append(yc)

    # Adds the folder in which the data will be stored (if multiple versions are involved, store it
    # in the first one folder). A falsy save_fig (the default False) means "no file" and is forwarded untouched,
    # since concatenating it to the folder raised a TypeError.
    if save_fig:
        save_fig = str(version[0]) + '/' + save_fig

    plot_distribution(xs, ys, title, xlabel, ylabel, log_axis, save_fig,
                      legend, legend_loc, font_size)