Пример #1
0
def hist_from_file(input_file, header, threshold, range_min, range_max, stats,
                   markers, columns, vert_space):
    """Plot one histogram per key from a stream of comma separated key,value rows.

    Rows are grouped by their first field; every key that has more than
    ``threshold`` values gets its own subplot, and all subplots share the
    same value range.

    Args:
        input_file: Iterable of CSV lines, handed to ``csv.reader``. Every
            data row must contain exactly two fields: a key and a value
            that ``float()`` can parse.
        header: If truthy, the first row is consumed as a header and its
            second field becomes the x-axis label; otherwise the label is
            "Values".
        threshold: Only keys with strictly more than this many values are
            plotted.
        range_min: Lower bound of the histogram range (converted with
            ``float()``). When falsy, the smallest value seen in the data
            is used. Note that a numeric ``0`` is falsy and therefore
            counts as "not given"; the string ``"0"`` pins the bound at 0.
        range_max: Upper bound of the histogram range; same rules as
            ``range_min`` but using the largest value seen.
        stats: If truthy, mean, standard deviation and median are appended
            to each subplot title.
        markers: If truthy, vertical lines are drawn at the mean (solid
            red), mean +/- one standard deviation (dashed red) and the
            median (solid blue).
        columns: Number of subplot columns (converted with ``int()``).
        vert_space: Vertical spacing between subplots, passed as ``hspace``
            to ``plt.subplots_adjust`` after ``float()`` conversion.

    Raises:
        ValueError: If a data row does not have exactly two fields, or its
            second field cannot be converted to ``float``.
    """
    values = defaultdict(list)
    fp = csv.reader(input_file)

    # h[1] holds the x-axis label: the header's second field, or "Values".
    h = [None, "Values"]
    if header:
        # next() with a default keeps the fallback label on an empty input
        # instead of leaking StopIteration.
        h = next(fp, h)

    min_value = float(range_min) if range_min else None
    max_value = float(range_max) if range_max else None

    for l in fp:
        if len(l) != 2:
            raise ValueError("Bad number of items in line: " + str(l))

        k, v = l[0], float(l[1])
        values[k].append(v)

        # Track the data range only when it was not supplied by the caller.
        # Compare against None explicitly: a running bound of 0.0 is a
        # legitimate value and must not be mistaken for "not set yet".
        if not range_min and (min_value is None or v < min_value):
            min_value = v
        if not range_max and (max_value is None or v > max_value):
            max_value = v

    plots = sum(1 for v in values.values() if len(v) > threshold)
    i = 1
    plt.figure()
    for k, v in values.items():
        if len(v) <= threshold:
            continue

        # subplot() needs integer grid dimensions; ceil() may return a float.
        plt.subplot(int(ceil(plots / float(columns))), int(columns), i)

        if stats or markers:
            v_mean, v_std, v_median = mean(v), std(v), median(v)
        if stats:
            k += "\nm=%0.2f,s=%0.2f,med=%0.2f" % (v_mean, v_std, v_median)

        # draw_hist is defined elsewhere; called here as
        # (title, values, range, ylabel=..., xlabel=...).
        draw_hist(k,
                  v, (min_value, max_value),
                  ylabel="Count",
                  xlabel=h[1])

        if markers:
            plt.axvline(v_mean, color='r', linestyle='-')
            plt.axvline(v_mean + v_std, color='r', linestyle='--')
            plt.axvline(v_mean - v_std, color='r', linestyle='--')
            plt.axvline(v_median, color='b', linestyle='-')

        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print(k.replace('\n', ': \t') + ", Count:" + str(len(v)))
        i += 1

    plt.subplots_adjust(hspace=float(
        vert_space))  # tweak spacing to prevent clipping of x-label
    plt.show()
Пример #2
0
def hist_from_file(input_file, header, threshold, range_min, range_max, stats, markers, columns, vert_space):
    """Plot one histogram per key from a stream of comma separated key,value rows.

    Rows are grouped by their first field; every key that has more than
    ``threshold`` values gets its own subplot, and all subplots share the
    same value range.

    Args:
        input_file: Iterable of CSV lines, handed to ``csv.reader``. Every
            data row must contain exactly two fields: a key and a value
            that ``float()`` can parse.
        header: If truthy, the first row is consumed as a header and its
            second field becomes the x-axis label; otherwise the label is
            "Values".
        threshold: Only keys with strictly more than this many values are
            plotted.
        range_min: Lower bound of the histogram range (converted with
            ``float()``). When falsy, the smallest value seen in the data
            is used. Note that a numeric ``0`` is falsy and therefore
            counts as "not given"; the string ``"0"`` pins the bound at 0.
        range_max: Upper bound of the histogram range; same rules as
            ``range_min`` but using the largest value seen.
        stats: If truthy, mean, standard deviation and median are appended
            to each subplot title.
        markers: If truthy, vertical lines are drawn at the mean (solid
            red), mean +/- one standard deviation (dashed red) and the
            median (solid blue).
        columns: Number of subplot columns (converted with ``int()``).
        vert_space: Vertical spacing between subplots, passed as ``hspace``
            to ``plt.subplots_adjust`` after ``float()`` conversion.

    Raises:
        ValueError: If a data row does not have exactly two fields, or its
            second field cannot be converted to ``float``.
    """
    values = defaultdict(list)
    fp = csv.reader(input_file)

    h = [None, "Values"]  # h[1] holds the x-axis label: header field or "Values"
    if header:
        # next() with a default keeps the fallback label on an empty input
        # instead of leaking StopIteration.
        h = next(fp, h)

    min_value = float(range_min) if range_min else None
    max_value = float(range_max) if range_max else None

    for l in fp:
        if len(l) != 2:
            raise ValueError("Bad number of items in line: " + str(l))

        k, v = l[0], float(l[1])
        values[k].append(v)

        # Track the data range only when it was not supplied by the caller.
        # Compare against None explicitly: a running bound of 0.0 is a
        # legitimate value and must not be mistaken for "not set yet".
        if not range_min and (min_value is None or v < min_value):
            min_value = v
        if not range_max and (max_value is None or v > max_value):
            max_value = v

    plots = sum(1 for v in values.values() if len(v) > threshold)
    i = 1
    plt.figure()
    for k, v in values.items():
        if len(v) <= threshold:
            continue

        # subplot() needs integer grid dimensions; ceil() may return a float.
        plt.subplot(int(ceil(plots / float(columns))), int(columns), i)

        if stats or markers:
            v_mean, v_std, v_median = mean(v), std(v), median(v)
        if stats:
            k += "\nm=%0.2f,s=%0.2f,med=%0.2f" % (v_mean, v_std, v_median)

        # draw_hist is defined elsewhere; called here as
        # (title, values, range, ylabel=..., xlabel=...).
        draw_hist(k, v, (min_value, max_value), ylabel="Count", xlabel=h[1])

        if markers:
            plt.axvline(v_mean, color='r', linestyle='-')
            plt.axvline(v_mean + v_std, color='r', linestyle='--')
            plt.axvline(v_mean - v_std, color='r', linestyle='--')
            plt.axvline(v_median, color='b', linestyle='-')

        # Single-argument print(...) produces the same output on Python 2 and 3.
        print(k.replace('\n', ': \t') + ", Count:" + str(len(v)))
        i += 1

    plt.subplots_adjust(hspace=float(vert_space))  # tweak spacing to prevent clipping of x-label
    plt.show()
def draw_distributions(input):
    """Show accuracy histograms: one for all analysts and one per large firm.

    Iterates ``analyst_accuracies(input)`` (defined elsewhere), which is
    consumed here as (firm, accuracy) pairs. Two figures are created:

    * "All Analysts": a single histogram of every accuracy value.
    * "Each Analyst Separately": one subplot for each firm that has more
      than 200 accuracy values, laid out on a grid with 4 rows.

    Both use a fixed histogram range of (0, 2.0). The call blocks in
    ``plt.show()`` until the figures are dismissed.

    Args:
        input: Passed through unchanged to ``analyst_accuracies``.
    """
    accuracies = []
    per_firm_accuracy = defaultdict(list)
    for firm, accuracy in analyst_accuracies(input):
        accuracies.append(accuracy)
        per_firm_accuracy[firm].append(accuracy)

    p = plt.figure()
    p.canvas.set_window_title("All Analysts")
    draw_hist("All Analysts",accuracies, range=(0,2.0))

    p = plt.figure()
    # values()/items() work on both Python 2 and Python 3 dictionaries.
    plots = sum(1 for v in per_firm_accuracy.values() if len(v) > 200)
    i = 1
    for firm,values in per_firm_accuracy.items():
        if len(values) > 200:
            # subplot() needs integer grid dimensions; ceil() may return a float.
            plt.subplot(4, int(ceil(plots / 4.0)), i)
            draw_hist(firm, values, range=(0,2.0))
            i += 1

    plt.suptitle('Count vs. Accuracy (|New Price - Target| / |Original Price|)')
    p.canvas.set_window_title("Each Analyst Separately")
    plt.show()
Пример #4
0
def draw_distributions(input):
    """Show accuracy histograms: one for all analysts and one per large firm.

    Iterates ``analyst_accuracies(input)`` (defined elsewhere), which is
    consumed here as (firm, accuracy) pairs. Two figures are created:

    * "All Analysts": a single histogram of every accuracy value.
    * "Each Analyst Separately": one subplot for each firm that has more
      than 200 accuracy values, laid out on a grid with 4 rows.

    Both use a fixed histogram range of (0, 2.0). The call blocks in
    ``plt.show()`` until the figures are dismissed.

    Args:
        input: Passed through unchanged to ``analyst_accuracies``.
    """
    accuracies = []
    per_firm_accuracy = defaultdict(list)
    for firm, accuracy in analyst_accuracies(input):
        accuracies.append(accuracy)
        per_firm_accuracy[firm].append(accuracy)

    p = plt.figure()
    p.canvas.set_window_title("All Analysts")
    draw_hist("All Analysts", accuracies, range=(0, 2.0))

    p = plt.figure()
    # values()/items() work on both Python 2 and Python 3 dictionaries.
    plots = sum(1 for v in per_firm_accuracy.values() if len(v) > 200)
    i = 1
    for firm, values in per_firm_accuracy.items():
        if len(values) > 200:
            # subplot() needs integer grid dimensions; ceil() may return a
            # float.
            plt.subplot(4, int(ceil(plots / 4.0)), i)
            draw_hist(firm, values, range=(0, 2.0))
            i += 1

    plt.suptitle(
        'Count vs. Accuracy (|New Price - Target| / |Original Price|)')
    p.canvas.set_window_title("Each Analyst Separately")
    plt.show()