def _parse_bound(bound):
    """Return *bound* as a float, or None when no bound was supplied.

    None, False and the empty string mean "not supplied"; anything else
    (including a numeric 0 or the string "0") is converted with float().
    """
    if bound is None or bound is False or bound == "":
        return None
    return float(bound)


def hist_from_file(input_file, header, threshold, range_min, range_max,
                   stats, markers, columns, vert_space):
    """Plot one histogram per key from comma separated key-value pairs.

    Every row read from *input_file* must be ``key,value``. Values are
    grouped by key and a histogram is drawn (via ``draw_hist``) for every
    key that has more than *threshold* values. All histograms share the same
    x-range.

    Args:
        input_file: iterable of text lines, handed to ``csv.reader``.
        header: truthy if the first row is a header; its second column is
            then used as the x-axis label (default label: "Values").
        threshold: a key is plotted only if it has more than this many
            values.
            NOTE(review): compared as-is, unlike the other numeric
            arguments which are converted here - callers must pass a number.
        range_min: lower bound of the histogram range; when not supplied the
            smallest value seen in the data is used.
        range_max: upper bound of the histogram range; when not supplied the
            largest value seen in the data is used.
        stats: truthy to append mean / std / median to each subplot title.
        markers: truthy to draw vertical lines at the mean, mean +/- std
            (red) and the median (blue).
        columns: number of subplot columns in the figure.
        vert_space: ``hspace`` passed to ``plt.subplots_adjust``.

    Raises:
        ValueError: if a row does not contain exactly two fields, or a value
            cannot be converted to float.
    """
    values = defaultdict(list)
    reader = csv.reader(input_file)

    # x-axis label: second header column when a usable header row exists.
    xlabel = "Values"
    if header:
        header_row = next(reader, None)
        if header_row is not None and len(header_row) > 1:
            xlabel = header_row[1]

    min_value = _parse_bound(range_min)
    max_value = _parse_bound(range_max)
    # Track the data extremes only for bounds that were not given explicitly.
    track_min = min_value is None
    track_max = max_value is None

    for row in reader:
        if len(row) != 2:
            raise ValueError("Bad number of items in line: " + str(row))
        key, value = row[0], float(row[1])
        values[key].append(value)

        # Compare against None rather than truthiness: 0.0 is a legitimate
        # running minimum / maximum and must not be mistaken for "unset".
        if track_min and (min_value is None or value < min_value):
            min_value = value
        if track_max and (max_value is None or value > max_value):
            max_value = value

    selected = [(k, v) for k, v in values.items() if len(v) > threshold]

    plt.figure()
    if selected:
        # subplot() needs integer grid dimensions; ceil() may return a float.
        n_cols = int(columns)
        n_rows = int(ceil(len(selected) / float(n_cols)))

    for index, (key, series) in enumerate(selected, 1):
        plt.subplot(n_rows, n_cols, index)

        if stats or markers:
            # Compute the summary statistics once per key.
            avg, dev, med = mean(series), std(series), median(series)

        title = key
        if stats:
            title += "\nm=%0.2f,s=%0.2f,med=%0.2f" % (avg, dev, med)

        draw_hist(title, series, (min_value, max_value),
                  ylabel="Count", xlabel=xlabel)

        if markers:
            plt.axvline(avg, color='r', linestyle='-')
            plt.axvline(avg + dev, color='r', linestyle='--')
            plt.axvline(avg - dev, color='r', linestyle='--')
            plt.axvline(med, color='b', linestyle='-')

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(title.replace('\n', ': \t') + ", Count:" + str(len(series)))

    # Tweak spacing to prevent clipping of the x-label.
    plt.subplots_adjust(hspace=float(vert_space))
    plt.show()
def _parse_bound(bound):
    """Return *bound* as a float, or None when no bound was supplied.

    None, False and the empty string mean "not supplied"; anything else
    (including a numeric 0 or the string "0") is converted with float().
    """
    if bound is None or bound is False or bound == "":
        return None
    return float(bound)


def hist_from_file(input_file, header, threshold, range_min, range_max,
                   stats, markers, columns, vert_space):
    """Plot one histogram per key from comma separated key-value pairs.

    Every row read from *input_file* must be ``key,value``. Values are
    grouped by key and a histogram is drawn (via ``draw_hist``) for every
    key that has more than *threshold* values. All histograms share the same
    x-range.

    Args:
        input_file: iterable of text lines, handed to ``csv.reader``.
        header: truthy if the first row is a header; its second column is
            then used as the x-axis label (default label: "Values").
        threshold: a key is plotted only if it has more than this many
            values.
            NOTE(review): compared as-is, unlike the other numeric
            arguments which are converted here - callers must pass a number.
        range_min: lower bound of the histogram range; when not supplied the
            smallest value seen in the data is used.
        range_max: upper bound of the histogram range; when not supplied the
            largest value seen in the data is used.
        stats: truthy to append mean / std / median to each subplot title.
        markers: truthy to draw vertical lines at the mean, mean +/- std
            (red) and the median (blue).
        columns: number of subplot columns in the figure.
        vert_space: ``hspace`` passed to ``plt.subplots_adjust``.

    Raises:
        ValueError: if a row does not contain exactly two fields, or a value
            cannot be converted to float.
    """
    values = defaultdict(list)
    reader = csv.reader(input_file)

    # x-axis label: second header column when a usable header row exists.
    xlabel = "Values"
    if header:
        header_row = next(reader, None)
        if header_row is not None and len(header_row) > 1:
            xlabel = header_row[1]

    min_value = _parse_bound(range_min)
    max_value = _parse_bound(range_max)
    # Track the data extremes only for bounds that were not given explicitly.
    track_min = min_value is None
    track_max = max_value is None

    for row in reader:
        if len(row) != 2:
            raise ValueError("Bad number of items in line: " + str(row))
        key, value = row[0], float(row[1])
        values[key].append(value)

        # Compare against None rather than truthiness: 0.0 is a legitimate
        # running minimum / maximum and must not be mistaken for "unset".
        if track_min and (min_value is None or value < min_value):
            min_value = value
        if track_max and (max_value is None or value > max_value):
            max_value = value

    selected = [(k, v) for k, v in values.items() if len(v) > threshold]

    plt.figure()
    if selected:
        # subplot() needs integer grid dimensions; ceil() may return a float.
        n_cols = int(columns)
        n_rows = int(ceil(len(selected) / float(n_cols)))

    for index, (key, series) in enumerate(selected, 1):
        plt.subplot(n_rows, n_cols, index)

        if stats or markers:
            # Compute the summary statistics once per key.
            avg, dev, med = mean(series), std(series), median(series)

        title = key
        if stats:
            title += "\nm=%0.2f,s=%0.2f,med=%0.2f" % (avg, dev, med)

        draw_hist(title, series, (min_value, max_value),
                  ylabel="Count", xlabel=xlabel)

        if markers:
            plt.axvline(avg, color='r', linestyle='-')
            plt.axvline(avg + dev, color='r', linestyle='--')
            plt.axvline(avg - dev, color='r', linestyle='--')
            plt.axvline(med, color='b', linestyle='-')

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(title.replace('\n', ': \t') + ", Count:" + str(len(series)))

    # Tweak spacing to prevent clipping of the x-label.
    plt.subplots_adjust(hspace=float(vert_space))
    plt.show()
def draw_distributions(input):
    """Plot accuracy histograms for all analysts together and per firm.

    ``analyst_accuracies(input)`` is expected to yield ``(firm, accuracy)``
    pairs. Two figures are produced:

    * "All Analysts": one histogram over every accuracy value.
    * "Each Analyst Separately": one subplot per firm that has more than
      200 accuracy values, laid out on a grid with 4 rows.

    All histograms use the fixed range (0, 2.0). The call ends with
    ``plt.show()``.

    Args:
        input: passed through unchanged to ``analyst_accuracies``. (The name
            shadows the builtin but is kept for caller compatibility.)
    """
    min_samples = 200   # a firm needs more than this many values to be drawn
    grid_rows = 4

    def set_window_title(fig, title):
        # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4
        # in favour of the manager's method; prefer the manager when the
        # figure has one and fall back to the canvas otherwise.
        manager = getattr(fig.canvas, "manager", None)
        if manager is not None:
            manager.set_window_title(title)
        else:
            fig.canvas.set_window_title(title)

    accuracies = []
    per_firm_accuracy = defaultdict(list)
    for firm, accuracy in analyst_accuracies(input):
        accuracies.append(accuracy)
        per_firm_accuracy[firm].append(accuracy)

    overall_fig = plt.figure()
    set_window_title(overall_fig, "All Analysts")
    draw_hist("All Analysts", accuracies, range=(0, 2.0))

    per_firm_fig = plt.figure()
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    selected = [(firm, vals) for firm, vals in per_firm_accuracy.items()
                if len(vals) > min_samples]
    if selected:
        # subplot() needs integer grid dimensions; ceil() may return a float.
        grid_cols = int(ceil(len(selected) / float(grid_rows)))
    for index, (firm, vals) in enumerate(selected, 1):
        plt.subplot(grid_rows, grid_cols, index)
        draw_hist(firm, vals, range=(0, 2.0))

    plt.suptitle('Count vs. Accuracy (|New Price - Target| / |Original Price|)')
    set_window_title(per_firm_fig, "Each Analyst Separately")
    plt.show()
def draw_distributions(input):
    """Plot accuracy histograms for all analysts together and per firm.

    ``analyst_accuracies(input)`` is expected to yield ``(firm, accuracy)``
    pairs. Two figures are produced:

    * "All Analysts": one histogram over every accuracy value.
    * "Each Analyst Separately": one subplot per firm that has more than
      200 accuracy values, laid out on a grid with 4 rows.

    All histograms use the fixed range (0, 2.0). The call ends with
    ``plt.show()``.

    Args:
        input: passed through unchanged to ``analyst_accuracies``. (The name
            shadows the builtin but is kept for caller compatibility.)
    """
    min_samples = 200   # a firm needs more than this many values to be drawn
    grid_rows = 4

    def set_window_title(fig, title):
        # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4
        # in favour of the manager's method; prefer the manager when the
        # figure has one and fall back to the canvas otherwise.
        manager = getattr(fig.canvas, "manager", None)
        if manager is not None:
            manager.set_window_title(title)
        else:
            fig.canvas.set_window_title(title)

    accuracies = []
    per_firm_accuracy = defaultdict(list)
    for firm, accuracy in analyst_accuracies(input):
        accuracies.append(accuracy)
        per_firm_accuracy[firm].append(accuracy)

    overall_fig = plt.figure()
    set_window_title(overall_fig, "All Analysts")
    draw_hist("All Analysts", accuracies, range=(0, 2.0))

    per_firm_fig = plt.figure()
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    selected = [(firm, vals) for firm, vals in per_firm_accuracy.items()
                if len(vals) > min_samples]
    if selected:
        # subplot() needs integer grid dimensions; ceil() may return a float.
        grid_cols = int(ceil(len(selected) / float(grid_rows)))
    for index, (firm, vals) in enumerate(selected, 1):
        plt.subplot(grid_rows, grid_cols, index)
        draw_hist(firm, vals, range=(0, 2.0))

    plt.suptitle('Count vs. Accuracy (|New Price - Target| / |Original Price|)')
    set_window_title(per_firm_fig, "Each Analyst Separately")
    plt.show()