def test_generate_comparative_plots_empty_marker_list(self):
    """generate_comparative_plots() should fall back to the predefined
    list of markers when the user supplies an empty marker list.

    This is a smoke test: the call only has to complete without raising,
    no properties of the result are checked.
    """
    positions = [1, 4, 10, 11]
    point_labels = ["T0", "T1", "T2", "T3"]
    group_labels = ["Infants", "Children", "Teens"]
    no_markers = []

    generate_comparative_plots(
        'scatter', self.ValidTypicalData, positions, point_labels,
        group_labels, no_markers, "x-axis label", "y-axis label", "Test")
def test_generate_comparative_plots_insufficient_symbols(self):
    """generate_comparative_plots() should still work when fewer markers
    than distributions are supplied.

    A warning is printed to stdout in that case; stdout is temporarily
    replaced with an in-memory buffer so the message can be compared.
    """
    expected_warning = (
        "There are not enough markers to "
        "uniquely represent each distribution in your dataset. "
        "You may want to provide a list of markers that is at "
        "least as large as the number of distributions in your "
        "dataset.")

    original_stdout = sys.stdout
    captured = StringIO()
    try:
        sys.stdout = captured
        # Only one marker for three distributions triggers the warning.
        generate_comparative_plots(
            'scatter', self.ValidTypicalData, [1, 4, 10, 11],
            ["T0", "T1", "T2", "T3"], ["Infants", "Children", "Teens"],
            ['^'], "x-axis label", "y-axis label", "Test")
        printed = captured.getvalue().strip()
        self.assertEqual(printed, expected_warning)
    finally:
        # Always restore the real stdout, even if the assertion fails.
        sys.stdout = original_stdout
def test_generate_comparative_plots_insufficient_symbols(self):
    """generate_comparative_plots() should cope with a marker list that is
    shorter than the number of distributions.

    The function is expected to print a warning for the user; the test
    redirects stdout into a buffer and checks the exact message.
    """
    warning_text = (
        "There are not enough markers to "
        "uniquely represent each distribution in your dataset. "
        "You may want to provide a list of markers that is at "
        "least as large as the number of distributions in your "
        "dataset.")
    markers = ['^']  # deliberately fewer markers than distributions

    stdout_backup = sys.stdout
    buffer_ = StringIO()
    try:
        sys.stdout = buffer_
        generate_comparative_plots(
            'scatter', self.ValidTypicalData, [1, 4, 10, 11],
            ["T0", "T1", "T2", "T3"], ["Infants", "Children", "Teens"],
            markers, "x-axis label", "y-axis label", "Test")
        self.assertEqual(buffer_.getvalue().strip(), warning_text)
    finally:
        # Put the real stdout back regardless of the test outcome.
        sys.stdout = stdout_backup
def test_generate_comparative_plots_box(self):
    """generate_comparative_plots() should return a valid boxplot Figure
    object.

    Checks the title, both axis labels, the number of x-axis tick labels
    and the x-axis tick positions of the figure's first axes.
    """
    call_args = (
        'box', self.ValidTypicalData, [1, 4, 10, 11],
        ["T0", "T1", "T2", "T3"], ["Infants", "Children", "Teens"],
        ['b', 'g', 'y'], "x-axis label", "y-axis label", "Test")
    figure = generate_comparative_plots(*call_args)
    axes = figure.get_axes()[0]

    self.assertEqual(axes.get_title(), "Test")
    self.assertEqual(axes.get_xlabel(), "x-axis label")
    self.assertEqual(axes.get_ylabel(), "y-axis label")

    tick_labels = axes.get_xticklabels()
    self.assertEqual(len(tick_labels), 4)
    self.assertFloatEqual(axes.get_xticks(), [2.1, 7.2, 17.4, 19.1])
def main():
    """Create a distance comparison plot and optional companion files.

    Reads the command-line options described by ``script_info``, parses the
    distance matrix (-d) and metadata mapping file (-m), computes the
    distances between every state of ``opts.field`` and each comparison
    group (-c), and saves the resulting plot into ``opts.output_dir``.
    Unless suppressed, the result of ``all_pairs_t_test`` is written to
    ``<field>_Stats.xls``; when ``opts.save_raw_data`` is set, the plotted
    distances are written to ``<field>_Distance_Comparisons.xls``.

    Invalid user input is reported through ``option_parser.error()``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The files are closed as
    # soon as parsing finishes (assumes the parsers read eagerly).
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")

    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    def parse_axis_limit(value, option_name):
        """Return value as a float, or None if it is the string 'auto'."""
        try:
            return float(value)
        except ValueError:
            if value == 'auto':
                return None
            option_parser.error("The --%s option must be either a number "
                                "or 'auto'." % option_name)
            # Only reached if error() returns; keep the raw value then.
            return value

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = parse_axis_limit(opts.y_min, 'y_min')
    y_max = parse_axis_limit(opts.y_max, 'y_max')

    # Parse the field states that will be compared to every other field state.
    # The presence check must happen before splitting: afterwards the value
    # is always a list and a missing option could no longer be detected.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'")
        for field_state in opts.comparison_groups.split(',')]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis.
    field_states = comparison_groupings.keys()

    def custom_comparator(x, y):
        """Compare numerically if both values are numbers, else lexically."""
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            left, right = x, y
        # Return the sign of the comparison rather than int(left - right),
        # which truncates differences smaller than 1 to 0 ("equal").
        return (left > right) - (left < right)

    # Sort the field states as numbers if the elements are numbers, else sort
    # them lexically.
    field_states.sort(custom_comparator)

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        groupings = comparison_groupings[field_state]
        plot_data.append([groupings[comp_field_state]
                          for comp_field_state in comparison_field_states])
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(
        plot_type, plot_data, x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors, x_label=plot_x_label,
        y_label=plot_y_label, title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min, y_max=y_max, whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        group_spacing=opts.group_spacing, figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data,
                                                plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append(
                    '%s vs %s' % (data_point_label, comp_field))
                sig_tests_data.append(dist)

        # The file is opened before the tests run (as before), but is now
        # guaranteed to be closed even if the tests raise.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.xls" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_results = all_pairs_t_test(
                sig_tests_labels, sig_tests_data, tail_type=opts.tail_type,
                num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of "
            "labels do not match the number of points along the x-axis.")

        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.xls" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")

            for label, data in zip(plot_x_axis_labels, plot_data):
                assert len(comparison_field_states) == len(data), (
                    "The "
                    "number of specified comparison groups does not match "
                    "the number of groups found at the current point along "
                    "the x-axis.")

                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" +
                        "\t".join(map(str, comp_grp_data)) + "\n")
def main():
    """Create a distance comparison plot and optional companion files.

    Reads the command-line options described by ``script_info``, parses the
    distance matrix (-d) and metadata mapping file (-m), computes the
    distances between every state of ``opts.field`` and each comparison
    group (-c), and saves the resulting plot into ``opts.output_dir``.
    Unless suppressed, the result of ``all_pairs_t_test`` is written to
    ``<field>_Stats.xls``; when ``opts.save_raw_data`` is set, the plotted
    distances are written to ``<field>_Distance_Comparisons.xls``.

    Invalid user input is reported through ``option_parser.error()``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The files are closed as
    # soon as parsing finishes (assumes the parsers read eagerly).
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")

    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    def parse_axis_limit(value, option_name):
        """Return value as a float, or None if it is the string 'auto'."""
        try:
            return float(value)
        except ValueError:
            if value == 'auto':
                return None
            option_parser.error("The --%s option must be either a number "
                                "or 'auto'." % option_name)
            # Only reached if error() returns; keep the raw value then.
            return value

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = parse_axis_limit(opts.y_min, 'y_min')
    y_max = parse_axis_limit(opts.y_max, 'y_max')

    # Parse the field states that will be compared to every other field state.
    # The presence check must happen before splitting: afterwards the value
    # is always a list and a missing option could no longer be detected.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'")
        for field_state in opts.comparison_groups.split(',')]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis.
    field_states = comparison_groupings.keys()

    def custom_comparator(x, y):
        """Compare numerically if both values are numbers, else lexically."""
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            left, right = x, y
        # Return the sign of the comparison rather than int(left - right),
        # which truncates differences smaller than 1 to 0 ("equal").
        return (left > right) - (left < right)

    # Sort the field states as numbers if the elements are numbers, else sort
    # them lexically.
    field_states.sort(custom_comparator)

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        groupings = comparison_groupings[field_state]
        plot_data.append([groupings[comp_field_state]
                          for comp_field_state in comparison_field_states])
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(
        plot_type, plot_data, x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors, x_label=plot_x_label,
        y_label=plot_y_label, title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min, y_max=y_max, whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        group_spacing=opts.group_spacing, figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data,
                                                plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append(
                    '%s vs %s' % (data_point_label, comp_field))
                sig_tests_data.append(dist)

        # The file is opened before the tests run (as before), but is now
        # guaranteed to be closed even if the tests raise.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.xls" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_results = all_pairs_t_test(
                sig_tests_labels, sig_tests_data, tail_type=opts.tail_type,
                num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of "
            "labels do not match the number of points along the x-axis.")

        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.xls" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")

            for label, data in zip(plot_x_axis_labels, plot_data):
                assert len(comparison_field_states) == len(data), (
                    "The "
                    "number of specified comparison groups does not match "
                    "the number of groups found at the current point along "
                    "the x-axis.")

                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" +
                        "\t".join(map(str, comp_grp_data)) + "\n")