def test_generate_comparative_plots_empty_marker_list(self):
     """Call generate_comparative_plots() with an empty marker list.

     The intent (per the test name) is that the function falls back to its
     predefined markers. No assertion is made here: the test passes as long
     as the call completes without raising.
     """
     data = self.ValidTypicalData
     x_values = [1, 4, 10, 11]
     point_labels = ["T0", "T1", "T2", "T3"]
     distribution_labels = ["Infants", "Children", "Teens"]
     no_markers = []
     generate_comparative_plots('scatter', data, x_values, point_labels,
                                distribution_labels, no_markers,
                                "x-axis label", "y-axis label", "Test")
 def test_generate_comparative_plots_empty_marker_list(self):
     """Exercise generate_comparative_plots() with no user-supplied markers.

     An empty list is passed in the markers position of a scatter plot
     request. The test makes no assertion; it only requires that the call
     does not raise.
     """
     # Positional arguments, in the order the call expects them.
     call_args = (
         'scatter',
         self.ValidTypicalData,
         [1, 4, 10, 11],
         ["T0", "T1", "T2", "T3"],
         ["Infants", "Children", "Teens"],
         [],
         "x-axis label",
         "y-axis label",
         "Test",
     )
     generate_comparative_plots(*call_args)
 def test_generate_comparative_plots_insufficient_symbols(self):
     """Check the warning printed when too few markers are supplied.

     A single marker ('^') is passed alongside three distribution labels.
     stdout is temporarily replaced by an in-memory buffer so that the
     printed warning can be compared against the expected text; the real
     stdout is always restored afterwards.
     """
     expected_warning = (
         "There are not enough markers to "
         "uniquely represent each distribution in your dataset. "
         "You may want to provide a list of markers that is at "
         "least as large as the number of distributions in your "
         "dataset.")
     original_stdout = sys.stdout
     try:
         captured = StringIO()
         sys.stdout = captured
         generate_comparative_plots(
             'scatter', self.ValidTypicalData, [1, 4, 10, 11],
             ["T0", "T1", "T2", "T3"], ["Infants", "Children", "Teens"],
             ['^'], "x-axis label", "y-axis label", "Test")
         printed = captured.getvalue().strip()
         self.assertEqual(printed, expected_warning)
     finally:
         # Restore stdout even if the call or the assertion fails.
         sys.stdout = original_stdout
 def test_generate_comparative_plots_insufficient_symbols(self):
     """generate_comparative_plots() should warn when markers run out.

     Only one marker is given for three distributions. Whatever the call
     prints to stdout is captured and, after stripping surrounding
     whitespace, must equal the expected warning message.
     """
     warning_parts = [
         "There are not enough markers to ",
         "uniquely represent each distribution in your dataset. ",
         "You may want to provide a list of markers that is at ",
         "least as large as the number of distributions in your ",
         "dataset.",
     ]
     stdout_backup = sys.stdout
     try:
         buf = StringIO()
         sys.stdout = buf
         markers = ['^']
         generate_comparative_plots('scatter', self.ValidTypicalData,
                                    [1, 4, 10, 11],
                                    ["T0", "T1", "T2", "T3"],
                                    ["Infants", "Children", "Teens"],
                                    markers, "x-axis label",
                                    "y-axis label", "Test")
         self.assertEqual(buf.getvalue().strip(), "".join(warning_parts))
     finally:
         # Always hand the real stdout back to the interpreter.
         sys.stdout = stdout_backup
 def test_generate_comparative_plots_box(self):
     """Check the Figure returned for a 'box' plot request.

     The first axes of the returned figure must carry the requested title
     and axis labels, expose four x tick labels, and place its x ticks at
     the expected positions.
     """
     figure = generate_comparative_plots(
         'box', self.ValidTypicalData, [1, 4, 10, 11],
         ["T0", "T1", "T2", "T3"], ["Infants", "Children", "Teens"],
         ['b', 'g', 'y'], "x-axis label", "y-axis label", "Test")
     axes = figure.get_axes()[0]

     # Text accessors on the axes, paired with the text each should return.
     expected_text = (
         (axes.get_title, "Test"),
         (axes.get_xlabel, "x-axis label"),
         (axes.get_ylabel, "y-axis label"),
     )
     for accessor, expected in expected_text:
         self.assertEqual(accessor(), expected)

     self.assertEqual(len(axes.get_xticklabels()), 4)
     self.assertFloatEqual(axes.get_xticks(), [2.1, 7.2, 17.4, 19.1])
 def test_generate_comparative_plots_box(self):
     """generate_comparative_plots('box', ...) should yield a usable Figure.

     Verifies the title, both axis labels, the number of x tick labels and
     the x tick positions on the first axes of the returned figure.
     """
     x_values = [1, 4, 10, 11]
     point_labels = ["T0", "T1", "T2", "T3"]
     distribution_labels = ["Infants", "Children", "Teens"]
     box_colors = ['b', 'g', 'y']
     result = generate_comparative_plots('box', self.ValidTypicalData,
                                         x_values, point_labels,
                                         distribution_labels, box_colors,
                                         "x-axis label", "y-axis label",
                                         "Test")
     first_axes = result.get_axes()[0]

     title = first_axes.get_title()
     self.assertEqual(title, "Test")
     x_label = first_axes.get_xlabel()
     self.assertEqual(x_label, "x-axis label")
     y_label = first_axes.get_ylabel()
     self.assertEqual(y_label, "y-axis label")

     tick_label_count = len(first_axes.get_xticklabels())
     self.assertEqual(tick_label_count, 4)
     tick_positions = first_axes.get_xticks()
     self.assertFloatEqual(tick_positions, [2.1, 7.2, 17.4, 19.1])
def main():
    """Plot distance comparisons between field states and write the results.

    Steps, in order:

    1. Parse the command-line options described by ``script_info``.
    2. Create the output directory and load the distance matrix and the
       metadata mapping file.
    3. Group distances by the requested field and the comparison groups.
    4. Render the comparative plot and save it in the output directory.
    5. Unless suppressed, run all-pairs t-tests and write ``<field>_Stats.xls``.
    6. If requested, write the raw plot data to
       ``<field>_Distance_Comparisons.xls``.

    Invalid user input is reported through ``option_parser.error``.
    """
    # Local import keeps this block self-contained; cmp_to_key is available
    # in Python 2.7 and all Python 3 versions.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The input files are closed
    # as soon as parsing finishes.
    # NOTE(review): assumes parse_distmat / parse_mapping_file fully consume
    # the file before returning -- confirm.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error("This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error("This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    def parse_y_limit(raw_value, option_name):
        """Return raw_value as a float, or None if it is 'auto'."""
        try:
            return float(raw_value)
        except ValueError:
            if raw_value == 'auto':
                return None
            option_parser.error("The --%s option must be either a number "
                                "or 'auto'." % option_name)

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = parse_y_limit(opts.y_min, 'y_min')
    y_max = parse_y_limit(opts.y_max, 'y_max')

    # Parse the field states that will be compared to every other field state.
    # The missing-option check has to happen before the value is split;
    # afterwards the result is always a list and can never be None.
    if opts.comparison_groups is None:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        strip(field_state).strip('"').strip("'")
        for field_state in opts.comparison_groups.split(',')]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(dist_matrix_header,
            dist_matrix, mapping_header, mapping, field,
            comparison_field_states)

    def compare_field_states(x, y):
        """cmp-style comparison: numeric if both parse as floats, else raw."""
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            left, right = x, y
        else:
            if left != left or right != right:
                # NaN has no numeric order; compare the raw values instead.
                left, right = x, y
        # Compare directly rather than truncating the difference to an int,
        # which would treat values less than 1 apart (e.g. 0.1 and 0.5) as
        # equal.
        return (left > right) - (left < right)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis. Sort
    # them as numbers if the elements are numbers, else sort them lexically.
    field_states = sorted(comparison_groupings.keys(),
                          key=cmp_to_key(compare_field_states))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(float(state) for state in field_states)
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = [[comparison_groupings[field_state][comp_field_state]
                  for comp_field_state in comparison_field_states]
                 for field_state in field_states]
    plot_x_axis_labels = list(field_states)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(opts.plot_type, plot_data,
            x_values=x_spacing, data_point_labels=plot_x_axis_labels,
            distribution_labels=comparison_field_states,
            distribution_markers=plot_colors, x_label=plot_x_label,
            y_label=plot_y_label, title=plot_title,
            x_tick_labels_orientation=opts.x_tick_labels_orientation,
            y_min=y_min, y_max=y_max, whisker_length=opts.whisker_length,
            error_bar_type=opts.error_bar_type,
            distribution_width=opts.distribution_width,
            group_spacing=opts.group_spacing, figure_width=width,
            figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" %
                          (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype,
            transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append('%s vs %s' % (data_point_label,
                                                      comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(sig_tests_labels, sig_tests_data,
                tail_type=opts.tail_type,
                num_permutations=opts.num_permutations)

        # Open the output file only once the results exist, so a failing test
        # run does not leave an empty stats file behind.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.xls" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
                "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.xls" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert (len(comparison_field_states) == len(data)), "The " +\
                        "number of specified comparison groups does not " +\
                        "match the number of groups found at the current " +\
                        "point along the x-axis."
                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(comp_field_state + "\t" + label + "\t" +
                            "\t".join(map(str, comp_grp_data)) + "\n")
def main():
    """Plot distance comparisons between field states and write the results.

    Steps, in order:

    1. Parse the command-line options described by ``script_info``.
    2. Create the output directory and load the distance matrix and the
       metadata mapping file.
    3. Group distances by the requested field and the comparison groups.
    4. Render the comparative plot and save it in the output directory.
    5. Unless suppressed, run all-pairs t-tests and write ``<field>_Stats.xls``.
    6. If requested, write the raw plot data to
       ``<field>_Distance_Comparisons.xls``.

    Invalid user input is reported through ``option_parser.error``.
    """
    # Local import keeps this block self-contained; cmp_to_key is available
    # in Python 2.7 and all Python 3 versions.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The input files are closed
    # as soon as parsing finishes.
    # NOTE(review): assumes parse_distmat / parse_mapping_file fully consume
    # the file before returning -- confirm.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    def parse_y_limit(raw_value, option_name):
        """Return raw_value as a float, or None if it is 'auto'."""
        try:
            return float(raw_value)
        except ValueError:
            if raw_value == 'auto':
                return None
            option_parser.error("The --%s option must be either a number "
                                "or 'auto'." % option_name)

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = parse_y_limit(opts.y_min, 'y_min')
    y_max = parse_y_limit(opts.y_max, 'y_max')

    # Parse the field states that will be compared to every other field state.
    # The missing-option check has to happen before the value is split;
    # afterwards the result is always a list and can never be None.
    if opts.comparison_groups is None:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        strip(field_state).strip('"').strip("'")
        for field_state in opts.comparison_groups.split(',')
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    def compare_field_states(x, y):
        """cmp-style comparison: numeric if both parse as floats, else raw."""
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            left, right = x, y
        else:
            if left != left or right != right:
                # NaN has no numeric order; compare the raw values instead.
                left, right = x, y
        # Compare directly rather than truncating the difference to an int,
        # which would treat values less than 1 apart (e.g. 0.1 and 0.5) as
        # equal.
        return (left > right) - (left < right)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis. Sort
    # them as numbers if the elements are numbers, else sort them lexically.
    field_states = sorted(comparison_groupings.keys(),
                          key=cmp_to_key(compare_field_states))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(float(state) for state in field_states)
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = [
        [comparison_groupings[field_state][comp_field_state]
         for comp_field_state in comparison_field_states]
        for field_state in field_states
    ]
    plot_x_axis_labels = list(field_states)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        group_spacing=opts.group_spacing,
        figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp,
                        format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append('%s vs %s' %
                                        (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels,
            sig_tests_data,
            tail_type=opts.tail_type,
            num_permutations=opts.num_permutations)

        # Open the output file only once the results exist, so a failing test
        # run does not leave an empty stats file behind.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.xls" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
                "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.xls" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert (len(comparison_field_states) == len(data)), "The " +\
                        "number of specified comparison groups does not " +\
                        "match the number of groups found at the current " +\
                        "point along the x-axis."
                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" +
                        "\t".join(map(str, comp_grp_data)) + "\n")