Пример #1
0
    def test_get_field_state_comparisons(self):
        """Check the 2D dictionary built by get_field_state_comparisons().

        The same list of distances is expected whether 'Control' or 'Fast'
        is used as the comparison field state; only the nesting of the two
        keys is swapped.
        """
        fast_control_distances = [
            0.72899999999999998, 0.80000000000000004, 0.72099999999999997,
            0.76500000000000001, 0.77600000000000002, 0.74399999999999999,
            0.749, 0.67700000000000005, 0.73399999999999999,
            0.77700000000000002, 0.73299999999999998, 0.72399999999999998,
            0.69599999999999995, 0.67500000000000004, 0.65400000000000003,
            0.69599999999999995, 0.73099999999999998, 0.75800000000000001,
            0.73799999999999999, 0.73699999999999999,
        ]

        # 'Control' as the comparison group: results are keyed by 'Fast'.
        observed = get_field_state_comparisons(
            self.dist_matrix_header, self.dist_matrix, self.mapping_header,
            self.mapping, self.field, ['Control'])
        self.assertFloatEqual(
            observed, {'Fast': {'Control': list(fast_control_distances)}})

        # 'Fast' as the comparison group: results are keyed by 'Control'.
        observed = get_field_state_comparisons(
            self.dist_matrix_header, self.dist_matrix, self.mapping_header,
            self.mapping, self.field, ['Fast'])
        self.assertFloatEqual(
            observed, {'Control': {'Fast': list(fast_control_distances)}})
Пример #2
0
    def test_get_field_state_comparisons(self):
        """Verify get_field_state_comparisons() for both comparison groups.

        Each call must return a 2D dictionary mapping a field state to its
        comparison field state and the distances between the two.
        """
        def compare_against(comparison_state):
            # Run the function under test with one comparison field state.
            return get_field_state_comparisons(
                self.dist_matrix_header, self.dist_matrix,
                self.mapping_header, self.mapping, self.field,
                [comparison_state])

        distances = [
            0.72899999999999998, 0.80000000000000004, 0.72099999999999997,
            0.76500000000000001, 0.77600000000000002, 0.74399999999999999,
            0.749, 0.67700000000000005, 0.73399999999999999,
            0.77700000000000002, 0.73299999999999998, 0.72399999999999998,
            0.69599999999999995, 0.67500000000000004, 0.65400000000000003,
            0.69599999999999995, 0.73099999999999998, 0.75800000000000001,
            0.73799999999999999, 0.73699999999999999,
        ]

        expected_for_control = {'Fast': {'Control': list(distances)}}
        self.assertFloatEqual(compare_against('Control'),
                              expected_for_control)

        expected_for_fast = {'Control': {'Fast': list(distances)}}
        self.assertFloatEqual(compare_against('Fast'), expected_for_fast)
Пример #3
0
 def test_get_field_state_comparisons_tiny(self):
     """The tiny fixtures should produce an empty comparison dictionary."""
     call_args = (
         self.tiny_dist_matrix_header,
         self.tiny_dist_matrix,
         self.tiny_mapping_header,
         self.tiny_mapping,
         self.tiny_field,
         ['SampleFieldState1'],
     )
     observed = get_field_state_comparisons(*call_args)
     self.assertEqual(observed, {})
Пример #4
0
 def test_get_field_state_comparisons_tiny(self):
     """get_field_state_comparisons() on the tiny fixtures returns {}."""
     header = self.tiny_dist_matrix_header
     matrix = self.tiny_dist_matrix
     observed = get_field_state_comparisons(
         header, matrix, self.tiny_mapping_header, self.tiny_mapping,
         self.tiny_field, ['SampleFieldState1'])
     expected = {}
     self.assertEqual(observed, expected)
Пример #5
0
 def test_get_field_state_comparisons_small(self):
     """The small fixtures should yield a single 0.5 distance.

     The result is a 2D dictionary keyed first by the field state and
     then by the comparison field state.
     """
     call_args = (
         self.small_dist_matrix_header,
         self.small_dist_matrix,
         self.small_mapping_header,
         self.small_mapping,
         self.small_field,
         ['SampleFieldState1'],
     )
     observed = get_field_state_comparisons(*call_args)
     self.assertFloatEqual(
         observed, {'SampleFieldState2': {'SampleFieldState1': [0.5]}})
Пример #6
0
 def test_get_field_state_comparisons_small(self):
     """Check the 2D distance dictionary for the small fixtures."""
     expected_inner = {'SampleFieldState1': [0.5]}
     expected = {'SampleFieldState2': expected_inner}

     header = self.small_dist_matrix_header
     matrix = self.small_dist_matrix
     observed = get_field_state_comparisons(
         header, matrix, self.small_mapping_header, self.small_mapping,
         self.small_field, ['SampleFieldState1'])

     self.assertFloatEqual(observed, expected)
def main():
    """Plot distance comparisons between field states and write optional stats.

    Parses the command-line options described by ``script_info``, reads the
    distance matrix and mapping file, gathers the distances between every
    field state and each comparison field state, and saves the resulting
    figure in the output directory. Unless suppressed, the output of
    ``all_pairs_t_test`` is written to ``<field>_Stats.txt``; when requested,
    the raw distances are written to a tab-delimited file as well.

    Invalid options and unreadable inputs are reported through
    ``option_parser.error``.
    """
    # Imported locally so this block does not rely on a module-level import.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory " "specified with the -o option.")

    # Parse the distance matrix and mapping file, closing each input file as
    # soon as it has been parsed.
    try:
        with open(opts.distance_matrix_fp, "U") as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option."
        )
    try:
        with open(opts.mapping_fp, "U") as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option."
        )

    def parse_axis_limit(value, option_name):
        # An axis limit is either a number or the literal 'auto' (-> None).
        try:
            return float(value)
        except ValueError:
            if value == "auto":
                return None
            option_parser.error("The %s option must be either a number or 'auto'." % option_name)

    y_min = parse_axis_limit(opts.y_min, "--y_min")
    y_max = parse_axis_limit(opts.y_max, "--y_max")

    # Parse the field states that will be compared to every other field state.
    # The check has to happen before splitting: the parsed list is never None.
    raw_comparison_groups = opts.comparison_groups
    if raw_comparison_groups is None or not raw_comparison_groups.strip():
        option_parser.error("You must provide at least one field state to " "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'") for field_state in raw_comparison_groups.split(",")
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field, comparison_field_states
    )

    def custom_comparator(x, y):
        # Compare as numbers only when both states are numeric; otherwise
        # compare the original values lexically. The sign is returned directly
        # rather than int(x - y), which truncates differences smaller than one
        # to zero and would treat e.g. 0.2 and 0.5 as equal.
        try:
            x, y = float(x), float(y)
        except (TypeError, ValueError):
            pass
        return (x > y) - (x < y)

    # Grab a sorted list of all field states that had the comparison field
    # states compared against them. These will be plotted along the x-axis.
    field_states = sorted(comparison_groupings.keys(), key=cmp_to_key(custom_comparator))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(float(field_state) for field_state in field_states)
        except (TypeError, ValueError):
            option_parser.error(
                "The 'numeric' label type is invalid because "
                "not all field states could be converted into "
                "numbers. Please specify a different label "
                "type."
            )

    # Accumulate the data for each field state 'point' along the x-axis: one
    # list of distances per comparison field state, in the order given.
    plot_data = [
        [comparison_groupings[field_state][comp_field_state] for comp_field_state in comparison_field_states]
        for field_state in field_states
    ]
    plot_x_axis_labels = list(field_states)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB()) for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must " "be greater than zero.")

    plot_figure = grouped_distributions(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height,
    )

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype, transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append("%s vs %s" % (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels, sig_tests_data, tail_type=opts.tail_type, num_permutations=opts.num_permutations
        )
        # Open the file only once the results exist so a failed test run does
        # not leave an empty stats file behind.
        with open(join(opts.output_dir, "%s_Stats.txt" % field), "w") as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of " + "labels do not match the number of points along the x-axis."
        )
        raw_data_fp = join(opts.output_dir, "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, "w") as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert len(comparison_field_states) == len(data), (
                    "The "
                    + "number of specified comparison groups does not match "
                    + "the number of groups found at the current point along "
                    + "the x-axis."
                )
                for comp_field_state, comp_grp_data in zip(comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" + "\t".join(map(str, comp_grp_data)) + "\n"
                    )
def main():
    """Plot distance comparisons between field states and write optional stats.

    Parses the command-line options described by ``script_info``, reads the
    distance matrix and mapping file, gathers the distances between every
    field state and each comparison field state, and saves the figure
    returned by ``generate_comparative_plots`` in the output directory.
    Unless suppressed, the output of ``all_pairs_t_test`` is written to
    ``<field>_Stats.txt``; when requested, the raw distances are written to a
    tab-delimited file as well.

    Invalid options and unreadable inputs are reported through
    ``option_parser.error``.
    """
    # Imported locally so this block does not rely on a module-level import.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file, closing each input file as
    # soon as it has been parsed.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    def parse_axis_limit(value, option_name):
        # An axis limit is either a number or the literal 'auto' (-> None).
        try:
            return float(value)
        except ValueError:
            if value == 'auto':
                return None
            option_parser.error(
                "The %s option must be either a number or 'auto'." %
                option_name)

    y_min = parse_axis_limit(opts.y_min, "--y_min")
    y_max = parse_axis_limit(opts.y_max, "--y_max")

    # Parse the field states that will be compared to every other field state.
    # The check has to happen before splitting: the parsed list is never None.
    raw_comparison_groups = opts.comparison_groups
    if raw_comparison_groups is None or not raw_comparison_groups.strip():
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'")
        for field_state in raw_comparison_groups.split(',')
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    def custom_comparator(x, y):
        # Compare as numbers only when both states are numeric; otherwise
        # compare the original values lexically. The sign is returned directly
        # rather than int(x - y), which truncates differences smaller than one
        # to zero and would treat e.g. 0.2 and 0.5 as equal.
        try:
            x, y = float(x), float(y)
        except (TypeError, ValueError):
            pass
        return (x > y) - (x < y)

    # Grab a sorted list of all field states that had the comparison field
    # states compared against them. These will be plotted along the x-axis.
    field_states = sorted(comparison_groupings.keys(),
                          key=cmp_to_key(custom_comparator))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            # sorted() is used instead of map(...).sort() so that only a
            # failed float conversion can trigger the error below.
            x_spacing = sorted(
                float(field_state) for field_state in field_states)
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis: one
    # list of distances per comparison field state, in the order given.
    plot_data = [
        [comparison_groupings[field_state][comp_field_state]
         for comp_field_state in comparison_field_states]
        for field_state in field_states
    ]
    plot_x_axis_labels = list(field_states)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp,
                        format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append('%s vs %s' %
                                        (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels,
            sig_tests_data,
            tail_type=opts.tail_type,
            num_permutations=opts.num_permutations)
        # Open the file only once the results exist so a failed test run does
        # not leave an empty stats file behind.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.txt" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
                "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert (len(comparison_field_states) == len(data)), "The " +\
                        "number of specified comparison groups does not " +\
                        "match the number of groups found at the current " +\
                        "point along the x-axis."
                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(comp_field_state + "\t" + label + "\t" +
                                     "\t".join(map(str, comp_grp_data)) + "\n")