Exemplo n.º 1
0
def _get_groupings(dist_matrix_header,
                   dist_matrix,
                   groups,
                   within=True,
                   suppress_symmetry_and_hollowness_check=False):
    """Returns a list of distance groupings.

    The return value is a list that contains tuples of three elements: the
    first two elements are the field values being compared, and the third
    element is a list of the distances.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header.
        - dist_matrix: The distance matrix. It is indexed with lists of
          indices (``dist_matrix[rows][:, cols]``), so it must support that
          style of indexing.
        - groups: A dictionary mapping field value to sample IDs, obtained by
                  calling group_by_field().
        - within: If True, distances are grouped within a field value. If
          False, distances are grouped between field values.
        - suppress_symmetry_and_hollowness_check: By default, the input
          distance matrix will be checked for symmetry and hollowness. It is
          recommended to leave this check in place for safety, as the check
          is fairly fast. However, if you *know* you have a symmetric and
          hollow distance matrix, you can disable this check for small
          performance gains on extremely large distance matrices

    Returns:
        A list of (field value, field value, distances) tuples. When within
        is True both field values are the same and only the entries strictly
        above the diagonal of the group's block are reported, so the zeros
        along the diagonal are omitted and each pair appears once. When
        within is False every pair of distinct groups is reported once, with
        the whole block of distances flattened into a single list.
        Groupings that contain no distances are left out.

    Raises:
        ValueError: if the check is enabled and is_symmetric_and_hollow()
        rejects dist_matrix.
    """
    # Note: Much of this code is taken from Jeremy Widmann's
    # distances_by_groups() function, part of make_distance_histograms.py.
    if not suppress_symmetry_and_hollowness_check:
        if not is_symmetric_and_hollow(dist_matrix):
            raise ValueError("The distance matrix must be symmetric and "
                             "hollow.")
    result = []
    # Materialize the items so they can be indexed by position below; a
    # Python 3 dict view does not support subscripting.
    group_items = list(groups.items())
    num_groups = len(group_items)

    for group_idx, (row_group, row_ids) in enumerate(group_items):
        row_indices = _get_indices(dist_matrix_header, row_ids)
        if within:
            # Rows and columns are the same samples, so walk only the part
            # of the block strictly above the diagonal.
            block = dist_matrix[row_indices][:, row_indices]
            size = len(row_indices)
            distances = [block[row][col]
                         for row in range(size)
                         for col in range(row + 1, size)]
            if distances:
                result.append((row_group, row_group, distances))
        else:
            # Rows and columns are different groups: report the full block
            # for each later group so every pair of groups is seen once.
            for other_idx in range(group_idx + 1, num_groups):
                col_group, col_ids = group_items[other_idx]
                col_indices = _get_indices(dist_matrix_header, col_ids)
                vals = dist_matrix[row_indices][:, col_indices]

                # Flatten the array into a single-level list. A real list is
                # needed so the emptiness test below is meaningful.
                vals = list(vals.flat)
                if vals:
                    result.append((row_group, col_group, vals))
    return result
Exemplo n.º 2
0
def main():
    """Command-line entry point for the distance histograms script.

    Steps, in order:
      1. Parse the command-line options.
      2. Validate the distance matrix, mapping file, background color,
         prefs file and requested fields, reporting problems through
         option_parser.error().
      3. Compute the within/between group distances.
      4. Unless HTML output is suppressed, draw the histograms, write the
         main HTML page and copy the javascript resource next to it.
      5. Run the Monte Carlo analyses when a positive number of iterations
         was requested.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    #Some code for error checking of input args:

    #Check if distance_matrix_file is valid. Any ordinary failure (missing
    #file, parse error) is reported as an invalid file; Exception is used
    #instead of a bare except so that KeyboardInterrupt/SystemExit still
    #propagate.
    try:
        with open(opts.distance_matrix_file, 'U') as dm_file:
            d_header, d_mat = parse_distmat(dm_file)
    except Exception:
        option_parser.error("This does not look like a valid distance matrix file.  Please supply a valid distance matrix file using the -d option.")

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    #Check if map_fname is valid:
    try:
        with open(opts.map_fname, 'U') as map_file:
            mapping, m_header, m_comments = parse_mapping_file(map_file)
    except QiimeParseError:
        option_parser.error("This does not look like a valid metadata mapping file.  Please supply a valid mapping file using the -m option.")

    #make sure background_color is valid
    if opts.background_color not in ['black', 'white']:
        option_parser.error("'%s' is not a valid background color.  Please pass in either 'black' or 'white' using the -k option."%(opts.background_color))

    #make sure prefs file is valid if it exists, then parse it
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, 'U') as prefs_handle:
                prefs_file = prefs_handle.read()
        except IOError:
            option_parser.error("Provided prefs file, '%s', does not exist.  Please pass in a valid prefs file with the -p option."%(opts.prefs_path))
        prefs = parse_prefs_file(prefs_file)

    color_prefs, color_data, background_color, label_color, ball_scale, \
        arrow_colors = sample_color_prefs_and_map_data_from_options(opts)

    #list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(iter_color_groups(mapping=color_data['map'],
                                               prefs=color_prefs))

    #dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir()+'/qiime/support_files/'

    #Fields come from the command line first, then the prefs file, and
    #otherwise are picked from the mapping file.
    fields = opts.fields
    if fields is not None:
        #Strip surrounding whitespace, then any surrounding quotes.
        fields = [strip(f).strip('"').strip("'") for f in fields.split(',')]
    elif prefs is not None:
        fields = prefs.get('FIELDS', None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    #Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error("The field, %s, is not in the provided mapping file.  Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."%(f))

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,
                                             prefix='distances'))

    if not opts.suppress_html_output:
        #histograms output path
        histograms_path = path.join(opts.dir_path, 'histograms')
        try:
            mkdir(histograms_path)
        except OSError:     #raised if dir exists
            pass

        #draw all histograms
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color)

        #Get relative path to histogram files.
        label_to_histogram_filename_relative = \
            _make_relative_paths(label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename+'_distance_histograms.html'
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title='Distance Histograms')

        #Handle saving web resources locally.
        #javascript file
        javascript_path = path.join(opts.dir_path, 'js')
        try:
            mkdir(javascript_path)
        except OSError:     #raised if dir exists
            pass
        #Read the source before opening the destination so a missing source
        #does not leave an empty output file behind.
        with open(qiime_dir+'js/histograms.js') as js_in:
            js_source = js_in.read()
        with open(path.join(javascript_path, 'histograms.js'), 'w') as js_out:
            js_out.write(js_source)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        #Do Monte Carlo for all fields
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters)

        #Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters)
def main():
    """Command-line entry point for the distance histograms script.

    Parses the command-line options, validates every input (distance
    matrix, mapping file, background color, prefs file, field names) and
    reports problems through option_parser.error(). It then groups the
    distances within and between field values. Unless HTML output is
    suppressed, the histograms, the main HTML page and the javascript
    resource are written under opts.dir_path. Monte Carlo analyses run only
    when opts.monte_carlo_iters is positive.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    #Some code for error checking of input args:

    #Check if distance_matrix_file is valid. Exception (rather than a bare
    #except) keeps KeyboardInterrupt/SystemExit from being reported as a
    #bad input file.
    try:
        with open(opts.distance_matrix_file, 'U') as dist_handle:
            d_header, d_mat = parse_distmat(dist_handle)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file.  Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    #Check if map_fname is valid:
    try:
        with open(opts.map_fname, 'U') as map_handle:
            mapping, m_header, m_comments = parse_mapping_file(map_handle)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file.  Please supply a valid mapping file using the -m option."
        )

    #make sure background_color is valid
    if opts.background_color not in ['black', 'white']:
        option_parser.error(
            "'%s' is not a valid background color.  Please pass in either 'black' or 'white' using the -k option."
            % (opts.background_color))

    #make sure prefs file is valid if it exists; parse it once it is read
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, 'U') as prefs_handle:
                prefs_file = prefs_handle.read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist.  Please pass in a valid prefs file with the -p option."
                % (opts.prefs_path))
        prefs = parse_prefs_file(prefs_file)

    color_prefs, color_data, background_color, label_color, ball_scale, \
        arrow_colors = sample_color_prefs_and_map_data_from_options(opts)

    #list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(
        iter_color_groups(mapping=color_data['map'], prefs=color_prefs))

    #dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + '/qiime/support_files/'

    #Field selection priority: command line, then prefs file, then fields
    #chosen from the mapping file.
    fields = opts.fields
    if fields is not None:
        #Strip surrounding whitespace, then any surrounding quotes.
        fields = [strip(f).strip('"').strip("'") for f in fields.split(',')]
    elif prefs is not None:
        fields = prefs.get('FIELDS', None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    #Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file.  Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."
                    % (f))

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,
                                             prefix='distances'))

    if not opts.suppress_html_output:
        #histograms output path
        histograms_path = path.join(opts.dir_path, 'histograms')
        try:
            mkdir(histograms_path)
        except OSError:  #raised if dir exists
            pass

        #draw all histograms
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color)

        #Get relative path to histogram files.
        label_to_histogram_filename_relative = \
            _make_relative_paths(label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + '_distance_histograms.html'
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title='Distance Histograms')

        #Handle saving web resources locally.
        #javascript file
        javascript_path = path.join(opts.dir_path, 'js')
        try:
            mkdir(javascript_path)
        except OSError:  #raised if dir exists
            pass
        #Read the source first so that a missing source file cannot leave a
        #truncated, empty copy in the output directory.
        with open(qiime_dir + 'js/histograms.js') as js_in:
            js_source = js_in.read()
        with open(path.join(javascript_path, 'histograms.js'), 'w') as js_out:
            js_out.write(js_source)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        #Do Monte Carlo for all fields
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters)

        #Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters)
Exemplo n.º 4
0
def _get_groupings(dist_matrix_header, dist_matrix, groups, within=True,
                   suppress_symmetry_and_hollowness_check=False):
    """Returns a list of distance groupings.

    The return value is a list that contains tuples of three elements: the
    first two elements are the field values being compared, and the third
    element is a list of the distances.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header.
        - dist_matrix: The distance matrix. It is sliced as
          ``dist_matrix[rows][:, cols]`` with lists of indices, so it must
          support that form of indexing.
        - groups: A dictionary mapping field value to sample IDs, obtained by
                  calling group_by_field().
        - within: If True, distances are grouped within a field value. If
          False, distances are grouped between field values.
        - suppress_symmetry_and_hollowness_check: By default, the input
          distance matrix will be checked for symmetry and hollowness. It is
          recommended to leave this check in place for safety, as the check
          is fairly fast. However, if you *know* you have a symmetric and
          hollow distance matrix, you can disable this check for small
          performance gains on extremely large distance matrices

    If within is True, the zeros along the diagonal of the distance matrix are
    omitted and each pair of samples contributes a single distance (only the
    entries above the diagonal are read). If within is False, each pair of
    distinct groups is reported once with its block of distances flattened.
    Groupings without any distances are not included in the result.

    Raises ValueError if the symmetry/hollowness check is enabled and
    is_symmetric_and_hollow() rejects the matrix.
    """
    # Note: Much of this code is taken from Jeremy Widmann's
    # distances_by_groups() function, part of make_distance_histograms.py.
    if not suppress_symmetry_and_hollowness_check:
        if not is_symmetric_and_hollow(dist_matrix):
            raise ValueError("The distance matrix must be symmetric and "
                             "hollow.")
    result = []
    # A concrete list is required because the between-groups branch looks
    # items up by position; dict views on Python 3 cannot be subscripted.
    group_items = list(groups.items())

    for group_pos, (row_group, row_ids) in enumerate(group_items):
        row_indices = _get_indices(dist_matrix_header, row_ids)
        if within:
            # Handle the case where indices are the same so we need to omit
            # the diagonal: start each column scan one past the row.
            block = dist_matrix[row_indices][:,row_indices]

            size = len(row_indices)
            distances = []
            for row in range(size):
                for col in range(row + 1, size):
                    distances.append(block[row][col])
            if distances:
                result.append((row_group, row_group, distances))
        else:
            # Handle the case where indices are separate: just return blocks.
            # Only later groups are paired so each pair is reported once.
            for col_pos in range(group_pos + 1, len(group_items)):
                col_group, col_ids = group_items[col_pos]
                col_indices = _get_indices(dist_matrix_header, col_ids)
                vals = dist_matrix[row_indices][:,col_indices]

                # Flatten the array into a single-level list. It must be a
                # real list for the emptiness check below to work.
                vals = list(vals.flat)
                if vals:
                    result.append((row_group, col_group, vals))
    return result