Exemplo n.º 1
0
    def test_remove_nans(self):
        """remove_nans: strips, in place, samples having a NaN coordinate"""
        # three coordinate rows; only the middle one carries a NaN
        clean_first = [10,60,-0.219044992,0.079674486,0.09233683]
        has_nan = [20,55,-0.042258081, nan,0.024837603]
        clean_last = [30,50,0.080504323,-0.212014503,-0.088353435]

        coord_data = array([clean_first, has_nan, clean_last])
        observed = [self.coord_header, coord_data]

        # the call's return value is not used: the [header, data] pair itself
        # is inspected afterwards
        remove_nans(observed)

        # assumes the fixture header names the rows Sample1..Sample3 -- the
        # middle name has to vanish together with the NaN row
        self.assertEqual(observed[0], ["Sample1","Sample3"])
        self.assertEqual(observed[1], array([clean_first, clean_last]))
Exemplo n.º 2
0
    def test_remove_nans(self):
        """remove_nans: samples with a NaN among their coordinates are dropped"""
        rows = [
            [10, 60, -0.219044992, 0.079674486, 0.09233683],
            [20, 55, -0.042258081, nan, 0.024837603],   # the only row with a NaN
            [30, 50, 0.080504323, -0.212014503, -0.088353435],
        ]
        matrix = array(rows)
        coords = [self.coord_header, matrix]

        # remove_nans is exercised for its effect on `coords`; nothing it
        # returns is looked at
        remove_nans(coords)

        header_after, data_after = coords[0], coords[1]

        # rows 0 and 2 are the NaN-free ones; the expected header presumes the
        # fixture names the three rows Sample1, Sample2, Sample3
        self.assertEqual(header_after, ["Sample1", "Sample3"])
        self.assertEqual(data_after, array([rows[0], rows[2]]))
Exemplo n.º 3
0
def main():
    """Run the comparison-plot script end to end.

    Parses the command line, loads the coordinate files (and the optional
    edges file), drops samples that are not in the mapping file, optionally
    appends custom axes, prepares the output directories, copies king.jar
    and finally calls generate_3d_plots.

    Invalid input (fewer than two coordinate files without an edges file, or
    coordinate files rejected by validate_coord_files) is reported through
    option_parser.error.

    Raises:
        ValueError: if no coordinates are left after removing the samples
            that are missing from the mapping file.
        OSError: if an output directory cannot be created and does not
            already exist.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    edges_supplied = opts.edges_file is not None

    # coord_fnames arrives as one comma-separated string here
    if len(opts.coord_fnames.split(',')) < 2 and not edges_supplied:
        option_parser.error('Please provide at least two ' +
                            'coordinate files or a custom edges file')

    # Open and get coord data (for multiple coords files)
    coord_files = process_coord_filenames(opts.coord_fnames)
    if not validate_coord_files(coord_files):
        option_parser.error('Every line of every coord file must ' +
                            'have the same number of columns.')
    data['edges'], data['coord'] = \
        get_multiple_coords(coord_files, opts.edges_file, opts.serial)

    # if the edges file wasn't supplied, we appended _i to each file's samples
    # therefore we now add duplicated samples with _0, _1,... to mapping file
    if not edges_supplied:
        mapping_header, mapping_rows = data['map'][0], data['map'][1:]
        newmap = [mapping_header]
        for file_index in range(len(coord_files)):
            for sample in mapping_rows:
                renamed = ['%s_%d' % (sample[0], file_index)]
                renamed.extend(sample[1:])
                newmap.append(renamed)
        data['map'] = newmap

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'], data['edges'])

    if len(data['coord'][1]) == 0:
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError('\n\nError: None of the sample IDs in the coordinates files were present in the mapping file.\n')

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    # Generate random output file name and create directories
    if opts.output_dir:
        create_dir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, 'jar')

    for needed_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(needed_dir)
        except OSError:
            # an already existing directory is fine; any other failure
            # (permissions, missing parent, ...) must not be hidden
            if not os.path.isdir(needed_dir):
                raise

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    # name of the first coordinate file, without its directory part
    filepath = coord_files[0]
    filename = filepath.strip().split('/')[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes,
               background_color, label_color,
               dir_path, data_dir_path, filename,
               ball_scale=ball_scale, arrow_colors=arrow_colors,
               user_supplied_edges=edges_supplied)
Exemplo n.º 4
0
def main():
    """Run make_3d_plots.py end to end (script deprecated for make_emperor.py).

    Flow:
      * print the deprecation warning and parse the command line;
      * for the 'invue' output format: load the coordinates, drop unmapped
        samples, call generate_3d_plots_invue and return;
      * otherwise validate the ellipsoid options, load the coordinates,
        optionally add custom axes, vectors and taxa (biplot) data, prepare
        the output directories, copy king.jar and call generate_3d_plots.

    Invalid option values are reported through option_parser.error.

    Raises:
        ValueError: more than three vector columns were requested,
            vectors_axes is out of range, vectors_path contains a character
            other than letters, digits, '_' or '.', or window_size is
            below 1.
        TypeError: int() raises TypeError for window_size.
        SystemExit: the coordinates and the mapping file share no samples.
        OSError: an output directory cannot be created and does not already
            exist.
    """
    print("\nWarning: make_3d_plots.py is being deprecated in favor of make_emperor.py, and will no longer be available in QIIME 1.8.0-dev.\n")

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    # NOTE(review): these membership tests tell the two methods apart only if
    # opts.scaling_method is a sequence of names; on a plain string 'scaled'
    # would also match inside 'unscaled' -- confirm the option type.
    plot_scaled = 'scaled' in opts.scaling_method
    plot_unscaled = 'unscaled' in opts.scaling_method

    if opts.output_format == 'invue':
        # validating the number of points for interpolation
        if opts.interpolation_points < 0:
            option_parser.error('The --interpolation_points should be ' +
                                'greater or equal to 0.')

        # make sure that coord file has internally consistent # of columns
        if not validate_coord_files(opts.coord_fname):
            option_parser.error('Every line of every coord file must ' +
                                'have the same number of columns.')

        # Open and get coord data
        data['coord'] = get_coord(opts.coord_fname, opts.ellipsoid_method)

        # remove any samples not present in mapping file
        remove_unmapped_samples(data['map'], data['coord'])

        # if no samples overlapped between mapping file and otu table, exit
        if len(data['coord'][0]) == 0:
            print("\nError: OTU table and mapping file had no samples in common\n")
            # SystemExit directly: does not depend on the site-provided exit()
            raise SystemExit(1)

        if opts.output_dir:
            create_dir(opts.output_dir, False)
            dir_path = opts.output_dir
        else:
            dir_path = './'

        # when a directory was given, name the output after its first
        # non-hidden entry
        filepath = opts.coord_fname
        if os.path.isdir(filepath):
            coord_files = [fname for fname in os.listdir(filepath)
                           if not fname.startswith('.')]
            filename = os.path.split(coord_files[0])[-1]
        else:
            filename = os.path.split(filepath)[-1]

        generate_3d_plots_invue(prefs, data, dir_path, filename,
                                opts.interpolation_points,
                                opts.polyhedron_points,
                                opts.polyhedron_offset)

        # finish script
        return

    # Potential conflicts
    if opts.custom_axes is not None and os.path.isdir(opts.coord_fname):
        # can't do averaged pcoa plots _and_ custom axes in the same plot
        option_parser.error("Please supply either custom axes or multiple coordinate "
                            "files, but not both.")

    # check that smoothness is an integer between 0 and 3
    try:
        ellipsoid_smoothness = int(opts.ellipsoid_smoothness)
    except (TypeError, ValueError):
        # only conversion failures; a bare except would also trap
        # KeyboardInterrupt and SystemExit
        option_parser.error("Please supply an integer ellipsoid smoothness "
                            "value.")
    if ellipsoid_smoothness < 0 or ellipsoid_smoothness > 3:
        option_parser.error("Please supply an ellipsoid smoothness value "
                            "between 0 and 3.")

    # check that opacity is a float between 0 and 1
    try:
        ellipsoid_alpha = float(opts.ellipsoid_opacity)
    except (TypeError, ValueError):
        option_parser.error("Please supply a number for ellipsoid opacity.")
    if ellipsoid_alpha < 0 or ellipsoid_alpha > 1:
        option_parser.error("Please supply an ellipsoid opacity value "
                            "between 0 and 1.")

    # check that ellipsoid method is valid
    ellipsoid_methods = ['IQR', 'sdev']
    if opts.ellipsoid_method not in ellipsoid_methods:
        option_parser.error("Please supply a valid ellipsoid method. "
                            "Valid methods are: " +
                            ', '.join(ellipsoid_methods) + ".")

    # gather ellipsoid drawing preferences
    ellipsoid_prefs = {"smoothness": ellipsoid_smoothness,
                       "alpha": ellipsoid_alpha}

    # make sure that coord file has internally consistent # of columns
    if not validate_coord_files(opts.coord_fname):
        option_parser.error('Every line of every coord file must ' +
                            'have the same number of columns.')

    # Open and get coord data
    data['coord'] = get_coord(opts.coord_fname, opts.ellipsoid_method)

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'])

    # if no samples overlapped between mapping file and otu table, exit
    if len(data['coord'][0]) == 0:
        print("\nError: OTU table and mapping file had no samples in common\n")
        raise SystemExit(1)

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)

        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    # process vectors if requested
    if opts.add_vectors:
        add_vectors = {}
        add_vectors['vectors'] = opts.add_vectors.split(',')
        add_vectors['weight_by_vector'] = opts.weight_by_vector
        # count the requested columns, not the keys of the settings dict
        # (the dict holds two keys here, so the old check could never fire)
        if len(add_vectors['vectors']) > 3:
            raise ValueError('You must add maximum 3 columns but %s' % opts.add_vectors)

        # Validating Vectors values
        if opts.vectors_algorithm:
            axes_number = len(data['coord'][1][1])
            if opts.vectors_axes < 0 or opts.vectors_axes > axes_number:
                raise ValueError('vectors_axes should be between 0 and the max number '
                                 'of samples/pcoa-axes: %d' % axes_number)
            # 0 stands for "use every available axis"
            if opts.vectors_axes == 0:
                opts.vectors_axes = axes_number
            add_vectors['vectors_axes'] = opts.vectors_axes
            valid_chars = '_.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
            for c in opts.vectors_path:
                if c not in valid_chars:
                    raise ValueError('vectors_path (%s) has invalid chars' % opts.vectors_path)
            add_vectors['vectors_output'] = {}
            add_vectors['vectors_algorithm'] = opts.vectors_algorithm
            add_vectors['eigvals'] = data['coord'][3]
            add_vectors['window_size'] = None

            # checks specific for the modified first difference algorithm
            if add_vectors['vectors_algorithm'] == 'wdiff':
                try:
                    add_vectors['window_size'] = int(opts.window_size)
                except TypeError:
                    raise TypeError('Specify --window_size as an integer')

                # sanity check as the value can only be greater or equal to one
                if add_vectors['window_size'] < 1:
                    raise ValueError('The value of window_size is invalid, '
                                     'the value must be greater than zero, not %d'
                                     % add_vectors['window_size'])

        else:
            add_vectors['vectors_algorithm'] = None
        add_vectors['vectors_path'] = opts.vectors_path
    else:
        add_vectors = None

    if opts.taxa_fname is not None:
        # get taxonomy counts
        # get list of sample_ids that haven't been removed
        sample_ids = data['coord'][0]
        # get taxa summaries for all sample_ids
        lineages, taxa_counts = get_taxa(opts.taxa_fname, sample_ids)
        data['taxa'] = {}
        data['taxa']['lineages'] = lineages
        data['taxa']['counts'] = taxa_counts

        # get average relative abundance of taxa
        data['taxa']['prevalence'] = get_taxa_prevalence(data['taxa']['counts'])
        # get coordinates of taxa (weighted mean of sample scores)
        data['taxa']['coord'] = get_taxa_coords(data['taxa']['counts'],
                                                data['coord'][1])

        # trim results, do NOT change order
        # check: https://github.com/qiime/qiime/issues/677
        remove_rare_taxa(data['taxa'], nkeep=opts.n_taxa_keep)

        # write taxa coords if requested
        if opts.biplot_output_file is not None:
            output = make_biplot_scores_output(data['taxa'])
            # context manager: the file is closed even if the write fails
            with open(opts.biplot_output_file, 'w') as fout:
                fout.write('\n'.join(output))

    if opts.output_dir:
        create_dir(opts.output_dir, False)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, 'jar')

    for needed_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(needed_dir)
        except OSError:
            # an already existing directory is fine; any other failure
            # (permissions, missing parent, ...) must not be hidden
            if not os.path.isdir(needed_dir):
                raise

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    filepath = opts.coord_fname
    if os.path.isdir(filepath):
        coord_files = [fname for fname in os.listdir(filepath)
                       if not fname.startswith('.')]
        filename = os.path.split(coord_files[0])[-1]
    else:
        filename = os.path.split(filepath)[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes, background_color, label_color,
               dir_path, data_dir_path, filename,
               ellipsoid_prefs=ellipsoid_prefs,
               add_vectors=add_vectors, plot_scaled=plot_scaled,
               plot_unscaled=plot_unscaled)
Exemplo n.º 5
0
def main():
    """Drive the comparison plot: parse options, load data, write the plot.

    The coordinate files come from opts.coord_fnames and are used as given
    (no splitting is done here). After loading them together with the
    optional edges file, samples missing from the mapping file are removed,
    custom axes are appended when requested, the output directories are
    created, king.jar is copied and generate_3d_plots is called.

    Invalid input (fewer than two coordinate files without an edges file, or
    coordinate files rejected by validate_coord_files) is reported through
    option_parser.error.

    Raises:
        ValueError: if no coordinates are left after removing the samples
            that are missing from the mapping file.
        OSError: if an output directory cannot be created and does not
            already exist.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    has_edges_file = opts.edges_file is not None

    if len(opts.coord_fnames) < 2 and not has_edges_file:
        option_parser.error('Please provide at least two ' +
                            'coordinate files or a custom edges file')

    # Open and get coord data (for multiple coords files)
    coord_files = opts.coord_fnames
    if not validate_coord_files(coord_files):
        option_parser.error('Every line of every coord file must ' +
                            'have the same number of columns.')
    data['edges'], data['coord'] = \
        get_multiple_coords(coord_files, opts.edges_file, opts.serial)

    # if the edges file wasn't supplied, we appended _i to each file's samples
    # therefore we now add duplicated samples with _0, _1,... to mapping file
    if not has_edges_file:
        original_map = data['map']
        newmap = [original_map[0]]
        for file_number in range(len(coord_files)):
            for sample in original_map[1:]:
                suffixed = ['%s_%d' % (sample[0], file_number)]
                suffixed.extend(sample[1:])
                newmap.append(suffixed)
        data['map'] = newmap

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'], data['edges'])

    if len(data['coord'][1]) == 0:
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError('\n\nError: None of the sample IDs in the coordinates files were present in the mapping file.\n')

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    # Generate random output file name and create directories
    if opts.output_dir:
        create_dir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, 'jar')

    for wanted_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(wanted_dir)
        except OSError:
            # tolerate "already there", surface everything else
            if not os.path.isdir(wanted_dir):
                raise

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    # name of the first coordinate file, without its directory part
    filepath = coord_files[0]
    filename = filepath.strip().split('/')[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes,
               background_color, label_color,
               dir_path, data_dir_path, filename,
               ball_scale=ball_scale, arrow_colors=arrow_colors,
               user_supplied_edges=has_edges_file)
Exemplo n.º 6
0
def main():
    """Run compare_3d_plots.py end to end (deprecated for make_emperor.py).

    Prints the deprecation warning, parses the command line, loads the
    coordinate files listed in opts.coord_fnames (plus the optional edges
    file), removes samples missing from the mapping file, appends custom
    axes when requested, creates the output directories, copies king.jar
    and calls generate_3d_plots.

    Invalid input (fewer than two coordinate files without an edges file, or
    coordinate files rejected by validate_coord_files) is reported through
    option_parser.error.

    Raises:
        ValueError: if no coordinates are left after removing the samples
            that are missing from the mapping file.
        OSError: if an output directory cannot be created and does not
            already exist.
    """
    print("\nWarning: compare_3d_plots.py is being deprecated in favor of make_emperor.py, and will no longer be available in QIIME 1.8.0-dev.\n")

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    (prefs, data, background_color, label_color,
     ball_scale, arrow_colors) = sample_color_prefs_and_map_data_from_options(opts)

    user_supplied_edges = opts.edges_file is not None

    if len(opts.coord_fnames) < 2 and not user_supplied_edges:
        option_parser.error("Please provide at least two " + "coordinate files or a custom edges file")

    # Open and get coord data (for multiple coords files)
    coord_files = opts.coord_fnames
    if not validate_coord_files(coord_files):
        option_parser.error("Every line of every coord file must " + "have the same number of columns.")
    data["edges"], data["coord"] = get_multiple_coords(coord_files, opts.edges_file, opts.serial)

    # if the edges file wasn't supplied, we appended _i to each file's samples
    # therefore we now add duplicated samples with _0, _1,... to mapping file
    if not user_supplied_edges:
        header_row = data["map"][0]
        sample_rows = data["map"][1:]
        newmap = [header_row]
        for i in range(len(coord_files)):
            for sample in sample_rows:
                newsample = ["%s_%d" % (sample[0], i)]
                newsample.extend(sample[1:])
                newmap.append(newsample)
        data["map"] = newmap

    # remove any samples not present in mapping file
    remove_unmapped_samples(data["map"], data["coord"], data["edges"])

    if len(data["coord"][1]) == 0:
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError("\n\nError: None of the sample IDs in the coordinates files were present in the mapping file.\n")

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data["map"], data["coord"])
        remove_nans(data["coord"])
        scale_custom_coords(custom_axes, data["coord"])

    # Generate random output file name and create directories
    if opts.output_dir:
        create_dir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = "./"

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, "qiime/support_files/jar/")

    data_dir_path = get_random_directory_name(output_dir=dir_path, return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, "jar")

    for target_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(target_dir)
        except OSError:
            # a directory that is already present is acceptable; failures
            # that leave no directory behind are propagated
            if not os.path.isdir(target_dir):
                raise

    shutil.copyfile(os.path.join(jar_path, "king.jar"), os.path.join(jar_dir_path, "king.jar"))

    # name of the first coordinate file, without its directory part
    filepath = coord_files[0]
    filename = filepath.strip().split("/")[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(
            prefs,
            data,
            custom_axes,
            background_color,
            label_color,
            dir_path,
            data_dir_path,
            filename,
            ball_scale=ball_scale,
            arrow_colors=arrow_colors,
            user_supplied_edges=user_supplied_edges,
        )
Exemplo n.º 7
0
def main():
    """Run the 3D-plot script end to end.

    Flow:
      * parse the command line and work out which scaling methods
        (scaled / unscaled) were requested in the comma-separated
        opts.scaling_method;
      * for the 'invue' output format: load the coordinates, drop unmapped
        samples, call generate_3d_plots_invue and return;
      * otherwise validate the ellipsoid options, load the coordinates,
        optionally add custom axes, vectors and taxa (biplot) data, prepare
        the output directories, copy king.jar and call generate_3d_plots.

    Invalid option values are reported through option_parser.error.

    Raises:
        ValueError: no valid scaling method was given, more than three
            vector columns were requested, vectors_axes is out of range,
            vectors_path contains a character other than letters, digits,
            '_' or '.', or window_size is below 1.
        TypeError: int() raises TypeError for window_size.
        SystemExit: the coordinates and the mapping file share no samples.
        OSError: an output directory cannot be created and does not already
            exist.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    (prefs, data, background_color, label_color,
     ball_scale, arrow_colors) = sample_color_prefs_and_map_data_from_options(opts)

    # case-insensitive match of each comma-separated token
    requested_methods = [method.lower() for method in opts.scaling_method.split(",")]
    plot_scaled = "scaled" in requested_methods
    plot_unscaled = "unscaled" in requested_methods

    if not (plot_scaled or plot_unscaled):
        raise ValueError("You must choose a valid scaling method (scaled or unscaled)")

    if opts.output_format == "invue":
        # validating the number of points for interpolation
        if opts.interpolation_points < 0:
            option_parser.error("The --interpolation_points should be " + "greater or equal to 0.")

        # make sure that coord file has internally consistent # of columns
        if not validate_coord_files(opts.coord_fname):
            option_parser.error("Every line of every coord file must " + "have the same number of columns.")

        # Open and get coord data
        data["coord"] = get_coord(opts.coord_fname, opts.ellipsoid_method)

        # remove any samples not present in mapping file
        remove_unmapped_samples(data["map"], data["coord"])

        # if no samples overlapped between mapping file and otu table, exit
        if len(data["coord"][0]) == 0:
            print("\nError: OTU table and mapping file had no samples in common\n")
            # SystemExit directly: does not depend on the site-provided exit()
            raise SystemExit(1)

        if opts.output_dir:
            create_dir(opts.output_dir, False)
            dir_path = opts.output_dir
        else:
            dir_path = "./"

        # when a directory was given, name the output after its first
        # non-hidden entry
        filepath = opts.coord_fname
        if os.path.isdir(filepath):
            coord_files = [fname for fname in os.listdir(filepath) if not fname.startswith(".")]
            filename = os.path.split(coord_files[0])[-1]
        else:
            filename = os.path.split(filepath)[-1]

        generate_3d_plots_invue(
            prefs, data, dir_path, filename, opts.interpolation_points, opts.polyhedron_points, opts.polyhedron_offset
        )

        # finish script
        return

    # Potential conflicts
    if opts.custom_axes is not None and os.path.isdir(opts.coord_fname):
        # can't do averaged pcoa plots _and_ custom axes in the same plot
        option_parser.error(
            "Please supply either custom axes or multiple coordinate "
            "files, but not both."
        )

    # check that smoothness is an integer between 0 and 3
    try:
        ellipsoid_smoothness = int(opts.ellipsoid_smoothness)
    except (TypeError, ValueError):
        # only conversion failures; a bare except would also trap
        # KeyboardInterrupt and SystemExit
        option_parser.error(
            "Please supply an integer ellipsoid smoothness "
            "value."
        )
    if ellipsoid_smoothness < 0 or ellipsoid_smoothness > 3:
        option_parser.error(
            "Please supply an ellipsoid smoothness value "
            "between 0 and 3."
        )

    # check that opacity is a float between 0 and 1
    try:
        ellipsoid_alpha = float(opts.ellipsoid_opacity)
    except (TypeError, ValueError):
        option_parser.error("Please supply a number for ellipsoid opacity.")
    if ellipsoid_alpha < 0 or ellipsoid_alpha > 1:
        option_parser.error(
            "Please supply an ellipsoid opacity value "
            "between 0 and 1."
        )

    # check that ellipsoid method is valid
    ellipsoid_methods = ["IQR", "sdev"]
    if opts.ellipsoid_method not in ellipsoid_methods:
        option_parser.error(
            "Please supply a valid ellipsoid method. "
            "Valid methods are: "
            + ", ".join(ellipsoid_methods)
            + "."
        )

    # gather ellipsoid drawing preferences
    ellipsoid_prefs = {"smoothness": ellipsoid_smoothness, "alpha": ellipsoid_alpha}

    # make sure that coord file has internally consistent # of columns
    if not validate_coord_files(opts.coord_fname):
        option_parser.error("Every line of every coord file must " + "have the same number of columns.")

    # Open and get coord data
    data["coord"] = get_coord(opts.coord_fname, opts.ellipsoid_method)

    # remove any samples not present in mapping file
    remove_unmapped_samples(data["map"], data["coord"])

    # if no samples overlapped between mapping file and otu table, exit
    if len(data["coord"][0]) == 0:
        print("\nError: OTU table and mapping file had no samples in common\n")
        raise SystemExit(1)

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)

        get_custom_coords(custom_axes, data["map"], data["coord"])
        remove_nans(data["coord"])
        scale_custom_coords(custom_axes, data["coord"])

    # process vectors if requested
    if opts.add_vectors:
        add_vectors = {}
        add_vectors["vectors"] = opts.add_vectors.split(",")
        add_vectors["weight_by_vector"] = opts.weight_by_vector
        # count the requested columns, not the keys of the settings dict
        # (the dict holds two keys here, so the old check could never fire)
        if len(add_vectors["vectors"]) > 3:
            raise ValueError("You must add maximum 3 columns but %s" % opts.add_vectors)

        # Validating Vectors values
        if opts.vectors_algorithm:
            axes_number = len(data["coord"][1][1])
            if opts.vectors_axes < 0 or opts.vectors_axes > axes_number:
                raise ValueError(
                    "vectors_axes should be between 0 and the max number "
                    "of samples/pcoa-axes: %d" % axes_number
                )
            # 0 stands for "use every available axis"
            if opts.vectors_axes == 0:
                opts.vectors_axes = axes_number
            add_vectors["vectors_axes"] = opts.vectors_axes
            valid_chars = "_.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
            for c in opts.vectors_path:
                if c not in valid_chars:
                    raise ValueError("vectors_path (%s) has invalid chars" % opts.vectors_path)
            add_vectors["vectors_output"] = {}
            add_vectors["vectors_algorithm"] = opts.vectors_algorithm
            add_vectors["eigvals"] = data["coord"][3]
            add_vectors["window_size"] = None

            # checks specific for the modified first difference algorithm
            if add_vectors["vectors_algorithm"] == "wdiff":
                try:
                    add_vectors["window_size"] = int(opts.window_size)
                except TypeError:
                    raise TypeError("Specify --window_size as an integer")

                # sanity check as the value can only be greater or equal to one
                if add_vectors["window_size"] < 1:
                    raise ValueError(
                        "The value of window_size is invalid, "
                        "the value must be greater than zero, not %d" % add_vectors["window_size"]
                    )

        else:
            add_vectors["vectors_algorithm"] = None
        add_vectors["vectors_path"] = opts.vectors_path
    else:
        add_vectors = None

    if opts.taxa_fname is not None:
        # get taxonomy counts
        # get list of sample_ids that haven't been removed
        sample_ids = data["coord"][0]
        # get taxa summaries for all sample_ids
        lineages, taxa_counts = get_taxa(opts.taxa_fname, sample_ids)
        data["taxa"] = {}
        data["taxa"]["lineages"] = lineages
        data["taxa"]["counts"] = taxa_counts

        # get average relative abundance of taxa
        data["taxa"]["prevalence"] = get_taxa_prevalence(data["taxa"]["counts"])
        remove_rare_taxa(data["taxa"], nkeep=opts.n_taxa_keep)
        # get coordinates of taxa (weighted mean of sample scores)
        data["taxa"]["coord"] = get_taxa_coords(data["taxa"]["counts"], data["coord"][1])

        # write taxa coords if requested
        if opts.biplot_output_file is not None:
            output = make_biplot_scores_output(data["taxa"])
            # context manager: the file is closed even if the write fails
            with open(opts.biplot_output_file, "w") as fout:
                fout.write("\n".join(output))

    if opts.output_dir:
        create_dir(opts.output_dir, False)
        dir_path = opts.output_dir
    else:
        dir_path = "./"

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, "qiime/support_files/jar/")

    data_dir_path = get_random_directory_name(output_dir=dir_path, return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, "jar")

    for target_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(target_dir)
        except OSError:
            # a directory that is already present is acceptable; failures
            # that leave no directory behind are propagated
            if not os.path.isdir(target_dir):
                raise

    shutil.copyfile(os.path.join(jar_path, "king.jar"), os.path.join(jar_dir_path, "king.jar"))

    filepath = opts.coord_fname
    if os.path.isdir(filepath):
        coord_files = [fname for fname in os.listdir(filepath) if not fname.startswith(".")]
        filename = os.path.split(coord_files[0])[-1]
    else:
        filename = os.path.split(filepath)[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(
            prefs,
            data,
            custom_axes,
            background_color,
            label_color,
            dir_path,
            data_dir_path,
            filename,
            ellipsoid_prefs=ellipsoid_prefs,
            add_vectors=add_vectors,
            plot_scaled=plot_scaled,
            plot_unscaled=plot_unscaled,
        )
Exemplo n.º 8
0
def preprocess_coords_file(coords_header,
                           coords_data,
                           coords_eigenvals,
                           coords_pct,
                           mapping_header,
                           mapping_data,
                           custom_axes=None,
                           jackknifing_method=None,
                           is_comparison=False):
    """Process PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files; zero unless a comparison plot is
    requested

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the master
    set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    multiple_files = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; the exception is raised explicitly
    # instead of through an assert statement so the check is not stripped when
    # the interpreter runs with -O
    if is_comparison and not multiple_files:
        raise AssertionError("Cannot process a comparison with " +
                             "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number of PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    elif multiple_files and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0], coords_eigenvals[0],
                       coords_pct[0]]

        # support pcoas must be a list of lists where each list contains
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average,\
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL works properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            # NOTE(review): remove_nans may drop samples from coords_file while
            # identifiers, coords_low and coords_high are returned unfiltered;
            # confirm this cannot leave them out of sync
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = ones([coords_high.shape[0], axes]) * \
                0.00001
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, master_pcoa[3],\
            coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif multiple_files and is_comparison:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)

        # tag each header with the number in which those coords came in
        out_headers = []
        for index in range(clones):
            out_headers.extend(
                [element + '_%d' % index for element in coords_header[index]])

        # stack every set of coordinates with a single call; a lone file is
        # passed through untouched and an empty input yields an empty list
        if clones > 1:
            out_coords = vstack(coords_data)
        elif clones == 1:
            out_coords = coords_data[0]
        else:
            out_coords = []

        if clones:
            # the eigenvalues and percents explained are really the ones
            # belonging to the first set of coordinates that was passed
            coords_eigenvals = coords_eigenvals[0]
            coords_pct = coords_pct[0]

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # in order for the custom axes to be added into the original
            # coordinates, the mapping file rows need the same suffix that was
            # appended to the sample identifiers of the coordinates
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend([[element[0] + '_%d' % index] +
                                     element[1::] for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None,\
        None, clones
Exemplo n.º 9
0
def preprocess_coords_file(
    coords_header,
    coords_data,
    coords_eigenvals,
    coords_pct,
    mapping_header,
    mapping_data,
    custom_axes=None,
    jackknifing_method=None,
    is_comparison=False,
):
    """Process PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files; zero unless a comparison plot is
    requested

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the master
    set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    got_file_list = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; an explicit raise keeps the
    # documented AssertionError even when asserts are disabled with -O
    if is_comparison and not got_file_list:
        raise AssertionError("Cannot process a comparison with " + "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number of PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    elif got_file_list and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0], coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contains
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average, identifiers = summarize_pcoas(
            master_pcoa, support_pcoas, method=jackknifing_method, apply_procrustes=False
        )

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL works properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            # NOTE(review): remove_nans may drop samples from coords_file while
            # identifiers, coords_low and coords_high are returned unfiltered;
            # confirm this cannot leave them out of sync
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = ones([coords_high.shape[0], axes]) * 0.00001
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, master_pcoa[3], coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif got_file_list and is_comparison:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)

        # tag each header with the number in which those coords came in
        out_headers = []
        for index in range(clones):
            out_headers.extend([element + "_%d" % index for element in coords_header[index]])

        # stack every set of coordinates with a single call; a lone file is
        # passed through untouched and an empty input yields an empty list
        if clones > 1:
            out_coords = vstack(coords_data)
        elif clones == 1:
            out_coords = coords_data[0]
        else:
            out_coords = []

        if clones:
            # the eigenvalues and percents explained are really the ones
            # belonging to the first set of coordinates that was passed
            coords_eigenvals = coords_eigenvals[0]
            coords_pct = coords_pct[0]

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # in order for the custom axes to be added into the original
            # coordinates, the mapping file rows need the same suffix that was
            # appended to the sample identifiers of the coordinates
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend([[element[0] + "_%d" % index] + element[1::] for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None, None, clones
Exemplo n.º 10
0
def _load_coord_data(option_parser, opts, data):
    """Validate and load the coordinates into data['coord'], exit if empty."""
    # make sure that coord file has internally consistent # of columns
    if not validate_coord_files(opts.coord_fname):
        option_parser.error('Every line of every coord file must ' +
                            'have the same number of columns.')

    # Open and get coord data
    data['coord'] = get_coord(opts.coord_fname, opts.ellipsoid_method)

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'])

    # if no samples are left after dropping the unmapped ones, exit
    if len(data['coord'][0]) == 0:
        # parenthesized single-argument form prints the same text whether or
        # not print is a statement
        print("\nError: OTU table and mapping file had no samples in common\n")
        exit(1)


def _get_output_dir(opts):
    """Create opts.output_dir when given and return it, else return './'."""
    if opts.output_dir:
        create_dir(opts.output_dir, False)
        return opts.output_dir
    return './'


def _get_plot_filename(coord_path):
    """Return the base name of coord_path or of its first visible entry."""
    if os.path.isdir(coord_path):
        # hidden files (leading dot) are ignored; an IndexError is raised if
        # the directory has no other entries
        coord_files = [fname for fname in os.listdir(coord_path)
                       if not fname.startswith('.')]
        return os.path.split(coord_files[0])[-1]
    return os.path.split(coord_path)[-1]


def main():
    """Parse the command line options and generate the requested 3D plots.

    When the output format is 'invue' the coordinates are validated, loaded
    and handed to generate_3d_plots_invue, after which the function returns.

    Otherwise the ellipsoid options are validated, the coordinates are loaded,
    the optional taxa (biplot) information and custom axes are processed, the
    output/data/jar directories are prepared, king.jar is copied next to the
    output and generate_3d_plots is called (if that name is available).

    Invalid option values are reported through option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    if opts.output_format == 'invue':
        # validating the number of points for interpolation
        if opts.interpolation_points < 0:
            option_parser.error('The --interpolation_points should be ' +
                                'greater or equal to 0.')

        # the coord files are validated a single time here; the check used to
        # be repeated twice in a row with identical arguments
        _load_coord_data(option_parser, opts, data)

        dir_path = _get_output_dir(opts)
        filename = _get_plot_filename(opts.coord_fname)

        generate_3d_plots_invue(prefs, data, dir_path, filename,
                                opts.interpolation_points,
                                opts.polyhedron_points,
                                opts.polyhedron_offset)

        # finish script
        return

    # Potential conflicts
    if opts.custom_axes is not None and os.path.isdir(opts.coord_fname):
        # can't do averaged pcoa plots _and_ custom axes in the same plot
        option_parser.error("Please supply either custom axes or multiple "
                            "coordinate files, but not both.")

    # check that smoothness is an integer between 0 and 3; only conversion
    # failures are caught so unrelated errors are not reported as bad input
    try:
        ellipsoid_smoothness = int(opts.ellipsoid_smoothness)
    except (TypeError, ValueError):
        option_parser.error("Please supply an integer ellipsoid smoothness "
                            "value.")
    if ellipsoid_smoothness < 0 or ellipsoid_smoothness > 3:
        option_parser.error("Please supply an ellipsoid smoothness value "
                            "between 0 and 3.")

    # check that opacity is a float between 0 and 1
    try:
        ellipsoid_alpha = float(opts.ellipsoid_opacity)
    except (TypeError, ValueError):
        option_parser.error("Please supply a number for ellipsoid opacity.")
    if ellipsoid_alpha < 0 or ellipsoid_alpha > 1:
        option_parser.error("Please supply an ellipsoid opacity value "
                            "between 0 and 1.")

    # check that ellipsoid method is valid
    ellipsoid_methods = ['IQR', 'sdev']
    if opts.ellipsoid_method not in ellipsoid_methods:
        option_parser.error("Please supply a valid ellipsoid method. "
                            "Valid methods are: " +
                            ', '.join(ellipsoid_methods) + ".")

    # gather ellipsoid drawing preferences
    ellipsoid_prefs = {}
    ellipsoid_prefs["smoothness"] = ellipsoid_smoothness
    ellipsoid_prefs["alpha"] = ellipsoid_alpha

    _load_coord_data(option_parser, opts, data)

    if opts.taxa_fname is not None:
        # get taxonomy counts
        # get list of sample_ids that haven't been removed
        sample_ids = data['coord'][0]
        # get taxa summaries for all sample_ids
        lineages, taxa_counts = get_taxa(opts.taxa_fname, sample_ids)
        data['taxa'] = {}
        data['taxa']['lineages'] = lineages
        data['taxa']['counts'] = taxa_counts

        # get average relative abundance of taxa
        data['taxa']['prevalence'] = \
            get_taxa_prevalence(data['taxa']['counts'])
        remove_rare_taxa(data['taxa'], nkeep=opts.n_taxa_keep)
        # get coordinates of taxa (weighted mean of sample scores)
        data['taxa']['coord'] = get_taxa_coords(data['taxa']['counts'],
                                                data['coord'][1])

        # write taxa coords if requested
        if opts.biplot_output_file is not None:
            output = make_biplot_scores_output(data['taxa'])
            # the with block closes the file even if the write fails
            with open(opts.biplot_output_file, 'w') as fout:
                fout.write('\n'.join(output))

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    dir_path = _get_output_dir(opts)

    qiime_dir = get_qiime_project_dir()

    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)

    # best effort: an OSError from mkdir is ignored
    try:
        os.mkdir(data_dir_path)
    except OSError:
        pass

    data_file_path = data_dir_path

    jar_dir_path = os.path.join(dir_path, 'jar')

    # best effort: an OSError from mkdir is ignored
    try:
        os.mkdir(jar_dir_path)
    except OSError:
        pass

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    filename = _get_plot_filename(opts.coord_fname)

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes, background_color, label_color,
               dir_path, data_file_path, filename,
               ellipsoid_prefs=ellipsoid_prefs)