Example #1
# load_metadata, check_column_exists, and the AXIOME3 exceptions come from
# the surrounding AXIOME3 package.
def validate_metadata(metadata_path, environmental_metadata_path,
                      fill_variable):
    # Load metadata via the QIIME2 metadata API;
    # loading also verifies that the metadata is valid
    try:
        metadata_df = load_metadata(metadata_path)
    except MetadataFileError as err:
        return 400, str(err)

    # Loading the environmental metadata likewise validates it
    try:
        environmental_metadata_df = load_metadata(environmental_metadata_path)
    except MetadataFileError as err:
        return 400, str(err)

    # Check that the user-specified column actually exists in the metadata file
    try:
        check_column_exists(metadata_df, fill_variable)
    except AXIOME3PipelineError as err:
        return 400, str(err)

    return 200, "Ok"
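
A minimal usage sketch; the file paths and the "Depth" fill variable are illustrative values, not taken from the original code:

status, message = validate_metadata("sample_metadata.tsv",
                                    "environmental_metadata.tsv",
                                    "Depth")
if status != 200:
    print("Metadata validation failed:", message)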
Example #2
import pandas as pd

# Helper functions such as check_artifact_type, collapse_taxa, rename_taxa,
# and load_metadata come from the surrounding AXIOME3 package.
def prep_bubbleplot(feature_table_artifact_path,
                    taxonomy_artifact_path,
                    metadata_path=None,
                    level="asv",
                    groupby_taxa="phylum",
                    abundance_threshold=0.1,
                    keyword=None):
    feature_table_artifact = check_artifact_type(feature_table_artifact_path,
                                                 "feature_table")
    taxonomy_artifact = check_artifact_type(taxonomy_artifact_path, "taxonomy")
    collapsed_df = collapse_taxa(feature_table_artifact, taxonomy_artifact,
                                 level)

    original_taxa = pd.Series(collapsed_df["Taxon"])
    row_id = pd.Series(collapsed_df.index)
    renamed_taxa = rename_taxa(original_taxa, row_id)

    percent_df = calculate_percent_value(
        collapsed_df.drop(["Taxon"], axis="columns"))
    percent_df["SpeciesName"] = renamed_taxa

    taxa_group = group_by_taxa(original_taxa, groupby_taxa, level)
    percent_df["TaxaGroup"] = taxa_group

    filter_criteria = filter_by_keyword(original_taxa, keyword)
    filtered_df = percent_df.loc[filter_criteria, :]

    long_df = pd.melt(filtered_df,
                      id_vars=['SpeciesName', 'TaxaGroup'],
                      var_name="SampleName",
                      value_name="Percentage")

    abundance_filtered_df = filter_by_abundance(long_df, "Percentage",
                                                abundance_threshold)
    rounded_abundance_filtered_df = round_percentage(abundance_filtered_df,
                                                     "Percentage", 3)
    sorted_df = alphabetical_sort_df(rounded_abundance_filtered_df,
                                     ["TaxaGroup", "SpeciesName"])
    # Make SpeciesName column category to avoid automatic sorting
    sorted_df['SpeciesName'] = pd.Categorical(
        sorted_df['SpeciesName'],
        categories=sorted_df['SpeciesName'].unique(),
        ordered=True)
    # Join metadata with bubbleplot df
    if metadata_path is not None:
        metadata_df = load_metadata(metadata_path)
        merged_df = sorted_df.merge(metadata_df,
                                    how="inner",
                                    left_on="SampleName",
                                    right_index=True)

        return merged_df

    return sorted_df
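
A usage sketch, assuming the QIIME2 artifacts already exist on disk; all paths and parameter values below are illustrative. The result is a long-format pandas DataFrame ready for bubble plot rendering:

bubble_df = prep_bubbleplot("feature_table.qza",
                            "taxonomy.qza",
                            metadata_path="sample_metadata.tsv",
                            level="genus",
                            abundance_threshold=0.5)
print(bubble_df.head())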
Example #3
def validate_metadata(metadata_path, target_primary, target_secondary):
    # Load metadata via the QIIME2 metadata API;
    # loading also verifies that the metadata is valid
    try:
        metadata_df = load_metadata(metadata_path)
    except MetadataFileError as err:
        return 400, str(err)

    # Check that the user-specified columns actually exist in the metadata file
    try:
        check_column_exists(metadata_df, target_primary, target_secondary)
    except AXIOME3PipelineError as err:
        return 400, str(err)

    return 200, "Ok"
Example #4
import pandas as pd
from plotnine import (ggplot, aes, geom_point, theme_bw, theme,
                      element_blank, element_line, element_rect,
                      element_text, xlab, ylab)

# AXIOME3Error and helpers such as load_metadata, convert_col_dtype,
# add_fill_colours_from_users, and add_discrete_shape come from the
# surrounding AXIOME3 package.
def generate_pcoa_plot(pcoa,
                       metadata,
                       colouring_variable,
                       shape_variable=None,
                       primary_dtype="category",
                       secondary_dtype="category",
                       palette='Paired',
                       brewer_type='qual',
                       alpha=0.9,
                       stroke=0.6,
                       point_size=6,
                       x_axis_text_size=10,
                       y_axis_text_size=10,
                       legend_title_size=10,
                       legend_text_size=10,
                       PC_axis1=1,
                       PC_axis2=2):

    # The two PC axes must differ
    if PC_axis1 == PC_axis2:
        raise AXIOME3Error("PC axis one and PC axis two cannot be equal!")

    # Load metadata file
    metadata_df = load_metadata(metadata)

    # Inner join metadata file with ordinations
    pcoa_coords = pcoa.samples
    pcoa_data_samples = pd.merge(pcoa_coords,
                                 right=metadata_df,
                                 left_index=True,
                                 right_index=True)

    # Make x and y axis labels
    proportions = pcoa.proportion_explained

    x_explained_idx = PC_axis1 - 1
    y_explained_idx = PC_axis2 - 1
    pc_1 = 'Axis ' + str(PC_axis1)
    pc_2 = 'Axis ' + str(PC_axis2)

    x_explained = str(round(proportions.iloc[x_explained_idx] * 100, 1))
    y_explained = str(round(proportions.iloc[y_explained_idx] * 100, 1))

    # Convert user specified columns to category
    # **BIG ASSUMPTION HERE**
    pcoa_data_samples = convert_col_dtype(pcoa_data_samples,
                                          colouring_variable, primary_dtype)

    if shape_variable is not None:
        pcoa_data_samples = convert_col_dtype(pcoa_data_samples,
                                              shape_variable, secondary_dtype)

    # Pre-format target variables
    primary_target_fill = str(colouring_variable)

    if shape_variable is not None:
        secondary_target_fill = str(shape_variable)

        ggplot_obj = ggplot(
            pcoa_data_samples,
            aes(x=pc_1,
                y=pc_2,
                fill=primary_target_fill,
                shape=secondary_target_fill))
    else:
        ggplot_obj = ggplot(pcoa_data_samples,
                            aes(x=pc_1, y=pc_2, fill=primary_target_fill))

    # Plot the data
    pcoa_plot = (
        ggplot_obj + geom_point(size=point_size, alpha=alpha, stroke=stroke) +
        theme_bw() +
        theme(panel_grid=element_blank(),
              line=element_line(colour='black'),
              panel_border=element_rect(colour='black'),
              legend_title=element_text(size=legend_title_size, face='bold'),
              legend_key=element_blank(),
              legend_text=element_text(size=legend_text_size),
              axis_title_x=element_text(size=x_axis_text_size),
              axis_title_y=element_text(size=y_axis_text_size),
              legend_key_height=5,
              text=element_text(family='Arial', colour='black')) +
        xlab(pc_1 + ' (' + x_explained + '%)') +
        ylab(pc_2 + ' (' + y_explained + '%)'))

    # Custom colours
    color_name = str(colouring_variable)
    pcoa_plot = add_fill_colours_from_users(pcoa_plot, color_name, palette,
                                            brewer_type)

    # Custom shapes
    if shape_variable is not None:
        shape_len = len(pcoa_data_samples[shape_variable].unique())
        shape_name = str(shape_variable)

        pcoa_plot = add_discrete_shape(pcoa_plot, shape_len, shape_name)

    return pcoa_plot
Example #5
import sys

# args_parse, convert_qiime2_2_skbio, load_metadata, check_column_exists,
# and generate_pcoa_plot are imported from the surrounding AXIOME3 package.
if __name__ == "__main__":
    parser = args_parse()

    # Print the help message if no arguments are supplied
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()

    # Load QIIME2 PCoA result and convert to PCoA object
    pcoa = convert_qiime2_2_skbio(args.pcoa_qza)

    # Load metadata
    metadata_df = load_metadata(args.metadata)

    # Check if metadata has target columns
    check_column_exists(metadata_df, args.target_primary,
                        args.target_secondary)

    # Generate PCoA plot
    pcoa_plot = generate_pcoa_plot(pcoa=pcoa,
                                   metadata=args.metadata,
                                   colouring_variable=args.target_primary,
                                   shape_variable=args.target_secondary,
                                   point_size=args.point_size,
                                   alpha=args.alpha,
                                   stroke=args.stroke,
                                   PC_axis1=args.pc_axis_one,
                                   PC_axis2=args.pc_axis_two)
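
The script builds pcoa_plot but never renders or saves it. Assuming plotnine (which the ggplot calls in Example #4 use), the figure could be written out at the end of the block; the file name and resolution are illustrative:

    pcoa_plot.save("pcoa_plot.png", dpi=300)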
Example #6
import os

import pandas as pd

def prep_triplot_input(sample_metadata_path,
                       env_metadata_path,
                       feature_table_artifact_path,
                       taxonomy_artifact_path,
                       sampling_depth=0,
                       ordination_collapse_level="asv",
                       wascores_collapse_level="phylum",
                       dissimilarity_index="Bray-Curtis",
                       R2_threshold=0.1,
                       pval_threshold=0.05,
                       wa_threshold=0.1,
                       PC_axis_one=1,
                       PC_axis_two=2,
                       output_dir='.'):

    # Load sample metadata
    sample_metadata_df = load_metadata(sample_metadata_path)
    # Load environmental metadata and drop rows with missing values
    # (should this warn the user?)
    env_metadata_df = load_env_metadata(env_metadata_path)
    env_metadata_df = env_metadata_df.dropna()

    # Load feature table and collapse
    feature_table_artifact = check_artifact_type(feature_table_artifact_path,
                                                 "feature_table")
    taxonomy_artifact = check_artifact_type(taxonomy_artifact_path, "taxonomy")
    ordination_collapsed_df = collapse_taxa(feature_table_artifact,
                                            taxonomy_artifact, sampling_depth,
                                            ordination_collapse_level)
    abundance_collapsed_df = collapse_taxa(feature_table_artifact,
                                           taxonomy_artifact, sampling_depth,
                                           wascores_collapse_level)

    # Rename taxa for wascores collapsed df
    original_taxa = pd.Series(abundance_collapsed_df["Taxon"])
    row_id = pd.Series(abundance_collapsed_df.index)
    renamed_taxa = rename_taxa(original_taxa, row_id)

    abundance_collapsed_df['Taxa'] = renamed_taxa
    abundance_collapsed_df_reindexed = abundance_collapsed_df.set_index('Taxa')
    abundance_collapsed_df_reindexed = abundance_collapsed_df_reindexed.drop(
        ['Taxon'], axis="columns")

    # transpose feature table so that it has samples as rows, taxa/ASV as columns
    ordination_transposed_df = ordination_collapsed_df.drop(['Taxon'],
                                                            axis="columns").T
    abundance_transposed_df = abundance_collapsed_df_reindexed.T

    # Remove samples that have total counts <= 5 (R complains if total count <= 5)
    count_filtered_df = filter_by_total_count(ordination_transposed_df)

    # Find sample intersection of feature table, sample metadata, and environmental metadata
    (intersection_feature_table_df,
     intersection_abundance_df,
     intersection_sample_metadata_df,
     intersection_environmental_metadata_df,
     sample_summary) = find_sample_intersection(count_filtered_df,
                                                abundance_transposed_df,
                                                sample_metadata_df,
                                                env_metadata_df)

    process_input_with_R(intersection_feature_table_df,
                         intersection_abundance_df,
                         intersection_sample_metadata_df,
                         intersection_environmental_metadata_df,
                         VEGDIST_OPTIONS[dissimilarity_index], R2_threshold,
                         pval_threshold, wa_threshold, PC_axis_one,
                         PC_axis_two, output_dir)

    # After a successful subprocess call, the intermediate output files below
    # should exist in output_dir (is there a more elegant way to do this?)
    merged_df_path = os.path.join(output_dir, "processed_merged_df.csv")
    proj_arrow_df_path = os.path.join(output_dir,
                                      "processed_vector_arrow_df.csv")
    filtered_wa_df_path = os.path.join(output_dir, "processed_wa_df.csv")
    proj_env_df_path = os.path.join(output_dir, "processed_projection_df.csv")
    proportion_df_path = os.path.join(output_dir,
                                      "processed_proportion_explained_df.csv")

    # Read back into pandas dataframes
    merged_df = pd.read_csv(merged_df_path)
    renamed_vector_arrow_df = pd.read_csv(proj_arrow_df_path)
    filtered_wascores_df = pd.read_csv(filtered_wa_df_path)
    proportion_explained = pd.read_csv(proportion_df_path)
    projection_df = pd.read_csv(proj_env_df_path)

    return (merged_df, renamed_vector_arrow_df, filtered_wascores_df,
            proportion_explained, projection_df, sample_summary)
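
A usage sketch under the same assumptions, with illustrative paths; the five returned DataFrames plus the sample summary feed the downstream triplot rendering:

(merged_df, arrow_df, wascores_df,
 proportions_df, projection_df, summary) = prep_triplot_input(
     "sample_metadata.tsv",
     "env_metadata.tsv",
     "feature_table.qza",
     "taxonomy.qza",
     dissimilarity_index="Bray-Curtis",
     output_dir="triplot_output")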