template_DE_stats_filename, local_dir, num_runs, project_id, analysis_type, col_to_rank_genes, logFC_name, pvalue_name, )


# ### Gene summary table

# In[15]:


# Generate the summary table via `ranking.generate_summary_table`.
# NOTE(review): the 'gene' argument presumably selects gene-level (rather than
# pathway-level) ranking -- confirm against the helper's signature.
summary_gene_ranks = ranking.generate_summary_table(
    template_DE_stats_filename,
    template_DE_stats,
    simulated_DE_summary_stats,
    col_to_rank_genes,
    local_dir,
    'gene',
    params)

# Preview the first rows of the summary table
summary_gene_ranks.head()


# In[16]:


# Check whether there are any NaN values; there should not be any
summary_gene_ranks.isna().any()


# In[17]:


# Write the summary table to `gene_summary_filename` as a tab-separated file
summary_gene_ranks.to_csv(gene_summary_filename, sep='\t')


# ### Compare gene ranking
analysis_type, col_to_rank_genes, logFC_name, pvalue_name, )

# ## Gene summary table
#
# Note: Using DESeq, genes with NaN in the `Adj P-value (Real)` column are those genes flagged because of the `cooksCutoff` parameter. Cook's distance is used as a diagnostic to tell whether a single sample has a count that has a disproportionate impact on the log fold change and p-values. These genes are flagged with an NA in the pvalue and padj columns of the result table. For more information, see the [DESeq FAQs](https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#pvaluesNA)

# +
# Generate the summary table via `ranking.generate_summary_table`.
# NOTE(review): the "gene" argument presumably selects gene-level (rather than
# pathway-level) ranking -- confirm against the helper's signature.
summary_gene_ranks = ranking.generate_summary_table(
    template_DE_stats_filename,
    template_DE_stats,
    simulated_DE_summary_stats,
    col_to_rank_genes,
    local_dir,
    "gene",
    params,
)

# Preview the first rows of the summary table
summary_gene_ranks.head()
# -

# Show which columns contain NaN values (assuming `summary_gene_ranks` is a
# pandas DataFrame); see the note above about `Adj P-value (Real)`.
summary_gene_ranks.isna().any()

# Write the summary table to `gene_summary_filename` as a tab-separated file
summary_gene_ranks.to_csv(gene_summary_filename, sep="\t")

# ## Compare gene ranking
# Studies have found that some genes are more likely to be differentially expressed even across a wide range of experimental designs. These *generic genes* are not necessarily specific to the biological process being studied but instead represent a more systematic change.