def main(): parser = argparse.ArgumentParser("TCGA", description="TCGA formatted results generation") # Logging flag parser.add_argument( "-d", "--debug", action="store_const", const=logging.DEBUG, dest="level", help="Enable debug logging." ) parser.set_defaults(level=logging.INFO) # Required flags parser.add_argument("-w", "--db_connect", required=True, help="Name of desired miRbase.") parser.add_argument("-g", "--genome_version", required=True, choices=["hg38"], help="Genome Version of Annotation.") parser.add_argument("-e", "--species_code", required=True, choices=["hsa"], help="Organism species code.") parser.add_argument("-s", "--sam_path", required=True, help="Path to directory containing bams.") parser.add_argument("-p", "--mirna_species", required=True, help="Path to mirna_species.txt") parser.add_argument("-x", "--crossmapped", required=True, help="Path to crossmapped.txt") parser.add_argument("-i", "--isoforms", required=True, help="Path to isoforms.txt") parser.add_argument("-u", "--uuid", required=True, help="UUID/GDC_ID for the harmonized BAM.") parser.add_argument("-r", "--barcode", required=True, help="BAM barcode") # Optional DB Flags parser.add_argument("-y", "--db_cred_s3url", required=False, help="String s3url of the postgres db_cred file") parser.add_argument("-z", "--s3cfg_path", required=False, help="Path to the s3cfg file.") args = parser.parse_args() connect_path = args.db_connect genome_version = args.genome_version species_code = args.species_code sam_path = args.sam_path mirna_species = args.mirna_species crossmapped = args.crossmapped isoforms = args.isoforms uuid = args.uuid barcode = args.barcode if args.db_cred_s3url: db_cred_s3url = args.db_cred_s3url s3cfg_path = args.s3cfg_path else: db_cred_s3url = None logger = pipe_util.setup_logging("mir_profiler_tcga", args, uuid) if db_cred_s3url is not None: conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger) engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict)) else: # local sqllite case sqlite_name = "mir_profiler_tcga" + uuid + ".db" engine_path = "sqlite:///" + sqlite_name engine = sqlalchemy.create_engine(engine_path, isolation_level="SERIALIZABLE") # Generate TCGA formatted results logger.info("Beginning: TCGA formatted results generation") tcga_CMD = [ "perl", "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/custom_output/tcga/tcga.pl", "-d", connect_path, "-o", species_code, "-g", genome_version, "-s", sam_path, "-r", mirna_species, "-c", crossmapped, "-i", isoforms, ] output = pipe_util.do_command(tcga_CMD, logger) df = time_util.store_time(uuid, tcga_CMD, output, logger) df["bam_name"] = barcode unique_key_dict = {"uuid": uuid, "bam_name": barcode} table_name = "time_mem_mir_tcga" df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger) logger.info("Completed: TCGA formatted results generation")
import argparse
import logging

import sqlalchemy

# Project-local helper modules (assumed importable as top-level modules)
import df_util
import pipe_util
import time_util


def main():
    parser = argparse.ArgumentParser('miRNA matrix mimat development', description='Mature miRNA gene expression matrix generation')

    # Logging flag
    parser.add_argument('-d', '--debug', action='store_const', const=logging.DEBUG, dest='level', help='Enable debug logging.')
    parser.set_defaults(level=logging.INFO)

    # Required flags
    parser.add_argument('-w', '--db_connect', required=True, help='Name of the desired miRBase database.')
    parser.add_argument('-e', '--species_code', required=True, choices=['hsa'], help='Organism species code.')
    parser.add_argument('-s', '--sam_path', required=True, help='Path to SAM file')
    parser.add_argument('-m', '--mirna_path', required=True, help='Path to miRNA.txt file')
    parser.add_argument('-x', '--crossmapped_path', required=True, help='Path to crossmapped.txt file')
    parser.add_argument('-u', '--uuid', required=True, help='UUID/GDC_ID for the harmonized BAM.')
    parser.add_argument('-r', '--barcode', required=True, help='BAM barcode')

    # Optional DB flags
    parser.add_argument('-y', '--db_cred_s3url', required=False, help='String s3url of the postgres db_cred file')
    parser.add_argument('-z', '--s3cfg_path', required=False, help='Path to the s3cfg file.')

    args = parser.parse_args()

    db_connect = args.db_connect
    species_code = args.species_code
    sam_path = args.sam_path
    mirna_path = args.mirna_path
    crossmapped_path = args.crossmapped_path
    uuid = args.uuid
    barcode = args.barcode
    if args.db_cred_s3url:
        db_cred_s3url = args.db_cred_s3url
        s3cfg_path = args.s3cfg_path
    else:
        db_cred_s3url = None

    logger = pipe_util.setup_logging('mir_profiler_mimat', args, uuid)

    if db_cred_s3url is not None:
        conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:  # local sqlite case
        sqlite_name = 'mir_profiler_mimat' + uuid + '.db'
        engine_path = 'sqlite:///' + sqlite_name
        engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    # Generate the mature miRNA (MIMAT) expression matrix from the alignment annotations
    logger.info('Beginning: Mature miRNA gene expression matrix generation')
    mimat_CMD = [
        'perl',
        '/home/ubuntu/bin/mirna-profiler/v0.2.7/code/library_stats/expression_matrix_mimat.pl',
        '-d', db_connect,
        '-o', species_code,
        '-s', sam_path,
        '-r', mirna_path,
        '-c', crossmapped_path,
    ]
    output = pipe_util.do_command(mimat_CMD, logger)

    # Store the command's time/memory profile, keyed on UUID and barcode
    df = time_util.store_time(uuid, mimat_CMD, output, logger)
    df['bam_name'] = barcode
    unique_key_dict = {'uuid': uuid, 'bam_name': barcode}
    table_name = 'time_mem_mir_expn_mimat'
    df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
    logger.info('Completed: Mature miRNA gene expression matrix generation')


if __name__ == '__main__':
    main()
import argparse
import logging

import sqlalchemy

# Project-local helper modules (assumed importable as top-level modules)
import df_util
import pipe_util
import time_util


def main():
    parser = argparse.ArgumentParser('SAM alignment stats', description='Generate alignment stats for the miRNA in the annotated SAM file')

    # Logging flag
    parser.add_argument('-d', '--debug', action='store_const', const=logging.DEBUG, dest='level', help='Enable debug logging.')
    parser.set_defaults(level=logging.INFO)

    # Required flags
    parser.add_argument('-s', '--sam_path', required=True, help='Path to SAM file')
    parser.add_argument('-a', '--adapter_path', required=True, help='Path to adapter report')
    parser.add_argument('-u', '--uuid', required=True, help='UUID/GDC_ID for the harmonized BAM.')
    parser.add_argument('-r', '--barcode', required=True, help='BAM barcode')

    # Optional DB flags
    parser.add_argument('-y', '--db_cred_s3url', required=False, help='String s3url of the postgres db_cred file')
    parser.add_argument('-z', '--s3cfg_path', required=False, help='Path to the s3cfg file.')

    args = parser.parse_args()

    sam_path = args.sam_path
    adapter_path = args.adapter_path
    uuid = args.uuid
    barcode = args.barcode
    if args.db_cred_s3url:
        db_cred_s3url = args.db_cred_s3url
        s3cfg_path = args.s3cfg_path
    else:
        db_cred_s3url = None

    logger = pipe_util.setup_logging('mir_profiler_stats', args, uuid)

    if db_cred_s3url is not None:
        conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:  # local sqlite case
        sqlite_name = 'mir_profiler_stats' + uuid + '.db'
        engine_path = 'sqlite:///' + sqlite_name
        engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    # Get stats from the alignment annotations
    logger.info('Beginning: Alignment stats generation')
    stats_CMD = [
        'perl',
        '/home/ubuntu/bin/mirna-profiler/v0.2.7/code/library_stats/alignment_stats.pl',
        '-s', sam_path,
        '-a', adapter_path,
    ]
    output = pipe_util.do_command(stats_CMD, logger)

    # Store the command's time/memory profile, keyed on UUID and barcode
    df = time_util.store_time(uuid, stats_CMD, output, logger)
    df['bam_name'] = barcode
    unique_key_dict = {'uuid': uuid, 'bam_name': barcode}
    table_name = 'time_mem_mir_alignment_stats'
    df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
    logger.info('Completed: Alignment stats generation')


if __name__ == '__main__':
    main()
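# Every script in this section funnels its commands through pipe_util.do_command /
# pipe_util.do_shell_command. Those helpers are project code that is not shown here;
# the hypothetical run_and_log below is only a minimal sketch of the general pattern
# they represent (run the command, capture combined stdout/stderr, log it), assuming
# nothing beyond the standard library. It is not the project's implementation.
import subprocess


def run_and_log(cmd, logger, shell=False):
    """Run cmd (a list, or a single string when shell=True) and return its output."""
    logger.info('running: %s', cmd if shell else ' '.join(cmd))
    try:
        # Capture stdout and stderr together so failures are visible in the log.
        output = subprocess.check_output(cmd, shell=shell, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        logger.error('command failed (exit %s): %s', exc.returncode, exc.output)
        raise
    logger.info(output)
    return output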
import argparse
import logging

import sqlalchemy

# Project-local helper modules (assumed importable as top-level modules)
import df_util
import pipe_util
import time_util


def main():
    parser = argparse.ArgumentParser('SAM Annotator', description='Annotates the SAM files with miRNA hits')

    # Logging flag
    parser.add_argument('-d', '--debug', action='store_const', const=logging.DEBUG, dest='level', help='Enable debug logging.')
    parser.set_defaults(level=logging.INFO)

    # Required flags
    parser.add_argument('-e', '--species_code', required=True, choices=['hsa'], help='Organism species code.')
    parser.add_argument('-s', '--sam_path', required=True, help='Path to directory containing BAMs.')
    parser.add_argument('-w', '--db_connect', required=True, help='Path to db_connection file')
    parser.add_argument('-u', '--uuid', required=True, help='UUID/GDC_ID for the harmonized BAM.')
    parser.add_argument('-r', '--barcode', required=True, help='BAM barcode')

    # Optional DB flags
    parser.add_argument('-y', '--db_cred_s3url', required=False, help='String s3url of the postgres db_cred file')
    parser.add_argument('-z', '--s3cfg_path', required=False, help='Path to the s3cfg file.')

    args = parser.parse_args()

    species_code = args.species_code
    sam_path = args.sam_path
    connect_path = args.db_connect
    uuid = args.uuid
    barcode = args.barcode
    if args.db_cred_s3url:
        db_cred_s3url = args.db_cred_s3url
        s3cfg_path = args.s3cfg_path
    else:
        db_cred_s3url = None

    logger = pipe_util.setup_logging('mir_profiler_annotator', args, uuid)

    if db_cred_s3url is not None:
        conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:  # local sqlite case
        sqlite_name = 'mir_profiler_annotator' + uuid + '.db'
        engine_path = 'sqlite:///' + sqlite_name
        engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    # Annotate the SAM files
    logger.info('Beginning: SAM file annotation')
    annotate_CMD = [
        'perl',
        '/home/ubuntu/bin/mirna-profiler/v0.2.7/code/annotation/annotate.pl',
        '-d', connect_path,
        '-o', species_code,
        '-s', sam_path,
    ]
    output = pipe_util.do_command(annotate_CMD, logger)

    # Store the command's time/memory profile, keyed on UUID and barcode
    df = time_util.store_time(uuid, annotate_CMD, output, logger)
    df['bam_name'] = barcode
    unique_key_dict = {'uuid': uuid, 'bam_name': barcode}
    table_name = 'time_mem_mir_sam_annotator'
    df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
    logger.info('Completed: SAM file annotation')


if __name__ == '__main__':
    main()
import argparse
import logging

import sqlalchemy

# Project-local helper modules (assumed importable as top-level modules)
import df_util
import pipe_util
import time_util


def main():
    parser = argparse.ArgumentParser('BAM to SAM conversion', description='Use samtools to convert a BAM to SAM.')

    # Logging flag
    parser.add_argument('-d', '--debug', action='store_const', const=logging.DEBUG, dest='level', help='Enable debug logging.')
    parser.set_defaults(level=logging.INFO)

    # Required flags
    parser.add_argument('-b', '--bam_path', required=True, help='Path to BAM file.')
    parser.add_argument('-o', '--output_name', required=True, help='Desired name for output SAM.')
    parser.add_argument('-u', '--uuid', required=True, help='UUID/GDC_ID for the harmonized BAM.')
    parser.add_argument('-r', '--barcode', required=True, help='BAM barcode')

    # Optional DB flags
    parser.add_argument('-y', '--db_cred_s3url', required=False, help='String s3url of the postgres db_cred file')
    parser.add_argument('-z', '--s3cfg_path', required=False, help='Path to the s3cfg file.')

    args = parser.parse_args()

    bam_path = args.bam_path
    output_name = args.output_name
    uuid = args.uuid
    barcode = args.barcode
    if args.db_cred_s3url:
        db_cred_s3url = args.db_cred_s3url
        s3cfg_path = args.s3cfg_path
    else:
        db_cred_s3url = None

    logger = pipe_util.setup_logging('mir_profiler_samtools', args, uuid)

    if db_cred_s3url is not None:
        conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:  # local sqlite case
        sqlite_name = 'mir_profiler_samtools' + uuid + '.db'
        engine_path = 'sqlite:///' + sqlite_name
        engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    # Convert the BAM to a SAM (with header) using samtools view
    logger.info('Beginning: BAM to SAM conversion')
    BAMtoSAM_CMD = ['samtools', 'view', '-h', bam_path, '-o', output_name]
    shell_BtS_CMD = ' '.join(BAMtoSAM_CMD)
    output = pipe_util.do_shell_command(shell_BtS_CMD, logger)

    # Store the command's time/memory profile, keyed on UUID and barcode
    df = time_util.store_time(uuid, shell_BtS_CMD, output, logger)
    df['bam_name'] = barcode
    unique_key_dict = {'uuid': uuid, 'bam_name': barcode}
    table_name = 'time_mem_mir_samtools_view'
    df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
    logger.info('Completed: BAM to SAM conversion')


if __name__ == '__main__':
    main()
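# A minimal sketch of the same BAM-to-SAM conversion done in-process with pysam
# rather than by shelling out to samtools. Purely illustrative: it assumes pysam
# is installed, and bam_to_sam is a hypothetical helper, not part of the pipeline.
import pysam


def bam_to_sam(bam_path, output_name):
    # "rb" reads the binary BAM; "wh" writes SAM including the header,
    # mirroring `samtools view -h`.
    with pysam.AlignmentFile(bam_path, "rb") as bam, \
            pysam.AlignmentFile(output_name, "wh", template=bam) as sam:
        for read in bam:
            sam.write(read)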
def main(): parser = argparse.ArgumentParser("Graph generation", description="Generate graphs for different miRNA stats") # Logging flag parser.add_argument( "-d", "--debug", action="store_const", const=logging.DEBUG, dest="level", help="Enable debug logging." ) parser.set_defaults(level=logging.INFO) # Required flags parser.add_argument("-s", "--sam_path", required=True, help="Path to SAM file") parser.add_argument("-f", "--filtered_taglen", required=True, help="Path to filtered_taglength.csv") parser.add_argument("-v", "--softclip_taglen", required=True, help="Path to softclip_taglength.csv") parser.add_argument("-a", "--adapter_report", required=True, help="Path to adapter report") parser.add_argument("-c", "--chastity_taglen", required=True, help="Path to chastity_taglength.csv") parser.add_argument("-l", "--alignment_stats", required=True, help="Path to alignment_stats.csv") parser.add_argument("-u", "--uuid", required=True, help="UUID/GDC_ID for the harmonized BAM.") parser.add_argument("-r", "--barcode", required=True, help="BAM barcode") # Optional DB Flags parser.add_argument("-y", "--db_cred_s3url", required=False, help="String s3url of the postgres db_cred file") parser.add_argument("-z", "--s3cfg_path", required=False, help="Path to the s3cfg file.") args = parser.parse_args() sam_path = args.sam_path filtered_taglen = args.filtered_taglen softclip_taglen = args.softclip_taglen adapter_report = args.adapter_report chastity_taglen = args.chastity_taglen alignment_stats = args.alignment_stats uuid = args.uuid barcode = args.barcode if args.db_cred_s3url: db_cred_s3url = args.db_cred_s3url s3cfg_path = args.s3cfg_path else: db_cred_s3url = None logger = pipe_util.setup_logging("mir_profiler_graph", args, uuid) if db_cred_s3url is not None: conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger) engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict)) else: # local sqllite case sqlite_name = "mir_profiler_graph" + uuid + ".db" engine_path = "sqlite:///" + sqlite_name engine = sqlalchemy.create_engine(engine_path, isolation_level="SERIALIZABLE") # Generate the graphs for the annotation data logger.info("Beginning: Annotation graph generation") graph_CMD = [ "perl", "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/library_stats/graph_libs.pl", "-s", sam_path, "-f", filtered_taglen, "-o", softclip_taglen, "-a", adapter_report, "-c", chastity_taglen, "-t", alignment_stats, ] output = pipe_util.do_command(graph_CMD, logger) df = time_util.store_time(uuid, graph_CMD, output, logger) df["bam_name"] = barcode unique_key_dict = {"uuid": uuid, "bam_name": barcode} table_name = "time_mem_mir_graph" df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger) # Store time command will go here logger.info("Completed: Annotation graph generation")
def main(): parser = argparse.ArgumentParser( "miRNA adapter report", description="Generate adapter report for alignments that did not have adapter trimming done", ) # Logging flag parser.add_argument( "-d", "--debug", action="store_const", const=logging.DEBUG, dest="level", help="Enable debug logging." ) parser.set_defaults(level=logging.INFO) parser.add_argument("-s", "--sam_path", required=True, help="Path to sam file.") parser.add_argument("-u", "--uuid", required=True, help="UUID/GDC_ID for the harmonized BAM.") parser.add_argument("-r", "--barcode", required=True, help="BAM barcode") # Optional DB Flags parser.add_argument("-y", "--db_cred_s3url", required=False, help="String s3url of the postgres db_cred file") parser.add_argument("-z", "--s3cfg_path", required=False, help="Path to the s3cfg file.") args = parser.parse_args() sam_path = args.sam_path uuid = args.uuid barcode = args.barcode if args.db_cred_s3url: db_cred_s3url = args.db_cred_s3url s3cfg_path = args.s3cfg_path else: db_cred_s3url = None logger = pipe_util.setup_logging("mir_profiler_adapter_report", args, uuid) if db_cred_s3url is not None: conn_dict = pipe_util.get_connect_dict(db_cred_s3url, s3cfg_path, logger) engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict)) else: # local sqllite case sqlite_name = "mir_profiler_adapter_report" + uuid + ".db" engine_path = "sqlite:///" + sqlite_name engine = sqlalchemy.create_engine(engine_path, isolation_level="SERIALIZABLE") logger.info("Beginning: Adapter report generation") sam_name = os.path.basename(sam_path) sam_base, sam_ext = os.path.splitext(sam_name) adapter_name = sam_base + "_adapter.report" adapter_CMD = [ "cat", sam_path, "|", "awk '{arr[length($10)]+=1} END {for (i in arr) {print i\" \"arr[i]}}'", "|", 'sort -t " " -k1n >', adapter_name, ] shell_adapter_CMD = " ".join(adapter_CMD) output = pipe_util.do_shell_command(shell_adapter_CMD, logger) df = time_util.store_time(uuid, shell_adapter_CMD, output, logger) df["bam_name"] = barcode unique_key_dict = {"uuid": uuid, "bam_name": barcode} table_name = "time_mem_mir_adapter_report" df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger) logger.info("Completed: Adapter report generation")