def main():
    """COPY_NUMBER_ALTERATION segment pipeline entry point.

    Relocates and decompresses SEG inputs, normalizes chromosome numbering
    and segment IDs, concatenates the SEG files into the export folder, and
    generates the segments metadata. Operates on module-level ``meta_config``
    / ``study_config`` and logs progress through the module ``logger``.
    """
    global meta_config
    global study_config
    global janus_path
    global logger
    import logging
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import fix_chrom, fix_seg_id

    # TODO replace the 'verb' switch with logger
    verb = logger.isEnabledFor(logging.INFO)

    # relocate_inputs returns an updated config, so this step runs standalone.
    logger.info('Gathering and decompressing SEG files into temporary folder, and updating config')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)
    logger.info('Done.')

    # Remaining stages all follow the same announce / run / 'Done.' pattern.
    stages = (
        ('Fixing Chromosome numbering ...',
         lambda: fix_chrom(meta_config, study_config, logger)),
        ('Fixing .SEG IDs',
         lambda: fix_seg_id(meta_config, study_config, logger)),
        ('Concatenating SEG Files to export folder',
         lambda: helper.concat_files(meta_config, study_config, verb)),
        ('Generating segments Meta ...',
         lambda: meta.generate_meta_type(meta_config, study_config, logger)),
    )
    for announcement, run_stage in stages:
        logger.info(announcement)
        run_stage()
        logger.info('Done.')
def main():
    """MUTATION_EXTENDED VCF pipeline entry point.

    Transfers VCF inputs to a temporary folder, normalizes tumour/normal
    columns, converts VCF to MAF (deleting the VCFs), generates the
    MUTATION_EXTENDED metadata, then cleans and concatenates the MAF files
    into the export folder. Operates on module-level ``meta_config`` /
    ``study_config`` and logs through the module ``logger``.
    """
    global meta_config
    global study_config
    global janus_path
    global verb
    from support import helper
    # Fix: `meta` was referenced below (meta.generate_meta_type) but never
    # imported in this function, unlike the sibling pipeline entry points.
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions
    import logging

    # TODO replace the 'verb' switch with logger
    verb = logger.isEnabledFor(logging.INFO)

    logger.info('Transferring VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info('Ensuring both columns exist, otherwise adding UNMATCHED column ...')
    support_functions.verify_dual_columns(meta_config, verb)

    logger.info('Exporting vcf2maf...')
    logger.info('And deleting .vcf s...')
    # export2maf returns an updated config pointing at the generated MAFs.
    meta_config = support_functions.export2maf(meta_config, study_config, verb)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, verb)

    logger.info('Concating MAF Files to export folder ...')
    helper.concat_files(meta_config, study_config, verb)
def main():
    """HMMCopy SEG pipeline entry point.

    Decompresses SEG inputs to a temporary folder, repairs HMMCopy TSV
    formatting, chromosome numbering and maximum chromosome length, fixes
    segment IDs, and concatenates the SEG files into the export folder.
    Uses the module-level ``meta_config`` / ``study_config`` / ``janus_path``
    / ``verb`` globals.
    """
    global meta_config
    global study_config
    global janus_path
    global verb

    def step(message, *actions):
        # Announce a stage, run its action(s) in order, then mark it finished.
        helper.working_on(verb, message=message)
        for action in actions:
            action()
        helper.working_on(verb)

    step('Gathering and decompressing SEG files into temporary folder',
         lambda: helper.decompress_to_temp(meta_config, study_config, verb))

    # fix_hmmcopy_max_chrom fixes the maximum chromosome length AND imputes
    # the num.mark value.
    step('Fixing HMMCopy formatting, chromosome, and chromosome max-length ...',
         lambda: fix_hmmcopy_tsv(meta_config, study_config, verb),
         lambda: fix_chrom(meta_config, study_config, verb),
         lambda: fix_hmmcopy_max_chrom(meta_config, study_config, janus_path, verb))

    step('Fixing .SEG IDs',
         lambda: fix_seg_id(meta_config, study_config, verb))

    step('Concating SEG Files to export folder',
         lambda: helper.concat_files(meta_config, study_config, verb))
def main():
    """CAP_mutation MAF pipeline entry point.

    Decompresses MAF inputs, cleans and concatenates them, generates the
    MUTATION_EXTENDED metadata, then filters, OncoKB-annotates, and
    TGL-pipe-filters the MAFs. Operates on the module-level ``meta_config``
    / ``study_config`` globals and logs through the module ``logger``.

    Raises:
        FileNotFoundError: re-raised if TGL_filter cannot read a required
            file (e.g. vep_keep_columns.txt).
    """
    global meta_config
    global study_config
    global janus_path
    global logger
    import logging
    # Fix: removed unused function-scope `import os` (nothing in this body
    # references the os module).
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # TODO replace the 'verb' switch with logger
    verb = logger.isEnabledFor(logging.INFO)

    logger.info('Started processing data for CAP_mutation pipeline')

    # Decompress MAF files to temp folder
    logger.info('Decompressing MAF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    # Clean MAF files
    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, verb)

    # Concatenate MAF files
    logger.info('Concatenating MAF Files to export folder ...')
    helper.concat_files(meta_config, study_config, verb)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # Filtering MAF Files; thresholds come straight from the pipeline config.
    logger.info('Filtering MAF Files ...')
    support_functions.maf_filter(
        meta_config,
        study_config,
        meta_config.config_map['Mutation_Type'],
        meta_config.config_map['Filter_Exception'],
        meta_config.config_map['Minimum_Tumour_Depth'],
        meta_config.config_map['Minimum_Tumour_AF'],
        meta_config.config_map['Maximum_gnomAD_AF'],
        meta_config.config_map['Maximum_Local_Freq'])

    # oncokb-annotation
    logger.info('Annotating MAF files ...')
    support_functions.oncokb_annotation(
        meta_config, study_config,
        meta_config.config_map['oncokb_api_token'], verb)

    # TGL Pipe Filtering
    logger.info('Filtering TGL pipe ...')
    try:
        support_functions.TGL_filter(meta_config, study_config)
    except FileNotFoundError as err:
        # eg. failure to read vep_keep_columns.txt
        logger.error("Cannot read file: {0}".format(err))
        raise

    logger.info('Finished processing data for CAP_mutation pipeline')
def test_concatenate_files(self):
    """concat_files merges the listed inputs into a single export file
    whose contents match the recorded checksum."""
    test_name = 'concatenate_files'
    source_dir = os.path.join(self.dataDir, test_name)
    export_dir = os.path.join(self.tmp.name, test_name)
    os.mkdir(export_dir)

    # Manifest of input files the mocked config will report.
    manifest = pd.DataFrame({'FILE_NAME': ['foo.tsv', 'bar.tsv', 'baz.tsv']})
    exports_config = mock_legacy_config({'input_folder': source_dir}, manifest)
    study_config = mock_legacy_config({'output_folder': export_dir})

    concat_files(exports_config, study_config, True)

    expected = {'data_mock_concat.txt': 'd5f4cd22aed26f6e5022571ad5f3d745'}
    self.verify_checksums(expected, export_dir)
def main():
    """Plain SEG pipeline entry point.

    Decompresses SEG inputs to a temporary folder, normalizes chromosome
    numbering and segment IDs, and concatenates the SEG files into the
    export folder. Uses the module-level ``meta_config`` / ``study_config``
    / ``verb`` globals.
    """
    global meta_config
    global study_config
    global janus_path
    global verb

    # Every stage follows the same working_on(message) / run / working_on()
    # reporting pattern, so drive them from a table.
    stages = (
        ('Gathering and decompressing SEG files into temporary folder',
         lambda: helper.decompress_to_temp(meta_config, study_config, verb)),
        ('Fixing Chromosome numbering ...',
         lambda: fix_chrom(meta_config, study_config, verb)),
        ('Fixing .SEG IDs',
         lambda: fix_seg_id(meta_config, study_config, verb)),
        ('Concating SEG Files to export folder',
         lambda: helper.concat_files(meta_config, study_config, verb)),
    )
    for announcement, run_stage in stages:
        helper.working_on(verb, message=announcement)
        run_stage()
        helper.working_on(verb)
def main():
    """Mutect VCF pipeline entry point.

    Decompresses VCF inputs, normalizes tumour/normal columns, keeps only
    PASS records, converts VCF to MAF (deleting the VCFs), generates the
    MUTATION_EXTENDED metadata, then cleans and concatenates the MAF files
    into the export folder. Operates on module-level ``meta_config`` /
    ``study_config`` and logs through the module ``logger``.
    """
    global meta_config
    global study_config
    global janus_path
    global logger
    import logging
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # TODO replace the 'verb' switch with logger
    verbose = logger.isEnabledFor(logging.INFO)

    logger.info('Decompressing VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verbose)

    logger.info('Ensuring both columns exist, otherwise adding UNMATCHED column ...')
    support_functions.verify_dual_columns(meta_config, verbose)

    logger.info('Filtering for only PASS ...')
    support_functions.filter_vcf_rejects(meta_config, verbose)

    logger.info('Exporting vcf2maf, and deleting .VCFs')
    # export2maf returns an updated config pointing at the generated MAFs.
    meta_config = support_functions.export2maf(meta_config, study_config, verbose)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, verbose)

    logger.info('Concating MAF Files to export folder ...')
    helper.concat_files(meta_config, study_config, verbose)

    logger.info('Finished processing data for Mutect pipeline')
def main():
    """COPY_NUMBER_ALTERATION CNA pipeline entry point.

    Relocates SEG inputs, normalizes chromosome numbering and segment IDs,
    concatenates the SEG files, then runs preProcCNA/ProcCNA to produce
    reduced SEG, CNA, and log2CNA files, and finally generates SEGMENTED
    (plus optional DISCRETE / CONTINUOUS) metadata. Operates on the
    module-level ``meta_config`` / ``study_config`` globals and logs
    through the module ``logger``.
    """
    global meta_config
    global study_config
    global janus_path
    global logger
    import logging
    import os
    from support import helper
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import fix_chrom, fix_seg_id, preProcCNA, ProcCNA
    from generate import meta
    from utilities.constants import DATA_DIRNAME

    AP_NAME = 'analysis_pipelines'
    CNA_NAME = 'COPY_NUMBER_ALTERATION'

    # TODO replace the 'verb' switch with logger
    verb = logger.isEnabledFor(logging.INFO)

    # note that __file__ is the path to the executing module components.py,
    # not this script. Fix: collapsed the duplicated config_map.get() lookups
    # into a single `get(...) or <default>` per value; as before, a missing or
    # falsy config entry falls through to the bundled default file.
    default_dir = os.path.join(os.path.dirname(__file__), AP_NAME, CNA_NAME, DATA_DIRNAME)
    genebed = meta_config.config_map.get('genebed') \
        or os.path.join(default_dir, 'ncbi_genes_hg19_canonical.bed')
    genelist = meta_config.config_map.get('genelist') \
        or os.path.join(default_dir, 'targeted_genelist.txt')

    logger.info('Transferring SEG files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)
    logger.info('Done.')

    logger.info('Fixing Chromosome numbering ...')
    fix_chrom(meta_config, study_config, logger)
    logger.info('Done.')

    logger.info('Fixing .SEG IDs')
    fix_seg_id(meta_config, study_config, logger)
    logger.info('Done.')

    logger.info('Concatenating SEG Files to export folder')
    helper.concat_files(meta_config, study_config, verb)
    logger.info('Done.')

    # Call preProcCNA.r to generate reduced seg files
    logger.info('Generating reduced SEG files ...')
    preProcCNA(meta_config, study_config, genebed, genelist,
               meta_config.config_map['gain'],
               meta_config.config_map['ampl'],
               meta_config.config_map['htzd'],
               meta_config.config_map['hmzd'],
               logger)
    logger.info('Done.')

    logger.info('Generating CNA and log2CNA files ...')
    ProcCNA(meta_config, study_config, genebed, genelist,
            meta_config.config_map['gain'],
            meta_config.config_map['ampl'],
            meta_config.config_map['htzd'],
            meta_config.config_map['hmzd'],
            meta_config.config_map['oncokb_api_token'],
            verb)
    logger.info('Done.')

    # TODO legacy metadata generation left in place for now. But does it make
    # sense for data to be *both* discrete and continuous?
    logger.info('Generating segments Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)
    logger.info('Done.')

    if meta_config.config_map.get('DISCRETE'):
        logger.info('Generating DISCRETE Meta ...')
        meta_config.datahandler = 'DISCRETE'
        meta.generate_meta_type(meta_config, study_config, logger)
        logger.info('Done.')

    if meta_config.config_map.get('CONTINUOUS'):
        logger.info('Generating CONTINUOUS Meta ...')
        meta_config.datahandler = 'CONTINUOUS'
        meta.generate_meta_type(meta_config, study_config, logger)
        logger.info('Done.')