示例#1
0
def main():
    """Run the SEG processing stages in order, logging each one.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config`` and ``logger``; ``meta_config`` is rebound to the value
    returned by ``helper.relocate_inputs``.

    Stages (descriptions follow the log messages): relocate the SEG inputs,
    fix chromosome numbering, fix SEG IDs, concatenate the files, and generate
    the segments metadata. Every stage is followed by a 'Done.' log line.
    """
    global meta_config
    global study_config
    global janus_path
    global logger

    import logging

    from support import helper
    from generate import meta
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import fix_chrom, fix_seg_id

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    # The first stage rebinds the global meta_config, so it stays outside the table.
    logger.info('Gathering and decompressing SEG files into temporary folder, and updating config')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)
    logger.info('Done.')

    # Remaining stages: (log message, deferred call). The lambdas read the
    # globals only when invoked, so they see the rebound meta_config.
    stages = (
        ('Fixing Chromosome numbering ...',
         lambda: fix_chrom(meta_config, study_config, logger)),
        ('Fixing .SEG IDs',
         lambda: fix_seg_id(meta_config, study_config, logger)),
        ('Concatenating SEG Files to export folder',
         lambda: helper.concat_files(meta_config, study_config, info_enabled)),
        ('Generating segments Meta ...',
         lambda: meta.generate_meta_type(meta_config, study_config, logger)),
    )
    for message, action in stages:
        logger.info(message)
        action()
        logger.info('Done.')
示例#2
0
def main():
    """Run the VCF-to-MAF mutation stages in order, logging each one.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config`` and ``logger``. Rebinds the module-level ``meta_config``
    (after relocating inputs and again after the MAF export) and sets the
    module-level ``verb`` flag from the logger's INFO level.

    Stages (descriptions follow the log messages): transfer the VCF files,
    verify the dual columns, export to MAF, generate the MUTATION_EXTENDED
    metadata, clean the MAF files, and concatenate them.
    """
    global meta_config
    global study_config
    global janus_path
    global logger
    global verb

    import logging

    from support import helper
    # 'meta' is used below for metadata generation; import it here so the
    # function does not depend on the enclosing module providing it.
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    verb = logger.isEnabledFor(
        logging.INFO)  # TODO replace the 'verb' switch with logger

    logger.info('Transferring VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info(
        'Ensuring both columns exist, otherwise adding UNMATCHED column ...')
    support_functions.verify_dual_columns(meta_config, verb)

    logger.info('Exporting vcf2maf...')
    logger.info('And deleting .vcf s...')
    meta_config = support_functions.export2maf(meta_config, study_config, verb)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, verb)

    logger.info('Concating MAF Files to export folder  ...')
    helper.concat_files(meta_config, study_config, verb)
示例#3
0
def main():
    """Run the HMMCopy SEG stages in order.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config``, ``janus_path`` and ``verb``. Each stage is bracketed by
    ``helper.working_on(verb, message=...)`` before its actions and
    ``helper.working_on(verb)`` after them.
    """
    global meta_config
    global study_config
    global janus_path
    global verb

    # (stage message, deferred calls run in order). The lambdas look their
    # names up only when invoked, matching a plain sequential script.
    stages = (
        ('Gathering and decompressing SEG files into temporary folder', (
            lambda: helper.decompress_to_temp(meta_config, study_config, verb),
        )),
        ('Fixing HMMCopy formatting, chromosome, and chromosome max-length ...', (
            lambda: fix_hmmcopy_tsv(meta_config, study_config, verb),
            lambda: fix_chrom(meta_config, study_config, verb),
            # fix_hmmcopy_max_chrom fixes the maximum chromosome length AND
            # imputes the num.mark value
            lambda: fix_hmmcopy_max_chrom(meta_config, study_config, janus_path, verb),
        )),
        ('Fixing .SEG IDs', (
            lambda: fix_seg_id(meta_config, study_config, verb),
        )),
        ('Concating SEG Files to export folder', (
            lambda: helper.concat_files(meta_config, study_config, verb),
        )),
    )

    for banner, actions in stages:
        helper.working_on(verb, message=banner)
        for action in actions:
            action()
        helper.working_on(verb)
示例#4
0
def main():
    """Run the CAP_mutation MAF stages in order, logging each one.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config`` and ``logger``; ``meta_config`` is rebound to the value
    returned by ``helper.relocate_inputs``.

    Stages (descriptions follow the log messages): decompress the MAF files,
    clean, concatenate, generate the MUTATION_EXTENDED metadata, filter,
    annotate, and run the TGL filter.

    Raises:
        FileNotFoundError: re-raised from ``support_functions.TGL_filter``
            after being logged as an error.
        KeyError: if a required key is missing from ``meta_config.config_map``.
    """
    global meta_config
    global study_config
    global janus_path
    global logger

    import logging
    import os  # NOTE(review): not referenced in this function
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    # config_map keys passed positionally, in this order, to maf_filter.
    filter_keys = (
        'Mutation_Type',
        'Filter_Exception',
        'Minimum_Tumour_Depth',
        'Minimum_Tumour_AF',
        'Maximum_gnomAD_AF',
        'Maximum_Local_Freq',
    )

    logger.info('Started processing data for CAP_mutation pipeline')

    # MAF files go to the temp folder; meta_config is replaced by the result.
    logger.info('Decompressing MAF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)

    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, info_enabled)

    logger.info('Concatenating MAF Files to export folder  ...')
    helper.concat_files(meta_config, study_config, info_enabled)

    # Meta data files for the mutation extended data.
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Filtering MAF Files ...')
    support_functions.maf_filter(
        meta_config, study_config,
        *(meta_config.config_map[key] for key in filter_keys))

    # oncokb annotation
    logger.info('Annotating MAF files ...')
    oncokb_token = meta_config.config_map['oncokb_api_token']
    support_functions.oncokb_annotation(
        meta_config, study_config, oncokb_token, info_enabled)

    # TGL pipe filtering
    logger.info('Filtering TGL pipe ...')
    try:
        support_functions.TGL_filter(meta_config, study_config)
    except FileNotFoundError as err:
        # eg. failure to read vep_keep_columns.txt; log it, then propagate.
        logger.error("Cannot read file: {0}".format(err))
        raise

    logger.info('Finished processing data for CAP_mutation pipeline')
示例#5
0
 def test_concatenate_files(self):
     """Call concat_files on three TSV inputs and check the output directory.

     The input directory is ``<self.dataDir>/concatenate_files``; a fresh
     output directory of the same name is created under ``self.tmp.name``.
     The result is checked with ``self.verify_checksums`` against the
     expected mapping for 'data_mock_concat.txt'.
     """
     name = 'concatenate_files'
     source_dir = os.path.join(self.dataDir, name)
     target_dir = os.path.join(self.tmp.name, name)
     os.mkdir(target_dir)
     file_table = pd.DataFrame({'FILE_NAME': ['foo.tsv', 'bar.tsv', 'baz.tsv']})
     exports_config = mock_legacy_config({'input_folder': source_dir}, file_table)
     study_config = mock_legacy_config({'output_folder': target_dir})
     concat_files(exports_config, study_config, True)
     self.verify_checksums(
         {'data_mock_concat.txt': 'd5f4cd22aed26f6e5022571ad5f3d745'},
         target_dir)
示例#6
0
def main():
    """Run the SEG stages in order.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config`` and ``verb``. Each stage is bracketed by
    ``helper.working_on(verb, message=...)`` before its action and
    ``helper.working_on(verb)`` after it.
    """
    global meta_config
    global study_config
    global janus_path
    global verb

    # (stage message, deferred call); names are resolved when each lambda runs.
    stages = (
        ('Gathering and decompressing SEG files into temporary folder',
         lambda: helper.decompress_to_temp(meta_config, study_config, verb)),
        ('Fixing Chromosome numbering ...',
         lambda: fix_chrom(meta_config, study_config, verb)),
        ('Fixing .SEG IDs',
         lambda: fix_seg_id(meta_config, study_config, verb)),
        ('Concating SEG Files to export folder',
         lambda: helper.concat_files(meta_config, study_config, verb)),
    )

    for banner, action in stages:
        helper.working_on(verb, message=banner)
        action()
        helper.working_on(verb)
示例#7
0
def main():
    """Run the Mutect VCF-to-MAF stages in order, logging each one.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config`` and ``logger``; ``meta_config`` is rebound twice, to the
    results of ``helper.relocate_inputs`` and ``support_functions.export2maf``.

    Stages (descriptions follow the log messages): decompress the VCF files,
    verify the dual columns, filter for PASS, export to MAF, generate the
    MUTATION_EXTENDED metadata, clean the MAF files, and concatenate them.
    """
    global meta_config
    global study_config
    global janus_path
    global logger

    import logging
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    logger.info('Decompressing VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)

    # VCF-side stages that leave the meta_config binding alone.
    vcf_stages = (
        ('Ensuring both columns exist, otherwise adding UNMATCHED column ...',
         lambda: support_functions.verify_dual_columns(meta_config, info_enabled)),
        ('Filtering for only PASS ...',
         lambda: support_functions.filter_vcf_rejects(meta_config, info_enabled)),
    )
    for note, action in vcf_stages:
        logger.info(note)
        action()

    logger.info('Exporting vcf2maf, and deleting .VCFs')
    meta_config = support_functions.export2maf(meta_config, study_config, info_enabled)

    # MAF-side stages; the first generates the meta data files for the
    # mutation extended data.
    maf_stages = (
        ('Generating MUTATION_EXTENDED Meta ...',
         lambda: meta.generate_meta_type(meta_config, study_config, logger)),
        ('Cleaning MAF Files ...',
         lambda: support_functions.clean_head(meta_config, info_enabled)),
        ('Concating MAF Files to export folder  ...',
         lambda: helper.concat_files(meta_config, study_config, info_enabled)),
    )
    for note, action in maf_stages:
        logger.info(note)
        action()

    logger.info('Finished processing data for Mutect pipeline')
示例#8
0
def main():
    """Run the copy-number-alteration stages in order, logging each one.

    Takes no arguments. Reads the module-level ``meta_config``,
    ``study_config`` and ``logger``; ``meta_config`` is rebound to the value
    returned by ``helper.relocate_inputs``.

    The 'genebed' and 'genelist' paths come from ``meta_config.config_map``
    when set to a truthy value; otherwise they default to files under
    ``<dirname(__file__)>/analysis_pipelines/COPY_NUMBER_ALTERATION/<DATA_DIRNAME>``.

    Stages (descriptions follow the log messages): transfer the SEG files,
    fix chromosome numbering, fix SEG IDs, concatenate, generate reduced SEG
    files, generate CNA and log2CNA files, then generate the segments
    metadata and, when the 'DISCRETE' / 'CONTINUOUS' config entries are
    truthy, metadata for those data handlers too.

    Raises:
        KeyError: if 'gain', 'ampl', 'htzd', 'hmzd' or 'oncokb_api_token' is
            missing from ``meta_config.config_map``.
    """
    global meta_config
    global study_config
    global janus_path
    global logger

    import logging
    import os
    from support import helper
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import fix_chrom, fix_seg_id, preProcCNA, ProcCNA
    from generate import meta
    from utilities.constants import DATA_DIRNAME

    AP_NAME = 'analysis_pipelines'
    CNA_NAME = 'COPY_NUMBER_ALTERATION'

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    def bundled_data(filename):
        # note that __file__ is the path to the executing module components.py, not this script
        return os.path.join(os.path.dirname(__file__), AP_NAME, CNA_NAME,
                            DATA_DIRNAME, filename)

    # A truthy config value wins; otherwise fall back to the bundled file.
    genebed = meta_config.config_map.get('genebed') or bundled_data('ncbi_genes_hg19_canonical.bed')
    genelist = meta_config.config_map.get('genelist') or bundled_data('targeted_genelist.txt')

    # config_map keys passed positionally, in this order, to both CNA calls.
    threshold_keys = ('gain', 'ampl', 'htzd', 'hmzd')

    logger.info('Transferring SEG files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)
    logger.info('Done.')

    # SEG clean-up stages: (log message, deferred call).
    seg_stages = (
        ('Fixing Chromosome numbering ...',
         lambda: fix_chrom(meta_config, study_config, logger)),
        ('Fixing .SEG IDs',
         lambda: fix_seg_id(meta_config, study_config, logger)),
        ('Concatenating SEG Files to export folder',
         lambda: helper.concat_files(meta_config, study_config, info_enabled)),
    )
    for message, action in seg_stages:
        logger.info(message)
        action()
        logger.info('Done.')

    # Call preProcCNA.r to generate reduced seg files
    logger.info('Generating reduced SEG files ...')
    preProcCNA(meta_config, study_config, genebed, genelist,
               *(meta_config.config_map[key] for key in threshold_keys),
               logger)
    logger.info('Done.')

    logger.info('Generating CNA and log2CNA files ...')
    ProcCNA(meta_config, study_config, genebed, genelist,
            *(meta_config.config_map[key] for key in threshold_keys),
            meta_config.config_map['oncokb_api_token'], info_enabled)
    logger.info('Done.')

    # TODO legacy metadata generation left in place for now. But does it make sense for data to be *both* discrete and continuous?
    logger.info('Generating segments Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)
    logger.info('Done.')

    # Optional handlers: (config key, which is also the datahandler value; log message).
    optional_handlers = (
        ('DISCRETE', 'Generating DISCRETE Meta ...'),
        ('CONTINUOUS', 'Generating CONTINUOUS Meta ...'),
    )
    for handler, message in optional_handlers:
        if not meta_config.config_map.get(handler):
            continue
        logger.info(message)
        meta_config.datahandler = handler
        meta.generate_meta_type(meta_config, study_config, logger)
        logger.info('Done.')