Exemplo n.º 1
0
def main(argv):
    """Annotate a clinical data file using already-annotated alteration files.

    Args:
        argv: Parsed command-line namespace. This function reads ``help``,
            ``sample_ids_filter``, ``annotated_alteration_files`` (a
            comma-separated string), ``input_file`` and ``output_file``.

    Raises:
        SystemExit: After printing the usage text when ``argv.help`` is set,
            or with status 2 when a required parameter is empty.
    """
    if argv.help:
        log.info(
            '\n'
            'ClinicalDataAnnotator.py -i <input clinical file> -o <output clinical file> -a <annotated alteration files, separate by ,> [-s sample list filter]\n'
            '  Essential clinical columns:\n'
            '    SAMPLE_ID: sample ID')
        sys.exit()
    if argv.sample_ids_filter:
        setsampleidsfileterfile(argv.sample_ids_filter)

    # Split on commas and discard surrounding whitespace and empty entries.
    # re.split() always returns at least one element ([''] for an empty
    # string), so without this filtering a missing -a is never detected, and
    # "a.txt, b.txt" would yield " b.txt" with a leading space.
    raw_alteration_files = argv.annotated_alteration_files or ''
    annotated_alteration_files = [
        name
        for name in re.split(r'\s*,\s*', raw_alteration_files.strip())
        if name
    ]

    required_params = []
    if argv.input_file == '':
        required_params.append('-i')
    if argv.output_file == '':
        required_params.append('-o')
    if not annotated_alteration_files:
        required_params.append('-a')
    if required_params:
        log.error('The parameter(s) ' + ', '.join(required_params) +
                  ' can not be empty')
        log.info('for help: python ClinicalDataAnnotator.py -h')
        sys.exit(2)

    log.info('annotating %s ...' % argv.input_file)
    process_clinical_data(annotated_alteration_files, argv.input_file,
                          argv.output_file)

    log.info('done!')
Exemplo n.º 2
0
def main(argv):
    """Draw the OncoKB summary plots for an annotated clinical file.

    Creates a figure with three stacked axes, fills them via
    ``plotclinicalactionability`` and two ``plotimplications`` calls, and
    saves the figure to ``argv.output_file``.

    Args:
        argv: Parsed command-line namespace. This function reads
            ``catogery_column``, ``threshold_cat``, ``help``, ``input_file``,
            ``output_file``, ``sample_ids_filter`` and ``levels`` (a
            comma-separated string).

    Raises:
        SystemExit: After printing the usage text when ``argv.help`` is set,
            or with status 2 when ``-i`` or ``-o`` is empty.
    """
    params = {
        "catogerycolumn": argv.catogery_column,  # -c
        "thresholdcat": argv.threshold_cat,  # -n
    }
    if argv.help:
        log.info(
            '\n'
            'OncoKBPlots.py -i <annotated clinical file> -o <output PDF file> [-c <categorization column, '
            'e.g. CANCER_TYPE>] [-s sample list filter] [-n threshold of # samples in a category] [-l comma separated levels to include]\n'
            '  Essential clinical columns:\n'
            '    SAMPLE_ID: sample ID\n'
            '    HIGHEST_LEVEL: Highest OncoKB levels\n'
            '  Supported levels (-l): \n'
            '    LEVEL_1,LEVEL_2,LEVEL_3A,LEVEL_3B,LEVEL_4,ONCOGENIC,VUS')
        sys.exit()
    if argv.input_file == '' or argv.output_file == '':
        required_params = []
        if argv.input_file == '':
            required_params.append('-i')
        if argv.output_file == '':
            required_params.append('-o')

        log.error('The parameter(s) ' + ', '.join(required_params) +
                  ' can not be empty')
        log.info('for help: python OncoKBPlots.py -h')
        sys.exit(2)
    if argv.sample_ids_filter:
        setsampleidsfileterfile(argv.sample_ids_filter)
    if argv.levels:
        params["levels"] = argv.levels.split(',')

    log.info('annotating %s ...' % argv.input_file)
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1)

    # One plot per axis. The namespace is the ``argv`` parameter; the previous
    # code referenced an undefined ``args`` here (NameError) and invoked
    # plotclinicalactionability on ax1 twice.
    plotclinicalactionability(ax1, argv.input_file, argv.output_file, params)
    plotimplications(ax2, 'HIGHEST_DX_LEVEL', 'OncoKB Diagnostic Implications',
                     dxLevels, argv.input_file, argv.output_file, params)
    plotimplications(ax3, 'HIGHEST_PX_LEVEL', 'OncoKB Prognostic Implications',
                     pxLevels, argv.input_file, argv.output_file, params)

    plt.subplots_adjust(left=0.2, bottom=0.3)
    plt.gcf().text(0.90,
                   0.1,
                   "Generated by OncoKB\n[Chakravarty et al., JCO PO 2017]",
                   fontsize=6,
                   horizontalalignment='right',
                   verticalalignment='bottom')
    fig.tight_layout()
    fig.savefig(argv.output_file, bbox_inches='tight')

    log.info('done!')
Exemplo n.º 3
0
def main(argv):
    """Command-line entry point for annotating a fusion file.

    Prints the usage text and exits when ``argv.help`` is set; exits with
    status 2 when ``-i``, ``-o`` or ``-b`` is empty. Otherwise applies the
    optional settings (sample filter, cancer hotspots URL, OncoKB URL), stores
    the API token, optionally loads cancer types from the clinical file,
    validates the token and hands off to ``process_fusion``.

    Args:
        argv: Parsed command-line namespace.
    """
    if argv.help:
        usage = (
            '\n'
            "FusionAnnotator.py -i <input Fusion file> -o <output Fusion file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u <oncokb api url>] [-b <oncokb api bear token>] [-r <structural variant name format, default: [A-Za-z\\d]+-[A-Za-z\\d]+>]\n"
            '  Essential Fusion columns (case insensitive):\n'
            '    HUGO_SYMBOL: Hugo gene symbol\n'
            '    VARIANT_CLASSIFICATION: Translational effect of variant allele\n'
            '    TUMOR_SAMPLE_BARCODE: sample ID\n'
            '    FUSION: amino acid change, e.g. "TMPRSS2-ERG"\n'
            '  Essential clinical columns:\n'
            '    SAMPLE_ID: sample ID\n'
            '    ONCOTREE_CODE: tumor type code from oncotree (oncotree.mskcc.org)\n'
            '  Cancer type will be assigned based on the following priority:\n'
            '     1) ONCOTREE_CODE in clinical data file\n'
            '     2) ONCOTREE_CODE exist in Fusion\n'
            '     3) default tumor type (-t)\n'
            '  Default OncoKB base url is https://www.oncokb.org')
        log.info(usage)
        sys.exit()

    # Each mandatory option paired with its flag, in the order reported.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) +
                  ' can not be empty')
        log.info('for help: python FusionAnnotator.py -h')
        sys.exit(2)

    # Optional settings are applied only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    hotspots_url = argv.cancer_hotspots_base_url
    if hotspots_url:
        setcancerhotspotsbaseurl(hotspots_url)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    validate_oncokb_token()

    log.info('annotating %s ...' % argv.input_file)
    process_fusion(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
        argv.structural_variant_name_format,
    )

    log.info('done!')
def main(argv):
    """Command-line entry point for annotating a structural variant file.

    Prints the usage text and exits when ``argv.help`` is set; exits with
    status 2 when ``-i``, ``-o`` or ``-b`` is empty. Otherwise applies the
    optional settings (sample filter, cancer hotspots URL, OncoKB URL), stores
    the API token, optionally loads cancer types from the clinical file,
    validates the token and hands off to ``process_sv``.

    Args:
        argv: Parsed command-line namespace.
    """
    if argv.help:
        usage = (
            '\n'
            'StructuralVariantAnnotator.py -i <input structural variant file> -o <output structural variant file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u <oncokb api url>] [-b <oncokb api bear token>]\n'
            '  Essential structural variant columns (case insensitive):\n'
            '    GENEA: Hugo gene symbol for gene A\n'
            '    GENEB: Hugo gene symbol for gene B\n'
            '    SV_TYPE: Structural variant type. Available values: DELETION, TRANSLOCATION, DUPLICATION, INSERTION, INVERSION, FUSION, UNKNOWN. Other type will be converted to UNKNOWN\n'
            '    TUMOR_SAMPLE_BARCODE: sample ID\n'
            '  Essential clinical columns:\n'
            '    SAMPLE_ID: sample ID\n'
            '    ONCOTREE_CODE: tumor type code from oncotree (oncotree.mskcc.org)\n'
            '  Cancer type will be assigned based on the following priority:\n'
            '     1) ONCOTREE_CODE in clinical data file\n'
            '     2) ONCOTREE_CODE exist in structural variant\n'
            '     3) default tumor type (-t)\n'
            '  Default OncoKB base url is https://www.oncokb.org')
        log.info(usage)
        sys.exit()

    # Each mandatory option paired with its flag, in the order reported.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) +
                  ' can not be empty')
        log.info('for help: python StructuralVariantAnnotator.py -h')
        sys.exit(2)

    # Optional settings are applied only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    hotspots_url = argv.cancer_hotspots_base_url
    if hotspots_url:
        setcancerhotspotsbaseurl(hotspots_url)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    validate_oncokb_token()

    log.info('annotating %s ...' % argv.input_file)
    process_sv(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
    )

    log.info('done!')
Exemplo n.º 5
0
def main(argv):
    """Command-line entry point for annotating a copy number alteration file.

    Prints the usage text and exits when ``argv.help`` is set; exits with
    status 2 when ``-i``, ``-o`` or ``-b`` is empty. Otherwise applies the
    optional settings (sample filter, OncoKB URL), stores the API token,
    optionally loads cancer types from the clinical file, validates the token
    and hands off to ``process_cna_data`` with the lower-cased file format.

    Args:
        argv: Parsed command-line namespace.
    """
    if argv.help:
        usage = (
            '\n'
            'CnaAnnotator.py -i <input CNA file> -o <output CNA file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u oncokb-base-url] [-b oncokb_api_bear_token] [-z annotate_gain_loss] [-f CNA file formt, gistic or individual]\n'
            '  Input CNA file uses GISTIC output by default (https://docs.cbioportal.org/5.1-data-loading/data-loading/file-formats#data-file-1). You can also list copy number alteration individually by specifying -f=individual\n'
            '  Essential clinical columns:\n'
            '    SAMPLE_ID: sample ID\n'
            '  Cancer type will be assigned based on the following priority:\n'
            '     1) ONCOTREE_CODE in clinical data file\n'
            '     2) ONCOTREE_CODE exist in MAF\n'
            '     3) default tumor type (-t)\n'
            '  We do not annotate Gain and Loss by default, add -z to include the analysis. See https://github.com/oncokb/oncokb-annotator/issues/51 for more information.\n'
            '  Default OncoKB base url is https://www.oncokb.org')
        log.info(usage)
        sys.exit()

    # Each mandatory option paired with its flag, in the order reported.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) +
                  ' can not be empty')
        log.info('for help: python CnaAnnotator.py -h')
        sys.exit(2)

    # Optional settings are applied only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    validate_oncokb_token()

    log.info('annotating %s ...' % argv.input_file)
    # The format name is lower-cased before being passed on.
    cna_format = argv.cna_file_format.lower()
    process_cna_data(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
        argv.annotate_gain_loss,
        cna_format,
    )

    log.info('done!')
Exemplo n.º 6
0
def main(argv):
    """Command-line entry point for annotating a MAF file.

    Prints the usage text and exits when ``argv.help`` is set; exits with
    status 2 when ``-i``, ``-o`` or ``-b`` is empty. Otherwise applies the
    optional settings (sample filter, cancer hotspots URL, OncoKB URL), stores
    the API token, optionally loads cancer types from the clinical file,
    resolves the query type and default reference genome by upper-cased name,
    validates the token and hands off to ``processalterationevents``.

    Args:
        argv: Parsed command-line namespace.

    Raises:
        KeyError: Re-raised after logging when ``argv.query_type`` or
            ``argv.default_reference_genome`` is not a member name of
            ``QueryType`` / ``ReferenceGenome`` respectively.
    """
    if argv.help:
        usage = (
            '\n'
            'MafAnnotator.py -i <input MAF file> -o <output MAF file> [-p previous results] [-c <input clinical file>] '
            '[-s sample list filter] [-t <default tumor type>] [-u oncokb-base-url] [-b oncokb api bear token] [-a] [-q query type] [-r default reference genome]\n'
            'For definitions of the MAF format, please see https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/\n\n'
            'Essential MAF columns for querying HGVSp_Short and HGVSp(case insensitive):\n'
            '    Hugo_Symbol: Hugo gene symbol\n'
            '    Tumor_Sample_Barcode: sample ID\n'
            '    HGVSp(query type: HGVSp): protein change in HGVSp format\n'
            '    HGVSp_Short(query type: HGVSp_Short): protein change in HGVSp format using 1-letter amino-acid codes\n'
            'Essential MAF columns for querying HGVSg(case insensitive):\n'
            '    Tumor_Sample_Barcode: sample ID\n'
            '    HGVSg: Genomic change in HGVSg format\n'
            'Essential MAF columns for querying genomic change(case insensitive):\n'
            '    Tumor_Sample_Barcode: sample ID\n'
            '    Chromosome: Chromosome number\n'
            '    Start_Position: Mutation start coordinate\n'
            '    End_Position: Mutation end coordinate\n'
            '    Reference_Allele: The plus strand reference allele at this position\n'
            '    Tumor_Seq_Allele1: Primary data genotype for tumor sequencing (discovery) allele\n'
            '    Tumor_Seq_Allele2: Tumor sequencing (discovery) allele 2\n'
            'Essential clinical columns:\n'
            '    SAMPLE_ID: sample ID\n'
            '    ONCOTREE_CODE: tumor type code from oncotree (http://oncotree.mskcc.org)\n'
            'Cancer type will be assigned based on the following priority:\n'
            '    1) ONCOTREE_CODE in clinical data file\n'
            '    2) ONCOTREE_CODE exist in MAF\n'
            '    3) default tumor type (-t)\n'
            'Query type only allows the following values (case-insensitive):\n'
            '    - HGVSp_Short\n'
            '      It reads from column HGVSp_Short or Alteration\n'
            '    - HGVSp\n'
            '      It reads from column HGVSp or Alteration\n'
            '    - HGVSg\n'
            '      It reads from column HGVSg or Alteration\n'
            '    - Genomic_Change\n'
            '      It reads from columns Chromosome, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele1 and Tumor_Seq_Allele2  \n'
            'Reference Genome only allows the following values(case-insensitive):\n'
            '    - GRCh37\n'
            '      GRCh38\n'
            'Default OncoKB base url is https://www.oncokb.org.\n'
        )
        log.info(usage)
        sys.exit()

    # Each mandatory option paired with its flag, in the order reported.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) + ' can not be empty')
        log.info('For help: python MafAnnotator.py -h')
        sys.exit(2)

    # Optional settings are applied only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    hotspots_url = argv.cancer_hotspots_base_url
    if hotspots_url:
        setcancerhotspotsbaseurl(hotspots_url)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    log.info('annotating %s ...' % argv.input_file)

    # Look up the enum members by upper-cased name; an unknown name is logged
    # and the KeyError propagates to the caller.
    user_input_query_type = None
    requested_query_type = argv.query_type
    if requested_query_type is not None:
        try:
            user_input_query_type = QueryType[requested_query_type.upper()]
        except KeyError:
            log.error(
                'Query type is not acceptable. Only the following allows(case insensitive): HGVSp_Short, HGVSp, HGVSg, Genomic_Change')
            raise

    default_reference_genome = None
    requested_genome = argv.default_reference_genome
    if requested_genome is not None:
        try:
            default_reference_genome = ReferenceGenome[requested_genome.upper()]
        except KeyError:
            log.error(
                'Reference genome is not acceptable. Only the following allows(case insensitive): GRCh37, GRCh38')
            raise

    validate_oncokb_token()

    processalterationevents(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
        argv.annotate_hotspots,
        user_input_query_type,
        default_reference_genome,
    )

    log.info('done!')