Пример #1
0
 def add_arguments(self, parser):
     """Register the variant-collection options on ``parser``.

     Adds two optional options, ``-vc/--variant_collection`` and
     ``-vs/--collection_set``; each takes a single value.

     Args:
         parser: parser object exposing an argparse-style ``add_argument``.
     """
     option_specs = (
         ('-vc', '--variant_collection', "variant collection sample name"),
         ('-vs', '--collection_set', "variant collection set name"),
     )
     for short_flag, long_flag, description in option_specs:
         parser.add_argument(short_flag, long_flag, help=description)
Пример #2
0
def parse_args():
    """Parse the command-line options of the hgvs VCF normalizer.

    All four options are optional and take a string value; an option that is
    not given on the command line is returned as ``None``.

    Returns:
        argparse.Namespace with the attributes ``inVCF``, ``outVCF``,
        ``refFASTA`` and ``refSEQ``.
    """
    # The summary must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing the sentence
    # positionally replaced the program name in the usage/help output.
    parser = argparse.ArgumentParser(
        description=
        'Input a bgzip compressed and tabix indexed vcf and output hgvs normalized vcf filename.'
    )
    parser.add_argument(
        '-i',
        '--inVCF',
        type=str,
        help='Input bgzip compressed and tabix indexed vcf filepath.')
    parser.add_argument('-o',
                        '--outVCF',
                        type=str,
                        help='Output hgvs-normalized VCF filename.')
    parser.add_argument(
        '-r',
        '--refFASTA',
        type=str,
        help='Input FASTA format reference filename. ex: "hg38.p12.fa"')
    parser.add_argument(
        '-g',
        '--refSEQ',
        type=str,
        help='Input GenePred format refSeq filename. ex: "ncbiRefSeq.txt"')

    return parser.parse_args()
Пример #3
0
def main():
    """Convert a tab-separated variant table and write the converted rows.

    Command-line options:
        -i/--readable_input    path of the input table.
        -o/--writable_output   path of the output table.
        -g/--genome_path       path handed to ``SequenceFileDB``.
        -r/--reference_genome  stored in the module global ``REFGENE``
                               (default ``./hg38.BRCA.refGene.txt``).

    The output starts with a header built from ``OUTPUT_COLUMNS``. The first
    input line is treated as the header: spaces in column names become
    underscores and the positions of ``COLUMNS_TO_SAVE`` are looked up. For
    every following row the function:
        * keeps only the first space-separated token of
          ``Date_last_evaluated``;
        * passes ``Condition_ID_value`` through ``convert_OMIM_id``;
        * passes ``HGVS`` through ``cleanup_HGVS``;
        * calls ``convert_HGVS`` on ``<Reference_sequence>:<HGVS>``;
        * fills an empty ``Abbrev_AA_change`` from the protein HGVS;
        * writes the saved columns, with the genome coordinate inserted at
          position 1 and the protein HGVS appended.

    Raises:
        Exception: whatever ``convert_HGVS`` raised, when the failing HGVS
            value does not contain ";" (only ";" values get the None filler).
    """
    global REFGENE

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--readable_input',
                        help='readable input file for conversion.')
    parser.add_argument('-o', '--writable_output',
                        help='writable output file for conversion.')
    parser.add_argument('-g', '--genome_path', help='Link to hg38.fa.')
    parser.add_argument('-r', '--reference_genome', default='./hg38.BRCA.refGene.txt',
                        help='Link to hg38.BRCA.refgene.txt.')

    args = parser.parse_args()
    GENOME = SequenceFileDB(args.genome_path)
    REFGENE = args.reference_genome

    # Compiled once instead of on every line: matches a tab together with any
    # spaces directly before or after it.
    tab_padding = re.compile("( )*\t( )*")

    # ``with`` guarantees both files are closed even if a row fails.
    with open(args.readable_input, "r") as f_in, \
            open(args.writable_output, "w") as f_out:
        f_out.write("\t".join(OUTPUT_COLUMNS) + "\n")
        for index, line in enumerate(f_in):
            # Clean the line by removing spaces adjacent to tabs.
            line = tab_padding.sub("\t", line)
            # dtype=object keeps full Python strings. A default numpy string
            # array is fixed-width and silently truncates any longer value
            # assigned to one of its cells below.
            items = np.array(line.rstrip().split("\t"), dtype=object)
            if index == 0:
                # Handle column names
                columns = np.array([i.replace(" ", "_") for i in items])
                index_to_save = [np.where(columns == i)[0][0] for i in COLUMNS_TO_SAVE]
                column_idx = dict(zip(COLUMNS_TO_SAVE, index_to_save))
                continue

            # In the date last evaluated field, delete the time last
            # evaluated if provided.
            date_last_evaluated_idx = column_idx["Date_last_evaluated"]
            items[date_last_evaluated_idx] = items[date_last_evaluated_idx].split(' ')[0]

            OMIM_id_index = column_idx["Condition_ID_value"]
            items[OMIM_id_index] = convert_OMIM_id(items[OMIM_id_index])

            reference_sequence = items[column_idx["Reference_sequence"]]
            items[column_idx["HGVS"]] = cleanup_HGVS(reference_sequence,
                                                     items[column_idx["HGVS"]], HP, EVM)
            HGVS_cDNA = reference_sequence + ":" + items[column_idx["HGVS"]]
            # Progress trace; this form prints the same text under Python 2
            # and Python 3.
            print("%s %s %s" % (reference_sequence, items[column_idx["HGVS"]], HGVS_cDNA))

            try:
                genome_coor, HGVS_p = convert_HGVS(HGVS_cDNA, GENOME)
            except Exception:
                # Only values containing ";" get the None filler. Any other
                # failure is re-raised: carrying on would write the previous
                # row's coordinates (or hit an unbound name on the first row).
                if ";" not in items[column_idx["HGVS"]]:
                    raise
                genome_coor, HGVS_p = create_None_filler()

            aa_abrev_index = column_idx["Abbrev_AA_change"]
            if HGVS_p not in ["p.?", "p.(=)", "None"] and items[aa_abrev_index] == '':
                items[aa_abrev_index] = HGVS_p_to_AA_abrev(HGVS_p)

            final_items = list(items[index_to_save])
            final_items.insert(1, genome_coor)
            final_items.append(HGVS_p)
            f_out.write("\t".join(final_items) + "\n")
Пример #4
0
def main():
    """Parse the command line and hand the options to ``extractFields``.

    Positional arguments: ``vcf`` and ``output_file``.
    Optional arguments: ``-n/--negativeIndividuals`` (single value) and
    ``-f/--fields`` (one or more values).
    """
    cli = argparse.ArgumentParser(
        description='Script that produces sample based excel readable file for annotated variants.')

    # Positionals are declared through ``dest`` only, in command-line order.
    positional_specs = (
        ('vcf', 'Path to the vcf. This file must have been split and normalized before.'),
        ('output_file', 'Path to the output tab separated file.'),
    )
    for destination, description in positional_specs:
        cli.add_argument(dest=destination, help=description)

    cli.add_argument(
        '-n', '--negativeIndividuals',
        metavar="neg_indivuals",
        help='File with negative sample names for a given phenotype. Will add column isPositive to the output.')
    cli.add_argument('-f', '--fields', nargs='+', help='Specific fields to include')

    opts = cli.parse_args()
    extractFields(opts.vcf, opts.output_file, opts.negativeIndividuals, opts.fields)
Пример #5
0
def main():
    """Convert a tab-separated variant table and write the converted rows.

    Command-line options:
        -i/--readable_input    input table, opened for reading by argparse.
        -o/--writable_output   output table, opened for writing by argparse.
        -g/--genome_path       path handed to ``SequenceFileDB``.
        -r/--reference_genome  stored in the module global ``REFGENE``
                               (default ``./hg38.BRCA.refGene.txt``).

    The output starts with a header built from ``OUTPUT_COLUMNS``. The first
    input line is treated as the header: spaces in column names become
    underscores and the positions of ``COLUMNS_TO_SAVE`` are looked up. For
    every following row the function:
        * passes ``Condition_ID_value`` through ``convert_OMIM_id``;
        * calls ``convert_HGVS`` on ``<Reference_sequence>:<HGVS>``;
        * fills an empty ``Abbrev_AA_change`` from the protein HGVS;
        * writes the saved columns, with the genome coordinate inserted at
          position 1 and the protein HGVS appended.

    Raises:
        Exception: whatever ``convert_HGVS`` raised, when the failing HGVS
            value does not contain ";" (only ";" values get the None filler).
    """
    global REFGENE

    parser = argparse.ArgumentParser()
    parser.add_argument('-i',
                        '--readable_input',
                        type=argparse.FileType('r'),
                        help='Opened readable input file for conversion.')
    parser.add_argument('-o',
                        '--writable_output',
                        type=argparse.FileType('w'),
                        help='Opened writable output file for conversion.')
    parser.add_argument('-g', '--genome_path', help='Link to hg38.fa.')
    parser.add_argument('-r',
                        '--reference_genome',
                        default='./hg38.BRCA.refGene.txt',
                        help='Link to hg38.BRCA.refgene.txt.')

    args = parser.parse_args()
    GENOME = SequenceFileDB(args.genome_path)
    REFGENE = args.reference_genome

    # The handles were opened by argparse; ``with`` closes both of them even
    # when a row fails part-way through.
    with args.readable_input as f_in, args.writable_output as f_out:
        f_out.write("\t".join(OUTPUT_COLUMNS) + "\n")
        for index, line in enumerate(f_in):
            # dtype=object keeps full Python strings. A default numpy string
            # array is fixed-width and silently truncates any longer value
            # assigned to one of its cells below.
            items = np.array(line.rstrip().split("\t"), dtype=object)
            if index == 0:
                # Handle column names
                columns = np.array([i.replace(" ", "_") for i in items])
                index_to_save = [
                    np.where(columns == i)[0][0] for i in COLUMNS_TO_SAVE
                ]
                column_idx = dict(zip(COLUMNS_TO_SAVE, index_to_save))
                continue

            OMIM_id_index = column_idx["Condition_ID_value"]
            items[OMIM_id_index] = convert_OMIM_id(items[OMIM_id_index])
            HGVS_cDNA = (items[column_idx["Reference_sequence"]] + ":" +
                         items[column_idx["HGVS"]])

            try:
                genome_coor, HGVS_p = convert_HGVS(HGVS_cDNA, GENOME)
            except Exception:
                # Only values containing ";" get the None filler. Any other
                # failure is re-raised: carrying on would write the previous
                # row's coordinates (or hit an unbound name on the first row).
                if ";" not in items[column_idx["HGVS"]]:
                    raise
                genome_coor, HGVS_p = create_None_filler()

            aa_abrev_index = column_idx["Abbrev_AA_change"]
            if HGVS_p not in ["p.?", "p.(=)", "None"] and items[aa_abrev_index] == '':
                items[aa_abrev_index] = HGVS_p_to_AA_abrev(HGVS_p)

            final_items = list(items[index_to_save])
            final_items.insert(1, genome_coor)
            final_items.append(HGVS_p)
            f_out.write("\t".join(final_items) + "\n")
            if sv.posedit.pos.start == sv.posedit.pos.end and sv.posedit.length_change(
            ) == 0:
                rv[MISSENSE_INDEX] += int(v.split(":")[1])
            else:
                rv[INFRAME_INDEX] += int(v.split(":")[1])

    return pd.Series(rv, index=INDEX_NAMES)


if __name__ == "__main__":
    # Script entry point: parse the command line, then load the 2D hotspots
    # table and normalise its column names.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # nargs="?" makes each positional optional so that its ``default`` is
    # actually used; argparse ignores ``default`` on a required positional.
    # Command lines that pass both paths behave exactly as before.
    parser.add_argument("hotspots_2d",
                        nargs="?",
                        default="../data/hotspots/v2_multi_type_residue.txt",
                        type=str,
                        help="2D cancerhotspots data file")
    parser.add_argument("hotspots_3d",
                        nargs="?",
                        default="../data/hotspots/3d_hotspots.txt",
                        type=str,
                        help="3D cancerhotspots data file")
    parser.add_argument("--removed_hotspots",
                        default=None,
                        type=str,
                        help='Output removed hotspots')
    args = parser.parse_args()

    # The 2D file is read as tab-separated; headers are lower-cased and "-"
    # is replaced by "_".
    hotspots_2d = pd.read_csv(args.hotspots_2d, sep="\t")
    hotspots_2d.columns = [
        c.lower().replace("-", "_") for c in hotspots_2d.columns
    ]
Пример #7
0
def supply_args():
    """
    Build the command-line parser and return the parsed arguments.
    https://docs.python.org/2.7/library/argparse.html

    Positionals: infile, outfile.
    Options: --evf, --vf, --ccds_evf, --ccds_vf, --version.

    Raises SyntaxError when neither --evf nor --vf is supplied.
    """
    cli = argparse.ArgumentParser(description='')
    cli.add_argument('infile',
                     help='Input VCF to apply Annovar annotations to.')
    cli.add_argument('outfile', help='Output VCF')

    # The four Annovar inputs differ only in flag name and help text.
    annovar_inputs = (
        ('--evf', 'Input exonic_variant_function Annovar file.'),
        ('--vf', 'Input variant_function Annovar file.'),
        ('--ccds_evf', 'Input CCDS exonic_variant_function Annovar file.'),
        ('--ccds_vf', 'Input CCDS variant_function Annovar file.'),
    )
    for flag, description in annovar_inputs:
        cli.add_argument(flag, help=description)

    cli.add_argument('--version',
                     action='version',
                     version='%(prog)s ' + VERSION)
    parsed = cli.parse_args()

    # At least one of the two variant_function inputs is mandatory.
    if parsed.evf or parsed.vf:
        return parsed
    raise SyntaxError("Must specify either vf or evf, or both.")
Пример #8
0
def supply_args():
    """
    Populate args.
    https://docs.python.org/2.7/library/argparse.html

    Required options: --sam, --outfile, --ref.
    Optional: --ref_build (default 'hg19'), --target, --coords, --paired,
    --version.

    --target and --coords are mutually exclusive. When neither is given,
    ``args.target`` falls back to 'flt3_e14', which was the previous argparse
    default. When only --coords is given, ``args.target`` is None.

    Raises Exception when both --target and --coords are supplied.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--sam', required=True, help='Input SAM File')
    parser.add_argument('--outfile', required=True, help='Output File')
    parser.add_argument('--ref',
                        required=True,
                        help='Input Reference Sequence')
    parser.add_argument('--ref_build',
                        choices=['hg19'],
                        default='hg19',
                        help='Which reference build to utilize')
    # No argparse-level default here. With default='flt3_e14' the target was
    # always set, so every use of --coords tripped the "both" check below and
    # custom coordinates could never be used.
    parser.add_argument('--target',
                        choices=['flt3', 'flt3_e13', 'flt3_e14', 'flt3_e15'],
                        default=None,
                        help='Region to target')
    parser.add_argument(
        '--coords',
        help='Coordinate range, in the format [chrom:start-stop], 1-based.')
    parser.add_argument('--paired',
                        action='store_true',
                        help='Data is paired-end data.')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + VERSION)
    args = parser.parse_args()

    if args.target and args.coords:
        raise Exception(
            "You can't specify both a known target region and a custom coordinate at the same time."
        )
    if not args.target and not args.coords:
        # Neither given: keep the historical default region, so existing
        # invocations without --target behave as before.
        args.target = 'flt3_e14'

    return args
Пример #9
0
import os
import sys
import re
import json
import argparse

from civicpy import civic
import hgvs.parser
import hgvs.dataproviders.uta
import hgvs.assemblymapper

# Command-line interface: both directory options are mandatory strings.
parser = argparse.ArgumentParser(
    description='Retrieve civic variant info from CIViC database')
parser.add_argument(
    '-r', '--ref_dir',
    required=True,
    type=str,
    help='directory for reference files',
)
parser.add_argument(
    '-o', '--out_dir',
    required=True,
    type=str,
    help='output directory for parsed files',
)

# Keep the parsed options as a plain dict keyed by option name.
args = vars(parser.parse_args())
ref_dir, results_dir = args['ref_dir'], args['out_dir']

# Stop with exit status 1 when the reference directory path does not exist.
if not os.path.exists(ref_dir):
    print("Please indicate the reference directory")
    sys.exit(1)
Пример #10
0
 def add_arguments(self, parser):
     """Register the two mandatory options ``--vcf`` and ``--reference``.

     Args:
         parser: parser object exposing an argparse-style ``add_argument``.
     """
     for flag in ('--vcf', '--reference'):
         parser.add_argument(flag, required=True)
            if sv.posedit.pos.start == sv.posedit.pos.end and sv.posedit.length_change(
            ) == 0:
                rv[MISSENSE_INDEX] += int(v.split(":")[1])
            else:
                rv[INFRAME_INDEX] += int(v.split(":")[1])

    return pd.Series(rv, index=INDEX_NAMES)


if __name__ == "__main__":
    # Script entry point: build the command-line parser.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # nargs="?" makes each positional optional so that its ``default`` is
    # actually used; argparse ignores ``default`` on a required positional.
    # Command lines that pass both paths behave exactly as before.
    parser.add_argument("hotspots_2d",
                        nargs="?",
                        default="../data/hotspots/v2_multi_type_residue.txt",
                        type=str,
                        help="2D cancerhotspots data file")
    parser.add_argument("hotspots_3d",
                        nargs="?",
                        default="../data/hotspots/3d_hotspots.txt",
                        type=str,
                        help="3D cancerhotspots data file")
    parser.add_argument("--removed_hotspots",
                        default=None,
                        type=str,
                        help='Output removed hotspots')
    # This option is mandatory, so its ``default`` is never consulted.
    parser.add_argument(
        "--override_unassigned_transcript_id_2d_hotspots",
        default=None,
        required=True,
        type=str,
        help='Override transcript_id field for 2d hotspots without assignment')
Пример #12
0
def parse_args():
    """
    Description:
        function 'parse_args' builds the command-line parser for the BRCA
        coordinate fill-in script and returns the parsed argparse namespace.
        File options are opened by argparse (seven for reading, '--outBRCA'
        for writing) and are None when omitted. '--calcProtein' is a flag
        that is False unless given. '--artifacts_dir' is a plain string.
    """
    cli = argparse.ArgumentParser(
        description=
        'Fill in hg18, hg19 genomic coordinates and cDNA hgvs strings in merged BRCA variant dataset.'
    )

    # Read-mode file options, registered in the order they appear in --help.
    readable_inputs = (
        ('-i', '--inBRCA', 'Input ENIGMA BRCA datatable file for conversion.'),
        ('-j', '--inHg18', 'Input hg18 reference genome fasta file.'),
        ('-k', '--inHg19', 'Input hg19 reference genome fasta file.'),
        ('-l', '--inHg38', 'Input hg38 reference genome fasta file.'),
        ('-r', '--inRefSeq18',
         'Input refseq annotation hg18-based genepred file.'),
        ('-s', '--inRefSeq19',
         'Input refseq annotation hg19-based genepred file.'),
        ('-t', '--inRefSeq38',
         'Input refseq annotation hg38-based genepred file.'),
    )
    for short_flag, long_flag, description in readable_inputs:
        cli.add_argument(short_flag,
                         long_flag,
                         type=argparse.FileType('r'),
                         help=description)

    cli.add_argument(
        '-p',
        '--calcProtein',
        dest='calcProtein',
        action='store_true',
        help=
        'Set flag for hgvs protein fill-in. May not result in complete fill-in.'
    )
    cli.add_argument('-o',
                     '--outBRCA',
                     type=argparse.FileType('w'),
                     help='Output filled in ENIGMA BRCA datatable file.')
    cli.add_argument('--artifacts_dir',
                     help='Artifacts directory with pipeline artifact files.')

    cli.set_defaults(calcProtein=False)
    return cli.parse_args()
"""
Created on Fri Mar  1 11:58:04 2019

@author: brigidameireles
"""
import argparse
import hgvs.variantmapper
import hgvs.parser
import hgvs.assemblymapper
import hgvs.dataproviders.uta
import re

# Command-line interface: one mandatory option carrying the input string.
parser = argparse.ArgumentParser(
    description='Use HGVS package to find the Chromosome and the Position')
parser.add_argument(
    '-in', '--input',
    required=True,
    help="Input example: 'NM_001637.3:c.1582G>A'",
)
# NOTE: the command line is parsed at module level, i.e. also on import.
args = parser.parse_args()

if __name__ == "__main__":

    hgvs_c = args.input
    d_chromo = {
        'NC_000001': 1,
        'NC_000002': 2,
        'NC_000003': 3,
        'NC_000004': 4,
        'NC_000005': 5,
        'NC_000006': 6,
        'NC_000007': 7,
        'NC_000008': 8,
def parse_args():
    """
    Description:
        function 'parse_args' declares the command-line options of the BRCA
        coordinate fill-in script and returns the parsed argparse namespace.
        File options are opened by argparse and are None when omitted.
        '--calcProtein' is a boolean flag that is False unless given.
    """
    parser = argparse.ArgumentParser(
        description='Fill in hg18, hg19 genomic coordinates and cDNA hgvs strings in merged BRCA variant dataset.')

    def add_file_option(short_flag, long_flag, mode, description):
        # Register one option whose value argparse opens with ``mode``.
        parser.add_argument(short_flag, long_flag,
                            type=argparse.FileType(mode), help=description)

    add_file_option('-i', '--inBRCA', 'r',
                    'Input ENIGMA BRCA datatable file for conversion.')
    add_file_option('-j', '--inHg18', 'r',
                    'Input hg18 reference genome fasta file.')
    add_file_option('-k', '--inHg19', 'r',
                    'Input hg19 reference genome fasta file.')
    add_file_option('-l', '--inHg38', 'r',
                    'Input hg38 reference genome fasta file.')
    add_file_option('-r', '--inRefSeq18', 'r',
                    'Input refseq annotation hg18-based genepred file.')
    add_file_option('-s', '--inRefSeq19', 'r',
                    'Input refseq annotation hg19-based genepred file.')
    add_file_option('-t', '--inRefSeq38', 'r',
                    'Input refseq annotation hg38-based genepred file.')
    parser.add_argument(
        '-p', '--calcProtein',
        dest='calcProtein',
        action='store_true',
        help='Set flag for hgvs protein fill-in. May not result in complete fill-in.')
    add_file_option('-o', '--outBRCA', 'w',
                    'Output filled in ENIGMA BRCA datatable file.')
    parser.add_argument(
        '--artifacts_dir',
        help='Artifacts directory with pipeline artifact files.')

    parser.set_defaults(calcProtein=False)
    return parser.parse_args()
      'stop': stop,
      'ref_allele': ref_allele,
      'var_allele': var_allele,
    }
    return maf_var

def get_info(val):
    """Turn a ";"-separated string of ``key=value`` entries into a dict.

    Only entries that split on "=" into exactly two parts are kept; entries
    with no "=" or with more than one "=" are ignored. When a key repeats,
    the last occurrence wins.
    """
    split_entries = (entry.split("=") for entry in val.split(";"))
    return {parts[0]: parts[1] for parts in split_entries if len(parts) == 2}

# Command-line interface: every option below is a mandatory string.
parser = argparse.ArgumentParser(
    description='Generate civic variant info to be used with GDC mutation indexing')
parser.add_argument(
    '-i', '--gene_code',
    required=True, type=str,
    help='gene info from GDC-used gene model')
parser.add_argument(
    '-g', '--gdna_var',
    required=True, type=str,
    help='input civic variants for gDNA')
parser.add_argument(
    '-gv', '--gdna_vcf',
    required=True, type=str,
    help='input gDNA in VCF format after Liftover')
parser.add_argument(
    '-c', '--cdna_var',
    required=True, type=str,
    help='input civic variants for cDNA')
parser.add_argument(
    '-cv', '--cdna_vcf',
    required=True, type=str,
    help='input cDNA-predicted gDNA in VCF format after Liftover')
parser.add_argument(
    '-p', '--prot_var',
    required=True, type=str,
    help='input civic variants for prot')
parser.add_argument(
    '-o', '--out_dir',
    required=True, type=str,
    help='output directory for mapping/unmapping files')

# Keep the parsed options as a plain dict, then bind one module-level name
# per input/output path.
args = vars(parser.parse_args())
gene_code_fn = args['gene_code']
gdna_var_fn, gdna_vcf_fn = args['gdna_var'], args['gdna_vcf']
cdna_var_fn, cdna_vcf_fn = args['cdna_var'], args['cdna_vcf']
prot_var_fn = args['prot_var']
out_dir = args['out_dir']
Пример #16
0
def supply_args():
    """Build the command-line parser and return the validated arguments.

    Options: --input_vcf, --output_vcf, --chrom, --pos, --ref, --alt and
    --version. All are optional strings (None when omitted).

    Raises:
        Exception: when --input_vcf is given without --output_vcf, or when
            only some of --chrom/--pos/--ref/--alt are given.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--input_vcf', help='Input VCF.')
    parser.add_argument('--output_vcf', help='Output VCF.')

    parser.add_argument('--chrom', help='Chromosome.')
    parser.add_argument('--pos', help='Position.')
    parser.add_argument('--ref', help='Reference Allele.')
    parser.add_argument('--alt', help='Alternate Allele.')

    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + VERSION)
    args = parser.parse_args()

    if args.input_vcf and not args.output_vcf:
        raise Exception(
            'Must specify an output VCF if you are specifying an input VCF.')

    # The four variant fields are all-or-nothing. The check used to run only
    # when --chrom was present, so e.g. --pos on its own passed silently.
    variant_fields = (args.chrom, args.pos, args.ref, args.alt)
    if any(variant_fields) and not all(variant_fields):
        raise Exception('Must specify each of chrom, pos, ref, and alt.')

    return args