def main():
    """Trim each record's INFO to a fixed key set and tag OXOG failures.

    Reads a VCF from the positional ``inputvcf`` argument (stdin when it is
    omitted) and writes the result to ``--output`` (stdout by default).

    For every record a fresh INFO dict is built:

    * ``VAF``, ``t_alt_count`` and ``t_ref_count`` are kept only when
      present and greater than zero;
    * ``dbsnp``, ``cosmic``, ``Callers``, ``NumCallers``, ``repeat_masker``,
      ``1000genomes_AF`` and ``1000genomes_ID`` are kept whenever present;
    * ``dbsnp_somatic`` is set when bit 5 of the low byte of the
      hexadecimal ``dbsnp_VP`` value is set.

    A record whose ``OXOG_Fail`` INFO value equals the string ``'True'``
    additionally gets ``OXOGFAIL`` added to its FILTER column.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(
        description='Fix dbsnp VP calls and add OXOG filter')
    # nargs='?' makes the positional optional; without it argparse always
    # demands the argument and the stdin default can never be used.
    parser.add_argument('inputvcf', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Merged and annotated VCF file")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Specify output file (default:stdout)")
    args = parser.parse_args()

    reader = vcf.Reader(args.inputvcf)
    # Declare the new INFO/FILTER ids before the writer copies the header.
    reader.infos['dbsnp_somatic'] = vcf.parser._Info(
        id='dbsnp_somatic', num=None, type='Flag',
        desc='Known-somatic dbSNP variant', source=None, version=None)
    reader.filters['OXOGFAIL'] = vcf.parser._Filter(
        id='OXOGFAIL', desc="Failed OXOG oxidative artifact filter")
    reader.metadata['reference'] = (
        'ftp://ftp.sanger.ac.uk/pub/project/PanCancer/genome.fa.gz')

    writer = vcf.Writer(args.output, reader)

    positive_only = ('VAF', 't_alt_count', 't_ref_count')
    passthrough = ('dbsnp', 'cosmic', 'Callers', 'NumCallers',
                   'repeat_masker', '1000genomes_AF', '1000genomes_ID')

    for record in reader:
        info = record.INFO
        kept = {}

        # Counts/fractions are only carried over when strictly positive.
        for key in positive_only:
            if key in info and info[key] > 0:
                kept[key] = info[key]

        for key in passthrough:
            if key in info:
                kept[key] = info[key]

        if 'dbsnp_VP' in info:
            low_byte = int(info['dbsnp_VP'], 16) & 255
            if low_byte & 2**5:
                kept['dbsnp_somatic'] = True

        # The comparison is against the literal string 'True', not a bool.
        if 'OXOG_Fail' in info and info['OXOG_Fail'] == 'True':
            if record.FILTER is None:
                record.FILTER = ['OXOGFAIL']
            else:
                record.FILTER.append('OXOGFAIL')

        record.INFO = kept
        writer.write_record(record)

    return 0
def main():
    """Annotate a merged VCF with tumor/normal VAF fields where available.

    Reads a VCF from the positional ``inputvcf`` argument (stdin when it is
    omitted) and writes to ``--output`` (stdout by default).

    Records are dropped when ``INFO['Callers']`` is exactly ``['broad']`` or
    when the REF and first-ALT lengths differ by 100 or more.  For every
    remaining record INFO is replaced by a new dict containing:

    * the annotation keys ``dbsnp``, ``cosmic``, ``Callers``,
      ``NumCallers``, ``repeat_masker``, ``1000genomes_AF`` and
      ``1000genomes_ID`` when present;
    * ``model_score`` passed through ``round_dig(value, 3)`` (helper
      defined outside this block);
    * ``dbsnp_somatic`` when bit 5 of the low byte of the hexadecimal
      ``dbsnp_VP`` value is set;
    * ``t_vaf`` / ``t_alt_count`` / ``t_ref_count`` when ``TumorVarDepth``
      is present and positive, and the ``n_*`` equivalents from the
      ``Normal*`` fields.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(description='Annotate merged vcf with VAF information where available')
    # nargs='?' makes the positional optional; without it argparse always
    # demands the argument and the stdin default can never be used.
    parser.add_argument('inputvcf', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
                        help="Merged and annotated VCF file")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
                        help="Specify output file (default:stdout)")
    args = parser.parse_args()

    reader = vcf.Reader(args.inputvcf)
    reader.metadata['reference'] = 'ftp://ftp.sanger.ac.uk/pub/project/PanCancer/genome.fa.gz'

    # Header declarations for every INFO/FILTER id this script may emit.
    reader.infos['dbsnp_somatic'] = vcf.parser._Info(id='dbsnp_somatic', num=None, type='Flag',
                                                     desc='Known-somatic dbSNP variant',
                                                     source=None, version=None)
    reader.infos['t_vaf'] = vcf.parser._Info(id='t_vaf', num=1, type='Float',
                                             desc='VAF in tumor from sga',
                                             source=None, version=None)
    reader.infos['n_vaf'] = vcf.parser._Info(id='n_vaf', num=1, type='Float',
                                             desc='VAF in normal from sga',
                                             source=None, version=None)
    reader.infos['t_alt_count'] = vcf.parser._Info(id='t_alt_count', num=1, type='Integer',
                                                   desc='Tumor alt count from sga',
                                                   source=None, version=None)
    reader.infos['t_ref_count'] = vcf.parser._Info(id='t_ref_count', num=1, type='Integer',
                                                   desc='Tumor ref count from sga if non-zero',
                                                   source=None, version=None)
    reader.infos['n_alt_count'] = vcf.parser._Info(id='n_alt_count', num=1, type='Integer',
                                                   desc='Normal alt count from sga if non-zero',
                                                   source=None, version=None)
    reader.infos['n_ref_count'] = vcf.parser._Info(id='n_ref_count', num=1, type='Integer',
                                                   desc='Normal ref count from sga if non-zero',
                                                   source=None, version=None)
    reader.infos['model_score'] = vcf.parser._Info(id='model_score', num=1, type='Float',
                                                   desc='consensus model score, 0-1',
                                                   source=None, version=None)
    reader.filters['LOWSUPPORT'] = vcf.parser._Filter(id='LOWSUPPORT',
                                                      desc='Insufficient support in consensus model')

    writer = vcf.Writer(args.output, reader)

    for record in reader:
        info = record.INFO

        # skip broad private calls
        if info['Callers'] == ['broad']:
            continue

        # skip calls that are defined to be SVs
        if abs(len(record.REF) - len(record.ALT[0])) >= 100:
            continue

        new_info = {}

        # copy some records over directly
        for item in ('dbsnp', 'cosmic', 'Callers', 'NumCallers', 'repeat_masker',
                     '1000genomes_AF', '1000genomes_ID'):
            if item in info:
                new_info[item] = info[item]

        if 'model_score' in info:
            new_info['model_score'] = round_dig(float(info['model_score']), 3)

        if 'dbsnp_VP' in info:
            qualbyte = int(info['dbsnp_VP'], 16) & 255
            if qualbyte & 2**5:
                new_info['dbsnp_somatic'] = True

        # Depth-derived fields are only emitted when variant reads exist.
        if 'TumorVarDepth' in info and info['TumorVarDepth'] > 0:
            new_info['t_vaf'] = info['TumorVAF']
            new_info['t_alt_count'] = info['TumorVarDepth']
            new_info['t_ref_count'] = info['TumorTotalDepth'] - info['TumorVarDepth']

        if 'NormalVarDepth' in info and info['NormalVarDepth'] > 0:
            new_info['n_vaf'] = info['NormalVAF']
            new_info['n_alt_count'] = info['NormalVarDepth']
            new_info['n_ref_count'] = info['NormalTotalDepth'] - info['NormalVarDepth']

        record.INFO = new_info
        writer.write_record(record)

    return 0
def main():
    """Rewrite a VCF with a reduced INFO column and an OXOG failure filter.

    Input is the positional ``inputvcf`` file, or stdin when it is not
    given; output goes to ``--output`` or stdout.

    Per record, INFO is replaced by a dict holding:

    * ``VAF``, ``t_alt_count``, ``t_ref_count`` - only when present and > 0;
    * ``dbsnp``, ``cosmic``, ``Callers``, ``NumCallers``, ``repeat_masker``,
      ``1000genomes_AF``, ``1000genomes_ID`` - whenever present;
    * ``dbsnp_somatic`` - when the hexadecimal ``dbsnp_VP`` value has bit 5
      of its low byte set.

    ``OXOGFAIL`` is added to FILTER when the original INFO carries
    ``OXOG_Fail`` equal to the string ``'True'``.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(description='Fix dbsnp VP calls and add OXOG filter')
    # A positional only honours ``default`` when it is optional, hence
    # nargs='?'; otherwise argparse errors out before stdin is considered.
    parser.add_argument('inputvcf', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
                        help="Merged and annotated VCF file")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
                        help="Specify output file (default:stdout)")
    args = parser.parse_args()

    reader = vcf.Reader(args.inputvcf)
    reader.infos['dbsnp_somatic'] = vcf.parser._Info(id='dbsnp_somatic', num=None, type='Flag',
                                                     desc='Known-somatic dbSNP variant',
                                                     source=None, version=None)
    reader.filters['OXOGFAIL'] = vcf.parser._Filter(id='OXOGFAIL',
                                                    desc="Failed OXOG oxidative artifact filter")
    reader.metadata['reference'] = 'ftp://ftp.sanger.ac.uk/pub/project/PanCancer/genome.fa.gz'

    # The writer takes its header from the (now amended) reader.
    writer = vcf.Writer(args.output, reader)

    for record in reader:
        old_info = record.INFO

        # copy some records over directly
        new_info = {
            item: old_info[item]
            for item in ['VAF', 't_alt_count', 't_ref_count']
            if item in old_info and old_info[item] > 0
        }
        for item in ['dbsnp', 'cosmic', 'Callers', 'NumCallers', 'repeat_masker',
                     '1000genomes_AF', '1000genomes_ID']:
            if item in old_info:
                new_info[item] = old_info[item]

        if 'dbsnp_VP' in old_info:
            qualbyte = int(old_info['dbsnp_VP'], 16) & 255
            somatic = (qualbyte & 2**5) > 0
            if somatic:
                new_info['dbsnp_somatic'] = True

        # 'OXOG_Fail' is compared as text; only the exact string 'True' fails.
        if 'OXOG_Fail' in old_info and old_info['OXOG_Fail'] == 'True':
            if record.FILTER is None:
                record.FILTER = ['OXOGFAIL']
            else:
                record.FILTER.append('OXOGFAIL')

        record.INFO = new_info
        writer.write_record(record)

    return 0
def main():
    """Add the ``SEXF`` filter to Y-chromosome records of female donors.

    Reads a VCF from ``--input`` (stdin by default) and writes every record
    to ``--output`` (stdout by default).  Only when ``--sex`` is exactly
    ``female`` is the ``SEXF`` filter declared in the header and added to
    records whose CHROM is ``Y`` or ``chrY``; otherwise records are copied
    through unchanged.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(
        description='Filter Y-chromosome calls if sex is female')
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        default=sys.stdin, help="input VCF file")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Specify output file (default:stdout)")
    parser.add_argument(
        '-s', '--sex', type=str, default="male",
        help="Apply filter if this flag is exactly 'female' (default:male)")
    args = parser.parse_args()

    filtername = 'SEXF'
    apply_filter = args.sex == "female"

    reader = vcf.Reader(args.input)
    if apply_filter:
        # Declare the filter before the writer copies the header.
        reader.filters[filtername] = vcf.parser._Filter(
            id=filtername,
            desc='Likely artifact or call in PAR region: Y-chromosome variant in female donor')
    writer = vcf.Writer(args.output, reader)

    for record in reader:
        if not (apply_filter and record.CHROM in ['Y', 'chrY']):
            # Nothing to tag: pass the record through untouched.
            writer.write_record(record)
            continue

        if record.FILTER:
            record.FILTER += [filtername]
        else:
            record.FILTER = [filtername]
        writer.write_record(record)

    return 0
def main():
    """Flag Y-chromosome calls with ``SEXF`` when the donor is female.

    Every input record (``--input``, default stdin) is written to
    ``--output`` (default stdout).  If ``--sex`` equals ``female``, the
    ``SEXF`` filter is registered in the header and added to each record
    located on ``Y`` / ``chrY``.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(description='Filter Y-chromosome calls if sex is female')
    parser.add_argument('-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
                        help="input VCF file")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
                        help="Specify output file (default:stdout)")
    parser.add_argument('-s', '--sex', type=str, default="male",
                        help="Apply filter if this flag is exactly 'female' (default:male)")
    args = parser.parse_args()

    filter_id = 'SEXF'
    donor_is_female = (args.sex == "female")

    reader = vcf.Reader(args.input)
    if donor_is_female:
        reader.filters[filter_id] = vcf.parser._Filter(
            id=filter_id,
            desc='Likely artifact or call in PAR region: Y-chromosome variant in female donor')

    writer = vcf.Writer(args.output, reader)
    for record in reader:
        on_y = donor_is_female and record.CHROM in ('Y', 'chrY')
        if on_y and record.FILTER:
            # Record already carries filters: extend the existing list.
            record.FILTER += [filter_id]
        elif on_y:
            record.FILTER = [filter_id]
        writer.write_record(record)

    return 0
def main():
    """Tag VCF records that appear in a MAF file with an INFO value or filter.

    The positional ``MAF`` path and ``--column`` are handed to
    ``get_classification_dict_from_MAF`` (defined outside this block); each
    record is keyed with ``variant_tuple(record, record.ALT[0])`` (also
    external).  When the key is found:

    * with ``--info``: ``INFO[name]`` is set to the dictionary value;
    * otherwise: ``name`` is added to the record's FILTER list.

    All records are written to ``--output``.  Each record must have exactly
    one ALT allele (enforced with ``assert``).

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(
        description='Add info or filter tag based on presence in MAF')
    parser.add_argument('MAF', type=str,
                        help="MAF file for variant classification annotation")
    parser.add_argument('name', help='Name of info field or filter')
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="VCF file to be processed (default: stdin)")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Output file (default:stdout)")
    parser.add_argument('-n', '--info', action='store_true',
                        help="Add info flag rather than filter")
    parser.add_argument('-c', '--column', type=str,
                        help='column in MAF to use for info, if present')
    parser.add_argument('-d', '--description', default="", type=str,
                        help='description of new info/filter field')
    args = parser.parse_args()

    tag = args.name
    reader = vcf.Reader(args.input)
    # Register the new id in the header before the writer is created.
    if not args.info:
        reader.filters[tag] = vcf.parser._Filter(id=tag, desc=args.description)
    else:
        reader.infos[tag] = vcf.parser._Info(id=tag, num='1', type='String',
                                             desc=args.description,
                                             source=None, version=None)
    writer = vcf.Writer(args.output, reader)

    classification_dict = get_classification_dict_from_MAF(args.MAF,
                                                           args.column)

    for record in reader:
        assert len(record.ALT) == 1
        key = variant_tuple(record, record.ALT[0])

        if key not in classification_dict:
            writer.write_record(record)
            continue

        if args.info:
            record.INFO[tag] = classification_dict[key]
        elif record.FILTER:
            record.FILTER += [tag]
        else:
            record.FILTER = [tag]
        writer.write_record(record)

    return 0
def main():
    """Annotate or filter VCF records according to their presence in a MAF.

    ``--action info`` (the default) stores the value returned for the
    variant by ``get_classification_dict_from_MAF(MAF, column)`` under
    ``INFO[name]``; ``--action filter`` adds ``name`` to FILTER instead.
    Variants are keyed with ``variant_tuple(record, record.ALT[0])``; both
    helpers are defined outside this block.  Every record is written to
    ``--output``, and each must have exactly one ALT allele (``assert``).

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(description='Add info or filter tag based on presence in MAF')
    parser.add_argument('MAF', type=str, help="MAF file for variant classification annotation")
    parser.add_argument('name', help='Name of info field or filter')
    parser.add_argument('-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
                        help="VCF file to be processed (default: stdin)")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
                        help="Output file (default:stdout)")
    parser.add_argument('-a', '--action', choices=['info', 'filter'], default='info',
                        help='add tag (info) or filter based on presence in MAF (default:info)')
    parser.add_argument('-c', '--column', type=str,
                        help='column in MAF to use for info, if present')
    parser.add_argument('-d', '--description', default="", type=str,
                        help='description of new info/filter field')
    args = parser.parse_args()

    as_info = args.action == "info"

    reader = vcf.Reader(args.input)
    if as_info:
        header_entry = vcf.parser._Info(id=args.name, num='1', type='String',
                                        desc=args.description, source=None, version=None)
        reader.infos[args.name] = header_entry
    else:
        header_entry = vcf.parser._Filter(id=args.name, desc=args.description)
        reader.filters[args.name] = header_entry

    # The writer is built after the header has been amended.
    writer = vcf.Writer(args.output, reader)
    classification_dict = get_classification_dict_from_MAF(args.MAF, args.column)

    for record in reader:
        assert len(record.ALT) == 1
        variant = variant_tuple(record, record.ALT[0])
        listed = variant in classification_dict

        if listed and as_info:
            record.INFO[args.name] = classification_dict[variant]
        elif listed and not record.FILTER:
            record.FILTER = [args.name]
        elif listed:
            record.FILTER += [args.name]

        writer.write_record(record)

    return 0
def filter_calls():
    """Assign validation filters to each record from its read evidence.

    Reads a VCF from ``--input`` and writes every record to ``--output``
    with FILTER replaced and ``INFO['Validation_status']`` set to the
    comma-joined filter list.

    Per record:

    * if either ``NormalReads[0]`` or ``TumourReads[0]`` is below
      ``--mindepth`` the record is marked ``LOWDEPTH`` and nothing else is
      evaluated;
    * ``NOTSEEN`` when the tumour evidence reads sum to fewer than 7 or
      ``call_from_depths`` returns a falsy value;
    * ``STRANDBIAS`` when there are tumour reads and
      ``reject_from_strandbias`` is truthy;
    * ``GERMLINE`` when ``germline_hom_het`` is truthy, otherwise
      ``NORMALEVIDENCE`` when ``reject_from_normal_evidence_vs_noise`` is;
    * ``PASS`` when none of the above applied.

    The four decision helpers are defined outside this block.
    """
    parser = argparse.ArgumentParser(
        description='Set genotypes based on DP4 scores')
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout)
    parser.add_argument('-e', '--error', type=float, default=0.01,
                        help='Error rate')
    parser.add_argument('-t', '--callthreshold', type=float, default=0.02,
                        help='Max prob to call')
    parser.add_argument('-s', '--strandbias', type=float, default=0.10,
                        help='minimum strand ratio')
    parser.add_argument('-m', '--mindepth', type=int, default=10,
                        help='minimum total depth')
    parser.add_argument('-g', '--germlineprob', type=float, default=0.02,
                        help='Maximum prob of germline')
    args = parser.parse_args()

    vcf_reader = vcf.Reader(args.input)
    vcf_reader.infos['Validation_status'] = vcf.parser._Info(
        id='Validation_status', num='.', type='String',
        desc='Status from validation data', source=None, version=None)
    vcf_writer = vcf.Writer(args.output, vcf_reader)

    for record in vcf_reader:
        info = record.INFO
        normal_reads = int(info['NormalReads'][0])
        tumour_reads = int(info['TumourReads'][0])
        normal_evidence = [int(x) for x in info['NormalEvidenceReads']]
        tumour_evidence = [int(x) for x in info['TumourEvidenceReads']]

        # Too shallow in either sample: no further evaluation.
        if min(normal_reads, tumour_reads) < args.mindepth:
            record.FILTER = ['LOWDEPTH']
            info['Validation_status'] = 'LOWDEPTH'
            vcf_writer.write_record(record)
            continue

        # Collect failures locally, then assign to the record once.
        failures = []

        if sum(tumour_evidence) < 7 or not call_from_depths(
                tumour_reads, tumour_evidence, args.error,
                args.callthreshold):
            failures.append('NOTSEEN')

        if tumour_reads > 0 and reject_from_strandbias(
                tumour_reads, tumour_evidence, args.strandbias):
            failures.append('STRANDBIAS')

        # GERMLINE takes precedence over NORMALEVIDENCE.
        if germline_hom_het(normal_reads, normal_evidence, tumour_reads,
                            tumour_evidence, args.germlineprob):
            failures.append('GERMLINE')
        elif reject_from_normal_evidence_vs_noise(
                normal_reads, normal_evidence, tumour_reads,
                tumour_evidence, args.error):
            failures.append('NORMALEVIDENCE')

        if not failures:
            failures = ['PASS']

        record.FILTER = failures
        info['Validation_status'] = ','.join(failures)
        vcf_writer.write_record(record)
def filter_calls():
    """Label every VCF record with validation filters derived from read counts.

    Input comes from ``--input`` (default stdin); every record is written
    to ``--output`` (default stdout) with a rebuilt FILTER column that is
    mirrored, comma-joined, in ``INFO['Validation_status']``.

    Decision order for a record:

    1. ``LOWDEPTH`` (exclusive) if normal or tumour read depth is below
       ``--mindepth``.
    2. ``NOTSEEN`` unless at least 7 tumour evidence reads exist and
       ``call_from_depths`` accepts the call.
    3. ``STRANDBIAS`` if tumour reads exist and ``reject_from_strandbias``
       rejects.
    4. ``GERMLINE`` if ``germline_hom_het`` is truthy, else
       ``NORMALEVIDENCE`` if ``reject_from_normal_evidence_vs_noise`` is.
    5. ``PASS`` if nothing was added.

    The helper predicates are defined outside this block.
    """
    parser = argparse.ArgumentParser(description='Set genotypes based on DP4 scores')
    parser.add_argument('-i', '--input', type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('-o', '--output', type=argparse.FileType('w'), default=sys.stdout)
    parser.add_argument('-e', '--error', type=float, default=0.01, help='Error rate')
    parser.add_argument('-t', '--callthreshold', type=float, default=0.02, help='Max prob to call')
    parser.add_argument('-s', '--strandbias', type=float, default=0.10, help='minimum strand ratio')
    parser.add_argument('-m', '--mindepth', type=int, default=10, help='minimum total depth')
    parser.add_argument('-g', '--germlineprob', type=float, default=0.02,
                        help='Maximum prob of germline')
    args = parser.parse_args()

    vcf_reader = vcf.Reader(args.input)
    status_header = vcf.parser._Info(id='Validation_status', num='.', type='String',
                                     desc='Status from validation data',
                                     source=None, version=None)
    vcf_reader.infos['Validation_status'] = status_header
    vcf_writer = vcf.Writer(args.output, vcf_reader)

    for record in vcf_reader:
        normal_reads = int(record.INFO['NormalReads'][0])
        tumour_reads = int(record.INFO['TumourReads'][0])
        normal_evidence = list(map(int, record.INFO['NormalEvidenceReads']))
        tumour_evidence = list(map(int, record.INFO['TumourEvidenceReads']))

        shallow = normal_reads < args.mindepth or tumour_reads < args.mindepth
        if shallow:
            record.FILTER = ['LOWDEPTH']
            record.INFO['Validation_status'] = 'LOWDEPTH'
        else:
            record.FILTER = []

            # Short-circuits exactly like "too few reads OR not called".
            seen = sum(tumour_evidence) >= 7 and call_from_depths(
                tumour_reads, tumour_evidence, args.error, args.callthreshold)
            if not seen:
                record.FILTER = ['NOTSEEN']

            if tumour_reads > 0 and reject_from_strandbias(
                    tumour_reads, tumour_evidence, args.strandbias):
                record.FILTER += ['STRANDBIAS']

            if germline_hom_het(normal_reads, normal_evidence,
                                tumour_reads, tumour_evidence, args.germlineprob):
                record.FILTER += ['GERMLINE']
            elif reject_from_normal_evidence_vs_noise(normal_reads, normal_evidence,
                                                      tumour_reads, tumour_evidence,
                                                      args.error):
                record.FILTER += ['NORMALEVIDENCE']

            if not record.FILTER:
                record.FILTER = ['PASS']
            record.INFO['Validation_status'] = ','.join(record.FILTER)

        vcf_writer.write_record(record)
def main():
    """Apply a filter or INFO flag to VCF records listed in a MAF for a sample.

    ``get_entries_from_MAF(MAF)`` (defined outside this block) supplies the
    set of known entries.  Each record is looked up under
    ``variant_tuple(sample, record)`` (also external) and, when the REF and
    first-ALT lengths differ, additionally under
    ``(sample, CHROM, POS + |len(ALT[0]) - len(REF)|)``.
    NOTE(review): the second key presumably matches MAF rows that record
    the far end of an indel - confirm against ``get_entries_from_MAF``.

    On a match, ``filtername`` is added to FILTER (once), or with ``--info``
    set as a ``True`` INFO flag.  Every record is written to ``--output``.

    Returns:
        int: always 0.
    """
    # The original description was copied from a different script and
    # misdescribed this tool in --help.
    parser = argparse.ArgumentParser(
        description='Apply a filter or info flag to variants listed in a MAF file')
    parser.add_argument('MAF', type=str, help="MAF file for filtering")
    parser.add_argument('sample', type=str, help="tumour aliquot id")
    parser.add_argument('filtername', type=str, help="Filter name to apply")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Specify output file (default:stdout)")
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Merged and annotated VCF file (default: stdin)")
    parser.add_argument('-d', '--desc', type=str, default="",
                        help="Description of filter")
    parser.add_argument('-n', '--info', action='store_true',
                        help="Add info flag rather than filter")
    args = parser.parse_args()

    reader = vcf.Reader(args.input)
    # Declare the new id in the header before the writer copies it.
    if args.info:
        reader.infos[args.filtername] = vcf.parser._Info(
            id=args.filtername, num=0, type='Flag', desc=args.desc,
            source=None, version=None)
    else:
        reader.filters[args.filtername] = vcf.parser._Filter(
            id=args.filtername, desc=args.desc)

    writer = vcf.Writer(args.output, reader)
    entries = get_entries_from_MAF(args.MAF)

    for record in reader:
        variants = [variant_tuple(args.sample, record)]
        length_delta = abs(len(record.ALT[0]) - len(record.REF))
        if len(record.ALT[0]) != len(record.REF):
            variants.append(
                (args.sample, record.CHROM, record.POS + length_delta))

        for variant in variants:
            if variant not in entries:
                continue
            if args.info:
                record.INFO[args.filtername] = True
            elif not record.FILTER:
                record.FILTER = [args.filtername]
            elif args.filtername not in record.FILTER:
                # Avoid listing the same filter twice on one record.
                record.FILTER = record.FILTER + [args.filtername]

        writer.write_record(record)

    return 0
def main():
    """Write a VCF whose INFO column carries normalised tumor/normal VAF fields.

    Input is the positional ``inputvcf`` file, or stdin when it is not
    given; output goes to ``--output`` or stdout.

    Skipped records: those whose ``INFO['Callers']`` equals ``['broad']``
    and those whose REF and first-ALT lengths differ by at least 100.

    For the remaining records INFO is rebuilt from:

    * ``dbsnp``, ``cosmic``, ``Callers``, ``NumCallers``, ``repeat_masker``,
      ``1000genomes_AF``, ``1000genomes_ID`` (copied when present);
    * ``model_score`` via ``round_dig(value, 3)`` (helper defined outside
      this block);
    * ``dbsnp_somatic`` when bit 5 of the low byte of hexadecimal
      ``dbsnp_VP`` is set;
    * ``t_vaf``/``t_alt_count``/``t_ref_count`` from ``TumorVAF``,
      ``TumorVarDepth`` and ``TumorTotalDepth - TumorVarDepth`` when
      ``TumorVarDepth`` is present and positive; likewise ``n_*`` from the
      ``Normal*`` fields.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(
        description='Annotate merged vcf with VAF information where available')
    # A positional only honours ``default`` when it is optional, hence
    # nargs='?'; otherwise argparse errors out before stdin is considered.
    parser.add_argument('inputvcf', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Merged and annotated VCF file")
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Specify output file (default:stdout)")
    args = parser.parse_args()

    reader = vcf.Reader(args.inputvcf)
    reader.metadata['reference'] = (
        'ftp://ftp.sanger.ac.uk/pub/project/PanCancer/genome.fa.gz')

    # (id, Number, Type, Description) for each INFO field declared here,
    # in the order they are added to the header.
    info_specs = (
        ('dbsnp_somatic', None, 'Flag', 'Known-somatic dbSNP variant'),
        ('t_vaf', 1, 'Float', 'VAF in tumor from sga'),
        ('n_vaf', 1, 'Float', 'VAF in normal from sga'),
        ('t_alt_count', 1, 'Integer', 'Tumor alt count from sga'),
        ('t_ref_count', 1, 'Integer', 'Tumor ref count from sga if non-zero'),
        ('n_alt_count', 1, 'Integer', 'Normal alt count from sga if non-zero'),
        ('n_ref_count', 1, 'Integer', 'Normal ref count from sga if non-zero'),
        ('model_score', 1, 'Float', 'consensus model score, 0-1'),
    )
    for field_id, number, kind, text in info_specs:
        reader.infos[field_id] = vcf.parser._Info(
            id=field_id, num=number, type=kind, desc=text,
            source=None, version=None)
    reader.filters['LOWSUPPORT'] = vcf.parser._Filter(
        id='LOWSUPPORT', desc='Insufficient support in consensus model')

    writer = vcf.Writer(args.output, reader)

    copied_keys = ('dbsnp', 'cosmic', 'Callers', 'NumCallers',
                   'repeat_masker', '1000genomes_AF', '1000genomes_ID')

    for record in reader:
        info = record.INFO

        # skip broad private calls
        if info['Callers'] == ['broad']:
            continue

        # skip calls that are defined to be SVs
        varlen = abs(len(record.REF) - len(record.ALT[0]))
        if varlen >= 100:
            continue

        # copy some records over directly
        new_info = {key: info[key] for key in copied_keys if key in info}

        if 'model_score' in info:
            new_info['model_score'] = round_dig(
                float(info['model_score']), 3)

        if 'dbsnp_VP' in info:
            qualbyte = int(info['dbsnp_VP'], 16) & 255
            if (qualbyte & 2**5) > 0:
                new_info['dbsnp_somatic'] = True

        # Tumor fields first, then normal, each only when the variant
        # depth is present and positive.
        for tag, sample in (('t', 'Tumor'), ('n', 'Normal')):
            var_key = sample + 'VarDepth'
            if var_key in info and info[var_key] > 0:
                new_info[tag + '_vaf'] = info[sample + 'VAF']
                new_info[tag + '_alt_count'] = info[var_key]
                new_info[tag + '_ref_count'] = (
                    info[sample + 'TotalDepth'] - info[var_key])

        record.INFO = new_info
        writer.write_record(record)

    return 0