Пример #1
0
def load_dbsnp():
    """Dump the dbSNP VCF into a tab-separated intermediate file.

    Iterates over the variants that ``vcf_stuff.iterate_vcf`` yields for
    ``settings.DBSNP_VCF_FILE`` and writes one line per variant to
    ``settings.INTERMEDIATE_FILE_DIR + 'dbsnp.tsv'`` with the columns
    ``xpos``, ``ref``, ``alt`` and ``vcf_id``.  The running variant index
    is printed every 100000 variants as a progress indicator.
    """
    out_path = settings.INTERMEDIATE_FILE_DIR + 'dbsnp.tsv'
    # Context managers guarantee both handles are closed even when parsing
    # or writing raises; the output is still opened first, as before.
    with open(out_path, 'w') as dbsnp_file:
        with open(settings.DBSNP_VCF_FILE) as vcf_file:
            for i, variant in enumerate(vcf_stuff.iterate_vcf(vcf_file)):
                if not i % 100000:
                    # Single-argument parenthesised form prints the same
                    # text under the Python 2 print statement and the
                    # Python 3 print function.
                    print(i)
                fields = [
                    str(variant.xpos),
                    variant.ref,
                    variant.alt,
                    variant.vcf_id,
                ]
                dbsnp_file.write('\t'.join(fields) + '\n')
Пример #2
0
import sys
from xbrowse.parsers.vcf_stuff import iterate_vcf
from xbrowse.utils import get_aaf, compressed_file


if __name__ == '__main__':
    # Script entry point: for every variant in the VCF named by the first
    # command-line argument, print xpos, ref, alt and the get_aaf() result
    # as one tab-separated line.
    source_path = sys.argv[1]
    variants = iterate_vcf(compressed_file(source_path), genotypes=True)
    for v in variants:
        row = [str(v.xpos), v.ref, v.alt]
        row.append(str(get_aaf(v)))
        print('\t'.join(row))
Пример #3
0
import sys
from xbrowse.parsers.vcf_stuff import iterate_vcf
from xbrowse.utils import get_aaf, compressed_file

if __name__ == '__main__':
    # Stream the VCF given as argv[1] and emit a four-column TSV on stdout:
    # xpos, ref, alt, and whatever get_aaf() returns for the variant.
    handle = compressed_file(sys.argv[1])
    for record in iterate_vcf(handle, genotypes=True):
        xpos_text = str(record.xpos)
        aaf_text = str(get_aaf(record))
        print('\t'.join([xpos_text, record.ref, record.alt, aaf_text]))
Пример #4
0
import gzip
import argparse
from xbrowse.parsers import vcf_stuff


if __name__ == '__main__':
    # Script entry point: read the gzipped VCF named on the command line and
    # print one tab-separated line (xpos, ref, alt) per variant to stdout.
    parser = argparse.ArgumentParser(description='Create a CSV from the ClinVar VCF file that can go into a pandas dataframe')
    parser.add_argument('vcf')
    args = parser.parse_args()
    # The context manager closes the gzip handle even if iteration or
    # printing fails part-way through (e.g. a broken pipe downstream).
    with gzip.open(args.vcf) as vcf_handle:
        for variant in vcf_stuff.iterate_vcf(vcf_handle):
            fields = [
                str(variant.xpos),
                variant.ref,
                variant.alt,
            ]
            # Single-argument parenthesised print behaves identically as a
            # Python 2 statement and a Python 3 function call.
            print('\t'.join(fields))