Exemplo n.º 1
0
def calc_enrichment(args):
    """Run the enrichment computation described by ``args`` and return it.

    ``args`` is a mapping that must provide:

    * ``genes_file`` -- iterable of paths to JSON files; the parsed content
      of each file is stored under the file's base name
    * ``gwas_file`` -- path of the GWAS result; a ``.csv`` extension is
      loaded with ``result.load_from_csv``, anything else with
      ``result.load_from_hdf5``
    * ``genotype_folder`` -- handed to ``_load_genotype_``
    * ``top_snps_count``, ``window_size``, ``permutation_count`` -- forwarded
      to the top-SNP selection and to ``enrichment.enrichment``

    The value returned by ``enrichment.enrichment`` is written to stdout as
    JSON and also returned to the caller.
    """
    genotype_folder = args['genotype_folder']
    genes_files = args['genes_file']
    gwas_file = args['gwas_file']
    log.info('Retrieving genes')
    # One entry per input file, keyed by the file's base name.
    genes = {}
    for genes_file in genes_files:
        with open(genes_file, 'r') as f:
            genes[os.path.basename(genes_file)] = json.load(f)

    _, input_ext = os.path.splitext(gwas_file)
    log.info('Opening GWAS file')
    # Only '.csv' selects the CSV loader; every other extension is treated
    # as HDF5.
    if input_ext == '.csv':
        gwas_result = result.load_from_csv(gwas_file)
    else:
        gwas_result = result.load_from_hdf5(gwas_file)
    log.info('Loading genotype file')
    genotype_data = _load_genotype_(genotype_folder)
    top_snps = gwas_result.get_top_snps(args['top_snps_count'])
    # Sort in place on the 'scores' field, then keep at most
    # ``top_snps_count`` leading entries.
    top_snps.sort(order=['scores'])
    top_snps = top_snps[:args['top_snps_count']]
    pval = enrichment.enrichment(genes, genotype_data, top_snps,
                                 args['window_size'],
                                 args['permutation_count'])
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and is valid syntax on Python 3.
    print(json.dumps(pval))
    return pval
Exemplo n.º 2
0
def calc_enrichment(args):
    """Compute the enrichment result for a GWAS file and a set of gene files.

    Keys read from ``args``:

    * ``genotype_folder`` -- passed to ``_load_genotype_``
    * ``genes_file`` -- iterable of JSON file paths; each file is parsed and
      stored under its base name
    * ``gwas_file`` -- GWAS result path (``.csv`` is read as CSV, every other
      extension as HDF5)
    * ``top_snps_count`` -- how many top SNPs to request and keep
    * ``window_size`` / ``permutation_count`` -- forwarded unchanged to
      ``enrichment.enrichment``

    Returns whatever ``enrichment.enrichment`` returns, after printing it to
    stdout as JSON.
    """
    genotype_folder = args['genotype_folder']
    genes_files = args['genes_file']
    gwas_file = args['gwas_file']
    log.info('Retrieving genes')
    genes = {}
    for genes_file in genes_files:
        with open(genes_file, 'r') as f:
            # Keyed by base name, so two files with the same name in
            # different folders overwrite each other.
            genes[os.path.basename(genes_file)] = json.load(f)

    _, input_ext = os.path.splitext(gwas_file)
    log.info('Opening GWAS file')
    if input_ext == '.csv':
        gwas_result = result.load_from_csv(gwas_file)
    else:
        # Anything that is not '.csv' is handed to the HDF5 loader.
        gwas_result = result.load_from_hdf5(gwas_file)
    log.info('Loading genotype file')
    genotype_data = _load_genotype_(genotype_folder)
    top_snps = gwas_result.get_top_snps(args['top_snps_count'])
    # In-place sort on the 'scores' field followed by a cap on the length.
    top_snps.sort(order=['scores'])
    top_snps = top_snps[:args['top_snps_count']]
    pval = enrichment.enrichment(genes, genotype_data, top_snps,
                                 args['window_size'],
                                 args['permutation_count'])
    # print(...) with one argument is equivalent on Python 2 and, unlike the
    # statement form, also parses on Python 3.
    print(json.dumps(pval))
    return pval
Exemplo n.º 3
0
def plot(args):
    """Load a GWAS result file and hand it to ``plotting.plot_gwas_result``.

    Keys read from ``args``: ``file`` (input path), ``marker_size``,
    ``chr`` (optional; ignored when missing, ``None`` or empty), ``fdr``,
    ``colored``, ``output`` and ``macs``.

    Raises ``Exception`` when the extension of ``file`` is not listed in
    ``SUPPORTED_FILE_EXT``.
    """
    source_path = args['file']
    extension = os.path.splitext(source_path)[1]
    if extension not in SUPPORTED_FILE_EXT:
        message = ('The input file must have one of the supported extensions: %s'
                   % str(SUPPORTED_FILE_EXT))
        raise Exception(message)

    marker_size = args['marker_size']

    # Restrict the plot to one chromosome only when a non-empty value is set.
    selected_chr = args['chr'] if 'chr' in args else None
    if selected_chr is not None and selected_chr != '':
        chrs = [selected_chr]
    else:
        chrs = None

    # Pick the loader that matches the file extension.
    if extension in HDF5_FILE_EXT:
        loader = result.load_from_hdf5
    else:
        loader = result.load_from_csv
    gwas_result = loader(source_path)

    fdr = args['fdr']
    # A fixed colour list is passed only when colouring was requested.
    color_map = ['b', 'g', 'r', 'c', 'm'] if args['colored'] else None
    plotting.plot_gwas_result(
        gwas_result, args['output'], chrs, args['macs'],
        marker_size=marker_size, fdr=fdr, color_map=color_map)
Exemplo n.º 4
0
def calc_ld(args):
    """Calculate LD for a list of (chromosome, position) pairs and save it.

    Keys read from ``args``:

    * ``positions`` -- either a string matched by ``CHR_POS_PATTERN``
      (chromosome in group 1, position in group 2) or the path of a GWAS
      result file with a supported extension
    * ``range`` -- converted to ``int``; in the first mode it is the index
      window taken on each side of the position, in the second mode it is
      the argument given to ``get_top_snps``
    * ``genotype_file`` -- HDF5 genotype file to load
    * ``accession_file`` (optional) -- when present the genotype is filtered
      to those accessions and then ``filter_non_binary()`` is applied
    * ``output_file`` -- destination passed to ``_save_ld_data``

    Raises ``ValueError`` when ``positions`` is treated as a path that does
    not exist, and ``Exception`` when its extension is unsupported.
    """
    positions = args['positions']
    # Named ld_range / chromosome so the builtins range() and chr() are not
    # shadowed inside this function.
    ld_range = int(args['range'])
    chr_pos_match = CHR_POS_PATTERN.match(positions)
    genotypeData = genotype.load_hdf5_genotype_data(args['genotype_file'])
    # The key used to be read only under the misspelling 'acession_file'.
    # Prefer the correct spelling and keep the old one as a fallback so
    # existing callers keep working.
    accession_file = args.get('accession_file')
    if accession_file is None:
        accession_file = args.get('acession_file')
    if accession_file is not None:
        accessions = _load_accessions(accession_file)
        genotypeData.filter_accessions(accessions)
        genotypeData.filter_non_binary()
    if chr_pos_match:
        chromosome = chr_pos_match.group(1)
        position = int(chr_pos_match.group(2))
        log.info('Calculating LD for chr %s and position %s with +/- range of %s' % (chromosome, position, ld_range))
        # Only the first of the three returned values is used here.
        # NOTE(review): the third value looks like a "found" flag that is
        # never checked -- confirm whether a miss needs handling.
        abs_ix, _ix, _found = genotypeData.get_pos_ix(chromosome, position)
        min_ix = max(0, abs_ix - ld_range)
        max_ix = min(abs_ix + ld_range, genotypeData.genome_length)
        chr_pos_list = zip(genotypeData.chromosomes[min_ix:max_ix],
                           genotypeData.positions[min_ix:max_ix])
    else:
        if not os.path.exists(positions):
            raise ValueError('%s path does not exist' % positions)
        # Take the positions from the top SNPs of a GWAS result file.
        _, ext = os.path.splitext(positions)
        if ext not in SUPPORTED_FILE_EXT:
            raise Exception('The input file must have one of the supported extensions: %s' % str(SUPPORTED_FILE_EXT))
        if ext in HDF5_FILE_EXT:
            gwas_result = result.load_from_hdf5(positions)
        else:
            gwas_result = result.load_from_csv(positions)
        gwas_data = gwas_result.get_top_snps(ld_range)
        chr_pos_list = zip(map(str, gwas_data['chr']), gwas_data['positions'])
    # sorted() materialises the pairs into a list ordered by chromosome,
    # then position.
    chr_pos_list = sorted(chr_pos_list, key=itemgetter(0, 1))
    ld_data = genotypeData.calculate_ld(chr_pos_list)
    _save_ld_data(args['output_file'], ld_data, chr_pos_list)
Exemplo n.º 5
0
def qq_plot(args):
    """Load the GWAS result at ``args['file']`` and pass it to ``plotting.plot_qq``.

    The output target is ``args['output']``. Raises ``Exception`` when the
    file extension is not listed in ``SUPPORTED_FILE_EXT``.
    """
    source_path = args['file']
    extension = os.path.splitext(source_path)[1]
    if extension not in SUPPORTED_FILE_EXT:
        message = 'The input file must have one of the supported extensions: %s' % str(SUPPORTED_FILE_EXT)
        raise Exception(message)
    # Choose the loader by extension: HDF5 extensions first, CSV otherwise.
    loader = result.load_from_hdf5 if extension in HDF5_FILE_EXT else result.load_from_csv
    gwas_result = loader(source_path)
    plotting.plot_qq(gwas_result, args['output'])
Exemplo n.º 6
0
def qq_plot(args):
    """Create a QQ plot for a GWAS result file.

    Reads ``args['file']`` (input path) and ``args['output']`` (forwarded to
    ``plotting.plot_qq``). An extension outside ``SUPPORTED_FILE_EXT`` raises
    ``Exception``.
    """
    input_path = args['file']
    _, suffix = os.path.splitext(input_path)
    if suffix not in SUPPORTED_FILE_EXT:
        supported = str(SUPPORTED_FILE_EXT)
        raise Exception(
            'The input file must have one of the supported extensions: %s' %
            supported)

    # HDF5 extensions use the HDF5 loader; everything else is read as CSV.
    if suffix in HDF5_FILE_EXT:
        load = result.load_from_hdf5
    else:
        load = result.load_from_csv
    loaded = load(input_path)
    plotting.plot_qq(loaded, args['output'])
Exemplo n.º 7
0
def convert(args):
    """Convert a GWAS result file between CSV and HDF5.

    ``args['inputfile']`` is loaded and written to ``args['outputfile']`` in
    the other format: a ``.csv`` input is saved as HDF5, any other supported
    input is saved as CSV.

    Raises ``Exception`` when both paths share the same extension or when
    either extension is not listed in ``SUPPORTED_FILE_EXT``.
    """
    source_path = args['inputfile']
    input_ext = os.path.splitext(source_path)[1]
    target_path = args['outputfile']
    output_ext = os.path.splitext(target_path)[1]

    # Validation order matters: the "same extension" check runs first.
    if input_ext == output_ext:
        raise Exception('use different file extension for input (%s) and output file (%s)' % (input_ext, output_ext))
    if input_ext not in SUPPORTED_FILE_EXT:
        raise Exception('The input file must have one of the supported extensions: %s' % str(SUPPORTED_FILE_EXT))
    if output_ext not in SUPPORTED_FILE_EXT:
        raise Exception('The output file must have one of the supported extensions: (%s)' % ', '.join(SUPPORTED_FILE_EXT))

    if input_ext != '.csv':
        # Non-CSV input is read as HDF5 and written out as CSV.
        result.load_from_hdf5(source_path).save_as_csv(target_path)
    else:
        result.load_from_csv(source_path).save_as_hdf5(target_path)
Exemplo n.º 8
0
def plot(args):
    """Plot a GWAS result file with ``plotting.plot_gwas_result``.

    Reads ``file``, ``marker_size``, ``fdr``, ``colored``, ``output`` and
    ``macs`` from ``args``. ``chr`` is optional and only used when it is
    neither ``None`` nor an empty string.

    Raises ``Exception`` for a file extension outside ``SUPPORTED_FILE_EXT``.
    """
    input_path = args['file']
    _, suffix = os.path.splitext(input_path)
    if suffix not in SUPPORTED_FILE_EXT:
        raise Exception('The input file must have one of the supported extensions: %s' % str(SUPPORTED_FILE_EXT))

    marker_size = args['marker_size']

    # A single-element chromosome list is built only for a usable 'chr'.
    chrs = None
    if 'chr' in args:
        chosen = args['chr']
        if chosen is not None and chosen != '':
            chrs = [chosen]

    # HDF5 extensions use the HDF5 loader; everything else is read as CSV.
    load = result.load_from_hdf5 if suffix in HDF5_FILE_EXT else result.load_from_csv
    gwas_result = load(input_path)

    fdr = args['fdr']
    if args['colored']:
        color_map = ['b', 'g', 'r', 'c', 'm']
    else:
        color_map = None
    plotting.plot_gwas_result(gwas_result, args['output'], chrs, args['macs'],
                              marker_size=marker_size, fdr=fdr,
                              color_map=color_map)
Exemplo n.º 9
0
def calc_ld(args):
    """Compute LD between a set of SNP positions and write the result.

    The positions come from one of two sources, chosen by
    ``args['positions']``:

    * a string matched by ``CHR_POS_PATTERN`` -- the genotype entries from
      ``int(args['range'])`` indices before the matched position up to (not
      including) the same number of indices after it are used
    * otherwise a path to a GWAS result file -- the entries returned by
      ``get_top_snps(int(args['range']))`` are used

    Other keys read from ``args``: ``genotype_file`` (HDF5 genotype to load),
    ``accession_file`` (optional accession filter) and ``output_file``
    (passed to ``_save_ld_data``).

    Raises ``ValueError`` if the path does not exist and ``Exception`` if its
    extension is not in ``SUPPORTED_FILE_EXT``.
    """
    positions = args['positions']
    # Local names avoid shadowing the builtins range() and chr().
    window = int(args['range'])
    chr_pos_match = CHR_POS_PATTERN.match(positions)
    genotypeData = genotype.load_hdf5_genotype_data(args['genotype_file'])
    # Accept the correctly spelled key first. The misspelled 'acession_file'
    # was the only key read before and stays as a fallback for existing
    # callers.
    accession_file = args.get('accession_file')
    if accession_file is None:
        accession_file = args.get('acession_file')
    if accession_file is not None:
        accessions = _load_accessions(accession_file)
        genotypeData.filter_accessions(accessions)
        genotypeData.filter_non_binary()
    if chr_pos_match:
        chromosome = chr_pos_match.group(1)
        position = int(chr_pos_match.group(2))
        log.info(
            'Calculating LD for chr %s and position %s with +/- range of %s' %
            (chromosome, position, window))
        # get_pos_ix returns three values; only the first one is needed.
        # NOTE(review): the last value is presumably a "found" flag and is
        # ignored -- verify that an unknown position is acceptable here.
        abs_ix, _ix, _found = genotypeData.get_pos_ix(chromosome, position)
        # Clamp the index window to the valid range.
        min_ix = max(0, abs_ix - window)
        max_ix = min(abs_ix + window, genotypeData.genome_length)
        chr_pos_list = zip(genotypeData.chromosomes[min_ix:max_ix],
                           genotypeData.positions[min_ix:max_ix])
    else:
        if not os.path.exists(positions):
            raise ValueError('%s path does not exist' % positions)
        # get the SNPs
        _, ext = os.path.splitext(positions)
        if ext not in SUPPORTED_FILE_EXT:
            raise Exception(
                'The input file must have one of the supported extensions: %s'
                % str(SUPPORTED_FILE_EXT))
        if ext in HDF5_FILE_EXT:
            gwas_result = result.load_from_hdf5(positions)
        else:
            gwas_result = result.load_from_csv(positions)
        gwas_data = gwas_result.get_top_snps(window)
        # Chromosomes are converted to str before pairing with positions.
        chr_pos_list = zip(map(str, gwas_data['chr']), gwas_data['positions'])
    # Order by chromosome, then position; sorted() also turns the zip into a
    # list that can be used twice below.
    chr_pos_list = sorted(chr_pos_list, key=itemgetter(0, 1))
    ld_data = genotypeData.calculate_ld(chr_pos_list)
    _save_ld_data(args['output_file'], ld_data, chr_pos_list)
Exemplo n.º 10
0
def convert(args):
    """Rewrite a GWAS result file in the other supported format.

    Reads ``args['inputfile']`` and writes ``args['outputfile']``. Input with
    a ``.csv`` extension is loaded as CSV and saved as HDF5; every other
    supported input is loaded as HDF5 and saved as CSV.

    Raises ``Exception`` if the two extensions are equal, or if either one is
    missing from ``SUPPORTED_FILE_EXT``.
    """
    src = args['inputfile']
    _, input_ext = os.path.splitext(src)
    dst = args['outputfile']
    _, output_ext = os.path.splitext(dst)

    if input_ext == output_ext:
        message = (
            'use different file extension for input (%s) and output file (%s)'
            % (input_ext, output_ext))
        raise Exception(message)
    if input_ext not in SUPPORTED_FILE_EXT:
        message = (
            'The input file must have one of the supported extensions: %s' %
            str(SUPPORTED_FILE_EXT))
        raise Exception(message)
    if output_ext not in SUPPORTED_FILE_EXT:
        message = (
            'The output file must have one of the supported extensions: (%s)' %
            ', '.join(SUPPORTED_FILE_EXT))
        raise Exception(message)

    # The direction of the conversion depends only on the input extension.
    reading_csv = input_ext == '.csv'
    if reading_csv:
        loaded = result.load_from_csv(src)
        loaded.save_as_hdf5(dst)
    else:
        loaded = result.load_from_hdf5(src)
        loaded.save_as_csv(dst)