示例#1
0
 def __plink_to_npz(self, p, file_name):
     '''Round-trip p through a PLINK file set and an NPZ file.

     p is written as a PLINK file set with prefix file_name, that file set is
     converted to the NPZ file file_name + '.npz', and the problem loaded back
     from the NPZ file is returned. Comparing the result against p is left to
     the caller.
     '''
     npz_path = file_name + '.npz'
     # Step 1: dump the test problem as a PLINK file set
     io.write_plink(p, prefix=file_name)
     # Step 2: PLINK file set -> NPZ file
     io.plink_to_npz(file_name, npz_path)
     # Step 3: hand back whatever the NPZ file contains
     return io.read_npz(npz_path)
示例#2
0
 def __save_and_load_problem_plink(self, problem):
     '''Save a problem to a temporary PLINK file set and load it back.

     Returns the object produced by io.read_plink. The temporary files are
     removed before returning, whether or not saving/loading succeeded.
     '''
     # Reserve a temporary file name *before* entering the try block, so that
     # the cleanup code below can never run with an unbound file_name.
     with tempfile.NamedTemporaryFile(delete=False) as f:
         file_name = f.name
     try:
         io.write_plink(problem, file_name)
         return io.read_plink(prefix=file_name)
     finally:
         # Delete test files. Files that were never created (e.g. because
         # write_plink failed part-way) are skipped, so that a cleanup error
         # does not mask the exception that caused the failure.
         for ext in ['', '.pdg.tfam', '.tfam', '.tped', '.hap.tped', '.info']:
             path = file_name + ext
             if os.path.exists(path):
                 os.remove(path)
示例#3
0
文件: phase.py 项目: orenlivne/ober
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.
    --------------------------------------------------
    '''
    if options.debug: print 'Input options', options
    print 'Building phaser (stage = %d) ...' % (options.stage, )
    phaser = build_phasing_pipeline(options)

    if options.debug: print 'Reading data ...'
    problem = __load_problem(options)

    if options.debug: print 'Phasing ...'
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)

    print ''
    request.stats.pprint()
    print ''

    if options.output is not None:
        if options.min_output:
            print 'Minimizing output size...'
            io.slim(problem)
        out_prefix, ext = os.path.splitext(options.output)
        if ext == '.npz':
            print 'Writing haplotype result to %s in NPZ format ...' % (
                options.output, )
            io.write_npz(problem, options.output)
            output_info = out_prefix + '.info.npz'
            print 'Writing problem info result to %s in NPZ format ...' % (
                output_info, )
            io.write_info_npz(problem.info, output_info)
        else:
            print 'Writing haplotype result to %s in PLINK format ...' % (
                options.output, )
            io.write_plink(problem, options.output, verbose=options.debug)
    return problem
示例#4
0
文件: phase.py 项目: orenlivne/ober
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.
    --------------------------------------------------
    '''
    if options.debug: print 'Input options', options
    print 'Building phaser (stage = %d) ...' % (options.stage,)
    phaser = build_phasing_pipeline(options)
    
    if options.debug: print 'Reading data ...'
    problem = __load_problem(options)

    if options.debug: print 'Phasing ...'
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)
    
    print ''
    request.stats.pprint()
    print ''

    if options.output is not None:
        if options.min_output:
            print 'Minimizing output size...'
            io.slim(problem)
        out_prefix, ext = os.path.splitext(options.output)
        if ext == '.npz':
            print 'Writing haplotype result to %s in NPZ format ...' % (options.output,)
            io.write_npz(problem, options.output)
            output_info = out_prefix + '.info.npz'
            print 'Writing problem info result to %s in NPZ format ...' % (output_info,)
            io.write_info_npz(problem.info, output_info)
        else:
            print 'Writing haplotype result to %s in PLINK format ...' % (options.output,)
            io.write_plink(problem, options.output, verbose=options.debug)
    return problem
示例#5
0
文件: map.py 项目: orenlivne/ober
        # Convert plink tped -> npz
        problem = io.read_plink(prefix=base_name, pedigree=pedigree_file, haplotype=None,
                                verbose=options.debug)
    
        # Phase, impute, fill missing
        phaser = phase.build_phasing_pipeline(options)      
        request = phase.run_phasing_chain(phaser, problem)
        stats = request.stats
        print ''
        stats.pprint()
        print ''
    
        # Convert phased npz -> plink tped. Save only genotypes (haplotypes may need to be saved in the stats
        # object as a hash table for 'coloring the pedigree' at a later stage).
        genotype_file = out_base_name + '.tped'
        io.write_plink(problem, out_base_name, verbose=True,
                       save_node_type=False, save_genotype=True, save_haplotype=False, save_error=False)
        
        # Save statistics and phasing metadata in a separate npz
        np.savez(out_base_name + '.stats', 
                 stats=np.array([stats]), 
                 info=np.array([problem.info]), 
                 pedigree=np.array([problem.pedigree]))

        plink_cmd_base = '%s --tfile %s' % (bu.PLINK, out_base_name,)
        if options.recode:
            # First, compute allele frequencies with PLINK  
            util.run_command('%s --nonfounders --freq --out %s' % (plink_cmd_base, out_base_name))
            # Convert that frequencies file to a reference allele recoding
            # file (a file containing the list of SNPs and their minor allele letter)
            bu.frq_to_minor_file(out_base_name + '.frq', out_base_name + '.mnr')