def __save_and_load_problem_npz(self, problem):
    '''Round-trip a problem through an NPZ temporary file and return what is read back.'''
    npz_buffer = tempfile.TemporaryFile()
    io.write_npz(problem, npz_buffer)
    # Rewinding stands in for closing and reopening the file. Code that works with a
    # real file should call close() on it instead, which prevents file locking on
    # Windows.
    npz_buffer.seek(0)
    reloaded = io.read_npz(npz_buffer)
    return reloaded
def __npz_to_plink(self, p, file_name):
    '''Round-trip p through NPZ and PLINK files that share the prefix file_name.

    p is written to file_name + '.npz', io.npz_to_plink() is run on that file with
    file_name as the output file set, and the problem read back from the PLINK set
    is returned.'''
    npz_path = file_name + '.npz'
    # Dump the problem in NPZ format
    io.write_npz(p, npz_path)
    # Convert NPZ -> PLINK
    io.npz_to_plink(npz_path, file_name)
    # Hand back the problem as loaded from the PLINK file set; comparing it with p is
    # left to the caller
    return io.read_plink(prefix=file_name)
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.
    --------------------------------------------------
    Builds the phasing pipeline, loads the problem, runs the phasing chain on it and
    prints the request statistics. If options.output is not None, the problem is
    then written to that path: in NPZ format (plus a separate <prefix>.info.npz file
    holding problem.info) when the extension is '.npz', in PLINK format otherwise.

    Fields read from options in this function: debug, stage, output, min_output.
    The options object is also handed to build_phasing_pipeline(), __load_problem()
    and PhaseParam.update_from_struct().

    Returns the problem object (after io.slim() if options.min_output was set and
    output was requested).
    '''
    # Every print below takes exactly one argument, so the emitted text is the same
    # whether the line is parsed as a Python 2 print statement or as a Python 3
    # print() call.
    if options.debug:
        print('Input options %s' % (options,))
        # NOTE(review): the stage message is treated as debug-only, like the other
        # progress messages below -- confirm against the intended layout.
        print('Building phaser (stage = %d) ...' % (options.stage,))
    phaser = build_phasing_pipeline(options)

    if options.debug:
        print('Reading data ...')
    problem = __load_problem(options)

    if options.debug:
        print('Phasing ...')
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)

    print('')
    request.stats.pprint()
    print('')

    # Nothing to write: hand the phased problem straight back
    if options.output is None:
        return problem

    if options.min_output:
        print('Minimizing output size...')
        io.slim(problem)

    out_prefix, ext = os.path.splitext(options.output)
    if ext == '.npz':
        print('Writing haplotype result to %s in NPZ format ...' % (options.output,))
        io.write_npz(problem, options.output)
        # The problem info goes to its own file next to the main NPZ output
        output_info = out_prefix + '.info.npz'
        print('Writing problem info result to %s in NPZ format ...' % (output_info,))
        io.write_info_npz(problem.info, output_info)
    else:
        print('Writing haplotype result to %s in PLINK format ...' % (options.output,))
        io.write_plink(problem, options.output, verbose=options.debug)
    return problem
def _write_phasing_output(problem, options):
    '''Write problem to options.output, picking the format from the file extension.

    A '.npz' extension selects NPZ output, with problem.info written separately to
    <prefix>.info.npz; any other extension selects PLINK output. When
    options.min_output is set, io.slim() is applied to the problem first.'''
    if options.min_output:
        print('Minimizing output size...')
        io.slim(problem)

    out_prefix, ext = os.path.splitext(options.output)
    if ext != '.npz':
        print('Writing haplotype result to %s in PLINK format ...' % (options.output,))
        io.write_plink(problem, options.output, verbose=options.debug)
        return

    print('Writing haplotype result to %s in NPZ format ...' % (options.output,))
    io.write_npz(problem, options.output)
    output_info = out_prefix + '.info.npz'
    print('Writing problem info result to %s in NPZ format ...' % (output_info,))
    io.write_info_npz(problem.info, output_info)


def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.
    --------------------------------------------------
    Steps: build the phasing pipeline, load the problem, run the phasing chain,
    print the request statistics, and - only if options.output is not None - write
    the problem to disk. Returns the problem object.

    All messages are printed through single-argument print(...) calls, which emit
    the same text under the Python 2 print statement and the Python 3 print()
    function.
    '''
    if options.debug:
        print('Input options %s' % (options,))
        # NOTE(review): the stage message is treated as debug-only, like the other
        # progress messages below -- confirm against the intended layout.
        print('Building phaser (stage = %d) ...' % (options.stage,))
    phaser = build_phasing_pipeline(options)

    if options.debug:
        print('Reading data ...')
    problem = __load_problem(options)

    if options.debug:
        print('Phasing ...')
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)

    # Statistics are framed by blank lines
    print('')
    request.stats.pprint()
    print('')

    if options.output is not None:
        _write_phasing_output(problem, options)
    return problem
util.run_command('%s --nonfounders --freq --out %s' % (plink_cmd_base, out_base_name)) # Convert frequencies file that to a reference allele recoding # file (a file containing the list of SNPs and their minor allele letter) bu.frq_to_minor_file(out_base_name + '.frq', out_base_name + '.mnr') # Then convert binary PLINK to a recoded 12-recoded TPED, where 1=minor allele for each SNP out_recoded = out_base_name + '.recoded' util.run_command('%s --transpose --recode12 --reference-allele %s.mnr --out %s' % \ (plink_cmd_base, out_base_name, out_recoded)) # Reload the recoded problem for ext in ('nof', 'tped', 'tfam'): os.rename(out_recoded + '.' + ext, out_base_name + '.' + ext) genotype = io_genotype.read('plink', 'genotype', tped=out_base_name + '.tped', load_ids=False) else: genotype = problem.genotype # Write problem to file in our (npz) io.write_npz(problem, out_base_name + '.npz') # Write genotypes Gaixin formats; she uses those separate files io_genotype.write('gaixin', genotype, options.out_gxn + '.gxn', sample_id=problem.pedigree.sample_id_genotyped) # Convert plink tped to bed; delete the tped set util.run_command('%s --make-bed --out %s' % (plink_cmd_base, out_base_name)) for ext in ('nof', 'pdg.tfam', 'tped', 'tfam', 'info'): os.remove(out_base_name + '.' + ext) except: traceback.print_exc(file=sys.stdout) sys.exit(util.EXIT_FAILURE)