def write(filename, snpdata, count_A1=False, force_python_only=False):
    """Save a :class:`SnpData` to disk in Bed format and hand back a :class:`.Bed` for the new file.

    The companion .fam and .bim files are written first, then the .bed file itself.

    :param filename: the name of the file to create
    :type filename: string

    :param snpdata: The in-memory data that should be written to disk.
    :type snpdata: :class:`SnpData`

    :param count_A1: Tells if it should count the number of A1 alleles (the PLINK standard)
         or the number of A2 alleles. False is the current default, but in the future the
         default will change to True.
    :type count_A1: bool

    :param force_python_only: If False (the default), the .bed file is written by the
         ``wrap_plink_parser`` extension module. If True, it is written by the pure-Python
         code in this function.
    :type force_python_only: bool

    :rtype: :class:`.Bed`

    :raises Exception: if (extension path) ``snpdata.val`` is neither C- nor F-contiguous or
         its dtype is not float64/float32, or if (pure-Python path) a value other than
         0, 1, 2 or NaN is encountered.

    >>> from pysnptools.snpreader import Pheno, Bed
    >>> import pysnptools.util as pstutil
    >>> from pysnptools.util import example_file # Download and return local file name
    >>> pheno_fn = example_file("pysnptools/examples/toydata.phe")
    >>> snpdata = Pheno(pheno_fn).read()         # Read data from Pheno format
    >>> pstutil.create_directory_if_necessary("tempdir/toydata.5chrom.bed")
    >>> Bed.write("tempdir/toydata.5chrom.bed",snpdata,count_A1=False)   # Write data in Bed format
    Bed('tempdir/toydata.5chrom.bed',count_A1=False)
    """
    # Older callers passed (snpdata, filename); detect that and swap the arguments.
    if isinstance(filename, SnpData) and isinstance(snpdata, str):
        warnings.warn(
            "write statement should have filename before data to write",
            DeprecationWarning)
        filename, snpdata = snpdata, filename

    if count_A1 is None:
        warnings.warn(
            "'count_A1' was not set. For now it will default to 'False', "
            "but in the future it will default to 'True'",
            FutureWarning)
        count_A1 = False

    # Companion files first, then work out the name of the .bed file itself.
    SnpReader._write_fam(snpdata, filename, remove_suffix="bed")
    SnpReader._write_map_or_bim(snpdata, filename, remove_suffix="bed", add_suffix="bim")
    bedfile = SnpReader._name_of_other_file(filename, remove_suffix="bed", add_suffix="bed")

    if force_python_only:
        # count_A1 decides which of the two homozygous bit patterns stands for 0 and for 2.
        zero_code, two_code = (0b11, 0b00) if count_A1 else (0b00, 0b11)

        with open(bedfile, "wb") as out:
            # see http://zzz.bwh.harvard.edu/plink/binary.shtml
            # Two magic-number bytes followed by the snp-major mode byte.
            out.write(bytes(bytearray([0b01101100, 0b00011011, 0b00000001])))

            for snp_pos in range(snpdata.sid_count):
                # Progress is logged for every SNP.
                logging.info("Writing snp # {0} to file '{1}'".format(snp_pos, filename))

                column = snpdata.val[:, snp_pos]
                for start in range(0, snpdata.iid_count, 4):
                    # Up to four individuals share one byte, two bits each, lowest bits first.
                    # A short final group leaves the unused high bits at zero.
                    packed = 0b00000000
                    for slot, val in enumerate(column[start:start + 4]):
                        if val == 0:
                            code = zero_code
                        elif val == 1:
                            code = 0b10  # backwards on purpose
                        elif val == 2:
                            code = two_code
                        elif np.isnan(val):
                            code = 0b01  # backwards on purpose
                        else:
                            raise Exception(
                                "Can't convert value '{0}' to BED format (only 0,1,2,NAN allowed)"
                                .format(val))
                        packed |= (code << (slot * 2))
                    out.write(bytes(bytearray([packed])))
    else:
        from pysnptools.snpreader import wrap_plink_parser

        # The extension has a separate entry point per memory layout and per float width.
        if snpdata.val.flags["C_CONTIGUOUS"]:
            order = "C"
        elif snpdata.val.flags["F_CONTIGUOUS"]:
            order = "F"
        else:
            raise Exception("order not known (not 'F' or 'C')")

        if snpdata.val.dtype == np.float64:
            if order == "F":
                writer = wrap_plink_parser.writePlinkBedFile2doubleFAAA
            else:
                writer = wrap_plink_parser.writePlinkBedFile2doubleCAAA
        elif snpdata.val.dtype == np.float32:
            if order == "F":
                writer = wrap_plink_parser.writePlinkBedFile2floatFAAA
            else:
                writer = wrap_plink_parser.writePlinkBedFile2floatCAAA
        else:
            raise Exception(
                "dtype '{0}' not known, only float64 and float32".format(
                    snpdata.val.dtype))

        writer(bedfile.encode('ascii'), snpdata.iid_count, snpdata.sid_count,
               count_A1, snpdata.val)

    logging.info("Done writing " + filename)
    return Bed(filename, count_A1=count_A1)
def write(filename, snpdata, count_A1=False, force_python_only=False):
    """Writes a :class:`SnpData` to Bed format.

    The companion .fam and .bim files are written first, then the .bed file itself.
    Nothing is returned.

    :param filename: the name of the file to create
    :type filename: string

    :param snpdata: The in-memory data that should be written to disk.
    :type snpdata: :class:`SnpData`

    :param count_A1: Tells if it should count the number of A1 alleles (the PLINK standard)
         or the number of A2 alleles. False is the current default, but in the future the
         default will change to True.
    :type count_A1: bool

    :param force_python_only: If False (the default), the .bed file is written by the
         ``wrap_plink_parser`` extension module. If True, it is written by the pure-Python
         code in this function.
    :type force_python_only: bool

    :raises Exception: if (extension path) ``snpdata.val`` is neither C- nor F-contiguous or
         its dtype is not float64/float32, or if (pure-Python path) a value other than
         0, 1, 2 or NaN is encountered.

    >>> from pysnptools.snpreader import Pheno, Bed
    >>> import pysnptools.util as pstutil
    >>> snpdata = Pheno('../examples/toydata.phe').read()         # Read data from Pheno format
    >>> pstutil.create_directory_if_necessary("tempdir/toydata.bed")
    >>> Bed.write("tempdir/toydata.bed",snpdata,count_A1=False)   # Write data in Bed format
    """
    # For backwards compatibility, reverse inputs if necessary
    if isinstance(filename, SnpData) and isinstance(snpdata, str):
        warnings.warn("write statement should have filename before data to write",
                      DeprecationWarning)
        filename, snpdata = snpdata, filename

    if count_A1 is None:
        warnings.warn("'count_A1' was not set. For now it will default to 'False', "
                      "but in the future it will default to 'True'",
                      FutureWarning)
        count_A1 = False

    SnpReader._write_fam(snpdata, filename, remove_suffix="bed")
    SnpReader._write_map_or_bim(snpdata, filename, remove_suffix="bed", add_suffix="bim")

    bedfile = SnpReader._name_of_other_file(filename, remove_suffix="bed", add_suffix="bed")

    if not force_python_only:
        from pysnptools.snpreader import wrap_plink_parser

        # The extension has a separate entry point per memory layout and per float width.
        if snpdata.val.flags["C_CONTIGUOUS"]:
            order = "C"
        elif snpdata.val.flags["F_CONTIGUOUS"]:
            order = "F"
        else:
            # 'order' was never assigned on this path, so the message must not format it
            # (doing so raised UnboundLocalError instead of this error).
            raise Exception("order not known (not 'F' or 'C')")

        if snpdata.val.dtype == np.float64:
            if order == "F":
                wrap_plink_parser.writePlinkBedFile2doubleFAAA(
                    bedfile.encode('ascii'), snpdata.iid_count, snpdata.sid_count,
                    count_A1, snpdata.val)
            else:
                wrap_plink_parser.writePlinkBedFile2doubleCAAA(
                    bedfile.encode('ascii'), snpdata.iid_count, snpdata.sid_count,
                    count_A1, snpdata.val)
        elif snpdata.val.dtype == np.float32:
            if order == "F":
                wrap_plink_parser.writePlinkBedFile2floatFAAA(
                    bedfile.encode('ascii'), snpdata.iid_count, snpdata.sid_count,
                    count_A1, snpdata.val)
            else:
                wrap_plink_parser.writePlinkBedFile2floatCAAA(
                    bedfile.encode('ascii'), snpdata.iid_count, snpdata.sid_count,
                    count_A1, snpdata.val)
        else:
            raise Exception("dtype '{0}' not known, only float64 and float32".format(
                snpdata.val.dtype))
    else:
        # count_A1 decides which of the two homozygous bit patterns stands for 0 and for 2.
        if not count_A1:
            zero_code = 0b00
            two_code = 0b11
        else:
            zero_code = 0b11
            two_code = 0b00

        with open(bedfile, "wb") as bed_filepointer:
            # see http://zzz.bwh.harvard.edu/plink/binary.shtml
            bed_filepointer.write(bytes(bytearray([0b01101100])))  # magic numbers
            bed_filepointer.write(bytes(bytearray([0b00011011])))  # magic numbers
            bed_filepointer.write(bytes(bytearray([0b00000001])))  # snp major

            for sid_index in range(snpdata.sid_count):
                # Modulus of 1 means progress is logged for every SNP.
                if sid_index % 1 == 0:
                    logging.info("Writing snp # {0} to file '{1}'".format(sid_index, filename))

                col = snpdata.val[:, sid_index]
                for iid_by_four in range(0, snpdata.iid_count, 4):
                    # Up to four individuals share one byte, two bits each, lowest bits first.
                    # A short final group leaves the unused high bits at zero.
                    vals_for_this_byte = col[iid_by_four:iid_by_four + 4]
                    byte = 0b00000000
                    for val_index, val in enumerate(vals_for_this_byte):
                        if val == 0:
                            code = zero_code
                        elif val == 1:
                            code = 0b10  # backwards on purpose
                        elif val == 2:
                            code = two_code
                        elif np.isnan(val):
                            code = 0b01  # backwards on purpose
                        else:
                            raise Exception(
                                "Can't convert value '{0}' to BED format (only 0,1,2,NAN allowed)"
                                .format(val))
                        byte |= (code << (val_index * 2))
                    bed_filepointer.write(bytes(bytearray([byte])))
    logging.info("Done writing " + filename)