Exemplo n.º 1
0
    def write(filename, snpdata):
        """Save a :class:`SnpData` to disk as a dat file with companion fam and map files.

        :param filename: the name of the dat file to create
        :type filename: string
        :param snpdata: The in-memory data to be written.
        :type snpdata: :class:`SnpData`

        >>> from pysnptools.snpreader import Dat, Bed
        >>> import pysnptools.util as pstutil
        >>> snpdata = Bed('../examples/toydata.bed',count_A1=False)[:,:10].read()  # Read first 10 snps from Bed format
        >>> pstutil.create_directory_if_necessary("tempdir/toydata10.dat")
        >>> Dat.write("tempdir/toydata10.dat",snpdata)              # Write data in dat/fam/map format
        """

        # Older callers passed (snpdata, filename); detect that order, warn, and swap.
        arguments_reversed = isinstance(filename, SnpData) and isinstance(snpdata, str)
        if arguments_reversed:
            warnings.warn("write statement should have filename before data to write", DeprecationWarning)
            snpdata, filename = filename, snpdata

        # Companion files first, then resolve the final name of the dat file itself.
        SnpReader._write_fam(snpdata, filename, remove_suffix="dat")
        SnpReader._write_map_or_bim(snpdata, filename, remove_suffix="dat", add_suffix="map")
        filename = SnpReader._name_of_other_file(filename, remove_suffix="dat", add_suffix="dat")

        values = snpdata.val
        progress_message = "Writing snp # {0} to file '{1}'"
        with open(filename, "w") as out:
            for position, sid in enumerate(snpdata.sid):
                if not position % 1000:
                    logging.info(progress_message.format(position, filename))
                # "j" and "n" are written as the major and minor allele
                out.write("{0}\tj\tn\t".format(sid))
                # One line per SNP: the values of that SNP's column, tab-separated.
                column = values[:, position]
                out.write("\t".join(map(str, column)) + "\n")
        logging.info("Done writing " + filename)
Exemplo n.º 2
0
    def write(snpdata, basefilename, force_python_only=False):
        """Write SNP data to Bed format, together with its fam and bim files.

        :param snpdata: The in-memory data to write. This method reads its ``val``
            array (indexed ``[iid, sid]``), ``iid_count`` and ``sid_count``.
        :param basefilename: the name of the file to create
        :type basefilename: string
        :param force_python_only: If False (the default) the bed file is written by the
            ``wrap_plink_parser`` module; if True the pure-Python writer below is used.
        :type force_python_only: bool
        :raises Exception: On the ``wrap_plink_parser`` path, if ``snpdata.val`` is neither
            C- nor F-contiguous or its dtype is not float64/float32. On the pure-Python
            path, if a value other than 0, 1, 2 or NaN is encountered.
        """
        SnpReader._write_fam(snpdata, basefilename, remove_suffix="bed")
        SnpReader._write_map_or_bim(snpdata, basefilename, remove_suffix="bed", add_suffix="bim")

        bedfile = SnpReader._name_of_other_file(basefilename, remove_suffix="bed", add_suffix="bed")

        if not force_python_only:
            from pysnptools.snpreader import wrap_plink_parser

            if snpdata.val.flags["C_CONTIGUOUS"]:
                order = "C"
            elif snpdata.val.flags["F_CONTIGUOUS"]:
                order = "F"
            else:
                # `order` is not bound on this branch, so it must not be interpolated
                # into the message (doing so raised UnboundLocalError instead).
                raise Exception("snpdata.val is neither C- nor F-contiguous; only 'F' and 'C' order are supported")

            # Pick the writer that matches the array's dtype and memory order.
            if snpdata.val.dtype == np.float64:
                if order == "F":
                    wrap_plink_parser.writePlinkBedFiledoubleFAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
                else:
                    wrap_plink_parser.writePlinkBedFiledoubleCAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
            elif snpdata.val.dtype == np.float32:
                if order == "F":
                    wrap_plink_parser.writePlinkBedFilefloatFAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
                else:
                    wrap_plink_parser.writePlinkBedFilefloatCAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
            else:
                raise Exception("dtype '{0}' not known, only float64 and float32".format(snpdata.val.dtype))

        else:
            with open(bedfile, "wb") as bed_filepointer:
                #see http://pngu.mgh.harvard.edu/~purcell/plink/binary.shtml
                # Header: two magic numbers followed by the snp-major flag. A bytearray
                # (rather than chr()) is accepted by a binary file on Python 2 and 3.
                bed_filepointer.write(bytearray([0b01101100, 0b00011011, 0b00000001]))

                for sid_index in range(snpdata.sid_count):
                    if sid_index % 1 == 0:  # modulus 1: every SNP is logged
                        logging.info("Writing snp # {0} to file '{1}'".format(sid_index, basefilename))

                    col = snpdata.val[:, sid_index]
                    snp_bytes = bytearray()
                    # Each output byte packs up to four values, two bits per value,
                    # with the first value in the lowest-order bits.
                    for iid_by_four in range(0, snpdata.iid_count, 4):
                        byte = 0b00000000
                        for val_index, val in enumerate(col[iid_by_four:iid_by_four+4]):
                            if val == 0:
                                code = 0b00
                            elif val == 1:
                                code = 0b10 #backwards on purpose
                            elif val == 2:
                                code = 0b11
                            elif np.isnan(val):
                                code = 0b01 #backwards on purpose
                            else:
                                raise Exception("Can't convert value '{0}' to BED format (only 0,1,2,NAN allowed)".format(val))
                            byte |= (code << (val_index*2))
                        snp_bytes.append(byte)
                    # One write per SNP instead of one write per byte.
                    bed_filepointer.write(snp_bytes)
        logging.info("Done writing " + basefilename)
Exemplo n.º 3
0
    def write(filename, snpdata):
        """Write the contents of a :class:`SnpData` out in dat/fam/map format.

        :param filename: the name of the dat file to create
        :type filename: string
        :param snpdata: The in-memory data that will be written to disk.
        :type snpdata: :class:`SnpData`

        >>> from pysnptools.snpreader import Dat, Bed
        >>> import pysnptools.util as pstutil
        >>> snpdata = Bed('../examples/toydata.bed',count_A1=False)[:,:10].read()  # Read first 10 snps from Bed format
        >>> pstutil.create_directory_if_necessary("tempdir/toydata10.dat")
        >>> Dat.write("tempdir/toydata10.dat",snpdata)              # Write data in dat/fam/map format
        """

        # Backwards compatibility: tolerate the old (snpdata, filename) argument order.
        if isinstance(filename, SnpData) and isinstance(snpdata, str):
            warnings.warn("write statement should have filename before data to write", DeprecationWarning)
            snpdata, filename = filename, snpdata

        SnpReader._write_fam(snpdata, filename, remove_suffix="dat")
        SnpReader._write_map_or_bim(snpdata, filename, remove_suffix="dat", add_suffix="map")
        filename = SnpReader._name_of_other_file(filename, remove_suffix="dat", add_suffix="dat")

        matrix = snpdata.val
        with open(filename, "w") as dat_file:
            for snp_number, snp_id in enumerate(snpdata.sid):
                is_progress_step = (snp_number % 1000 == 0)
                if is_progress_step:
                    logging.info("Writing snp # {0} to file '{1}'".format(snp_number, filename))
                # Line prefix: the SNP id, then "j" and "n" as the major and minor allele.
                line_prefix = "{0}\tj\tn\t".format(snp_id)
                dat_file.write(line_prefix)
                # Remainder of the line: this SNP's column of values, tab-separated.
                cells = [str(cell) for cell in matrix[:, snp_number]]
                dat_file.write("\t".join(cells) + "\n")
        logging.info("Done writing " + filename)
Exemplo n.º 4
0
    def write(snpdata, basefilename):
        """Write ``snpdata`` to ``basefilename`` in dat format, plus companion fam and map files.

        :param snpdata: The in-memory data to write; its ``sid`` sequence and ``val`` array are read here.
        :param basefilename: the path that is opened for the dat output
        :type basefilename: string
        """
        # Companion files are produced by the shared SnpReader helpers.
        SnpReader._write_fam(snpdata, basefilename, remove_suffix="dat")
        SnpReader._write_map_or_bim(snpdata, basefilename, remove_suffix="dat", add_suffix="map")

        values = snpdata.val
        with open(basefilename, "w") as out:
            for position, sid in enumerate(snpdata.sid):
                if not position % 1000:
                    logging.info("Writing snp # {0} to file '{1}'".format(position, basefilename))
                # "j" and "n" are written as the major and minor allele
                out.write("{0}\tj\tn\t".format(sid))
                # One line per SNP: that SNP's column of values, tab-separated.
                column = values[:, position]
                out.write("\t".join(map(str, column)) + "\n")
        logging.info("Done writing " + basefilename)
Exemplo n.º 5
0
    def write(filename, snpdata, count_A1=False, force_python_only=False):
        """Writes a :class:`SnpData` to Bed format.

        :param filename: the name of the file to create
        :type filename: string
        :param snpdata: The in-memory data that should be written to disk.
        :type snpdata: :class:`SnpData`
        :param count_A1: Tells if it should count the number of A1 alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
        :type count_A1: bool
        :param force_python_only: If False (the default) the bed file is written by the
            ``wrap_plink_parser`` module; if True the pure-Python writer below is used.
        :type force_python_only: bool
        :raises Exception: On the ``wrap_plink_parser`` path, if ``snpdata.val`` is neither
            C- nor F-contiguous or its dtype is not float64/float32. On the pure-Python
            path, if a value other than 0, 1, 2 or NaN is encountered.

        >>> from pysnptools.snpreader import Pheno, Bed
        >>> import pysnptools.util as pstutil
        >>> snpdata = Pheno('../examples/toydata.phe').read()         # Read data from Pheno format
        >>> pstutil.create_directory_if_necessary("tempdir/toydata.bed")
        >>> Bed.write("tempdir/toydata.bed",snpdata,count_A1=False)   # Write data in Bed format
        """

        if isinstance(filename, SnpData) and isinstance(
                snpdata, str
        ):  #For backwards compatibility, reverse inputs if necessary
            warnings.warn(
                "write statement should have filename before data to write",
                DeprecationWarning)
            filename, snpdata = snpdata, filename

        # An explicit None means "not set": warn and fall back to the current default.
        if count_A1 is None:
            warnings.warn(
                "'count_A1' was not set. For now it will default to 'False', but in the future it will default to 'True'",
                FutureWarning)
            count_A1 = False

        SnpReader._write_fam(snpdata, filename, remove_suffix="bed")
        SnpReader._write_map_or_bim(snpdata,
                                    filename,
                                    remove_suffix="bed",
                                    add_suffix="bim")

        bedfile = SnpReader._name_of_other_file(filename,
                                                remove_suffix="bed",
                                                add_suffix="bed")

        if not force_python_only:
            from pysnptools.snpreader import wrap_plink_parser

            if snpdata.val.flags["C_CONTIGUOUS"]:
                order = "C"
            elif snpdata.val.flags["F_CONTIGUOUS"]:
                order = "F"
            else:
                # `order` is not bound on this branch, so it must not be interpolated
                # into the message (doing so raised UnboundLocalError instead).
                raise Exception(
                    "snpdata.val is neither C- nor F-contiguous; only 'F' and 'C' order are supported")

            # Pick the writer that matches the array's dtype and memory order.
            if snpdata.val.dtype == np.float64:
                if order == "F":
                    wrap_plink_parser.writePlinkBedFile2doubleFAAA(
                        bedfile, snpdata.iid_count, snpdata.sid_count,
                        count_A1, snpdata.val)
                else:
                    wrap_plink_parser.writePlinkBedFile2doubleCAAA(
                        bedfile, snpdata.iid_count, snpdata.sid_count,
                        count_A1, snpdata.val)
            elif snpdata.val.dtype == np.float32:
                if order == "F":
                    wrap_plink_parser.writePlinkBedFile2floatFAAA(
                        bedfile, snpdata.iid_count, snpdata.sid_count,
                        count_A1, snpdata.val)
                else:
                    wrap_plink_parser.writePlinkBedFile2floatCAAA(
                        bedfile, snpdata.iid_count, snpdata.sid_count,
                        count_A1, snpdata.val)
            else:
                raise Exception(
                    "dtype '{0}' not known, only float64 and float32".format(
                        snpdata.val.dtype))

        else:
            # count_A1 swaps the two-bit codes used for the values 0 and 2.
            if not count_A1:
                zero_code = 0b00
                two_code = 0b11
            else:
                zero_code = 0b11
                two_code = 0b00

            with open(bedfile, "wb") as bed_filepointer:
                #see http://pngu.mgh.harvard.edu/~purcell/plink/binary.shtml
                # Header: two magic numbers followed by the snp-major flag. A bytearray
                # (rather than chr()) is accepted by a binary file on Python 2 and 3.
                bed_filepointer.write(
                    bytearray([0b01101100, 0b00011011, 0b00000001]))

                for sid_index in range(snpdata.sid_count):
                    if sid_index % 1 == 0:  # modulus 1: every SNP is logged
                        logging.info("Writing snp # {0} to file '{1}'".format(
                            sid_index, filename))

                    col = snpdata.val[:, sid_index]
                    snp_bytes = bytearray()
                    # Each output byte packs up to four values, two bits per value,
                    # with the first value in the lowest-order bits.
                    for iid_by_four in range(0, snpdata.iid_count, 4):
                        byte = 0b00000000
                        for val_index, val in enumerate(
                                col[iid_by_four:iid_by_four + 4]):
                            if val == 0:
                                code = zero_code
                            elif val == 1:
                                code = 0b10  #backwards on purpose
                            elif val == 2:
                                code = two_code
                            elif np.isnan(val):
                                code = 0b01  #backwards on purpose
                            else:
                                raise Exception(
                                    "Can't convert value '{0}' to BED format (only 0,1,2,NAN allowed)"
                                    .format(val))
                            byte |= (code << (val_index * 2))
                        snp_bytes.append(byte)
                    # One write per SNP instead of one write per byte.
                    bed_filepointer.write(snp_bytes)
        logging.info("Done writing " + filename)
Exemplo n.º 6
0
    def write(filename, snpdata, force_python_only=False):
        """Writes a :class:`SnpData` to Bed format.

        :param filename: the name of the file to create
        :type filename: string
        :param snpdata: The in-memory data that should be written to disk.
        :type snpdata: :class:`SnpData`
        :param force_python_only: If False (the default) the bed file is written by the
            ``wrap_plink_parser`` module; if True the pure-Python writer below is used.
        :type force_python_only: bool
        :raises Exception: On the ``wrap_plink_parser`` path, if ``snpdata.val`` is neither
            C- nor F-contiguous or its dtype is not float64/float32. On the pure-Python
            path, if a value other than 0, 1, 2 or NaN is encountered.

        >>> from pysnptools.snpreader import Pheno, Bed
        >>> import pysnptools.util as pstutil
        >>> snpdata = Pheno('../examples/toydata.phe').read() # Read data from Pheno format
        >>> pstutil.create_directory_if_necessary("tempdir/toydata.bed")
        >>> Bed.write("tempdir/toydata.bed",snpdata)       # Write data in Bed format
        """

        if isinstance(filename,SnpData) and isinstance(snpdata,str): #For backwards compatibility, reverse inputs if necessary
            warnings.warn("write statement should have filename before data to write", DeprecationWarning)
            filename, snpdata = snpdata, filename

        SnpReader._write_fam(snpdata, filename, remove_suffix="bed")
        SnpReader._write_map_or_bim(snpdata, filename, remove_suffix="bed", add_suffix="bim")

        bedfile = SnpReader._name_of_other_file(filename, remove_suffix="bed", add_suffix="bed")

        if not force_python_only:
            from pysnptools.snpreader import wrap_plink_parser

            if snpdata.val.flags["C_CONTIGUOUS"]:
                order = "C"
            elif snpdata.val.flags["F_CONTIGUOUS"]:
                order = "F"
            else:
                # `order` is not bound on this branch, so it must not be interpolated
                # into the message (doing so raised UnboundLocalError instead).
                raise Exception("snpdata.val is neither C- nor F-contiguous; only 'F' and 'C' order are supported")

            # Pick the writer that matches the array's dtype and memory order.
            if snpdata.val.dtype == np.float64:
                if order == "F":
                    wrap_plink_parser.writePlinkBedFiledoubleFAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
                else:
                    wrap_plink_parser.writePlinkBedFiledoubleCAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
            elif snpdata.val.dtype == np.float32:
                if order == "F":
                    wrap_plink_parser.writePlinkBedFilefloatFAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
                else:
                    wrap_plink_parser.writePlinkBedFilefloatCAAA(bedfile, snpdata.iid_count, snpdata.sid_count, snpdata.val)
            else:
                raise Exception("dtype '{0}' not known, only float64 and float32".format(snpdata.val.dtype))

        else:
            with open(bedfile, "wb") as bed_filepointer:
                #see http://pngu.mgh.harvard.edu/~purcell/plink/binary.shtml
                # Header: two magic numbers followed by the snp-major flag. A bytearray
                # (rather than chr()) is accepted by a binary file on Python 2 and 3.
                bed_filepointer.write(bytearray([0b01101100, 0b00011011, 0b00000001]))

                for sid_index in range(snpdata.sid_count):
                    if sid_index % 1 == 0:  # modulus 1: every SNP is logged
                        logging.info("Writing snp # {0} to file '{1}'".format(sid_index, filename))

                    col = snpdata.val[:, sid_index]
                    snp_bytes = bytearray()
                    # Each output byte packs up to four values, two bits per value,
                    # with the first value in the lowest-order bits.
                    for iid_by_four in range(0, snpdata.iid_count, 4):
                        byte = 0b00000000
                        for val_index, val in enumerate(col[iid_by_four:iid_by_four+4]):
                            if val == 0:
                                code = 0b00
                            elif val == 1:
                                code = 0b10 #backwards on purpose
                            elif val == 2:
                                code = 0b11
                            elif np.isnan(val):
                                code = 0b01 #backwards on purpose
                            else:
                                raise Exception("Can't convert value '{0}' to BED format (only 0,1,2,NAN allowed)".format(val))
                            byte |= (code << (val_index*2))
                        snp_bytes.append(byte)
                    # One write per SNP instead of one write per byte.
                    bed_filepointer.write(snp_bytes)
        logging.info("Done writing " + filename)