Exemplo n.º 1
0
def hase_convert(args):
    """Convert the genotype data described by ``args`` and report the time.

    A ``Reader('genotype')`` is started on ``args.genotype[0]`` (with
    ``vcf=args.vcf``). The format it reports selects the converter:

    * ``'PLINK'``   -> ``GenotypePLINK(...).plink2hdf5(out=args.out)``
    * ``'MINIMAC'`` -> ``GenotypeMINIMAC(...).MACH2hdf5(args.out, id=args.id)``
    * ``'VCF'``     -> ``GenotypeVCF(...).VCF2hdf5(args.out)``

    Every converter is built with ``args.study_name[0]`` and has its
    ``split_size`` set to ``CONVERTER_SPLIT_SIZE`` before it runs. After the
    timed section, ``check_converter`` is called on the output location and
    the timer's ``secs`` value is printed.

    Raises:
        ValueError: if the reader reports a format other than the three above.
    """
    reader = Reader('genotype')
    reader.start(args.genotype[0], vcf=args.vcf)

    with Timer() as timer:
        detected = reader.format

        # Reject unknown formats up front; the raise stays inside the timed
        # block, exactly where the fall-through branch used to be.
        if detected not in ('PLINK', 'MINIMAC', 'VCF'):
            raise ValueError(
                'Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder'
            )

        if detected == 'PLINK':
            converter = GenotypePLINK(args.study_name[0], reader=reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.plink2hdf5(out=args.out)
        elif detected == 'MINIMAC':
            converter = GenotypeMINIMAC(args.study_name[0], reader=reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.MACH2hdf5(args.out, id=args.id)
        else:
            # Only 'VCF' can reach this point.
            converter = GenotypeVCF(args.study_name[0], reader=reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.VCF2hdf5(args.out)

    check_converter(args.out, args.study_name[0])

    message = 'Time to convert all data: {} sec'.format(timer.secs)
    print(message)
Exemplo n.º 2
0
class Study:
    """Named container for the readers attached to one study.

    Each ``add_*`` method creates a ``Reader`` of the matching kind, stores
    it on the instance and then starts it on the given path.

    ``study_name`` and ``encoded`` are initialised to ``None`` and are not
    assigned anywhere else in this class. ``study_name`` is forwarded as-is
    to ``Reader.start`` by ``add_genotype`` and ``add_derivatives``.
    NOTE(review): presumably callers set ``study_name`` on the instance
    before adding data - confirm against call sites.
    """

    def __init__(self, name):
        """Store ``name`` and initialise every other attribute to ``None``."""
        self.name = name
        # One slot per reader kind, filled by the corresponding add_* method.
        self.genotype = self.phenotype = None
        self.derivatives = self.covariates = None
        self.study_name = None
        self.encoded = None

    def add_genotype(self, genotype_path, hdf5=True):
        """Attach a 'genotype' reader and start it on ``genotype_path``.

        ``hdf5`` and the current ``self.study_name`` are passed through to
        ``Reader.start``; ``ID`` is always passed as ``False``.
        """
        # Bind the attribute before start() so it is set even if start() raises.
        reader = self.genotype = Reader('genotype')
        start_options = dict(hdf5=hdf5, study_name=self.study_name, ID=False)
        reader.start(genotype_path, **start_options)

    def add_phenotype(self, phenotype_path):
        """Attach a 'phenotype' reader and start it on ``phenotype_path``."""
        reader = self.phenotype = Reader('phenotype')
        reader.start(phenotype_path)

    def add_derivatives(self, derivatives_path):
        """Attach a 'partial' reader, start it, then load its folder.

        The current ``self.study_name`` is passed to ``Reader.start``.
        """
        reader = self.derivatives = Reader('partial')
        reader.start(derivatives_path, study_name=self.study_name)
        reader.folder.load()

    def add_covariates(self, covariates_path):
        """Attach a 'covariates' reader and start it on ``covariates_path``."""
        reader = self.covariates = Reader('covariates')
        reader.start(covariates_path)
Exemplo n.º 3
0
def hase_convert(args):
    """Convert genotype data unless the output folders already exist.

    If ``probes/``, ``genotype/`` and ``individuals/`` all exist under
    ``args.outfolder``, a notice is printed and the function returns without
    converting anything.

    Otherwise a ``Reader('genotype')`` is started on ``args.genotype[0]``
    (with ``vcf=args.vcf``) and the format it reports selects the converter:

    * ``'PLINK'``   -> ``GenotypePLINK(...).plink2hdf5(out=args.out)``
    * ``'MINIMAC'`` -> ``GenotypeMINIMAC(...).MACH2hdf5(args.out, id=args.id)``
    * ``'VCF'``     -> ``GenotypeVCF(...).VCF2hdf5(args.out)``

    Every converter is built with ``args.study_name[0]`` and has its
    ``split_size`` set to ``CONVERTER_SPLIT_SIZE``. Afterwards
    ``check_converter`` is called, ``args.outfolder`` is overwritten with
    ``args.genotype`` and the timer's ``secs`` value is printed.

    NOTE(review): the existence check looks under ``args.outfolder`` while
    the converters write to ``args.out`` - confirm these are meant to differ.

    Raises:
        ValueError: if the reader reports a format other than the three above.
    """
    expected_subfolders = ('/probes/', '/genotype/', '/individuals/')
    already_converted = all(
        os.path.exists(args.outfolder + subfolder)
        for subfolder in expected_subfolders
    )
    if already_converted:
        print(
            "The folders: probes, genotype and individuals already exist. Data seems already in HASE format. Delete "
            "the folders if the files are not converted properly. Continuing with the current files:"
        )
        return

    print('using', args.outfolder)

    reader = Reader('genotype')
    reader.start(args.genotype[0], vcf=args.vcf)

    with Timer() as timer:
        detected = reader.format

        # Reject unknown formats up front; the raise stays inside the timed
        # block, exactly where the fall-through branch used to be.
        if detected not in ('PLINK', 'MINIMAC', 'VCF'):
            raise ValueError(
                'Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder'
            )

        if detected == 'PLINK':
            converter = GenotypePLINK(args.study_name[0], reader=reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.plink2hdf5(out=args.out)
        elif detected == 'MINIMAC':
            converter = GenotypeMINIMAC(args.study_name[0], reader=reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.MACH2hdf5(args.out, id=args.id)
        else:
            # Only 'VCF' can reach this point.
            converter = GenotypeVCF(args.study_name[0], reader=reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.VCF2hdf5(args.out)

    check_converter(args.out, args.study_name[0])

    # NOTE(review): args.genotype is indexed with [0] above, so this stores
    # the whole sequence (not a single path) in args.outfolder - confirm intent.
    args.outfolder = args.genotype

    message = 'Time to convert all data: {} sec'.format(timer.secs)
    print(message)
Exemplo n.º 4
0
 def add_derivatives(self, derivatives_path):
     """Attach a 'partial' reader, start it, then load its folder.

     The reader is stored on ``self.derivatives`` and started on
     ``derivatives_path`` with the current ``self.study_name``.
     """
     # Bind the attribute before start() so it is set even if start() raises.
     reader = self.derivatives = Reader('partial')
     reader.start(derivatives_path, study_name=self.study_name)
     reader.folder.load()
Exemplo n.º 5
0
 def add_phenotype(self, phenotype_path):
     """Attach a 'phenotype' reader and start it on ``phenotype_path``.

     The reader is stored on ``self.phenotype``.
     """
     # Bind the attribute before start() so it is set even if start() raises.
     reader = self.phenotype = Reader('phenotype')
     reader.start(phenotype_path)
Exemplo n.º 6
0
 def add_genotype(self, genotype_path, hdf5=True):
     """Attach a 'genotype' reader and start it on ``genotype_path``.

     The reader is stored on ``self.genotype``. ``hdf5`` and the current
     ``self.study_name`` are passed through to ``Reader.start``; ``ID`` is
     always passed as ``False``.
     """
     # Bind the attribute before start() so it is set even if start() raises.
     reader = self.genotype = Reader('genotype')
     start_options = dict(hdf5=hdf5, study_name=self.study_name, ID=False)
     reader.start(genotype_path, **start_options)
Exemplo n.º 7
0
 def add_covariates(self, covariates_path):
     """Attach a 'covariates' reader and start it on ``covariates_path``.

     The reader is stored on ``self.covariates``.
     """
     # Bind the attribute before start() so it is set even if start() raises.
     reader = self.covariates = Reader('covariates')
     reader.start(covariates_path)
Exemplo n.º 8
0
                raise ValueError('Node # {} > {} total number of nodes'.format(
                    args.node[1], args.node[0]))

    if not os.path.isdir(args.out):
        print("Creating output folder {}".format(args.out))
        os.mkdir(args.out)

    if args.np:
        check_np()

    ################################### CONVERTING ##############################
    if args.mode == 'converting':

        # ARG_CHECKER.check(args,mode='converting')

        R = Reader('genotype')
        R.start(args.genotype[0], vcf=args.vcf)

        with Timer() as t:
            if R.format == 'PLINK':
                G = GenotypePLINK(args.study_name[0], reader=R)
                G.split_size = CONVERTER_SPLIT_SIZE
                G.plink2hdf5(out=args.out)

            elif R.format == 'MINIMAC':
                G = GenotypeMINIMAC(args.study_name[0], reader=R)
                if args.cluster == 'y':
                    G.cluster = True
                G.split_size = CONVERTER_SPLIT_SIZE
                G.MACH2hdf5(args.out, id=args.id)