def hase_convert(args):
    """Convert genotype data using the converter that matches its format.

    A genotype ``Reader`` is started on ``args.genotype[0]``; the reader's
    ``format`` attribute then selects one of the PLINK / MINIMAC / VCF
    converters, each of which writes to ``args.out``. Once the conversion
    is done, ``check_converter`` is run on the output and the elapsed time
    is printed.

    Args:
        args: Parsed command-line namespace. Reads ``genotype`` (first
            element is the input path), ``vcf``, ``study_name`` (first
            element is used), ``out`` and, for MINIMAC input only, ``id``.

    Raises:
        ValueError: If the reader reports a format other than PLINK,
            MINIMAC or VCF.
    """
    genotype_reader = Reader('genotype')
    genotype_reader.start(args.genotype[0], vcf=args.vcf)

    with Timer() as timer:
        detected_format = genotype_reader.format

        # Reject unsupported input up front, before any converter is built.
        if detected_format not in ('PLINK', 'MINIMAC', 'VCF'):
            raise ValueError(
                'Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder'
            )

        if detected_format == 'PLINK':
            converter = GenotypePLINK(args.study_name[0],
                                      reader=genotype_reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.plink2hdf5(out=args.out)
        elif detected_format == 'MINIMAC':
            converter = GenotypeMINIMAC(args.study_name[0],
                                        reader=genotype_reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.MACH2hdf5(args.out, id=args.id)
        else:  # 'VCF'
            converter = GenotypeVCF(args.study_name[0],
                                    reader=genotype_reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.VCF2hdf5(args.out)

    check_converter(args.out, args.study_name[0])
    # NOTE(review): `secs` is read after the `with` block has exited;
    # presumably Timer populates it on exit -- confirm.
    print('Time to convert all data: {} sec'.format(timer.secs))
class Study:
    """Holder for the data readers that belong to one study.

    Every reader slot starts out as ``None`` and is filled in by the
    matching ``add_*`` method.
    """

    def __init__(self, name):
        """Create an empty study labelled ``name``; no data is attached yet."""
        self.name = name

        # Reader slots, populated by the add_* methods below.
        self.genotype = None
        self.phenotype = None
        self.derivatives = None
        self.covariates = None

        # Not derived from `name`: stays None unless the caller sets it.
        # It is forwarded to the genotype and derivatives readers.
        self.study_name = None
        self.encoded = None

    def add_genotype(self, genotype_path, hdf5=True):
        """Attach a genotype reader and start it on ``genotype_path``.

        ``hdf5`` is forwarded to the reader's ``start`` call together with
        ``self.study_name`` and ``ID=False``.
        """
        reader = Reader('genotype')
        # Bind the attribute before starting so it is set even if start() fails.
        self.genotype = reader
        reader.start(
            genotype_path,
            hdf5=hdf5,
            study_name=self.study_name,
            ID=False,
        )

    def add_phenotype(self, phenotype_path):
        """Attach a phenotype reader and start it on ``phenotype_path``."""
        reader = Reader('phenotype')
        self.phenotype = reader
        reader.start(phenotype_path)

    def add_derivatives(self, derivatives_path):
        """Attach a 'partial' reader, start it, then load its folder."""
        reader = Reader('partial')
        self.derivatives = reader
        reader.start(derivatives_path, study_name=self.study_name)
        reader.folder.load()

    def add_covariates(self, covariates_path):
        """Attach a covariates reader and start it on ``covariates_path``."""
        reader = Reader('covariates')
        self.covariates = reader
        reader.start(covariates_path)
def hase_convert(args):
    """Convert genotype data unless the output layout already exists.

    If ``probes/``, ``genotype/`` and ``individuals/`` are all present
    under ``args.outfolder`` the conversion is skipped: a message is
    printed and the function returns. Otherwise a genotype ``Reader`` is
    started on ``args.genotype[0]`` and the converter matching its
    ``format`` (PLINK / MINIMAC / VCF) writes to ``args.out``. Afterwards
    ``check_converter`` is run, ``args.outfolder`` is overwritten with
    ``args.genotype`` and the elapsed time is printed.

    Args:
        args: Parsed command-line namespace. Reads ``outfolder``,
            ``genotype`` (first element is the input path), ``vcf``,
            ``study_name`` (first element is used), ``out`` and, for
            MINIMAC input only, ``id``. ``outfolder`` is mutated.

    Raises:
        ValueError: If the reader reports a format other than PLINK,
            MINIMAC or VCF.
    """
    # NOTE(review): the existence check looks under args.outfolder while the
    # conversion itself writes to args.out -- confirm both are intended.
    expected_subfolders = ('/probes/', '/genotype/', '/individuals/')
    already_converted = all(
        os.path.exists(args.outfolder + subfolder)
        for subfolder in expected_subfolders
    )
    if already_converted:
        print(
            "The folders: probes, genotype and individuals already exist. Data seems already in HASE format. Delete "
            "the folders if the files are not converted properly. Continuing with the current files:"
        )
        return

    print('using', args.outfolder)

    genotype_reader = Reader('genotype')
    genotype_reader.start(args.genotype[0], vcf=args.vcf)

    with Timer() as timer:
        detected_format = genotype_reader.format

        # Reject unsupported input up front, before any converter is built.
        if detected_format not in ('PLINK', 'MINIMAC', 'VCF'):
            raise ValueError(
                'Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder'
            )

        if detected_format == 'PLINK':
            converter = GenotypePLINK(args.study_name[0],
                                      reader=genotype_reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.plink2hdf5(out=args.out)
        elif detected_format == 'MINIMAC':
            converter = GenotypeMINIMAC(args.study_name[0],
                                        reader=genotype_reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.MACH2hdf5(args.out, id=args.id)
        else:  # 'VCF'
            converter = GenotypeVCF(args.study_name[0],
                                    reader=genotype_reader)
            converter.split_size = CONVERTER_SPLIT_SIZE
            converter.VCF2hdf5(args.out)

    check_converter(args.out, args.study_name[0])
    # NOTE(review): this stores the whole args.genotype value (indexed with
    # [0] above, so presumably a list) into args.outfolder -- confirm.
    args.outfolder = args.genotype
    # NOTE(review): `secs` is read after the `with` block has exited;
    # presumably Timer populates it on exit -- confirm.
    print('Time to convert all data: {} sec'.format(timer.secs))
def add_derivatives(self, derivatives_path):
    """Attach a 'partial' reader for ``derivatives_path`` and load its folder.

    The reader is stored on ``self.derivatives``, started with
    ``self.study_name``, and then its ``folder.load()`` is called.
    """
    reader = Reader('partial')
    # Bind the attribute before starting so it is set even if start() fails.
    self.derivatives = reader
    reader.start(derivatives_path, study_name=self.study_name)
    reader.folder.load()
def add_phenotype(self, phenotype_path):
    """Attach a phenotype reader and start it on ``phenotype_path``.

    The reader is stored on ``self.phenotype``.
    """
    reader = Reader('phenotype')
    # Bind the attribute before starting so it is set even if start() fails.
    self.phenotype = reader
    reader.start(phenotype_path)
def add_genotype(self, genotype_path, hdf5=True):
    """Attach a genotype reader and start it on ``genotype_path``.

    The reader is stored on ``self.genotype``. ``hdf5`` is forwarded to the
    reader's ``start`` call together with ``self.study_name`` and
    ``ID=False``.
    """
    reader = Reader('genotype')
    # Bind the attribute before starting so it is set even if start() fails.
    self.genotype = reader
    reader.start(
        genotype_path,
        hdf5=hdf5,
        study_name=self.study_name,
        ID=False,
    )
def add_covariates(self, covariates_path):
    """Attach a covariates reader and start it on ``covariates_path``.

    The reader is stored on ``self.covariates``.
    """
    reader = Reader('covariates')
    # Bind the attribute before starting so it is set even if start() fails.
    self.covariates = reader
    reader.start(covariates_path)
raise ValueError('Node # {} > {} total number of nodes'.format( args.node[1], args.node[0])) if not os.path.isdir(args.out): print("Creating output folder {}".format(args.out)) os.mkdir(args.out) if args.np: check_np() ################################### CONVERTING ############################## if args.mode == 'converting': # ARG_CHECKER.check(args,mode='converting') R = Reader('genotype') R.start(args.genotype[0], vcf=args.vcf) with Timer() as t: if R.format == 'PLINK': G = GenotypePLINK(args.study_name[0], reader=R) G.split_size = CONVERTER_SPLIT_SIZE G.plink2hdf5(out=args.out) elif R.format == 'MINIMAC': G = GenotypeMINIMAC(args.study_name[0], reader=R) if args.cluster == 'y': G.cluster = True G.split_size = CONVERTER_SPLIT_SIZE G.MACH2hdf5(args.out, id=args.id)