示例#1
0
# --- Command-line options -------------------------------------------------
# `parser` is not created in this part of the file; it must already exist
# before these declarations run.

# Mapper and output naming.
parser.add_argument('-mapper_name', type=str, help='Mapper name')
parser.add_argument("-o", "--out", type=str, required=True, help="path to save result folder")
parser.add_argument("-save_name", type=str, required=True, help="merge study name")
# One or more study names; argparse collects them into a list.
parser.add_argument('-study_name', type=str, required=True, nargs='+', help=' Name for saved genotype data, without ext')

# Cluster / chunking options.
parser.add_argument('-cluster', type=str, default='n', choices=['y', 'n'], help=' Is it parallel cluster job, default no')
# No `type=` is given, so the values arrive as a list of strings.
parser.add_argument('-node', nargs='+', help='number of nodes / this node number, example: 10 2 ')
parser.add_argument('-split', type=int, help='Split size for merge genotypes')


args = parser.parse_args()
# Echo the parsed options. The parenthesised form prints the same text under
# Python 2 and is also valid syntax under Python 3 (the bare `print args`
# statement is a SyntaxError there).
print(args)

if __name__ == '__main__':

	mapper=Mapper(args.mapper_name)
	mapper.load(args.mapper)
	mapper.chunk_size=args.split


	hdf5_iter=0
	h5_name=args.save_name
	pytable_filter=tables.Filters(complevel=9, complib='zlib')
	gen=[]
	for i,j in enumerate(args.genotype):
		gen.append(Reader('genotype'))
		gen[i].start(j,hdf5=True, study_name=args.study_name[i], ID=False)

	RSID=[]
	SUB_ID=[]
	for i in gen:
示例#2
0
文件: hase.py 项目: urmovosa/hase
                G.split_size = CONVERTER_SPLIT_SIZE
                G.VCF2hdf5(args.out)
            else:
                raise ValueError(
                    'Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder'
                )

        check_converter(args.out, args.study_name[0])
        print('Time to convert all data: {} sec'.format(t.secs))

    ################################### ENCODING ##############################

    elif args.mode == 'encoding':

        #ARG_CHECKER.check(args,mode='encoding')
        mapper = Mapper()
        mapper.genotype_names = args.study_name
        mapper.chunk_size = MAPPER_CHUNK_SIZE
        mapper.reference_name = args.ref_name
        mapper.load_flip(args.mapper)
        mapper.load(args.mapper)

        phen = Reader('phenotype')
        phen.start(args.phenotype[0])

        gen = Reader('genotype')
        gen.start(args.genotype[0],
                  hdf5=args.hdf5,
                  study_name=args.study_name[0],
                  ID=False)
示例#3
0
文件: hase.py 项目: roshchupkin/hase
			elif R.format=='VCF':
				G = GenotypeVCF(args.study_name[0], reader=R)
				if args.cluster=='y':
					G.cluster=True
				G.split_size=CONVERTER_SPLIT_SIZE
				G.VCF2hdf5(args.out)
			else:
				raise ValueError('Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder')
		print ('Time to convert all data: {} sec'.format(t.secs))

	################################### ENCODING ##############################

	elif args.mode=='encoding':

		#ARG_CHECKER.check(args,mode='encoding')
		mapper=Mapper()
		mapper.genotype_names=args.study_name
		mapper.chunk_size=MAPPER_CHUNK_SIZE
		mapper.reference_name=args.ref_name
		mapper.load_flip(args.mapper)
		mapper.load(args.mapper)

		phen=Reader('phenotype')
		phen.start(args.phenotype[0])

		gen=Reader('genotype')
		gen.start(args.genotype[0], hdf5=args.hdf5, study_name=args.study_name[0], ID=False)

		e=Encoder(args.out)
		e.study_name=args.study_name[0]