covariates=tuple(i.folder._data.metadata for i in pard)) if row_index[2].shape[0] != np.sum( [i.folder._data.metadata['id'].shape[0] for i in pard]): raise ValueError( 'Partial Derivatives covariates have different number of subjects {} than genotype and phenotype {}' .format( row_index[2].shape[0], np.sum([ i.folder._data.metadata['id'].shape[0] for i in pard ]))) while True: if mapper.cluster == 'n': SNPs_index, keys = mapper.get() else: ch = mapper.chunk_pop() if ch is None: SNPs_index = None break SNPs_index, keys = mapper.get(chunk_number=ch) if isinstance(SNPs_index, type(None)): break Analyser.rsid = keys if np.sum(PD) == 0: genotype = np.array([]) with Timer() as t_g: genotype = merge_genotype(gen, SNPs_index, mapper) genotype = genotype[:, row_index[0]] print "Time to get G {}s".format(t_g.secs)
# Start one HDF5-backed genotype reader per entry in args.genotype, paired
# with the study name at the same position in args.study_name.
# NOTE(review): gen[i] is the reader just appended only if `gen` was empty
# before this loop -- confirm where `gen` is initialised.
for i, j in enumerate(args.genotype):
    gen.append(Reader('genotype'))
    gen[i].start(j, hdf5=True, study_name=args.study_name[i], ID=False)

RSID = []
# One get_id() result per reader, in the same order as `gen`.
SUB_ID = [i.folder._data.get_id() for i in gen]

mapper.cluster = args.cluster
mapper.node = args.node

# Pull SNP batches from the mapper until it signals that nothing is left.
while True:
    if args.cluster != 'n':
        # Chunked mode (presumably cluster execution -- confirm): take the
        # next chunk; None means every chunk has been consumed.
        chunk = mapper.chunk_pop()
        if chunk is None:
            SNPs_index = None
            break
        print(chunk)
        SNPs_index, keys = mapper.get_chunk(chunk)
    else:
        SNPs_index, keys = mapper.get_next()

    # The mapper returns None as the index once it is exhausted.
    if SNPs_index is None:
        break

    RSID.append(keys)
    data = merge_genotype(gen, SNPs_index)  # TODO (high) add mapper
    print(data.shape)

    if args.cluster == 'n':
        # Output file name: "<hdf5_iter>_<h5_name>.h5" inside args.out.
        h5_gen_file = tables.openFile(
            os.path.join(args.out, str(hdf5_iter) + '_' + h5_name + '.h5'),
            'w',
            title=args.save_name)