def get_fam_means(ids, ped, gts, gts_ids, remove_proband=True, return_famsizes=False):
    """
    Used in get_gts_matrix to find the mean genotype in each sibship (family)
    for each SNP or for a PGS.

    The gtarray that is returned is indexed by the subset of ``ids`` that
    find_individuals_with_sibs reports as belonging to sibships of size 2 or
    greater.

    If remove_proband=True, the genotype/PGS row of the index individual is
    subtracted from the family sum (and 1 from the family count) before the
    mean is taken, so each individual gets the mean of their siblings only.

    If return_famsizes=True, a list ``[gtarray, fam_counts, fam_sums]`` is
    returned instead of the gtarray alone.
    """
    ids, ids_fams, gts_fams = find_individuals_with_sibs(ids, ped, gts_ids)
    fams = np.unique(ids_fams)
    fams_dict = make_id_dict(fams)
    n_fams = fams.shape[0]
    n_cols = gts.shape[1]

    # Per-family column sums and member counts over all genotyped individuals
    fam_sums = np.zeros((n_fams, n_cols), dtype=gts.dtype)
    fam_counts = np.zeros((n_fams), dtype=int)
    for f, fam in enumerate(fams):
        members = np.where(gts_fams == fam)[0]
        fam_sums[f, :] = np.sum(gts[members, :], axis=0)
        fam_counts[f] = members.shape[0]

    # Only needed when the proband's own row has to be looked up
    if remove_proband:
        gts_id_dict = make_id_dict(gts_ids)

    # One row per retained individual, stored as float32
    G_sib = np.zeros((ids.shape[0], n_cols), dtype=np.float32)
    for i in range(ids.shape[0]):
        k = fams_dict[ids_fams[i]]
        G_sib[i, :] = fam_sums[k, :]
        denom = fam_counts[k]
        if remove_proband:
            G_sib[i, :] = G_sib[i, :] - gts[gts_id_dict[ids[i]], :]
            denom = denom - 1
        G_sib[i, :] = G_sib[i, :] / float(denom)

    if return_famsizes:
        return [gtarray(G_sib, ids), fam_counts, fam_sums]
    return gtarray(G_sib, ids)
def find_par_gts(pheno_ids, ped, gts_id_dict, imp_fams=None):
    """
    Used in get_gts_matrix to find whether individuals have imputed or observed
    parental genotypes, and to find the indices of the observed/imputed parents
    in the observed/imputed genotype arrays.

    Returns a tuple ``(par_status, gt_indices, fam_labels)``:

    'par_status' has one row per individual and columns (father, mother):
        -1 = neither observed nor imputed, 0 = observed, 1 = imputed.
    'gt_indices' has columns (individual, father, mother) and records the
        relevant index in the observed genotype array (or, for an imputed
        parent, the index of the family in ``imp_fams``); -1 where missing.
    'fam_labels' records the family of the individual, taken from column 0 of
        the pedigree row.

    Pedigree columns used here: 0 = family, 2 = father ID, 3 = mother ID,
    4 and 5 = flags compared against the string 'False' to decide whether the
    imputed genotype stands in for the father / mother respectively.
    """
    n_ind = pheno_ids.shape[0]
    # -1 marks "not found" in both arrays
    par_status = np.full((n_ind, 2), -1, dtype=int)
    gt_indices = np.full((n_ind, 3), -1, dtype=int)

    # Lookup of each individual in the pedigree (keyed on column 1 of ped)
    ped_dict = make_id_dict(ped, 1)
    # Lookup of each family in the imputed data, when imputed data is supplied
    fam_dict = make_id_dict(imp_fams) if imp_fams is not None else None

    fam_labels = np.zeros((n_ind), dtype=ped.dtype)

    for i, pid in enumerate(pheno_ids):
        # Individual's own genotype row
        if pid in gts_id_dict:
            gt_indices[i, 0] = gts_id_dict[pid]

        # Everything below needs the pedigree row
        if pid not in ped_dict:
            continue
        ped_row = ped[ped_dict[pid], :]
        fam_labels[i] = ped_row[0]

        # Observed father (slot 0) and mother (slot 1)
        for slot, parent_id in enumerate((ped_row[2], ped_row[3])):
            if parent_id in gts_id_dict:
                gt_indices[i, slot + 1] = gts_id_dict[parent_id]
                par_status[i, slot] = 0

        # Fall back to imputation for any parent that is not observed
        if fam_dict is None or ped_row[0] not in fam_dict:
            continue
        imp_index = fam_dict[ped_row[0]]
        for slot, flag in enumerate((ped_row[4], ped_row[5])):
            if flag == 'False' and par_status[i, slot] != 0:
                gt_indices[i, slot + 1] = imp_index
                par_status[i, slot] = 1

    return par_status, gt_indices, fam_labels
def get_map_positions(mapfile, gts, min_map_prop=0.5):
    """
    Read genetic map positions for the SNPs in ``gts`` from a map file.

    The map file must have a header line containing the column names
    'pposition' (physical position, read as int) and 'gposition' (genetic
    position, read as float). SNPs are matched to the map on ``gts.pos``;
    SNPs whose position is absent from the map are filled in by linear
    interpolation on physical position.

    Args:
        mapfile : path of the whitespace-delimited map file
        gts : object exposing ``pos`` (physical positions) and ``shape[1]``
            (number of SNPs)
        min_map_prop : minimum proportion of SNPs that must be found in the
            map file

    Returns:
        numpy array of genetic positions, one per SNP in ``gts``

    Raises:
        ValueError: if the required columns are missing; if the genetic
            positions contain NaN, negative values, no variation, are not
            sorted, or exceed 5000; or if fewer than ``min_map_prop`` of the
            SNPs are found in the map.
    """
    # Split the header on any whitespace, exactly as np.loadtxt splits the data
    # rows, so that tabs, repeated spaces or a trailing '\r' cannot shift or
    # hide the column indices. The context manager closes the file on error too.
    with open(mapfile, 'r') as map_file:
        map_header = map_file.readline().split()

    if 'pposition' not in map_header or 'gposition' not in map_header:
        raise (ValueError(
            'Map file must contain columns pposition and gposition'))

    bp_pos = np.loadtxt(mapfile, usecols=map_header.index('pposition'), dtype=int, skiprows=1)
    pos_dict = make_id_dict(bp_pos)
    cm_pos = np.loadtxt(mapfile, usecols=map_header.index('gposition'), dtype=float, skiprows=1)

    # Check for NAs
    if np.sum(np.isnan(cm_pos)) > 0:
        raise (ValueError('Map cannot have NAs'))
    if np.min(cm_pos) < 0:
        raise (ValueError('Map file cannot have negative values'))
    if np.var(cm_pos) == 0:
        raise (ValueError('Map file has no variation'))
    # Check ordering
    if not np.array_equal(cm_pos, np.sort(cm_pos)):
        raise (ValueError(
            'Map not monotonic. Please make sure input is ordered correctly'
        ))
    # Check scale
    if np.max(cm_pos) > 5000:
        raise (ValueError('Maximum value of map too large'))

    # Find positions of SNPs in map file
    in_map = np.array([x in pos_dict for x in gts.pos])
    # Check that enough SNPs are in the map
    prop_in_map = np.mean(in_map)
    if prop_in_map < min_map_prop:
        raise (ValueError('Only ' + str(round(100 * prop_in_map)) + '% of SNPs have genetic positions in ' + mapfile + '. Need at least ' + str(round(100 * min_map_prop)) + '%'))
    print('Found genetic map positions for ' + str(round(100 * prop_in_map)) + '% of SNPs in ' + mapfile)

    # Fill in map values; NaN marks SNPs still to be interpolated
    snp_map = np.full((gts.shape[1]), np.nan, dtype=float)
    snp_map[in_map] = cm_pos[[pos_dict[x] for x in gts.pos[in_map]]]
    # Linearly interpolate map
    if prop_in_map < 1:
        print(
            'Linearly interpolating genetic map for SNPs not in input map')
        snp_map = np.interp(gts.pos, gts.pos[in_map], snp_map[in_map])
    return snp_map
def get_indices_given_ped(ped, gts_ids, imp_fams=None, ids=None, sib=False, verbose=False):
    """
    Used in get_gts_matrix_given_ped to get the ids of individuals with
    observed/imputed parental genotypes and, if sib=True, at least one
    genotyped sibling.

    Returns ``(ids, observed_indices, imp_indices, parcount)``: the retained
    ids; the sorted unique indices, in the observed genotypes, of those
    individuals and their observed parents; the sorted unique indices of their
    imputed parental genotypes; and, per retained individual, the number of
    parents with observed genotypes (0, 1 or 2).

    Raises ValueError if no individual has a complete set of indices.
    """
    # Lookup for observed genotypes
    gts_id_dict = make_id_dict(gts_ids)

    # Default to every individual with observed genotypes
    if ids is None:
        ids = gts_ids

    if sib:
        # The whole genotyped sample is searched (replacing any ids passed in)
        # in case some genotyped sibs are not in ids
        ids = find_individuals_with_sibs(gts_ids, ped, gts_ids, return_ids_only=True)
        if verbose:
            print('Found ' + str(ids.shape[0]) + ' individuals with genotyped siblings')

    ### Find parental status
    if verbose:
        print('Checking for observed/imputed parental genotypes')
    par_status, gt_indices, _fam_labels = find_par_gts(ids, ped, gts_id_dict, imp_fams=imp_fams)

    # Usable individuals have no -1 among (self, father, mother) indices
    none_missing = np.min(gt_indices, axis=1) >= 0
    N = np.sum(none_missing)
    if N == 0:
        raise ValueError(
            'No individuals with phenotype observations and complete observed/imputed genotype observations')

    gt_indices = gt_indices[none_missing, :]
    par_status = par_status[none_missing, :]
    ids = ids[none_missing]

    father_observed = par_status[:, 0] == 0
    mother_observed = par_status[:, 1] == 0
    father_imputed = par_status[:, 0] == 1
    mother_imputed = par_status[:, 1] == 1
    parcount = np.sum(par_status == 0, axis=1)

    if verbose:
        print(str(N) + ' individuals with phenotype observations and complete observed/imputed genotype observations')
        print(str(np.sum(parcount==0))+' individuals with imputed but no observed parental genotypes')
        print(str(np.sum(parcount==1))+' individuals with one observed and one imputed parent')
        print(str(np.sum(parcount==2))+' individuals with both parents observed')

    # Indices of individuals and their observed parents in observed genotypes
    observed_pool = np.hstack((gt_indices[:, 0],
                               gt_indices[father_observed, 1],
                               gt_indices[mother_observed, 2]))
    observed_indices = np.sort(np.unique(observed_pool))

    # Indices of imputed parents
    imputed_pool = np.hstack((gt_indices[father_imputed, 1],
                              gt_indices[mother_imputed, 2]))
    imp_indices = np.sort(np.unique(imputed_pool))

    return ids, observed_indices, imp_indices, parcount
def __init__(self,chrom,sid,pos, A1, A2, freqs, direct, direct_SE, avg_NTC, avg_NTC_SE, population, population_SE, r_direct_avg_NTC, r_direct_pop, ldscores = None, map=None):
    """
    Build a summary-statistics container for the SNPs of one chromosome.

    Args:
        chrom : chromosome label; converted with int() and repeated per SNP
        sid : array of SNP ids (stored as str)
        pos : array of positions (stored as int)
        A1, A2 : arrays of alleles (stored as str)
        freqs, direct, direct_SE, avg_NTC, avg_NTC_SE, population,
        population_SE, r_direct_avg_NTC, r_direct_pop : per-SNP numeric
            arrays; each is stored as a float masked array with NaN masked
        ldscores : optional per-SNP array, stored like the numeric arrays
        map : optional per-SNP array, stored like the numeric arrays

    Raises:
        ValueError: if the checked inputs differ in length, or if ``ldscores``
            or ``map`` does not have one entry per SNP.
    """
    def _nan_masked(values):
        # Float masked array with every NaN entry masked
        arr = ma.array(values, dtype=float)
        arr.mask = np.isnan(arr)
        return arr

    # NOTE(review): direct_SE, avg_NTC_SE and population_SE are not part of
    # this length check; confirm whether they should be.
    sizes = np.array([sid.shape[0],pos.shape[0],A1.shape[0],A2.shape[0],freqs.shape[0],direct.shape[0],
                      avg_NTC.shape[0],population.shape[0],r_direct_avg_NTC.shape[0],r_direct_pop.shape[0]])
    if np.unique(sizes).shape[0] > 1:
        raise(ValueError('All inputs to sumstats class must have same size'))

    self.chrom = np.zeros(sid.shape,dtype=int)
    self.chrom[:] = int(chrom)
    self.sid = np.array(sid,dtype=str)
    self.sid_dict = make_id_dict(self.sid)
    self.pos = np.array(pos,dtype=int)
    self.A1 = np.array(A1,dtype=str)
    self.A2 = np.array(A2,dtype=str)

    self.freqs = _nan_masked(freqs)
    self.direct = _nan_masked(direct)
    self.direct_SE = _nan_masked(direct_SE)
    self.avg_NTC = _nan_masked(avg_NTC)
    self.avg_NTC_SE = _nan_masked(avg_NTC_SE)
    self.population = _nan_masked(population)
    self.population_SE = _nan_masked(population_SE)
    self.r_direct_avg_NTC = _nan_masked(r_direct_avg_NTC)
    self.r_direct_pop = _nan_masked(r_direct_pop)

    if ldscores is not None:
        if not ldscores.shape[0] == sid.shape[0]:
            raise(ValueError('LD scores must have same size as other sumstats'))
        self.ldscores = _nan_masked(ldscores)
    else:
        self.ldscores = None

    if map is not None:
        if not map.shape[0] == sid.shape[0]:
            # Previously reported 'LD scores ...' here, which named the wrong input
            raise(ValueError('Map must have same size as other sumstats'))
        self.map = _nan_masked(map)
    else:
        self.map = None
def filter(self,filter_pass):
    """
    Subset every per-SNP attribute in place with ``filter_pass`` (anything
    usable as a numpy index along the SNP axis, e.g. a boolean mask) and
    rebuild the SNP-id lookup. ``ldscores`` and ``map`` are subset only when
    they are not None.
    """
    self.chrom = self.chrom[filter_pass]
    self.sid = self.sid[filter_pass]
    # The lookup must always reflect the current sid ordering
    self.sid_dict = make_id_dict(self.sid)

    always_present = ('pos', 'A1', 'A2', 'freqs',
                      'direct', 'direct_SE',
                      'avg_NTC', 'avg_NTC_SE',
                      'population', 'population_SE',
                      'r_direct_avg_NTC', 'r_direct_pop')
    for attr in always_present:
        setattr(self, attr, getattr(self, attr)[filter_pass])

    for attr in ('ldscores', 'map'):
        current = getattr(self, attr)
        if current is not None:
            setattr(self, attr, current[filter_pass])
def concatenate(self,s2):
    """
    Append the SNPs of another sumstats object, ``s2``, to this one in place
    and rebuild the SNP-id lookup.

    ``ldscores`` and ``map`` are optional: each is concatenated only when both
    objects carry it. When either side lacks it, the attribute is set to None
    on the result. Previously the old, shorter array was left in place, which
    no longer lined up with the other per-SNP attributes.
    """
    self.chrom = np.hstack((self.chrom,s2.chrom))
    self.sid = np.hstack((self.sid, s2.sid))
    self.sid_dict = make_id_dict(self.sid)
    self.pos = np.hstack((self.pos, s2.pos))
    self.A1 = np.hstack((self.A1, s2.A1))
    self.A2 = np.hstack((self.A2, s2.A2))

    # Masked per-SNP statistics that are always present
    for attr in ('freqs', 'direct', 'direct_SE', 'avg_NTC', 'avg_NTC_SE',
                 'population', 'population_SE', 'r_direct_avg_NTC', 'r_direct_pop'):
        setattr(self, attr, ma.concatenate([getattr(self, attr), getattr(s2, attr)]))

    # Optional per-SNP attributes: keep only if available for every SNP
    for attr in ('ldscores', 'map'):
        mine = getattr(self, attr)
        theirs = getattr(s2, attr)
        if mine is not None and theirs is not None:
            setattr(self, attr, ma.concatenate([mine, theirs]))
        else:
            setattr(self, attr, None)
def filter_ids(self,keep_ids, verbose=False):
    """
    Keep only individuals with ids given by keep_ids.

    Individuals are retained in the order in which they appear in
    ``keep_ids``; ids in ``keep_ids`` that are not in this object are ignored.
    The genotypes, ids, id lookup, shape, and (when present) ``fams`` and
    ``par_status`` are all subset consistently.

    Args:
        keep_ids : array-like of individual ids to keep
        verbose : print the number of individuals remaining

    Raises:
        ValueError: if none of ``keep_ids`` is present.
    """
    # Accept lists as well as arrays: the boolean indexing below needs an array
    keep_ids = np.asarray(keep_ids)
    in_ids = np.array([x in self.id_dict for x in keep_ids], dtype=bool)
    n_filtered = np.sum(in_ids)
    if n_filtered==0:
        raise(ValueError('No individuals would be left after filtering'))
    if verbose:
        print('After filtering, '+str(n_filtered)+' individuals remain')

    indices = np.array([self.id_dict[x] for x in keep_ids[in_ids]])
    if self.ndim == 2:
        self.gts = self.gts[indices, :]
    elif self.ndim == 3:
        self.gts = self.gts[indices, :, :]
    self.ids = self.ids[indices]
    self.id_dict = make_id_dict(self.ids)
    self.shape = self.gts.shape
    if self.fams is not None:
        self.fams = self.fams[indices]
    # par_status has one row per individual, so it must follow the same subset;
    # getattr keeps this safe for objects that never had the attribute set.
    par_status = getattr(self, 'par_status', None)
    if par_status is not None:
        self.par_status = par_status[indices]
def filter(self, filter_pass):
    """
    Subset the SNP axis of this genotype array in place.

    ``filter_pass`` indexes the last axis of ``gts`` (axis 1 for 2-D, axis 2
    for 3-D genotype arrays) and every per-SNP attribute that is not None:
    freqs, sid (with its lookup rebuilt), pos, alleles, chrom, map and
    error_probs. ``shape`` is refreshed from the filtered genotypes.
    """
    if self.freqs is not None:
        self.freqs = self.freqs[filter_pass]

    # SNPs are on the last axis for both supported layouts
    if self.ndim in (2, 3):
        self.gts = self.gts[..., filter_pass]
    self.shape = self.gts.shape

    if self.sid is not None:
        self.sid = self.sid[filter_pass]
        self.sid_dict = make_id_dict(self.sid)

    for attr in ('pos', 'alleles', 'chrom', 'map', 'error_probs'):
        current = getattr(self, attr)
        if current is not None:
            setattr(self, attr, current[filter_pass])
def match_phenotype(G,y,pheno_ids):
    """Match a phenotype to a genotype array by individual IDs.

    Phenotype entries whose id is not in ``G`` are dropped, and the remaining
    entries are reordered to follow ``G.ids``.

    Args:
        G : :class:`gtarray`
            genotype array to match phenotype to
        y : :class:`~numpy:numpy.array`
            vector of phenotype values
        pheno_ids: :class:`~numpy:numpy.array`
            vector of individual IDs corresponding to phenotype vector, y

    Returns:
       y : :class:`~numpy:numpy.array`
            vector of phenotype values matched by individual IDs to the genotype array

    """
    # Restrict the phenotype to individuals present in the genotype array
    has_genotype = np.array([pid in G.id_dict for pid in pheno_ids])
    y, pheno_ids = y[has_genotype], pheno_ids[has_genotype]
    # Reorder to the genotype array's ordering of individuals
    pheno_row = make_id_dict(pheno_ids)
    order = [pheno_row[gid] for gid in G.ids]
    return y[order]
def find_individuals_with_sibs(ids, ped, gts_ids, return_ids_only=False):
    """
    Used in get_gts_matrix and get_fam_means to find the individuals in ids
    that have genotyped siblings.

    A sibship is a family label (column 0 of ``ped``) shared by more than one
    individual in ``gts_ids``.

    Args:
        ids : array of candidate individual ids
        ped : pedigree array; column 0 is the family label and individuals
            are looked up via ``make_id_dict(ped, 1)``
        gts_ids : array of ids of genotyped individuals
        return_ids_only : return only the filtered ids

    Returns:
        ``ids`` restricted to individuals in a genotyped sibship, or, unless
        ``return_ids_only`` is set, the tuple ``(ids, ids_fams, gts_fams)``
        where ``ids_fams`` holds the family label of each returned id and
        ``gts_fams`` the family label of each entry of ``gts_ids`` (left at
        the zero value of the dtype for those not in the pedigree).
    """
    # Find genotyped sibships of size > 1
    ped_dict = make_id_dict(ped, 1)
    gts_in_ped = np.array([x in ped_dict for x in gts_ids], dtype=bool)
    # Family labels come from the pedigree, so they are stored with the
    # pedigree's dtype. Using the dtype of gts_ids would silently truncate
    # labels longer than the longest individual id, merging distinct families.
    gts_fams = np.zeros((gts_ids.shape[0]), dtype=ped.dtype)
    gts_fams[gts_in_ped] = np.array(
        [ped[ped_dict[x], 0] for x in gts_ids[gts_in_ped]], dtype=ped.dtype)
    fams, counts = np.unique(gts_fams[gts_in_ped], return_counts=True)
    sibships = set(fams[counts > 1])

    # Find individuals with genotyped siblings. Explicit dtypes keep the
    # masks and labels valid when a list comprehension comes back empty.
    ids_in_ped = np.array([x in ped_dict for x in ids], dtype=bool)
    ids = ids[ids_in_ped]
    ids_fams = np.array([ped[ped_dict[x], 0] for x in ids], dtype=ped.dtype)
    ids_with_sibs = np.array([x in sibships for x in ids_fams], dtype=bool)
    ids = ids[ids_with_sibs]
    ids_fams = ids_fams[ids_with_sibs]

    if return_ids_only:
        return ids
    return ids, ids_fams, gts_fams
def infer_ibd_chr(sibpairs, error_prob, error_probs, outprefix, bedfile=None, bgenfile=None, chrom=None, min_length=0.01, mapfile=None, ibdmatrix=False, ld_out=False, min_maf=0.01, max_missing=5, max_error=0.01):
    """
    Infer IBD between sibling pairs for one chromosome and write the results.

    Genotypes are read from exactly one of ``bedfile`` / ``bgenfile``, SNPs are
    filtered on MAF, missingness and genotyping error probability, a genetic
    map is attached, LD-based weights are computed, IBD is inferred and
    smoothed, and the segments are written to
    ``outfile_name(outprefix, '.ibd.segments.gz', chrom)``.

    Args:
        sibpairs : array with one sibling pair per row (two id columns)
        error_prob : default per-SNP genotyping error probability
        error_probs : optional object with ``sid_dict`` and ``error_ests``
            giving SNP-specific error probabilities that override the default
        outprefix : prefix passed to ``outfile_name`` for every output file
        bedfile, bgenfile : genotype input; exactly one must be given
        chrom : chromosome label; read from the genotype input when None
        min_length : passed to ``smooth_ibd``
        mapfile : genetic map file; when None the map comes from the .bim file
            (bed input) or from ``decode_map_from_pos``
        ibdmatrix : also write the per-SNP IBD matrix
        ld_out : also write the LD scores
        min_maf, max_missing, max_error : SNP filtering thresholds

    Raises:
        ValueError: if neither or both genotype inputs are given, the input
            spans more than one chromosome, no sibling pair is fully
            genotyped, or the .bim map fails validation.
    """
    if bedfile is None and bgenfile is None:
        raise(ValueError('Must provide either bed file or bgenfile'))
    if bedfile is not None and bgenfile is not None:
        raise(ValueError('Provide either bed file or bgen file. Not both.'))

    if bedfile is not None:
        ## Read bed
        print('Reading genotypes from ' + bedfile)
        bimfile = bedfile.split('.bed')[0] + '.bim'
        # Determine chromosome
        if chrom is None:
            chrom = np.loadtxt(bimfile, usecols=0, dtype=str)
            chrom = np.unique(chrom)
            if chrom.shape[0] > 1:
                raise (ValueError('More than 1 chromosome in input bedfile'))
            else:
                chrom = chrom[0]
        print('Inferring IBD for chromosome ' + str(chrom))
        # Read sibling genotypes from bed file
        gts = read_sibs_from_bed(bedfile, sibpairs)
    elif bgenfile is not None:
        ## Read bgen
        print('Reading genotypes from ' + bgenfile)
        # Determine chromosome
        if chrom is None:
            # NOTE(review): the bgen handle is never closed explicitly; confirm
            # whether open_bgen should be used as a context manager here.
            bgen = open_bgen(bgenfile,verbose=False)
            chrom = bgen.chromosomes
            chrom = np.unique(chrom)
            if chrom.shape[0] > 1:
                raise (ValueError('More than 1 chromosome in input bgenfile'))
            else:
                chrom = chrom[0]
                # An empty chromosome label is replaced by 0
                if chrom=='':
                    chrom = 0
        print('Inferring IBD for chromosome ' + str(chrom))
        # Read sibling genotypes from bgen file
        gts = read_sibs_from_bgen(bgenfile, sibpairs)

    # Calculate allele frequencies
    print('Calculating allele frequencies')
    gts.compute_freqs()

    # Check which sibling pairs have genotypes
    sibpair_present = np.zeros((sibpairs.shape), dtype=bool)
    sibpair_present[:, 0] = np.array([x in gts.id_dict for x in sibpairs[:, 0]])
    sibpair_present[:, 1] = np.array([x in gts.id_dict for x in sibpairs[:, 1]])
    sibpairs = sibpairs[np.sum(sibpair_present, axis=1) == 2, :]
    if sibpairs.shape[0] == 0:
        raise (ValueError('No genotyped sibling pairs found'))
    print(str(sibpairs.shape[0]) + ' sibpairs have genotypes')

    # Find indices of sibpairs
    sibpair_indices = np.zeros((sibpairs.shape), dtype=int)
    sibpair_indices[:, 0] = np.array([gts.id_dict[x] for x in sibpairs[:, 0]])
    sibpair_indices[:, 1] = np.array([gts.id_dict[x] for x in sibpairs[:, 1]])

    # Filtering on MAF, LD score, and genotyping error
    # Find error probabilities: default everywhere, overridden where estimated
    p_error = np.zeros((gts.sid.shape[0]))
    p_error[:] = error_prob
    if error_probs is not None:
        in_error_probs = np.array([x in error_probs.sid_dict for x in gts.sid])
        error_index = np.array([error_probs.sid_dict[x] for x in gts.sid[in_error_probs]])
        p_error[in_error_probs] = error_probs.error_ests[error_index]
    gts.error_probs = p_error

    # Filter
    print('Before filtering on MAF, missingness, and genotyping error, there were ' + str(gts.shape[1]) + ' SNPs')
    gts.filter_maf(min_maf)
    gts.filter_missingness(max_missing)
    gts.filter(gts.error_probs < max_error)
    print('After filtering, there are ' + str(gts.shape[1]) + ' SNPs')

    # Read map file
    if mapfile is None and bedfile is not None:
        print('Separate genetic map not provided, so attempting to read map from ' + bimfile)
        bim_map = np.loadtxt(bimfile, usecols=2)
        map_snp_dict = make_id_dict(np.loadtxt(bimfile, usecols=1, dtype=str))
        # A constant map column is treated as "no map information"
        if np.var(bim_map) == 0:
            print('Map information not found in bim file.')
            print('Using default map (decode sex averaged map on GRCh38 coordinates)')
            gts.map = decode_map_from_pos(chrom, gts.pos)
            pc_mapped = str(round(100*(1-np.mean(np.isnan(gts.map))),2))
            print('Found map positions for '+str(pc_mapped)+'% of SNPs')
            gts.filter(~np.isnan(gts.map))
        else:
            # Check for NAs
            if np.sum(np.isnan(bim_map)) > 0:
                raise (ValueError('Map cannot have NAs'))
            if np.min(bim_map) < 0:
                raise (ValueError('Map file cannot have negative values'))
            # Check ordering
            if not np.array_equal(bim_map, np.sort(bim_map)):
                raise (ValueError('Map not monotonic. Please make sure input is ordered correctly'))
            # Check scale
            if np.max(bim_map) > 5000:
                raise (ValueError('Maximum value of map too large'))
            gts.filter(np.array([x in map_snp_dict for x in gts.sid]))
            gts.map = bim_map[[map_snp_dict[x] for x in gts.sid]]
    elif mapfile is None and bgenfile is not None:
        print('Map file not provided.')
        print('Using default map (decode sex averaged map on Hg19 coordinates)')
        gts.map = decode_map_from_pos(chrom, gts.pos)
        pc_mapped = 100*(1-np.mean(np.isnan(gts.map)))
        if pc_mapped < 50:
            print('Warning: map positions not found for the majority of SNPs. Consider providing a genetic map using --map')
        print('Found map positions for '+str(round(pc_mapped,2))+'% of SNPs')
        gts.filter(~np.isnan(gts.map))
    else:
        print('Reading map from ' + str(mapfile))
        gts.map = get_map_positions(mapfile, gts)
    print('Read map')

    # Weights
    # np.float64 replaces np.float_, which was removed in NumPy 2.0. A fresh
    # array is built for each call, as before, so neither callee sees changes
    # the other may have made.
    print('Computing LD weights')
    ld = compute_ld_scores(np.array(gts.gts, dtype=np.float64), gts.map, max_dist=1)
    gts.weights = np.power(ld, -1)

    # IBD
    print('Inferring IBD')
    ibd = infer_ibd(sibpair_indices, np.array(gts.gts, dtype=np.float64), gts.freqs, gts.map, gts.weights, gts.error_probs)
    ibd, allsegs = smooth_ibd(ibd, gts.map, gts.sid, gts.pos, min_length)

    ## Write output
    # Write segments
    segs_outfile = outfile_name(outprefix,'.ibd.segments.gz', chrom)
    print('Writing segments to ' + segs_outfile)
    write_segs(sibpairs, allsegs, chrom, segs_outfile)

    # Write matrix: header row of SNP ids, then one row per sibling pair
    if ibdmatrix:
        outfile = outfile_name(outprefix,'.ibdmatrix.gz', chrom)
        print('Writing matrix output to ' + str(outfile))
        header_row = np.column_stack((np.array(['sib1', 'sib2']).reshape((1, 2)), gts.sid.reshape(1, gts.shape[1])))
        # np.vstack replaces np.row_stack, its alias deprecated in NumPy 2.0
        ibd = np.vstack((header_row, np.column_stack((sibpairs, ibd))))
        np.savetxt(outfile, ibd, fmt='%s')

    # Write LD scores
    if ld_out:
        ld_outfile = outfile_name(outprefix,'.l2.ldscore.gz', chrom)
        print('Writing LD-scores to '+ld_outfile)
        ld_table = np.vstack((np.array(['CHR', 'SNP', 'BP', 'L2']).reshape((1,4)),
                              np.vstack((np.array([chrom for x in gts.sid]), gts.sid, gts.pos, ld)).T))
        np.savetxt(ld_outfile, ld_table, fmt='%s')
def __init__(self, garray, ids, sid=None, alleles=None, pos=None, chrom=None, map=None, error_probs=None, fams=None, par_status=None):
    """
    Build a genotype array with its individual and (optional) SNP annotations.

    Args:
        garray : numpy ndarray or masked array of genotypes; individuals are
            on axis 0. A plain ndarray is wrapped in a masked array with NaN
            entries masked.
        ids : array of individual ids, one per row of ``garray``
        sid : optional array of SNP ids; must match axis 1 of ``garray``, or
            axis 2 when ``garray`` is 3-D
        alleles, pos, chrom, map, error_probs : optional per-SNP arrays; each
            requires ``sid`` and must have one entry per SNP
        fams : optional 1-D array of family labels, one per individual
        par_status : optional array with one row per individual and 2 columns

    Raises:
        ValueError: if ``garray`` is not an ndarray / masked array, or any
            annotation does not match the shape of the genotypes.
    """
    if type(garray) is np.ndarray:
        self.gts = ma.array(garray,mask=np.isnan(garray))
    elif type(garray) is np.ma.core.MaskedArray:
        self.gts = garray
    else:
        raise ValueError('Genotypes must be a numpy ndarray')
    self.shape = garray.shape
    self.ndim = garray.ndim
    self.dtype = garray.dtype
    self.freqs = None

    if garray.shape[0] == ids.shape[0]:
        self.ids = ids
        self.id_dict = make_id_dict(ids)
    else:
        raise ValueError('Shape of genotypes and ids does not match')

    if sid is not None:
        if sid.shape[0] == garray.shape[1]:
            self.snp_index = 1
        # Only look at axis 2 when it exists, so a mismatch on a 2-D array
        # raises the ValueError below rather than an IndexError
        elif garray.ndim > 2 and sid.shape[0] == garray.shape[2]:
            self.snp_index = 2
        else:
            raise ValueError('Shape of SNP ids (sid) does not match shape of genotype array')
        self.sid = sid
        self.sid_dict = make_id_dict(sid)
    else:
        # Always define sid: the per-SNP checks below and filter() test
        # `self.sid is not None` and previously hit AttributeError here
        self.sid = None

    def _per_snp(values, mismatch_message):
        # Validate an optional per-SNP annotation against the SNP ids
        if values is None:
            return None
        if self.sid is None:
            raise(ValueError('Must provide SNP ids'))
        if values.shape[0] != self.sid.shape[0]:
            raise ValueError(mismatch_message)
        return values

    self.alleles = _per_snp(alleles, 'Size of alleles does not match size of genotypes')
    self.pos = _per_snp(pos, 'Size of position vector does not match size of genotypes')
    # The chrom and error_probs messages previously both said 'map'
    self.chrom = _per_snp(chrom, 'Size of chrom does not match number of SNPs')
    self.map = _per_snp(map, 'Size of map does not match number of SNPs')
    self.error_probs = _per_snp(error_probs, 'Size of error_probs does not match number of SNPs')

    if fams is not None:
        if fams.shape[0] == ids.shape[0] and fams.ndim==1:
            self.fams = fams
        else:
            raise ValueError('Fams not of same length as IDs')
    else:
        self.fams = None

    if par_status is not None:
        # The ndim guard turns a 1-D input into the ValueError below
        if par_status.ndim == 2 and par_status.shape[0] == ids.shape[0] and par_status.shape[1] == 2:
            self.par_status = par_status
        else:
            raise ValueError('Incompatible par status array')
    else:
        self.par_status = None

    self.mean_normalised = False

    # True when at least one genotype entry is masked
    self.has_NAs = bool(np.sum(self.gts.mask) > 0)

    self.info = None