def getGenotypes(self, sample_idx=None, idx_start=None, idx_end=None, chrom=None, pos_start=None, pos_end=None, center=True, unit=True, impute_missing=False, snp_idx=None, windowsize=0):
    """Load genotypes, optionally restricted to a subset of SNPs and samples.

    SNPs are the columns of ``self.geno_matrix`` and samples are its rows.
    The SNP selection is resolved in this order of precedence:

    1. index range (``idx_start`` / ``idx_end``); either bound may be left
       as None, which makes the slice open-ended on that side
    2. position range (``pos_start`` and ``pos_end`` both given), which is
       translated to an index range by ``self.getGenoIndex``
    3. explicit SNP index (``snp_idx``)
    4. no restriction: all SNPs are returned

    Args:
        sample_idx:     optional row index applied after the SNP selection
        idx_start:      genotype index based selection (start index)
        idx_end:        genotype index based selection (end index, exclusive)
        chrom:          forwarded to ``self.getGenoIndex`` for position
                        based selection; has no effect on its own
        pos_start:      position based selection (start position),
                        tuple of chrom, position
        pos_end:        position based selection (end position),
                        tuple of chrom, position
        center:         forwarded to ``du.imputeMissing``
        unit:           forwarded to ``du.imputeMissing``
        impute_missing: if True, the result is passed through
                        ``du.imputeMissing``
        snp_idx:        explicit column index, used only when no index or
                        position range applies
        windowsize:     forwarded to ``self.getGenoIndex``

    Returns:
        X: the selected genotype values
    """
    # Explicit index bounds always win over a position query.
    index_given = (idx_start is not None) or (idx_end is not None)
    if not index_given and (pos_start is not None) and (pos_end is not None):
        idx_start, idx_end = self.getGenoIndex(chrom=chrom, pos_start=pos_start, pos_end=pos_end, windowsize=windowsize)

    # A single bound is honoured as an open-ended slice instead of being
    # silently dropped (a None bound means "from the start" / "to the end").
    if (idx_start is not None) or (idx_end is not None):
        X = self.geno_matrix[:, idx_start:idx_end]
    elif snp_idx is not None:
        X = self.geno_matrix[:, snp_idx]
    else:
        X = self.geno_matrix[:, :]

    if sample_idx is not None:
        X = X[sample_idx]
    if impute_missing:
        X = du.imputeMissing(X, center=center, unit=unit)
    return X
def getGenotypes(self, idx_start=None, idx_end=None, pos_start=None, pos_end=None, windowsize=0, chrom=None, center=True, unit=True, impute_missing=False, cast_float=True):
    """Return genotypes for the samples listed in ``self.sample_idx["geno"]``.

    The SNP subset is resolved by ``self.range_query_geno`` from either
    0-based indices (``idx_start`` / ``idx_end``) or a position range
    (``pos_start`` / ``pos_end`` on ``chrom``). The resulting index is
    handed to ``self.geno_reader.getGenotypes`` as ``snp_idx``.

    Args:
        idx_start:      genotype index based selection (start index)
        idx_end:        genotype index based selection (end index)
        pos_start:      position based selection (start position)
        pos_end:        position based selection (end position)
        windowsize:     forwarded to ``self.range_query_geno``
        chrom:          position based selection (chromosome)
        center:         forwarded to ``du.imputeMissing``
        unit:           forwarded to ``du.imputeMissing``
        impute_missing: if True, the result is passed through
                        ``du.imputeMissing``
        cast_float:     if True, the result is converted to float64 unless
                        it already has that dtype

    Returns:
        X: array of genotype values
    """
    query_args = dict(idx_start=idx_start, idx_end=idx_end, chrom=chrom, pos_start=pos_start, windowsize=windowsize)
    # pos_end used to be accepted but never passed on, so a position range
    # lost its end bound. It is forwarded only when supplied, which keeps
    # every existing call unchanged.
    # NOTE(review): assumes range_query_geno accepts a pos_end keyword - confirm.
    if pos_end is not None:
        query_args["pos_end"] = pos_end
    query_idx = self.range_query_geno(**query_args)

    X = self.geno_reader.getGenotypes(sample_idx=sp.array(self.sample_idx["geno"]), snp_idx=query_idx)
    if impute_missing:
        X = du.imputeMissing(X, center=center, unit=unit)
    if cast_float and X.dtype != 'float64':
        X = sp.array(X, dtype='float64')
    return X
def getGenotypes(self, sample_idx=None, idx_start=None, idx_end=None, pos_start=None, pos_end=None, chrom=None, center=True, unit=True, pos_cum_start=None, pos_cum_end=None, impute_missing=False, snp_idx=None):
    """Load genotypes, optionally restricted to a subset of SNPs and samples.

    SNPs are the columns of ``self.geno_matrix`` and samples are its rows.
    The SNP selection is resolved in this order of precedence:

    1. 0-based index range (``idx_start`` / ``idx_end``); either bound may
       be left as None, which makes the slice open-ended on that side
    2. position range (``pos_start``, ``pos_end`` and ``chrom`` all given)
       or cumulative position range (``pos_cum_start`` and ``pos_cum_end``
       both given), translated to an index range by ``self.getGenoIndex``
    3. explicit SNP index (``snp_idx``)
    4. no restriction: all SNPs are returned

    Args:
        sample_idx:     optional row index applied after the SNP selection
        idx_start:      genotype index based selection (start index)
        idx_end:        genotype index based selection (end index, exclusive)
        pos_start:      position based selection (start position)
        pos_end:        position based selection (end position)
        chrom:          position based selection (chromosome)
        center:         forwarded to ``du.imputeMissing``
        unit:           forwarded to ``du.imputeMissing``
        pos_cum_start:  cumulative position based selection (start position)
        pos_cum_end:    cumulative position based selection (end position)
        impute_missing: if True, the result is passed through
                        ``du.imputeMissing``
        snp_idx:        explicit column index, used only when no index or
                        position range applies

    Returns:
        X: the selected genotype values
    """
    index_given = (idx_start is not None) or (idx_end is not None)
    by_position = (pos_start is not None) and (pos_end is not None) and (chrom is not None)
    by_cum_position = (pos_cum_start is not None) and (pos_cum_end is not None)

    # Explicit index bounds win over both kinds of position query. The
    # grouping matters: without it the cumulative-position test is OR-ed
    # against the whole conjunction and overrides the given indices.
    if not index_given and (by_position or by_cum_position):
        idx_start, idx_end = self.getGenoIndex(
            pos_start=pos_start,
            pos_end=pos_end,
            chrom=chrom,
            pos_cum_start=pos_cum_start,
            pos_cum_end=pos_cum_end,
        )

    # A single bound is honoured as an open-ended slice instead of being
    # silently dropped (a None bound means "from the start" / "to the end").
    if (idx_start is not None) or (idx_end is not None):
        X = self.geno_matrix[:, idx_start:idx_end]
    elif snp_idx is not None:
        X = self.geno_matrix[:, snp_idx]
    else:
        X = self.geno_matrix[:, :]

    if sample_idx is not None:
        X = X[sample_idx]
    if impute_missing:
        X = du.imputeMissing(X, center=center, unit=unit)
    return X
def getGenotypes(self, idx_start=None, idx_end=None, pos_start=None, pos_end=None, windowsize=0, chrom=None, center=True, unit=True, impute_missing=False, cast_float=True):
    """Return genotypes for the samples listed in ``self.sample_idx["geno"]``.

    The SNP subset is resolved by ``self.range_query_geno`` from either
    0-based indices (``idx_start`` / ``idx_end``) or a position range
    (``pos_start`` / ``pos_end`` on ``chrom``). The resulting index is
    handed to ``self.geno_reader.getGenotypes`` as ``snp_idx``.

    Args:
        idx_start:      genotype index based selection (start index)
        idx_end:        genotype index based selection (end index)
        pos_start:      position based selection (start position)
        pos_end:        position based selection (end position)
        windowsize:     forwarded to ``self.range_query_geno``
        chrom:          position based selection (chromosome)
        center:         forwarded to ``du.imputeMissing``
        unit:           forwarded to ``du.imputeMissing``
        impute_missing: if True, the result is passed through
                        ``du.imputeMissing``
        cast_float:     if True, the result is converted to float64 unless
                        it already has that dtype

    Returns:
        X: array of genotype values
    """
    range_kwargs = {
        "idx_start": idx_start,
        "idx_end": idx_end,
        "chrom": chrom,
        "pos_start": pos_start,
        "windowsize": windowsize,
    }
    # The end position was previously dropped before reaching the range
    # query. Forward it only when the caller set it, so calls that never
    # used pos_end behave exactly as before.
    # NOTE(review): assumes range_query_geno accepts a pos_end keyword - confirm.
    if pos_end is not None:
        range_kwargs["pos_end"] = pos_end
    query_idx = self.range_query_geno(**range_kwargs)

    geno_samples = sp.array(self.sample_idx["geno"])
    X = self.geno_reader.getGenotypes(sample_idx=geno_samples, snp_idx=query_idx)
    if impute_missing:
        X = du.imputeMissing(X, center=center, unit=unit)
    if cast_float and X.dtype != 'float64':
        X = sp.array(X, dtype='float64')
    return X
def getGenotypes(self, sample_idx=None, idx_start=None, idx_end=None, pos_start=None, pos_end=None, chrom=None, center=True, unit=True, pos_cum_start=None, pos_cum_end=None, impute_missing=False, snp_idx=None):
    """Load genotypes, optionally restricted to a subset of SNPs and samples.

    SNPs are the columns of ``self.geno_matrix`` and samples are its rows.
    The SNP selection is resolved in this order of precedence:

    1. 0-based index range (``idx_start`` / ``idx_end``); either bound may
       be left as None, which makes the slice open-ended on that side
    2. position range (``pos_start``, ``pos_end`` and ``chrom`` all given)
       or cumulative position range (``pos_cum_start`` and ``pos_cum_end``
       both given), translated to an index range by ``self.getGenoIndex``
    3. explicit SNP index (``snp_idx``)
    4. no restriction: all SNPs are returned

    Args:
        sample_idx:     optional row index applied after the SNP selection
        idx_start:      genotype index based selection (start index)
        idx_end:        genotype index based selection (end index, exclusive)
        pos_start:      position based selection (start position)
        pos_end:        position based selection (end position)
        chrom:          position based selection (chromosome)
        center:         forwarded to ``du.imputeMissing``
        unit:           forwarded to ``du.imputeMissing``
        pos_cum_start:  cumulative position based selection (start position)
        pos_cum_end:    cumulative position based selection (end position)
        impute_missing: if True, the result is passed through
                        ``du.imputeMissing``
        snp_idx:        explicit column index, used only when no index or
                        position range applies

    Returns:
        X: the selected genotype values
    """
    has_index = not (idx_start is None and idx_end is None)
    has_position = None not in (pos_start is None or None, pos_end is None or None, chrom is None or None) if False else (
        (pos_start is not None) and (pos_end is not None) and (chrom is not None)
    )
    has_cum_position = (pos_cum_start is not None) and (pos_cum_end is not None)

    # Only fall back to a position lookup when no index bound was given.
    # Both position flavours are grouped so that neither can override
    # explicitly supplied indices.
    if not has_index and (has_position or has_cum_position):
        # The end bound must be pos_cum_end; the earlier code passed an
        # undefined name here and raised NameError on every position query.
        idx_start, idx_end = self.getGenoIndex(pos_start=pos_start, pos_end=pos_end, chrom=chrom, pos_cum_start=pos_cum_start, pos_cum_end=pos_cum_end)

    # One-sided bounds produce an open-ended slice rather than being ignored.
    if (idx_start is not None) or (idx_end is not None):
        X = self.geno_matrix[:, idx_start:idx_end]
    elif snp_idx is not None:
        X = self.geno_matrix[:, snp_idx]
    else:
        X = self.geno_matrix[:, :]

    if sample_idx is not None:
        X = X[sample_idx]
    if impute_missing:
        X = du.imputeMissing(X, center=center, unit=unit)
    return X