Python Dataset.get_standardized_genotypes примеры использования

Язык программирования: Python

Пространство имен/Пакет: primitives

Класс/Тип: Dataset

Метод/Функция: get_standardized_genotypes

Примеров на hotexamples.com: 4

Python Dataset.get_standardized_genotypes — найдено 4 примера. Это лучшие примеры Python-кода для primitives.Dataset.get_standardized_genotypes, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Dataset(14)

slices(3)

get_standardized_genotypes(2)

get_standardized_genotypes_in_iter(1)

project_out_covariates(1)

random_indivs(1)

Пример #1

Показать файл

Файл: sim_betas.py Проект: yakirr/statgen_y1

def main(args):
    """Simulate one vector of effect sizes and its noiseless phenotypes.

    Seeds NumPy's global RNG with ``args.beta_num``, draws effect sizes for
    the simulation named ``args.sim_name``, accumulates ``Y = X.dot(beta)``
    one SNP slice at a time, rescales ``Y`` and ``beta`` by a common factor,
    and pickles both to the files handed out by the simulation object.

    Args:
        args: Parsed options; ``sim_name`` and ``beta_num`` are read.
    """
    np.random.seed(args.beta_num)
    sim = SumstatSimulation(args.sim_name)
    arch = Architecture(sim.architecture)
    d = Dataset(sim.dataset)

    # sample the beta
    beta = arch.draw_effect_sizes(sim.dataset, sim.h2g)[:, 0]

    # compute noiseless phenotypes slice by slice
    Y = np.zeros(d.N)
    t0 = time()
    for s in d.slices():
        # X will be N x M
        print(int(time() - t0), ": getting genotypes from file. SNPs", s)
        X = d.get_standardized_genotypes(s)
        print("computing phenotypes. SNPs", s)
        Y += X.dot(beta[s[0] : s[1]])
        # drop the reference so the slice can be freed before the next load
        del X

    # normalize the Y and the beta to the desired heritability, i.e. so that
    # std(Y) == sqrt(h2g)
    y_std = np.std(Y)
    if y_std == 0:
        # A constant Y cannot be rescaled. Testing the standard deviation
        # itself (rather than the ratio) also covers h2g == 0, where the
        # ratio would be 0 / 0 = NaN and would turn Y and beta into NaNs.
        normalization = 1
    else:
        normalization = y_std / np.sqrt(sim.h2g)
    Y /= normalization
    beta /= normalization

    # write the betas and the noiseless phenotypes; the with-blocks close
    # (and therefore flush) each handle even if pickling raises
    # NOTE(review): assumes beta_file / noiseless_Y_file return ordinary
    # file objects usable as context managers -- confirm
    with sim.beta_file(args.beta_num, "wb") as beta_out:
        pickle.dump(beta, beta_out, 2)
    with sim.noiseless_Y_file(args.beta_num, "wb") as y_out:
        pickle.dump(Y, y_out, 2)

Пример #2

Показать файл

def main(args):
    """Draw effect sizes for a simulation and store the noiseless phenotypes.

    The global NumPy RNG is seeded with ``args.beta_num`` so a given beta
    number reproduces the same draw. Phenotypes are built up as the sum of
    ``X.dot(beta[slice])`` over the dataset's SNP slices, then ``Y`` and
    ``beta`` are divided by the same factor and pickled (protocol 2).

    Args:
        args: Parsed options; ``sim_name`` and ``beta_num`` are read.
    """
    np.random.seed(args.beta_num)
    sim = SumstatSimulation(args.sim_name)
    arch = Architecture(sim.architecture)
    d = Dataset(sim.dataset)

    # sample the beta
    beta = arch.draw_effect_sizes(sim.dataset, sim.h2g)[:, 0]

    # compute noiseless phenotypes slice by slice
    Y = np.zeros(d.N)
    t0 = time()
    for s in d.slices():
        # X will be N x M
        print(int(time() - t0), ': getting genotypes from file. SNPs', s)
        X = d.get_standardized_genotypes(s)
        print('computing phenotypes. SNPs', s)
        Y += X.dot(beta[s[0]:s[1]])
        del X  # let the slice be freed before the next one is read

    # normalize the Y and the beta to the desired heritability
    # (after this step std(Y) equals sqrt(h2g))
    y_std = np.std(Y)
    if y_std == 0:
        # just in case we have some 0s: a constant Y is left unscaled.
        # Checking std(Y) directly, not the ratio, keeps the guard working
        # when h2g is 0 as well (the ratio would then be 0 / 0 = NaN).
        normalization = 1
    else:
        normalization = y_std / np.sqrt(sim.h2g)
    Y /= normalization
    beta /= normalization

    # write the betas and the noiseless phenotypes, closing each output
    # handle as soon as its dump finishes (or fails)
    # NOTE(review): assumes the handles returned by sim support the
    # context-manager protocol like regular file objects -- confirm
    with sim.beta_file(args.beta_num, 'wb') as beta_out:
        pickle.dump(beta, beta_out, 2)
    with sim.noiseless_Y_file(args.beta_num, 'wb') as y_out:
        pickle.dump(Y, y_out, 2)

Пример #3

Показать файл

def main(args):
    """Convolve a signed annotation with the reference-panel LD for one chromosome.

    Reads ``<annot_stem>.<chrnum>.annot.gz`` (tab-separated, gzip), takes its
    last column as the vector ``v``, and computes ``Rv = X^T X v / N`` in two
    passes over the dataset's SNP slices, plus the scalar ``v^T R v``.
    ``Rv`` is appended to the table as ``<name>.CONV`` and written to
    ``<annot_stem>.<chrnum>.cannot.gz``; the scalar goes to
    ``<annot_stem>.<chrnum>.cannot.norm``.

    Args:
        args: Parsed options; ``refpanel``, ``chrnum`` and ``annot_stem``
            are read.
    """
    d = Dataset(args.refpanel + '.' + str(args.chrnum))
    annot_filename = '{}.{}.annot.gz'.format(args.annot_stem, args.chrnum)
    cannot_filename = '{}.{}.cannot.gz'.format(args.annot_stem, args.chrnum)
    cannot_norm_filename = '{}.{}.cannot.norm'.format(args.annot_stem,
                                                      args.chrnum)

    annot = pd.read_csv(annot_filename, compression='gzip', sep='\t', header=0)
    name = annot.columns[-1]
    # .loc is the label-based replacement for DataFrame.ix, which was
    # deprecated in pandas 0.20 and removed in pandas 1.0
    v = annot.loc[:, name].values

    #TODO: use ld blocks, possibly just those that have non-trivial intersection with the
    # nonzero entries of v
    print('computing Xv')
    # first pass: accumulate X.dot(v) across slices (one entry per individual)
    Xv = np.zeros(d.N)
    for s in d.slices():
        print(s)
        X = d.get_standardized_genotypes(s)
        Xv += X.dot(v[s[0]:s[1]])

    print('computing XTXv')
    # second pass: each slice fills its own segment of X^T (Xv)
    XTXv = np.zeros(d.M)
    for s in d.slices():
        print(s)
        X = d.get_standardized_genotypes(s)
        XTXv[s[0]:s[1]] = X.T.dot(Xv)

    print('computing V^TRv')
    Rv = XTXv / d.N
    vTRv = v.dot(Rv)

    # write output
    print('writing output')
    annot[name + '.CONV'] = Rv
    with gzip.open(cannot_filename, 'wt') as f:
        annot.to_csv(f, index=False, sep='\t')

    with open(cannot_norm_filename, 'w') as f:
        f.write(str(vTRv))

Пример #4

Показать файл

Файл: signed_oldpreprocess.py Проект: yakirr/statgen_y1

def main(args):
    """Preprocess one chromosome's annotation into its LD-convolved form.

    Steps, all driven by ``args.refpanel``, ``args.chrnum`` and
    ``args.annot_stem``:

    1. Load the gzip-compressed, tab-separated ``.annot.gz`` table and use
       its last column as ``v``.
    2. Accumulate ``Xv`` over the dataset's SNP slices, then fill ``X^T Xv``
       slice by slice in a second pass.
    3. Form ``Rv = X^T Xv / N`` and the scalar ``v.dot(Rv)``.
    4. Write the table with the extra ``<name>.CONV`` column to
       ``.cannot.gz`` and the scalar, as text, to ``.cannot.norm``.

    Args:
        args: Parsed options; ``refpanel``, ``chrnum`` and ``annot_stem``
            are read.
    """
    d = Dataset(args.refpanel + '.' + str(args.chrnum))
    annot_filename = '{}.{}.annot.gz'.format(args.annot_stem, args.chrnum)
    cannot_filename = '{}.{}.cannot.gz'.format(args.annot_stem, args.chrnum)
    cannot_norm_filename = '{}.{}.cannot.norm'.format(args.annot_stem, args.chrnum)

    annot = pd.read_csv(annot_filename, compression='gzip', sep='\t', header=0)
    name = annot.columns[-1]
    # DataFrame.ix no longer exists (removed in pandas 1.0); select the
    # column by label with .loc instead
    v = annot.loc[:, name].values

    #TODO: use ld blocks, possibly just those that have non-trivial intersection with the
    # nonzero entries of v
    print('computing Xv')
    # pass 1: sum the per-slice contributions to X.dot(v)
    Xv = np.zeros(d.N)
    for s in d.slices():
        print(s)
        X = d.get_standardized_genotypes(s)
        Xv += X.dot(v[s[0]:s[1]])

    print('computing XTXv')
    # pass 2: genotypes are re-read so each slice can write its own
    # segment of X^T (Xv)
    XTXv = np.zeros(d.M)
    for s in d.slices():
        print(s)
        X = d.get_standardized_genotypes(s)
        XTXv[s[0]:s[1]] = X.T.dot(Xv)

    print('computing V^TRv')
    Rv = XTXv / d.N
    vTRv = v.dot(Rv)

    # write output
    print('writing output')
    annot[name+'.CONV'] = Rv
    with gzip.open(cannot_filename, 'wt') as f:
        annot.to_csv(f, index=False, sep='\t')

    with open(cannot_norm_filename, 'w') as f:
        f.write(str(vTRv))