def measured_genotype_association(extrapredictor):
    """Fit a mixed model testing one additional predictor for the outcome.

    Builds a model whose fixed effects are the user-specified covariates
    plus *extrapredictor*, adds a polygenic (additive genetic) random
    effect, and maximizes the (unrestricted) likelihood.

    Returns the fitted MixedModel.
    """
    covariates = args.fixefs + [extrapredictor]
    model = MixedModel(peds, outcome=args.outcome, fixed_effects=covariates)
    model.add_genetic_effect()
    model.fit_model()

    # For most loci in the genome the allele effect is near zero under the
    # null, so the variance-component estimates should land close to the
    # null model's. Seeding the optimizer with the null model's variance
    # components therefore usually saves a few scoring iterations (or many
    # EM iterations) on null loci, and costs nothing when the locus is
    # genuinely associated — the optimizer just moves to the real estimate.
    model.maximize(method=args.maxmethod,
                   starts=null_model.variance_components,
                   verbose=args.verbose,
                   restricted=False)
    return model
if args.only is not None:
    only = frozenset(args.only)
else:
    # Always bind `only` so later references cannot raise NameError when
    # the --only option was not given.
    only = None

print('Reading pedigree')
peds = pyd.io.read_ped(args.ped)

print('Reading phenotypes')
pyd.io.read_phenotypes(peds, args.phen)

print('Reading genotypes')
genodata = pyd.io.plink.read_plink(pedfile=args.geno, mapfile=args.map)
peds.update(genodata)

# Fit the covariates-only (null) polygenic model once; its log-likelihood
# is the baseline for per-locus association tests, and its variance
# components seed the per-locus optimizations.
print('Fitting polygenic model')
null_model = MixedModel(peds, outcome=args.outcome, fixed_effects=args.fixefs)
null_model.add_genetic_effect()
null_model.fit_model()
null_model.maximize(method=args.maxmethod, verbose=args.verbose,
                    restricted=False)
null_model.summary()
llik_null = null_model.loglikelihood()


def parse_range(rangestr):
    """Parse a genomic range string into (chromosome, start, stop).

    Accepts 'chr1:100-200' or '1:100-200'; any 'chr' prefix is stripped
    from the chromosome label. A single position ('chr1:100') is also
    accepted and returned with start == stop.

    Raises ValueError if the string does not contain exactly one ':' or
    if the positions are not integers.
    """
    chrom, span = rangestr.split(':')
    chrom = chrom.replace('chr', '')
    positions = [int(x) for x in span.split('-')]
    if len(positions) == 1:
        # Single-position range: treat it as a degenerate interval.
        positions.append(positions[0])
    return chrom, positions[0], positions[1]