Exemplo n.º 1
0
def run_other_new(n,
                  m,
                  pheno_vector,
                  geno,
                  restricted_max_likelihood=True,
                  refit=False):
    """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics

    n -- passed with pheno_vector to phenotype.remove_missing_new, which
         returns an updated n, the adjusted phenotypes and a ``keep`` selector
    m -- accepted for interface compatibility; not used in this function
    pheno_vector -- phenotype values
    geno -- genotype matrix; its columns are filtered with ``keep``
    restricted_max_likelihood -- whether to use restricted max likelihood; True or False
    refit -- whether to refit the variance component for each marker

    Returns the tuple (p_values, t_stats) as produced by gwas.gwas.

    """

    print("Running the new LMM2 engine in run_other_new")
    print("REML=", restricted_max_likelihood, " REFIT=", refit)

    # Adjust phenotypes
    n, Y, keep = phenotype.remove_missing_new(n, pheno_vector)

    # Genotype normalisation is currently disabled:
    # if options.maf_normalization:
    #     G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
    #     print "MAF replacements: \n",G
    # if not options.skip_genotype_normalization:
    # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)

    # Keep only the genotype columns selected by the phenotype filter.
    geno = geno[:, keep]
    with Bench("Calculate Kinship"):
        K, G = calculate_kinship_new(geno)

    print("kinship_matrix: ", pf(K))
    print("kinship_matrix.shape: ", pf(K.shape))

    print("run_other_new genotype_matrix: ", G.shape)
    print(G)

    with Bench("Doing GWAS"):
        # Forward the caller's options; they must match what was printed above.
        t_stats, p_values = gwas.gwas(
            Y,
            G,
            K,
            restricted_max_likelihood=restricted_max_likelihood,
            refit=refit,
            verbose=True)
    Bench().report()
    # Note the order: gwas.gwas yields (t_stats, p_values), callers get
    # (p_values, t_stats).
    return p_values, t_stats
Exemplo n.º 2
0
def test_list_delete_top():
    """Benchmark emptying a list by removing its values in ascending order.

    Builds the list 1..m (``m`` comes from the enclosing scope), removes every
    value with ``list.remove`` inside a ``Bench`` context, and checks that
    nothing is left afterwards.
    """
    upper = m + 1
    values = list(range(1, upper))

    with Bench():
        for value in range(1, upper):
            values.remove(value)
    assert values == []
Exemplo n.º 3
0
def test_list_multiple_cast():
  """Benchmark building a list from consecutive 100-element range casts.

  Appends ``list(range(...))`` chunks of 100 consecutive integers, starting
  at 1, for ``n // 100`` chunks (``n`` comes from the enclosing scope), then
  compares the result with ``ref``.
  """
  result = []
  chunk_size = 100
  with Bench():
    chunk_count = n // chunk_size
    # Chunk starts are 1, 101, 201, ... - one per full chunk.
    for start in range(1, chunk_count * chunk_size + 1, chunk_size):
      result += list(range(start, start + chunk_size))
  assert ref == result
Exemplo n.º 4
0
def test_list_multiple_comprehension():
  """Benchmark building a list from consecutive 100-element comprehensions.

  Appends list-comprehension chunks of 100 consecutive integers, starting
  at 1, for ``n // 100`` chunks (``n`` comes from the enclosing scope), then
  compares the result with ``ref``.
  """
  result = []
  chunk_size = 100
  with Bench():
    chunk_count = n // chunk_size
    # Chunk starts are 1, 101, 201, ... - one per full chunk.
    for start in range(1, chunk_count * chunk_size + 1, chunk_size):
      # The comprehension (rather than list()) is the construct being timed.
      result += [value for value in range(start, start + chunk_size)]
  assert ref == result
Exemplo n.º 5
0
def test_list_delete_back_slice():
    """Benchmark emptying a list from the back by repeated slicing.

    Builds the list 1..m (``m`` comes from the enclosing scope) and, inside a
    ``Bench`` context, replaces it with an ever shorter slice of itself until
    it is empty.
    """
    remaining = list(range(1, m + 1))
    # The reverse iterator stays bound to the initial list object, so
    # rebinding ``remaining`` below does not disturb the iteration.
    descending = reversed(remaining)

    with Bench():
        # This creates a worst case: the last element of the list is
        # deleted first, over and over, until the list is empty.
        for last_value in descending:
            cut = last_value - 1
            remaining = remaining[:cut]
        assert remaining == []
Exemplo n.º 6
0
def run_other_old(pheno_vector,
                  genotype_matrix,
                  restricted_max_likelihood=True,
                  refit=False):
    """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics

    pheno_vector -- phenotype values, passed unchanged to GWAS
    genotype_matrix -- genotype matrix; replaced by the second value returned
                       from calculate_kinship_new and passed transposed to GWAS
    restricted_max_likelihood -- whether to use restricted max likelihood; True or False
    refit -- whether to refit the variance component for each marker

    Returns the tuple (p_values, t_stats) as produced by GWAS.

    """

    print("Running the original LMM engine in run_other (old)")
    print("REML=", restricted_max_likelihood, " REFIT=", refit)
    with Bench("Calculate Kinship"):
        kinship_matrix, genotype_matrix = calculate_kinship_new(
            genotype_matrix)

    print("kinship_matrix: ", pf(kinship_matrix))
    print("kinship_matrix.shape: ", pf(kinship_matrix.shape))

    print("run_other_old genotype_matrix: ", genotype_matrix.shape)
    print(genotype_matrix)

    with Bench("Doing GWAS"):
        # Forward the caller's options; they must match what was printed above.
        t_stats, p_values = GWAS(
            pheno_vector,
            genotype_matrix.T,
            kinship_matrix,
            restricted_max_likelihood=restricted_max_likelihood,
            refit=refit)
    Bench().report()
    # Note the order: GWAS yields (t_stats, p_values), callers get
    # (p_values, t_stats).
    return p_values, t_stats
Exemplo n.º 7
0
def test_list_numpy_arange():
  """Benchmark building the list 1..n via ``numpy.arange(...).tolist()``.

  ``n`` and ``ref`` come from the enclosing scope; the result is compared
  with ``ref`` after the timed section.
  """
  import numpy
  built = []
  with Bench():
    sequence = numpy.arange(1, n + 1)
    built = sequence.tolist()
  assert ref == built
Exemplo n.º 8
0
def test_list_cast():
  """Benchmark building the list 1..n with a single ``list(range(...))`` cast.

  ``n`` and ``ref`` come from the enclosing scope; the result is compared
  with ``ref`` after the timed section.
  """
  built = []
  with Bench():
    source = range(1, n + 1)
    built = list(source)
  assert ref == built
Exemplo n.º 9
0
def test_list_for_comprehension():
  """Benchmark building the list 1..n with a list comprehension.

  ``n`` and ``ref`` come from the enclosing scope; the result is compared
  with ``ref`` after the timed section.
  """
  built = []
  with Bench():
    # The comprehension (rather than list()) is the construct being timed.
    built = [value for value in range(1, n + 1)]
  assert ref == built
Exemplo n.º 10
0
def test_list_plus():
  """Benchmark building the list 1..n by ``+=`` of one-element lists.

  ``n`` and ``ref`` come from the enclosing scope; the result is compared
  with ``ref`` after the timed section.
  """
  built = []
  with Bench():
    for value in range(1, n + 1):
      # In-place concatenation of a singleton list is the construct being timed.
      built += [value]
  assert ref == built
Exemplo n.º 11
0
def test_list_append():
  """Benchmark building the list 1..n with repeated ``list.append`` calls.

  ``n`` and ``ref`` come from the enclosing scope; the result is compared
  with ``ref`` after the timed section.
  """
  built = []
  with Bench():
    for value in range(1, n + 1):
      built.append(value)
  assert ref == built
Exemplo n.º 12
0
def run_human(pheno_vector,
              covariate_matrix,
              plink_input_file,
              kinship_matrix,
              refit=False):
    """Fit an LMM and run the association for every SNP of a pickled input.

    Entries whose phenotype is NaN are removed from the phenotype vector, the
    covariate rows and both axes of the kinship matrix before the model is
    fitted.  The SNPs are read from a gzip-compressed pickle that must hold
    the keys ``'plink_input'`` and ``'numSNPs'``.

    Side effects: the Redis list ``"plink_inputs"`` is deleted and refilled
    with zlib-compressed pickles of the input split by
    ``chunks.divide_into_chunks(inputs, 64)``.

    pheno_vector -- phenotype values; NaN marks a missing value
    covariate_matrix -- covariates, filtered by row with the same mask
    plink_input_file -- path of the gzip-compressed pickle to load
    kinship_matrix -- kinship matrix, filtered on rows and columns
    refit -- forwarded unchanged to human_association

    Returns the tuple (p_values, t_stats): two lists holding one
    human_association result per item of the loaded ``plink_input``.
    """
    # Mask of entries with a usable phenotype.  Logical negation is used
    # because arithmetic subtraction is not defined for NumPy boolean arrays.
    v = np.isnan(pheno_vector)
    keep = np.logical_not(v)
    keep = keep.reshape((len(keep), ))

    identifier = str(uuid.uuid4())

    if v.sum():
        # At least one phenotype is missing: filter all inputs consistently.
        pheno_vector = pheno_vector[keep]
        print("pheno_vector shape is now: ", pf(pheno_vector.shape))
        covariate_matrix = covariate_matrix[keep, :]
        print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
        print("keep is: ", pf(keep.shape))
        kinship_matrix = kinship_matrix[keep, :][:, keep]

    print("kinship_matrix:", pf(kinship_matrix))

    n = kinship_matrix.shape[0]
    print("n is:", n)
    lmm_ob = LMM(pheno_vector, kinship_matrix, covariate_matrix)
    lmm_ob.fit()

    # Buffers for pvalues and t-stats
    p_values = []
    t_stats = []

    with Bench("Opening and loading pickle file"):
        with gzip.open(plink_input_file, "rb") as input_file:
            # SECURITY: unpickling can execute arbitrary code; only pass
            # files from a trusted source to this function.
            data = pickle.load(input_file)

    plink_input = data['plink_input']

    with Bench("Calculating numSNPs"):
        total_snps = data['numSNPs']

    with Bench("snp iterator loop"):
        with Bench("Create list of inputs"):
            inputs = list(plink_input)

        with Bench("Divide into chunks"):
            results = chunks.divide_into_chunks(inputs, 64)

        key = "plink_inputs"

        # Todo: Delete below line when done testing
        Redis.delete(key)

        timestamp = datetime.datetime.utcnow().isoformat()

        # Pickle chunks of input SNPs (from Plink iterator) and compress them
        for part, result in enumerate(results):
            holder = pickle.dumps(
                dict(identifier=identifier,
                     part=part,
                     timestamp=timestamp,
                     result=result), pickle.HIGHEST_PROTOCOL)

            Redis.rpush(key, zlib.compress(holder))

        # Run the association for each SNP, reporting 1-based progress.
        for count, (snp, this_id) in enumerate(plink_input, 1):
            progress("human", count, total_snps)

            ps, ts = human_association(snp, n, keep, lmm_ob, pheno_vector,
                                       covariate_matrix, kinship_matrix, refit)

            p_values.append(ps)
            t_stats.append(ts)

    return p_values, t_stats