Exemplo n.º 1
0
# Calculate Top 20 PCA Components
#[__, __, varvalsPCA]= tileutils.findTileVars(XtrainPCA,pathdataPCA,idxOPPCA)
#tiledPCA = tileutils.pcaComponents(XtrainPCA,varvalsPCA,20)

#del XtrainPCA
#del varvalsPCA

print("Reshaping Matrix")
# Merge the two phases: the even-indexed columns are stacked on top of the
# odd-indexed columns, so the row count doubles and the column count halves.
m, n = Xtrain.shape
Xtrain = np.vstack((Xtrain[:, ::2], Xtrain[:, 1::2]))
# Keep one path entry and one original-index entry per column pair.
pathdata = pathdata[:n:2]
idxOP = idxOP[:n:2]

# Apply the 90% quality cutoff ahead of filtering and further ML, then
# gather the per-tile variant values from the surviving columns.
Xtrain, pathdata, idxOP = tileutils.qualCutOff(Xtrain, pathdata, idxOP, 0.90)
pathdataOH, idxOPOH, varvals = tileutils.findTileVars(Xtrain, pathdata, idxOP)

# One-hot (OH) representation, filtered using Pearson chi2.
# Positional argument order, per the original author's note:
# (tiledgenomes, tileposOH, idxOPOH, varvals, y, nparts, pcutoff)
Xtrain, pathdataOH, varvals, idxOPOH, zygosity = tileutils.chiZygosity(
    Xtrain, pathdataOH, idxOPOH, varvals, y, 10, 0.01, zygosityreturn=True
)
Exemplo n.º 2
0
# Create Vector of Original Index of Tile Position
# Columns are handled as consecutive pairs: both columns of a pair receive
# the same index. Floor division keeps the pair count an integer; true
# division ("/") would hand np.arange a float under Python 3.
idxn = Xtrain.shape[1] // 2
idxrange = np.arange(idxn)
idxOP = np.empty(Xtrain.shape[1])
idxOP[0::2] = idxrange
idxOP[1::2] = idxrange

print("Reshaping Matrix")
# Reshaping Matrix to Combine Phases: even-indexed columns are stacked on
# top of odd-indexed columns (rows double, columns halve).
m, n = Xtrain.shape
Xtrain = np.concatenate((Xtrain[:, 0:n:2], Xtrain[:, 1:n:2]), axis=0)
# Keep one path entry and one original-index entry per column pair.
pathdata = pathdata[0:n:2]
idxOP = idxOP[0:n:2]

# Quality Cutoff for Filter and Further ML
# (qualcutoff is defined outside this fragment.)
Xtrain, pathdata, idxOP = tileutils.qualCutOff(
    Xtrain, pathdata, idxOP, qualcutoff
)
pathdataOH, idxOPOH, varvals = tileutils.findTileVars(Xtrain, pathdata, idxOP)

# Calculate OH Representation, Filtered using Pearson Chi2
# Positional argument order, per the original author's note:
# (tiledgenomes, tileposOH, idxOPOH, varvals, y, nparts, pcutoff)
Xtrain, pathdataOH, varvals, idxOPOH, zygosity = tileutils.chiZygosity(
    Xtrain, pathdataOH, idxOPOH, varvals, y, 10, .05, zygosityreturn=True
)

# Removing NaN values from y; the same boolean mask is applied to pheno and
# to the rows of tiledPCA so the three stay aligned.
idxNN = np.logical_not(np.isnan(y))
y = y[idxNN]
pheno = pheno[idxNN]
tiledPCA = tiledPCA[idxNN, :]

# Save the NaN-filtered PCA components. Only tiledPCA is written here; no
# combination with the OH-encoded Xtrain happens in this fragment.
np.save('XPCAwExt.npy', tiledPCA)
Exemplo n.º 3
0
# Match tiled genomes with y values by HUID
y, pheno = adutils.syncTilesAD(dataAD, names)

# Create Vector of Original Index of Tile Position
# Columns are handled as consecutive pairs: both columns of a pair receive
# the same index. Floor division keeps the pair count an integer; true
# division ("/") would hand np.arange a float under Python 3.
idxn = Xtrain.shape[1] // 2
idxrange = np.arange(idxn)
idxOP = np.empty(Xtrain.shape[1])
idxOP[0::2] = idxrange
idxOP[1::2] = idxrange

# Remove XYM Chromosomes (currently disabled)
#[Xtrain,pathdata,idxOP]  = tileutils.removeXYM(Xtrain,pathdata,idxOP)

print("Quality Cutoff 99% for PCA")
# Quality Cutoff for PCA
# NOTE(review): the message says 99% but the cutoff passed is 1 - confirm
# which of the two is intended.
XtrainPCA, pathdataPCA, idxOPPCA = tileutils.qualCutOff(
    Xtrain, pathdata, idxOP, 1
)

print(XtrainPCA.shape)

print("Calculating PCA")
# Calculate Top 20 PCA Components
# Only the third value returned by findTileVars is used here.
__, __, varvalsPCA = tileutils.findTileVars(XtrainPCA, pathdataPCA, idxOPPCA)
tiledPCA = tileutils.pcaComponents(XtrainPCA, varvalsPCA, 20)

# Drop the PCA-only intermediates before the next large allocation.
del XtrainPCA
del varvalsPCA

print("Reshaping Matrix")
# Reshaping Matrix to Combine Phases: even-indexed columns are stacked on
# top of odd-indexed columns (rows double, columns halve).
m, n = Xtrain.shape
Xtrain = np.concatenate((Xtrain[:, 0:n:2], Xtrain[:, 1:n:2]), axis=0)
Exemplo n.º 4
0
    Xtrain[idxN1] = 0

    # Create Vector of Original Index of Tile Position
    # Both columns of each consecutive pair share one index, shifted by the
    # running offset (idxoffset is set before this fragment - presumably by
    # the enclosing loop's previous pass; confirm against the loop header).
    # Floor division keeps the pair count an integer under Python 3.
    idxn = Xtrain.shape[1] // 2
    idxrange = np.arange(idxn)
    idxOP = np.empty(Xtrain.shape[1])
    idxOP[0::2] = idxoffset + idxrange
    idxOP[1::2] = idxoffset + idxrange
    array_length = len(idxOP)
    last_element = idxOP[array_length - 1]
    # The next batch starts one past the last index used here; reusing the
    # last index itself would give two different tiles the same index.
    idxoffset = last_element + 1

    print("Quality Cutoff 99% for PCA")
    # Quality Cutoff for PCA
    # NOTE(review): the message says 99% but the cutoff passed is 1 - confirm
    # which of the two is intended.
    XtrainPCA, pathdataPCA, idxOPPCA = tileutils.qualCutOff(
        Xtrain, pathdata, idxOP, 1
    )
    # Accumulate this batch. All three accumulators append in the same order
    # so that column i of allXPCA lines up with entry i of allidx and
    # allpathdataPCA (allidx was previously prepended, reversing its order).
    allXPCA = np.hstack((allXPCA, XtrainPCA))
    allidx = np.hstack((allidx, idxOPPCA))
    allpathdataPCA = np.hstack((allpathdataPCA, pathdataPCA))

print("Calculating PCA")
# Top 20 PCA components over the accumulated columns. Only the third value
# returned by findTileVars is needed; the first two are discarded.
__, __, varvalsPCA = tileutils.findTileVars(
    allXPCA, allpathdataPCA, allidx
)
tiledPCA = tileutils.pcaComponents(allXPCA, varvalsPCA, 20)

# The accumulated matrix and variant values are no longer needed.
del allXPCA, varvalsPCA

# Write the final outputs: PCA components as .npy, labels as a CSV with
# neither index nor header row.
np.save("tiledPCA.npy", tiledPCA)
df.to_csv("labels.csv", index=False, header=False)