LastEditTime: 2020-08-10 16:39:49
LastEditors: CheeReus_11
'''
import matplotlib.pyplot as plt
from ReadData import read_from_mat
from DimensionReduction import t_SNE, get_pca
from Utils import get_color, draw_scatter
from Clustering import hca, hca_dendrogram, hca_labels
from Metrics import ARI

# Load the matrix stored under key 'A' of the .mat file.
X = read_from_mat('data/corr/A_islet.mat')['A']

# Dimensionality reduction.
# Active path: t_SNE with perp=5 and normalization switched on.
dim_data = t_SNE(X, perp=5, with_normalize=True)

# Disabled alternative: get_pca with c=2.
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Split the embedding into its first two coordinates.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Load the reference labels; each entry is nested two levels deep,
# so it is unwrapped with [0][0].
labels = [
    entry[0][0]
    for entry in read_from_mat('data/corr/Labels_islet.mat')['Labels']
]
# print(labels)

# Build the model with hca() on the reduced data
# (presumably hierarchical clustering, not kNN -- confirm in Clustering).
model = hca(dim_data)
import joblib
from DimensionReduction import t_SNE, get_pca
from Utils import get_color, draw_scatter

# Load the saved feature matrix and its labels; the load/print order is
# kept so the console output stays the same.
data = joblib.load('ae_output/ae_dim_data_99.pkl')
print(data.shape)
labels = joblib.load('ae_output/labels.pkl')
print(len(labels))

# Embed with t_SNE (perp=40, normalization on), then take the first two
# coordinates of every embedded point.
dim_data = t_SNE(data, perp=40, with_normalize=True)
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# NOTE(review): 'c' appears twice in this palette; if get_color assigns
# colors by position, two label groups would share cyan -- confirm.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'c', 'k']
colors = get_color(labels, default_colors)

# Scatter plot of the embedding, colored per label.
draw_scatter(x, y, labels, colors)
# Example #3 (snippet separator left over from extraction; originally "Пример #3" / "0")
# Keep only the rows of GEM that have at least 10 strictly positive
# entries, together with the matching entry of `genes`.
# enumerate() supplies the row index, replacing the hand-maintained counter.
for idx, gene in enumerate(GEM):
    gene = np.array(gene)
    # Count the positive entries directly instead of materializing a
    # filtered copy of the row just to take its length.
    if np.count_nonzero(gene > 0) >= 10:
        GEM_P.append(gene)
        genes_P.append(genes[idx])

# Stack the retained rows into one array and report how much survived.
GEM_P = np.array(GEM_P)
print(GEM_P.shape)
print(len(genes_P))

# In[ ]:

# Two-stage embedding: get_pca (c=20) on the transposed filtered matrix,
# then t_SNE (perp=30) on the PCA output; both with normalization on.
dim_data, ratio, result = get_pca(GEM_P.T, c=20, with_normalize=True)
dim_data = t_SNE(dim_data, perp=30, with_normalize=True)

# First two coordinates of every embedded point.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Color lookup for the labels, followed by the scatter plot.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)
draw_scatter(x, y, labels, colors)

# In[4]:

# Print the position of each gene of interest within genes_P
# (list.index raises ValueError if a name is absent).
for gene_name in ('POU5F1', 'GATA6'):
    print(genes_P.index(gene_name))

# In[ ]:

# Scatter of gene GATA6 and POU5F1
# NOTE(review): the statements that follow load labels from a spreadsheet
# and print diagnostics; this heading may be stale -- confirm.
# File path
filePath = 'data/41592_2017_BFnmeth4179_MOESM235_ESM.xlsx'
# NOTE(review): xlrd >= 2.0 only reads .xls files; opening this .xlsx
# needs xlrd < 2.0 -- confirm the installed version.
x1 = xlrd.open_workbook(filePath)
sheet = x1.sheets()
# Labels come from column index 3 of the first sheet, skipping its first
# row (presumably a header -- TODO confirm).
labels = sheet[0].col_values(3)[1:]

# Disabled: cache the matrix and labels to disk.
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# Diagnostics: the labels, the shape of X, and the current timestamp.
print(labels)
print(X.shape)
print(datetime.datetime.now())

# Stage 1: get_pca with c=11 and normalization off; print the summed ratio.
dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
print(sum(ratio))
# Stage 2: t_SNE (perp=5, normalization off) on the PCA output.
dim_data = t_SNE(dim_data, perp=5, with_normalize=False)
# First two coordinates of every embedded point.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Map the labels onto the six-entry palette.
default_colors = ['b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)

# Draw the labelled scatter plot.
draw_scatter(x, y, labels, colors)

import datetime
from Clustering import k_means
from Metrics import ARI, NMI

# Load the saved feature matrix and its reference labels.
X = joblib.load('ae_output/ae_dim_data_99.pkl')
labels = joblib.load('ae_output/labels.pkl')


# Diagnostics, plus a timestamp taken before the embedding starts.
print(labels)
print(X.shape)
print(datetime.datetime.now())
# Disabled PCA stage:
# dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
# print(sum(ratio))
# t_SNE is applied directly to X (perp=40, normalization off).
dim_data = t_SNE(X, perp=40, with_normalize=False)
# Second timestamp, taken once t_SNE has returned.
print(datetime.datetime.now())
# First two coordinates of every embedded point.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Map the labels onto the six-entry palette.
default_colors = ['b', 'g', 'r', 'm', 'y', 'c']
colors = get_color(labels, default_colors)

# Draw the labelled scatter plot.
draw_scatter(x, y, labels, colors)

# Cluster the un-embedded matrix X with k_means (k=6).
predict_labels = k_means(X, k=6)

# Print the ARI of the predicted labels against the reference labels.
print(ARI(labels, predict_labels))