Exemplo n.º 1
0
    for sheet in data_sheets:
        row = sheet.nrows  # 行数
        col = sheet.ncols  # 列数
        for j in range(1, row):
            labels.append(sheet_names[i])
            data.append(sheet.row_values(j)[1:5])
        i += 1

    data = np.array(data)
    return data, [i[-2] for i in labels]


# Load the sleep workbook: a data array plus the labels that go with it.
sleep_data, sleep_labels = read_data_by_sheets('data/sleep.xlsx')

# PCA through the project helper, with normalisation switched off
# (c=2 is presumably the number of components kept -- confirm in get_pca).
dim_data, ratio, result = get_pca(sleep_data, c=2, with_normalize=False)
# print(ratio)

# Plot 13: map labels onto the palette, then scatter the first two columns
# of the reduced data.
default_colors = ['r', 'b', 'g', 'c', 'm']
colors = get_color(sleep_labels, default_colors)
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]
print('Drawing...')
draw_scatter(x, y, sleep_labels, colors)

# Palette of ten three-element colour lists with every component in [0, 1]
# (presumably RGB triples -- confirm against the plotting helper).
default_colors = [[0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1], [1, 0.6, 0.8],
                  [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0]]
# NOTE(review): as visible here, the loop only rebinds acc_arr to a fresh
# empty list for j = 1..9; the rest of the body appears to be missing from
# this excerpt -- confirm against the full script.
for j in range(1, 10):

    acc_arr = []
# Path of the workbook to read
filePath = 'data/41592_2017_BFnmeth4179_MOESM235_ESM.xlsx'
x1 = xlrd.open_workbook(filePath)
sheet = x1.sheets()
# Labels: column index 3 of the first sheet, without its first row
labels = sheet[0].col_values(3)[1:]

# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# NOTE(review): X is not assigned anywhere in the visible part of this
# script -- confirm where the data matrix is built before relying on it.
print(labels)
print(X.shape)
print(datetime.datetime.now())

# PCA first (c=11, no normalisation); print the summed ratio values
dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
print(sum(ratio))
# then t-SNE on the PCA output (perp=5, no normalisation)
dim_data = t_SNE(dim_data, perp=5, with_normalize=False)

# colour list derived from the labels
default_colors = ['b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)

# the two plotting coordinates: first and second column of the embedding
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# plot
draw_scatter(x, y, labels, colors)

Exemplo n.º 3
0
import numpy as np
import joblib

# Labels: last element of every record in the label file, with the first
# record dropped.
labels = [record[-1] for record in read_from_txt('data/human_islets_labels.txt')][1:]
print(len(labels))

# Data: transpose what the reader returns, drop the first row and the first
# column, and cast the remainder to float64.
X = read_from_txt('data/human_islets.txt').T[1:, 1:].astype(np.float64)
print(X.shape)
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# Dimension reduction
# t-SNE alternative (disabled):
# dim_data = t_SNE(X, perp=5, with_normalize=True)

# PCA with c=2 and normalisation enabled
dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# colour list derived from the labels (helper's default palette)
colors = get_color(labels)

# the two plotting coordinates: first and second column of the reduced data
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# plot
draw_scatter(x, y, labels, colors)
Exemplo n.º 4
0
# Keep only the rows of GEM that contain at least 10 strictly positive
# entries, and collect the entry of `genes` at the same index for every row
# that is kept.
# NOTE(review): GEM, genes and the initial GEM_P are defined outside this
# excerpt; GEM_P is assumed to start out as a list (it is appended to and
# only converted to an array afterwards) -- confirm.
genes_P = []
for idx, gene in enumerate(GEM):
    gene = np.array(gene)
    # Counting True values in the boolean mask gives the number of positive
    # entries without allocating the filtered copy gene[gene > 0].
    if np.count_nonzero(gene > 0) >= 10:
        GEM_P.append(gene)
        genes_P.append(genes[idx])

# Stack the retained rows into one array and report how much survived.
GEM_P = np.array(GEM_P)
print(GEM_P.shape)
print(len(genes_P))

# In[ ]:

# Reduce the transposed GEM_P with PCA (c=20), then pass the result through
# t-SNE (perp=30); both helpers are called with normalisation enabled.
dim_data, ratio, result = get_pca(GEM_P.T, c=20, with_normalize=True)
dim_data = t_SNE(dim_data, perp=30, with_normalize=True)

# Colour list from the labels, coordinates from the first two columns of the
# embedding, then the scatter plot.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]
draw_scatter(x, y, labels, colors)

# In[4]:

# Print the position of each gene of interest within genes_P, in this order.
for gene_name in ('POU5F1', 'GATA6'):
    print(genes_P.index(gene_name))

# In[ ]:

# Scatter of gene GATA6 and POU5F1