Exemplo n.º 1
0
def manifold_plot(man,
                  fpkmMatrix,
                  samples,
                  standardize=3,
                  log=True,
                  show_text=False,
                  sep='_',
                  legend_loc='best',
                  legend_size=14):
    """Embed samples with a manifold algorithm and show a 2-D scatter plot.

    Args:
        man: the instance of a manifold algorithm; only its
            ``fit_transform`` method is used and the first two output
            columns are plotted.
        fpkmMatrix: expression matrix with genes along rows and samples
            along columns (it is transposed before being given to ``man``).
        samples: sample names, paired in order with the embedded rows. The
            part of each name before the first ``sep`` is its condition,
            which selects the point color and the legend entry.
        standardize: 2 z-scores along rows/genes, 1 z-scores along
            cols/samples; any other value (including the default 3) skips
            the z-score step.
        log: when true, apply ``log10(x + 1)`` before standardizing.
        show_text: when true, hide the markers and draw the full sample
            name at each point instead.
        sep: separator between the condition and the rest of a sample name.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    embedded = man.fit_transform(fpkmMatrix.T)

    # Number the conditions in order of first appearance so that the color
    # given to each condition is the same on every run (iterating a set of
    # strings is not order-stable between interpreter sessions).
    condition_idx = {}
    for sample in samples:
        condition_idx.setdefault(sample.split(sep)[0], len(condition_idx))

    # COLORS10 cannot be indexed past its length; for more conditions than
    # palette entries, sample evenly spaced hues from a colormap instead.
    colors = COLORS10
    if len(condition_idx) > len(COLORS10):
        hues = np.linspace(0., 1., len(condition_idx), endpoint=False)
        colors = [tuple(rgba) for rgba in plt.get_cmap('hsv')(hues)]

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []

    for row, label in zip(embedded, samples):
        label_show = label.split(sep)[0]
        color = colors[condition_idx[label_show]]
        ax.scatter(row[0],
                   row[1],
                   label='label',
                   color=color,
                   visible=not show_text,
                   s=50,
                   marker='o')
        if label_show not in labels_show:
            # One proxy artist per condition keeps the legend to a single
            # entry per condition instead of one per plotted sample.
            labels_show.append(label_show)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            ax.text(row[0],
                    row[1],
                    label,
                    ha='center',
                    va='center',
                    rotation=0,
                    color=color,
                    size='large')

    ax.legend(scatter_proxies,
              labels_show,
              numpoints=1,
              frameon=True,
              loc=legend_loc,
              prop={'size': legend_size})
    ax.set_xlabel('M1', fontsize=20)
    ax.set_ylabel('M2', fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()

    return
Exemplo n.º 2
0
def PCA_plot(fpkmMatrix,
             samples,
             standardize=3,
             log=True,
             show_text=False,
             sep='_',
             legend_loc='best',
             legend_size=14):
    """Run PCA on the samples and show the first two components.

    Args:
        fpkmMatrix: expression matrix with genes along rows and samples
            along columns (it is transposed before PCA).
        samples: sample names, paired in order with the PCA rows. The part
            of each name before the first ``sep`` is its condition, which
            selects the point color and the legend entry.
        standardize: whether to do a z-score transformation on the
            (optionally log10-transformed) FPKM: 2 z-scores along
            rows/genes, 1 along cols/samples; any other value (including
            the default 3) skips it.
        log: when true, apply ``log10(x + 1)`` before standardizing.
        show_text: when true, annotate each point with the second
            ``sep``-separated field of its sample name.
        sep: separator between the fields of a sample name.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    ## remove genes with NaNs
    fpkmMatrix = fpkmMatrix[~np.isnan(np.sum(fpkmMatrix, axis=1))]

    ## fit PCA once with all components: the same fit provides both the
    ## variance ratios and the 2-D projection, so no second fit is needed
    pca = PCA(n_components=None)
    pca_transformed = pca.fit_transform(fpkmMatrix.T)[:, :2]
    # Multiply into a new array so the estimator's attribute is not scaled
    # in place.
    variance_explained = 100 * pca.explained_variance_ratio_[:2]

    # Number the conditions in order of first appearance so that the color
    # given to each condition is the same on every run (iterating a set of
    # strings is not order-stable between interpreter sessions).
    condition_idx = {}
    for sample in samples:
        condition_idx.setdefault(sample.split(sep)[0], len(condition_idx))

    n_conditions = len(condition_idx)
    if n_conditions > 20:
        # Neither fixed palette is large enough: draw random RGB colors.
        colors = [
            '#%02X%02X%02X' % tuple(random.randint(0, 255) for _ in range(3))
            for _ in range(n_conditions)
        ]
    elif n_conditions > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []

    for row, label in zip(pca_transformed, samples):
        fields = label.split(sep)
        label_show = fields[0]
        color = colors[condition_idx[label_show]]
        ax.scatter(row[0], row[1], label='label', color=color, s=50,
                   marker='o')
        if label_show not in labels_show:
            # One proxy artist per condition keeps the legend to a single
            # entry per condition instead of one per plotted sample.
            labels_show.append(label_show)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            # Sample names without a separator have no second field; fall
            # back to the whole name instead of raising IndexError.
            text = fields[1] if len(fields) > 1 else label
            ax.text(row[0], row[1] - 2, text,
                    ha='center', va='center', rotation=0, color=color,
                    size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
Exemplo n.º 3
0
def manifold_plot(man, fpkmMatrix, samples, standardize=3, log=True,
                  show_text=False, sep='_', legend_loc='best',
                  legend_size=14):
    """Embed samples with a manifold algorithm and show a 2-D scatter plot.

    Args:
        man: the instance of a manifold algorithm; only its
            ``fit_transform`` method is used and the first two output
            columns are plotted.
        fpkmMatrix: expression matrix with genes along rows and samples
            along columns (it is transposed before being given to ``man``).
        samples: sample names, paired in order with the embedded rows. The
            part of each name before the first ``sep`` is its condition,
            which selects the point color and the legend entry.
        standardize: 2 z-scores along rows/genes, 1 z-scores along
            cols/samples; any other value (including the default 3) skips
            the z-score step.
        log: when true, apply ``log10(x + 1)`` before standardizing.
        show_text: when true, hide the markers and draw the full sample
            name at each point instead.
        sep: separator between the condition and the rest of a sample name.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    coords = man.fit_transform(fpkmMatrix.T)

    # Assign palette slots by first appearance: deterministic across runs,
    # unlike the iteration order of a set of strings.
    slot_of = {}
    for name in samples:
        slot_of.setdefault(name.split(sep)[0], len(slot_of))

    # Indexing COLORS10 past its end would raise IndexError, so switch to
    # evenly spaced colormap hues when there are more conditions than
    # palette entries.
    palette = COLORS10
    if len(slot_of) > len(COLORS10):
        hues = np.linspace(0., 1., len(slot_of), endpoint=False)
        palette = [tuple(rgba) for rgba in plt.get_cmap('hsv')(hues)]

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []

    for row, label in zip(coords, samples):
        label_show = label.split(sep)[0]
        color = palette[slot_of[label_show]]
        ax.scatter(row[0], row[1], label='label', color=color,
                   visible=not show_text, s=50, marker='o')
        if label_show not in labels_show:
            # A single proxy handle per condition gives one legend entry
            # per condition rather than one per sample.
            labels_show.append(label_show)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            ax.text(row[0], row[1], label,
                    ha='center', va='center', rotation=0, color=color,
                    size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('M1', fontsize=20)
    ax.set_ylabel('M2', fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()

    return
Exemplo n.º 4
0
def PCA_plot(fpkmMatrix, samples, standardize=3, log=True, show_text=False,
             sep='_', legend_loc='best', legend_size=14):
    """Plot the first two principal components of the samples.

    The PCA itself is delegated to ``perform_PCA``, which is called with
    ``fpkmMatrix``, ``standardize`` and ``log`` and is expected to return
    the per-component variance percentages and the transformed samples.

    Args:
        fpkmMatrix: expression matrix forwarded to ``perform_PCA``.
        samples: sample names, paired in order with the PCA rows. The part
            of each name before the first ``sep`` is its condition, which
            selects the point color and the legend entry.
        standardize: whether to do a z-score transformation on the log10
            transformed FPKM; forwarded to ``perform_PCA``.
        log: forwarded to ``perform_PCA``.
        show_text: when true, annotate each point with the second
            ``sep``-separated field of its sample name.
        sep: separator between the fields of a sample name.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ## perform PCA
    variance_explained, pca_transformed = perform_PCA(
        fpkmMatrix, standardize=standardize, log=log)

    # Number the conditions in order of first appearance so that the color
    # given to each condition is the same on every run (iterating a set of
    # strings is not order-stable between interpreter sessions).
    condition_idx = {}
    for sample in samples:
        condition_idx.setdefault(sample.split(sep)[0], len(condition_idx))

    n_conditions = len(condition_idx)
    if n_conditions > 20:
        # Neither fixed palette is large enough: draw random RGB colors.
        colors = [
            '#%02X%02X%02X' % tuple(random.randint(0, 255) for _ in range(3))
            for _ in range(n_conditions)
        ]
    elif n_conditions > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []

    for row, label in zip(pca_transformed, samples):
        fields = label.split(sep)
        label_show = fields[0]
        color = colors[condition_idx[label_show]]
        ax.scatter(row[0], row[1], label='label', color=color, s=50,
                   marker='o')
        if label_show not in labels_show:
            # One proxy artist per condition keeps the legend to a single
            # entry per condition instead of one per plotted sample.
            labels_show.append(label_show)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            # Sample names without a separator have no second field; fall
            # back to the whole name instead of raising IndexError.
            text = fields[1] if len(fields) > 1 else label
            ax.text(row[0], row[1] - 2, text,
                    ha='center', va='center', rotation=0, color=color,
                    size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
Exemplo n.º 5
0
def PCA_plot2(fpkmMatrix, color_by, shape_by,
              standardize=3, log=True, legend_loc='best', legend_size=14):
    """Build a PCA scatter plot with separate color and marker groupings.

    The PCA itself is delegated to ``perform_PCA``, which is called with
    ``fpkmMatrix``, ``standardize`` and ``log`` and is expected to return
    the per-component variance percentages and the transformed samples.

    Args:
        fpkmMatrix: expression matrix forwarded to ``perform_PCA``.
        color_by: one hashable label per sample; samples sharing a label
            share a color.
        shape_by: one hashable label per sample; samples sharing a label
            share a marker shape.
        standardize: forwarded to ``perform_PCA``.
        log: forwarded to ``perform_PCA``.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        The matplotlib figure; it is not shown by this function.
    """
    variance_explained, pca_transformed = perform_PCA(
        fpkmMatrix, standardize=standardize, log=log)

    # Number the distinct labels in order of first appearance so that the
    # color/marker given to each label is the same on every run (iterating
    # a set is not order-stable for strings between interpreter sessions).
    color_idx = {}
    for label_c in color_by:
        color_idx.setdefault(label_c, len(color_idx))
    shape_idx = {}
    for label_s in shape_by:
        shape_idx.setdefault(label_s, len(shape_idx))

    n_colors = len(color_idx)
    if n_colors > 20:
        # Neither fixed palette is large enough: draw random RGB colors.
        colors = [
            '#%02X%02X%02X' % tuple(random.randint(0, 255) for _ in range(3))
            for _ in range(n_colors)
        ]
    elif n_colors > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    # The first seven markers are the original set; the extra ones and the
    # modulo below keep more than seven shape labels from raising
    # IndexError (markers repeat only once all of them are used up).
    shapes = 'osv^phd<>*DH8'

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []  # for legend and scatter proxies

    for row, label_c, label_s in zip(pca_transformed, color_by, shape_by):
        color = colors[color_idx[label_c]]
        marker = shapes[shape_idx[label_s] % len(shapes)]
        ax.scatter(row[0], row[1], label='label', color=color, s=50,
                   marker=marker)
        label = '%s-%s' % (label_c, label_s)
        if label not in labels_show:
            # One proxy artist per color/shape combination actually plotted.
            labels_show.append(label)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker=marker))

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    return fig
Exemplo n.º 6
0
def PCA_plot2(fpkmMatrix,
              color_by,
              shape_by,
              standardize=3,
              log=True,
              legend_loc='best',
              legend_size=14):
    """Show a PCA scatter plot with separate color and marker groupings.

    The PCA itself is delegated to ``perform_PCA``, which is called with
    ``fpkmMatrix``, ``standardize`` and ``log`` and is expected to return
    the per-component variance percentages and the transformed samples.

    Args:
        fpkmMatrix: expression matrix forwarded to ``perform_PCA``.
        color_by: one hashable label per sample; samples sharing a label
            share a color.
        shape_by: one hashable label per sample; samples sharing a label
            share a marker shape.
        standardize: forwarded to ``perform_PCA``.
        log: forwarded to ``perform_PCA``.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    variance_explained, pca_transformed = perform_PCA(fpkmMatrix,
                                                      standardize=standardize,
                                                      log=log)

    # First-appearance numbering makes the label -> color/marker mapping
    # deterministic; the iteration order of a set of strings can change
    # between interpreter sessions.
    color_slot = {}
    for label_c in color_by:
        color_slot.setdefault(label_c, len(color_slot))
    shape_slot = {}
    for label_s in shape_by:
        shape_slot.setdefault(label_s, len(shape_slot))

    n_colors = len(color_slot)
    if n_colors > 20:
        # Neither fixed palette is large enough: draw random RGB colors.
        colors = [
            '#%02X%02X%02X' % tuple(random.randint(0, 255) for _ in range(3))
            for _ in range(n_colors)
        ]
    elif n_colors > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    # The first seven markers are the original set; the extra ones and the
    # modulo below keep more than seven shape labels from raising
    # IndexError (markers repeat only once all of them are used up).
    shapes = 'osv^phd<>*DH8'

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []  # for legend and scatter proxies

    for row, label_c, label_s in zip(pca_transformed, color_by, shape_by):
        color = colors[color_slot[label_c]]
        marker = shapes[shape_slot[label_s] % len(shapes)]
        ax.scatter(row[0],
                   row[1],
                   label='label',
                   color=color,
                   s=50,
                   marker=marker)
        label = '%s-%s' % (label_c, label_s)
        if label not in labels_show:
            # One proxy artist per color/shape combination actually plotted.
            labels_show.append(label)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker=marker))

    ax.legend(scatter_proxies,
              labels_show,
              numpoints=1,
              frameon=True,
              loc=legend_loc,
              prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
    return
Exemplo n.º 7
0
def PCA_plot(fpkmMatrix,
             samples,
             standardize=3,
             log=True,
             show_text=False,
             sep='_',
             legend_loc='best',
             legend_size=14):
    """Run PCA on the samples and show the first two components.

    Args:
        fpkmMatrix: expression matrix with genes along rows and samples
            along columns (it is transposed before PCA).
        samples: sample names, paired in order with the PCA rows. The part
            of each name before the first ``sep`` is its condition, which
            selects the point color and the legend entry.
        standardize: whether to do a z-score transformation on the
            (optionally log10-transformed) FPKM: 2 z-scores along
            rows/genes, 1 along cols/samples; any other value (including
            the default 3) skips it.
        log: when true, apply ``log10(x + 1)`` before standardizing.
        show_text: when true, annotate each point with the second
            ``sep``-separated field of its sample name.
        sep: separator between the fields of a sample name.
        legend_loc: ``loc`` argument forwarded to ``ax.legend``.
        legend_size: font size of the legend entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    ## remove genes with NaNs
    fpkmMatrix = fpkmMatrix[~np.isnan(np.sum(fpkmMatrix, axis=1))]

    ## a single full fit yields both the variance ratios and the
    ## projection, so the data is not decomposed twice
    pca = PCA(n_components=None)
    pca_transformed = pca.fit_transform(fpkmMatrix.T)[:, :2]
    # Scale into a fresh array rather than in place on the fitted
    # estimator's attribute.
    variance_explained = 100 * pca.explained_variance_ratio_[:2]

    # First-appearance numbering makes the condition -> color mapping
    # deterministic; the iteration order of a set of strings can change
    # between interpreter sessions.
    color_slot = {}
    for name in samples:
        color_slot.setdefault(name.split(sep)[0], len(color_slot))

    n_conditions = len(color_slot)
    if n_conditions > 20:
        # Neither fixed palette is large enough: draw random RGB colors.
        colors = [
            '#%02X%02X%02X' % tuple(random.randint(0, 255) for _ in range(3))
            for _ in range(n_conditions)
        ]
    elif n_conditions > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    scatter_proxies = []
    labels_show = []

    for row, label in zip(pca_transformed, samples):
        fields = label.split(sep)
        label_show = fields[0]
        color = colors[color_slot[label_show]]
        ax.scatter(row[0],
                   row[1],
                   label='label',
                   color=color,
                   s=50,
                   marker='o')
        if label_show not in labels_show:
            # A single proxy handle per condition gives one legend entry
            # per condition rather than one per sample.
            labels_show.append(label_show)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            # A name without the separator has no second field; use the
            # whole name rather than raising IndexError.
            text = fields[1] if len(fields) > 1 else label
            ax.text(row[0],
                    row[1] - 2,
                    text,
                    ha='center',
                    va='center',
                    rotation=0,
                    color=color,
                    size='large')

    ax.legend(scatter_proxies,
              labels_show,
              numpoints=1,
              frameon=True,
              loc=legend_loc,
              prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()