-
Notifications
You must be signed in to change notification settings - Fork 0
/
4.07-colorBars.py
189 lines (143 loc) · 6.75 KB
/
4.07-colorBars.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#########################
### Customizing Colorbars
# Plot legends identify discrete labels of discrete points
# labeled colorbars identify continuous labels based on color of points, lines, regions
# can view the additional color-coded stuff in link provided:
# https://github.com/jakevdp/PythonDataScienceHandbook
## set up notebook:
import matplotlib.pyplot as plt
plt.style.use('classic')
# %matplotlib inline
import numpy as np
# Simplest case first: draw a 2-D array and attach the default colorbar.
x = np.linspace(0, 10, num=1000)
# sin(x) along the columns times cos(x) down the rows broadcasts to a square grid
I = np.cos(x[:, None]) * np.sin(x)
plt.imshow(I)
plt.colorbar()
# From here on we customize. The colormap is chosen with the 'cmap' argument
# of whichever plotting function creates the visualization.
plt.imshow(I, cmap="gray")
# Every available colormap lives in the plt.cm namespace; list them with tab completion:
# plt.cm.<TAB>
## Choosing the colormap
# several articles linked as this topic goes deeper than one chapter, of course
# 3 different kinds of colormaps discussed here:
# Sequential colormaps
# made up of one continuous sequence of colors
# e.g. binary, viridis
# Divergent colormaps
# usually contain 2 distinct colors, showing positive and negative deviations from a mean
# e.g. RdBu, PuOr
# Qualitative colormaps
# mix colors with no particular sequence
# e.g. rainbow, jet
# jet actually used to be default for matplotlib, despite the fact that
# qualitative colormaps are generally the least-helpful scheme due to non-uniform progression
# converting jet colorbar to black and white demonstrates this concept:
from matplotlib.colors import LinearSegmentedColormap
def grayscale_cmap(cmap):
    """Return a grayscale version of the given colormap.

    Parameters
    ----------
    cmap : str or matplotlib.colors.Colormap
        Colormap name or instance; it is resolved through ``plt.get_cmap``.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        A colormap named ``<cmap.name>_gray`` with ``cmap.N`` entries whose
        R, G and B channels are all set to the perceived luminance of the
        corresponding original color. The alpha channel is not modified.
    """
    # plt.get_cmap accepts names as well as Colormap instances; the older
    # plt.cm.get_cmap spelling was removed from recent Matplotlib releases.
    cmap = plt.get_cmap(cmap)
    colors = cmap(np.arange(cmap.N))

    # convert RGBA to perceived grayscale luminance
    # cf. http://alienryderflex.com/hsp.html
    RGB_weight = [0.299, 0.587, 0.114]
    luminance = np.sqrt(np.dot(colors[:, :3] ** 2, RGB_weight))
    # write the same luminance into all three color channels
    colors[:, :3] = luminance[:, np.newaxis]

    return LinearSegmentedColormap.from_list(cmap.name + "_gray", colors, cmap.N)
def view_colormap(cmap):
    """Plot a colormap with its grayscale equivalent.

    Creates a new figure with two stacked axes (ticks hidden): the top strip
    shows the colors of ``cmap``, the bottom strip shows the colors of
    ``grayscale_cmap(cmap)``.

    Parameters
    ----------
    cmap : str or matplotlib.colors.Colormap
        Colormap name or instance; it is resolved through ``plt.get_cmap``.
    """
    # plt.get_cmap replaces plt.cm.get_cmap, which was removed from recent
    # Matplotlib releases.
    cmap = plt.get_cmap(cmap)
    colors = cmap(np.arange(cmap.N))

    gray_cmap = grayscale_cmap(cmap)
    grayscale = gray_cmap(np.arange(gray_cmap.N))

    fig, ax = plt.subplots(2, figsize=(6, 2),
                           subplot_kw=dict(xticks=[], yticks=[]))
    # wrapping each color array in a list gives imshow a one-row image
    ax[0].imshow([colors], extent=[0, 10, 0, 1])
    ax[1].imshow([grayscale], extent=[0, 10, 0, 1])
# Render each colormap together with its grayscale twin, one figure per entry;
# the note on each entry says what to look for in the output.
for cmap_name in (
    # jet: the grayscale strip shows bright white stripes, and the same bands
    # are present even in full color. Those brighter sections naturally draw
    # the eye, which defeats the purpose of using a color scale for data
    # representation (unless you're intentionally obscuring meaning).
    'jet',
    # viridis: became the replacement for jet. The difference is visible
    # immediately: even brightness, both in color and in grayscale.
    'viridis',
    # cubehelix: for a rainbow-style scheme on continuous data, this colormap
    # can be used in place of jet.
    'cubehelix',
    # RdBu: for other applications (e.g. positive-negative deviations from a
    # mean) dual-color colorbars like this one are useful. Note that the
    # grayscale translation of these dual-color schemes loses the +/- info:
    # it keeps magnitude only, not direction.
    'RdBu',
):
    view_colormap(cmap_name)
# to view additional colormaps, view the plt.cm submodule
# or refer to the Seaborn documentation
###################################
### Color limits and extensions
# As seen, Matplotlib allows for colorbar customizations.
# Colorbar is an instance of plt.Axes, so all the axes and tick formatting already used are applicable
# for instance, can set color limits and indicate out-of-bounds values with triangular arrows
# accomplished using the "extend" property
# see the following example for a use case:
# ----------------------------------------
# make noise in 1% of the image pixels
speckles = (np.random.random(I.shape) < 0.01)
# overwrite the selected pixels in place with normal noise (mean 0, std 3)
I[speckles] = np.random.normal(0, 3, np.count_nonzero(speckles))

plt.figure(figsize=(10, 3.5))

# first panel: default color limits
plt.subplot(1, 2, 1)
plt.imshow(I, cmap='RdBu')
plt.colorbar()

# second panel: color limits set to [-1, 1]; extend='both' asks the colorbar
# to mark values beyond either limit
plt.subplot(1, 2, 2)
plt.imshow(I, cmap='RdBu')
plt.colorbar(extend='both')
plt.clim(-1, 1)
# ----------------------------------------
# per example,
# in the left panel, default color limits incorporate the added noise, ruining the visualization
# in the right panel, the visualization limits disregard the noise, displaying a useful graph
##############################
### Discrete color bars
# colormaps by default are continuous, but can be made to represent discrete values
# use the get_cmap() function, passing the name of a suitable colormap and the desired number of bins
# Request a 6-entry version of 'Blues' so the colorbar is split into discrete
# bins. plt.get_cmap is used because plt.cm.get_cmap was removed from recent
# Matplotlib releases.
plt.imshow(I, cmap=plt.get_cmap('Blues', 6))
plt.colorbar()
plt.clim(-1, 1)
## example: handwritten digits
# we'll use the handwritten-digits dataset included in scikit-learn
# download the data and visualize several example images with plt.imshow():
# ----------------------------------------
# Load images of the digits 0 through 5 and visualize several of them
from sklearn.datasets import load_digits
# restrict the dataset to 6 classes (the digits 0 through 5)
digits = load_digits(n_class=6)

# show the first 64 images on an 8x8 grid of axes with ticks hidden
fig, ax = plt.subplots(8, 8, figsize=(6, 6))
for i, axi in enumerate(ax.flat):
    axi.imshow(digits.images[i], cmap='binary')
    axi.set(xticks=[], yticks=[])
# ----------------------------------------
# Because each digit is defined by the hue of its 64 pixels,
# we consider digits to be a point lying in 64-dimensional space,
# where each dimension represents the brightness of 1 pixel.
# visualizing such relationships (given num dimensions) is hard
# One approach is to use 'dimensionality reduction', such as manifold learning
# dimensionality reduction is an example of unsupervised machine learning. neato!
# there is a later chapter dedicated to machine learning, so without getting into the
# discussion of how it works, here's a simple dimensionality reduction in action
# to achieve our desired goal:
# ----------------------------------------
# project the digits into 2 dimensions using IsoMap
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
projection = iso.fit_transform(digits.data)

# use a discrete 6-entry colormap to view the results; set "ticks" / "clim"
# for aesthetics, so that each integer label 0..5 sits in the middle of its
# own color bin
plt.scatter(projection[:, 0], projection[:, 1], lw=0.1,
            c=digits.target, cmap=plt.get_cmap('cubehelix', 6))
plt.colorbar(ticks=range(6), label='digit value')
plt.clim(-0.5, 5.5)
# ----------------------------------------
# projection also gives insights on relationships within dataset
# i.e. note that '5' and '3' clusters are very close together
# while '0' and '1' clusters are extremely far apart
# the visualization is indicating that to a simple algorithm,
# 5s look more similar to 3s than 0s do to 1s
# which is something we probably could have guessed, but can now quantify