-
Notifications
You must be signed in to change notification settings - Fork 0
/
vision_forward_model.py
executable file
·338 lines (283 loc) · 12.2 KB
/
vision_forward_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
"""
Inferring 3D Shape from 2D Images
This file contains the vision forward model that renders
hypotheses to 2D images.
Created on Aug 27, 2015
Goker Erdogan
https://github.com/gokererdogan/
"""
import os

import numpy as np
import scipy.misc
import vtk
from vtk.util.numpy_support import vtk_to_numpy

import geometry_3d as geom3d
from gmllib.helpers import rgb2gray
# Distance of the default cameras from the origin.
DEFAULT_CAMERA_DISTANCE = np.sqrt(8.0)

# Default camera positions in spherical coordinates (r, theta, phi), angles in
# degrees: the canonical view (theta = 0) and the views at +-45 degrees from
# it, all at phi = 45.
# A generator is used instead of a list comprehension so that the loop
# variable does not leak into the module namespace under Python 2.
DEFAULT_CAMERA_POS = list(
    (DEFAULT_CAMERA_DISTANCE, view_theta, 45.0)
    for view_theta in (-45.0, 0.0, 45.0)
)

# Default size of the rendered images.
DEFAULT_RENDER_SIZE = (200, 200)

# Radius of the tube used to render shapes with primitive type 'TUBE'.
TUBE_RADIUS = 0.05
class VisionForwardModel:
    """
    Vision forward model for Shape class (hypothesis.py).

    Builds a 3D scene from a shape hypothesis and uses VTK to render it to
    2D images, one image per camera viewpoint.

    A shape whose ``primitive_type`` is 'CUBE' is drawn as one scaled cube
    actor per part (each part is a rectangular prism); a 'TUBE' shape is
    drawn as a single tube running through its joint positions.

    The forward model expects an I3DHypothesis instance that implements the
    method convert_to_positions_sizes and exposes ``primitive_type`` and
    ``viewpoint`` attributes.
    """

    # positions of the four custom lights (light1..light4).
    # we illuminate the upper hemisphere.
    _LIGHT_POSITIONS = ((8, -12, 10), (-12, -10, 8), (10, 10, 12), (-10, 8, 10))

    def __init__(self, render_size=DEFAULT_RENDER_SIZE, camera_pos=DEFAULT_CAMERA_POS,
                 offscreen_rendering=True, custom_lighting=True):
        """
        Initializes VTK objects for rendering.

        Parameters:
            render_size (2-tuple): size passed to the VTK render window.
            camera_pos (list): camera positions in spherical coordinates
                (r, theta, phi), angles in degrees. The first entry is the
                position the camera is reset to; all entries are used by
                render when the shape does not define its own viewpoints.
            offscreen_rendering (bool): render off-screen instead of in a
                visible window.
            custom_lighting (bool): add our own four lights to the scene.
        """
        self.render_size = render_size
        self.camera_pos = camera_pos

        # vtk objects for rendering
        self.vtkrenderer = vtk.vtkRenderer()

        self.camera_view_count = len(self.camera_pos)
        self.vtkcamera = vtk.vtkCamera()
        self._setup_camera(self.camera_pos[0])
        # this is the view angle we used for rendering objects in blender.
        self.vtkcamera.SetViewAngle(35.0)

        # if custom_lighting is ON, we use our own lights in the scene. default lighting of vtk is not very good.
        # this is the default (used for our BDAoOSS model and CogSci16 paper).
        self.custom_lighting = custom_lighting
        if self.custom_lighting:
            # the lights stay available as light1..light4, as before
            self.light1, self.light2, self.light3, self.light4 = [
                self._create_light(position) for position in self._LIGHT_POSITIONS
            ]
            for light in (self.light1, self.light2, self.light3, self.light4):
                self.vtkrenderer.AddLight(light)

        self.vtkrenderer.SetBackground(0.0, 0.0, 0.0)  # Background color

        self.vtkrender_window = vtk.vtkRenderWindow()
        self.vtkrender_window.AddRenderer(self.vtkrenderer)
        self.vtkrender_window.SetSize(self.render_size)
        self.vtkrender_window_interactor = vtk.vtkRenderWindowInteractor()
        self.vtkrender_window_interactor.SetRenderWindow(self.vtkrender_window)

        # turn on off-screen rendering. Note that rendering will be much faster this way
        # HOWEVER, you won't be able to use view etc. methods to look at and interact with
        # the object. You must render the object and use matplotlib etc. to view the
        # rendered image.
        self.offscreen_rendering = offscreen_rendering
        if self.offscreen_rendering:
            self.vtkrender_window.SetOffScreenRendering(1)
            # these below lines are here with the hope of fixing a problem with off-screen rendering.
            # the quality of the offscreen render seems inferior.
            # i'm not sure why this is.
            #   1) it could be because there is a bug and anti-aliasing and multisampling is disabled
            #   for offscreen rendering. see the below link (though that applies to vtk6.0)
            #       http://public.kitware.com/pipermail/vtk-developers/2015-April/031741.html
            #   2) it might be that offscreen rendering uses software rendering, no hardware
            #   acceleration. I don't know how one can check that.
            # still, the quality of the renders are good enough for our purposes
            self.vtkrender_window.SetLineSmoothing(1)
            self.vtkrender_window.SetMultiSamples(8)

        # vtk objects for reading, and rendering object parts.
        # a single unit cube source/mapper is shared by all cube actors; each
        # actor is positioned and scaled individually in _build_scene_cube.
        # NOTE(review): SetInput belongs to the VTK 5 pipeline API; confirm the
        # targeted VTK version before upgrading.
        self.cube_source = vtk.vtkCubeSource()
        self.cube_output = self.cube_source.GetOutput()
        self.cube_mapper = vtk.vtkPolyDataMapper()
        self.cube_mapper.SetInput(self.cube_output)

    @staticmethod
    def _create_light(position):
        """Create a light with intensity 0.3 and diffuse color (1, 1, 1) at position."""
        light = vtk.vtkLight()
        light.SetIntensity(.3)
        light.SetPosition(*position)
        light.SetDiffuseColor(1.0, 1.0, 1.0)
        return light

    def _reset_camera(self):
        """
        Reset camera to its original position (the first entry of self.camera_pos)
        """
        self._setup_camera(self.camera_pos[0])

    def _setup_camera(self, camera_pos):
        """
        Set the camera position and view up

        Parameters:
            camera_pos (3-tuple): Camera position in spherical coordinates
        """
        camera_pos_cartesian = geom3d.spherical_to_cartesian(camera_pos)
        self.vtkcamera.SetPosition(camera_pos_cartesian)

        # calculate camera up
        # if viewpoint radius is negative, we invert the view, i.e., rotate camera upside down
        camera_up = self._calculate_camera_up(camera_pos)
        self.vtkcamera.SetViewUp(camera_up)

    @staticmethod
    def _calculate_camera_up(camera_pos):
        """Calculate camera up direction from camera position.

        Parameters:
            camera_pos (tuple): spherical coordinates (r, theta, phi) of camera
                position; theta and phi are in degrees. r is not used.

        Returns:
            (tuple): camera up vector in cartesian coordinates
        """
        # when camera position is (r, theta=0, phi=0)=(0, 0, z), camera up is (-1, 0, 0)
        x, y, z = -1.0, 0.0, 0.0

        # get the spherical coordinates of camera pos and rotate the camera up vector
        _, theta, phi = camera_pos
        # degrees to radians
        phi = phi * (np.pi / 180.0)
        theta = theta * (np.pi / 180.0)

        # rotate by phi wrt to y
        xr = (np.cos(phi) * x) + (np.sin(phi) * z)
        yr = y
        zr = (-np.sin(phi) * x) + (np.cos(phi) * z)

        # rotate by theta wrt to z
        x = (np.cos(theta) * xr) - (np.sin(theta) * yr)
        y = (np.sin(theta) * xr) + (np.cos(theta) * yr)
        z = zr

        return x, y, z

    def render(self, shape, rgb=False):
        """
        Construct the 3D object from Shape instance and render it.

        If shape.viewpoint is None, the camera positions given to the
        constructor are used; otherwise shape.viewpoint is used.

        Parameters:
            shape (I3DHypothesis): shape to render. should contain primitive_type
                and viewpoint attributes and convert_to_positions_sizes method.
            rgb (bool): return a grayscale or RGB image

        Returns:
            (numpy.ndarray): rendered images of the object from the specified
                viewpoints, stacked along the first axis. Each image has the
                shape returned by _render_window_to2D: (components, width,
                height) with dtype uint8 if rgb is True, otherwise the float
                grayscale image returned by rgb2gray (presumably
                height x width).
        """
        self._build_scene(shape)

        camera_positions = self.camera_pos
        if shape.viewpoint is not None:
            camera_positions = shape.viewpoint
        viewpoint_count = len(camera_positions)
        dtype = np.uint8 if rgb else np.float64

        img_arr = None
        for i, camera_pos in enumerate(camera_positions):
            self._setup_camera(camera_pos)
            frame = self._render_window_to2D(rgb)
            if img_arr is None:
                # size the output from the rendered image itself rather than
                # from render_size, so that non-square render sizes do not
                # depend on the order of the width and height axes.
                img_arr = np.zeros((viewpoint_count,) + np.shape(frame), dtype=dtype)
            img_arr[i] = frame

        if img_arr is None:
            # no viewpoints; return an empty array
            w = self.render_size[0]
            h = self.render_size[1]
            img_arr = np.zeros((0, 3, w, h) if rgb else (0, w, h), dtype=dtype)

        return img_arr

    def _render_window_to2D(self, rgb=False):
        """
        Renders the window to a 2D grayscale or RGB image
        Called from render function for each viewpoint

        Parameters:
            rgb (bool): if True return the color image with axes
                (components, width, height); otherwise return the image
                converted with rgb2gray.

        Returns:
            (numpy.ndarray): rendered image
        """
        self.vtkrender_window.Render()
        # a new filter is created for every capture of the window
        self.vtkwin_im = vtk.vtkWindowToImageFilter()
        self.vtkwin_im.SetInput(self.vtkrender_window)
        self.vtkwin_im.Update()

        vtk_image = self.vtkwin_im.GetOutput()
        width, height, _ = vtk_image.GetDimensions()
        vtk_array = vtk_image.GetPointData().GetScalars()
        components = vtk_array.GetNumberOfComponents()
        arr = vtk_to_numpy(vtk_array).reshape(height, width, components)
        if rgb:
            # (height, width, components) -> (components, width, height)
            arr = arr.transpose([2, 1, 0])
        else:
            arr = rgb2gray(arr)

        return arr

    def _build_scene(self, shape):
        """
        Places each part of the shape into the scene

        Parameters:
            shape (I3DHypothesis): shape to place into the scene

        Raises:
            ValueError: if shape.primitive_type is neither 'CUBE' nor 'TUBE'.
                The scene has already been cleared at that point.
        """
        # clear scene
        self.vtkrenderer.RemoveAllViewProps()
        self.vtkrenderer.Clear()

        # add parts to scene, use the build_scene function for the primitive type of shape
        if shape.primitive_type == 'CUBE':
            self._build_scene_cube(shape)
        elif shape.primitive_type == 'TUBE':
            self._build_scene_tube(shape)
        else:
            raise ValueError("Unknown primitive type.")

        self._reset_camera()
        self.vtkrenderer.SetActiveCamera(self.vtkcamera)

    def _build_scene_cube(self, shape):
        """
        Adds one cube actor per part to the scene, placed at the part's
        position and scaled to the part's size.
        """
        positions, sizes = shape.convert_to_positions_sizes()
        for position, size in zip(positions, sizes):
            actor = vtk.vtkActor()
            actor.SetMapper(self.cube_mapper)
            actor.SetPosition(position)
            actor.SetScale(size)
            self.vtkrenderer.AddActor(actor)

    def _build_scene_tube(self, shape):
        """
        Adds a single tube actor to the scene. The tube follows the polyline
        through the joint positions of the shape and has radius TUBE_RADIUS.
        """
        # for tube shapes, convert_to_positions_sizes returns only the joint positions
        positions = shape.convert_to_positions_sizes()
        joint_count = len(positions)

        # one polyline cell that visits every joint in order
        pts = vtk.vtkPoints()
        lines = vtk.vtkCellArray()
        lines.InsertNextCell(joint_count)
        for j, position in enumerate(positions):
            pts.InsertPoint(j, position)
            lines.InsertCellPoint(j)

        td = vtk.vtkPolyData()
        td.SetPoints(pts)
        td.SetLines(lines)

        # wrap the polyline in a capped tube
        tf = vtk.vtkTubeFilter()
        tf.SetInput(td)
        tf.SetRadius(TUBE_RADIUS)
        tf.SetCapping(1)
        tf.SetNumberOfSides(50)
        tf.Update()

        tm = vtk.vtkPolyDataMapper()
        tm.SetInput(tf.GetOutput())

        ta = vtk.vtkActor()
        ta.SetMapper(tm)
        ta.GetProperty().SetAmbient(0.25)
        self.vtkrenderer.AddActor(ta)

    def convert_world_to_display(self, viewpoint, x, y, z):
        """Converts world coordinates x, y, z to display coordinates.
        Used in 2D affine alignment model to get shape feature coordinates
        in image.

        Parameters:
            viewpoint (3-tuple): camera position in spherical coordinates
            x, y, z (float): world coordinates of the point

        Returns:
            (tuple): display coordinates of the point, with the two values
                returned by VTK swapped.
        """
        self.vtkrenderer.SetActiveCamera(self.vtkcamera)
        self._setup_camera(viewpoint)

        vtkcoordinate = vtk.vtkCoordinate()
        vtkcoordinate.SetCoordinateSystemToWorld()
        vtkcoordinate.SetValue(x, y, z)
        # x and y are flipped in render method.
        y, x = vtkcoordinate.GetComputedDisplayValue(self.vtkrenderer)
        return x, y

    def _view(self, shape):
        """
        Views object in window
        Used for development and testing purposes

        Uses the first viewpoint of the shape if the shape defines
        viewpoints, and starts the render window interactor.
        """
        self._build_scene(shape)

        if shape.viewpoint is not None:
            camera_pos = shape.viewpoint[0]
            self._setup_camera(camera_pos)

        self.vtkrender_window.Render()
        self.vtkrender_window_interactor.Start()

    def save_render(self, filename, shape):
        """
        Save rendered image to disk. Saves one image for each viewpoint.

        The viewpoint index is inserted before the extension, e.g., 'r.png'
        is saved as 'r_0.png', 'r_1.png', ...

        Parameters:
            filename (str): save filename with extension.
            shape (I3DHypothesis): shape to render

        Returns:
            None
        """
        # splitext only splits on a dot in the last path component, so dots
        # in directory names are left alone.
        root, ext = os.path.splitext(filename)
        img = self.render(shape)
        for i, view_img in enumerate(img):
            # make sure that scipy does not normalize the image
            simg = scipy.misc.toimage(np.flipud(view_img), cmin=0, cmax=255)
            simg.save("{0:s}_{1:d}{2:s}".format(root, i, ext))
if __name__ == '__main__':
    # Manual smoke test: render a Shape hypothesis and save one image per
    # viewpoint as r_<index>.png.
    # NOTE(review): the render size is not square here; confirm that render
    # handles width != height.
    demo_model = VisionForwardModel(render_size=(200, 300))

    import shape as hyp

    demo_shape = hyp.Shape(demo_model)
    demo_images = demo_model.render(demo_shape)
    demo_model.save_render('r.png', demo_shape)