def make_graph(self, input):
    """Build a small encoder/decoder graph predicting at levels 2, 1 and 0.

    The encoder chains five 3x3 ``conv_nl`` layers (``conv1`` and ``conv2``
    use stride 2) and predicts at level 2. The decoder refines to level 1
    and, unless ``self._exit_after == 1``, to level 0.

    Args:
        input: blob fed into the first encoder convolution.

    Returns:
        The ``nd.Struct`` created here; ``levels`` is filled through
        ``self.predict`` / ``self.refine`` and ``final`` points at the last
        level that was produced.
    """
    # (layer name, stride) in execution order; every layer is 3x3 with pad 1.
    encoder_layout = (
        ("conv0", 1),
        ("conv1", 2),
        ("conv1_1", 1),
        ("conv2", 2),
        ("conv2_1", 1),
    )

    result = nd.Struct()
    result.make_struct('levels')

    activations = {}
    with nd.Scope('encoder'):
        current = input
        for layer_name, layer_stride in encoder_layout:
            current = nd.scope.conv_nl(
                current,
                name=layer_name,
                kernel_size=3,
                stride=layer_stride,
                pad=1,
                num_output=self._encoder_channels[layer_name])
            activations[layer_name] = current

        coarse_prediction = self.predict(
            activations["conv2_1"],
            level=2,
            loss_weight=self._loss_weights['level2'],
            out=result)

    with nd.Scope('decoder'):
        level1_features, level1_prediction = self.refine(
            level=1,
            input=activations["conv2_1"],
            input_prediction=coarse_prediction,
            features=activations["conv1_1"],
            out=result)

        if self._exit_after == 1:
            result.final = result.levels[1]
            return result

        # The level-0 return values are not needed; refine() records the
        # prediction in `result` itself.
        _level0_features, _level0_prediction = self.refine(
            level=0,
            input=level1_features,
            input_prediction=level1_prediction,
            features=activations["conv0"],
            out=result)

    result.final = result.levels[0]
    return result
def make_graph(self, img0, img1, edge_features=None, use_1D_corr=False,
               single_direction=0):
    """Run the feature extractor on both images, then the upper network.

    Args:
        img0: first image blob, passed to ``self._features.make_graph``.
        img1: second image blob, passed to ``self._features.make_graph``.
        edge_features: forwarded unchanged to ``self._upper.make_graph``.
        use_1D_corr: forwarded unchanged to ``self._upper.make_graph``.
        single_direction: forwarded unchanged to ``self._upper.make_graph``.

    Returns:
        The upper network's output with the extracted features attached as
        ``feat``.
    """
    # Whether the feature extractor is trained is controlled by
    # self._learn_features via the scope.
    with nd.Scope('features', learn=self._learn_features):
        extracted = self._features.make_graph(img0, img1)

    with nd.Scope('upper'):
        result = self._upper.make_graph(
            extracted, edge_features, use_1D_corr, single_direction)

    result.feat = extracted
    return result
def make_graph(self, data, include_losses=True):
    """Build the 'refine_disp' network that refines a left disparity map.

    The disparity in ``data.disp.L`` is warped with the forward flow
    ``data.flow[0].fwd``; the warped map (scaled by 0.05), the left image and
    the forward occlusion ``data.occ[0].fwd`` form the network input.

    Args:
        data: struct providing ``img.L``, ``disp.L``, ``flow[0].fwd`` and
            ``occ[0].fwd``.
        include_losses: accepted for interface compatibility; not read here.

    Returns:
        Output of ``Architecture_S.make_graph`` built with ``exit_after=0``.
    """
    config = nd.PredConfig()
    config.add(
        nd.PredConfigId(
            type='disp',
            perspective='L',
            channels=1,
            scale=self._scale,
        ))

    previous_disp = data.disp.L
    forward_flow = data.flow[0].fwd
    forward_occ = data.occ[0].fwd
    warped_disp = nd.ops.warp(previous_disp, forward_flow)

    def combine_with_warped(x):
        # Each raw prediction is post-processed by self.interpolator together
        # with the warped previous disparity.
        return self.interpolator(pred=x, prev_disp=warped_disp)

    config[0].mod_func = combine_with_warped

    net_input = nd.ops.concat(
        data.img.L,
        nd.ops.scale(warped_disp, 0.05),
        forward_occ)

    with nd.Scope('refine_disp', learn=True, **self.scope_args()):
        network = Architecture_S(
            num_outputs=config.total_channels(),
            disassembling_function=config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample,
            exit_after=0)
        # exit_after=0 means the decoder runs down to level 0, which needs
        # edge features; the left image is used for that.
        return network.make_graph(net_input, edge_features=data.img.L)
def make_graph(self, data, include_losses=False):
    """Build the hypothesis network followed by the merge network.

    'hypNet' (``Architecture_C``) predicts ``self._num_hypotheses`` flow
    hypotheses ('flow_hyp') with matching 'iul_b_hyp_log' outputs. These are
    resampled to the size of ``data.img[0]``, the second image is warped with
    every hypothesis, and everything is concatenated with both images as
    input to 'mergeNet' (``Architecture_S``), which predicts 'flow' and
    'iul_b_log'.

    Args:
        data: struct providing ``img[0]`` and ``img[1]``.
        include_losses: accepted for interface compatibility; not read here.

    Returns:
        Output of the merge network's ``make_graph``.
    """
    first_image = data.img[0]
    hypothesis_count = self._num_hypotheses

    def to_first_image_size(blob):
        # Linear, non-antialiased resampling onto the first image's grid.
        return nd.ops.resample(blob, reference=data.img[0],
                               antialias=False, type='LINEAR')

    # ---- hypNet ----
    hyp_config = nd.PredConfig()
    hyp_config.add(nd.PredConfigId(
        type='flow_hyp', dir='fwd', offset=0, channels=2,
        scale=self._scale, array_length=hypothesis_count))
    hyp_config.add(nd.PredConfigId(
        type='iul_b_hyp_log', dir='fwd', offset=0, channels=2,
        scale=self._scale, array_length=hypothesis_count,
        mod_func=self._log_sigmoid))
    nd.log('pred_config:')
    nd.log(hyp_config)

    with nd.Scope('hypNet', shared_batchnorm=False,
                  correlation_leaky_relu=True, **self.scope_args()):
        hyp_network = Architecture_C(
            num_outputs=hyp_config.total_channels(),
            disassembling_function=hyp_config.disassemble,
            conv_upsample=True,
            loss_function=None,
            channel_factor=self._channel_factor,
            feature_channels=self._feature_channels)
        hyp_output = hyp_network.make_graph(first_image, data.img[1])

    # ---- mergeNet ----
    merge_config = nd.PredConfig()
    merge_config.add(nd.PredConfigId(
        type='flow', dir='fwd', offset=0, channels=2,
        scale=self._scale, dist=1))
    merge_config.add(nd.PredConfigId(
        type='iul_b_log', dir='fwd', offset=0, channels=2,
        scale=self._scale, dist=1,
        mod_func=self.iul_b_log_sigmoid))
    nd.log('pred_config:')
    nd.log(merge_config)

    # Collect the raw per-hypothesis blobs first, then resample them.
    raw_flows = [hyp_output.final.flow_hyp[0].fwd[index]
                 for index in range(hypothesis_count)]
    flow_hypotheses = [to_first_image_size(blob) for blob in raw_flows]

    raw_uncertainties = [hyp_output.final.iul_b_hyp_log[0].fwd[index]
                         for index in range(hypothesis_count)]
    uncertainty_maps = [to_first_image_size(blob)
                        for blob in raw_uncertainties]

    warped_images = [nd.ops.warp(data.img[1], flow)
                     for flow in flow_hypotheses]

    with nd.Scope('mergeNet', shared_batchnorm=False, **self.scope_args()):
        merge_input = nd.ops.concat(
            [data.img[0]] + [data.img[1]]
            + flow_hypotheses + uncertainty_maps + warped_images)
        merge_network = Architecture_S(
            num_outputs=merge_config.total_channels(),
            disassembling_function=merge_config.disassemble,
            conv_upsample=True,
            loss_function=None,
            channel_factor=self._channel_factor)
        merged_output = merge_network.make_graph(merge_input)

    return merged_output
def predict(self, input, level, loss_weight, out):
    """Add a prediction layer for `level` and record its result in `out`.

    Args:
        input: blob the 3x3 prediction convolution is applied to.
        level: key under which the prediction is stored in ``out.levels``.
        loss_weight: passed as ``weight`` to ``self._loss_function``.
        out: struct whose ``levels`` member receives the new entry.

    Returns:
        The raw output blob of the prediction convolution.
    """
    disassemble = self._disassembling_function
    attach_loss = self._loss_function

    # NOTE(review): the source indentation was lost; this assumes the
    # disassembling and loss steps run inside the 'predict' scope - confirm.
    with nd.Scope('predict'):
        raw_prediction = nd.scope.conv(
            input,
            name='conv',
            kernel_size=3,
            stride=1,
            pad=1,
            num_output=self._num_outputs)

        # Always create the slot, even when nothing is disassembled into it.
        out.levels.make_struct(level)

        if callable(disassemble):
            out.levels[level] = disassemble(raw_prediction)

        if callable(attach_loss):
            attach_loss(out.levels[level], level=level, weight=loss_weight)

    return raw_prediction
def refine(self, level, input, features, out, input_prediction=None):
    """Upsample `input`, merge it with skip features and predict at `level`.

    Args:
        level: decoder level being produced; selects the channel count and
            loss weight via the key ``'level<level>'``.
        input: blob from the coarser level, upsampled by a 4x4 stride-2
            ``upconv_nl``.
        features: skip-connection blob concatenated with the upsampled input.
        out: struct passed on to ``self.predict``.
        input_prediction: optional coarser prediction; when given it is
            upsampled via ``self.upsample_prediction`` and concatenated too.

    Returns:
        Tuple ``(merged_features, prediction)`` where ``merged_features`` is
        the concatenated (and, if ``self._interconv`` is set, convolved) blob
        and ``prediction`` is the value returned by ``self.predict``.
    """
    level_key = 'level%d' % level
    channels = self._decoder_channels[level_key]

    with nd.Scope('refine_%d' % level):
        upsampled = nd.scope.upconv_nl(
            input,
            name='deconv',
            kernel_size=4,
            stride=2,
            pad=1,
            num_output=channels)

        if input_prediction is None:
            pieces = [features, upsampled]
        else:
            coarse_upsampled = self.upsample_prediction(
                input_prediction,
                upsampled,
                name="upsample_prediction%dto%d" % (level + 1, level))
            pieces = [features, upsampled, coarse_upsampled]

        merged = nd.ops.concat(pieces, axis=1)

        if self._interconv:
            merged = nd.scope.conv_nl(
                merged,
                name="interconv",
                kernel_size=3,
                stride=1,
                pad=1,
                num_output=channels)

        prediction = self.predict(
            merged,
            level=level,
            loss_weight=self._loss_weights[level_key],
            out=out)

    return merged, prediction
def make_graph(self, input, edge_features=None):
    """Build the encoder/decoder graph and collect per-level predictions.

    The encoder always builds ``conv1`` .. ``conv4_1``. With
    ``self._encoder_level == 4`` it predicts there; with ``== 5`` it adds
    ``conv5``/``conv5_1`` and predicts at level 5; for any other value it
    continues to ``conv6_1`` and predicts at level 6. The decoder then
    refines level by level and stops early when ``self._exit_after`` matches
    a level it has just produced.

    Args:
        input: blob fed into the first encoder convolution.
        edge_features: blob convolved into the skip features of level 0.
            Only needed when the decoder runs down to level 0.

    Returns:
        The ``nd.Struct`` created here, with ``levels`` filled through
        ``self.predict`` / ``self.refine`` and ``final`` set to the last
        level that was produced.

    Raises:
        ValueError: if level 0 is reached and ``edge_features`` is None.
    """
    def encoder_conv(bottom, name, kernel_size, stride):
        # Every encoder layer pads by half the kernel size (7->3, 5->2, 3->1)
        # and takes its channel count from self._encoder_channels[name].
        return nd.scope.conv_nl(
            bottom,
            name=name,
            kernel_size=kernel_size,
            stride=stride,
            pad=kernel_size // 2,
            num_output=self._encoder_channels[name])

    out = nd.Struct()
    out.make_struct('levels')

    with nd.Scope('encoder'):
        conv1 = encoder_conv(input, "conv1", kernel_size=7, stride=2)
        conv2 = encoder_conv(conv1, "conv2", kernel_size=5, stride=2)
        conv3 = encoder_conv(conv2, "conv3", kernel_size=5, stride=2)
        conv3_1 = encoder_conv(conv3, "conv3_1", kernel_size=3, stride=1)
        conv4 = encoder_conv(conv3_1, "conv4", kernel_size=3, stride=2)
        conv4_1 = encoder_conv(conv4, "conv4_1", kernel_size=3, stride=1)

        if self._encoder_level == 4:
            prediction4 = self.predict(
                conv4_1, level=4,
                loss_weight=self._loss_weights['level4'], out=out)
        else:
            conv5 = encoder_conv(conv4_1, "conv5", kernel_size=3, stride=2)
            conv5_1 = encoder_conv(conv5, "conv5_1", kernel_size=3, stride=1)

            if self._encoder_level == 5:
                prediction5 = self.predict(
                    conv5_1, level=5,
                    loss_weight=self._loss_weights['level5'], out=out)
            else:
                conv6 = encoder_conv(conv5_1, "conv6",
                                     kernel_size=3, stride=2)
                conv6_1 = encoder_conv(conv6, "conv6_1",
                                       kernel_size=3, stride=1)
                prediction6 = self.predict(
                    conv6_1, level=6,
                    loss_weight=self._loss_weights['level6'], out=out)

    with nd.Scope('decoder'):
        # NOTE(review): the source indentation was lost; the exit checks for
        # levels 5 and 4 are assumed to sit inside their encoder-level
        # guards - confirm against the intended behaviour.
        if self._encoder_level >= 6:
            decoder5, prediction5 = self.refine(
                level=5, input=conv6_1, input_prediction=prediction6,
                features=conv5_1, out=out)
            if self._exit_after == 5:
                out.final = out.levels[5]
                return out

        if self._encoder_level >= 5:
            # With a level-5 encoder there is no decoder5; start from the
            # deepest encoder activation instead.
            decoder4, prediction4 = self.refine(
                level=4,
                input=decoder5 if self._encoder_level > 5 else conv5_1,
                input_prediction=prediction5,
                features=conv4_1, out=out)
            if self._exit_after == 4:
                out.final = out.levels[4]
                return out

        decoder3, prediction3 = self.refine(
            level=3,
            input=decoder4 if self._encoder_level > 4 else conv4_1,
            input_prediction=prediction4,
            features=conv3_1, out=out)
        if self._exit_after == 3:
            out.final = out.levels[3]
            return out

        decoder2, prediction2 = self.refine(
            level=2, input=decoder3, input_prediction=prediction3,
            features=conv2, out=out)
        if self._exit_after == 2:
            out.final = out.levels[2]
            return out

        decoder1, prediction1 = self.refine(
            level=1, input=decoder2, input_prediction=prediction2,
            features=conv1, out=out)
        if self._exit_after == 1:
            out.final = out.levels[1]
            return out

        if edge_features is None:
            # ValueError instead of a bare BaseException so that ordinary
            # `except Exception` handlers see the failure; handlers written
            # for BaseException still catch it.
            raise ValueError(
                '%s needs edge features if not shallow'
                % type(self).__name__)

        edges = nd.scope.conv_nl(
            edge_features,
            name="conv_edges",
            kernel_size=3,
            stride=1,
            pad=1,
            num_output=self._decoder_channels['level0'])

        self.refine(
            level=0, input=decoder1, input_prediction=prediction1,
            features=edges, out=out)

    out.final = out.levels[0]
    return out
def make_graph(self, data, include_losses=True):
    """Build a stack of three networks predicting forward flow and occlusion.

    'net1' (``Architecture_C``, ``learn=False``) sees both images. 'net2'
    and 'net3' (``Architecture_S``) each receive both images, the previous
    flow upsampled to image size and scaled by 0.05, the second image warped
    with that flow, and the resampled previous occlusion. Their 'flow' and
    'occ' outputs are residuals: the previous estimate, resampled to the
    prediction's size, is added back. 'net3' (``learn=True``,
    ``exit_after=0``) also predicts an 'mb' output.

    Args:
        data: struct providing ``img[0]`` and ``img[1]``.
        include_losses: accepted for interface compatibility; not read here.

    Returns:
        Output of the third network's ``make_graph``.
    """
    def residual_on(base):
        # Binding `base` as an argument (rather than closing over a local
        # that is reassigned for the next stage) pins each mod_func to the
        # estimate it was created for, whenever it ends up being invoked.
        return lambda x: nd.ops.add(
            x,
            nd.ops.resample(
                base, reference=x, type='LINEAR', antialias=False))

    def prepare_stage(previous_out):
        # Derive the next stage's input from the previous stage's output.
        flow = previous_out.final.flow[0].fwd
        upsampled_flow = nd.ops.differentiable_resample(
            flow, reference=data.img[0])
        warped = nd.ops.warp(data.img[1], upsampled_flow)
        occ = self.resample_occ(previous_out.final.occ[0].fwd, data.img[0])
        stage_input = nd.ops.concat(
            data.img[0],
            data.img[1],
            nd.ops.scale(upsampled_flow, 0.05),
            warped,
            occ)
        return flow, occ, stage_input

    pred_config = nd.PredConfig()
    pred_config.add(
        nd.PredConfigId(type='flow', dir='fwd', offset=0, channels=2,
                        scale=self._scale))
    pred_config.add(
        nd.PredConfigId(type='occ', dir='fwd', offset=0, channels=2,
                        scale=self._scale))
    nd.log('pred_config:')
    nd.log(pred_config)

    # ---- Net 1 ----
    with nd.Scope('net1', learn=False, **self.scope_args()):
        arch1 = Architecture_C(
            num_outputs=pred_config.total_channels(),
            disassembling_function=pred_config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample)
        out1 = arch1.make_graph(data.img[0], data.img[1])

    # ---- Net 2 ----
    flow1, occ1, input2 = prepare_stage(out1)
    pred_config[0].mod_func = residual_on(flow1)
    pred_config[1].mod_func = residual_on(occ1)

    with nd.Scope('net2', learn=False, **self.scope_args()):
        arch2 = Architecture_S(
            num_outputs=pred_config.total_channels(),
            disassembling_function=pred_config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample)
        out2 = arch2.make_graph(input2)

    # ---- Net 3 ----
    flow2, occ2, input3 = prepare_stage(out2)
    # The third stage predicts one extra output on top of flow and occ.
    pred_config.add(
        nd.PredConfigId(type='mb', dir='fwd', offset=0, channels=2,
                        scale=self._scale))
    pred_config[0].mod_func = residual_on(flow2)
    pred_config[1].mod_func = residual_on(occ2)

    with nd.Scope('net3', learn=True, **self.scope_args()):
        arch3 = Architecture_S(
            num_outputs=pred_config.total_channels(),
            disassembling_function=pred_config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample,
            exit_after=0,
        )
        # exit_after=0 runs the decoder down to level 0, which needs edge
        # features; the first image is used for that.
        out3 = arch3.make_graph(input3, edge_features=data.img[0])

    return out3
def make_graph(self, input, edge_features=None, use_1D_corr=False,
               single_direction=0):
    """Build the correlation-based encoder/decoder on extracted features.

    ``input.conv3a`` and ``input.conv3b`` are correlated (1D or 2D), the
    result is concatenated with a 1x1 'conv_redir' of ``input.conv3a``, and
    the encoder continues to ``conv6_1`` with a prediction at level 6. The
    decoder refines from level 5 downwards, using ``input.conv2a`` and
    ``input.conv1a`` as skip features for levels 2 and 1, and stops early
    when ``self._exit_after`` matches the level just produced.

    Args:
        input: struct providing ``conv3a``, ``conv3b`` and, if the decoder
            gets that far, ``conv2a`` and ``conv1a``.
        edge_features: blob convolved into the skip features of level 0.
            Only needed when the decoder runs down to level 0.
        use_1D_corr: selects ``correlation_1d`` instead of
            ``correlation_2d``.
        single_direction: forwarded to ``correlation_1d``; unused for 2D.

    Returns:
        The ``nd.Struct`` created here, with ``levels`` filled through
        ``self.predict`` / ``self.refine`` and ``final`` set to the last
        level that was produced.

    Raises:
        ValueError: if level 0 is reached and ``edge_features`` is None.
    """
    # (layer name, stride) after the merge; all are 3x3 with pad 1.
    encoder_layout = (
        ("conv3_1", 1),
        ("conv4", 2),
        ("conv4_1", 1),
        ("conv5", 2),
        ("conv5_1", 1),
        ("conv6", 2),
        ("conv6_1", 1),
    )

    out = nd.Struct()
    out.make_struct('levels')

    if use_1D_corr:
        corr = nd.ops.correlation_1d(
            input.conv3a, input.conv3b,
            kernel_size=1,
            max_displacement=40,
            pad=40,
            stride_1=1,
            stride_2=1,
            single_direction=single_direction)
    else:
        corr = nd.ops.correlation_2d(
            input.conv3a, input.conv3b,
            kernel_size=1,
            max_displacement=20,
            pad=20,
            stride_1=1,
            stride_2=2)

    encoded = {}
    with nd.Scope('encoder'):
        redir = nd.scope.conv_nl(
            input.conv3a,
            name="conv_redir",
            kernel_size=1,
            stride=1,
            pad=0,
            num_output=self._encoder_channels['conv_redir'])
        current = nd.ops.concat(redir, corr)

        for layer_name, layer_stride in encoder_layout:
            current = nd.scope.conv_nl(
                current,
                name=layer_name,
                kernel_size=3,
                stride=layer_stride,
                pad=1,
                num_output=self._encoder_channels[layer_name])
            encoded[layer_name] = current

        prediction6 = self.predict(
            encoded["conv6_1"], level=6,
            loss_weight=self._loss_weights['level6'], out=out)

    with nd.Scope('decoder'):
        decoder5, prediction5 = self.refine(
            level=5, input=encoded["conv6_1"],
            input_prediction=prediction6,
            features=encoded["conv5_1"], out=out)
        if self._exit_after == 5:
            out.final = out.levels[5]
            return out

        decoder4, prediction4 = self.refine(
            level=4, input=decoder5, input_prediction=prediction5,
            features=encoded["conv4_1"], out=out)
        if self._exit_after == 4:
            out.final = out.levels[4]
            return out

        decoder3, prediction3 = self.refine(
            level=3, input=decoder4, input_prediction=prediction4,
            features=encoded["conv3_1"], out=out)
        if self._exit_after == 3:
            out.final = out.levels[3]
            return out

        # From here on the skip features come from the feature extractor.
        decoder2, prediction2 = self.refine(
            level=2, input=decoder3, input_prediction=prediction3,
            features=input.conv2a, out=out)
        if self._exit_after == 2:
            out.final = out.levels[2]
            return out

        decoder1, prediction1 = self.refine(
            level=1, input=decoder2, input_prediction=prediction2,
            features=input.conv1a, out=out)
        if self._exit_after == 1:
            out.final = out.levels[1]
            return out

        if edge_features is None:
            # ValueError instead of a bare BaseException so that ordinary
            # `except Exception` handlers see the failure. The class name is
            # taken from the instance rather than hard-coded.
            raise ValueError(
                '%s needs edge features if not shallow'
                % type(self).__name__)

        edges = nd.scope.conv_nl(
            edge_features,
            name="conv_edges",
            kernel_size=3,
            stride=1,
            pad=1,
            num_output=self._decoder_channels['level0'])

        self.refine(
            level=0, input=decoder1, input_prediction=prediction1,
            features=edges, out=out)

    out.final = out.levels[0]
    return out
def make_graph(self, data, include_losses=True):
    """Build a stack of three networks predicting left disparity and occlusion.

    'net1' (``Architecture_C`` with 1D correlation, ``learn=False``) sees the
    left and right images. 'net2' and 'net3' (``Architecture_S``) each
    receive both images, the previous disparity upsampled to image size and
    scaled by 0.05, the right image warped with that disparity, and the
    resampled previous occlusion. Their 'disp' and 'occ' outputs are
    residuals: the previous estimate, resampled to the prediction's size, is
    added back. 'net3' (``learn=True``, ``exit_after=0``) also predicts a
    'db' output. All three use ``channel_factor=0.375``.

    Args:
        data: struct providing ``img.L`` and ``img.R``.
        include_losses: accepted for interface compatibility; not read here.

    Returns:
        Output of the third network's ``make_graph``.
    """
    left_image = data.img.L
    channel_factor = 0.375

    def add_resampled(previous):
        # mod_func that adds `previous`, linearly resampled to the
        # prediction's size, onto the raw prediction.
        def apply(x):
            return nd.ops.add(
                x,
                nd.ops.resample(previous, reference=x, type='LINEAR',
                                antialias=False))
        return apply

    config = nd.PredConfig()
    config.add(
        nd.PredConfigId(type='disp',
                        perspective='L',
                        channels=1,
                        scale=self._scale,
                        mod_func=lambda b: nd.ops.neg_relu(b)))
    config.add(
        nd.PredConfigId(
            type='occ',
            perspective='L',
            channels=2,
            scale=self._scale,
        ))

    # ---- Net 1 ----
    with nd.Scope('net1', learn=False, **self.scope_args()):
        first_net = Architecture_C(
            num_outputs=config.total_channels(),
            disassembling_function=config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample,
            channel_factor=channel_factor)
        first_out = first_net.make_graph(
            data.img.L, data.img.R, use_1D_corr=True, single_direction=0)

    # ---- Net 2 ----
    first_disp = first_out.final.disp.L
    first_occ = self.resample_occ(first_out.final.occ.L, data.img.L)
    first_disp_full = nd.ops.differentiable_resample(
        first_disp, reference=data.img.L)

    config[0].mod_func = add_resampled(first_disp)
    config[1].mod_func = add_resampled(first_occ)

    right_warped = nd.ops.warp(
        data.img.R, nd.ops.disp_to_flow(first_disp_full))
    second_input = nd.ops.concat(
        data.img.L,
        data.img.R,
        nd.ops.scale(first_disp_full, 0.05),
        right_warped,
        first_occ)

    with nd.Scope('net2', learn=False, **self.scope_args()):
        second_net = Architecture_S(
            num_outputs=config.total_channels(),
            disassembling_function=config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample,
            channel_factor=channel_factor)
        second_out = second_net.make_graph(second_input)

    # ---- Net 3 ----
    second_disp = second_out.final.disp.L
    second_occ = self.resample_occ(second_out.final.occ.L, data.img.L)
    second_disp_full = nd.ops.differentiable_resample(
        second_disp, reference=data.img.L)

    # The third stage predicts one extra output on top of disp and occ.
    config.add(
        nd.PredConfigId(type='db',
                        perspective='L',
                        channels=2,
                        scale=self._scale))
    config[0].mod_func = add_resampled(second_disp)
    config[1].mod_func = add_resampled(second_occ)

    right_warped = nd.ops.warp(
        data.img.R, nd.ops.disp_to_flow(second_disp_full))
    third_input = nd.ops.concat(
        data.img.L,
        data.img.R,
        nd.ops.scale(second_disp_full, 0.05),
        right_warped,
        second_occ)

    with nd.Scope('net3', learn=True, **self.scope_args()):
        third_net = Architecture_S(
            num_outputs=config.total_channels(),
            disassembling_function=config.disassemble,
            loss_function=None,
            conv_upsample=self._conv_upsample,
            exit_after=0,
            channel_factor=channel_factor)
        # exit_after=0 runs the decoder down to level 0, which needs edge
        # features; the left image is used for that.
        third_out = third_net.make_graph(third_input,
                                         edge_features=left_image)

    return third_out