def construct_ls_vo_rt_model(inputs,
                             cropping=((0, 0), (0, 0)),
                             output_size=500,
                             regularization=0,
                             kernel_initializer='glorot_normal'):
    inputs = concat(inputs)

    features, bottleneck = construct_encoder(inputs, kernel_initializer=kernel_initializer)

    # Auxiliary branch: reconstruct the input flow from the bottleneck
    reconstructed_flow = construct_flow_decoder(bottleneck,
                                                cropping=cropping,
                                                output_channels=inputs.shape[-1].value)

    fc_rotation = dense(features,
                        output_size=output_size,
                        layers_num=2,
                        regularization=regularization,
                        kernel_initializer=kernel_initializer,
                        name='rotation')
    fc_translation = dense(features,
                           output_size=output_size,
                           layers_num=2,
                           regularization=regularization,
                           kernel_initializer=kernel_initializer,
                           name='translation')

    outputs = construct_outputs([fc_rotation] * 3 + [fc_translation] * 3,
                                regularization=regularization) + [reconstructed_flow]
    return outputs
def construct_simple_model(inputs,
                           conv_layers=3,
                           conv_filters=64,
                           kernel_sizes=3,
                           strides=1,
                           paddings='same',
                           fc_layers=2,
                           output_sizes=500,
                           activations='elu',
                           regularizations=0,
                           batch_norms=True):
    # Broadcast scalar hyperparameters to per-layer lists
    if not isinstance(conv_filters, list):
        conv_filters = [conv_filters] * conv_layers
    if not isinstance(kernel_sizes, list):
        kernel_sizes = [kernel_sizes] * conv_layers
    if not isinstance(strides, list):
        strides = [strides] * conv_layers
    if not isinstance(paddings, list):
        paddings = [paddings] * conv_layers
    if not isinstance(output_sizes, list):
        output_sizes = [output_sizes] * fc_layers
    if not isinstance(activations, list):
        activations = [activations] * (conv_layers + fc_layers)
    if not isinstance(regularizations, list):
        regularizations = [regularizations] * (conv_layers + fc_layers)
    if not isinstance(batch_norms, list):
        batch_norms = [batch_norms] * (conv_layers + fc_layers)

    inputs = concat(inputs)

    conv = inputs
    for i in range(conv_layers):
        conv = conv2d(conv,
                      conv_filters[i],
                      kernel_size=kernel_sizes[i],
                      batch_norm=batch_norms[i],
                      padding=paddings[i],
                      kernel_initializer='glorot_normal',
                      strides=strides[i],
                      activation=activations[i],
                      activity_regularizer=l2(regularizations[i]))

    flatten = Flatten()(conv)

    fc = flatten
    for i in range(fc_layers):
        # FC layers use the entries following the conv entries in the
        # activations/regularizations lists
        fc = Dense(output_sizes[i],
                   kernel_initializer='glorot_normal',
                   activation=activations[i + conv_layers],
                   activity_regularizer=l2(regularizations[i + conv_layers]))(fc)

    outputs = construct_outputs([fc] * 6)
    return outputs
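# Usage sketch (not part of the original module): the constructors in this file
# return a list of Keras output tensors, so a trainable model can presumably be
# built by feeding Input layers through one of them and wrapping the result in
# keras.models.Model. Input shapes, names, loss and optimizer below are
# assumptions made only for illustration.
def _example_build_simple_model():
    from keras.layers import Input
    from keras.models import Model

    # Two assumed 2-channel optical-flow inputs of size 96x320
    flow_forward = Input(shape=(96, 320, 2), name='flow_forward')
    flow_backward = Input(shape=(96, 320, 2), name='flow_backward')

    outputs = construct_simple_model([flow_forward, flow_backward])
    model = Model(inputs=[flow_forward, flow_backward], outputs=outputs)
    model.compile(loss='mse', optimizer='adam')  # assumed loss/optimizer
    return model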
def construct_st_vo_model(inputs, kernel_initializer='glorot_normal'):
    inputs = concat(inputs)

    conv1 = Conv2D(64, kernel_size=3, strides=2,
                   kernel_initializer=kernel_initializer, name='conv1')(inputs)
    pool1 = MaxPooling2D(pool_size=4, strides=4, name='pool1')(conv1)

    conv2 = Conv2D(20, kernel_size=3,
                   kernel_initializer=kernel_initializer, name='conv2')(pool1)
    pool2 = MaxPooling2D(pool_size=2, strides=2, name='pool2')(conv2)

    flatten1 = Flatten(name='flatten1')(pool1)
    flatten2 = Flatten(name='flatten2')(pool2)
    merged = concatenate([flatten1, flatten2], axis=1)
    activation = Activation('relu')(merged)

    fc = dense(activation, kernel_initializer=kernel_initializer, name='fc')

    outputs = construct_outputs([fc] * 6)
    return outputs
def construct_ls_vo_rt_no_decoder_model(inputs,
                                        output_size=500,
                                        regularization=0,
                                        kernel_initializer='glorot_normal'):
    inputs = concat(inputs)

    features, _ = construct_encoder(inputs, kernel_initializer=kernel_initializer)

    fc_rotation = dense(features,
                        output_size=output_size,
                        layers_num=2,
                        regularization=regularization,
                        kernel_initializer=kernel_initializer,
                        name='rotation')
    fc_translation = dense(features,
                           output_size=output_size,
                           layers_num=2,
                           regularization=regularization,
                           kernel_initializer=kernel_initializer,
                           name='translation')

    outputs = construct_outputs([fc_rotation] * 3 + [fc_translation] * 3,
                                regularization=regularization)
    return outputs
def construct_resnet50_model(inputs, weights='imagenet', kernel_initializer='glorot_normal'):
    inputs = concat(inputs)

    # Project the concatenated inputs down to 3 channels so the (optionally
    # ImageNet-pretrained) ResNet50 backbone can be applied
    conv0 = Conv2D(3, kernel_size=7, padding='same', activation='relu',
                   kernel_initializer=kernel_initializer, name='conv0')(inputs)

    features = ResNet50(weights=weights, include_top=False, pooling=None)(conv0)
    flatten = Flatten()(features)

    fc = dense(flatten,
               output_size=500,
               layers_num=2,
               activation='relu',
               kernel_initializer=kernel_initializer)

    outputs = construct_outputs([fc] * 6)
    return outputs
def construct_encoder(inputs,
                      layers=4,
                      filters=[[16, 16, 32]] * 4,
                      kernel_sizes=[[7, 5, 3]] * 4,
                      strides=2,
                      dilation_rates=None,
                      kernel_initializer='glorot_normal',
                      use_gated_convolutions=False):
    conv = gated_conv2d if use_gated_convolutions else conv2d
    makelist = lambda x: [x] if isinstance(x, int) else x

    # Broadcast scalar hyperparameters to per-layer lists
    if isinstance(filters, int):
        filters = [filters] * layers
    if isinstance(kernel_sizes, int):
        kernel_sizes = [kernel_sizes] * layers
    if isinstance(strides, int):
        strides = [strides] * layers
    if dilation_rates is None:
        dilation_rates = [1] * layers

    for i in range(layers):
        layer_filters = makelist(filters[i])
        layer_kernel_sizes = makelist(kernel_sizes[i])
        layer_dilation_rates = makelist(dilation_rates[i])
        layer_stride = strides[i]

        # Each layer applies several parallel convolutions (Inception-style);
        # single values are broadcast across the parallel branches
        convs = max(len(layer_filters), len(layer_kernel_sizes), len(layer_dilation_rates))
        assert len(layer_filters) in (1, convs)
        assert len(layer_kernel_sizes) in (1, convs)
        assert len(layer_dilation_rates) in (1, convs)

        if len(layer_filters) == 1:
            layer_filters *= convs
        if len(layer_kernel_sizes) == 1:
            layer_kernel_sizes *= convs
        if len(layer_dilation_rates) == 1:
            layer_dilation_rates *= convs

        print(f'Layer {i + 1}: {convs} convolutions')

        outputs = []
        for flt, kernel_size, dilation_rate in zip(layer_filters,
                                                   layer_kernel_sizes,
                                                   layer_dilation_rates):
            print(f'\tfilters={flt}, kernel size={kernel_size}, '
                  f'stride={layer_stride}, dilation rate={dilation_rate}')
            outputs.append(conv(inputs,
                                flt,
                                kernel_size=kernel_size,
                                strides=layer_stride,
                                dilation_rate=dilation_rate,
                                padding='same',
                                activation='relu',
                                kernel_initializer=kernel_initializer))

        # Concatenate the parallel branches and feed them to the next layer
        inputs = concat(outputs)

    # 1x1 convolution to merge channels before flattening
    merged = conv(inputs, 64, kernel_size=1, padding='same', activation='relu',
                  kernel_initializer=kernel_initializer)
    flatten = Flatten()(merged)
    return flatten
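# Standalone illustration (not part of the original module) of how the
# per-layer arguments of construct_encoder broadcast: scalars expand across
# layers, and a single value within a layer expands across that layer's
# parallel branches. The values below mirror the defaults.
def _example_encoder_argument_broadcasting():
    makelist = lambda x: [x] if isinstance(x, int) else x

    layers = 4
    filters = [[16, 16, 32]] * layers    # three parallel branches per layer
    kernel_sizes = [[7, 5, 3]] * layers  # one kernel size per branch
    dilation_rates = [1] * layers        # scalar per layer, broadcast below

    for i in range(layers):
        layer_filters = makelist(filters[i])                # [16, 16, 32]
        layer_kernel_sizes = makelist(kernel_sizes[i])      # [7, 5, 3]
        layer_dilation_rates = makelist(dilation_rates[i])  # [1]
        convs = max(len(layer_filters), len(layer_kernel_sizes), len(layer_dilation_rates))
        if len(layer_dilation_rates) == 1:
            layer_dilation_rates *= convs                   # [1, 1, 1]
        assert convs == 3
        assert layer_dilation_rates == [1, 1, 1]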
def construct_encoder(inputs,
                      use_depth=True,
                      use_flow=True,
                      use_association_layer=True,
                      use_grid=False,
                      concat_axis=3,
                      filters=256,
                      stride=2,
                      f_x=1,
                      f_y=1,
                      c_x=0.5,
                      c_y=0.5,
                      kernel_initializer='glorot_normal'):
    # At least one branch must be enabled
    assert use_flow or use_depth

    # Flow convolutional branch
    if use_flow:
        flow = concat(inputs[:2])
        if use_grid:
            flow = add_grid(flow, f_x=f_x, f_y=f_y, c_x=c_x, c_y=c_y)
        for i in range(1, 5):
            flow = conv2d(flow,
                          2 ** (i + 5),
                          kernel_size=3,
                          strides=2,
                          kernel_initializer=kernel_initializer,
                          name=f'conv{i}_flow')

    # Depth convolutional branch
    if use_depth:
        if use_association_layer:
            # Pass flow_z as input
            depth = depth_flow(concat(inputs))
        else:
            depth = concat(inputs[2:])
        if use_grid:
            depth = add_grid(depth, f_x=f_x, f_y=f_y, c_x=c_x, c_y=c_y)
        for i in range(1, 5):
            depth = conv2d(depth,
                           2 ** (i + 5),
                           kernel_size=3,
                           strides=2,
                           kernel_initializer=kernel_initializer,
                           name=f'conv{i}_depth')

    if use_flow and use_depth:
        concatenated = concat([flow, depth])
    elif use_flow:
        concatenated = flow
    elif use_depth:
        concatenated = depth

    # 1x1 convolution to merge the branches before flattening
    merged = conv2d(concatenated,
                    filters,
                    kernel_size=1,
                    strides=stride,
                    kernel_initializer=kernel_initializer,
                    name='merge')
    flatten = Flatten()(merged)
    return flatten
def construct_flexible_model(inputs,
                             kernel_sizes=[7, 5, 3, 3, 3, 3],
                             strides=[2, 1, 4, 1, 2, 1],
                             dilation_rates=None,
                             output_size=500,
                             regularization=0,
                             activation='relu',
                             kernel_initializer='glorot_normal',
                             use_gated_convolutions=False,
                             use_batch_norm=False,
                             split=False,
                             transform=None,
                             agnostic=True,
                             channel_wise=False,
                             concat_scale_to_fc=False,
                             multiply_outputs_by_scale=False,
                             confidence_mode=None):
    inputs, scale = transform_inputs(inputs,
                                     transform=transform,
                                     agnostic=agnostic,
                                     channel_wise=channel_wise)

    features = construct_encoder(inputs,
                                 kernel_sizes=kernel_sizes,
                                 strides=strides,
                                 dilation_rates=dilation_rates,
                                 activation=activation,
                                 kernel_initializer=kernel_initializer,
                                 use_gated_convolutions=use_gated_convolutions,
                                 use_batch_norm=use_batch_norm)

    if concat_scale_to_fc:
        fc_rotation = features
        fc_translation = features
        for i in range(2):
            fc_rotation = concat([fc_rotation, scale])
            fc_translation = concat([fc_translation, scale])
            fc_rotation = dense(fc_rotation,
                                output_size=output_size,
                                layers_num=1,
                                regularization=regularization,
                                activation=activation,
                                kernel_initializer=kernel_initializer)
            fc_translation = dense(fc_translation,
                                   output_size=output_size,
                                   layers_num=1,
                                   regularization=regularization,
                                   activation=activation,
                                   kernel_initializer=kernel_initializer)
    else:
        fc_rotation = dense(features,
                            output_size=output_size,
                            layers_num=2,
                            regularization=regularization,
                            activation=activation,
                            kernel_initializer=kernel_initializer,
                            name='rotation')
        fc_translation = dense(features,
                               output_size=output_size,
                               layers_num=2,
                               regularization=regularization,
                               activation=activation,
                               kernel_initializer=kernel_initializer,
                               name='translation')

    if split:
        fc = chunk(fc_rotation, n=3) + chunk(fc_translation, n=3)
    else:
        fc = [fc_rotation] * 3 + [fc_translation] * 3

    outputs = construct_outputs(fc,
                                regularization=regularization,
                                scale=scale if multiply_outputs_by_scale else None,
                                confidence_mode=confidence_mode)
    return outputs
def construct_sequential_rt_model(inputs,
                                  intrinsics,
                                  use_input_flow=False,
                                  use_diff_flow=False,
                                  use_rotation_flow=False,
                                  kernel_sizes=[7, 5, 3, 3, 3, 3],
                                  strides=[2, 1, 4, 1, 2, 1],
                                  dilation_rates=None,
                                  hidden_size=500,
                                  regularization=0,
                                  activation='relu',
                                  kernel_initializer='glorot_normal',
                                  use_gated_convolutions=False,
                                  use_batch_norm=False,
                                  return_confidence=False):
    assert use_input_flow or use_diff_flow or use_rotation_flow

    inputs = concat(inputs)

    # Stage 1: predict rotation from the input flow
    features_rotation = construct_encoder(inputs,
                                          kernel_sizes=kernel_sizes,
                                          strides=strides,
                                          dilation_rates=dilation_rates,
                                          kernel_initializer=kernel_initializer,
                                          use_gated_convolutions=use_gated_convolutions,
                                          use_batch_norm=use_batch_norm)
    fc_rotation = dense(features_rotation,
                        output_size=hidden_size,
                        regularization=regularization,
                        activation=activation,
                        kernel_initializer=kernel_initializer,
                        layers_num=2,
                        name='rotation')
    output_rotation = construct_output(fc_rotation,
                                       name='rotation',
                                       regularization=regularization)

    # Compose the flow induced by the predicted rotation
    rotation_flow = flow_composer(output_rotation, intrinsics=intrinsics)

    # Stage 2: predict translation from the input flow, the rotation-compensated
    # flow and/or the rotation flow itself
    inputs_for_translation = []
    if use_input_flow:
        inputs_for_translation.append(inputs)
    if use_diff_flow:
        inputs_for_translation.append(Subtract()([inputs, rotation_flow]))
    if use_rotation_flow:
        inputs_for_translation.append(rotation_flow)

    features_translation = construct_encoder(concat(inputs_for_translation),
                                             kernel_sizes=kernel_sizes,
                                             strides=strides,
                                             dilation_rates=dilation_rates,
                                             kernel_initializer=kernel_initializer,
                                             use_gated_convolutions=use_gated_convolutions,
                                             use_batch_norm=use_batch_norm)
    fc_translation = dense(features_translation,
                           output_size=hidden_size,
                           regularization=regularization,
                           activation=activation,
                           kernel_initializer=kernel_initializer,
                           layers_num=2,
                           name='translation')
    output_translation = construct_output(fc_translation,
                                          name='translation',
                                          regularization=regularization)

    return output_rotation + output_translation