/
models.py
125 lines (96 loc) · 5.85 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from functools import partial
import slim
import tensorflow as tf
from slim import ops
from slim import scopes
def align_reference_shape(reference_shape, reference_shape_bb, im, bb):
    """Rescale an image and place the reference shape inside a target box.

    The scale factor is the ratio between the spread of ``bb`` and the spread
    of ``reference_shape_bb``, where "spread" is the square root of the summed
    squared deviations from the mean along axis 0.

    Args:
        reference_shape: points of the reference (mean) shape.
        reference_shape_bb: bounding-box points belonging to the reference
            shape.
        im: a rank-3 image tensor; its first two dimensions are treated as
            the spatial size.
        bb: bounding-box points of the target.

    Returns:
        A tuple ``(resized_image, aligned_shape, ratio)``: the image resized
        by ``1 / ratio`` (bilinear), the reference shape moved onto ``bb`` and
        expressed in the resized image's coordinates, and the scale factor.
    """
    def _spread(points):
        # Root of the total squared deviation from the per-column mean.
        centred = points - tf.reduce_mean(points, 0)
        return tf.sqrt(tf.reduce_sum(tf.square(centred)))

    target_spread = _spread(bb)
    reference_spread = _spread(reference_shape_bb)
    ratio = target_spread / reference_spread

    # Centre the reference shape on its own box, scale it, then move it onto
    # the centre of the target box.
    centred_reference = reference_shape - tf.reduce_mean(reference_shape_bb, 0)
    align_mean_shape = centred_reference * ratio + tf.reduce_mean(bb, 0)

    spatial_size = tf.to_float(tf.shape(im)[:2])
    new_size = tf.to_int32(spatial_size / ratio)

    # resize_bilinear works on batches, so wrap the image in a batch of one
    # and unwrap it again afterwards.
    batched = tf.expand_dims(im, 0)
    resized = tf.image.resize_bilinear(batched, new_size)[0, :, :, :]
    return resized, align_mean_shape / ratio, ratio
def normalized_rmse(pred, gt_truth):
    """Per-sample landmark error normalised by a ground-truth distance.

    For every sample the Euclidean error of each point is summed and divided
    by ``68`` times the distance between ground-truth points 36 and 45
    (presumably the outer eye corners of a 68-point markup -- confirm against
    the dataset).

    Args:
        pred: predicted points, indexed as ``[sample, point, coordinate]``.
        gt_truth: ground-truth points with the same layout as ``pred``.

    Returns:
        A tensor with one normalised error value per sample.
    """
    reference_span = gt_truth[:, 36, :] - gt_truth[:, 45, :]
    norm = tf.sqrt(tf.reduce_sum(reference_span ** 2, 1))

    # Euclidean distance of every predicted point to its ground truth.
    point_errors = tf.sqrt(tf.reduce_sum(tf.square(pred - gt_truth), 2))
    summed_error = tf.reduce_sum(point_errors, 1)
    return summed_error / (norm * 68)
def conv_model(inputs, is_training=True, scope=''):
    """Build a three-stage conv / max-pool feature extractor.

    Each stage is a ReLU convolution with 'VALID' padding followed by a
    ``[2, 2]`` max-pool: 32 filters of 7x7, then 64 of 3x3, then 64 of 3x3.

    Args:
        inputs: input tensor fed to the first convolution.
        is_training: forwarded to ``ops.conv2d`` / ``ops.fc`` via arg_scope.
        scope: name passed to ``tf.name_scope`` (default name 'Conv_lay').

    Returns:
        A dict of every intermediate activation, keyed 'conv_1', 'pool_1',
        ..., 'pool_3', plus 'concat', which is the same tensor as 'pool_3'.
    """
    # (conv key / variable scope, pool key, number of filters, kernel size)
    stages = (
        ('conv_1', 'pool_1', 32, [7, 7]),
        ('conv_2', 'pool_2', 64, [3, 3]),
        ('conv_3', 'pool_3', 64, [3, 3]),
    )

    net = {}
    with tf.name_scope(scope, 'Conv_lay', [inputs]), \
            scopes.arg_scope([ops.conv2d, ops.fc], is_training=is_training), \
            scopes.arg_scope([ops.conv2d], activation=tf.nn.relu, padding='VALID'):
        current = inputs
        for conv_key, pool_key, depth, kernel in stages:
            current = ops.conv2d(current, depth, kernel, scope=conv_key)
            net[conv_key] = current
            current = ops.max_pool(current, [2, 2])
            net[pool_key] = current
        # The final pooled map is what callers read as the network output.
        net['concat'] = current
    return net
def model(images, inits, num_iterations=3, num_patches=68, patch_shape=(36, 36), num_channels=3, reuse=False):
    """Build the iterative landmark-refinement graph.

    On every iteration, patches are extracted around the current estimate
    (``inits + dx``), run through ``conv_model`` and a softmax-weighted mixing
    block with a residual connection, and turned into a per-landmark offset
    that is accumulated into ``dx``. Variables are created on the first
    iteration and reused on the following ones.

    Args:
        images: image batch tensor. Its static first dimension is used as the
            batch size, so it has to be known when the graph is built.
        inits: initial landmark estimate, added to the accumulated offset;
            presumably shaped (batch, num_patches, 2) -- confirm with caller.
        num_iterations: number of refinement iterations to unroll.
        num_patches: number of landmarks / patches per image. Must be 68:
            the region slices and head sizes below are fixed to that layout.
        patch_shape: (height, width) of every extracted patch.
        num_channels: number of channels of every patch.
        reuse: forwarded to the outer 'models' variable scope.

    Returns:
        A tuple ``(inits + dx, dxs, endpoints)``: the final estimate, the list
        of accumulated offsets after each iteration, and a dict holding the
        'patches' and 'prediction' tensors of the last iteration.

    Raises:
        ValueError: if ``num_patches`` is not 68.
    """
    # The row slices (0:17 ... 48:68), the head widths (10+12+9+20+9+8) and
    # the final reshape only line up for 68 landmarks. Fail here with a clear
    # message instead of a reshape error deep inside graph construction.
    if num_patches != 68:
        raise ValueError(
            'model() is wired for 68 landmarks (region slices and head '
            'sizes); got num_patches=%r' % (num_patches,))

    batch_size = images.get_shape().as_list()[0]
    hidden = tf.zeros((batch_size, 512))
    dx = tf.zeros((batch_size, num_patches, 2))
    endpoints = {}
    dxs = []
    # Custom op; the path is resolved relative to the working directory.
    m_module = tf.load_op_library('./extract_patches.so')

    with tf.variable_scope('models', reuse=reuse):
        for step in range(num_iterations):
            with tf.device('/cpu:0'):
                patches = m_module.extract_patches(images, tf.constant(patch_shape), inits + dx)
            patches = tf.reshape(patches, (batch_size * num_patches, patch_shape[0], patch_shape[1], num_channels))
            endpoints['patches'] = patches

            # Everything that creates variables must sit in a scope with
            # reuse=step > 0, otherwise the second iteration would try to
            # create the same variables again.
            with tf.variable_scope('convnet', reuse=step > 0):
                net = conv_model(patches)
                feats = net['concat']
                num, h, w, c = feats.get_shape().as_list()
                ims_all = tf.reshape(feats, (batch_size, -1))

                # Three 1x1 projections of the conv features.
                proj_a = slim.ops.conv2d(feats, 64, [1, 1], scope='cita')
                proj_b = slim.ops.conv2d(feats, 64, [1, 1], scope='feita')
                proj_c = slim.ops.conv2d(feats, 64, [1, 1], scope='gama')
                proj_a = tf.reshape(proj_a, (batch_size, -1, 64))
                # NOTE(review): this is a plain reshape to (batch, 64, N),
                # not a transpose of (batch, N, 64) -- confirm it is intended.
                proj_b = tf.reshape(proj_b, (batch_size, 64, -1))
                proj_c = tf.reshape(proj_c, (batch_size, -1, 64))

                # Pairwise weights between all positions of all patches of a
                # sample, softmax-normalised over the last axis.
                weights = tf.nn.softmax(tf.matmul(proj_a, proj_b))
                mixed = tf.matmul(weights, proj_c)
                # Scaled by the total element count of the conv feature map.
                mixed = tf.reshape(mixed, (batch_size * num_patches, h, w, 64)) / (num * h * w * c)
                mixed = slim.ops.conv2d(mixed, c, [1, 1], scope='beata')
                # Residual connection back onto the conv features.
                refined = mixed + feats

                # One row of features per landmark, then split into groups.
                # Presumably the 68-point ordering (0-16 jaw, 17-26 brows,
                # 27-35 nose, 36-47 eyes, 48-67 mouth) -- confirm.
                per_patch = tf.reshape(refined, (batch_size, num_patches, -1))
                ims_d = tf.reshape(per_patch[:, 0:17, :], (batch_size, -1))
                ims_t1 = tf.reshape(per_patch[:, 17:27, :], (batch_size, -1))
                ims_t2 = tf.reshape(per_patch[:, 36:48, :], (batch_size, -1))
                ims_m1 = tf.reshape(per_patch[:, 27:36, :], (batch_size, -1))
                ims_m2 = tf.reshape(per_patch[:, 48:68, :], (batch_size, -1))

            with tf.variable_scope('model', reuse=step > 0):
                # Recurrent state: all patch features plus the previous state.
                hidden = slim.ops.fc(tf.concat([ims_all, hidden], 1), 512, scope='rnn', activation=tf.tanh)

                # Group-level features.
                top_f = slim.ops.fc(tf.concat([ims_t1, ims_t2], 1), 512, scope='top', activation=tf.tanh)
                mid_f = slim.ops.fc(tf.concat([ims_m1, ims_m2], 1), 512, scope='mid', activation=tf.tanh)
                down_f = slim.ops.fc(tf.concat([ims_d], 1), 512, scope='down', activation=tf.tanh)

                # Part-level features built from group features and state.
                brows_f = slim.ops.fc(tf.concat([top_f, mid_f, hidden], 1), 256, scope='brow')
                eyes = slim.ops.fc(tf.concat([top_f, mid_f, hidden], 1), 256, scope='eye')
                nose = slim.ops.fc(tf.concat([top_f, mid_f, down_f, hidden], 1), 256, scope='nose')
                mouth = slim.ops.fc(tf.concat([top_f, mid_f, down_f, hidden], 1), 256, scope='mouth')
                l = slim.ops.fc(tf.concat([mid_f, down_f, hidden], 1), 256, scope='l')
                r = slim.ops.fc(tf.concat([mid_f, down_f, hidden], 1), 256, scope='r')

                # Linear heads; their widths add up to 68.
                b_p = slim.ops.fc(tf.concat([brows_f, eyes], 1), 10, scope='brow_p', activation=None)
                e_p = slim.ops.fc(tf.concat([brows_f, eyes], 1), 12, scope='e_p', activation=None)
                n_p = slim.ops.fc(tf.concat([nose, mouth], 1), 9, scope='n_p', activation=None)
                m_p = slim.ops.fc(tf.concat([nose, mouth], 1), 20, scope='m_p', activation=None)
                l_p = slim.ops.fc(tf.concat([l, r], 1), 9, scope='l_p', activation=None)
                r_p = slim.ops.fc(tf.concat([l, r], 1), 8, scope='r_p', activation=None)

                rela = tf.concat([l_p, r_p, b_p, n_p, e_p, m_p], 1)
                # Two outputs (one 2-D offset) per landmark.
                prediction_full = slim.ops.fc(rela, num_patches * 2, scope='full', activation=None)
                endpoints['prediction'] = prediction_full

            prediction = tf.reshape(prediction_full, (batch_size, num_patches, 2))
            # Tensors are immutable, so this rebinds dx to a new tensor and
            # every entry of dxs stays distinct.
            dx += prediction
            dxs.append(dx)

    return inits + dx, dxs, endpoints