# new_american_vs_british.py
# from vox_audio import audiofile_to_input_vector_vox
import pysndfile
from pysndfile import PySndfile
# from pathlib2 import Path
# print(pysndfile.get_sndfile_formats())
import math
import numpy as np
from six.moves import range
import threading
import time
import os
import tensorflow as tf
# from itertools import izip, izip_longest
from vox_audio import audiofile_to_input_vector_vox, audiofile_to_input_vector_ted
def weight_variable(shape, name):
    # Xavier initialisation (tf.contrib) for all weights.
    return tf.get_variable(name, shape=shape,
                           initializer=tf.contrib.layers.xavier_initializer())
    # initial = tf.truncated_normal(shape, stddev=0.1)
    # return tf.Variable(initial)


def bias_variable(shape, name):
    # initial = tf.constant(0.1, shape=shape)
    # return tf.Variable(initial)
    return tf.get_variable(name, shape=shape,
                           initializer=tf.contrib.layers.xavier_initializer())


def conv2d(x, W, stride):
    return tf.nn.conv2d(x, W, strides=stride, padding='VALID')


def max_pool_2x2(x, window, stride):
    # Note: despite the name, this applies *average* pooling (tf.nn.avg_pool).
    return tf.nn.avg_pool(x, ksize=[1, window, 1, 1],
                          strides=[1, stride, 1, 1], padding='VALID')
from tensorflow.python.framework import ops


class FlipGradientBuilder(object):
    """Gradient reversal layer: identity on the forward pass, gradient
    negated and scaled by l on the backward pass."""

    def __init__(self):
        self.num_calls = 0

    def __call__(self, x, l=1.0):
        grad_name = "FlipGradient%d" % self.num_calls

        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            return [tf.negative(grad) * l]

        g = tf.get_default_graph()
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)
        self.num_calls += 1
        return y


flip_gradient = FlipGradientBuilder()
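# The gradient-reversal trick follows the DANN recipe (Ganin & Lempitsky,
# "Unsupervised Domain Adaptation by Backpropagation"): features pass through
# unchanged, but the gradient flowing back from the domain classifier is
# multiplied by -l, so the feature extractor learns to confuse it. A minimal
# usage sketch (the names here are illustrative, not from this file):
#
#     features = extractor(x)
#     reversed_feats = flip_gradient(features, l=0.5)
#     domain_logits = domain_head(reversed_feats)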
def read_my_file_format(filename_queue):
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    # print(value)
    # print(key)
    # Each CSV row is "audio,label"; both columns are parsed as strings.
    record_defaults = [['a'], ['b']]
    audio, label = tf.decode_csv(value, record_defaults=record_defaults)
    # print(tf.as_string(audio))
    # print(tf.as_string(label))
    # The feature extraction itself runs in Python via py_func: one feature
    # matrix (frames x 4960) and one label vector per audio file.
    feats, labels = tf.py_func(audiofile_to_input_vector_vox, [audio, label],
                               [tf.float32, tf.int64])
    # print(feats.get_shape().as_list())
    return feats, labels
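# For reference, a roughly equivalent tf.data pipeline (a sketch only, TF >= 1.4,
# assuming the same "audio,label" CSV layout and the same vox_audio helper):
#
#     dataset = (tf.data.TextLineDataset('clean_american-v2.csv')
#                .map(lambda line: tuple(tf.decode_csv(line, [['a'], ['b']])))
#                .map(lambda audio, label: tuple(tf.py_func(
#                    audiofile_to_input_vector_vox, [audio, label],
#                    [tf.float32, tf.int64]))))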
# def input_pipeline(batch_size=128, num_epochs=100):
#     filename_queue
batch_size = 128
num_epochs = 180
# The target (British) file list cycles through ten times as many epochs as
# the source (American) one.
filename_queue1 = tf.train.string_input_producer(['clean_american-v2.csv'],
                                                 num_epochs=num_epochs, shuffle=True)
filename_queue2 = tf.train.string_input_producer(['clean_british-v2.csv'],
                                                 num_epochs=num_epochs * 10, shuffle=True)
source_batch_x, source_batch_y = read_my_file_format(filename_queue1)
target_batch_x, target_batch_y = read_my_file_format(filename_queue2)
# py_func drops static shape information, so restore it by hand.
source_batch_x.set_shape([None, 4960])
source_batch_y.set_shape([None])
target_batch_x.set_shape([None, 4960])
target_batch_y.set_shape([None])
source_batch_x = tf.reshape(source_batch_x, [-1, 4960, 1, 1])
target_batch_x = tf.reshape(target_batch_x, [-1, 4960, 1, 1])
target_batch_y = tf.reshape(target_batch_y, [-1])
source_batch_y = tf.reshape(source_batch_y, [-1])
# print(source_x.get_shape().as_list())
min_after_dequeue = 10000
capacity = min_after_dequeue + 3 * batch_size
# print(tf.rank(source_x))
source_x, source_y = tf.train.shuffle_batch(
    [source_batch_x, source_batch_y], batch_size=batch_size, capacity=capacity,
    min_after_dequeue=min_after_dequeue, enqueue_many=True, num_threads=3)
target_x, target_y = tf.train.shuffle_batch(
    [target_batch_x, target_batch_y], batch_size=batch_size, capacity=capacity,
    min_after_dequeue=min_after_dequeue, enqueue_many=True, num_threads=3)
# return source_batch_x, source_batch_y, target_batch_x, target_batch_y
# with tf.Session() as sess:
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(coord=coord)
#     for i in range(1200000):
#         example = sess.run([feats_path_source])
#     coord.request_stop()
#     coord.join(threads)
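# With enqueue_many=True, shuffle_batch treats every row of the per-file
# feature matrix as an independent example, so each batch of 128 frames is
# drawn from a shuffled pool of up to `capacity` frames spanning many files.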
num_steps = int(2031250 * 0.6)
class Acoustic_Model(object):
    """Domain-adversarial acoustic model: American English is the source
    domain, British English the target domain."""

    def __init__(self):
        self._build_model()

    def _build_model(self):
        # filename_queue = tf.train.string_input_producer(['american_train.tfrecords', 'british_train.tfrecords'], num_epochs=200)
        # reader = tf.TFRecordReader()
        # Step counter; incremented every time `inc` is evaluated. It drives
        # the lambda and learning-rate schedules below.
        count = tf.Variable(0, name='counter', dtype=tf.float32)
        inc = tf.assign_add(count, 1, name='increment')
        # self.token = tf.Variable(0, name='token', dtype=tf.float32)
        # self.X_source = train_source_x
        # tf.Print(self.X_source, [self.X_source])
        # self.y_source = train_source_y
        # self.X_target = train_target_x
        # self.y_target = train_target_y
        # self.X_source = source_batch_x
        # self.y_source = source_batch_y
        # self.X_target = target_batch_x
        # self.y_target = target_batch_y
        # source_x, source_y = custom_runner.queue.dequeue_many(128)
        # target_x, target_y = custom_runner.queue2.dequeue_many(128)
        # self.X_source = source_x
        # self.token2 = source_x + 1
        # self.y_source = source_batch_y
        # self.X_target = target_batch_x
        # self.y_target = target_batch_y
        # source_x, source_y, target_x, target_y = input_pipeline()
        # First half of the batch is source (American), second half target
        # (British).
        self.X = tf.concat([source_x, target_x], 0)
        self.y = tf.concat([source_y, target_y], 0)
        self.y = tf.cast(self.y, dtype=tf.int32)
        self.y = tf.one_hot(self.y, depth=1677)
        # self.X = tf.reshape(self.X, [None, 31*160, 1, 1])
        # self.domain = tf.concat([self.domain_0, self.domain_1], 0)
        self.train = tf.constant(True, dtype=tf.bool)
        self.l = 2 / (1 + tf.exp(-10 * (inc / num_steps))) - 1
        self.lr = 0.005 / (tf.pow(1 + 10 * (inc / num_steps), 0.75))
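        # These are the standard DANN annealing schedules: with training
        # progress p = step / num_steps,
        #     lambda(p) = 2 / (1 + exp(-10 p)) - 1     grows from 0 toward 1,
        #     lr(p)     = 0.005 / (1 + 10 p)^0.75      decays from 0.005,
        # so the adversarial signal ramps up while the learning rate cools.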
        # self.l = tf.constant(0.4, dtype=tf.float32)
        # self.lr = tf.constant(0.01, dtype=tf.float32)
        self.domain_0 = tf.zeros([128], dtype=tf.int32)
        self.domain_1 = tf.ones([128], dtype=tf.int32)
        rand = tf.random_uniform(shape=[], maxval=1)
        self.domain1 = lambda: tf.concat([self.domain_0, self.domain_1], 0)
        self.domain2 = lambda: tf.concat([self.domain_1, self.domain_0], 0)
        self.domain = tf.cond(tf.greater(rand, 0.9), self.domain2, self.domain1)
        # self.domain = tf.concat([self.domain_0, self.domain_1], 0)
        self.domain = tf.one_hot(self.domain, depth=2)
        self.keep_prob = tf.constant(0.5, dtype=tf.float32)
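        # With probability 0.1 the per-batch domain labels are swapped
        # (domain2), injecting label noise that keeps the domain classifier
        # from becoming over-confident; the remaining 90% of batches use the
        # true labelling (source = 0, target = 1).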
        # X_input = (tf.cast(self.X, tf.float32) - pixel_mean) / 255.
        # CNN model for feature extraction
        with tf.name_scope('feature_extractor'):
            self.W_conv0 = weight_variable([256, 1, 1, 64], 'W_Conv0')
            self.b_conv0 = bias_variable([64], 'b_Conv0')
            # h_conv0 = conv2d(self.X, W_conv0, [1, 31, 1, 1]) + b_conv0
            # h_conv0 = tf.contrib.layers.batch_norm(h_conv0, center=True, scale=True, is_training=self.train)
            # h_conv0 = tf.nn.relu(h_conv0)
            # print(self.X.get_shape().as_list())
            h_conv0 = tf.nn.relu(conv2d(self.X, self.W_conv0, [1, 31, 1, 1]) + self.b_conv0)
            # Note: tf.Print is a no-op here because its return value is
            # discarded; it is kept only as a debugging hook.
            tf.Print(h_conv0, [h_conv0])
            h_pool0 = max_pool_2x2(h_conv0, 2, 2)
            tf.summary.histogram('Conv 0 weight', self.W_conv0)
            tf.summary.histogram('Conv 0 bias', self.b_conv0)
            tf.summary.histogram('Conv 0 activation', h_conv0)
            self.W_conv1 = weight_variable([15, 1, 64, 128], 'W_conv1')
            self.b_conv1 = bias_variable([128], 'b_conv1')
            # h_conv1 = conv2d(h_pool0, W_conv1, [1, 1, 1, 1]) + b_conv1
            # h_conv1 = tf.contrib.layers.batch_norm(h_conv1, center=True, scale=True, is_training=self.train)
            # h_conv1 = tf.nn.relu(h_conv1)
            h_conv1 = tf.nn.tanh(conv2d(h_pool0, self.W_conv1, [1, 1, 1, 1]) + self.b_conv1)
            h_pool1 = max_pool_2x2(h_conv1, 2, 2)
            tf.summary.histogram('Conv 1 weight', self.W_conv1)
            tf.summary.histogram('Conv 1 bias', self.b_conv1)
            tf.summary.histogram('Conv 1 activation', h_conv1)
            a, b, c, d = h_pool1.get_shape().as_list()
        # The domain-invariant feature
        self.feature = tf.reshape(h_pool1, [-1, b * c * d])
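        # The commented reshape above suggests each 4960-dim input packs
        # 31 x 160 = 4960 values per example; the first convolution
        # (kernel 256, stride 31) slides along that axis, and the two
        # conv + average-pool stages produce the flattened b*c*d vector
        # shared by both heads below.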
        # MLP for class prediction
        with tf.name_scope('label_predictor'):
            # Switches to route target examples (second half of batch)
            # differently depending on train or test mode. Currently unused:
            # both halves of the batch are classified.
            all_features = lambda: self.feature
            source_features = lambda: tf.slice(self.feature, [0, 0], [batch_size, -1])
            # classify_feats = tf.cond(self.train, source_features, all_features)
            classify_feats = self.feature
            all_labels = lambda: self.y
            source_labels = lambda: tf.slice(self.y, [0, 0], [batch_size, -1])
            # self.classify_labels = tf.cond(self.train, source_labels, all_labels)
            self.classify_labels = self.y
            # Seven 2048-wide fully connected ReLU layers with dropout,
            # followed by a 1677-way output layer.
            self.W_fc0 = weight_variable([b * c * d, 1024 * 2], 'W_fc0')
            self.b_fc0 = bias_variable([1024 * 2], 'b_fc0')
            h_fc0 = tf.nn.relu(tf.matmul(classify_feats, self.W_fc0) + self.b_fc0)
            tf.summary.histogram('fc0 weight', self.W_fc0)
            tf.summary.histogram('fc0 bias', self.b_fc0)
            tf.summary.histogram('fc0 activation', h_fc0)
            h_fc0 = tf.nn.dropout(h_fc0, self.keep_prob)
            self.W_fc1 = weight_variable([1024 * 2, 1024 * 2], 'W_fc1')
            self.b_fc1 = bias_variable([1024 * 2], 'b_fc1')
            h_fc1 = tf.nn.relu(tf.matmul(h_fc0, self.W_fc1) + self.b_fc1)
            tf.summary.histogram('fc1 weight', self.W_fc1)
            tf.summary.histogram('fc1 bias', self.b_fc1)
            tf.summary.histogram('fc1 activation', h_fc1)
            h_fc1 = tf.nn.dropout(h_fc1, self.keep_prob)
            self.W_fc2 = weight_variable([1024 * 2, 1024 * 2], 'W_fc2')
            self.b_fc2 = bias_variable([1024 * 2], 'b_fc2')
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fc2) + self.b_fc2)
            tf.summary.histogram('fc2 weight', self.W_fc2)
            tf.summary.histogram('fc2 bias', self.b_fc2)
            tf.summary.histogram('fc2 activation', h_fc2)
            h_fc2 = tf.nn.dropout(h_fc2, self.keep_prob)
            self.W_fc3 = weight_variable([1024 * 2, 1024 * 2], 'W_fc3')
            self.b_fc3 = bias_variable([1024 * 2], 'b_fc3')
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fc3) + self.b_fc3)
            tf.summary.histogram('fc3 weight', self.W_fc3)
            tf.summary.histogram('fc3 bias', self.b_fc3)
            tf.summary.histogram('fc3 activation', h_fc3)
            h_fc3 = tf.nn.dropout(h_fc3, self.keep_prob)
            self.W_fc4 = weight_variable([1024 * 2, 1024 * 2], 'W_fc4')
            self.b_fc4 = bias_variable([1024 * 2], 'b_fc4')
            h_fc4 = tf.nn.relu(tf.matmul(h_fc3, self.W_fc4) + self.b_fc4)
            tf.summary.histogram('fc4 weight', self.W_fc4)
            tf.summary.histogram('fc4 bias', self.b_fc4)
            tf.summary.histogram('fc4 activation', h_fc4)
            h_fc4 = tf.nn.dropout(h_fc4, self.keep_prob)
            self.W_fc5 = weight_variable([1024 * 2, 1024 * 2], 'W_fc5')
            self.b_fc5 = bias_variable([1024 * 2], 'b_fc5')
            h_fc5 = tf.nn.relu(tf.matmul(h_fc4, self.W_fc5) + self.b_fc5)
            tf.summary.histogram('fc5 weight', self.W_fc5)
            tf.summary.histogram('fc5 bias', self.b_fc5)
            tf.summary.histogram('fc5 activation', h_fc5)
            h_fc5 = tf.nn.dropout(h_fc5, self.keep_prob)
            self.W_fc6 = weight_variable([1024 * 2, 1024 * 2], 'W_fc6')
            self.b_fc6 = bias_variable([1024 * 2], 'b_fc6')
            h_fc6 = tf.nn.relu(tf.matmul(h_fc5, self.W_fc6) + self.b_fc6)
            tf.summary.histogram('fc6 weight', self.W_fc6)
            tf.summary.histogram('fc6 bias', self.b_fc6)
            tf.summary.histogram('fc6 activation', h_fc6)
            h_fc6 = tf.nn.dropout(h_fc6, self.keep_prob)
            self.W_fc7 = weight_variable([1024 * 2, 1677], 'W_fc7')
            self.b_fc7 = bias_variable([1677], 'b_fc7')
            logits = tf.matmul(h_fc6, self.W_fc7) + self.b_fc7
            tf.summary.histogram('fc7 weight', self.W_fc7)
            tf.summary.histogram('fc7 bias', self.b_fc7)
            tf.summary.histogram('fc7 activation', logits)
        with tf.name_scope('predicted_loss'):
            self.pred = tf.nn.softmax(logits)
            self.pred_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=self.classify_labels)
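            # softmax_cross_entropy_with_logits returns one loss value per
            # example; the scalar training loss is formed later with
            # tf.reduce_mean.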
        # Small MLP for domain prediction with adversarial loss
        with tf.name_scope('domain_predictor'):
            # Flip the gradient when backpropagating through this operation
            feat = flip_gradient(self.feature, self.l)
            d_W_fc0 = weight_variable([b * c * d, 1024 * 2], 'd_W_fc0')
            d_b_fc0 = bias_variable([1024 * 2], 'd_b_fc0')
            d_h_fc0 = tf.nn.relu(tf.matmul(feat, d_W_fc0) + d_b_fc0)
            tf.summary.histogram('domain fc0 weights', d_W_fc0)
            tf.summary.histogram('domain fc0 bias', d_b_fc0)
            tf.summary.histogram('domain fc0 activation', d_h_fc0)
            d_h_fc0 = tf.nn.dropout(d_h_fc0, keep_prob=self.keep_prob)
            d_W_fc1 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc1')
            d_b_fc1 = bias_variable([1024 * 2], 'd_b_fc1')
            d_h_fc1 = tf.nn.relu(tf.matmul(d_h_fc0, d_W_fc1) + d_b_fc1)
            tf.summary.histogram('domain fc1 weights', d_W_fc1)
            tf.summary.histogram('domain fc1 bias', d_b_fc1)
            tf.summary.histogram('domain fc1 activation', d_h_fc1)
            d_h_fc1 = tf.nn.dropout(d_h_fc1, keep_prob=self.keep_prob)
            d_W_fc2 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc2')
            d_b_fc2 = bias_variable([1024 * 2], 'd_b_fc2')
            d_h_fc2 = tf.nn.relu(tf.matmul(d_h_fc1, d_W_fc2) + d_b_fc2)
            tf.summary.histogram('domain fc2 weights', d_W_fc2)
            tf.summary.histogram('domain fc2 bias', d_b_fc2)
            tf.summary.histogram('domain fc2 activation', d_h_fc2)
            d_h_fc2 = tf.nn.dropout(d_h_fc2, keep_prob=self.keep_prob)
            d_W_fc3 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc3')
            d_b_fc3 = bias_variable([1024 * 2], 'd_b_fc3')
            d_h_fc3 = tf.nn.relu(tf.matmul(d_h_fc2, d_W_fc3) + d_b_fc3)
            tf.summary.histogram('domain fc3 weights', d_W_fc3)
            tf.summary.histogram('domain fc3 bias', d_b_fc3)
            tf.summary.histogram('domain fc3 activation', d_h_fc3)
            d_h_fc3 = tf.nn.dropout(d_h_fc3, keep_prob=self.keep_prob)
            d_W_fc4 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc4')
            d_b_fc4 = bias_variable([1024 * 2], 'd_b_fc4')
            d_h_fc4 = tf.nn.relu(tf.matmul(d_h_fc3, d_W_fc4) + d_b_fc4)
            tf.summary.histogram('domain fc4 weights', d_W_fc4)
            tf.summary.histogram('domain fc4 bias', d_b_fc4)
            tf.summary.histogram('domain fc4 activation', d_h_fc4)
            d_h_fc4 = tf.nn.dropout(d_h_fc4, keep_prob=self.keep_prob)
            d_W_fc5 = weight_variable([1024 * 2, 2], 'd_W_fc5')
            d_b_fc5 = bias_variable([2], 'd_b_fc5')
            d_logits = tf.matmul(d_h_fc4, d_W_fc5) + d_b_fc5
            tf.summary.histogram('domain fc5 weight', d_W_fc5)
            tf.summary.histogram('domain fc5 bias', d_b_fc5)
            tf.summary.histogram('domain fc5 activation', d_logits)

        with tf.name_scope('domain_loss'):
            self.domain_pred = tf.nn.softmax(d_logits)
            self.domain_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=d_logits, labels=self.domain)
graph = tf.get_default_graph()
with graph.as_default():
    model = Acoustic_Model()
    # learning_rate = tf.placeholder(tf.float32, [])
    learning_rate = model.lr
    pred_loss = tf.reduce_mean(model.pred_loss)
    domain_loss = tf.reduce_mean(model.domain_loss)
    total_loss = pred_loss + domain_loss

    with tf.name_scope('Training'):
        regular_train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(pred_loss)
        dann_train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(total_loss)

    # Evaluation
    correct_label_pred = tf.equal(tf.argmax(model.classify_labels, 1), tf.argmax(model.pred, 1))
    label_acc = tf.reduce_mean(tf.cast(correct_label_pred, tf.float32))
    correct_domain_pred = tf.equal(tf.argmax(model.domain, 1), tf.argmax(model.domain_pred, 1))
    domain_acc = tf.reduce_mean(tf.cast(correct_domain_pred, tf.float32))

    tf.summary.scalar('prediction loss', pred_loss)
    tf.summary.scalar('domain loss', domain_loss)
    tf.summary.scalar('total loss', total_loss)
    tf.summary.scalar('label accuracy', label_acc)
    tf.summary.scalar('domain accuracy', domain_acc)
    summary_op = tf.summary.merge_all()
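    # dann_train_op minimises label loss + domain loss; because the domain
    # head sits behind flip_gradient, this single op trains the domain
    # classifier to separate accents while pushing the shared features to be
    # accent-invariant. regular_train_op (label loss only) is kept as a
    # non-adversarial baseline but is not run below.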
with tf.Session(graph=graph, config=tf.ConfigProto(intra_op_parallelism_threads=12,
                                                   allow_soft_placement=True)) as sess:
    print('entered')
    # Local variables hold the epoch counters of the input queues.
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    print('initialized')
    coord = tf.train.Coordinator()
    print('initialized coordinator')
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print('starting session')
    saver = tf.train.Saver()
    sess.graph.finalize()
    k = 0
    avg_loss = []
    avg_accuracy = []
    avg_domain_loss = []
    avg_domain_accuracy = []
    for i in range(num_steps):
        _, ploss, dloss, l_acc, d_acc, lam = sess.run(
            [dann_train_op, pred_loss, domain_loss, label_acc, domain_acc, model.l])
        avg_loss.append(ploss)
        avg_accuracy.append(l_acc)
        avg_domain_loss.append(dloss)
        avg_domain_accuracy.append(d_acc)
        if i % 10000 == 0:
            print('epoch:%d/%d ploss:%f dloss:%f accuracy:%f avg_loss:%f '
                  'avg_accuracy:%f avg_domain_loss:%f avg_domain_accuracy:%f '
                  'l_parameter:%f'
                  % (k, i % 234375, ploss, dloss, l_acc,
                     sum(avg_loss) / len(avg_loss),
                     sum(avg_accuracy) / len(avg_accuracy),
                     sum(avg_domain_loss) / len(avg_domain_loss),
                     sum(avg_domain_accuracy) / len(avg_domain_accuracy), lam))
        if i % 234375 == 0:
            # Every 234375 steps counts as one "epoch": advance the counter k
            # and reset the running averages.
            k = k + 1
            avg_loss = []
            avg_accuracy = []
        if i % 10000 == 0:
            save_path = saver.save(sess, 'saved_model_baseline_adver/baseline_model.ckpt')
    coord.request_stop()
    coord.join(threads)
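# Expected inputs (not created by this script): clean_american-v2.csv and
# clean_british-v2.csv, each listing "audio,label" pairs consumed by
# vox_audio.audiofile_to_input_vector_vox, plus a saved_model_baseline_adver/
# directory for checkpoints (tf.train.Saver may not create missing
# directories, depending on the TF version).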