def start(self):
    """Build the network, train it, pickle the graph and write predictions.

    The method runs the whole experiment end to end:

    1. Builds a stack of ``Linear`` layers (sizes in ``DIMS``) with the
       activations listed in ``FUNCS`` and a softmax cross-entropy cost.
    2. Trains with momentum SGD on ``train_train.hdf5`` while monitoring
       the training stream and the ``train_validate.hdf5`` stream.
    3. Pickles the computation graph to ``zzz.pkl``.
    4. Runs the trained network on ``train_test_features.npy`` and writes
       per-frame labels to ``hw1_outz.txt`` and per-utterance sequences
       to ``hw2_outz.txt``.

    ``self`` is not read. The method relies on module-level names such as
    ``BNUM``, ``SLEN``, ``LABEL``, ``PATH`` and ``SHELVE``.
    """
    # ------------------------------------------------------------------
    # Symbolic inputs
    # ------------------------------------------------------------------
    xx = T.matrix('features', config.floatX)
    yy = T.imatrix('targets')

    # Drop trailing rows so the row count is a multiple of BNUM, then view
    # the batch as (zm // BNUM, BNUM, n_features) / (zm // BNUM, BNUM).
    zm = BNUM * (xx.shape[0] // BNUM)
    x = xx[:zm].reshape((BNUM, zm // BNUM, xx.shape[1])).dimshuffle(1, 0, 2)
    y = yy[:zm].reshape((BNUM, zm // BNUM)).dimshuffle(1, 0)

    # ------------------------------------------------------------------
    # Network definition
    # ------------------------------------------------------------------
    DIMS = [108 * 5, 200, 200, 200, LABEL]
    # Per-layer multiplier on the Linear output width; build_layer raises
    # it to 4x for an LSTM layer.
    NUMS = [1, 1, 1, 1, 1]
    # One entry per Linear layer. None means "no activation" (raw linear
    # output). build_layer also understands SimpleRecurrent, LSTM and
    # SequenceGenerator.
    FUNCS = [
        Rectifier,
        Rectifier,
        Rectifier,
        None,
    ]

    def build_layer(i, inp, func):
        """Apply layer ``i`` (Linear followed by ``func``) to ``inp``.

        ``func`` is a brick class or ``None``; the return value is the
        symbolic output of the layer (``None`` for SequenceGenerator,
        which is only constructed here, never applied).
        """
        if func is LSTM:
            NUMS[i + 1] *= 4

        # Initialisation scale: input width, plus the recurrent width for
        # recurrent layers.
        sdim = DIMS[i]
        if func is SimpleRecurrent or func is LSTM:
            sdim = DIMS[i] + DIMS[i + 1]
        std = sdim ** (-0.5)

        linear = Linear(input_dim=DIMS[i],
                        output_dim=DIMS[i + 1] * NUMS[i + 1],
                        weights_init=IsotropicGaussian(std=std),
                        biases_init=IsotropicGaussian(std=std),
                        name='Lin{}'.format(i))
        linear.initialize()

        if func is None:
            return linear.apply(inp)

        if func is SimpleRecurrent:
            gong = func(dim=DIMS[i + 1], activation=Rectifier(),
                        weights_init=IsotropicGaussian(std=std))
            gong.initialize()
            return gong.apply(linear.apply(inp))

        if func is LSTM:
            gong = func(dim=DIMS[i + 1], activation=Tanh(),
                        weights_init=IsotropicGaussian(std=std))
            gong.initialize()
            print(inp)
            ret, _ = gong.apply(
                linear.apply(inp),
                T.zeros((inp.shape[1], DIMS[i + 1])),
                T.zeros((inp.shape[1], DIMS[i + 1])),
            )
            return ret

        if func is SequenceGenerator:
            # Constructed but never applied; the layer yields None.
            func(
                readout=None,
                transition=SimpleRecurrent(
                    dim=100, activation=Rectifier(),
                    weights_init=IsotropicGaussian(std=0.1)))
            return None

        # Plain activation brick taking no constructor arguments.
        return func().apply(linear.apply(inp))

    y_hat = x
    for layer_idx in range(len(DIMS) - 1):
        y_hat = build_layer(layer_idx, y_hat, FUNCS[layer_idx])

    # ------------------------------------------------------------------
    # Cost and monitored quantities
    # ------------------------------------------------------------------
    n_frames = y.shape[0] * y.shape[1]
    n_out = y_hat.shape[0] * y_hat.shape[1]

    # *_rsp keeps the network's axis order; *_dsf_rsp swaps the two leading
    # axes back before flattening.
    y_rsp = y.reshape((n_frames,))
    y_dsf_rsp = y.dimshuffle(1, 0).reshape((n_frames,))
    yh_rsp = y_hat.reshape((n_out, y_hat.shape[2]))
    yh_dsf_rsp = y_hat.dimshuffle(1, 0, 2).reshape((n_out, y_hat.shape[2]))

    sfmx = Softmax().apply(yh_rsp)
    cost = CategoricalCrossEntropy().apply(y_rsp, sfmx)
    cost = cost.astype(config.floatX)
    cost.name = 'cost'

    cg = ComputationGraph(cost)

    # Lookup table: each of the 48 ids pushed through
    # id2ph -> ph48239 -> ph2id.
    mps = theano.shared(
        np.array([ph2id(ph48239(id2ph(t))) for t in range(48)]))

    z_hat = T.argmax(yh_dsf_rsp, axis=1)

    # Map targets and predictions through the lookup table element-wise.
    y39, _ = scan(fn=lambda t: mps[t], outputs_info=None,
                  sequences=[y_dsf_rsp])
    y_hat39, _ = scan(fn=lambda t: mps[t], outputs_info=None,
                      sequences=[z_hat])
    y_hat_hat39 = y_hat39

    # The same expressions are built twice so that the training monitor
    # ('0/1 ...') and the validation monitor ('2/3 ...') each get a
    # variable with its own name.
    lost01 = (T.sum(T.neq(y_hat39, y39)) / y39.shape[0]).astype(config.floatX)
    lost01.name = '0/1 loss'
    lost23 = (T.sum(T.neq(y_hat39, y39)) / y39.shape[0]).astype(config.floatX)
    lost23.name = '2/3 loss'

    edit01 = EditDistance()(y39, y_hat_hat39).astype(config.floatX)
    edit01.name = '0/1 edit'
    edit23 = EditDistance()(y39, y_hat_hat39).astype(config.floatX)
    edit23.name = '2/3 edit'

    Ws = cg.parameters
    print(list(Ws))
    norms = sum(w.norm(2) for w in Ws)
    norms = norms.astype(config.floatX)
    norms.name = 'norms'

    # ------------------------------------------------------------------
    # Data streams and training
    # ------------------------------------------------------------------
    path = pjoin(PATH['fuel'], 'train_train.hdf5')
    data = H5PYDataset(path, which_set='train', load_in_memory=True,
                       subset=slice(0, 100000))
    data_v = H5PYDataset(pjoin(PATH['fuel'], 'train_validate.hdf5'),
                         which_set='validate', load_in_memory=True)
    num = data.num_examples
    data_stream = DataStream(
        data,
        iteration_scheme=ShuffledScheme(num, batch_size=SLEN * BNUM))
    data_stream_v = DataStream(
        data_v,
        iteration_scheme=SequentialScheme(data_v.num_examples,
                                          batch_size=SLEN * BNUM))

    algo = GradientDescent(cost=cost, params=Ws, step_rule=CompositeRule([
        Momentum(0.005, 0.9),
    ]))

    monitor = DataStreamMonitoring(
        variables=[cost, lost01, edit01, norms],
        data_stream=data_stream)
    monitor_v = DataStreamMonitoring(
        variables=[lost23, edit23],
        data_stream=data_stream_v)
    plt = Plot('AlpYap',
               channels=[['0/1 loss', '2/3 loss'], ['0/1 edit', '2/3 edit']],
               after_epoch=True)

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=algo,
        extensions=[monitor, monitor_v, FinishAfter(after_n_epochs=2000),
                    Printing(), plt])
    main_loop.run()

    # Context manager guarantees the file is closed even if dump() raises.
    with open('zzz.pkl', 'wb') as pfile:
        pickle.dump(cg, pfile)

    # ------------------------------------------------------------------
    # Test-set inference
    # ------------------------------------------------------------------
    test_feat = np.load(
        pjoin(PATH['numpy'], 'train_test_features.npy')).astype(config.floatX)
    func = theano.function([xx], y_hat.astype(config.floatX))

    # NOTE(review): 19 chunks of 10000 rows is hard-coded; presumably sized
    # for the shipped test file - confirm before using other data.
    test_hat = []
    for chunk in range(19):
        tmp = func(test_feat[chunk * 10000:(chunk + 1) * 10000])
        # Swap the leading axes back and flatten to (rows, LABEL).
        rows, width = tmp.shape[0] * tmp.shape[1], tmp.shape[2]
        test_hat.append(tmp.transpose((1, 0, 2)).reshape((rows, width)))
    test_hat = np.concatenate(test_hat, axis=0)
    # NOTE(review): two all-zero rows are appended; presumably this makes up
    # for rows dropped by the BNUM truncation above - confirm.
    test_hat = np.concatenate((test_hat, np.zeros((2, LABEL))), axis=0)

    # NOTE(review): config.floatX is passed positionally here, unlike the
    # T.matrix call above - check it lands on the intended parameter.
    alpha = T.tensor3(config.floatX)
    beta = alpha.argmax(axis=2)
    func2 = theano.function([alpha], beta)

    # Utterance names, their lengths, and one '<name>_<k>' tag per frame
    # (k starts at 1).
    lens = []
    tags = []
    with shelve.open(SHELVE['test']) as f:
        names = f['names']
        for n in names:
            lens.append(len(f[n]))
            for k in range(lens[-1]):
                tags.append(n + '_' + str(k + 1))

    seq = []
    seq2 = []
    nowcnt = 0
    for length in lens:
        nxt = nowcnt + length
        cur_hat = test_hat[nowcnt:nxt].reshape(
            (length, 1, LABEL)).astype(config.floatX)
        nowcnt = nxt

        frame_labels = []
        frame_chars = []
        for j in func2(cur_hat).flatten():
            # Map once and reuse for both outputs.
            label = ph48239(id2ph(j))
            frame_labels.append(label)
            frame_chars.append(ph2c(label))
        seq.append(frame_labels)
        seq2.append(''.join(trim(frame_chars)))

    seq_flat = np.concatenate(seq)

    with open('hw1_outz.txt', 'w') as f:
        f.write('id,prediction\n')
        for tag, label in zip(tags, seq_flat):
            f.write(tag + ',' + label + '\n')

    with open('hw2_outz.txt', 'w') as f:
        f.write('id,phone_sequence\n')
        for name, phones in zip(names, seq2):
            f.write(name + ',' + phones + '\n')