import numpy as np

from utils import load_data, optimizer, Accuracy

# NOTE(review): LogisticRegression is used below but never imported in this
# script — confirm it is provided elsewhere (e.g. a model module) and add the
# proper import there.

# Reproducibility: fix the NumPy RNG seed (numpy was used here but never
# imported — that was a straight NameError at runtime).
np.random.seed(2020)

# Data generation
train_data, test_data = load_data('RedWine')
x_train, y_train = train_data[0], train_data[1]
x_test, y_test = test_data[0], test_data[1]

# Hyper-parameters
_epoch = 1000
_batch_size = 32
_lr = 0.001
_optim = 'SGD'

# Build model
model = LogisticRegression(num_features=x_train.shape[1])
# Bind the instance to a distinct name: the original re-bound `optimizer`,
# shadowing the imported factory and making any second call impossible.
optim = optimizer(_optim)

# Solve
print('Train start!')
model.fit(x=x_train, y=y_train, epochs=_epoch, batch_size=_batch_size,
          lr=_lr, optim=optim)
print('Trained done.')

# Inference
print('Predict on test data')
inference = model.eval(x_test)

# Assess model: Accuracy returns a score (higher is better), so name it that
# way rather than `error`.
accuracy = Accuracy(inference, y_test)
print('Accuracy on Test Data : %.4f' % accuracy)
1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100 ] _lr = 0.01 #search_param이 lr인 경우에는 batch_size는 32로 고정, batch_size인 경우에는 lr을 0.01로 고정 # ============================================================ train_results = [] test_results = [] search_space = _lr if search_param == 'lr' else _batch_size for i, space in enumerate(search_space): # Build model model = LinearRegression(num_features=x_train_data.shape[1]) optim = optimizer(_optim) # Train model with gradient descent if search_param == 'lr': model.numerical_solution(x=x_train_data, y=y_train_data, epochs=_epoch, batch_size=_batch_size, lr=space, optim=optim) else: model.numerical_solution(x=x_train_data, y=y_train_data, epochs=_epoch, batch_size=space, lr=_lr,
# OPTIMIZER OPTIMIZER = 'SGD' # ============================================================ assert DATA_NAME in ['Titanic', 'Digit'] assert OPTIMIZER in ['SGD', 'Momentum', 'RMSProp'] # Load dataset, model and evaluation metric train_data, test_data, logistic_regression, metric = _initialize(DATA_NAME) train_x, train_y = train_data num_data, num_features = train_x.shape print('# of Training data : ', num_data) # Make model & optimizer model = logistic_regression(num_features) optim = optimizer(OPTIMIZER, gamma=gamma, epsilon=epsilon) # TRAIN loss = model.train(train_x, train_y, num_epochs, batch_size, learning_rate, optim) print('Training Loss at last epoch: %.2f' % loss) # EVALUATION test_x, test_y = test_data pred = model.eval(test_x) ACC = metric(pred, test_y) print(OPTIMIZER, ' ACC on Test Data : %.3f' % ACC)
def train(self, dataset_path, num_classes, batch_size, lr_base, lr_decay,
          step_size, max_iteration, pretrained_model=None):
    '''
    @description: build the VGG-Net16 graph, train the model, emit
                  training logs/summaries, and periodically save checkpoints.
    @params:
        - dataset_path: directory containing the txt list files for the
          training and validation sets
        - num_classes: number of output classes
        - batch_size: number of samples fed to the network per training step
        - lr_base: initial learning rate
        - lr_decay: learning-rate decay factor
        - step_size: decay speed; lr = lr_base * lr_decay ^ (global_step / step_size)
        - max_iteration: maximum number of training iterations
        - pretrained_model: path of a pretrained checkpoint to restore, if any
    @return: None
    '''
    train_file_name = dataset_path + 'train_list.txt'
    valid_file_name = dataset_path + 'valid_list.txt'
    # NOTE(review): these prefixes are concatenated below without a trailing
    # '/', so outputs land in e.g. './log/vggtrain' and './model/vggmodel.ckpt'
    # — confirm this is intended and not a missing separator.
    log_dir = './log/vgg'
    model_dir = './model/vgg'
    vgg = VGG(weight_decay=0.0005, keep_prob=0.5, num_classes=num_classes)
    train_summary_list = []
    valid_summary_list = []
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        with tf.name_scope('input'):
            # Queue-based readers for training/validation batches.
            # Validation uses a fixed batch of 250 and no augmentation/shuffle.
            train_image, train_label = get_batch(train_file_name, self._image_H,
                                                 self._image_W, batch_size)
            valid_image, valid_label = get_batch(valid_file_name, self._image_H,
                                                 self._image_W, 250, is_train=False)
            x = tf.placeholder(tf.float32, [None, self._image_H, self._image_W,
                                            self._image_channels], name='x')
            y = tf.placeholder(tf.int64, [None], name='y')
        # loss, accuracy, train_op
        logits, _ = vgg.vgg16(x)
        loss = utils.calc_loss(logits, y)
        accuracy = utils.calc_accuracy(logits, y)
        train_op, learning_rate, global_step = utils.optimizer(
            lr_base, step_size, lr_decay, loss)
        # Summaries: same loss/accuracy tensors, tagged separately for the
        # train and validation writers; histograms only on validation.
        train_summary_list.append(tf.summary.scalar('train_loss', loss))
        valid_summary_list.append(tf.summary.scalar('valid_loss', loss))
        train_summary_list.append(
            tf.summary.scalar('train_accuracy', accuracy))
        valid_summary_list.append(
            tf.summary.scalar('test_accuracy', accuracy))
        train_summary_list.append(
            tf.summary.scalar('learning rate', learning_rate))
        valid_summary_list.append(
            tf.summary.scalar('learning rate', learning_rate))
        for var in tf.trainable_variables():
            valid_summary_list.append(tf.summary.histogram(var.name, var))
        train_summary = tf.summary.merge(train_summary_list)
        valid_summary = tf.summary.merge(valid_summary_list)
        # session
        saver = tf.train.Saver(max_to_keep=50)
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                              log_device_placement=True)) as sess:
            train_writer = tf.summary.FileWriter(log_dir + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(log_dir + 'valid')
            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()
            # Start the input-queue threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # Restore a pretrained model if one was given.
            if pretrained_model != None:
                ckpt = tf.train.get_checkpoint_state(pretrained_model)
                print('Restoring pretrained model: %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            train_time = 0
            for step in range(max_iteration):
                # Frozen-graph export — kept for reference, disabled.
                # graph_def = tf.get_default_graph().as_graph_def()
                # output_graph_def = graph_util.convert_variables_to_constants(sess,graph_def,['input/x','deepid/Relu'])
                # with tf.gfile.GFile(model_dir+'deepid_model.pb','wb') as file:
                #     file.write(output_graph_def.SerializeToString())
                # break
                start_time = time.time()
                image, label = sess.run([train_image, train_label])
                _, train_loss, summary_str, train_step = sess.run(
                    [train_op, loss, train_summary, global_step],
                    feed_dict={
                        x: image,
                        y: label
                    })
                train_writer.add_summary(summary_str, global_step=train_step)
                train_writer.flush()
                duration = time.time() - start_time
                train_time += duration
                # Validate and checkpoint every 1000 steps and on the final step.
                if step % 1000 == 0 or (step + 1) == max_iteration:
                    image, label = sess.run([valid_image, valid_label])
                    lr, summary_str, valid_loss, validation_accuracy, \
                        train_step = sess.run([learning_rate, valid_summary,
                                               loss, accuracy, global_step],
                                              feed_dict={x: image, y: label})
                    test_writer.add_summary(summary_str, global_step=train_step)
                    test_writer.flush()
                    print('Step %d: train loss = %.3f, valid loss = %.3f,valid accuracy = %.3f%%, lr = %.6f (%.3f sec)' %
                          (train_step, train_loss, valid_loss, validation_accuracy,
                           lr, train_time))
                    saver.save(sess, model_dir + 'model.ckpt', global_step=train_step)
                    with open(log_dir + 'valid_result.txt', 'at') as file_writer:
                        file_writer.write('%d\t%.3f%%\t%.5f\t%d\r\n' %
                                          (train_step, validation_accuracy, lr, train_time))
            # Shut down the queue threads cleanly.
            coord.request_stop()
            coord.join(threads)
def train_model(
        max_epochs=5,  # The maximum number of epoch to run
        decay_c=0.,  # Weight decay for weights
        lrate=1e-4,  # Learning rate for sgd (not used for adadelta and rmsprop)
        batch_size=16,  # The batch size during training
        valid_batch_size=64,  # The batch size used for test set
):
    """Build the Theano model and train it with shuffled minibatches.

    Runs for at most max_epochs (interruptible via Ctrl-C) and scores the
    test split with pred_error once per epoch.
    """
    # Snapshot the hyper-parameters. Must be the first statement so that
    # locals() contains exactly the keyword arguments above.
    model_options = locals().copy()

    dataparams, train, test = load_data.load_data()
    model_options.update(dataparams)

    print('Building model...')
    tparams = init_tparams(init_params(model_options))
    (u, l, q, f_score, cost) = build_model(tparams, model_options)
    args = [u, l, q]

    def _weight_decay(coeff):
        # L2 penalty: sum of squared entries of every parameter tensor,
        # scaled by a shared scalar coefficient.
        shared_coeff = theano.shared(np.asarray(coeff, dtype=config.floatX),
                                     name='decay_c')
        penalty = 0.
        for name in tparams:
            penalty += (tparams[name] ** 2).sum()
        return shared_coeff * penalty

    cost += _weight_decay(decay_c)

    grads = T.grad(cost, wrt=list(tparams.values()))
    lr = T.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, args, cost)

    kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, shuffle=True)

    print('Training...')
    uidx = 0
    start = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0
            logging.info('Time: %s' % (time.time() - start))
            # Reshuffle the training set at the start of every epoch.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                batch_u = [train[0][t] for t in train_index]
                batch_l = [train[1][t] for t in train_index]
                batch_q = [train[2][t] for t in train_index]
                n_samples += len(batch_u)
                batch_cost = f_grad_shared(batch_u, batch_l, batch_q)
                f_update(lrate)
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', batch_cost)
            logging.info('------ Epoch: %d, Update(cls): %d -------' % (eidx, uidx))
            pred_error(f_score, test, kf_test)
            logging.info('------------------------------------')
    except KeyboardInterrupt:
        print("Training interupted")
def train(input_tfr_pool, val_tfr_pool, out_dir, log_dir, mean, sbatch, wd):
    """Train Multi-View Network for a number of steps.

    Args:
        input_tfr_pool: list of training tfrecord files.
        val_tfr_pool: list of validation tfrecord files; may be empty/None,
            in which case validation is skipped entirely.
        out_dir: directory where model checkpoints are written.
        log_dir: directory for TensorBoard summaries.
        mean: mean value/image used by the input pipeline for normalization.
        sbatch: batch size.
        wd: weight decay coefficient passed to the loss.
    """
    log_freq = 100
    val_freq = 1000
    model_save_freq = 10000
    tf.logging.set_verbosity(tf.logging.ERROR)

    # maximum epochs
    total_iters = 140001
    # Piecewise-constant LR schedule: 50% / 40% / 10% of total iterations.
    lrs = [0.01, 0.001, 0.0001]
    steps = [
        int(total_iters * 0.5),
        int(total_iters * 0.4),
        int(total_iters * 0.1)
    ]
    # set config file
    config = tf.ConfigProto(log_device_placement=False)
    with tf.Graph().as_default():
        sys.stderr.write("Building Network ... \n")
        global_step = tf.contrib.framework.get_or_create_global_step()
        images, gt_2d, gt_3d, gt_occ = create_bb_pip(input_tfr_pool, 1000,
                                                     sbatch, mean, shuffle=True)

        # inference model
        k2d_dim = gt_2d.get_shape().as_list()[1]
        k3d_dim = gt_3d.get_shape().as_list()[1]
        pred_key = sk_net.infer_os(images, 36, tp=True)

        # Calculate loss
        total_loss, data_loss = sk_net.L2_loss_os(pred_key,
                                                  [gt_2d, gt_3d, gt_occ],
                                                  weight_decay=wd)
        train_op, _ = optimizer(total_loss, global_step, lrs, steps)
        sys.stderr.write("Train Graph Done ... \n")
        #add_bb_summary(images, pred_key[0], gt_2d, 'train', max_out=3)

        if val_tfr_pool:
            val_pool = []
            val_iters = []
            for ix, val_tfr in enumerate(val_tfr_pool):
                total_val_num = ndata_tfrecords(val_tfr)
                total_val_iters = int(float(total_val_num) / sbatch)
                val_iters.append(total_val_iters)
                val_images, val_gt_2d, val_gt_3d, _ = create_bb_pip(
                    [val_tfr], 1000, sbatch, mean, shuffle=False)
                val_pred_key = sk_net.infer_os(val_images, 36, tp=False,
                                               reuse_=True)
                _, val_data_loss = sk_net.L2_loss_23d(val_pred_key,
                                                      [val_gt_2d, val_gt_3d],
                                                      None)
                val_pool.append(val_data_loss)
                #add_bb_summary(val_images, val_pred_key[0], val_gt_2d, 'val_c' + str(ix), max_out=3)
            sys.stderr.write("Validation Graph Done ... \n")

        # merge all summaries
        merged = tf.summary.merge_all()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        with tf.Session(config=config) as sess:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            model_saver = tf.train.Saver(max_to_keep=15)
            sys.stderr.write("Initializing ... \n")
            # initialize graph
            sess.run(init_op)
            # initialize the queue threads to start to shovel data
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            model_prefix = os.path.join(out_dir, 'single_key')
            timer = 0
            timer_count = 0
            sys.stderr.write("Start Training --- OUT DIM: %d, %d\n" %
                             (k2d_dim, k3d_dim))
            # range instead of py2-only xrange (identical iteration semantics).
            for i in range(total_iters):
                ts = time.time()
                if i > 0 and i % log_freq == 0:
                    key_loss, _, summary = sess.run(
                        [data_loss, train_op, merged])
                    summary_writer.add_summary(summary, i)
                    summary_writer.flush()
                    sys.stderr.write(
                        'Training %d (%fs) --- Key L2 Loss: %f\n' %
                        (i, timer / timer_count, key_loss))
                    timer = 0
                    timer_count = 0
                else:
                    sess.run([train_op])
                timer += time.time() - ts
                timer_count += 1
                # BUGFIX: was `if val_tfr ...` — that name is the loop variable
                # of the validation-graph loop above and is never bound when
                # val_tfr_pool is empty/None, so this line raised NameError in
                # the no-validation case. Guard on val_tfr_pool itself.
                if val_tfr_pool and i > 0 and i % val_freq == 0:
                    sys.stderr.write('Validation %d\n' % i)
                    for cid, v_dl in enumerate(val_pool):
                        val_key_loss = eval_one_epoch(sess, v_dl,
                                                      val_iters[cid])
                        sys.stderr.write('Class %d --- Key L2 Loss: %f\n' %
                                         (cid, val_key_loss))
                if i > 0 and i % model_save_freq == 0:
                    model_saver.save(sess, model_prefix, global_step=i)
            # Final checkpoint after the loop completes.
            model_saver.save(sess, model_prefix, global_step=i)
            summary_writer.close()
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=5)
def main():
    """Parse CLI args, train NeuralCF on the MovieLens feather dump, log
    metrics to wandb, and early-stop after 5 evaluations without a new best
    NDCG."""
    wandb.init(project="Multimodal")
    parser = argparse.ArgumentParser()
    parser.add_argument('--optim', type=str, default='adam', help='optimizer')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    # NOTE(review): the help text below says 'learning rate' but the flag is
    # the epoch count — looks copy-pasted; confirm before changing the string.
    parser.add_argument('--epochs', type=int, default=20, help='learning rate')
    parser.add_argument('--batch_size', type=int, default=1024, help='train batch size')
    parser.add_argument('--latent_dim_mf', type=int, default=8, help='latent_dim_mf')
    parser.add_argument('--num_layers', type=int, default=3, help='num layers')
    parser.add_argument('--num_neg', type=int, default=4, help='negative sample')
    parser.add_argument('--l2', type=float, default=0.0, help='l2_regularization')
    parser.add_argument('--gpu', type=str, default='0', help='gpu number')
    args = parser.parse_args()
    wandb.config.update(args)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    data = pd.read_feather("/daintlab/data/movielens/movie_3706.ftr")
    print(data)
    MD = Make_Dataset(ratings=data)
    user, item, rating = MD.trainset
    evaluate_data = MD.evaluate_data

    # NCF model — user/item counts are hard-coded to this MovieLens dump.
    model = NeuralCF(num_users=6040, num_items=3706,
                     embedding_size=args.latent_dim_mf,
                     num_layers=args.num_layers)
    model.cuda()
    model = nn.DataParallel(model)
    print(model)
    optim = optimizer(optim=args.optim, lr=args.lr, model=model,
                      weight_decay=args.l2)
    criterion = nn.BCEWithLogitsLoss()
    wandb.watch(model)

    N = []  # NDCG history, one entry per epoch
    patience = 0
    for epoch in range(args.epochs):
        print('Epoch {} starts !'.format(epoch + 1))
        print('-' * 80)
        t1 = time.time()
        model.train()
        total_loss = 0
        # Fresh negative sampling every epoch.
        sample = SampleGenerator(user=user, item=item, rating=rating,
                                 ratings=data, positive_len=MD.positive_len,
                                 num_neg=args.num_neg)
        train_loader = sample.instance_a_train_loader(args.batch_size)
        print("Train Loader 생성 완료")
        for batch_id, batch in enumerate(train_loader):
            users, items, ratings = batch[0], batch[1], batch[2]
            ratings = ratings.float()
            users, items, ratings = users.cuda(), items.cuda(), ratings.cuda()
            optim.zero_grad()
            output = model(users, items)
            loss = criterion(output, ratings)
            loss.backward()
            optim.step()
            loss = loss.item()
            wandb.log({'Batch Loss': loss})
            total_loss += loss
        t2 = time.time()
        print("train : ", t2 - t1)
        engine = Engine()
        hit_ratio, ndcg = engine.evaluate(model, evaluate_data, epoch_id=epoch)
        wandb.log({"epoch": epoch, "HR": hit_ratio, "NDCG": ndcg})
        N.append(ndcg)
        # Early stopping: bump patience while NDCG stays below the best seen;
        # reset it whenever a new best is reached; break at patience == 5.
        if N[-1] < max(N):
            if patience == 5:
                # NOTE(review): this print interpolates no value — looks like
                # an unfinished log message.
                print("Patience = ")
                print("ndcg = {:.4f}".format(max(N)))
                break
            else:
                patience += 1
                print("Patience = {} ndcg = {:.4f}".format(patience, max(N)))
        else:
            patience = 0
            print("Patience = {}".format(patience))
def train():
    """Distributed (between-graph replication) training entry point for the
    three-branch regression net, coordinated via tf.train.Supervisor."""
    start_time = time.time()
    # Step 1: read the cluster layout (ps_hosts/worker_hosts) and this
    # node's role (job_name, task_index) from the command-line FLAGS.
    print("\n\n\n", start_time, "\n\n")
    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    else:
        print('job_name : %s' % FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print('task_index : %d' % FLAGS.task_index)
    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    # Step 2: create the Server for the current task node.
    # num_worker = len(worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)

    # Step 3: a ps node blocks in server.join() forever; a worker falls
    # through to Step 4.
    if FLAGS.job_name == 'ps':
        server.join()
    is_chief = (FLAGS.task_index == 0)
    # worker_device = '/job:worker/task%d/cpu:0' % FLAGS.task_index

    # Input pipelines for training and test tfrecords.
    train_next_element = get_data("../data/train.tfrecords")
    test_next_element = get_data("../data/test.tfrecords")

    # Network inputs: three input branches plus the regression target.
    with tf.name_scope('net_input'):
        p1 = tf.placeholder(tf.float32, [None, 20, 40, 3])
        p2 = tf.placeholder(tf.float32, [None, 8, 10])
        p3 = tf.placeholder(tf.float32, [None, 10])
        y_ = tf.placeholder(tf.float32, [None, 1])
        x = [p1, p2, p3]

    # Assigns ops to the local worker by default.
    # replica_device_setter places Variable ops on the ps tasks and the
    # other compute ops on worker tasks (round-robin by default).
    with tf.device(tf.train.replica_device_setter(cluster=cluster)):
        # Global optimization step, mainly for distributed coordination.
        with tf.name_scope('global_step'):
            global_step = tf.Variable(0, trainable=False)
        # Step 4: build the model to train.
        model = Three_branch_net()
        y_out = model.forward(x)
        model_loss = tf.losses.mean_squared_error(y_, y_out)
        # NOTE(review): learning_rate is assumed to be defined at module
        # level (not visible in this chunk) — confirm.
        train_op = optimizer(model_loss, learning_rate, global_step)
        saver = tf.train.Saver()
        # For tensorboard: merge every collected summary into one op.
        summary_op = tf.summary.merge_all()
        # Local parameter-initialization op.
        # init_op = tf.global_variables_initializer()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        train_dir = tempfile.mkdtemp()
        # Step 5: create tf.train.Supervisor to manage the training process.
        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=is_chief,
                                 logdir=train_dir,
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 recovery_wait_secs=1,
                                 global_step=global_step)
    if is_chief:
        print('Worker %d: Initailizing session...' % FLAGS.task_index)
    else:
        print('Worker %d: Waiting for session to be initaialized...' % FLAGS.task_index)
    # The supervisor takes care of session initialization, restoring from
    # a checkpoint, and closing when done or an error occurs.
    gpu_options = tf.GPUOptions(allow_growth=True)
    with sv.prepare_or_wait_for_session(server.target,
                                        config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Writers for tensorboard.
        train_writer = tf.summary.FileWriter(MODEL_SAVE_PATH + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(MODEL_SAVE_PATH + '/test')
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            print("\n restore model\n")
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("\n new model train \n")
            # init_op = tf.global_variables_initializer()
            sess.run(init_op)
        i = 0
        # Loop until either input pipeline is exhausted (OutOfRangeError).
        while True:
            if i % 10 == 0:
                # Every 10th step: evaluate on the test pipeline only.
                try:
                    p1_run, p2_run, p3_run, label_run = sess.run(
                        [test_next_element[0], test_next_element[1],
                         test_next_element[2], test_next_element[3]])
                    losses, summary, step = sess.run(
                        [model_loss, summary_op, global_step],
                        feed_dict={
                            p1: p1_run,
                            p2: p2_run,
                            p3: p3_run,
                            y_: label_run})
                    test_writer.add_summary(summary, step)
                    print("test loss at step %s:(global step %s) :%s " % (i, step, losses))
                except tf.errors.OutOfRangeError:
                    break
            else:
                try:
                    p1_run, p2_run, p3_run, label_run = sess.run(
                        [train_next_element[0], train_next_element[1],
                         train_next_element[2], train_next_element[3]])
                    # print(p1_run.shape, p2_run.shape, p3_run.shape, label_run.shape)
                    if i % 100 == 1:
                        pass
                        # TF run options for tracing.
                        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                        # Run metadata: records timing/memory of the run.
                        # NOTE(review): run_options/run_metadata are created
                        # but never passed to sess.run, so the metadata
                        # written below is empty — confirm whether tracing
                        # was intended here.
                        run_metadata = tf.RunMetadata()
                        losses, _, summary, step = sess.run(
                            [model_loss, train_op, summary_op, global_step],
                            feed_dict={
                                p1: p1_run,
                                p2: p2_run,
                                p3: p3_run,
                                y_: label_run})
                        train_writer.add_run_metadata(run_metadata, 'step%03d' % step)
                        train_writer.add_summary(summary, step)
                        saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=step)
                        print("adding run metadata for ", step)
                    else:
                        losses, _, summary, step = sess.run(
                            [model_loss, train_op, summary_op, global_step],
                            feed_dict={
                                p1: p1_run,
                                p2: p2_run,
                                p3: p3_run,
                                y_: label_run})
                        train_writer.add_summary(summary, step)
                        # print("accuracy at step %s:(global step %s) :%s " % (i, step, losses))
                except tf.errors.OutOfRangeError:
                    break
            i += 1
        train_writer.close()
        test_writer.close()
    print("All time {}s".format(time.time() - start_time))
def main():
    """Train and (after the final epoch) evaluate the ACF recommender,
    logging configuration and metrics to wandb."""
    wandb.init(project="AttCF")
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str,
                        default='/daintlab/data/recommend/Amazon-office-raw',
                        help='path')
    parser.add_argument('--top_k', type=int, default=10, help='top_k')
    parser.add_argument('--optim', type=str, default='adam', help='optimizer')
    parser.add_argument('--epochs', type=int, default=5, help='epoch')
    parser.add_argument('--batch_size', type=int, default=256, help='batch size')
    parser.add_argument('--dim', type=int, default=128, help='dimension')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    parser.add_argument('--gpu', type=str, default='0', help='gpu number')
    parser.add_argument('--num_sam', type=int, default=4, help='num of pos sample')
    parser.add_argument('--feature_type', default='all', type=str,
                        help='Type of feature to use. [all, img, txt]')
    parser.add_argument(
        '--eval_type', default='leave-one-out', type=str,
        help='Evaluation protocol. [ratio-split, leave-one-out]')
    # Shared with module-level helpers (presumably train()/test() read these
    # globals — verify against their definitions).
    global args
    global sd
    global train_len
    global test_len
    args = parser.parse_args()
    wandb.config.update(args)
    # NOTE(review): duplicate parse_args() call — harmless but redundant.
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Load dataset
    print("Loading Dataset")
    data_path = os.path.join(args.data_path, args.eval_type)
    train_df, test_df, train_ng_pool, test_negative, num_user, num_item, images = D.load_data(
        data_path, args.feature_type)
    train_len = len(train_df)
    test_len = num_user
    train_dataset = D.CustomDataset(train_df, test_df, images,
                                    negative=train_ng_pool, istrain=True,
                                    feature_type=args.feature_type,
                                    num_sam=args.num_sam)
    test_dataset = D.CustomDataset(train_df, test_df, images,
                                   negative=test_negative, istrain=False,
                                   feature_type=args.feature_type,
                                   num_sam=args.num_sam)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, collate_fn=my_collate,
                              pin_memory=True)
    # Test loader uses batch_size=1: one user (with all its candidates) at a time.
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                             collate_fn=my_collate_tst, pin_memory=True)

    # Model
    acf = ACF(num_user, num_item, images, args.dim)
    acf = torch.nn.DataParallel(acf)
    acf = acf.cuda()
    print(acf)

    # Optimizer
    optim = optimizer(optim=args.optim, lr=args.lr, model=acf)

    # Train & Eval
    for epoch in range(args.epochs):
        # Fresh random seed each epoch (exposed via the `sd` global).
        sd = np.random.randint(2021)
        start = time.time()
        train(acf, train_loader, epoch, optim)
        end = time.time()
        print("{}/{} Train Time : {}".format(epoch + 1, args.epochs, end - start))
        # Evaluate only once, after the final epoch.
        if (epoch + 1) == args.epochs:
            start = time.time()
            test(acf, test_loader, epoch)
            end = time.time()
            print("{}/{} Evaluate Time : {}".format(epoch + 1, args.epochs, end - start))
def train_model(
        max_epochs=5,  # The maximum number of epoch to run
        decay_c=0.,  # Weight decay for weights
        lrate=1e-4,  # Learning rate for sgd (not used for adadelta and rmsprop)
        batch_size=16,  # The batch size during training
        valid_batch_size=64,  # The batch size used for test set
):
    """Build the Theano model and train it with shuffled minibatches,
    scoring the test split with pred_error after every epoch."""
    # locals() here contains exactly the keyword arguments above, so this
    # snapshot doubles as the hyper-parameter dict.
    model_options = locals().copy()
    dataparams, train, test = load_data.load_data()
    model_options.update(dataparams)

    print('Building model...')
    params = init_params(model_options)
    tparams = init_tparams(params)
    (u, l, q, f_score, cost) = build_model(tparams, model_options)
    args = [u, l, q]

    def _l2_regularizer(decay_c):
        # Sum of squared entries of every parameter tensor, scaled by a
        # shared decay coefficient.
        decay_c = theano.shared(np.asarray(decay_c, dtype=config.floatX),
                                name='decay_c')
        l2r = 0.
        for kk, vv in tparams.items():
            l2r += (tparams[kk]**2).sum()
        return decay_c * l2r

    cost += _l2_regularizer(decay_c)

    grads = T.grad(cost, wrt=list(tparams.values()))
    # f_grad = theano.function([x], grads, name='f_grad')
    lr = T.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, args, cost)

    kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, shuffle=True)

    print('Training...')
    uidx = 0  # total number of parameter updates across all epochs
    start = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0
            logging.info('Time: %s' % (time.time() - start))
            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                # Gather the minibatch. This rebinds the symbolic names
                # u/l/q, which is safe only because `args` was captured above.
                u = [train[0][t] for t in train_index]
                l = [train[1][t] for t in train_index]
                q = [train[2][t] for t in train_index]
                n_samples += len(u)
                cost = f_grad_shared(u, l, q)
                f_update(lrate)
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)
            logging.info('------ Epoch: %d, Update(cls): %d -------' % (eidx, uidx))
            pred_error(f_score, test, kf_test)
            logging.info('------------------------------------')
    except KeyboardInterrupt:
        print("Training interupted")