def rate(request):
    def nothing():
        return HttpResponse(simplejson.dumps({'votes': 0, 'rating': 0}))

    post = get_var(request, 'post', None)
    rate = get_var(request, 'rating', None)
    if not post or not rate:
        return nothing()
    try:
        post = int(post)
        rate = int(rate)
    except ValueError:
        return nothing()
    post = Topic.objects.filter(pk=post)
    if not post:
        return nothing()
    post = post[0]
    post.add_rate(rate)
    votes = 0
    if post.attrs.has_key('votes'):
        votes = post.attrs['votes']
    avg = 0
    if post.attrs.has_key('avg'):
        avg = post.attrs['avg']
    return HttpResponse(simplejson.dumps({'votes': votes, 'avg': avg}))
def async_build(self):
    if self.worker:
        utils.log('killing existing thread')
        self.worker.terminate()
        self.worker.join()
    script = utils.get_var('squeeze_c_script')
    args = utils.get_var('squeeze_c_args')
    if args:
        self.guest_win.vars['squeeze_args'] = args
    else:
        self.guest_win.vars['squeeze_args'] = '<none>'
    path_script = os.path.join(vim.eval('s:plugin_path'), 'scripts/', script, 'objdump')
    self.out_q = multiprocessing.Queue()
    self.worker = AsyncWorker(self.out_q, self.host_win.buffer.name,
                              path_script, args)
    self.worker.start()
    if len(polling_squeezers) == 0:
        vim.command('''
            let g:squeeze_timer = timer_start(100,
                \ function('s:TimerHandler'), {'repeat': -1})
        ''')
    else:
        vim.command('call timer_pause(g:squeeze_timer, 0)')
    polling_squeezers.add(self)
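# Note: in the Vim plugin snippet above, utils.get_var is a project helper that is
# not shown in this listing. A minimal sketch, assuming it reads a global Vim
# variable and falls back to a default (the exact lookup rules are an assumption):
import vim

def get_var(name, default=''):
    # Read g:<name> if it exists, otherwise return the default.
    return vim.eval("get(g:, '{}', '{}')".format(name, default))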
def evaluate(name, loader, F_s, F_d, C):
    F_s.eval()
    if F_d:
        F_d.eval()
    C.eval()
    it = iter(loader)
    correct = 0
    total = 0
    confusion = ConfusionMeter(opt.num_labels)
    for inputs, targets in tqdm(it):
        targets = utils.get_var(targets)
        if not F_d:
            # unlabeled domain
            d_features = utils.get_var(
                torch.zeros(len(targets), opt.domain_hidden_size))
        else:
            d_features = F_d(inputs)
        features = torch.cat((F_s(inputs), d_features), dim=1)
        outputs = C(features)
        _, pred = torch.max(outputs, 1)
        confusion.add(pred.data, targets.data)
        total += targets.size(0)
        correct += (pred == targets).sum().data[0]
    acc = correct / total
    log.info('{}: Accuracy on {} samples: {}%'.format(name, total, 100.0 * acc))
    log.debug(confusion.conf)
    return acc
def init_hidden(self):
    # Before we've done anything, we don't have any hidden state.
    # The axes semantics are (num_layers, minibatch_size, hidden_dim)
    return (utils.get_var(torch.zeros(self.n_layers * 2, 1, self.hidden_dim),
                          gpu=self.gpu),
            utils.get_var(torch.zeros(self.n_layers * 2, 1, self.hidden_dim),
                          gpu=self.gpu))
def init_hidden(self):
    # Initialize hidden state.
    # The axes semantics are (num_layers, minibatch_size, hidden_dim)
    return (utils.get_var(torch.zeros(self.n_layers * 2, 1, self.hidden_dim),
                          gpu=True),
            utils.get_var(torch.zeros(self.n_layers * 2, 1, self.hidden_dim),
                          gpu=True))
def init_hidden(self, batch_size=1):
    # Before we've done anything, we don't have any hidden state.
    # Refer to the PyTorch documentation to see exactly why they have this dimensionality.
    # The axes semantics are (num_layers, minibatch_size, hidden_dim)
    return (utils.get_var(torch.zeros(self.n_layers * 2, batch_size, self.hidden_dim),
                          gpu=True),
            utils.get_var(torch.zeros(self.n_layers * 2, batch_size, self.hidden_dim),
                          gpu=True))
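# Note: the PyTorch snippets in this listing rely on a project-specific
# utils.get_var helper whose definition is not shown. A minimal sketch consistent
# with the calls above (wrap a tensor for autograd and optionally move it to the
# GPU); the exact signature is an assumption inferred from usage:
import torch
from torch.autograd import Variable

def get_var(tensor, gpu=False):
    # Wrap a tensor in an autograd Variable and optionally push it to the GPU.
    var = Variable(tensor)
    return var.cuda() if gpu else var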
def main():
    tf.set_random_seed(1234)
    images, labels = read_data('./data/cifar10', 0.5)
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (images["train"], labels["train"]))
    train_dataset = train_dataset.shuffle(100).batch(16)
    train_iter = train_dataset.make_initializable_iterator()
    x_train, y_train = train_iter.get_next()
    # x_train = images["train"][:16]
    # y_train = labels["train"][:16]

    logits, train_loss = Model_test(x_train, y_train, True)
    w_var = utils.get_var(tf.trainable_variables(), 'weight_var')[1]
    arch_var = utils.get_var(tf.trainable_variables(), 'arch_var')[1]

    _, unrolled_train_loss = Model_test(x_train, y_train, True, "unrolled_weight_var")
    unrolled_w_var = utils.get_var(tf.trainable_variables(), 'unrolled_weight_var')[1]
    copy_weight_opts = [v_.assign(v) for v_, v in zip(unrolled_w_var, w_var)]

    with tf.control_dependencies(copy_weight_opts):
        unrolled_optimizer = tf.train.GradientDescentOptimizer(0.001)
        unrolled_optimizer = unrolled_optimizer.minimize(
            unrolled_train_loss, var_list=unrolled_w_var)  # w'

    with tf.control_dependencies([unrolled_optimizer]):
        valid_grads = tf.gradients(unrolled_train_loss, unrolled_w_var)

    R = 0.01 / tf.global_norm(valid_grads)

    # Original implementation:
    # opts = [v.assign(v + R * g) for v, g in zip(w_var, valid_grads)]
    # with tf.control_dependencies(opts):
    #     arch_grad_after = tf.gradients(train_loss, arch_var)
    optimizer1 = tf.train.GradientDescentOptimizer(R)
    optimizer1 = optimizer1.apply_gradients(zip(valid_grads, w_var))
    with tf.control_dependencies([optimizer1]):
        arch_grad_after = tf.gradients(train_loss, arch_var)

    config = tf.ConfigProto()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(0)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    sess.run([train_iter.initializer])

    print(sess.run(valid_grads)[0])
    start = time.time()
    print(sess.run(arch_grad_after)[0])
    print("time_is {}".format(time.time() - start))
    print(sess.run(valid_grads)[0])
def _parse(stride, sess):
    offset = 0
    genotype = []
    arch_var_name, arch_var = utils.get_var(tf.trainable_variables(), 'arch_params')
    for i in range(cells_num):
        edges = []
        edges_confident = []
        for j in range(i + 2):
            with tf.variable_scope("", reuse=tf.AUTO_REUSE):
                weight = arch_var[arch_var_name.index(
                    "arch_params/weight{}_{}:0".format(stride, offset + j))]
                value = sess.run(weight)
            if np.argmax(value) == PRIMITIVES.index('none'):
                value = np.delete(value, np.argmax(value))
            edges.append((PRIMITIVES[np.argmax(value)], j))
            edges_confident.append(np.max(value))

        edges_confident = np.array(edges_confident)
        max_edges = [
            edges[np.argsort(edges_confident)[-1]],
            edges[np.argsort(edges_confident)[-2]]
        ]
        genotype.extend(max_edges)
        offset += i + 2
    return genotype
def update_nn_weights_derivative_free_old(cloud, cost, lr, N, kernel_a, alpha, beta, gamma):
    # get flattened weights, nn shape and weight names
    cloudf, nn_shape, weight_names = flatten_weights(cloud, N)

    # compute kernels
    kernels = [[kernel(cloudf[i], cloudf[j], kernel_a) for j in range(N)]
               for i in range(N)]
    gkernels = [[gkernel(cloudf[i], cloudf[j], kernel_a) for j in range(N)]
                for i in range(N)]
    # plt.imshow(kernels, vmin=0, vmax=1)
    # plt.colorbar()

    # compute mean and standard deviation
    cloud_mean = np.mean(cloudf, axis=0)
    cloud_var = get_var(cloudf, cloud_mean)

    # compute gradient flows
    updates = []
    for nn in range(N):
        R = 0
        P = 0
        S = 0
        Q = [
            gkernels[nn][j] * cost[j] + kernels[nn][j] * cost[j] * np.divide(
                (cloudf[j] - cloud_mean), cloud_var) for j in range(N)
        ]
        Q = np.mean(Q, axis=0)

        if alpha > 0:
            R = [[kernels[nn][j] * (cloudf[j] - cloudf[k]) for j in range(N)]
                 for k in range(N)]
            R = [item for sublist in R for item in sublist]  # flatten list of lists
            R = np.sum(R, axis=0) * float(1 / N**2)

        if beta > 0:
            P = [gkernels[nn][j] for j in range(N)]
            P = np.mean(P, axis=0)

        if gamma > 0:
            S = [
                kernels[nn][j] * np.divide((cloudf[j] - cloud_mean), cloud_var)
                for j in range(N)
            ]
            S = np.mean(S, axis=0)

        updates.append(-lr * (Q + alpha * R + beta * P + gamma * S))

    # update flattened tensors
    for nn in range(N):
        cloudf[nn] = cloudf[nn] + updates[nn]

    # restore NN weight shapes
    new_nn_weights = unflatten_weights(cloudf, nn_shape, weight_names, N)

    return new_nn_weights, cloud_var
def evaluate(data_loader, model, label_to_ix):
    ix_to_label = {v: k for k, v in label_to_ix.items()}
    correct = 0
    total = 0
    model.eval()
    loader_it = iter(data_loader)
    num_it = len(data_loader)
    instances = []
    for j in range(num_it):
        mention_inputs, features, sentences, char_inputs, targets = utils.endless_get_next_batch(
            data_loader, loader_it)
        targets = utils.get_var(targets)
        pred = model(mention_inputs, char_inputs)
        _, y_pred = torch.max(pred, 1)
        total += targets.size(0)
        # correct += (y_pred == targets).sum().sample_data[0]
        correct += (y_pred == targets).sum().item()
        # output evaluate
        pred_numpy = (y_pred.data).cpu().numpy()
        y_pred_labels = [ix_to_label[ix] for ix in pred_numpy]
        assert len(y_pred_labels) == len(
            features), 'y_pred_labels and features have different lengths'
        for i, pred_label in enumerate(y_pred_labels):
            features[i][5] = pred_label
            instances.append(features[i])
    acc = 100.0 * correct / total
    return acc, instances
def update_cloud(cloudf, gradientsf, lr, N, kernel_a, alpha, beta, gamma):
    # compute mean, standard deviation and difference matrix between particles
    cloud_mean = np.mean(cloudf, axis=0)
    cloud_var = get_var(cloudf)
    params_diff_matrix = cloudf[:, np.newaxis] - cloudf

    # compute kernels
    norm = np.sum(params_diff_matrix**2, axis=2)  # no sqrt
    kernels = np.exp(-kernel_a * norm)
    gkernels = -2 * kernel_a * np.einsum('ijk,ij -> ijk', params_diff_matrix, kernels)

    Q = np.einsum('ij,jk -> ik', kernels, gradientsf) * float(1 / N)

    if alpha > 0:
        R = np.einsum('ij,jlk -> ik', kernels, params_diff_matrix) * float(1 / N**2)
    else:
        R = 0

    if beta > 0:
        P = np.einsum('ijk -> ik', gkernels) * float(1 / N)
    else:
        P = 0

    if gamma > 0:
        S = np.einsum('ij,jk -> ik', kernels,
                      np.divide(cloudf - cloud_mean, cloud_var)) * float(1 / N)
    else:
        S = 0

    cloudf -= lr * (Q + alpha * R + beta * P + gamma * S)
    return cloudf, cloud_var
def _parse(stride, sess):
    offset = 0
    genotype = []
    arch_var_name, arch_var = utils.get_var(tf.trainable_variables(), 'arch_var')
    for i in range(cells_num):
        edges = []
        edges_confident = []
        for j in range(i + 2):
            with tf.variable_scope("", reuse=tf.AUTO_REUSE):
                weight = arch_var[arch_var_name.index(
                    "arch_var/weight{}_{}:0".format(stride, offset + j))]
                value = sess.run(weight)
            value_sorted = value.argsort()
            max_index = value_sorted[-2] if value_sorted[-1] == PRIMITIVES.index('none') \
                else value_sorted[-1]
            edges.append((PRIMITIVES[max_index], j))
            edges_confident.append(value[max_index])

        edges_confident = np.array(edges_confident)
        max_edges = [
            edges[np.argsort(edges_confident)[-1]],
            edges[np.argsort(edges_confident)[-2]]
        ]
        genotype.extend(max_edges)
        offset += i + 2
    return genotype
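# Note: in the TensorFlow/DARTS snippets, utils.get_var(variables, name) is unpacked
# or indexed as a (names, variables) pair. Its definition is not part of this
# listing; a minimal sketch, assuming it filters a variable list by a name or scope
# substring:
def get_var(var_list, name):
    # Return parallel lists of variable names and variable objects whose
    # names contain the requested scope/prefix.
    matched = [v for v in var_list if name in v.name]
    return [v.name for v in matched], matched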
def init_cloud(self, N, dispersion_factor=6):
    self.N = N
    self.cloud = [
        init_layers(self.architecture, i, dispersion_factor) for i in range(N)
    ]
    self.cloud_mean = get_mean(self.cloud)
    cloudf, _, _ = flatten_weights(self.cloud, self.N)
    self.cloud_var = get_var(cloudf)
def update_cloud_derivative_free(cloudf, cost, lr, N, kernel_a, alpha, beta, gamma):
    # compute mean, standard deviation and difference matrix between particles
    cloud_mean = np.mean(cloudf, axis=0)
    cloud_var = get_var(cloudf)  # np.var(cloudf, axis=0) works best
    params_diff_matrix = cloudf[:, np.newaxis] - cloudf

    # compute kernels
    norm = np.sum(params_diff_matrix**2, axis=2)
    kernels = np.exp(-kernel_a * norm)
    gkernels = -2 * kernel_a * np.einsum('ijk,ij -> ijk', params_diff_matrix, kernels)

    cost = np.squeeze(np.array(cost))
    omega = np.divide(cloudf - cloud_mean, cloud_var)

    # Q = np.einsum('ijk,j -> ik', gkernels, cost) + np.einsum('ij,jk,j -> ik', kernels, omega, cost)
    #
    # if alpha > 0:
    #     R = np.einsum('ij,jk -> ik', kernels, cloudf - cloud_mean)
    # else:
    #     R = 0
    #
    # if beta > 0:
    #     P = np.einsum('ijk -> ik', gkernels)
    # else:
    #     P = 0
    #
    # if gamma > 0:
    #     S = np.einsum('ij,jk -> ik', kernels, omega)
    # else:
    #     S = 0
    #
    # cloudf -= lr * (Q + alpha * R + beta * P + gamma * S) * float(1 / N)

    gamma1 = gamma
    gamma2 = alpha
    Q = (np.einsum(
        'ij,jk -> ik', kernels,
        np.einsum('j,jk -> jk', (cost + gamma1), omega) + gamma2 * (cloudf - cloud_mean))
         + np.einsum('j,ijk -> ik', cost + gamma1, gkernels)) * float(1 / N)

    # if lr == "auto":
    #     lr = (lr * N) / np.einsum('ij -> i', kernels)
    #     cloudf -= np.einsum('i,ik -> ik', lr, Q)
    # else:
    cloudf -= lr * Q

    return cloudf, cloud_var
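# Note: in these particle-cloud routines, get_var returns the per-parameter variance
# of the flattened cloud (the inline comment above suggests np.var(cloudf, axis=0)).
# A minimal sketch under that assumption; the optional precomputed mean mirrors the
# two-argument call in update_nn_weights_derivative_free_old:
import numpy as np

def get_var(cloudf, cloud_mean=None):
    # Variance of each flattened weight across the N particles.
    if cloud_mean is None:
        cloud_mean = np.mean(cloudf, axis=0)
    return np.mean((np.asarray(cloudf) - cloud_mean) ** 2, axis=0)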
def __init__(self, train_loader, test_loader, opt_algo='gd', learning_rate=0.01,
             epoch=10, early_stop_round=None, l2_w=0., random_seed=None):
    self.graph = tf.Graph()
    self.train_loader = train_loader
    self.test_loader = test_loader
    self.var_list = [('w', [sum(config.FIELD_SIZES), 1], 'xavier'),
                     ('b', [1], 'zero')]
    self.opt_algo = opt_algo
    self.learning_rate = learning_rate
    self.epoch = epoch
    self.early_stop_round = early_stop_round
    self.l2_w = l2_w
    self.random_seed = random_seed
    self.time_scores = []
    self.train_scores = []
    self.test_scores = []

    with self.graph.as_default():
        if self.random_seed is not None:
            tf.set_random_seed(self.random_seed)
        self.X = tf.sparse_placeholder(config.DTYPE)
        self.y = tf.placeholder(config.DTYPE)
        self.var_dict = utils.get_var(self.var_list)
        w = self.var_dict['w']
        b = self.var_dict['b']
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(tf.add(xw, b), [-1])
        self.y_preds = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.y, logits=logits)) + self.l2_w * tf.nn.l2_loss(w)
        self.optimizer = utils.get_optimizer(self.opt_algo, self.learning_rate,
                                             self.loss)
        self.sess = tf.Session()
        tf.global_variables_initializer().run(session=self.sess)
def tnx(request):
    tnx = request.user.attrs.get('thanks', [])
    the_id = get_var(request, 'the_id', 0)
    if not the_id or the_id in tnx:
        return HttpResponse('"fail"')
    t = Topic.objects.filter(pk=the_id)
    if not t:
        return HttpResponse('"fail"')
    t = t[0]
    t.attrs['thanks'] = t.attrs.get('thanks', 0) + 1
    t.save()
    tnx.append(t.id)
    request.user.attrs['thanks'] = tnx
    request.user.save()
    return HttpResponse(t.attrs['thanks'])
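# Note: the Django views in this listing call get_var(request, name, default). The
# helper itself is not shown; a minimal sketch, assuming it looks a parameter up in
# POST first and then GET before falling back to the default:
def get_var(request, name, default=None):
    # Fetch a request parameter from POST, then GET, then the default.
    return request.POST.get(name, request.GET.get(name, default))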
def compute_unrolled_step(x_valid, y_valid, w_var, train_loss, follower_opt):
    arch_var = utils.get_var(tf.trainable_variables(), 'arch_params')[1]
    with tf.control_dependencies([follower_opt]):
        leader_opt = tf.train.AdamOptimizer(args.arch_learning_rate, 0.5, 0.999)
        leader_grads = leader_opt.compute_gradients(train_loss, var_list=arch_var)

    _, valid_loss = Model(x_valid, y_valid, True, args.init_channels, CLASS_NUM,
                          args.layers)
    tf.summary.scalar('valid_loss', valid_loss)
    valid_grads = tf.gradients(valid_loss, w_var)

    r = 1e-2
    sum_grads = tf.get_variable(name='sum_grads', shape=[],
                                initializer=tf.constant_initializer(0.0))
    opt = sum_grads.assign(0)
    with tf.control_dependencies([opt]):
        for v in valid_grads:
            sum_grads = sum_grads + tf.reduce_sum(tf.square(v))
    R = r / tf.sqrt(sum_grads)

    for v, g in zip(w_var, valid_grads):
        v.assign(v + R * g)
    train_grads_pos = tf.gradients(train_loss, arch_var)

    for v, g in zip(w_var, valid_grads):
        v.assign(v - 2 * R * g)
    train_grads_neg = tf.gradients(train_loss, arch_var)

    for v, g in zip(w_var, valid_grads):
        v.assign(v + R * g)

    implicit_grads = [
        tf.divide(gp - gn, 2 * R)
        for gp, gn in zip(train_grads_pos, train_grads_neg)
    ]
    for i, (g, v) in enumerate(leader_grads):
        leader_grads[i] = (g - args.learning_rate * implicit_grads[i], v)
    leader_opt = leader_opt.apply_gradients(leader_grads)
    return leader_opt
def gen(sTxt, dContext=None):
    """
    Generate text.
    @param sTxt: Template text.
    @param dContext: Context. Any dictionary-like structure can serve as the context.
        By default the context is the local namespace of the config module.
    @return: The generated text.
    """
    if dContext is None:
        dContext = {}
        try:
            from ic import config
        except ImportError:
            import config
        for name in config.__dict__.keys():
            dContext[name] = utils.get_var(name)
    return auto_replace(sTxt, dContext)
def compute_loss(self, all_factors_batch, loss_function):
    loss = utils.get_var(torch.FloatTensor([0.0]), self.gpu)
    # factor_kinds = [transition_factors, pairwise_factors, lstm_factors]
    # for k in targets.keys():
    #     loss += loss_function(tag_scores[k], targets[k])
    for all_factors in all_factors_batch:
        for factor in all_factors:
            beliefs, scores = factor
            for k in beliefs.keys():
                if k in scores and k in beliefs:
                    belief = torch.stack(beliefs[k]).view(len(scores[k]), -1)
                    score = Variable(torch.LongTensor(scores[k]),
                                     requires_grad=False)
                    if self.gpu:
                        score = score.cuda()
                    loss += loss_function(belief, score)
    return loss
def train(vocab, train_sets, dev_sets, test_sets, unlabeled_sets):
    """
    train_sets, dev_sets, test_sets: dict[domain] -> AmazonDataset
    For unlabeled domains, no train_sets are available
    """
    # dataset loaders
    train_loaders, unlabeled_loaders = {}, {}
    train_iters, unlabeled_iters = {}, {}
    dev_loaders, test_loaders = {}, {}
    my_collate = utils.sorted_collate if opt.model == 'lstm' else utils.unsorted_collate
    for domain in opt.domains:
        train_loaders[domain] = DataLoader(train_sets[domain],
                                           opt.batch_size,
                                           shuffle=True,
                                           collate_fn=my_collate)
        train_iters[domain] = iter(train_loaders[domain])
    for domain in opt.dev_domains:
        dev_loaders[domain] = DataLoader(dev_sets[domain],
                                         opt.batch_size,
                                         shuffle=False,
                                         collate_fn=my_collate)
        test_loaders[domain] = DataLoader(test_sets[domain],
                                          opt.batch_size,
                                          shuffle=False,
                                          collate_fn=my_collate)
    for domain in opt.all_domains:
        if domain in opt.unlabeled_domains:
            uset = unlabeled_sets[domain]
        else:
            # for labeled domains, consider which data to use as unlabeled set
            if opt.unlabeled_data == 'both':
                uset = ConcatDataset(
                    [train_sets[domain], unlabeled_sets[domain]])
            elif opt.unlabeled_data == 'unlabeled':
                uset = unlabeled_sets[domain]
            elif opt.unlabeled_data == 'train':
                uset = train_sets[domain]
            else:
                raise Exception(
                    f'Unknown options for the unlabeled data usage: {opt.unlabeled_data}'
                )
        unlabeled_loaders[domain] = DataLoader(uset,
                                               opt.batch_size,
                                               shuffle=True,
                                               collate_fn=my_collate)
        unlabeled_iters[domain] = iter(unlabeled_loaders[domain])

    # models
    F_s = None
    F_d = {}
    C, D = None, None
    if opt.model.lower() == 'dan':
        F_s = DanFeatureExtractor(vocab, opt.F_layers, opt.shared_hidden_size,
                                  opt.sum_pooling, opt.dropout, opt.F_bn)
        for domain in opt.domains:
            F_d[domain] = DanFeatureExtractor(vocab, opt.F_layers,
                                              opt.domain_hidden_size,
                                              opt.sum_pooling, opt.dropout,
                                              opt.F_bn)
    elif opt.model.lower() == 'lstm':
        F_s = LSTMFeatureExtractor(vocab, opt.F_layers, opt.shared_hidden_size,
                                   opt.dropout, opt.bdrnn, opt.attn)
        for domain in opt.domains:
            F_d[domain] = LSTMFeatureExtractor(vocab, opt.F_layers,
                                               opt.domain_hidden_size,
                                               opt.dropout, opt.bdrnn, opt.attn)
    elif opt.model.lower() == 'cnn':
        F_s = CNNFeatureExtractor(vocab, opt.F_layers, opt.shared_hidden_size,
                                  opt.kernel_num, opt.kernel_sizes, opt.dropout)
        for domain in opt.domains:
            F_d[domain] = CNNFeatureExtractor(vocab, opt.F_layers,
                                              opt.domain_hidden_size,
                                              opt.kernel_num, opt.kernel_sizes,
                                              opt.dropout)
    else:
        raise Exception(f'Unknown model architecture {opt.model}')
    C = SentimentClassifier(opt.C_layers,
                            opt.shared_hidden_size + opt.domain_hidden_size,
                            opt.shared_hidden_size + opt.domain_hidden_size,
                            opt.num_labels, opt.dropout, opt.C_bn)
    D = DomainClassifier(opt.D_layers, opt.shared_hidden_size,
                         opt.shared_hidden_size, len(opt.all_domains),
                         opt.loss, opt.dropout, opt.D_bn)
    if opt.use_cuda:
        F_s, C, D = F_s.cuda(), C.cuda(), D.cuda()
        for f_d in F_d.values():
            f_d = f_d.cuda()

    # optimizers
    optimizer = optim.Adam(itertools.chain(
        *map(list, [F_s.parameters() if F_s else [], C.parameters()] +
             [f.parameters() for f in F_d.values()])),
                           lr=opt.learning_rate)
    optimizerD = optim.Adam(D.parameters(), lr=opt.D_learning_rate)

    # testing
    if opt.test_only:
        log.info(f'Loading model from {opt.model_save_file}...')
        if F_s:
            F_s.load_state_dict(
                torch.load(os.path.join(opt.model_save_file, f'netF_s.pth')))
        for domain in opt.all_domains:
            if domain in F_d:
                F_d[domain].load_state_dict(
                    torch.load(
                        os.path.join(opt.model_save_file,
                                     f'net_F_d_{domain}.pth')))
        C.load_state_dict(
            torch.load(os.path.join(opt.model_save_file, f'netC.pth')))
        D.load_state_dict(
            torch.load(os.path.join(opt.model_save_file, f'netD.pth')))
        log.info('Evaluating validation sets:')
        acc = {}
        for domain in opt.all_domains:
            acc[domain] = evaluate(domain, dev_loaders[domain], F_s,
                                   F_d[domain] if domain in F_d else None, C)
        avg_acc = sum([acc[d] for d in opt.dev_domains]) / len(opt.dev_domains)
        log.info(f'Average validation accuracy: {avg_acc}')
        log.info('Evaluating test sets:')
        test_acc = {}
        for domain in opt.all_domains:
            test_acc[domain] = evaluate(domain, test_loaders[domain], F_s,
                                        F_d[domain] if domain in F_d else None, C)
        avg_test_acc = sum([test_acc[d] for d in opt.dev_domains]) / len(opt.dev_domains)
        log.info(f'Average test accuracy: {avg_test_acc}')
        return {'valid': acc, 'test': test_acc}

    # training
    best_acc, best_avg_acc = defaultdict(float), 0.0
    for epoch in range(opt.max_epoch):
        F_s.train()
        C.train()
        D.train()
        for f in F_d.values():
            f.train()

        # training accuracy
        correct, total = defaultdict(int), defaultdict(int)
        # D accuracy
        d_correct, d_total = 0, 0
        # conceptually view 1 epoch as 1 epoch of the first domain
        num_iter = len(train_loaders[opt.domains[0]])
        for i in tqdm(range(num_iter)):
            # D iterations
            utils.freeze_net(F_s)
            map(utils.freeze_net, F_d.values())
            utils.freeze_net(C)
            utils.unfreeze_net(D)
            # WGAN n_critic trick since D trains slower
            n_critic = opt.n_critic
            if opt.wgan_trick:
                if opt.n_critic > 0 and ((epoch == 0 and i < 25) or i % 500 == 0):
                    n_critic = 100

            for _ in range(n_critic):
                D.zero_grad()
                loss_d = {}
                # train on both labeled and unlabeled domains
                for domain in opt.all_domains:
                    # targets not used
                    d_inputs, _ = utils.endless_get_next_batch(
                        unlabeled_loaders, unlabeled_iters, domain)
                    d_targets = utils.get_domain_label(opt.loss, domain,
                                                       len(d_inputs[1]))
                    shared_feat = F_s(d_inputs)
                    d_outputs = D(shared_feat)
                    # D accuracy
                    _, pred = torch.max(d_outputs, 1)
                    d_total += len(d_inputs[1])
                    if opt.loss.lower() == 'l2':
                        _, tgt_indices = torch.max(d_targets, 1)
                        d_correct += (pred == tgt_indices).sum().data[0]
                        l_d = functional.mse_loss(d_outputs, d_targets)
                        l_d.backward()
                    else:
                        d_correct += (pred == d_targets).sum().data[0]
                        l_d = functional.nll_loss(d_outputs, d_targets)
                        l_d.backward()
                    loss_d[domain] = l_d.data[0]
                optimizerD.step()

            # F&C iteration
            utils.unfreeze_net(F_s)
            map(utils.unfreeze_net, F_d.values())
            utils.unfreeze_net(C)
            utils.freeze_net(D)
            if opt.fix_emb:
                utils.freeze_net(F_s.word_emb)
                map(utils.freeze_net, F_d.values())
            F_s.zero_grad()
            for f_d in F_d.values():
                f_d.zero_grad()
            C.zero_grad()
            for domain in opt.domains:
                inputs, targets = utils.endless_get_next_batch(
                    train_loaders, train_iters, domain)
                targets = utils.get_var(targets)
                shared_feat = F_s(inputs)
                domain_feat = F_d[domain](inputs)
                features = torch.cat((shared_feat, domain_feat), dim=1)
                c_outputs = C(features)
                l_c = functional.nll_loss(c_outputs, targets)
                l_c.backward(retain_graph=True)
                _, pred = torch.max(c_outputs, 1)
                total[domain] += targets.size(0)
                correct[domain] += (pred == targets).sum().data[0]
            # update F with D gradients on all domains
            for domain in opt.all_domains:
                d_inputs, _ = utils.endless_get_next_batch(
                    unlabeled_loaders, unlabeled_iters, domain)
                shared_feat = F_s(d_inputs)
                d_outputs = D(shared_feat)
                if opt.loss.lower() == 'gr':
                    d_targets = utils.get_domain_label(opt.loss, domain,
                                                       len(d_inputs[1]))
                    l_d = functional.nll_loss(d_outputs, d_targets)
                    if opt.lambd > 0:
                        l_d *= -opt.lambd
                elif opt.loss.lower() == 'bs':
                    d_targets = utils.get_random_domain_label(
                        opt.loss, len(d_inputs[1]))
                    l_d = functional.kl_div(d_outputs, d_targets,
                                            size_average=False)
                    if opt.lambd > 0:
                        l_d *= opt.lambd
                elif opt.loss.lower() == 'l2':
                    d_targets = utils.get_random_domain_label(
                        opt.loss, len(d_inputs[1]))
                    l_d = functional.mse_loss(d_outputs, d_targets)
                    if opt.lambd > 0:
                        l_d *= opt.lambd
                l_d.backward()
            optimizer.step()

        # end of epoch
        log.info('Ending epoch {}'.format(epoch + 1))
        if d_total > 0:
            log.info('D Training Accuracy: {}%'.format(100.0 * d_correct / d_total))
        log.info('Training accuracy:')
        log.info('\t'.join(opt.domains))
        log.info('\t'.join(
            [str(100.0 * correct[d] / total[d]) for d in opt.domains]))
        log.info('Evaluating validation sets:')
        acc = {}
        for domain in opt.dev_domains:
            acc[domain] = evaluate(domain, dev_loaders[domain], F_s,
                                   F_d[domain] if domain in F_d else None, C)
        avg_acc = sum([acc[d] for d in opt.dev_domains]) / len(opt.dev_domains)
        log.info(f'Average validation accuracy: {avg_acc}')
        log.info('Evaluating test sets:')
        test_acc = {}
        for domain in opt.dev_domains:
            test_acc[domain] = evaluate(domain, test_loaders[domain], F_s,
                                        F_d[domain] if domain in F_d else None, C)
        avg_test_acc = sum([test_acc[d] for d in opt.dev_domains]) / len(opt.dev_domains)
        log.info(f'Average test accuracy: {avg_test_acc}')
        if avg_acc > best_avg_acc:
            log.info(f'New best average validation accuracy: {avg_acc}')
            best_acc['valid'] = acc
            best_acc['test'] = test_acc
            best_avg_acc = avg_acc
            with open(os.path.join(opt.model_save_file, 'options.pkl'), 'wb') as ouf:
                pickle.dump(opt, ouf)
            torch.save(F_s.state_dict(),
                       '{}/netF_s.pth'.format(opt.model_save_file))
            for d in opt.domains:
                if d in F_d:
                    torch.save(
                        F_d[d].state_dict(),
                        '{}/net_F_d_{}.pth'.format(opt.model_save_file, d))
            torch.save(C.state_dict(), '{}/netC.pth'.format(opt.model_save_file))
            torch.save(D.state_dict(), '{}/netD.pth'.format(opt.model_save_file))

    # end of training
    log.info(f'Best average validation accuracy: {best_avg_acc}')
    return best_acc
# start training dictionary
logging.info("batch_size: %s, dict_num_iter %s, train num_iter %s" %
             (str(opt.batch_size), str(dict_num_iter), str(num_iter)))
for epoch in range(opt.dict_iteration):
    epoch_start = time.time()
    # sum_dict_cost = 0.0
    correct_1, total_1 = 0, 0
    model.train()
    for i in range(dict_num_iter):
        dict_inputs, _, dict_sentences, dict_char_inputs, dict_targets = utils.endless_get_next_batch(
            dict_loader, dict_iter)
        dict_targets = utils.get_var(dict_targets)
        dict_batch_output = model(dict_inputs, dict_char_inputs)
        dict_cost = criterion(dict_batch_output, dict_targets)
        # sum_dict_cost += dict_cost.item()
        # for dict training accuracy
        total_1 += len(dict_inputs[1])
        _, dict_pred = torch.max(dict_batch_output, 1)
        correct_1 += (dict_pred == dict_targets).sum().item()
        dict_cost.backward()
        optimizer.step()
        model.zero_grad()
    epoch_finish = time.time()
def backward(self):
    assert ('X' in self.phs)
    assert ('Y' in self.phs)
    assert ('fX' in self.vars)

    fX = self.vars['fX']
    Y = self.phs['Y']
    Y_one_hot = tf.one_hot(Y, depth=self.layer_sizes[-1], dtype=tf.float32)
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fX, labels=Y_one_hot)
    loss = tf.reduce_mean(loss)
    self.vars['loss'] = loss
    self.vars['losses'] = {}
    self.vars['losses'][0] = loss

    var_list = self.get_trainable_vars()
    self.vars['orig_var_list'] = var_list

    # Fisher stuff
    print('Creating fisher ops')
    fisher_current = [
        utils.get_var("fisher_diag_%sc" % var.name.split(":")[0].replace("/", "_"))
        for var in var_list
    ]
    grads = [
        tf.gradients(self.vars['batch_log_likelihood'], var_list)
        for var_list in self.objs['fisher_var_lists']
    ]
    fisher_delta = []
    for i in range(len(self.objs['fisher_ws'])):
        fisher_delta += [tf.add_n([tf.square(g[i]) for g in grads])]
    fisher_sum_up_ops = [
        tf.assign_add(fc, fd) for fc, fd in zip(fisher_current, fisher_delta)
    ]
    self.objs['fisher_sum_up_ops'] = fisher_sum_up_ops

    opt = tf.train.AdamOptimizer(self.lr)
    self.objs['opt'] = opt
    # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # with tf.control_dependencies(update_ops):
    # print("Trainable vars: %s" % str(var_list))
    print("Trainable vars:")
    self.print_vars(var_list)
    op = self.objs['opt'].minimize(loss, var_list=var_list)
    self.vars['train_op'] = op
    self.vars['train_ops'] = {}
    self.vars['train_ops'][0] = op

    predictions = tf.argmax(tf.nn.softmax(fX), axis=1)
    predictions = tf.cast(predictions, tf.uint8)  # cast to uint8, like Y
    self.vars['predictions'] = predictions
    acc = tf.equal(Y, predictions)
    acc = tf.cast(acc, tf.float32)  # For averaging, first cast bool to float32
    acc = tf.reduce_mean(acc)
    self.vars['acc'] = acc
def forward(self):
    X = self.phs['X']
    if not self.embedding:
        X = tf.cast(X, tf.float32) * (1.0 / 255)
    layer = self.apply_feature_extractor(X)

    fisher_ws = []
    fisher_diags = []
    fisher_diagcs = []
    fisher_old_ws = []
    n_layers = len(self.layer_sizes) - 1
    for i in range(n_layers):
        layer_name = "d%d" % (i + 1)
        layer = utils.dense2(layer, self.layer_sizes[i], self.layer_sizes[i + 1],
                             name=layer_name)
        print('Applied dense (%d, %d) of name %s' %
              (self.layer_sizes[i], self.layer_sizes[i + 1], layer_name))

        w = utils.get_var("%s/w" % layer_name)
        fisher_w_name = "fisher_diag_%s_w" % layer_name
        fisher_wc_name = "fisher_diag_%s_wc" % layer_name
        fisher_old_w_name = "fisher_old_%s_w" % layer_name
        self.vars[fisher_w_name] = tf.Variable(tf.zeros_like(w), name=fisher_w_name)
        self.vars[fisher_wc_name] = tf.Variable(tf.zeros_like(w), name=fisher_wc_name)
        self.vars[fisher_old_w_name] = tf.Variable(tf.zeros_like(w), name=fisher_old_w_name)
        fisher_ws += [w]
        fisher_diags += [self.vars[fisher_w_name]]
        fisher_diagcs += [self.vars[fisher_wc_name]]
        fisher_old_ws += [self.vars[fisher_old_w_name]]

        b = utils.get_var("%s/b" % layer_name)
        fisher_b_name = "fisher_diag_%s_b" % layer_name
        fisher_bc_name = "fisher_diag_%s_bc" % layer_name
        fisher_old_b_name = "fisher_old_%s_b" % layer_name
        self.vars[fisher_b_name] = tf.Variable(tf.zeros_like(b), name=fisher_b_name)
        self.vars[fisher_bc_name] = tf.Variable(tf.zeros_like(b), name=fisher_bc_name)
        self.vars[fisher_old_b_name] = tf.Variable(tf.zeros_like(b), name=fisher_old_b_name)
        fisher_ws += [b]
        fisher_diags += [self.vars[fisher_b_name]]
        fisher_diagcs += [self.vars[fisher_bc_name]]
        fisher_old_ws += [self.vars[fisher_old_b_name]]
        print('Created zero fishers')

        if i + 1 != len(self.layer_sizes) - 1:
            if self.use_dropout:
                layer = self.activation(layer)
                layer = tf.keras.layers.Dropout(
                    rate=self.dropoutv,
                    seed=self.seed)(layer, training=self.glob_training_ph)
                print('Applied activation -> dropout')
            else:
                layer = self.activation(layer)
                print('Applied activation')

    self.vars['fX'] = layer
    self.objs['fisher_ws'] = fisher_ws
    self.objs['fisher_diagcs'] = fisher_diagcs
    self.objs['fisher_diags'] = fisher_diags
    self.objs['fisher_old_ws'] = fisher_old_ws

    # Create fisher graph
    print('Creating fisher batch_log_likelihood')
    fisher_X = tf.cast(self.phs['fisher_X'], tf.float32) * (1.0 / 255)
    fisher_Y = tf.one_hot(self.phs['fisher_Y'], depth=self.layer_sizes[-1],
                          dtype=tf.float32)
    if self.feature_extractor_needed:
        fisher_X = self.apply_feature_extractor(fisher_X)
        fisher_Xs = [
            tf.reshape(fx, shape=(1, self.layer_sizes[0]))
            for fx in tf.unstack(fisher_X, num=self.fisher_batch_size, axis=0)
        ]
    else:
        fisher_Xs = [
            tf.reshape(fx, shape=(1, *self.it.reshape_dims))
            for fx in tf.unstack(fisher_X, num=self.fisher_batch_size, axis=0)
        ]
    fisher_Ys = tf.unstack(fisher_Y, num=self.fisher_batch_size, axis=0)

    log_likelihoods = []
    fisher_var_lists = []
    for i in range(self.fisher_batch_size):
        raw_output = fisher_Xs[i]
        fisher_var_list = []
        for j in range(n_layers):
            layer_name = "d%d" % (j + 1)
            w = tf.identity(utils.get_var("%s/w" % layer_name))
            b = tf.identity(utils.get_var("%s/b" % layer_name))
            fisher_var_list += [w, b]
            raw_output = tf.add(tf.matmul(raw_output, w), b)
            if j + 1 != len(self.layer_sizes) - 1:
                raw_output = self.activation(raw_output)  # No dropout; TODO
        log_likelihood = tf.multiply(fisher_Ys[i], tf.nn.log_softmax(raw_output))
        log_likelihoods += [log_likelihood]
        fisher_var_lists += [fisher_var_list]

    batch_log_likelihood = tf.reduce_sum(log_likelihoods)
    self.vars['batch_log_likelihood'] = batch_log_likelihood
    self.objs['fisher_var_lists'] = fisher_var_lists
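# Note: in the two snippets above, utils.get_var takes a single variable name
# (e.g. "d1/w") and returns the matching tf.Variable. That helper is not shown; a
# minimal sketch, assuming it scans the graph's global variables for an exact name
# match (the ":0" suffix handling is an assumption):
import tensorflow as tf

def get_var(name):
    # Find a global variable whose full name matches "<name>:0".
    for v in tf.global_variables():
        if v.name == name + ":0":
            return v
    return None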
def __init__(self, train_loader, test_loader, embed_size=10, product_way='in',
             layer_size=None, layer_act=None, layer_keeps=None, opt_algo='gd',
             learning_rate=0.01, epoch=10, early_stop_round=None, l2=None,
             random_seed=None):
    self.graph = tf.Graph()
    self.train_loader = train_loader
    self.test_loader = test_loader
    self.embed_size = embed_size
    self.product_way = product_way
    self.layer_size = layer_size
    self.layer_act = layer_act
    self.layer_keeps = layer_keeps
    self.num_fields = len(config.FIELD_SIZES)
    self.var_list = []
    for idx in range(self.num_fields):
        self.var_list.append([
            'embed_{}'.format(idx),
            [config.FIELD_SIZES[idx], self.embed_size], 'xavier'
        ])
    num_pairs = int(self.num_fields * (self.num_fields - 1) / 2)
    if self.product_way == 'out':
        self.var_list.append([
            'kernel', [self.embed_size, num_pairs, self.embed_size], 'xavier'
        ])
    in_size = self.num_fields * self.embed_size + num_pairs
    for idx in range(len(layer_size)):
        self.var_list.append(
            ['w_{}'.format(idx), [in_size, layer_size[idx]], 'xavier'])
        self.var_list.append(
            ['b_{}'.format(idx), [layer_size[idx]], 'zero'])
        in_size = layer_size[idx]
    self.var_dict = utils.get_var(self.var_list)

    self.opt_algo = opt_algo
    self.learning_rate = learning_rate
    self.epoch = epoch
    self.early_stop_round = early_stop_round
    self.l2 = l2
    self.random_seed = random_seed
    self.time_scores = []
    self.train_scores = []
    self.test_scores = []

    # with self.graph.as_default():
    if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
    self.X = [
        tf.sparse_placeholder(config.DTYPE) for n in range(self.num_fields)
    ]
    self.y = tf.placeholder(config.DTYPE)

    with tf.variable_scope('Embedding_Layer'):
        w_embed = [
            self.var_dict['embed_{}'.format(idx)]
            for idx in range(self.num_fields)
        ]
        xw = tf.concat([
            tf.sparse_tensor_dense_matmul(self.X[idx], w_embed[idx])
            for idx in range(self.num_fields)
        ], 1)
        layer_out = xw
        xw3d = tf.reshape(xw, [-1, self.num_fields, self.embed_size])

    with tf.variable_scope('Product_Layer'):
        row = []
        col = []
        for i in range(self.num_fields - 1):
            for j in range(i + 1, self.num_fields):
                row.append(i)
                col.append(j)
        p = tf.transpose(tf.gather(tf.transpose(xw3d, [1, 0, 2]), row), [1, 0, 2])
        q = tf.transpose(tf.gather(tf.transpose(xw3d, [1, 0, 2]), col), [1, 0, 2])
        p = tf.reshape(p, [-1, num_pairs, self.embed_size])
        q = tf.reshape(q, [-1, num_pairs, self.embed_size])
        if self.product_way == 'in':
            product = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])
        else:
            k = self.var_dict['kernel']
            p = tf.expand_dims(p, 1)
            product = tf.reduce_sum(
                tf.multiply(
                    tf.transpose(tf.reduce_sum(tf.multiply(p, k), -1),
                                 [0, 2, 1]), q), -1)
        layer_out = tf.concat([layer_out, product], 1)

    for idx in range(len(layer_size)):
        with tf.variable_scope('Hiden_Layer_{}'.format(idx)):
            wi = self.var_dict['w_{}'.format(idx)]
            bi = self.var_dict['b_{}'.format(idx)]
            layer_out = tf.nn.dropout(
                utils.activate(
                    tf.matmul(layer_out, wi) + bi, self.layer_act[idx]),
                self.layer_keeps[idx])

    layer_out = tf.squeeze(layer_out)
    self.y_preds = tf.sigmoid(layer_out)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y,
                                                logits=layer_out))
    if self.l2 is not None:
        for idx in range(self.num_fields):
            self.loss += self.l2 * tf.nn.l2_loss(
                self.var_dict['embed_{}'.format(idx)])
        for idx in range(len(self.layer_size)):
            self.loss += self.l2 * tf.nn.l2_loss(
                self.var_dict['w_{}'.format(idx)])
    self.optimizer = utils.get_optimizer(self.opt_algo, self.learning_rate,
                                         self.loss)
    self.sess = tf.Session()
    tf.global_variables_initializer().run(session=self.sess)
def main():
    global_step = tf.train.get_or_create_global_step()
    images, labels = read_data(args.data)
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (images["train"], labels["train"]))
    train_dataset = train_dataset.map(_pre_process).shuffle(5000).batch(args.batch_size)
    train_iter = train_dataset.make_initializable_iterator()
    x_train, y_train = train_iter.get_next()

    test_dataset = tf.data.Dataset.from_tensor_slices(
        (images["test"], labels["test"]))
    test_dataset = test_dataset.shuffle(5000).batch(args.batch_size)
    test_iter = test_dataset.make_initializable_iterator()
    x_test, y_test = test_iter.get_next()

    genotype = eval("genotypes.%s" % args.arch)
    train_logits, aux_logits = Model(x_train, y_train, True, args.init_channels,
                                     CLASS_NUM, args.layers, args.auxiliary, genotype)
    train_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_train,
                                                       logits=train_logits))
    w_regularization_loss = tf.add_n(
        utils.get_var(tf.losses.get_regularization_losses(), 'lw')[1])
    train_loss += 1e4 * args.weight_decay * w_regularization_loss
    # tf.summary.scalar('train_loss', train_loss)

    if args.auxiliary:
        loss_aux = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_train,
                                                           logits=aux_logits))
        train_loss += args.auxiliary_weight * loss_aux

    lr = tf.train.cosine_decay(args.learning_rate, global_step,
                               50000 / args.batch_size * args.epochs)
    accuracy = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(train_logits, y_train, 1), tf.float32))

    test_logits, _ = Model(x_test, y_test, False, args.init_channels, CLASS_NUM,
                           args.layers, args.auxiliary, genotype)
    test_accuracy = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(test_logits, y_test, 1), tf.float32))
    test_accuracy_top5 = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(test_logits, y_test, 5), tf.float32))
    tf.summary.scalar('test_accuracy_top1', test_accuracy)

    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        opt = tf.train.MomentumOptimizer(lr, args.momentum)
        opt = opt.minimize(train_loss, global_step)

    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    writer = tf.summary.FileWriter(output_dir + TIMESTAMP, sess.graph)
    saver = tf.train.Saver(max_to_keep=1)
    sess.run(tf.global_variables_initializer())

    test_batch = 0
    for e in range(args.epochs):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        sess.run(train_iter.initializer)
        while True:
            try:
                _, loss, acc, current_lr, gs = sess.run(
                    [opt, train_loss, accuracy, lr, global_step])
                objs.update(loss, args.batch_size)
                top1.update(acc, args.batch_size)
                if gs % args.report_freq == 0:
                    print("epochs {} steps {} current lr is {:.3f} loss is {} train_acc is {}"
                          .format(e, gs, current_lr, objs.avg, top1.avg))
            except tf.errors.OutOfRangeError:
                print('-' * 80)
                print("end of a train epoch")
                break
        if e % 5 == 0:
            test_top1 = utils.AvgrageMeter()
            sess.run(test_iter.initializer)
            while True:
                try:
                    test_batch += 1
                    summary, test_acc = sess.run([merged, test_accuracy])
                    test_top1.update(test_acc, args.batch_size)
                    if test_batch % 100:
                        writer.add_summary(summary, test_batch)
                except tf.errors.OutOfRangeError:
                    print("******************* epochs {} test_acc is {}".format(
                        e, test_top1.avg))
                    saver.save(sess, output_dir + "model", test_batch)
                    print('-' * 80)
                    print("end of a test epoch")
                    break
def main():
    if not os.path.isfile(args.model_name) or args.continue_train:
        if args.continue_train:
            print("Loading tagger model from " + args.model_name + "...")
            tagger_model = torch.load(args.model_name,
                                      map_location=lambda storage, loc: storage)
            if args.gpu:
                tagger_model = tagger_model.cuda()
        else:
            print("Creating new model...")
            tagger_model = factorial_crf_tagger.DynamicCRF(args, word_freq, langs,
                                                           len(char_to_ix),
                                                           len(word_to_ix),
                                                           unique_tags)
            if args.gpu:
                tagger_model = tagger_model.cuda()

        if args.unit_test:
            tests = unit.TestBP()
            labelSum = sum([tag.size() for tag in tagger_model.uniqueTags])
            # Create dummy LSTM features
            lstm_feats = utils.get_var(
                torch.Tensor(torch.randn(len(training_data[0][0]), labelSum)),
                args.gpu)
            tests.setUp(tagger_model, training_data[0][1],
                        len(training_data[0][0]), lstm_feats)

        loss_function = nn.NLLLoss()  # Provide (N,C) log probability values as input
        # loss_function = nn.CrossEntropyLoss()

        if args.optim == "sgd":
            optimizer = optim.SGD(tagger_model.parameters(), lr=1.0)
        elif args.optim == "adam":
            optimizer = optim.Adam(tagger_model.parameters())
        elif args.optim == "adagrad":
            optimizer = optim.Adagrad(tagger_model.parameters())

        print("Training FCRF-LSTM model...")
        patience_counter = 0
        prev_avg_tok_accuracy = 0
        for epoch in xrange(args.epochs):
            accuracies = []
            sent = 0
            batch_idx = 0
            tokens = 0
            cum_loss = 0
            correct = 0
            random.shuffle(train_order)
            print("Starting epoch %d .." % epoch)
            start_time = time.time()
            for start_idx, end_idx in train_order:
                train_data = training_data[start_idx:end_idx + 1]
                train_sents = [elem[0] for elem in train_data]
                morph_sents = [elem[1] for elem in train_data]
                lang_ids = train_lang_ids[start_idx:end_idx + 1]
                sent += end_idx - start_idx + 1
                tokens += sum([len(sentence) for sentence in train_sents])
                batch_idx += 1

                if batch_idx % 5 == 0:
                    print("[Epoch %d] Sentence %d/%d, Tokens %d, Cum_Loss: %f, "
                          "Time: %f, Tokens/Sec: %d"  # Average Accuracy: %f
                          % (epoch, sent, len(training_data), tokens,
                             cum_loss / tokens, time.time() - start_time,
                             tokens / (time.time() - start_time)))  # , correct/tokens

                tagger_model.zero_grad()
                sents_in = []
                for i, sentence in enumerate(train_sents):
                    sent_in = []
                    lang_id = []
                    if args.model_type == "universal":
                        lang_id = [lang_ids[i]]
                    for word in sentence:
                        s_appended_word = lang_id + [c for c in word] + lang_id
                        word_in = utils.prepare_sequence(s_appended_word,
                                                         char_to_ix, args.gpu)
                        # targets = utils.prepare_sequence(s_appended_word[1:], char_to_ix, args.gpu)
                        sent_in.append(word_in)
                    sents_in.append(sent_in)
                    # sents_in = torch.stack(sent_in)

                tagger_model.char_hidden = tagger_model.init_hidden()
                tagger_model.hidden = tagger_model.init_hidden()

                if args.sum_word_char:
                    all_word_seq = []
                    for sentence in train_sents:
                        word_seq = utils.prepare_sequence(sentence, word_to_ix,
                                                          args.gpu)
                        all_word_seq.append(word_seq)
                else:
                    all_word_seq = None

                if args.model_type == "specific" or args.model_type == "joint":
                    lstm_feat_sents, graph, maxVal = tagger_model(
                        sents_in, morph_sents, word_idxs=all_word_seq,
                        langs=lang_ids)
                else:
                    lstm_feat_sents, graph, maxVal = tagger_model(
                        sents_in, morph_sents, word_idxs=all_word_seq)

                # Skip parameter updates if marginals are not within a threshold
                if maxVal > 10.00:
                    print("Skipping parameter updates...")
                    continue

                # Compute the loss, gradients, and update the parameters
                all_factors_batch = []
                for k in range(len(train_sents)):
                    all_factors = tagger_model.get_scores(graph, morph_sents[k],
                                                          lstm_feat_sents[k], k)
                    all_factors_batch.append(all_factors)

                loss = tagger_model.compute_loss(all_factors_batch, loss_function)
                # print("Loss:", loss)
                cum_loss += loss.cpu().data[0]
                loss.backward()
                # tagger_model.gradient_check(all_factors_batch[0])
                optimizer.step()

            print("Loss: %f" % loss.cpu().data.numpy())
            print("Saving model..")
            torch.save(tagger_model, args.model_name)

            if (epoch + 1) % 4 == 0:
                print("Evaluating on dev set...")
                avg_tok_accuracy, f1_score = eval_on_dev(tagger_model,
                                                         curEpoch=epoch)
                # Early Stopping
                if avg_tok_accuracy <= prev_avg_tok_accuracy:
                    patience_counter += 1
                    if patience_counter == args.patience:
                        print("Model hasn't improved on dev set for %d epochs. "
                              "Stopping Training." % patience_counter)
                        break
                prev_avg_tok_accuracy = avg_tok_accuracy
    else:
        print("Loading tagger model from " + args.model_name + "...")
        tagger_model = torch.load(args.model_name,
                                  map_location=lambda storage, loc: storage)
        if args.gpu:
            tagger_model = tagger_model.cuda()
        else:
            tagger_model.gpu = False

    if args.visualize:
        print("[Visualization Mode]")
        utils.plot_heatmap(unique_tags, tagger_model.pairwise_weights, "pair")
        # utils.plot_heatmap(unique_tags, tagger_model.transition_weights, "trans")
        # utils.plot_heatmap(unique_tags, tagger_model.lang_pairwise_weights, "pair", lang_idx=1)
        print("Stored plots in figures/ directory!")

    if args.test:
        avg_tok_accuracy, f1_score = eval_on_dev(tagger_model, dev_or_test="test")
def compute_unrolled_step(x_train, y_train, x_valid, y_valid, w_var, train_loss, lr):
    arch_var = utils.get_var(tf.trainable_variables(), 'arch_var')[1]
    _, unrolled_train_loss = Model(x_train, y_train, True, args.init_channels,
                                   CLASS_NUM, args.layers, name="unrolled_model")
    unrolled_w_var = utils.get_var(tf.trainable_variables(), 'unrolled_model')[1]
    copy_weight_opts = [v_.assign(v) for v_, v in zip(unrolled_w_var, w_var)]

    # w'
    with tf.control_dependencies(copy_weight_opts):
        unrolled_optimizer = tf.train.GradientDescentOptimizer(lr)
        unrolled_optimizer = unrolled_optimizer.minimize(unrolled_train_loss,
                                                         var_list=unrolled_w_var)

    _, valid_loss = Model(x_valid, y_valid, True, args.init_channels, CLASS_NUM,
                          args.layers, name="unrolled_model")
    tf.summary.scalar('valid_loss', valid_loss)

    with tf.control_dependencies([unrolled_optimizer]):
        valid_grads = tf.gradients(valid_loss, unrolled_w_var)

    r = 1e-2
    R = r / tf.global_norm(valid_grads)

    optimizer_pos = tf.train.GradientDescentOptimizer(R)
    optimizer_pos = optimizer_pos.apply_gradients(zip(valid_grads, w_var))
    optimizer_neg = tf.train.GradientDescentOptimizer(-2 * R)
    optimizer_neg = optimizer_neg.apply_gradients(zip(valid_grads, w_var))
    optimizer_back = tf.train.GradientDescentOptimizer(R)
    optimizer_back = optimizer_back.apply_gradients(zip(valid_grads, w_var))

    with tf.control_dependencies([optimizer_pos]):
        train_grads_pos = tf.gradients(train_loss, arch_var)
    with tf.control_dependencies([optimizer_neg]):
        train_grads_neg = tf.gradients(train_loss, arch_var)
    with tf.control_dependencies([optimizer_back]):
        leader_opt = tf.train.AdamOptimizer(args.arch_learning_rate, 0.5, 0.999)
        leader_grads = leader_opt.compute_gradients(valid_loss, var_list=arch_var)
    for i, (g, v) in enumerate(leader_grads):
        leader_grads[i] = (
            g - args.learning_rate * tf.divide(train_grads_pos[i] - train_grads_neg[i],
                                               2 * R), v)
    leader_opt = leader_opt.apply_gradients(leader_grads)
    return leader_opt
def main():
    global_step = tf.train.get_or_create_global_step()
    images, labels = read_data(args.data, args.train_portion)
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (images["train"], labels["train"]))
    train_dataset = train_dataset.map(_pre_process).shuffle(100).batch(
        args.batch_size)
    train_iter = train_dataset.make_initializable_iterator()
    x_train, y_train = train_iter.get_next()

    valid_dataset = tf.data.Dataset.from_tensor_slices(
        (images["valid"], labels["valid"]))
    valid_dataset = valid_dataset.shuffle(100).batch(args.batch_size)
    valid_iter = valid_dataset.make_initializable_iterator()
    x_valid, y_valid = valid_iter.get_next()

    logits, train_loss = Model(x_train, y_train, True, args.init_channels,
                               CLASS_NUM, args.layers)
    lr = tf.train.cosine_decay(args.learning_rate, global_step,
                               50000 / args.batch_size * args.epochs,
                               args.learning_rate_min)
    accuracy = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(logits, y_train, 1), tf.float32))

    w_regularization_loss = tf.add_n(
        utils.get_var(tf.losses.get_regularization_losses(), 'model')[1])
    train_loss += 1e4 * args.weight_decay * w_regularization_loss
    tf.summary.scalar('train_loss', train_loss)

    w_var = utils.get_var(tf.trainable_variables(), 'model')[1]
    leader_opt = compute_unrolled_step(x_train, y_train, x_valid, y_valid, w_var,
                                       train_loss, lr)
    with tf.control_dependencies(
        [leader_opt, tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))]):
        follower_opt = tf.train.MomentumOptimizer(lr, args.momentum)
        follower_grads = tf.gradients(train_loss, w_var)
        clipped_gradients, norm = tf.clip_by_global_norm(follower_grads,
                                                         args.grad_clip)
        follower_opt = follower_opt.apply_gradients(
            zip(clipped_gradients, w_var), global_step)

    infer_logits, infer_loss = Model(x_valid, y_valid, False, args.init_channels,
                                     CLASS_NUM, args.layers)
    valid_accuracy = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(infer_logits, y_valid, 1), tf.float32))

    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    writer = tf.summary.FileWriter(output_dir + TIMESTAMP, sess.graph)
    saver = tf.train.Saver(max_to_keep=1)
    sess.run(tf.global_variables_initializer())
    # saver.restore(sess, tf.train.latest_checkpoint(output_dir))

    genotype_record_file = open(output_dir + "genotype_record_file.txt", 'w')
    for e in range(args.epochs):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        test_top1 = utils.AvgrageMeter()
        sess.run([train_iter.initializer, valid_iter.initializer])
        while True:
            try:
                _, loss, acc, current_lr, gs = sess.run(
                    [follower_opt, train_loss, accuracy, lr, global_step])
                objs.update(loss, args.batch_size)
                top1.update(acc, args.batch_size)
                if gs % args.report_freq == 0:
                    print("epochs {} steps {} current lr is {:.3f} loss is {} train_acc is {}"
                          .format(e, gs, current_lr, objs.avg, top1.avg))
                    summary = sess.run(merged)
                    writer.add_summary(summary, gs)
            except tf.errors.OutOfRangeError:
                print('-' * 80)
                print("end of an epoch")
                break

        genotype = get_genotype(sess)
        print("genotype is {}".format(genotype))
        genotype_record_file.write("{}".format(genotype) + '\n')

        valid_top1 = utils.AvgrageMeter()
        sess.run(valid_iter.initializer)
        while True:
            try:
                valid_acc = sess.run(valid_accuracy)
                test_top1.update(valid_acc, args.batch_size)
            except tf.errors.OutOfRangeError:
                print("******************* epochs {} valid_acc is {}".format(
                    e, test_top1.avg))
                saver.save(sess, output_dir + "model", gs)
                print('-' * 80)
                print("end of a valid epoch")
                break
class Config(object):
    """
    Config Class hosts all the environment based variables for the project
    """
    # Postgres TA database settings
    TA_PG_HOST = get_var(
        'TA_PG_HOST',
        'dipperprod-read.canbbkmz75pp.ap-south-1.rds.amazonaws.com')
    TA_PG_PORT = get_var('TA_PG_PORT', '5432')
    TA_PG_DATABASE = get_var('TA_PG_DATABASE', 'gps_development_postgres')
    TA_PG_USERNAME = get_var('TA_PG_USERNAME', 'ec2-user')
    TA_PG_PASSWORD = get_var('TA_PG_PASSWORD', 'tester')

    # Postgres TH database settings
    TH_PG_HOST = get_var(
        'TH_PG_HOST',
        'dipperprodnew-truck-histories-replica.canbbkmz75pp.ap-south-1.rds.amazonaws.com'
    )
    TH_PG_PORT = get_var('TH_PG_PORT', '5432')
    TH_PG_DATABASE = get_var('TH_PG_DATABASE', 'gps_development_postgres')
    TH_PG_USERNAME = get_var('TH_PG_USERNAME', 'ec2-user')
    TH_PG_PASSWORD = get_var('TH_PG_PASSWORD', 'tester')

    # MySQL database settings
    MSQL_HOST = get_var('MSQL_HOST', '35.154.141.143')
    MSQL_PORT = get_var('MSQL_PORT', '3306')
    MSQL_DATABASE = get_var('MSQL_DATABASE', 'dipper_development')
    MSQL_USERNAME = get_var('MSQL_USERNAME', 'root')
    MSQL_PASSWORD = get_var('MSQL_PASSWORD', 'DipperRead123')

    def __init__(self, flag):
        self.gts_pg_connection = self._get_pg_connection(flag)
        self.th_pg_connection = self._get_pg_connection(flag)
        self.mysql_connection = self._get_mysql_connection()

    def _get_pg_connection(self, flag='TA') -> object:
        """
        Get Postgres Connection Object Using Psycopg2
        :param flag: flag is used to distinguish between TA (GTS TA App) & TH (Truck) conf
        :return: Connection object for Postgresql Psycopg2.
        """
        if flag == 'TA':
            connection = psycopg2.connect(user=Config.TA_PG_USERNAME,
                                          password=Config.TA_PG_PASSWORD,
                                          host=Config.TA_PG_HOST,
                                          port=Config.TA_PG_PORT,
                                          database=Config.TA_PG_DATABASE)
        else:
            connection = psycopg2.connect(user=Config.TH_PG_USERNAME,
                                          password=Config.TH_PG_PASSWORD,
                                          host=Config.TH_PG_HOST,
                                          port=Config.TH_PG_PORT,
                                          database=Config.TH_PG_DATABASE)
        return connection

    def _get_mysql_connection(self) -> object:
        """
        Get Mysql Connection Object.
        :return: Mysql Connection Object
        """
        mysql_conn = mysql.connector.connect(host=Config.MSQL_HOST,
                                             database=Config.MSQL_DATABASE,
                                             user=Config.MSQL_USERNAME,
                                             passwd=Config.MSQL_PASSWORD)
        return mysql_conn
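# Note: get_var in this Config class takes a variable name and a default, the usual
# contract of an environment-variable lookup helper. Its definition is not included
# in this listing; a minimal sketch under that assumption:
import os

def get_var(name, default=None):
    # Read a configuration value from the environment, falling back to the default.
    return os.environ.get(name, default)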
def __init__(self, train_loader, test_loader, embed_size=None, layer_size=None,
             layer_act=None, layer_keeps=None, opt_algo='gd', learning_rate=0.01,
             epoch=10, early_stop_round=None, l2=None, random_seed=None):
    self.graph = tf.Graph()
    self.train_loader = train_loader
    self.test_loader = test_loader
    self.embed_size = embed_size
    self.layer_size = layer_size
    self.layer_act = layer_act
    self.layer_keeps = layer_keeps
    self.num_fields = len(config.FIELD_SIZES)
    self.var_list = []
    for idx in range(self.num_fields):
        self.var_list.append([
            'embed_{}'.format(idx),
            [config.FIELD_SIZES[idx], self.embed_size[idx]], 'xavier'
        ])
    in_size = sum(self.embed_size)
    for idx in range(len(layer_size)):
        self.var_list.append(
            ['w_{}'.format(idx), [in_size, layer_size[idx]], 'xavier'])
        self.var_list.append(
            ['b_{}'.format(idx), [layer_size[idx]], 'zero'])
        in_size = layer_size[idx]
    self.var_dict = utils.get_var(self.var_list)

    self.opt_algo = opt_algo
    self.learning_rate = learning_rate
    self.epoch = epoch
    self.early_stop_round = early_stop_round
    self.l2 = l2
    self.random_seed = random_seed
    self.time_scores = []
    self.train_scores = []
    self.test_scores = []

    # with self.graph.as_default():
    if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
    self.X = [
        tf.sparse_placeholder(config.DTYPE) for n in range(self.num_fields)
    ]
    self.y = tf.placeholder(config.DTYPE)

    with tf.variable_scope('Dense_Real_Layer'):
        w_embed = [
            self.var_dict['embed_{}'.format(idx)]
            for idx in range(self.num_fields)
        ]
        xw = tf.concat([
            tf.sparse_tensor_dense_matmul(self.X[idx], w_embed[idx])
            for idx in range(self.num_fields)
        ], 1)
        layer_out = xw

    for idx in range(len(layer_size)):
        with tf.variable_scope('Hiden_Layer_{}'.format(idx)):
            wi = self.var_dict['w_{}'.format(idx)]
            bi = self.var_dict['b_{}'.format(idx)]
            layer_out = tf.nn.dropout(
                utils.activate(
                    tf.matmul(layer_out, wi) + bi, self.layer_act[idx]),
                self.layer_keeps[idx])

    layer_out = tf.squeeze(layer_out)
    self.y_preds = tf.sigmoid(layer_out)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y,
                                                logits=layer_out))
    if self.l2 is not None:
        for idx in range(self.num_fields):
            self.loss += self.l2 * tf.nn.l2_loss(
                self.var_dict['embed_{}'.format(idx)])
        for idx in range(len(self.layer_size)):
            self.loss += self.l2 * tf.nn.l2_loss(
                self.var_dict['w_{}'.format(idx)])
    self.optimizer = utils.get_optimizer(self.opt_algo, self.learning_rate,
                                         self.loss)
    self.sess = tf.Session()
    tf.global_variables_initializer().run(session=self.sess)
def rpc_subscribtion(request):
    from django.contrib.auth.models import User
    from django.core.exceptions import ValidationError
    uid = get_var(request, 'uid', None)
    tags = get_var(request, 'tags', None)
    if uid:
        try:
            uid = int(uid)
        except ValueError:
            return HttpResponse('"fail"')
        user = User.objects.filter(pk=uid)
        if not user:
            return HttpResponse('"fail"')
        user = user[0]
        if user.id == request.user.id:
            return HttpResponse('"cannot"')  # subscribe to yourself
        if request.user.attrs.has_key('beats'):
            if uid in request.user.attrs['beats']:
                a = request.user.attrs['beats']
                a.remove(uid)
                request.user.attrs['beats'] = a
            else:
                a = request.user.attrs['beats']
                a.append(uid)
                request.user.attrs['beats'] = a
        else:
            request.user.attrs['beats'] = [uid]
        request.user.save()
        if user.attrs.has_key('subscribers'):
            if request.user.id in user.attrs['subscribers']:
                a = user.attrs['subscribers']
                a.remove(request.user.id)
                user.attrs['subscribers'] = a
            else:
                a = user.attrs['subscribers']
                a.append(request.user.id)
                user.attrs['subscribers'] = a
        else:
            user.attrs['subscribers'] = [request.user.id]
        user.save()
        subs = Subscription.objects.filter(user=request.user)
        if subs:
            if user in subs[0].beats.all():
                subs[0].beats.remove(user)
        else:
            subs = Subscription(user=request.user)
            subs.save()
            subs.beats.add(user)
        return HttpResponse('"success"')
    elif tags:
        from tagging.validators import isTagList
        from tagging.models import TaggedItem
        try:
            isTagList(tags, {})
        except ValidationError:
            return HttpResponse('"fail"')
        item = Subscription.objects.filter(user=request.user)
        if not item:
            item = Subscription(user=request.user)
            item.save()
        else:
            item = item[0]
        item.tags = '%s%s' % (''.join(['%s, ' % tag.name for tag in item.tags]),
                              tags)
        return HttpResponse('"success"')