Example #1
def rank_phrase(case_file):
    """Rank phrases by how far their cell distribution is from uniform (KL divergence), highest first."""
    ph_dist_map = {}
    smoothing_factor = 0.0
    phrase_map, cell_map, cell_cnt = read_caseolap_result(case_file)
    unif = [1.0 / cell_cnt] * cell_cnt
    
    for ph in phrase_map:
        ph_vec = [x[1] for x in phrase_map[ph].items()]   # Modified by MILI
        if len(ph_vec) < cell_cnt:
            ph_vec += [0] * (cell_cnt - len(ph_vec))
        # smoothing
        ph_vec = [x + smoothing_factor for x in ph_vec]
        ph_vec = utils.l1_normalize(ph_vec)
        ph_dist_map[ph] = utils.kl_divergence(ph_vec, unif)
        
    ranked_list = sorted(ph_dist_map.items(), key=operator.itemgetter(1), reverse=True)
    
    return ranked_list
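
Every example on this page calls `utils.l1_normalize`, and most also call `utils.kl_divergence`. The actual `utils` module is not shown here; as a rough guide, here is a minimal sketch of what such helpers typically look like, assuming `l1_normalize` rescales a vector so it sums to 1 and `kl_divergence` is the standard discrete KL divergence:

import math

def l1_normalize(vec):
    # Rescale vec so its (absolute) values sum to 1; an all-zero input is returned unchanged.
    total = float(sum(abs(x) for x in vec))
    if total == 0.0:
        return list(vec)
    return [x / total for x in vec]

def kl_divergence(p, q):
    # Discrete KL divergence D(p || q); zero-probability terms of p contribute 0.
    return sum(pi * math.log(pi / qi) for pi, qi in zip(p, q) if pi > 0)

With definitions like these, `rank_phrase` scores a phrase higher the more its distribution over cells deviates from uniform.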
Example #2
    def scips_approximable_pi(lmdp,
                              gamma: float,
                              sigma: float,
                              time_horizon=100,
                              n_samples=1000) -> Policy:
        """Generate policy tensor under SCIPS assumption.

        Args:
            lmdp (FLMDP): FLMDP for which to make SCIPS approximable policy.
            gamma (float): Time discounting parameter used in the SCIPS,
                in [0.0, 1.0].
            sigma (float): Standard deviation of the noise added to the policy.
            time_horizon (int): Trajectory length.
            n_samples (int): Number of trajectories to simulate.

        Returns:
            policy (Distribution): Policy distribution over actions given the
                current history.

        """

        # Start with a random policy
        random_policy = FLMDP.random_pi(lmdp=lmdp)

        # Simulate some trajectories
        s_t, r_t, a_t = lmdp.simulate(policy=random_policy,
                                      time_horizon=time_horizon,
                                      n_samples=n_samples)

        # Fit the policy to SCIPS
        scips = sparsity_corrected_approx(states=s_t,
                                          actions=a_t,
                                          rewards=r_t,
                                          gamma=gamma,
                                          lmdp=lmdp)

        # Add noise
        for history_action in scips:
            scips[history_action] += np.random.normal(loc=0, scale=sigma)

        # Normalize the next action distribution
        for history in history_tuples(lmdp.state_size, lmdp.history_length):
            scips[history] = l1_normalize(scips[history])

        return scips
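
The final add-noise / re-normalize step follows a common pattern: perturb each probability, then project back onto a probability simplex with an L1 normalization. A standalone NumPy illustration of that pattern (toy numbers, not the FLMDP API; the clipping line is an addition of this sketch, while the method above relies on `l1_normalize` alone):

import numpy as np

rng = np.random.default_rng(0)
action_probs = np.array([0.5, 0.3, 0.2])   # distribution over actions for one history
noisy = action_probs + rng.normal(loc=0.0, scale=0.1, size=action_probs.shape)
noisy = np.clip(noisy, 0.0, None)          # keep entries non-negative
noisy = noisy / noisy.sum()                # L1 normalization back to a distribution
print(noisy, noisy.sum())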
Example #3
def expan(embs, l_prel_file, dp_file, lp_file, mode='EMB'):
	# Verify iterative seed expansion.
	# mode == 'EMB': phrase similarity is computed from the embeddings.
	# mode == 'DIS': phrase similarity comes from the L-P assignment.

	target_type = 'p'
	source_type = 'l'
	multiplier = 5
	thre_softmax = 0.5

	ori_embs = embs
	agg_embs = copy.copy(embs)
	pd_map = load_dp(dp_file, reverse=True)
	dp_map = load_edge_map(dp_file)
	lp_map = load_edge_map(lp_file)
	dist_map = {x:1 for x in embs[target_type]}
	vec_size = 0
	for d in ori_embs[target_type]:
		vec_size = len(ori_embs[target_type][d])
		break

	seeds_map = {}	# label : seed set
	all_seeds = set()
	with open(l_prel_file, 'r') as f:
		for line in f:
			segs = line.strip('\r\n').split('\t')
			if segs[1] == '*':
				continue
			seeds_map[segs[1]] = set()
			seeds_map[segs[1]].add(segs[2].lower())
			all_seeds.add(segs[2].lower())	

	print '*********** Direct Embedding'
	evaluate(ori_embs, true_file, target_dim)

	agg_embs[source_type] = weighted_avg_embedding(lp_map, agg_embs[target_type], dist_map, vec_size)
	agg_embs['d'] = weighted_avg_embedding(dp_map, agg_embs[target_type], dist_map, vec_size)

	print '*********** Aggregate without expansion'
	evaluate(agg_embs, true_file, target_dim)

	for i in range(2):
		print '======== iter ' + str(i) + ' of expansion.'
		extended_seeds = expan_round(agg_embs, seeds_map, all_seeds, 3, 1, mode=mode, pd_map=pd_map)
		print '============= seeds expanded'

		for seed in extended_seeds:
			label, phrase = seed.split('@')
			if label not in lp_map or phrase in lp_map[label]:
				print 'ERROR!!! ' + seed
			all_seeds.add(phrase.lower())
			seeds_map[label].add(phrase.lower())
			lp_map[label][phrase] = 1

		agg_embs[source_type] = weighted_avg_embedding(lp_map, agg_embs[target_type], dist_map, vec_size)

		print '*********** Aggregate with expansion at iter ' + str(i)
		evaluate(agg_embs, true_file, target_dim)

	normal = False
	source_type = 'd'
	target_type = 'l'
	mid_type = 'p'

	for i in range(2):

		if i > 0:
			normal = True

		print '============= iter ' + str(i) + ' of dist started.'

		pred_label, doc_score = doc_assignment(agg_embs, 'd', 'l')
		top_labels = [w.path for w in hier.get_nodes_at_level(1)]

		print '============= docs assigned to labels'

		# # print meta stats
		# top_label_cnts = {}
		# for label in top_labels:
		# 	top_label_cnts[label] = 0
		# for doc_pair in filtered_docs:
		# 	l = pred_label[doc_pair[0]]
		# 	top_label_cnts[l] += 1
		# print top_label_cnts
		# print 'top level labels: ' + str(top_labels)

		label_to_idx = {}
		for idx, label in enumerate(top_labels):
			label_to_idx[label] = idx
		uniform_vec = [1.0/len(top_labels)] * len(top_labels)
		# print uniform_vec
		label_to_doc = {}
		
		for label in top_labels:
			label_to_doc[label] = set()


		docs_used = {}

		if normal:
			print 'used docs in reweighting: ' + str(len(pred_label))
			for doc, score in doc_score.iteritems():
				label_to_doc[pred_label[doc]].add(doc)
		else:
			for label in top_labels:
				p = label.lower()
				# idx = label_to_idx[label]
				for doc in pd_map[p]:
					label_to_doc[label].add(doc)
					if doc not in docs_used:
						docs_used[doc] = set()
					docs_used[doc].add(label)
			print 'docs used: %d' % len(docs_used)


		cnt_vec = [0.0] * len(top_labels)
		for label in label_to_doc:
			cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
		comp_vec = utils.l1_normalize(cnt_vec)

		print cnt_vec

		# print comp_vec

		distinct_map = {}
		
		if normal:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					idx = label_to_idx[pred_label[doc]]
					p_vec[idx] += 1.0
				
				if sum(p_vec) == 0:
					print 'ERROR!!!!!!!!!!'
					continue

				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl
		else:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					if doc in docs_used:
						for label in docs_used[doc]:
							idx = label_to_idx[label]
							p_vec[idx] += 1.0

				# print p_vec
				
				if sum(p_vec) == 0:
					distinct_map[phrase] = 0
					# print 'ERROR!!!!!!!!!!'
					continue
				
				# p_vec = [x / cnt_vec[i] for i, x in enumerate(p_vec)]


				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl

		dist_map = distinct_map
		with open('focal_comp.txt', 'w+') as g:
			for (ph, score) in sorted(dist_map.items(), key=operator.itemgetter(1), reverse=True):
				g.write('%s,%f\t' % (ph, score))

		print '============= phrase distinctness computed.'

		agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
		print '============= doc embedding aggregated.'

		print '*********** Aggregate with distinct at iter ' + str(i)
		evaluate(agg_embs, true_file, target_dim)


	return
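
The "distinctness" score driving the second loop reduces to: count, for each top-level label, the documents a phrase occurs in, L1-normalize that count vector, and take its KL divergence from a reference distribution (uniform, or the label-size distribution `comp_vec`). A toy, self-contained version using the `l1_normalize` / `kl_divergence` sketches shown after Example #1 (labels and counts are made up for illustration):

top_labels = ['sports', 'politics', 'science']
uniform_vec = [1.0 / len(top_labels)] * len(top_labels)

phrase_label_counts = {
    'slam dunk': [40, 1, 1],     # concentrated in 'sports' documents
    'per capita': [10, 12, 11],  # spread evenly across labels
}

for phrase, counts in phrase_label_counts.items():
    p_vec = l1_normalize(counts)
    print(phrase, kl_divergence(p_vec, uniform_vec))
# 'slam dunk' gets a much higher score than 'per capita'.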
Example #4
def reweight_test(embs, dp_file):
	"""Assign documents to top-level labels, then score each phrase's distinctness as KL divergence from uniform."""

	source_type = 'd'
	target_type = 'l'

	target_embs = embs[target_type]
	pred_label = {}
	doc_score = {}
	ratio = 1

	for doc in embs[source_type]:
		doc_emb = embs[source_type][doc]
		sim_map = classify_doc(doc_emb, target_embs)
		pred_label[doc] = hier.get_node(sim_map[0][0]).get_ascendant(1).path
		doc_score[doc] = sim_map[0][1]

	doc_score = sorted(doc_score.items(), key=operator.itemgetter(1), reverse=True)	
	filtered_docs = doc_score[:int(len(doc_score)*ratio)]

	top_labels = [w.path for w in hier.get_nodes_at_level(1)]

	# print meta stats
	top_label_cnts = {}
	for label in top_labels:
		top_label_cnts[label] = 0
	for doc_pair in filtered_docs:
		l = pred_label[doc_pair[0]]
		top_label_cnts[l] += 1
	print top_label_cnts
	print 'top level labels: ' + str(top_labels)
	# return

	label_to_idx = {}
	for idx, label in enumerate(top_labels):
		label_to_idx[label] = idx
	uniform_vec = [1.0/len(top_labels)] * len(top_labels)
	print uniform_vec
	label_to_doc = {}

	# new_filter = []
	# new_pred_ls = {}
	# for (doc, score) in filtered_docs:
	# 	if pred_label[doc] not in top_labels:
	# 		continue
	# 	new_filter.append((doc, score))
	# 	new_pred_ls[doc] = pred_label[doc]
	# filtered_docs = new_filter
	# pred_label = new_pred_ls

	pd_map = load_dp(dp_file, reverse=True)
	
	for label in top_labels:
		label_to_doc[label] = set()

	print 'used docs in reweighting: ' + str(len(filtered_docs))
	for (doc, score) in filtered_docs:
		label_to_doc[pred_label[doc]].add(doc)

	distinct_map = {}
	cnt = 0
	for phrase in embs['p']:
		p_vec = [0.0] * len(top_labels)

		if len(pd_map[phrase]) < 100:
			continue

		for doc in pd_map[phrase]:
			if doc not in pred_label:
				continue
			idx = label_to_idx[pred_label[doc]]
			p_vec[idx] += 1.0
		
		if sum(p_vec) == 0:
			continue

		p_vec = utils.l1_normalize(p_vec)

		kl = utils.kl_divergence(p_vec, uniform_vec)
		distinct_map[phrase] = kl

	distinct_map = sorted(distinct_map.items(), key=operator.itemgetter(1), reverse=False)
	print distinct_map[:100]    # 100 least distinctive phrases (lowest KL)
	print
	print distinct_map[-100:]   # 100 most distinctive phrases (highest KL)
Example #5
def reweight(embs, dp_file, lp_file):
	"""Iteratively compute phrase distinctness and use it to re-aggregate document embeddings."""
	source_type = 'd'
	target_type = 'l'
	mid_type = 'p'

	ori_embs = embs
	agg_embs = copy.copy(embs)

	# Step 0: check original embedding's performance
	print '*********** Direct Embedding'
	evaluate(ori_embs, true_file, target_dim)

	pd_map = load_dp(dp_file, reverse=True)
	dp_map = load_edge_map(dp_file)
	lp_map = load_edge_map(lp_file)
	dist_map = {x:1 for x in embs[mid_type]}
	vec_size = 0
	for d in ori_embs[mid_type]:
		vec_size = len(ori_embs[mid_type][d])
		break

	# print '============= dp, pd maps loaded'


	# Step 1: check with D weighted avg, what's the performance
	agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)

	# optional L - embedding also aggregated from P
	normal = False

	if not normal:
		agg_embs[target_type] = weighted_avg_embedding(lp_map, agg_embs[mid_type], dist_map, vec_size)


	# print '============= doc embedding aggregated.'

	print '*********** Aggregate iter 0'
	evaluate(agg_embs, true_file, target_dim)

	

	for i in range(2):

		if i > 0:
			normal = True

		print '============= iter ' + str(i+1) + ' of dist started.'

		pred_label, doc_score = doc_assignment(agg_embs, source_type, target_type)
		top_labels = [w.path for w in hier.get_nodes_at_level(1)]

		# print '============= docs assigned to labels'

		# # print meta stats
		# top_label_cnts = {}
		# for label in top_labels:
		# 	top_label_cnts[label] = 0
		# for doc_pair in filtered_docs:
		# 	l = pred_label[doc_pair[0]]
		# 	top_label_cnts[l] += 1
		# print top_label_cnts
		# print 'top level labels: ' + str(top_labels)

		label_to_idx = {}
		for idx, label in enumerate(top_labels):
			label_to_idx[label] = idx
		uniform_vec = [1.0/len(top_labels)] * len(top_labels)
		# print uniform_vec
		label_to_doc = {}
		
		for label in top_labels:
			label_to_doc[label] = set()


		docs_used = {}

		if normal:
			print 'used docs in reweighting: ' + str(len(pred_label))
			for doc, score in doc_score.iteritems():
				label_to_doc[pred_label[doc]].add(doc)
		else:
			for label in top_labels:
				p = label.lower()
				# idx = label_to_idx[label]
				for doc in pd_map[p]:
					label_to_doc[label].add(doc)
					if doc not in docs_used:
						docs_used[doc] = set()
					docs_used[doc].add(label)
			print 'docs used: %d' % len(docs_used)




		cnt_vec = [0.0] * len(top_labels)
		for label in label_to_doc:
			cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
		comp_vec = utils.l1_normalize(cnt_vec)

		print cnt_vec

		# print comp_vec

		distinct_map = {}
		
		if normal:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					idx = label_to_idx[pred_label[doc]]
					p_vec[idx] += 1.0
				
				if sum(p_vec) == 0:
					print 'ERROR!!!!!!!!!!'
					continue

				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl
		else:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					if doc in docs_used:
						for label in docs_used[doc]:
							idx = label_to_idx[label]
							p_vec[idx] += 1.0

				# print p_vec
				
				if sum(p_vec) == 0:
					distinct_map[phrase] = 0
					# print 'ERROR!!!!!!!!!!'
					continue
				
				# p_vec = [x / cnt_vec[i] for i, x in enumerate(p_vec)]


				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl

		dist_map = distinct_map
		# with open('focal_comp.txt', 'w+') as g:
		# 	for (ph, score) in sorted(dist_map.items(), key=operator.itemgetter(1), reverse=True):
		# 		g.write('%s,%f\t' % (ph, score))

		# print '============= phrase distinctness computed.'

		agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
		# print '============= doc embedding aggregated.'

		print '*********** Aggregate with distinct at iter ' + str(i + 1)
		evaluate(agg_embs, true_file, target_dim)
Example #6
def expan_round(embs, seeds_map, all_seeds, limit, cate_lim, mode='EMB', pd_map=None):
	"""Propose up to `limit` new seed phrases for one expansion round, at most `cate_lim` per label."""

	target_type = 'p'

	multiplier = 5
	thre_softmax = 0.5

	extended_seeds = set()
	candidates = {}

	if mode == 'EMB':
		for phrase in embs[target_type]:
			if phrase in all_seeds:
				continue
			t_emb = embs[target_type][phrase]
			rel_values = {}
			# flat comparison
			for label in seeds_map:
				max_sim = 0
				for seed in seeds_map[label]:
					sim = multiplier * utils.cossim(t_emb, embs[target_type][seed])
					if sim > max_sim:
						max_sim = sim
				rel_values[label] = max_sim

			utils.softmax_for_map(rel_values)
			best_label = sorted(rel_values.items(), key=operator.itemgetter(1), reverse=True)[0][0]
			candidates[best_label + '@' + phrase] = rel_values[best_label]
	
	elif mode == 'DIS':
		pred_label, doc_score = doc_assignment(embs, 'd', 'l', mode='FLAT')
		top_labels = [w.path for w in hier.get_all_nodes()]
		print 'Doc Assignment done...'

		label_to_idx = {}
		for idx, label in enumerate(top_labels):
			label_to_idx[label] = idx
		# print uniform_vec
		label_to_doc = {}
		
		for label in top_labels:
			label_to_doc[label] = set()
		for doc, score in doc_score.iteritems():
			label_to_doc[pred_label[doc]].add(doc)
		cnt_vec = [0.0] * len(top_labels)
		for label in label_to_doc:
			cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
		comp_vec = utils.l1_normalize(cnt_vec)

		uniform_vec = [1.0/len(top_labels)] * len(top_labels)
		# print cnt_vec
		# print comp_vec

		for phrase in embs['p']:
			if phrase in all_seeds:
				continue

			p_vec = [0.0] * len(top_labels)

			for doc in pd_map[phrase]:
				idx = label_to_idx[pred_label[doc]]
				p_vec[idx] += 1.0

			max_label_value = 0
			best_label = ''
			best_cnt = 0
			for label in top_labels:
				idx = label_to_idx[label]
				if p_vec[idx] > 0:
					norm_value = p_vec[idx] / cnt_vec[idx]
					if norm_value > max_label_value:
						max_label_value = norm_value
						best_label = label
						best_cnt = p_vec[idx]

			if sum(p_vec) == 0:
				print 'ERROR!!!!!!!!!!'
				continue
			p_vec = utils.l1_normalize(p_vec)
			# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
			# kl = utils.kl_divergence(p_vec, comp_vec)
			kl = utils.kl_divergence(p_vec, uniform_vec)

			# best_label = sorted(rel_values.items(), key=operator.itemgetter(1), reverse=True)[0][0]
			pop = max_label_value
			# * (1 + math.log(1 + max_label_value))
			candidates[best_label + '@' + phrase] = kl * max_label_value

	candidates = sorted(candidates.items(), key=operator.itemgetter(1), reverse=True)

	# cands_by_label = {}
	# for cand in candidates:
	# 	label, phrase = cand.split('@')
	# 	if label not in cands_by_label:
	# 		cands_by_label[label] = {}
	# 	cands_by_label[label][phrase] = candidates[cand]

	# for label in cands_by_label:
	# 	print '\n' + label
	# 	cand_cate = cands_by_label[label]
	# 	best_exps = sorted(cand_cate.items(), key=operator.itemgetter(1), reverse=True)[:10]
	# # best_exps = sorted(candidates.items(), key=operator.itemgetter(1), reverse=True)[:30]
	# 	print best_exps

	# exit(1)

	added = 0
	added_cates = {}
	for (cand, score) in candidates:
		label, phrase = cand.split('@')
		if label not in added_cates:
			added_cates[label] = 0
		if added_cates[label] >= cate_lim:
			continue
		if len(seeds_map[label]) >= 3:
			continue
		extended_seeds.add(cand)
		added_cates[label] += 1
		added += 1
		if added > limit:
			break

	print 'extended: ' + str(extended_seeds)
	return extended_seeds
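
In the 'EMB' branch, each candidate phrase is scored by its best cosine similarity to any seed of a label, the per-label scores go through a softmax, and the top label wins. Minimal sketches of the two helpers that branch assumes, `utils.cossim` and the (apparently in-place) `utils.softmax_for_map`; the real implementations may differ:

import math

def cossim(u, v):
    # Cosine similarity between two equal-length vectors.
    dot = sum(a * b for a, b in zip(u, v))
    nu = math.sqrt(sum(a * a for a in u))
    nv = math.sqrt(sum(b * b for b in v))
    return dot / (nu * nv) if nu and nv else 0.0

def softmax_for_map(score_map):
    # Replace each value in score_map with its softmax probability, in place.
    m = max(score_map.values())
    exps = {k: math.exp(v - m) for k, v in score_map.items()}
    total = sum(exps.values())
    for k in score_map:
        score_map[k] = exps[k] / total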
Example #7
    def _build_model(self):
        """Build the core model within the graph."""
        with tf.variable_scope('im_dup'):
            # Duplicate images to get multiple draws from the DP label
            # distribution (each duplicate gets an independent noise draw
            # before going through the rest of the network).
            ones = tf.ones([len(self._images.get_shape()) - 1], dtype=tf.int32)
            x = tf.tile(self._images,
                        tf.concat([[self.hps.n_draws], ones], axis=0))

        with tf.variable_scope('init'):
            with tf.variable_scope('init_conv'):
                filter_size = 3
                in_filters = 3
                out_filters = 16
                stride = 1
                strides = self._stride_arr(2)
                n = filter_size * filter_size * out_filters
                self.kernel = tf.get_variable(
                    'DW', [filter_size, filter_size, in_filters, out_filters],
                    tf.float32,
                    initializer=tf.random_normal_initializer(
                        stddev=np.sqrt(2.0 / n)))

                if self.hps.noise_scheme == 'l2_l2_s1':
                    # Parseval projection, see: https://arxiv.org/abs/1704.08847
                    self._parseval_convs.append(self.kernel)
                    k = stride * self.kernel / float(filter_size)
                elif self.hps.noise_scheme == 'l1_l2_s1':
                    # Sensitivity 1 by L2 normalization
                    k = tf.nn.l2_normalize(self.kernel, dim=[0, 1, 3])
                elif self.hps.noise_scheme == 'l1_l1_s1':
                    # Sensitivity 1 by L1 normalization
                    k = utils.l1_normalize(self.kernel, dim=[0, 1, 3])
                else:
                    k = self.kernel

                x = tf.nn.conv2d(x, k, strides, padding='SAME')

            ############
            # DP noise #

            # This is a factor applied to the noise layer,
            # used to rampup the noise at the beginning of training.
            self.noise_scale = tf.placeholder(tf.float32,
                                              shape=(),
                                              name='noise_scale')

            if self.hps.noise_scheme == 'l1_l2':
                sqr_sum = tf.reduce_sum(tf.square(x), [0, 1, 3],
                                        keep_dims=True)
                self.l2_norms = tf.sqrt(sqr_sum)

                dp_mult = self._dp_mult()
                epsilon = tf.random_normal(tf.shape(x), mean=0, stddev=1)
                self.sensitivity = tf.reduce_max(self.l2_norms)
                self.sigma = tf.multiply(dp_mult, self.sensitivity)

                self.noise_stddev = self.noise_scale * self.sigma
                self.noise = self.noise_stddev * epsilon
                x = x + self.noise
            elif self.hps.noise_scheme == 'l1_l2_s1':
                dp_mult = self._dp_mult()
                epsilon = tf.random_normal(tf.shape(x), mean=0, stddev=1)
                self.sensitivity = 1.0  # we bound it
                self.sigma = tf.multiply(dp_mult, self.sensitivity)

                self.noise_stddev = self.noise_scale * self.sigma
                self.noise = self.noise_stddev * epsilon
                x = x + self.noise
            elif self.hps.noise_scheme == 'l2_l2_s1':
                # Compute the actual sensitivity to rescale later
                shape = self.kernel.get_shape().as_list()
                w_t = tf.reshape(self.kernel, [-1, shape[-1]])
                w = tf.transpose(w_t)
                self.norms = tf.svd(w, compute_uv=False)
                self.sensitivity_multiplier = tf.reduce_max(self.norms)
                #

                dp_mult = self._dp_mult()
                epsilon = tf.random_normal(tf.shape(x), mean=0, stddev=1)
                self.sensitivity = 1.0
                self.sigma = tf.multiply(dp_mult, self.sensitivity)

                self.noise_stddev = self.noise_scale * self.sigma
                self.noise = self.noise_stddev * epsilon
                x = x + self.noise
            elif self.hps.noise_scheme == 'l1_l1':
                self.l1_norms = tf.reduce_sum(tf.abs(x), [0, 1, 3],
                                              keep_dims=True)

                dp_mult = self._dp_mult()
                laplace_shape = tf.shape(x)
                loc = tf.zeros(laplace_shape, dtype=tf.float32)
                scale = tf.ones(laplace_shape, dtype=tf.float32)
                epsilon = tf.distributions.Laplace(loc, scale).sample()

                self.sensitivity = tf.reduce_max(self.l1_norms)
                self.b = self.noise_scale * dp_mult * self.sensitivity

                self.noise = self.b * epsilon
                x = x + self.noise
            elif self.hps.noise_scheme == 'l1_l1_s1':
                dp_mult = self._dp_mult()
                laplace_shape = tf.shape(x)
                loc = tf.zeros(laplace_shape, dtype=tf.float32)
                scale = tf.ones(laplace_shape, dtype=tf.float32)
                epsilon = tf.distributions.Laplace(loc, scale).sample()

                self.sensitivity = 1.0  # because we normalize
                self.b = self.noise_scale * dp_mult * self.sensitivity

                self.noise = self.b * epsilon
                x = x + self.noise
            # DP noise #
            ############

        strides = [1, 2, 2]
        activate_before_residual = [True, False, False]
        if self.hps.use_bottleneck:
            res_func = self._bottleneck_residual
            filters = [16, 64, 128, 256]
        else:
            res_func = self._residual
            #  filters = [16, 16, 32, 64]
            # The filter sizes below correspond to the w28-10 wide residual network.
            # It is more memory-efficient than a very deep residual network and has
            # comparably good performance.
            # https://arxiv.org/pdf/1605.07146v1.pdf
            filters = [out_filters, 160, 320, 640]
            # Update hps.num_residual_units to 4

        with tf.variable_scope('unit_1_0'):
            x = res_func(x, filters[0], filters[1],
                         self._stride_arr(strides[0]),
                         activate_before_residual[0])
        for i in six.moves.range(1, self.hps.num_residual_units):
            with tf.variable_scope('unit_1_%d' % i):
                x = res_func(x, filters[1], filters[1], self._stride_arr(1),
                             False)

        with tf.variable_scope('unit_2_0'):
            x = res_func(x, filters[1], filters[2],
                         self._stride_arr(strides[1]),
                         activate_before_residual[1])
        for i in six.moves.range(1, self.hps.num_residual_units):
            with tf.variable_scope('unit_2_%d' % i):
                x = res_func(x, filters[2], filters[2], self._stride_arr(1),
                             False)

        with tf.variable_scope('unit_3_0'):
            x = res_func(x, filters[2], filters[3],
                         self._stride_arr(strides[2]),
                         activate_before_residual[2])
        for i in six.moves.range(1, self.hps.num_residual_units):
            with tf.variable_scope('unit_3_%d' % i):
                x = res_func(x, filters[3], filters[3], self._stride_arr(1),
                             False)

        with tf.variable_scope('unit_last'):
            x = self._batch_norm('final_bn', x)
            x = self._relu(x, self.hps.relu_leakiness)
            x = self._global_avg_pool(x)

        with tf.variable_scope('logit'):
            logits = self._fully_connected(x, self.hps.num_classes)
            self.pre_softmax = logits
            self.predictions = tf.nn.softmax(logits)

        with tf.variable_scope('label_dup'):
            ones = tf.ones([len(self.labels.get_shape()) - 1], dtype=tf.int32)
            labels = tf.tile(self.labels,
                             tf.concat([[self.hps.n_draws], ones], axis=0))

        with tf.variable_scope('costs'):
            xent = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=labels)
            self.cost = tf.reduce_mean(xent, name='xent')
            self.cost += self._decay()

            tf.summary.scalar('cost', self.cost)
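
In the 'l1_l1_s1' branch above, `utils.l1_normalize(self.kernel, dim=[0, 1, 3])` is what bounds the layer's L1 sensitivity to 1. A plausible sketch of such a dimension-wise L1 normalization, mirroring the signature of `tf.nn.l2_normalize` and assuming a TF 1.x version that accepts `keepdims`; this is an assumption about the helper, not its actual source:

import tensorflow as tf

def l1_normalize(x, dim, epsilon=1e-12):
    # Divide x by its L1 norm taken over the given dims (kept for broadcasting).
    l1_norm = tf.reduce_sum(tf.abs(x), axis=dim, keepdims=True)
    return x / tf.maximum(l1_norm, epsilon)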