def show():
    data = Data()
    d = data.train.make_one_shot_iterator().get_next()
    d = utils.Object(**d)
    from models import UTILS
    sess = UTILS.get_session()
    cnt = 0
    hit = np.zeros(10)
    pbar = tqdm()
    while True:
        try:
            x = sess.run(d)
        except tf.errors.OutOfRangeError:
            break
        cnt += 1
        pbar.update(1)
        # for i, v in enumerate(x.seq):
        #     neighbors = data.dp.neighbors[0][v]
        #     if x.ans in neighbors:
        #         hit[i] += 1
        if cnt >= 10000:
            break
    pbar.close()
    print(hit, cnt, hit / cnt)
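# --- illustrative sketch (not from the original source) ---
# The commented block in show() counts, per sequence position, how often the
# target item appears among that position's graph neighbors; hit / cnt is then
# a per-position hit rate. A standalone version of that counting, with assumed
# toy shapes (samples as (seq, ans) pairs, neighbors as a dict of sets):

import numpy as np

def neighbor_hit_rate(samples, neighbors, seq_len=10):
    hit = np.zeros(seq_len)
    cnt = 0
    for seq, ans in samples:
        cnt += 1
        for i, v in enumerate(seq[:seq_len]):
            if ans in neighbors.get(v, ()):  # target reachable from position i
                hit[i] += 1
    return hit, cnt, hit / max(cnt, 1)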
def augment_mask_probs(sample, state, probs, meta):
    for prop, value in sample.items():
        if prop not in meta.feat_labels:
            print("skipping", prop)
            continue
        fid = meta.feat_labels.index(prop)
        bid = meta.bag_idx[fid]
        prob = probs.p[fid].item()
        mask = state.mask[fid].item()
        sample[prop] = {'value': value, 'prob': prob, 'mask': mask}
        if bid is not None:
            bag_prob = probs.bags[bid]
            if bag_prob is None or len(bag_prob.p) == 0:
                sample[prop]['value'] = None
                continue
            # compute a softmax over the whole bag at once
            bag_p = torch.softmax(bag_prob.p.flatten(), dim=0).view_as(bag_prob.p)
            # recurse into every item of the bag
            for idx, it in enumerate(value):
                it_probs = utils.Object()
                it_probs.p = bag_p[idx]
                it_probs.bags = bag_prob.bags[idx] if len(bag_prob.bags) > 0 else None
                meta_bag = meta.bags[bid][1]
                it_state = state.bags[bid][idx]
                augment_mask_probs(it, it_state, it_probs, meta_bag)
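# --- illustrative sketch (not from the original source) ---
# The flatten -> softmax -> view_as step above normalizes logits jointly over
# every position in a bag, producing one distribution across the whole bag
# rather than one per row. A minimal, self-contained check of that pattern:

import torch

def _bag_softmax_demo():
    logits = torch.randn(2, 3)  # a toy 2x3 "bag" of logits
    bag_p = torch.softmax(logits.flatten(), dim=0).view_as(logits)
    assert abs(bag_p.sum().item() - 1.0) < 1e-6  # one distribution over all 6 cells
    row_p = torch.softmax(logits, dim=-1)  # per-row alternative: each row sums to 1
    return bag_p, row_p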
def __init__(self, name=None, cmd=None, cwd=None, proc=None,
             input=None, output=None, *args, **kwargs):
    if name is None:
        name = cmd
    super().__init__(*args, name=name, readOnly=True, **kwargs)
    self.input = None
    self.cmd = cmd
    self.cwd = cwd
    # fall back to a stream-like stand-in when no process object is given
    self.proc = proc or utils.Object(stdout=input, stdin=output)
    self.lines = collections.deque()
    # consume the process output on a background daemon thread
    Thread(target=self.run, daemon=True).start()
class Base:
    deep = True
    args = utils.Object()

    # feature: [type..], [tags..], mid
    def __init__(self, data):
        self.raw_adjs = data.adjs
        # self.save_name = f'{utils.save_dir}/{args.run_name}/model.ckpt'
        self.save_dir = f'{utils.save_dir}/{args.run_name}'
        self.tb_name = f'{utils.save_dir}/{args.run_name}'
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(args.seed)
            self.compile()
        self.fit_step = 0

    def compile(self):
        self.make_io()
        self.make_model()
        if args.run_tb:
            self.all_summary = tf.summary.merge_all()
            self.tbfw = tf.summary.FileWriter(self.tb_name, self.sess.graph)

    def placeholder(self, dtype, shape, name, to_list):
        ph = tf.placeholder(dtype, shape, name)
        self.placeholder_dict[name] = ph
        to_list.append(ph)
        return ph

    def make_io(self):
        self.placeholder_dict = {}
        self.inputs = []
        L = args.seq_length
        self.placeholder(tf.int32, [None, L], 'share_seq', self.inputs)
        self.placeholder(tf.int32, [None, L], 'click_seq', self.inputs)
        self.placeholder(tf.int32, [None], 'pos', self.inputs)
        self.placeholder(tf.int32, [None, None], 'neg', self.inputs)
        self.adjs = [
            tf.constant(adj, dtype=tf.int32) for adj in self.raw_adjs
        ]  # [N, M] * 4, in_0, out_0, in_1, out_1

    def get_data_map(self, data):
        data_map = dict(zip(self.inputs, data))
        return data_map

    def make_model(self):
        with tf.variable_scope('Graph', reuse=tf.AUTO_REUSE,
                               regularizer=self.l2_loss('all')) as self.graph_scope:
            n = args.nb_nodes
            k = args.dim_k
            self.embedding_matrix = tf.get_variable(name='emb_w', shape=[n, k])
            with tf.variable_scope('graph_agg',
                                   reuse=tf.AUTO_REUSE) as self.graph_agg_scope:
                pass
        with tf.variable_scope('Network', reuse=tf.AUTO_REUSE,
                               regularizer=self.l2_loss('all')):
            score, label = self.forward(*self.inputs)
            seq_loss = tf.losses.softmax_cross_entropy(label, score)
            tf.summary.scalar('seq_loss', seq_loss)
            self.loss = seq_loss
        self.loss += tf.losses.get_regularization_loss()
        opt = tf.train.AdamOptimizer(learning_rate=args.lr)
        self.minimizer = opt.minimize(self.loss)
        tf.summary.scalar('loss', self.loss)
        graph_var_list = tf.trainable_variables(scope='^Graph/')
        network_var_list = tf.trainable_variables(scope='^Network/')
        for v in graph_var_list:
            print('graph', v)
        for v in network_var_list:
            print('network', v)
        self.saver = tf.train.Saver()
        self.sess = self.get_session()
        self.sess.run(tf.global_variables_initializer())

    def get_session(self):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=1,
            visible_device_list=args.gpu,
            allow_growth=True,
        )
        config = tf.ConfigProto(gpu_options=gpu_options)
        session = tf.Session(config=config)
        return session

    def fit(self, data):
        data = dict(zip(self.inputs, data))
        if args.run_tb:
            _, loss, summary = self.sess.run(
                [self.minimizer, self.loss, self.all_summary], data)
            self.tbfw.add_summary(summary, self.fit_step)
        else:
            _, loss = self.sess.run([self.minimizer, self.loss], data)
        self.fit_step += 1
        return loss

    def topk(self, data):
        data = self.get_data_map(data)
        return self.sess.run([self.topkV, self.topkI], data)

    def save(self):
        name = f'{self.save_dir}/model.ckpt'
        self.saver.save(self.sess, name)

    def restore(self):
        name = f'{self.save_dir}/model.ckpt'
        try:
            self.saver.restore(self.sess, name)
        except Exception as e:
            print(f'cannot restore model: {name}')
            raise e

    def l2_loss(self, name):
        alpha = args.get(f'l2_{name}', 0)
        if alpha < 1e-7:
            return None
        return lambda x: alpha * tf.nn.l2_loss(x)

    def Mean(self, seq, seq_length=None, mask=None, name=None):
        # seq: (None, L, k), seq_length: (None,), mask: (None, L)
        # ret: (None, k)
        if seq_length is None and mask is None:
            with tf.variable_scope('Mean'):
                return tf.reduce_sum(seq, -2)
        with tf.variable_scope('MaskMean'):
            if mask is None:
                mask = tf.sequence_mask(seq_length, maxlen=tf.shape(seq)[1],
                                        dtype=tf.float32)
            mask = tf.expand_dims(mask, -1)  # (None, L, 1)
            seq = seq * mask
            seq = tf.reduce_sum(seq, -2)  # (None, k)
            # eps is assumed to be a small module-level constant (e.g. 1e-8)
            seq = seq / (tf.reduce_sum(mask, -2) + eps)
        return seq

    def MLP(self, x, fc, activation, name):
        with tf.variable_scope(f'MLP_{name}'):
            for i in range(len(fc)):
                x = tf.layers.dense(x, fc[i], activation=activation,
                                    name=f'dense_{i}')
        return x

    def gate(self, a, b, name):
        with tf.variable_scope(name):
            alpha = tf.layers.dense(tf.concat([a, b], -1), 1,
                                    activation=tf.nn.sigmoid, name='gateW')
            ret = alpha * a + (1 - alpha) * b
        return ret

    def Embedding(self, node, name='node', mask_zero=False):
        # node: [BS]
        with tf.variable_scope(f'Emb_{name}'):
            emb_w = self.embedding_matrix
            t = tf.gather(emb_w, node)
            if mask_zero:
                mask = tf.not_equal(node, 0)
                mask = tf.cast(mask, tf.float32)
            else:
                mask = None
        return t, mask

    def forward(self, share_seq, click_seq, pos, neg):
        pos2 = tf.expand_dims(pos, -1)
        nxt = tf.concat([pos2, neg], -1)  # [BS, M + 1]
        label = tf.concat([
            tf.ones_like(pos2, dtype=tf.int32),
            tf.zeros_like(neg, dtype=tf.int32)
        ], -1)  # [BS, M + 1]
        seq_emb = self.merge_seq(share_seq, click_seq)
        seq_emb = tf.layers.dense(seq_emb, args.dim_k, name='dense_W',
                                  use_bias=False)
        score = tf.matmul(seq_emb, self.embedding_matrix, transpose_b=True)
        topk = tf.math.top_k(score, k=500)
        self.topkV = topk.values
        self.topkI = topk.indices
        nxt_embs, _ = self.Embedding(nxt)  # [BS, M + 1, k]
        nxt_score = tf.reduce_sum(tf.expand_dims(seq_emb, 1) * nxt_embs, -1)
        return nxt_score, label

    def node_embedding(self, node):
        # node: [BS, L]
        embs, mask = self.Embedding(node, mask_zero=True)
        return embs, mask

    def merge_seq(self, share_seq, click_seq):
        with tf.variable_scope('merge_seq', reuse=tf.AUTO_REUSE):
            share_seq_embs, share_mask = self.node_embedding(share_seq)
            share_emb = self.seq_embedding(share_seq_embs, share_mask, 'share')
            click_seq_embs, click_mask = self.node_embedding(click_seq)
            click_emb = self.seq_embedding(click_seq_embs, click_mask, 'click')
            emb = self.gate(share_emb, click_emb, 'merge_share_and_click_seq')
        return emb

    def seq_embedding(self, seq, mask, name):
        # seq: [BS, L, k]
        with tf.variable_scope(f'seq_embedding_{name}', reuse=tf.AUTO_REUSE):
            seq_emb = self.Mean(seq, mask=mask)
        return seq_emb
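# --- illustrative sketch (not from the original source) ---
# A NumPy reference for the masked Mean above: zero out padded steps, sum over
# the time axis, and divide by the count of unmasked steps. The eps default
# here is an assumption matching the module-level eps the TF code relies on.

import numpy as np

def masked_mean_reference(seq, mask, eps=1e-8):
    # seq: (B, L, k) float array; mask: (B, L) with 1 for real steps, 0 for padding
    mask = mask[..., None]  # (B, L, 1)
    total = (seq * mask).sum(axis=-2)  # (B, k)
    return total / (mask.sum(axis=-2) + eps)  # average over unmasked steps only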
def get_input_seq(self, user, pred_pos, name):
    if name in ('train', 'vali') and (user, pred_pos) in self.tv_cache:
        return self.tv_cache[(user, pred_pos)]
    item_seq = self.dp.user2item_seq[user]
    ts_seq = self.dp.user2ts_seq[user]
    if name == 'train':
        ans = item_seq[pred_pos]
        if ans == 1:
            vali_phase, vali_pos, vali_ans = self.dp.vali_user2ppa[user]
            if args.use_unused_vali and args.mode_pred_phase != 'all' \
                    and str(vali_phase) not in args.mode_pred_phase:
                ans = vali_ans
        if ans < 3:
            return None
    elif name == 'vali':
        phase, vali_pos, ans = self.dp.vali_user2ppa[user]
        assert vali_pos == pred_pos
        assert item_seq[vali_pos] == 1
    elif name == 'test':
        phase, test_pos, ans = self.dp.test_user2ppa[user]
        assert test_pos == pred_pos
        assert item_seq[test_pos] == 2
        if user in self.dp.vali_user2ppa:
            vali_phase, vali_pos, vali_ans = self.dp.vali_user2ppa[user]
            item_seq = list(item_seq)
            item_seq[vali_pos] = vali_ans
    else:
        raise Exception(name)
    q_ts = ts_seq[pred_pos]
    pre_ts = ts_seq[pred_pos - 1] if pred_pos else -100
    # at most one of the two dt filters may be active
    assert args.data_dt_less_than < 0 or args.data_dt_greater_than < 0
    # noinspection PyChainedComparisons
    if args.data_dt_less_than > 0 and name != 'train' \
            and not q_ts - pre_ts < args.data_dt_less_than:
        return None
    if args.data_dt_greater_than > 0 and name != 'train' \
            and not q_ts - pre_ts >= args.data_dt_greater_than:
        return None
    _item_seq, _ts_seq = [], []
    # walk backwards from the prediction position, newest first
    for i in range(pred_pos)[::-1]:
        item, ts = item_seq[i], ts_seq[i]
        # if _item_seq and ts_seq[i + 1] - ts > args.seq_max_dt:
        if q_ts - ts > args.seq_max_dt:
            break
        if item == 1:
            vali_phase, vali_pos, vali_ans = self.dp.vali_user2ppa[user]
            if args.use_unused_vali and args.mode_pred_phase != 'all' \
                    and str(vali_phase) not in args.mode_pred_phase:
                item = vali_ans
        if item >= 3 and item != ans:
            _item_seq.append(item)
            _ts_seq.append(ts)
        if len(_item_seq) >= args.seq_length:
            break
    if not _item_seq:
        _item_seq, _ts_seq = [0], [-100]
    out = utils.Object(
        seq=_item_seq,
        ts=_ts_seq,
    )
    self.tv_cache[(user, pred_pos)] = out
    return out
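# --- illustrative sketch (not from the original source) ---
# The backward scan above collects at most args.seq_length items, newest first,
# stopping once an item falls outside the seq_max_dt window before the query
# timestamp. A stripped-down, pure-Python version of that windowing, omitting
# the vali-answer substitution and item-id filtering:

def backward_window(item_seq, ts_seq, pred_pos, seq_max_dt, seq_length):
    q_ts = ts_seq[pred_pos]
    out_items, out_ts = [], []
    for i in reversed(range(pred_pos)):  # walk backwards from the query position
        if q_ts - ts_seq[i] > seq_max_dt:  # older than the time window: stop
            break
        out_items.append(item_seq[i])
        out_ts.append(ts_seq[i])
        if len(out_items) >= seq_length:  # cap the history length
            break
    return out_items, out_ts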
def load_data(self):
    self.raw_ids = utils.Object(**self.dp.raw_id_list)
    self.tv_cache = {}
def __init__(self, input_param, para, **kwargs):
    # drop keys containing '__' from the incoming parameter dict
    input_param = {k: v for k, v in input_param.items() if '__' not in k}
    # 'from imports import *' is a SyntaxError inside a function in Python 3;
    # pull the module's public names into globals explicitly instead.
    import imports
    globals().update({k: v for k, v in vars(imports).items()
                      if not k.startswith('_')})
    for k in para:
        globals()[k] = para[k]
        setattr(self, k, para[k])
    for k in input_param:
        setattr(self, k, input_param[k])
    for key, value in kwargs.items():
        input_param[key] = value
        setattr(self, key, value)

    if self.scale == 'Default':
        print('Getting scale by filename:')
        tokens = self.filename.split('.')[0].split('_')
        for k in range(len(tokens)):
            if tokens[k] == 'scale':
                self.scale = float(tokens[k + 1])
                print(self.scale)
    else:
        print(self.scale)
    input_param['scale'] = self.scale
    print('')

    if self.timeunit == 'Default':
        print('Getting timestep by filename:')
        tokens = self.filename.split('.')[0].split('_')
        for k in range(len(tokens)):
            if tokens[k] == 'timestep':
                self.timeunit = tokens[k + 1]
                print(self.timeunit)
        if self.timeunit != 'days' and self.timeunit != 'seconds':
            print('Could not obtain proper timestep')
    else:
        print(self.timeunit)
    print('')
    input_param['timeunit'] = self.timeunit
    self.input_param = input_param

    # mirror the cleaned parameters onto a settings object
    stat = utils.Object()
    for k in input_param:
        if '__' in k:
            continue
        try:
            setattr(stat, k, input_param[k])
        except Exception:
            print('Could not copy:')
            print(k, input_param[k])
            print('')
    self.stat = stat
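# --- illustrative sketch (not from the original source) ---
# The constructor above recovers parameters from underscore-delimited filename
# tokens: the value immediately follows its key token. With a hypothetical
# filename 'orbits_scale_10_timestep_days.txt', the lookup reduces to:

def token_after(filename, key):
    tokens = filename.split('.')[0].split('_')
    for k in range(len(tokens) - 1):
        if tokens[k] == key:  # the value follows its key token
            return tokens[k + 1]
    return None

# token_after('orbits_scale_10_timestep_days.txt', 'scale')     -> '10'
# token_after('orbits_scale_10_timestep_days.txt', 'timestep')  -> 'days'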
import os

import imports
import utils
import numpy as np

# Static settings
stat = utils.Object()
stat.filename = ''
stat.read_max = 'all'
stat.scale = 'Default'
stat.method = 'fsolve'  # Method used to solve the equation for the photon travel time
stat.new_folder = True
stat.calc_method = 'Abram'
stat.dir_savefig = os.getcwd() + '/'  # Directory where the figures will be saved; if False, the current working directory is used
stat.noise_check = False
stat.home = '/home/ester/git/synthlisa/'  # Home directory
stat.directory_imp = False
stat.num_back = 0
stat.dir_orbits = '/home/ester/git/synthlisa/orbits/'  # Folder with orbit files
stat.length_calc = 'all'  # Number of orbit-file datapoints to import ('all' is also possible)
stat.dir_extr = 'zzzAbram_no_abberation'  # Suffix added to the folder name of the figures
stat.timeunit = 'Default'  # Time unit of the plots ('minutes', 'days', 'years')
stat.LISA_opt = 'cache'  # Whether a LISA object from synthetic LISA is used for further calculations (may not work properly when False)
stat.arm_influence = True  # Set True to consider the travel time of the photons when calculating the nominal armlengths
stat.tstep = False
stat.delay = True  # 'Not ahead' or False
stat.valorfunc = 'Function'
stat.select = 'Hallion'  # Which orbit files will be imported ('all' imports all)
stat.aberration = True
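# --- illustrative sketch (not from the original source) ---
# utils.Object is used throughout as an attribute-style settings container; its
# real implementation is not shown here. A minimal stand-in with the same usage
# pattern (an assumption, not the project's actual class):

class ObjectSketch:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)  # expose keyword args as attributes

# stat = ObjectSketch(); stat.filename = ''  # mirrors the usage above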
def fit(self, args, args_i, args_n):
    self.args_i = args_i
    self.args_n = args_n
    self.args = utils.Object(**args)
    self.pre_fit()
    self.prt_info()
    self.make_model()
    unique_fn = '{}-{}'.format(logger.unique_fn, self.args_i)
    tensorboard_dir = 'tensorboard/{}'.format(unique_fn)
    self.tb_dirs.append(tensorboard_dir)
    train_writer = tf.summary.FileWriter(tensorboard_dir, self.sess.graph)
    saver = tf.train.Saver()
    # a hand-built Summary proto, reused for per-batch loss logging
    summ_loss = tf.Summary()
    summ_loss_v = summ_loss.value.add()
    summ_loss_v.tag = 'loss_per_batch'
    summaries = tf.summary.merge_all()
    batch_cnt = 0
    best_vali = None
    brk = 0
    # ret, _ = self.data.evaluate('vali', self.predict)
    # print(ret)
    has_ckpt = False
    try:
        for epochs in range(self.max_epochs):
            loss = []
            progress_bar = utils.ProgressBar(self.args.batch_steps, msg='training')
            for step in range(self.args.batch_steps):
                batch = next(self.data_generator)
                data = dict(zip(self.train_inputs, batch))
                if step == 0 and summaries is not None:
                    summ = self.sess.run(summaries, data)
                    train_writer.add_summary(summ, global_step=batch_cnt)
                if self.minimize2 is None:
                    _, _loss = self.sess.run([self.minimize, self.loss], data)
                else:
                    _, _, _loss = self.sess.run(
                        [self.minimize, self.minimize2, self.loss], data)
                batch_cnt += 1
                loss.append(_loss)
                summ_loss_v.simple_value = _loss
                train_writer.add_summary(summ_loss, global_step=batch_cnt)
                progress_bar.make_a_step()
            self.good_log = True
            train_time = progress_bar.stop()
            vali, vali_time = self.data.evaluate('vali', self.predict)
            if vali.is_better_than(best_vali):
                brk = 0
                best_vali = vali
                saver.save(self.sess, 'save/{}_model.ckpt'.format(unique_fn))
                has_ckpt = True
            else:
                brk += 1
            if self.run_test:
                test, test_time = self.data.evaluate('test', self.predict)
                vali = '{} {}'.format(vali, test)
                vali_time += test_time
            msg = '#{}/{}, loss: {:.5f}, vali: {}, brk: {}, time: {:.1f}s {:.1f}s'.format(
                epochs + 1, self.max_epochs, np.mean(loss), vali, brk,
                train_time, vali_time)
            log(msg, i=-1, red=(brk == 0))
            # early stopping: brk counts epochs without validation improvement
            if self.early_stop > 0 and brk >= self.early_stop:
                break
    except KeyboardInterrupt:
        utils.timer.stop()
        log('KeyboardInterrupt')
    except Exception as e:
        utils.timer.stop()
        log('Exception: {}'.format(e), red=True)
    if has_ckpt:
        saver.restore(self.sess, 'save/{}_model.ckpt'.format(unique_fn))
    return self.after_fit()
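# --- illustrative sketch (not from the original source) ---
# The per-batch loss logging above builds one tf.Summary proto up front and
# mutates its simple_value each step, avoiding an extra summary op in the
# graph. The same TF 1.x pattern in isolation, as a small factory:

import tensorflow as tf

def make_scalar_logger(writer, tag):
    summ = tf.Summary()
    val = summ.value.add()
    val.tag = tag

    def log_scalar(value, step):
        val.simple_value = value  # reuse the same proto on every call
        writer.add_summary(summ, global_step=step)

    return log_scalar

# log_loss = make_scalar_logger(train_writer, 'loss_per_batch')
# log_loss(0.42, step)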