Example No. 1
import numpy as np
import tensorflow as tf
from tqdm import tqdm

import utils
from models import UTILS

# `Data` is a project-local dataset wrapper; its module path is not shown here.


def show():
    data = Data()

    d = data.train.make_one_shot_iterator().get_next()
    d = utils.Object(**d)
    sess = UTILS.get_session()

    cnt = 0
    hit = np.zeros(10)
    pbar = tqdm()
    while True:
        try:
            x = sess.run(d)
        except tf.errors.OutOfRangeError:
            break
        cnt += 1
        pbar.update(1)
        # hit-rate bookkeeping (disabled); `hit` stays all zeros while this block is commented out
        # for i, v in enumerate(x.seq):
        #     neighbors = data.dp.neighbors[0][v]
        #     if x.ans in neighbors:
        #         hit[i] += 1
        if cnt >= 10000:
            break
    pbar.close()
    print(hit, cnt, hit / cnt)
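
All of these snippets lean on utils.Object, whose definition is not shown here. Judging from the call sites (keyword construction, attribute reads and writes, and the .get(...) call in Example No. 4), a minimal stand-in might look like the sketch below; this is a guess at the interface, not the project's actual class.

class Object(dict):
    """Dict with attribute-style access (assumed interface)."""

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self[key] = value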
Example No. 2
import torch

import utils


def augment_mask_probs(sample, state, probs, meta):
    for prop, value in sample.items():
        if prop not in meta.feat_labels:
            print("skipping", prop)
            continue

        fid = meta.feat_labels.index(prop)
        bid = meta.bag_idx[fid]

        prob = probs.p[fid].item()
        mask = state.mask[fid].item()

        sample[prop] = {'value': value, 'prob': prob, 'mask': mask}

        if bid is not None:
            bag_prob = probs.bags[bid]
            if bag_prob is None or len(bag_prob.p) == 0:
                sample[prop]['value'] = None
                continue

            # compute softmax over the whole bag
            bag_p = torch.softmax(bag_prob.p.flatten(),
                                  dim=0).view_as(bag_prob.p)

            # recurse into each item of the bag
            for idx, it in enumerate(value):
                it_probs = utils.Object()
                it_probs.p = bag_p[idx]
                it_probs.bags = bag_prob.bags[idx] if len(
                    bag_prob.bags) > 0 else None

                meta_bag = meta.bags[bid][1]
                it_state = state.bags[bid][idx]

                augment_mask_probs(it, it_state, it_probs, meta_bag)
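
A minimal, hypothetical call for a single scalar feature; the namespaces below are stand-ins built only from the fields the function reads, and none of the values come from the source.

import torch

import utils

sample = {'color': 3}
meta = utils.Object(feat_labels=['color'], bag_idx=[None], bags=[])
state = utils.Object(mask=torch.tensor([1.0]), bags=[])
probs = utils.Object(p=torch.tensor([0.9]), bags=[])

augment_mask_probs(sample, state, probs, meta)
# sample == {'color': {'value': 3, 'prob': 0.9..., 'mask': 1.0}}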
Example No. 3
    def __init__(self,
                 name=None,
                 cmd=None,
                 cwd=None,
                 proc=None,
                 input=None,
                 output=None,
                 *args,
                 **kwargs):
        # requires: import collections; from threading import Thread; import utils
        if name is None:
            name = cmd
        super().__init__(*args, name=name, readOnly=True, **kwargs)
        self.input = None
        self.cmd = cmd
        self.cwd = cwd
        # fall back to a duck-typed "process" built from plain file objects
        self.proc = proc or utils.Object(stdout=input, stdin=output)
        self.lines = collections.deque()
        Thread(target=self.run, daemon=True).start()
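
Both call styles the constructor supports, sketched with a hypothetical class name (ProcReader and its run() consumer are assumptions, not from the source):

import subprocess

# wrap a live process: the reader thread consumes proc.stdout
p = subprocess.Popen(['tail', '-f', 'app.log'], stdout=subprocess.PIPE)
reader = ProcReader(cmd='tail -f app.log', proc=p)

# or wrap an already-open file; utils.Object duck-types the process interface
reader = ProcReader(name='log', input=open('app.log', 'rb'))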
Example No. 4
import tensorflow as tf  # TF 1.x API

import utils

# `args` (run configuration) and `eps` (small numeric constant) are assumed to
# be module-level names provided by the project's shared imports.


class Base:
    deep = True
    args = utils.Object()

    # feature: [type..], [tags..], mid
    def __init__(self, data):
        self.raw_adjs = data.adjs

        # self.save_name = f'{utils.save_dir}/{args.run_name}/model.ckpt'
        self.save_dir = f'{utils.save_dir}/{args.run_name}'
        self.tb_name = f'{utils.save_dir}/{args.run_name}'

        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(args.seed)
            self.compile()
        self.fit_step = 0

    def compile(self):
        self.make_io()
        self.make_model()
        if args.run_tb:
            self.all_summary = tf.summary.merge_all()
            self.tbfw = tf.summary.FileWriter(self.tb_name, self.sess.graph)

    def placeholder(self, dtype, shape, name, to_list):
        ph = tf.placeholder(dtype, shape, name)
        self.placeholder_dict[name] = ph
        to_list.append(ph)
        return ph

    def make_io(self):
        self.placeholder_dict = {}
        self.inputs = []
        L = args.seq_length
        self.placeholder(tf.int32, [None, L], 'share_seq', self.inputs)
        self.placeholder(tf.int32, [None, L], 'click_seq', self.inputs)

        self.placeholder(tf.int32, [None], 'pos', self.inputs)
        self.placeholder(tf.int32, [None, None], 'neg', self.inputs)

        self.adjs = [
            tf.constant(adj, dtype=tf.int32) for adj in self.raw_adjs
        ]  # [N, M] * 4, in_0, out_0, in_1, out_1

    def get_data_map(self, data):
        data_map = dict(zip(self.inputs, data))
        return data_map

    def make_model(self):
        with tf.variable_scope(
                'Graph', reuse=tf.AUTO_REUSE,
                regularizer=self.l2_loss('all')) as self.graph_scope:
            n = args.nb_nodes
            k = args.dim_k
            self.embedding_matrix = tf.get_variable(name='emb_w', shape=[n, k])
            with tf.variable_scope(
                    'graph_agg', reuse=tf.AUTO_REUSE) as self.graph_agg_scope:
                pass

        with tf.variable_scope('Network',
                               reuse=tf.AUTO_REUSE,
                               regularizer=self.l2_loss('all')):
            score, label = self.forward(*self.inputs)
            seq_loss = tf.losses.softmax_cross_entropy(label, score)
            tf.summary.scalar('seq_loss', seq_loss)

        self.loss = seq_loss
        self.loss += tf.losses.get_regularization_loss()

        opt = tf.train.AdamOptimizer(learning_rate=args.lr)
        self.minimizer = opt.minimize(self.loss)
        tf.summary.scalar('loss', self.loss)

        graph_var_list = tf.trainable_variables(scope='^Graph/')
        network_var_list = tf.trainable_variables(scope='^Network/')
        for v in graph_var_list:
            print('graph', v)
        for v in network_var_list:
            print('network', v)

        self.saver = tf.train.Saver()
        self.sess = self.get_session()
        self.sess.run(tf.global_variables_initializer())

    def get_session(self):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=1,
            visible_device_list=args.gpu,
            allow_growth=True,
        )
        config = tf.ConfigProto(gpu_options=gpu_options)
        session = tf.Session(config=config)
        return session

    def fit(self, data):
        data = dict(zip(self.inputs, data))
        if args.run_tb:
            _, loss, summary = self.sess.run(
                [self.minimizer, self.loss, self.all_summary], data)
            self.tbfw.add_summary(summary, self.fit_step)
        else:
            _, loss = self.sess.run([self.minimizer, self.loss], data)
        self.fit_step += 1
        return loss

    def topk(self, data):
        data = self.get_data_map(data)
        return self.sess.run([self.topkV, self.topkI], data)

    def save(self):
        name = f'{self.save_dir}/model.ckpt'
        self.saver.save(self.sess, name)

    def restore(self):
        name = f'{self.save_dir}/model.ckpt'
        try:
            self.saver.restore(self.sess, name)
        except Exception as e:
            print(f'cannot restore model: {name}')
            raise e

    def l2_loss(self, name):
        alpha = args.get(f'l2_{name}', 0)
        if alpha < 1e-7:
            return None
        return lambda x: alpha * tf.nn.l2_loss(x)

    def Mean(self, seq, seq_length=None, mask=None, name=None):
        # seq: (None, L, k), seq_length: (None,), mask: (None, L)
        # ret: (None, k)
        if seq_length is None and mask is None:
            with tf.variable_scope('Mean'):
                return tf.reduce_mean(seq, -2)  # plain mean over the time axis

        with tf.variable_scope('MaskMean'):
            if mask is None:
                mask = tf.sequence_mask(seq_length,
                                        maxlen=tf.shape(seq)[1],
                                        dtype=tf.float32)
            mask = tf.expand_dims(mask, -1)  # (None, L, 1)
            seq = seq * mask
            seq = tf.reduce_sum(seq, -2)  # (None, k)
            seq = seq / (tf.reduce_sum(mask, -2) + eps)
        return seq

    def MLP(self, x, fc, activation, name):
        with tf.variable_scope(f'MLP_{name}'):
            for i in range(len(fc)):
                x = tf.layers.dense(x,
                                    fc[i],
                                    activation=activation,
                                    name=f'dense_{i}')
        return x

    def gate(self, a, b, name):
        with tf.variable_scope(name):
            alpha = tf.layers.dense(tf.concat([a, b], -1),
                                    1,
                                    activation=tf.nn.sigmoid,
                                    name='gateW')
            ret = alpha * a + (1 - alpha) * b
        return ret

    def Embedding(self, node, name='node', mask_zero=False):
        # node: [BS]
        with tf.variable_scope(f'Emb_{name}'):
            emb_w = self.embedding_matrix
            t = tf.gather(emb_w, node)
            if mask_zero:
                mask = tf.not_equal(node, 0)
                mask = tf.cast(mask, tf.float32)
            else:
                mask = None
        return t, mask

    def forward(self, share_seq, click_seq, pos, neg):
        pos2 = tf.expand_dims(pos, -1)
        nxt = tf.concat([pos2, neg], -1)  # [BS, M + 1]
        label = tf.concat([
            tf.ones_like(pos2, dtype=tf.int32),
            tf.zeros_like(neg, dtype=tf.int32)
        ], -1)  # [BS, M + 1]

        seq_emb = self.merge_seq(share_seq, click_seq)
        seq_emb = tf.layers.dense(seq_emb,
                                  args.dim_k,
                                  name='dense_W',
                                  use_bias=False)
        score = tf.matmul(seq_emb, self.embedding_matrix, transpose_b=True)

        topk = tf.math.top_k(score, k=500)
        self.topkV = topk.values
        self.topkI = topk.indices

        nxt_embs, _ = self.Embedding(nxt)  # [BS, M + 1, k]
        nxt_score = tf.reduce_sum(tf.expand_dims(seq_emb, 1) * nxt_embs, -1)
        return nxt_score, label

    def node_embedding(self, node):
        # node: [BS, L]
        embs, mask = self.Embedding(node, mask_zero=True)
        return embs, mask

    def merge_seq(self, share_seq, click_seq):
        with tf.variable_scope('merge_seq', reuse=tf.AUTO_REUSE):
            share_seq_embs, share_mask = self.node_embedding(share_seq)
            share_emb = self.seq_embedding(share_seq_embs, share_mask, 'share')
            click_seq_embs, click_mask = self.node_embedding(click_seq)
            click_emb = self.seq_embedding(click_seq_embs, click_mask, 'click')

            emb = self.gate(share_emb, click_emb, 'merge_share_and_click_seq')
            return emb

    def seq_embedding(self, seq, mask, name):
        # seq: [BS, L, k]
        with tf.variable_scope(f'seq_embedding_{name}', reuse=tf.AUTO_REUSE):
            seq_emb = self.Mean(seq, mask=mask)
        return seq_emb
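
A rough driver for the class above; batch_iter, the step count, and eval_batch are illustrative assumptions, and each batch must be ordered like self.inputs (share_seq, click_seq, pos, neg).

model = Base(data)  # builds the graph and session, initializes variables
for step in range(10000):
    batch = next(batch_iter)
    loss = model.fit(batch)
    if step % 1000 == 0:
        print(step, loss)
model.save()
values, indices = model.topk(eval_batch)  # top-500 scores and node ids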
Example No. 5
    def get_input_seq(self, user, pred_pos, name):
        if name in ('train', 'vali') and (user, pred_pos) in self.tv_cache:
            return self.tv_cache[(user, pred_pos)]
        item_seq = self.dp.user2item_seq[user]
        ts_seq = self.dp.user2ts_seq[user]
        if name == 'train':
            ans = item_seq[pred_pos]
            if ans == 1:
                vali_phase, vali_pos, vali_ans = self.dp.vali_user2ppa[user]
                if args.use_unused_vali and args.mode_pred_phase != 'all' and str(
                        vali_phase) not in args.mode_pred_phase:
                    ans = vali_ans

            if ans < 3:
                return None
        elif name == 'vali':
            phase, vali_pos, ans = self.dp.vali_user2ppa[user]
            assert vali_pos == pred_pos
            assert item_seq[vali_pos] == 1
        elif name == 'test':
            phase, test_pos, ans = self.dp.test_user2ppa[user]
            assert test_pos == pred_pos
            assert item_seq[test_pos] == 2
            if user in self.dp.vali_user2ppa:
                vali_phase, vali_pos, vali_ans = self.dp.vali_user2ppa[user]
                item_seq = list(item_seq)
                item_seq[vali_pos] = vali_ans
        else:
            raise ValueError(f'unknown split name: {name!r}')

        q_ts = ts_seq[pred_pos]
        pre_ts = ts_seq[pred_pos - 1] if pred_pos else -100

        assert args.data_dt_less_than < 0 or args.data_dt_greater_than < 0
        dt = q_ts - pre_ts
        if args.data_dt_less_than > 0 and name != 'train' and dt >= args.data_dt_less_than:
            return None

        if args.data_dt_greater_than > 0 and name != 'train' and dt < args.data_dt_greater_than:
            return None

        _item_seq, _ts_seq = [], []
        for i in reversed(range(pred_pos)):
            item, ts = item_seq[i], ts_seq[i]
            if q_ts - ts > args.seq_max_dt:
                break

            if item == 1:
                vali_phase, vali_pos, vali_ans = self.dp.vali_user2ppa[user]
                if args.use_unused_vali and args.mode_pred_phase != 'all' and str(
                        vali_phase) not in args.mode_pred_phase:
                    item = vali_ans

            if item >= 3 and item != ans:
                _item_seq.append(item)
                _ts_seq.append(ts)

            if len(_item_seq) >= args.seq_length:
                break

        if not _item_seq:
            _item_seq, _ts_seq = [0], [-100]

        out = utils.Object(
            seq=_item_seq,
            ts=_ts_seq,
        )
        self.tv_cache[(user, pred_pos)] = out
        return out
Example No. 6
    def load_data(self):
        self.raw_ids = utils.Object(**self.dp.raw_id_list)
        self.tv_cache = {}
Example No. 7
    def __init__(self, input_param, para, **kwargs):
        # keep only "public" keys (those without a double underscore)
        input_param = {k: v for k, v in input_param.items() if '__' not in k}

        # NOTE: `from imports import *` must live at module level in Python 3,
        # so it is assumed to be done by the enclosing module.
        for k in para:
            # mirror `para` into module globals and onto the instance
            globals()[k] = para[k]
            setattr(self, k, para[k])
        for k, v in input_param.items():
            setattr(self, k, v)

        for key, value in kwargs.items():
            input_param[key] = value
            setattr(self, key, value)

        if self.scale == 'Default':
            print('Getting scale by filename:')
            tokens = self.filename.split('.')[0].split('_')
            for k, tok in enumerate(tokens):
                if tok == 'scale':
                    self.scale = float(tokens[k + 1])
            print(self.scale)
        else:
            print(self.scale)
        input_param['scale'] = self.scale
        print('')

        if self.timeunit == 'Default':
            print('Getting timestep by filename:')
            tokens = self.filename.split('.')[0].split('_')
            for k, tok in enumerate(tokens):
                if tok == 'timestep':
                    self.timeunit = tokens[k + 1]
                    print(self.timeunit)
            if self.timeunit not in ('days', 'seconds'):
                print('Could not obtain proper timestep')
        else:
            print(self.timeunit)
        print('')
        input_param['timeunit'] = self.timeunit

        self.input_param = input_param
        stat = utils.Object()
        for k, v in input_param.items():
            if '__' not in k:
                try:
                    setattr(stat, k, v)
                except Exception:
                    print('Could not copy:')
                    print(k, v)
                    print('')
        self.stat = stat
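
For instance (hypothetical filename), the underscore-separated tokens before the first '.' drive both defaults:

# 'orbit_scale_2_timestep_days.hdf5'.split('.')[0].split('_')
#   -> ['orbit', 'scale', '2', 'timestep', 'days']
# so scale = float('2') and timeunit = 'days'. A scale written with a dot
# (e.g. 'scale_1.5') would be truncated at the first '.' by this parsing.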
Example No. 8
import os
import imports
import utils
import numpy as np

# Static settings
stat = utils.Object()
stat.filename = ''
stat.read_max = 'all'
stat.scale = 'Default'
stat.new_folder = True
stat.calc_method = 'Abram'
stat.dir_savefig = os.getcwd() + '/'  # Directory where the figures are saved; if False, the current working directory is used
stat.noise_check = False
stat.home = '/home/ester/git/synthlisa/'  # Home directory
stat.directory_imp = False
stat.num_back = 0
stat.dir_orbits = '/home/ester/git/synthlisa/orbits/'  # Folder with orbit files
stat.length_calc = 'all'  # Number of orbit-file datapoints to import ('all' imports everything)
stat.dir_extr = 'zzzAbram_no_abberation'  # Suffix added to the folder name of the figures
stat.timeunit = 'Default'  # Time unit of the plots ('minutes', 'days', or 'years')
stat.LISA_opt = 'cache'  # Whether a LISA object from synthlisa is used for further calculations (may not work properly when False)
stat.arm_influence = True  # Set True to account for photon travel time when calculating the nominal armlengths
stat.tstep = False
stat.delay = True  # True, 'Not ahead', or False
stat.method = 'fsolve'  # Method used to solve the equation for the photon traveling time
stat.valorfunc = 'Function'
stat.select = 'Hallion'  # Which orbit files to import ('all' imports all of them)
stat.aberration = True
Example No. 9
    def fit(self, args, args_i, args_n):
        # requires: numpy as np, tensorflow as tf, utils, and the project's logger / log helpers
        self.args_i = args_i
        self.args_n = args_n
        self.args = utils.Object(**args)

        self.pre_fit()
        self.prt_info()
        self.make_model()
        unique_fn = '{}-{}'.format(logger.unique_fn, self.args_i)

        tensorboard_dir = 'tensorboard/{}'.format(unique_fn)
        self.tb_dirs.append(tensorboard_dir)
        train_writer = tf.summary.FileWriter(tensorboard_dir, self.sess.graph)

        saver = tf.train.Saver()

        # a reusable Summary protobuf: simple_value is re-filled each batch and
        # the same proto is re-written to TensorBoard (no extra sess.run needed)
        summ_loss = tf.Summary()
        summ_loss_v = summ_loss.value.add()
        summ_loss_v.tag = 'loss_per_batch'

        summaries = tf.summary.merge_all()

        batch_cnt = 0
        best_vali = None
        brk = 0
        # ret, _ = self.data.evaluate('vali', self.predict)
        # print(ret)
        has_ckpt = False
        try:
            for epoch in range(self.max_epochs):
                loss = []
                progress_bar = utils.ProgressBar(self.args.batch_steps, msg='training')
                for step in range(self.args.batch_steps):
                    batch = next(self.data_generator)
                    data = dict(zip(self.train_inputs, batch))
                    # print(len(self.train_inputs), len(batch)); input()
                    # print(data); input()
                    if step == 0 and summaries is not None:
                        summ = self.sess.run(summaries, data)
                        train_writer.add_summary(summ, global_step=batch_cnt)

                    if self.minimize2 is None:
                        _, _loss = self.sess.run([self.minimize, self.loss], data)
                    else:
                        _, _, _loss = self.sess.run([self.minimize, self.minimize2, self.loss],
                                                    data)

                    batch_cnt += 1
                    loss.append(_loss)
                    summ_loss_v.simple_value = _loss
                    train_writer.add_summary(summ_loss, global_step=batch_cnt)
                    progress_bar.make_a_step()
                    self.good_log = True
                train_time = progress_bar.stop()

                vali, vali_time = self.data.evaluate('vali', self.predict)
                if vali.is_better_than(best_vali):
                    brk = 0
                    best_vali = vali
                    saver.save(self.sess, 'save/{}_model.ckpt'.format(unique_fn))
                    has_ckpt = True
                else:
                    brk += 1

                if self.run_test:
                    test, test_time = self.data.evaluate('test', self.predict)
                    vali = '{} {}'.format(vali, test)
                    vali_time += test_time

                msg = '#{}/{}, loss: {:.5f}, vali: {}, brk: {}, time: {:.1f}s {:.1f}s'.format(
                    epoch + 1, self.max_epochs, np.mean(loss), vali, brk, train_time, vali_time)
                log(msg, i=-1, red=(brk == 0))
                if self.early_stop > 0 and brk >= self.early_stop:
                    break

        except KeyboardInterrupt:
            utils.timer.stop()
            log('KeyboardInterrupt')
        except Exception as e:
            utils.timer.stop()
            log('Exception: {}'.format(e), red=True)
        if has_ckpt:
            saver.restore(self.sess, 'save/{}_model.ckpt'.format(unique_fn))
        return self.after_fit()
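
The signature suggests fit is called once per hyper-parameter setting, with args_i / args_n indexing the run within a sweep; a hypothetical driver (config keys invented for illustration):

configs = [{'lr': 1e-3, 'batch_steps': 200}, {'lr': 1e-4, 'batch_steps': 200}]
for i, cfg in enumerate(configs):
    result = model.fit(cfg, args_i=i, args_n=len(configs))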