Exemplo n.º 1
0
 def load_wwang_parts(self):
     """Load the word vectors and the pickled qid/aid lookup tables.

     Calls ``load_word_vec`` and then fills ``qid2wids``, ``aid2wids``,
     ``aid2vote`` and ``aid2qu`` from the pickle files named by the
     matching ``*_file`` attributes. Progress is reported on stdout.

     Raises:
         AssertionError: if ``aid2vote``, ``aid2wids`` and ``aid2qu`` do
             not all contain the same number of entries.
     """
     self.load_word_vec()
     self.qid2wids = iu.load_pickle(self.qid2wids_file)
     self.aid2wids = iu.load_pickle(self.aid2wids_file)
     print('load awids/qwids over')
     self.aid2vote = iu.load_pickle(self.aid2vote_file)
     self.aid2qu = iu.load_pickle(self.aid2qu_file)
     # The three aid-keyed tables must stay in step with each other.
     assert len(self.aid2vote) == len(self.aid2wids) == len(self.aid2qu), (
         'aid table sizes differ: aid2vote={}, aid2wids={}, aid2qu={}'.format(
             len(self.aid2vote), len(self.aid2wids), len(self.aid2qu)))
     # The message has no placeholders, so the previous
     # `.format(self.name)` call was a no-op and has been dropped.
     print('load qid/aid/uid over')
Exemplo n.º 2
0
 def get_qid2auv_bert(self):
     """Group the pickled (qid, aid, uid) triples by qid.

     The result is stored in ``self.qid2auv_bert``; nothing is returned.
     Each qid maps to a tuple of three parallel lists that receive, for
     every triple of that qid, ``aid2pf16[aid]``, ``uid2uint[uid]`` and
     ``aid2vote[aid]`` respectively.
     """
     triples = iu.load_pickle(self.fill('_mid', 'qau_list.pkl'))
     uint_of_uid = iu.load_pickle(self.fill('_mid', 'uid2uint_dict.pkl'))
     pf16_of_aid = iu.load_pickle(self.fill('bert', 'aid2pf16.pkl'))
     # NOTE(review): unlike the three calls above, fill() receives a single
     # argument here -- confirm that this is intended.
     vote_of_aid = iu.load_pickle(self.fill('aid2vote_dict.pkl'))
     print(len(triples), len(uint_of_uid), len(pf16_of_aid), len(vote_of_aid))
     grouped = self.qid2auv_bert = dict()
     for qid, aid, uid in triples:
         # setdefault creates the (answers, users, votes) lists on first sight.
         answers, users, votes = grouped.setdefault(qid, ([], [], []))
         answers.append(pf16_of_aid[aid])
         users.append(uint_of_uid[uid])
         votes.append(vote_of_aid[aid])
Exemplo n.º 3
0
 def load_clu_init(self, embed_dim: int, topic_ratio: float = 1) -> np.ndarray:
     """Unpickle and return the object stored in the cluster-init file.

     The file path is obtained from ``get_clu_init_file(embed_dim,
     topic_ratio)``.
     """
     return iu.load_pickle(self.get_clu_init_file(embed_dim, topic_ratio))
Exemplo n.º 4
0
 def load_word2vec(self, embed_dim: int) -> Dict[str, np.ndarray]:
     """Unpickle and return the object stored in the word2vec file.

     The file path is obtained from ``get_word2vec_file(embed_dim)``.
     """
     return iu.load_pickle(self.get_word2vec_file(embed_dim))
Exemplo n.º 5
0
def partition_qids(aid2qu_file, seed, pidx):
    """Search for a per-user-balanced split of question ids over ``RVS``.

    Every question is first assigned a random group drawn from ``RVS`` with
    probabilities 0.78 / 0.11 / 0.11. Up to 10000 repair passes follow: for
    each user with fewer than three questions in group ``R``, one of that
    user's ``V`` questions (or, failing that, one ``S`` question) is moved
    to ``R``. A split is accepted once every user has at least three ``R``
    questions and at least one ``V`` and one ``S`` question.

    NOTE(review): the count unpacking assumes ``RVS`` iterates in the order
    (R, V, S) -- confirm against the module-level definition.

    Args:
        aid2qu_file: path of a pickle holding a dict aid -> (qid, uid).
        seed: value passed to ``np.random.seed`` before any group is drawn.
        pidx: identifier printed next to the counts of the first user that
            makes a trial fail.

    Returns:
        A dict mapping each element of ``RVS`` to its list of qids, or
        ``None`` when no valid split is found within the trial budget.
    """
    class User:
        # uid -> User registry; local to this call because the class is
        # re-created on every invocation of partition_qids.
        lookup = dict()

        def __init__(self, uid):
            self.uid = uid
            self.qs = list()
            self.rvs2qs = None

        def add_q(self, q):
            self.qs.append(q)

        def split_rvs_qs(self):
            """Regroup this user's questions by their current group."""
            self.rvs2qs = {x: list() for x in RVS}
            for q in self.qs:
                self.rvs2qs[q.rvs].append(q)
            return self.rvs2qs

        @staticmethod
        def find(uid):
            if uid not in User.lookup:
                User.lookup[uid] = User(uid)
            return User.lookup[uid]

    class Ques:
        # qid -> Ques registry; local to this call (see User.lookup).
        lookup = dict()

        def __init__(self, qid):
            self.qid = qid
            self.users = list()
            self.rvs = np.random.choice(RVS, p=[0.78, 0.11, 0.11])

        def add_user(self, user):
            # Fix: the loading loop calls add_user, which was never defined.
            self.users.append(user)

        def set_rvs(self, x):
            self.rvs = x

        @staticmethod
        def find(qid):
            if qid not in Ques.lookup:
                Ques.lookup[qid] = Ques(qid)
            return Ques.lookup[qid]

    np.random.seed(seed)
    for qid, uid in iu.load_pickle(aid2qu_file).values():
        ques = Ques.find(qid)
        user = User.find(uid)
        ques.add_user(user)
        user.add_q(ques)

    max_trials = 10000
    for i in range(max_trials):
        print(i, 'th trial')
        # Repair pass: top up group R for users that are short of it.
        for uid, user in User.lookup.items():
            rvs_quess = user.split_rvs_qs()
            rc, vc, sc = [len(rvs_quess[x]) for x in RVS]
            if rc < 3:
                print('recheck on ', uid)
                # Fix: use the freshly computed grouping; the previous
                # `user.rvs2quess` attribute does not exist.
                if vc > 0:
                    rvs_quess[V][0].set_rvs(R)
                elif sc > 0:
                    rvs_quess[S][0].set_rvs(R)

        # Validation pass: questions are shared between users, so a repair
        # for one user may have broken another; re-check everybody.
        can = True
        for user in User.lookup.values():
            rvs_quess = user.split_rvs_qs()
            counts = rc, vc, sc = [len(rvs_quess[x]) for x in RVS]
            if rc < 3 or vc < 1 or sc < 1:
                print(pidx, counts)
                can = False
                break
        if not can:
            continue

        rvs2qid = {x: list() for x in RVS}
        for qid, ques in Ques.lookup.items():
            rvs2qid[ques.rvs].append(qid)
        lenarr = [len(qids) for qids in rvs2qid.values()]
        total = sum(lenarr)
        # Guard against an empty input, which would divide by zero.
        print(lenarr, [s / total for s in lenarr] if total else lenarr)

        all_qids = list(Ques.lookup.keys())
        rvs_qids = au.merge(rvs2qid.values())
        print(len(all_qids), len(set(all_qids)), '; ', len(rvs_qids),
              len(set(rvs_qids)))
        # Sanity check: the groups cover every qid exactly once.
        assert len(all_qids) == len(rvs_qids) and set(all_qids) == set(
            rvs_qids)
        print('valid partition found')
        return rvs2qid

    # Trial budget exhausted without a valid split.
    return None
Exemplo n.º 6
0
 def load_cdong_full(self):
     """Load word vectors, user vectors and the two pickled qid tables.

     After ``load_word_vec`` and ``load_user_vec`` have run, ``rvs2qids``
     and ``qid2qauv`` are read from ``rvs2qids_file`` and ``qid2qauv_file``.
     """
     self.load_word_vec()
     self.load_user_vec()
     rvs2qids_path = self.rvs2qids_file
     self.rvs2qids = iu.load_pickle(rvs2qids_path)
     qid2qauv_path = self.qid2qauv_file
     self.qid2qauv = iu.load_pickle(qid2qauv_path)
Exemplo n.º 7
0
 def load_bert_full(self):
     """Load everything the BERT variant reads in this method.

     Runs ``load_user_vec_bert`` and ``get_qid2auv_bert``, then reads
     ``rvs2qids`` from the pickle at ``rvs2qids_file``.
     """
     self.load_user_vec_bert()
     self.get_qid2auv_bert()
     rvs2qids_path = self.rvs2qids_file
     self.rvs2qids = iu.load_pickle(rvs2qids_path)
Exemplo n.º 8
0
 def load_user_vec_bert(self):
     """Read ``user_vec_bert`` from the pickle located via ``fill``."""
     self.user_vec_bert = iu.load_pickle(
         self.fill('bert', 'user_vec_bert.pkl'))
Exemplo n.º 9
0
 def load_user_vec(self):
     """Read ``user_vec`` from the pickle at ``user_vec_file``."""
     vec_path = self.user_vec_file
     self.user_vec = iu.load_pickle(vec_path)
Exemplo n.º 10
0
 def load_word_vec(self):
     """Read ``word_vec`` from the pickle at ``word_vec_file``."""
     vec_path = self.word_vec_file
     self.word_vec = iu.load_pickle(vec_path)