예제 #1
0
    def _getCommentEmbedding(self, type, if_exchange):
        """Build the comment-embedding matrix for the source and target domains.

        The pickles matching ``<input_dir>/<type><domain>*.pk`` are loaded for
        ``self.src_domain`` and ``self.tgt_domain``; each is used as a mapping
        from a sortable key to an embedding vector.  Row 0 of the result is an
        all-zero padding vector, followed by the source vectors in sorted-key
        order and then the target vectors in sorted-key order.

        Args:
            type: File-name fragment placed in front of the domain name when
                the glob pattern is built.  (The name shadows the builtin but
                is kept so existing callers keep working.)
            if_exchange: When truthy, the two loaded embeddings swap roles
                before the matrix is assembled.

        Returns:
            The stacked rows converted with ``np.array``.

        Raises:
            IOError: If no file matches one of the two patterns.  Previously
                this surfaced later as an ``UnboundLocalError``.
            IndexError: If the embedding used as source has no entries.
        """
        print('os.path.join(self.input_dir, type+self.src_domain+"*.pk") :',
              os.path.join(self.input_dir, type + self.src_domain + "*.pk"))

        def load_last_match(domain):
            # Every matching file is loaded and the last one returned by
            # glob() wins, exactly as before; only the "no match" case is new.
            pattern = os.path.join(self.input_dir, type + domain) + "*.pk"
            matches = glob(pattern)
            if not matches:
                raise IOError(
                    "no comment-embedding file matches %r" % pattern)
            loaded = None
            for f in matches:
                loaded = pkload(f)
            return loaded

        src_Comment_Embedding = load_last_match(self.src_domain)
        tgt_Comment_Embedding = load_last_match(self.tgt_domain)
        if if_exchange:
            src_Comment_Embedding, tgt_Comment_Embedding = (
                tgt_Comment_Embedding, src_Comment_Embedding)

        src_keys = sorted(src_Comment_Embedding.keys())
        tgt_keys = sorted(tgt_Comment_Embedding.keys())

        # Row 0 is the zero vector, sized like the first source vector.
        rows = [[0] * len(src_Comment_Embedding[src_keys[0]])]
        rows.extend(src_Comment_Embedding[key] for key in src_keys)
        rows.extend(tgt_Comment_Embedding[key] for key in tgt_keys)
        w = np.array(rows)
        print('comment_embedding.shape:', w.shape)
        return w
예제 #2
0
    def _getCommentEmbedding(self, type, if_exchange):
        """Build the comment-embedding matrix for the source and target domains.

        Source pickles are searched in ``self.input_dir`` with every
        occurrence of ``self.tgt_domain`` replaced by ``self.src_domain``;
        target pickles are searched in ``self.input_dir`` itself.  Row 0 of
        the result is an all-zero padding vector, followed by the source
        vectors in sorted-key order and then the target vectors in sorted-key
        order.

        When ``self.src_domain`` is ``'Books'`` the matched pickle is used as
        a ``key -> index`` mapping and the vectors are taken from a separate,
        hard-coded ``Books.pk`` file, producing an ``index -> vector`` dict.

        Args:
            type: File-name fragment placed in front of the domain name when
                the glob pattern is built.  (The name shadows the builtin but
                is kept so existing callers keep working.)
            if_exchange: When truthy, the two loaded embeddings swap roles
                before the matrix is assembled.

        Returns:
            The stacked rows converted with ``np.array``.

        Raises:
            IOError: If no file matches one of the two patterns.  Previously
                this surfaced later as an ``UnboundLocalError``.
            IndexError: If the embedding used as source has no entries.
        """
        src_input_dir = self.input_dir.replace(self.tgt_domain,
                                               self.src_domain)
        src_pattern = os.path.join(src_input_dir,
                                   type + self.src_domain) + "*.pk"
        tgt_pattern = os.path.join(self.input_dir,
                                   type + self.tgt_domain) + "*.pk"

        src_files = glob(src_pattern)
        if not src_files:
            raise IOError(
                "no source comment-embedding file matches %r" % src_pattern)
        src_Comment_Embedding = None
        for f in src_files:
            print('start load src_comment', f)
            if self.src_domain != 'Books':
                src_Comment_Embedding = pkload(f)
            else:
                index_by_key = pkload(f)
                # NOTE(review): machine-specific absolute path; presumably it
                # should be derived from the data directory - confirm.
                f1 = '/data1/home/jinyaru/DSNRec_1024/exam/preprocess/sentiRecOutput/Books.pk'
                vector_by_key = pkload(f1)
                # The original used a tuple-parameter helper
                # (``def f((uit, index))``), which is a SyntaxError on
                # Python 3 (PEP 3113).  This form runs on 2.7 and 3.x.
                src_Comment_Embedding = dict(
                    (index, list(vector_by_key[uit]))
                    for uit, index in index_by_key.items())
                # Drop the large intermediates before loading anything else.
                del index_by_key
                del vector_by_key
            print('end load src_comment',
                  'len(src_Comment_Embedding):%s' % len(src_Comment_Embedding))
            print(' ')

        tgt_files = glob(tgt_pattern)
        if not tgt_files:
            raise IOError(
                "no target comment-embedding file matches %r" % tgt_pattern)
        tgt_Comment_Embedding = None
        for f in tgt_files:
            print('start load tgt_comment', f)
            tgt_Comment_Embedding = pkload(f)
            print('end load tgt_comment',
                  'len(tgt_Comment_Embedding):%s' % len(tgt_Comment_Embedding))
            print(' ')

        if if_exchange:
            src_Comment_Embedding, tgt_Comment_Embedding = (
                tgt_Comment_Embedding, src_Comment_Embedding)

        src_keys = sorted(src_Comment_Embedding.keys())
        tgt_keys = sorted(tgt_Comment_Embedding.keys())

        # Row 0 is the zero vector, sized like the first source vector.
        rows = [[0] * len(src_Comment_Embedding[src_keys[0]])]
        rows.extend(src_Comment_Embedding[key] for key in src_keys)
        rows.extend(tgt_Comment_Embedding[key] for key in tgt_keys)
        w = np.array(rows)
        print('comment_embedding.shape:', w.shape)
        return w
예제 #3
0
    def _buildData(self, type):
        """Load the pickled source- and target-domain data for ``type``.

        Files are looked up with the glob pattern
        ``<input_dir>/<type><domain>*.pk``.  If several files match, all are
        loaded and the last one returned by ``glob`` is kept.

        Args:
            type: File-name fragment placed in front of the domain name.
                (The name shadows the builtin but is kept for callers.)

        Returns:
            Tuple ``(src_data, tgt_data)`` with whatever ``pkload`` returned
            for the source and target domain.

        Raises:
            IOError: If no file matches one of the two patterns.  Previously
                this surfaced as an ``UnboundLocalError`` at the ``return``.
        """
        print('os.path.join(self.input_dir, type+self.src_domain+"*.pk") :',
              os.path.join(self.input_dir, type + self.src_domain + "*.pk"))

        def load_last_match(domain):
            pattern = os.path.join(self.input_dir, type + domain) + "*.pk"
            matches = glob(pattern)
            if not matches:
                raise IOError("no data file matches %r" % pattern)
            loaded = None
            for f in matches:
                loaded = pkload(f)
            return loaded

        src_data = load_last_match(self.src_domain)
        tgt_data = load_last_match(self.tgt_domain)
        return src_data, tgt_data
예제 #4
0
    def __getUserSplitTrainTest(self):
        """Split source/target ratings into train and test sets by user.

        Every file in ``self.cold_dir`` is loaded with ``pkload``:

        * a name containing ``self.src_domain`` sets ``self.src_user_cold``;
        * otherwise a name containing ``self.tgt_domain`` sets
          ``self.tgt_user_cold``;
        * any other file is taken as the overlap-user list, which is shuffled
          in place and cut down to ``int(len * self.overlap_rate)`` entries.

        Ratings are grouped by the first element of their key.  Train sets
        collect the ratings of the cold users of each domain; both test sets
        collect the ratings of the retained overlap users.

        Returns:
            ``{"src": {"train": [...], "test": [...]},
            "tgt": {"train": [...], "test": [...]}}`` where each list holds
            ``(key, rating)`` pairs.
        """
        for name in os.listdir(self.cold_dir):
            path = os.path.join(self.cold_dir, name)
            if self.src_domain in name:
                self.src_user_cold = pkload(path)
                continue
            if self.tgt_domain in name:
                self.tgt_user_cold = pkload(path)
                continue
            self.overlap_user = pkload(path)
            random.shuffle(self.overlap_user)
            keep = int(len(self.overlap_user) * self.overlap_rate)
            self.overlap_user = self.overlap_user[:keep]

        # Group the (key, rating) pairs under the user found in key[0].
        src_by_user = defaultdict(list)
        for pair, score in self.src_rating.items():
            src_by_user[pair[0]].append((pair, score))

        tgt_by_user = defaultdict(list)
        for pair, score in self.tgt_rating.items():
            tgt_by_user[pair[0]].append((pair, score))

        def gather(grouped, users):
            collected = []
            for user in users:
                collected.extend(grouped[user])
            return collected

        # (Disabled in the original: also adding the overlap users to the
        # source training set, so that every source-domain review would take
        # part in training.)
        src_train = gather(src_by_user, self.src_user_cold)
        src_test = gather(src_by_user, self.overlap_user)
        tgt_train = gather(tgt_by_user, self.tgt_user_cold)
        tgt_test = gather(tgt_by_user, self.overlap_user)

        return {
            "src": {"train": src_train, "test": src_test},
            "tgt": {"train": tgt_train, "test": tgt_test},
        }
예제 #5
0
def csv_form(data_dir, fields, output_dir, src_domain, tgt_domain):
    """Split each domain's review file into train/test CSV files.

    For ``reviews_<src_domain>_5.json`` and ``reviews_<tgt_domain>_5.json``
    under ``<data_dir>/preprocess/transform`` the requested fields are
    extracted from every record.  Rows whose first field is in the overlap
    user collection (``.../cold/<src>_<tgt>/overlapUser.pk``) go to
    ``*_test.csv``; all other rows go to ``*_train.csv``.  Both files are
    written below ``<data_dir>/preprocess/<output_dir>/<src>_<tgt>/``.

    Args:
        data_dir: Root data directory.
        fields: Comma-separated record keys, or ``"*"`` for
            ``reviewerID,asin,overall``.  The first field is the one compared
            against the overlap users.
        output_dir: Sub-directory name under ``<data_dir>/preprocess``.
        src_domain: Source domain name used in file and directory names.
        tgt_domain: Target domain name used in file and directory names.
    """
    if fields == "*":
        fields = "reviewerID,asin,overall"

    field_names = fields.split(",")
    getter = itemgetter(*field_names)
    # itemgetter() returns a bare value (not a 1-tuple) for a single key;
    # without re-wrapping, row[0] and the join below would act on characters.
    single_field = len(field_names) == 1

    transform = "%s/preprocess/transform" % data_dir
    output_dir = "%s/preprocess/%s/%s_%s/" % (data_dir, output_dir, src_domain,
                                              tgt_domain)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cold_dir = "%s/preprocess/cold/%s_%s/overlapUser.pk" % (
        data_dir, src_domain, tgt_domain)
    cold_user = pkload(cold_dir)
    try:
        # Constant-time membership tests instead of scanning per row.
        cold_lookup = frozenset(cold_user)
    except TypeError:
        # Unhashable entries: fall back to the container as loaded.
        cold_lookup = cold_user

    need_transform = [
        'reviews_%s_5.json' % src_domain,
        'reviews_%s_5.json' % tgt_domain
    ]

    # Defined (and decorated) once rather than on every loop iteration.
    @recordTime
    def transCSV(fn):
        inf = os.path.join(transform, fn)
        # Only the file name's own extension is swapped; str.replace on the
        # joined path would also rewrite any ".json" inside directory names.
        stem = os.path.splitext(fn)[0]
        out_train = os.path.join(output_dir, stem + "_train.csv")
        out_test = os.path.join(output_dir, stem + "_test.csv")

        train, test = [], []
        for record in readJson(inf):
            row = getter(record)
            if single_field:
                row = (row,)
            line = ",".join(map(str, row))
            if row[0] in cold_lookup:
                test.append(line)
            else:
                train.append(line)

        with open(out_train, "w") as f:
            f.write("\n".join(train))

        with open(out_test, "w") as f:
            f.write("\n".join(test))

    for fn in need_transform:
        transCSV(fn)
예제 #6
0
def save_w_src_tgt(df_SeleSrc, df_SeleTgt, u_less, i_less, if_time, outputpath,
                   source_name, target_name):
    '''Attach comment-index lists to the selected ratings and save everything.

    Each row of the two frames is treated as one comment.  A comment's number
    is its index label plus 1 for source rows, and its index label plus
    ``1 + len(df_SeleSrc)`` for target rows (0 is reserved for padding).  For
    every row the sorted numbers of all comments sharing its ``uid`` /
    ``iid`` are stored, as the ``str`` of a list, in the ``u_list`` /
    ``i_list`` columns.  Target rows get their ``u_list`` from the *source*
    user lists (left merge, so users unseen in the source get NaN).

    Written to ``<outputpath>sele_data/<source>_<target>/uThan<u>_iThan<i>/``:
    ``CommentArr.npy`` (zero row, then source vectors, then target vectors,
    both in row order), ``Src.csv`` and ``Tgt.csv``.

    df_SeleSrc: data selected from the source domain,
        ['uid','iid','rating','time']
    df_SeleTgt: data selected from the target domain,
        ['uid','iid','rating','time']
    u_less, i_less: only used to build the output directory name.
    if_time: when truthy both frames are sorted by 'time' and re-indexed
        from 0 first.  NOTE(review): without it the comment numbers follow
        the incoming index labels, which presumably should already be
        0..n-1 for them to line up with the saved matrix rows - confirm.
    outputpath: path prefix; concatenated as-is, so it must end with a
        path separator.
    source_name, target_name: domain names; the embeddings are read from
        ``<outputpath>sentiRecOutput/<name>.pk`` and looked up with
        ``(uid, iid, time)`` keys.
    '''
    if if_time:
        df_SeleSrc = df_SeleSrc.sort_values(by='time').reset_index(drop=True)
        df_SeleTgt = df_SeleTgt.sort_values(by='time').reset_index(drop=True)

    def comment_index_list(frame, column, value, offset):
        # .tolist() yields plain Python ints, so the stored text is always
        # "[1, 2, ...]" regardless of how NumPy prints its scalar types.
        labels = np.array(frame[frame.loc[:, column] == value].index) + offset
        return str(sorted(labels.tolist()))

    src_offset = 1
    tgt_offset = 1 + len(df_SeleSrc)

    SrcUList = df_SeleSrc['uid'].unique()
    SrcIList = df_SeleSrc['iid'].unique()
    TgtIList = df_SeleTgt['iid'].unique()

    # Lists (not lazy map objects) so the column assignment also works on
    # Python 3.
    df_SrcUlist = pd.DataFrame()
    df_SrcUlist['uid'] = SrcUList
    df_SrcUlist['u_list'] = [
        comment_index_list(df_SeleSrc, 'uid', uid, src_offset)
        for uid in SrcUList
    ]

    df_SrcIlist = pd.DataFrame()
    df_SrcIlist['iid'] = SrcIList
    df_SrcIlist['i_list'] = [
        comment_index_list(df_SeleSrc, 'iid', iid, src_offset)
        for iid in SrcIList
    ]

    df_TgtIlist = pd.DataFrame()
    df_TgtIlist['iid'] = TgtIList
    df_TgtIlist['i_list'] = [
        comment_index_list(df_SeleTgt, 'iid', iid, tgt_offset)
        for iid in TgtIList
    ]

    df_SeleSrc = pd.merge(df_SeleSrc, df_SrcUlist, how='left', on=['uid'])
    df_SeleSrc = pd.merge(df_SeleSrc, df_SrcIlist, how='left', on=['iid'])
    df_SeleTgt = pd.merge(df_SeleTgt, df_SrcUlist, how='left', on=['uid'])
    df_SeleTgt = pd.merge(df_SeleTgt, df_TgtIlist, how='left', on=['iid'])

    SrcComment = outputpath + 'sentiRecOutput/%s.pk' % (source_name)
    src_Comment_Embedding = pkload(SrcComment)
    TgtComment = outputpath + 'sentiRecOutput/%s.pk' % (target_name)
    tgt_Comment_Embedding = pkload(TgtComment)

    srckey_list = zip(df_SeleSrc['uid'].to_list(), df_SeleSrc['iid'].to_list(),
                      df_SeleSrc['time'].to_list())
    tgtkey_list = zip(df_SeleTgt['uid'].to_list(), df_SeleTgt['iid'].to_list(),
                      df_SeleTgt['time'].to_list())
    vectors = [src_Comment_Embedding[key] for key in srckey_list]
    vectors.extend(tgt_Comment_Embedding[key] for key in tgtkey_list)

    # The padding row takes its width from the data; 50 (the former
    # hard-coded width) is only used when there are no vectors at all.
    pad_width = len(vectors[0]) if vectors else 50
    w = np.array([[0] * pad_width] + vectors)
    print('w.shape:', w.shape)
    print(
        '---------------------------------------------------over-----------------------------------------------------'
    )
    print('')
    print('')

    out_path = outputpath + 'sele_data/%s_%s/uThan%s_iThan%s/' % (
        source_name, target_name, u_less, i_less)
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    np.save(out_path + 'CommentArr.npy', w)
    df_SeleSrc.to_csv(out_path + 'Src.csv', index=False)
    df_SeleTgt.to_csv(out_path + 'Tgt.csv', index=False)
예제 #7
0
    def _buildData(self, type):
        """Load source/target data for ``type`` and expand comment indices.

        * ``type == 'rating_time*'``: the pickles are read from
          ``self.input_dir`` with the
          ``uirepresent/source_<src>/<src>_<tgt>`` part replaced by
          ``transform`` and returned as loaded.
        * otherwise: source pickles are read from ``self.input_dir`` with
          ``self.tgt_domain`` replaced by ``self.src_domain``, target pickles
          from ``self.input_dir``.  Each value is a list of comment indices;
          it is cut to its last ``N`` entries or left-padded with 0 up to
          ``N`` (``self.u_num_comment`` for ``'user*'``,
          ``self.i_num_comment`` for ``'item*'``) and every index is replaced
          by ``self.w[index]``, giving one ``np.ndarray`` per key.

        If several files match a pattern, all are loaded and the last one
        returned by ``glob`` is kept.

        Args:
            type: File-name fragment placed in front of the domain name.
                (The name shadows the builtin but is kept for callers.)

        Returns:
            Tuple ``(src_data, tgt_data)``.

        Raises:
            IOError: If no file matches a pattern (previously an
                ``UnboundLocalError``).
            ValueError: If index lists have to be normalised for a ``type``
                other than ``'user*'`` / ``'item*'`` (previously a
                ``NameError`` on the undefined length).
        """

        def load_last_match(directory, domain, start_msg=None, end_msg=None):
            pattern = os.path.join(directory, type + domain) + "*.pk"
            matches = glob(pattern)
            if not matches:
                raise IOError("no data file matches %r" % pattern)
            loaded = None
            for f in matches:
                if start_msg is not None:
                    print(start_msg, f)
                loaded = pkload(f)
                if end_msg is not None:
                    print(end_msg)
                    print(' ')
            return loaded

        if type == 'rating_time*':
            input_dir = self.input_dir.replace(
                "uirepresent/source_%s/%s_%s" %
                (self.src_domain, self.src_domain, self.tgt_domain),
                "transform")
            src_data = load_last_match(input_dir, self.src_domain,
                                       'start load src_rating_time*',
                                       'end load src_rating_time*')
            tgt_data = load_last_match(input_dir, self.tgt_domain,
                                       'start load tgt_rating_time*',
                                       'end load tgt_rating_time*')
            return src_data, tgt_data

        input_dir = self.input_dir
        if type == 'user*':
            norm_num = self.u_num_comment
        elif type == 'item*':
            norm_num = self.i_num_comment
        else:
            # Reported lazily (only when a list must actually be normalised)
            # so that empty inputs still pass through as they did before.
            norm_num = None

        src_input_dir = input_dir.replace(self.tgt_domain, self.src_domain)
        src_data = load_last_match(src_input_dir, self.src_domain)
        tgt_data = load_last_match(input_dir, self.tgt_domain)

        def to_comment_vectors(index_list):
            if norm_num is None:
                raise ValueError(
                    "cannot normalise comment lists for type %r; expected "
                    "'user*' or 'item*'" % (type, ))
            if len(index_list) > norm_num:
                # Keep the last norm_num entries.  An explicit start offset
                # is used because [-0:] would keep the whole list.
                index_list = index_list[len(index_list) - norm_num:]
            elif len(index_list) < norm_num:
                index_list = [0] * (norm_num - len(index_list)) + index_list
            return np.array([self.w[index] for index in index_list])

        src_data = dict((key, to_comment_vectors(src_data[key]))
                        for key in list(src_data.keys()))
        tgt_data = dict((key, to_comment_vectors(tgt_data[key]))
                        for key in list(tgt_data.keys()))

        return src_data, tgt_data
예제 #8
0
    def __getUserTrainTest(self):
        """Assemble shuffled train/test samples with sampled negatives.

        Everything is read from
        ``<input_dir>/<src>_<tgt>/uThan<u_less>_iThan<i_less>/fold_<fold>/``:

        1. ``src_train.csv``, ``src_test.csv``, ``tgt_train.csv`` and
           ``tgt_test.csv`` are loaded and shuffled (``random_state=2020``).
           Their ``u_list`` / ``i_list`` cells (text of an index list) are
           sorted and then cut to the last, or left-padded with 0 up to,
           ``self.u_num_comment`` / ``self.i_num_comment`` entries.
        2. ``self.dict_u_list`` (uid -> u_list) and ``self.dict_i_list``
           (iid -> i_list) are built from all four frames; later frames
           overwrite earlier ones for a repeated id.
        3. Every row from the CSVs gets rating ``[1, 0]``.  For every user in
           the negative-item pickles one row per negative item is added with
           rating ``[0, 1]``; training negatives are limited to the first
           ``self.negRatio`` items per user.
        4. The combined frames are shuffled again and converted to lists.

        Returns:
            ``{"src": {"train": [...], "test": [...]},
            "tgt": {"train": [...], "test": [...]}}`` where each list holds
            ``((uid, iid), rating)`` tuples.

        Raises:
            KeyError: If a negative user/item has no entry in
                ``self.dict_u_list`` / ``self.dict_i_list``.
        """
        fold_dir = self.input_dir + '/%s_%s/uThan%s_iThan%s/fold_%s/' % (
            self.src_domain, self.tgt_domain, self.u_less, self.i_less,
            self.fold)

        def shuffled(frame):
            frame = frame.sample(frac=1.0, random_state=2020)
            return frame.reset_index(drop=True)

        def fit_length(raw, size):
            # NOTE(review): eval() executes whatever text the CSV cell holds.
            # That is only acceptable while these files are produced by our
            # own preprocessing; consider ast.literal_eval if that changes.
            indices = eval(raw)
            indices.sort()
            if len(indices) < size:
                return str([0] * (size - len(indices)) + indices)
            # Explicit start offset: [-0:] would keep the whole list.
            return str(indices[len(indices) - size:])

        def load_positives(file_name):
            frame = shuffled(pd.read_csv(fold_dir + file_name))
            # Lists (not lazy map objects) so the column assignment also
            # works on Python 3.
            frame['u_list'] = [
                fit_length(raw, self.u_num_comment)
                for raw in frame['u_list'].to_list()
            ]
            frame['i_list'] = [
                fit_length(raw, self.i_num_comment)
                for raw in frame['i_list'].to_list()
            ]
            return frame

        df_SrcTrain = load_positives('src_train.csv')
        df_SrcTest = load_positives('src_test.csv')
        df_TgtTrain = load_positives('tgt_train.csv')
        df_TgtTest = load_positives('tgt_test.csv')
        positives = (df_SrcTrain, df_SrcTest, df_TgtTrain, df_TgtTest)

        dict_u_list = {}
        dict_i_list = {}
        for frame in positives:
            dict_u_list.update(
                zip(frame['uid'].to_list(), frame['u_list'].to_list()))
            dict_i_list.update(
                zip(frame['iid'].to_list(), frame['i_list'].to_list()))
        self.dict_u_list = dict_u_list
        self.dict_i_list = dict_i_list

        # Ranking labels: observed interactions are the positive class.
        for frame in positives:
            frame['rating'] = [[1, 0]] * len(frame)

        def first_negatives(neg_by_user):
            return dict((user, items[:self.negRatio])
                        for user, items in neg_by_user.items())

        src_trainNegatives = first_negatives(
            pkload(fold_dir + 'train_%s_NegItemsListUDict_sample%s.pk' %
                   (self.src_domain, self.train_neg_sample)))
        src_testNegatives = pkload(fold_dir +
                                   'test_%s_NegItemsListUDict_sample%s.pk' %
                                   (self.src_domain, self.test_neg_sample))
        tgt_trainNegatives = first_negatives(
            pkload(fold_dir + 'train_%s_NegItemsListUDict_sample%s.pk' %
                   (self.tgt_domain, self.train_neg_sample)))
        tgt_testNegatives = pkload(fold_dir +
                                   'test_%s_NegItemsListUDict_sample%s.pk' %
                                   (self.tgt_domain, self.test_neg_sample))

        def get_new_df(negUDict):
            # One row per (user, negative item), labelled as negative class.
            uid_list = []
            iid_list = []
            u_list_list = []
            i_list_list = []
            rat_list = []
            for user, neg_items in negUDict.items():
                for neg_item in neg_items:
                    uid_list.append(user)
                    iid_list.append(neg_item)
                    u_list_list.append(self.dict_u_list[user])
                    i_list_list.append(self.dict_i_list[neg_item])
                    rat_list.append([0, 1])
            new_neg_df_dic = {}
            new_neg_df_dic['uid'] = uid_list
            new_neg_df_dic['iid'] = iid_list
            new_neg_df_dic['u_list'] = u_list_list
            new_neg_df_dic['i_list'] = i_list_list
            new_neg_df_dic['rating'] = rat_list
            return pd.DataFrame.from_dict(new_neg_df_dic)

        # dict views cannot be sliced on Python 3; materialise first.
        print('src_trainNegatives.items()[:2]:',
              list(src_trainNegatives.items())[:2])
        neg_df_src_train = get_new_df(src_trainNegatives)
        neg_df_src_test = get_new_df(src_testNegatives)
        neg_df_tgt_train = get_new_df(tgt_trainNegatives)
        neg_df_tgt_test = get_new_df(tgt_testNegatives)
        print(
            'len(df_SrcTrain):%s,len(df_SrcTest):%s,len(df_TgtTrain):%s,len(df_TgtTest):%s,'
            % (len(df_SrcTrain), len(df_SrcTest), len(df_TgtTrain),
               len(df_TgtTest)))
        print(
            'len(neg_df_src_train):%s,len(neg_df_src_test):%s,len(neg_df_tgt_train):%s,len(neg_df_tgt_test):%s,'
            % (len(neg_df_src_train), len(neg_df_src_test),
               len(neg_df_tgt_train), len(neg_df_tgt_test)))

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_SrcTrain = shuffled(pd.concat([df_SrcTrain, neg_df_src_train]))
        df_SrcTest = shuffled(pd.concat([df_SrcTest, neg_df_src_test]))
        df_TgtTrain = shuffled(pd.concat([df_TgtTrain, neg_df_tgt_train]))
        df_TgtTest = shuffled(pd.concat([df_TgtTest, neg_df_tgt_test]))
        print(
            'len(df_SrcTrain):%s,len(df_SrcTest):%s,len(df_TgtTrain):%s,len(df_TgtTest):%s,'
            % (len(df_SrcTrain), len(df_SrcTest), len(df_TgtTrain),
               len(df_TgtTest)))

        def as_samples(frame):
            # Materialised so that len() and repeated iteration keep working
            # on Python 3, where zip() is a one-shot iterator.
            return list(
                zip(zip(frame['uid'].to_list(), frame['iid'].to_list()),
                    frame['rating'].to_list()))

        src_train = as_samples(df_SrcTrain)
        src_test = as_samples(df_SrcTest)
        tgt_train = as_samples(df_TgtTrain)
        tgt_test = as_samples(df_TgtTest)
        print(
            'len(src_train):%s,len(src_test):%s,len(tgt_train):%s,len(tgt_test):%s,'
            % (len(src_train), len(src_test), len(tgt_train), len(tgt_test)))

        return {
            "src": {
                "train": src_train,
                "test": src_test
            },
            "tgt": {
                "train": tgt_train,
                "test": tgt_test
            }
        }
예제 #9
0
def generate_track(val_list_file, results_file, feat_dir, bbox_dir, res_file):
    """
    Generate subject/object tracklets from attention values and save them.

    For every sample the two temporal scores are fused (each ``beta2`` value
    is added to its block of ``len(beta1) // len(beta2)`` consecutive
    ``beta1`` values), boxes are linked with ``link_bbox``, interpolated when
    the video has more than ``sample_fnum`` frames, and stored as
    ``{frame_id: bbox}`` dicts under ``result[vname][relation]``.

    Relies on the module-level names ``beta_thresh`` and ``sample_fnum``.

    :param val_list_file: file read with ``load_file``; every entry unpacks
        to ``(vname, nframe, width, height, relation)``
    :param results_file: file read with ``load_file``; indexed by video name
        and then relation, each entry providing 'sub', 'obj', 'beta1' and
        'beta2'
    :param feat_dir: passed to ``load_video_bbox`` when a video has no cached
        boxes yet
    :param bbox_dir: directory holding the per-video ``<vname>.pkl`` box
        caches; missing caches are created here
    :param res_file: destination handed to ``save_results``
    :return: None
    """
    val_list = load_file(val_list_file)
    results_all = load_file(results_file)

    pre_vname = None
    results, video_bboxes, sample_frames = None, None, None
    final_res = {}

    for i, sample in enumerate(val_list):

        vname, nframe, width, height, relation = sample

        if vname != pre_vname:
            # New video: fetch its boxes, from the cache when available.
            cache_file = osp.join(bbox_dir, vname + '.pkl')
            data = pkload(cache_file)
            if data is not None:
                video_bboxes, sample_frames = data
            else:
                video_bboxes, sample_frames = load_video_bbox(vname, feat_dir, nframe)
                pkdump((video_bboxes, sample_frames), cache_file)
            results = results_all[vname]
            pre_vname = vname
            print(i, vname)

        alpha_s = np.array(results[relation]['sub'])
        alpha_o = np.array(results[relation]['obj'])

        beta1 = results[relation]['beta1']
        beta2 = results[relation]['beta2']

        nsample, nclip = len(beta1), len(beta2)
        beta1 = np.asarray(beta1)
        beta2 = np.asarray(beta2)
        step = nsample // nclip
        # Samples beyond nclip * step (if any) keep a score of 0.
        temp = np.zeros(nsample)
        for cp in range(nclip):
            temp[cp * step:(cp + 1) * step] = beta2[cp] + beta1[cp * step:(cp + 1) * step]

        sub_bboxes, obj_bboxes, sid, valid_frame_idx = link_bbox(video_bboxes, alpha_s, alpha_o,
                                                                 temp, beta_thresh, sample_frames, nframe)
        if valid_frame_idx is None:
            sub_bboxes = {}
            obj_bboxes = {}
        else:
            if nframe > sample_fnum:
                sub_bboxes, obj_bboxes = interpolate(sub_bboxes, obj_bboxes, valid_frame_idx, sample_frames, nframe)

            # Re-key the boxes by frame id, starting at the first frame.
            sid = sample_frames[sid]
            sub_bboxes = {fid + sid: bbox for fid, bbox in enumerate(sub_bboxes)}
            obj_bboxes = {fid + sid: bbox for fid, bbox in enumerate(obj_bboxes)}

        # Stored per video as we go.  The result is the same as before when a
        # video's samples are contiguous, and earlier relations are no longer
        # dropped when they are not.
        final_res.setdefault(vname, {})[relation] = {"sub": sub_bboxes, "obj": obj_bboxes}

    save_results(res_file, final_res)