def _cal_mat1(self, current_tim):
     """Build the pairwise temporal relation matrix of one trajectory.

     Entry ``[i][j]`` is ``abs(cal_timeoff(current_tim[i], current_tim[j]))``.
     Rows and columns beyond ``len(current_tim)`` keep their initial zero.
     As a side effect ``self.ex[3]`` is raised to the largest offset seen.

     Args:
         current_tim: sequence of time values accepted by ``cal_timeoff``.

     Returns:
         numpy array of shape ``(self.max_len, self.max_len)``.
     """
     relation = np.zeros((self.max_len, self.max_len))
     for row_idx, row_time in enumerate(current_tim):
         for col_idx, col_time in enumerate(current_tim):
             gap = abs(cal_timeoff(row_time, col_time))
             relation[row_idx][col_idx] = gap
             # track the running maximum offset across all calls
             if gap > self.ex[3]:
                 self.ex[3] = gap
     return relation
Exemplo n.º 2
0
 def encode(self, uid, trajectories):
     """Encode one user's trajectories with the same method as DeepMove.

     Location ids are re-coded through ``self.location2id`` (new ids are
     registered on first sight) and every timestamp is encoded as
     ``int(cal_timeoff(now_time, base_time))`` where ``base_time`` is
     ``cal_basetime(start_time, True)`` of the trajectory's first point
     (midnight of that day, according to the original comment).

     The first trajectory only seeds the history and yields no sample.
     Every later trajectory yields one trace whose last point is the target.

     Args:
         uid: same as AbstractTrajectoryEncoder. The value is ignored and
             replaced by the running counter ``self.uid``.
         trajectories: same as AbstractTrajectoryEncoder, e.g.
             trajectory1 = [
                 (location ID, timestamp, timezone_offset_in_minutes),
                 (location ID, timestamp, timezone_offset_in_minutes),
                 .....
             ]

     Returns:
         list of traces, each being
         ``[history_loc, history_tim, current_loc, current_tim,
         target, target_tim, uid]``.
     """
     # re-encode the uid directly with a running counter
     uid = self.uid
     self.uid += 1
     splice_history = self.history_type == 'splice'
     encoded_trajectories = []
     history_loc = []
     history_tim = []
     for index, traj in enumerate(trajectories):
         current_loc = []
         current_tim = []
         start_time = parse_time(traj[0][1], traj[0][2])
         # midnight of the start day is the reference for time_off
         base_time = cal_basetime(start_time, True)
         for point in traj:
             loc = point[0]
             now_time = parse_time(point[1], point[2])
             if loc not in self.location2id:
                 self.location2id[loc] = self.loc_id
                 self.id2locid[self.loc_id] = loc
                 self.loc_id += 1
             current_loc.append(self.location2id[loc])
             time_code = int(cal_timeoff(now_time, base_time))
             if time_code > self.tim_max:
                 self.tim_max = time_code
             current_tim.append(time_code)
         # the current trajectory is encoded; now build the model input
         if index > 0:
             # the first trajectory cannot form an input because it has
             # no history; later ones use their last point as the target
             target = current_loc[-1]
             target_tim = current_tim[-1]
             current_loc = current_loc[:-1]
             current_tim = current_tim[:-1]
             # Snapshot the history: the accumulators below are mutated in
             # place, so storing them by reference would make every trace
             # share one list that also contains later trajectories.
             encoded_trajectories.append([
                 list(history_loc),
                 list(history_tim),
                 current_loc,
                 current_tim,
                 target,
                 target_tim,
                 uid,
             ])
         if splice_history:
             history_loc += current_loc
             history_tim += current_tim
         else:
             history_loc.append(current_loc)
             history_tim.append(current_tim)
     return encoded_trajectories
Exemplo n.º 3
0
 def cutter_filter(self):
     """Cut every user's check-ins into sessions and filter sparse data.

     Locations with fewer than ``config['min_checkins']`` records are
     removed first. The remaining records of each user are then split
     according to ``config['cut_method']``:

     * ``'time_interval'``: a record joins the current session while its
       ``cal_timeoff`` from the previous record lies in ``[0, window_size)``
       and the session holds fewer than ``max_session_len`` records.
     * ``'same_date'``: a record joins the current session while it has the
       same calendar date (year, month, day) as the previous record and the
       session holds fewer than ``max_session_len`` records.
     * anything else: fixed-length windows of ``window_size`` records
       (used by STAN).

     Sessions shorter than ``min_session_len`` are discarded (full
     fixed-length windows are always kept) and users with fewer than
     ``min_sessions`` sessions are dropped.

     Returns:
         dict: the cut trajectories, stored as
             {
                 uid: [
                     [checkin_record, checkin_record, ...],
                     [checkin_record, checkin_record, ...],
                     ...
                 ],
                 ...
             }
         where ``uid`` is ``str(entity_id)`` and each record is
         ``row.tolist()``.

     Raises:
         ValueError: in fixed-length mode when ``max_session_len`` differs
             from ``window_size``.
     """
     # load data according to config
     traj = pd.read_csv(os.path.join(
         self.data_path, '{}.dyna'.format(self.config['dataset'])))
     # filter inactive poi
     group_location = traj.groupby('location').count()
     filter_location = group_location[group_location['time'] >= self.config['min_checkins']]
     location_index = filter_location.index.tolist()
     traj = traj[traj['location'].isin(location_index)]
     user_set = pd.unique(traj['entity_id'])
     res = {}
     min_session_len = self.config['min_session_len']
     max_session_len = self.config['max_session_len']
     min_sessions = self.config['min_sessions']
     window_size = self.config['window_size']
     cut_method = self.config['cut_method']
     if cut_method == 'time_interval':
         # cut by the time gap between consecutive check-ins
         for uid in tqdm(user_set, desc="cut and filter trajectory"):
             usr_traj = traj[traj['entity_id'] == uid]
             sessions = []  # all sessions of this user
             session = []  # a single trajectory
             # Reset per user. iterrows() yields index *labels*, which are
             # not 0 for a filtered slice, so the first record is detected
             # through this sentinel instead of `index == 0`.
             prev_time = None
             for _, row in usr_traj.iterrows():
                 now_time = parse_time(row['time'])
                 if prev_time is None:
                     session.append(row.tolist())
                 else:
                     time_off = cal_timeoff(now_time, prev_time)
                     if 0 <= time_off < window_size and len(session) < max_session_len:
                         session.append(row.tolist())
                     else:
                         if len(session) >= min_session_len:
                             sessions.append(session)
                         session = [row.tolist()]
                 prev_time = now_time
             if len(session) >= min_session_len:
                 sessions.append(session)
             if len(sessions) >= min_sessions:
                 res[str(uid)] = sessions
     elif cut_method == 'same_date':
         # group the check-ins of one calendar day into one trajectory
         for uid in tqdm(user_set, desc="cut and filter trajectory"):
             usr_traj = traj[traj['entity_id'] == uid]
             sessions = []  # all sessions of this user
             session = []  # a single trajectory
             prev_date = None
             for _, row in usr_traj.iterrows():
                 now_time = parse_time(row['time'])
                 # compare the full date; the day of month alone would
                 # merge e.g. Jan 5 with Feb 5
                 now_date = (now_time.year, now_time.month, now_time.day)
                 if prev_date is None:
                     # first record of this user
                     session.append(row.tolist())
                 elif prev_date == now_date and len(session) < max_session_len:
                     # still the same day
                     session.append(row.tolist())
                 else:
                     if len(session) >= min_session_len:
                         sessions.append(session)
                     session = [row.tolist()]
                 prev_date = now_date
             if len(session) >= min_session_len:
                 sessions.append(session)
             if len(sessions) >= min_sessions:
                 res[str(uid)] = sessions
     else:
         # cut by fix window_len used by STAN
         if max_session_len != window_size:
             raise ValueError('the fixed length window is not equal to max_session_len')
         for uid in tqdm(user_set, desc="cut and filter trajectory"):
             usr_traj = traj[traj['entity_id'] == uid]
             sessions = []  # all sessions of this user
             session = []  # a single trajectory
             for _, row in usr_traj.iterrows():
                 if len(session) < window_size:
                     session.append(row.tolist())
                 else:
                     # a full window is always kept
                     sessions.append(session)
                     session = [row.tolist()]
             if len(session) >= min_session_len:
                 sessions.append(session)
             if len(sessions) >= min_sessions:
                 res[str(uid)] = sessions
     return res
Exemplo n.º 4
0
    def cutter_filter(self):
        """Cut every user's check-ins into sessions and filter sparse data.

        Only locations with more than ``config['min_checkins']`` records are
        kept. The remaining records of each user are then split according
        to ``config['window_type']``:

        * ``'time_window'``: a record joins the current session while its
          ``cal_timeoff`` from the session's base time lies in
          ``[0, window_size)``; otherwise a new session starts and the base
          time is recomputed with ``cal_basetime`` from that record.
        * anything else: fixed-length windows of ``window_size`` records.

        Sessions shorter than ``min_session_len`` are discarded (full
        fixed-length windows are always kept) and users with fewer than
        ``min_sessions`` sessions are dropped. For every kept user,
        ``self.counter`` is incremented by one per record, keyed by the
        record's fifth field (``row[4]``).

        Returns:
            dict: the cut trajectories, stored as
                {
                    uid: [
                        [checkin_record, checkin_record, ...],
                        [checkin_record, checkin_record, ...],
                        ...
                    ],
                    ...
                }
            where ``uid`` is ``str(entity_id)`` and each record is
            ``row.tolist()``.
        """
        # load data according to config
        traj = pd.read_csv(
            os.path.join(self.data_path,
                         '{}.dyna'.format(self.config['dataset'])))
        # filter inactive poi
        # NOTE(review): the strict '>' drops locations with exactly
        # min_checkins records - confirm this is intended.
        group_location = traj.groupby('location').count()
        filter_location = group_location[
            group_location['time'] > self.config['min_checkins']]
        location_index = filter_location.index.tolist()
        traj = traj[traj['location'].isin(location_index)]

        user_set = pd.unique(traj['entity_id'])
        res = {}
        min_session_len = self.config['min_session_len']
        min_sessions = self.config['min_sessions']
        window_size = self.config['window_size']
        window_type = self.config['window_type']
        if window_type == 'time_window':
            # cut by time window; base_zero is forwarded to cal_basetime
            base_zero = window_size > 12
            for uid in tqdm(user_set, desc="cut and filter trajectory"):
                usr_traj = traj[traj['entity_id'] == uid]
                sessions = []  # all sessions of this user
                session = []  # a single trajectory
                # local time is used here
                start_time = parse_time(usr_traj.iloc[0]['time'])
                base_time = cal_basetime(start_time, base_zero)
                # iterrows() yields index *labels*, which are not 0 for a
                # filtered slice, so the first record of each user is
                # detected by position instead.
                for position, (_, row) in enumerate(usr_traj.iterrows()):
                    if position == 0:
                        # sanity check, now applied to every user's first
                        # record rather than only the row labelled 0
                        assert start_time.hour - base_time.hour < window_size
                        session.append(row.tolist())
                        continue
                    now_time = parse_time(row['time'])
                    time_off = cal_timeoff(now_time, base_time)
                    if 0 <= time_off < window_size:
                        session.append(row.tolist())
                    else:
                        if len(session) >= min_session_len:
                            sessions.append(session)
                        # this record opens a new window
                        base_time = cal_basetime(now_time, base_zero)
                        session = [row.tolist()]
                if len(session) >= min_session_len:
                    sessions.append(session)
                if len(sessions) >= min_sessions:
                    # update counter
                    for s in sessions:
                        for row in s:
                            self.counter.update({row[4]: 1})
                    res[str(uid)] = sessions
        else:
            # cut by trajectory length
            for uid in tqdm(user_set, desc="cut and filter trajectory"):
                usr_traj = traj[traj['entity_id'] == uid]
                sessions = []  # all sessions of this user
                session = []  # a single trajectory
                for _, row in usr_traj.iterrows():
                    if len(session) < window_size:
                        session.append(row.tolist())
                    else:
                        # a full window is always kept
                        sessions.append(session)
                        session = [row.tolist()]
                if len(session) >= min_session_len:
                    sessions.append(session)
                if len(sessions) >= min_sessions:
                    # update counter
                    for s in sessions:
                        for row in s:
                            self.counter.update({row[4]: 1})
                    res[str(uid)] = sessions
        return res
Exemplo n.º 5
0
 def encode(self, uid, trajectories):
     """Encode one user's trajectories, adding distances to the target.

     Location ids are re-coded through ``self.location2id`` (new ids are
     registered on first sight) and every timestamp is encoded as
     ``int(cal_timeoff(now_time, base_time))`` where ``base_time`` is
     ``cal_basetime(start_time, True)`` of the trajectory's first point
     (midnight of that day, according to the original comment).

     Every trajectory yields one trace whose last point is the target.
     ``current_dis`` is ``euclidean_dist`` applied to the latitude and
     longitude differences between the target and each preceding point,
     with coordinates read from ``self.geo_coord[raw location id]``.

     Args:
         uid: same as AbstractTrajectoryEncoder. The value is ignored and
             replaced by the running counter ``self.uid``.
         trajectories: same as AbstractTrajectoryEncoder, e.g.
             trajectory1 = [
                 (location ID, timestamp, timezone_offset_in_minutes),
                 (location ID, timestamp, timezone_offset_in_minutes),
                 .....
             ]

     Returns:
         list of traces, each being
         ``[history_loc, history_tim, current_loc, current_tim,
         target, target_tim, uid, current_dis]``.
     """
     # re-encode the uid directly with a running counter
     uid = self.uid
     self.uid += 1
     splice_history = self.history_type == 'splice'
     encoded_trajectories = []
     history_loc = []
     history_tim = []
     for traj in trajectories:
         current_loc = []
         current_tim = []
         current_longi = []
         current_lati = []
         start_time = parse_time(traj[0][1], traj[0][2])
         # midnight of the start day is the reference for time_off
         base_time = cal_basetime(start_time, True)
         for point in traj:
             loc = point[0]
             now_time = parse_time(point[1], point[2])
             if loc not in self.location2id:
                 self.location2id[loc] = self.loc_id
                 self.loc_id += 1
             current_loc.append(self.location2id[loc])
             current_lati.append(self.geo_coord[loc][0])
             current_longi.append(self.geo_coord[loc][1])
             time_code = int(cal_timeoff(now_time, base_time))
             if time_code > self.tim_max:
                 self.tim_max = time_code
             current_tim.append(time_code)
         # the current trajectory is encoded; now build the model input
         target = current_loc[-1]
         target_tim = current_tim[-1]
         current_loc = current_loc[:-1]
         current_tim = current_tim[:-1]
         # geo_coord is keyed by the raw location id (as in the loop above),
         # so the target's coordinates are the last ones collected there;
         # looking them up with the re-encoded id would fetch another POI.
         target_lati = current_lati[-1]
         target_longi = current_longi[-1]
         current_dis = euclidean_dist(
             target_lati - np.asarray(current_lati[:-1]),
             target_longi - np.asarray(current_longi[:-1]))
         # Snapshot the history: the accumulators below are mutated in
         # place, so storing them by reference would make every trace share
         # one list that also contains its own and later trajectories.
         encoded_trajectories.append([
             list(history_loc),
             list(history_tim),
             current_loc,
             current_tim,
             target,
             target_tim,
             uid,
             current_dis,
         ])
         if splice_history:
             history_loc += current_loc
             history_tim += current_tim
         else:
             history_loc.append(current_loc)
             history_tim.append(current_tim)
     return encoded_trajectories