    def _create_dilated_rnn_input(self, current_loc):
        """For each point in the session, find the index of the geographically
        closest earlier point; these indices form the dilated RNN input."""
        current_loc.reverse()
        sequence_length = len(current_loc)
        session_dilated_rnn_input_index = [0] * sequence_length
        for i in range(sequence_length - 1):
            current_poi = current_loc[i]
            poi_before = current_loc[i + 1:]
            current_poi_profile = self.poi_profile.loc[
                self.poi_profile['geo_id'] ==
                self.id2location[current_poi]].iloc[0]
            lon_cur, lat_cur = parse_coordinate(
                current_poi_profile['coordinates'])
            distance_row_explicit = []
            for target in poi_before:
                lon, lat = parse_coordinate(self.poi_profile.loc[
                    self.poi_profile['geo_id'] ==
                    self.id2location[target]].iloc[0]['coordinates'])
                distance_row_explicit.append(
                    geodistance(lat_cur, lon_cur, lat, lon))
            index_closest = np.argmin(distance_row_explicit).item()
            # reverse back
            session_dilated_rnn_input_index[
                sequence_length - i -
                1] = sequence_length - 2 - index_closest - i
        current_loc.reverse()
        return session_dilated_rnn_input_index

    def _cal_poi_matrix(self):
        """Build the pairwise POI distance matrix and keep the maximum
        distance in self.ex[0]; location ids start at 1, so row i of the
        matrix corresponds to location id i + 1."""
        poi_profile = pd.read_csv('./raw_data/{}/{}.geo'.format(
            self.config['dataset'], self.config['dataset']))
        mat = np.zeros((self.loc_id - 1, self.loc_id - 1))
        for i in tqdm(range(1, self.loc_id),
                      desc='calculate poi distance matrix'):
            lon_i, lat_i = parse_coordinate(
                poi_profile.iloc[self.id2location[i]]['coordinates'])
            for j in range(1, self.loc_id):
                lon_j, lat_j = parse_coordinate(
                    poi_profile.iloc[self.id2location[j]]['coordinates'])
                dis = haversine(lon_i, lat_i, lon_j, lat_j)
                mat[i - 1][j - 1] = dis
                if dis > self.ex[0]:
                    self.ex[0] = dis
        return mat.tolist()

    def calculate_loss(self, batch):
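        """Compute the training loss for one batch.

        Builds the time/distance interval terms, runs forward() to obtain the
        spatial-temporal hidden state h_tq, scores the target location against
        the user representation (h_tq + p_u), and returns
        log(1 + exp(-sum of scores)).
        """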
        user = batch['uid']
        dst = batch['target'].tolist()
        dst_time = batch['target_tim']
        current_loc = batch['current_loc']
        current_tim = batch['current_tim']
        # compute the time intervals (td) and spatial distances (ld)
        batch_size = len(dst)
        td = dst_time.unsqueeze(1) - current_tim
        ld = torch.zeros(current_loc.shape).to(self.device)
        loc_len = batch.get_origin_len('current_loc')
        current_loc = current_loc.tolist()
        for i in range(batch_size):
            target = dst[i]
            lon_i, lat_i = parse_coordinate(
                self.poi_profile.iloc[target]['coordinates'])
            for j in range(loc_len[i]):
                origin = current_loc[i][j]
                lon_j, lat_j = parse_coordinate(
                    self.poi_profile.iloc[origin]['coordinates'])
                # distance from target to origin, stored in ld[i][j]
                ld[i][j] = distance.distance((lat_i, lon_i),
                                             (lat_j, lon_j)).kilometers

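        # Distances to the interval bounds (upper - value and value - lower);
        # presumably forward() uses them to interpolate between the transition
        # weights defined at the interval endpoints.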
        td_upper = torch.LongTensor([self.up_time] * batch_size).to(
            self.device).unsqueeze(1)
        td_upper = td_upper - td
        td_lower = td  # the lower bound is 0
        ld_upper = torch.LongTensor([self.up_loc] * batch_size).to(
            self.device).unsqueeze(1)
        ld_upper = ld_upper - ld
        ld_lower = ld  # the lower bound is 0
        # batch_size * hidden_size
        h_tq = self.forward(td_upper, td_lower, ld_upper, ld_lower,
                            batch['current_loc'], loc_len)
        dst = batch['target']
        p_u = self.permanet_weight(user)  # batch_size * hidden_size
        q_v = self.location_weight(dst)  # batch_size * hidden_size
        user_vector = h_tq + p_u
        output = torch.zeros([batch_size, 1]).to(self.device)
        for i in range(batch_size):
            output[i] = torch.dot(user_vector[i], q_v[i])
        output = torch.sum(output, dim=0)
        return torch.log(1 + torch.exp(torch.neg(output)))

    def predict(self, batch):
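        """Score every candidate location for each element in the batch.

        Uses the same interval construction as calculate_loss, then multiplies
        the user representation by the location embedding weights to get a
        batch_size * loc_size score matrix.
        """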
        user = batch['uid']
        dst = batch['target'].tolist()
        dst_time = batch['target_tim']
        current_loc = batch['current_loc']
        current_tim = batch['current_tim']
        # compute the time intervals (td) and spatial distances (ld)
        batch_size = len(dst)
        td = dst_time.unsqueeze(1) - current_tim
        ld = torch.zeros(current_loc.shape).to(self.device)
        loc_len = batch.get_origin_len('current_loc')
        current_loc = current_loc.tolist()
        for i in range(batch_size):
            target = dst[i]
            lon_i, lat_i = parse_coordinate(
                self.poi_profile.iloc[target]['coordinates'])
            for j in range(loc_len[i]):
                origin = current_loc[i][j]
                lon_j, lat_j = parse_coordinate(
                    self.poi_profile.iloc[origin]['coordinates'])
                # distance from target to origin, stored in ld[i][j]
                ld[i][j] = distance.distance((lat_i, lon_i),
                                             (lat_j, lon_j)).kilometers

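        # same interval construction as in calculate_loss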
        td_upper = torch.LongTensor([self.up_time] * batch_size).to(
            self.device).unsqueeze(1)
        td_upper = td_upper - td
        td_lower = td  # the lower bound is 0
        ld_upper = torch.LongTensor([self.up_loc] * batch_size).to(
            self.device).unsqueeze(1)
        ld_upper = ld_upper - ld
        ld_lower = ld  # the lower bound is 0
        # batch_size * hidden_size
        h_tq = self.forward(td_upper, td_lower, ld_upper, ld_lower,
                            batch['current_loc'], loc_len)
        p_u = self.permanet_weight(user)  # batch_size * hidden_size
        user_vector = h_tq + p_u  # batch_size * hidden_size
        # Note: this is problematic: user_vector is computed from the target,
        # but each candidate loc should really get its own user_vector
        # batch_size * loc_size
        ret = torch.mm(user_vector, self.location_weight.weight.T)
        return ret

    def _gen_distance_matrix(self, current_loc, history_loc_central):
        # use the POI profile to compute the distance between the current
        # location and the centroid of each historical trajectory
        history_avg_distance = []  # history_session_count
        now_loc = current_loc[-1]
        lon_cur, lat_cur = parse_coordinate(
            self.poi_profile.loc[
                self.poi_profile['geo_id'] ==
                self.id2location[now_loc]].iloc[0]['coordinates'])
        for central in history_loc_central:
            dis = geodistance(central[0], central[1], lat_cur, lon_cur)
            if dis < 1:
                dis = 1
            history_avg_distance.append(dis)
        return history_avg_distance

    def encode(self, uid, trajectories, negative_sample=None):
        """standard encoder use the same method as DeepMove

        Re-encodes POI ids and encodes each timestamp by its hour.

        Args:
            uid ([type]): same as AbstractTrajectoryEncoder
            trajectories ([type]): same as AbstractTrajectoryEncoder
                trajectory1 = [
                    (location ID, timestamp, timezone_offset_in_minutes),
                    (location ID, timestamp, timezone_offset_in_minutes),
                    .....
                ]
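
        Returns:
            list: encoded trajectories, one per prediction step, each of the
                form [history_loc, history_tim, current_loc, current_tim,
                dilated_rnn_input_index, history_avg_distance, target, uid],
                with neg_loc appended when negative_sample is given.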
        """
        # directly re-encode the uid
        uid = self.uid
        self.uid += 1
        encoded_trajectories = []
        history_loc = []
        history_loc_central = []
        history_tim = []
        for index, traj in enumerate(trajectories):
            current_loc = []
            current_tim = []
            for point in traj:
                loc = point[4]
                now_time = parse_time(point[2])
                if loc not in self.location2id:
                    self.location2id[loc] = self.loc_id
                    self.id2location[self.loc_id] = loc
                    self.loc_id += 1
                current_loc.append(self.location2id[loc])
                time_code = self._time_encode(now_time)
                current_tim.append(time_code)
                if time_code not in self.time_checkin_set:
                    self.time_checkin_set[time_code] = set()
                self.time_checkin_set[time_code].add(self.location2id[loc])
            # the current trajectory is encoded; now build the model inputs
            if index == 0:
                # historical trajectory features are required, so the first
                # trajectory alone cannot form a model input
                history_loc.append(current_loc)
                history_tim.append(current_tim)
                lon = []
                lat = []
                for poi in current_loc:
                    lon_cur, lat_cur = parse_coordinate(self.poi_profile.loc[
                        self.poi_profile['geo_id'] ==
                        self.id2location[poi]].iloc[0]['coordinates'])
                    lon.append(lon_cur)
                    lat.append(lat_cur)
                history_loc_central.append((np.mean(lat), np.mean(lon)))
                continue
            # one trajectory can generate multiple training samples
            for i in range(len(current_loc) - 1):
                trace = []
                target = current_loc[i + 1]
                dilated_rnn_input_index = self._create_dilated_rnn_input(
                    current_loc[:i + 1])
                history_avg_distance = self._gen_distance_matrix(
                    current_loc[:i + 1], history_loc_central)
                trace.append(history_loc.copy())
                trace.append(history_tim.copy())
                trace.append(current_loc[:i + 1])
                trace.append(current_tim[:i + 1])
                trace.append(dilated_rnn_input_index)
                trace.append(history_avg_distance)
                trace.append(target)
                trace.append(uid)
                if negative_sample is not None:
                    neg_loc = []
                    for neg in negative_sample[index]:
                        if neg not in self.location2id:
                            self.location2id[neg] = self.loc_id
                            # keep the reverse mapping consistent as well
                            self.id2location[self.loc_id] = neg
                            self.loc_id += 1
                        neg_loc.append(self.location2id[neg])
                    trace.append(neg_loc)
                encoded_trajectories.append(trace)
            history_loc.append(current_loc)
            history_tim.append(current_tim)
            # compute the centroid of current_loc
            lon = []
            lat = []
            for poi in current_loc:
                lon_cur, lat_cur = parse_coordinate(self.poi_profile.loc[
                    self.poi_profile['geo_id'] ==
                    self.id2location[poi]].iloc[0]['coordinates'])
                lon.append(lon_cur)
                lat.append(lat_cur)
            history_loc_central.append((np.mean(lat), np.mean(lon)))
        return encoded_trajectories