def user_features():
    """Build a feature list for every user in ``ml-1m/users.dat``.

    Each record is split on ``::``. Field 1 is passed to ``judge_sex``,
    field 2 (as an int) to ``judge_age``, and field 3 is appended as a
    single int. Presumably these are gender, age and occupation as in the
    MovieLens 1M layout -- confirm against the dataset README.

    Returns:
        dict: field 0 of each record (the user id, kept as a string)
        mapped to the concatenated feature list. If an id appears more
        than once, the first record wins.
    """
    features_by_user = {}
    users_path = base_path + "ml-1m/users.dat"
    for record in loadfile(users_path, encoding="ISO-8859-1"):
        fields = record.split("::")
        # The helpers' results are joined with `+`, so they are expected
        # to return lists.
        vector = (
            judge_sex(fields[1])
            + judge_age(int(fields[2]))
            + [int(fields[3])]
        )
        user_id = fields[0]
        if user_id not in features_by_user:
            features_by_user[user_id] = vector
    return features_by_user
def user_item_rating():
    """Group the records of ``ml-1m/ratings.dat`` by user.

    Returns:
        dict: field 0 of each record (user id string) mapped to a
        two-element list ``[items, ratings]``. ``items`` collects field 1
        and ``ratings`` collects field 2, both as raw strings in file
        order, so ``items[i]`` and ``ratings[i]`` come from the same line.
    """
    ratings_by_user = {}
    for record in loadfile(base_path + "ml-1m/ratings.dat"):
        fields = record.split("::")
        # setdefault returns the stored pair, so both lists can be
        # unpacked and extended in place.
        items, ratings = ratings_by_user.setdefault(fields[0], [[], []])
        items.append(fields[1])
        ratings.append(fields[2])
    return ratings_by_user
def create_user_item_dict():
    """Map each user to the item entries of the movies they rated.

    Item entries come from ``create_item_dict()``; ratings are read from
    ``ml-1m/ratings.dat``.

    Returns:
        dict: field 0 of each ratings record (user id string) mapped to a
        list holding, per record and in file order, the item entry looked
        up by field 1. A movie id missing from the item dictionary
        contributes ``None`` to the list.
    """
    movies_by_id = create_item_dict()
    items_by_user = {}
    ratings_path = base_path + "ml-1m/ratings.dat"
    for record in loadfile(ratings_path, encoding="ISO-8859-1"):
        fields = record.split("::")
        user_id = fields[0]
        movie_id = fields[1]
        items_by_user.setdefault(user_id, []).append(movies_by_id.get(movie_id))
    return items_by_user
def load_data(train_rate=1):
    """Randomly split ``ml-1m/ratings.dat`` into train and test mappings.

    One ``random.random()`` draw is made per record; the record goes to
    the training side when the draw is below ``train_rate``.

    Args:
        train_rate: Threshold compared against the random draw. With the
            default of 1 every record lands in the training mapping.

    Returns:
        tuple: ``(train_items_ratings, test_data)`` where
        ``train_items_ratings`` maps field 1 (item id) to the set of
        field-2 values seen for it, and ``test_data`` maps field 0
        (user id) to the set of field-1 values. All values are strings.
    """
    train = {}
    held_out = {}
    for record in loadfile(base_path + "ml-1m/ratings.dat"):
        fields = record.split("::")
        in_train = random.random() < train_rate
        if not in_train:
            held_out.setdefault(fields[0], set()).add(fields[1])
            continue
        train.setdefault(fields[1], set()).add(fields[2])
    return train, held_out
def create_item_dict():
    """Build the item dictionary from ``ml-1m/movies.dat``.

    Returns:
        dict: field 0 of each record (movie id string) mapped to a dict
        with two keys: ``'title'`` is field 1 passed through
        ``remove_punctuation``, and ``'genres'`` is field 2 with its
        ``|``-separated parts stripped of newlines and re-joined with
        single spaces. A repeated id overwrites the earlier values.
    """
    catalog = {}
    movies_path = base_path + "ml-1m/movies.dat"
    for record in loadfile(movies_path, encoding="ISO-8859-1"):
        fields = record.split("::")
        entry = catalog.setdefault(fields[0], {})
        cleaned_title = remove_punctuation(fields[1])
        # The genre field is last on the line, so its parts may carry the
        # trailing newline.
        genre_parts = [part.strip('\n') for part in fields[2].split("|")]
        entry.update(title=cleaned_title, genres=' '.join(genre_parts))
    return catalog
def _release_year_from_title(title):
    """Return the release year embedded in a movie title string.

    Prefers a four-digit year in parentheses at the end of the title,
    e.g. ``"2001: A Space Odyssey (1968)"`` yields 1968. Taking the first
    four-digit run instead would wrongly return 2001 for that title.
    If no trailing ``(YYYY)`` is present, falls back to the first
    four-digit run anywhere in the title.

    Raises:
        IndexError: If the title contains no four-digit run at all.
    """
    trailing = re.search(r"\((\d{4})\)\s*$", title)
    if trailing:
        return int(trailing.group(1))
    return int(re.findall(r"\d{4}", title)[0])


def item_features():
    """Build the item feature matrix from ``ml-1m/movies.dat``.

    For each record (split on ``::``) the release year is extracted from
    the title in field 1 and passed to ``judge_year``. The ``|``-separated
    genres in field 2 are passed to ``judge_genres``. The two results are
    concatenated with ``+``, so both helpers are expected to return lists.

    Returns:
        dict: field 0 of each record (movie id string) mapped to its
        feature list. If an id appears more than once, the first record
        wins.

    Raises:
        IndexError: If a title contains no four-digit number.
    """
    item_feature = {}
    movies_path = base_path + "ml-1m/movies.dat"
    for line in loadfile(movies_path, encoding="ISO-8859-1"):
        arr = line.split("::")
        year_label = judge_year(_release_year_from_title(arr[1]))
        genres_list = judge_genres(arr[2].split("|"))
        item_feature.setdefault(arr[0], year_label + genres_list)
    return item_feature
item_feature_list = year_label + genres_list item_feature.setdefault(arr[0], item_feature_list) return item_feature if __name__ == '__main__': count_2000 = 0 count_1990 = 0 count_1980 = 0 count_1970 = 0 other = 0 item_feature = {} for line in loadfile(base_path + "ml-1m/movies.dat", encoding="ISO-8859-1"): arr = line.split("::") year = int(re.findall("\d{4}", arr[1])[0]) year_label = judge_year(year) genres = arr[2].split("|") genres_list = judge_genres(genres) item_feature_list = year_label + genres_list item_feature.setdefault(arr[0], item_feature_list) if year >= 2000: count_2000 += 1 elif year >= 1990 and year < 2000: count_1990 += 1 elif year >= 1980 and year < 1990: count_1980 += 1 elif year >= 1970 and year < 1980: count_1970 += 1