inner_ht_list = list(
                NL_processor(x[j].Description).noun_count_result.keys())

            dictionary_set = copy.deepcopy(ht_list)
            dictionary_set.extend(inner_ht_list)
            dictionary_set = list(set(dictionary_set))

            # print("dict_set : ", dictionary_set)

            IR_Vector_inner = make_user_interest_vector(
                inner_ht_list, dictionary_set)
            IR_Vector_outer = make_user_interest_vector(
                ht_list, dictionary_set)
            # print(IR_Vector_inner)
            # print(IR_Vector_outer)
            sim = cosine_similarity(IR_Vector_inner, IR_Vector_outer)
            # print(sim)

            if len(inner_sim_best3) == recommend_size:
                # print('if문', file=sys.stderr)
                for k in range(recommend_size):
                    if inner_sim_best3[k][0] < sim:
                        inner_sim_best3[k] = [sim, x[j]]
                        inner_sim_best3.sort(key=lambda element: element[0])
                        break
                        # print(inner_sim_best3)
            else:
                # print('else 문', file=sys.stderr)
                inner_sim_best3.append([sim, x[j]])
                inner_sim_best3.sort(key=lambda element: element[0])
                # print(inner_sim_best3)
Пример #2
0
########################################################################################################################
# more researching.............................................................................
# map: a built-in that applies a given function to every item of a given list or tuple.
# Usage: map([function name], [data])
########################################################################################################################
# user_interest_matrix = map(make_user_interest_vector, users_interests)
#
# for i in user_interest_matrix:
#     print(i, type(i))

# One interest vector per user, in the same order as users_interests.
user_interest_matrix = [make_user_interest_vector(user_interests)
                        for user_interests in users_interests]

# Pairwise similarity table: user_similarities[i][j] is the value returned by
# cosine_similarity for the interest vectors of users i and j.
user_similarities = [[cosine_similarity(interest_vector_i, interest_vector_j)
                      for interest_vector_j in user_interest_matrix]
                     for interest_vector_i in user_interest_matrix]


# for i in user_similarities:
#     print(i)
# print(user_similarities[13][0])


# TODO: look up the built-in sorted() function.

# Compute the similarity between this user's interests and every user's, and return it as a list of tuples.
def most_similar_users_to(user_id):
    pairs = [(other_user_id, similarity)
             for other_user_id, similarity in enumerate(user_similarities[user_id])
# Debug output: print sample_dict, list1 and list2, each followed by a newline separator.
print(sample_dict, "\n", list1, "\n", list2, "\n")


def make_user_interest_vector(user_interests, sample_dict):
    """Encode ``user_interests`` as a 0/1 vector over ``sample_dict``.

    Args:
        user_interests: Container of the interests to encode; only membership
            tests (``in``) are performed on it.
        sample_dict: Iterable of reference interests; it fixes the length and
            the order of the resulting vector.

    Returns:
        A list with one entry per item of ``sample_dict``: ``1`` when that item
        is contained in ``user_interests``, otherwise ``0``.
    """
    # ``in`` always yields a bool, so int() maps it to exactly 1 or 0.
    return [int(candidate in user_interests) for candidate in sample_dict]


# Demo script: build 0/1 interest vectors for list1 and list2 over sample_dict
# and print them.
interestV_1 = make_user_interest_vector(list1, sample_dict)
print(interestV_1, type(interestV_1))
interestV_2 = make_user_interest_vector(list2, sample_dict)
print(interestV_2)

# Print both vectors again, then the similarity of the unmodified vectors.
print(interestV_1, type(interestV_1))
print(interestV_2)
sim = cosine_similarity(interestV_1, interestV_2)

print(sim)

# Append 100 shared 1-entries to both vectors (in place) ...
for i in range(100):
    interestV_1.append(1)
    interestV_2.append(1)

# ... and print the vectors and their similarity again to see the effect.
print(interestV_1, type(interestV_1))
print(interestV_2)
sim = cosine_similarity(interestV_1, interestV_2)

print(sim)

# Append 100 zeros to interestV_1 only; interestV_2 is not extended here.
for i in range(100):
    interestV_1.append(0)
def make_recommend_list(added_Album):
    """Score a newly added album against every stored album.

    Existing albums are read from ``Album_VO`` in pages of ``batchsize`` rows.
    For each existing album a similarity score is built from two parts:

    * ``0.3`` when both albums have the same ``Singer_ID``;
    * the value of ``cosine_similarity`` for the 0/1 noun-presence vectors of
      both descriptions, added only when neither description is ``None``.

    The score is then written into the existing album's
    ``Album_recommend_VO`` entry under the key ``added_Album.Album_ID`` (a new
    entry object is created when the query finds none).

    NOTE(review): nothing in this function adds the entries to a session or
    commits them; confirm that persistence happens elsewhere.

    Args:
        added_Album: The newly added album. Its ``Album_ID``, ``Singer_ID``
            and ``Description`` attributes are read.

    Returns:
        None.
    """
    batchsize = 200
    offset = 0

    # Recommendation entry for the added album itself.
    # NOTE(review): not used any further in this function as written.
    added_Album_rec = Album_recommend_VO()
    added_Album_rec.Album_ID = added_Album.Album_ID

    # Extract the added album's nouns once, and only when it has a
    # description: they are needed solely for the description comparison.
    added_Album_dict_list = None
    if added_Album.Description is not None:
        added_Album_dict_list = list(
            NL_processor(added_Album.Description).noun_count_result.keys())

    # NOTE(review): offset/limit paging is used without an explicit ordering;
    # confirm the query returns rows in a stable order across pages.
    while True:
        existing_Album_list = Album_VO.query.offset(offset).limit(
            batchsize).all()
        if not existing_Album_list:
            break

        # Compare the added album with each album of the current page.
        for existing_Album in existing_Album_list:
            sim = 0

            # Same singer contributes a fixed bonus.
            if existing_Album.Singer_ID == added_Album.Singer_ID:
                sim += 0.3

            existing_Album_rec = Album_recommend_VO.query.filter(
                Album_recommend_VO.Album_ID ==
                existing_Album.Album_ID).first()

            if existing_Album_rec is None:
                existing_Album_rec = Album_recommend_VO()
                existing_Album_rec.Album_ID = existing_Album.Album_ID

            # Descriptions are compared only when both exist, so no noun
            # extraction or vector building is wasted on missing text.
            if (added_Album_dict_list is not None
                    and existing_Album.Description is not None):
                existing_Album_dict_list = list(
                    NL_processor(
                        existing_Album.Description).noun_count_result.keys())

                print(added_Album_dict_list)
                print(existing_Album_dict_list)

                # Union of both noun lists: the shared axis of the two vectors.
                dictionary_set = list(
                    set(added_Album_dict_list) | set(existing_Album_dict_list))

                print("dict_set : ", dictionary_set)

                added_IR_Vector = make_user_interest_vector(
                    added_Album_dict_list, dictionary_set)
                existing_IR_Vector = make_user_interest_vector(
                    existing_Album_dict_list, dictionary_set)

                print(added_IR_Vector, type(added_IR_Vector))
                print(existing_IR_Vector, type(existing_IR_Vector))

                sim += cosine_similarity(added_IR_Vector, existing_IR_Vector)

            # Record the score under the added album's id. Previously the
            # non-empty case called ``update()`` without arguments, which
            # silently dropped the score.
            existing_Album_rec_dict = existing_Album_rec.as_dict()
            if existing_Album_rec_dict is None:
                existing_Album_rec_dict = {}
            existing_Album_rec_dict.update({added_Album.Album_ID: sim})
            # NOTE(review): written back explicitly because it is not visible
            # here whether as_dict() returns a live reference or a copy.
            existing_Album_rec.set_dict(existing_Album_rec_dict)
            print(existing_Album_rec.as_dict())

            # TODO: decide how entries are inserted/ordered, e.g. add to the
            # existing dict and sort by value, keeping only the best few
            # (sorted(d.items(), key=lambda kv: kv[1])), or replace key/value
            # pairs by index.

        # Advance to the next page.
        offset += batchsize