Exemplo n.º 1
0
    if listen_music_count[train_user[i]] < 5:
        small_train_user.append(train_user[i]); small_train_music.append(train_music[i]); small_train_count.append(train_count[i])
        train_user[i] = 0; train_music[i] = 0; train_count[i] = 0

# Sparse play-count matrices addressed by (user id, music id).
# Only the training matrix is converted to CSR; the test matrix stays in COO form.
test = coo_matrix((test_count, (test_user, test_music)))
train = coo_matrix((train_count, (train_user, train_music))).tocsr()

# Distinct user / music ids present in each split, in ascending order.
ex_train_user, ex_train_music = sorted(set(train_user)), sorted(set(train_music))
ex_test_user, ex_test_music = sorted(set(test_user)), sorted(set(test_music))

print("Status : Train data loaded. CV ready.")

# Create the ALS model and fit it on the transposed training matrix scaled by ALPHA.
# NOTE(review): ALPHA presumably acts as the implicit-feedback confidence weight — confirm.
model = wmf(factors=NO_COMPONENT, regularization=REGULARIZATION)
model.fit(train.transpose() * ALPHA)

# Keep references to the learned latent factors of the fitted model.
model_item_factors, model_user_factors = model.item_factors, model.user_factors

# Number of user clusters used by the per-cluster bookkeeping below.
N_CLUSTERS = 2

# Per-cluster accumulators; each name is bound to its own fresh, empty list.
n_cluster_users = []
(n_cluster_train_user, n_cluster_train_music, n_cluster_train_count) = [], [], []
(n_cluster_test_user, n_cluster_test_music, n_cluster_test_count) = [], [], []
    def do_GET(self):
        """요청 메시지의 메서드가 GET 일 때 호출되어, 응답 메시지를 전송한다."""
        # 응답 메시지의 상태 코드를 전송한다
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')

        # 응답 메시지의 헤더를 전송한다
        self.send_header('Content-type', 'text/plain; charset=utf-8')
        self.end_headers()

        # 응답 메시지의 본문을 전송한다
        query = urlparse(self.path).query
        query_components = dict(qc.split("=") for qc in query.split("&"))

        TYPE = query_components['type']

        if TYPE == "recommend":
            WEATHER = query_components['weather']
            USER = query_components['user']
            TOP_K = int(query_components['top_k'])

            mydb = mysql.connector.connect(
                host="bethebest.online",
                user="******",
                passwd="비밀번호",
                database="project"
            )
            
            # ID로 유저 검색
            sql = "SELECT user_id FROM user WHERE user_name = \"%s\"" % (USER)

            mycursor = mydb.cursor()
            mycursor.execute(sql)
            myresult = mycursor.fetchall()

            # 결과 전송을 위한 딕셔너리 선언
            result = {}
            
            if len(myresult) == 0: # ID가 존재하지 않는 경우
                result['result'] = "nouser"
            else:
                USER = myresult[0][0]

                # 필요한 변수 선언
                train_user = []
                train_music = []
                train_count = []
                ex_train_user = []
                ex_train_music = []
                sql = ""

                # 요청한 날씨별로 SQL 분기
                if WEATHER == 'hot':
                    sql = "SELECT user_id, music_id, count(*) as play_count FROM `history`, weather WHERE history.play_time_date=weather.date and weather.high_temp >= 30 group by user_id, music_id"

                elif WEATHER == 'cold':
                    sql = "SELECT user_id, music_id, count(*) as play_count FROM `history`, weather WHERE history.play_time_date=weather.date and weather.low_temp <= 10 group by user_id, music_id"

                elif WEATHER == 'rainy':
                    sql = "SELECT user_id, music_id, count(*) as play_count FROM `history`, weather WHERE history.play_time_date=weather.date and weather.rain >= 0 group by user_id, music_id"

                elif WEATHER == 'snowy':
                    sql = "SELECT user_id, music_id, count(*) as play_count FROM `history`, weather WHERE history.play_time_date=weather.date and weather.snow >= 0 group by user_id, music_id"

                elif WEATHER == 'cool':
                    sql = "SELECT user_id, music_id, count(*) as play_count FROM `history`, weather WHERE history.play_time_date=weather.date and weather.high_temp < 30 and weather.low_temp > 10 group by user_id, music_id"

                mycursor = mydb.cursor()
                mycursor.execute(sql)
                myresult = mycursor.fetchall()
                
                # 사용자와, 음악 ID, 청취횟수 데이터 저장
                for x in myresult:
                    train_user.append(int(x[0]))
                    train_music.append(int(x[1]))
                    train_count.append(int(x[2]))
                
                # 중복이 제거된 사용자와 음악 ID
                ex_train_user = sorted(set(train_user))
                ex_train_music = sorted(set(train_music))

                if binary_search(USER, ex_train_user): # 추천 요청한 날씨에 해당 유저의 청취데이터 존재하는지 확인
                    train = coo_matrix((train_count, (train_user, train_music))).tocsr()

                    print("[Status] Train data %d loaded. CV ready." % (len(myresult)))

                    # WMF 모델 생성
                    model = wmf(factors=NO_COMPONENT, regularization=REGULARIZATION)
                    model.fit(train.transpose() * ALPHA)

                    N_CLUSTERS = 2

                    n_cluster_users = []
                    n_cluster_train_user = []
                    n_cluster_train_music = []
                    n_cluster_train_count = []
                    n_cluster_test_user = []
                    n_cluster_test_music = []
                    n_cluster_test_count = []

                    # K-Means로 클러스터링
                    kmeans = KMeans(n_clusters=N_CLUSTERS)
                    kmeans = kmeans.fit(model.user_factors[ex_train_user])
                    kmeans = np.array(kmeans.predict(model.user_factors[ex_train_user]))

                    print("[Status] %d clusters created." % (N_CLUSTERS))

                    for n in range(N_CLUSTERS):
                        idx = np.where(kmeans == n)[0]
                        n_cluster_users = np.take(ex_train_user, idx)

                        n_cluster_train_user = [];
                        n_cluster_train_music = [];
                        n_cluster_train_count = []
                        n_cluster_test_user = [];
                        n_cluster_test_music = [];
                        n_cluster_test_count = []

                        max_music_id = 0

                        # Training data cluster 기준으로 분리
                        for i in range(len(train_user)):
                            idx = binary_search(train_user[i], n_cluster_users)
                            if idx != None:
                                n_cluster_train_user.append(train_user[i]);
                                n_cluster_train_music.append(train_music[i]);
                                n_cluster_train_count.append(train_count[i])
                                if max_music_id < train_music[i]: max_music_id = train_music[i]

                        n_cluster_train = coo_matrix(
                            (n_cluster_train_count, (n_cluster_train_user, n_cluster_train_music))).tocsr()

                        n_cluster_train_user = sorted(set(n_cluster_train_user))
                        n_cluster_train_music = sorted(set(n_cluster_train_music))

                        if binary_search(USER, n_cluster_train_user):
                            model = wmf(factors=NO_COMPONENT, regularization=REGULARIZATION)
                            model.fit(n_cluster_train.transpose() * ALPHA)

                            recommendations = model.recommend(USER, train.tocsr(), N=TOP_K)

                            print("[Status] Recommendation process is completed.")

                            result['result'] = "ok"
                            result['recommendations'] = {}

                            print("[Status] Requesting album arts trough ManiaDB.")

                            for i in range(len(recommendations)):
                                sql = "SELECT music_title, artist_name, img FROM music, artist WHERE music.artist_id = artist.artist_id and music_id = %d" % (recommendations[i][0])
                                mycursor = mydb.cursor()
                                mycursor.execute(sql)
                                myresult = mycursor.fetchall()

                                result['recommendations'][str(i + 1)] = {}

                                # 앨범아트 검색 - ManiaDB 사용
                                if myresult[0][2] == None: # 앨범아트 정보가 DB에 존재하지 않는 경우
                                    img_src = ""
                                    result_src = ""

                                    try:
                                        url = "http://www.maniadb.com/api/search/" + quote(
                                            myresult[0][0]) + "/?sr=song&display=30&[email protected]&v=0.5"
                                        response_p = urllib.request.urlopen(url)
                                        tree = parse(response_p)
                                        note = tree.getroot()

                                        for parent in tree.getiterator():
                                            for child in parent:
                                                if child.tag == 'image':
                                                    img_src = child.text

                                                if child.tag.strip() == 'name':
                                                    if str(child.text).find(myresult[0][1].strip()) > -1:
                                                        result_src = img_src
                                                        break

                                    except Exception as e:
                                        print(e)

                                    if result_src != "":
                                        mycursor = mydb.cursor()
                                        sql = "UPDATE music SET img = '%s' WHERE music_id = '%d'" % (result_src, recommendations[i][0])
                                        mycursor.execute(sql)
                                        mydb.commit()

                                    result['recommendations'][str(i + 1)]['artist'] = myresult[0][1]
                                    result['recommendations'][str(i + 1)]['title'] = myresult[0][0]
                                    result['recommendations'][str(i + 1)]['img'] = result_src
                                else: # 앨범아트 정보가 DB에 존재하는 경우
                                    result['recommendations'][str(i + 1)]['artist'] = myresult[0][1]
                                    result['recommendations'][str(i + 1)]['title'] = myresult[0][0]
                                    result['recommendations'][str(i + 1)]['img'] = myresult[0][2]
                else: # 추천 요청한 날씨에 해당 유저의 청취데이터가 존재하지 않을 경우
                    result['result'] = "nodata"
            
            # JSON 전송
            print(result)
            self.wfile.write(bytes(printJSON(result), 'utf-8'))

        elif TYPE == "register":
            LASTFM_ID = query_components['lastfm_id']