示例#1
0
def breakouts_complements():
    """Derive per-track breakout summary statistics (used to classify breakouts).

    For every distinct ``track_id`` that has a row with ``release_drive == 0``
    in the ``breakouts`` table, this computes:

    * ``average_reviews_num`` -- the track's review count minus the reviews
      attributed to its breakout rows, divided by the day span between first
      and last review minus the number of breakout rows;
    * ``blevel_vec`` -- how many non-release-driven breakouts fall into each
      ``beta`` interval, stored as a space-separated string;
    * ``breakouts_num`` -- the total of ``blevel_vec``;
    * ``blevel`` -- the count-weighted mean interval index.

    The result is written with ``insert_or_update`` into the
    ``breakouts_complements`` table, and each processed ``track_id`` is
    printed as a progress indicator.
    """
    conn = MyConn()
    # Half-open [low, high) beta intervals; an interval's position in this
    # list is used as the breakout level.
    beta_bins = [(0, 100), (100, 180), (180, 326), (326, 589), (589, 1066),
                 (1066, 3494), (3494, 30000)]
    blevel_num = len(beta_bins)
    breakout_tracks = [
        r[0] for r in conn.query(targets=["DISTINCT(track_id)"],
                                 table="breakouts",
                                 conditions={"release_drive": 0})
    ]

    for track_id in breakout_tracks:
        # No table is passed here, so MyConn's default table is queried.
        reviews_num, first_review, last_review = conn.query(
            targets=["reviews_num", "first_review", "last_review"],
            conditions={"track_id": track_id},
            fetchall=False)
        breakouts = conn.query(
            targets=["flag", "reviews_num", "beta", "release_drive"],
            table="breakouts",
            conditions={"track_id": track_id})
        days_num = (last_review - first_review).days

        # Average number of reviews with the breakout points removed.
        # NOTE(review): subtracting len(breakouts) presumably treats every
        # breakout row as one day -- confirm against the table definition.
        normal_reviews = reviews_num - sum(b[1] for b in breakouts)
        normal_days = days_num - len(breakouts)
        # Guard the division: a zero denominator would otherwise produce a
        # non-finite value (or an exception) instead of a storable number.
        avg_normal = float(normal_reviews / normal_days) if normal_days else 0.0

        blevel_vec = [0] * blevel_num
        for b in breakouts:
            if b[3] == 1:
                continue  # ignore release-driven breakouts
            beta = b[2]
            # Count the breakout in the first interval containing its beta;
            # values outside every interval are not counted.
            for level, (low, high) in enumerate(beta_bins):
                if low <= beta < high:
                    blevel_vec[level] += 1
                    break

        breakouts_num = sum(blevel_vec)
        weighted_levels = sum(
            level * count for level, count in enumerate(blevel_vec))
        # A track whose breakouts all lie outside the intervals has nothing
        # to average; fall back to 0.0 rather than dividing by zero.
        blevel = float(weighted_levels) / breakouts_num if breakouts_num else 0.0

        settings = {
            "track_id": track_id,
            "average_reviews_num": avg_normal,
            "blevel_vec": ' '.join(map(str, blevel_vec)),
            "breakouts_num": breakouts_num,
            "blevel": blevel
        }
        conn.insert_or_update(table="breakouts_complements", settings=settings)
        print(track_id)
示例#2
0
def upload_details():
    """Upload basic track information to the ``details`` table.

    Walks the hard-coded details directory, parses every JSON file found
    there and upserts one row per file containing the track's name, artist
    names, album name and ``pop`` value. Files that cannot be read or parsed
    are reported on stdout and skipped.
    """
    def extract_details(filename):
        """Return name/artist/pop/album of the first song in a details file."""
        # Decode explicitly as UTF-8 instead of relying on the platform
        # default encoding.
        with open(filename, encoding="utf-8") as f:
            content = json.load(f)
        song = content["songs"][0]
        return {
            "name": song["name"],
            # Multiple artists are joined into one comma-separated string.
            "artist": ",".join(item["name"] for item in song["ar"]),
            "pop": song["pop"],
            "album": song["al"]["name"],
        }

    read_path = "/Volumes/nmusic/NetEase2020/data/simple_proxied_tracks_details"
    conn = MyConn()

    for root, _dirs, files in os.walk(read_path):
        for filename in files:
            # Skip names containing "DS" (presumably macOS .DS_Store files).
            if "DS" in filename:
                continue
            filepath = os.path.join(root, filename)
            # Drop the last five characters of the file name to get the id
            # (presumably a ".json" suffix -- verify against the data set).
            track_id = filename[:-5]
            try:
                details = extract_details(filepath)
            except Exception as e:
                # Best-effort batch: report the bad file and move on. Without
                # the `continue`, the insert below would run with `details`
                # undefined or left over from the previous file.
                print(filepath)
                print(e)
                continue

            conn.insert_or_update(table="details",
                                  settings={
                                      "track_id": track_id,
                                      "name": details["name"],
                                      "artist": details["artist"],
                                      "album": details["album"],
                                      "pop": details["pop"]
                                  })