Exemplo n.º 1
0
def predict(otrain):
    """Fill a ratings matrix using the ratings of each row's nearest rows.

    Rows are treated as users and columns as items. An entry of ``otrain``
    counts as "rated" when it is greater than zero. Distances between rows
    are computed on that rated/unrated mask only (``'correlation'`` metric),
    not on the rating values themselves.

    For every (user, item) cell, the normalized ratings that other users
    gave to the item are collected in order of increasing distance from the
    user; the mean of the first ``len // 2 + 1`` of them replaces the cell.
    Cells for which no other user rated the item keep their normalized value.

    Parameters
    ----------
    otrain : 2-D array
        Ratings matrix. NOTE(review): assumed to be a NumPy array that
        ``NormalizePositive`` (defined elsewhere) accepts; confirm.

    Returns
    -------
    The filled matrix passed through ``NormalizePositive.inverse_transform``.
    """
    rated = otrain > 0
    normalizer = NormalizePositive(axis=1)
    scaled = normalizer.fit_transform(otrain)

    # Condensed pairwise distances between mask rows, expanded to a square
    # matrix so that row ``i`` holds the distances from row ``i`` to all rows.
    pairwise = distance.squareform(distance.pdist(rated, 'correlation'))

    # Per row: every row index, ordered from closest to farthest.
    ranking = pairwise.argsort(axis=1)
    estimates = scaled.copy()
    for user in range(estimates.shape[0]):
        # Drop the first entry of the ordering; it is expected to be the
        # user's own row (distance zero to itself).
        closest = ranking[user, 1:]
        for item in range(estimates.shape[1]):
            # Kept as a plain loop for readability; a vectorized version
            # would be faster.
            votes = [scaled[other, item]
                     for other in closest
                     if rated[other, item]]
            if not votes:
                continue
            keep = len(votes) // 2 + 1
            estimates[user, item] = np.mean(votes[:keep])

    return normalizer.inverse_transform(estimates)
def predict(otrain):
    """Predict ratings from the closest rows of a ratings matrix.

    An entry of ``otrain`` is considered rated when it is ``> 0``. Rows are
    compared with the ``'correlation'`` distance on the boolean rated mask.
    Each cell is then overwritten with the mean of the nearer half (plus one)
    of the normalized ratings that other rows hold for the same column; cells
    with no such rating are left at their normalized value.

    Parameters
    ----------
    otrain : 2-D array
        Ratings, one row per user and one column per item (per the loop
        structure below). NOTE(review): ``NormalizePositive`` is defined
        elsewhere; its exact normalization is not visible here.

    Returns
    -------
    Result of ``inverse_transform`` applied to the filled matrix.
    """
    mask = (otrain > 0)
    scaler = NormalizePositive(axis=1)
    normalized = scaler.fit_transform(otrain)

    condensed = distance.pdist(mask, 'correlation')
    square = distance.squareform(condensed)

    # Row-wise ordering of all rows by increasing distance.
    by_distance = square.argsort(axis=1)
    result = normalized.copy()
    n_rows = result.shape[0]
    n_cols = result.shape[1]
    for row in range(n_rows):
        # Everything after the first position of the ordering, which is
        # expected to be the row itself.
        neighbours = by_distance[row, 1:]
        for col in range(n_cols):
            # Collect neighbour ratings for this column, nearest first.
            # (Plain Python loop on purpose: slower than NumPy indexing
            # tricks, but easier to follow.)
            ratings = []
            for other in neighbours:
                if mask[other, col]:
                    ratings.append(normalized[other, col])
            if ratings:
                half_plus_one = len(ratings) // 2 + 1
                result[row, col] = np.mean(ratings[:half_plus_one])

    return scaler.inverse_transform(result)
Exemplo n.º 3
0
def predict(otrain):
    """Estimate ratings for every cell from the nearest neighbouring users.

    ``otrain > 0`` marks which entries carry a rating. Users (rows) are
    compared using the ``'correlation'`` distance between their boolean
    rated masks. For each user and item, the normalized ratings of the
    neighbours who rated the item are taken nearest-first, and the mean of
    the first ``len // 2 + 1`` of them becomes the estimate. When no
    neighbour rated the item the normalized value is kept.

    Parameters
    ----------
    otrain : 2-D array
        Ratings matrix (users x items). NOTE(review): must be acceptable to
        ``NormalizePositive``, which is defined outside this block.

    Returns
    -------
    The estimates mapped back through ``inverse_transform``.
    """
    def nearest_half_mean(values):
        # Average the first len // 2 + 1 values (the list is ordered
        # nearest neighbour first).
        cutoff = len(values) // 2
        cutoff += 1
        return np.mean(values[:cutoff])

    has_rating = (otrain > 0)
    norm = NormalizePositive(axis=1)
    train = norm.fit_transform(otrain)

    dist_matrix = distance.squareform(
        distance.pdist(has_rating, 'correlation'))

    order = dist_matrix.argsort(axis=1)
    filled = train.copy()
    for row in range(filled.shape[0]):
        # ``nearby`` holds the neighbours of this user, closest first; the
        # leading entry (expected to be the user itself) is skipped.
        nearby = order[row, 1:]
        for col in range(filled.shape[1]):
            # This could be made faster with NumPy indexing, at the price
            # of being somewhat harder to understand.
            candidates = [train[k, col] for k in nearby if has_rating[k, col]]
            if candidates:
                filled[row, col] = nearest_half_mean(candidates)

    return norm.inverse_transform(filled)
def predict(otrain):
    """Neighbour-based rating prediction over a users x items matrix.

    Parameters
    ----------
    otrain : 2-D array
        Ratings matrix; entries ``> 0`` are treated as existing ratings.
        NOTE(review): ``NormalizePositive`` is defined elsewhere, so the
        exact normalization applied is not visible from this block.

    Returns
    -------
    The filled, normalized matrix after ``inverse_transform``.
    """
    # Boolean mask of the entries that have been rated.
    was_rated = (otrain > 0)
    # Normalize the ratings.
    normalizer = NormalizePositive(axis=1)
    normed = normalizer.fit_transform(otrain)
    # Distance between users, computed on the mask only: it reflects which
    # items were rated, not the scores that were given.
    user_dists = distance.squareform(distance.pdist(was_rated, 'correlation'))
    # Sort each row: row i lists the column indices from nearest to
    # farthest, e.g. "2 0 1" means column 2 is the closest, then column 0.
    nearest_first = user_dists.argsort(axis=1)
    predictions = normed.copy()
    for user in range(predictions.shape[0]):
        # Neighbours of this user (the first position of the ordering is
        # dropped; it is expected to be the user itself).
        user_neighbours = nearest_first[user, 1:]
        # ``movie`` indexes the items (columns).
        for movie in range(predictions.shape[1]):
            # Gather the normalized scores of the neighbours who rated this
            # movie, nearest first. A vectorized version would be faster but
            # harder to read.
            scores = [normed[nb, movie]
                      for nb in user_neighbours
                      if was_rated[nb, movie]]
            if not scores:
                continue
            # The mean of the first half (plus one) of the scores is the
            # prediction of this user's rating for this movie.
            take = len(scores) // 2 + 1
            predictions[user, movie] = np.mean(scores[:take])

    return normalizer.inverse_transform(predictions)
Exemplo n.º 5
0
def predict(train):
    """Fill unrated entries of each row with a per-row regression.

    Entries of ``train`` greater than zero count as rated. For every row
    with more than five rated entries, an ``ElasticNetCV`` model is fitted
    on the rated columns: each rated column is one sample, its features are
    the normalized values of all *other* rows in that column, and its target
    is this row's normalized value. The model then predicts the row's
    unrated columns. Rows with five or fewer ratings are left unchanged.

    Parameters
    ----------
    train : 2-D array
        Ratings matrix. NOTE(review): ``NormalizePositive`` and
        ``ElasticNetCV`` come from outside this block (the latter presumably
        scikit-learn's); confirm.

    Returns
    -------
    The filled matrix after ``NormalizePositive.inverse_transform``.
    """
    rated = train > 0
    model = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    normalizer = NormalizePositive()
    scaled = normalizer.fit_transform(train)
    completed = scaled.copy()
    for user in range(completed.shape[0]):
        # All rows except the current one serve as features.
        others = np.delete(scaled, user, axis=0)
        known = rated[user]
        if np.sum(known) <= 5:
            # Too few ratings to fit on; keep this row as it is.
            continue
        unknown = ~known
        model.fit(others[:, known].T, scaled[user, known])
        completed[user, unknown] = model.predict(others[:, unknown].T)
    return normalizer.inverse_transform(completed)
Exemplo n.º 6
0
def predict(train):
    """Predict missing ratings row by row with elastic-net regression.

    A value ``> 0`` in ``train`` marks a rated entry. Each row that has
    more than five rated entries gets its own fit: the other rows' values
    in the rated columns are the inputs, the row's own values in those
    columns are the targets. The fitted model fills the row's unrated
    columns. All fitting and prediction happens on the normalized matrix.

    Parameters
    ----------
    train : 2-D array
        Ratings matrix. NOTE(review): behaviour of ``NormalizePositive``
        (defined elsewhere) is not visible here.

    Returns
    -------
    ``inverse_transform`` of the filled normalized matrix.
    """
    has_rating = (train > 0)
    alpha_grid = [0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.]
    regressor = ElasticNetCV(fit_intercept=True, alphas=alpha_grid)
    scaler = NormalizePositive()
    normed = scaler.fit_transform(train)

    output = normed.copy()
    # Visit every row (user) in turn.
    for idx in range(normed.shape[0]):
        # Leave the current row out of the feature matrix.
        rest = np.delete(normed, idx, axis=0)
        seen = has_rating[idx]
        if np.sum(seen) > 5:
            inputs = rest[:, seen].T
            targets = normed[idx, seen]
            regressor.fit(inputs, targets)

            # Only the entries that had no rating are overwritten.
            unseen = ~seen
            output[idx, unseen] = regressor.predict(rest[:, unseen].T)
    return scaler.inverse_transform(output)
Exemplo n.º 7
0
def predict(train):
    """Complete a ratings matrix using one regression per user.

    Entries greater than zero are treated as rated. For each user with more
    than five ratings, ``ElasticNetCV`` is fitted with the remaining users'
    normalized ratings (on the items this user rated) as features and the
    user's own normalized ratings as targets; the user's unrated items are
    then replaced by the model's predictions. Other users are not modified.

    Parameters
    ----------
    train : 2-D array
        Ratings matrix (users x items, per the loop below).
        NOTE(review): ``NormalizePositive`` and ``ElasticNetCV`` are defined
        or imported outside this block.

    Returns
    -------
    The completed matrix mapped back with ``inverse_transform``.
    """
    observed = (train > 0)
    enet = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.],
    )
    norm = NormalizePositive()
    data = norm.fit_transform(train)

    filled = data.copy()
    # Iterate over all users.
    for user in range(data.shape[0]):
        # Remove the current user from the training data.
        without_user = np.delete(data, user, axis=0)
        user_mask = observed[user]
        n_observed = np.sum(user_mask)
        if n_observed > 5:
            enet.fit(without_user[:, user_mask].T, data[user, user_mask])

            # Fill in the values that were not present before.
            gaps = ~user_mask
            filled[user, gaps] = enet.predict(without_user[:, gaps].T)
    return norm.inverse_transform(filled)
Exemplo n.º 8
0
def predict(train):
    """Regression-based rating prediction, one model fit per user.

    Parameters
    ----------
    train : 2-D array
        Ratings matrix; values ``> 0`` are treated as given ratings.
        NOTE(review): ``NormalizePositive`` is defined elsewhere and
        ``ElasticNetCV`` is presumably scikit-learn's; confirm.

    Returns
    -------
    The matrix with predicted entries, after ``inverse_transform``.
    """
    given = train > 0
    estimator = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    normalizer = NormalizePositive()
    ratings = normalizer.fit_transform(train)

    predicted = ratings.copy()
    # Process each user in turn.
    for user in range(ratings.shape[0]):
        # Training set with the current user removed.
        remaining = np.delete(ratings, user, axis=0)
        rated_by_user = given[user]
        # Only users with more than 5 ratings get predictions.
        if np.sum(rated_by_user) <= 5:
            continue
        # Inputs: the other users' ratings of the movies this user rated.
        # Labels: this user's actual ratings of those movies.
        estimator.fit(remaining[:, rated_by_user].T,
                      ratings[user, rated_by_user])
        # Predict the movies this user has not rated.
        not_rated = ~rated_by_user
        predicted[user, not_rated] = estimator.predict(
            remaining[:, not_rated].T)
    return normalizer.inverse_transform(predicted)