示例#1
0
def standRegres(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a least-squares model in log10 space and hand it to ``drawtest``.

    One sample is built per message id that appears in both the ``sharefan``
    and ``likesum_nothotfan`` results, has at least one recorded share and a
    non-zero ``msgdict[id][0]``.  Each sample row is
    ``[log10(hot-fan ratio), log10(like_sum), 1.0]`` and its label is
    ``log10(msgdict[id][0])`` (presumably the message's share count, going
    by the plot title -- confirm against ``fan.sharefan``).

    The samples are split by ``devide_data`` and the weights are obtained
    from the normal equations ``ws = (X^T X)^-1 X^T y`` on the training part.

    Args:
        page_name: forwarded to the ``fan`` data loaders and to ``drawtest``.
        hotnum: forwarded as ``hotfannum`` / ``hotnum`` to the data loaders.
        timeline: forwarded as ``hours`` / ``timeline`` to the data loaders.
        percent: forwarded unchanged to the data loaders and ``drawtest``.
        notshow: forwarded unchanged to ``drawtest``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when there is no training
        data or ``X^T X`` is singular.
    """
    from fan import sharefan
    from fan import likesum_nothotfan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is only tolerated (with a warning) by Python 2.
    from numpy import linalg, log10, mat

    # Load the raw data.
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)
    timedict = likesum_nothotfan(page_name=page_name,hotnum=hotnum,timeline=timeline,percent=percent,justgetdata=True)

    dataMat = []
    labelMat = []
    lendatanum = 0  # messages that have at least one recorded share
    for i in onehoursdict:
        if i not in timedict:
            continue
        shares = onehoursdict[i]
        if not shares:
            # No shares recorded for this message; report it when its
            # msgdict[i][0] value is large, then skip it.
            if msgdict[i][0] > 50:
                print(str(msgdict[i][0])+msgdict[i][2]+str(msgdict[i][1])+' '+i)
            continue
        lendatanum += 1

        y = msgdict[i][0]
        if not y:
            # log10(0) is undefined, so such messages yield no sample.
            continue

        # Entries of ``shares`` are compared as 1-tuples ``(fan,)``.
        hot_count = 0
        for j in hotfan:
            if (j,) in shares:
                hot_count += 1
        # float() forces true division; int/int floors under Python 2, which
        # turned every ratio into 0 or 1 and zeroed this feature column.
        x = float(hot_count) / len(shares)
        if x <= 0:
            x = 1  # keeps log10 defined; maps "no hot fans" to log10(1) == 0

        # Clamp on a local copy instead of writing back into ``timedict``.
        like_sum = timedict[i]['like_sum']
        if like_sum <= 0:
            like_sum = 1

        dataMat.append([log10(x),log10(like_sum),1.0])
        labelMat.append(log10(y))

    # Compute w (the regression weights).
    print('总条数:'+str(lendatanum))
    print('总数据:'+str(len(dataMat)))
    (dataMat,labelMat,testx,testy) = devide_data(dataMat,labelMat)
    print('训练数据:'+str(len(dataMat)))
    print('测试数据:'+str(len(testx)))
    if len(dataMat) == 0:
        # Nothing to fit; the matrix products below would fail on shapes.
        print('没有解')
        return
    xMat = mat(dataMat)
    yMat = mat(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular matrix: the normal equations have no unique solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    return drawtest(dataMat,labelMat,ws.getA1(),page_name,hotnum,timeline,testx,testy,percent=percent,xlabelinput=['sharehotfan','like_not_fan'],titleinput='like_sum_notfan and sharehotfan:share_count',notshow = notshow)
示例#2
0
def standRegres_share(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a least-squares model in log10 space and query it interactively.

    One sample is built per message id whose ``sharefan`` share list and
    ``msgdict`` entry are non-empty and whose ``msgdict[id][0]`` is non-zero.
    Each sample row is ``[log10(hot-fan ratio), log10(number of shares), 1.0]``
    and its label is ``log10(msgdict[id][0])``.

    After solving ``ws = (X^T X)^-1 X^T y`` on all samples, the function
    prints ``ws`` and then repeatedly prompts for a message id, printing the
    sample row, the predicted value ``10 ** (ws . row)`` and the stored
    ``msgdict[id][0]``.  Entering ``q`` ends the loop.

    Args:
        page_name: forwarded to ``fan.sharefan``.
        hotnum: forwarded as ``hotfannum`` to ``fan.sharefan``.
        timeline: forwarded as ``hours`` to ``fan.sharefan``.
        percent: forwarded unchanged to ``fan.sharefan``.
        notshow: accepted for signature parity with the other regressions;
            not used here.

    Returns:
        ``None`` in every case.
    """
    from fan import sharefan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is only tolerated (with a warning) by Python 2.
    from numpy import linalg, log10, mat

    # Load the raw data.
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)

    dataMat = []
    labelMat = []
    id2num = dict()  # message id -> row index in dataMat
    for i in onehoursdict:
        shares = onehoursdict[i]
        if not (shares and msgdict[i]):
            continue
        y = msgdict[i][0]
        if not y:
            # log10(0) is undefined, so such messages yield no sample.
            continue

        # Each share entry is indexable; its first element is the fan.
        hot_count = 0
        for j in shares:
            if j[0] in hotfan:
                hot_count += 1
        # float() forces true division; int/int floors under Python 2, which
        # turned every ratio into 0 or 1 and zeroed this feature column.
        x = float(hot_count) / len(shares)
        if x <= 0:
            x = 1  # keeps log10 defined; maps "no hot fans" to log10(1) == 0

        # Record the index at append time so it always matches the row,
        # even when earlier messages were skipped for having y == 0.
        id2num[i] = len(dataMat)
        dataMat.append([log10(x),log10(len(shares)),1.0])
        labelMat.append(log10(y))

    if len(dataMat) == 0:
        # Nothing to fit; the matrix products below would fail on shapes.
        print('没有解')
        return
    xMat = mat(dataMat)
    yMat = mat(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular matrix: the normal equations have no unique solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    ws = ws.getA1()
    print(ws)

    while True:
        test_id = raw_input('please input id:')
        if test_id == 'q':
            break
        if test_id not in id2num:
            # Keep the prompt alive instead of dying with a KeyError.
            print('unknown id: ' + test_id)
            continue
        row = id2num[test_id]
        print(xMat[row])
        tempx = dataMat[row]
        print(tempx)
        # Undo the log10 transform to get the prediction in original units.
        testresult = 10**(ws[0]*tempx[0] + ws[1]*tempx[1] + ws[2])
        print('预测结果:' + str(testresult))
        print('真实结果:' + str(msgdict[test_id][0]))
示例#3
0
def standRegres_share(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a least-squares model in log10 space and hand it to ``drawtest``.

    One sample is built per message id whose ``sharefan`` share list and
    ``msgdict`` entry are non-empty and whose ``msgdict[id][0]`` is non-zero.
    Each sample row is ``[log10(hot-fan ratio), log10(number of shares), 1.0]``
    and its label is ``log10(msgdict[id][0])``.

    The samples are split by ``devide_data`` and the weights are obtained
    from the normal equations ``ws = (X^T X)^-1 X^T y`` on the training part.

    Args:
        page_name: forwarded to ``fan.sharefan`` and to ``drawtest``.
        hotnum: forwarded as ``hotfannum`` to ``fan.sharefan``.
        timeline: forwarded as ``hours`` to ``fan.sharefan``.
        percent: forwarded unchanged to ``fan.sharefan`` and ``drawtest``.
        notshow: forwarded unchanged to ``drawtest``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when there is no training
        data or ``X^T X`` is singular.
    """
    from fan import sharefan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is only tolerated (with a warning) by Python 2.
    from numpy import linalg, log10, mat

    # Load the raw data.
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)

    dataMat = []
    labelMat = []
    lendatanum = 0  # messages with non-empty shares and msgdict entry
    for i in onehoursdict:
        shares = onehoursdict[i]
        if not (shares and msgdict[i]):
            continue
        lendatanum += 1

        y = msgdict[i][0]
        if not y:
            # log10(0) is undefined, so such messages yield no sample.
            continue

        # Each share entry is indexable; its first element is the fan.
        hot_count = 0
        for j in shares:
            if j[0] in hotfan:
                hot_count += 1
        # float() forces true division; int/int floors under Python 2, which
        # turned every ratio into 0 or 1 and zeroed this feature column.
        x = float(hot_count) / len(shares)
        if x <= 0:
            x = 1  # keeps log10 defined; maps "no hot fans" to log10(1) == 0

        dataMat.append([log10(x),log10(len(shares)),1.0])
        labelMat.append(log10(y))

    print('总条数:'+str(lendatanum))
    print('总数据:'+str(len(dataMat)))
    (dataMat,labelMat,testx,testy) = devide_data(dataMat,labelMat)
    print('训练数据:'+str(len(dataMat)))
    print('测试数据:'+str(len(testx)))
    if len(dataMat) == 0:
        # Nothing to fit; the matrix products below would fail on shapes.
        print('没有解')
        return
    xMat = mat(dataMat)
    yMat = mat(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular matrix: the normal equations have no unique solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    # NOTE(review): column 0 is the hot-fan ratio and column 1 the share
    # count, yet the labels below list 'share_hourall' first -- the order
    # looks swapped; confirm against how drawtest consumes xlabelinput.
    return drawtest(dataMat,labelMat,ws.getA1(),page_name,hotnum,timeline,testx,testy,percent=percent,xlabelinput=['share_hourall','share_hotfan_per'],titleinput='shareall and sharehotfan:share_count',notshow = notshow)