Пример #1
0
# Extract UserInfo and ItemInfo
from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo
from sqlalchemy.sql.expression import func
import numpy as np

# Populate UserInfo with one row per user, visiting users in Users.uid order.
# A user with at least one Record gets the next zero-based `index` (cnt);
# a user with no records is stored with zero counts and no index.
cnt = 0
for usr in session.query(Users.name).order_by(Users.uid).all():
    count = session.query(Record).filter(Record.name == usr.name).count()
    if count == 0:
        session.add(UserInfo(name=usr.name, count=0, ratecount=0))
        continue

    # Fetch the user's non-null ratings once; the list provides both the
    # rating count and the input for the standard deviation.
    # `!= None` is deliberate: SQLAlchemy turns it into IS NOT NULL, which a
    # Python-level `is not None` would not do.
    rated = session.query(Record.rate).filter(
        Record.name == usr.name, Record.rate != None)
    rates = [q.rate for q in rated]
    ratecount = len(rates)

    if ratecount > 0:
        # Statistics are computed only when ratings exist: np.std([]) would
        # return nan and emit a RuntimeWarning, and the avg query would be
        # wasted work.
        average = session.query(func.avg(Record.rate).label('average')).filter(
            Record.name == usr.name, Record.rate != None).scalar()
        itm = UserInfo(name=usr.name, index=cnt, count=count,
                       ratecount=ratecount, average=average, sd=np.std(rates))
    else:
        itm = UserInfo(name=usr.name, index=cnt, count=count,
                       ratecount=ratecount)
    session.add(itm)
    cnt += 1
session.commit()
# NOTE(review): cnt already equals the number of indexed users here, so
# cnt+1 looks one too large -- confirm against the consumers of nUsers
# before changing it.
nUsers = cnt + 1

cnt=0;
for rec in session.query(Record.iid).group_by(Record.iid).order_by(Record.iid).all():
Пример #2
0
from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo
from sqlalchemy.sql.expression import func
from scipy.sparse import csr_matrix
import cPickle

# Read the lookup tables stored in dat/mat.dat. The first pickled object is
# loaded only to advance the stream; the next two are kept as tableUI and
# tableII. The `with` block closes the file even if a load raises.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('dat/mat.dat', 'rb') as fr:
    cPickle.load(fr)
    tableUI = cPickle.load(fr)
    tableII = cPickle.load(fr)

# Mask dictionaries keyed by mask name; umask is only created here.
imask = {}
umask = {}

# all item mask: a single row with a True entry in each of the `count`
# columns, where `count` is the number of distinct item ids in Record.
count = session.query(Record.iid).group_by(Record.iid).count()
every_column = range(count)
imask['all'] = csr_matrix(
    ([True] * count, ([0] * count, every_column)),
    dtype='b',
    shape=(1, count),
)

# other item mask
# For each item type, gather the tableII lookups of the distinct item ids
# recorded for that type, together with matching all-True data and all-zero
# row lists of the same length.
# NOTE(review): nothing is stored into imask inside the visible loop body --
# presumably a csr_matrix is built from these lists afterwards; confirm.
for itp in ['anime','book','music','game','real']:
    # Number of distinct item ids having this type.
    scount = session.query(Record.iid).filter(Record.typ==itp).group_by(Record.iid).count()
    data = [True]*scount
    rowidx = [0]*scount
    colidx = [0]*scount  # placeholders, overwritten one by one below
    i=0
    for q in session.query(Record.iid).filter(Record.typ==itp).group_by(Record.iid).all():
        colidx[i]=tableII[q.iid]  # item id translated through tableII
        i+=1
Пример #3
0
from fetch import session, Record
from scipy.sparse import coo_matrix
import cPickle

# Read the lookup tables stored in dat/mat.dat. The first pickled object is
# loaded only to advance the stream; the next two are kept as tableUI and
# tableII. The `with` block closes the file even if a load raises.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open("dat/mat.dat", "rb") as fr:
    cPickle.load(fr)
    tableUI = cPickle.load(fr)
    tableII = cPickle.load(fr)

# Matrix dimensions: distinct user names (rows) and distinct item ids (columns).
M = session.query(Record.name).group_by(Record.name).count()
N = session.query(Record.iid).group_by(Record.iid).count()

# Integer code stored in the matrix for each record state.
# NOTE(review): "wish" is encoded as 0, which a sparse matrix may not
# distinguish from an absent entry -- confirm this is intended.
d = {"wish": 0, "do": 1, "collect": 2, "on_hold": 3, "dropped": 4}

# One (row, column, value) triple per record, with names and item ids
# translated through the tableUI / tableII lookups.
records = session.query(Record.name, Record.iid, Record.state).all()
irow = [tableUI[r.name] for r in records]
icol = [tableII[r.iid] for r in records]
data = [d[r.state] for r in records]

S = coo_matrix((data, (irow, icol)), dtype="i", shape=(M, N))

# Append the matrix, converted to LIL format, to dat/training.dat. The file
# is opened in append mode, so pickles already in it are preserved. The
# `with` block closes the file even if the dump raises.
with open("dat/training.dat", "ab") as fw:
    cPickle.dump(S.tolil(), fw)
Пример #4
0
fr.close()
# Read the lookup tables stored in dat/mat.dat. The first pickled object is
# loaded only to advance the stream; the next two are kept as tableUI and
# tableII. The `with` block closes the file even if a load raises.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('dat/mat.dat', 'rb') as fr:
    cPickle.load(fr)
    tableUI = cPickle.load(fr)
    tableII = cPickle.load(fr)

# `seed` is imported outside this block; it is called without arguments.
seed()

### Phase of selecting an item type

tp = 'real'

# Keys of the per-state accumulators created below.
states = ["wish", "do", "collect", "on_hold", "dropped", "all", "states"]

# Distinct users (M) and distinct items (N) having records of type tp.
of_type = Record.typ == tp
M = session.query(Record.name).filter(of_type).group_by(Record.name).count()
N = session.query(Record.iid).filter(of_type).group_by(Record.iid).count()

# Number of records of type tp that carry a rating.
c = session.query(Record).filter(Record.typ == tp, Record.rate != None).count()

# One empty row-index, column-index and value list per key in `states`.
irow, icol, data = {}, {}, {}
for s in states:
    for bucket in (irow, icol, data):
        bucket[s] = []

# Visit every record of type tp that has a rating.
for q in session.query(Record.name, Record.iid, Record.rate, Record.state).filter(Record.typ==tp, Record.rate != None).all():
    # Sum of the umask[tp] entries in columns 0..tableUI[q.name], minus one.
    # Presumably this is the user's zero-based position among the users
    # selected by the mask -- TODO confirm.
    i = umask[tp][:,:tableUI[q.name]+1].sum()-1
    # Same computation on the item side, using imask[tp] and tableII.
    j = imask[tp][:,:tableII[q.iid]+1].sum()-1
    irow['all'].append(i)
Пример #5
0
# Construct utiliary mtx
import numpy as np
from scipy import sparse
from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo
from sqlalchemy.sql.expression import func
from settings import TYPE_LIST
import pickle

#tableState = {'do':1,'collect':2,'wish':3,'on_hold':4,'dropped':5}
userAvg = {}  # created empty; not filled in this block

# UserInfo.name -> UserInfo.index for users that have an index.
# UserInfo.average is selected by the query but not used here.
tableUI = dict(
    (row.name, row.index)
    for row in session.query(UserInfo.name, UserInfo.index, UserInfo.average)
    .filter(UserInfo.index != None)
    .all()
)

# ItemInfo.i_index -> ItemInfo.index for every item.
tableII = dict(
    (row.i_index, row.index)
    for row in session.query(ItemInfo.i_index, ItemInfo.index).all()
)

#nUsers=session.query(UserInfo).filter(UserInfo.index!=None).count()
nItms = session.query(ItemInfo).count()

# gp['bias_states'] maps each record state to the difference between the
# average rating of that state and the average rating over all rated records.
gp = {'bias_states': {}}

rated_only = Record.rate != None
global_avg = (
    session.query(func.avg(Record.rate).label('average'))
    .filter(rated_only)
    .scalar()
)
per_state = (
    session.query(Record.state, func.avg(Record.rate).label('average'))
    .filter(rated_only)
    .group_by(Record.state)
)
for row in per_state:
    gp['bias_states'][row.state] = float(row.average) - float(global_avg)