# NOTE: the line below is a whitespace-collapsed script fragment; its original
# line breaks and indentation are lost, and it is cut off after the final
# `for rec in ...:` header (that loop's body is not visible here).
#
# What the visible part does:
#   * Iterates over Users.name ordered by Users.uid.
#   * For each user it issues four queries against Record filtered on
#     Record.name == usr.name: the total row count (`count`), the count of
#     rows with a non-NULL rate (`ratecount`), the average rate (`average`),
#     and the list of individual rates, whose standard deviation is taken
#     with np.std (`sd`).
#   * Users with count > 0 get a UserInfo row carrying index=cnt, and cnt is
#     then incremented; `average` and `sd` are stored only when ratecount > 0.
#   * Users with no records get a UserInfo row with count=0, ratecount=0 and
#     no index.
#   * `Record.rate != None` is a SQLAlchemy column expression (IS NOT NULL in
#     SQL), not a Python identity test, so it must not be rewritten as
#     `is not None`.
#
# NOTE(review): `nUsers=cnt+1` -- cnt is incremented once per indexed user, so
# it already looks like the number of indexed users; the +1 may be an
# off-by-one. Confirm against whatever consumes nUsers.
# NOTE(review): np.std is evaluated even when the rate list is empty; the
# result is not stored in that case.
# NOTE(review): where session.commit() sat relative to the user loop cannot
# be determined from the collapsed text -- presumably after it; verify.
# Extract UserInfo and ItemInfo from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo from sqlalchemy.sql.expression import func import numpy as np cnt=0; for usr in session.query(Users.name).order_by(Users.uid).all(): count = session.query(Record).filter(Record.name==usr.name).count() ratecount = session.query(Record).filter(Record.name==usr.name, Record.rate != None).count() average = session.query(func.avg(Record.rate).label('average')).\ filter(Record.name==usr.name, Record.rate != None).scalar(); temp = []; for q in session.query(Record.rate).filter(Record.name==usr.name, Record.rate != None): temp.append(q.rate) sd = np.std(temp) if count>0: if ratecount>0: itm = UserInfo(name=usr.name, index=cnt, count=count, ratecount=ratecount, \ average = average, sd=sd) else: itm = UserInfo(name=usr.name, index=cnt, count=count, ratecount=ratecount) session.add(itm) cnt+=1 else: itm = UserInfo(name=usr.name, count=0, ratecount=0) session.add(itm) session.commit() nUsers=cnt+1 cnt=0; for rec in session.query(Record.iid).group_by(Record.iid).order_by(Record.iid).all():
# NOTE: the line below is a whitespace-collapsed script fragment; its original
# line breaks and indentation are lost, and it is cut off inside the
# `for itp in [...]` loop (the rest of that loop is not visible here).
#
# What the visible part does:
#   * Reads three pickled objects back to back from dat/mat.dat. The first
#     is discarded; the second and third are kept as tableUI and tableII.
#   * Creates two empty dicts, imask and umask. Only imask is filled in the
#     visible text.
#   * imask['all'] is a 1 x count csr_matrix with a True entry in every
#     column, where count is the number of distinct Record.iid values.
#   * For each item type in ['anime','book','music','game','real'] it counts
#     the distinct Record.iid values of that type (scount), pre-sizes
#     data/rowidx/colidx to that length, and fills colidx with
#     tableII[q.iid] for every distinct item of the type.
#
# NOTE(review): dtype='b' is NumPy's type code for a signed byte (the boolean
# code is '?'); the True entries are therefore stored as integer 1. Confirm
# this is what downstream `.sum()` calls expect.
# NOTE(review): `colidx = range(count)` is a list only under Python 2, which
# the `cPickle` import suggests this script targets.
# NOTE(review): presumably the truncated remainder builds imask[itp] from
# data/rowidx/colidx the same way as imask['all'] -- not visible, verify.
from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo from sqlalchemy.sql.expression import func from scipy.sparse import csr_matrix import cPickle fr = open('dat/mat.dat','rb') cPickle.load(fr) tableUI = cPickle.load(fr) tableII = cPickle.load(fr) fr.close() imask=dict() umask=dict() # all item mask count = session.query(Record.iid).group_by(Record.iid).count() data = [True]*count rowidx = [0]*count colidx = range(count) imask['all']=csr_matrix((data,(rowidx,colidx)),dtype='b',shape=(1,count)) # other item mask for itp in ['anime','book','music','game','real']: scount = session.query(Record.iid).filter(Record.typ==itp).group_by(Record.iid).count() data = [True]*scount rowidx = [0]*scount colidx = [0]*scount i=0 for q in session.query(Record.iid).filter(Record.typ==itp).group_by(Record.iid).all(): colidx[i]=tableII[q.iid] i+=1
# Build a sparse user x item matrix of collection states from the Record
# table and append it, pickled, to dat/training.dat.
from fetch import session, Record
from scipy.sparse import coo_matrix
import cPickle

# dat/mat.dat holds several pickled objects back to back. The first one is
# not used here and is read only to advance the stream; the second and third
# are the lookup tables used below, keyed by Record.name and Record.iid.
# `with` guarantees the handle is closed even if unpickling raises.
with open("dat/mat.dat", "rb") as fr:
    cPickle.load(fr)
    tableUI = cPickle.load(fr)
    tableII = cPickle.load(fr)

# Matrix shape: one row per distinct Record.name, one column per distinct
# Record.iid.
M = session.query(Record.name).group_by(Record.name).count()
N = session.query(Record.iid).group_by(Record.iid).count()

# Integer code stored in the matrix for each collection state.
# NOTE(review): "wish" is encoded as 0, which is presumably indistinguishable
# from an absent entry once the matrix is consumed as a sparse structure.
# The encoding is left untouched because changing it would alter the data
# written to dat/training.dat -- confirm with the consumers of that file.
d = {"wish": 0, "do": 1, "collect": 2, "on_hold": 3, "dropped": 4}

# Coordinate-format triplets: row index, column index, state code.
irow = []
icol = []
data = []
for q in session.query(Record.name, Record.iid, Record.state).all():
    irow.append(tableUI[q.name])  # KeyError if the user is missing from tableUI
    icol.append(tableII[q.iid])   # KeyError if the item is missing from tableII
    data.append(d[q.state])       # KeyError on an unknown state string

S = coo_matrix((data, (irow, icol)), dtype="i", shape=(M, N))

# Append mode is kept from the original: the pickled LIL matrix is written
# after whatever dat/training.dat already contains.
with open("dat/training.dat", "ab") as fw:
    cPickle.dump(S.tolil(), fw)
# NOTE: the line below is a whitespace-collapsed script fragment; its original
# line breaks and indentation are lost. It starts mid-script (the leading
# `fr.close()` closes a handle opened before the visible text, and `session`,
# `Record`, `cPickle`, `seed`, `umask` and `imask` are defined elsewhere) and
# it is cut off inside the final record loop.
#
# What the visible part does:
#   * Reads three pickled objects from dat/mat.dat. The first is discarded;
#     the second and third are kept as tableUI and tableII.
#   * Calls seed() with no arguments -- presumably a random-number seeding
#     function imported out of view; verify.
#   * Fixes the item type to tp = 'real', then counts the distinct users (M)
#     and distinct items (N) having records of that type, and the number of
#     rated records of that type (c).
#   * Creates irow/icol/data dicts holding one empty list per key in
#     `states`; besides the five collection states the keys include "all"
#     and "states".
#   * For every rated record of the type, i is the sum of umask[tp] over
#     columns 0..tableUI[q.name] minus 1, and j is the analogous sum of
#     imask[tp] over columns 0..tableII[q.iid] minus 1 -- presumably the
#     user's / item's position among the entries selected by the mask;
#     confirm against how umask/imask are built.
#
# NOTE(review): `Record.rate != None` is a SQLAlchemy column expression
# (IS NOT NULL in SQL) and must not be rewritten as `is not None`.
# NOTE(review): the two mask prefix sums are recomputed for every record; a
# precomputed cumulative sum per mask looks like it would give the same
# values -- verify before changing.
fr.close() fr = open('dat/mat.dat','rb') cPickle.load(fr) tableUI = cPickle.load(fr) tableII = cPickle.load(fr) fr.close() seed() ### Phase of selecting an item type tp = 'real' states = ["wish","do","collect","on_hold","dropped","all","states"]; M = session.query(Record.name).filter(Record.typ==tp).group_by(Record.name).count() N = session.query(Record.iid).filter(Record.typ==tp).group_by(Record.iid).count() c = session.query(Record).filter(Record.typ==tp, Record.rate != None).count() irow=dict() icol=dict() data=dict() for s in states: irow[s]=[] icol[s]=[] data[s]=[] for q in session.query(Record.name, Record.iid, Record.rate, Record.state).filter(Record.typ==tp, Record.rate != None).all(): i = umask[tp][:,:tableUI[q.name]+1].sum()-1 j = imask[tp][:,:tableII[q.iid]+1].sum()-1 irow['all'].append(i)
# Construct auxiliary (utility) matrices: lookup tables from user name and
# item id to matrix index, plus the per-state rating bias.
import numpy as np
from scipy import sparse
from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo
from sqlalchemy.sql.expression import func
from settings import TYPE_LIST
import pickle

#tableState = {'do':1,'collect':2,'wish':3,'on_hold':4,'dropped':5}

# User name -> index, restricted to users that were assigned an index.
# `!= None` is a SQLAlchemy column expression (IS NOT NULL in SQL), so it is
# deliberately not written as `is not None`.
indexed_users = (
    session.query(UserInfo.name, UserInfo.index, UserInfo.average)
    .filter(UserInfo.index != None)
    .all()
)
tableUI = dict((row.name, row.index) for row in indexed_users)

# ItemInfo.i_index -> ItemInfo.index.
item_rows = session.query(ItemInfo.i_index, ItemInfo.index).all()
tableII = dict((row.i_index, row.index) for row in item_rows)

# Created empty here; nothing in the visible code fills it.
userAvg = dict()

#nUsers=session.query(UserInfo).filter(UserInfo.index!=None).count()
nItms = session.query(ItemInfo).count()

# Mean of all non-NULL ratings across every record.
global_avg = (
    session.query(func.avg(Record.rate).label('average'))
    .filter(Record.rate != None)
    .scalar()
)

# Bias of each collection state: the state's mean rating minus the global
# mean rating.
gp = {'bias_states': {}}
per_state_avg = (
    session.query(Record.state, func.avg(Record.rate).label('average'))
    .filter(Record.rate != None)
    .group_by(Record.state)
)
for state, state_avg in per_state_avg:
    gp['bias_states'][state] = float(state_avg) - float(global_avg)