def test_bivariate_optimise():
    """Run lambda optimisation on the first fold of RandomBiGen data.

    Two ``FistaFlat`` solvers are built with square loss, L1 regularisation
    and an intercept (no ``lambda1`` is preset).  Data for ``ndays = 20`` is
    generated, the first fold yielded by ``tscv.tsfi(ndays, ntest=2)`` is
    split with ``BatchBivariateLearner.XYparts`` and handed to
    ``learner.optimise_lambda`` with the same candidate range for both
    solvers.  The solver parameters are printed afterwards; nothing is
    asserted.
    """
    # ** unpacking copies the mapping, so both solvers can share one config.
    solver_config = {"intercept": True, "loss": "square", "regul": "l1"}
    w_spams = FistaFlat(**solver_config)
    u_spams = FistaFlat(**solver_config)
    learner = BatchBivariateLearner(w_spams, u_spams)

    gen = RandomBiGen()
    ndays = 20
    Xt, Y = gen.generate(n=ndays)
    Xt = vstack(Xt)
    Y = vstack(Y)

    folds = list(tscv.tsfi(ndays, ntest=2))
    lrng = np.arange(0.1, 1, 0.1)
    fold = folds[0]
    # XYparts is given the transpose of the stacked generator output.
    X = ssp.csc_matrix(Xt.transpose())
    Xparts, Yparts = BatchBivariateLearner.XYparts(fold, X, Y)
    learner.optimise_lambda(lrng, lrng, Yparts, Xparts)

    # Parenthesised single-argument print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print(w_spams.params)
    print(u_spams.params)
def test_bivariate_model():
    """Run ``BatchBivariateLearner.process`` on ``RandomBiGen().generate(n=4)``.

    Both solvers are ``FistaFlat`` instances sharing one configuration:
    square loss, L1 regularisation, ``lambda1 = 0.01`` and an intercept.
    The function makes no assertions; it only exercises the call.
    """
    solver_kwargs = {
        "lambda1": 0.01,
        "intercept": True,
        "loss": "square",
        "regul": "l1",
    }
    learner = BatchBivariateLearner(
        FistaFlat(**solver_kwargs),
        FistaFlat(**solver_kwargs),
    )

    day_count = 4
    stacked_x, stacked_y = RandomBiGen().generate(n=day_count)
    stacked_x = vstack(stacked_x)
    stacked_y = vstack(stacked_y)

    # The stacked X is passed through the Xt keyword as a CSC matrix.
    learner.process(stacked_y, Xt=ssp.csc_matrix(stacked_x))
def test_bivariate_model():
    """Generate 1000 RandomBiGen samples and print the stacked shapes.

    Two ``FistaFlat`` solvers (square loss, ``l1l2`` regularisation,
    ``it0 = 50``, ``max_it = 1000``, verbose) are wrapped in a
    ``BatchBivariateLearner``.  The learner is constructed but not trained:
    the ``process`` call is commented out, so only the shapes of the
    horizontally stacked X and vertically stacked Y are printed.
    """
    # ** unpacking copies the mapping, so both solvers can share one config.
    solver_config = {
        "intercept": True,
        "loss": "square",
        "regul": "l1l2",
        "it0": 50,
        "max_it": 1000,
        "verbose": True,
    }
    w_func = FistaFlat(**solver_config)
    u_func = FistaFlat(**solver_config)

    # NOTE(review): spamsDict is not defined inside this function; it has to
    # exist at module scope or this line raises NameError - confirm.
    learner = BatchBivariateLearner(w_func, u_func, **spamsDict)

    gen = RandomBiGen(noise=0.01)
    allX = []
    allY = []
    for _ in range(1000):
        X, Y = gen.generate()
        allX.append(X)
        allY.append(Y)
    X = hstack(allX)
    Y = vstack(allY)

    # Training is currently disabled:
    # learner.process(Y,X)

    # Parenthesised single-argument print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print(X.shape)
    print(Y.shape)
def test_bivariate_model():
    """Exercise ``learner.process`` with data from ``RandomBiGen`` (n=4).

    The W and U solvers are two ``FistaFlat`` objects created from the same
    keyword set (``lambda1 = 0.01``, intercept enabled, square loss, L1
    regularisation).  No result is checked.
    """
    fista_options = dict(
        [
            ("lambda1", 0.01),
            ("intercept", True),
            ("loss", "square"),
            ("regul", "l1"),
        ]
    )
    w_solver = FistaFlat(**fista_options)
    u_solver = FistaFlat(**fista_options)
    bivariate = BatchBivariateLearner(w_solver, u_solver)

    generator = RandomBiGen()
    n_days = 4
    x_rows, y_rows = generator.generate(n=n_days)
    x_stacked = vstack(x_rows)
    y_stacked = vstack(y_rows)

    # X is converted to CSC and supplied through the Xt keyword.
    x_sparse = ssp.csc_matrix(x_stacked)
    bivariate.process(y_stacked, Xt=x_sparse)
def test_bivariate_optimise():
    """Optimise the solver lambdas on one fold and print the solver params.

    Steps, as performed below:

    1. Build two ``FistaFlat`` solvers (intercept, square loss, L1) and a
       ``BatchBivariateLearner`` around them.
    2. Call ``RandomBiGen().generate(n=20)`` and ``vstack`` both outputs.
    3. Take the first fold from ``tscv.tsfi(ndays, ntest=2)`` and split the
       data with ``BatchBivariateLearner.XYparts``.
    4. Call ``learner.optimise_lambda`` with ``np.arange(0.1, 1, 0.1)`` as
       the candidate range for both solvers.

    No assertion is made; the parameters of both solvers are printed.
    """
    # ** unpacking copies the mapping, so both solvers can share one config.
    solver_config = {"intercept": True, "loss": "square", "regul": "l1"}
    w_spams = FistaFlat(**solver_config)
    u_spams = FistaFlat(**solver_config)
    learner = BatchBivariateLearner(w_spams, u_spams)

    gen = RandomBiGen()
    ndays = 20
    Xt, Y = gen.generate(n=ndays)
    Xt = vstack(Xt)
    Y = vstack(Y)

    folds = list(tscv.tsfi(ndays, ntest=2))
    lrng = np.arange(0.1, 1, 0.1)
    fold = folds[0]
    # XYparts is given the transpose of the stacked generator output.
    X = ssp.csc_matrix(Xt.transpose())
    Xparts, Yparts = BatchBivariateLearner.XYparts(fold, X, Y)
    learner.optimise_lambda(lrng, lrng, Yparts, Xparts)

    # Parenthesised single-argument print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print(w_spams.params)
    print(u_spams.params)
}) u_spams = FistaFlat(**{ "intercept": True, "loss":"square", "regul":"elastic-net", "max_it":1000, "lambda2":0.5, "lambda1":0.3 }) es.exp("randomExp",fake=False) es.state("random") gen = randomgen.RandomBiGen(noise=0.01,brng=(100,1000),ntasks=1,wu_sparcity=0.6,wrng=(2,3),urng=(2,3),nusers=100,nwords=400) x,y = gen.generate(n=1000) x = ssp.csc_matrix(vstack(x).T) y = array(y) fold = [f for f in tscv.tsfi(y.shape[0],ntest=100,ntraining=900)][0] Xparts,Yparts = BatchBivariateLearner.XYparts(fold,x,y) learner = BatchBivariateLearner(w_spams,u_spams,bivar_max_it=10) learner.process(Yparts.train_all,Xparts.train_all,tests={"test":(Xparts.test,Yparts.test)}) print learner.w.todense() print gen._w print learner.u.todense() print gen._u print learner.w_bias print learner.u_bias print gen._bias embed()
def experiment(o):
    """Run the fold-by-fold bivariate learning experiment described by ``o``.

    ``o`` is an options mapping.  Keys read by this function:

    * data window / folds: ``start``, ``ndays``, ``f_ntest``, ``f_nval``,
      ``f_ntrain``, ``f_maxiter``
    * inputs: ``task_file``, ``user_file``, ``word_file``, ``tree_file``,
      ``voc_file`` (optional key), ``user_file_corrected``
    * subsampling: ``word_subsample``, ``user_subsample``
    * lambda search: ``u_lambdas_str``, ``w_lambdas_str``, ``w_lambda``,
      ``u_lambda``, ``lambda_file``, ``optimise_lambda_once``
    * solvers / iterations: ``w_spams``, ``u_spams``, ``bivar_max_it``,
      ``opt_maxit``, ``train_maxit``
    * output: ``exp_out``

    The function loads (or rebuilds and optionally caches) the user matrix,
    loads the task values and the tree, builds the two solvers and then, for
    every fold, optionally optimises the lambdas, trains the learner and
    flushes the recorded state through ``es``.  It returns nothing.
    """
    logger.info("Reading initial data")
    start = o["start"];ndays = o["ndays"];end = start + ndays
    folds = tscv.tsfi(ndays,ntest=o['f_ntest'],nvalidation=o['f_nval'],ntraining=o['f_ntrain'])
    tasks = billdata.taskvals(o["task_file"])
    ndays_total = tasks.yvalues.shape[0]
    # Rebuild the user matrix from source unless a cached ("corrected") copy
    # was configured and already exists on disk.
    if o["user_file_corrected"] is None or not os.path.exists(o["user_file_corrected"]):
        logger.info("...Loading and correcting from source")
        # The vocabulary is only read when a voc_file key is present and the
        # word subsample setting is not below 1.
        if "voc_file" in o and not o["word_subsample"] < 1:
            logger.info("...Reading vocabulary")
            voc = billdata.voc(o["voc_file"]).voc()
            # voc = None
        else:
            voc = None
        logger.info("...Reading user days")
        # word_col is returned here but not used again in this function.
        user_col, word_col = billdata.suserdayword(
            o["user_file"],ndays_total,
            nwords=billdata.count_cols_h5(o["word_file"])
        ).mat(
            days=(start,end),
            voc=voc
        )
        if o["user_file_corrected"] is not None:
            logger.info("...Saving corrected user_mat")
            # Persist the CSC components so the matrix can be rebuilt later.
            sio.savemat(o["user_file_corrected"],{"data":user_col.data,"indices":user_col.indices,"indptr":user_col.indptr,"shape":user_col.shape})
    else:
        logger.info("...Loading corrected user_mat")
        # csc_matrix((data, indices, indptr), [shape=(M, N)])
        user_col_d = sio.loadmat(o["user_file_corrected"])
        # NOTE(review): data/indices/indptr are reduced with [:,0] but "shape"
        # is passed exactly as loaded - confirm csc_matrix accepts that form.
        user_col = ssp.csc_matrix((user_col_d["data"][:,0],user_col_d["indices"][:,0],user_col_d["indptr"][:,0]),shape=user_col_d["shape"])
    logger.info("...User Col read, dimensions: %s"%str(user_col.shape))
    logger.info("...Reading task data")
    # From here on `tasks` is the matrix for the selected days and columns
    # 3, 4 and 5, no longer the taskvals object.
    tasks = tasks.mat(days=(start,end),cols=[3,4,5])
    logger.info("...Reading tree file")
    tree = billdata.tree(o["tree_file"]).spamsobj()
    if o["word_subsample"] < 1 or o["user_subsample"] < 1:
        user_col=billdata.subsample(user_col,word_subsample=o["word_subsample"],user_subsample=o["user_subsample"],ndays=ndays)
    # At this point we've just loaded all the data
    # Prepare the optimisation functions
    # The *_lambdas_str options are comma-separated floats that are expanded
    # as the positional arguments of np.arange.
    u_lambdas = [float(x) for x in o['u_lambdas_str'].split(",")]
    w_lambdas = [float(x) for x in o['w_lambdas_str'].split(",")]
    u_lambdas = np.arange(*u_lambdas)
    w_lambdas = np.arange(*w_lambdas)
    # Solver presets selectable through o["w_spams"] / o["u_spams"]; the
    # "...check" variants differ from their counterparts only in max_it (100
    # instead of 1000).
    spams_avail = {
        "tree":FistaTree(tree,**{
            "intercept": True,
            "loss":"square",
            "regul":"multi-task-tree",
            "it0":10,
            "lambda2":1000,
            "max_it":1000,
            "verbose":True
        }),
        "treecheck":FistaTree(tree,**{
            "intercept": True,
            "loss":"square",
            "regul":"multi-task-tree",
            "it0":10,
            "max_it":100,
            "lambda2":1000,
            "verbose":True
        }),
        "flatcheck":FistaFlat(**{
            "intercept": True,
            "loss":"square",
            "regul":"l1l2",
            "it0":50,
            "max_it":100,
            "verbose":True
        }),
        "flat":FistaFlat(**{
            "intercept": True,
            "loss":"square",
            "regul":"l1l2",
            "it0":50,
            "max_it":1000,
            "verbose":True
        })
    }
    # Deep copies keep the W and U solvers independent even when both options
    # name the same preset.
    w_spams = copy.deepcopy(spams_avail[o["w_spams"]])
    u_spams = copy.deepcopy(spams_avail[o["u_spams"]])
    lambda_set = False
    if o["lambda_file"] is not None and os.path.exists(o["lambda_file"]):
        logger.info("... loading existing lambda")
        lambda_d = sio.loadmat(o["lambda_file"])
        w_spams.params["lambda1"] = lambda_d["w_lambda"][0][0]
        u_spams.params["lambda1"] = lambda_d["u_lambda"][0][0]
        lambda_set = True
    # Prepare the learner
    learner = BatchBivariateLearner(w_spams,u_spams,bivar_max_it=o["bivar_max_it"])
    fold_i = 0
    es.exp(os.sep.join([o['exp_out'],"ds:politics_word:l1_user:l1_task:multi"]),fake=False)
    # Go through the folds!
    for fold in folds:
        es.state("fold_%d"%fold_i)
        logger.info("Working on fold: %d"%fold_i)
        logger.info("... preparing fold parts")
        Xparts,Yparts = BatchBivariateLearner.XYparts(fold,user_col,tasks)
        # Optimise on every fold, or only until a lambda has been set when
        # optimise_lambda_once is enabled.
        if not o["optimise_lambda_once"] or (o["optimise_lambda_once"] and not lambda_set):
            logger.debug("... Setting max it to optimisation mode: %d"%o["opt_maxit"])
            w_spams.params["max_it"] = o["opt_maxit"]
            u_spams.params["max_it"] = o["opt_maxit"]
            logger.info("... optimising fold lambda")
            # NOTE(review): the ranges are passed w-first but the result is
            # unpacked as (ulambda, wlambda) - confirm the return order.
            ulambda,wlambda = learner.optimise_lambda(
                w_lambdas,u_lambdas,Yparts,Xparts,
                w_lambda=o["w_lambda"],u_lambda=o["u_lambda"]
            )
            lambda_set = True
            if o["lambda_file"] is not None:
                logger.info("... saving optimised lambdas")
                # Element [1] of each result is what gets persisted;
                # presumably the selected lambda value - verify.
                sio.savemat(o["lambda_file"],{"w_lambda":wlambda[1],"u_lambda":ulambda[1]})
        logger.info("... training fold")
        logger.debug("... Setting max it to training mode: %d"%o["train_maxit"])
        w_spams.params["max_it"] = o["train_maxit"]
        u_spams.params["max_it"] = o["train_maxit"]
        learner.process(
            Yparts.train_all,Xparts.train_all,
            tests={
                "test":(Xparts.test,Yparts.test),
                "val_it":(Xparts.val_it,Yparts.val_it)
            }
        )
        # es.add receives locals() plus the names to record, so the local
        # variable names listed here must not be renamed.
        es.add(locals(),"fold_i","w_lambdas","u_lambdas","fold","Yparts","o")
        es.state()["w_spams_params"] = w_spams.params
        es.state()["u_spams_params"] = u_spams.params
        logger.info("... Saving output")
        es.flush()
        fold_i += 1
        # Optional cap on the number of folds processed.
        if o["f_maxiter"] is not None and fold_i >= o["f_maxiter"]:
            break
"loss": "square", "regul": "l1", "it0": 10, "max_it": 1000 }) u_spams = FistaFlat( **{ "intercept": True, "loss": "square", "regul": "elastic-net", "max_it": 1000, "lambda2": 0.5 }) # Prepare the learner learner = BatchBivariateLearner(w_spams, u_spams) fold_i = 0 es.exp("%s/Experiments/EMNLP2013/ds:politics_word:l1_user:l1_task:multi" % home) # Go through the folds! for fold in folds: es.state("fold_%d" % fold_i) logger.info("Working on fold: %d" % fold_i) logger.info("... preparing fold parts") Xparts, Yparts = BatchBivariateLearner.XYparts(fold, user_col, tasks) logger.info("... optimising fold lambda") learner.optimise_lambda(w_lambdas, u_lambdas, Yparts, Xparts) logger.info("... training fold") learner.process(Yparts.train_all, Xparts.train_all, tests={
# Random-data experiment: train a BatchBivariateLearner on RandomBiGen output
# and print the learnt parameters next to the generator's own attributes.
# NOTE(review): w_spams and u_spams are expected to be defined before this
# point; their definitions are not part of this statement run.
es.exp("randomExp", fake=False)
es.state("random")

gen = randomgen.RandomBiGen(
    noise=0.01,
    brng=(100, 1000),
    ntasks=1,
    wu_sparcity=0.6,
    wrng=(2, 3),
    urng=(2, 3),
    nusers=100,
    nwords=400,
)
x, y = gen.generate(n=1000)
x = ssp.csc_matrix(vstack(x).T)
y = array(y)

# Only the first fold produced by tsfi is used.
fold = list(tscv.tsfi(y.shape[0], ntest=100, ntraining=900))[0]
Xparts, Yparts = BatchBivariateLearner.XYparts(fold, x, y)

learner = BatchBivariateLearner(w_spams, u_spams, bivar_max_it=10)
learner.process(
    Yparts.train_all,
    Xparts.train_all,
    tests={"test": (Xparts.test, Yparts.test)},
)

# Parenthesised single-argument print behaves the same on Python 2 and is
# valid syntax on Python 3.
print(learner.w.todense())
print(gen._w)
print(learner.u.todense())
print(gen._u)
print(learner.w_bias)
print(learner.u_bias)
print(gen._bias)

# Calls embed() once the values above have been printed.
embed()
import scipy.sparse as ssp
import sys

# Problem sizes come from four integer command-line arguments
# (nusers nwords ntasks ndays); any other argument count falls back to the
# small defaults below.
if len(sys.argv[1:]) != 4:
    nusers = 10
    nwords = 20
    ntasks = 1
    ndays = 3
else:
    (nusers, nwords, ntasks, ndays) = [int(x) for x in sys.argv[1:]]

# Parenthesised single-argument print behaves the same on Python 2 (prints an
# empty line) and is valid syntax on Python 3.
print("")

# Initial values: W starts at zero (Fortran-ordered), U at all ones (sparse).
W = np.asfortranarray(zeros((nwords, ntasks)))
U = ssp.csc_matrix(ones((nusers, ntasks)))
# X has one column per (day, user) pair: nusers * ndays columns in total.
X = ssp.rand(nwords, nusers * ndays, format="csc")
Y = np.asfortranarray(rand(ndays, ntasks))
Y = BatchBivariateLearner._expandY(Y)
logger.debug("Input created!")


def cols_for_day(day):
    """Return the slice of X's columns that belongs to ``day``."""
    return slice(day * nusers, (day + 1) * nusers)


# Step 1: solve for W with U held at its initial value.
logger.debug("Creating Vprime!")
Vprime = BatchBivariateLearner._calculateVprime(X, U)
logger.debug("Calculating W")
# NOTE(review): the positional False is presumably fistaFlat's
# return_optim_info flag - confirm against the SPAMS documentation.
W = spams.fistaFlat(Y, Vprime, W, False, loss="square", regul="l1", lambda1=0.01)
W = ssp.csc_matrix(W)

# Step 2: solve for U using the W obtained above, restarting U from zero.
logger.debug("Creating DPrime")
Dprime = BatchBivariateLearner._calculateDprime(X, W, U.shape)
U = np.asfortranarray(zeros((nusers, ntasks)))
logger.debug("Calculating U")
U = spams.fistaFlat(Y, Dprime, U, False, loss="square", regul="l1", lambda1=0.01)
import scipy.sparse as ssp import sys if len(sys.argv[1:]) != 4: nusers = 10 nwords = 20 ntasks = 1 ndays = 3 else: (nusers, nwords, ntasks, ndays) = [int(x) for x in sys.argv[1:]] print "" W = np.asfortranarray(zeros((nwords, ntasks))) U = ssp.csc_matrix(ones((nusers, ntasks))) X = ssp.rand(nwords, nusers * ndays, format="csc") Y = np.asfortranarray(rand(ndays, ntasks)) Y = BatchBivariateLearner._expandY(Y) logger.debug("Input created!") def cols_for_day(day): return slice(day * nusers, (day + 1) * nusers) logger.debug("Creating Vprime!") Vprime = BatchBivariateLearner._calculateVprime(X, U) logger.debug("Calculating W") W = spams.fistaFlat(Y, Vprime, W, False,
"intercept": True, "loss":"square", "regul":"l1", "it0":10, "max_it":1000 }) u_spams = FistaFlat(**{ "intercept": True, "loss":"square", "regul":"elastic-net", "max_it":1000, "lambda2":0.5 }) # Prepare the learner learner = BatchBivariateLearner(w_spams,u_spams) fold_i = 0 es.exp("%s/Experiments/EMNLP2013/ds:politics_word:l1_user:l1_task:multi"%home) # Go through the folds! for fold in folds: es.state("fold_%d"%fold_i) logger.info("Working on fold: %d"%fold_i) logger.info("... preparing fold parts") Xparts,Yparts = BatchBivariateLearner.XYparts(fold,user_col,tasks) logger.info("... optimising fold lambda") learner.optimise_lambda(w_lambdas,u_lambdas,Yparts,Xparts) logger.info("... training fold") learner.process(Yparts.train_all,Xparts.train_all,tests={"test":(Xparts.test,Yparts.test),"val_it":(Xparts.val_it,Yparts.val_it)}) es.add(locals(),"fold_i","w_lambdas","u_lambdas","fold","Yparts") es.state()["w_spams_params"] = w_spams.params es.state()["u_spams_params"] = u_spams.params