Exemplo n.º 1
0
    def run(self, type, n_jobs=8, n_components=100, max_iter=50, vad_K=100):
        """Grid-search the embedding weights of one model variant.

        For every weight setting of the selected variant a fresh model is
        trained into ``self.save_dir``, the factors of its last saved
        checkpoint are scored with ``self.local_eval``, and the best ``U``/``V``
        pair is written with ``np.savez`` to a file in the current working
        directory named ``<Variant>_K<n_components>_best_ndcd10_<score>.npz``.

        Args:
            type: Variant to tune: ``'cofactor'`` or ``'model2'`` ...
                ``'model7'``. Any other value is a silent no-op. (The name
                shadows the builtin; it is kept so keyword callers still work.)
            n_jobs: Forwarded to the model constructor.
            n_components: Forwarded to the model constructor; also used in the
                checkpoint and output file names.
            max_iter: Forwarded to the model constructor.
            vad_K: Forwarded to ``fit`` as ``k``.

        Returns:
            None. Progress is printed and the result is written to disk.
        """
        tenths = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        count_cases = False  # only the multi-weight variants report a count

        if type == 'cofactor':
            banner = 'modified cofactor model, using weight for the embedding'
            factory = cofactor.CoFacto
            weight_names = ('mu',)
            grid = [(mu,) for mu in tenths + [1.0]]
            lambda_names = ('lambda_theta', 'lambda_gamma')
            sppmi_attrs = ('X_sppmi',)
            progress_fmt = 'mu = %.2f'
            best_fmt = 'Best with mu = %.2f'
            fallback_best = (1.0,)
            out_prefix = 'ModifiedCofactor'
        elif type == 'model2':
            banner = 'positive project embedding + positive user embedding'
            factory = mymodel2.ParallelMFPosUserPosProjectEmbedding
            weight_names = ('mu_u', 'mu_p')
            grid = [(mu_u, 1.0 - mu_u) for mu_u in tenths]
            lambda_names = ('lambda_theta', 'lambda_gamma')
            sppmi_attrs = ('X_sppmi', 'Y_sppmi')
            progress_fmt = 'mu_u = %.2f , mu_p = %.2f'
            best_fmt = 'Best with mu_u = %.2f and mu_p = %.2f'
            fallback_best = (0.0, 1.0 - 0.0)
            out_prefix = 'Model2'
        elif type == 'model3':
            banner = 'positive project embedding + negative project embedding'
            factory = mymodel3.ParallelMFPositiveNegativeProjectEmbedding
            weight_names = ('mu_p_p', 'mu_p_n')
            grid = [(mu_p_p, 1.0 - mu_p_p) for mu_p_p in tenths]
            lambda_names = ('lambda_theta', 'lambda_gamma_p', 'lambda_gamma_n')
            sppmi_attrs = ('X_sppmi', 'X_neg_sppmi')
            progress_fmt = 'mu_p_p = %.2f , mu_p_n = %.2f'
            best_fmt = 'Best with mu_p_p = %.2f and mu_p_n = %.2f'
            fallback_best = (0.0, 1.0 - 0.0)
            out_prefix = 'Model3'
        elif type == 'model4':
            banner = 'positive user embedding + negative user embedding'
            factory = mymodel4.ParallelMFPositiveNegativeUserEmbedding
            weight_names = ('mu_u_p', 'mu_u_n')
            grid = [(mu_u_p, 1.0 - mu_u_p) for mu_u_p in tenths]
            lambda_names = ('lambda_theta_p', 'lambda_theta_n')
            sppmi_attrs = ('Y_sppmi', 'Y_neg_sppmi')
            progress_fmt = 'mu_u_p = %.2f , mu_u_n = %.2f'
            best_fmt = 'Best with mu_u_p = %.2f and mu_u_n = %.2f'
            fallback_best = (0.1, 1.0 - 0.1)
            out_prefix = 'Model4'
        elif type == 'model5':
            banner = ('positive and negative project embedding + '
                      'positive user embedding')
            factory = mymodel5.ParallelMFPosUserPosNegProjectEmbedding
            weight_names = ('mu_u_p', 'mu_p_p', 'mu_p_n')
            grid = self._mu_triples()
            lambda_names = ('lambda_theta_p', 'lambda_gamma_p',
                            'lambda_gamma_n')
            sppmi_attrs = ('X_sppmi', 'X_neg_sppmi', 'Y_sppmi')
            progress_fmt = 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f'
            best_fmt = 'Best with ' + progress_fmt
            fallback_best = (0.0, 0.0, 0.0)
            out_prefix = 'Model5'
            count_cases = True
        elif type == 'model6':
            banner = ('positive project embedding + '
                      'positive + negative user embedding')
            factory = mymodel6.ParallelMFPosNegUserPosProjectEmbedding
            weight_names = ('mu_u_p', 'mu_u_n', 'mu_p_p')
            grid = self._mu_triples()
            lambda_names = ('lambda_theta_p', 'lambda_theta_n',
                            'lambda_gamma_p')
            sppmi_attrs = ('X_sppmi', 'Y_sppmi', 'Y_neg_sppmi')
            progress_fmt = 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f'
            best_fmt = 'Best with ' + progress_fmt
            fallback_best = (0.0, 0.0, 0.0)
            out_prefix = 'Model6'
            count_cases = True
        elif type == 'model7':
            banner = ('positive + negative project embedding + '
                      'positive + negative user embedding')
            factory = mymodel7.ParallelMFPositiveNegativeUserProjectEmbedding
            weight_names = ('mu_u_p', 'mu_u_n', 'mu_p_p', 'mu_p_n')
            grid = self._mu_quadruples()
            lambda_names = ('lambda_theta_p', 'lambda_theta_n',
                            'lambda_gamma_p', 'lambda_gamma_n')
            sppmi_attrs = ('X_sppmi', 'X_neg_sppmi', 'Y_sppmi', 'Y_neg_sppmi')
            progress_fmt = ('mu_u_p = %.1f, mu_u_n = %.1f, '
                            'mu_p_p = %.1f, mu_p_n = %.1f')
            best_fmt = 'Best with ' + progress_fmt
            fallback_best = (0.0, 0.0, 0.0, 0.0)
            out_prefix = 'Model7'
            count_cases = True
        else:
            # Unknown variants were always ignored; keep that contract.
            return

        print(banner)
        best_ndcg_10, best_U, best_V, best_weights = self._search_mu_grid(
            factory, weight_names, grid, lambda_names, sppmi_attrs,
            progress_fmt, fallback_best, n_jobs, n_components, max_iter, vad_K)

        if count_cases:
            print('%d  cases' % len(grid))
        print(best_fmt % best_weights)

        if best_U is None:
            # No setting was reported as an improvement, so there are no
            # factors to persist; writing U=None/V=None would only produce a
            # pickled placeholder file.
            print('No setting improved on the initial NDCG@10; nothing saved')
            return

        # The file name carries the real latent dimensionality (it used to say
        # "K100" unconditionally). "ndcd10" is kept as-is because existing
        # files and consumers use that spelling.
        model_out_name = '%s_K%d_best_ndcd10_%.4f.npz' % (
            out_prefix, n_components, best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)

    def _search_mu_grid(self, factory, weight_names, grid, lambda_names,
                        sppmi_attrs, progress_fmt, fallback_best, n_jobs,
                        n_components, max_iter, vad_K):
        """Train and score one model per weight setting; return the best one.

        Args:
            factory: Callable building the model from keyword arguments.
            weight_names: Constructor keyword names of the ``mu`` weights.
            grid: Sequence of value tuples aligned with ``weight_names``.
            lambda_names: Extra regularisation keywords (besides
                ``lambda_alpha``/``lambda_beta``) the constructor accepts.
            sppmi_attrs: Names of the ``self`` attributes passed positionally
                to ``fit`` after ``self.train_data``.
            progress_fmt: ``%``-format printed before each run.
            fallback_best: Weights reported when no run is an improvement.
            n_jobs, n_components, max_iter: Forwarded to the constructor.
            vad_K: Forwarded to ``fit`` as ``k``.

        Returns:
            ``(best_ndcg_10, best_U, best_V, best_weights)``; ``best_U`` and
            ``best_V`` are ``None`` if ``local_eval`` never reported a gain.
        """
        lam = 1e-1  # every regularisation weight uses the same value
        best_ndcg_10 = 0.0
        best_U = best_V = None
        best_weights = fallback_best

        for weights in grid:
            print(progress_fmt % weights)
            print(self.save_dir)
            self.clean_savedir()

            model_kwargs = dict(n_components=n_components,
                                max_iter=max_iter,
                                batch_size=1000,
                                init_std=0.01,
                                dtype=np.float32,
                                n_jobs=n_jobs,
                                random_state=98765,
                                save_params=True,
                                save_dir=self.save_dir,
                                early_stopping=True,
                                verbose=True,
                                lambda_alpha=lam,
                                lambda_beta=lam,
                                c0=1.,
                                c1=20.)
            model_kwargs.update((name, lam) for name in lambda_names)
            model_kwargs.update(zip(weight_names, weights))
            coder = factory(**model_kwargs)

            # Attributes are resolved here so that only the matrices needed by
            # the selected variant are ever touched.
            sppmi_inputs = [getattr(self, attr) for attr in sppmi_attrs]
            # NOTE(review): fit() gets a fixed n_jobs=4 regardless of the
            # n_jobs given to the constructor -- kept as found; confirm intent.
            coder.fit(self.train_data,
                      *sppmi_inputs,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)

            self.test_data.data = np.ones_like(self.test_data.data)

            # The checkpoint index is derived from the number of .npz files
            # in save_dir, which clean_savedir() is called on before each run.
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            last_checkpoint = os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1))
            # Close the archive once both arrays are read so file handles do
            # not accumulate over the grid.
            with np.load(last_checkpoint) as params:
                U, V = params['U'], params['V']

            (is_better, new_U, new_V,
             new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
            if is_better:
                best_ndcg_10 = new_ndcg_10
                best_U = new_U
                best_V = new_V
                best_weights = weights

        return best_ndcg_10, best_U, best_V, best_weights

    @staticmethod
    def _mu_triples():
        """Return the 0.1-step grid of three weights (free, free, remainder).

        The third weight is ``1.0`` minus the first two; settings whose
        remainder is not above ``0.001`` are dropped, which also discards any
        extra endpoint ``np.arange`` may emit through floating-point rounding.
        """
        triples = []
        for first in np.arange(0.1, 0.9, 0.1):
            for second in np.arange(0.1, 1.0 - first, 0.1):
                remainder = 1.0 - first - second
                if remainder > 0.001:
                    triples.append((first, second, remainder))
        return triples

    @staticmethod
    def _mu_quadruples():
        """Return the 0.1-step grid of four weights (three free + remainder).

        Same construction as ``_mu_triples`` with one more free weight.
        """
        quadruples = []
        for first in np.arange(0.1, 0.8, 0.1):
            for second in np.arange(0.1, 0.9 - first, 0.1):
                for third in np.arange(0.1, 1.0 - first - second, 0.1):
                    remainder = 1.0 - first - second - third
                    if remainder > 0.001:
                        quadruples.append((first, second, third, remainder))
        return quadruples
Exemplo n.º 2
0
    def run(self, type, n_jobs=16, n_components=100, max_iter=50, vad_K=100, **kwargs):
        """Train one model, evaluate its factors and optionally save them.

        Args:
            type: ``'wmf'``, ``'cofactor'`` or ``'rme'``. Any other value
                prints a hint and terminates the process via ``sys.exit(1)``.
                (The name shadows the builtin; kept for keyword callers.)
            n_jobs: Forwarded to the CoFacto / ParallelRME constructor.
            n_components: Latent dimensionality.
            max_iter: Forwarded to the CoFacto / ParallelRME constructor.
            vad_K: Forwarded to ``fit`` as ``k`` and to ``cal_ndcg`` as ``K``.
            **kwargs: Optional settings:
                saved_model: If truthy, save ``U``/``V`` under
                    ``constants.SAVED_MODLE_DIR``.
                lam: Regularisation weight (default ``1e-1``).
                lam_emb: Regularisation weight of the embedding terms
                    (defaults to ``lam``).
                mu_p_p, mu_p_n, mu_u_p: ``'rme'`` embedding weights
                    (default ``1.0`` each).
                ret_params_only: ``'rme'`` only; if truthy, skip the test
                    evaluation and return the factors instead.

        Returns:
            ``(recall_all, ndcg_all, map_all)`` as returned by ``self.eval``,
            or ``(U, V, self.cal_ndcg(U, V, K=vad_K))`` for ``'rme'`` with
            ``ret_params_only`` set.
        """
        saved_model = kwargs.get('saved_model', False)
        if saved_model:
            # Create the directory the models are actually written to. A
            # hard-coded 'MODELS' folder used to be created here while the
            # files went to constants.SAVED_MODLE_DIR, so a save could fail
            # only after training had finished.
            models_dir = constants.SAVED_MODLE_DIR
            if not os.path.isdir(models_dir):
                os.makedirs(models_dir)

        lam = kwargs.get('lam', 1e-1)
        lam_emb = kwargs.get('lam_emb', lam)
        c0 = 1.
        c1 = 10.

        self.n_components, self.lam, self.lam_emb = n_components, lam, lam_emb

        print('*************************************lam =  %.3f ******************************************' % lam)
        print('*************************************lam embedding =  %.3f ******************************************' % lam_emb)

        if type == 'wmf':
            U, V = wmf.decompose(self.train_data, self.vad_data,
                                 num_factors=n_components, lam=lam)
            # NOTE(review): unlike the other branches, test_data.data is not
            # reset to ones before evaluation here -- kept as found.
            out_prefix = 'WMF'

        elif type == 'cofactor':
            print('cofactor model')
            print(self.save_dir)
            self.clean_savedir()
            model = cofactor.CoFacto(
                n_components=n_components, max_iter=max_iter, batch_size=1000,
                init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
                random_state=98765, save_params=True, save_dir=self.save_dir,
                early_stopping=True, verbose=True,
                lambda_alpha=lam, lambda_theta=lam_emb, lambda_beta=lam,
                lambda_gamma=lam_emb, c0=c0, c1=c1)
            # NOTE(review): fit() uses a fixed n_jobs=16, independent of the
            # n_jobs argument -- kept as found.
            model.fit(self.train_data, self.X_sppmi, vad_data=self.vad_data,
                      batch_users=3000, k=vad_K, clear_invalid=False,
                      n_jobs=16)
            self.test_data.data = np.ones_like(self.test_data.data)
            U, V = self._load_final_factors('CoFacto', n_components)
            out_prefix = 'Cofactor'

        elif type == 'rme':
            ret_params_only = bool(kwargs.get("ret_params_only", False))
            print('positive and negative project embedding + positive user embedding')
            # Weights of the liked-item, disliked-item and user embeddings.
            mu_p_p = float(kwargs.get('mu_p_p', 1.0))
            mu_p_n = float(kwargs.get('mu_p_n', 1.0))
            mu_u_p = float(kwargs.get('mu_u_p', 1.0))

            print('mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (mu_u_p, mu_p_p, mu_p_n))
            print(self.save_dir)
            self.clean_savedir()

            model = ParallelRME(
                mu_u_p=mu_u_p, mu_p_p=mu_p_p, mu_p_n=mu_p_n,
                n_components=n_components, max_iter=max_iter, batch_size=3000,
                init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
                random_state=98765, save_params=True, save_dir=self.save_dir,
                early_stopping=True, verbose=True,
                lambda_alpha=lam, lambda_theta_p=lam_emb,
                lambda_beta=lam, lambda_gamma_p=lam_emb,
                lambda_gamma_n=lam_emb, c0=c0, c1=c1)
            # NOTE(review): fit() uses a fixed n_jobs=15 -- kept as found.
            model.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                      self.Y_sppmi, vad_data=self.vad_data, batch_users=3000,
                      k=vad_K, clear_invalid=False, n_jobs=15)

            U, V = self._load_final_factors('RME', n_components)
            if ret_params_only:
                return (U, V, self.cal_ndcg(U, V, K=vad_K))
            self.test_data.data = np.ones_like(self.test_data.data)
            out_prefix = 'RME'

        else:
            print('Please select model from: rme, cofactor, wmf')
            sys.exit(1)

        (recall_all, ndcg_all, map_all) = self.eval(U, V)
        if saved_model:
            model_out_name = os.path.join(
                constants.SAVED_MODLE_DIR,
                '%s_K%d_lambda%.4f.npz' % (out_prefix, n_components, lam))
            np.savez(model_out_name, U=U, V=V)

        return (recall_all, ndcg_all, map_all)

    def _load_final_factors(self, prefix, n_components):
        """Load ``U`` and ``V`` from the last checkpoint in ``self.save_dir``.

        The checkpoint index is the number of ``*.npz`` files in the directory
        minus one; the file read is ``<prefix>_K<n_components>_iter<idx>.npz``.
        The archive is closed before returning so no file handle is leaked.
        """
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        checkpoint = os.path.join(
            self.save_dir,
            '%s_K%d_iter%d.npz' % (prefix, n_components, n_params - 1))
        with np.load(checkpoint) as params:
            return params['U'], params['V']
Exemplo n.º 3
0
    def run_alone(self,
                  type,
                  n_jobs=8,
                  n_components=100,
                  max_iter=50,
                  vad_K=100,
                  **kwargs):
        lam_alpha = lam_beta = 1e-1
        lam_theta = lam_gamma = 1e-1
        lam_gamma_p = 1e-1
        lam_gamma_n = 1e-1
        lam_theta_p = 1e-1
        lam_theta_n = 1e-1
        c0 = 1.
        c1 = 20.
        fold = kwargs.get('fold', -1)
        if fold == -1:
            fold = ''
        else:
            fold = 'fold%d_' % fold
        if type == 'cofactor':
            print 'cofactor model'
            print self.save_dir
            self.clean_savedir()
            coder = cofactor.CoFacto(n_components=n_components,
                                     max_iter=max_iter,
                                     batch_size=1000,
                                     init_std=0.01,
                                     dtype=np.float32,
                                     n_jobs=n_jobs,
                                     random_state=98765,
                                     save_params=True,
                                     save_dir=self.save_dir,
                                     early_stopping=True,
                                     verbose=True,
                                     lambda_alpha=lam_alpha,
                                     lambda_theta=lam_theta,
                                     lambda_beta=lam_beta,
                                     lambda_gamma=lam_gamma,
                                     c0=c0,
                                     c1=c1)
            coder.fit(self.train_data,
                      self.X_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)
            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Cofactor_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)

        if type == 'model2':
            print 'positive project embedding + positive user embedding'
            mu_p = float(kwargs.get('mu_p_p', 0.6))
            mu_u = float(kwargs.get('mu_u_p', -1.0))
            if mu_u == -1.0:
                mu_u = 1.0 - mu_p
            print 'mu_u = %.2f , mu_p = %.2f' % (mu_u, mu_p)
            print self.save_dir
            self.clean_savedir()
            # coder = mymodel2.MFPositiveUserProjectEmbedding(mu_u = mu_u, mu_p = mu_p,
            #                  n_components=n_components, max_iter=max_iter, batch_size=1000, init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            #                  random_state=98765, save_params=True, save_dir=self.save_dir, early_stopping=True, verbose=True,
            #                  lambda_alpha = lam_alpha, lambda_theta=lam_theta, lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
            coder = mymodel2.ParallelMFPosUserPosProjectEmbedding(
                mu_u=mu_u,
                mu_p=mu_p,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta=lam_theta,
                lambda_beta=lam_beta,
                lambda_gamma=lam_gamma,
                c0=c0,
                c1=c1)
            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.Y_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)

            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Model2_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)

        if type == 'model3':
            print 'positive project embedding + negative project embedding'
            mu_p_p = float(kwargs.get('mu_p_p', 0.4))
            mu_p_n = float(kwargs.get('mu_p_n', -1.0))
            if mu_p_n == -1.0:
                mu_p_n = 1.0 - mu_p_p
            print 'mu_p_p = %.2f , mu_p_n = %.2f' % (mu_p_p, mu_p_n)
            print self.save_dir
            self.clean_savedir()

            coder = mymodel3.ParallelMFPositiveNegativeProjectEmbedding(
                mu_p_p=mu_p_p,
                mu_p_n=mu_p_n,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta=lam_theta,
                lambda_beta=lam_beta,
                lambda_gamma_p=lam_gamma_p,
                lambda_gamma_n=lam_gamma_n,
                c0=c0,
                c1=c1)  # lambda_gamma = 1e-1
            coder.fit(
                self.train_data,
                self.X_sppmi,
                self.X_neg_sppmi,
                vad_data=self.vad_data,
                batch_users=300,
                k=vad_K,
                clear_invalid=False,
                n_jobs=4
            )  #for active_cate selection, should use clear_invalid = False

            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Model3_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)

        if type == 'model4':
            print 'positive user embedding + negative user embedding'
            mu_u_p = float(kwargs.get('mu_u_p', 0.4))
            mu_u_n = float(kwargs.get('mu_u_n', -1.0))
            if mu_u_n == -1.0:
                mu_u_n = 1.0 - mu_u_p
            print 'mu_u_p = %.2f , mu_u_n = %.2f' % (mu_u_p, mu_u_n)
            print self.save_dir
            self.clean_savedir()

            coder = mymodel4.ParallelMFPositiveNegativeUserEmbedding(
                mu_u_p=mu_u_p,
                mu_u_n=mu_u_n,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta_p=lam_theta_p,
                lambda_theta_n=lam_theta_n,
                lambda_beta=lam_beta,
                c0=c0,
                c1=c1)
            coder.fit(self.train_data,
                      self.Y_sppmi,
                      self.Y_neg_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)

            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Model4_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)
        if type == 'mcf':
            print 'positive and negative project embedding + positive user embedding'
            mu_p_p = float(kwargs.get('mu_p_p', 0.4))
            mu_p_n = float(kwargs.get('mu_p_n', 0.4))
            mu_u_p = float(kwargs.get('mu_u_p', -1.0))
            if mu_u_p == -1.0:
                mu_u_p = 1.0 - mu_p_p - mu_p_n

            print 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
                mu_u_p, mu_p_p, mu_p_n)
            print self.save_dir
            self.clean_savedir()

            coder = mymodel5.ParallelMFPosUserPosNegProjectEmbedding(
                mu_u_p=mu_u_p,
                mu_p_p=mu_p_p,
                mu_p_n=mu_p_n,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta_p=lam_theta_p,
                lambda_beta=lam_beta,
                lambda_gamma_p=lam_gamma_p,
                lambda_gamma_n=lam_gamma_n,
                c0=c0,
                c1=c1)
            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.X_neg_sppmi,
                      self.Y_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)

            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Model5_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)
        if type == 'model6':
            print 'positive project embedding + positive + negative user embedding'
            mu_u_p = float(kwargs.get('mu_u_p', 0.4))
            mu_u_n = float(kwargs.get('mu_u_n', 0.4))
            mu_p_p = float(kwargs.get('mu_p_p', -1.0))
            if mu_p_p == -1.0:
                mu_p_p = 1.0 - mu_u_p - mu_u_n

            print 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f' % (
                mu_u_p, mu_u_n, mu_p_p)
            print self.save_dir
            self.clean_savedir()

            coder = mymodel6.ParallelMFPosNegUserPosProjectEmbedding(
                mu_u_p=mu_u_p,
                mu_u_n=mu_u_n,
                mu_p_p=mu_p_p,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta_p=lam_theta_p,
                lambda_theta_n=lam_theta_n,
                lambda_beta=lam_beta,
                lambda_gamma_p=lam_gamma_p,
                c0=c0,
                c1=c1)

            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.Y_sppmi,
                      self.Y_neg_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)

            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Model6_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)
        if type == 'model7':
            print 'positive + negative project embedding + positive + negative user embedding'
            mu_u_p = float(kwargs.get('mu_u_p', 0.3))
            mu_u_n = float(kwargs.get('mu_u_n', 0.3))
            mu_p_p = float(kwargs.get('mu_p_p', 0.3))
            mu_p_n = float(kwargs.get('mu_p_n', -1.0))
            if mu_p_n == -1.0:
                mu_p_n = 1.0 - mu_u_p - mu_u_n - mu_p_p

            print 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
                mu_u_p, mu_u_n, mu_p_p, mu_p_n)
            print self.save_dir
            self.clean_savedir()

            coder = mymodel7.ParallelMFPositiveNegativeUserProjectEmbedding(
                mu_u_p=mu_u_p,
                mu_u_n=mu_u_n,
                mu_p_p=mu_p_p,
                mu_p_n=mu_p_n,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta_p=lam_theta_p,
                lambda_theta_n=lam_theta_n,
                lambda_beta=lam_beta,
                lambda_gamma_p=lam_gamma_p,
                lambda_gamma_n=lam_gamma_n,
                c0=c0,
                c1=c1)
            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.X_neg_sppmi,
                      self.Y_sppmi,
                      self.Y_neg_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=False,
                      n_jobs=4)

            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Model7_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)

        if type == 'separatedmodel2':
            print 'positive project embedding + positive user embedding'
            mu_p = float(kwargs.get('mu_p_p', 0.6))
            mu_u = float(kwargs.get('mu_u_p', -1.0))
            if mu_u == -1.0:
                mu_u = 1.0 - mu_p
            print 'mu_u = %.2f , mu_p = %.2f' % (mu_u, mu_p)
            print self.save_dir
            self.clean_savedir()
            # coder = mymodel2.MFPositiveUserProjectEmbedding(mu_u = mu_u, mu_p = mu_p,
            #                  n_components=n_components, max_iter=max_iter, batch_size=1000, init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            #                  random_state=98765, save_params=True, save_dir=self.save_dir, early_stopping=True, verbose=True,
            #                  lambda_alpha = lam_alpha, lambda_theta=lam_theta, lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
            coder = separatedmymodel2.SeparatedParallelMFPosUserPosProjectEmbedding(
                mu_u=mu_u,
                mu_p=mu_p,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta=lam_theta,
                lambda_beta=lam_beta,
                lambda_gamma=lam_gamma,
                c0=c0,
                c1=c1)
            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.Y_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=True,
                      n_jobs=16)

            self.test_data.data = np.ones_like(self.test_data.data)
            params = np.load(
                os.path.join(self.save_dir,
                             'CoFacto_K%d_separated.npz' % (n_components)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Separated_Model2_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)

        if type == 'separatedmodel3':
            print 'separated positive project embedding + negative project embedding'
            mu_p_p = float(kwargs.get('mu_p_p', 0.4))
            mu_p_n = float(kwargs.get('mu_p_n', -1.0))
            if mu_p_n == -1.0:
                mu_p_n = 1.0 - mu_p_p
            print 'mu_p_p = %.2f , mu_p_n = %.2f' % (mu_p_p, mu_p_n)
            print self.save_dir
            self.clean_savedir()

            coder = separatedmymodel3.SeparatedParallelMFPositiveNegativeProjectEmbedding(
                mu_p_p=mu_p_p,
                mu_p_n=mu_p_n,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta=lam_theta,
                lambda_beta=lam_beta,
                lambda_gamma_p=lam_gamma_p,
                lambda_gamma_n=lam_gamma_n,
                c0=c0,
                c1=c1)  # lambda_gamma = 1e-1
            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.X_neg_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=True,
                      n_jobs=16)

            self.test_data.data = np.ones_like(self.test_data.data)
            params = np.load(
                os.path.join(self.save_dir,
                             'CoFacto_K%d_separated.npz' % (n_components)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Separated_Model3_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)

        if type == 'separatedmcf':
            print 'positive and negative project embedding + positive user embedding'
            mu_p_p = float(kwargs.get('mu_p_p', 0.4))
            mu_p_n = float(kwargs.get('mu_p_n', 0.4))
            mu_u_p = float(kwargs.get('mu_u_p', -1.0))
            if mu_u_p == -1.0:
                mu_u_p = 1.0 - mu_p_p - mu_p_n

            print 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
                mu_u_p, mu_p_p, mu_p_n)
            print self.save_dir
            self.clean_savedir()

            coder = separatedmymodel5.SeparatedParallelMFPosUserPosNegProjectEmbedding(
                mu_u_p=mu_u_p,
                mu_p_p=mu_p_p,
                mu_p_n=mu_p_n,
                n_components=n_components,
                max_iter=max_iter,
                batch_size=1000,
                init_std=0.01,
                dtype=np.float32,
                n_jobs=n_jobs,
                random_state=98765,
                save_params=True,
                save_dir=self.save_dir,
                early_stopping=True,
                verbose=True,
                lambda_alpha=lam_alpha,
                lambda_theta_p=lam_theta_p,
                lambda_beta=lam_beta,
                lambda_gamma_p=lam_gamma_p,
                lambda_gamma_n=lam_gamma_n,
                c0=c0,
                c1=c1)
            coder.fit(self.train_data,
                      self.X_sppmi,
                      self.X_neg_sppmi,
                      self.Y_sppmi,
                      vad_data=self.vad_data,
                      batch_users=300,
                      k=vad_K,
                      clear_invalid=True,
                      n_jobs=16)

            self.test_data.data = np.ones_like(self.test_data.data)
            params = np.load(
                os.path.join(self.save_dir,
                             'CoFacto_K%d_separated.npz' % (n_components)))
            U, V = params['U'], params['V']
            (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
            model_out_name = 'Separated_Model5_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
                fold, recall100, ndcg100, map100)
            np.savez(model_out_name, U=U, V=V)