Example #1
    def __init__(self,
                 A,
                 loss,
                 regX,
                 regY,
                 k,
                 missing_list=None,
                 converge=None,
                 scale=True):

        self.scale = scale
        # Turn everything in to lists / convert to correct dimensions
        if not isinstance(A, list): A = [A]
        if not isinstance(loss, list): loss = [loss]
        if not isinstance(regY, list): regY = [regY]
        if len(regY) == 1 and len(regY) < len(loss):
            regY = [copy(regY[0]) for _ in range(len(loss))]
        if missing_list and not isinstance(missing_list[0], list):
            missing_list = [missing_list]

        loss = [L(Aj) for Aj, L in zip(A, loss)]

        # save necessary info
        self.A, self.k, self.L = A, k, loss
        if converge is None: self.converge = Convergence()
        else: self.converge = converge

        # initialize cvxpy problems
        self._initialize_probs(A, k, missing_list, regX, regY)
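
A minimal usage sketch for the constructor above, assuming it belongs to a low-rank-model class (the name Model and the loss/regularizer classes QuadraticLoss/ZeroReg are hypothetical stand-ins the excerpt does not show). It illustrates the conventions the code enforces: single arguments are listified automatically, loss entries are classes instantiated as L(Aj), and a length-1 regY is broadcast to one copy per loss.

import numpy as np

# Hypothetical names: Model, QuadraticLoss and ZeroReg stand in for the
# actual class and its loss/regularizer types, which the excerpt omits.
A1 = np.random.randn(100, 20)   # first data block
A2 = np.random.randn(100, 5)    # second block with its own loss
model = Model([A1, A2],
              loss=[QuadraticLoss, QuadraticLoss],  # instantiated as L(Aj)
              regX=ZeroReg(),       # regularizer on the shared factor X
              regY=[ZeroReg()],     # length 1, broadcast to len(loss) copies
              k=5)                  # target rank; converge defaults to Convergence()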
Example #2
    def __init__(self,
                 data_mat=None,
                 W=None,
                 H=None,
                 res_dir=None,
                 rank=4,
                 SNR=-5,
                 seed_num=1,
                 true_labels=None):
        if data_mat is None or W is None or H is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_mat = data_mat
        self.W, self.H = W, H
        self.rank = rank
        self.SNR = SNR
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.labels = true_labels
        np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        self.flag = 0  # flag to indicate whether to use LS or gradient descent to update W
        m_name = 'km' + str(self.flag)
        self.output_dir = path.join(self.res_dir, 'onmf', m_name,
                                    'rank' + str(self.rank), 'data' + str(SNR),
                                    'seed' + str(self.seed_num))
        self.time_used = 0  # record the time elapsed when running the simulations
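
With the defaults above (flag = 0, so m_name = 'km0'; rank = 4; SNR = -5; seed_num = 1) and an assumed res_dir of 'results', the output directory resolves as follows:

from os import path

# 'results' is an assumed value for res_dir; the rest follows from the defaults.
path.join('results', 'onmf', 'km0', 'rank4', 'data-5', 'seed1')
# -> 'results/onmf/km0/rank4/data-5/seed1' (on POSIX)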
Example #3
    def __init__(self,
                 data_manager=None,
                 res_dir=None,
                 rank=4,
                 seed_num=1,
                 mul=0,
                 nu=0):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=True)
        self.data_mat = self.data_manager.get_data_mat()
        self.mul = mul
        self.nu = nu
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.true_labels = self.data_manager.get_labels()
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.W_bound = False  # flag to indicate whether to constrain W by upper bound and lower bound
        self.W_step = 0.51
        self.H_step = 0.51

        self.time_used = 0  # record the time elapsed when running the simulation
        start_time = time.time()
        self.initialize_penalty_para()
        end_time = time.time()
        self.time_used += end_time - start_time
        self.set_tol(1e-3)
        self.set_max_iters(400)

        W_bound = 'W_bound' if self.W_bound else 'W_nobound'
        self.output_dir = path.join(
            self.res_dir, 'onmf', 'sncp2_W1H1',
            W_bound + '_epsilon' + str(self.inner_tol) + '&gamma' +
            str(self.gamma) + '&mul' + str(self.mul) + '&nu' + str(self.nu),
            'rank' + str(self.rank), self.data_manager.get_data_name(),
            'seed' + str(self.seed_num))

        # we construct a result manager to manage and save the result
        res_dir1 = path.join(
            res_dir, 'onmf', 'sncp2_new', self.data_manager.get_data_name(),
            'cls' + str(rank),
            W_bound + 'W' + str(self.W_step) + 'H' + str(self.H_step),
            'inner' + str(self.inner_tol) + '&gamma' + str(self.gamma) +
            '&mul' + str(self.mul) + '&nu' + str(self.nu),
            'seed' + str(self.seed_num))
        # get an instance of ClusterONMFManager to manage the generated result
        self.res_manager = ClusterONMFManager(root_dir=res_dir1, save_pdv=False)

        # initialize some variables to store info
        self.acc_iter = []  # record the clustering accuracy for each iteration
        self.time_iter = []  # record the time for each iteration
        self.nmf_cost_iter = []  # record the nmf cost after each iteration
        self.pobj_iter = []  # record the penalized objective value after each iteration
        self.obj_iter = []  # record the objective value for each iteration
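
The bookkeeping lists above imply a driver loop that appends one measurement per iteration. A minimal sketch of such a loop, assuming a hypothetical per-iteration update method update_WH() (the excerpt does not show the actual solver):

import time
from numpy import linalg as LA

def solve(self):  # hypothetical driver, not part of the excerpt
    for it in range(400):  # matches self.set_max_iters(400) above
        t0 = time.time()
        self.update_WH()   # assumed SNCP-style update of W and H
        self.time_used += time.time() - t0
        self.time_iter.append(self.time_used)  # cumulative time per iteration
        # normalized NMF reconstruction cost after this iteration
        self.nmf_cost_iter.append(
            LA.norm(self.data_mat - self.W.dot(self.H), 'fro')**2 / self.n_factor)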
Example #4
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.data_mat = self.data_manager.get_data_mat()
        self.H = np.asmatrix(self.H).transpose()
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        #self.n_factor = m * n  # set the normalization factor to normalize the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2
        self.time_used = 0  # record the time used by the method
Example #5
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: input is missing!')
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=True)
        self.W = np.asmatrix(self.W, dtype=np.float64)
        self.H = np.asmatrix(self.H, dtype=np.float64)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        #(m, n) = self.data_mat.shape
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2  # set the normalization factor to normalize the objective value
        self.time_used = 0
        self.flag = 0  # the flag indicates whether W can be negative or not, depending on the data
Example #6
def test_main(tmpdir):

    in_path = os.path.join(data_dir_path, "prD.do")
    out_path = str(tmpdir.join("test_main.txt"))

    assert not os.path.isfile(out_path)

    # Read in the file
    main_list = simple_read(in_path)

    # Run convergence study
    mainver = Convergence(main_list)

    # Write the report
    mainver.add_file(out_path, write_mode='w')
    mainver(coarse=True, ratios=True)

    assert os.path.isfile(out_path)
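
Note that, unlike the constructors above, this Convergence is built from a list of parsed results and is itself callable. The tmpdir argument is pytest's built-in fixture, which supplies a fresh temporary directory per test run, so the asserts around out_path verify that the report file is created by the call rather than left over from a previous run.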
Example #7
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        #self.n_factor = m * n  # set the normalization factor to normalize the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2
        self.flag = 0  # a flag to indicate which (problem, method) pair to use:
                       # 0: nmf_fro + multiplicative rule
                       # 1: nmf_kl + multiplicative rule
                       # 2: nmf_fro + palm
        self.time_used = 0  # record the time used by the method
Example #8
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.data_mat = np.asmatrix(np.copy(self.data_mat).transpose())
        W_init, H_init = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.F, self.G = H_init.transpose(), W_init
        self.res_dir = res_dir
        self.rank = rank
        #self.SNR = SNR
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        self.flag = 0  # flag to indicate whether G can be negative or not
        # flag = 0: G should be nonnegative
        # flag = 1: G can be negative
        #self.n_factor = m * n  # set the normalization factor to normalize the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.time_used = 0  # record the time used by the method
        self.U = None  # used for updating F