Example #1
import numpy as np


def loadSubsetData(data, RS, subset_sizes, clientNum):
    # Deal each of clientNum clients a contiguous block of rows, wrapping
    # around to the start of the data when a block runs past the end.
    # Note: despite the plural name, subset_sizes is a single int here.
    N_rows = data['X'].shape[0]
    partitions = []

    # countPre = 0
    for i in range(clientNum):
        # Count = (i * subset_sizes) / N_rows
        # if Count > countPre:
        #     data = dictslice(data, RS.permutation(N_rows))
        #     countPre = Count
        startNum = (i * subset_sizes) % N_rows
        print("current startNum " + str(startNum))
        if (startNum + subset_sizes) > N_rows:
            # The block overruns the end: take the tail, then wrap around.
            part1 = dictslice(data, slice(startNum, N_rows))
            part2 = dictslice(data, slice(0, startNum + subset_sizes - N_rows))
            # Build a fresh dict rather than aliasing part1 (the original
            # wrote through subset = part1, mutating part1 in place).
            subset = {'X': np.concatenate((part1['X'], part2['X']), axis=0),
                      'T': np.concatenate((part1['T'], part2['T']), axis=0)}
        else:
            subset = dictslice(data, slice(startNum, startNum + subset_sizes))

        # subset = random_partition(subset, RS, [subset_sizes])[0]
        partitions.append(subset)

    return partitions
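Every example on this page leans on a dictslice helper. A minimal sketch of what it presumably does (the actual helper lives in the hypergrad codebase and may differ): apply one row selector to every array in a dict of aligned arrays.

def dictslice(data, idxs):
    # Apply the same row selector (a slice, an index array, or a
    # permutation) to every array in the dict.
    return {k: v[idxs] for k, v in data.items()}

With that sketch in place, a quick check of the wrap-around branch in Example #1 (toy data; RS=None works because RS is unused in the live code path):

data = {'X': np.arange(10)[:, None], 'T': np.eye(10)}
parts = loadSubsetData(data, RS=None, subset_sizes=4, clientNum=3)
print([p['X'].ravel().tolist() for p in parts])
# [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 0, 1]] -- the last client wraps around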
Example #2
def hyperloss(hyperparam_vect,
              i_hyper,
              alphabets,
              verbose=True,
              report_train_loss=False):
    # Seeded with a tuple (containing a string), so RandomState here must be
    # a seed-hashing wrapper as in the hypergrad codebase, not numpy's class.
    RS = RandomState((seed, i_hyper, "hyperloss"))
    alphabet = shuffle_alphabet(RS.choice(alphabets), RS)
    N_train = alphabet['X'].shape[0] - N_valid_dpts
    train_data = dictslice(alphabet, slice(None, N_train))
    if report_train_loss:
        valid_data = dictslice(alphabet, slice(None, N_valid_dpts))
    else:
        valid_data = dictslice(alphabet, slice(N_train, None))

    def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
        RS = RandomState((seed, i_hyper, i_primal))
        idxs = RS.permutation(N_train)[:batch_size]
        minibatch = dictslice(train_data, idxs)
        loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
        if verbose and i_primal % 10 == 0:
            print("Iter {0}, loss, {1}".format(i_primal, getval(loss)))
        return loss

    W0 = RS.randn(N_weights) * initialization_scale
    W_final = sgd(grad(primal_loss),
                  hyperparam_vect,
                  W0,
                  alpha,
                  beta,
                  N_iters,
                  callback=None)
    return reg_loss_fun(W_final,
                        valid_data,
                        hyperparam_vect,
                        reg_penalty=False)
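The pattern above is a two-level optimization: the inner SGD fits W by minimizing primal_loss under the current hyperparameters, and hyperloss returns the validation loss of the fitted weights (reg_penalty=False); hypergrad then differentiates this through the inner SGD via grad. A self-contained toy with the same structure, using hypothetical names and a closed-form ridge solve in place of the inner SGD:

import numpy as np

def toy_hyperloss(log_reg, seed=0):
    rs = np.random.RandomState(seed)
    X = rs.randn(100, 5)
    y = X.dot(rs.randn(5)) + 0.1 * rs.randn(100)
    X_tr, y_tr, X_va, y_va = X[:80], y[:80], X[80:], y[80:]
    # Inner problem: fit W under an L2 penalty exp(log_reg)
    # (closed form here; the example above runs SGD instead).
    W = np.linalg.solve(X_tr.T.dot(X_tr) + np.exp(log_reg) * np.eye(5),
                        X_tr.T.dot(y_tr))
    # Outer loss: validation error of the fitted W, with no penalty.
    return np.mean((X_va.dot(W) - y_va) ** 2)

# Sweeping the hyperparameter stands in for the outer gradient step
# that hypergrad takes on hyperparam_vect.
for log_reg in (-4.0, -2.0, 0.0, 2.0):
    print(log_reg, toy_hyperloss(log_reg))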
Example #3
def random_partition(data, RS, subset_sizes):
    N_rows = data['X'].shape[0]
    shuffled_data = dictslice(data, RS.permutation(N_rows))
    partitions = []
    start = 0
    for N in subset_sizes:
        idxs = slice(start, start + N)
        partitions.append(dictslice(shuffled_data, idxs))
        start += N
    return partitions
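Usage sketch for random_partition (toy data; assumes np from Example #1 and the dictslice sketch above): one shuffle, then an 80/20 train/validation split. A plain numpy RandomState suffices here, since only .permutation is used.

RS = np.random.RandomState(0)
data = {'X': np.arange(20).reshape(10, 2), 'T': np.eye(10)}
train, valid = random_partition(data, RS, [8, 2])
print(train['X'].shape, valid['X'].shape)  # (8, 2) (2, 2)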
Example #4
def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
    RS = RandomState((seed, i_hyper, i_primal))
    idxs = RS.permutation(N_train)[:batch_size]
    minibatch = dictslice(train_data, idxs)
    loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
    if verbose and i_primal % 10 == 0:
        print("Iter {0}, loss, {1}".format(i_primal, getval(loss)))
    return loss
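Each primal_loss variant rebuilds RS from (seed, i_hyper, i_primal), so the minibatch drawn at a given iteration is a pure function of the iteration indices, presumably so the same minibatches can be replayed exactly (e.g., on hypergrad's reverse pass). A toy illustration of the idiom, with a hypothetical hash-based wrapper standing in for the project's RandomState:

import numpy as np

def make_rs(*seed_parts):
    # Hypothetical stand-in: derive a numpy seed from an arbitrary tuple.
    return np.random.RandomState(abs(hash(seed_parts)) % (2 ** 32))

idxs_a = make_rs(0, 3, 7).permutation(10)[:4]
idxs_b = make_rs(0, 3, 7).permutation(10)[:4]
print(np.array_equal(idxs_a, idxs_b))  # True: iteration (0, 3, 7) replays its minibatch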
Example #5
def primal_loss(z_vect, transform, i_primal, record_results=False):
    RS = RandomState((seed, i_primal, "primal"))
    idxs = RS.randint(N_data, size=batch_size)
    minibatch = dictslice(data, idxs)
    w_vect = transform_weights(z_vect, transform)
    loss = loss_fun(w_vect, **minibatch)
    reg = regularization(z_vect)
    return loss + reg
Example #6
def primal_loss(w_vect, reg, i_primal, record_results=False):
    RS = RandomState((seed, i_primal, "primal"))
    idxs = RS.randint(N_data, size=batch_size)
    minibatch = dictslice(data, idxs)
    loss = loss_fun(w_vect, **minibatch)
    reg = regularization(w_vect, reg)
    if record_results and i_primal % N_thin == 0:
        print("Iter {0}: train: {1}".format(i_primal, getval(loss)))
    return loss + reg
Example #7
def sub_primal_stochastic_loss(z_vect, transform_vect, i_primal, i_script):
    RS = RandomState((seed, i_hyper, i_primal, i_script))
    N_train = train_data[i_script]['X'].shape[0]
    idxs = RS.permutation(N_train)[:batch_size]
    minibatch = dictslice(train_data[i_script], idxs)
    loss = loss_from_latents(z_vect, transform_vect, i_script, minibatch)
    reg = regularization(z_vect) if i_script == 0 else 0.0
    if i_primal % N_thin == 0 and i_script == 0:
        print("Iter {0}, full losses: train: {1}, valid: {2}, reg: {3}".format(
            i_primal,
            total_loss(train_data, getval(z_vect)),
            total_loss(valid_data, getval(z_vect)),
            getval(reg) / N_scripts_per_iter))
    return loss + reg
Example #8
def primal_loss(z_vect, transform, i_primal, record_results=False):
    RS = RandomState((seed, i_primal, "primal"))
    idxs = RS.randint(N_data, size=batch_size)
    minibatch = dictslice(data, idxs)
    w_vect = transform_weights(z_vect, transform)  # TODO: this is a scale transformation, not regularization!
    loss = loss_fun(w_vect, **minibatch)  # use new scale for prediction
    reg = regularization(z_vect)  # regularize original scale
    # TODO: should be equivalent: w = z * e^transform, so
    # f(z * e^transform) + e^lambda * ||z||^2
    #     = f(w) + e^lambda * ||e^(-transform) * w||^2   (elementwise product)
    # see process_transform

    # if record_results and i_primal % N_thin == 0:
    #     print("Iter {0}: train: {1}".format(i_primal, getval(loss)))
    return loss + reg
Example #9
def primal_stochastic_loss(z_vect, transform_vect, i_primal):
    RS = RandomState((seed, i_hyper, i_primal))
    loss = 0.0
    for _ in range(N_scripts_per_iter):
        i_script = RS.randint(N_scripts)
        N_train = train_data[i_script]['X'].shape[0]
        idxs = RS.permutation(N_train)[:batch_size]
        minibatch = dictslice(train_data[i_script], idxs)
        loss += loss_from_latents(z_vect, transform_vect, i_script, minibatch)
    reg = regularization(z_vect)
    if i_primal % 1 == 0:  # % 1 is always 0, so this logs every iteration
        print("Iter {0}, loss {1}, reg {2}".format(i_primal, getval(loss), getval(reg)))
        print("Full losses: train: {0}, valid: {1}".format(
            total_loss(train_data, getval(z_vect)),
            total_loss(valid_data, getval(z_vect))))
    return loss + reg
Example #10
def primal_stochastic_loss(z_vect, transform_vect, i_primal):
    RS = RandomState((seed, i_hyper, i_primal))
    loss = 0.0
    for _ in range(N_scripts_per_iter):
        i_script = RS.randint(N_scripts)
        N_train = train_data[i_script]['X'].shape[0]
        idxs = RS.permutation(N_train)[:batch_size]
        minibatch = dictslice(train_data[i_script], idxs)
        loss += loss_from_latents(z_vect, transform_vect, i_script, minibatch)
    reg = regularization(z_vect)
    if i_primal % 20 == 0:
        print("Iter {0}, loss {1}, reg {2}".format(i_primal, getval(loss), getval(reg)))
        print("Full losses: train: {0}, valid: {1}".format(
            total_loss(train_data, getval(z_vect)),
            total_loss(valid_data, getval(z_vect))))
    return loss + reg
Example #11
def split(alphabet, num_chars):
    # Partition by per-class quota: the first num_chars[0] occurrences of
    # each character go to split 0, the next num_chars[1] to split 1, etc.
    cum_chars = np.cumsum(num_chars)

    def select_dataset(count):
        for i, N in enumerate(cum_chars):
            if count < N:
                return i

    labels = np.argmax(alphabet['T'], axis=1)
    label_counts = [0] * NUM_CHARS
    split_idxs = [[] for _ in num_chars]
    for i_dpt, label in enumerate(labels):
        i_dataset = select_dataset(label_counts[label])
        split_idxs[i_dataset].append(i_dpt)
        label_counts[label] += 1

    data_splits = []
    for n, idxs in zip(num_chars, split_idxs):
        data_splits.append(dictslice(alphabet, idxs))
        # Sanity check: each class in a split has either 0 or exactly n examples.
        totals = np.sum(data_splits[-1]['T'], axis=0)
        assert np.all(np.logical_or(totals == 0, totals == n))

    return data_splits
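A toy run of split (hypothetical data; NUM_CHARS is a module-level global the function reads): with 4 examples of each of 3 characters, quotas [3, 1] send the first three occurrences of each character to the first split and the fourth to the second.

NUM_CHARS = 3
alphabet = {'X': np.arange(12)[:, None], 'T': np.tile(np.eye(3), (4, 1))}
train, valid = split(alphabet, [3, 1])
print(train['X'].shape, valid['X'].shape)  # (9, 1) (3, 1)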
Example #12
def shuffle_alphabet(alphabet, RS):
    # Shuffles both data and label indices. Note that the column
    # permutation of 'T' mutates the input dict in place.
    N_rows, N_cols = alphabet['T'].shape
    alphabet['T'] = alphabet['T'][:, RS.permutation(N_cols)]
    return dictslice(alphabet, RS.permutation(N_rows))
Example #13
def shuffle_rows(alphabet, RS):
    # Permute examples only; each row keeps its label.
    N_rows = alphabet['T'].shape[0]
    return dictslice(alphabet, RS.permutation(N_rows))
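A quick contrast of the two shuffles (toy data): shuffle_rows reorders examples while each row keeps its label, whereas shuffle_alphabet also permutes the columns of 'T' (renaming the classes) and mutates the input dict in place.

RS = np.random.RandomState(0)
alphabet = {'X': np.arange(8).reshape(4, 2), 'T': np.eye(4)}
rows_only = shuffle_rows(alphabet, RS)
print(rows_only['X'].tolist())      # rows reordered; 'T' rows follow along
relabeled = shuffle_alphabet(alphabet, RS)
print(relabeled['T'].sum(axis=0))   # each column still sums to 1 (one-hot preserved)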