Exemplos de split_data em Python, exemplos de utils.data.split_data em Python

Exemplo n.º 1

0

Exibir arquivo

 def draw_tr_te_ds(self, split=0.8):
     """Assemble the weighted dataset and pass it to ``split_data``.

     ``split`` is forwarded as ``train_per``. Whatever ``split_data``
     returns is returned unchanged.
     """
     sample_weights = weight(self.fval_arr,
                             self.goal,
                             self.mean,
                             mode=self.mode)
     # important sampling trick (currently disabled)
     # sample_weights /= self.prob_arr
     # Stack self.data_arr and self.data_ind_arr along a new axis 1.
     stacked = np.stack([self.data_arr, self.data_ind_arr], axis=1)
     return split_data(stacked, label=sample_weights, train_per=split)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: raykar.py Projeto: JaimeLennox/crowd-malware-detection

    def run_semi_supervised_method(self):
        print "Raykar (Semi-Supervised) Method:"

        self._train_data = self._data_dict
        self._test_data = self._data_dict

        supervised_results = []
        supervised = []

        true_sensitivity, true_specificity = data.annotator_model(self._engine_count, self._train_data, self._type_dict)

        for i in range(1):
            supervised_proportion = 0.4
            print "Using supervised proportion:", supervised_proportion
            supervised = data.split_data(self._train_data.keys(), supervised_proportion, supervised)
            accuracy = self._run_train_test(functools.partial(self._semi_supervised_method, supervised=supervised))

            # sensitivity_diff = np.mean([abs(sensitivity[i] - true_sensitivity[i]) for i in range(len(sensitivity))])
            # specificity_diff = np.mean([abs(specificity[i] - true_specificity[i]) for i in range(len(specificity))])

            supervised_results.append((supervised_proportion, accuracy))
            print "Accuracy (Test Data):", accuracy

        if self._show_graphs:
            data.supervised_graph_accuracy(*zip(*supervised_results))

Exemplo n.º 3

0

Exibir arquivo

 def draw_tr_te_ds(self,
                   split=0.8,
                   only_positive=False,
                   normalize_weight=True):
     """Return the train/test split of all data together with its weights.

     Args:
         split: Third positional argument forwarded to ``split_data``.
         only_positive: When true, keep only the entries whose weight is
             strictly greater than zero before splitting.
         normalize_weight: Forwarded to ``self._weights``.

     Returns:
         The tuple ``(train_x, test_x, train_w, test_w)`` produced by
         ``split_data``.
     """
     samples = self._get_all_data()
     sample_w = self._weights(normalize_weight)
     if only_positive:
         # One mask selects the matching rows from both arrays.
         keep = sample_w > 0
         samples = samples[keep]
         sample_w = sample_w[keep]
     train_x, test_x, train_w, test_w = split_data(samples, sample_w, split)
     return train_x, test_x, train_w, test_w

Exemplo n.º 4

0

Exibir arquivo

Arquivo: train_recognition_model.py Projeto: estenhl/fish_classifier

def train_recognition_model(image_shape=DEFAULT_IMAGE_SHAPE, verbose=False):
	"""Train the fish recognition CNN, save it and return it.

	Args:
		image_shape: ``(height, width, channels)`` of the input images; it is
			passed to ``parse_datastructure`` and to the ``DeepCNN`` constructor.
		verbose: When true, print a progress message and forward the flag to
			``parse_datastructure``.

	Returns:
		The fitted ``DeepCNN`` instance, after it has been saved to
		``OUTPUT_MODEL_FOLDER``.
	"""
	if verbose:
		print('Training recognition model')

	X, y, labels, ratios = parse_datastructure(SRC_FOLDER, image_shape, limit=1452, verbose=verbose)
	X, y = shuffle_data(X, y)
	train_X, train_y, val_X, val_y = split_data(X, y)

	height, width, channels = image_shape
	cnn = DeepCNN('Fishes', (height, width, channels), 2, class_weights=(1 - ratios))
	cnn.fit(train_X, train_y, val_X, val_y, epochs=10)

	# makedirs with exist_ok=True also creates missing parent directories and
	# avoids the check-then-create race of the isdir()/mkdir() pair.
	os.makedirs(OUTPUT_MODEL_FOLDER, exist_ok=True)

	cnn.save(OUTPUT_MODEL_FOLDER)
	#test_recognition_model(cnn, verbose=verbose)

	return cnn

Exemplo n.º 5

0

Exibir arquivo

Arquivo: train_localization_model.py Projeto: estenhl/fish_classifier

def train_localization_model(recognition_cnn=None,
                             image_shape=DEFAULT_IMAGE_SHAPE,
                             verbose=False):
    """Train the localization network on features from the recognition CNN.

    Args:
        recognition_cnn: Model used to extract features. When ``None`` a new
            one is trained with ``train_recognition_model``.
        image_shape: Forwarded to ``parse_localization_data``.
        verbose: Forwarded to ``train_recognition_model`` and
            ``parse_localization_data``.
    """
    if recognition_cnn is None:
        recognition_cnn = train_recognition_model(verbose=verbose)

    parsed = parse_localization_data(SRC_FOLDER,
                                     DATA_FILE,
                                     image_shape,
                                     verbose=verbose)
    gridsize, images, Y = parsed

    # Turn the images into feature vectors taken from layer LAYER_NAME.
    extracted = recognition_cnn.extract_features(images, LAYER_NAME)
    X, y = label_localization_data(extracted, Y)
    print('X.shape: ' + str(X.shape))
    print('y.shape: ' + str(y.shape))

    # Shuffle, balance, then split into training and validation parts.
    shuffled_X, shuffled_y = shuffle_data(X, y)
    balanced_X, balanced_y = balance_dataset(shuffled_X, shuffled_y, 2)
    train_X, train_y, val_X, val_y = split_data(balanced_X, balanced_y)

    localizer = SingleLayerNN('Fishes_localization', 512, 2)
    localizer.fit(train_X, train_y, val_X, val_y, epochs=100)

Exemplo n.º 6

0

Exibir arquivo

    def split_horizontal_data(self, num_clients, num_classes, batch_size=200):
        """Partition the training set per client and build all data loaders.

        The result of ``split_data`` is stored in ``self.splited_data``. Each
        client's pair is shuffled in place and wrapped in a shuffling
        ``DataLoader`` stored under its uid in ``self.data_loader``. A
        non-shuffling loader over the test set is stored in
        ``self.test_loader``.

        Args:
            num_clients: Forwarded to ``split_data``.
            num_classes: Forwarded to ``split_data``.
            batch_size: Batch size used for every loader.
        """
        # split data from users
        self.splited_data = split_data(self.train_x, self.train_y, num_clients,
                                       num_classes)

        # build data loader & send to user
        for uid, item in self.splited_data.items():
            n_rows = len(item[0])
            order = np.random.choice(n_rows, n_rows, replace=False)
            # Apply the same random order to both entries and write them back
            # so self.splited_data holds the permuted versions.
            features = item[0] = item[0][order]
            targets = item[1] = item[1][order]
            print("-> send data to client:{}, size:{}".format(
                uid, len(targets)))
            self.data_loader[uid] = DataLoader(Dataset(features, targets),
                                               batch_size,
                                               shuffle=True)

        # build test loader
        test_set = Dataset(self.test_x, self.test_y)
        self.test_loader = DataLoader(test_set, batch_size, shuffle=False)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: uci.py Projeto: grasses/Privacy-Preserving-Machine-Learning

    def split_horizontal_data(self, num_clients, num_classes, batch_size=200):
        """Partition the training set per client and build all data loaders.

        The result of ``split_data`` is stored in ``self.splited_data``. Each
        client's pair is shuffled in place, its label counts are printed, and
        it is wrapped in a shuffling ``DataLoader`` stored under its uid in
        ``self.data_loader``. A non-shuffling loader over the test set is
        stored in ``self.test_loader``.

        Args:
            num_clients: Forwarded to ``split_data``.
            num_classes: Forwarded to ``split_data``.
            batch_size: Batch size used for every loader.
        """
        # split data from users
        self.splited_data = split_data(self.train_x, self.train_y, num_clients, num_classes)

        # build data loader & send to user
        for uid, item in self.splited_data.items():
            n_rows = len(item[0])
            order = np.random.choice(n_rows, n_rows, replace=False)
            # Apply the same random order to both entries and write them back
            # so self.splited_data holds the permuted versions.
            item[0] = item[0][order]
            y = item[1] = item[1][order]

            # Count the entries equal to 0 and to 1 for the debug print.
            zero_hits = np.where(y == 0)
            one_hits = np.where(y == 1)
            z = len(zero_hits[0])
            zz = len(one_hits[0])
            print(f"-> uid={uid} 0={z}, 1={zz} totoal={item[1].shape}\ny={y[:500]}")
            print("-> send data to client:{}, size:{}".format(uid, len(item[1])))
            client_set = UCIDataset(item[0], item[1])
            self.data_loader[uid] = DataLoader(client_set, batch_size, shuffle=True)

        # build test loader
        test_set = UCIDataset(self.test_x, self.test_y)
        self.test_loader = DataLoader(test_set, batch_size, shuffle=False)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_model3.py Projeto: kouroshHakha/gacem

    def main(self, seed=10):
        """Train the MADE model on weighted samples and plot the outcome.

        Seeds NumPy and PyTorch, draws ``self.nsample`` points with
        ``sample_data``, evaluates ``ackley`` on them and converts the values
        into weights, then trains for ``self.nepoch`` epochs by minimising the
        weighted loss returned by ``self.get_nll``. Afterwards the learning
        curves and samples drawn from the trained model are plotted.

        Args:
            seed: Seed for NumPy, PyTorch (CPU and all CUDA devices) and the
                ``MADE`` constructor.
        """
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        data = sample_data(self.nsample)
        fval = ackley(data[:, 0, :])
        weights = weight(fval, self.goal, 4, mode='le')
        xtr, xte, wtr, wte = split_data(data, label=weights)

        D = self.dim
        self.model: nn.Module = MADE(D, self.hiddens, D * 100, seed=seed)
        self.model.to(self.device)
        self.opt = torch.optim.Adam(self.model.parameters(),
                                    self.lr,
                                    weight_decay=0)
        B = self.bsize
        N, D, _ = xtr.shape
        # per epoch
        tr_nll, te_nll = [], []
        for epoch_id in range(self.nepoch):
            # Only full batches are used; the trailing N % B samples are
            # skipped.
            nstep = N // B
            # per batch
            tr_nll_per_b = 0
            for step in range(nstep):
                self.model.train()
                xb = xtr[step * B:step * B + B]
                wb = wtr[step * B:step * B + B]
                xb_tens = torch.from_numpy(xb).to(self.device)
                wb_tens = torch.from_numpy(wb).to(self.device)

                # Slot 0 along axis 1 is fed as the input, slot 1 is cast to
                # integer indices.
                xin = xb_tens[:, 0, :]
                xin_ind = xb_tens[:, 1, :].long()
                loss = self.get_nll(xin, xin_ind, weights=wb_tens)
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()

                # print(loss)
                # for name, param in self.model.named_parameters():
                #     print(f'{name} = {param.grad}')
                # import pdb
                # pdb.set_trace()

                # Accumulate the mean training loss over the epoch.
                tr_nll_per_b += loss.to(self.cpu).item() / nstep

            self.model.eval()
            # Evaluate without autograd: the test loss is never
            # back-propagated, and keeping graph-carrying tensors in te_nll
            # would retain one autograd graph per epoch.
            with torch.no_grad():
                xte_tens = torch.from_numpy(xte).to(self.device)
                wte_tens = torch.from_numpy(wte).to(self.device)
                xin_te = xte_tens[:, 0, :]
                xin_ind_te = xte_tens[:, 1, :].long()
                te_loss = self.get_nll(xin_te, xin_ind_te, weights=wte_tens)
            te_nll.append(te_loss)

            print(f'epoch = {epoch_id}, tr_nll = {tr_nll_per_b}')
            print(f'epoch = {epoch_id}, te_nll = {te_loss}')
            tr_nll.append(tr_nll_per_b)

        self.plot_learning(tr_nll, te_nll)

        x1 = np.linspace(start=-5, stop=5, num=100)
        x2 = np.linspace(start=-5, stop=5, num=100)
        samples, _ = self.sample_model(self.nsample, x1, x2)
        samples = samples.to(self.cpu).data.numpy()
        plot_data(samples,
                  scatter_loc='figs/test_model3_scatter.png',
                  hist_loc='figs/test_model3_hist2D.png')

Exemplo n.º 9

0

Exibir arquivo

################################################################################


################################################################################
## MAIN ########################################################################
################################################################################


if __name__ == '__main__':

## RANDOM TESTING ##############################################################

	# Load the CSV data, resample it with bootstrap_data and split it into
	# training and test parts.
	X,Y = load_data_from_csv('../data/binary.csv', 4, float)
	X,Y = bootstrap_data(X, Y, 1000)
	# X,mu,scale = rescale(X)
	# NOTE(review): .8 is presumably the training fraction -- confirm against
	# split_data.
	Xtr,Xte,Ytr,Yte = split_data(X, Y, .8)
	
	# Fit the classifier on the training part, then print its weights, its
	# string form, its hard and soft predictions on the test part, and its
	# test error.
	# NOTE(review): [4,2,3,2] looks like the per-layer sizes -- confirm
	# against NNetClassify.
	nc = NNetClassify(Xtr, Ytr, [4,2,3,2], init='random', max_steps=5000, activation='htangent')
	print(nc.get_weights())
	print(nc)
	print(nc.predict(Xte))
	print(nc.predict_soft(Xte))
	print(nc.err(Xte, Yte))

## DETERMINISTIC TESTING #######################################################

#	data = [[float(val) for val in row[:-1]] for row in csv.reader(open('../data/classifier-data.csv'))]
#	trd = np.asarray(data[0:40] + data[50:90] + data[100:140])
#	ted = np.asarray(data[40:50] + data[90:100] + data[140:150])
#	classes = [float(row[-1].lower()) for row in csv.reader(open('../data/classifier-data.csv'))]
#	trc = np.asarray(classes[0:40] + classes[50:90] + classes[100:140])

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_model4_new_nll.py Projeto: kouroshHakha/gacem

    def main(self, seed=10):
        """Train the MADE model on weighted samples and plot the outcome.

        Seeds NumPy and PyTorch, obtains data and weights from
        ``sample_data``, plots them, then trains for ``self.nepoch`` epochs by
        minimising the weighted loss returned by ``self.get_nll``. Afterwards
        the learning curves and 10000 samples drawn from the trained model are
        plotted.

        Args:
            seed: Seed for NumPy, PyTorch (CPU and all CUDA devices) and the
                ``MADE`` constructor.
        """
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        data, weights = sample_data(self.nsample)
        plot_data(data[:, 0, :], label=weights)
        xtr, xte, wtr, wte = split_data(data, label=weights)

        D = self.dim
        self.model: nn.Module = MADE(D, self.hiddens, D * 100, seed=seed)
        self.model.to(self.device)
        self.opt = torch.optim.Adam(self.model.parameters(),
                                    self.lr,
                                    weight_decay=0)
        B = self.bsize
        N, D, _ = xtr.shape
        # per epoch
        tr_nll, te_nll = [], []
        for epoch_id in range(self.nepoch):
            # Only full batches are used; the trailing N % B samples are
            # skipped.
            nstep = N // B
            # per batch
            tr_nll_per_b = 0
            for step in range(nstep):
                self.model.train()
                xb = xtr[step * B:step * B + B]
                wb = wtr[step * B:step * B + B]
                xb_tens = torch.from_numpy(xb).to(self.device)
                wb_tens = torch.from_numpy(wb).to(self.device)

                # Slot 0 along axis 1 is fed as the input, slot 1 is cast to
                # integer indices.
                xin = xb_tens[:, 0, :]
                xin_ind = xb_tens[:, 1, :].long()
                loss = self.get_nll(xin, xin_ind, weights=wb_tens, debug=False)
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()

                # Accumulate the mean training loss over the epoch.
                tr_nll_per_b += loss.to(self.cpu).item() / nstep

            self.model.eval()
            # Evaluate without autograd: the test loss is never
            # back-propagated, and keeping graph-carrying tensors in te_nll
            # would retain one autograd graph per epoch.
            with torch.no_grad():
                xte_tens = torch.from_numpy(xte).to(self.device)
                wte_tens = torch.from_numpy(wte).to(self.device)
                xin_te = xte_tens[:, 0, :]
                xin_ind_te = xte_tens[:, 1, :].long()
                te_loss = self.get_nll(xin_te, xin_ind_te, weights=wte_tens)
            te_nll.append(te_loss)

            print(f'epoch = {epoch_id}, tr_nll = {tr_nll_per_b}')
            print(f'epoch = {epoch_id}, te_nll = {te_loss}')
            tr_nll.append(tr_nll_per_b)

        #     x1 = np.linspace(start=-5, stop=5, num=100)
        #     x2 = np.linspace(start=-5, stop=5, num=100)
        #     samples, _ = self.sample_model(self.nsample, x1, x2)
        #     samples = samples.to(self.cpu).data.numpy()
        #
        #     ax = plt.subplot(5, 5, epoch_id + 1)
        #     plot_data(samples, scatter_loc='figs/test_model_4_scatter.png',
        #               hist_loc='figs/test_model_4_hist2D.png', ax=ax)
        #     plt.tight_layout()
        # plt.savefig('figs/test_model_4_hist2D.png')

        self.plot_learning(tr_nll, te_nll)
        # pdb.set_trace()

        x1 = np.linspace(start=-5, stop=5, num=100)
        x2 = np.linspace(start=-5, stop=5, num=100)
        samples, _ = self.sample_model(10000, x1, x2)
        samples = samples.to(self.cpu).data.numpy()
        plot_data(samples,
                  scatter_loc='figs/test_model_4_scatter.png',
                  hist_loc='figs/test_model_4_hist2D.png')

Exemplo n.º 11

0

Exibir arquivo

    return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)


# if K.image_dim_ordering() == 'th':
#       input_shape = (3, img_width, img_height)
# else:
# input_shape = (img_width, img_height, 3)

# N = 200
# NOTE(review): presumably the number of ingredient classes; it is not
# referenced in the visible part of this script.
N_INGREDIENTS = 100
# images = np.random.normal(size=[N, 3, 32, 32])
# ingredientes = np.random.randint(low=0, high=2, size=[N, N_INGREDIENTS])

# print 'sum', np.sum(ingredientes, axis=1)

# Split the recipes dataset; only the training path and training data are
# used in the visible part of this script.
train_path, test_path, data_train, data_test = data.split_data(
    'pre-processed-recipes-ctc.json', './data/recipes-ctc/', train=0.15)

# Load images and ingredients array
input_images, input_ingredients = data.load(
    data_train,
    train_path,
    img_width=32,
    img_height=32,
    file_ingredients='./data/ingredients.txt')

# input_ingredients is two-dimensional: its shape unpacks into two values.
NB_INPUT, NB_INGREDIENTS = input_ingredients.shape
print 'nb_input={}, nb_ingredients={}'.format(NB_INPUT, NB_INGREDIENTS)

# Network input declared with shape [3, 32, 32], matching the 32x32 size
# requested from data.load above.
input_image = Input(shape=[3, 32, 32])

# First layer of the network: a Convolution2D(20, 3, 3) applied to the input.
x = Convolution2D(20, 3, 3)(input_image)

Exemplo n.º 12

0

Exibir arquivo

# model,train_hist,_ = train_model(model,X_train,y_train,num_epochs=120)
# DAYS_TO_PREDICT = 12
# predicted_cases,_ = predict_daily_cases(model,X_train,y_train,DAYS_TO_PREDICT,seq_len,scaler)
# predicted_cases = pd.Series(data=predicted_cases,
#     index=pd.date_range(start=diff_daily_cases.index[-1],
#                         periods=DAYS_TO_PREDICT + 1,
#                         closed='right'))
#
# plot_data(predicted_cases,'Predictions',label='Predicted Daily Cases')
# plot_real_predicted(diff_daily_cases,predicted_cases)

# Script entry point: prepare the case series, train the predictor and plot
# the training and test loss histories.
if __name__ == '__main__':
    setup_params()

    diff_daily_cases = prepare_data('time_series_19-covid-Confirmed.csv')
    # NOTE(review): 20 is presumably the size of the held-out test part --
    # confirm against split_data.
    train_data, test_data = split_data(diff_daily_cases, 20)
    train_data, test_data, scaler = scale_data(diff_daily_cases, train_data,
                                               test_data)
    # The same seq_len is used to build the sequences and to configure the
    # model.
    seq_len = 5
    X_train, y_train = create_sequences(train_data, seq_len)
    X_test, y_test = create_sequences(test_data, seq_len)

    model = CoronaVirusPredictor(n_features=1,
                                 n_hidden=512,
                                 seq_len=seq_len,
                                 n_layers=2)

    model, train_hist, test_hist = train_model(model, X_train, y_train, X_test,
                                               y_test)

    plot_losses(train_hist, test_hist)

Exemplo n.º 13

0

Exibir arquivo

def main():
    """Run the ingredient-classification pipeline end to end.

    Steps: split the recipes dataset into train/validation/test, load the
    train and validation images with their ingredient arrays, compute (or load
    from cache) the per-ingredient weights, save the bottleneck features and
    train the top model when their output files are missing, fine-tune the
    network, and finally evaluate the saved model on the test data.

    Raises:
        AssertionError: If evaluation is enabled and the final model file does
            not exist.
    """
    # TODO save the model as separate JSON and weight files so that saving
    # custom loss functions no longer raises errors;
    # fix the saving of the history and the plotting of figures
    # K.set_image_dim_ordering('th')
    override = False
    evaluate_model = True

    # validation_split = 0.05  # 10 % of train data for validation, the last % of the data is used for validation
    nb_epoch = 90  # 100
    dropout = 0.5
    neurons_last_layer = 1024  # 512, 1024 256, 4096
    my_batch_size = 32
    custom_loss = None  #'weighted_binary_crossentropy' #'weighted_binary_crossentropy' or None for binary_crossentropy

    file_dist_ingredients_dict = 'inverse_distribution_ingredients_dict.npy'
    file_dist_ingredients_array = 'inverse_distribution_ingredients_array.npy'
    file_ingredients = './data/new-ingredients.txt'

    # Single-argument print calls are written with parentheses so the same
    # line works as a Python 2 statement and as a Python 3 function call.
    print('Current parameters: nb_epoch={}, custom_loss={}, neurons_last_layer={}'.format(
        nb_epoch, custom_loss, neurons_last_layer))

    # Generate data for training and test
    # # data.split_data('pre-processed-full-recipes-dataset-v2.json', './data/full-recipes-dataset/', train=0.9)
    # train_path, val_path, test_path, data_train, data_val, data_test  = data.split_data('pre-processed-recipes-ctc.json', './data/recipes-ctc/',
    #                                                                                     train=0.2, validation_split=0.1)
    train_path, val_path, test_path, data_train, data_val, data_test = data.split_data(
        'pre-processed-full-recipes-dataset-v2.json',
        './data/full-recipes-dataset/',
        train=0.9,
        validation_split=0.1)

    # Load images and ingredients array. First for training and then for validation
    input_images_train, input_ingredients_train = data.load(
        data_train,
        train_path,
        img_width=C.IMG_WIDTH,
        img_height=C.IMG_HEIGHT,
        file_ingredients=file_ingredients)

    input_images_val, input_ingredients_val = data.load(
        data_val,
        val_path,
        img_width=C.IMG_WIDTH,
        img_height=C.IMG_HEIGHT,
        file_ingredients=file_ingredients)

    # Calculate the distribution of each ingredient in the data set for training. This distribution will be used
    # as a weight in the loss function, frequent ingredients will be assigned small weights.
    # https://github.com/fchollet/keras/pull/188
    ingredients_weight_dict = None
    ingredients_weight_array = None
    # Both cache files are loaded below, so both must exist for the cache to
    # be usable; otherwise the weights are recomputed and saved again.
    cache_ready = (os.path.exists(file_dist_ingredients_dict)
                   and os.path.exists(file_dist_ingredients_array))
    if override or not cache_ready:
        ingredients_weight_dict, ingredients_weight_array = dist_samples_per_ingredient(
            data=data_train,
            file_ingredients=file_ingredients,
            generate_figure=False,
            image_file='dist_ingredients_train.png')
        # Pass the paths straight to NumPy: it opens the files in binary mode
        # and closes them, instead of leaking text-mode handles. Both names
        # already end in '.npy', so no extension is appended.
        np.save(file_dist_ingredients_dict, ingredients_weight_dict)
        np.save(file_dist_ingredients_array, ingredients_weight_array)
    else:
        # NOTE(review): if the first object is a plain dict, np.save stored it
        # as a 0-d object array, so np.load returns that array rather than a
        # dict (and recent NumPy needs allow_pickle=True) -- confirm.
        ingredients_weight_dict = np.load(file_dist_ingredients_dict)
        ingredients_weight_array = np.load(file_dist_ingredients_array)
        print('Loaded file {}'.format(file_dist_ingredients_dict))
        print('Loaded file {}'.format(file_dist_ingredients_array))
    print(ingredients_weight_dict)
    print(ingredients_weight_array)

    # The dict form is used with the default loss, the array form with the
    # custom weighted loss.
    class_weight = None
    if custom_loss is None:
        class_weight = ingredients_weight_dict  #ingredients_weight_dict
    elif custom_loss == 'weighted_binary_crossentropy':
        class_weight = ingredients_weight_array

    # Define which gpu we are going to use
    # with TB.tf.device('/gpu:1'):
    # TODO when using custom metric the keras doesnt load the model properly: avoid using custom metrics, or change
    # the save function to save the weights and json of the model
    if not os.path.exists(C.file_bottleneck_features_train) or override:
        classifier2.save_bottlebeck_features(
            C.file_bottleneck_features_train,
            C.file_bottleneck_features_validation,
            img_width=C.IMG_WIDTH,
            img_height=C.IMG_HEIGHT,
            input_data_train=input_images_train,
            input_data_validation=input_images_val,
            batch_size=my_batch_size)

    if not os.path.exists(C.top_model_weights_path) or override:
        classifier2.train_top_model(C.file_bottleneck_features_train,
                                    C.file_bottleneck_features_validation,
                                    C.top_model_weights_path,
                                    nb_epoch=nb_epoch,
                                    batch_size=my_batch_size,
                                    dropout=dropout,
                                    neurons_last_layer=neurons_last_layer,
                                    train_ingredients=input_ingredients_train,
                                    val_ingredients=input_ingredients_val,
                                    custom_loss=custom_loss,
                                    class_weight=class_weight)

    classifier3.fine_tuning(
        C.top_model_weights_path,
        final_vgg16_model=C.final_vgg16_model,
        img_width=C.IMG_WIDTH,
        img_height=C.IMG_HEIGHT,
        batch_size=my_batch_size,
        nb_epoch=nb_epoch,
        train_ingredients=input_ingredients_train,
        val_ingredients=input_ingredients_val,
        train_data=input_images_train,
        validation_data=input_images_val,  # validation_split=validation_split,
        class_weight=class_weight,
        dropout=dropout,
        neurons_last_layer=neurons_last_layer,
        custom_loss=custom_loss)

    # Evaluate test data with the final model
    if evaluate_model:
        assert os.path.exists(
            C.final_vgg16_model), 'File for the model <{}> not found.'.format(
                C.final_vgg16_model)
        evaluate(data_test,
                 test_path,
                 C.final_vgg16_model,
                 file_ingredients=file_ingredients)

Exemplo n.º 14

0

Exibir arquivo

    def main(self, seed=10):
        """Train the MADE model on weighted samples and plot the outcome.

        Seeds NumPy and PyTorch, obtains data, ``delta`` and weights from
        ``self.sample_data``, plots the data, then trains for ``self.nepoch``
        epochs by minimising the weighted loss returned by ``self.get_nll``.
        Afterwards the learning curves and 1000 samples drawn from the trained
        model are plotted.

        Args:
            seed: Seed for NumPy, PyTorch (CPU and all CUDA devices) and the
                ``MADE`` constructor.
        """
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        data, delta, weights = self.sample_data(self.nsample)
        self.plot_data(data[:, 1, :].astype('int'))
        xtr, xte, wtr, wte = split_data(data, label=weights)

        D = self.dim
        self.model: nn.Module = MADE(D,
                                     self.hiddens,
                                     D * 3 * self.nr_mix,
                                     seed=seed)
        self.model.to(self.device)
        self.opt = torch.optim.Adam(self.model.parameters(),
                                    self.lr,
                                    weight_decay=0)
        self.lr_sch = StepLR(self.opt, step_size=50, gamma=0.9)
        B = self.bsize
        N, D, _ = xtr.shape
        # per epoch
        tr_nll, te_nll = [], []
        plt.figure(figsize=(15, 8))
        for epoch_id in range(self.nepoch):
            # Only full batches are used; the trailing N % B samples are
            # skipped.
            nstep = N // B
            # per batch
            tr_nll_per_b = 0
            for step in range(nstep):
                self.model.train()
                xb = xtr[step * B:step * B + B]
                wb = wtr[step * B:step * B + B]

                xb_tens = torch.from_numpy(xb).to(self.device)
                wb_tens = torch.from_numpy(wb).to(self.device)

                xin = xb_tens[:, 0, :]
                loss = self.get_nll(xin, delta, weights=wb_tens, debug=False)
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()
                # NOTE(review): the scheduler is stepped once per batch with
                # an explicit epoch; recent PyTorch releases deprecate the
                # epoch argument -- confirm against the installed version.
                self.lr_sch.step(epoch_id)

                # Accumulate the mean training loss over the epoch.
                tr_nll_per_b += loss.to(self.cpu).item() / nstep

            self.model.eval()
            # Evaluate without autograd: the test loss is never
            # back-propagated, and keeping graph-carrying tensors in te_nll
            # would retain one autograd graph per epoch.
            with torch.no_grad():
                xte_tens = torch.from_numpy(xte).to(self.device)
                wte_tens = torch.from_numpy(wte).to(self.device)
                xin_te = xte_tens[:, 0, :]
                te_loss = self.get_nll(xin_te,
                                       delta,
                                       weights=wte_tens,
                                       debug=False)
            te_nll.append(te_loss)

            print(f'epoch = {epoch_id}, tr_nll = {tr_nll_per_b}')
            print(f'epoch = {epoch_id}, te_nll = {te_loss}')
            tr_nll.append(tr_nll_per_b)

        #     if (epoch_id + 1) % 20 == 0 and epoch_id <= 100:
        #         _, samples_ind = self.sample_model(1, delta)
        #         samples_ind = samples_ind.to(self.cpu).data.numpy().astype('int')
        #
        #         ax = plt.subplot(1, 5, epoch_id // 20 + 1, adjustable='box', aspect=1)
        #         self.plot_data(samples_ind, scatter_loc='figs/test_model_6_sub_scatter.png',
        #                        hist_loc='figs/test_model_6_sub_hist2D.png', ax=ax)
        #         plt.tight_layout()
        # plt.savefig('figs/test_model_6_sub_hist2D.png')

        self.plot_learning(tr_nll, te_nll)
        # pdb.set_trace()

        samples, samples_ind = self.sample_model(1000, delta)
        samples_ind = samples_ind.to(self.cpu).data.numpy().astype('int')
        self.plot_data(samples_ind,
                       scatter_loc='figs/test_model_6_scatter.png',
                       hist_loc='figs/test_model_6_hist2D.png')