def draw_tr_te_ds(self, split=0.8):
    """Build the weighted sample set and split it into train and test parts.

    Sample weights come from ``weight(...)`` applied to ``self.fval_arr`` with
    ``self.goal``, ``self.mean`` and ``self.mode``.  ``self.data_arr`` and
    ``self.data_ind_arr`` are stacked along a new axis 1 to form the samples.

    :param split: forwarded to ``split_data`` as ``train_per``.
    :return: the result of ``split_data`` for the stacked samples and weights.
    """
    sample_weights = weight(self.fval_arr, self.goal, self.mean, mode=self.mode)
    # important sampling trick
    # weights /= self.prob_arr
    stacked = np.stack([self.data_arr, self.data_ind_arr], axis=1)
    return split_data(stacked, label=sample_weights, train_per=split)
def run_semi_supervised_method(self):
    """Run the Raykar semi-supervised method and print its test accuracy.

    The full ``self._data_dict`` is used as both training and test data.  A
    supervised subset of the training keys is drawn with
    ``data.split_data`` and handed to ``self._semi_supervised_method`` through
    ``self._run_train_test``.  When ``self._show_graphs`` is set, the collected
    ``(proportion, accuracy)`` pairs are passed to
    ``data.supervised_graph_accuracy``.

    The ``print`` calls take a single pre-formatted argument so they emit the
    same text under Python 2 and Python 3.
    """
    print("Raykar (Semi-Supervised) Method:")
    self._train_data = self._data_dict
    self._test_data = self._data_dict
    supervised_results = []
    supervised = []
    # Only the commented-out diagnostics below consume these values; the call
    # is kept in case annotator_model has side effects.
    true_sensitivity, true_specificity = data.annotator_model(
        self._engine_count, self._train_data, self._type_dict)

    for _ in range(1):
        supervised_proportion = 0.4
        print("Using supervised proportion: {}".format(supervised_proportion))
        # list(...) so split_data receives a real list on Python 3 as well.
        supervised = data.split_data(
            list(self._train_data.keys()), supervised_proportion, supervised)
        accuracy = self._run_train_test(
            functools.partial(self._semi_supervised_method, supervised=supervised))
        # sensitivity_diff = np.mean([abs(sensitivity[i] - true_sensitivity[i]) for i in range(len(sensitivity))])
        # specificity_diff = np.mean([abs(specificity[i] - true_specificity[i]) for i in range(len(specificity))])
        supervised_results.append((supervised_proportion, accuracy))
        print("Accuracy (Test Data): {}".format(accuracy))

    if self._show_graphs:
        data.supervised_graph_accuracy(*zip(*supervised_results))
def draw_tr_te_ds(self, split=0.8, only_positive=False, normalize_weight=True):
    """Return a train/test split of all samples together with their weights.

    :param split: third positional argument forwarded to ``split_data``.
    :param only_positive: when true, keep only samples whose weight is > 0.
    :param normalize_weight: forwarded to ``self._weights``.
    :return: ``(train_x, test_x, train_w, test_w)`` as produced by ``split_data``.
    """
    samples = self._get_all_data()
    sample_weights = self._weights(normalize_weight)

    if only_positive:
        # One mask selects the matching rows of both arrays.
        keep = sample_weights > 0
        samples = samples[keep]
        sample_weights = sample_weights[keep]

    x_train, x_test, w_train, w_test = split_data(samples, sample_weights, split)
    return x_train, x_test, w_train, w_test
def train_recognition_model(image_shape=DEFAULT_IMAGE_SHAPE, verbose=False):
    """Train the 'Fishes' recognition CNN, save it and return it.

    Data is parsed from ``SRC_FOLDER`` (limited to 1452 entries), shuffled and
    split into training and validation parts.  The network is fitted for 10
    epochs and saved to ``OUTPUT_MODEL_FOLDER``, which is created if missing.

    :param image_shape: ``(height, width, channels)`` of the input images.
    :param verbose: print a progress message and forward to the parser.
    :return: the fitted ``DeepCNN`` instance.
    """
    if verbose:
        print('Training recognition model')

    samples, targets, labels, ratios = parse_datastructure(
        SRC_FOLDER, image_shape, limit=1452, verbose=verbose)
    samples, targets = shuffle_data(samples, targets)
    train_X, train_y, val_X, val_y = split_data(samples, targets)

    height, width, channels = image_shape
    input_dims = (height, width, channels)
    model = DeepCNN('Fishes', input_dims, 2, class_weights=(1 - ratios))
    model.fit(train_X, train_y, val_X, val_y, epochs=10)

    output_missing = not os.path.isdir(OUTPUT_MODEL_FOLDER)
    if output_missing:
        os.mkdir(OUTPUT_MODEL_FOLDER)
    model.save(OUTPUT_MODEL_FOLDER)

    #test_recognition_model(cnn, verbose=verbose)
    return model
def train_localization_model(recognition_cnn=None, image_shape=DEFAULT_IMAGE_SHAPE, verbose=False):
    """Train the 'Fishes_localization' network on recognition-CNN features.

    Localization data is parsed from ``SRC_FOLDER`` / ``DATA_FILE``, features
    are extracted from ``LAYER_NAME`` of the recognition network, and the
    labelled, shuffled and balanced data is used to fit a ``SingleLayerNN``
    for 100 epochs.

    :param recognition_cnn: trained recognition model; when ``None`` one is
        trained first via ``train_recognition_model``.
    :param image_shape: image shape used for parsing the localization data and,
        when needed, for training the recognition model.
    :param verbose: forwarded to the parsing / training helpers.
    """
    if recognition_cnn is None:
        # Train the feature extractor on the same image shape as the
        # localization images; previously the default shape was silently used.
        recognition_cnn = train_recognition_model(image_shape=image_shape, verbose=verbose)

    gridsize, images, Y = parse_localization_data(SRC_FOLDER, DATA_FILE, image_shape, verbose=verbose)
    features = recognition_cnn.extract_features(images, LAYER_NAME)
    X, y = label_localization_data(features, Y)
    print('X.shape: ' + str(X.shape))
    print('y.shape: ' + str(y.shape))

    X, y = shuffle_data(X, y)
    X, y = balance_dataset(X, y, 2)
    train_X, train_y, val_X, val_y = split_data(X, y)

    cnn = SingleLayerNN('Fishes_localization', 512, 2)
    cnn.fit(train_X, train_y, val_X, val_y, epochs=100)
def split_horizontal_data(self, num_clients, num_classes, batch_size=200):
    """Partition the training data per client and build all data loaders.

    ``split_data`` produces a mapping from client id to a two-element item
    (presumably ``[features, labels]`` -- confirm against ``split_data``).
    Both elements of each item are reordered in place with one shared random
    permutation, so they stay aligned.  A shuffling ``DataLoader`` per client
    is stored in ``self.data_loader`` and a non-shuffling loader over the test
    set in ``self.test_loader``.

    :param num_clients: forwarded to ``split_data``.
    :param num_classes: forwarded to ``split_data``.
    :param batch_size: batch size for every loader.
    """
    # split data from users
    self.splited_data = split_data(self.train_x, self.train_y, num_clients, num_classes)

    # build data loader & send to user
    for uid, item in self.splited_data.items():
        n_rows = len(item[0])
        order = np.random.choice(n_rows, n_rows, replace=False)
        item[0], item[1] = item[0][order], item[1][order]
        print("-> send data to client:{}, size:{}".format(uid, len(item[1])))
        self.data_loader[uid] = DataLoader(
            Dataset(item[0], item[1]), batch_size, shuffle=True)

    # build test loader
    self.test_loader = DataLoader(
        Dataset(self.test_x, self.test_y), batch_size, shuffle=False)
def split_horizontal_data(self, num_clients, num_classes, batch_size=200):
    """Partition the training data per client and build all data loaders.

    ``split_data`` produces a mapping from client id to a two-element item
    (presumably ``[features, labels]`` -- confirm against ``split_data``).
    Both elements of each item are reordered in place with one shared random
    permutation.  For each client the number of entries equal to 0 and to 1 in
    the second element is printed, followed by a summary line.  A shuffling
    ``DataLoader`` per client is stored in ``self.data_loader`` and a
    non-shuffling loader over the test set in ``self.test_loader``.

    :param num_clients: forwarded to ``split_data``.
    :param num_classes: forwarded to ``split_data``.
    :param batch_size: batch size for every loader.
    """
    # split data from users
    self.splited_data = split_data(self.train_x, self.train_y, num_clients, num_classes)

    # build data loader & send to user
    for uid, item in self.splited_data.items():
        n_rows = len(item[0])
        order = np.random.choice(n_rows, n_rows, replace=False)
        item[0], item[1] = item[0][order], item[1][order]

        # Debug output: how many entries of each value this client received.
        y = item[1]
        z = len(np.where(y == 0)[0])
        zz = len(np.where(y == 1)[0])
        print(f"-> uid={uid} 0={z}, 1={zz} totoal={item[1].shape}\ny={y[:500]}")
        print("-> send data to client:{}, size:{}".format(uid, len(item[1])))

        self.data_loader[uid] = DataLoader(
            UCIDataset(item[0], item[1]), batch_size, shuffle=True)

    # build test loader
    self.test_loader = DataLoader(
        UCIDataset(self.test_x, self.test_y), batch_size, shuffle=False)
def main(self, seed=10):
    """Train the MADE model on weighted Ackley samples and plot the results.

    Seeds NumPy and PyTorch, draws ``self.nsample`` samples, weights them with
    ``weight(fval, self.goal, 4, mode='le')`` and splits them into train and
    test parts.  The model is trained with Adam for ``self.nepoch`` epochs in
    mini-batches of ``self.bsize``.  Per-epoch train and test NLL are recorded
    as Python floats and passed to ``self.plot_learning``.  Finally samples
    are drawn from the trained model and plotted.

    :param seed: seed for NumPy, PyTorch (CPU and all CUDA devices) and MADE.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    data = sample_data(self.nsample)
    fval = ackley(data[:, 0, :])
    weights = weight(fval, self.goal, 4, mode='le')
    xtr, xte, wtr, wte = split_data(data, label=weights)

    D = self.dim
    self.model: nn.Module = MADE(D, self.hiddens, D * 100, seed=seed)
    self.model.to(self.device)
    self.opt = torch.optim.Adam(self.model.parameters(), self.lr, weight_decay=0)

    B = self.bsize
    N, _, _ = xtr.shape
    nstep = N // B

    # The test tensors never change, so build them once instead of per epoch.
    xte_tens = torch.from_numpy(xte).to(self.device)
    wte_tens = torch.from_numpy(wte).to(self.device)
    xin_te = xte_tens[:, 0, :]
    xin_ind_te = xte_tens[:, 1, :].long()

    # per epoch
    tr_nll, te_nll = [], []
    for epoch_id in range(self.nepoch):
        # per batch
        tr_nll_per_b = 0.0
        self.model.train()
        for step in range(nstep):
            lo = step * B
            xb_tens = torch.from_numpy(xtr[lo:lo + B]).to(self.device)
            wb_tens = torch.from_numpy(wtr[lo:lo + B]).to(self.device)

            xin = xb_tens[:, 0, :]
            xin_ind = xb_tens[:, 1, :].long()

            loss = self.get_nll(xin, xin_ind, weights=wb_tens)
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()

            tr_nll_per_b += loss.item() / nstep

        self.model.eval()
        # No autograd graph is needed for evaluation. Storing a float rather
        # than the loss tensor keeps the history consistent with tr_nll and
        # avoids retaining one graph per epoch.
        with torch.no_grad():
            te_loss = self.get_nll(xin_te, xin_ind_te, weights=wte_tens).item()
        te_nll.append(te_loss)

        print(f'epoch = {epoch_id}, tr_nll = {tr_nll_per_b}')
        print(f'epoch = {epoch_id}, te_nll = {te_loss}')
        tr_nll.append(tr_nll_per_b)

    self.plot_learning(tr_nll, te_nll)

    x1 = np.linspace(start=-5, stop=5, num=100)
    x2 = np.linspace(start=-5, stop=5, num=100)
    samples, _ = self.sample_model(self.nsample, x1, x2)
    samples = samples.to(self.cpu).data.numpy()

    plot_data(samples, scatter_loc='figs/test_model3_scatter.png',
              hist_loc='figs/test_model3_hist2D.png')
################################################################################ ################################################################################ ## MAIN ######################################################################## ################################################################################ if __name__ == '__main__': ## RANDOM TESTING ############################################################## X,Y = load_data_from_csv('../data/binary.csv', 4, float) X,Y = bootstrap_data(X, Y, 1000) # X,mu,scale = rescale(X) Xtr,Xte,Ytr,Yte = split_data(X, Y, .8) nc = NNetClassify(Xtr, Ytr, [4,2,3,2], init='random', max_steps=5000, activation='htangent') print(nc.get_weights()) print(nc) print(nc.predict(Xte)) print(nc.predict_soft(Xte)) print(nc.err(Xte, Yte)) ## DETERMINISTIC TESTING ####################################################### # data = [[float(val) for val in row[:-1]] for row in csv.reader(open('../data/classifier-data.csv'))] # trd = np.asarray(data[0:40] + data[50:90] + data[100:140]) # ted = np.asarray(data[40:50] + data[90:100] + data[140:150]) # classes = [float(row[-1].lower()) for row in csv.reader(open('../data/classifier-data.csv'))] # trc = np.asarray(classes[0:40] + classes[50:90] + classes[100:140])
def main(self, seed=10):
    """Train the MADE model on pre-weighted samples and plot the results.

    Seeds NumPy and PyTorch, draws ``self.nsample`` samples together with
    their weights, plots them and splits them into train and test parts.  The
    model is trained with Adam for ``self.nepoch`` epochs in mini-batches of
    ``self.bsize``.  Per-epoch train and test NLL are recorded as Python
    floats and passed to ``self.plot_learning``.  Finally 10000 samples are
    drawn from the trained model and plotted.

    :param seed: seed for NumPy, PyTorch (CPU and all CUDA devices) and MADE.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    data, weights = sample_data(self.nsample)
    plot_data(data[:, 0, :], label=weights)
    xtr, xte, wtr, wte = split_data(data, label=weights)

    D = self.dim
    self.model: nn.Module = MADE(D, self.hiddens, D * 100, seed=seed)
    self.model.to(self.device)
    self.opt = torch.optim.Adam(self.model.parameters(), self.lr, weight_decay=0)

    B = self.bsize
    N, _, _ = xtr.shape
    nstep = N // B

    # The test tensors never change, so build them once instead of per epoch.
    xte_tens = torch.from_numpy(xte).to(self.device)
    wte_tens = torch.from_numpy(wte).to(self.device)
    xin_te = xte_tens[:, 0, :]
    xin_ind_te = xte_tens[:, 1, :].long()

    # per epoch
    tr_nll, te_nll = [], []
    for epoch_id in range(self.nepoch):
        # per batch
        tr_nll_per_b = 0.0
        self.model.train()
        for step in range(nstep):
            lo = step * B
            xb_tens = torch.from_numpy(xtr[lo:lo + B]).to(self.device)
            wb_tens = torch.from_numpy(wtr[lo:lo + B]).to(self.device)

            xin = xb_tens[:, 0, :]
            xin_ind = xb_tens[:, 1, :].long()

            loss = self.get_nll(xin, xin_ind, weights=wb_tens, debug=False)
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()

            tr_nll_per_b += loss.item() / nstep

        self.model.eval()
        # No autograd graph is needed for evaluation. Storing a float rather
        # than the loss tensor keeps the history consistent with tr_nll and
        # avoids retaining one graph per epoch.
        with torch.no_grad():
            te_loss = self.get_nll(xin_te, xin_ind_te, weights=wte_tens).item()
        te_nll.append(te_loss)

        print(f'epoch = {epoch_id}, tr_nll = {tr_nll_per_b}')
        print(f'epoch = {epoch_id}, te_nll = {te_loss}')
        tr_nll.append(tr_nll_per_b)

    self.plot_learning(tr_nll, te_nll)

    x1 = np.linspace(start=-5, stop=5, num=100)
    x2 = np.linspace(start=-5, stop=5, num=100)
    samples, _ = self.sample_model(10000, x1, x2)
    samples = samples.to(self.cpu).data.numpy()

    plot_data(samples, scatter_loc='figs/test_model_4_scatter.png',
              hist_loc='figs/test_model_4_hist2D.png')
return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1) # if K.image_dim_ordering() == 'th': # input_shape = (3, img_width, img_height) # else: # input_shape = (img_width, img_height, 3) # N = 200 N_INGREDIENTS = 100 # images = np.random.normal(size=[N, 3, 32, 32]) # ingredientes = np.random.randint(low=0, high=2, size=[N, N_INGREDIENTS]) # print 'sum', np.sum(ingredientes, axis=1) train_path, test_path, data_train, data_test = data.split_data( 'pre-processed-recipes-ctc.json', './data/recipes-ctc/', train=0.15) # Load images and ingredients array input_images, input_ingredients = data.load( data_train, train_path, img_width=32, img_height=32, file_ingredients='./data/ingredients.txt') NB_INPUT, NB_INGREDIENTS = input_ingredients.shape print 'nb_input={}, nb_ingredients={}'.format(NB_INPUT, NB_INGREDIENTS) input_image = Input(shape=[3, 32, 32]) x = Convolution2D(20, 3, 3)(input_image)
# model,train_hist,_ = train_model(model,X_train,y_train,num_epochs=120) # DAYS_TO_PREDICT = 12 # predicted_cases,_ = predict_daily_cases(model,X_train,y_train,DAYS_TO_PREDICT,seq_len,scaler) # predicted_cases = pd.Series(data=predicted_cases, # index=pd.date_range(start=diff_daily_cases.index[-1], # periods=DAYS_TO_PREDICT + 1, # closed='right')) # # plot_data(predicted_cases,'Predictions',label='Predicted Daily Cases') # plot_real_predicted(diff_daily_cases,predicted_cases) if __name__ == '__main__': setup_params() diff_daily_cases = prepare_data('time_series_19-covid-Confirmed.csv') train_data, test_data = split_data(diff_daily_cases, 20) train_data, test_data, scaler = scale_data(diff_daily_cases, train_data, test_data) seq_len = 5 X_train, y_train = create_sequences(train_data, seq_len) X_test, y_test = create_sequences(test_data, seq_len) model = CoronaVirusPredictor(n_features=1, n_hidden=512, seq_len=seq_len, n_layers=2) model, train_hist, test_hist = train_model(model, X_train, y_train, X_test, y_test) plot_losses(train_hist, test_hist)
def main():
    """Train the ingredient classifier end to end and optionally evaluate it.

    Steps: split the recipe dataset into train / validation / test, load the
    images and ingredient arrays, compute (or load cached) per-ingredient
    weights, generate bottleneck features and train the top model when their
    files are missing, fine-tune the network, and finally evaluate the saved
    model on the test split when ``evaluate_model`` is true.

    :raises AssertionError: if evaluation is requested but the final model
        file does not exist.
    """
    # TODO: save the model as JSON plus separate weights so that saving custom
    # loss functions does not raise an error; also fix saving of the history
    # and the plotting of figures.
    # K.set_image_dim_ordering('th')

    override = False
    evaluate_model = True
    # validation_split = 0.05  # 10 % of train data for validation, the last % of the data is used for validation
    nb_epoch = 90  # 100
    dropout = 0.5
    neurons_last_layer = 1024  # 512, 1024 256, 4096
    my_batch_size = 32
    custom_loss = None  # 'weighted_binary_crossentropy' or None for binary_crossentropy
    file_dist_ingredients_dict = 'inverse_distribution_ingredients_dict.npy'
    file_dist_ingredients_array = 'inverse_distribution_ingredients_array.npy'
    file_ingredients = './data/new-ingredients.txt'

    print('Current parameters: nb_epoch={}, custom_loss={}, neurons_last_layer={}'.format(
        nb_epoch, custom_loss, neurons_last_layer))

    # Generate data for training and test
    #
    # data.split_data('pre-processed-full-recipes-dataset-v2.json', './data/full-recipes-dataset/', train=0.9)
    # train_path, val_path, test_path, data_train, data_val, data_test = data.split_data('pre-processed-recipes-ctc.json', './data/recipes-ctc/',
    #                                                                                   train=0.2, validation_split=0.1)
    train_path, val_path, test_path, data_train, data_val, data_test = data.split_data(
        'pre-processed-full-recipes-dataset-v2.json',
        './data/full-recipes-dataset/',
        train=0.9,
        validation_split=0.1)

    # Load images and ingredients array. First for training and then for validation
    input_images_train, input_ingredients_train = data.load(
        data_train, train_path,
        img_width=C.IMG_WIDTH, img_height=C.IMG_HEIGHT,
        file_ingredients=file_ingredients)
    input_images_val, input_ingredients_val = data.load(
        data_val, val_path,
        img_width=C.IMG_WIDTH, img_height=C.IMG_HEIGHT,
        file_ingredients=file_ingredients)

    # Calculate the distribution of each ingredient in the data set for training.
    # This distribution will be used as a weight in the loss function; frequent
    # ingredients will be assigned small weights.
    # https://github.com/fchollet/keras/pull/188
    if not os.path.exists(file_dist_ingredients_dict) or override:
        ingredients_weight_dict, ingredients_weight_array = dist_samples_per_ingredient(
            data=data_train,
            file_ingredients=file_ingredients,
            generate_figure=False,
            image_file='dist_ingredients_train.png')
        # .npy is a binary format: write in binary mode and close the handles.
        with open(file_dist_ingredients_dict, 'wb') as dict_file:
            np.save(dict_file, ingredients_weight_dict)
        with open(file_dist_ingredients_array, 'wb') as array_file:
            np.save(array_file, ingredients_weight_array)
    else:
        # NOTE(review): if the saved object was a plain dict, np.load returns
        # it wrapped in a 0-d object array -- confirm the consumers of
        # class_weight accept that form as well as the freshly computed one.
        with open(file_dist_ingredients_dict, 'rb') as dict_file:
            ingredients_weight_dict = np.load(dict_file)
        with open(file_dist_ingredients_array, 'rb') as array_file:
            ingredients_weight_array = np.load(array_file)
        print('Loaded file {}'.format(file_dist_ingredients_dict))
        print('Loaded file {}'.format(file_dist_ingredients_array))

    print(ingredients_weight_dict)
    print(ingredients_weight_array)

    # The default loss takes the dict form, the custom loss the array form.
    class_weight = None
    if custom_loss is None:
        class_weight = ingredients_weight_dict
    elif custom_loss == 'weighted_binary_crossentropy':
        class_weight = ingredients_weight_array

    # Define which gpu we are going to use
    # with TB.tf.device('/gpu:1'):
    # TODO when using a custom metric keras doesn't load the model properly: avoid using custom metrics, or change
    # the save function to save the weights and json of the model
    if not os.path.exists(C.file_bottleneck_features_train) or override:
        classifier2.save_bottlebeck_features(
            C.file_bottleneck_features_train,
            C.file_bottleneck_features_validation,
            img_width=C.IMG_WIDTH, img_height=C.IMG_HEIGHT,
            input_data_train=input_images_train,
            input_data_validation=input_images_val,
            batch_size=my_batch_size)

    if not os.path.exists(C.top_model_weights_path) or override:
        classifier2.train_top_model(
            C.file_bottleneck_features_train,
            C.file_bottleneck_features_validation,
            C.top_model_weights_path,
            nb_epoch=nb_epoch, batch_size=my_batch_size,
            dropout=dropout,
            neurons_last_layer=neurons_last_layer,
            train_ingredients=input_ingredients_train,
            val_ingredients=input_ingredients_val,
            custom_loss=custom_loss,
            class_weight=class_weight)

    classifier3.fine_tuning(
        C.top_model_weights_path,
        final_vgg16_model=C.final_vgg16_model,
        img_width=C.IMG_WIDTH, img_height=C.IMG_HEIGHT,
        batch_size=my_batch_size, nb_epoch=nb_epoch,
        train_ingredients=input_ingredients_train,
        val_ingredients=input_ingredients_val,
        train_data=input_images_train,
        validation_data=input_images_val,
        # validation_split=validation_split,
        class_weight=class_weight,
        dropout=dropout,
        neurons_last_layer=neurons_last_layer,
        custom_loss=custom_loss)

    # Evaluate test data with the final model
    if evaluate_model:
        assert os.path.exists(
            C.final_vgg16_model), 'File for the model <{}> not found.'.format(
                C.final_vgg16_model)
        evaluate(data_test, test_path, C.final_vgg16_model, file_ingredients=file_ingredients)
def main(self, seed=10):
    """Train the mixture-output MADE model and plot the results.

    Seeds NumPy and PyTorch, draws ``self.nsample`` samples with their
    ``delta`` and weights, plots the data and splits it into train and test
    parts.  The model is trained with Adam and a ``StepLR`` schedule for
    ``self.nepoch`` epochs in mini-batches of ``self.bsize``.  Per-epoch train
    and test NLL are recorded as Python floats and passed to
    ``self.plot_learning``.  Finally 1000 samples are drawn from the trained
    model and plotted.

    :param seed: seed for NumPy, PyTorch (CPU and all CUDA devices) and MADE.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    data, delta, weights = self.sample_data(self.nsample)
    self.plot_data(data[:, 1, :].astype('int'))
    xtr, xte, wtr, wte = split_data(data, label=weights)

    D = self.dim
    self.model: nn.Module = MADE(D, self.hiddens, D * 3 * self.nr_mix, seed=seed)
    self.model.to(self.device)
    self.opt = torch.optim.Adam(self.model.parameters(), self.lr, weight_decay=0)
    self.lr_sch = StepLR(self.opt, step_size=50, gamma=0.9)

    B = self.bsize
    N, _, _ = xtr.shape
    nstep = N // B

    # The test tensors never change, so build them once instead of per epoch.
    xte_tens = torch.from_numpy(xte).to(self.device)
    wte_tens = torch.from_numpy(wte).to(self.device)
    xin_te = xte_tens[:, 0, :]

    # per epoch
    tr_nll, te_nll = [], []
    plt.figure(figsize=(15, 8))
    for epoch_id in range(self.nepoch):
        # per batch
        tr_nll_per_b = 0.0
        self.model.train()
        for step in range(nstep):
            lo = step * B
            xb_tens = torch.from_numpy(xtr[lo:lo + B]).to(self.device)
            wb_tens = torch.from_numpy(wtr[lo:lo + B]).to(self.device)

            xin = xb_tens[:, 0, :]

            loss = self.get_nll(xin, delta, weights=wb_tens, debug=False)
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()

            tr_nll_per_b += loss.item() / nstep

        # Advance the schedule once per epoch, after the optimizer updates.
        # Passing an explicit epoch to step() is deprecated, and calling it
        # per batch only re-set the same per-epoch learning rate.
        self.lr_sch.step()

        self.model.eval()
        # No autograd graph is needed for evaluation. Storing a float rather
        # than the loss tensor keeps the history consistent with tr_nll and
        # avoids retaining one graph per epoch.
        with torch.no_grad():
            te_loss = self.get_nll(xin_te, delta, weights=wte_tens, debug=False).item()
        te_nll.append(te_loss)

        print(f'epoch = {epoch_id}, tr_nll = {tr_nll_per_b}')
        print(f'epoch = {epoch_id}, te_nll = {te_loss}')
        tr_nll.append(tr_nll_per_b)

    self.plot_learning(tr_nll, te_nll)

    samples, samples_ind = self.sample_model(1000, delta)
    samples_ind = samples_ind.to(self.cpu).data.numpy().astype('int')

    self.plot_data(samples_ind, scatter_loc='figs/test_model_6_scatter.png',
                   hist_loc='figs/test_model_6_hist2D.png')