def main():
    dataset_dir = sys.argv[1]
    train_rate = float(sys.argv[2])

    # LOADING IMAGE NAMES AND LABELS
    labeled_set, hidden_set = dataset_loader.load_dataset(dataset_dir)
    print('Labeled set size: {}\nHidden set size: {}'.format(len(labeled_set), len(hidden_set)))

    # SHUFFLING IMAGES
    random.shuffle(labeled_set)

    # LOADING IMAGES INTO NUMPY ARRAYS
    X_hidden = dataset_loader.load_images(hidden_set)
    X = dataset_loader.load_images([row[0] for row in labeled_set])
    y = np.array([row[1] for row in labeled_set], dtype=np.int64)
    # One-hot encoding of the labels (10 classes)
    y = np.array([[1 if i == classification else 0 for i in range(10)] for classification in y])

    # FLATTENING IMAGES AND SCALING PIXELS TO [0, 1]
    X_hidden = X_hidden.reshape(X_hidden.shape[0], -1)
    X_hidden /= 255
    X = X.reshape(X.shape[0], -1)
    X /= 255

    # TRAIN/VALIDATION SPLIT
    X_train, X_validation, y_train, y_validation = dataset_loader.dataset_split(X, y, train_rate)
    print('X_train: {}\nX_validation: {}\ny_train: {}\ny_validation: {}'.format(
        X_train.shape, X_validation.shape, y_train.shape, y_validation.shape))

def main(cfg):
    # parse config
    USER_ID = cfg["COLUMNS"]["USER_ID"]
    PREDICTION = cfg["COLUMNS"]["PREDICTION"]
    SUBMISSION_FILE = path.Path(cfg["SUBMISSION"]["FilePath"])

    with open('state_dict.json', mode='r') as inp:
        state_dict = json.load(inp)

    columns_meta = state_dict['columns_meta']
    ds, *_ = load_dataset(cfg, True, columns_meta)
    models_cnt = len(state_dict['models'])
    preds = np.zeros(len(ds))

    # Each saved booster votes with its own decision threshold
    for d in state_dict['models']:
        model = d['model']
        thr = d['model_thr']
        probs = lgb.Booster(model_file=model).predict(ds)
        preds += probs > thr

    # Majority vote across the boosters
    submission = pd.DataFrame({
        USER_ID: ds.index,
        PREDICTION: (preds >= models_cnt // 2).astype(np.int32)
    })
    submission.to_csv(SUBMISSION_FILE, index=False)

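# A hypothetical shape for cfg, inferred only from the keys this script reads;
# the column names and file path below are placeholders, not taken from the source.
example_cfg = {
    "COLUMNS": {"USER_ID": "user_id", "PREDICTION": "prediction"},
    "SUBMISSION": {"FilePath": "submission.csv"},
}
# main(example_cfg)
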
def load_dataset_from_output_dir(dataset_name, output_dir):
    DATASETS[dataset_name]["ann_file"] = os.path.join(output_dir, "predictions.json")
    dataset = load_dataset(dataset_name)
    emb_file = os.path.join(output_dir, "embeddings.npy")
    dataset.add_embeddings_file(emb_file)
    # dataset.filter_by_score(0.5)
    # dataset.filter_by_cat_name("person")
    return dataset

def request_dataset(path):
    # NOTE: the `path` argument is not used; images are loaded from PACK_PATH.
    print("\n***** Load dataset *****")
    dataset, classes = dataset_loader.load_dataset(path=PACK_PATH+"/images",
                                                   img_h=FLAGS.height,
                                                   img_w=FLAGS.width)
    num_train = dataset.train.amount
    num_test = dataset.test.amount
    print(" Num of Train images : "+str(num_train))
    print(" Num of Test images  : "+str(num_test))

    return dataset, classes, min(num_train, num_test)

def add_custom_metric(folder=None, dataset="radiomics1", old_way=False, verbose=True):
    """
    I am hardcoding all this for now.
    This should only work for datasets similar to radiomics1
    (where a 3D volume is divided into 3-channel 2D slices).
    """
    (x_train, y_train), (x_test, y_test) = load_dataset(dataset)
    patients_train, patients_test = load_patients_dataset(dataset)

    # Navigate to folder and load results.yaml
    if folder is not None:
        os.chdir(folder)
    with open("results.yaml") as f:
        try:
            result = yaml.load(f)
        except yaml.YAMLError as YamlError:
            print("There was an error parsing 'results.yaml'. Plotting aborted.")
            print(YamlError)
            if folder is not None:
                os.chdir("./..")
            return

    # Start from a clean output file
    try:
        os.remove("new_results.yaml")
        print("File 'new_results.yaml' removed")
    except FileNotFoundError:
        pass

    folders = sorted(result.keys())
    for id in folders:
        print("Folder:", id)
        custom_params = calc_vol_acc_Tr_Te(x_train, y_train, x_test, y_test,
                                           patients_train, patients_test,
                                           old_way, folder=id)
        vol_acc_Tr, vol_acc_Te, num_volTr, num_volTe, num_Tr, num_Te = custom_params
        result[id]["result"]["volAccTr"] = float(vol_acc_Tr)
        result[id]["result"]["volAccTe"] = float(vol_acc_Te)
        result[id]["result"]["num2dImagesTr"] = num_Tr
        result[id]["result"]["num2dImagesTe"] = num_Te
        result[id]["result"]["num3dVolumesTr"] = num_volTr
        result[id]["result"]["num3dVolumesTe"] = num_volTe
        if verbose:
            print("accTr:", result[id]["result"]["accTr"])
            print("accTe:", result[id]["result"]["accTe"])
            print("volAccTr:", result[id]["result"]["volAccTr"])
            print("volAccTe:", result[id]["result"]["volAccTe"])
            print("num2dImagesTr:", result[id]["result"]["num2dImagesTr"])
            print("num2dImagesTe:", result[id]["result"]["num2dImagesTe"])
            print("num3dVolumesTr:", result[id]["result"]["num3dVolumesTr"])
            print("num3dVolumesTe:", result[id]["result"]["num3dVolumesTe"])
            print(" ")
        with open("new_results.yaml", "a") as f:
            f.write(yaml.dump_all([{id: result[id]}],
                                  default_flow_style=False,
                                  explicit_start=False))

    if folder is not None:
        os.chdir("./..")

# coding: utf-8
# Dataset preview: check that the loader works correctly
# by z0gSh1u @ https://github.com/z0gSh1u
import cv2
from dataset_loader import load_dataset

train_x, train_y, test_x, test_y, classes = load_dataset()
print('Size of train set={}'.format(train_x.shape))  # [number, x, y, channel]
print('Size of test set={}'.format(test_x.shape))
print('Snapping train_y[18]={}'.format(train_y[18]))
cv2.imshow('snap.jpg', train_x[18, :, :, :])
cv2.waitKey()

import os

import tensorflow as tf
from tensorflow import keras

import dataset_loader as dl
from models import model_cnn_conv1d, model_cnn_conv2d, model_crnn_conv1d
from model_final import EventDetector
from model_exporter import export_model

conv_type = '1d'
model_to_use = 'cnn'
final = True

dataset_path = './dataset'
train_dataset_path = os.path.join(dataset_path, 'train')
test_dataset_path = os.path.join(dataset_path, 'test')

train_dataset_raw = dl.load_dataset(train_dataset_path)
test_dataset_raw = dl.load_dataset(test_dataset_path)
labels_dict = train_dataset_raw['labels_str']
labels_count = len(labels_dict)

train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_dataset_raw['features'], train_dataset_raw['labels']))
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_dataset_raw['features'], test_dataset_raw['labels']))

if conv_type == '2d':
    # 2D convolutions expect a trailing channel axis and one-hot labels
    train_dataset = train_dataset.map(lambda x, y: (tf.expand_dims(x, -1),
                                                    tf.keras.backend.one_hot(y, labels_count)))
    test_dataset = test_dataset.map(lambda x, y: (tf.expand_dims(x, -1),
                                                  tf.keras.backend.one_hot(y, labels_count)))

# Hyper Params
EPOCH = 200
BATCH_SIZE = 64
LR = 0.001
OPTIMIZER = Adam(lr=LR)
LOSSFUNC = 'categorical_crossentropy'
VALIDATION_SPLIT = 0.1

# Other Params
IMG_ROWS, IMG_COLS = 64, 64
CLASSES = 6
INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 3)

# Load dataset
x_train, y_train, x_test, y_test, _ = load_dataset()

# Normalization
x_train = x_train / 255
x_test = x_test / 255

# One-hot
y_train = np_utils.to_categorical(y_train, CLASSES)
y_test = np_utils.to_categorical(y_test, CLASSES)

# Build model
model = SignNet.build(INPUT_SHAPE, CLASSES)
model.compile(optimizer=OPTIMIZER, loss=LOSSFUNC, metrics=['accuracy'])
model.summary()

# === For TensorBoard usage, uncomment these if you need them
# cb_tf = keras.callbacks.TensorBoard(write_images=1, histogram_freq=1)
# cbks = [cb_tf]

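# A minimal training sketch, assuming the hyperparameters above are meant to be
# passed straight to Keras' model.fit; the original snippet stops after
# model.summary(), so this call is an assumption, not part of the source.
history = model.fit(x_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCH,
                    validation_split=VALIDATION_SPLIT)
loss, acc = model.evaluate(x_test, y_test, verbose=0)
print('Test loss: {:.4f}, test accuracy: {:.4f}'.format(loss, acc))
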
import sys

from dataset_loader import load_dataset
from models import m_alexnet, m_googlenet, m_resnet18, m_resnet50, m_resnet110

if __name__ == '__main__':
    training_dataset, testing_dataset = load_dataset()

    print("Menu:")
    print("1. AlexNet")
    print("2. GoogLeNet")
    print("3. ResNet-18")
    print("4. ResNet-50")
    print("5. ResNet-110")
    print("6. Exit")
    user_input = int(input("Enter your selection: "))
    if user_input > 5 or user_input < 1:
        sys.exit()

    epochs = int(input("\nNumber of epochs: "))
    lr = float(input("Learning rate: "))
    load_w = int(input("Load weights? (1 for yes, 0 for no) "))
    save_w = int(input("Save weights? (1 for yes, 0 for no) "))
    if load_w == 1:
        load_weights = True

def main(cfg):
    # parse config
    dataset, trg = load_dataset(cfg, False, None)
    print(f'Dataset shape: {dataset.shape}')
    print('Dataset:')
    print(dataset.head())
    print('Target:')
    print(trg.head())
    print()
    print('Columns:')
    print('\n'.join(dataset.columns.to_list()))

    state_dict = {'columns_meta': dataset.columns.to_list(), 'models': []}

    gains = trg[cfg['COLUMNS']['GAINS']].to_numpy()
    n_calls = trg[cfg['COLUMNS']['N_CALLS']].to_numpy()

    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    folds = [(tr, te) for tr, te in kfold.split(dataset, trg.sale_flg)]

    X_train = lgb.Dataset(
        data=dataset,
        label=trg.sale_flg.to_numpy(),
    )
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'learning_rate': 0.05,
        'subsample': 0.7,
        'class_weight': 'balanced',
        'colsample_bytree': 0.7,
        'max_depth': 5,
        'num_leaves': 256,
    }
    trees = 1000

    cv = lgb.cv(params,
                X_train,
                show_stdv=False,
                verbose_eval=True,
                num_boost_round=trees,
                early_stopping_rounds=50,
                return_cvbooster=True,
                folds=kfold)
    boosters = cv.pop('cvbooster', None).boosters

    for i, (b, (tr, te)) in enumerate(zip(boosters, folds)):
        model_name = f'model_{i}.bst'
        model_thr, model_sc = find_threshold(b, dataset.iloc[te], gains[te], n_calls[te])
        state_dict['models'].append({
            'model': model_name,
            'model_thr': model_thr,
            'model_sc': model_sc,
            'model_auc': roc_auc_score(trg.sale_flg.iloc[te],
                                       b.predict(dataset.iloc[te]))
        })
        b.save_model(model_name)

    for md in state_dict['models']:
        print(f'Found threshold {md["model_thr"]:4.3f} '
              f'with score {md["model_sc"]:10.3f} for model {md["model"]}')
        print(f'AUC {md["model_auc"]} for model {md["model"]}')

    with open('state_dict.json', mode='w') as out:
        json.dump(state_dict, out)

epochs = 100
save_dir = os.path.join(os.getcwd(), "saved_models")
model_name = "keras_cifar10_trained_model.h5"
train_dataset = "../dataset/train/train"

dataset_ids = np.genfromtxt(
    os.path.join(train_dataset, "..", "train.truth.csv"),
    delimiter=",",
    skip_header=True,
    dtype=str,
)
dataset_ids = {x[0]: x[1] for x in dataset_ids}

(x_train, y_train), (x_validation, y_validation) = load_dataset(train_dataset, dataset_ids, 0.1)
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_validation = tf.keras.utils.to_categorical(y_validation, num_classes)

model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same", input_shape=x_train.shape[1:]))
model.add(Activation("relu"))
model.add(Conv2D(32, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))

        # end of the preceding training method: persist the trained model
        self.save()
        print("model saved!")

    def load(self):
        import os
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
        if not os.path.exists(self.saving_path):
            os.makedirs(self.saving_path)
        if not tf.train.checkpoint_exists(self.saving_path + 'checkpoint'):
            print('Saved temp_models not found! Randomly initialized.')
        else:
            self.saver.restore(self.sess, self.saving_path)
            print('Model loaded!')

    def save(self):
        self.saver.save(self.sess, self.saving_path)

    def predict(self, data):
        return np.argmax(
            self.sess.run(self.pred_label, feed_dict={self.input_data: data}), 1)


train_data, train_labels, test_data, test_labels = dataset_loader.load_dataset()
model = DenseNN(300, 3)
model.train(train_data, train_labels, test_data, test_labels)
print(model.predict(test_data))

def setUp(self):
    (X, Y) = load_dataset()
    self.X = X
    self.Y = Y

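# A hypothetical follow-up test (an assumption, not from the original file), only
# to illustrate how the fixtures loaded in setUp would be used: it checks that
# the features and labels returned by load_dataset stay aligned.
def test_features_and_labels_aligned(self):
    self.assertEqual(len(self.X), len(self.Y))
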
import numpy as np
import os, sys

currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

cdr = os.path.dirname(__file__)
if len(cdr) != 0:
    os.chdir(cdr)

from dataset_loader import load_dataset

rx_train, ry_train, rx_test, ry_test, labels, skip_ratio = load_dataset()

cpi_filename = "cp_xnn/cp_info.text"
if not os.path.isfile(cpi_filename):
    print("Error no cpi_filename {}".format(cpi_filename))
    sys.exit(-1)
with open(cpi_filename, 'r') as file:
    test_num = int(file.readline().rstrip())
    feature_num = int(file.readline().rstrip())
    skip_ratio = int(file.readline().rstrip())
    model_name = file.readline()
print("CPI", test_num, feature_num, skip_ratio, model_name)

cpe_filename = "cp_xnn/MLPClassifier.class"
if not os.path.isfile(cpe_filename):
    print("Error no cpe_filename {}".format(cpe_filename))
    sys.exit(-1)

print("| Running: {:<72} |".format(" ".join(sys.argv))) print("| Time: {:<72} |".format("{} {:02d}:{:02d}:{:02d}".format( now.date(), now.hour, now.minute, now.second))) print( "---------------------------------------------------------------------------------------" ) print("Arguments used:") for arg in args._get_kwargs(): print(" {} : {}".format(arg[0], arg[1])) print(" ") # Imports that load the TensorFlow backend (slow, should only happen if we are going to use it) modular_NN = import_module("modular_neural_network") experiment = getattr(modular_NN, args.experiment) # Only import experiment used data = load_dataset(args.dataset) from results_plotter import plot_results from results_observer import observe_results from keras_experiments import experiments_runner # Run all experiments (according to the chosen experiment, performed over the chosen dataset) # and save results into folder with chosen folder name. #epochs and dr can also be set t = clock() # Start measure of time taken folder = experiments_runner(data, experiment, folder=args.folder, data_reduction=args.data_reduction, epochs=args.number_epochs, early_stopping=args.early_stopping) print("\nTime Taken to perform Experiment: {} s\n\n".format( timedelta(seconds=clock() - t)))
observe_training = 0
num_columns = 5

if len(sys.argv) > 1 and sys.argv[1].lower() != "none":
    folder = sys.argv[1]
if len(sys.argv) > 2 and sys.argv[2].lower() != "none":
    filename = sys.argv[2]
if len(sys.argv) > 3:
    dataset_name = sys.argv[3]
if len(sys.argv) > 4:
    mode = int(sys.argv[4])
if len(sys.argv) > 5:
    observe_training = int(sys.argv[5])
if len(sys.argv) > 6:
    num_columns = int(sys.argv[6])

data = load_dataset(dataset_name)
observe_results(data,
                folder=folder,
                filename=filename,
                mode=mode,
                data_reduction=None,
                observe_training=observe_training,
                num_columns=num_columns,
                custom_observation=custom_observation,
                old_way=old_way)

"""
Expects:
    py results_observer.py
    py results_observer.py folder
    py results_observer.py folder filename dataset_name
    py results_observer.py folder filename dataset_name mode(0-2)

import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
from PIL import Image
from scipy.io import wavfile

import dataset_loader
import conv2d

NEED_TO_CREATE_DATASET = False
NEED_TO_CREATE_H5 = False

if NEED_TO_CREATE_DATASET:
    dataset_loader.create_dataset("WAV_mini_speech_commands/")
if NEED_TO_CREATE_H5:
    all_set = dataset_loader.load_dataset("IMG_mini_speech_commands/", 64, 64)
    dataset, labels = dataset_loader.join_sets(all_set)
    train_set, train_labels, test_set, test_labels = dataset_loader.split_dataset(dataset, labels, 0.8)
    dataset_loader.create_h5_dataset(train_set, train_labels, test_set, test_labels)

train_set, train_labels, test_set, test_labels = dataset_loader.load_h5_dataset()

BATCH_SIZE = 128
train_set, train_labels = dataset_loader.create_batch(train_set, train_labels, BATCH_SIZE)
test_set, test_labels = dataset_loader.create_batch(test_set, test_labels, BATCH_SIZE)

CNN = conv2d.CNN()

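# A minimal continuation sketch, assumed rather than taken from the original file:
# the torch.optim import above suggests a standard PyTorch training setup, and this
# assumes conv2d.CNN is an nn.Module. The learning rate is a placeholder value.
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN.parameters(), lr=1e-3)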