Example #1
import random
import sys

import numpy as np

import dataset_loader


def main():
	dataset_dir = sys.argv[1]
	train_rate = float(sys.argv[2])
	
	# LOADING IMAGE NAMES AND LABELS
	
	labeled_set, hidden_set = dataset_loader.load_dataset(dataset_dir)
	print('Labeled set size: {}\nHidden set size: {}'.format(len(labeled_set), len(hidden_set)))
	
	# SHUFFLING IMAGES
		
	random.shuffle(labeled_set)
	
	# LOADING IMAGES ON A NUMPY ARRAY
	
	X_hidden = dataset_loader.load_images(hidden_set)
	X = dataset_loader.load_images([row[0] for row in labeled_set])
	y = np.array([row[1] for row in labeled_set], dtype = np.int64)
	y = np.array([[1 if i == classification else 0 for i in range(10)] for classification in y])	
	
	# RESHAPING IMAGES INTO FLAT VECTORS AND NORMALIZING TO [0, 1]
	
	X_hidden = X_hidden.reshape(X_hidden.shape[0], -1)
	X_hidden /= 255
	
	X = X.reshape(X.shape[0], -1)
	X /= 255
	
	# TRAIN/TEST SPLIT
	
	X_train , X_validation, y_train, y_validation = dataset_loader.dataset_split(X, y, train_rate)
	
	print('X_train: {}\nX_validation: {}\ny_train: {}\ny_validation: {}'.format(X_train.shape, X_validation.shape, y_train.shape, y_validation.shape))		
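dataset_loader.dataset_split is not shown in this example; below is a minimal sketch of what such a helper might look like, assuming it simply slices the already-shuffled arrays at the train_rate boundary (the actual implementation may differ):

def dataset_split(X, y, train_rate):
    # Hypothetical helper: split already-shuffled arrays into train/validation parts.
    cut = int(len(X) * train_rate)
    return X[:cut], X[cut:], y[:cut], y[cut:]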
Example #2
import json

import lightgbm as lgb
import numpy as np
import pandas as pd
import path


def main(cfg):
    # parse config
    USER_ID = cfg["COLUMNS"]["USER_ID"]
    PREDICTION = cfg["COLUMNS"]["PREDICTION"]
    SUBMISSION_FILE = path.Path(cfg["SUBMISSION"]["FilePath"])

    with open('state_dict.json', mode='r') as inp:
        state_dict = json.load(inp)

    columns_meta = state_dict['columns_meta']
    ds, *_ = load_dataset(cfg, True, columns_meta)

    models_cnt = len(state_dict['models'])
    preds = np.zeros(len(ds))
    for d in state_dict['models']:
        model = d['model']
        thr = d['model_thr']
        probs = lgb.Booster(model_file=model).predict(ds)
        preds += probs > thr

    submission = pd.DataFrame({
        USER_ID: ds.index,
        PREDICTION: (preds >= models_cnt // 2).astype(np.int32)
    })
    submission.to_csv(SUBMISSION_FILE, index=False)
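For reference, the state_dict.json read here is produced by Example #10 further down; its structure is roughly as follows (column names and numeric values are placeholders):

state_dict = {
    "columns_meta": ["feature_1", "feature_2"],  # column list saved at training time
    "models": [
        {"model": "model_0.bst", "model_thr": 0.42, "model_sc": 123.4, "model_auc": 0.78},
        {"model": "model_1.bst", "model_thr": 0.38, "model_sc": 118.9, "model_auc": 0.76},
    ],
}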
Example #3
def load_dataset_from_output_dir(dataset_name, output_dir):
    DATASETS[dataset_name]["ann_file"] = os.path.join(output_dir,
                                                      "predictions.json")
    dataset = load_dataset(dataset_name)

    emb_file = os.path.join(output_dir, "embeddings.npy")
    dataset.add_embeddings_file(emb_file)
    # dataset.filter_by_score(0.5)
    # dataset.filter_by_cat_name("person")
    return dataset
Example #4
def request_dataset(path):

    print("\n***** Load dataset *****")

    dataset, classes = dataset_loader.load_dataset(path=PACK_PATH+"/images", img_h=FLAGS.height, img_w=FLAGS.width)

    num_train = dataset.train.amount
    num_test = dataset.test.amount
    print(" Num of Train images : "+str(num_train))
    print(" Num of Test images  : "+str(num_test))
    return dataset, classes, min(num_train, num_test)
Example #5
def add_custom_metric(folder=None,
                      dataset="radiomics1",
                      old_way=False,
                      verbose=True):
    """
    I am hardcoding all this for now
    This should only work for datasets similar to radiomics1 (where a 3D volume is divided
    in 3 channel 2D slices)
    """

    (x_train, y_train), (x_test, y_test) = load_dataset(dataset)
    patients_train, patients_test = load_patients_dataset(dataset)

    # Navigate to folder and load results.yaml
    if folder is not None:
        os.chdir(folder)
    with open("results.yaml") as f:
        try:
            result = yaml.load(f, Loader=yaml.FullLoader)
        except yaml.YAMLError as YamlError:
            print(
                "There was an error parsing 'results.yaml'. Plotting aborted.")
            print(YamlError)
            if folder is not None:
                os.chdir("./..")
            return

    try:
        os.remove("new_results.yaml")
        print("File 'new_results.yaml' removed")
    except FileNotFoundError:
        pass

    folders = sorted(result.keys())
    for id in folders:
        print("Folder:", id)
        custom_params = calc_vol_acc_Tr_Te(x_train,
                                           y_train,
                                           x_test,
                                           y_test,
                                           patients_train,
                                           patients_test,
                                           old_way,
                                           folder=id)
        vol_acc_Tr, vol_acc_Te, num_volTr, num_volTe, num_Tr, num_Te = custom_params
        result[id]["result"]["volAccTr"] = float(vol_acc_Tr)
        result[id]["result"]["volAccTe"] = float(vol_acc_Te)
        result[id]["result"]["num2dImagesTr"] = num_Tr
        result[id]["result"]["num2dImagesTe"] = num_Te
        result[id]["result"]["num3dVolumesTr"] = num_volTr
        result[id]["result"]["num3dVolumesTe"] = num_volTe
        if verbose:
            print("accTr:", result[id]["result"]["accTr"])
            print("accTe:", result[id]["result"]["accTe"])
            print("volAccTr:", result[id]["result"]["volAccTr"])
            print("volAccTe:", result[id]["result"]["volAccTe"])
            print("num2dImagesTr:", result[id]["result"]["num2dImagesTr"])
            print("num2dImagesTe:", result[id]["result"]["num2dImagesTe"])
            print("num3dVolumesTr:", result[id]["result"]["num3dVolumesTr"])
            print("num3dVolumesTe:", result[id]["result"]["num3dVolumesTe"])
            print(" ")
        with open("new_results.yaml", "a") as f:
            f.write(
                yaml.dump_all([{
                    id: result[id]
                }],
                              default_flow_style=False,
                              explicit_start=False))

    if folder is not None:
        os.chdir("./..")
Example #6
# coding: utf-8
# Dataset preview: check that the loader works correctly
# by z0gSh1u @ https://github.com/z0gSh1u

import cv2
from dataset_loader import load_dataset

train_x, train_y, test_x, test_y, classes = load_dataset()

print('Size of train set={}'.format(train_x.shape)) # [number, x, y, channel]
print('Size of test set={}'.format(test_x.shape))
print('Snapping train_y[18]={}'.format(train_y[18]))
cv2.imshow('snap.jpg', train_x[18, :, :, :])
cv2.waitKey()
Example #7
import os

import tensorflow as tf
from tensorflow import keras
import dataset_loader as dl
from models import model_cnn_conv1d, model_cnn_conv2d, model_crnn_conv1d
from model_final import EventDetector
from model_exporter import export_model

conv_type = '1d'
model_to_use = 'cnn'
final = True

dataset_path = './dataset'

train_dataset_path = os.path.join(dataset_path, 'train')
test_dataset_path = os.path.join(dataset_path, 'test')

train_dataset_raw = dl.load_dataset(train_dataset_path)
test_dataset_raw = dl.load_dataset(test_dataset_path)

labels_dict = train_dataset_raw['labels_str']
labels_count = len(labels_dict)

train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_dataset_raw['features'], train_dataset_raw['labels']))
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_dataset_raw['features'], test_dataset_raw['labels']))

if conv_type == '2d':
    train_dataset = train_dataset.map(lambda x, y: (tf.expand_dims(
        x, -1), tf.keras.backend.one_hot(y, labels_count)))
    test_dataset = test_dataset.map(lambda x, y: (tf.expand_dims(
        x, -1), tf.keras.backend.one_hot(y, labels_count)))
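The conv_type == '1d' branch is not included in the snippet; below is a hedged guess at its counterpart, assuming the labels still need one-hot encoding while the features stay 2-D for Conv1D layers:

if conv_type == '1d':
    train_dataset = train_dataset.map(
        lambda x, y: (x, tf.keras.backend.one_hot(y, labels_count)))
    test_dataset = test_dataset.map(
        lambda x, y: (x, tf.keras.backend.one_hot(y, labels_count)))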
Example #8

# Hyper Params
EPOCH = 200
BATCH_SIZE = 64
LR = 0.001
OPTIMIZER = Adam(lr=LR)
LOSSFUNC = 'categorical_crossentropy'
VALIDATION_SPLIT = 0.1
# Other Params
IMG_ROWS, IMG_COLS = 64, 64
CLASSES = 6
INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 3)

# Load dataset
x_train, y_train, x_test, y_test, _ = load_dataset()
# Normalization
x_train = x_train / 255
x_test = x_test / 255
# One-hot
y_train = np_utils.to_categorical(y_train, CLASSES)
y_test = np_utils.to_categorical(y_test, CLASSES)

# Build model
model = SignNet.build(INPUT_SHAPE, CLASSES)
model.compile(optimizer=OPTIMIZER, loss=LOSSFUNC, metrics=['accuracy'])
model.summary()

# === For TensorBoard usage, uncomment these lines if needed
# cb_tf = keras.callbacks.TensorBoard(write_images=1, histogram_freq=1)
# cbks = [cb_tf]
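The snippet ends before training; a hedged sketch of the fit call the hyperparameters above would feed into (callbacks would only be passed if the TensorBoard lines are uncommented):

history = model.fit(x_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCH,
                    validation_split=VALIDATION_SPLIT)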
Example #9
import sys

from dataset_loader import load_dataset
from models import m_alexnet, m_googlenet, m_resnet18, m_resnet50, m_resnet110



if __name__ == '__main__':
    training_dataset, testing_dataset = load_dataset()
    

    print("Menu:")
    print("1. AlexNet")
    print("2. GoogLeNet")
    print("3. ResNet-18")
    print("4. ResNet-50")
    print("5. ResNet-110")
    print("6. Exit")
    
    user_input = int(input("Enter your selection: "))
    
    if user_input > 5 or user_input < 1:
        sys.exit()
    
    epochs = int(input("\nNumber of epochs: "))
    lr = float(input("Learning rate: "))
    load_w = int(input("Load weights? (1 for yes, 0 for no) "))
    save_w = int(input("Save weights? (1 for yes, 0 for no) "))
    
    if load_w == 1:
        load_weights = True
Example #10
import json

import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold


def main(cfg):
    # parse config
    dataset, trg = load_dataset(cfg, False, None)

    print(f'Dataset shape: {dataset.shape}')
    print('Dataset:')
    print(dataset.head())

    print('Target:')
    print(trg.head())
    print()
    print('Columns:')
    print('\n'.join(dataset.columns.to_list()))

    state_dict = {'columns_meta': dataset.columns.to_list(), 'models': []}

    gains = trg[cfg['COLUMNS']['GAINS']].to_numpy()
    n_calls = trg[cfg['COLUMNS']['N_CALLS']].to_numpy()

    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    folds = [(tr, te) for tr, te in kfold.split(dataset, trg.sale_flg)]

    X_train = lgb.Dataset(
        data=dataset,
        label=trg.sale_flg.to_numpy(),
    )

    params = {
        'objective': 'binary',
        'metric': 'auc',
        'learning_rate': 0.05,
        'subsample': 0.7,
        'class_weight': 'balanced',
        'colsample_bytree': 0.7,
        'max_depth': 5,
        'num_leaves': 256,
    }

    trees = 1000
    cv = lgb.cv(params,
                X_train,
                show_stdv=False,
                verbose_eval=True,
                num_boost_round=trees,
                early_stopping_rounds=50,
                return_cvbooster=True,
                folds=kfold)

    boosters = cv.pop('cvbooster', None).boosters
    for i, (b, (tr, te)) in enumerate(zip(boosters, folds)):
        model_name = f'model_{i}.bst'
        model_thr, model_sc = find_threshold(b, dataset.iloc[te], gains[te],
                                             n_calls[te])
        state_dict['models'].append({
            'model': model_name,
            'model_thr': model_thr,
            'model_sc': model_sc,
            'model_auc': roc_auc_score(trg.sale_flg.iloc[te],
                                       b.predict(dataset.iloc[te])),
        })
        b.save_model(model_name)

    for md in state_dict['models']:
        print(
            f'Found threshold {md["model_thr"]:4.3f} with score {md["model_sc"]:10.3f} for model {md["model"]}'
        )
        print(f'AUC {md["model_auc"]} for model {md["model"]}')

    with open('state_dict.json', mode='w') as out:
        json.dump(state_dict, out)
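find_threshold is defined elsewhere in that repository; purely to illustrate the interface used above, here is a hypothetical grid-search version (the real scoring rule based on gains and n_calls is not shown, and CALL_COST is an invented constant):

import numpy as np

CALL_COST = 0.0  # hypothetical per-call cost; the real value and scoring logic are unknown

def find_threshold(booster, X_val, gains, n_calls, steps=101):
    # Scan candidate thresholds and keep the one with the best (assumed) business score.
    probs = booster.predict(X_val)
    best_thr, best_score = 0.5, float('-inf')
    for thr in np.linspace(0.0, 1.0, steps):
        mask = probs > thr
        score = gains[mask].sum() - CALL_COST * n_calls[mask].sum()
        if score > best_score:
            best_thr, best_score = thr, score
    return best_thr, best_score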
Example #11
epochs = 100
save_dir = os.path.join(os.getcwd(), "saved_models")
model_name = "keras_cifar10_trained_model.h5"

train_dataset = "../dataset/train/train"

dataset_ids = np.genfromtxt(
    os.path.join(train_dataset, "..", "train.truth.csv"),
    delimiter=",",
    skip_header=True,
    dtype=str,
)
dataset_ids = {x[0]: x[1] for x in dataset_ids}

(x_train, y_train), (x_validation,
                     y_validation) = load_dataset(train_dataset, dataset_ids,
                                                  0.1)
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_validation = tf.keras.utils.to_categorical(y_validation, num_classes)

model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same", input_shape=x_train.shape[1:]))
model.add(Activation("relu"))
model.add(Conv2D(32, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
Example #12
                self.save()
        print("model saved!")

    def load(self):
        import os
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
        if not os.path.exists(self.saving_path):
            os.makedirs(self.saving_path)
        if not tf.train.checkpoint_exists(self.saving_path + 'checkpoint'):
            print('Saved temp_models not found! Randomly initialized.')
        else:
            self.saver.restore(self.sess, self.saving_path)
            print('Model loaded!')

    def save(self):
        self.saver.save(self.sess, self.saving_path)

    def predict(self, data):
        return np.argmax(
            self.sess.run(self.pred_label, feed_dict={self.input_data: data}),
            1)


train_data, train_labels, test_data, test_labels = dataset_loader.load_dataset(
)

model = DenseNN(300, 3)
model.train(train_data, train_labels, test_data, test_labels)
print(model.predict(test_data))
Example #13
    def setUp(self):
        (X, Y) = load_dataset()
        self.X = X
        self.Y = Y
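A hedged sketch of the kind of unittest.TestCase this setUp fragment could belong to; the test method and the parallel-array assumption are illustrative, not from the original:

import unittest

from dataset_loader import load_dataset


class DatasetLoaderTest(unittest.TestCase):
    def setUp(self):
        (X, Y) = load_dataset()
        self.X = X
        self.Y = Y

    def test_samples_and_labels_align(self):
        # Assumption: X and Y are parallel collections of samples and labels.
        self.assertEqual(len(self.X), len(self.Y))


if __name__ == '__main__':
    unittest.main()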
Example #14
import numpy as np
import os, sys
currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

cdr = os.path.dirname(__file__)
if len(cdr) != 0:
    os.chdir(cdr)

from dataset_loader import load_dataset
rx_train, ry_train, rx_test, ry_test, labels, skip_ratio = load_dataset()

cpi_filename = "cp_xnn/cp_info.text"
if not os.path.isfile(cpi_filename):
    print("Error no cpi_filename {}".format(cpi_filename))
    sys.exit(-1)

with open(cpi_filename, 'r') as file:
    test_num = int(file.readline().rstrip())
    feature_num = int(file.readline().rstrip())
    skip_ratio = int(file.readline().rstrip())
    model_name = file.readline().rstrip()

print("CPI", test_num, feature_num, skip_ratio, model_name)

cpe_filename = "cp_xnn/MLPClassifier.class"
if not os.path.isfile(cpe_filename):
    print("Error no cpe_filename {}".format(cpe_filename))
    sys.exit(-1)
    print("|  Running: {:<72}  |".format(" ".join(sys.argv)))
    print("|  Time:    {:<72}  |".format("{} {:02d}:{:02d}:{:02d}".format(
        now.date(), now.hour, now.minute, now.second)))
    print(
        "---------------------------------------------------------------------------------------"
    )
    print("Arguments used:")
    for arg in args._get_kwargs():
        print("    {} : {}".format(arg[0], arg[1]))
    print(" ")

    # Imports that load the TensorFlow backend (slow, should only happen if we are going to use it)
    modular_NN = import_module("modular_neural_network")
    experiment = getattr(modular_NN,
                         args.experiment)  # Only import experiment used
    data = load_dataset(args.dataset)
    from results_plotter import plot_results
    from results_observer import observe_results
    from keras_experiments import experiments_runner

    # Run all experiments (according to the chosen experiment, performed over the chosen dataset)
    # and save results into folder with chosen folder name. #epochs and dr can also be set
    t = clock()  # Start measure of time taken
    folder = experiments_runner(data,
                                experiment,
                                folder=args.folder,
                                data_reduction=args.data_reduction,
                                epochs=args.number_epochs,
                                early_stopping=args.early_stopping)
    print("\nTime Taken to perform Experiment: {} s\n\n".format(
        timedelta(seconds=clock() - t)))
    observe_training = 0
    num_columns = 5
    if len(sys.argv) > 1 and sys.argv[1].lower() != "none":
        folder = sys.argv[1]
    if len(sys.argv) > 2 and sys.argv[2].lower() != "none":
        filename = sys.argv[2]
    if len(sys.argv) > 3:
        dataset_name = sys.argv[3]
    if len(sys.argv) > 4:
        mode = int(sys.argv[4])
    if len(sys.argv) > 5:
        observe_training = int(sys.argv[5])
    if len(sys.argv) > 6:
        num_columns = int(sys.argv[6])

    data = load_dataset(dataset_name)
    observe_results(data,
                    folder=folder,
                    filename=filename,
                    mode=mode,
                    data_reduction=None,
                    observe_training=observe_training,
                    num_columns=num_columns,
                    custom_observation=custom_observation,
                    old_way=old_way)
    """
    Expects:
        py results_observer.py
        py results_observer.py folder
        py results_observer.py folder filename dataset_name
        py results_observer.py folder filename dataset_name mode(0-2)
Example #17
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt

from PIL import Image
from scipy.io import wavfile

import conv2d
import dataset_loader

NEED_TO_CREATE_DATASET = False
NEED_TO_CREATE_H5 = False

if NEED_TO_CREATE_DATASET:
    dataset_loader.create_dataset("WAV_mini_speech_commands/")


if NEED_TO_CREATE_H5:
    all_set = dataset_loader.load_dataset("IMG_mini_speech_commands/", 64, 64)

    dataset, labels = dataset_loader.join_sets(all_set)
    train_set, train_labels, test_set, test_labels = dataset_loader.split_dataset(dataset, labels, 0.8)

    dataset_loader.create_h5_dataset(train_set, train_labels, test_set, test_labels)

train_set, train_labels, test_set, test_labels = dataset_loader.load_h5_dataset()

BATCH_SIZE = 128

train_set, train_labels = dataset_loader.create_batch(train_set, train_labels, BATCH_SIZE)
test_set, test_labels = dataset_loader.create_batch(test_set, test_labels, BATCH_SIZE)

CNN = conv2d.CNN()
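The snippet stops right after constructing the network; a minimal hedged continuation, assuming conv2d.CNN is a torch.nn.Module (the learning rate and loss function are illustrative, not from the original):

import torch.nn as nn

optimizer = optim.Adam(CNN.parameters(), lr=1e-3)  # illustrative learning rate
criterion = nn.CrossEntropyLoss()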