def train_model():
    """Train one CNN per stratified cross-validation fold.

    Loads the image mean and a pretrained base model, splits the data with
    5-fold stratified CV, carves a validation set out of each training fold,
    persists the per-fold index split to disk, and trains a freshly
    redesigned network on each fold.
    """
    IMAGE_MEAN, base_model = load_model()
    X, y = load.load_data(IMAGE_MEAN)
    # Old sklearn.cross_validation API: labels passed to the constructor.
    skf = StratifiedKFold(y, n_folds=5, shuffle=True)
    # BUG FIX: enumerate yields (fold_number, (train_index, test_index));
    # the original three-name unpack raised ValueError at runtime.
    for fold_number, (train_index, test_index) in enumerate(skf):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_te, y_te = X[test_index], y[test_index]
        # Carve a validation subset out of this fold's training indices.
        train_index, val_index = train_test_split(train_index)
        X_tr, y_tr = X[train_index], y[train_index]
        X_val, y_val = X[val_index], y[val_index]
        # BUG FIX: "test" previously stored val_index, so the saved split
        # silently duplicated the validation set instead of the test set.
        index = {"train": train_index, "val": val_index, "test": test_index}
        # BUG FIX: pickle requires a binary-mode handle ("wb", not "w"),
        # and the file handle was never closed.
        # NOTE(review): extension "plk" looks like a typo for "pkl"; kept
        # byte-identical for compatibility with existing readers.
        with open("index_fold{number}.plk".format(number=fold_number), "wb") as fh:
            pickle.dump(index, fh)
        cnn_model = redesign_model(base_model)
        train_fn, val_fn, pred_fn = new_model_params(cnn_model)
        train_one_net(cnn_model, train_fn, val_fn, pred_fn,
                      X_tr, y_tr, X_val, y_val, X_te, y_te, fold_number)
from data_preprocessing import PreprocessedData import rospkg from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm import matplotlib.pyplot as plt import numpy as np rospack = rospkg.RosPack() map_path = rospack.get_path('krp_localization') + '/maps/test' train_data_prefix = "test4" train_data_path = rospack.get_path('krp_localization') + '/data/' print('Loading train data path: {}'.format(train_data_path)) raw_rssi, poses = load_data(file_name=train_data_prefix, file_path=train_data_path) #odometry is not needed print('Number of RSSI dataset : {}'.format(len(raw_rssi))) print('Number of AMCL poses : {}'.format(len(poses))) traindata = PreprocessedData(raw_rssi, flag_negative_db=True, flag_min_distance=False, flag_fuse_measurements=True, flag_min_points_per_AP=False, flag_mode_filter=False, flag_discard_non_pose=True, poses=poses, filter_fuse_measurements=1) distance = np.sum(traindata.data['X'][1:]**2 + traindata.data['X'][:-1]**2 -
test_hdf5_file = args.data_dir + \ f'/{args.imsize}x{args.imsize}/kle512_lhs1000_val.hdf5' ntrain_total, ntest_total = 10000, 1000 elif args.data == 'channelized': train_hdf5_file = args.data_dir + \ f'/{args.imsize}x{args.imsize}/channel_ng64_n4096_train.hdf5' test_hdf5_file = args.data_dir + \ f'/{args.imsize}x{args.imsize}/channel_ng64_n512_test.hdf5' ntrain_total, ntest_total = 4096, 512 assert args.ntrain <= ntrain_total, f"Only {args.ntrain_total} data "\ f"available in {args.data} dataset, but needs {args.ntrain} training data." assert args.ntest <= ntest_total, f"Only {args.ntest_total} data "\ f"available in {args.data} dataset, but needs {args.ntest} test data." train_loader, _ = load_data(train_hdf5_file, args.ntrain, args.batch_size, only_input=False, return_stats=False) test_loader, test_stats = load_data(test_hdf5_file, args.ntest, args.test_batch_size, only_input=False, return_stats=True) y_test_variation = test_stats['y_variation'] print(f'Test output variation per channel: {y_test_variation}') optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) scheduler = OneCycleScheduler(lr_max=args.lr, div_factor=args.lr_div,
from utils.load import load_data
from utils.preprocess import preprocess_data
from features.importance import use_most_important_features
from parameters.optimize import optimize_hyperparameters
import performance.learning_curves
from performance.learning_curves import plot_learning_curves
from performance.learning_curves import plot_ROC_curve
from prediction.predict import predict_survival

# Widen pandas console output so wide frames print on one line.
pd.options.display.width = 200

# --- Stage 1: raw data ---------------------------------------------------
print("Loading Data...")
DATA_DIR = "./data"
train_df, test_df = load_data(DATA_DIR)

# --- Stage 2: cleaning / scaling / merging -------------------------------
print("Preprocessing Data...")
train_df, test_df = preprocess_data(train_df, test_df)

# --- Stage 3: feature selection ------------------------------------------
print("Extracting Most Important Features...")
train_df, test_df = use_most_important_features(train_df, test_df)

# --- Stage 4: hyperparameter search --------------------------------------
print("Optimizing Hyperparameters...")
optimize_hyperparameters(train_df)

# --- Stage 5: diagnostics ------------------------------------------------
print("Plot Learning Curves...")
ci_and_tc = (torch.cat(convo_ids, dim=0), torch.cat(turn_counts, dim=0)) if args.cascade else (0, 0) utils = {'kb_labels': kb_labels, 'ci_and_tc': ci_and_tc} metrics, res_name = quantify(args, grouped_preds, grouped_labels, utils) exp_logger.end_eval(metrics, kind=args.filename) return (metrics, res_name) if split == 'dev' else metrics if __name__ == "__main__": args = solicit_params() args = setup_gpus(args) set_seed(args) ckpt_dir, cache_results = check_directories(args) raw_data = load_data(args, cache_results[1]) tokenizer, ontology = load_tokenizer(args) features, mappings = process_data(args, tokenizer, ontology, raw_data, *cache_results) exp_logger = ExperienceLogger(args, ckpt_dir) if args.task == 'ast': datasets = { split: ActionDataset(args, feats) for split, feats in features.items() } model = ActionStateTracking(args, mappings, ckpt_dir) elif args.task == 'cds': datasets = { split: CascadeDataset(args, feats) for split, feats in features.items()
def create_mne_raw_object(save=False, proj=True):
    """Assemble the raw training signals into an MNE Raw object.

    Rows 0-6 are EEG channels, row 7 the pulse signal (typed 'ecg'),
    rows 8-10 the x/y/z accelerometer axes ('misc'), and row 11 a stim
    channel that carries the sleep-stage labels as fixed-length events.

    Parameters
    ----------
    save : bool
        When True, write the Raw to ../data/mne/X_train_raw.fif.
    proj : bool
        When True, compute and attach two EEG SSP projectors.

    Returns
    -------
    mne.io.RawArray
        The populated raw object (events and optional projectors attached).
    """
    eeg1, eeg2, eeg3, eeg4, eeg5, eeg6, eeg7, pulse, x, y, z = \
        load_data("../data/raw/X_train.h5")

    # Gather channel sources in row order; the pulse/accelerometer signals
    # are passed through sync_matrix() lazily inside the loop so the memory
    # pattern matches the original free-as-you-go code.
    channels = [eeg1, eeg2, eeg3, eeg4, eeg5, eeg6, eeg7, pulse, x, y, z]
    del eeg1, eeg2, eeg3, eeg4, eeg5, eeg6, eeg7, pulse, x, y, z

    # Hoisted: the original concatenated eeg1 twice (once for the shape,
    # once for the fill).
    first = np.concatenate(channels[0])
    data = np.zeros([12, first.shape[0]])  # row 11 (stim) stays zero for now
    data[0, :] = first
    channels[0] = None
    del first
    for ch in range(1, 11):
        sig = channels[ch] if ch < 7 else sync_matrix(channels[ch])
        data[ch, :] = np.concatenate(sig)
        channels[ch] = None  # release each source as soon as it is copied
        del sig
        if ch % 3 == 0:
            gc.collect()

    # Channel names / types (10-20 montage names for the EEG rows).
    ch_names = ["Fpz", "O1", "F7", "F8", "Fp2", "O2", "Fp1",
                "pulse", "x", "y", "z", "sleep_state"]
    ch_types = [*['eeg'] * 7, 'ecg', *['misc'] * 3, "stim"]

    # Scale amplitudes.  NOTE(review): the global 1e-7 factor also hits the
    # pulse row before its extra 1e-2 factor (net 1e-9) — presumably
    # intentional, but worth confirming against the recording's units.
    data *= 1e-7
    data[7] *= 1e-2

    info = mne.create_info(ch_names, sfreq=250.0, ch_types=ch_types)
    raw = mne.io.RawArray(data, info, verbose=False)
    raw.set_montage("standard_1020")
    del data
    gc.collect()

    # Attach the sleep-stage labels as fixed-length events on the stim
    # channel; +1 shifts the labels so 0 remains "no event" for MNE.
    labels = pd.read_csv("../data/raw/y_train.csv")
    labels = np.array(labels.sleep_stage) + 1
    new_events = mne.make_fixed_length_events(raw, start=0.001,
                                              stop=148128.5, duration=6.)
    # FIX: removed leftover no-op notebook statement `new_events.shape`.
    new_events[:, 2] = labels
    raw.add_events(new_events, stim_channel='sleep_state')

    if proj:  # was `proj == True`
        projs = mne.compute_proj_raw(raw, n_grad=0, n_mag=0, n_eeg=2, n_jobs=4)
        raw.add_proj(projs)
    if save:  # was `save == True`
        raw.save("../data/mne/X_train_raw.fif")
    return raw