def random_search_unsupervised(data_and_labels: tuple, model: Union[LocalOutlierFactor, IsolationForest],
                               params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for experiment in params['experiments']:
        model.set_params(**experiment['hyperparameters'])
        print(f'Model current hyperparameters are: {experiment["hyperparameters"]}.')

        if isinstance(model, LocalOutlierFactor):
            y_pred = model.fit_predict(x_test)  # return labels
        else:
            model.fit(x_train)
            y_pred = model.predict(x_test)  # return labels

        y_pred = convert_predictions(y_pred)
        metrics_report(y_test, y_pred)

        model_path = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, model_path)

        res = create_experiment_report(get_metrics(y_test, y_pred), experiment['hyperparameters'],
                                       file_path=model_path)
        scores.append(res)
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
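# A minimal sketch (added for illustration, not part of the original code) of how the
# params['experiments'] list consumed above might be generated: each entry is assumed to be a dict
# with a 'hyperparameters' key holding one randomly sampled configuration. The hyperparameter names
# and ranges below are illustrative assumptions for IsolationForest, not values from the experiments.
import random

def sample_experiments(n_experiments: int = 5, seed: int = 1) -> dict:
    rng = random.Random(seed)
    experiments = []
    for _ in range(n_experiments):
        experiments.append({
            'hyperparameters': {
                'n_estimators': rng.choice([50, 100, 200]),
                'max_samples': rng.choice([256, 512, 1024]),
                'contamination': rng.uniform(0.01, 0.1),
            }
        })
    return {'experiments': experiments}

# Hypothetical usage:
# params = sample_experiments()
# results = random_search_unsupervised((x_train, x_test, y_train, y_test), IsolationForest(), params)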
def random_search(data_and_labels: tuple, model: TransformerAutoEncoder, params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}
        model.set_params(**kwargs)
        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)

        scores.append(create_experiment_report(get_metrics(y_test, y_pred), kwargs))

        # visualize_distribution_with_labels(y_pred, y_test, to_file=False)
        from sklearn.metrics import confusion_matrix
        print(confusion_matrix(y_test, y_pred))

        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
def grid_search(data_and_labels: tuple, model: Union[LocalOutlierFactor, IsolationForest], params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in itertools.product(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}
        model.set_params(**kwargs)
        print(f'Model (hyper)parameters are: {model.get_params()}.')

        if isinstance(model, LocalOutlierFactor):
            y_pred = model.fit_predict(x_test)
        else:
            model.fit(x_train)
            y_pred = model.predict(x_test)

        y_pred = convert_predictions(y_pred)
        metrics_report(y_test, y_pred)

        scores.append(create_experiment_report(get_metrics(y_test, y_pred), model.get_params()))
    return {'experiments': scores}
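# A small self-contained illustration (an addition, not original code) of the difference between the
# two search strategies above: grid_search expands the full Cartesian product of the value lists with
# itertools.product, while the zip-based random_search pairs the i-th value of every list into the
# i-th configuration. The parameter names below are arbitrary placeholders.
import itertools

params = {'n_neighbors': [10, 20], 'contamination': [0.01, 0.05]}

grid_confs = [dict(zip(params.keys(), conf)) for conf in itertools.product(*params.values())]
# -> 4 configurations: every combination of n_neighbors and contamination

paired_confs = [dict(zip(params.keys(), conf)) for conf in zip(*params.values())]
# -> 2 configurations: (10, 0.01) and (20, 0.05), i.e. pre-sampled value lists read in lockstep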
def random_search(data_and_labels: tuple, model: Union[AutoEncoder, VanillaTCN, AETCN, AECNN1D, CNN1D, CNN2D,
                                                       TCNCNN1D, SACNN1D, SACNN2D], params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for experiment in params['experiments']:
        model.set_params(**experiment['hyperparameters'])
        print(f'Model current hyperparameters are: {experiment["hyperparameters"]}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)

        model_path = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, model_path)

        res = create_experiment_report(get_metrics(y_test, y_pred), experiment['hyperparameters'], theta, model_path)
        scores.append(res)
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
def find_optimal_threshold(y_true: np.array, y_pred: np.array) -> tuple:
    ret = {}
    for th in set(y_pred[y_true == 1]):
        tmp = classify(y_pred, th)
        f1 = get_metrics(y_true, tmp)['f1_score']
        ret[th] = f1
    return max(ret.items(), key=lambda x: x[1])
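# An illustrative, self-contained version of the threshold sweep above (added for clarity; classify()
# and get_metrics() are project helpers defined elsewhere). Candidate thresholds are the reconstruction
# errors of the known anomalies (y_true == 1), and the threshold with the highest F1 score is returned.
# The strict '>' mirrors the manual sweep used later in this module but is an assumption about classify().
import numpy as np
from sklearn.metrics import f1_score

def find_optimal_threshold_demo(y_true: np.ndarray, errors: np.ndarray) -> tuple:
    best_th, best_f1 = None, -1.0
    for th in np.unique(errors[y_true == 1]):
        y_hat = (errors > th).astype(int)  # assumed equivalent of classify(y_pred, th)
        f1 = f1_score(y_true, y_hat, zero_division=0)
        if f1 > best_f1:
            best_th, best_f1 = th, f1
    return best_th, best_f1

# Example:
# y_true = np.array([0, 0, 0, 1, 1])
# errors = np.array([0.1, 0.2, 0.15, 0.8, 0.9])
# find_optimal_threshold_demo(y_true, errors)  # -> (0.8, 0.666...), since only errors > 0.8 are flagged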
def evaluate_unsupervised(x_test: np.ndarray, y_test: np.array, experiments: Dict) -> Dict:
    model_config = find_best_model(experiments)
    model = torch.load(model_config['model_path'])

    if isinstance(model, LocalOutlierFactor):
        y_pred = model.fit_predict(x_test)  # return labels
    else:
        y_pred = model.predict(x_test)  # return labels

    y_pred = convert_predictions(y_pred)
    auc_score = roc_auc_score(y_test, y_pred)

    metrics_report(y_test, y_pred)
    return create_report(model_config, {**get_metrics(y_test, y_pred), 'auc_score': float(auc_score)})
def evaluate(x_test: np.ndarray, y_test: np.array, experiments: Dict) -> Dict:
    model_config = find_best_model(experiments)
    model = torch.load(model_config['model_path'])
    theta = model_config['threshold']

    y_pred = model.predict(x_test)  # return reconstruction errors
    np.savez('preds', y_pred=y_pred, y_test=y_test)
    auc_score = roc_auc_score(y_test, y_pred)

    y_pred = classify(y_pred, theta)
    metrics_report(y_test, y_pred)
    # print('# trainable params:', sum(p.numel() for p in model._model.parameters() if p.requires_grad),
    #       ', # params:', sum(p.numel() for p in model._model.parameters()))
    return create_report(model_config, {**get_metrics(y_test, y_pred), 'auc_score': float(auc_score)})
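# Note (added): evaluate() computes ROC-AUC on the raw reconstruction errors before thresholding,
# whereas evaluate_unsupervised() can only use the binary labels returned by the sklearn detectors,
# which generally gives a coarser AUC estimate. A minimal, self-contained illustration with made-up
# scores:
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
errors = np.array([0.1, 0.4, 0.5, 0.9])   # hypothetical reconstruction errors
labels = (errors > 0.3).astype(int)       # hypothetical hard labels from a detector

print(roc_auc_score(y_true, errors))  # 1.0  -- ranking information preserved
print(roc_auc_score(y_true, labels))  # 0.75 -- hard labels lose ranking detail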
def random_search(data_and_labels: tuple, model: Union[AutoEncoder, IsolationForest], params: Dict) -> Dict:
    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = {k: val for k, val in zip(params.keys(), conf)}
        model.set_params(**kwargs)
        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)

        scores.append(create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
def train_window(x_train: List, x_test: List, y_train: np.array, y_test: np.array) -> Dict:
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    scores = []
    for w in range(1, 50, 2):
        print('Window:', w)
        model = VanillaTCN(epochs=1, window=w)

        model.fit(x_train[y_train == 0])
        y_pred = model.predict(x_test)  # return reconstruction errors

        theta, f1 = find_optimal_threshold(y_test, y_pred)
        y_pred = classify(y_pred, theta)
        metrics_report(y_test, y_pred)

        scores.append(create_experiment_report(get_metrics(y_test, y_pred), {'window': w}))
        create_checkpoint({'experiments': scores}, '../../models/TCN-cropped-window-embeddings-HDFS1.json')
    return {'experiments': scores}
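# A hedged usage sketch (added): train_window expects raw, unscaled train/test sequences plus labels
# and sweeps odd window sizes from 1 to 49, fitting the TCN only on normal training samples.
# load_data() below is a hypothetical helper, not part of the original code.
# x_train, x_test, y_train, y_test = load_data()  # hypothetical
# report = train_window(x_train, x_test, y_train, y_test)
# print(len(report['experiments']))  # 25 window settings evaluated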
# noise = np.random.normal(0, 1, 2000).reshape((20, 100))
# x[i, ...] = org + noise
#
# org = np.random.randn(20, 100)
# for i in range(49000, 50000):
#     noise = np.random.exponential(1, 2000).reshape((20, 100))
#     x[i, ...] = org + noise
#     y[i] = 1
#
# x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, random_state=1)

sc = CustomMinMaxScaler()
x_train = sc.fit_transform(x_train).astype(np.float32)
x_test = sc.transform(x_test).astype(np.float32)

model = VanillaTCN(epochs=1, learning_rate=0.0001)
model._initialize_model(20, [20], 7, 0.0)

model.fit(x_train[y_train == 0])
y_pred = model.predict(x_test)

f1_max = 0
for th in sorted(y_pred[y_test == 1]):
    tmp = np.zeros(shape=y_pred.shape)
    tmp[y_pred > th] = 1
    f1_max = max(f1_max, get_metrics(y_test, tmp)['f1_score'])
print(f1_max)

visualize_distribution_with_labels(y_pred, y_test)
print(min(y_pred[y_test == 1]), max(y_pred[y_test == 1]))