import numpy as np


def _data(data_pth, split_val=True, verbose=1):
    """Load the .npz archive, split it, and one-hot encode the labels."""
    data = np.load(data_pth, allow_pickle=True)
    x, y = data['x'], data['y']
    # Add a channel axis so the samples fit 1-D convolutional layers.
    x = x[:, :, np.newaxis]

    x_train, y_train, x_test, y_test = split_data(x, y)
    class_weights_dict = calc_class_weights(y_train)

    if split_val:
        # Carve a validation set out of the training split.
        x_train, y_train, x_val, y_val = split_data(x_train, y_train)
        y_train = to_one_hot(y_train, dimension=10)
        y_test = to_one_hot(y_test, dimension=10)
        y_val = to_one_hot(y_val, dimension=10)
        if verbose:
            print('\nx_train shape: %s'
                  '\ny_train shape: %s'
                  '\nx_test shape: %s'
                  '\ny_test shape: %s'
                  '\nx_val shape: %s'
                  '\ny_val shape: %s'
                  % (x_train.shape, y_train.shape, x_test.shape,
                     y_test.shape, x_val.shape, y_val.shape))
        return (x_train, y_train, x_test, y_test, x_val, y_val,
                class_weights_dict)
    else:
        y_train = to_one_hot(y_train, dimension=10)
        y_test = to_one_hot(y_test, dimension=10)
        if verbose:
            print('\nx_train shape: %s'
                  '\ny_train shape: %s'
                  '\nx_test shape: %s'
                  '\ny_test shape: %s'
                  % (x_train.shape, y_train.shape,
                     x_test.shape, y_test.shape))
        return x_train, y_train, x_test, y_test, class_weights_dict
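# Hedged usage sketch -- the archive path and its contents are hypothetical,
# and split_data / calc_class_weights / to_one_hot come from this module:
if __name__ == '__main__':
    (x_tr, y_tr, x_te, y_te, x_va, y_va,
     class_weights) = _data('data/dataset.npz', split_val=True)
    print('class weights:', class_weights)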
import os

import joblib
from sklearn import svm


def train_model(model_svm_path, tr_features, tr_labels, verbose=False,
                **model_kwargs):
    c_value = model_kwargs.get("c_value", 1)
    gamma_value = model_kwargs.get("gamma_value", 0)
    multi_mode = model_kwargs.get("multi_mode", "ovo")
    use_probability = False

    class_weights = calc_class_weights(tr_labels)  # or class_weights = "balanced"

    # NOTE gamma_value == 0.0 falls back to 'auto', i.e. the 1/n_features default.
    # TODO load best params from cross validation!
    gamma_value = gamma_value if gamma_value != 0.0 else "auto"

    svm_model = svm.SVC(C=c_value, kernel='rbf', degree=3, gamma=gamma_value,
                        coef0=0.0, shrinking=True, probability=use_probability,
                        tol=0.001, cache_size=200, class_weight=class_weights,
                        verbose=verbose, max_iter=-1,
                        decision_function_shape=multi_mode, random_state=None)

    with PrintTime("train the model", verbose=verbose):
        svm_model.fit(tr_features, tr_labels)

    if model_svm_path:
        # Export model: save/write the trained SVM model.
        if not os.path.exists(os.path.dirname(model_svm_path)):
            os.makedirs(os.path.dirname(model_svm_path))
        joblib.dump(svm_model, model_svm_path)
        # TODO Export StandardScaler()

    return svm_model
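# Hedged usage sketch on random data (shapes, path, and hyper-parameters are
# illustrative; assumes calc_class_weights and PrintTime from this repo are
# importable):
import numpy as np

features = np.random.rand(100, 20)
labels = np.random.randint(0, 3, size=100)
model = train_model('models/svm.joblib', features, labels,
                    c_value=10, gamma_value=0.01)
restored = joblib.load('models/svm.joblib')
assert (restored.predict(features) == model.predict(features)).all()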
import numpy as np
import scipy.sparse
import sparse
from scipy.sparse import csr_matrix
from GraphRicciCurvature.FormanRicci import FormanRicci
from GraphRicciCurvature.OllivierRicci import OllivierRicci

# Dense holders for the mapped edge curvatures.
ollivier_curv_vals, forman_curv_vals = (csr_matrix(A.shape).toarray(),
                                        csr_matrix(A.shape).toarray())

orc = OllivierRicci(G, alpha=0.5, verbose="INFO")
orc.compute_ricci_curvature()
frc = FormanRicci(G)
frc.compute_ricci_curvature()

# Map the raw curvature of every edge through map_curvature_val.
for tup in orc.G.edges:
    i, j = tup[0], tup[1]
    ollivier_curv_vals[i][j] = map_curvature_val(
        orc.G[i][j]['ricciCurvature'], alpha=4)
    forman_curv_vals[i][j] = map_curvature_val(
        frc.G[i][j]['formanCurvature'], alpha=4)

K = A.shape[1] if X is None else X.shape[0]
nC = Y.shape[1]

W = None
if args.weighted:
    W = utils.calc_class_weights(Y[..., idx_train, :])

# ************************************************************
# calculate node features
# ************************************************************
vals = []

X = X.astype(np.float32)
EYE = scipy.sparse.eye(K, dtype=np.float32, format='coo')
A = A.astype(np.float32)

# Row-normalize X (each row divided by its sum).
rowsum = sparse.as_coo(X).sum(axis=-1, keepdims=True)
# rowsum.data[rowsum.data == 0] = 1e-10
rowsum.data = 1.0 / rowsum.data
vals.append((sparse.as_coo(X) * rowsum).to_scipy_sparse())

nodes = scipy.sparse.hstack(vals)
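# Minimal self-contained demo of the GraphRicciCurvature API used above, on a
# toy graph (illustrative only; the real pipeline runs on G built from A):
import networkx as nx

G_demo = nx.karate_club_graph()
orc_demo = OllivierRicci(G_demo, alpha=0.5, verbose="ERROR")
orc_demo.compute_ricci_curvature()
u, v = next(iter(G_demo.edges))
print('Ollivier-Ricci curvature of edge (%d, %d): %.4f'
      % (u, v, orc_demo.G[u][v]['ricciCurvature']))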
import re

from keras.callbacks import EarlyStopping
from keras.preprocessing.sequence import pad_sequences

# Map every text to a sequence of vocabulary indices, dropping OOV words.
train_x_int = [[vocab[w] for w in re.findall(r'\w+', x_.lower()) if w in vocab]
               for x_ in dataset.x_train]
test_x_int = [[vocab[w] for w in re.findall(r'\w+', x_.lower()) if w in vocab]
              for x_ in dataset.x_test]
dev_x_int = [[vocab[w] for w in re.findall(r'\w+', x_.lower()) if w in vocab]
             for x_ in dataset.x_dev]

# Pad every split to the longest training sentence.
max_sent_length = max(len(x_) for x_ in train_x_int)
train_x = pad_sequences(train_x_int, max_sent_length)
test_x = pad_sequences(test_x_int, max_sent_length)
dev_x = pad_sequences(dev_x_int, max_sent_length)

if do_train:
    model = build_model(max_sent_length, vocab, embeddings, num_classes)
    model.summary()

    class_weights = calc_class_weights(train_y, mlb.classes_)
    class_weights = {i: w for i, w in enumerate(class_weights)}

    early_stopping = EarlyStopping(monitor='val_loss', patience=3,
                                   restore_best_weights=True)
    try:
        model.fit(train_x, train_y,
                  batch_size=32,
                  epochs=50,
                  verbose=1,
                  validation_data=(dev_x, dev_y),
                  class_weight=class_weights,
                  callbacks=[early_stopping])
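# Standalone illustration of the index-and-pad step above (toy vocabulary, not
# the repo's data; pad_sequences zero-pads on the left by default):
demo_vocab = {'the': 1, 'cat': 2, 'sat': 3}
demo_texts = ['The cat sat', 'the cat']
demo_int = [[demo_vocab[w] for w in re.findall(r'\w+', t.lower())
             if w in demo_vocab] for t in demo_texts]
print(pad_sequences(demo_int, 3))  # [[1 2 3] [0 1 2]]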