Пример #1
0
def _data(data_pth, split_val=True, verbose=1, num_classes=10):
    """Load a dataset, split it, and one-hot encode the label arrays.

    The file at ``data_pth`` is read with ``np.load`` and must expose the
    keys ``'x'`` and ``'y'``.  A new axis is inserted at position 2 of ``x``
    before splitting.  Class weights are computed from the training labels
    *before* the optional validation split and *before* one-hot encoding.

    Args:
        data_pth: Path (or file object) accepted by ``np.load``.
        split_val: When true, the training portion is split a second time
            with ``split_data`` to obtain a validation set.
        verbose: When truthy, the shapes of the resulting arrays are printed.
        num_classes: Value forwarded to ``to_one_hot`` as ``dimension``.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        ``(x_train, y_train, x_test, y_test, x_val, y_val,
        class_weights_dict)`` when ``split_val`` is true, otherwise
        ``(x_train, y_train, x_test, y_test, class_weights_dict)``.
    """
    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
    # objects, which can execute code. Only load files from trusted sources.
    data = np.load(data_pth, allow_pickle=True)
    x, y = data['x'], data['y']
    x = x[:, :, np.newaxis]
    x_train, y_train, x_test, y_test = split_data(x, y)

    # Computed on the full training labels, i.e. including the samples that
    # may be moved into the validation set below.
    class_weights_dict = calc_class_weights(y_train)

    if split_val:
        x_train, y_train, x_val, y_val = split_data(x_train, y_train)

    y_train = to_one_hot(y_train, dimension=num_classes)
    y_test = to_one_hot(y_test, dimension=num_classes)
    if split_val:
        y_val = to_one_hot(y_val, dimension=num_classes)

    if verbose:
        template = ('\nx_train shape: %s'
                    '\ny_train shape: %s'
                    '\nx_test shape: %s'
                    '\ny_test shape: %s')
        shapes = (x_train.shape, y_train.shape, x_test.shape, y_test.shape)
        if split_val:
            template += ('\nx_val shape: %s'
                         '\ny_val shape: %s')
            shapes += (x_val.shape, y_val.shape)
        print(template % shapes)

    if split_val:
        return (x_train, y_train, x_test, y_test, x_val, y_val,
                class_weights_dict)
    return x_train, y_train, x_test, y_test, class_weights_dict
Пример #2
0
def train_model(model_svm_path,
                tr_features,
                tr_labels,
                verbose=False,
                **model_kwargs):
    """Fit an RBF-kernel ``svm.SVC`` on the training data and optionally save it.

    Args:
        model_svm_path: Destination path for the trained model, written with
            ``joblib.dump``.  A falsy value skips saving.  Missing parent
            directories are created; a bare file name (no directory part) is
            written relative to the current working directory.
        tr_features: Training features passed to ``SVC.fit``.
        tr_labels: Training labels passed to ``SVC.fit`` and to
            ``calc_class_weights``.
        verbose: Forwarded to ``SVC`` and to the ``PrintTime`` context.
        **model_kwargs: Optional hyper-parameters:
            ``c_value`` (default 1) -> ``C``;
            ``gamma_value`` (default 0, mapped to ``"auto"``) -> ``gamma``;
            ``multi_model`` (default ``"ovo"``) -> ``decision_function_shape``.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    c_value = model_kwargs.get("c_value", 1)
    gamma_value = model_kwargs.get("gamma_value", 0)
    # NOTE(review): the key is spelled "multi_model" while the local is
    # "multi_mode" - possibly a typo. Left unchanged so that existing callers
    # keep working; confirm the intended key name.
    multi_mode = model_kwargs.get("multi_model", "ovo")
    use_probability = False
    class_weights = calc_class_weights(tr_labels)
    # Alternative: class_weight="balanced" lets scikit-learn derive weights.

    # NOTE 0.0 means 1/n_features default value, which SVC calls "auto".
    gamma_value = gamma_value if gamma_value != 0.0 else "auto"

    # TODO load best params from cross validation!

    svm_model = svm.SVC(C=c_value,
                        kernel='rbf',
                        degree=3,
                        gamma=gamma_value,
                        coef0=0.0,
                        shrinking=True,
                        probability=use_probability,
                        tol=0.001,
                        cache_size=200,
                        class_weight=class_weights,
                        verbose=verbose,
                        max_iter=-1,
                        decision_function_shape=multi_mode,
                        random_state=None)

    with PrintTime("train the model", verbose=verbose):
        svm_model.fit(tr_features, tr_labels)

    if model_svm_path:
        # Export model: save/write trained SVM model.
        model_dir = os.path.dirname(model_svm_path)
        # dirname() is "" for a bare file name; os.makedirs("") would raise,
        # so only create the directory when there actually is one to create.
        if model_dir and not os.path.exists(model_dir):
            os.makedirs(model_dir)
        joblib.dump(svm_model, model_svm_path)

    # TODO Export StandardScaler()

    return svm_model
Пример #3
0
            A.shape).toarray(), csr_matrix(A.shape).toarray()
        orc = OllivierRicci(G, alpha=0.5, verbose="INFO")
        orc.compute_ricci_curvature()
        frc = FormanRicci(G)
        frc.compute_ricci_curvature()
        for tup in orc.G.edges:
            i, j = tup[0], tup[1]
            ollivier_curv_vals[i][j] = map_curvature_val(
                orc.G[i][j]['ricciCurvature'], alpha=4)
            forman_curv_vals[i][j] = map_curvature_val(
                frc.G[i][j]['formanCurvature'], alpha=4)
    K = A.shape[1] if X is None else X.shape[0]
    nC = Y.shape[1]
    W = None
    if args.weighted:
        W = utils.calc_class_weights(Y[..., idx_train, :])

    # ************************************************************
    # calculate node features
    # ************************************************************
    vals = []
    X = X.astype(np.float32)
    EYE = scipy.sparse.eye(K, dtype=np.float32, format='coo')
    A = A.astype(np.float32)

    # normalized x
    rowsum = sparse.as_coo(X).sum(axis=-1, keepdims=True)
    #rowsum.data[rowsum.data==0] = 1e-10
    rowsum.data = 1.0 / rowsum.data
    vals.append((sparse.as_coo(X) * rowsum).to_scipy_sparse())
    nodes = scipy.sparse.hstack(vals)
Пример #4
0
        vocab[w] for w in re.findall('\w+', x_.lower()) if w in vocab
    ] for x_ in dataset.x_test]
    dev_x_int = [[
        vocab[w] for w in re.findall('\w+', x_.lower()) if w in vocab
    ] for x_ in dataset.x_dev]

    max_sent_length = max([len(x_) for x_ in train_x_int])
    train_x = pad_sequences(train_x_int, max_sent_length)
    test_x = pad_sequences(test_x_int, max_sent_length)
    dev_x = pad_sequences(dev_x_int, max_sent_length)

    if do_train:
        model = build_model(max_sent_length, vocab, embeddings, num_classes)
        print(model.summary())

        class_weights = calc_class_weights(train_y, mlb.classes_)
        class_weights = {i: w for i, w in enumerate(class_weights)}

        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=3,
                                       restore_best_weights=True)

        try:
            model.fit(train_x,
                      train_y,
                      batch_size=32,
                      epochs=50,
                      verbose=1,
                      validation_data=(dev_x, dev_y),
                      class_weight=class_weights,
                      callbacks=[early_stopping])