Exemplo n.º 1
0
def _train_svr(mat_files, out_file, C=1000, gamma=0.001, epsilon=0.01):
    """
    Train an SvrBlockade model and store it

    The peptides and signals obtained from ``mat_files`` through
    ``_get_peptides_signals`` are handed to ``SvrBlockade.train`` together
    with ``C``, ``gamma`` and ``epsilon``. The trained model is then passed
    to ``store_model`` along with ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    svr = SvrBlockade()
    hyper_params = (C, gamma, epsilon)
    svr.train(train_peptides, train_signals, *hyper_params)

    store_model(svr, out_file)
Exemplo n.º 2
0
def _train_random_forest(mat_files, out_file):
    """
    Train a RandomForestBlockade model and store it

    The peptides and signals obtained from ``mat_files`` through
    ``_get_peptides_signals`` are handed to ``RandomForestBlockade.train``.
    The trained model is then passed to ``store_model`` along with
    ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    forest = RandomForestBlockade()
    forest.train(train_peptides, train_signals)

    store_model(forest, out_file)
Exemplo n.º 3
0
def _train_svr(mat_files, out_file, C=1000, gamma=0.001, epsilon=0.01):
    """
    Train an SvrBlockade model and store it

    The peptides and signals obtained from ``mat_files`` through
    ``_get_peptides_signals`` are handed to ``SvrBlockade.train`` together
    with ``C``, ``gamma`` and ``epsilon``. The trained model is then passed
    to ``store_model`` along with ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    svr = SvrBlockade()
    hyper_params = (C, gamma, epsilon)
    svr.train(train_peptides, train_signals, *hyper_params)

    store_model(svr, out_file)
Exemplo n.º 4
0
def _train_random_forest(mat_files, out_file):
    """
    Train a RandomForestBlockade model and store it

    The peptides and signals obtained from ``mat_files`` through
    ``_get_peptides_signals`` are handed to ``RandomForestBlockade.train``.
    The trained model is then passed to ``store_model`` along with
    ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    forest = RandomForestBlockade()
    forest.train(train_peptides, train_signals)

    store_model(forest, out_file)
Exemplo n.º 5
0
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Choosing the best SVR parameters through cross-validation

    Runs a grid search over C, gamma and epsilon. For every combination a
    model is obtained from ``_train_svr(train_mats, C, gamma, eps)`` and
    scored as the mean of the rank values returned by ``pvalues_test`` over
    all ``cv_mats``; the combination with the lowest mean score wins.

    A tab-separated table of all scores, followed by the winning parameters,
    is printed to stderr. A model trained with the winning parameters is
    passed to ``store_model`` together with ``out_file``.

    NOTE(review): this relies on ``_train_svr`` accepting
    ``(mat_files, C, gamma, eps)`` and returning the trained model --
    confirm against the ``_train_svr`` definition actually in use.

    Raises:
        ValueError: if no parameter combination produced a comparable score
            (for example when ``cv_mats`` is empty and every mean is NaN).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # float("inf") instead of sys.maxint: the latter does not exist on
    # Python 3, while infinity compares correctly on both major versions.
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    for C in C_vec:
        for gamma in gamma_vec:
            for eps in eps_vec:
                temp_model = _train_svr(train_mats, C, gamma, eps)

                scores = []
                for cv_mat in cv_mats:
                    # The sixth argument of pvalues_test is a writable file
                    # object; os.devnull discards whatever is written to it.
                    # The context manager closes the handle after each call
                    # instead of leaking one descriptor per evaluation.
                    with open(os.devnull, "w") as devnull:
                        _, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                               temp_model, db_file, False,
                                               devnull)
                    scores.append(rank)
                score = np.mean(scores)

                print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                      file=sys.stderr)
                # Lower is better; a NaN score never compares as smaller and
                # is therefore never selected.
                if score < best_score:
                    best_score = score
                    best_params = (C, gamma, eps)

    if best_params is None:
        # Fail with a clear message rather than the opaque TypeError that
        # unpacking None below would produce.
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = _train_svr(train_mats, *best_params)
    store_model(best_model, out_file)
Exemplo n.º 6
0
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Choosing the best SVR parameters through cross-validation

    Runs a grid search over C, gamma and epsilon. For every combination a
    model is obtained from ``_train_svr(train_mats, C, gamma, eps)`` and
    scored as the mean of the rank values returned by ``pvalues_test`` over
    all ``cv_mats``; the combination with the lowest mean score wins.

    A tab-separated table of all scores, followed by the winning parameters,
    is printed to stderr. A model trained with the winning parameters is
    passed to ``store_model`` together with ``out_file``.

    NOTE(review): this relies on ``_train_svr`` accepting
    ``(mat_files, C, gamma, eps)`` and returning the trained model --
    confirm against the ``_train_svr`` definition actually in use.

    Raises:
        ValueError: if no parameter combination produced a comparable score
            (for example when ``cv_mats`` is empty and every mean is NaN).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # float("inf") instead of sys.maxint: the latter does not exist on
    # Python 3, while infinity compares correctly on both major versions.
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    for C in C_vec:
        for gamma in gamma_vec:
            for eps in eps_vec:
                temp_model = _train_svr(train_mats, C, gamma, eps)

                scores = []
                for cv_mat in cv_mats:
                    # The sixth argument of pvalues_test is a writable file
                    # object; os.devnull discards whatever is written to it.
                    # The context manager closes the handle after each call
                    # instead of leaking one descriptor per evaluation.
                    with open(os.devnull, "w") as devnull:
                        _, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                               temp_model, db_file, False,
                                               devnull)
                    scores.append(rank)
                score = np.mean(scores)

                print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                      file=sys.stderr)
                # Lower is better; a NaN score never compares as smaller and
                # is therefore never selected.
                if score < best_score:
                    best_score = score
                    best_params = (C, gamma, eps)

    if best_params is None:
        # Fail with a clear message rather than the opaque TypeError that
        # unpacking None below would produce.
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = _train_svr(train_mats, *best_params)
    store_model(best_model, out_file)