Пример #1
0
# Parse and vectorise the test data, then load the stored model.
# NOTE: ``test_data_gen`` is compared against the *strings* 'False'/'false',
# not the boolean False, so this branch is skipped for any other value.
if test_data_gen in ('False', 'false'):

    print("Parsing data...")
    print("\n")

    # Import data as pandas dataframe
    data = pdp.pre_vec_parser(filepath, window_size)

    # Vectorise data

    print("Vectorising data...")
    print("\n")

    # Equality (not truthiness) is kept on purpose: only a value equal to
    # False selects the plain parser; anything else takes the PSSM path.
    if use_pssm == False:

        X_test, Y_test = ddp.skl_parser(data, window_size)
    else:

        pssm_loc = repo_loc + 'SignalP/input/test_pssms/'
        data_pssm = pdp.pssm_parser(data, window_size, pssm_loc)

        # Vectorise the output of pssm_parser rather than the raw ``data``,
        # matching how the PSSM path is used when the model is trained.
        X_test, Y_test = pdp.skl_pssm_parser(data_pssm,
                                             window_size,
                                             pssm_type='freq')

    # Test model

    # Result container; 'labels' records the three class labels.
    scores = oD()
    scores['labels'] = np.array([-1, 0, 1])

    # NOTE(review): joblib.load unpickles the file - only load trusted models.
    model = joblib.load(inpath + model_name)
Пример #2
0
# Step 1: read the input file into its pre-vectorisation form.

print("Parsing data...")
print("\n")

data = pdp.pre_vec_parser(filepath, window_size)

# Step 2: build the feature matrix X and the label vector Y.

print("Vectorising data...")
print("\n")

if use_pssm != False:
    # PSSM path: run pssm_parser first, then vectorise its output.
    data_pssm = pdp.pssm_parser(data, window_size, pssm_loc)
    X, Y = pdp.skl_pssm_parser(
        data_pssm,
        window_size,
        pssm_type=pssm_type,
    )
else:
    # Plain path: vectorise the parsed data directly.
    X, Y = ddp.skl_parser(data, window_size)

# Step 3: set up the classifier that will be trained.

model = RandomForestClassifier(
    n_estimators=n_estimators,
    class_weight='balanced',
)

print("Training this magnificient model...")
print("\n")

# Timestamp taken just before the fitting phase starts.
fit_start = time.time()

# Result container; 'labels' records the three class labels.
scores = oD()
scores['labels'] = np.array([-1, 0, 1])
Пример #3
0
# Starting script: for every candidate window size, grid-search the
# LinearSVC parameter C and record the best score together with its C.

start = time.time()

# F1 scorer, macro-averaged over the labels -1 and 1 only.
f1_scorer = make_scorer(f1_score, average='macro', labels=[-1, 1])

final_list = oD()

for windows in window_size:

    # Parse and vectorise the input for this window size.
    data = ddp.pre_vec_parser(filepath, windows)
    X, Y = ddp.skl_parser(data, windows)

    # Estimator and the grid of C values to try.
    clf = LinearSVC(class_weight='balanced')
    parameters = {"C": [1, 2, 4, 8]}

    model_tunning = GridSearchCV(
        clf,
        param_grid=parameters,
        scoring=f1_scorer,
        n_jobs=-3,
    )
    model_tunning.fit(X, Y)

    s, p = model_tunning.best_score_, model_tunning.best_params_

    # Keyed by the window size as a string: [best score, best C].
    final_list[str(windows)] = [s, p['C']]

end = time.time()

# One row per window size.
best_table = pd.DataFrame.from_dict(final_list, orient='index')
Пример #4
0
    # Cross-validated evaluation of a LinearSVC. This excerpt is the body of
    # an enclosing scope that is not visible here; `data`, `cv_sets` and
    # `windows` come from that outer context.
    clf = LinearSVC(class_weight='balanced')

    # Result container; 'labels' records the three class labels.
    scores = oD()
    scores['labels'] = np.array([-1, 0, 1])
    # Running sums with one slot per label. p, r and f accumulate precision,
    # recall and f-score below. s and ft are not updated in the visible part
    # of the loop - presumably support and fit time; TODO confirm.
    p = np.zeros(3)
    r = np.zeros(3)
    f = np.zeros(3)
    s = np.zeros(3)
    ft = np.zeros(3)

    # Each iteration yields one (train, test) pair from ddp.cv_data_gen.
    for train_data, test_data in ddp.cv_data_gen(data,
                                                 cv_sets,
                                                 randomise=False):

        # Vectorise both parts of the split with the same window setting.
        X_train, Y_train = ddp.skl_parser(train_data, windows)
        X_test, Y_test = ddp.skl_parser(test_data, windows)

        # Time only the call to clf.fit.
        fit_start = time.time()
        clf.fit(X_train, Y_train)
        fit_end = time.time()

        # Seconds spent fitting on this split (not consumed in the visible code).
        fit_time = fit_end - fit_start

        predicted = clf.predict(X_test)

        # `score` is defined outside this excerpt and returns four values;
        # presumably sklearn's precision_recall_fscore_support - TODO confirm.
        precision, recall, fscore, support = score(Y_test, predicted)

        # Add this split's metrics to the running sums.
        p = p + precision
        r = r + recall
        f = f + fscore