Пример #1
0
# Positional command-line arguments, in order:
#   1: model_id, 2: model_name, 3: filename, 4: email
model_id = sys.argv[1]
model_name = sys.argv[2]
filename = sys.argv[3]
email = sys.argv[4]

# Relative directories; they resolve against the current working directory.
media_directory = 'media/'
model_directory = 'saved_models/'

# Load the training CSV and keep its 'label' column as a one-column
# int8 DataFrame.
training_file = pd.read_csv(f'{media_directory}{filename}', delimiter=',')
train_y = training_file['label'].astype(np.int8).to_frame(name='label')

# Build both feature sets and time the whole step.
# NOTE(review): the literal 4 is passed straight to both helpers; its
# meaning is defined by those helpers, which are not visible here.
feature_start_time = time.time()
pos_ind = position_independent(training_file, 4).astype(np.int8)
pos_spe = position_specific(training_file, 4).astype(np.int8)
feature_end_time = time.time()
print(f'Feature generation time: {feature_end_time - feature_start_time}')

# Training matrix: the two feature frames placed side by side.
train_x = pd.concat((pos_ind, pos_spe), axis='columns', sort=False)

# Random forest configured with a fixed seed and verbose progress output.
rf = RandomForestClassifier(
    n_estimators=500,
    random_state=1,
    n_jobs=-1,
    verbose=2,
)

steps = [('SFM',
          SelectFromModel(estimator=rf, max_features=2899, threshold=-np.inf)),
         ('scaler', StandardScaler()),
         ('SVM',
          SVC(C=1,
Пример #2
0
    os.remove(static_directory + 'user_' + str(user_id) + '/' + model_id +
              '_roc_curve.png')

# Load the pickled model. The context manager guarantees the file handle is
# closed even if unpickling raises (the original left it open).
# SECURITY NOTE(review): unpickling executes arbitrary code from the file;
# confirm that '<model_directory><model_id>.pkl' can only ever be a file
# written by this application and that model_id is not attacker-controlled.
with open(model_directory + model_id + '.pkl', 'rb') as f:
    model = pkl.load(f)

# Read the file to predict on.
test_file = pd.read_csv(media_directory + prediction_file, delimiter=',')

# 'sgRNA' is always read; 'label' is only read when the file has exactly two
# columns, otherwise test_file_y stays an empty DataFrame.
test_file_x = test_file['sgRNA']
test_file_y = pd.DataFrame(data=[])
if test_file.shape[1] == 2:
    test_file_y = test_file['label']

# Build the feature matrix and time the step.
feature_start_time = time.time()
pos_ind = position_independent(test_file, 4).astype(np.int8)
pos_spe = position_specific(test_file, 4).astype(np.int8)

# Model type '1' uses the two positional feature sets only; every other
# model type additionally gets the gap features appended.
if str(model_type) == '1':
    test_x = pd.concat([pos_ind, pos_spe], axis=1, sort=False)
else:
    gap = gap_features(test_file)
    test_x = pd.concat([pos_ind, pos_spe, gap], axis=1, sort=False)

feature_end_time = time.time()
print('Feature generation time: ' + str(feature_end_time - feature_start_time))

# Run both the hard predictions and the class probabilities, timed together.
prediction_start_time = time.time()
prediction_y = model.predict(test_x)
prediction_y_proba = model.predict_proba(test_x)
prediction_end_time = time.time()
print('Prediction time: ' + str(prediction_end_time - prediction_start_time))