def cross_validation():
    """Cross-validate an miSVM classifier through ``mil.crossValidation``.

    The classifier is configured from the module-level ``kernel``, ``gamma``
    and ``C`` settings, runs verbosely and is capped at 200 iterations.
    """
    classifier = misvm.miSVM(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=200,
    )
    # NOTE(review): 5 is presumably the number of folds and ``path`` the
    # result directory -- confirm against mil.crossValidation's signature.
    mil.crossValidation(classifier, 5, path)
def main():
    """Train an miSVM classifier with ``mil.train``.

    The classifier uses the module-level ``kernel``, ``gamma`` and ``C``
    settings, runs verbosely and is capped at 100 iterations. The
    module-level ``path`` is passed on as ``resultSuperDirPath``.
    """
    # Original author note: "read hard and easy".
    #
    # Alternative estimator kept for reference:
    #   misvm.miPSVM(feature=kernel, gamma=gamma, C=C, verbose=True,
    #                max_iters=100, n_components=36*3, sparse=['P'])
    classifier = misvm.miSVM(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=100,
    )
    mil.train(estimator=classifier, resultSuperDirPath=path)
def leave_one_out(n_jobs=8):
    """Run ``mil.leaveOneOut`` with an miSVM classifier.

    Args:
        n_jobs: Forwarded unchanged to ``mil.leaveOneOut`` (presumably the
            number of parallel workers -- confirm against ``mil``).

    The classifier uses the module-level ``kernel``, ``gamma`` and ``C``
    settings with at most 200 iterations; training accuracy reporting is
    requested via ``trainAcc=True``.
    """
    classifier = misvm.miSVM(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=200,
    )
    mil.leaveOneOut(
        estimator=classifier,
        resultSuperDirPath=path,
        n_jobs=n_jobs,
        trainAcc=True,
    )
def leave_one_person_out(n_jobs=8, resultVis=False):
    """Run ``mil.leaveOnePersonOut`` with an miSVM classifier.

    Args:
        n_jobs: Forwarded unchanged to ``mil.leaveOnePersonOut``.
        resultVis: When truthy, ``'inst_preds'`` is passed as the
            ``resultVis`` option; otherwise ``None`` is passed.

    The classifier uses the module-level ``kernel``, ``gamma`` and ``C``
    settings with at most 200 iterations.
    """
    vis_mode = 'inst_preds' if resultVis else None
    classifier = misvm.miSVM(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=200,
    )
    mil.leaveOnePersonOut(
        estimator=classifier,
        resultSuperDirPath=path,
        n_jobs=n_jobs,
        trainAcc=True,
        resultVis=vis_mode,
    )
def gridsearch(params_grid, cv=2):
    """Run ``mil.gridSearch`` over ``params_grid`` with an miSVM classifier.

    Args:
        params_grid: Parameter grid, forwarded unchanged to ``mil.gridSearch``.
        cv: Cross-validation setting, forwarded unchanged (defaults to 2).
    """
    # Only the iteration cap is fixed here; every other miSVM setting is left
    # at its default when the estimator is constructed.
    base_classifier = misvm.miSVM(max_iters=250)
    mil.gridSearch(
        estimator=base_classifier,
        params_grid=params_grid,
        cv=cv,
    )
def _extract_labelled_rows(lines, label):
    """Turn comma-separated sequence lines into raw feature rows.

    Each non-blank line yields ``[header, bag, <features...>, label]`` where
    header is field 0, bag is field 1 and the sequence is field 3 (stripped).
    """
    rows = []
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing with an IndexError on the field lookups.
            continue
        fields = line.split(',')
        header = fields[0]
        bag = fields[1]
        sequence = fields[3].strip()
        row = [header, bag]
        # The three descriptors are combined with `+` exactly as before and
        # only then appended, so their combination semantics are unchanged.
        row.extend(kmer(sequence) + ksnpf(sequence) + nucleic_shift(sequence))
        row.append(label)
        rows.append(row)
    return rows


def main(argv=sys.argv):
    """Extract features, train a multiple-instance SVM and write its scores.

    Steps:
      1. ``process_options(argv)`` supplies the positive/negative training
         files, the method selector, the model and scaler destinations and
         the score / reserved-sample output files.
      2. Every training line becomes a feature row labelled ``'1'``
         (positive file) or ``'-1'`` (negative file).
      3. The numeric features are min-max scaled to ``[-1, 1]`` and the rows
         are written to ``./train_features.txt``.
      4. That file is loaded back through ``file_to_features`` and an
         ``miSVM`` (``method == 0``) or ``MISVM`` (``method == 1``) model is
         fitted on the resulting bags.
      5. The model and the fitted scaler are dumped with ``joblib``; one
         prediction per entry of ``sample_info`` is written to ``score_file``
         and the names whose prediction is ``>= 0`` to ``res_file``.

    Args:
        argv: Command-line argument list handed to ``process_options``.

    Raises:
        ValueError: If ``method`` is neither 0 nor 1.
    """
    (pos_train_file, neg_train_file, method, model_file, scale_file,
     score_file, res_file) = process_options(argv)

    try:
        # ---- train feature extraction ----
        feature_matrix = _extract_labelled_rows(pos_train_file, '1')
        feature_matrix.extend(_extract_labelled_rows(neg_train_file, '-1'))

        # Columns 2..-2 are the numeric features; 0/1 are header/bag and the
        # last column is the label.
        feature_array = np.array([row[2:-1] for row in feature_matrix],
                                 dtype=np.float32)
        min_max_scaler = preprocessing.MinMaxScaler(copy=True,
                                                    feature_range=(-1, 1))
        feature_scaled = min_max_scaler.fit_transform(feature_array)

        # Re-attach header, bag and label to the scaled features row by row.
        # This replaces the former transpose round-trip, which subscripted
        # the result of map() and therefore broke on Python 3.
        # list(...) (not .tolist()) keeps the NumPy scalar elements so the
        # text written by savetxt is unchanged.
        train_feature_matrix = [
            [raw[0], raw[1]] + list(scaled) + [raw[-1]]
            for raw, scaled in zip(feature_matrix, feature_scaled)
        ]

        train_file_path = './train_features.txt'
        np.savetxt(train_file_path, train_feature_matrix, fmt='%s',
                   delimiter=',')

        # ---- put samples into bags ----
        (train_bag_targets, train_bag_samples, _instance_targets,
         sample_info) = file_to_features(train_file_path)

        if method == 0:
            svc = misvm.miSVM(kernel='quadratic', C=5.4, max_iters=10)
        elif method == 1:
            svc = misvm.MISVM(kernel='quadratic', C=5.4, max_iters=10)
        else:
            # Previously this fell through and failed later with a NameError.
            raise ValueError(
                'unsupported method %r (expected 0 or 1)' % (method,))

        svc.fit(train_bag_samples, train_bag_targets)
        joblib.dump(svc, model_file)
        joblib.dump(min_max_scaler, scale_file)

        # The second argument requests instance-level predictions as well.
        _bag_predictions, inst_predictions = svc.predict(train_bag_samples, 1)

        score_file.write("name\tscore\n")
        for i, name in enumerate(sample_info):
            score_file.write(name + "\t" + str(inst_predictions[i]) + "\n")

        # Keep only the instances with a non-negative prediction.
        res_file.write("name\n")
        for j, score in enumerate(inst_predictions):
            if score >= 0:
                res_file.write(sample_info[j] + "\n")
    finally:
        # Release every handle opened by process_options, even on failure.
        for handle in (pos_train_file, neg_train_file, score_file, res_file):
            handle.close()
instance=True) results_inst_df.set_value(bag_size, proportion, test_inst_accuracy) valid_results_inst_df.set_value(bag_size, proportion, valid_inst_accuracy) results_df.to_csv('RESULTS_bag_size_proportion_{}.csv'.format(name)) valid_results_df.to_csv('RESULTS_bag_size_proportion_{}_validation.csv'.format(name)) if instancePrediction: results_inst_df.to_csv('RESULTS_bag_size_proportion_{}_instance.csv'.format(name)) valid_results_inst_df.to_csv('RESULTS_bag_size_proportion_{}_instance_validation.csv'.format(name)) if __name__ == '__main__': model_name = sys.argv[1] if model_name == 'miSVM': from misvm import miSVM model = miSVM(verbose=0, max_iters=500, restarts=3) run_model(model, test_grid, svm_grid, model_name, negative_label=-1, instancePrediction=True) elif model_name == 'MISVM': from misvm import MISVM model = MISVM(verbose=0, max_iters=500, restarts=3) run_model(model, test_grid, svm_grid, model_name, negative_label=-1, instancePrediction=False) elif model_name == 'SIL': from misvm import SIL model = SIL(verbose=0, max_iters=500, restarts=3) run_model(model, test_grid, sil_grid, model_name, negative_label=-1, instancePrediction=True) elif model_name == 'miNet':