示例#1
0
def cross_validation():
    """Build an miSVM estimator and hand it to ``mil.crossValidation``.

    The estimator is configured from the ``kernel``, ``gamma`` and ``C``
    names defined outside this function, with verbose output and at most
    200 iterations.  ``mil.crossValidation`` is then called with the
    estimator, the literal ``5`` (presumably the number of folds -- confirm
    against ``mil``) and the externally defined ``path``.
    """
    svm_factory = misvm.miSVM
    classifier = svm_factory(
        kernel=kernel, gamma=gamma, C=C, verbose=True, max_iters=200)
    mil.crossValidation(classifier, 5, path)
示例#2
0
def main():  # read hard and easy
    """Train an miSVM estimator through ``mil.train``.

    The estimator uses the externally defined ``kernel``, ``gamma`` and
    ``C`` values, verbose output and an iteration cap of 100.  The
    externally defined ``path`` is passed as ``resultSuperDirPath``.
    """
    svm_factory = misvm.miSVM
    classifier = svm_factory(
        kernel=kernel, gamma=gamma, C=C, verbose=True, max_iters=100)
    # Alternative estimator that was tried here and left disabled:
    #classifier = misvm.miPSVM(feature=kernel, gamma=gamma, C=C, verbose=True, max_iters=100, n_components=36*3, sparse=['P'])
    mil.train(estimator=classifier, resultSuperDirPath=path)
示例#3
0
def leave_one_out(n_jobs=8):
    """Run ``mil.leaveOneOut`` with a freshly configured miSVM estimator.

    Args:
        n_jobs: Forwarded unchanged to ``mil.leaveOneOut`` (presumably the
            number of parallel workers -- confirm against ``mil``).

    The estimator takes ``kernel``, ``gamma`` and ``C`` from names defined
    outside this function, is verbose and is capped at 200 iterations.
    ``trainAcc=True`` is always passed.
    """
    svm_factory = misvm.miSVM
    classifier = svm_factory(
        kernel=kernel, gamma=gamma, C=C, verbose=True, max_iters=200)
    mil.leaveOneOut(
        estimator=classifier,
        resultSuperDirPath=path,
        n_jobs=n_jobs,
        trainAcc=True,
    )
示例#4
0
def leave_one_person_out(n_jobs=8, resultVis=False):
    """Run ``mil.leaveOnePersonOut`` with a freshly configured miSVM estimator.

    Args:
        n_jobs: Forwarded unchanged to ``mil.leaveOnePersonOut``.
        resultVis: When truthy, ``'inst_preds'`` is passed as the
            ``resultVis`` argument of ``mil.leaveOnePersonOut``; otherwise
            ``None`` is passed.

    The estimator takes ``kernel``, ``gamma`` and ``C`` from names defined
    outside this function, is verbose and is capped at 200 iterations.
    ``trainAcc=True`` is always passed.
    """
    vis_mode = 'inst_preds' if resultVis else None
    svm_factory = misvm.miSVM
    classifier = svm_factory(
        kernel=kernel, gamma=gamma, C=C, verbose=True, max_iters=200)
    mil.leaveOnePersonOut(
        estimator=classifier,
        resultSuperDirPath=path,
        n_jobs=n_jobs,
        trainAcc=True,
        resultVis=vis_mode,
    )
示例#5
0
def gridsearch(params_grid, cv=2):
    """Run ``mil.gridSearch`` over ``params_grid`` with an miSVM estimator.

    Args:
        params_grid: Forwarded unchanged as ``params_grid``.
        cv: Forwarded unchanged as ``cv`` (defaults to 2).

    The estimator is created with ``max_iters=250`` and otherwise default
    settings.
    """
    base_model = misvm.miSVM(max_iters=250)
    mil.gridSearch(
        estimator=base_model,
        params_grid=params_grid,
        cv=cv,
    )
示例#6
0
def _read_labelled_rows(handle, label):
    """Convert every comma-separated line of ``handle`` into a feature row.

    Fields 0, 1 and 3 of each line are used as header, bag and sequence.
    Each returned row is ``[header, bag, <features...>, label]`` where the
    features are the concatenation of ``kmer``, ``ksnpf`` and
    ``nucleic_shift`` applied to the stripped sequence.
    """
    rows = []
    for line in handle:
        fields = line.split(',')
        sequence = fields[3].strip()
        row = [fields[0], fields[1]]
        row.extend(kmer(sequence) + ksnpf(sequence) + nucleic_shift(sequence))
        row.append(label)
        rows.append(row)
    return rows


def main(argv=sys.argv):
    """Extract features, train a multiple-instance SVM and write its scores.

    Steps, in order:
      1. ``process_options(argv)`` supplies the two training inputs, the
         method selector, the model/scaler dump targets and the two output
         handles.
      2. Positive rows (label ``'1'``) and negative rows (label ``'-1'``)
         are read and their features scaled to ``[-1, 1]`` with a
         ``MinMaxScaler`` fitted on all of them.
      3. The scaled rows are written to ``train_features.txt`` and read
         back into bags with ``file_to_features``.
      4. ``misvm.miSVM`` (``method == 0``) or ``misvm.MISVM``
         (``method == 1``) is fitted, then the model and the scaler are
         dumped with ``joblib``.
      5. Per-sample instance predictions on the training data go to
         ``score_file``; names of samples with a prediction ``>= 0`` go to
         ``res_file``.

    Args:
        argv: Command-line arguments handed to ``process_options``.

    Raises:
        ValueError: If ``method`` is neither 0 nor 1.  (Previously this
            surfaced as an ``UnboundLocalError`` on ``svc``.)
    """
    (pos_train_file, neg_train_file, method, model_file, scale_file,
     score_file, res_file) = process_options(argv)

    try:
        ######train feature extraction
        try:
            feature_matrix = _read_labelled_rows(pos_train_file, '1')
            feature_matrix.extend(_read_labelled_rows(neg_train_file, '-1'))
        finally:
            # Release both inputs even when a line is malformed.
            pos_train_file.close()
            neg_train_file.close()

        # Columns 2..-2 are the numeric features; header, bag and label
        # stay as strings and are excluded from scaling.
        feature_array = np.array([row[2:-1] for row in feature_matrix],
                                 dtype=np.float32)
        min_max_scaler = preprocessing.MinMaxScaler(copy=True,
                                                    feature_range=(-1, 1))
        feature_scaled = min_max_scaler.fit_transform(feature_array)

        # Splice the scaled features back between (header, bag) and label.
        # Built as real lists: the previous map()/zip() transposition
        # indexed a map object, which is not subscriptable on Python 3.
        # list(scaled_row) keeps numpy scalars so the text written by
        # savetxt is formatted as before.
        train_feature_matrix = [
            row[:2] + list(scaled_row) + row[-1:]
            for row, scaled_row in zip(feature_matrix, feature_scaled)
        ]
        np.savetxt("train_features.txt",
                   train_feature_matrix,
                   fmt='%s',
                   delimiter=',')

        ######put samples into bags
        train_file_path = './train_features.txt'
        [
            train_bag_targets, train_bag_samples, train_bag_instance_targets,
            sample_info
        ] = file_to_features(train_file_path)

        if method == 0:
            svc = misvm.miSVM(kernel='quadratic', C=5.4, max_iters=10)
        elif method == 1:
            svc = misvm.MISVM(kernel='quadratic', C=5.4, max_iters=10)
        else:
            raise ValueError(
                "unsupported method %r: expected 0 (miSVM) or 1 (MISVM)"
                % (method,))
        svc.fit(train_bag_samples, train_bag_targets)
        joblib.dump(svc, model_file)
        joblib.dump(min_max_scaler, scale_file)

        # The second argument presumably requests instance-level
        # predictions alongside the bag-level ones -- confirm against misvm.
        bag_predictions, inst_predictions = svc.predict(train_bag_samples, 1)

        score_file.write("name\tscore\n")
        for i, sample_name in enumerate(sample_info):
            score_file.write(sample_name + "\t" + str(inst_predictions[i]) +
                             "\n")

        # Keep only the samples whose instance prediction is non-negative.
        res_file.write("name\n")
        for j, prediction in enumerate(inst_predictions):
            if prediction >= 0:
                res_file.write(sample_info[j] + "\n")
    finally:
        # Always release the output handles, including on failure.
        score_file.close()
        res_file.close()
                                              instance=True)
                results_inst_df.set_value(bag_size, proportion, test_inst_accuracy)
                valid_results_inst_df.set_value(bag_size, proportion, valid_inst_accuracy)

    results_df.to_csv('RESULTS_bag_size_proportion_{}.csv'.format(name))
    valid_results_df.to_csv('RESULTS_bag_size_proportion_{}_validation.csv'.format(name))
    if instancePrediction:
        results_inst_df.to_csv('RESULTS_bag_size_proportion_{}_instance.csv'.format(name))
        valid_results_inst_df.to_csv('RESULTS_bag_size_proportion_{}_instance_validation.csv'.format(name))


if __name__ == '__main__':
    model_name = sys.argv[1]
    if model_name == 'miSVM':
        from misvm import miSVM
        model = miSVM(verbose=0, max_iters=500, restarts=3)
        run_model(model, test_grid, svm_grid, model_name, negative_label=-1, instancePrediction=True)

    elif model_name == 'MISVM':
        from misvm import MISVM
        model = MISVM(verbose=0, max_iters=500, restarts=3)
        run_model(model, test_grid, svm_grid, model_name, negative_label=-1, instancePrediction=False)


    elif model_name == 'SIL':
        from misvm import SIL
        model = SIL(verbose=0, max_iters=500, restarts=3)
        run_model(model, test_grid, sil_grid, model_name, negative_label=-1, instancePrediction=True)


    elif model_name == 'miNet':