Python DataFeeder.get_data примеры использования

Язык программирования: Python

Пространство имен/Пакет: data_feeder

Класс/Тип: DataFeeder

Метод/Функция: get_data

Примеров на hotexamples.com: 7

Python DataFeeder.get_data — найдено 7 примеров. Это лучшие примеры кода на Python для data_feeder.DataFeeder.get_data, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

DataFeeder(21)

get_data(7)

pca(5)

get_batch(3)

kill_queue_threads(3)

start_queue_threads(3)

fetch_train(1)

fetch_valid(1)

get_size(1)

produce(1)

request(1)

Пример #1

Показать файл

def run_4d_model():
    """Fit and evaluate an LDA classifier on four PCA components.

    Loads features and target through ``DataFeeder``, reduces the features
    with ``run_pca`` to four components, holds out 30% of the samples for
    testing, prints the accuracy, delegates the F1 report to
    ``calculate_f1_score`` and displays a normalized confusion matrix.
    """
    print('\nLinear Discriminant Analysis - 4 dimensions\n')

    # Load the feature matrix and the target labels.
    feeder = DataFeeder()
    features, target = feeder.get_data()

    # Reduce the features to four principal components.
    component_names = ['pc_1', 'pc_2', 'pc_3', 'pc_4']
    features = run_pca(features, n_components=4, columns=component_names)

    # Hold out 30% of the samples for testing; the seed is fixed so the
    # split is reproducible.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.3, random_state=1)

    # Fit on the training part, then predict the held-out part.
    classifier = LinearDiscriminantAnalysis()
    classifier.fit(features_train, target_train)
    predicted = classifier.predict(features_test)

    # Report accuracy as a percentage, then hand off to the F1 helper.
    accuracy_pct = accuracy_score(target_test, predicted) * 100
    print('# Accuracy score: %.2f' % accuracy_pct)
    calculate_f1_score(target_test, predicted)

    # Draw the normalized confusion matrix and display the figure.
    plot_confusion_matrix(target_test,
                          predicted,
                          normalize=True,
                          title='Confusion Matrix')
    plt.show()

Пример #2

Показать файл

Файл: gursajan_logistric_reg.py Проект: alexZaicev/Breast-Cancer-Classification

def main():
    """Run the logistic-regression experiments on 10 PCA components.

    Loads the data through ``DataFeeder``, reduces it with ``pca(10)``,
    tunes hyperparameters with ``find_parameters`` and then runs a
    train/test split and a cross-validation. Each is run twice: first with
    ``penalty='none'`` (``saga`` solver), then with the helpers' default
    penalty settings (presumably L2, per the banners and plot title --
    confirm against the helpers). Every run is preceded by a banner naming
    the configuration that is actually executed.
    """
    separator = '#################################################'

    # create data feeder and get features and target
    dt = DataFeeder()
    features, target = dt.get_data()

    # replace the raw features with their first 10 principal components
    features = dt.pca(10)

    # get best hyperparameters, scored by F1 with label 0 as the positive class
    scorer = make_scorer(f1_score, pos_label=0)
    params = find_parameters(features, target, scorer=scorer)
    best_c = params['C']

    # NOTE(review): if the helpers forward `penalty` to scikit-learn's
    # LogisticRegression, newer releases expect penalty=None instead of the
    # string 'none' -- confirm against the pinned scikit-learn version.

    # run train test split without penalty
    print(separator)
    print('Train test split without penalty')
    run_train_test_split(features, target, C=best_c, penalty='none', solver='saga')

    # run train test split with L2 penalty
    print(separator)
    print('Train test split with L2 penalty')
    run_train_test_split(features, target, C=best_c)

    # run cross validation without penalty
    # (the banner previously announced "with L2 penalty" for this run)
    print(separator)
    print('Cross Validation without penalty')
    run_cross_validation(features, target, C=best_c, penalty='none', solver='saga',
                         title='Cross validation with no penalty')

    # run cross validation with L2 penalty
    # (the banner previously announced "without penalty" for this run)
    print(separator)
    print('Cross Validation with L2 penalty')
    run_cross_validation(features, target, C=best_c,
                         title='Cross validation with l2 penalty')

    # show all figures produced by the runs above
    plt.show()

Пример #3

Показать файл

Файл: joel_naive_bayes.py Проект: alexZaicev/Breast-Cancer-Classification

def main():
    """Train a Gaussian Naive Bayes classifier on one PCA component.

    Loads the data through ``DataFeeder``, keeps a single principal
    component, makes a stratified 70/30 train/test split, then prints the
    accuracy, F1, recall and precision scores as percentages.
    """
    # Load the features and the target labels.
    feeder = DataFeeder()
    features, target = feeder.get_data()

    # Use only one principal component.
    features = feeder.pca(n_components=1)

    # Split into 70% train and 30% test, stratified on the target.
    split = train_test_split(
        features, target, test_size=0.3, stratify=target, random_state=100)
    features_train, features_test, target_train, target_test = split

    # Fit the classifier on the training data and predict the test data.
    clf = GaussianNB()
    fitted = clf.fit(features_train, target_train)
    y_pred = fitted.predict(features_test)

    # Print each metric as a percentage, in the original reporting order.
    reports = (
        ("Accuracy Score: %.2f", accuracy_score),
        ("F1 score: %.2f", f1_score),
        ("Recall score: %.2f", recall_score),
        ("Precision score: %.2f", precision_score),
    )
    for template, metric in reports:
        print(template % (metric(target_test, y_pred) * 100))

Пример #4

Показать файл

def run_2d_model():
    """Fit and evaluate an LDA classifier and plot its decision regions.

    Loads features and target through ``DataFeeder``, reduces the features
    with ``run_pca`` using its default settings (presumably two components,
    per the banner -- confirm against ``run_pca``), holds out 30% of the
    samples for testing, prints the accuracy, delegates the F1 report to
    ``calculate_f1_score`` and displays the decision regions together with
    a normalized confusion matrix.
    """
    banner = '\nLinear Discriminant Analysis - 2 dimensions with decision regions\n'
    print(banner)

    # Load the feature matrix and the target labels.
    feeder = DataFeeder()
    features, target = feeder.get_data()

    # Reduce dimensionality with run_pca's defaults.
    features = run_pca(features)

    # Hold out 30% of the samples for testing; the seed is fixed so the
    # split is reproducible.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.3, random_state=1)

    # Fit on the training part, then predict the held-out part.
    classifier = LinearDiscriminantAnalysis()
    classifier.fit(features_train, target_train)
    predicted = classifier.predict(features_test)

    # Report accuracy as a percentage, then hand off to the F1 helper.
    accuracy_pct = accuracy_score(target_test, predicted) * 100
    print('# Accuracy score: %.2f' % accuracy_pct)
    calculate_f1_score(target_test, predicted)

    # Re-join train and test (train rows first) so the decision-region plot
    # covers every sample.
    all_features = np.vstack((features_train, features_test))
    all_targets = np.hstack((target_train, target_test))

    # Decision regions go on their own figure.
    plt.figure()
    plot_decision_regions(all_features, all_targets, classifier)

    # Draw the normalized confusion matrix and display all figures.
    plot_confusion_matrix(target_test,
                          predicted,
                          normalize=True,
                          title='Confusion Matrix')
    plt.show()

Пример #5

Показать файл

Файл: osamah_knn.py Проект: alexZaicev/Breast-Cancer-Classification

def main():
    """Run the nearest-neighbours experiment on 10 PCA components.

    Plots the diagnosis distribution, reduces the features with
    ``pca(n_components=10)``, looks up the best ``n_neighbors`` through
    ``find_best_params`` and then runs both the train/test-split and the
    cross-validation evaluations with that value.
    """
    # Load the pre-processed features and the target labels.
    feeder = DataFeeder()
    features, target = feeder.get_data()

    # Show how the diagnoses are distributed across the records.
    plot_hist(target,
              xlabel='Diagnosis',
              ylabel='Patient Records',
              title='Patient Diagnosis Distribution',
              xlim=['M', 'B'])

    # Reduce dimensionality with PCA; n_components=2 and n_components=4 are
    # the alternatives previously tried here.
    features = feeder.pca(n_components=10)

    # Find the best hyperparameter and report it.
    best_params = find_best_params(features, target)
    n_neighbors = best_params['n_neighbors']
    print("Best number of neighbors: %d" % n_neighbors)

    # Evaluate with a train/test split, then with cross validation.
    std_test_train_split(features, target, n_neighbors=n_neighbors)
    cross_validation(features, target, n_neighbors=n_neighbors)

    # Display every figure created above.
    plt.show()

Пример #6

Показать файл

Файл: yuvraj_decision_tree.py Проект: alexZaicev/Breast-Cancer-Classification

def main():
    """Run the decision-tree experiment on 2 PCA components.

    Loads the data through ``DataFeeder``, reduces the features with
    ``pca(n_components=2)``, tunes ``max_depth`` with ``find_best_params``
    (F1 scorer, label 0 as positive class) and then evaluates the tree with
    a stratified train/test split and with cross validation.
    """
    # Create the data feeder and pull the feature and target sets.
    feeder = DataFeeder()
    features, target = feeder.get_data()

    # Reduce dimensionality with PCA; n_components=4 is the alternative
    # previously tried here.
    features = feeder.pca(n_components=2)

    # Search for the best hyperparameters (max depth of the tree).
    scorer = make_scorer(f1_score, pos_label=0)
    params = find_best_params(features, target, scorer=scorer)

    # Stratified split with a fixed seed; test size is left at the
    # train_test_split default.
    split = train_test_split(
        features, target, stratify=target, random_state=1)
    features_train, features_test, target_train, target_test = split

    # The tuned depth is cast to int before being handed to the helpers.
    max_depth = int(params['max_depth'])

    # Evaluate on the train/test split.
    std_train_test_split(features_train,
                         features_test,
                         target_train,
                         target_test,
                         max_depth=max_depth)

    # Evaluate with cross validation on the full data, then show the figures.
    cross_validation(features, target, max_depth=max_depth)
    plt.show()

Пример #7

Показать файл

Файл: aleksej_svm.py Проект: alexZaicev/Breast-Cancer-Classification

def main():
    """Run the SVM experiment: plot distributions, tune, then evaluate.

    Plots the target and two feature distributions from the data loaded with
    ``normalize=False``, reloads the data with the ``get_data`` defaults,
    reduces it with ``pca(n_components=10)``, tunes linear and RBF
    parameters with ``Evaluator.find_best_params`` and evaluates them with
    K-fold cross validation and on a stratified train/test split.
    """
    feeder = DataFeeder()      # data feeding object
    evaluator = Evaluator()    # evaluation object

    # First pass: data loaded with normalize=False, used only for the plots.
    features, target = feeder.get_data(normalize=False)

    Plotter.plot_distribution(target, ["M", "B"],
                              bins=2,
                              title="Diagnosis Distribution",
                              xlabel="Diagnosis",
                              ylabel="Records")

    # Feature columns 1 and 2 are plotted with identical settings.
    feature_plots = (
        (1, "Texture Mean Distribution", "Texture Mean"),
        (2, "Perimeter Mean Distribution", "Perimeter Mean"),
    )
    for column, title, xlabel in feature_plots:
        Plotter.plot_distribution(features.iloc[:, column],
                                  bins=50,
                                  title=title,
                                  xlabel=xlabel,
                                  ylabel="Records")

    # Second pass: reload with the get_data defaults for modelling.
    features, target = feeder.get_data()

    # Reduce dimensionality with PCA; n_components=2 and n_components=4 are
    # the alternatives previously tried here.
    features = feeder.pca(n_components=10)

    # Stratified train/test split.
    features_train, features_test, target_train, target_test = Evaluator.split(
        features, target, stratify=target)
    split_sets = (features_train, features_test, target_train, target_test)

    # Find the best parameters based on F1-score (label 0 as positive class).
    scorer = make_scorer(f1_score, pos_label=0)
    linear_params, rbf_params = Evaluator.find_best_params(
        features_train, target_train, n_folds=10, scoring=scorer)

    # Train and test with K-fold cross validation on the full data.
    evaluator.k_fold_cv(features,
                        target,
                        n_splits=10,
                        linear_params=linear_params,
                        rbf_params=rbf_params)

    # Train and test the linear SVM, then the RBF SVM, on the split.
    evaluator.run_linear_svm(*split_sets, params=linear_params)
    evaluator.run_rbf_svm(*split_sets, params=rbf_params)

    # Show all plot figures.
    plt.show()