示例#1
0
def age_prediction_with_trained_model(q2_model: qiime2.sample_estimator,
                                        test_table:biom.Table,
                                        test_metadata: qiime2.MetadataColumn) -> pd.DataFrame:
    '''
    Predict age in the test microbiome dataset based on a trained model.

    Parameters
    ----------
    q2_model : QIIME 2 sample estimator artifact
        A trained regression model; it is passed unchanged as the second
        argument of ``predict_regression``.
    test_table : biom.Table
        Feature table of the test samples. It is imported as a
        ``FeatureTable[Frequency]`` artifact before prediction. An
        already-imported ``qiime2.Artifact`` is used as-is.
        NOTE(review): the features presumably have to match the ones the
        model was trained on -- confirm against the caller.
    test_metadata : qiime2.MetadataColumn, qiime2.Metadata or pandas object
        Metadata of the test samples. QIIME 2 metadata objects are converted
        with ``to_dataframe()``; pandas objects are used directly.

    Returns
    -------
    updated_test_metadata : pd.DataFrame
        The test metadata with the predicted values appended as the last
        column (joined on the index).
    '''
    # The original code referenced an undefined `q2_test_table`; build it
    # from the `test_table` argument instead.
    if isinstance(test_table, qiime2.Artifact):
        q2_test_table = test_table
    else:
        q2_test_table = qiime2.Artifact.import_data("FeatureTable[Frequency]",
                                                    test_table)

    predictions = predict_regression(q2_test_table, q2_model).predictions
    test_pred_df = predictions.view(pd.Series)

    # pd.concat only accepts pandas objects, so unwrap QIIME 2 metadata first.
    if not isinstance(test_metadata, (pd.Series, pd.DataFrame)):
        test_metadata = test_metadata.to_dataframe()

    updated_test_metadata = pd.concat([test_metadata, test_pred_df],
                                      axis=1, sort=False)

    return updated_test_metadata
示例#2
0
def age_prediction_with_train_data(train_table: biom.Table, 
                                    train_metadata:qiime2.Metadata, 
                                    train_target_field:str,
                                    test_table:biom.Table, 
                                    test_metadata: qiime2.metadata,
                                    n_jobs_or_threads: int = 4,
                                    cv: int = 5,
                                    n_estimators: int = 500,
                                    parameter_tuning: bool = False) -> pd.DataFrame:
    '''
    Train a regressor on the train data and predict age in the test data.

    Parameters
    ----------
    train_table : biom.Table
        Feature table used for model training. Samples are in columns,
        features (i.e. OTUs) are in rows.
    train_metadata : qiime2.Metadata
        Sample metadata of the training samples; must contain the column
        named by ``train_target_field``.
    train_target_field : str
        Name of the metadata column holding the target (age) values.
    test_table : biom.Table
        Feature table used for model testing. Samples are in columns,
        features (i.e. OTUs) are in rows.
    test_metadata : qiime2.Metadata, qiime2.MetadataColumn or pandas object
        Metadata of the test samples. QIIME 2 metadata objects are converted
        with ``to_dataframe()``; pandas objects are used directly.
    n_jobs_or_threads : int
        Forwarded to ``regress_samples`` as ``n_jobs``.
    cv : int
        Forwarded to ``regress_samples`` as ``cv``.
    n_estimators : int
        Forwarded to ``regress_samples`` as ``n_estimators``.
    parameter_tuning : bool
        Forwarded to ``regress_samples`` as ``parameter_tuning``.

    Returns
    -------
    updated_test_metadata : pd.DataFrame
        The test metadata where the predicted microbiome age has been
        inserted into the last column.
    '''
    # Restrict the metadata to the sample IDs present in the train table.
    train_metadata = train_metadata.filter_ids(train_table.ids(axis='sample'))

    # Pull out the target column and drop samples without a target value.
    train_y_q2 = train_metadata.get_column(train_target_field)
    train_y_q2 = train_y_q2.drop_missing_values()

    # Keep only the samples that still have a target value. inplace=False so
    # the caller's table is not modified.
    train_table = train_table.filter(train_y_q2.ids, axis='sample',
                                     inplace=False)

    train_table_q2 = Artifact.import_data("FeatureTable[Frequency]", train_table)

    # train the model with q2-sample-classifier
    out = regress_samples(train_table_q2, train_y_q2,
                          cv=cv,
                          n_jobs=n_jobs_or_threads,
                          n_estimators=n_estimators,
                          parameter_tuning=parameter_tuning)
    q2_model = out.sample_estimator

    # age prediction in the test table
    test_table_q2 = Artifact.import_data("FeatureTable[Frequency]", test_table)
    predictions = predict_regression(test_table_q2, q2_model).predictions
    test_pred_df = predictions.view(pd.Series)

    # pd.concat only accepts pandas objects, so unwrap QIIME 2 metadata first.
    if not isinstance(test_metadata, (pd.Series, pd.DataFrame)):
        test_metadata = test_metadata.to_dataframe()

    updated_test_metadata = pd.concat([test_metadata, test_pred_df],
                                      axis=1, sort=False)

    return updated_test_metadata
示例#3
0
        microbiome age has been inerted into the last column.
    '''
    try:
        train_y=train_metadata[train_target_field]
        except NameError:
            print("The train_target_field is not found!")
    train_table_q2 = Artifact.import_data("FeatureTable[Frequency]", train_table)
    train_metadata_q2 = q2.Metadata(train_metadata) # q2 metadata
    train_y_q2=train_metadata_q2.get_column(train_target_field)

    test_table_q2 = Artifact.import_data("FeatureTable[Frequency]", test_table)
    test_metadata_q2 = q2.Metadata(test_metadata) # q2 metadata
    out = regress_samples(q2_train_X, q2_train_y, cv=5, n_jobs=8, n_estimators=500, parameter_tuning=False)
    q2_model = out.sample_estimator

    predictions=predict_regression(test_table_q2, q2_model).predictions
    #predictions.save(OUTDIR+'test_predictions.qza')

    test_pred_df=predictions.view(pd.Series)

    updated_test_metadata = pd.concat([test_metadata, test_pred_df], axis=1, sort=False)
    #result.to_csv(OUTDIR+'test_predictions_metadata.tsv',sep='\t') 
    
    return updated_test_metadata

def age_prediction_with_train_data(train_table: biom.Table, 
                                    train_metadata:qiime2.Metadata, 
                                    train_target_field:str,
                                    test_table:biom.Table, 
                                    test_metadata: qiime2.metadata,
                                    n_jobs_or_threads: int = 4,
示例#4
0
    print("The # of features in the train data: ", len(a_feature_ids))
    print("The # of features in the original test data: ", len(b_feature_ids))
    a_uniq_f = list(set(a_feature_ids) - set(b_feature_ids))
    ab_shared_f = set(a_feature_ids).intersection(set(b_feature_ids))
    print("The # of features with all zeros in the new test data: ",
          len(a_uniq_f))
    print("The # of shared features kept in the new test data: ",
          len(ab_shared_f))
    b_padding_matrix = pd.DataFrame(0, index=b.index, columns=a_uniq_f)
    new_b = pd.concat([b[ab_shared_f], b_padding_matrix], axis=1)
    new_b = new_b[a_feature_ids]
    print("The shape of new test data: ", new_b.shape)
    new_b_qza = q2.Artifact.import_data('FeatureTable[Frequency]', new_b)
    return new_b_qza


# ## Align features in the test dataset with those in the train data

# Run pad_features_by_qza on the train and test feature-table artifacts.
test_X_padding_qza = pad_features_by_qza(train_X_q2, test_X_q2)

# ##  Microbiome age prediction using `predict_regression`

# Score the aligned test table with the trained model and keep the
# predictions artifact.
regression_output = predict_regression(test_X_padding_qza, trained_model_q2)
predictions = regression_output.predictions
predictions_path = OUTDIR + 'test_predictions.qza'
predictions.save(predictions_path)

# View the predictions artifact as a pandas Series.
test_pred_df = predictions.view(pd.Series)

# Append the predictions as an extra column of the test metadata and write
# the combined table as a tab-separated file.
result = pd.concat(
    [test_metadata, test_pred_df],
    axis=1,
    sort=False,
)
result_path = OUTDIR + 'test_predictions_metadata.tsv'
result.to_csv(result_path, sep='\t')