Python Dataset.counts примеры использования

Язык программирования: Python

Пространство имен/Пакет: singlet.dataset

Класс/Тип: Dataset

Метод/Функция: counts

Примеров на hotexamples.com: 3

Python Dataset.counts — найдено 3 примера. Это лучшие примеры кода на Python для singlet.dataset.Dataset.counts, собранные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Dataset(14)

copy(9)

query_samples_by_metadata(7)

samplesheet(4)

query_features_by_metadata(4)

split(4)

counts(3)

query_features_by_name(3)

query_samples_by_counts(2)

featuresheet(1)

query_features_by_counts(1)

__repr__(1)

compare(1)

query_samples_by_name(1)

reindex(1)

rename(1)

bootstrap(1)

Пример #1

Показать файл

    # NOTE: an env variable for the config file needs to be set when
    # calling this script
    from singlet.dataset import Dataset
    # The sample sheet and counts table are given by name rather than by
    # path (presumably resolved through the config file mentioned above --
    # TODO confirm).
    ds = Dataset(samplesheet='example_sheet_tsv', counts_table='example_table_tsv')

    # Cluster along the 'samples' axis and compare the full leaf order with
    # the expected sequence of sample names.
    print('Hierarchical clustering of samples')
    d = ds.cluster.hierarchical(
            'samples',
            optimal_ordering=True)
    assert(tuple(d['leaves']) == ('second_sample', 'test_pipeline',
                                  'first_sample', 'third_sample'))
    print('Done!')

    print('Hierarchical clustering of features')
    # Keep only the first 200 rows of the counts table before clustering
    # along the 'features' axis.
    ds.counts = ds.counts.iloc[:200]
    d = ds.cluster.hierarchical(
            'features',
            optimal_ordering=True)
    # Only the first three leaves are checked here.
    assert(tuple(d['leaves'])[:3] == ('PNPLA4', 'ITGAL', 'HOXA11'))
    print('Done!')

    print('Hierarchical clustering of features and phenotypes')
    # Same slice as in the previous section; the table was already cut to
    # at most 200 rows there.
    ds.counts = ds.counts.iloc[:200]
    # One phenotype column is passed in addition to the features axis.
    d = ds.cluster.hierarchical(
            axis='features',
            phenotypes=('quantitative_phenotype_1_[A.U.]',),
            optimal_ordering=True)
    # The phenotype is expected at index 23 of the resulting leaf order.
    assert(d['leaves'][23] == 'quantitative_phenotype_1_[A.U.]')
    print('Done!')

Пример #2

Показать файл

    print('Add normalized virus counts')
    # Virus reads per million total reads: 1e6 * n_reads_virus / n_reads,
    # stored as a new samplesheet column.
    ds.samplesheet['virus_reads_per_million'] = 1e6 * ds.samplesheet[
        'n_reads_virus'] / ds.samplesheet['n_reads']
    # log10 with a 0.1 offset, so a value of zero maps to -1 instead of -inf.
    ds.samplesheet['log_virus_reads_per_million'] = np.log10(
        0.1 + ds.samplesheet['virus_reads_per_million'])

    print('Filter low-quality cells')
    # The threshold is copied into a local because the query string refers
    # to it as @n_reads_min and locals() is passed as local_dict.
    n_reads_min = args.n_reads_min
    ds.query_samples_by_metadata('n_reads > @n_reads_min',
                                 local_dict=locals(),
                                 inplace=True)

    print('Limit to decently expressed genes')
    # Keep rows of the counts table in which at least
    # args.n_cpm_min_genes[1] entries exceed args.n_cpm_min_genes[0].
    ind = (ds.counts > args.n_cpm_min_genes[0]).sum(
        axis=1) >= args.n_cpm_min_genes[1]
    ds.counts = ds.counts.loc[ind]

    print('Ignore genes with multiple IDs')
    from collections import Counter
    # Count occurrences of each GeneName, then drop every feature whose
    # name occurs more than once (all copies are removed, not just extras).
    genec = Counter(ds.featuresheet['GeneName'].values)
    genes_multiple = [k for k, v in genec.items() if v > 1]
    ds.featuresheet = ds.featuresheet.loc[~ds.featuresheet['GeneName'].
                                          isin(genes_multiple)]

    print('Translate to gene names')
    # Rename along the features axis using the 'GeneName' column, in place.
    ds.rename(axis='features', column='GeneName', inplace=True)

    print('Restrict to virus genes')
    # No inplace=True on the feature query: its return value is bound to
    # dsv. The sample filter below is then applied to dsv in place.
    dsv = ds.query_features_by_metadata('Organism == "mCMV"')
    dsv.query_samples_by_metadata("moi in ('low', 'high')", inplace=True)

Пример #3

Показать файл

import pandas as pd
import xarray as xr
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Set the config file name in the environment before singlet is imported
# below (presumably singlet reads it at import time -- TODO confirm).
os.environ['SINGLET_CONFIG_FILENAME'] = 'singlet.yml'
# Hard-coded, machine-specific path added to the import search path so the
# singlet import below can be resolved from it.
sys.path.append('/home/fabio/university/postdoc/singlet')
from singlet.dataset import Dataset, CountsTable

# Script
if __name__ == '__main__':

    # Only a samplesheet is given here; the counts table is assigned below.
    ds = Dataset(samplesheet='dengue', )
    # Read the 'aaf' variable from the netCDF file, convert it to pandas,
    # replace missing values with 0 and use the result as the counts table.
    data = xr.open_dataset('../bigdata/allele_frequencies.nc')
    ds.counts = CountsTable(data['aaf'].to_pandas().fillna(0))

    # Sync with Felix metadata
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe if the pickle comes from a trusted source.
    with open('../data/metadataD_SNV_with_tsne.pkl', 'rb') as ff:
        metadata_felix = pickle.load(ff)
    # Index labels of the rows where both 'Dn' and 'Ds' are non-NaN.
    samples = metadata_felix.index[(
        ~np.isnan(metadata_felix[['Dn', 'Ds']])).all(axis=1)]
    # Restrict both tables to those labels, in the same order.
    ds.samplesheet = ds.samplesheet.loc[samples]
    metadata_felix = metadata_felix.loc[samples]
    # Copy the selected metadata columns into the samplesheet.
    for col in [
            'coverage', 'Ds', 'Dn', 'depth', 'numSNV', 'Dn_s', 'tsne1_MOI1_10',
            'tsne2_MOI1_10'
    ]:
        ds.samplesheet[col] = metadata_felix[col]
    # log10 with a 1e-6 offset, so a value of zero maps to -6 instead of -inf.
    ds.samplesheet['log_Dn'] = np.log10(1e-6 + ds.samplesheet['Dn'])
    ds.samplesheet['log_Ds'] = np.log10(1e-6 + ds.samplesheet['Ds'])