Пример #1
0
    def test_embark_supplemental(self, study, tmpdir):
        """Check that supplemental data survives a save/embark round trip.

        The result of ``study.expression.data.corr()`` is stored on
        ``study.supplemental``, the study is saved under ``tmpdir``, and the
        re-embarked study must expose an equal ``expression_corr`` frame.
        """
        import flotilla

        saved_name = 'test_save_supplemental'
        study.supplemental.expression_corr = study.expression.data.corr()
        study.save(saved_name, flotilla_dir=tmpdir)

        reloaded = flotilla.embark(saved_name, flotilla_dir=tmpdir)
        actual_corr = reloaded.supplemental.expression_corr
        expected_corr = study.supplemental.expression_corr
        pdt.assert_frame_equal(actual_corr, expected_corr)
Пример #2
0
    def test_embark_supplemental(self, study, tmpdir):
        """Save a study carrying supplemental data and embark it again.

        ``expression_corr`` is set from ``study.expression.data.corr()``
        before saving; the frame read back from the embarked copy is compared
        to the one held by the original study.
        """
        import flotilla

        name_on_disk = 'test_save_supplemental'
        expression = study.expression.data
        study.supplemental.expression_corr = expression.corr()
        study.save(name_on_disk, flotilla_dir=tmpdir)

        embarked = flotilla.embark(name_on_disk, flotilla_dir=tmpdir)
        pdt.assert_frame_equal(
            embarked.supplemental.expression_corr,
            study.supplemental.expression_corr,
        )
Пример #3
0
def begin(
        flotilla_project="http://sauron.ucsd.edu/flotilla_projects/"
                         "neural_diff_chr22"):
    """Embark a flotilla study and return the pieces used downstream.

    Parameters
    ----------
    flotilla_project : str
        Location handed to ``flotilla.embark``. Its last ``/``-separated
        component is used as the project id passed to ``get_mongo_db``.

    Returns
    -------
    tuple
        ``(psi, rbp_rpkms, mongodb)``: the study's splicing data, the
        expression data restricted to the ``'rbp'`` feature set with missing
        values replaced by 0, and the second item returned by
        ``get_mongo_db``.
    """
    import flotilla

    study = flotilla.embark(flotilla_project)
    rpkms = study.expression.data
    psi = study.splicing.data
    rbp_genes = study.expression.feature_sets['rbp']
    rbp_rpkms = rpkms[rbp_genes].fillna(0)

    # Drop trailing slashes before taking the last path component; otherwise
    # "…/neural_diff_chr22/" would produce an empty project id.
    project_id = flotilla_project.rstrip("/").split("/")[-1]

    # Only the database handle is needed here; the first item is discarded.
    _, mongodb = get_mongo_db(project_id)
    sys.stderr.write("finished loading study_data\n")
    return (psi, rbp_rpkms, mongodb)
Пример #4
0
import pandas as pd
import flotilla
import os
import sys

# These names are used below but were never imported in the original script.
import numpy as np
import seaborn as sns
import anchor

sns.set(style='ticks', context='paper',
        rc={'font.sans-serif': 'Arial', 'pdf.fonttype': 42})

# The iteration number is the first command-line argument. It is clamped to
# at least 1 because it is used as a divisor when deriving the seed below.
iteration = max(int(sys.argv[1]), 1)
iteration_str = str(iteration).zfill(4)
base_folder = '/home/obotvinnik/Dropbox/figures2/singlecell_pnm/figure2_modalities/bayesian/permutations'

# Derive a per-iteration seed. The original divided by an undefined name `i`;
# `iteration` is the only candidate divisor in scope. Floor division keeps
# the seed an integer on both Python 2 and Python 3.
seed = (sum(ord(c) for c in 'randomly_permute_modalities') // iteration) % 5437
np.random.seed(seed)

study = flotilla.embark('singlecell_pnm_figure1_supplementary_post_splicing_filtering')

not_outliers = study.splicing.singles.index.difference(study.splicing.outliers.index)

# NOTE(review): the original read `splicing_singles_no_outliers` before ever
# assigning it. Restricting the singles to the `not_outliers` rows is the
# presumed intent (that index was otherwise unused) -- confirm.
splicing_singles_no_outliers = study.splicing.singles.loc[not_outliers]

print(splicing_singles_no_outliers.shape)
# Within each group given by `study.sample_id_to_phenotype`, keep only the
# columns that have at least 20 non-null values.
splicing_singles_no_outliers = splicing_singles_no_outliers.groupby(
    study.sample_id_to_phenotype).apply(lambda x: x.dropna(thresh=20, axis=1))
print(splicing_singles_no_outliers.shape)

# Shuffle each group's values with `np.random.permutation` and re-wrap them
# under the group's original index and column labels.
permuted_psi = splicing_singles_no_outliers.groupby(study.sample_id_to_phenotype).apply(
    lambda x: pd.DataFrame(np.random.permutation(x), index=x.index, columns=x.columns))


# Fit and predict modalities on the permuted data, one group at a time.
bayesian = anchor.BayesianModalities()
modality_assignments = permuted_psi.groupby(study.sample_id_to_phenotype).apply(bayesian.fit_predict)
"""
Plot the modality log-likelihoods and barplots during estimation
================================================================

See also
--------
:py:func:`Study.plot_event_modality_estimation`

"""
import flotilla

# Identifier of the splicing event passed to the plotting call below
event_id = 'chr8:97356415:97356600:-@chr8:97355689:97355825:-@chr8:97353054:97353130:-@chr8:97352177:97352339:-'

study = flotilla.embark('shalek2013')
study.plot_event_modality_estimation(event_id)
Пример #6
0
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Figure styling used for every plot produced by this script
sns.set(style='ticks',
        context='paper',
        rc={
            'font.sans-serif': 'Arial',
            'pdf.fonttype': 42
        })

import flotilla
flotilla_dir = '/projects/ps-yeolab/obotvinnik/flotilla_projects'

study = flotilla.embark('singlecell_pnm_figure2_modalities_bayesian_kmers',
                        flotilla_dir=flotilla_dir)

# Correlate the transposed k-mer z-score table with itself, treating missing
# z-scores as 0, then drop any row/column of the result that is entirely NaN.
# The shape is printed before and after so the effect of the drop is visible.
# `print(...)` with one argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
corr = study.supplemental.kmer_zscores.fillna(0).T.corr()
print(corr.shape)
corr = corr.dropna(how='all', axis=1).dropna(how='all', axis=0)
print(corr.shape)

folder = '/home/obotvinnik/Dropbox/figures2/singlecell_pnm/figure2_modalities/bayesian'
figure_folder = '{}/kmer_counting'.format(folder)

# Cluster the correlation matrix and write the figure into `figure_folder`
g = sns.clustermap(corr)
g.savefig(
    '{}/modality_kmer_scores_pearson_correlated_clustermap_featurewise.pdf'.
    format(figure_folder))
Пример #7
0
def study(example_datapackage_path):
    """Return the study embarked from ``example_datapackage_path``."""
    import flotilla

    embarked = flotilla.embark(example_datapackage_path)
    return embarked
Пример #8
0
"""
Plot bar graphs of percentage of splicing events in each modality
=================================================================

See also
--------
:py:func:`Study.plot_modalities_bars`

"""
import flotilla

dataset = flotilla._shalek2013
study = flotilla.embark(dataset)

# NOTE(review): the header above refers to `plot_modalities_bars`, but the
# call below is `plot_modalities_lavalamps` -- confirm which one is intended.
study.plot_modalities_lavalamps()
Пример #9
0
"""
Compare gene expression in two features
=======================================
"""
import flotilla

study = flotilla.embark(flotilla._brainspan)

# The two features plotted against each other
features = ('FOXP1', 'FOXJ1')
study.plot_two_features(*features)
Пример #10
0
    def test_save(self, example_datapackage_path, tmpdir, monkeypatch):
        """Save an embarked study and compare its datapackage to the source.

        The study is embarked from ``example_datapackage_path`` and saved as
        ``'test_save'`` under ``tmpdir``. The written ``datapackage.json``
        must then match the original one, after:

        * copying the listed attributes from the study into the original
          datapackage's resources,
        * checking the name and the patch-bumped version,
        * removing per-resource keys that are ignored in the comparison.
        """
        import flotilla
        from flotilla.external import get_resource_from_name

        study = flotilla.embark(example_datapackage_path,
                                load_species_data=False)
        study_name = 'test_save'
        study.save(study_name, flotilla_dir=tmpdir)

        # Saving must create exactly one entry under tmpdir
        saved_entries = tmpdir.listdir()
        assert len(saved_entries) == 1
        save_dir = saved_entries[0]

        with open('{}/datapackage.json'.format(save_dir)) as f:
            test_datapackage = json.load(f)
        with open(example_datapackage_path) as f:
            true_datapackage = json.load(f)

        assert study_name == save_dir.purebasename

        resource_keys_to_ignore = ('compression', 'format', 'path')
        keys_from_study = {
            'splicing': ['feature_rename_col'],
            'expression': ['feature_rename_col', 'log_base'],
            'metadata':
            ['phenotype_order', 'phenotype_to_color', 'phenotype_col'],
            'mapping_stats': [u'number_mapped_col']
        }
        resource_names = ('metadata', 'expression', 'splicing',
                          'mapping_stats', 'spikein')

        # Add auto-generated attributes into the true datapackage.
        # `.items()` works on Python 2 and 3; `getattr` replaces the former
        # `eval` of a formatted "study.<name>.<key>" expression.
        for name, keys in keys_from_study.items():
            resource = get_resource_from_name(true_datapackage, name)
            for key in keys:
                monkeypatch.setitem(resource, key,
                                    getattr(getattr(study, name), key))

        # Saving is expected to bump the patch component of the version
        version = semantic_version.Version(study.version)
        version.patch += 1
        assert str(version) == test_datapackage['datapackage_version']
        assert study_name == test_datapackage['name']

        datapackage_keys_to_ignore = [
            'name', 'datapackage_version', 'resources'
        ]
        datapackages = (true_datapackage, test_datapackage)

        for name in resource_names:
            for datapackage in datapackages:
                resource = get_resource_from_name(datapackage, name)
                for key in resource_keys_to_ignore:
                    monkeypatch.delitem(resource, key, raising=False)

        # Have to check for resources separately because they could be in any
        # order, it just matters that the contents are equal. Dicts are not
        # orderable on Python 3, so sort by each resource's name instead.
        def resource_name(resource):
            return resource['name']

        assert sorted(true_datapackage['resources'], key=resource_name) == \
            sorted(test_datapackage['resources'], key=resource_name)

        for key in datapackage_keys_to_ignore:
            for datapackage in datapackages:
                monkeypatch.delitem(datapackage, key)

        pdt.assert_dict_equal(test_datapackage, true_datapackage)
Пример #11
0
def test_embark(example_datapackage_path):
    """Smoke test: embarking the example datapackage must not raise."""
    # The returned study is intentionally discarded; nothing is asserted on it.
    flotilla.embark(example_datapackage_path)
Пример #12
0
    def test_real_init(self, example_datapackage_path):
        """Embark the example datapackage with ``load_species_data=False``."""
        from flotilla import embark

        embark(example_datapackage_path, load_species_data=False)
Пример #13
0
def scrambled_study():
    """Return the study embarked from ``flotilla._test_data``."""
    import flotilla

    test_data = flotilla._test_data
    return flotilla.embark(test_data)
Пример #14
0
def study(example_datapackage_path):
    """Embark the datapackage at ``example_datapackage_path`` and return it."""
    from flotilla import embark

    return embark(example_datapackage_path)
Пример #15
0
def shalek2013():
    """Return the study embarked from ``flotilla._shalek2013``."""
    import flotilla

    source = flotilla._shalek2013
    return flotilla.embark(source)
Пример #16
0
def test_embark(shalek2013_datapackage_path):
    """Smoke test: embarking the shalek2013 datapackage must not raise."""
    # The returned study is intentionally discarded; nothing is asserted on it.
    flotilla.embark(shalek2013_datapackage_path)
Пример #17
0
    def test_real_init(self, shalek2013_datapackage_path):
        """Embark the shalek2013 datapackage with ``load_species_data=False``."""
        from flotilla import embark

        embark(shalek2013_datapackage_path, load_species_data=False)
Пример #18
0
"""
Plot the modality log-likelihoods and barplots during estimation
================================================================

See also
--------
:py:func:`Study.plot_event_modality_estimation`

"""
import flotilla

# Splicing event handed to the plotting call
EVENT = 'chr8:97356415:97356600:-@chr8:97355689:97355825:-@chr8:97353054:97353130:-@chr8:97352177:97352339:-'

study = flotilla.embark('shalek2013')
study.plot_event_modality_estimation(EVENT)
Пример #19
0
    def test_real_init(self, example_datapackage_path):
        """Smoke test: ``embark`` with ``load_species_data=False`` must not raise."""
        import flotilla

        embark_options = {'load_species_data': False}
        flotilla.embark(example_datapackage_path, **embark_options)
Пример #20
0
"""
Perform classification on categorical traits
============================================

See also
--------
:py:func:`Study.interactive_classifier`

"""
import flotilla

dataset = flotilla._shalek2013
study = flotilla.embark(dataset)

# Trait specification passed to the classifier plot
trait = 'phenotype: Immature BDMC'
study.plot_classifier(trait)
Пример #21
0
    def test_save(self, example_datapackage_path, example_datapackage, tmpdir,
                  monkeypatch):
        """Save an embarked study and compare its datapackage to the source.

        The study is embarked from ``example_datapackage_path`` and saved as
        ``'test_save'`` under ``tmpdir``. The written ``datapackage.json``
        must then match a copy of ``example_datapackage``, after:

        * copying the listed attributes from the study into the expected
          datapackage's resources,
        * checking the name and the patch-bumped version,
        * removing per-resource keys that are ignored in the comparison.
        """
        import flotilla
        from flotilla.datapackage import get_resource_from_name

        study = flotilla.embark(example_datapackage_path,
                                load_species_data=False)
        study_name = 'test_save'
        study.save(study_name, flotilla_dir=tmpdir)

        # Saving must create exactly one entry under tmpdir
        saved_entries = tmpdir.listdir()
        assert len(saved_entries) == 1
        save_dir = saved_entries[0]

        with open('{}/datapackage.json'.format(save_dir)) as f:
            test_datapackage = json.load(f)
        true_datapackage = example_datapackage.copy()

        assert study_name == save_dir.purebasename

        resource_keys_to_ignore = ('compression', 'format', 'path', 'url')
        keys_from_study = {'splicing': [],
                           'expression': ['thresh',
                                          'log_base'],
                           'metadata': ['phenotype_order',
                                        'phenotype_to_color',
                                        'phenotype_col',
                                        'phenotype_to_marker',
                                        'pooled_col',
                                        'minimum_samples'],
                           'mapping_stats': ['number_mapped_col'],
                           'expression_feature': ['rename_col',
                                                  'ignore_subset_cols'],
                           'splicing_feature': ['rename_col',
                                                'ignore_subset_cols']}
        resource_names = list(keys_from_study)

        feature_suffix = '_feature'

        # Add auto-generated attributes into the true datapackage.
        # `.items()` works on Python 2 and 3; `getattr` replaces the former
        # `eval` of a formatted attribute expression.
        for name, keys in keys_from_study.items():
            resource = get_resource_from_name(true_datapackage, name)
            for key in keys:
                if name.endswith(feature_suffix):
                    # '<data>_feature' resources read 'feature_<key>' from
                    # study.<data>. Slice the suffix off: str.rstrip() strips
                    # a *set of characters*, not a suffix, and only produced
                    # the right name by coincidence.
                    data_name = name[:-len(feature_suffix)]
                    value = getattr(getattr(study, data_name),
                                    'feature_{}'.format(key))
                else:
                    value = getattr(getattr(study, name), key)
                monkeypatch.setitem(resource, key, value)

        # Saving is expected to bump the patch component of the version
        version = semantic_version.Version(study.version)
        version.patch += 1
        assert str(version) == test_datapackage['datapackage_version']
        assert study_name == test_datapackage['name']

        datapackage_keys_to_ignore = ['name', 'datapackage_version',
                                      'resources']
        datapackages = (true_datapackage, test_datapackage)

        for name in resource_names:
            for datapackage in datapackages:
                resource = get_resource_from_name(datapackage, name)
                for key in resource_keys_to_ignore:
                    monkeypatch.delitem(resource, key, raising=False)

        # Have to check for resources separately because they could be in any
        # order, it just matters that the contents are equal. Dicts are not
        # orderable on Python 3, so sort by each resource's name instead.
        def resource_name(resource):
            return resource['name']

        assert sorted(true_datapackage['resources'], key=resource_name) == \
            sorted(test_datapackage['resources'], key=resource_name)

        for key in datapackage_keys_to_ignore:
            for datapackage in datapackages:
                monkeypatch.delitem(datapackage, key)

        pdt.assert_dict_equal(test_datapackage,
                              true_datapackage)