Exemplo n.º 1
0
    def __call__(self, track, slice=None):
        """Return TPM values of the configured genes of interest per factor.

        Two queries are run through ``self.getDataFrame``:

        * ``TPM``, ``gene_id`` and ``sample_name`` from ``sailfish_genes``
          joined to ``samples``;
        * ``factor``, ``factor_value`` and ``sample_name`` from ``samples``
          joined to ``factors``, skipping the ``genome`` factor.

        Both results are merged on ``sample_name`` and restricted to the
        gene ids listed under ``genes_of_interest`` in the parameters of
        ``pipeline_rnaseqqc.py`` (looked up in the current directory).

        Parameters
        ----------
        track, slice
            Part of the call signature; neither is used by this method.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``factor``; the previous row index is kept as an
            ``index`` column and ``TPM`` is cast to ``float``.
        """

        exp_statement = """
        SELECT TPM, gene_id, sample_name
        FROM sailfish_genes AS A
        JOIN samples AS B
        ON A.sample_id = B.id"""

        exp_df = self.getDataFrame(exp_statement)

        factors_statement = '''
        SELECT factor, factor_value, sample_name
        FROM samples AS A
        JOIN factors AS B
        ON A.id = B.sample_id
        WHERE factor != 'genome'
        '''

        factors_df = self.getDataFrame(factors_statement)

        merged_df = pd.merge(exp_df, factors_df,
                             left_on="sample_name", right_on="sample_name")

        genes = Pipeline.asList(Pipeline.peekParameters(
            ".", "pipeline_rnaseqqc.py")['genes_of_interest'])

        # Take an explicit copy of the filtered rows: assigning a column on
        # the result of a boolean selection otherwise triggers pandas'
        # SettingWithCopyWarning.
        interest_df = merged_df[merged_df['gene_id'].isin(genes)].copy()

        interest_df['TPM'] = interest_df['TPM'].astype(float)

        return interest_df.reset_index().set_index("factor")
Exemplo n.º 2
0
import sys
import os
import glob
import sqlite3
from CGATCore import Pipeline as P
import CGATPipelines.PipelineTracks as PipelineTracks

# Load options from the config files. The first candidate lives in a
# directory named after this file without its extension; os.path.splitext
# is used instead of slicing off a fixed ".py" so that other suffixes
# (e.g. ".pyc") are handled as well.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini",
    "pipeline.ini"
])

# module-level handle on the Pipeline module's parameter dictionary
PARAMS = P.PARAMS

# parameters of pipeline_annotations.py, looked up in PARAMS["annotations_dir"]
PARAMS_ANNOTATIONS = P.peekParameters(PARAMS["annotations_dir"],
                                      "pipeline_annotations.py")

Sample = PipelineTracks.Sample
# Build the track collection from everything matching "medip_*" in the
# current directory. The pattern is a raw string so that "\S" reaches the
# regex engine without relying on an invalid string escape.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("medip_*"), r"medip_(\S+)")


def connect():
    '''connect to database.

    Opens the SQLite database named by ``PARAMS["database_name"]`` and
    prepares an ``ATTACH DATABASE`` statement that refers to
    ``PARAMS["annotations_database"]`` under the alias ``annotations``.

    This method also attaches to helper databases.

    NOTE(review): in the lines shown here the statement is only built --
    it is neither executed nor is the connection returned. Confirm
    against the complete function.
    '''

    dbh = sqlite3.connect(PARAMS["database_name"])
    # the annotations database path is interpolated directly into the SQL
    statement = '''ATTACH DATABASE '%s' as annotations''' % (
        PARAMS["annotations_database"])
Exemplo n.º 3
0
        "pipeline.ini"
    ],
    defaults={
        "annotations_dir": "",
        "genesets_abinitio_coding": "pruned.gtf.gz",
        "genesets_abinitio_lncrna": "pruned.gtf.gz",
        "genesets_reference": "reference.gtf.gz",
        "genesets_refcoding": "refcoding.gtf.gz",
        "genesets_previous": ""
    })

# module-level handle on the Pipeline module's parameter dictionary
PARAMS = P.PARAMS

# Merge the parameters of pipeline_annotations.py into PARAMS; the
# prefix "annotations_" is passed along for the imported keys.
# NOTE(review): the lookup key is "annotations_annotations_dir" (doubled
# prefix) -- confirm this is the intended key and not "annotations_dir".
PARAMS.update(
    P.peekParameters(PARAMS["annotations_annotations_dir"],
                     "pipeline_annotations.py",
                     prefix="annotations_",
                     update_interface=True))

# value of "genesets_previous" converted with P.asList
PREVIOUS = P.asList(PARAMS["genesets_previous"])


def connect():
    '''connect to database.

    Opens the SQLite database named by ``PARAMS["database_name"]``,
    prepares an ``ATTACH DATABASE`` statement that refers to
    ``PARAMS["annotations_database"]`` under the alias ``annotations``
    and creates a cursor on the connection.

    This method also attaches to helper databases.

    NOTE(review): in the lines shown here the statement is built and a
    cursor is created, but the statement is not executed and the
    connection is not returned. Confirm against the complete function.
    '''

    dbh = sqlite3.connect(PARAMS["database_name"])
    # the annotations database path is interpolated directly into the SQL
    statement = '''ATTACH DATABASE '%s' as annotations''' % (
        PARAMS["annotations_database"])
    cc = dbh.cursor()
    "pipeline.ini"
])

# record the directory containing this file
PARAMS["projectsrc"] = os.path.dirname(__file__)
# Debugging aid -- uncomment to dump every parameter:
# for key, value in PARAMS.items():
#     print("%s:\t%s" % (key, value))

# add configuration values from associated pipelines
#
# 1. pipeline_annotations: any parameters will be added with the
#    prefix "annotations_". The interface will be updated with
#    "annotations_dir" to point to the absolute path names.
#
# on_error_raise evaluates to True only when this module is executed as
# a script (__name__ == "__main__"), and to False when it is imported.
PARAMS.update(
    P.peekParameters(PARAMS["annotations_dir"],
                     "pipeline_annotations.py",
                     on_error_raise=__name__ == "__main__",
                     prefix="annotations_",
                     update_interface=True))

# If necessary, update the PARAMS dictionary in any module files,
# e.g.:
#
# import CGATPipelines.PipelineGeneset as PipelineGeneset
# PipelineGeneset.PARAMS = PARAMS
#
# Note that this is a hack and deprecated; it is better to pass all
# parameters that a function needs explicitly.


# -----------------------------------------------
# Utility functions
Exemplo n.º 5
0
import CGAT.GTF as GTF
import CGATPipelines.PipelineTracks as PipelineTracks

# Load options from the config files; the first candidate lives in a
# directory named after this file without its extension.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

# module-level handle on the Pipeline module's parameter dictionary
PARAMS = P.PARAMS

USECLUSTER = True

# link up with annotations
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py")

# link up with ancestral repeats
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py")

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# Collect tracks from the *.gtf.gz files in the current directory,
# leaving out the repeats, introns and merged gene sets. The pattern is
# a raw string so that "\S" reaches the regex engine without relying on
# an invalid string escape.
# NOTE(review): the dots in the pattern are unescaped and therefore match
# any character; kept as in the original.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))
Exemplo n.º 6
0
from CGATCore import Pipeline as Pipeline
import CGATPipelines.PipelineTracks as PipelineTracks
###################################################################
###################################################################
# parameterization

# Report settings: each "calling_*" option falls back to the generic
# option of the same purpose, and finally to a hard-coded default.
EXPORTDIR = P.get('calling_exportdir', P.get('exportdir', 'export'))
DATADIR = P.get('calling_datadir', P.get('datadir', '.'))
DATABASE = P.get('calling_backend',
                 P.get('report_sql_backend', 'sqlite:///./csvdb'))

###################################################################
# cf. pipeline_chipseq.py
# This should be automatically gleaned from pipeline_chipseq.py
###################################################################
PARAMS_PIPELINE = Pipeline.peekParameters(".", "pipeline_chipseq.py")

Sample = PipelineTracks.Sample3

# file suffixes that are looked for in DATADIR
suffixes = ["export.txt.gz", "sra", "fastq.gz", "fastq.1.gz", "csfasta.gz"]

# One Tracks collection is built per suffix from the matching files in
# DATADIR (paths containing "input" are skipped); the collections are
# then added together, starting from an empty Tracks(Sample).
# The pattern is a raw string so that "\S" reaches the regex engine
# without relying on an invalid string escape.
# NOTE(review): DATADIR and the suffix are interpolated into the regex
# unescaped, so their dots match any character; kept as in the original.
TRACKS = sum(
    [
        PipelineTracks.Tracks(Sample).loadFromDirectory(
            [x for x in glob.glob("%s/*.%s" % (DATADIR, s))
             if "input" not in x],
            r"%s/(\S+).%s" % (DATADIR, s))
        for s in suffixes
    ],
    PipelineTracks.Tracks(Sample))

Sample.setDefault("asTable")

# aggregate over all tracks collected above
ALL = PipelineTracks.Aggregate(TRACKS)