示例#1
0
def prepare_geo_rnaseq(resource, name=None):
    """Run the ``Prepare GEO - RNA-Seq`` process for the given resource.

    The resource is expanded to samples with ``get_samples``. For each
    sample the ids of its reads and expression objects are collected
    and handed to the ``prepare-geo-rnaseq`` process.

    :param resource: resource on which prepare_geo_rnaseq will be run
        (a single collection or a list of samples)
    :param str name: name of the prepare GEO tarball and table; when
        not given, the name returned by ``get_name_collection`` is used

    :return: object returned by ``get_or_run`` for the process

    """
    samples = get_samples(resource)
    resolwe = get_resolwe(*samples)

    read_ids, expression_ids, collection_ids = [], [], set()
    for sample in samples:
        read_ids.append(sample.get_reads().id)
        expression_ids.append(sample.get_expression().id)
        collection_ids.add(get_resource_collection(sample))

    auto_name, collection = get_name_collection(collection_ids, resolwe)

    geo = resolwe.get_or_run(
        slug='prepare-geo-rnaseq',
        input={
            'reads': read_ids,
            'expressions': expression_ids,
            'name': name or auto_name,
        },
    )

    # Attach the result only when a collection was resolved.
    if collection:
        collection.add_data(geo)

    return geo
示例#2
0
def prepare_geo_rnaseq(resource, name=None):
    """Prepare a GEO submission for RNA-Seq samples.

    Runs the ``Prepare GEO - RNA-Seq`` process (slug
    ``prepare-geo-rnaseq``) with the reads and expressions of every
    sample obtained from ``resource``.

    :param resource: resource on which prepare_geo_rnaseq will be run;
        a single collection or a list of samples
    :param str name: name of the prepare GEO tarball and table, falls
        back to the automatically determined name when empty

    :return: object returned by ``get_or_run`` for the process

    """
    samples = get_samples(resource)
    resolwe = get_resolwe(*samples)

    inputs = {'reads': [], 'expressions': []}
    seen_collections = set()
    for sample in samples:
        inputs['reads'].append(sample.get_reads().id)
        inputs['expressions'].append(sample.get_expression().id)
        seen_collections.add(get_resource_collection(sample))

    default_name, target_collection = get_name_collection(seen_collections, resolwe)
    inputs['name'] = name if name else default_name

    geo = resolwe.get_or_run(slug='prepare-geo-rnaseq', input=inputs)

    # Without a resolved collection there is nothing to attach to.
    if not target_collection:
        return geo

    target_collection.add_data(geo)
    return geo
示例#3
0
def macs(resource, use_background=True, p_value=None):
    """Run ``MACS 1.4`` process on the resource.

    This method runs `MACS 1.4`_ process with ``p-value`` specified in
    arguments and ``bam`` file from the sample.

    If ``use_background`` argument is set to ``True``, ``bam`` file from
    background sample is passed to the process as the control. The
    ``gsize`` input is derived with ``gsize_organism`` from the organism
    stored in the sample descriptor.

    .. _MACS 1.4:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-macs14

    :param resource: single resource or a list of resources on which
        the process will be run
    :param bool use_background: if set to ``True``, background sample
        will be used in the process
    :param float p_value: p-value used in the process

    :return: list of objects returned by ``get_or_run``, one per
        processed sample
    :raises KeyError: if the descriptor of a sample has no
        ``['sample']['organism']`` entry

    """
    base_inputs = {}
    if p_value is not None:
        base_inputs['pvalue'] = p_value

    results = []

    if not isinstance(resource, list):
        resource = [resource]

    for single_resource in resource:

        background_filter = {}
        if use_background:
            collection_id = get_resource_collection(single_resource)
            if collection_id:
                background_filter['collection'] = collection_id

        for sample in get_samples(single_resource):
            # Build a fresh dict for every run so that a dict handed to
            # a previous ``get_or_run`` call is never mutated afterwards.
            inputs = dict(base_inputs)
            inputs['treatment'] = sample.get_bam().id

            # Guard only the descriptor lookup: a ``KeyError`` raised
            # inside ``gsize_organism`` must not be reported as a
            # missing annotation.
            try:
                organism = sample.descriptor['sample']['organism']
            except KeyError:
                raise KeyError('{} is not annotated'.format(sample))
            inputs['gsize'] = gsize_organism(organism)

            if use_background:
                if is_background(sample) and not is_sample(single_resource):
                    # Don't run process on the background sample,
                    # but let it fail if it is run directly on sample
                    continue

                background = sample.get_background(**background_filter)
                inputs['control'] = background.get_bam().id

            macs_obj = sample.resolwe.get_or_run(slug='macs14', input=inputs)
            sample.add_data(macs_obj)
            results.append(macs_obj)

    return results
示例#4
0
    def test_get_resource_collection(self):
        """Check ``get_resource_collection`` on collection, relation, data and sample."""

        def make_collection(collection_id):
            # The id is assigned again after construction because the
            # value passed to the constructor is overridden when the
            # object is initialized.
            collection = Collection(id=collection_id, resolwe=MagicMock())
            collection.id = collection_id
            return collection

        # A collection resolves to its own id.
        self.assertEqual(get_resource_collection(make_collection(1)), 1)

        # A relation resolves to the id of its hydrated collection.
        relation = Relation(id=1, resolwe=MagicMock())
        relation._hydrated_collection = make_collection(2)
        self.assertEqual(get_resource_collection(relation), 2)

        # Data and sample objects with exactly one collection.
        data = Data(id=1, resolwe=MagicMock())
        data._collections = [make_collection(3)]
        self.assertEqual(get_resource_collection(data), 3)

        sample = Sample(id=1, resolwe=MagicMock())
        sample._collections = [make_collection(4)]
        self.assertEqual(get_resource_collection(sample), 4)

        # Two collections: ``None`` by default, ``LookupError`` when
        # ``fail_silently=False``.
        sample = Sample(id=1, resolwe=MagicMock())
        sample._collections = [make_collection(5), make_collection(6)]
        self.assertEqual(get_resource_collection(sample), None)
        with self.assertRaises(LookupError):
            get_resource_collection(sample, fail_silently=False)
示例#5
0
    def test_get_resource_collection(self):
        """Exercise ``get_resource_collection`` for every resource kind used here.

        Every collection id is set explicitly after construction, since
        the id given to the constructor is overridden on initialization.
        """
        # Collection -> its own id.
        own_collection = Collection(id=1, resolwe=MagicMock())
        own_collection.id = 1
        self.assertEqual(get_resource_collection(own_collection), 1)

        # Relation -> id of the hydrated collection.
        relation_collection = Collection(id=2, resolwe=MagicMock())
        relation_collection.id = 2
        relation = Relation(id=1, resolwe=MagicMock())
        relation._hydrated_collection = relation_collection
        self.assertEqual(get_resource_collection(relation), 2)

        # Data with a single collection.
        data_collection = Collection(id=3, resolwe=MagicMock())
        data_collection.id = 3
        data = Data(id=1, resolwe=MagicMock())
        data._collections = [data_collection]
        self.assertEqual(get_resource_collection(data), 3)

        # Sample with a single collection.
        sample_collection = Collection(id=4, resolwe=MagicMock())
        sample_collection.id = 4
        single_sample = Sample(id=1, resolwe=MagicMock())
        single_sample._collections = [sample_collection]
        self.assertEqual(get_resource_collection(single_sample), 4)

        # Sample with two collections: silent ``None`` unless asked to fail.
        first = Collection(id=5, resolwe=MagicMock())
        second = Collection(id=6, resolwe=MagicMock())
        first.id = 5
        second.id = 6
        ambiguous_sample = Sample(id=1, resolwe=MagicMock())
        ambiguous_sample._collections = [first, second]
        self.assertEqual(get_resource_collection(ambiguous_sample), None)
        with self.assertRaises(LookupError):
            get_resource_collection(ambiguous_sample, fail_silently=False)
示例#6
0
def prepare_geo_chipseq(resource, name=None):
    """Run the ``Prepare GEO - ChIP-Seq`` process for the given resource.

    Reads of every sample are passed to the process. For every sample
    that is not a background, exactly one ``macs`` object is required
    and, if the sample has a background, that background has to be
    among the samples of the resource.

    :param resource: resource on which prepare_geo_chipseq will be run
        (a single collection or a list of samples)
    :param str name: name of the prepare GEO tarball and table; when
        not given, the name returned by ``get_name_collection`` is used

    :return: object returned by ``get_or_run`` for the process
    :raises ValueError: if a non-background sample has no or more than
        one ``macs`` object, or its background is not in the resource

    """
    samples = get_samples(resource)
    resolwe = get_resolwe(*samples)

    read_ids = []
    macs_ids = []
    collection_ids = set()

    for sample in samples:
        read_ids.append(sample.get_reads().id)

        # Background samples contribute their reads only.
        if sample.is_background:
            continue

        sample_macs = sample.get_macs()
        if not sample_macs:
            raise ValueError(
                "Sample {} has no `macs` data object!".format(sample))
        if len(sample_macs) != 1:
            raise ValueError(
                "Sample {} has more than one `macs` data objects!".format(sample))

        macs_ids.append(sample_macs[0].id)

        if sample.background and sample.background not in samples:
            raise ValueError(
                "Background of the sample {} cannot be found in the resource you provided: "
                "{}!".format(sample, resource))

        collection_ids.add(get_resource_collection(sample))

    auto_name, collection = get_name_collection(collection_ids, resolwe)

    geo = resolwe.get_or_run(
        slug='prepare-geo-chipseq',
        input={
            'reads': read_ids,
            'macs': macs_ids,
            'name': name or auto_name,
        },
    )

    if collection:
        collection.add_data(geo)

    return geo
示例#7
0
def prepare_geo_chipseq(resource, name=None):
    """Prepare a GEO submission for ChIP-Seq samples.

    Runs the ``Prepare GEO - ChIP-Seq`` process (slug
    ``prepare-geo-chipseq``) with the reads of all samples and the
    single ``macs`` object of every non-background sample.

    :param resource: resource on which prepare_geo_chipseq will be run;
        a single collection or a list of samples
    :param str name: name of the prepare GEO tarball and table, falls
        back to the automatically determined name when empty

    :return: object returned by ``get_or_run`` for the process
    :raises ValueError: when a non-background sample does not have
        exactly one ``macs`` object or when its background is missing
        from the given resource

    """
    samples = get_samples(resource)
    resolwe = get_resolwe(*samples)

    inputs = {'reads': [], 'macs': []}
    seen_collections = set()

    for sample in samples:
        inputs['reads'].append(sample.get_reads().id)

        if sample.is_background:
            # Nothing else is collected for background samples.
            continue

        found = sample.get_macs()
        if not found:
            message = "Sample {} has no `macs` data object!"
            raise ValueError(message.format(sample))
        elif len(found) != 1:
            message = "Sample {} has more than one `macs` data objects!"
            raise ValueError(message.format(sample))

        inputs['macs'].append(found[0].id)

        if sample.background:
            background_missing = sample.background not in samples
            if background_missing:
                message = (
                    "Background of the sample {} cannot be found in the resource you provided: "
                    "{}!"
                )
                raise ValueError(message.format(sample, resource))

        seen_collections.add(get_resource_collection(sample))

    default_name, target_collection = get_name_collection(seen_collections, resolwe)
    inputs['name'] = name if name else default_name

    geo = resolwe.get_or_run(slug='prepare-geo-chipseq', input=inputs)

    if not target_collection:
        return geo

    target_collection.add_data(geo)
    return geo
示例#8
0
def macs(resource, use_background=True, p_value=None):
    """Run the ``MACS 1.4`` process for every sample of the resource.

    The `MACS 1.4`_ process is started with the primary ``bam`` of each
    sample (falling back to ``bam``) as treatment and, optionally, the
    given ``p-value``.

    With ``use_background`` set to ``True`` the ``bam`` of the
    background sample is passed to the process as the control.

    .. _MACS 1.4:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-macs14

    :param resource: single resource or a list of resources
    :param bool use_background: if set to ``True``, background sample
        will be used in the process
    :param float p_value: p-value used in the process

    :return: list of objects returned by ``get_or_run``

    """
    resources = resource if isinstance(resource, list) else [resource]

    # One dict is shared by all runs; per-sample keys are overwritten.
    inputs = {} if p_value is None else {'pvalue': p_value}
    macs_objects = []

    for single_resource in resources:
        collection_id = get_resource_collection(single_resource) if use_background else None
        background_filter = {'collection': collection_id} if collection_id else {}

        for sample in get_samples(single_resource):
            inputs['treatment'] = sample.get_primary_bam(fallback_to_bam=True).id

            if use_background:
                # Background samples are skipped, unless the process was
                # requested directly on a sample - then it is left to fail.
                if sample.is_background and not is_sample(single_resource):
                    continue

                background = sample.get_background(**background_filter)
                inputs['control'] = background.get_primary_bam(fallback_to_bam=True).id

            macs_obj = sample.resolwe.get_or_run(slug='macs14', input=inputs)
            sample.add_data(macs_obj)
            macs_objects.append(macs_obj)

    return macs_objects
示例#9
0
def cuffnorm(resource, annotation, use_ercc=None):
    """Run Cuffnorm_ for selected cuffquants.

    This method runs `Cuffnorm`_ process on ``resource`` with
    ``annotation`` and ``use_ercc`` parameters specified in arguments.

    The result is added to ``resource`` when it is a collection, or to
    the collection of ``resource`` when it is a relation.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: `~resdk.resources.data.Data`
    :param bool use_ercc: use ERCC spike-in controls for normalization

    :return: object returned by ``get_or_run`` for the process

    """
    # NOTE: the previous implementation built a ``relation_filter`` from
    # the resource collection but never used it; that dead code (and the
    # collection lookup it required) has been removed.
    samples = get_samples(resource)

    input_objects = [annotation]
    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    inputs = {
        'cuffquant': cuffquants,
        'annotation': get_data_id(annotation),
    }

    # Leave the input out entirely so the process default applies.
    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
示例#10
0
def cuffnorm(resource, annotation, use_ercc=None):
    """Run Cuffnorm_ for selected cuffquants.

    This method runs `Cuffnorm`_ process on the cuffquant objects of
    all samples of ``resource``, with ``annotation`` and ``use_ercc``
    parameters specified in arguments.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: `~resdk.resources.data.Data`
    :param bool use_ercc: use ERCC spike-in controls for normalization

    :return: object returned by ``get_or_run`` for the process

    """
    # The unused ``relation_filter`` / ``collection_id`` locals of the
    # previous version were dropped: nothing in this function read them.
    samples = get_samples(resource)
    resolwe = get_resolwe(annotation, *samples)

    inputs = {
        'cuffquant': [get_data_id(sample.get_cuffquant()) for sample in samples],
        'annotation': get_data_id(annotation),
    }

    # Only pass ``useERCC`` when the caller made an explicit choice.
    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    # Attach the result to the collection the resource stands for.
    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
示例#11
0
def cuffdiff(resource,
             annotation,
             genome=None,
             multi_read_correct=None,
             fdr=None,
             library_type=None,
             library_normalization=None,
             dispersion_method=None,
             threads=None):
    """Run Cuffdiff_ for selected cuffquants.

    This method runs `Cuffdiff`_ process with ``annotation`` specified
    in arguments. Library type is by default fr-unstranded. Other parameters
    defaults: multi_read_correct=false, fdr=0.05, library_normalization=geometric,
    dispersion_method=pooled, threads=1. Parameter genome is optional.

    The way the function works depends on the resource. If it is run on a collection,
    it will perform cuffdiff on every 'compare' relation labeled 'case-control' in
    the selected collection. If it is run on a list of samples (not necessarily in
    the same collection) it will run cuffdiff on all 'compare' relations labeled
    'case-control' containing all of the given samples but will discard those
    samples in a relation that are not in the list of samples.

    .. _Cuffdiff:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-cuffdiff

    :param resource: resource on which cuffdiff will be run
    :param annotation: annotation file
    :type annotation: `~resdk.resources.data.Data`
    :param genome: genome object to use for bias detection and
        correction algorithm
    :type genome: `~resdk.resources.data.Data`
    :param bool multi_read_correct: do initial estimation procedure to
        more accurately weight reads with multiple genome mappings
    :param fdr: the allowed false discovery rate
    :type fdr: decimal
    :param str library_type: options are: fr-unstranded, fr-firststrand,
        fr-secondstrand
    :param str library_normalization: options are: geometric, classic-fpkm,
        quartile
    :param str dispersion_method: options are: pooled, per-condition,
        blind, poisson
    :param int threads: use this many processor threads

    :raises ValueError: if a relation contains a position other than
        ``case`` or ``control``

    """
    inputs = {'annotation': get_data_id(annotation)}

    # Objects from which the resolwe instance is determined below.
    input_objects = [annotation]

    # Optional inputs are only passed when explicitly given.
    if genome is not None:
        # NOTE(review): the object itself is stored here, while the
        # annotation is converted with ``get_data_id`` - confirm that
        # this is intended.
        inputs['genome'] = genome
        input_objects.append(genome)

    if multi_read_correct is not None:
        inputs['multi_read_correct'] = multi_read_correct

    if fdr is not None:
        inputs['fdr'] = fdr

    if library_type is not None:
        inputs['library_type'] = library_type

    if library_normalization is not None:
        inputs['library_normalization'] = library_normalization

    if dispersion_method is not None:
        inputs['dispersion_method'] = dispersion_method

    if threads is not None:
        inputs['threads'] = threads

    samples = get_samples(resource)
    sample_ids = [sample.id for sample in samples]

    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    collection_id = get_resource_collection(resource)

    # Relations are looked up by collection when one is resolved,
    # otherwise by the ids of the given samples.
    relation_filter = {}
    if collection_id:
        relation_filter['collection'] = collection_id
    else:
        relation_filter['entity'] = sample_ids

    relations = resolwe.relation.filter(type='compare',
                                        label='case-control',
                                        **relation_filter)

    cuffdiff_objects = []
    for relation in relations:
        control = []
        case = []
        for sample, position in zip(relation.samples, relation.positions):
            # Samples of the relation that were not given are discarded.
            if sample.id not in sample_ids:
                continue

            if position == 'case':
                case.append(get_data_id(sample.get_cuffquant()))
            elif position == 'control':
                control.append(get_data_id(sample.get_cuffquant()))
            else:
                raise ValueError(
                    "Position different from 'case' or 'control' was found in the "
                    "following relation: {}".format(relation.id))
    # NOTE(review): the definition ends here in this excerpt - ``case``,
    # ``control`` and ``cuffdiff_objects`` are not consumed and nothing
    # is returned; the remainder of the function appears to be missing.
示例#12
0
def cuffnorm(resource, annotation, use_ercc=None, threads=None):
    """Run Cuffnorm_ for selected cuffquants.

    This method runs `Cuffnorm`_ process on ``resource`` with
    ``annotation``, ``use_ercc`` and ``threads`` parameters specified
    in arguments.

    Every sample has to belong to exactly one ``group`` relation
    labeled ``replicates``. Samples of the same relation get the same
    replicate number and the relation ids are used as labels.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: int or `~resdk.resources.data.Data`
    :param bool use_ercc: use ERCC spike-in controls for normalization
    :param int threads: number of threads passed to the process

    :return: object returned by ``get_or_run`` for the process
    :raises LookupError: if a sample does not have exactly one matching
        ``replicates`` relation

    """
    collection_id = get_resource_collection(resource)
    relation_filter = {'collection': collection_id} if collection_id else {}

    samples = get_samples(resource)
    resolwe = get_resolwe(annotation, *samples)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    labels = []
    replicates = []
    # Maps relation id to the replicate number (as string) assigned to it.
    replicates_ids = {}
    for sample in samples:
        relations = resolwe.relation.filter(
            type='group',
            label='replicates',
            entity=[sample.id],
            **relation_filter
        )

        if len(relations) != 1:
            raise LookupError(
                "Cannot determine unique group relation with label `replicates` for the "
                "following sample: {}".format(sample.name))
        relation = relations[0]

        # Relations are numbered in the order in which they are first seen.
        replicates.append(
            replicates_ids.setdefault(relation.id, str(len(replicates_ids))))

        label = str(relation.id)
        if label not in labels:
            labels.append(label)

    inputs = {
        'cuffquant': cuffquants,
        'replicates': replicates,
        'annotation': get_data_id(annotation),
        'labels': labels,
    }

    # Optional inputs are only passed when explicitly given.
    for key, value in (('useERCC', use_ercc), ('threads', threads)):
        if value is not None:
            inputs[key] = value

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
def cuffdiff(resource, annotation, genome=None, multi_read_correct=None, fdr=None,
             library_type=None, library_normalization=None, dispersion_method=None):
    """Run Cuffdiff_ for selected cuffquants.

    This method runs `Cuffdiff`_ process with ``annotation`` specified
    in arguments. Library type is by default fr-unstranded. Other parameters
    defaults: multi_read_correct=false, fdr=0.05, library_normalization=geometric,
    dispersion_method=pooled. Parameter genome is optional.

    The way the function works depends on the resource. If it is run on a collection,
    it will perform cuffdiff on every 'compare' relation in the selected collection.
    If it is run on a list of samples (not necessarily in the same collection) it
    will run cuffdiff on all 'compare' relations containing all of the given samples
    but will discard those samples in a relation that are not in the list of samples.
    Relations left without a 'case' or without a 'control' sample are skipped.

    .. _Cuffdiff:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-cuffdiff

    :param resource: resource on which cuffdiff will be run
    :param annotation: annotation file
    :type annotation: `~resdk.resources.data.Data`
    :param genome: genome object to use for bias detection and
        correction algorithm
    :type genome: `~resdk.resources.data.Data`
    :param bool multi_read_correct: do initial estimation procedure to
        more accurately weight reads with multiple genome mappings
    :param fdr: the allowed false discovery rate
    :type fdr: decimal
    :param str library_type: options are: fr-unstranded, fr-firststrand,
        fr-secondstrand
    :param str library_normalization: options are: geometric, classic-fpkm,
        quartile
    :param str dispersion_method: options are: pooled, per-condition,
        blind, poisson

    :return: list of objects returned by ``get_or_run``, one per
        processed relation
    :raises ValueError: if a partition label differs from ``case`` and
        ``control``, or if no relation could be processed

    """
    inputs = {'annotation': get_data_id(annotation)}

    # Objects from which the resolwe instance is determined below.
    input_objects = [annotation]

    # Optional inputs are only passed when explicitly given.
    if genome is not None:
        inputs['genome'] = genome.id
        input_objects.append(genome)

    if multi_read_correct is not None:
        inputs['multi_read_correct'] = multi_read_correct

    if fdr is not None:
        inputs['fdr'] = fdr

    if library_type is not None:
        inputs['library_type'] = library_type

    if library_normalization is not None:
        inputs['library_normalization'] = library_normalization

    if dispersion_method is not None:
        inputs['dispersion_method'] = dispersion_method

    samples = get_samples(resource)
    sample_ids = [sample.id for sample in samples]

    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    collection_id = get_resource_collection(resource)

    # Relations are looked up by collection when one is resolved,
    # otherwise by the ids of the given samples.
    relation_filter = {}
    if collection_id:
        relation_filter['collection'] = collection_id
    else:
        relation_filter['entity'] = sample_ids

    relations = resolwe.relation.filter(
        type='compare',
        **relation_filter
    )

    cuffdiff_objects = []
    for relation in relations:
        control = []
        case = []
        for partition in relation.partitions:
            sample = resolwe.sample.get(partition['entity'])
            label = partition['label']
            # Samples of the relation that were not given are discarded.
            if sample.id not in sample_ids:
                continue

            if label == 'case':
                case.append(get_data_id(sample.get_cuffquant()))
            elif label == 'control':
                control.append(get_data_id(sample.get_cuffquant()))
            else:
                raise ValueError(
                    "Label different from 'case' or 'control' was found in the "
                    "following relation: {}".format(relation.id)
                )

        # Both sides of the comparison are required.
        if not case or not control:
            continue

        inputs['case'] = case
        inputs['control'] = control

        cuffdiff_obj = resolwe.get_or_run(slug='cuffdiff', input=inputs)
        cuffdiff_objects.append(cuffdiff_obj)

        if is_collection(resource):
            resource.add_data(cuffdiff_obj)
        elif is_relation(resource):
            resource.collection.add_data(cuffdiff_obj)

    if not cuffdiff_objects:
        if not relations:
            raise ValueError("No relation containing all of the given samples was found")
        else:
            # The closing parenthesis was missing in this message.
            raise ValueError(
                "No suitable relation was found (given samples all have either 'case' label "
                "or 'control' label)"
            )

    return cuffdiff_objects
示例#14
0
def rose2(resource, use_background=True, tss=None, stitch=None, beds=None):
    """Run the ``ROSE 2`` process for every sample of the resource.

    The `ROSE2`_ process is run with ``tss`` and ``stitch`` parameters
    specified in arguments, ranked by the ``bam`` of the sample.

    A separate process is run for each object returned by
    ``get_macs`` of the sample. To run the process only on a subset of
    them, list them in ``beds`` argument (a single object is wrapped
    in a list automatically).

    If ``use_background`` argument is set to ``True``, bam file from
    background sample is passed to the process as the control.

    .. _ROSE2:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-rose2

    :param resource: single resource or a list of resources
    :param bool use_background: if set to ``True``, background sample
        will be used in the process
    :param int tss: TSS exclusion used in process
    :param int stitch: Stitch used in process
    :param list beds: subset of bed files to run process on, if
        ``None`` processes for all bed files will be run

    :return: list of objects returned by ``get_or_run``

    """
    resources = resource if isinstance(resource, list) else [resource]
    optional_inputs = (('tss', tss), ('stitch', stitch))

    rose_objects = []

    for single_resource in resources:
        collection_id = get_resource_collection(single_resource) if use_background else None
        background_filter = {'collection': collection_id} if collection_id else {}

        for sample in get_samples(single_resource):
            inputs = {'rankby': sample.get_bam().id}
            inputs.update(
                (key, value) for key, value in optional_inputs if value is not None)

            if use_background:
                # Background samples are skipped, unless the process was
                # requested directly on a sample - then it is left to fail.
                if sample.is_background and not is_sample(single_resource):
                    continue

                background = sample.get_background(**background_filter)
                inputs['control'] = background.get_bam().id

            bed_list = sample.get_macs()
            if beds is not None:
                # Narrow the query to the ids of the requested objects.
                requested = beds if isinstance(beds, list) else [beds]
                bed_list = bed_list.filter(id__in=[get_data_id(bed) for bed in requested])

            for bed in bed_list:
                inputs['input'] = bed.id

                rose = sample.resolwe.get_or_run(slug='rose2', input=inputs)
                sample.add_data(rose)
                rose_objects.append(rose)

    return rose_objects
示例#15
0
def cuffnorm(resource, annotation, use_ercc=None):
    """Run Cuffnorm_ for selected cuffquants.

    This method runs `Cuffnorm`_ process on ``resource`` with
    ``annotation`` and ``use_ercc`` parameters specified in arguments.

    Every sample has to belong to exactly one ``group`` relation;
    samples of the same relation get the same replicate number.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: `~resdk.resources.data.Data`
    :param bool use_ercc: use ERCC spike-in controls for normalization

    :return: object returned by ``get_or_run`` for the process
    :raises LookupError: if a sample does not have exactly one matching
        ``group`` relation

    """
    collection_id = get_resource_collection(resource)
    relation_filter = {'collection': collection_id} if collection_id else {}

    samples = get_samples(resource)
    resolwe = get_resolwe(annotation, *samples)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    # Maps relation id to the replicate number (as string) assigned to it.
    replicate_numbers = {}
    replicates = []
    for sample in samples:
        relations = resolwe.relation.filter(type='group', entity=[sample.id], **relation_filter)

        if len(relations) != 1:
            raise LookupError(
                "Cannot determine unique group relation with label `replicates` for the "
                "following sample: {}".format(sample.name)
            )
        relation_id = relations[0].id

        # Relations are numbered in the order in which they are first seen.
        try:
            number = replicate_numbers[relation_id]
        except KeyError:
            number = replicate_numbers[relation_id] = str(len(replicate_numbers))
        replicates.append(number)

    inputs = {
        'cuffquant': cuffquants,
        'replicates': replicates,
        'annotation': get_data_id(annotation),
    }

    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
示例#16
0
def rose2(resource, use_background=True, tss=None, stitch=None, beds=None):
    """Run ``ROSE 2`` process on the resource.

    Runs the `ROSE2`_ process once per object returned by ``get_macs``
    of each sample, optionally restricted to the objects given in
    ``beds`` (a single object is treated as a one-element list).

    The ``bam`` of the sample is used for ranking. If ``use_background``
    argument is set to ``True``, bam file from background sample is
    passed to the process as the control.

    .. _ROSE2:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-rose2

    :param resource: single resource or a list of resources
    :param bool use_background: if set to ``True``, background sample
        will be used in the process
    :param int tss: TSS exclusion used in process
    :param int stitch: Stitch used in process
    :param list beds: subset of bed files to run process on, if
        ``None`` processes for all bed files will be run

    :return: list of objects returned by ``get_or_run``

    """
    if isinstance(resource, list):
        resources = resource
    else:
        resources = [resource]

    created = []

    for single_resource in resources:

        background_filter = {}
        if use_background:
            collection_id = get_resource_collection(single_resource)
            if collection_id:
                background_filter['collection'] = collection_id

        for sample in get_samples(single_resource):
            inputs = {'rankby': sample.get_bam().id}
            if tss is not None:
                inputs['tss'] = tss
            if stitch is not None:
                inputs['stitch'] = stitch

            if use_background:
                skip_background = sample.is_background and not is_sample(single_resource)
                if skip_background:
                    # The process is not run on background samples; when
                    # called directly on a sample it is left to fail.
                    continue

                control_sample = sample.get_background(**background_filter)
                inputs['control'] = control_sample.get_bam().id

            macs_objects = sample.get_macs()
            if beds is not None:
                # Restrict the query to the ids of the requested objects.
                if isinstance(beds, list):
                    wanted_ids = [get_data_id(bed) for bed in beds]
                else:
                    wanted_ids = [get_data_id(beds)]
                macs_objects = macs_objects.filter(id__in=wanted_ids)

            for macs_object in macs_objects:
                inputs['input'] = macs_object.id

                rose_obj = sample.resolwe.get_or_run(slug='rose2', input=inputs)
                sample.add_data(rose_obj)
                created.append(rose_obj)

    return created