示例#1
0
def audit_biosample_depleted_term_match(value, system):
    '''
    The depleted_in_term_name and depleted_in_term_id arrays
    should be concordant. This should be a calculated field.
    If one exists, the other should.  This should be handled in the schema.

    Deleted biosamples and biosamples without depleted_in_term_name are
    skipped. A missing depleted_in_term_id is treated as an empty array.
    Raises AuditFailure (level ERROR) when the arrays differ in length or
    when term_mapping maps a name to a different id than the one stored at
    the same position.
    '''
    if value['status'] == 'deleted':
        return

    if 'depleted_in_term_name' not in value:
        return

    term_names = value['depleted_in_term_name']
    # The schema should guarantee the id array; fall back to an empty list so
    # a missing key is audited as a length problem instead of a KeyError.
    term_ids = value.get('depleted_in_term_id', [])

    if len(term_names) != len(term_ids):
        detail = 'Biosample {} has a depleted_in_term_name array and depleted_in_term_id array of differing lengths'.format(
            value['@id'])
        raise AuditFailure('mismatched depleted_in_term length',
                           detail,
                           level='ERROR')

    # Lengths are equal here, so zip pairs every name with its id.
    for dep_term, dep_id in zip(term_names, term_ids):
        if term_mapping[dep_term] != dep_id:
            detail = 'Biosample {} has a mismatch between {} and {}'.format(
                value['@id'], dep_term, dep_id)
            raise AuditFailure('mismatched depleted_in_term',
                               detail,
                               level='ERROR')
示例#2
0
def audit_experiment_replicated(value, system):
    '''
    Experiments in ready for review or release ready state should be replicated. If not,
    wranglers should check with lab as to why before release.

    Only experiments whose status is released, release ready or ready for
    review are audited. Raises AuditFailure when at most one distinct
    biological_replicate_number is found: level DCC_ACTION for released
    experiments, WARNING for the other two statuses.
    '''
    if value['status'] not in [
            'released', 'release ready', 'ready for review'
    ]:
        return

    # Single cell isolation experiments are excluded from the replication
    # requirement. .get() is used because assay_term_name may be absent.
    if value.get('assay_term_name') == 'single cell isolation followed by RNA-seq':
        return

    bio_rep_numbers = {
        rep['biological_replicate_number'] for rep in value['replicates']
    }

    if len(bio_rep_numbers) > 1:
        return

    if value['status'] in ['released']:
        detail = 'Experiment {} has only one biological replicate and is released. Check for proper annotation of this state in the metadata'.format(
            value['@id'])
        raise AuditFailure('unreplicated experiment',
                           detail,
                           level='DCC_ACTION')
    if value['status'] in ['ready for review', 'release ready']:
        detail = 'Experiment {} has only one biological replicate, more than one is typically expected before release'.format(
            value['@id'])
        raise AuditFailure('unreplicated experiment',
                           detail,
                           level='WARNING')
示例#3
0
文件: file.py 项目: sailakss/encoded
def audit_file_paired_ended_run_type(value, system):
    '''
    Audit to catch those files that were upgraded to have run_type = paired ended
    resulting from its migration out of replicate but lack the paired_end property
    to specify which read it is. This audit will also catch the case where run_type
    = paired-ended but there is no paired_end = 2 due to registration error.

    Only fastq, fasta and csfasta files with output_type "reads" and run_type
    "paired-ended" are checked; deleted, replaced, revoked and upload failed
    files are skipped. Raises AuditFailure (level DCC_ACTION).
    '''

    if value['status'] in ['deleted', 'replaced', 'revoked', 'upload failed']:
        return

    if value['file_format'] not in ['fastq', 'fasta', 'csfasta']:
        return

    if value['output_type'] != 'reads' or value.get('run_type') != 'paired-ended':
        return

    if 'paired_end' not in value:
        detail = 'File {} has a paired-ended run_type but is missing its paired_end value'.format(
            value['@id'])
        raise AuditFailure('missing paired_end',
                           detail,
                           level='DCC_ACTION')

    # Compare as text so both 1 and '1' identify the first read; other audits
    # in this module compare paired_end against the string form.
    # NOTE(review): confirm against the file schema which type is stored.
    if str(value['paired_end']) == '1' and 'paired_with' not in value:
        detail = 'File {} has a paired-ended run_type but is missing a paired_end=2 mate'.format(
            value['@id'])
        raise AuditFailure('missing mate pair', detail, level='DCC_ACTION')
示例#4
0
def audit_experiment_control(value, system):
    '''
    Certain assay types (ChIP-seq, ...) require possible controls with a matching biosample.
    Of course, controls do not require controls.

    Deleted and proposed experiments are skipped. Raises AuditFailure with
    level NOT_COMPLIANT when possible_controls is empty, and with level ERROR
    for the first control whose biosample_term_id differs from the
    experiment's.
    '''

    if value['status'] in ['deleted', 'proposed']:
        return

    # Controls are only required for assays in controlRequiredAssayList.
    if value.get('assay_term_name') not in controlRequiredAssayList:
        return

    # Experiments that are themselves controls do not need controls.
    if 'target' in value and 'control' in value['target']['investigated_as']:
        return

    if value['possible_controls'] == []:
        detail = '{} experiments require a value in possible_control'.format(
            value['assay_term_name'])
        raise AuditFailure('missing possible_controls',
                           detail,
                           level='NOT_COMPLIANT')

    for control in value['possible_controls']:
        if control.get('biosample_term_id') != value.get('biosample_term_id'):
            # .get() on both sides: a missing biosample_term_name must not
            # turn the audit failure into a KeyError.
            detail = 'Control {} is for {} but experiment is done on {}'.format(
                control['@id'], control.get('biosample_term_name'),
                value.get('biosample_term_name'))
            raise AuditFailure('mismatched control', detail, level='ERROR')
示例#5
0
def audit_experiment_replicates_biosample(value, system):
    '''
    Biological replicates should come from different biosamples, and technical
    replicates of one biological replicate should share a single biosample.

    Deleted, replaced and revoked experiments are skipped, as are replicates
    without a library or without a library biosample. Raises AuditFailure
    (level DCC_ACTION) when two biological replicate numbers use the same
    biosample accession, or when replicates sharing a biological replicate
    number use different biosample accessions.
    '''
    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return

    accession_by_bio_rep = {}  # biological replicate number -> accession
    seen_accessions = set()    # accessions already claimed by a bio replicate

    for rep in value['replicates']:
        bio_rep_num = rep['biological_replicate_number']
        if 'library' not in rep or 'biosample' not in rep['library']:
            continue

        biosample = rep['library']['biosample']
        accession = biosample['accession']

        if bio_rep_num not in accession_by_bio_rep:
            # First replicate seen for this biological replicate number.
            accession_by_bio_rep[bio_rep_num] = accession
            if accession in seen_accessions:
                detail = 'Experiment {} has multiple biological replicates associated with the same biosample {}'.format(
                    value['@id'], biosample['@id'])
                raise AuditFailure(
                    'biological replicates with identical biosample',
                    detail,
                    level='DCC_ACTION')
            seen_accessions.add(accession)
        elif accession != accession_by_bio_rep[bio_rep_num]:
            detail = 'Experiment {} has technical replicates associated with the different biosamples'.format(
                value['@id'])
            raise AuditFailure(
                'technical replicates with not identical biosample',
                detail,
                level='DCC_ACTION')
示例#6
0
def audit_library_depleted_in(value, system):
    '''
    If there is a depleted_term_name or term_id,
    both should exist - should be handled by schema
    They should match each other.
    This should also be replaced by a calculated field

    Generator yielding AuditFailure (level ERROR) for: arrays of differing
    lengths, a depleted_in_term_id equal to the library's own
    nucleic_acid_term_id, and a name whose moleculeDict id differs from the
    stored id. Deleted libraries and libraries where either array is missing
    or empty yield nothing.
    '''

    if value['status'] in ['deleted']:
        return

    # Both arrays are optional; .get() treats a missing one like an empty one.
    term_names = value.get('depleted_in_term_name')
    term_ids = value.get('depleted_in_term_id')
    if not term_names or not term_ids:
        return

    if len(term_names) != len(term_ids):
        detail = 'Library {} has depleted_in_term_name array and depleted_in_term_id array of differing lengths'.format(
            value['@id'])
        yield AuditFailure('depleted_in length mismatch', detail, level='ERROR')

    # zip stops at the shorter array, so a length mismatch reported above
    # cannot cause an IndexError while the overlapping entries are checked.
    for dep_name, dep_id in zip(term_names, term_ids):
        if dep_id == value['nucleic_acid_term_id']:
            detail = 'Library {} of type {} cannot be depleted in {}'.format(
                value['@id'],
                value['nucleic_acid_term_id'],
                dep_id)
            yield AuditFailure('invalid depleted_in_term_id', detail, level='ERROR')

        expected = moleculeDict[dep_name]
        if expected != dep_id:
            detail = 'Library {} has mismatch between {} - {}'.format(
                value['@id'],
                dep_name,
                dep_id)
            yield AuditFailure('mismatched depleted_in_term', detail, level='ERROR')
示例#7
0
文件: file.py 项目: sailakss/encoded
def audit_file_controlled_by(value, system):
    '''
    A fastq in a ChIP-seq experiment should have a controlled_by

    Applies to files whose dataset assay is ChIP-seq, RAMPAGE, CAGE or shRNA
    knockdown followed by RNA-seq, unless the dataset target is investigated
    as a control. Deleted, replaced and revoked files are skipped.

    Raises AuditFailure when a fastq has no controlled_by (NOT_COMPLIANT),
    when a controlled_by file has a different biosample_term_id or
    file_format (ERROR), or when its dataset is not in the dataset's
    possible_controls (DCC_ACTION). The value dict is not modified.
    '''

    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return

    dataset = value['dataset']

    if dataset.get('assay_term_name') not in [
            'ChIP-seq', 'RAMPAGE', 'CAGE',
            'shRNA knockdown followed by RNA-seq'
    ]:
        return

    if 'target' in dataset and 'control' in dataset['target'].get(
            'investigated_as', []):
        return

    # Read the list without writing a default back into the caller's dict.
    controlled_by = value.get('controlled_by', [])

    if (controlled_by == []) and (value['file_format'] in ['fastq']):
        detail = 'Fastq file {} from {} requires controlled_by'.format(
            value['@id'], dataset['assay_term_name'])
        raise AuditFailure('missing controlled_by',
                           detail,
                           level='NOT_COMPLIANT')

    possible_controls = dataset.get('possible_controls')
    biosample = dataset.get('biosample_term_id')

    for ff in controlled_by:
        control_bs = ff['dataset'].get('biosample_term_id')

        if control_bs != biosample:
            detail = 'File {} has a controlled_by file {} with conflicting biosample {}'.format(
                value['@id'], ff['@id'], control_bs)
            raise AuditFailure('mismatched controlled_by',
                               detail,
                               level='ERROR')

        if ff['file_format'] != value['file_format']:
            detail = 'File {} with file_format {} has a controlled_by file {} with file_format {}'.format(
                value['@id'], value['file_format'], ff['@id'],
                ff['file_format'])
            raise AuditFailure('mismatched controlled_by',
                               detail,
                               level='ERROR')

        if (possible_controls is None) or (ff['dataset']['@id']
                                           not in possible_controls):
            detail = 'File {} has a controlled_by file {} with a dataset {} that is not in possible_controls'.format(
                value['@id'], ff['@id'], ff['dataset']['@id'])
            raise AuditFailure('mismatched controlled_by',
                               detail,
                               level='DCC_ACTION')
示例#8
0
def audit_experiment_assay(value, system):
    '''
    Experiments should have assays with valid ontologies term ids and names that
    are a valid synonym.

    Generator yielding at most one AuditFailure: missing assay_term_id or
    assay_term_name (ERROR), an NTR: term id, a term id absent from
    system['registry']['ontology'], or a name that is neither the ontology
    name nor a synonym, with or without an " assay" suffix (all DCC_ACTION).
    Deleted experiments yield nothing.
    '''
    if value['status'] == 'deleted':
        return

    # Both properties should really be schema dependencies.
    if 'assay_term_id' not in value:
        detail = 'Experiment {} is missing assay_term_id'.format(value['@id'])
        yield AuditFailure('missing assay information', detail, level='ERROR')
        return

    if 'assay_term_name' not in value:
        detail = 'Experiment {} is missing assay_term_name'.format(
            value['@id'])
        yield AuditFailure('missing assay information', detail, level='ERROR')
        return

    ontology = system['registry']['ontology']
    term_id = value.get('assay_term_id')
    term_name = value.get('assay_term_name')

    if term_id.startswith('NTR:'):
        detail = 'Assay_term_id is a New Term Request ({} - {})'.format(
            term_id, term_name)
        yield AuditFailure('NTR assay', detail, level='DCC_ACTION')
        return

    if term_id not in ontology:
        detail = 'Assay_term_id {} is not found in cached version of ontology'.format(
            term_id)
        # Report the full sentence, not just the bare term id.
        yield AuditFailure('assay_term_id not in ontology',
                           detail,
                           level='DCC_ACTION')
        return

    ontology_term_name = ontology[term_id]['name']
    synonyms = ontology[term_id]['synonyms']
    # The name may also match once " assay" is appended to it.
    modified_term_name = term_name + ' assay'

    plain_mismatch = (ontology_term_name != term_name
                      and term_name not in synonyms)
    suffixed_mismatch = (ontology_term_name != modified_term_name
                         and modified_term_name not in synonyms)
    if plain_mismatch and suffixed_mismatch:
        detail = 'Experiment has a mismatch between assay_term_name "{}" and assay_term_id "{}"'.format(
            term_name,
            term_id,
        )
        yield AuditFailure('mismatched assay_term_name',
                           detail,
                           level='DCC_ACTION')
示例#9
0
def audit_biosample_concordance(value, system):
    '''
    The biosample details of the experiment of a replicate and the library.biosample of a replicate
    need to match.

    Generator yielding one AuditFailure (level ERROR) for each of
    biosample_type, biosample_term_name and biosample_term_id that differs
    between value['experiment'] and value['library']['biosample']. Deleted
    and replaced replicates, and replicates without a library or a library
    biosample, yield nothing.
    '''

    if value.get('status') in ['deleted', 'replaced']:
        return

    if 'library' not in value:
        return

    if 'biosample' not in value['library']:
        return

    experiment = value['experiment']
    biosample = value['library']['biosample']
    exp = experiment['@id']
    bio = biosample['@id']

    for field in ('biosample_type', 'biosample_term_name', 'biosample_term_id'):
        exp_value = experiment.get(field)
        bs_value = biosample.get(field)
        if bs_value != exp_value:
            # Every message leads with the experiment and ends with the
            # biosample that disagrees with it.
            detail = '{} has mismatched {}: {}, but {} in {}'.format(
                exp,
                field,
                exp_value,
                bs_value,
                bio
                )
            yield AuditFailure('mismatched {}'.format(field), detail, level='ERROR')
示例#10
0
def audit_antibody_characterization_unique_reviews(value, system):
    '''
    Make sure primary characterizations have unique lane, biosample_term_id and
    organism combinations for characterization reviews

    Characterizations that are deleted, not submitted for review by lab, in
    progress or not reviewed are skipped, as are secondary characterizations.
    Raises AuditFailure (level ERROR) on the first repeated combination.
    '''
    if (value['status'] in [
            'deleted', 'not submitted for review by lab', 'in progress',
            'not reviewed'
    ]):
        return

    if 'secondary_characterization_method' in value:
        return

    seen_reviews = set()
    for review in value['characterization_reviews']:
        lane = review['lane']
        term_id = review['biosample_term_id']
        organism = review['organism']
        # A tuple keeps each field in its own position; an unordered set
        # would treat swapped field values as the same review.
        review_key = (lane, term_id, organism)
        if review_key in seen_reviews:
            detail = 'Characterization_review.lane {} is a duplicate review for {} - {}'.format(
                lane, term_id, organism)
            raise AuditFailure('duplicate lane review', detail, level='ERROR')
        seen_reviews.add(review_key)
示例#11
0
def audit_experiment_spikeins(value, system):
    '''
    RNA-seq libraries with size_range ">200" must declare spikeins_used.

    Generator yielding one AuditFailure (level NOT_COMPLIANT) per replicate
    whose library has size_range ">200" and a missing or empty spikeins_used.
    Deleted and replaced experiments, and assays other than RNA-seq, yield
    nothing.
    '''
    if value['status'] in ['deleted', 'replaced'] or value.get('assay_term_name') != 'RNA-seq':
        return

    for replicate in value['replicates']:
        library = replicate.get('library')
        # Replicates without a library, or with any other size range, are
        # outside the scope of this audit.
        if library is None or library.get('size_range') != '>200':
            continue

        spikeins = library.get('spikeins_used')
        if spikeins is not None and spikeins != []:
            continue

        detail = 'Library {} is in an RNA-seq experiment and has size_range >200. It requires a value for spikeins_used'.format(
            library['@id'])
        yield AuditFailure('missing spikeins_used',
                           detail,
                           level='NOT_COMPLIANT')
示例#12
0
文件: file.py 项目: sailakss/encoded
def audit_file_format_specifications(value, system):
    '''
    Every document listed in file_format_specifications must have
    document_type "file format specification".

    Raises AuditFailure (level ERROR) for the first document of another type.
    Files without file_format_specifications pass.
    '''
    expected_type = "file format specification"
    for document in value.get('file_format_specifications', []):
        if document['document_type'] == expected_type:
            continue
        detail = 'File {} has document {} not of type file format specification'.format(
            value['@id'], document['@id'])
        raise AuditFailure('wrong document_type', detail, level='ERROR')
示例#13
0
def audit_biosample_donor(value, system):
    '''
    A biosample should have a donor.
    The organism of donor and biosample should match.
    Pooled_from biosamples do not need donors??

    Deleted biosamples are skipped. Raises AuditFailure (level ERROR) when a
    biosample without pooled_from has no donor, when the donor organism
    differs from the biosample organism, when the donor mutated_gene belongs
    to another organism, or when the mutated_gene is investigated as a
    histone modification, tag, control, recombinant protein or modification.
    '''
    if value['status'] in ['deleted']:
        return

    if 'donor' not in value:
        # Pooled biosamples are allowed to have no donor.
        if value['pooled_from']:
            return
        detail = 'Biosample {} requires a donor'.format(value['@id'])
        raise AuditFailure('missing donor', detail, level='ERROR')

    donor = value['donor']
    organism_name = value['organism']['name']

    if organism_name != donor['organism']['name']:
        detail = 'Biosample {} is organism {}, yet its donor {} is organism {}. Biosamples require a donor of the same species'.format(
            value['@id'], organism_name, donor['@id'],
            donor['organism']['name'])
        raise AuditFailure('mismatched organism', detail, level='ERROR')

    if 'mutated_gene' not in donor:
        return

    mutated_gene = donor['mutated_gene']

    if organism_name != mutated_gene['organism']['name']:
        detail = 'Biosample {} is organism {}, but its donor {} mutated_gene is in {}. Donor mutated_gene should be of the same species as the donor and biosample'.format(
            value['@id'], organism_name, donor['@id'],
            mutated_gene['organism']['name'])
        raise AuditFailure('mismatched mutated_gene organism',
                           detail,
                           level='ERROR')

    invalid_categories = [
        'histone modification', 'tag', 'control',
        'recombinant protein', 'nucleotide modification',
        'other post-translational modification'
    ]
    for category in mutated_gene['investigated_as']:
        if category in invalid_categories:
            detail = 'Donor {} has an invalid mutated_gene {}. Donor mutated_genes should not be tags, controls, recombinant proteins or modifications'.format(
                donor['@id'], mutated_gene['name'])
            raise AuditFailure('invalid donor mutated_gene',
                               detail,
                               level='ERROR')
示例#14
0
def audit_biosample_transfection_type(value, system):
    '''
    Biosamples that list rnais or constructs must also declare a
    transfection_type.

    Deleted biosamples are skipped. Raises AuditFailure (level ERROR); rnais
    are checked before constructs.
    '''
    if value['status'] == 'deleted':
        return

    # (property to inspect, message used when transfection_type is absent)
    checks = (
        ('rnais',
         'Biosample {} with a value for RNAi requires transfection_type'),
        ('constructs',
         'Biosample {} with a value for construct requires transfection_type'),
    )
    lacks_transfection_type = 'transfection_type' not in value

    for field, template in checks:
        if value[field] and lacks_transfection_type:
            detail = template.format(value['@id'])
            raise AuditFailure('missing transfection_type', detail, level='ERROR')
示例#15
0
文件: file.py 项目: sailakss/encoded
def audit_file_size(value, system):
    '''
    Files should record their file_size.

    Deleted, replaced, uploading and revoked files are exempt. Raises
    AuditFailure (level DCC_ACTION) when file_size is absent.
    '''
    exempt_statuses = ['deleted', 'replaced', 'uploading', 'revoked']
    if value['status'] not in exempt_statuses and 'file_size' not in value:
        detail = 'File {} requires a value for file_size'.format(value['@id'])
        raise AuditFailure('missing file_size', detail, level='DCC_ACTION')
示例#16
0
def audit_antibody_characterization_status(value, system):
    '''
    Make sure the lane_status matches
    the characterization status

    Secondary characterizations are skipped. Raises AuditFailure with level
    WARNING when a characterization that is deleted, not submitted for review
    by lab, in progress or not reviewed has characterization_reviews, or when
    exactly one of the characterization and a lane is pending dcc review.
    Raises with level DCC_ACTION when a lane is compliant but the
    characterization is not.
    '''
    if 'secondary_characterization_method' in value:
        return

    if (value['status'] in [
            "deleted", "not submitted for review by lab", 'in progress',
            'not reviewed'
    ]):
        # With any of these statuses there should be no reviews at all.
        if 'characterization_reviews' in value:
            detail = 'Antibody_characterization.status of {} is incompatible with having a value for characterization_reviews'.format(
                value['status'])
            raise AuditFailure('unexpected characterization_reviews',
                               detail,
                               level='WARNING')
        return

    # Check each lane_status in characterization_reviews for a suitable match.
    is_pending = value['status'] == 'pending dcc review'
    has_compliant_lane = False
    for lane in value['characterization_reviews']:
        lane_is_pending = lane['lane_status'] == 'pending dcc review'
        if is_pending != lane_is_pending:
            # Name the real characterization status: it is not pending when
            # only the lane is.
            detail = 'A lane.status of {} is incompatible with antibody_characterization.status of {}'.format(
                lane['lane_status'], value['status'])
            raise AuditFailure('mismatched lane status',
                               detail,
                               level='WARNING')

        if lane['lane_status'] == 'compliant':
            has_compliant_lane = True

    if has_compliant_lane and value['status'] != 'compliant':
        # The offending lane status is 'compliant', which need not be the
        # status of the last lane visited by the loop above.
        detail = 'A lane.status of {} is incompatible with antibody_characterization status of {}'.format(
            'compliant', value['status'])
        raise AuditFailure('mismatched lane status',
                           detail,
                           level='DCC_ACTION')
示例#17
0
def audit_experiment_release_date(value, system):
    '''
    A released experiment must carry a date_released.
    Ideally the schema would enforce this.

    Raises AuditFailure (level DCC_ACTION) when the date is missing.
    '''
    if value['status'] != 'released':
        return
    if 'date_released' in value:
        return

    detail = 'Experiment {} is released and requires a value in date_released'.format(
        value['@id'])
    raise AuditFailure('missing date_released', detail, level='DCC_ACTION')
示例#18
0
def audit_experiment_ChIP_control(value, system):
    '''
    Validate the possible_controls of a ChIP-seq experiment.

    Skips deleted, proposed, preliminary, replaced and revoked experiments,
    assays other than ChIP-seq, experiments whose target is investigated as
    a control, and experiments with no possible_controls.

    Raises AuditFailure (level ERROR) for the first control not linked to a
    target investigated as control, and (level NOT_COMPLIANT) when every
    control has an antibody on its first replicate, i.e. none of them is an
    input control.
    '''
    skipped_statuses = [
        'deleted', 'proposed', 'preliminary', 'replaced', 'revoked'
    ]
    if value['status'] in skipped_statuses:
        return

    # Only ChIP-seq experiments are subject to this audit.
    if value.get('assay_term_name') != 'ChIP-seq':
        return

    # Experiments that are themselves controls are not checked.
    if 'target' in value and 'control' in value['target']['investigated_as']:
        return

    controls = value['possible_controls']
    if not controls:
        return

    antibody_control_count = 0
    for control in controls:
        targets_control = (
            'target' in control
            and 'control' in control['target']['investigated_as'])
        if not targets_control:
            detail = 'Experiment {} is ChIP-seq but its control {} is not linked to a target with investigated.as = control'.format(
                value['@id'], control['@id'])
            raise AuditFailure('invalid possible_control',
                               detail,
                               level='ERROR')

        control_replicates = control['replicates']
        if control_replicates and 'antibody' in control_replicates[0]:
            antibody_control_count += 1

    # The binding group agreed that every ChIP-seq needs an input control,
    # so it is a failure when all controls are antibody (mock IP) controls.
    # The message names the last control visited above.
    if antibody_control_count == len(controls):
        detail = 'Experiment {} is ChIP-seq and requires at least one input control, as agreed upon by the binding group. {} is not an input control'.format(
            value['@id'], control['@id'])
        raise AuditFailure('missing input control',
                           detail,
                           level='NOT_COMPLIANT')
示例#19
0
def audit_library_biosample(value, system):
    '''
    Every library that is not deleted must be linked to a biosample.

    Raises AuditFailure (level ERROR) when the biosample is absent.
    '''
    if value['status'] in ['deleted'] or 'biosample' in value:
        return

    detail = 'Library {} has no biosample'.format(
        value['@id'])
    raise AuditFailure('missing biosample', detail, level='ERROR')
示例#20
0
文件: file.py 项目: sailakss/encoded
def audit_paired_with(value, system):
    '''
    A file with a paired_end needs a paired_with.
    Should be handled in the schema.
    A paired_with should be the same replicate

    Deleted, replaced and revoked files, and files without paired_end, are
    skipped. Raises AuditFailure when paired_with is missing (NOT_COMPLIANT),
    when the mate has a replicate but this file has none (DCC_ACTION), when
    the two replicates differ (ERROR), or when a paired end 1 file has more
    than one reverse paired_with link in system['context'] (ERROR).
    '''

    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return

    if 'paired_end' not in value:
        return

    if 'paired_with' not in value:
        detail = 'File {} has paired_end = {}. It requires a paired file'.format(
            value['@id'], value['paired_end'])
        raise AuditFailure('missing paired_with',
                           detail,
                           level='NOT_COMPLIANT')

    mate = value['paired_with']

    # Without a replicate on the mate there is nothing to compare against.
    if 'replicate' not in mate:
        return

    if 'replicate' not in value:
        detail = 'File {} has paired_end = {}. It requires a replicate'.format(
            value['@id'], value['paired_end'])
        raise AuditFailure('missing replicate', detail, level='DCC_ACTION')

    if value['replicate'] != mate['replicate']:
        detail = 'File {} has replicate {}. It is paired_with file {} with replicate {}'.format(
            value['@id'], value.get('replicate'), mate['@id'],
            mate.get('replicate'))
        raise AuditFailure('mismatched paired_with', detail, level='ERROR')

    # Compare as text so both 1 and '1' identify the first read; another
    # audit in this module compares paired_end against the integer form.
    # NOTE(review): confirm against the file schema which type is stored.
    if str(value['paired_end']) == '1':
        context = system['context']
        paired_with = context.get_rev_links('paired_with')
        if len(paired_with) > 1:
            detail = 'Paired end 1 file {} paired_with by multiple paired end 2 files: {!r}'.format(
                value['@id'],
                paired_with,
            )
            raise AuditFailure('multiple paired_with', detail, level='ERROR')
示例#21
0
def audit_biosample_term(value, system):
    '''
    Biosample_term_id and biosample_term_name
    and biosample_type should all be present.
    This should be handled by schemas.
    Biosample_term_id should be in the ontology.
    Biosample_term_name should match biosample_term_id.

    Deleted biosamples and biosamples without biosample_term_id are skipped.
    Raises AuditFailure (level DCC_ACTION) for an NTR: term id, a term id
    missing from system['registry']['ontology'], or a term name that is
    neither the ontology name nor one of its synonyms.
    '''

    if value['status'] in ['deleted']:
        return

    if 'biosample_term_id' not in value:
        return

    ontology = system['registry']['ontology']
    term_id = value['biosample_term_id']
    term_name = value.get('biosample_term_name')

    if term_id.startswith('NTR:'):
        detail = 'Biosample {} has a New Term Request {} - {}'.format(
            value['@id'], term_id, term_name)
        raise AuditFailure('NTR biosample', detail, level='DCC_ACTION')

    if term_id not in ontology:
        detail = 'Biosample {} has biosample_term_id of {} which is not in ontology'.format(
            value['@id'], term_id)
        # Report the full sentence, not just the bare term id.
        raise AuditFailure('term_id not in ontology',
                           detail,
                           level='DCC_ACTION')

    ontology_entry = ontology[term_id]
    if (ontology_entry['name'] != term_name
            and term_name not in ontology_entry['synonyms']):
        detail = 'Biosample {} has a mismatch between biosample_term_id "{}" and biosample_term_name "{}"'.format(
            value['@id'],
            term_id,
            term_name,
        )
        raise AuditFailure('mismatched biosample_term',
                           detail,
                           level='DCC_ACTION')
示例#22
0
def audit_references_for_publication(value, system):
    '''
    Datasets of type publication are expected to list references. Flag the
    ones that do not, so the reference can be added once the publication
    has been accepted.

    Deleted, replaced, revoked and preliminary datasets are skipped. Raises
    AuditFailure (level WARNING).
    '''
    skipped_statuses = ['deleted', 'replaced', 'revoked', 'preliminary']
    if value['status'] in skipped_statuses:
        return

    if value['dataset_type'] != 'publication' or value['references']:
        return

    detail = 'publication dataset missing a reference to a publication'
    raise AuditFailure('missing reference', detail, level='WARNING')
示例#23
0
def audit_antibody_characterization_review(value, system):
    '''
    Make sure that biosample terms are in ontology
    for each characterization_review.

    Characterizations that are not reviewed, not submitted for review by lab,
    deleted or in progress are skipped, as are secondary characterizations.
    For each review, raises AuditFailure for an NTR: term id (DCC_ACTION), a
    term id missing from system['registry']['ontology'] (DCC_ACTION), or a
    term name that is neither the ontology name nor a synonym (ERROR).
    '''
    if (value['status'] in [
            'not reviewed', 'not submitted for review by lab', 'deleted',
            'in progress'
    ]):
        return

    if 'secondary_characterization_method' in value:
        return

    reviews = value['characterization_reviews']
    if not reviews:
        return

    ontology = system['registry']['ontology']
    for review in reviews:
        term_id = review['biosample_term_id']
        term_name = review['biosample_term_name']

        if term_id.startswith('NTR:'):
            detail = 'Antibody_characterization {} contains a New Term Request {} - {}'.format(
                value['@id'], term_id, term_name)
            raise AuditFailure('NTR biosample', detail, level='DCC_ACTION')

        if term_id not in ontology:
            detail = 'Antibody characterization {} contains a biosample_term_id {} that is not in the ontology'.format(
                value['@id'], term_id)
            # Report the full sentence, not just the bare term id.
            raise AuditFailure('term_id not in ontology',
                               detail,
                               level='DCC_ACTION')

        ontology_term_name = ontology[term_id]['name']
        if (ontology_term_name != term_name
                and term_name not in ontology[term_id]['synonyms']):
            detail = 'Antibody characterization {} has a mismatched term {} - {} expected {}'.format(
                value['@id'], term_id, term_name, ontology_term_name)
            raise AuditFailure('mismatched term_name',
                               detail,
                               level='ERROR')
示例#24
0
def audit_analysis_steps_closure(value, system):
    '''
    Every parent referenced by a step in analysis_steps must itself be one
    of the listed steps.

    Raises AuditFailure (level ERROR) whose detail is the sorted,
    comma-separated list of parents missing from analysis_steps.
    '''
    listed_ids = set()
    referenced_parents = set()
    for step in value['analysis_steps']:
        listed_ids.add(step['@id'])
        referenced_parents.update(step.get('parents', []))

    missing = referenced_parents - listed_ids
    if not missing:
        return

    detail = ', '.join(sorted(missing))
    raise AuditFailure('incomplete analysis_steps', detail, level='ERROR')
示例#25
0
def audit_experiment_replicates_with_no_libraries(value, system):
    '''
    Every replicate of an experiment should be associated with a library.

    Generator yielding one AuditFailure (level DCC_ACTION) per replicate
    without a library. Deleted, replaced and revoked experiments, and
    experiments without replicates, yield nothing.
    '''
    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return

    for replicate in value['replicates']:
        if 'library' in replicate:
            continue
        detail = 'Experiment {} has a replicate {}, that has no library associated with'.format(
            value['@id'], replicate['@id'])
        yield AuditFailure('replicate with no library',
                           detail,
                           level='DCC_ACTION')
示例#26
0
def audit_library_RNA_size_range(value, system):
    '''
    Libraries whose nucleic_acid_term_id is SO:0000356 or SO:0000871 (the
    ids this audit treats as RNA) need a size_range.
    This needs to accommodate the rfa.

    Deleted libraries are skipped. Raises AuditFailure (level ERROR).
    '''
    if value['status'] in ['deleted']:
        return

    rna_term_ids = ['SO:0000356', 'SO:0000871']
    if value['nucleic_acid_term_id'] not in rna_term_ids:
        return

    if 'size_range' not in value:
        detail = 'RNA library {} requires a value for size_range'.format(value['@id'])
        raise AuditFailure('missing size_range', detail, level='ERROR')
示例#27
0
文件: file.py 项目: sailakss/encoded
def audit_file_read_length(value, system):
    '''
    Files with output_type "reads" should declare a read_length.

    Deleted, replaced and revoked files are skipped. Raises AuditFailure
    (level ERROR) when read_length is absent.
    '''
    skipped_statuses = ['deleted', 'replaced', 'revoked']
    if value['status'] in skipped_statuses or value['output_type'] != 'reads':
        return

    if 'read_length' in value:
        return

    detail = 'Reads file {} missing read_length'.format(value['@id'])
    raise AuditFailure('missing read_length', detail, level='ERROR')
示例#28
0
def audit_antibody_characterization_target(value, system):
    '''
    The target of a characterization has to agree with the targets of the
    antibody it characterizes.

    For a target investigated as a recombinant protein, the antibody must
    have a target investigated as a tag, and the part of the target label
    before the first "-" must be one of the antibody target labels. For any
    other target, its name must equal the name of one of the antibody
    targets. Raises AuditFailure (level ERROR) otherwise.
    '''
    antibody = value['characterizes']
    target = value['target']
    antibody_targets = antibody['targets']

    if 'recombinant protein' not in target['investigated_as']:
        # Plain target: look for a name match among the antibody targets.
        antibody_target_names = [
            candidate.get('name') for candidate in antibody_targets
        ]
        if target['name'] in antibody_target_names:
            return
        detail = 'Target {} is not found in target list for antibody {}'.format(
            target['name'], antibody['@id'])
        raise AuditFailure('mismatched target', detail, level='ERROR')

    # Recombinant protein: the tag is the label text before the first dash.
    prefix = target['label'].split('-')[0]
    antibody_labels = set()
    antibody_categories = set()
    for candidate in antibody_targets:
        antibody_labels.add(candidate['label'])
        antibody_categories.update(candidate['investigated_as'])

    if 'tag' not in antibody_categories:
        detail = 'Antibody {} is not for a tagged protein, yet target is investigated_as a recombinant protein'.format(
            antibody['@id'])
        raise AuditFailure('not tagged antibody', detail, level='ERROR')

    if prefix not in antibody_labels:
        detail = '{} is not found in target list for antibody {}'.format(
            prefix, antibody['@id'])
        raise AuditFailure('mismatched tag target',
                           detail,
                           level='ERROR')
示例#29
0
文件: file.py 项目: sailakss/encoded
def audit_run_type(value, system):
    '''
    Fastq and fasta files have to state their run_type.
    Making this a schema dependency was attempted and did not happen.

    Deleted, replaced and revoked files are skipped. Raises AuditFailure
    (level NOT_COMPLIANT) when run_type is absent.
    '''
    skipped_statuses = ['deleted', 'replaced', 'revoked']
    if value['status'] in skipped_statuses:
        return

    needs_run_type = value['file_format'] in ['fastq', 'fasta']
    if needs_run_type and 'run_type' not in value:
        detail = 'File {} has file_format {}. It requires a value for run_type'.format(
            value['@id'], value['file_format'])
        raise AuditFailure('missing run_type', detail, level='NOT_COMPLIANT')
示例#30
0
文件: file.py 项目: sailakss/encoded
def audit_file_platform(value, system):
    '''
    Files whose file_format is listed in raw_data_formats need a platform.
    This belongs in the schema.

    Deleted and replaced files are skipped. Raises AuditFailure (level
    ERROR) when platform is absent.
    '''
    if value['status'] in ['deleted', 'replaced']:
        return

    is_raw_data = value['file_format'] in raw_data_formats
    if is_raw_data and 'platform' not in value:
        detail = 'Raw data file {} missing platform information'.format(
            value['@id'])
        raise AuditFailure('missing platform', detail, level='ERROR')