示例#1
0
def rna_seq_wrapper(assembly, taxon_id):
    """Convert ENCODE RNA-seq experiments through ``convert_to_IHEC_format``.

    Builds the RNA-seq search URL, the track hierarchy and a per-dataset
    callback, and returns whatever ``convert_to_IHEC_format`` returns.

    Args:
        assembly: Passed through unchanged to ``convert_to_IHEC_format``.
        taxon_id: Passed through unchanged to ``convert_to_IHEC_format``.
    """
    url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=RNA-seq'

    # Used to set is_main
    # NOTE(review): each list looks like a most-preferred-first ordering of
    # output types -- confirm against convert_to_IHEC_format.
    track_hierarchy = {
        'signal_forward': [
            'plus strand signal of unique reads',
            'plus strand signal of all reads', 'plus strand signal',
            'raw plus strand signal'
        ],
        'signal_reverse': [
            'minus strand signal of unique reads',
            'minus strand signal of all reads', 'minus strand signal',
            'raw minus strand signal'
        ],
        'signal': [
            'signal of unique reads', 'signal of all reads', 'signal',
            'raw signal', 'splice junctions'
        ],
        'contigs': ['contigs']
    }

    size_range_to_experiment_type = {'>200': 'mRNA-seq', '<200': 'smRNA-seq'}

    def dataset_additions_f(experiment, json_object):
        """Fill in experiment_type and assay_type on ``json_object``.

        The value is looked up from the ``size_range`` of the first
        replicate's library; when that is missing or not a known range,
        both fields fall back to the generic 'RNA-seq'.

        Args:
            experiment: Mapping read for 'replicates' and 'accession'.
            json_object: Mapping whose 'experiment_attributes' entry is
                updated in place.

        Returns:
            The same ``json_object`` that was passed in.
        """
        # Set experiment_type
        try:
            library = experiment['replicates'][0]['library']
        except (KeyError, IndexError):
            # No replicates, or a replicate without a library: handle it
            # like a missing size_range instead of aborting the conversion.
            library = {}
        size_range = library['size_range'] if 'size_range' in library else None

        # print(...) with one argument behaves the same on Python 2 and 3.
        if size_range is None:
            print('Could not find size_range ' + experiment['accession'])
            experiment_type = 'RNA-seq'
        elif size_range not in size_range_to_experiment_type:
            print('Size range not found: ' + size_range)
            experiment_type = 'RNA-seq'
        else:
            experiment_type = size_range_to_experiment_type[size_range]

        # Both attributes always carry the same value.
        attributes = json_object['experiment_attributes']
        attributes['experiment_type'] = experiment_type
        attributes['assay_type'] = experiment_type

        return json_object

    return convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy,
                                  dataset_additions_f)
示例#2
0
def bisulfite_wrapper(assembly, taxon_id):
    """Convert ENCODE whole-genome bisulfite experiments via ``convert_to_IHEC_format``.

    ``assembly`` and ``taxon_id`` are forwarded unchanged; the return value
    is whatever ``convert_to_IHEC_format`` returns.
    """
    search_url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=whole-genome%20shotgun%20bisulfite%20sequencing'

    # Used to set is_main
    main_track_order = {
        'methylation_profile': [
            'methylation state at CpG',
            'methylation state at CHH',
        ],
    }

    def add_methylation_fields(experiment, json_object):
        """Stamp the fixed methylation labels on ``json_object`` and return it."""
        # Set experiment_type
        attributes = json_object['experiment_attributes']
        attributes['experiment_type'] = 'DNA Methylation'
        attributes['assay_type'] = 'WGB-Seq'
        return json_object

    return convert_to_IHEC_format(
        search_url, assembly, taxon_id, main_track_order,
        add_methylation_fields)
示例#3
0
def chip_seq_wrapper(assembly, taxon_id, target):
    """Convert ENCODE ChIP-seq experiments for one target via ``convert_to_IHEC_format``.

    ``target`` is interpolated into the search URL as ``<target>-human``;
    ``assembly`` and ``taxon_id`` are forwarded unchanged. Returns whatever
    ``convert_to_IHEC_format`` returns.
    """
    search_url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=ChIP-seq&target.name=%s-human' % target

    # Used to set is_main
    peak_outputs = [
        'optimal idr thresholded peaks',
        'conservative idr thresholded peaks',
        'replicated peaks',
        'peaks',
        'hotspots',
    ]
    signal_outputs = [
        'signal p-value',
        'fold change over control',
        'signal',
        'raw signal',
    ]
    main_track_order = {'peak_calls': peak_outputs, 'signal': signal_outputs}

    def add_target_label(experiment, json_object):
        """Copy the experiment's target label into experiment_type."""
        # Set experiment_type
        target_label = experiment['target']['label']
        json_object['experiment_attributes']['experiment_type'] = target_label
        return json_object

    return convert_to_IHEC_format(
        search_url, assembly, taxon_id, main_track_order, add_target_label)
示例#4
0
def chip_seq_wrapper(target, version):
    """Convert ENCODE ChIP-seq experiments for one target and write them as JSON.

    The conversion is done by ``convert_to_IHEC_format`` with the fixed
    assembly 'hg19' and taxon id 9606; the result is dumped to
    ``../output/<target>_v<version>.json``.

    Args:
        target: Target name; interpolated into the search URL as
            ``<target>-human`` and into the output file name.
        version: Version tag interpolated into the output file name.

    Returns:
        None. The result is only written to disk.
    """
    url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=ChIP-seq&target.name=%s-human' % target
    assembly = 'hg19'
    taxon_id = 9606

    # Used to set is_main
    track_hierarchy = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
                                'replicated peaks', 'peaks', 'hotspots'],
                           'signal': ['signal p-value', 'fold change over control', 'signal', 'raw signal']}

    def dataset_additions_f(experiment, json_object):
        """Copy the experiment's target label into experiment_type; return ``json_object``."""
        #Set experiment_type
        json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']

        return json_object

    # Convert BEFORE opening the output file: opening with 'w+' truncates it,
    # so a failure inside the conversion would otherwise wipe a previous
    # output or leave an empty file behind.
    converted = convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)

    with open('../output/%s_v%s.json' % (target, version), 'w+') as outfile:
        json.dump(converted, outfile, indent=4)
示例#5
0
def bisulfite_wrapper(assembly, taxon_id):
    """Run ``convert_to_IHEC_format`` over ENCODE whole-genome bisulfite experiments.

    Both arguments are forwarded as-is; the result of
    ``convert_to_IHEC_format`` is returned unchanged.
    """
    encode_query = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=whole-genome%20shotgun%20bisulfite%20sequencing'

    # Used to set is_main
    hierarchy = {'methylation_profile': ['methylation state at CpG', 'methylation state at CHH']}

    # Constant attribute values applied to every dataset, in this order.
    fixed_attributes = (
        ('experiment_type', 'DNA Methylation'),
        ('assay_type', 'WGB-Seq'),
    )

    def apply_fixed_attributes(experiment, json_object):
        """Write the constant methylation attributes and hand back ``json_object``."""
        # Set experiment_type
        for field, value in fixed_attributes:
            json_object['experiment_attributes'][field] = value
        return json_object

    return convert_to_IHEC_format(encode_query, assembly, taxon_id, hierarchy, apply_fixed_attributes)
示例#6
0
def rna_seq_wrapper(version):
    """Convert ENCODE RNA-seq experiments and write them as JSON.

    The conversion is done by ``convert_to_IHEC_format`` with the fixed
    assembly 'hg19' and taxon id 9606; the result is dumped to
    ``../output/RNAseq_v<version>.json``.

    Args:
        version: Version tag interpolated into the output file name.

    Returns:
        None. The result is only written to disk.
    """
    url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=RNA-seq'
    assembly = 'hg19'
    taxon_id = 9606

    # Used to set is_main
    # NOTE(review): each list looks like a most-preferred-first ordering of
    # output types -- confirm against convert_to_IHEC_format.
    track_hierarchy = {'signal_forward': ['plus strand signal of unique reads', 'plus strand signal of all reads',
                                              'plus strand signal', 'raw plus strand signal'],
                           'signal_reverse': ['minus strand signal of unique reads', 'minus strand signal of all reads',
                                              'minus strand signal', 'raw minus strand signal'],
                           'signal': ['signal of unique reads', 'signal of all reads', 'signal', 'raw signal', 'splice junctions'],
                           'contigs': ['contigs']}

    size_range_to_experiment_type = {
        '>200': 'mRNA-seq',
        '<200': 'smRNA-seq'
    }

    def dataset_additions_f(experiment, json_object):
        """Fill in experiment_type and assay_type on ``json_object``.

        The value is looked up from the ``size_range`` of the first
        replicate's library; when that is missing or not a known range,
        both fields fall back to the generic 'RNA-seq'.

        Args:
            experiment: Mapping read for 'replicates' and 'accession'.
            json_object: Mapping whose 'experiment_attributes' entry is
                updated in place.

        Returns:
            The same ``json_object`` that was passed in.
        """
        #Set experiment_type
        try:
            library = experiment['replicates'][0]['library']
        except (KeyError, IndexError):
            # No replicates, or a replicate without a library: handle it
            # like a missing size_range instead of aborting the conversion.
            library = {}
        size_range = library['size_range'] if 'size_range' in library else None

        # print(...) with one argument behaves the same on Python 2 and 3.
        if size_range is None:
            print('Could not find size_range ' + experiment['accession'])
            experiment_type = 'RNA-seq'
        elif size_range not in size_range_to_experiment_type:
            print('Size range not found: ' + size_range)
            experiment_type = 'RNA-seq'
        else:
            experiment_type = size_range_to_experiment_type[size_range]

        # Both attributes always carry the same value.
        attributes = json_object['experiment_attributes']
        attributes['experiment_type'] = experiment_type
        attributes['assay_type'] = experiment_type

        return json_object

    # Convert BEFORE opening the output file: opening with 'w+' truncates it,
    # so a failure inside the conversion would otherwise wipe a previous
    # output or leave an empty file behind.
    converted = convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)

    with open('../output/RNAseq_v%s.json' % version, 'w+') as outfile:
        json.dump(converted, outfile, indent=4)
示例#7
0
def chip_seq_wrapper(assembly, taxon_id, target):
    """Run ``convert_to_IHEC_format`` over ENCODE ChIP-seq experiments for ``target``.

    The search URL filters on ``target.name=<target>-human``. ``assembly``
    and ``taxon_id`` are forwarded untouched, and the converter's return
    value is returned as-is.
    """
    encode_query = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=ChIP-seq&target.name=%s-human' % target

    # Used to set is_main
    hierarchy = {
        'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
                       'replicated peaks', 'peaks', 'hotspots'],
        'signal': ['signal p-value', 'fold change over control', 'signal', 'raw signal'],
    }

    def label_with_target(experiment, json_object):
        """Set experiment_type to the experiment's target label; return ``json_object``."""
        # Set experiment_type
        label = experiment['target']['label']
        attributes = json_object['experiment_attributes']
        attributes['experiment_type'] = label
        return json_object

    return convert_to_IHEC_format(encode_query, assembly, taxon_id, hierarchy, label_with_target)