Пример #1
0
def yield_ensembl_names(gene_ids):
    """
    Yield pairs taken from a biomart query on the mouse gene dataset.

    Builds one query against 'mmusculus_gene_ensembl', filtered on
    'ensembl_gene_id' by all of the given ids joined with commas, and asks
    for the 'ensembl_gene_id' and 'external_gene_id' attributes.

    @param gene_ids: Iterable of gene ids; each one is converted with str().
    @return: Generator of (row[0], row[1]) tuples, one per result row.
        Presumably these are (ensembl gene id, external gene id), following
        the order in which the attributes are requested - confirm against
        the biomart helper.
    """
    import biopsy.identifiers.biomart as biomart

    query = biomart.new_query()
    dataset = biomart.add_dataset(query, 'mmusculus_gene_ensembl')
    # A generator expression keeps the block independent of itertools.imap,
    # which only exists on Python 2 and is not imported here.
    joined_ids = ",".join(str(gene_id) for gene_id in gene_ids)
    biomart.add_filter(dataset, 'ensembl_gene_id', joined_ids)
    biomart.add_attribute(dataset, 'ensembl_gene_id')
    biomart.add_attribute(dataset, 'external_gene_id')
    for row in biomart.yield_csv_query_results(query):
        yield row[0], row[1]
Пример #2
0
def yield_ensembl_names(gene_ids):
    """
    Yield pairs taken from a biomart query on the mouse gene dataset.

    A single query is issued against 'mmusculus_gene_ensembl'. It is
    filtered on 'ensembl_gene_id' using every given id (comma separated)
    and requests the 'ensembl_gene_id' and 'external_gene_id' attributes.

    @param gene_ids: Iterable of gene ids; each one is converted with str().
    @return: Generator of (row[0], row[1]) tuples, one per result row.
        Presumably (ensembl gene id, external gene id), matching the order
        the attributes are added in - confirm against the biomart helper.
    """
    import biopsy.identifiers.biomart as biomart

    query = biomart.new_query()
    dataset = biomart.add_dataset(query, 'mmusculus_gene_ensembl')
    # Generator expression instead of imap: imap is Python 2 only and is
    # not brought into scope by this function.
    biomart.add_filter(
        dataset,
        'ensembl_gene_id',
        ",".join(str(gene_id) for gene_id in gene_ids),
    )
    biomart.add_attribute(dataset, 'ensembl_gene_id')
    biomart.add_attribute(dataset, 'external_gene_id')
    for row in biomart.yield_csv_query_results(query):
        yield row[0], row[1]
Пример #3
0
def yield_mouse_orthologs(hs_genes):
    """
    Map human Ensembl genes into mouse orthologs using biomart.

    Queries the 'hsapiens_gene_ensembl' dataset for the 'ensembl_gene_id'
    and 'mouse_ensembl_gene' attributes. The gene ids are passed through
    B.split_big_list with a size of 50 and one query is run per chunk.

    @param hs_genes: Human Ensembl gene ids. They are joined with ',' so
        they must already be strings.
    @return: Generator over the second column of every result row in which
        that column is non-empty (presumably the mouse ortholog id, as
        'mouse_ensembl_gene' is the second attribute requested).
    """
    query = B.new_query()
    dataset = B.add_dataset(query, 'hsapiens_gene_ensembl')
    B.add_attribute(dataset, 'ensembl_gene_id')
    B.add_attribute(dataset, 'mouse_ensembl_gene')
    # The filter starts out empty; its value is replaced for every chunk.
    gene_filter = B.add_filter(dataset, name='ensembl_gene_id', value='')
    # Use the hs_genes argument: the previous body ignored it and read an
    # unrelated name (ensembl_hs_genes) from the enclosing scope instead.
    for chunk in B.split_big_list(hs_genes, 50):
        gene_filter.set('value', ','.join(chunk))
        for row in B.yield_csv_query_results(query):
            if row[1]:
                yield row[1]
Пример #4
0
def yield_mouse_orthologs(hs_genes):
    """
    Map human Ensembl genes into mouse orthologs using biomart.

    One query is built on the 'hsapiens_gene_ensembl' dataset requesting
    the 'ensembl_gene_id' and 'mouse_ensembl_gene' attributes. It is then
    re-run once per chunk produced by B.split_big_list(hs_genes, 50).

    @param hs_genes: Human Ensembl gene ids. They are joined with ',' so
        they must already be strings.
    @return: Generator over the second column of each result row, skipping
        rows where it is empty (presumably the mouse ortholog id, since
        'mouse_ensembl_gene' is the second attribute requested).
    """
    query = B.new_query()
    dataset = B.add_dataset(query, 'hsapiens_gene_ensembl')
    for attribute in ('ensembl_gene_id', 'mouse_ensembl_gene'):
        B.add_attribute(dataset, attribute)
    # Created empty here and filled in per chunk below.
    gene_filter = B.add_filter(dataset, name='ensembl_gene_id', value='')
    # Iterate over the hs_genes argument; the earlier code read the name
    # ensembl_hs_genes, which is not defined in this function.
    for chunk in B.split_big_list(hs_genes, 50):
        gene_filter.set('value', ','.join(chunk))
        for row in B.yield_csv_query_results(query):
            if row[1]:
                yield row[1]
Пример #5
0
def get_ensembl_go_annotations(genes):
    """
    Query Ensembl biomart for the GO annotations of the given genes.

    A separate query is built for each of the three GO id / linkage type
    attribute pairs (biological process, cellular component, molecular
    function) on the 'mmusculus_gene_ensembl' dataset. Each query is run
    once per chunk returned by biomart.split_big_list(..., 50). Rows whose
    third column is in options.go_evidence_codes_to_ignore are skipped.

    @param genes: Sized collection of genes; each is converted with str().
    @return: A cookbook.DictOfLists mapping the first column of every kept
        row to the list of second-column values seen for it (presumably
        gene id -> GO ids, following the attribute order requested).
    """
    import biopsy.identifiers.biomart as biomart

    logging.info('Querying Ensembl biomart for GO annotations of %d genes', len(genes))
    go_attribute_pairs = (
        ('go_biological_process_id', 'go_biological_process_linkage_type'),
        ('go_cellular_component_id', 'go_cellular_component_linkage_type'),
        ('go_molecular_function_id', 'go_molecular_function_linkage_type'),
    )
    annotations = cookbook.DictOfLists()
    for go_id_attribute, linkage_attribute in go_attribute_pairs:
        query = biomart.new_query()
        dataset = biomart.add_dataset(query, 'mmusculus_gene_ensembl')
        for attribute in ('ensembl_gene_id', go_id_attribute, linkage_attribute):
            biomart.add_attribute(dataset, attribute)
        # The filter value is left empty here and replaced for each chunk.
        gene_filter = biomart.add_filter(dataset, name='ensembl_gene_id', value='')
        gene_names = (str(gene) for gene in genes)
        for chunk in biomart.split_big_list(gene_names, 50):
            gene_filter.set('value', ','.join(chunk))
            for row in biomart.yield_csv_query_results(query):
                if row[2] in options.go_evidence_codes_to_ignore:
                    continue
                annotations[row[0]].append(row[1])
    annotation_count = sum(map(len, annotations.values()))
    logging.info('Found %d go annotations', annotation_count)
    return annotations