class MirtarbaseParser(ReturnParser):
    def __init__(self):
        """Set up the single output RelationshipSet: (Mirna.name)-[TARGETS]->(Gene.sid)."""
        super(MirtarbaseParser, self).__init__()

        # RelationshipSets
        self.mirna_targets_gene = RelationshipSet('TARGETS', ['Mirna'], ['Gene'], ['name'], ['sid'])

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        """Parse the miRTarBase Excel file and create miRNA->Gene TARGETS relationships."""
        log.debug("Run {}".format(self.__class__.__name__))

        mirtarbase_instance = self.get_instance_by_name('Mirtarbase')
        mirtarbase_file = mirtarbase_instance.get_file(FILE_NAME)

        df = pandas.read_excel(mirtarbase_file, index_col=None, header=0)

        # rename columns for easier access
        df.columns = ['mirtarbase_id', 'mirna', 'species_mirna', 'target_genesymbol', 'target_entrez',
                      'species_target', 'experiments', 'support_type', 'references']

        for row in df.itertuples():
            # target_entrez may be read as a number by pandas; cast to str so the
            # Gene sid matches the string IDs used elsewhere
            self.mirna_targets_gene.add_relationship(
                {'name': row.mirna.strip()},
                {'sid': str(row.target_entrez).strip()},
                {'experiments': row.experiments, 'support_type': row.support_type,
                 'references': row.references, 'source': mirtarbase_instance.datasource.name}
            )
def __init__(self):
    """Declare output containers: Source/Target nodes and FOO relationships between them."""
    super().__init__()

    # node outputs
    self.source = NodeSet(['Source'], merge_keys=['source_id'])
    self.target = NodeSet(['Target'], merge_keys=['target_id'])

    # relationship output connecting the two node sets on their id properties
    self.rels = RelationshipSet('FOO', ['Source'], ['Target'], ['source_id'], ['target_id'])
def __init__(self):
    """Declare NCBI gene outputs: Gene nodes, GeneSymbol nodes and their relationships."""
    super().__init__()

    # arguments
    self.arguments = ['taxid']

    # output data
    # Both gene IDs and gene symbols carry the label 'Gene'; two NodeSets are
    # used because only the symbol nodes need taxid for uniqueness.
    self.genes = NodeSet(['Gene'], merge_keys=['sid'], default_props={'source': 'ncbigene'})
    self.genesymbols = NodeSet(
        ['Gene'],
        merge_keys=['sid', 'taxid'],
        default_props={'source': 'ncbigene', 'type': 'symbol'},
    )

    # symbol <-> symbol synonyms and gene -> symbol mappings
    self.genesymbol_synonym_genesymbol = RelationshipSet(
        'SYNONYM', ['Gene'], ['Gene'], ['sid', 'taxid'], ['sid', 'taxid'],
        default_props={'source': 'ncbigene'})
    self.gene_maps_genesymbol = RelationshipSet(
        'MAPS', ['Gene'], ['Gene'], ['sid'], ['sid', 'taxid'],
        default_props={'source': 'ncbigene'})
def __init__(self):
    """Set up the (Mirna.name)-[TARGETS]->(Gene.sid) RelationshipSet."""
    super().__init__()

    # RelationshipSets
    self.mirna_targets_gene = RelationshipSet('TARGETS', ['Mirna'], ['Gene'], ['name'], ['sid'])
class DummyParser(ReturnParser):
    """Test parser: reads letters from a dummy file and links them to Fummy nodes."""

    def __init__(self):
        super(DummyParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # output data
        self.dummy_nodes = NodeSet(['Dummy'], merge_keys=['sid'])
        self.fummy_nodes = NodeSet(['Fummy'], merge_keys=['sid'])
        self.dummy_knows_fummy = RelationshipSet('KNOWS', ['Dummy'], ['Fummy'], ['sid'], ['sid'])

    def run_with_mounted_arguments(self):
        self.run(self.taxid)

    def run(self, taxid):
        """
        Create Dummy nodes for every line of 'file.txt' and KNOWS relationships
        to randomly chosen Fummy nodes.

        :param taxid: Taxonomy ID, stored as a property on every node.
        """
        dummy_instance = self.get_instance_by_name('Dummy')
        dummyfile = dummy_instance.get_file('file.txt')

        # Fummy nodes with integer sids 0..9 (targets of the KNOWS relationships below)
        # NOTE: removed an unused local (target_sids) that was computed but never read.
        for i in range(10):
            self.fummy_nodes.add_node({'sid': i, 'taxid': taxid})

        with open(dummyfile) as f:
            for l in f:
                letter = l.strip()
                self.dummy_nodes.add_node({'sid': letter, 'taxid': taxid})
                # link each letter to one random Fummy node
                self.dummy_knows_fummy.add_relationship({'sid': letter}, {'sid': randint(0, 9)}, {'key': 'value'})
def __init__(self):
    """Declare ENSEMBL outputs: Gene/Transcript/Protein nodes and CODES relationships."""
    super().__init__()

    # arguments
    self.arguments = ['taxid']

    # NodeSets, all tagged with source 'ensembl'
    self.genes = NodeSet(['Gene'], merge_keys=['sid'], default_props={'source': 'ensembl'})
    self.transcripts = NodeSet(['Transcript'], merge_keys=['sid'], default_props={'source': 'ensembl'})
    self.proteins = NodeSet(['Protein'], merge_keys=['sid'], default_props={'source': 'ensembl'})

    # RelationshipSets: Gene -CODES-> Transcript -CODES-> Protein
    self.gene_codes_transcript = RelationshipSet(
        'CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'], default_props={'source': 'ensembl'})
    self.transcript_codes_protein = RelationshipSet(
        'CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'ensembl'})
def __init__(self):
    """Declare the (Protein.sid)-[ASSOCIATION]->(Term.sid) output RelationshipSet."""
    super().__init__()

    # arguments
    self.arguments = ['taxid']

    # RelationshipSets
    self.protein_associates_goterm = RelationshipSet(
        'ASSOCIATION', ['Protein'], ['Term'], ['sid'], ['sid'])
def __init__(self):
    """Declare the (Mirna.name)-[TARGETS]->(Transcript.sid) output RelationshipSet."""
    super().__init__()

    # arguments
    self.arguments = ['taxid']

    # RelationshipSets
    self.mirna_targets_transcript = RelationshipSet(
        'TARGETS', ['Mirna'], ['Transcript'], ['name'], ['sid'])
def __init__(self):
    """Declare outputs for discontinued NCBI genes and their replacements."""
    super().__init__()

    self.arguments = ['taxid']

    # discontinued genes carry the extra 'Legacy' label
    self.legacy_genes = NodeSet(['Gene', 'Legacy'], merge_keys=['sid'], default_props={'source': 'ncbigene'})

    # link from a discontinued gene to its current replacement
    self.legacy_gene_now_gene = RelationshipSet(
        'REPLACED_BY', ['Gene', 'Legacy'], ['Gene'], ['sid'], ['sid'],
        default_props={'source': 'ncbigene'})
def __init__(self):
    """Declare test outputs: Dummy and Fummy nodes plus KNOWS relationships."""
    super().__init__()

    # arguments
    self.arguments = ['taxid']

    # output data
    self.dummy_nodes = NodeSet(['Dummy'], merge_keys=['sid'])
    self.fummy_nodes = NodeSet(['Fummy'], merge_keys=['sid'])
    self.dummy_knows_fummy = RelationshipSet('KNOWS', ['Dummy'], ['Fummy'], ['sid'], ['sid'])
def __init__(self):
    """Declare SwissLipids outputs: Lipid nodes and their class/parent/component/mapping relationships."""
    super().__init__()

    # define NodeSet and RelationshipSet
    self.lipids = NodeSet(['Lipid'], merge_keys=['sid'])

    # Lipid <-> Lipid hierarchy relationships
    self.lipid_fromclass_lipid = RelationshipSet('FROM_LIPID_CLASS', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
    self.lipid_parent_lipid = RelationshipSet('HAS_PARENT', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
    self.lipid_component_lipid = RelationshipSet('HAS_COMPONENT', ['Lipid'], ['Lipid'], ['sid'], ['sid'])

    # cross-references to metabolites and proteins
    self.lipid_maps_metabolite = RelationshipSet('MAPS', ['Lipid'], ['Metabolite'], ['sid'], ['sid'])
    self.lipid_associates_protein = RelationshipSet('HAS_ASSOCIATION', ['Lipid'], ['Protein'], ['sid'], ['sid'])
def __init__(self):
    """Declare RefSeq CODES relationships: Gene -> Transcript -> Protein."""
    super().__init__()

    # arguments
    self.arguments = ['taxid']

    # define NodeSet and RelationshipSet
    self.gene_codes_transcript = RelationshipSet(
        'CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'], default_props={'source': 'refseq'})
    self.transcript_codes_protein = RelationshipSet(
        'CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'refseq'})
class DependingTestParser(ReturnParser):
    """Test parser that emits 100 FOO relationships between Source and Target ids."""

    def __init__(self):
        super().__init__()
        self.rels = RelationshipSet('FOO', ['Source'], ['Target'], ['source_id'], ['target_id'])

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        # one relationship per id, matching Source and Target on the same value
        for idx in range(100):
            self.rels.add_relationship({'source_id': idx}, {'target_id': idx}, {'source': 'test'})
def __init__(self):
    """Set up the Gene-ORTHOLOG-Gene RelationshipSet and register it with the container."""
    super().__init__()

    self.gene_ortholog_gene = RelationshipSet('ORTHOLOG', ['Gene'], ['Gene'], ['sid'], ['sid'])

    # register all output sets with the container
    self.object_sets = [self.gene_ortholog_gene]
    self.container.add_all(self.object_sets)
def __init__(self):
    """Declare HMDB outputs: Metabolite nodes plus mapping and protein-association relationships."""
    super().__init__()

    # NodeSets
    self.metabolites = NodeSet(['Metabolite'], merge_keys=['sid'], default_props={'source': 'hmdb'})

    # Metabolite cross-references and protein associations
    self.metabolite_map_metabolite = RelationshipSet(
        'MAPS', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'hmdb'})
    self.metabolite_associates_protein = RelationshipSet(
        'HAS_ASSOCIATION', ['Metabolite'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'hmdb'})
class NcbiLegacyGeneParser(ReturnParser):
    """
    Parse legacy gene IDs from gene_history.gz

    #tax_id GeneID  Discontinued_GeneID Discontinued_Symbol Discontinue_Date
    9       -       1246494             repA1               20031113
    9       -       1246495             repA2               20031113
    9       -       1246496             leuA                20031113
    """

    def __init__(self):
        super(NcbiLegacyGeneParser, self).__init__()

        self.arguments = ['taxid']

        # discontinued genes carry the extra 'Legacy' label
        self.legacy_genes = NodeSet(['Gene', 'Legacy'], merge_keys=['sid'], default_props={'source': 'ncbigene'})
        self.legacy_gene_now_gene = RelationshipSet(
            'REPLACED_BY', ['Gene', 'Legacy'], ['Gene'], ['sid'], ['sid'],
            default_props={'source': 'ncbigene'})

    def run_with_mounted_arguments(self):
        self.run(self.taxid)

    def run(self, taxid):
        """
        Collect discontinued genes for one organism and their replacement genes.

        :param taxid: NCBI taxonomy ID; compared as a string against column 1
                      (NOTE(review): callers must pass str, not int — confirm).
        """
        log.debug(f'Run parser {self.__class__.__name__} for taxID: {taxid}.')

        ncbigene_instance = self.get_instance_by_name('NcbiGene')
        gene_history_file = ncbigene_instance.get_file('gene_history.gz')

        with gzip.open(gene_history_file, 'rt') as f:
            # skip header
            next(f)
            for l in f:
                flds = l.strip().split('\t')
                this_taxid = flds[0]
                if this_taxid == taxid:
                    new_gene_id = flds[1]
                    discontinued_gene_id = flds[2]
                    discontinued_symbol = flds[3]
                    date = flds[4]
                    self.legacy_genes.add_node({
                        'sid': discontinued_gene_id,
                        'date': date,
                        'symbol': discontinued_symbol,
                        'taxid': taxid
                    })
                    # '-' in the GeneID column means there is no replacement gene
                    if new_gene_id != '-':
                        self.legacy_gene_now_gene.add_relationship(
                            {'sid': discontinued_gene_id}, {'sid': new_gene_id}, {})
def __init__(self):
    """Set up the (Gene.sid)-[EXPRESSED]->(GtexDetailedTissue.name) RelationshipSet."""
    super().__init__()

    self.gene_expressed_tissue = RelationshipSet('EXPRESSED', ['Gene'], ['GtexDetailedTissue'],
                                                 ['sid'], ['name'])

    # register all output sets with the container
    self.object_sets = [self.gene_expressed_tissue]
    self.container.add_all(self.object_sets)
def deserialize(cls, source_dir: str, metadata_only: bool = False) -> 'Parser':
    """
    Read from a serialized directory, recreate a Parser that can load to the database.

    Files named 'nodeset_*.json' / 'relationshipset_*.json' are deserialized and
    attached to the new Parser as attributes named after the file stem;
    'parser_data.json' restores the parser metadata.

    :param source_dir: Directory to read from.
    :param metadata_only: If True, skip the NodeSet/RelationshipSet files and
                          only restore metadata from 'parser_data.json'.
    :return: A Parser object.
    """
    log.debug(f"Read Parser from {source_dir}.")
    p = cls()
    for file in os.listdir(source_dir):
        if not metadata_only:
            if file.startswith('nodeset_'):
                # NOTE(review): the attribute name keeps the 'nodeset_' prefix —
                # confirm this matches what the serializer wrote.
                ns_name = file.replace('.json', '')
                with open(os.path.join(source_dir, file), 'rt') as f:
                    log.debug(f"Deserialize {f}")
                    ns = NodeSet.from_dict(json.load(f))
                    log.debug(f"Num nodes in NodeSet: {len(ns.nodes)}")
                    p.__dict__[ns_name] = ns
            elif file.startswith('relationshipset_'):
                rs_name = file.replace('.json', '')
                with open(os.path.join(source_dir, file), 'rt') as f:
                    log.debug(f"Deserialize {f}")
                    rs = RelationshipSet.from_dict(json.load(f))
                    log.debug(f"Num relationships in RelationshipSet: {len(rs.relationships)}")
                    p.__dict__[rs_name] = rs
        if file == 'parser_data.json':
            with open(os.path.join(source_dir, file), 'rt') as f:
                metadata = json.load(f)
                # TODO add datasource instances to deserializer
                p.name = metadata['name']
    return p
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Builds Country, Province and DailyReport NodeSets plus the PART_OF and
    REPORTED RelationshipSets connecting them.

    :param file: Path to the CSV file
    :return: (countries, provinces, updates, province_in_country, province_rep_update)
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        # skip header
        next(rows)
        for row in rows:
            # assumed column layout: province, country, date, confirmed, deaths,
            # recovered[, lat, long] — TODO confirm against the JHU report format
            country = row[1]
            province = row[0]
            # if no name for province, use country name
            if not province:
                province = '{}_complete'.format(country)

            date = parse(row[2])
            # NOTE(review): 'uuid' is a deterministic concatenation, not a real UUID
            uuid = country+province+str(date)

            # missing counts are stored as the string 'na' (mixed int/str values)
            confirmed = int(row[3]) if row[3] else 'na'
            death = int(row[4]) if row[4] else 'na'
            recovered = int(row[5]) if row[5] else 'na'

            # coordinate columns are not present in every report revision
            lat = row[6] if len(row) >= 7 else None
            long = row[7] if len(row) >= 8 else None

            province_dict = {'name': province}
            if lat and long:
                province_dict['latitude'] = lat
                province_dict['longitude'] = long

            provinces.add_unique(province_dict)
            countries.add_unique({'name': country})
            updates.add_unique(
                {'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid})
            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
class GtexDataParser(ReturnParser):
    """Parse per-tissue median gene expression from the GTEx median-TPM GCT matrix."""

    def __init__(self):
        super(GtexDataParser, self).__init__()

        # (Gene.sid) -[EXPRESSED {val}]-> (GtexDetailedTissue.name)
        self.gene_expressed_tissue = RelationshipSet('EXPRESSED', ['Gene'], ['GtexDetailedTissue'],
                                                     ['sid'], ['name'])

        # register all output sets with the container
        self.object_sets = [self.gene_expressed_tissue]
        self.container.add_all(self.object_sets)

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        """Read the gzipped GCT file and create one EXPRESSED relationship per gene/tissue value."""
        gtex_instance = self.get_instance_by_name('Gtex')

        gtex_mean_gene = gtex_instance.get_file(
            'GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_median_tpm.gct.gz')

        with gzip.open(gtex_mean_gene, 'rt') as f:
            lines = f.readlines()
        # remove the first two (GCT preamble) lines
        lines = lines[2:]
        # header line with the tissue names
        header = lines.pop(0)
        # FIX: strip the trailing newline before splitting, otherwise the last
        # tissue name ends in '\n' and never matches its node
        header_fields = header.rstrip('\n').split('\t')

        # iterate data lines
        for line in lines:
            # FIX: strip trailing newline so the last column's value is clean
            flds = line.rstrip('\n').split('\t')
            # gene ID without the ENSEMBL version suffix
            gene_id = flds[0].split('.')[0]
            data_flds = flds[2:]

            # iterate the value columns with index; start at 2 to match the
            # header, which also includes the first two columns
            for i, value in enumerate(data_flds, start=2):
                tissue_detailed_name = header_fields[i]
                self.gene_expressed_tissue.add_relationship(
                    {'sid': gene_id}, {'name': tissue_detailed_name}, {'val': value})
def __init__(self):
    """Declare HGNC outputs: Gene nodes and MAPS relationships to genes and gene symbols."""
    super().__init__()

    # output data
    self.genes = NodeSet(['Gene'], merge_keys=['sid'])

    # HGNC gene -> NCBI/ENSEMBL gene, and HGNC gene -> gene symbol
    self.gene_maps_gene = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['sid'], ['sid'])
    self.gene_maps_genesymbol = RelationshipSet('MAPS', ['Gene'], ['GeneSymbol'], ['sid'], ['sid', 'taxid'])
class SomeParser(ReturnParser):
    """Test parser producing 100 Source/Target node pairs connected by FOO relationships."""

    def __init__(self):
        super().__init__()
        self.source = NodeSet(['Source'], merge_keys=['source_id'])
        self.target = NodeSet(['Target'], merge_keys=['target_id'])
        self.rels = RelationshipSet('FOO', ['Source'], ['Target'], ['source_id'], ['target_id'])

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        # create matching Source/Target pairs and connect each pair on its id
        for idx in range(100):
            self.source.add_node({'source_id': idx})
            self.target.add_node({'target_id': idx})
            self.rels.add_relationship({'source_id': idx}, {'target_id': idx}, {'source': 'test'})
def __init__(self):
    """Declare GTEx metadata outputs: tissue, detailed tissue and sample nodes with their relationships."""
    super().__init__()

    # NodeSets
    self.tissues = NodeSet(['GtexTissue'], merge_keys=['name'])
    self.detailed_tissues = NodeSet(['GtexDetailedTissue'], merge_keys=['name'])
    self.sample = NodeSet(['GtexSample'], merge_keys=['sid'])

    # sample -> tissue / detailed tissue measurements
    self.sample_measures_tissue = RelationshipSet('MEASURES', ['GtexSample'], ['GtexTissue'], ['sid'], ['name'])
    self.sample_measures_detailed_tissue = RelationshipSet(
        'MEASURES', ['GtexSample'], ['GtexDetailedTissue'], ['sid'], ['name'])

    # tissue hierarchy; deduplicated because the same pair recurs per sample
    self.tissue_parent_detailed_tissue = RelationshipSet(
        'PARENT', ['GtexTissue'], ['GtexDetailedTissue'], ['name'], ['name'])
    self.tissue_parent_detailed_tissue.unique = True
class HGNCParser(ReturnParser):
    """Parse the HGNC complete set into Gene nodes and MAPS relationships."""

    def __init__(self):
        super(HGNCParser, self).__init__()

        # output data
        self.genes = NodeSet(['Gene'], merge_keys=['sid'])
        self.gene_maps_gene = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['sid'], ['sid'])
        self.gene_maps_genesymbol = RelationshipSet('MAPS', ['Gene'], ['GeneSymbol'], ['sid'], ['sid', 'taxid'])

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        hgnc_instance = self.get_instance_by_name('HGNC')
        hgnc_complete_file = hgnc_instance.get_file('hgnc_complete_set.txt')
        self.parse_hgnc_complete_file(hgnc_complete_file)

    def parse_hgnc_complete_file(self, hgnc_complete_file):
        """
        Parse hgnc_complete_set.txt (tab separated, one header line).

        :param hgnc_complete_file: Path to the file.
        """
        with open(hgnc_complete_file, 'rt') as f:
            # FIX: split the header into column names. Previously the raw header
            # *string* was zipped with the fields, so node properties were keyed
            # by single characters of the header line instead of column names.
            header = next(f).strip().split('\t')
            for l in f:
                flds = l.strip().split('\t')
                sid = flds[0]
                gene_symbol = flds[1]
                ncbi_id = flds[18] if len(flds) > 18 else None
                ensembl_id = flds[19] if len(flds) > 19 else None

                # all columns as node properties, keyed by header column name
                all_props = dict(zip(header, flds))
                all_props['sid'] = sid
                all_props['source'] = 'hgnc'
                self.genes.add_node(all_props)

                if ncbi_id:
                    self.gene_maps_gene.add_relationship({'sid': sid}, {'sid': ncbi_id}, {'source': 'hgnc'})
                if ensembl_id:
                    self.gene_maps_gene.add_relationship({'sid': sid}, {'sid': ensembl_id}, {'source': 'hgnc'})
                if gene_symbol:
                    # HGNC covers human genes only, hence the fixed taxid
                    self.gene_maps_genesymbol.add_relationship(
                        {'sid': sid}, {'sid': gene_symbol, 'taxid': '9606'}, {'source': 'hgnc'})
def __init__(self):
    """Declare outputs for removed RefSeq records: legacy transcripts/proteins and their links."""
    super().__init__()

    self.arguments = ['taxid']

    # working set of already-seen legacy IDs
    self.legacy_ids = set()

    # legacy transcripts and their replacements
    self.legacy_transcripts = NodeSet(['Transcript', 'Legacy'], merge_keys=['sid'],
                                      default_props={'source': 'refseq'})
    self.legacy_transcript_now_transcript = RelationshipSet(
        'REPLACED_BY', ['Transcript'], ['Transcript'], ['sid'], ['sid'],
        default_props={'source': 'refseq'})

    # legacy proteins and their replacements
    self.legacy_proteins = NodeSet(['Protein', 'Legacy'], merge_keys=['sid'],
                                   default_props={'source': 'refseq'})
    self.legacy_protein_now_protein = RelationshipSet(
        'REPLACED_BY', ['Protein'], ['Protein'], ['sid'], ['sid'],
        default_props={'source': 'refseq'})

    # CODES relationships involving legacy records
    self.gene_codes_legacy_transcript = RelationshipSet(
        'CODES', ['Gene'], ['Transcript', 'Legacy'], ['sid'], ['sid'],
        default_props={'source': 'refseq'})
    self.legacy_transcript_codes_protein = RelationshipSet(
        'CODES', ['Transcript', 'Legacy'], ['Protein'], ['sid'], ['sid'],
        default_props={'source': 'refseq'})
def __init__(self):
    """Declare LNCipedia outputs: Gene/Transcript nodes, CODES and MAPS relationships."""
    super().__init__()

    # NodeSets
    self.genes = NodeSet(['Gene'], merge_keys=['sid'])
    self.transcripts = NodeSet(['Transcript'], merge_keys=['sid'])

    # gene -> transcript structure plus cross-reference mappings
    self.gene_codes_transcripts = RelationshipSet('CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'])
    self.gene_maps_gene = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['sid'], ['sid'])
    self.transcript_maps_transcript = RelationshipSet('MAPS', ['Transcript'], ['Transcript'], ['sid'], ['sid'])
class NcbiGeneOrthologParser(ReturnParser):
    def __init__(self):
        """Set up the Gene-ORTHOLOG-Gene RelationshipSet and register it with the container."""
        super(NcbiGeneOrthologParser, self).__init__()

        self.gene_ortholog_gene = RelationshipSet('ORTHOLOG', ['Gene'], ['Gene'], ['sid'], ['sid'])

        self.object_sets = [self.gene_ortholog_gene]
        self.container.add_all(self.object_sets)

    def run_with_mounted_arguments(self):
        self.run()

    def run(self):
        """
        Get the Gene-ORTHOLOG-Gene relationships.

        This is currently not filtered for taxid.
        """
        ncbigene_instance = self.get_instance_by_name('NcbiGene')
        ortholog_file = ncbigene_instance.get_file('gene_orthologs.gz')

        with gzip.open(ortholog_file, 'rt') as f:
            # skip first line
            next(f)
            for l in f:
                # columns 1 and 4 are the two gene IDs (per the NCBI
                # gene_orthologs layout — TODO confirm against the FTP README)
                flds = l.strip().split()
                g1 = flds[1]
                g2 = flds[4]
                self.gene_ortholog_gene.add_relationship({'sid': g1}, {'sid': g2}, {})
class MirdbParser(ReturnParser):
    def __init__(self):
        super(MirdbParser, self).__init__()

        # arguments
        self.arguments = ['taxid']

        # RelationshipSets
        # (Mirna.name) -[TARGETS {score, source}]-> (Transcript.sid)
        self.mirna_targets_transcript = RelationshipSet(
            'TARGETS', ['Mirna'], ['Transcript'], ['name'], ['sid'])

    def run_with_mounted_arguments(self):
        self.run(self.taxid)

    def run(self, taxid):
        """
        Parse miRDB target predictions for one organism.

        :param taxid: NCBI taxonomy ID; selects the miRNA name prefix used to
                      filter lines to the organism of interest.
        """
        mirdb_instance = self.get_instance_by_name('Mirdb')
        mirdb_file = mirdb_instance.datasource.get_prediction_file(
            mirdb_instance)
        datasource_name = mirdb_instance.datasource.name

        # species-specific miRNA name prefix (presumably e.g. 'hsa' for human —
        # confirm against TAXID_2_MIRPREFIX); other organisms are skipped below
        mir_prefix = TAXID_2_MIRPREFIX[taxid]

        with gzip.open(mirdb_file, 'rt') as f:
            for l in f:
                flds = l.split()
                mir_name = flds[0]
                if mir_name.startswith(mir_prefix):
                    target = flds[1]
                    score = float(flds[2])
                    self.mirna_targets_transcript.add_relationship(
                        {'name': mir_name}, {'sid': target}, {
                            'score': score,
                            'source': datasource_name
                        })
def __init__(self):
    """Declare MeSH outputs: descriptor/qualifier/concept/term nodes and their relationships."""
    super().__init__()

    # NodeSets
    self.descriptor = NodeSet(['MeshDescriptor'], merge_keys=['sid'])
    self.qualifier = NodeSet(['MeshQualifier'], merge_keys=['sid'])
    self.concept = NodeSet(['MeshConcept'], merge_keys=['sid'])
    self.term = NodeSet(['MeshTerm'], merge_keys=['sid'])

    # descriptor -> qualifier / concept
    self.descriptor_allowed_qualifier = RelationshipSet('ALLOWED', ['MeshDescriptor'], ['MeshQualifier'],
                                                        ['sid'], ['sid'])
    self.descriptor_has_concept = RelationshipSet('HAS', ['MeshDescriptor'], ['MeshConcept'], ['sid'], ['sid'])
    self.descriptor_has_concept.unique = True

    # concept -> term and concept <-> concept; deduplicated since the same
    # pairs recur while parsing
    self.concept_has_term = RelationshipSet('HAS', ['MeshConcept'], ['MeshTerm'], ['sid'], ['sid'])
    self.concept_has_term.unique = True
    self.concept_related_concept = RelationshipSet('RELATED', ['MeshConcept'], ['MeshConcept'], ['sid'], ['sid'])
    self.concept_related_concept.unique = True
def __init__(self):
    """Declare ChEBI outputs: Metabolite nodes and their IS_A/CHEBI_REL/MAPS relationships."""
    super().__init__()

    # NodeSets
    self.metabolites = NodeSet(['Metabolite'], merge_keys=['sid'], default_props={'source': 'chebi'})

    # Metabolite <-> Metabolite relationships, all tagged with source 'chebi'
    self.metabolite_isa_metabolite = RelationshipSet(
        'IS_A', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'chebi'})
    self.metabolite_rel_metabolite = RelationshipSet(
        'CHEBI_REL', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'chebi'})
    self.metabolite_maps_metabolite = RelationshipSet(
        'MAPS', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'chebi'})