示例#1
0
def analyseFile(file, envPMIDs):
    """Read PMID pairs from a tab-separated file and store them as relations.

    Each line of ``file`` is split on tabs; the first two columns are read as
    a pair of PubMed ids. A pair is kept only when both ids are contained in
    ``envPMIDs``. For every kept pair a ``PUBMED_CITED_BY`` relationship is
    created between the ``PUBMED`` node whose ``id`` is the first column and
    the ``PUBMED`` node whose ``id`` is the second column.

    :param file: path of the tab-separated input file
    :param envPMIDs: container of PubMed ids that may take part in a relation
                     (only membership tests are performed on it)
    :return: None
    """
    print("Starting file: ", file)

    procDB = neo4jInterface(simulate=False, printQueries=True)

    # try/finally so the DB handle is released even if parsing or a query fails.
    try:
        allpmids = defaultdict(set)

        with open(file, 'r') as infile:
            for line in infile:
                aline = line.strip().split('\t')

                # Blank or truncated lines carry no id pair; indexing the
                # second column on them would raise IndexError.
                if len(aline) < 2:
                    continue

                pmid_cites, pmid_cited_by = aline[0], aline[1]

                if pmid_cites in envPMIDs and pmid_cited_by in envPMIDs:
                    allpmids[pmid_cites].add(pmid_cited_by)

        # Every id stored above already passed the envPMIDs filter, so no
        # second membership test is needed here.
        for pmid, otherPMIDs in allpmids.items():
            for opmid in otherPMIDs:
                procDB.createRelationship('cpmid', ['PUBMED'], {'id': pmid},
                                          'opmid', ['PUBMED'], {'id': opmid},
                                          ['PUBMED_CITED_BY'], None)
    finally:
        procDB.close()
def analyseFile(splitFileID, relPMIDs):
    """Create DISEASE_MENTION relations for one disease hit file.

    ``splitFileID`` is right-aligned to width 4 and padded with zeros to
    build the name ``<resultBase>/disease/medline17n<ID>.index``. For every
    document of that hit file that is contained in ``relPMIDs`` the accepted
    synonym ids are collected (``_`` replaced by ``:``) and linked to the
    document's ``PUBMED`` node.

    A hit is accepted when its matched text (``hit.foundSyn``) has at least
    five characters or when ``hit.perfectHit`` is set.

    :param splitFileID: id of the medline split file
    :param relPMIDs: container of document ids to consider (membership tests
                     only)
    :return: None
    """
    fileID = "{:>4}".format(splitFileID).replace(" ", "0")

    diseaseHitsFile = resultBase + "/disease/medline17n" + fileID + ".index"

    hitsFile = SyngrepHitFile(diseaseHitsFile, diseaseMap)

    if len(hitsFile) == 0:
        return

    print("Document: " + str(fileID))
    print("Start Document: " + str(fileID))

    procDB = neo4jInterface(simulate=False, printQueries=False)

    # try/finally so the DB handle is released even when a query fails.
    try:
        for docID in hitsFile:

            if docID not in relPMIDs:
                continue

            foundUniqueHits = set()
            for hit in hitsFile.getHitsForDocument(docID):

                # Short matches are only trusted when they are perfect hits.
                if len(hit.foundSyn) < 5 and not hit.perfectHit:
                    continue

                foundUniqueHits.add(hit.synonym.id.replace('_', ':'))

            # Nothing to link: skip before touching the database at all.
            if not foundUniqueHits:
                continue

            # Whether the PUBMED node exists depends only on docID, so it is
            # resolved once per document rather than once per synonym.
            if addUnknownPubmeds:
                procDB.createNodeIfNotExists(['EVIDENCE', 'PUBMED'],
                                             {'id': docID})
                pubmedExists = True
            else:
                pubmedExists = bool(procDB.nodeExists(['PUBMED'],
                                                      {'id': docID}))

            if not pubmedExists:
                continue

            for synonymID in foundUniqueHits:
                res = procDB.createRelationship('disease', ['DISEASE'],
                                                {'id': synonymID}, 'pubmed',
                                                ['PUBMED'], {'id': docID},
                                                ['DISEASE_MENTION'], None)

                # The original filter tested `res != None` inside the
                # comprehension, which is too late: iterating None raises.
                print("Add: ", fileID, docID, synonymID,
                      list(res) if res is not None else [])

        print("End Document: " + str(fileID))
    finally:
        procDB.close()
    def __init__(self, chemokines=None, db=None):
        """Set up empty result containers and query the DB for each chemokine.

        :param chemokines: iterable of chemokines to query; it is copied into
                           a new list. ``None`` (the default) means no
                           chemokines.
        :param db: database interface to use. When ``None`` (the default) a
                   new ``neo4jInterface(simulate=False, printQueries=False)``
                   is created for this instance.
        """
        # Defaults are resolved here rather than in the signature: a default
        # expression is evaluated once at definition time, which would open a
        # DB connection on import and share it between all instances.
        self.chemokines = list(chemokines) if chemokines is not None else []
        self.db = db if db is not None else neo4jInterface(
            simulate=False, printQueries=False)

        # Containers start empty; they are not filled in this method itself.
        self.all_nodes = dict()
        self.all_relation_ids = set()
        self.all_relations = dict()

        self.simple_connections = set()

        for chemokine in self.chemokines:
            self._queryDB(chemokine)
示例#4
0
def addMIRTs(mirtarbaseEvidences, mirtarEvs):
    """Write miRTarBase evidence rows and their relations to the database.

    Iterates over ``mirtarbaseEvidences[0]``; every element is accessed like
    a mapping keyed by the miRTarBase column names. Rows for which neither
    the miRNA species nor the target gene species can be resolved through
    ``speciesName2TaxID`` are skipped. For the remaining rows the PUBMED and
    MIRTARBASE nodes are created, followed by the support, experiment,
    literature, organism, gene and miRNA relations. Per-miRTarBase-id sets
    make sure each node/relation is only requested once within this call.

    :param mirtarbaseEvidences: sequence whose first element is the iterable
                                of evidence rows to process
    :param mirtarEvs: not used by this function
    :return: None
    """
    print("Starting MIRTS: ", len(mirtarbaseEvidences[0]))

    # What has already been sent to the DB, keyed by miRTarBase id.
    dbcreatedMIRT2TAX = defaultdict(set)
    dbcreatedMIRT2PUBMED = defaultdict(set)
    dbcreatedMIRT2ExpTypes = defaultdict(set)
    dbcreatedMIRT2SupportTypes = defaultdict(set)

    dbcreatedMIRT2MIRNA = defaultdict(set)
    dbcreatedMIRT2GENE = defaultdict(set)

    dbcreatedMIRTIDs = set()

    procDB = neo4jInterface(simulate=False, printQueries=False)

    # try/finally so the DB handle is released even when a row or query fails.
    try:
        for mirnaEvidence in mirtarbaseEvidences[0]:

            # Columns: miRTarBase ID, miRNA, Species (miRNA), Target Gene,
            # Target Gene (Entrez Gene ID), Species (Target Gene),
            # Experiments, Support Type, References (PMID)
            mirtarID = mirnaEvidence['miRTarBase ID']

            mirtarMIRNA = mirnaEvidence['miRNA']
            mirtarMIRNASpecies = mirnaEvidence['Species (miRNA)']
            mirtarGENE = mirnaEvidence['Target Gene'].upper()
            mirtarGENESpecies = mirnaEvidence['Species (Target Gene)']
            mirtarRefs = mirnaEvidence['References (PMID)']

            # Experiment and functional type: the experiments cell is split
            # on "/" first, then every non-empty part on ";".
            rawExperiments = mirnaEvidence['Experiments']
            experimentParts = (rawExperiments.split("/")
                               if rawExperiments is not None else [])
            mirtarExperiment = [
                mirtarbase_exp_type(elem)
                for part in experimentParts if len(part) > 0
                for elem in part.split(";")
            ]

            mirtarSupport = mirtarbase_function_label(
                mirnaEvidence['Support Type'])

            mirnaSpeciesID = speciesName2TaxID.get(mirtarMIRNASpecies, None)
            geneSpeciesID = speciesName2TaxID.get(mirtarGENESpecies, None)

            if mirnaSpeciesID is None and geneSpeciesID is None:
                continue

            # A common tax id only exists when both species agree.
            if mirnaSpeciesID == geneSpeciesID:
                commonTaxID = mirnaSpeciesID
            else:
                commonTaxID = None

            procDB.createNodeIfNotExists(['PUBMED', 'EVIDENCE'],
                                         {'id': mirtarRefs},
                                         'n', ['PUBMED'], {'id': mirtarRefs})

            if mirtarID not in dbcreatedMIRTIDs:
                dbcreatedMIRTIDs.add(mirtarID)
                procDB.createNode(['MIRTARBASE', 'EVIDENCE'],
                                  {'id': mirtarID,
                                   'tax_gene': geneSpeciesID,
                                   'tax_mirna': mirnaSpeciesID})

            if mirtarSupport not in dbcreatedMIRT2SupportTypes[mirtarID]:
                dbcreatedMIRT2SupportTypes[mirtarID].add(mirtarSupport)
                procDB.createRelationship(
                    'ms', ['MIRTARBASE_SUPPORT'], {'id': mirtarSupport},
                    'mtb', ['MIRTARBASE'], {'id': mirtarID},
                    ['MIRTARBASE_FUNCTIONAL_SUPPORT'], None)

            for expType in mirtarExperiment:

                if expType not in dbcreatedMIRT2ExpTypes[mirtarID]:
                    dbcreatedMIRT2ExpTypes[mirtarID].add(expType)
                    procDB.createRelationship(
                        'me', ['MIRTARBASE_EXPERIMENT'], {'id': expType},
                        'mtb', ['MIRTARBASE'], {'id': mirtarID},
                        ['MIRTARBASE_EXPERIMENT_SUPPORT'], None)

            # TODO add relation props?
            if mirtarRefs not in dbcreatedMIRT2PUBMED[mirtarID]:
                dbcreatedMIRT2PUBMED[mirtarID].add(mirtarRefs)
                procDB.createRelationship(
                    'pb', ['PUBMED'], {'id': mirtarRefs},
                    'mtb', ['MIRTARBASE'], {'id': mirtarID},
                    ['MIRTARBASE_LITERATURE_SUPPORT'], {})

            if commonTaxID is not None:

                if mirnaSpeciesID not in dbcreatedMIRT2TAX[mirtarID]:
                    dbcreatedMIRT2TAX[mirtarID].add(mirnaSpeciesID)
                    procDB.createRelationship(
                        'mtb', ['MIRTARBASE'], {'id': mirtarID},
                        'taxid', ['TAX'], {'id': mirnaSpeciesID},
                        ['ORGANISM_SUPPORT'], {})

            if mirtarGENE not in dbcreatedMIRT2GENE[mirtarID]:
                dbcreatedMIRT2GENE[mirtarID].add(mirtarGENE)
                procDB.createRelationship(
                    'gene', ['GENE'], {'id': mirtarGENE},
                    'mtb', ['MIRTARBASE'], {'id': mirtarID},
                    ['GENE_MENTION'], {'tax': geneSpeciesID})

            if mirtarMIRNA not in dbcreatedMIRT2MIRNA[mirtarID]:
                dbcreatedMIRT2MIRNA[mirtarID].add(mirtarMIRNA)
                procDB.createRelationship(
                    'mtb', ['MIRTARBASE'], {'id': mirtarID},
                    'mirna', ['MIRNA'], {'name': mirtarMIRNA},
                    ['MIRNA_MENTION'], {'tax': mirnaSpeciesID})
    finally:
        procDB.close()
示例#5
0
from porestat.utils.DataFrame import DataFrame
from utils.idutils import ltype2label, makeDBGeneID, mirtarbase_exp_type, mirtarbase_function_label, speciesName2TaxID, \
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce

# Parse the miRTarBase table from the data directory and show its header.
mirtarbaseEvidences = DataFrame.parseFromFile(
    dataDir + "/miRExplore/miRTarBase.csv",
    bConvertTextToNumber=False,
)

print(mirtarbaseEvidences.getHeader())

# Counters start empty; they are not filled within this part of the script.
experimentTypes, supportTypes, referencesWithComma = Counter(), Counter(), Counter()

db = neo4jInterface(simulate=False, printQueries=False)

# Remove the existing miRTarBase relations. Each entry is
# (label of node 'n', label of node 'm', relationship type).
for _labelN, _labelM, _relType in (
        ('GENE', 'MIRTARBASE', 'GENE_MENTION'),
        ('MIRTARBASE', 'MIRNA', 'MIRNA_MENTION'),
        ('MIRTARBASE', 'PUBMED', 'MIRTARBASE_LITERATURE_SUPPORT'),
        ('MIRTARBASE_SUPPORT', 'MIRTARBASE', 'MIRTARBASE_FUNCTIONAL_SUPPORT'),
        ('MIRTARBASE_EXPERIMENT', 'MIRTARBASE', 'MIRTARBASE_EXPERIMENT_SUPPORT'),
        ('MIRTARBASE', 'TAX', 'ORGANISM_SUPPORT'),
):
    db.deleteRelationship('n', [_labelN], None, 'm', [_labelM], None, [_relType], None, 'r')

# Then remove the miRTarBase-specific nodes themselves.
for _nodeLabel in ("MIRTARBASE", "MIRTARBASE_SUPPORT", "MIRTARBASE_EXPERIMENT"):
    db.deleteNode([_nodeLabel], None)

db.createUniquenessConstraint('MIRTARBASE', 'id')

# Disabled early exit: flip the condition to stop after the clean-up above.
if False:
    db.close()
    exit(0)
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, eprint

# Location of the text-mining results and the disease synonym map in it.
resultBase = dataDir + "/miRExplore/textmine/results/"
diseaseMap = SynfileMap(resultBase + "/disease/synfile.map")
diseaseMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# os.path.join instead of plain concatenation: every other path in this
# script inserts a separator after dataDir, this one did not, so it only
# resolved when dataDir itself ended with a slash.
diseaseObo = GeneOntology(os.path.join(dataDir, "miRExplore/doid.obo"))

# Drop all existing DISEASE_MENTION relations between DISEASE and PUBMED nodes.
db = neo4jInterface(simulate=False)
db.deleteRelationship('n', ['DISEASE'], None, 'm', ['PUBMED'], None,
                      ['DISEASE_MENTION'], None)

# Numeric ids of the available medline split files, highest id first.
# NOTE(review): ids are collected from the "hgnc" result folder although this
# script sets up the disease map - confirm this is intended.
allfiles = glob.glob(resultBase + "/hgnc/medline17n*.index")
allfileIDs = sorted(
    (int(os.path.basename(x).replace('medline17n', '').replace('.index', ''))
     for x in allfiles),
    reverse=True,
)

addUnknownPubmeds = False

# All PUBMED nodes currently stored, plus an (initially empty) set of
# relevant PMIDs; how the two are connected is not visible in this excerpt.
retVal = db.matchNodes(['PUBMED'], None, nodename='n')
relevantPMIDs = set()
def analyseFile(splitFileID, relPMIDs):
    """Create CELLLINE_MENTION relations for one cellline hit file.

    ``splitFileID`` is right-aligned to width 4 and padded with zeros to
    build the name ``<resultBase>/cellline/medline17n<ID>.index``. For every
    document of that hit file that is contained in ``relPMIDs`` the accepted
    synonym ids are collected and linked to the document's ``PUBMED`` node.

    A hit is accepted when its matched text (``hit.foundSyn``) has at least
    five characters or when ``hit.perfectHit`` is set.

    :param splitFileID: id of the medline split file
    :param relPMIDs: container of document ids to consider (membership tests
                     only)
    :return: None
    """
    fileID = "{:>4}".format(splitFileID).replace(" ", "0")

    celllineHitsFile = resultBase + "/cellline/medline17n" + fileID + ".index"

    hitsFile = SyngrepHitFile(celllineHitsFile, celllinesMap)

    if len(hitsFile) == 0:
        return

    print("Start Document: " + str(fileID))

    procDB = neo4jInterface(simulate=False, printQueries=False)

    # try/finally so the DB handle is released even when a query fails.
    try:
        for docID in hitsFile:

            if docID not in relPMIDs:
                continue

            foundUniqueHits = set()
            foundOrgs = set()

            for hit in hitsFile.getHitsForDocument(docID):

                # Short matches are only trusted when they are perfect hits.
                if len(hit.foundSyn) < 5 and not hit.perfectHit:
                    continue

                # Tax id looked up from the synonym file the hit came from.
                foundOrgs.add(synfileID2tax[hit.synonymID.synfile])
                foundUniqueHits.add(hit.synonym.id)

            if not foundUniqueHits:
                continue

            # Whether the PUBMED node exists depends only on docID, so it is
            # resolved once per document rather than once per cellline.
            if addUnknownPubmeds:
                procDB.createNodeIfNotExists(['EVIDENCE', 'PUBMED'],
                                             {'id': docID})
                pubmedExists = True
            else:
                pubmedExists = bool(procDB.nodeExists(['PUBMED'],
                                                      {'id': docID}))

            if pubmedExists:
                for celllineID in foundUniqueHits:
                    res = procDB.createRelationship('cellline', ['CELLLINE'],
                                                    {'id': celllineID},
                                                    'pubmed', ['PUBMED'],
                                                    {'id': docID},
                                                    ['CELLLINE_MENTION'], None)

                    # The original filter tested `res != None` inside the
                    # comprehension, which is too late: iterating None raises.
                    print("Add: ", fileID, docID, celllineID,
                          list(res) if res is not None else [])

            foundOrgs = foundOrgs.difference(allSet)

            # TODO: the organism association is not implemented yet. Planned:
            #   exactly one organism left -> create relation
            #   none left                 -> nothing to do
            #   more than one left        -> ambiguous pubmed, skip

        print("End Document: " + str(fileID))
    finally:
        procDB.close()