Exemplo n.º 1
0
def main():
    """Run the extraction pipeline and write the sentence and gene files.

    Parses the command-line arguments, initialises geneFinder, loads the
    pickled/marshaled lookup data, then iterates over parseLines(). For
    every item yielded, the formatted metadata goes to
    ``<timestamp>-genes.txt`` (and is printed) and the sentence goes to
    ``<timestamp>-sentence.txt``. Per the original note, the sentences
    are meant to be handed to TEES or another NLP program afterwards.
    """
    args = parseArgs()
    logging.info("Parsed arguments")
    # setup for findGenes() later
    geneFinder.initData(exclMarkerTypes=["dnaSeq"])
    logging.info("Set up environment for findGenes()")
    logging.info("Load pickled/marshaled relex, authors, entrez symbols")
    # NOTE(review): both loads read from the same file object, so the file
    # presumably holds two consecutive pickles (relex, then authors) --
    # confirm. pickle/marshal data must come from a trusted source.
    relex = pickle.load(args.extractionData)
    authors = pickle.load(args.extractionData)
    entrez = marshal.load(args.entrezData)['entrez2sym']
    logging.info("Open output files")
    # Context managers make sure both output files are closed, even when
    # parseLines() or formatMeta() raises part-way through.
    with open(args.timestamp + '-sentence.txt', 'w') as sentenceFile, \
            open(args.timestamp + '-genes.txt', 'w') as geneFile:
        for pmid, sentence, geneIds, geneNames, rawNames, relations in parseLines(
                args.inputFiles, entrez, relex, authors):
            # Format once and reuse for both the file and stdout.
            meta = formatMeta(pmid, geneIds, geneNames, rawNames, relations)
            geneFile.write(meta)
            print(meta)
            sentenceFile.write(sentence + '\n')
            geneFile.flush()
    # NOTE(review): `nonamed` is not defined in this function; presumably a
    # module-level collection -- confirm. The `> 1` threshold means a
    # single entry is not reported; verify that is intended.
    if len(nonamed) > 1:
        logging.warning("Not in entrez2sym: {}".format(nonamed))
Exemplo n.º 2
0
def main():
	"""Run the extraction pipeline and write the sentence and gene files.

	Parses the command-line arguments, initialises geneFinder, loads the
	pickled/marshaled lookup data, then iterates over parseLines(). For
	every item yielded, the formatted metadata goes to
	``<timestamp>-genes.txt`` (and is printed) and the sentence goes to
	``<timestamp>-sentence.txt``. Per the original note, the sentences
	are meant to be handed to TEES or another NLP program afterwards.
	"""
	args = parseArgs()
	logging.info("Parsed arguments")
	geneFinder.initData(exclMarkerTypes=["dnaSeq"]) # setup for findGenes() later
	logging.info("Set up environment for findGenes()")
	logging.info("Load pickled/marshaled relex, authors, entrez symbols")
	# NOTE(review): both loads read from the same file object, so the file
	# presumably holds two consecutive pickles (relex, then authors) --
	# confirm. pickle/marshal data must come from a trusted source.
	relex = pickle.load(args.extractionData)
	authors = pickle.load(args.extractionData)
	entrez = marshal.load(args.entrezData)['entrez2sym']
	logging.info("Open output files")
	# Context managers make sure both output files are closed, even when
	# parseLines() or formatMeta() raises part-way through.
	with open(args.timestamp+'-sentence.txt', 'w') as sentenceFile, open(args.timestamp+'-genes.txt', 'w') as geneFile:
		for pmid, sentence, geneIds, geneNames, rawNames, relations in parseLines(args.inputFiles, entrez, relex, authors):
			# Format once and reuse for both the file and stdout.
			meta = formatMeta(pmid, geneIds, geneNames, rawNames, relations)
			geneFile.write(meta)
			print(meta)
			sentenceFile.write(sentence + '\n')
			geneFile.flush()
	# NOTE(review): `nonamed` is not defined in this function; presumably a
	# module-level collection -- confirm. The `> 1` threshold means a
	# single entry is not reported; verify that is intended.
	if len(nonamed) > 1:
		logging.warning("Not in entrez2sym: {}".format(nonamed))
Exemplo n.º 3
0
def startup(paramDict):
    """Initialise geneFinder's data with the "dnaSeq" marker type excluded.

    paramDict is accepted but not read by this function.
    """
    excludedMarkers = ["dnaSeq"]
    geneFinder.initData(exclMarkerTypes=excludedMarkers)
Exemplo n.º 4
0
 def startup(self, paramDict):
     """Initialise geneFinder and reset this instance's row counter.

     Per the original note, this is called once upon startup on each
     cluster node. geneFinder is initialised with the "dnaSeq" marker
     type excluded, then self.rowCount is set to 0. paramDict is not read.
     """
     excludedMarkers = ["dnaSeq"]
     geneFinder.initData(exclMarkerTypes=excludedMarkers)
     self.rowCount = 0
Exemplo n.º 5
0
def startup(paramDict):
    """Initialise geneFinder's data with the "dnaSeq" marker type excluded.

    paramDict is accepted but not read by this function.
    """
    excludedMarkers = ["dnaSeq"]
    geneFinder.initData(exclMarkerTypes=excludedMarkers)
Exemplo n.º 6
0
def startup(paramDict):
    """Load the varFinder database, then initialise geneFinder.

    geneFinder is initialised with the "dnaSeq" marker type excluded.
    paramDict is accepted but not read by this function.
    """
    varFinder.loadDb()
    excludedMarkers = ["dnaSeq"]
    geneFinder.initData(exclMarkerTypes=excludedMarkers)
Exemplo n.º 7
0
 def startup(self, paramDict):
     """Initialise geneFinder and reset this instance's row counter.

     Per the original note, this is called once upon startup on each
     cluster node. geneFinder is initialised with the "dnaSeq" marker
     type excluded, then self.rowCount is set to 0. paramDict is not read.
     """
     excludedMarkers = ["dnaSeq"]
     geneFinder.initData(exclMarkerTypes=excludedMarkers)
     self.rowCount = 0
Exemplo n.º 8
0
def startup(paramDict):
    """Initialise geneFinder, then load the varFinder database.

    geneFinder is initialised with the "dnaSeq" and "band" marker types
    excluded. paramDict is accepted but not read by this function.
    """
    excludedMarkers = ["dnaSeq", "band"]
    geneFinder.initData(exclMarkerTypes=excludedMarkers)
    # loadDb() runs with its default arguments; an earlier variant of this
    # call passed loadSequences=False.
    varFinder.loadDb()
Exemplo n.º 9
0
def startup(paramDict):
    """Load the varFinder database, then initialise geneFinder.

    geneFinder is initialised with the "dnaSeq" marker type excluded.
    paramDict is accepted but not read by this function.
    """
    varFinder.loadDb()
    excludedMarkers = ["dnaSeq"]
    geneFinder.initData(exclMarkerTypes=excludedMarkers)
Exemplo n.º 10
0
 def startup(self, paramDict, resultDict):
     """Read settings from paramDict and initialise geneFinder.

     Stores paramDict["maxCount"] on self.maxCount and the result of
     getSearchTypes(paramDict) on self.searchTypes, then calls
     geneFinder.initData with those search types and addOptional=True.
     resultDict is not read.
     """
     self.maxCount = paramDict["maxCount"]
     # The value is not used afterwards, but the lookup still raises
     # KeyError when "keywords" is missing from paramDict.
     keywordsPath = paramDict["keywords"]
     requestedTypes = getSearchTypes(paramDict)
     self.searchTypes = requestedTypes
     geneFinder.initData(self.searchTypes, addOptional=True)
Exemplo n.º 11
0
 def startup(self, paramDict):
     """Initialise geneFinder's data with addOptional=True.

     paramDict is not read; an earlier variant set self.searchTypes from
     getSearchTypes(paramDict) before the call.
     """
     includeOptional = True
     geneFinder.initData(addOptional=includeOptional)
Exemplo n.º 12
0
def startup(paramDict):
    """Initialise geneFinder's data with the "dnaSeq" marker type excluded.

    paramDict is accepted but not read by this function.
    """
    # The former `global seqCache` declaration was dropped: the function
    # never assigns seqCache, so the declaration had no effect.
    # don't use seqs for gene finding
    geneFinder.initData(exclMarkerTypes=["dnaSeq"])
Exemplo n.º 13
0
 def startup(self, paramDict):
     """Initialise geneFinder's data with addOptional=True.

     paramDict is not read; an earlier variant set self.searchTypes from
     getSearchTypes(paramDict) before the call.
     """
     includeOptional = True
     geneFinder.initData(addOptional=includeOptional)
Exemplo n.º 14
0
 def startup(self, paramDict, resultDict):
     """Read settings from paramDict and initialise geneFinder.

     Stores paramDict["maxCount"] on self.maxCount and the result of
     getSearchTypes(paramDict) on self.searchTypes, then calls
     geneFinder.initData with those search types and addOptional=True.
     resultDict is not read.
     """
     self.maxCount = paramDict["maxCount"]
     # The value is not used afterwards, but the lookup still raises
     # KeyError when "keywords" is missing from paramDict.
     keywordsPath = paramDict["keywords"]
     requestedTypes = getSearchTypes(paramDict)
     self.searchTypes = requestedTypes
     geneFinder.initData(self.searchTypes, addOptional=True)
Exemplo n.º 15
0
def startup(paramDict):
    """Initialise geneFinder's data with the "dnaSeq" marker type excluded.

    paramDict is accepted but not read by this function.
    """
    # The former `global seqCache` declaration was dropped: the function
    # never assigns seqCache, so the declaration had no effect.
    # don't use seqs for gene finding
    geneFinder.initData(exclMarkerTypes=["dnaSeq"])
Exemplo n.º 16
0
def startup(paramDict):
    """Initialise geneFinder, then load the varFinder database.

    geneFinder is initialised with the "dnaSeq" and "band" marker types
    excluded. paramDict is accepted but not read by this function.
    """
    excludedMarkers = ["dnaSeq", "band"]
    geneFinder.initData(exclMarkerTypes=excludedMarkers)
    # loadDb() runs with its default arguments; an earlier variant of this
    # call passed loadSequences=False.
    varFinder.loadDb()