def init():
    # Purpose: open the association output file, set the database login and
    #   fill mirbaseDict from the database
    # Returns: nothing
    # Assumes: mirbaseAssocFile, TAB, CRT and mirbaseDict exist at module level
    # Effects: creates/truncates mirbaseAssocFile and writes its header line;
    #   queries the database; appends accession keys to mirbaseDict
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set

    global fpMirbaseAssoc, mirbaseDict

    fpMirbaseAssoc = open(mirbaseAssocFile, 'w')

    # header line of the output file
    fpMirbaseAssoc.write('MGI%smiRBase%s' % (TAB, CRT))

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    # a1: accessions in logical DB 83; a2: the preferred 'MGI:' ID (logical
    # DB 1) attached to the same object
    query = (
        "select a1._Accession_key as aKey, a2.accid as mgiID "
        "from ACC_Accession a1, ACC_Accession a2 "
        "where a1._MGIType_key = 2 "
        "and a1._LogicalDB_key = 83 "
        "and a1._object_key = a2._object_key "
        "and a2._MGIType_key = 2 "
        "and a2._LogicalDB_key = 1 "
        "and a2. preferred = 1 "
        "and a2.prefixPart = 'MGI:' "
        "order by a2.accid "
    )

    # one MGI ID may map to several accession keys
    for row in db.sql(query, 'auto'):
        mgiID = row['mgiID']
        accessionKey = row['aKey']
        mirbaseDict.setdefault(mgiID, []).append(accessionKey)
def process():
    """Run each configured query and write the rows of its last statement.

    Connects as the public read-only user to the server/database named by
    the command-line arguments, writes the column header to stdout, then
    for every query returned by getQueries(args) runs its statements and
    hands the result set of the final statement to writeResults().  Timing
    and progress messages go to stderr when args.verbose is set.
    """
    args = getArgs()

    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    if args.verbose:
        sys.stderr.write("Hitting database %s %s as mgd_public\n\n" %
                         (args.host, args.db))

    startTime = time.time()

    sys.stdout.write(FIELDSEP.join(OutputColumns) + RECORDSEP)

    for i, q in enumerate(getQueries(args)):
        qStartTime = time.time()

        # str.split() behaves the same as the Python-2-only string.split()
        # function and also exists on Python 3.
        results = db.sql(q.split(SQLSEPARATOR), 'auto')

        if args.verbose:
            sys.stderr.write("Query %d SQL time: %8.3f seconds\n\n" %
                             (i, time.time() - qStartTime))

        # db.sql returns one result list per statement; only the last one
        # is written out.
        nResults = writeResults(results[-1])

        if args.verbose:
            sys.stderr.write("%d references processed\n\n" % (nResults))

    if args.verbose:
        sys.stderr.write("Total time: %8.3f seconds\n\n" %
                         (time.time() - startTime))
def initialize():
    """Read settings from the environment and configure the database login.

    Sets the module globals mgiMapFile, user and passwordFile from
    MGI_MAP_FILE, MGD_DBUSER and MGD_DBPASSWORDFILE, resets fpMap to None
    and switches the db module to a single shared connection.

    Returns 1 when MGI_MAP_FILE is unset or empty (after printing a
    message), otherwise 0.  The database settings are applied either way.
    """
    global mgiMapFile
    global fpMap
    global user
    global passwordFile

    mgiMapFile = os.getenv('MGI_MAP_FILE')
    user = os.getenv('MGD_DBUSER')
    passwordFile = os.getenv('MGD_DBPASSWORDFILE')

    # Only the map file setting is validated here.
    if mgiMapFile:
        status = 0
    else:
        print('Environment variable not set: MGI_MAP_FILE')
        status = 1

    # The map file itself is not opened by this function.
    fpMap = None

    # All queries share one database connection.
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)
    db.useOneConnection(1)

    return status
def init():
    # Purpose: set up the database connection, open the input, load and QC
    #   report files, and fill the homologyLookup and mouseLookup
    #   dictionaries from the database
    # Returns: nothing
    # Assumes: inFilePath, loadFilePath, qcRptPath, homologyLookup and
    #   mouseLookup exist at module level
    # Effects: opens a database connection; creates/truncates the load and
    #   QC report files; calls exit() with a message if a file cannot be
    #   opened
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set

    global egToMarkerDict, mgiToMarkerDict
    global fpInFile, fpClustererFile, fpLoadFile, fpQcRpt

    def _openOrExit(path, mode, template):
        # open 'path', or exit with 'template' filled in with the path
        try:
            return open(path, mode)
        except:
            exit(template % path)

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    fpInFile = _openOrExit(inFilePath, 'r',
                           'Could not open file for reading %s\n')
    fpLoadFile = _openOrExit(loadFilePath, 'w',
                             'Could not open file for writing %s\n')
    fpQcRpt = _openOrExit(qcRptPath, 'w',
                          'Could not open file for writing %s\n')

    # accession ID (logical DBs 47, 64, 172) -> [organism key, marker key]
    # for markers with status key 1
    homologyQuery = (
        "select a.accid, a._object_key as markerKey, m._organism_key "
        "from acc_accession a, mrk_marker m "
        "where a._mgitype_key = 2 "
        "and a._logicalDB_key in (47, 64, 172) "
        "and a._object_key = m._marker_key "
        "and m._marker_status_key = 1"
    )
    for row in db.sql(homologyQuery, 'auto'):
        homologyLookup[row['accid']] = [int(row['_organism_key']),
                                        int(row['markerKey'])]

    # 'MGI:' accession ID (logical DB 1) -> marker key for markers with
    # status key 1
    mouseQuery = (
        "select a.accid, a._object_key as markerKey "
        "from acc_accession a, mrk_marker m "
        "where a._mgitype_key = 2 "
        "and a._logicalDB_key = 1 "
        "and a.prefixPart = 'MGI:' "
        "and a._object_key = m._marker_key "
        "and m._marker_status_key = 1"
    )
    for row in db.sql(mouseQuery, 'auto'):
        mouseLookup[row['accid']] = row['markerKey']
def init():
    # Purpose: set up the database connection and open the report and load
    #   output files
    # Returns: nothing
    # Assumes: loadFilePath exists at module level
    # Effects: opens a database connection; creates/truncates connComp.rpt
    #   and hybridCluster.rpt in the current directory, the file named by
    #   HYBRID_RPT, and the load file; calls exit() with a message if the
    #   load file cannot be opened
    # Throws: KeyError if MGD_DBUSER, MGD_DBPASSWORDFILE or HYBRID_RPT is
    #   not set; the three report opens are not guarded

    global fpCC, fpH, fpRptFile, fpLoadFile

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    # temporary reports
    fpCC = open('connComp.rpt', 'w')
    fpH = open('hybridCluster.rpt', 'w')

    # report whose location comes from the environment
    fpRptFile = open(os.environ['HYBRID_RPT'], 'w')

    # load file in homologyload format
    try:
        fpLoadFile = open(loadFilePath, 'w')
    except:
        exit('Could not open file for writing %s\n' % loadFilePath)
def init():
    # Purpose: open the input and BCP output files, set up the database
    #   connection and look up the next cluster, cluster member and
    #   accession keys
    # Returns: nothing
    # Assumes: inFile, clusterBCP, memberBCP and accessionBCP exist at
    #   module level
    # Effects: opens a database connection; creates/truncates the BCP files;
    #   calls exit(1, message) if a file cannot be opened
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set

    global fpInFile, fpClusterBCP, fpMemberBCP, fpAccessionBCP
    global nextClusterKey, nextMemberKey, nextAccessionKey

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    def _queryNextKey(sql):
        # value of the 'nextKey' column in the first row returned by 'sql'
        return db.sql(sql, 'auto')[0]['nextKey']

    fpInFile = _openOrExit(inFile, 'r')
    fpClusterBCP = _openOrExit(clusterBCP, 'w')
    fpMemberBCP = _openOrExit(memberBCP, 'w')
    fpAccessionBCP = _openOrExit(accessionBCP, 'w')

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    # max() + 1 is null for an empty table; start those keys at 1000
    nextClusterKey = _queryNextKey(
        'select max(_Cluster_key) + 1 as nextKey from MRK_Cluster')
    if nextClusterKey is None:
        nextClusterKey = 1000

    nextMemberKey = _queryNextKey(
        'select max(_ClusterMember_key) + 1 as nextKey from MRK_ClusterMember')
    if nextMemberKey is None:
        nextMemberKey = 1000

    # the accession key is taken as returned, with no empty-table fallback
    nextAccessionKey = _queryNextKey(
        'select max(_Accession_key) + 1 as nextKey from ACC_Accession')
def init ():
    """Configure the database login and fetch the keys this load needs.

    Sets the module globals accKey, gensatLogicalDBKey, egLogicalDBKey,
    markerMGITypeKey and createdByKey from five queries submitted together.
    Each query must return exactly one row; otherwise a message is printed
    and the process exits with status 1.
    """
    global accKey, gensatLogicalDBKey, egLogicalDBKey
    global markerMGITypeKey, createdByKey

    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)
    db.useOneConnection(1)

    # One statement per key; results[n] holds the rows of statement n.
    cmds = [
        'select max(_Accession_key) + 1 as _Accession_key from ACC_Accession',
        "select _LogicalDB_key from ACC_LogicalDB where name = '%s'" % (gensatLogicalDB),
        "select _LogicalDB_key from ACC_LogicalDB where name = '%s'" % (egLogicalDB),
        "select _MGIType_key from ACC_MGIType where name = '%s'" % (markerMGIType),
        "select _User_key from MGI_User where name = '%s'" % (createdBy),
    ]
    results = db.sql(cmds, 'auto')

    # Stop the load at the first key that cannot be determined.
    if len(results[0]) != 1:
        print('Cannot determine the next Accession key')
        sys.exit(1)
    accKey = results[0][0]['_Accession_key']

    if len(results[1]) != 1:
        print('Cannot determine the Logical DB key for "' + gensatLogicalDB + '"')
        sys.exit(1)
    gensatLogicalDBKey = results[1][0]['_LogicalDB_key']

    if len(results[2]) != 1:
        print('Cannot determine the Logical DB key for "' + egLogicalDB + '"')
        sys.exit(1)
    egLogicalDBKey = results[2][0]['_LogicalDB_key']

    if len(results[3]) != 1:
        print('Cannot determine the MGI Type key for "' + markerMGIType + '"')
        sys.exit(1)
    markerMGITypeKey = results[3][0]['_MGIType_key']

    if len(results[4]) != 1:
        print('Cannot determine the User key for "' + createdBy + '"')
        sys.exit(1)
    createdByKey = results[4][0]['_User_key']
def init():
    '''
    # requires: inputFileName, user and passwordFileName set at module level
    #
    # effects:
    # 1. Configures the database login on a single shared connection
    # 2. Derives the diagnostics, error and output file names from
    #    inputFileName and opens all four files
    # 3. Routes SQL logging to the diagnostics file
    # 4. Writes start-up information to the diagnostics and error files
    # 5. Calls exit(1, message) if a file cannot be opened
    #
    # returns: nothing
    '''

    global inputFile, outputFile, diagFile, errorFile, errorFileName, diagFileName

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    diagFileName = inputFileName + '.diagnostics'
    errorFileName = inputFileName + '.error'
    outputFileName = inputFileName + '.trans'

    inputFile = _openOrExit(inputFileName, 'r')
    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    outputFile = _openOrExit(outputFileName, 'w')

    # log every SQL statement to the diagnostics file
    db.set_sqlLogFunction(db.sqlLogAll)
    db.set_sqlLogFD(diagFile)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())
    diagFile.write('Input File: %s\n' % inputFileName)
    diagFile.write('Output File: %s\n' % outputFileName)

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def init():
    """Print the current database server and name, then set the login.

    The login comes from the module-level 'user' and 'passwordFileName'.
    """
    serverLine = 'DB Server:' + db.get_sqlServer()
    print(serverLine)
    nameLine = 'DB Name: ' + db.get_sqlDatabase()
    print(nameLine)
    # make the two lines visible before any database work starts
    sys.stdout.flush()

    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)
def init():
    # requires: inputFileName, strainFileName, user and passwordFileName set
    #   at module level
    #
    # effects:
    # 1. Configures the database login on a single shared connection
    # 2. Names the diagnostics and error files after the input file's base
    #    name plus the current date; they are created in the current
    #    directory
    # 3. Opens the diagnostics, error, input and strain files
    # 4. Routes SQL logging to the diagnostics file and writes start-up
    #    information
    # 5. Calls exit(1, message) if a file cannot be opened
    #
    # returns: nothing

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global strainFile

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fdate = mgi_utils.date('%m%d%Y')    # current date
    head, tail = os.path.split(inputFileName)
    stem = tail + '.' + fdate
    diagFileName = stem + '.diagnostics'
    errorFileName = stem + '.error'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    inputFile = _openOrExit(inputFileName, 'r')
    strainFile = _openOrExit(strainFileName, 'w')

    # log every SQL statement to the diagnostics file
    db.set_sqlLogFunction(db.sqlLogAll)
    db.set_sqlLogFD(diagFile)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def init ():
    """Print the current database server and name, then set the login.

    The login comes from the module-level 'user' and 'passwordFileName'.
    """
    serverLine = 'DB Server:' + db.get_sqlServer()
    print(serverLine)
    nameLine = 'DB Name: ' + db.get_sqlDatabase()
    print(nameLine)
    # make the two lines visible before any database work starts
    sys.stdout.flush()

    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)
def init():
    """Configure the database login, finish bcpCommand and open all files.

    Appends the server, database, two '%s' slots, currentDir and the
    field/record delimiters to the module-level bcpCommand.  Opens the
    diagnostics and error files (in outputDir, named after the input
    file), the input file and the marker, reference and alias output
    files, then writes start-up information.  Calls exit(1, message) if a
    file cannot be opened.
    """
    global bcpCommand
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global markerFile, refFile, aliasFile

    def _openOrExit(path, mode):
        # Open 'path', or exit with status 1 and a message naming it.
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # The two '%s' slots are left in place to be filled in later.
    bcpCommand += (db.get_sqlServer() + ' ' + db.get_sqlDatabase() +
                   ' %s ' + currentDir + ' %s "\\t" "\\n" mgd')

    head, tail = os.path.split(inputFileName)
    logStem = outputDir + '/' + tail
    diagFileName = logStem + '.diagnostics'
    errorFileName = logStem + '.error'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    inputFile = _openOrExit(inputFileName, 'r')
    markerFile = _openOrExit(markerFileName, 'w')
    refFile = _openOrExit(refFileName, 'w')
    aliasFile = _openOrExit(aliasFileName, 'w')

    # Log every SQL statement.
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def init():
    """Configure the database login, finish bcpCommand and open all files.

    Appends the server, database, two '%s' slots, currentDir and the
    field/record delimiters to the module-level bcpCommand.  Opens the
    diagnostics and error files (in outputDir, named after the input
    file), the input file and the marker, reference and alias output
    files, then writes start-up information.  Calls exit(1, message) if a
    file cannot be opened.
    """
    global bcpCommand
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global markerFile, refFile, aliasFile

    def _openOrExit(path, mode):
        # Open 'path', or exit with status 1 and a message naming it.
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # The two '%s' slots are left in place to be filled in later.
    bcpCommand += (db.get_sqlServer() + ' ' + db.get_sqlDatabase() +
                   ' %s ' + currentDir + ' %s "\\t" "\\n" mgd')

    head, tail = os.path.split(inputFileName)
    logStem = outputDir + '/' + tail
    diagFileName = logStem + '.diagnostics'
    errorFileName = logStem + '.error'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    inputFile = _openOrExit(inputFileName, 'r')
    markerFile = _openOrExit(markerFileName, 'w')
    refFile = _openOrExit(refFileName, 'w')
    aliasFile = _openOrExit(aliasFileName, 'w')

    # Log every SQL statement.
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def init():
    """Open the association output file and set the database login.

    Creates/truncates mirbaseAssocFile, writes its two-column header line
    and takes the database user and password file from MGD_DBUSER and
    MGD_DBPASSWORDFILE (KeyError if either is unset).
    """
    global fpMirbaseAssoc

    fpMirbaseAssoc = open(mirbaseAssocFile, 'w')

    # header line of the output file
    fpMirbaseAssoc.write('MGI%smiRBase%s' % (TAB, CRT))

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)
def main():
    """Connect as the public user and run the report chosen by args.option.

    'counts' runs doCounts(); any other value runs doSamples().  The
    elapsed time is reported through verbose().
    """
    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    began = time.time()

    runReport = doCounts if args.option == 'counts' else doSamples
    runReport()

    verbose("Total time: %8.3f seconds\n\n" % (time.time() - began))
def init():
    # Purpose: set up the database connection and open the input,
    #   diagnostics and error files
    # Returns: nothing
    # Assumes: inputFileName, user and passwordFileName exist at module level
    # Effects: sets the file globals; names the diagnostics and error files
    #   after the input file's base name plus the current date; routes SQL
    #   logging to the diagnostics file; calls exit(1, message) if a file
    #   cannot be opened
    # Throws: nothing explicitly

    global inputFile, diagFile, errorFile, errorFileName, diagFileName

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fdate = mgi_utils.date('%m%d%Y')    # current date
    head, tail = os.path.split(inputFileName)
    stem = tail + '.' + fdate
    diagFileName = stem + '.diagnostics'
    errorFileName = stem + '.error'

    inputFile = _openOrExit(inputFileName, 'r')
    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')

    # log every SQL statement to the diagnostics file
    db.set_sqlLogFunction(db.sqlLogAll)
    db.set_sqlLogFD(diagFile)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())
    diagFile.write('Input File: %s\n' % inputFileName)

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def main():
    """Connect as the public user and run counts or samples per args.

    Runs doCounts(args) when args.counts is set, otherwise doSamples(args).
    Connection details, the query option and the elapsed time are reported
    through verbose().
    """
    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    verbose("Hitting database %s %s as mgd_public\n" % (args.host, args.db))
    verbose("Query option: %s\n" % args.queryKey)

    began = time.time()

    runReport = doCounts if args.counts else doSamples
    runReport(args)

    verbose("Total time: %8.3f seconds\n\n" % (time.time() - began))
def init():
    # Purpose: open the input and BCP output files, set up the database
    #   connection, and look up the created-by user key and the next
    #   cluster and cluster member keys
    # Returns: nothing
    # Assumes: inFile, clusterBCP, memberBCP and createdBy exist at module
    #   level
    # Effects: opens a database connection; creates/truncates the BCP files;
    #   calls exit(1, message) if a file cannot be opened; each nextval()
    #   call advances its database sequence
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set;
    #   IndexError if no MGI_User row matches createdBy

    global fpInFile, fpClusterBCP, fpMemberBCP
    global createdByKey, nextClusterKey, nextMemberKey

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it; only
        # I/O failures are treated as "could not open"
        try:
            return open(path, mode)
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    # create file descriptors for input/output files
    fpInFile = _openOrExit(inFile, 'r')
    fpClusterBCP = _openOrExit(clusterBCP, 'w')
    fpMemberBCP = _openOrExit(memberBCP, 'w')

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # The login literal must contain a %s slot: with no conversion
    # specifier, "'...' % createdBy" raises TypeError before the query runs.
    results = db.sql(
        "select _User_key from MGI_User where login = '%s' " % createdBy,
        'auto')
    createdByKey = results[0]['_User_key']

    # get next MRK_Cluster and MRK_ClusterMember key
    results = db.sql(
        " select nextval('mrk_cluster_seq') as nextKey ", 'auto')
    nextClusterKey = results[0]['nextKey']

    results = db.sql(
        " select nextval('mrk_clustermember_seq') as nextKey ", 'auto')
    nextMemberKey = results[0]['nextKey']

    return
def process ():
    # Purpose: Main routine of this script: run the SQL for year 2013 and
    #   write the third result set to stdout as delimited text
    # Returns: nothing
    # Assumes: SQL, COLUMNS, cleanVal and getArgs exist at module level
    # Effects: connects as MGD_PUBLIC to the server/database given in the
    #   arguments; writes a header line and one line per row to stdout;
    #   writes progress and timing to stderr unless QUIET is set

    args = getArgs()
    notQuiet = not args["QUIET"]

    db.set_sqlServer(args["DBSERVER"])
    db.set_sqlDatabase(args["DBNAME"])
    db.set_sqlUser("MGD_PUBLIC")
    db.set_sqlPassword("mgdpub")

    query = SQL % {'year' : '2013'}
    # str.split()/str.join() give the same result as the Python-2-only
    # string.split()/string.join() functions and also exist on Python 3.
    queries = query.split(args["SQLSEPARATOR"])

    if notQuiet:
        sys.stderr.write("Running %d SQL command(s) on %s..%s\n" %
                         (len(queries), args["DBSERVER"], args["DBNAME"]))
        sys.stderr.flush()

    startTime = time.time()
    results = db.sql(queries, 'auto')
    endTime = time.time()

    if notQuiet:
        sys.stderr.write("Total SQL time: %8.3f seconds\n" %
                         (endTime - startTime))
        sys.stderr.flush()

    delim = args["DELIMITER"]

    # only the result set of the third statement is reported
    result = results[2]

    # print column headers
    sys.stdout.write(delim.join(COLUMNS))
    sys.stdout.write("\n")

    # print results, one line per row, using the configured delimiter
    for r in result:
        vals = [cleanVal(r[col]) for col in COLUMNS]
        sys.stdout.write(delim.join(vals))
        sys.stdout.write("\n")
def getPubmedIDs():
    """Run QUERY and write its final result set to args.outputFile.

    Connects as the public user to the server/database given on the
    command line and runs the statements of QUERY (separated by
    SQLSEPARATOR).  It then writes a tab-delimited file with a header line
    and the columns pubmed, haspdf, year and journal, one line per row of
    the last statement's results.  With args.verbose set, connection,
    timing and progress messages go to stderr.
    """
    args = getArgs()

    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    if args.verbose:
        sys.stderr.write("Hitting database %s %s as mgd_public\n\n" %
                         (args.host, args.db))

    # str.split() behaves the same as the Python-2-only string.split()
    # function and also exists on Python 3.
    queries = QUERY.split(SQLSEPARATOR)

    startTime = time.time()
    results = db.sql(queries, 'auto')
    endTime = time.time()

    if args.verbose:
        sys.stderr.write("Total SQL time: %8.3f seconds\n\n" %
                         (endTime - startTime))

    # The with-block closes (and so flushes) the output file even if a row
    # fails to format; previously the handle was never closed.
    with open(args.outputFile, 'w') as fp:
        fp.write('\t'.join([
            'pubmed',
            'haspdf',
            'year',
            'journal',
        ]) + '\n')

        for i, r in enumerate(results[-1]):
            fp.write('\t'.join([
                str(r['pubmed']),
                str(r['haspdf']),
                str(r['year']),
                r['journal'],
            ]) + '\n')

            # progress indicator every 1000 rows
            if args.verbose and i % 1000 == 0:
                sys.stderr.write('%d..' % i)
def init():
    """Configure the database login and open the log and input files.

    The diagnostics and error files are created in outputDir and named
    after the input file's base name.  SQL logging is switched on and
    start-up information is written to both log files.  Calls
    exit(1, message) if a file cannot be opened.
    """
    global diagFile, errorFile, inputFile, errorFileName, diagFileName

    def _openOrExit(path, mode):
        # Open 'path', or exit with status 1 and a message naming it.
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    head, tail = os.path.split(inputFileName)
    logStem = outputDir + '/' + tail
    diagFileName = logStem + '.diagnostics'
    errorFileName = logStem + '.error'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    inputFile = _openOrExit(inputFileName, 'r')

    # Log every SQL statement.
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def init ():
    """Configure the database login and look up the keys used by the load.

    Sets createdByKey and refKey through loadlib.verifyUser() and
    loadlib.verifyReference() (both called with 0 and None; presumably a
    line number and an error file -- confirm against loadlib), and accKey
    to max(_Accession_key) + 1 from ACC_Accession.
    """
    global createdByKey, refKey, accKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)

    # key for the created-by user
    createdByKey = loadlib.verifyUser(createdBy, 0, None)

    # key for the J-Number reference
    refKey = loadlib.verifyReference(jNumber, 0, None)

    # next available accession key
    nextKeyQuery = 'select max(_Accession_key) + 1 as maxKey from ACC_Accession'
    rows = db.sql(nextKeyQuery, 'auto')
    accKey = rows[0]['maxKey']
def init():
    """Configure the database login, finish bcpCommand and open all files.

    Appends the server, database, two '%s' slots, currentDir and the
    field/record delimiters to the module-level bcpCommand.  Opens the
    diagnostics and error files in currentDir, the two input files and the
    five output files, turns on db tracing, writes start-up information
    and resolves createdByKey through loadlib.verifyUser().  Calls
    exit(1, message) if a file cannot be opened.
    """
    global bcpCommand
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outImageFile, outPaneFile, outAccFile
    global outCopyrightFile, outCaptionFile
    global inImageFile, inPaneFile
    global createdByKey

    def _openOrExit(path, mode):
        # Open 'path', or exit with status 1 and a message naming it.
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # The two '%s' slots are left in place to be filled in later.
    bcpCommand += (db.get_sqlServer() + ' ' + db.get_sqlDatabase() +
                   ' %s ' + currentDir + ' %s "\\t" "\\n" mgd')

    diagFileName = currentDir + '/gxdimageload.diagnostics'
    errorFileName = currentDir + '/gxdimageload.error'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')

    # Input files
    inImageFile = _openOrExit(inImageFileName, 'r')
    inPaneFile = _openOrExit(inPaneFileName, 'r')

    # Output files
    outImageFile = _openOrExit(outImageFileName, 'w')
    outPaneFile = _openOrExit(outPaneFileName, 'w')
    outAccFile = _openOrExit(outAccFileName, 'w')
    outCaptionFile = _openOrExit(outCaptionFileName, 'w')
    outCopyrightFile = _openOrExit(outCopyrightFileName, 'w')

    db.setTrace(True)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
def init():
    # Purpose: set up the database connection, open the four input files and
    #   the load and QC report files, and fill the egToXenMarkerDict and
    #   egToMouseMarkerDict lookups from the database
    # Returns: nothing
    # Assumes: the inFile*Path, loadFilePath and qcRptPath names and both
    #   lookup dictionaries exist at module level
    # Effects: opens a database connection; creates/truncates the load and
    #   QC report files; resets mouseEgMultiGeneIdSet to an empty set;
    #   calls exit() with a message if a file cannot be opened
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set

    global egToXenMarkerDict, egToMouseMarkerDict
    global fpEgFile, fpTransFile, fpOrthoFile, fpExprFile
    global fpLoadFile, fpQcRpt, mouseEgMultiGeneIdSet

    def _openOrExit(path, mode, purpose):
        # open 'path', or exit with a message naming that same path; the
        # EG file's message used to format a different variable
        # (inFileGenePath), which hid the real path or raised NameError
        try:
            return open(path, mode)
        except IOError:
            exit('Could not open file for %s %s\n' % (purpose, path))

    mouseEgMultiGeneIdSet = set([])

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fpEgFile = _openOrExit(inFileEgPath, 'r', 'reading')
    fpTransFile = _openOrExit(inFileTransPath, 'r', 'reading')
    fpOrthoFile = _openOrExit(inFileOrthoPath, 'r', 'reading')
    fpExprFile = _openOrExit(inFileExprPath, 'r', 'reading')
    fpLoadFile = _openOrExit(loadFilePath, 'w', 'writing')
    fpQcRpt = _openOrExit(qcRptPath, 'w', 'writing')

    # get all xenopus tropicalis markers that are associated with egIds
    # (preferred logical DB 55 IDs, organism key 95, marker status key 1)
    results = db.sql(
        "select distinct a.accid as egId, m._Marker_key "
        "from ACC_Accession a, MRK_Marker m "
        "where a._MGIType_key = 2 "
        "and a._LogicalDB_key = 55 "
        "and a.preferred = 1 "
        "and a._Object_key = m._Marker_key "
        "and m._Marker_Status_key = 1 "
        "and m._Organism_key = 95", 'auto')

    # create Xenopus egID to marker lookup from database
    for r in results:
        egToXenMarkerDict[r['egId']] = r['_Marker_key']

    # mouse egID to marker lookup from database (organism key 1); the
    # "a.preferred = 1" restriction was removed per Richard
    results = db.sql(
        "select distinct a.accID as egId, m._Marker_key "
        "from ACC_Accession a, MRK_Marker m "
        "where a._MGIType_key = 2 "
        "and a._LogicalDB_key = 55 "
        "and a._Object_key = m._Marker_key "
        "and m._Marker_Status_key = 1 "
        "and m._Organism_key = 1", 'auto')

    for r in results:
        egToMouseMarkerDict[r['egId']] = r['_Marker_key']

    return
def init():
    # requires: inputFileName, mgiType, createdBy, user and passwordFileName
    #   set at module level
    #
    # effects:
    # 1. Configures the database login on a single shared connection
    # 2. Names the diagnostics and error files after the input file's base
    #    name plus the current date, and the reference association BCP file
    #    after the base name alone
    # 3. Opens the input, diagnostics, error and reference files
    # 4. Routes SQL logging to the diagnostics file and writes start-up
    #    information
    # 5. Sets mgiTypeKey and createdByKey through loadlib
    # 6. Calls exit(1, message) if a file cannot be opened
    #
    # returns: nothing

    global inputFile, diagFile, errorFile, errorFileName, diagFileName
    global refFileName, refFile
    global mgiTypeKey
    global refAssocKey, createdByKey

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fdate = mgi_utils.date('%m%d%Y')    # current date
    head, tail = os.path.split(inputFileName)
    datedStem = tail + '.' + fdate
    diagFileName = datedStem + '.diagnostics'
    errorFileName = datedStem + '.error'
    refFileName = tail + '.MGI_Reference_Assoc.bcp'

    inputFile = _openOrExit(inputFileName, 'r')
    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    refFile = _openOrExit(refFileName, 'w')

    # log every SQL statement to the diagnostics file
    db.set_sqlLogFunction(db.sqlLogAll)
    db.set_sqlLogFD(diagFile)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())
    diagFile.write('Object Type: %s\n' % mgiType)
    diagFile.write('Input File: %s\n' % inputFileName)

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
def initialize():
    """Configure the database login and open every file used by the load.

    The diagnostics, error and new-allele files are created in outputDir
    and named after the input file's base name; the remaining output files
    use the module-level *FileName settings.  SQL logging is switched on
    and start-up information is written to the diagnostics and error
    files.  Calls exit(1, message) if a file cannot be opened.
    """
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global alleleFile, mutationFile, mutantFile, refFile
    global accFile, accRefFile, noteFile, noteChunkFile, annotFile
    global newAlleleFile

    def _openOrExit(path, mode):
        # Open 'path', or exit with status 1 and a message naming it.
        try:
            return open(path, mode)
        except:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    head, tail = os.path.split(inputFileName)
    logStem = outputDir + '/' + tail
    diagFileName = logStem + '.diagnostics'
    errorFileName = logStem + '.error'
    newAlleleFileName = logStem + '.new'

    # Log files and input
    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    newAlleleFile = _openOrExit(newAlleleFileName, 'w')
    inputFile = _openOrExit(inputFileName, 'r')

    # Output files
    alleleFile = _openOrExit(alleleFileName, 'w')
    mutationFile = _openOrExit(mutationFileName, 'w')
    mutantFile = _openOrExit(mutantFileName, 'w')
    refFile = _openOrExit(refFileName, 'w')
    accFile = _openOrExit(accFileName, 'w')
    accRefFile = _openOrExit(accRefFileName, 'w')
    noteFile = _openOrExit(noteFileName, 'w')
    noteChunkFile = _openOrExit(noteChunkFileName, 'w')
    annotFile = _openOrExit(annotFileName, 'w')

    # Log every SQL statement.
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % mgi_utils.date())
    diagFile.write('Server: %s\n' % db.get_sqlServer())
    diagFile.write('Database: %s\n' % db.get_sqlDatabase())

    errorFile.write('Start Date/Time: %s\n\n' % mgi_utils.date())
def init():
    # Purpose: open the input and BCP output files, set up the database
    #   connection, look up the created-by user key and the next cluster,
    #   member, accession and property keys, and fill propertyDict
    # Returns: nothing
    # Assumes: inFile, clusterBCP, memberBCP, accessionBCP, propertyBCP,
    #   createdBy, propertyTypeKey and propertyDict exist at module level
    # Effects: opens a database connection; creates/truncates the BCP files
    #   (accession and property files only when their names are non-empty);
    #   calls exit(1, message) if a file cannot be opened
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set;
    #   IndexError if no MGI_User row matches createdBy

    global fpInFile, fpClusterBCP, fpMemberBCP, fpAccessionBCP
    global fpPropertyBCP, createdByKey, nextClusterKey, nextMemberKey
    global nextAccessionKey, nextPropertyKey, propertyDict

    def _openOrExit(path, mode):
        # open 'path', or exit with status 1 and a message naming it; only
        # I/O failures are treated as "could not open"
        try:
            return open(path, mode)
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    def _nextKey(sql, emptyDefault=None):
        # 'nextKey' from the first row of 'sql'; max() + 1 is null for an
        # empty table, in which case emptyDefault is returned
        nextKey = db.sql(sql, 'auto')[0]['nextKey']
        if nextKey is None:
            return emptyDefault
        return nextKey

    # create file descriptors for input/output files
    fpInFile = _openOrExit(inFile, 'r')
    fpClusterBCP = _openOrExit(clusterBCP, 'w')
    fpMemberBCP = _openOrExit(memberBCP, 'w')
    if accessionBCP != '':
        fpAccessionBCP = _openOrExit(accessionBCP, 'w')
    if propertyBCP != '':
        fpPropertyBCP = _openOrExit(propertyBCP, 'w')

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # The login literal must contain a %s slot: with no conversion
    # specifier, "'...' % createdBy" raises TypeError before the query runs.
    results = db.sql(
        "select _User_key from MGI_User where login = '%s' " % createdBy,
        'auto')
    createdByKey = results[0]['_User_key']

    # get next MRK_Cluster, MRK_ClusterMember, ACC_Accession and
    # MGI_Property key; the accession key has no empty-table fallback
    nextClusterKey = _nextKey(
        'select max(_Cluster_key) + 1 as nextKey from MRK_Cluster', 1000)
    nextMemberKey = _nextKey(
        'select max(_ClusterMember_key) + 1 as nextKey from MRK_ClusterMember',
        1000)
    nextAccessionKey = _nextKey(
        'select max(_Accession_key) + 1 as nextKey from ACC_Accession')
    nextPropertyKey = _nextKey(
        'select max(_Property_key) + 1 as nextKey from MGI_Property', 1000)

    # term -> term key for the vocabulary of the configured property type
    if propertyTypeKey != '':
        results = db.sql(
            'select t._Term_key, t.term '
            'from VOC_Term t, MGI_PropertyType p '
            'where p._PropertyType_key = %s '
            'and p._Vocab_key = t._Vocab_key' % propertyTypeKey, 'auto')
        for r in results:
            propertyDict[r['term']] = r['_Term_key']

    return
def init():
    # Purpose: Create the database connection, open the input, load and QC
    #   report files, and build the EntrezGene ID to marker key lookups for
    #   Xenopus (_Organism_key 95) and mouse (_Organism_key 1)
    # Returns: Nothing
    # Assumes: the file path variables and the two lookup dictionaries are
    #   defined at module level
    # Effects: opens files and a database connection, sets global variables,
    #   calls exit(message) when a file cannot be opened
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set in the
    #   environment

    global egToXenMarkerDict, egToMouseMarkerDict
    global fpEgFile, fpTransFile, fpOrthoFile, fpExprFile
    global fpLoadFile, fpQcRpt, mouseEgMultiGeneIdSet

    failMsg = {'r': 'Could not open file for reading %s\n',
               'w': 'Could not open file for writing %s\n'}

    def _openOrExit(path, mode):
        # open path; the message always names the path that was attempted
        try:
            return open(path, mode)
        except (IOError, OSError):
            exit(failMsg[mode] % path)

    mouseEgMultiGeneIdSet = set()

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # input files (the EG file failure now reports inFileEgPath, the path
    # that is actually opened)
    fpEgFile = _openOrExit(inFileEgPath, 'r')
    fpTransFile = _openOrExit(inFileTransPath, 'r')
    fpOrthoFile = _openOrExit(inFileOrthoPath, 'r')
    fpExprFile = _openOrExit(inFileExprPath, 'r')

    # output files
    fpLoadFile = _openOrExit(loadFilePath, 'w')
    fpQcRpt = _openOrExit(qcRptPath, 'w')

    # get all xenopus tropicalis markers that are associated with egIds
    results = db.sql('''select distinct a.accid as egId, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 55
        and a.preferred = 1
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 95''', 'auto')

    # create Xenopus egID to marker lookup from database
    for r in results:
        egToXenMarkerDict[r['egId']] = r['_Marker_key']

    # mouse egID to marker lookup from database
    # removed per Richard: and a.preferred = 1
    results = db.sql('''select distinct a.accID as egId, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 55
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 1''', 'auto')

    for r in results:
        egToMouseMarkerDict[r['egId']] = r['_Marker_key']

    return
def init():
    # Purpose: open files, create the database connection, get the next
    #   MGI_Relationship key and build the MP header and HPO term lookups
    # Returns: Nothing
    # Assumes: openFiles(), mpHeaderLookup and hpoLookup are defined at
    #   module level
    # Effects: Sets global variables, creates a connection to a database;
    #   any file handling is done by openFiles()
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set in the
    #   environment

    global nextRelationshipKey, mpHeaderLookup, hpoLookup

    #
    # Open input and output files
    #
    openFiles()

    #
    # create database connection
    #
    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    #
    # get next MGI_Relationship key
    #
    results = db.sql('''select max(_Relationship_key) + 1 as nextKey
        from MGI_Relationship''', 'auto')
    nextRelationshipKey = results[0]['nextKey']
    if nextRelationshipKey is None:
        # max() over an empty table is NULL; start numbering at 1000
        nextRelationshipKey = 1000

    #
    # create lookups
    #

    # lookup of MP header terms, keyed by lower-cased accession ID
    results = db.sql('''select a.accid, t.term, t._Term_key
        from DAG_Node n, VOC_Term t, ACC_Accession a
        where n._Label_key = 3
        and n._Object_key = t._Term_key
        and t._Vocab_key = 5
        and t.isObsolete = 0
        and t._Term_key = a._Object_key
        and a._MGIType_key = 13
        and a._LogicalDB_key = 34
        and a.preferred = 1''', 'auto')

    for r in results:
        # str.lower() replaces the deprecated string.lower() function,
        # which no longer exists in Python 3
        mpHeaderLookup[r['accid'].lower()] = r['_Term_key']

    # load lookup of HPO terms, keyed by lower-cased accession ID
    results = db.sql('''select a.accid, t.term, t._Term_key
        from VOC_Term t, ACC_Accession a
        where t._Vocab_key = 106
        and t._Term_key = a._Object_key
        and a._MGIType_key = 13
        and a._LogicalDB_key = 180''', 'auto')

    for r in results:
        hpoLookup[r['accid'].lower()] = r['_Term_key']

    return
args.host = 'bhmgidevdb01.jax.org' args.db = 'prod' else: args.host = args.server + '.jax.org' args.db = args.database return args #----------------------------------- args = getArgs() db.set_sqlServer(args.host) db.set_sqlDatabase(args.db) db.set_sqlUser("mgd_public") db.set_sqlPassword("mgdpub") #----------------------------------- class BaseRefSearch(object): # { """ Is: base class for a reference (article) search from the database Has: all the necessary SQL for the search, the result set, Does: Encapsulates the common SQL for specific searches that return result sets of references and counts/stats for these result sets. """ #################### # SQL fragments used to build up queries ####################
def init():
    # Purpose: create the database connection, open the diagnostics, error,
    #   input and output files, turn on SQL logging and write the log headers
    # Returns: Nothing
    # Assumes: user, passwordFileName and the in*/out* file name variables
    #   are defined at module level
    # Effects: sets global file descriptors and file names, calls
    #   exit(1, message) when a file cannot be opened

    global diagFile, errorFile, errorFileName, diagFileName
    global outAccFile, outPrepFile, outAssayFile, outAssayNoteFile
    global outGelLaneFile, outGelLaneStFile, outGelRowFile, outGelBandFile
    global inPrimerFile, inPrepFile, inAssayFile, inGelLaneFile, inGelBandFile

    def _openOrExit(fileName, mode):
        # every file is opened the same way; failures go through exit()
        try:
            return open(fileName, mode)
        except:
            exit(1, 'Could not open file %s\n' % fileName)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # log file names are the script name plus the current date
    today = mgi_utils.date('%m%d%Y')
    diagFileName = '%s.%s.diagnostics' % (sys.argv[0], today)
    errorFileName = '%s.%s.error' % (sys.argv[0], today)

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')

    # Input Files
    inPrepFile = _openOrExit(inPrepFileName, 'r')
    inAssayFile = _openOrExit(inAssayFileName, 'r')
    inGelLaneFile = _openOrExit(inGelLaneFileName, 'r')
    inGelBandFile = _openOrExit(inGelBandFileName, 'r')

    # Output Files
    outPrepFile = _openOrExit(outPrepFileName, 'w')
    outAssayFile = _openOrExit(outAssayFileName, 'w')
    outAssayNoteFile = _openOrExit(outAssayNoteFileName, 'w')
    outGelLaneFile = _openOrExit(outGelLaneFileName, 'w')
    outGelLaneStFile = _openOrExit(outGelLaneStFileName, 'w')
    outGelRowFile = _openOrExit(outGelRowFileName, 'w')
    outGelBandFile = _openOrExit(outGelBandFileName, 'w')
    outAccFile = _openOrExit(outAccFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    # header lines for the diagnostics and error logs
    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    return
""" datatest classes """ import logging import os import db ### Globals ### # Track test failures FAILURES = [] CACHELOADS = set([]) ### initialize database settings ### db.set_sqlUser('mgd_public') db.set_sqlPassword('mgdpub') db.set_sqlServer( os.environ['DATATEST_DBSERVER'] ) db.set_sqlDatabase( os.environ['DATATEST_DBNAME'] ) ### Classes ### class DataTestCase(object): """ datatest Test Case Exposes special assertion methods Tracks and reports failures """ def __init__(self):
def init():
    # Purpose: open files, create the database connection, get the next
    #   MGI_Relationship key, and load the TSS marker lookup and the
    #   per-strand, per-chromosome gene lookups
    # Returns: Nothing
    # Assumes: openFiles(), user, passwordFileName, tssLookup and one
    #   module-level dictionary named <p|m><chromosome>Lookup for every
    #   strand/chromosome combination returned by the gene query
    # Effects: sets nextRelationshipKey, fills the lookup dictionaries
    # Throws: KeyError if a gene row's strand/chromosome has no matching
    #   module-level lookup dictionary

    # only nextRelationshipKey is rebound here; the lookups are filled in
    # place ('p21Lookup' in the earlier list was a typo for p2Lookup)
    global nextRelationshipKey, tssLookup, p1Lookup, m1Lookup, p2Lookup
    global m2Lookup, p3Lookup, m3Lookup, p4Lookup, m4Lookup, p5Lookup, m5Lookup
    global p6Lookup, m6Lookup, p7Lookup, m7Lookup, p8Lookup, m8Lookup, p9Lookup
    global m9Lookup, p10Lookup, m10Lookup, p11Lookup, m11Lookup, p12Lookup
    global m12Lookup, p13Lookup, m13Lookup, p14Lookup, m14Lookup, p15Lookup
    global m15Lookup, p16Lookup, m16Lookup, p17Lookup, m17Lookup, p18Lookup
    global m18Lookup, p19Lookup, m19Lookup, pXLookup, mXLookup, pYLookup
    global mYLookup, pUNLookup, mUNLookup, mXYLookup, pXYLookup, pMTLookup
    global mMTLookup

    def _location(row):
        # [chromosome, strand, start, end, MGI ID, symbol] for one marker
        return [row['chromosome'], row['strand'],
                int(row['start']), int(row['end']),
                row['accid'], row['symbol']]

    #
    # Open input and output files
    #
    openFiles()

    #
    # create database connection
    #
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    #
    # get next MGI_Relationship key
    #
    results = db.sql('''select nextval('mgi_relationship_seq') as nextKey''',
        'auto')
    nextRelationshipKey = results[0]['nextKey']
    if nextRelationshipKey is None:
        nextRelationshipKey = 1000

    #
    # create lookups
    #

    # lookup of TSS markers, keyed by marker key
    results = db.sql('''select a.accid, m.symbol, m._Marker_key,
            lc.chromosome, lc.strand,
            cast(lc.startCoordinate as int) as start,
            cast(lc.endCoordinate as int) as end
        from MRK_Marker m, ACC_Accession a, MRK_Location_Cache lc
        where m._Organism_key = 1
        and m._Marker_Status_key in (1,3)
        and m.name like 'transcription start site region %'
        and m._Marker_key = a._Object_key
        and a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and m._Marker_key = lc._Marker_key
        and lc.startCoordinate is not null
        and lc.endCoordinate is not null''', 'auto')

    for r in results:
        tssLookup[r['_Marker_key']] = _location(r)

    # load lookup of Gene terms - exclude null strand and feature type
    # heritable phenotypic marker
    results = db.sql('''select a.accid, m.symbol, m._Marker_key,
            lc.chromosome, lc.strand,
            cast(lc.startCoordinate as int) as start,
            cast(lc.endCoordinate as int) as end
        from MRK_Marker m, ACC_Accession a, MRK_Location_Cache lc,
            VOC_Annot v
        where m._Organism_key = 1
        and m._Marker_Status_key in (1,3)
        and m._Marker_Type_key in (1, 7)
        and m.name not like 'transcription start site region %'
        and m._Marker_key = a._Object_key
        and a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and m._Marker_key = lc._Marker_key
        and lc.startCoordinate is not null
        and lc.endCoordinate is not null
        and lc.strand is not null
        and m._Marker_key = v._Object_key
        and v._AnnotType_key = 1011
        and v._Term_key != 6238170''', 'auto')

    moduleNames = globals()
    for r in results:
        # 'm' for the minus strand, 'p' (plus) for anything else
        strand = 'm' if r['strand'] == '-' else 'p'

        # fetch the module-level dictionary by name instead of eval()'ing
        # a string assembled from database values
        currentLookup = moduleNames['%s%sLookup' % (strand, r['chromosome'])]
        currentLookup[r['_Marker_key']] = _location(r)

    return
def init():
    # requires: user, passwordFileName, inputFileName, logDir, outputDir,
    #   mgiType, createdBy, jnum and mode are defined at module level
    #
    # effects:
    # 1. Initializes local DBMS parameters
    # 2. Initializes global file descriptors/file names
    # 3. Initializes global keys (MGI type, created-by user, reference)
    # 4. In 'reload' mode, deletes the MGI_Synonym rows that have this
    #    MGI type and created-by user
    # 5. Calls exit(1, message) when a file cannot be opened and exit(1)
    #    when the MGI type, user or reference resolves to 0
    #
    # returns: nothing
    #

    global diagFileName, errorFileName, synFileName
    global inputFile, diagFile, errorFile, synFile
    global mgiTypeKey, createdByKey, referenceKey

    def _openOrExit(fileName, mode):
        # open fileName; the message names the path that was attempted
        try:
            return open(fileName, mode)
        except (IOError, OSError):
            exit(1, 'Could not open file %s\n' % fileName)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # log files are named after the input file and live in logDir
    tail = os.path.basename(inputFileName)
    diagFileName = logDir + '/' + tail + '.diagnostics'
    errorFileName = logDir + '/' + tail + '.error'
    synFileName = 'MGI_Synonym.bcp'

    # single-argument print calls behave the same under Python 2 and 3
    print(inputFileName)
    print(logDir)

    inputFile = _openOrExit(inputFileName, 'r')
    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')

    # the bcp file is written to outputDir, so a failure reports that path
    synFile = _openOrExit(outputDir + '/' + synFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    # if reference is J:0, then no reference is given
    if jnum == 'J:0':
        referenceKey = ''
    else:
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)

    # exit if we can't resolve mgiType, createdBy or jnum
    if mgiTypeKey == 0 or createdByKey == 0 or referenceKey == 0:
        exit(1)

    if mode == 'reload':
        print('mode is: %s, deleting synonyms' % mode)
        sys.stdout.flush()
        db.sql('delete from MGI_Synonym '
            'where _MGIType_key = %d '
            'and _CreatedBy_key = %d ' % (mgiTypeKey, createdByKey), None)
def init():
    # Purpose: create the database connection, open the log, input and
    #   output files, validate the processing mode, resolve the user and
    #   MGI type keys, and select (or create) the MGI_Set to load
    # Returns: Nothing
    # Assumes: user, passwordFileName, outputDir, inputFileName,
    #   outSetFileName, outMemberFileName, mode, createdBy, setType,
    #   setName, loaddate, TAB and CRT are defined at module level
    # Effects: sets global variables; deletes the MGI_SetMember rows of an
    #   existing set, or writes a new MGI_Set record to outSetFile; calls
    #   exit(1, message) on an unopenable file or an invalid mode

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outSetFile, outMemberFile
    global setKey, setMemberKey, createdByKey, mgiTypeKey, useSetKey
    # bcpon is declared global so the 'preview' assignment below reaches
    # the module-level flag instead of creating an unused local
    global DEBUG, bcpon

    def _openOrExit(fileName, mode):
        # open fileName; failures are reported through exit()
        try:
            return open(fileName, mode)
        except (IOError, OSError):
            exit(1, 'Could not open file %s\n' % fileName)

    def _nextKey(query):
        # max() over an empty table is NULL (None); start numbering at 1000
        maxKey = db.sql(query, 'auto')[0]['maxKey']
        if maxKey is None:
            return 1000
        return maxKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    diagFileName = '%s/setload.diagnostics' % (outputDir)
    errorFileName = '%s/setload.error' % (outputDir)

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    inputFile = _openOrExit(inputFileName, 'r')

    # Output Files
    outSetFile = _openOrExit('%s/%s' % (outputDir, outSetFileName), 'w')
    outMemberFile = _openOrExit('%s/%s' % (outputDir, outMemberFileName), 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    if mode == 'preview':
        DEBUG = 1
        bcpon = 0
    elif mode != 'load':
        exit(1, 'Invalid Processing Mode: %s\n' % (mode))

    setKey = _nextKey('select max(_Set_key) + 1 as maxKey from MGI_Set')

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
    mgiTypeKey = loadlib.verifyMGIType(setType, 0, errorFile)

    #
    # use existing MGI_Set, or create a new one
    #
    results = db.sql("select _Set_key from MGI_Set "
        "where _MGIType_key = %s and name = '%s'" % (mgiTypeKey, setName),
        'auto')

    if len(results) > 0:
        # if several sets match, the last row returned wins
        for r in results:
            setKey = r['_Set_key']

        # delete/reload
        db.sql('delete from MGI_SetMember where _Set_key = %s' % (setKey),
            None)
    else:
        outSetFile.write(TAB.join([
            str(setKey),
            str(mgiTypeKey),
            str(setName),
            '1',
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate]) + CRT)

    setMemberKey = _nextKey(
        'select max(_SetMember_key) + 1 as maxKey from MGI_SetMember')

    return
def init():
    # Purpose: open files, create the database connection, get the next
    #   relationship, relationship property and note keys, and load the
    #   category, vocabulary, reference, marker, allele, user and property
    #   lookups
    # Returns: Nothing
    # Assumes: openFiles(), Category and the lookup dictionaries are defined
    #   at module level
    # Effects: Sets global variables and fills the lookups; switches the
    #   one-connection mode on for the queries and off again at the end

    global nextRelationshipKey, nextPropertyKey, nextNoteKey
    global categoryDict, relationshipDict, propertyDict
    global qualifierDict, evidenceDict, jNumDict, userDict, markerDict
    global alleleDict

    def _fill(lookup, rows, keyColumn, valueColumn):
        # lookup[lower-cased keyColumn value] = valueColumn value
        for row in rows:
            lookup[row[keyColumn].lower()] = row[valueColumn]

    #
    # Open input and output files
    #
    openFiles()

    #
    # create database connection
    #
    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    #
    # get next MGI_Relationship and MGI_Relationship_Property keys;
    # both fall back to 1000 when the query yields None
    #
    rows = db.sql('''select max(_Relationship_key) + 1 as nextKey
        from MGI_Relationship''', 'auto')
    nextRelationshipKey = rows[0]['nextKey']
    if nextRelationshipKey is None:
        nextRelationshipKey = 1000

    rows = db.sql('''select max(_RelationshipProperty_key) + 1 as nextKey
        from MGI_Relationship_Property''', 'auto')
    nextPropertyKey = rows[0]['nextKey']
    if nextPropertyKey is None:
        nextPropertyKey = 1000

    #
    # get next MGI_Note key (taken as returned, no fallback)
    #
    rows = db.sql('''select max(_Note_key) + 1 as nextKey
        from MGI_Note''', 'auto')
    nextNoteKey = rows[0]['nextKey']

    #
    # create lookups
    #

    # FeaR Category Lookup: lower-cased category name -> Category object
    for row in db.sql('''select *
            from MGI_Relationship_Category''', 'auto'):
        category = Category()
        category.key = row['_Category_key']
        category.name = row['name'].lower()
        category.mgiTypeKey1 = row['_MGIType_key_1']
        category.mgiTypeKey2 = row['_MGIType_key_2']
        categoryDict[category.name] = category

    # FeaR vocab lookup
    _fill(relationshipDict, db.sql('''select a.accid, a._Object_key
        from ACC_Accession a, VOC_Term t
        where a._MGIType_key = 13
        and a._LogicalDB_key = 171
        and a.preferred = 1
        and a.private = 0
        and a._Object_key = t._Term_key
        and t.isObsolete = 0''', 'auto'), 'accid', '_Object_key')

    # FeaR qualifier lookup
    _fill(qualifierDict, db.sql('''select _Term_key, term
        from VOC_Term
        where _Vocab_key = 94
        and isObsolete = 0''', 'auto'), 'term', '_Term_key')

    # FeaR evidence lookup
    _fill(evidenceDict, db.sql('''select _Term_key, abbreviation
        from VOC_Term
        where _Vocab_key = 95
        and isObsolete = 0''', 'auto'), 'abbreviation', '_Term_key')

    # Reference lookup
    _fill(jNumDict, db.sql('''select a.accid, a._Object_key
        from ACC_Accession a
        where a._MGIType_key = 1
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and a.private = 0
        and a.prefixPart = 'J:' ''', 'auto'), 'accid', '_Object_key')

    # marker lookup
    _fill(markerDict, db.sql('''select a.accid, a._Object_key
        from ACC_Accession a
        where a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and a.private = 0''', 'auto'), 'accid', '_Object_key')

    # allele lookup
    _fill(alleleDict, db.sql('''select a.accid, a._Object_key
        from ACC_Accession a
        where a._MGIType_key = 11
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and a.private = 0''', 'auto'), 'accid', '_Object_key')

    # active status (not data load or inactive)
    _fill(userDict, db.sql('''select login, _User_key
        from MGI_User
        where _UserStatus_key = 316350''', 'auto'), 'login', '_User_key')

    # property term lookup
    _fill(propertyDict, db.sql('''select term, _Term_key
        from VOC_Term
        where _Vocab_key = 97''', 'auto'), 'term', '_Term_key')

    db.useOneConnection(0)

    return
import mgi_utils import accessionlib import loadlib # # from configuration file # user = os.environ['MGD_DBUSER'] passwordFileName = os.environ['MGD_DBPASSWORDFILE'] loaddate = loadlib.loaddate modifiedBy = os.environ['MODIFIEDBY'] modifiedByKey = None newMkrType = os.environ['NEWMKRTYPE'] newMkrTypeKey = None db.useOneConnection(1) db.set_sqlUser(user) db.set_sqlPasswordFromFile(passwordFileName) db.set_sqlLogFunction(db.sqlLogAll) inputFileName = os.environ['NOMENDATAFILE'] inputFile = None # file descriptor updateList = [] # list of marker keys to update mgiToMrkKeyDict = {} # {mgiID:markerKey, ...} try: inputFile = open(inputFileName, 'r') except: exit(1, 'Could not open file %s\n' % inputFileName) # # get keys for NEWMKRTYPE and MODIFIEDBY # results = db.sql('''select _Marker_Type_key from MRK_Types where name = '%s' ''' % newMkrType, 'auto')
def initialize():
    # Purpose: create the database connection, open the diagnostics, error,
    #   new-allele, input and output files, turn on SQL logging and write
    #   the log headers
    # Returns: Nothing
    # Assumes: user, passwordFileName, inputFileName, outputDir and the
    #   output file name variables are defined at module level
    # Effects: sets global file descriptors and file names, calls
    #   exit(1, message) when a file cannot be opened

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global alleleFile, mutationFile, mutantFile, refFile
    global accFile, accRefFile, noteFile, annotFile
    global newAlleleFile

    def _openOrExit(fileName, mode):
        # every file is opened the same way; failures go through exit()
        try:
            return open(fileName, mode)
        except:
            exit(1, 'Could not open file %s\n' % fileName)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # the log and new-allele files are named after the input file and are
    # created in outputDir
    logPrefix = outputDir + '/' + os.path.split(inputFileName)[1]
    diagFileName = logPrefix + '.diagnostics'
    errorFileName = logPrefix + '.error'
    newAlleleFileName = logPrefix + '.new'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    newAlleleFile = _openOrExit(newAlleleFileName, 'w')

    inputFile = _openOrExit(inputFileName, 'r')

    # output files
    alleleFile = _openOrExit(alleleFileName, 'w')
    mutationFile = _openOrExit(mutationFileName, 'w')
    mutantFile = _openOrExit(mutantFileName, 'w')
    refFile = _openOrExit(refFileName, 'w')
    accFile = _openOrExit(accFileName, 'w')
    accRefFile = _openOrExit(accRefFileName, 'w')
    noteFile = _openOrExit(noteFileName, 'w')
    annotFile = _openOrExit(annotFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    # header lines for the diagnostics and error logs
    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))
def init():
    # Purpose: create the database connection, open the diagnostics, error,
    #   input and output files, turn on SQL logging and write the log headers
    # Returns: Nothing
    # Assumes: user, passwordFileName and the in*/out* file name variables
    #   are defined at module level
    # Effects: sets global file descriptors and file names, calls
    #   exit(1, message) when a file cannot be opened

    global diagFile, errorFile, errorFileName, diagFileName
    global outResultStFile, outResultFile, outResultImageFile
    global inResultsFile, inStructureFile

    def _openOrExit(fileName, mode):
        # every file is opened the same way; failures go through exit()
        try:
            return open(fileName, mode)
        except:
            exit(1, 'Could not open file %s\n' % fileName)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # the log names carry no directory part
    diagFileName = 'resultsload.diagnostics'
    errorFileName = 'resultsload.error'

    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')

    # Input Files
    inResultsFile = _openOrExit(inResultsFileName, 'r')
    inStructureFile = _openOrExit(inStructureFileName, 'r')

    # Output Files
    outResultStFile = _openOrExit(outResultStFileName, 'w')
    outResultFile = _openOrExit(outResultFileName, 'w')
    outResultImageFile = _openOrExit(outResultImageFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    # header lines for the diagnostics and error logs
    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    return
def init():
    # Purpose: Create the database connection, open the input, clusterer,
    #   load and QC report files, and build the human EntrezGene ID and
    #   mouse MGI ID to marker key lookups
    # Returns: Nothing
    # Assumes: the file path variables and the two lookup dictionaries are
    #   defined at module level
    # Effects: opens files and a database connection, sets global variables,
    #   calls exit(message) when a file cannot be opened

    global egToMarkerDict, mgiToMarkerDict
    global fpInFile, fpClustererFile, fpLoadFile, fpQcRpt

    failMsg = {'r': 'Could not open file for reading %s\n',
               'w': 'Could not open file for writing %s\n'}

    def _openOrExit(path, mode):
        # the message depends on whether the file is read or written
        try:
            return open(path, mode)
        except:
            exit(failMsg[mode] % path)

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    fpInFile = _openOrExit(inFilePath, 'r')
    fpClustererFile = _openOrExit(clustererFilePath, 'w')
    fpLoadFile = _openOrExit(loadFilePath, 'w')
    fpQcRpt = _openOrExit(qcRptPath, 'w')

    # get all human markers that are associated with egIDs and
    # create the Human egID to marker lookup from them
    humanRows = db.sql('''select distinct a.accid as egID, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 55
        and a.preferred = 1
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 2''', 'auto')
    for row in humanRows:
        egToMarkerDict[row['egID']] = row['_Marker_key']

    # get all mouse markers
    # removed per Richard: and a.preferred = 1
    mouseRows = db.sql('''select distinct a.accID as mgiId, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.prefixPart = 'MGI:'
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 1''', 'auto')
    for row in mouseRows:
        mgiToMarkerDict[row['mgiId']] = row['_Marker_key']

    return
def init():
    # Purpose: Create the database connection, open the input, clusterer,
    #   load and QC report files, and build the human HGNC ID and mouse
    #   MGI ID to marker key lookups
    # Returns: Nothing
    # Assumes: the file path variables and the two lookup dictionaries are
    #   defined at module level
    # Effects: opens files and a database connection, sets global variables,
    #   calls exit(message) when a file cannot be opened

    global hgncToMarkerDict, mgiToMarkerDict
    global fpInFile, fpClustererFile, fpLoadFile, fpQcRpt

    failMsg = {'r': 'Could not open file for reading %s\n',
               'w': 'Could not open file for writing %s\n'}

    def _openOrExit(path, mode):
        # the message depends on whether the file is read or written
        try:
            return open(path, mode)
        except:
            exit(failMsg[mode] % path)

    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(dbUser)
    db.set_sqlPasswordFromFile(dbPasswordFile)

    fpInFile = _openOrExit(inFilePath, 'r')
    fpClustererFile = _openOrExit(clustererFilePath, 'w')
    fpLoadFile = _openOrExit(loadFilePath, 'w')
    fpQcRpt = _openOrExit(qcRptPath, 'w')

    # get all human markers that have hgncIDs and
    # create the hgncID to marker lookup from them
    humanRows = db.sql('''select distinct a.accid as hgncID, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 64
        and a.preferred = 1
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 2''', 'auto')
    for row in humanRows:
        hgncToMarkerDict[row['hgncID']] = row['_Marker_key']

    # get all mouse markers
    # removed per Richard: and a.preferred = 1
    mouseRows = db.sql('''select distinct a.accID as mgiId, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.prefixPart = 'MGI:'
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 1''', 'auto')
    for row in mouseRows:
        mgiToMarkerDict[row['mgiId']] = row['_Marker_key']

    return
def init():
    # Purpose: check args, open files, create db connection, get next keys
    #   from the db, set up the QC buckets and their report headers, and
    #   create the strain, marker, Ensembl, chromosome, biotype and feature
    #   type lookups
    # Returns: 0
    # Assumes: checkArgs(), openFiles(), Marker, qcDict, messageMap and the
    #   lookup containers are defined at module level
    # Effects: Sets global variables, fills the module-level lookups,
    #   creates a connection to a database
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set in the
    #   environment

    global nextSMKey, nextAccKey, strainTranslationLookup, markerLookup
    global ensemblLookup, chrLookup
    global biotypeLookup, mcvTermLookup, messageMap

    def _nextKey(keyColumn, table):
        # max() over an empty table is NULL (None); start numbering at 1000
        rows = db.sql('''select max(%s) + 1 as nextKey
            from %s''' % (keyColumn, table), 'auto')
        nextKey = rows[0]['nextKey']
        if nextKey is None:
            return 1000
        return nextKey

    checkArgs()

    #
    # Open input and output files
    #
    openFiles()

    #
    # create database connection
    #
    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    #
    # get next MRK_StrainMarker and ACC_Accession keys
    #
    nextSMKey = _nextKey('_StrainMarker_key', 'MRK_StrainMarker')
    nextAccKey = _nextKey('_Accession_key', 'ACC_Accession')

    # load qcDict with keys; one for each reporting bucket that will be
    # written to the curation log
    qcDict['chr_m'] = []      # chr is missing, report/skip
    qcDict['chr_u'] = []      # chromosome unresolved, report/skip
    qcDict['start'] = []      # startCoordinate is missing, report/skip
    qcDict['end'] = []        # endCoordinate is missing, report/skip
    qcDict['start/end'] = []  # start > end, report/skip
    qcDict['strand'] = []     # strand is missing, report/skip
    qcDict['biotype_m'] = []  # biotype is missing, report/skip
    qcDict['biotype_u'] = {}  # biotype unresolved {biotype:count},
                              # report/skip
    qcDict['strain_u'] = []   # strain unresolved, fatal
    qcDict['mgp'] = []        # mgp is missing, report/skip
    qcDict['mgi_u'] = []      # Ensembl ID unresolved, report create strain
                              # marker with null marker
    qcDict['mgi_s'] = []      # marker ID secondary, report, load
    qcDict['mgi_no'] = []     # marker ID not official, report create strain
                              # marker with null marker
    qcDict['ens_no'] = []     # projection_parent_gene does not contain
                              # ENS ID, report, create strain marker with
                              # null marker
    qcDict['ens_miss'] = []   # Missing projection_parent_gene (ensembl id)
                              # report, create strain marker with null
                              # marker
    qcDict['ens_multi'] = []  # ensembl ID assoc > 1 marker, report, create
                              # strain marker with null marker
    qcDict['mgi_mgp'] = []    # list of {mgiID: ([set of mpIDs]), ...}, one
                              # for each strain file, used to
                              # a) determine multiple MGP IDs (within a
                              #    given strain) per marker, report and
                              #    create strain gene
                              # b) write out to bcp files

    # The QC header for each reporting bucket that will appear in the
    # curation log
    messageMap['chr_m'] = 'Chromosome missing from input, record(s) skipped'
    messageMap['chr_u'] = \
        'Chromosome from input unresolved, record(s) skipped'
    messageMap['start'] = \
        'Start Coordinate missing from input, record(s) skipped'
    messageMap['end'] = \
        'End Coordinate missing from input, record(s) skipped'
    messageMap['start/end'] = \
        'Start Coordinate > End Coordinate, record(s) skipped'
    messageMap['strand'] = 'Strand missing from input, record(s) skipped'
    messageMap['biotype_m'] = \
        'Biotype missing from input, record(s) skipped'
    messageMap['biotype_u'] = \
        'Biotype from input unresolved, record(s) skipped'
    messageMap['strain_u'] = 'Strain from input unresolved, load fails'
    messageMap['mgp'] = 'MGP ID missing from input, record(s) skipped'
    messageMap['mgi_u'] = \
        'Ensembl ID from input unresolved, strain marker created with null marker'
    messageMap['mgi_s'] = \
        'Marker from input is secondary ID, strain marker created'
    messageMap['mgi_no'] = \
        'Marker from input is not official, strain marker created with null marker'
    messageMap['ens_no'] = \
        'projection_parent_gene from input not an Ensembl ID, strain marker created with null marker'
    messageMap['ens_miss'] = \
        'projection_parent_gene missing from input, strain marker created with null marker'
    messageMap['ens_multi'] = \
        'Ensembl ID associated with > 1 marker, strain marker created with null marker'
    messageMap['mgi_mgp'] = \
        'Markers from input with > 1 Strain specific MGP ID, report and load strain marker and MGI marker association'

    #
    # create lookups
    #

    # load lookup of strain translations
    results = db.sql('''select t.badName, t._Object_key as strainKey,
            s.strain
        from MGI_Translation t, PRB_Strain s
        where t._TranslationType_key = 1021
        and t._Object_key = s._Strain_key''', 'auto')

    for r in results:
        strainTranslationLookup[r['badName']] = [r['strainKey'], r['strain']]

    # load lookup of all marker MGI IDs (preferred and secondary; the
    # preferred flag is kept on each Marker)
    results = db.sql('''select m._Marker_key, m.symbol,
            s.status as markerStatus, a.accid as mgiID, a.preferred
        from ACC_Accession a, MRK_Marker m, MRK_Status s
        where a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.prefixPart = 'MGI:'
        and a._Object_key = m._Marker_key
        and m._Organism_key = 1
        and m._Marker_Status_key = s._Marker_Status_key''', 'auto')

    for r in results:
        m = Marker()
        m.markerKey = r['_Marker_key']
        m.markerID = r['mgiID']
        m.symbol = r['symbol']
        m.markerStatus = r['markerStatus']
        m.markerPreferred = r['preferred']
        markerLookup[m.markerID] = m

    # load lookup of ensembl ID to marker relationships; one Ensembl ID
    # may map to several MGI IDs
    results = db.sql('''select a1.accid as ensID, a2.accid as mgiID
        from ACC_Accession a1, ACC_Accession a2
        where a1._MGIType_key = 2
        and a1._LogicalDB_key = 60
        and a1._Object_key = a2._Object_key
        and a2._MGIType_key = 2
        and a2._LogicalDB_key = 1
        and a2.preferred = 1
        and a2.prefixPart = 'MGI:' ''', 'auto')

    for r in results:
        ensemblLookup.setdefault(r['ensID'], []).append(r['mgiID'])

    # load lookup of 'mouse, laboratory' chromosomes
    results = db.sql('''select chromosome, _Chromosome_key
        from MRK_Chromosome
        where _Organism_key = 1''', 'auto')

    for r in results:
        chrLookup[r['chromosome']] = r['_Chromosome_key']

    # load lookup of raw MGP biotype to feature type; the SQL comment
    # must stay at the end of its own line
    results = db.sql('''select t1.term as rawBiotype,
            t2.term as primaryMcvTerm
        from VOC_Term t1, VOC_Term t2, MRK_BiotypeMapping m
        where t1._Vocab_key = 136 --Biotype MGP
        and t1._Term_key = m._BiotypeTerm_key
        and m._PrimaryMCVTerm_key = t2._Term_key''', 'auto')

    for r in results:
        biotypeLookup[r['rawBiotype'].lower()] = r['primaryMcvTerm']

    # load lookup of feature type vocabulary; the 'auto' parser is passed
    # explicitly, as in every other query here
    results = db.sql('''select term
        from VOC_Term
        where _Vocab_key = 79''', 'auto')

    for r in results:
        mcvTermLookup.append(r['term'].lower())

    return 0
def init():
    # Purpose: set up the database connection, open every file used by the
    #   load and write the start-of-run header lines
    # Returns: Nothing
    # Assumes: user, passwordFileName, inputFileName and the output file
    #   name variables are defined outside this function before it is called
    # Effects: Sets the global file handles and the diagnostics/error file
    #   names.  The diagnostics and error files are named after the input
    #   file's base name plus the current date (no directory part), so they
    #   are created relative to the current working directory.  Calls
    #   exit(1, message) when a file cannot be opened.

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global strainFile, markerFile, accFile, annotFile
    global noteFile

    def openOrExit(path, mode, **openArgs):
        # Open 'path' or report the failure through exit().  Catching
        # Exception instead of using a bare 'except' keeps KeyboardInterrupt
        # and SystemExit from being reported as "Could not open file".
        try:
            return open(path, mode, **openArgs)
        except Exception:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fdate = mgi_utils.date('%m%d%Y')    # current date

    # only the base name of the input file is used in the log file names
    tail = os.path.split(inputFileName)[1]
    diagFileName = tail + '.' + fdate + '.diagnostics'
    errorFileName = tail + '.' + fdate + '.error'

    diagFile = openOrExit(diagFileName, 'w')
    errorFile = openOrExit(errorFileName, 'w')
    # the input file is decoded as latin-1
    inputFile = openOrExit(inputFileName, 'r', encoding="latin-1")
    strainFile = openOrExit(strainFileName, 'w')
    markerFile = openOrExit(markerFileName, 'w')
    accFile = openOrExit(accFileName, 'w')
    noteFile = openOrExit(noteFileName, 'w')
    annotFile = openOrExit(annotFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))
def init():
    # Purpose: open the load's files, configure the database connection,
    #   fetch the next relationship/property/note keys and fill the lookup
    #   dictionaries
    # Returns: Nothing
    # Assumes: MGD_DBUSER and MGD_DBPASSWORDFILE are set in the environment
    #   (a KeyError is raised otherwise)
    # Effects: Sets the global next-key values, adds entries to the global
    #   lookup dictionaries, calls db.useOneConnection(0) when finished

    global nextRelationshipKey, nextPropertyKey, nextNoteKey
    global categoryDict, relationshipDict, propertyDict
    global qualifierDict, evidenceDict, jNumDict, userDict, markerDict
    global alleleDict

    #
    # Open input and output files
    #
    openFiles()

    #
    # create database connection
    #
    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    #
    # get next MGI_Relationship and MGI_Relationship_Property keys
    #
    nextRelationshipKey = db.sql(
        '''select nextval('mgi_relationship_seq') as nextKey''',
        'auto')[0]['nextKey']
    nextPropertyKey = db.sql(
        '''select nextval('mgi_relationship_property_seq') as nextKey''',
        'auto')[0]['nextKey']

    #
    # get next MGI_Note key
    #
    nextNoteKey = db.sql(
        '''select nextval('mgi_note_seq') as nextKey''',
        'auto')[0]['nextKey']

    #
    # create lookups; every lookup is keyed by a lower-cased string
    #

    # FeaR Category Lookup
    for row in db.sql('''select * from MGI_Relationship_Category''', 'auto'):
        lowerName = row['name'].lower()
        category = Category()
        category.key = row['_Category_key']
        category.name = lowerName
        category.mgiTypeKey1 = row['_MGIType_key_1']
        category.mgiTypeKey2 = row['_MGIType_key_2']
        categoryDict[lowerName] = category

    # FeaR vocab lookup
    vocabRows = db.sql('''select a.accid, a._Object_key
        from ACC_Accession a, VOC_Term t
        where a._MGIType_key = 13
        and a._LogicalDB_key = 171
        and a.preferred = 1
        and a.private = 0
        and a._Object_key = t._Term_key
        and t.isObsolete = 0''', 'auto')
    relationshipDict.update(
        (row['accid'].lower(), row['_Object_key']) for row in vocabRows)

    # FeaR qualifier lookup
    qualifierRows = db.sql('''select _Term_key, term
        from VOC_Term
        where _Vocab_key = 94
        and isObsolete = 0''', 'auto')
    qualifierDict.update(
        (row['term'].lower(), row['_Term_key']) for row in qualifierRows)

    # FeaR evidence lookup
    evidenceRows = db.sql('''select _Term_key, abbreviation
        from VOC_Term
        where _Vocab_key = 95
        and isObsolete = 0''', 'auto')
    evidenceDict.update(
        (row['abbreviation'].lower(), row['_Term_key'])
        for row in evidenceRows)

    # Reference lookup
    referenceRows = db.sql('''select a.accid, a._Object_key
        from ACC_Accession a
        where a._MGIType_key = 1
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and a.private = 0
        and a.prefixPart = 'J:' ''', 'auto')
    jNumDict.update(
        (row['accid'].lower(), row['_Object_key']) for row in referenceRows)

    # marker lookup
    markerRows = db.sql('''select a.accid, a._Object_key
        from ACC_Accession a
        where a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and a.private = 0''', 'auto')
    markerDict.update(
        (row['accid'].lower(), row['_Object_key']) for row in markerRows)

    # allele lookup
    alleleRows = db.sql('''select a.accid, a._Object_key
        from ACC_Accession a
        where a._MGIType_key = 11
        and a._LogicalDB_key = 1
        and a.preferred = 1
        and a.private = 0''', 'auto')
    alleleDict.update(
        (row['accid'].lower(), row['_Object_key']) for row in alleleRows)

    # active status (not data load or inactive)
    userRows = db.sql('''select login, _User_key
        from MGI_User
        where _UserStatus_key = 316350''', 'auto')
    userDict.update(
        (row['login'].lower(), row['_User_key']) for row in userRows)

    # property term lookup
    propertyRows = db.sql('''select term, _Term_key
        from VOC_Term
        where _Vocab_key = 97''', 'auto')
    propertyDict.update(
        (row['term'].lower(), row['_Term_key']) for row in propertyRows)

    db.useOneConnection(0)
def process():
    # Purpose: Main routine of this script.  Queries the pubmed ID ->
    #   dataset associations and prints one "<pubmedID>_<letters>.pdf" name
    #   per pubmed ID, where <letters> are the first letters of the
    #   dataset abbreviations associated with that ID
    # Returns: nothing
    # Effects: sets the global notQuiet; points the db module at
    #   PROD1_MGI / mgd as MGD_PUBLIC; writes names to stdout and
    #   progress/skip messages to stderr

    global notQuiet

    #args = getArgs()   # NOT USED YET
    notQuiet = True     #not args[ "QUIET"]

    db.set_sqlServer("PROD1_MGI")
    #db.set_sqlServer("DEV_MGI")
    db.set_sqlDatabase("mgd")
    db.set_sqlUser("MGD_PUBLIC")
    db.set_sqlPassword("mgdpub")

    #-- For papers w/ pubmed IDs
    #-- get their MGD dataset associations for Alleles/Pheno, Expr, GO, Tumor
    query = """
        select a.accid pubmedid, b._refs_key, bd.abbreviation
        from acc_accession a inner join bib_refs b
        on (a._mgitype_key = 1 and a._object_key = b._refs_key
        and a._logicaldb_key = 29)
        inner join bib_dataset_assoc bda
        on (b._refs_key = bda._refs_key and bda.isneverused=0)
        inner join bib_dataset bd
        on (bda._dataset_key = bd._dataset_key
        and bd._dataset_key in (1002, 1004, 1005, 1007) )
        """
    results = runsql(query)

    if notQuiet:
        sys.stderr.write("Found %d pubmed to corpora associations\n"
                         % len(results))

    # build dict w/ keys = pubmedIDs, values = {_refs_key : 0 }
    # build dict w/ keys = pubmedIDs, values = { classification : 0 }
    # A pubmed ID w/ multiple _refs_keys is skipped on output because we
    #  don't want to collapse multiple classifications from multiple
    #  references onto a single pubmed ID
    #  (there are some pubmedIDs for books/proceedings that are associated
    #   with multiple references, these individual references have
    #   classifications)
    pm2refs_key = {}
    pm2classes = {}

    for rcd in results:
        pmid = rcd["pubmedid"]
        # 'setdefault' replaces the Python 2-only dict.has_key() test:
        # the inner dict is created the 1st time a pmid is seen
        pm2refs_key.setdefault(pmid, {})[rcd["_refs_key"]] = 0
        # classification = 1st letter of abbrev
        pm2classes.setdefault(pmid, {})[rcd["abbreviation"][0:1]] = 0

    numIDs = 0
    numMultRefs = 0

    # loop through pubmed IDs, and output those w/ their classifications
    for pmid in pm2refs_key:
        numIDs += 1
        refsKeys = pm2refs_key[pmid]

        if len(refsKeys) == 1:          # 1 _refs_key for this pmid
            # NOTE(review): the letters appear in dict iteration order, as
            # in the original; they are not sorted.
            cn = "%s_%s.pdf" % (str(pmid), "".join(pm2classes[pmid]))
            # call form prints the same text on Python 2 and Python 3
            print(cn)
        else:                           # pmid w/ multiple _refs_keys
            numMultRefs += 1
            sys.stderr.write("Pubmed ID '%s' has %d _refs_key's, skipping\n"
                             % (str(pmid), len(refsKeys)))

    sys.stderr.write("Pubmed IDs: %d, \t skipped IDs: %d\n"
                     % (numIDs, numMultRefs))
def init():
    # Purpose: Initialization of database connection and file descriptors,
    #   create database lookup dictionaries
    # Returns: Nothing
    # Assumes: MGD_DBUSER and MGD_DBPASSWORDFILE are set in the environment;
    #   the four file paths are defined outside this function
    # Effects: configures the db module, sets the global file descriptors,
    #   adds EG ID -> marker key entries to egToChickenDict and
    #   egToMouseDict; calls exit(message) if a file cannot be opened

    global egToChickenDict, egToMouseDict
    global fpOrthoFile, fpExprFile
    global fpLoadFile, fpQcRpt

    def openOrExit(path, mode, failure):
        # Open 'path' or report 'failure' through exit().  Catching
        # Exception instead of using a bare 'except' keeps KeyboardInterrupt
        # and SystemExit from being reported as an open failure.
        try:
            return open(path, mode)
        except Exception:
            exit(failure % path)

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fpOrthoFile = openOrExit(inFileOrthoPath, 'r',
                             'Could not open file for reading %s\n')
    fpExprFile = openOrExit(inFileExprPath, 'r',
                            'Could not open file for reading %s\n')
    fpLoadFile = openOrExit(loadFilePath, 'w',
                            'Could not open file for writing %s\n')
    fpQcRpt = openOrExit(qcRptPath, 'w',
                         'Could not open file for writing %s\n')

    # get all chicken markers that are associated with EG IDs
    results = db.sql('''select distinct a.accid as egId, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 55
        and a.preferred = 1
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 63''', 'auto')

    #
    # create Chicken egID to marker lookup from database
    # (a later row with the same EG ID replaces the earlier one)
    #
    for r in results:
        egToChickenDict[r['egId']] = r['_Marker_key']

    # get all mouse markers
    # (unlike the chicken query, this one has no 'preferred' filter)
    results = db.sql('''select distinct a.accID as egId, m._Marker_key
        from ACC_Accession a, MRK_Marker m
        where a._MGIType_key = 2
        and a._LogicalDB_key = 55
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = 1''', 'auto')

    for r in results:
        egToMouseDict[r['egId']] = r['_Marker_key']
def init():
    # Purpose: check args, open files, configure the database connection,
    #   fetch the next relationship key and build the MP and EMAPA lookups
    # Returns: 0
    # Assumes: MGD_DBUSER and MGD_DBPASSWORDFILE are set in the environment
    # Effects: Sets the global nextRelationshipKey and adds entries to the
    #   global mpLookup and emapaLookup dictionaries; checkArgs() and
    #   openFiles() are called first

    global nextRelationshipKey, mpLookup, emapaLookup

    checkArgs()

    #
    # Open input and output files
    #
    openFiles()

    #
    # create database connection
    #
    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    #
    # get next MGI_Relationship key; fall back to 1000 when none comes back
    #
    sequenceRows = db.sql(
        '''select nextval('mgi_relationship_seq') as nextKey''', 'auto')
    fetchedKey = sequenceRows[0]['nextKey']
    nextRelationshipKey = 1000 if fetchedKey is None else fetchedKey

    #
    # create lookups; IDs are used as returned by the query (not
    # lower-cased), each mapping to [term key, isObsolete, preferred]
    #

    # lookup of MP terms
    mpRows = db.sql('''select a.accid, a.preferred, t.term, t.isObsolete,
        t._Term_key
        from VOC_Term t, ACC_Accession a
        where t._Vocab_key = 5
        and t._Term_key = a._Object_key
        and a._MGIType_key = 13
        and a._LogicalDB_key = 34
        and a.preferred = 1''', 'auto')
    mpLookup.update(
        (row['accid'],
         [row['_Term_key'], row['isObsolete'], row['preferred']])
        for row in mpRows)

    # load lookup of EMAPA terms
    emapaRows = db.sql('''select a.accid, a.preferred, t.term, t.isObsolete,
        t._Term_key
        from VOC_Term t, ACC_Accession a
        where t._Vocab_key = 90
        and t._Term_key = a._Object_key
        and a._MGIType_key = 13
        and a._LogicalDB_key = 169''', 'auto')
    emapaLookup.update(
        (row['accid'],
         [row['_Term_key'], row['isObsolete'], row['preferred']])
        for row in emapaRows)

    return 0
def init():
    # Purpose: set up the database connection, open every file used by the
    #   load and write the start-of-run header lines
    # Returns: Nothing
    # Assumes: user, passwordFileName, inputFileName and the output file
    #   name variables are defined outside this function before it is called
    # Effects: Sets the global file handles and the diagnostics/error file
    #   names.  The diagnostics and error files are named after the input
    #   file's base name plus the current date (no directory part), so they
    #   are created relative to the current working directory.  Calls
    #   exit(1, message) when a file cannot be opened.

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global strainFile, markerFile, accFile, annotFile
    global noteFile, noteChunkFile

    def openOrExit(path, mode):
        # Open 'path' or report the failure through exit().  Catching
        # Exception instead of using a bare 'except' keeps KeyboardInterrupt
        # and SystemExit from being reported as "Could not open file".
        try:
            return open(path, mode)
        except Exception:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fdate = mgi_utils.date('%m%d%Y')    # current date

    # only the base name of the input file is used in the log file names
    tail = os.path.split(inputFileName)[1]
    diagFileName = tail + '.' + fdate + '.diagnostics'
    errorFileName = tail + '.' + fdate + '.error'

    diagFile = openOrExit(diagFileName, 'w')
    errorFile = openOrExit(errorFileName, 'w')
    inputFile = openOrExit(inputFileName, 'r')
    strainFile = openOrExit(strainFileName, 'w')
    markerFile = openOrExit(markerFileName, 'w')
    accFile = openOrExit(accFileName, 'w')
    noteFile = openOrExit(noteFileName, 'w')
    noteChunkFile = openOrExit(noteChunkFileName, 'w')
    annotFile = openOrExit(annotFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))
def init():
    # Purpose: Initialization of database connection and file descriptors,
    #   create database lookup dictionaries; create dictionary from
    #   input file
    # Returns: Nothing
    # Assumes: MGD_DBUSER and MGD_DBPASSWORDFILE are set in the environment;
    #   every input line has exactly six TAB-separated columns (a
    #   ValueError is raised otherwise)
    # Effects: configures the db module, creates the temp tables 'eg' and
    #   'mrk', sets the global file descriptors, adds entries to
    #   mrkToMultiEGDict, egToMarkerDict and egIdToLineDict; calls
    #   exit(message) if a file cannot be opened

    global fpInFile, fpOutFile, fpQcRpt
    global mrkToMultiEGDict, egToMarkerDict

    def openOrExit(path, mode, failure):
        # Open 'path' or report 'failure' through exit().  Catching
        # Exception instead of using a bare 'except' keeps KeyboardInterrupt
        # and SystemExit from being reported as an open failure.
        try:
            return open(path, mode)
        except Exception:
            exit(failure % path)

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fpInFile = openOrExit(inFilePath, 'r',
                          'Could not open file for reading %s\n')
    fpOutFile = openOrExit(outFilePath, 'w',
                           'Could not open file for writing %s\n')
    fpQcRpt = openOrExit(qcRptPath, 'w',
                         'Could not open file for writing %s\n')

    #
    # create lookup from the database of all markers that map to multiple
    # EG IDs
    #

    # get all markers that are associated with egIds, all organisms
    db.sql('''select distinct a.accid as egId, m._Marker_key, m.symbol,
        o.commonName
        into temp eg
        from ACC_Accession a, MRK_Marker m, MGI_Organism o
        where a._MGIType_key = 2
        and a._LogicalDB_key = 55
        and a.preferred = 1
        and a._Object_key = m._Marker_key
        and m._Marker_Status_key = 1
        and m._Organism_key = o._Organism_key''', None)

    # markers that appear in 'eg' more than once
    db.sql('''select _Marker_key
        into temp mrk
        from eg
        group by _Marker_key
        having count(*) > 1''', None)

    results = db.sql('''select e.egId, e._Marker_key, e.symbol, e.commonName
        from eg e, mrk m
        where e._Marker_key = m._Marker_key''', 'auto')

    for r in results:
        markerKey = r['_Marker_key']
        # 'setdefault' replaces the Python 2-only dict.has_key() test
        mrkToMultiEGDict.setdefault(markerKey, []).append(
            Marker(markerKey, r['egId'], r['symbol'], r['commonName']))

    #
    # create lookup from the database mapping EG IDs to Marker instances
    #
    results = db.sql('''select * from eg''', 'auto')

    for r in results:
        egId = r['egId']
        egToMarkerDict.setdefault(egId, []).append(
            Marker(r['_Marker_key'], egId, r['symbol'], r['commonName']))

    #
    # create lookup from the input file mapping EG ID to the line in the file
    #
    for line in fpInFile.readlines():
        # line[:-1] drops the last character (the line terminator); the
        # six-way unpack keeps the original failure on malformed lines
        (hgId, taxId, egId, junk1, junk2, junk3) = line[:-1].split(TAB)
        taxId = taxId.strip()
        egId = egId.strip()

        # only organisms listed in taxIdList are kept
        if taxId not in taxIdList:
            continue

        # the unmodified line (terminator included) is what gets stored
        egIdToLineDict.setdefault(egId, []).append(line)
def init():
    # Purpose: set up the database connection, open the log, input and
    #   output files, validate the processing mode and determine the
    #   set / set-member keys to load with
    # Returns: Nothing
    # Assumes: user, passwordFileName, outputDir, inputFileName,
    #   outSetFileName, outMemberFileName, mode, createdBy, setType,
    #   setName and loaddate are defined outside this function
    # Effects: Sets the global file handles and keys; for an existing set,
    #   deletes its MGI_SetMember rows so they can be reloaded; for a new
    #   set, writes one row to the set output file; calls exit(1, message)
    #   on an unopenable file or an invalid mode

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outSetFile, outMemberFile
    global setKey, setMemberKey, createdByKey, mgiTypeKey, useSetKey
    # bcpon was assigned below without being declared global, so the
    # assignment only created a local and had no effect outside init()
    global DEBUG, bcpon

    def openOrExit(path, mode):
        # Open 'path' or report the failure through exit().  Catching
        # Exception instead of using a bare 'except' keeps KeyboardInterrupt
        # and SystemExit from being reported as "Could not open file".
        try:
            return open(path, mode)
        except Exception:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    diagFileName = '%s/setload.diagnostics' % (outputDir)
    errorFileName = '%s/setload.error' % (outputDir)

    diagFile = openOrExit(diagFileName, 'w')
    errorFile = openOrExit(errorFileName, 'w')
    inputFile = openOrExit(inputFileName, 'r')

    # Output Files
    # (paths are built before the open attempt so the failure message can
    # always name the file)
    fullPathSetFile = '%s/%s' % (outputDir, outSetFileName)
    outSetFile = openOrExit(fullPathSetFile, 'w')

    fullPathMemberFile = '%s/%s' % (outputDir, outMemberFileName)
    outMemberFile = openOrExit(fullPathMemberFile, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    if mode == 'preview':
        DEBUG = 1
        bcpon = 0
    elif mode != 'load':
        exit(1, 'Invalid Processing Mode: %s\n' % (mode))

    results = db.sql('select max(_Set_key) + 1 as maxKey from MGI_Set', 'auto')
    setKey = results[0]['maxKey']
    if setKey is None:
        # max() of an empty table is null; start numbering at 1
        setKey = 1

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
    mgiTypeKey = loadlib.verifyMGIType(setType, 0, errorFile)

    #
    # use existing MGI_Set, or create a new one
    #
    results = db.sql('select _Set_key from MGI_Set where _MGIType_key = %s and name = \'%s\'' % (mgiTypeKey, setName), 'auto')

    if len(results) > 0:
        # the last matching row wins, as in the original loop
        setKey = results[-1]['_Set_key']

        # delete/reload
        # NOTE(review): the original indentation was lost; this assumes the
        # delete ran once, after the loop over the matching rows. The two
        # readings are identical when exactly one set matches - confirm.
        db.sql('delete from MGI_SetMember where _Set_key = %s' % (setKey), None)
    else:
        outSetFile.write(TAB.join([
            str(setKey),
            str(mgiTypeKey),
            str(setName),
            '1',
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ]) + CRT)

    results = db.sql('select max(_SetMember_key) + 1 as maxKey from MGI_SetMember', 'auto')
    setMemberKey = results[0]['maxKey']
    if setMemberKey is None:
        # MGI_SetMember can be empty (e.g. right after the delete above)
        setMemberKey = 1
def init():
    '''
    # requires: user, passwordFileName, inputFileName and outputFileDir
    #           are defined outside this function
    #
    # effects:
    # 1. Initializes local DBMS parameters
    # 2. Initializes global file descriptors/file names; the four output
    #    file names are printed as they are built
    # 3. Logs all SQL, with the diagnostics file as the log descriptor
    # 4. Writes the start-of-run header lines
    # 5. Calls exit(1, message) if a file cannot be opened
    #
    # returns: nothing
    #
    '''

    global inputFile, diagFile, errorFile, errorFileName, diagFileName
    global transTypeFile, transFile
    global transTypeFileName, transFileName

    def openOrExit(path, mode):
        # Open 'path' or report the failure through exit().  Catching
        # Exception instead of using a bare 'except' keeps KeyboardInterrupt
        # and SystemExit from being reported as "Could not open file".
        try:
            return open(path, mode)
        except Exception:
            exit(1, 'Could not open file %s\n' % path)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # the default output file names are based on 'inputFileName': its
    # base name, placed in outputFileDir and stamped with the current date
    fileName = os.path.split(inputFileName)[1]
    fdate = mgi_utils.date('%m%d%Y')    # current date
    prefix = outputFileDir + '/' + fileName + '.' + fdate

    # print(x) with a single argument prints the same text on Python 2
    # and Python 3
    diagFileName = prefix + '.diagnostics'
    print(diagFileName)
    errorFileName = prefix + '.error'
    print(errorFileName)
    transTypeFileName = prefix + '.MGI_TranslationType.bcp'
    print(transTypeFileName)
    transFileName = prefix + '.MGI_Translation.bcp'
    print(transFileName)

    inputFile = openOrExit(inputFileName, 'r')
    diagFile = openOrExit(diagFileName, 'w')
    errorFile = openOrExit(errorFileName, 'w')
    transTypeFile = openOrExit(transTypeFileName, 'w')
    transFile = openOrExit(transFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    # Set Log File Descriptor
    db.set_sqlLogFD(diagFile)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))
def init():
    # Purpose: configure the database connection and look up the keys the
    #   load needs
    # Returns: Nothing
    # Assumes: user, passwordFile, gensatLogicalDB, egLogicalDB,
    #   markerMGIType and createdBy are defined outside this function
    # Effects: Sets the global accKey, gensatLogicalDBKey, egLogicalDBKey,
    #   markerMGITypeKey and createdByKey; prints a message and calls
    #   sys.exit(1) as soon as one lookup does not return exactly one row

    global accKey, gensatLogicalDBKey, egLogicalDBKey
    global markerMGITypeKey, createdByKey

    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)
    db.useOneConnection(1)

    #
    # Get the keys from the database.  All five commands are sent in one
    # db.sql() call; results[i] holds the rows of the i-th command.
    #
    cmds = [
        'select max(_Accession_key) + 1 as _Accession_key from ACC_Accession',
        'select _LogicalDB_key from ACC_LogicalDB where name = \'%s\'' % (gensatLogicalDB),
        'select _LogicalDB_key from ACC_LogicalDB where name = \'%s\'' % (egLogicalDB),
        'select _MGIType_key from ACC_MGIType where name = \'%s\'' % (markerMGIType),
        'select _User_key from MGI_User where name = \'%s\'' % (createdBy),
    ]

    results = db.sql(cmds, 'auto')

    #
    # If any of the keys cannot be found, stop the load.
    # print(x) with a single argument prints the same text on Python 2
    # and Python 3.
    #
    if len(results[0]) != 1:
        print('Cannot determine the next Accession key')
        sys.exit(1)
    accKey = results[0][0]['_Accession_key']

    if len(results[1]) != 1:
        print('Cannot determine the Logical DB key for "' + gensatLogicalDB + '"')
        sys.exit(1)
    gensatLogicalDBKey = results[1][0]['_LogicalDB_key']

    if len(results[2]) != 1:
        print('Cannot determine the Logical DB key for "' + egLogicalDB + '"')
        sys.exit(1)
    egLogicalDBKey = results[2][0]['_LogicalDB_key']

    if len(results[3]) != 1:
        print('Cannot determine the MGI Type key for "' + markerMGIType + '"')
        sys.exit(1)
    markerMGITypeKey = results[3][0]['_MGIType_key']

    if len(results[4]) != 1:
        print('Cannot determine the User key for "' + createdBy + '"')
        sys.exit(1)
    createdByKey = results[4][0]['_User_key']