Exemplo n.º 1
def init():
    """Open the output files and initialize the global keys.

    Opens ``diagFileName`` and ``synFileName`` for writing, reads the next
    value of the ``mgi_synonym_seq`` sequence into ``synKey`` and stores the
    result of ``loadlib.verifyUser(user, 0, None)`` in ``userKey``.

    Side effects: assigns the module globals ``diagFile``, ``synFile``,
    ``synKey`` and ``userKey``.  Calls ``exit(1, message)`` when one of the
    files cannot be opened.
    """
    global synFile, diagFile
    global synKey, userKey

    # exit() must only run when open() fails; unguarded, it would end the
    # load immediately after a successful open.
    try:
        diagFile = open(diagFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        synFile = open(synFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % synFileName)

    # Get next available primary key
    results = db.sql(''' select nextval('mgi_synonym_seq') as maxKey ''', 'auto')
    synKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
Exemplo n.º 2
def init():
    """Open the diagnostics and synonym files and set the global keys.

    Side effects: assigns the module globals ``diagFile`` and ``synFile``
    (both opened for writing), ``synKey`` (next value of the
    ``mgi_synonym_seq`` sequence) and ``userKey`` (result of
    ``loadlib.verifyUser(user, 0, None)``).

    Calls ``exit(1, message)`` when a file cannot be opened.
    """
    global synFile, diagFile
    global synKey, userKey

    def _open_for_write(path):
        """Return ``path`` opened for writing, or exit with an error message."""
        try:
            return open(path, 'w')
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    diagFile = _open_for_write(diagFileName)
    synFile = _open_for_write(synFileName)

    # Get next available primary key
    results = db.sql(''' select nextval('mgi_synonym_seq') as maxKey ''', 'auto')
    synKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
Exemplo n.º 3
def init():
    """Open the four output files and initialize the global keys.

    Side effects: assigns the module globals ``diagFile``, ``accFile``,
    ``accrefFile`` and ``markerFile`` (all opened for writing), ``accKey``
    (``max(_Accession_key) + 1`` from ACC_Accession), ``markerKey`` (next
    value of the ``mrk_marker_seq`` sequence) and ``userKey`` (result of
    ``loadlib.verifyUser(user, 0, None)``).

    Calls ``exit(1, message)`` when a file cannot be opened.
    """
    global accFile, accrefFile, markerFile, diagFile
    global accKey, userKey, markerKey

    def _open_for_write(path):
        """Return ``path`` opened for writing, or exit with an error message."""
        try:
            return open(path, 'w')
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    diagFile = _open_for_write(diagFileName)
    accFile = _open_for_write(accFileName)
    accrefFile = _open_for_write(accrefFileName)
    markerFile = _open_for_write(markerFileName)

    # Get next available primary key
    results = db.sql(
        'select max(_Accession_key) + 1 as maxKey from ACC_Accession', 'auto')
    accKey = results[0]['maxKey']

    results = db.sql(''' select nextval('mrk_marker_seq') as maxKey ''', 'auto')
    markerKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
Exemplo n.º 4
def processFile():
    """Verify each input line and queue/write the probe notes.

    Every line of ``inputFile`` is split on tabs into probe ID, notes and
    creator.  The probe and creator are checked with ``loadlib``; a line
    that fails either check is logged to ``errorFile`` and skipped.  For a
    valid line a delete statement is appended to ``execSQL`` (modes
    'preview' and 'load') and, when the notes are non-empty, a row is
    written to ``notesFile``.

    Calls ``exit(1, message)`` on a line with fewer than three fields.
    """
    global execSQL

    for lineNum, line in enumerate(inputFile, 1):

        error = 0

        # Strip only the newline: line[:-1] would also eat the last
        # character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
            notes = tokens[1]
            createdBy = tokens[2]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # automatically deletes any existing notes for this probe
        if mode in ('preview', 'load'):
            execSQL.append(deleteSQL % (probeKey))

        if len(notes) > 0:
            notesFile.write('%s\t%s\t%s\t%s\n' %
                            (probeKey, notes, loaddate, loaddate))
Exemplo n.º 5
def init():
    """Open the four output files and initialize the global keys.

    Side effects: assigns the module globals ``diagFile``, ``accFile``,
    ``accrefFile`` and ``markerFile`` (all opened for writing), ``accKey``
    (``max(_Accession_key) + 1`` from ACC_Accession), ``markerKey`` (next
    value of the ``mrk_marker_seq`` sequence) and ``userKey`` (result of
    ``loadlib.verifyUser(user, 0, None)``).

    Calls ``exit(1, message)`` when a file cannot be opened.
    """
    global accFile, accrefFile, markerFile, diagFile
    global accKey, userKey, markerKey

    # Each exit() belongs to the failure path of its open(); without the
    # guard the function would exit right after the first successful open.
    try:
        diagFile = open(diagFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        accFile = open(accFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % accFileName)

    try:
        accrefFile = open(accrefFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % accrefFileName)

    try:
        markerFile = open(markerFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % markerFileName)

    # Get next available primary key
    results = db.sql('select max(_Accession_key) + 1 as maxKey from ACC_Accession', 'auto')
    accKey = results[0]['maxKey']

    results = db.sql(''' select nextval('mrk_marker_seq') as maxKey ''', 'auto')
    markerKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
Exemplo n.º 6
def setPrimaryKeys():
    """Assign global primary key variables using max keys from database.

    Sets ``seqKey``, ``assocKey`` and ``accKey`` to ``max(<key column>) + 1``
    of ``seqTable``, ``sourceTable`` and ``accTable`` respectively, and
    ``userKey`` to the result of ``loadlib.verifyUser`` for the user named
    in the ``MGD_DBUSER`` environment variable.
    """
    global seqKey, assocKey, accKey, userKey

    def _next_key(column, table):
        """Return ``max(column) + 1`` as reported by the database for ``table``."""
        results = db.sql("select max(%s) + 1 as maxKey from %s" % (column, table), "auto")
        return results[0]["maxKey"]

    seqKey = _next_key("_Sequence_key", seqTable)
    assocKey = _next_key("_Assoc_key", sourceTable)
    accKey = _next_key("_Accession_key", accTable)

    userKey = loadlib.verifyUser(os.environ['MGD_DBUSER'], 1, None)
Exemplo n.º 7
def init ():
    """Initialize the global creator, reference and accession keys.

    Stores the result of ``loadlib.verifyUser`` for ``createdBy`` in
    ``createdByKey``, the result of ``loadlib.verifyReference`` for
    ``jNumber`` in ``refKey``, and ``max(_Accession_key) + 1`` from
    ACC_Accession in ``accKey``.
    """
    global createdByKey, refKey, accKey

    # Created-by key for the user, then reference key for the J-Number.
    createdByKey = loadlib.verifyUser(createdBy, 0, None)
    refKey = loadlib.verifyReference(jNumber, 0, None)

    # Next available accession key.
    maxKeyQuery = 'select max(_Accession_key) + 1 as maxKey from ACC_Accession'
    firstRow = db.sql(maxKeyQuery, 'auto')[0]
    accKey = firstRow['maxKey']

Exemplo n.º 8
def processFile():
	# Purpose: read inputFileName line by line, split each line on '|',
	# verify marker, assay, reference, creator and chromosome, write one
	# row per accepted line to exptMarkerFile via bcpWrite and, after the
	# loop, write a note row to noteFile when a note was captured.
	#
	# NOTE(review): this excerpt is missing several structural lines
	# (try/except, else, continue and the bodies of some `if` statements),
	# so it does not parse as written; each gap is marked below.
	#
	# requires:
	# effects:
	#	Reads input file
	#	Verifies and Processes each line in the input file
	# returns:
	#	nothing

	global referenceKey
	global exptDict, seqExptDict

	lineNum = 0
	note = ''

	# For each line in the input file

	inputFile = open(inputFileName, 'r')
	for line in inputFile.readlines():

		error = 0
		lineNum = lineNum + 1

		# Split the line into tokens
		tokens = string.split(line[:-1], '|')

		# NOTE(review): the extra indentation and the comment below suggest
		# a try/except was here (assignments in `try`, `note = line` in the
		# handler) -- confirm against the original file.
			markerID = tokens[0]
			chromosome = tokens[1]
			updateChr = tokens[2]
			band = tokens[3]
			assay = tokens[4]
			description = tokens[5]
			jnum = tokens[6]
			createdBy = tokens[7]
			# if it's not a valid line, assume it's the note
			note = line
#			exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

		markerKey, markerSymbol = verifyMarker(markerID, lineNum)
		assayKey = verifyAssay(assay)
	        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)
	        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
		error = not verifyChromosome(chromosome, lineNum)

		if markerKey == 0 or \
		   assayKey == 0 or \
		   referenceKey == 0 or \
		   createdByKey == 0:
			# set error flag to true
			error = 1

		# if errors, continue to next record
		# NOTE(review): body missing; presumably `continue` per the comment above.
		if error:

		# if no errors, process

		# run once...needs the reference
		# NOTE(review): body missing; the one-time setup call is not visible here.
		if lineNum == 1:

		# determine experiment key for this chromosome
		# if it doesn't exist, create it

		# NOTE(review): body missing; the call that creates the experiment
		# entry is not visible here.
		if not exptDict.has_key(chromosome):

		# NOTE(review): the two chrExptKey assignments below look like the
		# two arms of an if/else whose `else:` line is missing -- confirm.
		if not exptDict.has_key(chromosome):
			errorFile.write('Cannot Find Experiment Key For Chromosome (%d): %s\n' % (lineNum, chromosome))
			chrExptKey = 0
			chrExptKey = exptDict[chromosome]

		# if errors, continue to next record
		# NOTE(review): body missing; presumably `continue`.
		if chrExptKey == 0:

		# add marker to experiment marker file
		# (alleleKey and matrixData are not assigned in this function;
		# they are read from an enclosing scope)
		bcpWrite(exptMarkerFile, \
			[chrExptKey, \
			markerKey, \
			alleleKey, \
			assayKey, \
			seqExptDict[chrExptKey], \
			markerSymbol, \
			description, \
			matrixData, \
			loaddate, loaddate])

		# increment marker sequence number for the experiment
		seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1

#	end of "for line in inputFile.readlines():"

	# write the captured note (if any) against the last verified reference
	if len(note) > 0:
		bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
Exemplo n.º 9
def processFile():
    """Verify each input line and write one strain/marker row per valid line.

    Every line of ``inputFile`` is split on tabs into strain ID, allele ID,
    qualifier and creator.  The allele ID is first looked up as an allele
    and, if that fails, as a marker; a found allele is resolved to its
    marker through ALL_Allele.  Lines whose strain, marker or qualifier
    cannot be resolved are skipped.  Valid lines are written to
    ``strainFile`` and ``strainalleleKey`` is incremented.

    On the first processed line, existing PRB_Strain_Marker rows of that
    line's creator are deleted.  After the loop ``ACC_setMax`` is called
    with the number of lines read and the ``prb_strain_marker_seq``
    sequence is reset.

    Calls ``exit(1, message)`` on a line with fewer than four fields.
    """
    global strainalleleKey

    lineNum = 0         # stays 0 for an empty file; used after the loop
    notDeleted = 1

    for lineNum, line in enumerate(inputFile, 1):

        error = 0

        # Strip only the newline: line[:-1] would also eat the last
        # character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # IDs of 1-4 characters are left-padded with zeros to 6 characters;
        # longer IDs (including 5 characters) are used unchanged.
        if 1 <= len(strainID) <= 4:
            strainID = strainID.rjust(6, '0')

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None, lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
        markerKey = 0

        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None, lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # run once: remove the rows previously loaded by this creator
        if notDeleted:
            db.sql('delete PRB_Strain_Marker where _CreatedBy_key = %s' % (createdByKey), None)
            notDeleted = 0

        # if Allele found, resolve to Marker
        if alleleKey > 0:
            results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']
        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        if strainKey == 0 or markerKey == 0 or qualifierKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # an unresolved allele is written as an empty field
        if alleleKey == 0:
            alleleKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    # Update the AccessionMax value
    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)
Exemplo n.º 10
def init():
    """Open the load's files and initialize file names and global keys.

    Builds the diagnostics, error and reference-association file names from
    the base name of ``inputFileName`` (the first two also carry the value
    of ``mgi_utils.date('%m%d%Y')``), opens the input file for reading and
    the three output files for writing, writes header lines to the
    diagnostics and error files, and stores the results of
    ``loadlib.verifyMGIType`` and ``loadlib.verifyUser`` in ``mgiTypeKey``
    and ``createdByKey``.

    Calls ``exit(1, message)`` when a file cannot be opened.
    """
    global inputFile, diagFile, errorFile, errorFileName, diagFileName
    global refFileName, refFile
    global mgiTypeKey
    global refAssocKey, createdByKey

    def _open_or_exit(path, mode):
        """Return ``path`` opened in ``mode``, or exit with an error message."""
        try:
            return open(path, mode)
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    fdate = mgi_utils.date('%m%d%Y')    # current date
    tail = os.path.basename(inputFileName)
    diagFileName = tail + '.' + fdate + '.diagnostics'
    errorFileName = tail + '.' + fdate + '.error'
    refFileName = tail + '.MGI_Reference_Assoc.bcp'

    inputFile = _open_or_exit(inputFileName, 'r')
    diagFile = _open_or_exit(diagFileName, 'w')
    errorFile = _open_or_exit(errorFileName, 'w')
    refFile = _open_or_exit(refFileName, 'w')

    # NOTE(review): the reviewed source carried "Log all SQL" / "Set Log
    # File Descriptor" comments here with no statements after them; confirm
    # whether SQL-logging setup calls belong at this point.

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
Exemplo n.º 11
def processFile():
    """Verify each input line and write the primer and its related rows.

    Every line of ``inputFile`` is split on tabs into twelve fields.  Each
    non-empty marker ID is verified; a line with an unresolvable marker is
    logged and skipped.  For an accepted line rows are written to
    ``primerFile``, ``markerFile`` (markers that occur once), ``refFile``,
    ``aliasFile`` (non-empty aliases), ``accFile``/``accRefFile`` (the MGI
    ID plus each non-empty sequence ID), ``newPrimerFile`` and, when notes
    are present, ``noteFile``.  The global key counters are advanced as
    rows are written.

    After the loop ``ACC_setMax`` is called with the number of lines read
    unless ``DEBUG`` is set.

    Calls ``exit(1, message)`` on a line with fewer than twelve fields.
    """
    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0         # stays 0 for an empty file; used after the loop

    for lineNum, line in enumerate(inputFile, 1):

        error = 0

        # Strip only the newline: line[:-1] would also eat the last
        # character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerSymbol = tokens[0]    # only echoed to newPrimerFile
            markerIDs = tokens[1].split('|')
            name = tokens[2]
            jnum = tokens[3]
            regionCovered = tokens[4]
            sequence1 = tokens[5]
            sequence2 = tokens[6]
            productSize = tokens[7]
            notes = tokens[8]
            sequenceIDs = tokens[9]
            aliasList = tokens[10].split('|')
            createdBy = tokens[11]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # marker IDs: keep (ID, key) pairs so that a duplicate can be
        # reported with its own ID rather than the last ID of the line
        markerPairs = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerPairs.append((markerID, markerKey))

        markerList = [pair[1] for pair in markerPairs]

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' \
            % (primerKey, name, NA, vectorKey, segmentTypeKey, mgi_utils.prvalue(sequence1), \
            mgi_utils.prvalue(sequence2), mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize), \
            createdByKey, createdByKey, loaddate, loaddate))

        for markerID, markerKey in markerPairs:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (primerKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' % (refKey, primerKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases (empty entries are skipped)
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                    % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the marker
        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered, sequence1, sequence2, productSize, notes, sequenceIDs, createdBy, mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids (empty entries are skipped)
        for acc in seqAccList:

            if len(acc) == 0:
                continue

            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        # notes
        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n' \
                % (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    # Update the AccessionMax value
    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Exemplo n.º 12
def init():
    """Open the load's files and initialize the set keys.

    Opens the diagnostics and error files (under ``outputDir``), the input
    file and the two output files, writes header lines, validates ``mode``
    ('preview' sets ``DEBUG``; anything other than 'preview'/'load' exits),
    and determines ``setKey``, ``createdByKey``, ``mgiTypeKey`` and
    ``setMemberKey``.

    If an MGI_Set row with the requested type and name already exists its
    key is reused and its MGI_SetMember rows are deleted; otherwise a new
    set row is written to ``outSetFile``.

    Calls ``exit(1, message)`` when a file cannot be opened or the mode is
    invalid.
    """
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outSetFile, outMemberFile
    global setKey, setMemberKey, createdByKey, mgiTypeKey, useSetKey
    global DEBUG

    def _open_or_exit(path, mode):
        """Return ``path`` opened in ``mode``, or exit with an error message."""
        try:
            return open(path, mode)
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    diagFileName = '%s/setload.diagnostics' % (outputDir)
    errorFileName = '%s/setload.error' % (outputDir)

    diagFile = _open_or_exit(diagFileName, 'w')
    errorFile = _open_or_exit(errorFileName, 'w')
    inputFile = _open_or_exit(inputFileName, 'r')

    # Output Files
    fullPathSetFile = '%s/%s' % (outputDir, outSetFileName)
    outSetFile = _open_or_exit(fullPathSetFile, 'w')

    fullPathMemberFile = '%s/%s' % (outputDir, outMemberFileName)
    outMemberFile = _open_or_exit(fullPathMemberFile, 'w')

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    if mode == 'preview':
        DEBUG = 1
        # NOTE(review): bcpon is not declared global in this function, so
        # this assignment only creates a local; confirm whether a
        # module-level flag was meant to be cleared.
        bcpon = 0
    elif mode != 'load':
        exit(1, 'Invalid Processing Mode:  %s\n' % (mode))

    results = db.sql('select max(_Set_key) + 1 as maxKey from MGI_Set', 'auto')
    setKey = results[0]['maxKey']

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
    mgiTypeKey = loadlib.verifyMGIType(setType, 0, errorFile)

    # use existing MGI_Set, or create a new one
    # NOTE(review): setName is interpolated into the SQL text unescaped;
    # a name containing a single quote will break this query.
    results = db.sql('select _Set_key from MGI_Set where _MGIType_key = %s and name = \'%s\''
        % (mgiTypeKey, setName), 'auto')

    if len(results) > 0:
        # reuse the key of the last matching row
        setKey = results[-1]['_Set_key']
        # delete/reload
        db.sql('delete from MGI_SetMember where _Set_key = %s' % (setKey), None)
    else:
        outSetFile.write(TAB.join([
            str(setKey),
            str(mgiTypeKey),
            str(setName),
            '1',
            str(createdByKey), str(createdByKey),
            loaddate, loaddate,
        ]) + CRT)

    results = db.sql('select max(_SetMember_key) + 1 as maxKey from MGI_SetMember', 'auto')
    setMemberKey = results[0]['maxKey']

Exemplo n.º 13
def processFile():
    """Verify each input line and write the primer and its related rows.

    Every line of ``inputFile`` is split on tabs into twelve fields.  Each
    non-empty marker ID is verified; a line with an unresolvable marker is
    logged and skipped.  For an accepted line rows are written to
    ``primerFile``, ``markerFile`` (markers that occur once), ``refFile``,
    ``aliasFile`` (non-empty aliases), ``accFile``/``accRefFile`` (the MGI
    ID plus each non-empty sequence ID), ``newPrimerFile`` and, when notes
    are present, ``noteFile``.  The global key counters are advanced as
    rows are written.

    After the loop ``ACC_setMax`` is called with the number of lines read
    unless ``DEBUG`` is set.

    Calls ``exit(1, message)`` on a line with fewer than twelve fields.
    """
    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0         # stays 0 for an empty file; used after the loop

    for lineNum, line in enumerate(inputFile, 1):

        error = 0

        # Strip only the newline: line[:-1] would also eat the last
        # character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerSymbol = tokens[0]  # only echoed to newPrimerFile
            markerIDs = tokens[1].split('|')
            name = tokens[2]
            jnum = tokens[3]
            regionCovered = tokens[4]
            sequence1 = tokens[5]
            sequence2 = tokens[6]
            productSize = tokens[7]
            notes = tokens[8]
            sequenceIDs = tokens[9]
            aliasList = tokens[10].split('|')
            createdBy = tokens[11]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # marker IDs: keep (ID, key) pairs so that a duplicate can be
        # reported with its own ID rather than the last ID of the line
        markerPairs = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerPairs.append((markerID, markerKey))

        markerList = [pair[1] for pair in markerPairs]

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' \
            % (primerKey, name, NA, vectorKey, segmentTypeKey, mgi_utils.prvalue(sequence1), \
            mgi_utils.prvalue(sequence2), mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize), \
            createdByKey, createdByKey, loaddate, loaddate))

        for markerID, markerKey in markerPairs:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (primerKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' %
                      (refKey, primerKey, referenceKey, createdByKey,
                       createdByKey, loaddate, loaddate))

        # aliases (empty entries are skipped)
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                    % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the marker
        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered, sequence1, sequence2, productSize, notes, sequenceIDs, createdBy, mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids (empty entries are skipped)
        for acc in seqAccList:

            if len(acc) == 0:
                continue

            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        # notes
        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n' \
                % (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    # Update the AccessionMax value
    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Exemplo n.º 14
def init():
    """Build the bcp command, open all load files and set ``createdByKey``.

    Extends the global ``bcpCommand`` template with the SQL server,
    database and ``currentDir``, opens the diagnostics and error files
    (under ``currentDir``), the two input files for reading and the five
    output files for writing, writes header lines to the diagnostics and
    error files, and stores the result of ``loadlib.verifyUser`` in
    ``createdByKey``.

    Calls ``exit(1, message)`` when a file cannot be opened.
    """
    global bcpCommand
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outImageFile, outPaneFile, outAccFile
    global outCopyrightFile, outCaptionFile
    global inImageFile, inPaneFile
    global createdByKey

    def _open_or_exit(path, mode):
        """Return ``path`` opened in ``mode``, or exit with an error message."""
        try:
            return open(path, mode)
        except IOError:
            exit(1, 'Could not open file %s\n' % path)

    bcpCommand = bcpCommand + db.get_sqlServer() + ' ' + db.get_sqlDatabase() + ' %s ' + currentDir + ' %s "\\t" "\\n" mgd'

    diagFileName = currentDir + '/gxdimageload.diagnostics'
    errorFileName = currentDir + '/gxdimageload.error'

    diagFile = _open_or_exit(diagFileName, 'w')
    errorFile = _open_or_exit(errorFileName, 'w')

    # Input Files
    inImageFile = _open_or_exit(inImageFileName, 'r')
    inPaneFile = _open_or_exit(inPaneFileName, 'r')

    # Output Files
    outImageFile = _open_or_exit(outImageFileName, 'w')
    outPaneFile = _open_or_exit(outPaneFileName, 'w')
    outAccFile = _open_or_exit(outAccFileName, 'w')
    outCaptionFile = _open_or_exit(outCaptionFileName, 'w')
    outCopyrightFile = _open_or_exit(outCopyrightFileName, 'w')

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

Exemplo n.º 15
def processFile():
    """Verify each "from -> to" probe pair and queue the merge statements.

    Every line of ``inputFile`` is split on tabs into from-ID, name, to-ID,
    J-number and creator.  The line is rejected (logged to ``errorFile``
    and skipped) when any of the four lookups fails, when the gene check
    query returns no rows, or when the J-number is not on any assay
    attached to the "from" probe.

    For an accepted line a reference row and an alias row are written, the
    ``refKey``/``aliasKey`` counters are advanced, and the assay update,
    reference update and probe delete statements are appended to
    ``execAssaySQL``, ``execRefSQL`` and ``execProbeSQL``.

    Calls ``exit(1, message)`` on a line with fewer than five fields.
    """
    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    for lineNum, line in enumerate(inputFile, 1):

        error = 0

        # Strip only the newline: line[:-1] would also eat the last
        # character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum,
                                       errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum,
                                     errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From":  %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To":  %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = '''
			select f.*
			from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
			where f._Probe_key = %s
			and t._Probe_key = %s
			and p._Probe_key = %s
			and p._ProbePrep_key = a._ProbePrep_key
			and f._Marker_key = t._Marker_key
			and f._Marker_key = a._Marker_key
			''' % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write(
                'Gene of GenePaint, Eurexpress and Assay are not the same:  %s, %s\n'
                % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
			 select a.*
			 from GXD_ProbePrep p, GXD_Assay a
			 where p._Probe_key = %s
			 and p._ProbePrep_key = a._ProbePrep_key
			 and a._Refs_key = %s
			 ''' % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            # NOTE(review): the value reported here was not visible in the
            # reviewed source; the "from" probe ID is assumed -- confirm.
            errorFile.write(
                'J: is not on any Assays attached to the probe:  %s\n' %
                (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))
        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
Exemplo n.º 16
def processFile():
    # Purpose: for each tab-delimited line of inputFile, verify the probe,
    # reference, creator and marker IDs, then write marker rows, a
    # reference row and alias rows and queue delete statements in execSQL.
    #
    # NOTE(review): this excerpt is missing several structural lines
    # (try/except, else, continue and the bodies of some `if` statements),
    # so it does not parse as written; each gap is marked below.

    global refKey, aliasKey, execSQL

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        # NOTE(review): the extra indentation suggests these assignments sat
        # in a `try:` with the exit() call in its handler -- confirm.
            probeID = tokens[0]
            markerIDs = string.split(tokens[1], '|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = string.split(tokens[4], '|')
            createdBy = tokens[5]
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        results = db.sql(
            '''select _Reference_key from PRB_Reference
		where _Probe_key = %s
		and _Refs_key = %s
		''' % (probeKey, refsKey), 'auto')
        # NOTE(review): results[0] raises IndexError when the query returns
        # no rows, so the `referenceKey == 0` check below cannot catch a
        # missing probe/reference pair -- verify the intended handling.
        referenceKey = results[0]['_Reference_key']
        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference:  %s\n' % (jnum))
            error = 1

# marker IDs

        markerList = []
        for markerID in markerIDs:

            # NOTE(review): body missing; presumably skips the 'none' placeholder.
            if markerID == 'none':

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            # NOTE(review): nothing visible here appends to markerList,
            # although it is iterated below; an else branch appears to be missing.
            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s\n' % (markerID))
                error = 1

        # if errors, continue to next record
        # NOTE(review): body missing; presumably `continue`.
        if error:

        # if no errors, process

        # NOTE(review): the duplicate message below presumably belongs to a
        # missing `else:`; which branch execSQL.append belongs to cannot be
        # told from this excerpt.
        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
      % (probeKey, markerKey, refsKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
                errorFile.write('Invalid Marker Duplicate:  %s\n' % (markerID))

        # NOTE(review): an `else:` may be missing before refFile.write (see
        # the "only used if referenceKey == 0" comment below) -- confirm.
        if referenceKey > 0:
            refKey = referenceKey
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
      % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

# only used if referenceKey == 0
        refKey = refKey + 1
Exemplo n.º 17
def processFile():
        # Purpose: read inputFileName line by line, split each line on '|',
        # verify marker, assay, reference, creator and chromosome, write one
        # row per accepted line to exptMarkerFile via bcpWrite and, after
        # the loop, write a note row to noteFile when a note was captured.
        #
        # NOTE(review): this excerpt is missing several structural lines
        # (try/except, else, continue and the bodies of some `if`
        # statements), so it does not parse as written; gaps are marked below.
        #
        # requires:
        # effects:
        #	Reads input file
        #	Verifies and Processes each line in the input file
        # returns:
        #	nothing

    global referenceKey
    global exptDict, seqExptDict

    lineNum = 0
    note = ''

    # For each line in the input file

    inputFile = open(inputFileName, 'r')
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = str.split(line[:-1], '|')

        # NOTE(review): the extra indentation and the comment below suggest
        # a try/except was here (assignments in `try`, `note = line` in the
        # handler) -- confirm against the original file.
            mappingKey = tokens[0]
            markerID = tokens[1]
            chromosome = tokens[2]
            updateChr = tokens[3]
            band = tokens[4]
            assay = tokens[5]
            description = tokens[6]
            jnum = tokens[7]
            createdBy = tokens[8]
            # if it's not a valid line, assume it's the note
            note = line
#			exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        markerKey, markerSymbol = verifyMarker(markerID, lineNum)
        assayKey = verifyAssay(assay)
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
        error = not verifyChromosome(chromosome, lineNum)

        if markerKey == 0 or \
           assayKey == 0 or \
           referenceKey == 0 or \
           createdByKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        # NOTE(review): body missing; presumably `continue` per the comment above.
        if error:

        # if no errors, process

        # run once...needs the reference
        # NOTE(review): body missing; the one-time setup call is not visible here.
        if lineNum == 1:

        # determine experiment key for this chromosome
        # if it doesn't exist, create it

        # NOTE(review): body missing; the call that creates the experiment
        # entry is not visible here.
        if chromosome not in exptDict:

        # NOTE(review): the call that wraps the message below (presumably a
        # write to the error file) and the `else:` between the two
        # chrExptKey assignments are missing -- confirm.
        if chromosome not in exptDict:
                'Cannot Find Experiment Key For Chromosome (%d): %s\n' %
                (lineNum, chromosome))
            chrExptKey = 0
            chrExptKey = exptDict[chromosome]

        # if errors, continue to next record
        # NOTE(review): body missing; presumably `continue`.
        if chrExptKey == 0:

        # add marker to experiment marker file
        # (alleleKey and matrixData are not assigned in this function;
        # they are read from an enclosing scope)
        bcpWrite(exptMarkerFile, \
                [mappingKey, \
                chrExptKey, \
                markerKey, \
                alleleKey, \
                assayKey, \
                seqExptDict[chrExptKey], \
                description, \
                matrixData, \
                loaddate, loaddate])

        # increment marker sequence number for the experiment
        seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1

#	end of "for line in inputFile.readlines():"

    # write the captured note (if any) against the last verified reference
    if len(note) > 0:
        bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
Exemplo n.º 18
def processFile():
    """Read the strain input file and write bcp rows for each new strain.

    Each tab-delimited line of the global ``inputFile`` must carry nine
    columns: name, allele IDs ('|'-delimited), strain type, species,
    isStandard, createdBy, mutant note, colony note and annotations
    ('|'-delimited).

    * A strain that verifyStrain() reports as existing only gets a colony
      note created (when checkColonyNote() finds none); the line is then
      skipped.
    * A new strain is skipped when its strain type, species or creator
      verifies to key 0.
    * Otherwise rows are written to strainFile, markerFile, accFile and
      annotFile, notes are created through createNote(), and the global
      key counters are advanced.

    Calls exit() on a line with too few columns.
    """

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # strain annotation type: _AnnotType_key = 1009 = "Strain/Attributes"
    annotTypeKey = 1009

    # this is a null qualifier key: _Qualifier_key = 1614158 = null
    annotQualifierKey = 1614158

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            name = tokens[0]
            alleleIDs = tokens[1]
            strainType = tokens[2]
            species = tokens[3]
            isStandard = tokens[4]
            createdBy = tokens[5]
            mutantNote = tokens[6]
            colonyNote = tokens[7]
            annotations = tokens[8].split('|')
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # if the strain exists, but with no colony id note, create one;
        # an existing strain is never re-created, so move to the next line
        if strainExistKey > 0:
            print('strain in database checking colony note : %s' % line)
            if not checkColonyNote(strainExistKey):
                createNote(strainExistKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)
            else:
                print('colony note in database: %s' % colonyNote)
            continue
        else:
            print('strain not in database : %s' % line)

        # if strain does not exist and verification failed on strain type,
        # species or createdBy, skip the record
        if strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        # if no errors, process
        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard,
               isPrivate, isGeneticBackground, createdByKey, createdByKey,
               cdate, cdate))

        # if Allele found, resolve to Marker
        for a in alleleIDs.split('|'):
            alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)

            # an unverified allele has no ALL_Allele row to resolve;
            # indexing the empty result would otherwise raise IndexError
            if alleleKey == 0:
                continue

            results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
            markerKey = results[0]['_Marker_key']

            markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (strainmarkerKey, strainKey, markerKey, alleleKey,
                   qualifierKey, createdByKey, createdByKey, cdate, cdate))
            strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for the strain (standard strains only)
        if isStandard == '1':
            accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
                % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
                   createdByKey, createdByKey, cdate, cdate))
            accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Colony ID Note
        if len(colonyNote) > 0:
            createNote(strainKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note
        if len(mutantNote) > 0:
            createNote(strainKey, mutantNote, mgiMutOrigNoteTypeKey, createdByKey)

        # Annotations
        for a in annotations:

            annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
            if annotTermKey == 0:
                continue

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
            annotKey = annotKey + 1

        # the MGI key advances for every new strain, even when no
        # accession row was written for it
        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    # end of "for line in inputFile.readlines():"

    # Update the AccessionMax value
    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Exemplo n.º 19
def processFile():
    """Read the translation input file and write bcp rows for each term.

    requires: the module-level inputFile, transFile, transTypeFile and
              errorFile are open, and vocabKey, mgiTypeKey, transTypeKey,
              newTransType, transTypeName, transCompression and loaddate
              are set.
    effects:  reads the input file; verifies and processes each line,
              writing one MGI_Translation row per valid line through
              bcpWrite(); when newTransType is set, writes one
              translation-type row after the loop.
    returns:  nothing

    Each line must carry four tab-delimited columns: object ID, object
    description, term and user ID. Calls exit() on a shorter line.
    """

    results = db.sql('select maxKey = max(_Translation_key) + 1 from MGI_Translation', 'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        # empty table: start numbering at 1000
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # the translated object is either a vocabulary term or an
        # accessioned MGI object, depending on whether a vocabulary is set
        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        if objectKey == 0 or userKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # add term to translation file
        bcpWrite(transFile, [transKey, transTypeKey, objectKey, term, seq, userKey, userKey, loaddate, loaddate])
        transKey = transKey + 1
        seq = seq + 1

    # end of "for line in inputFile.readlines():"

    # NOTE(review): userKey here is whatever the last input line produced;
    # it is unbound if the input file is empty - confirm that is acceptable.
    if newTransType:
        bcpWrite(transTypeFile, [transTypeKey, mgiTypeKey, vocabKey, transTypeName, transCompression, 0, userKey, userKey, loaddate, loaddate])
Exemplo n.º 20
def processFile():
    """Read the probe/marker/alias input file and write bcp rows.

    Each line must carry six tab-delimited columns: probe ID, marker IDs
    ('|'-delimited, 'none' entries are ignored), J number, relationship,
    aliases ('|'-delimited) and createdBy.

    A line is skipped when the probe, reference, creator, existing
    probe/reference row or any marker fails verification (key 0); the
    reason is written to errorFile. Otherwise rows are written to
    markerFile, refFile and aliasFile, and a delete statement for each
    probe/marker pair is appended to the global execSQL list.

    Calls exit() on a line with too few columns.
    """

    global refKey, aliasKey, execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = tokens[4].split('|')
            createdBy = tokens[5]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        results = db.sql('''select _Reference_key from PRB_Reference
		where _Probe_key = %s
		and _Refs_key = %s
		''' % (probeKey, refsKey), 'auto')

        # no matching row means there is no probe/reference association;
        # report it below instead of failing on results[0]
        referenceKey = results[0]['_Reference_key'] if results else 0
        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference:  %s\n' % (jnum))
            error = 1

        # marker IDs

        markerList = []
        for markerID in markerIDs:

            if markerID == 'none':
                continue

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s\n' % (markerID))
                error = 1
            else:
                markerList.append(markerKey)

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, refsKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                # NOTE(review): markerID is the last ID of the loop above,
                # not necessarily the duplicated one - confirm intent.
                errorFile.write('Invalid Marker Duplicate:  %s\n' % (markerID))

        # NOTE(review): referenceKey is always > 0 here because a zero key
        # is flagged as an error above; confirm whether the reference row
        # was meant to be written only when no association exists yet.
        if referenceKey > 0:
            refKey = referenceKey
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # only used if referenceKey == 0
        refKey = refKey + 1
Exemplo n.º 21
def processFile():
    """Read the probe/marker input file and write marker association rows.

    Each line must carry five tab-delimited columns: probe ID, marker IDs
    ('|'-delimited), J number, relationship and createdBy.

    A line is skipped when the probe, reference, creator or any marker
    fails verification (key 0); the reason is written to errorFile.
    Otherwise one row per distinct marker is written to markerFile and a
    delete statement for the probe/marker pair is appended to the global
    execSQL list. Markers listed more than once on a line are reported
    as duplicates instead of being written.

    Calls exit() on a line with too few columns.
    """

    global execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs

        markerList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                # NOTE(review): `name` is not assigned in this function;
                # presumably a module-level value - confirm, otherwise
                # this line raises NameError.
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            else:
                markerList.append(markerKey)

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s|%s|%d|%s|%s|%s|%s|%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                # NOTE(review): markerID is the last ID of the loop above,
                # not necessarily the duplicated one - confirm intent.
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (name, markerID))
Exemplo n.º 22
def processFile():
    """Read the allele input file and write bcp rows for each new allele.

    Each line must carry 24 tab-delimited columns (marker ID through
    IKMC symbol, see the assignments below). For every line:

    * the creator is verified first; a key of 0 skips the line;
    * when any of createMCL / createNote / setStatus is non-empty,
      processFileIKMC() is called;
    * marker, status, type, transmission, inheritance mode, strain of
      origin and reference are verified; any key of 0 skips the line
      (loadlib records the reason in the error log);
    * otherwise rows are written to alleleFile, mutationFile, refFile,
      annotFile, accFile, noteFile and newAlleleFile, the mutant cell
      line is added, alleleLookup is updated and the global key counters
      are advanced.

    Calls exit() on a line with too few columns.
    """

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey
    global alleleLookup

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus, \
                    symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
                    createdByKey, existingAlleleID)

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum,
                                             errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum,
                                           errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        allMutations = mutations.split('|')

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode,
                                                lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum,
                                                       errorFile)

        # reference
        # NOTE(review): `jnum` is not assigned in this function; presumably
        # a module-level value - confirm, otherwise this raises NameError.
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if 0 in (markerKey, alleleStatusKey, alleleTypeKey, germLineKey,
                 inheritanceModeKey, strainOfOriginKey, refKey):
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum,
                                                 errorFile)
            mutationFile.write('%s|%s|%s|%s|%s\n' \
                % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate))
            mutationKey = mutationKey + 1

        # allele references: "type|J#" pairs separated by '||'
        for reference in references.split('||'):
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            if refType == 'Original':
                refAssocTypeKey = 1011
            elif refType == 'Transmission':
                refAssocTypeKey = 1023
            elif refType == 'Molecular':
                refAssocTypeKey = 1012

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                    createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        # allele subtypes
        for s in alleleSubtypes.split('|'):

            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum,
                                                  errorFile)

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # mutant cell line
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele
        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular notes
        if len(molecularNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                   molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:

            # nine fields, same layout as the molecular note row above
            # (the original format string had only eight placeholders)
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                   ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            # NOTE(review): the fourth field was lost from the source;
            # ikmcSymbol is assumed - confirm against the file's consumer.
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes), \
                   mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                   mgi_utils.prvalue(ikmcSymbol)))
        else:
            # NOTE(review): the original format string had fewer
            # placeholders than values and referenced alleleSubtype /
            # collection, which are not assigned here. The row is now
            # built from the parsed columns - confirm the column layout.
            columns = (markerID, symbol, name, alleleStatus, alleleType,
                       alleleSubtypes, collectionKey, germLine, references,
                       strainOfOrigin, mutantCellLine, allMutations,
                       inheritanceMode, isMixed, isExtinct, refKey,
                       markerStatusKey, createdBy)
            row = '\t'.join(['%s' % (mgi_utils.prvalue(c),) for c in columns])
            newAlleleFile.write('%s\t%s%s\n' \
                % (row, mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append(
            (alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    # end of "for line in inputFile.readlines():"

    # Update the AccessionMax value
    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
Exemplo n.º 23
def processFile():
    """Read the allele input file and write bcp rows for each new allele.

    Each line must carry 24 tab-delimited columns (marker ID through
    IKMC symbol, see the assignments below). For every line:

    * the creator is verified first; a key of 0 skips the line;
    * when any of createMCL / createNote / setStatus is non-empty,
      processFileIKMC() is called;
    * marker, status, type, transmission, inheritance mode, strain of
      origin and reference are verified; any key of 0 skips the line
      (loadlib records the reason in the error log);
    * otherwise rows are written to alleleFile, mutationFile, refFile,
      annotFile, accFile, noteFile/noteChunkFile and newAlleleFile, the
      mutant cell line is added, alleleLookup is updated and the global
      key counters are advanced.

    Calls exit() on a line with too few columns.
    """

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus, \
                    symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
                    createdByKey, existingAlleleID)

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        allMutations = mutations.split('|')

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): `jnum` is not assigned in this function; presumably
        # a module-level value - confirm, otherwise this raises NameError.
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if 0 in (markerKey, alleleStatusKey, alleleTypeKey, germLineKey,
                 inheritanceModeKey, strainOfOriginKey, refKey):
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s\n' \
                % (alleleKey, mutationKey, loaddate, loaddate))

        # allele references: "type|J#" pairs separated by '||'
        for reference in references.split('||'):
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            if refType == 'Original':
                refAssocTypeKey = 1011
            elif refType == 'Transmission':
                refAssocTypeKey = 1023
            elif refType == 'Molecular':
                refAssocTypeKey = 1012

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                    createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        # allele subtypes
        for s in alleleSubtypes.split('|'):

            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # mutant cell line
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele
        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note/MGI_NoteChunk
        # molecular notes

        mgiNoteSeqNum = 1
        if len(molecularNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                   createdByKey, createdByKey, loaddate, loaddate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                   createdByKey, createdByKey, loaddate, loaddate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            # NOTE(review): the fourth field was lost from the source;
            # ikmcSymbol is assumed - confirm against the file's consumer.
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes), \
                   mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                   mgi_utils.prvalue(ikmcSymbol)))
        else:
            # NOTE(review): the original format string had fewer
            # placeholders than values and referenced alleleSubtype /
            # collection, which are not assigned here. The row is now
            # built from the parsed columns - confirm the column layout.
            columns = (markerID, symbol, name, alleleStatus, alleleType,
                       alleleSubtypes, collectionKey, germLine, references,
                       strainOfOrigin, mutantCellLine, allMutations,
                       inheritanceMode, isMixed, isExtinct, refKey,
                       markerStatusKey, createdBy)
            row = '\t'.join(['%s' % (mgi_utils.prvalue(c),) for c in columns])
            newAlleleFile.write('%s\t%s%s\n' \
                % (row, mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    # end of "for line in inputFile.readlines():"

    # Update the AccessionMax value
    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
Exemplo n.º 24
def processFile():
    """Read the strain input file and write bcp rows for each new strain.

    Each line must carry 14 tab-delimited columns: external ID, name,
    allele IDs ('|'-delimited), strain type, species, isStandard, strain
    of origin note, external logical DB, external type key, annotations
    ('|'-delimited), createdBy, mutant note, isPrivate and IMPC colony
    note.

    A line is skipped when the strain already exists (verifyStrain()
    returns a key > 0) or when strain type, species or creator verifies
    to key 0. Otherwise rows are written to strainFile, markerFile,
    accFile (MGI ID and external ID), noteFile/noteChunkFile and
    annotFile, and the global key counters are advanced.

    After the loop the accession maximum and the prb_strain_marker_seq
    and voc_annot_seq sequences are updated in the database.

    Calls exit() on a line with too few columns.
    """

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # strain annotation type: _AnnotType_key = 1009
    annotTypeKey = 1009

    # this is a null qualifier key: _Qualifier_key = 1614158
    annotQualifierKey = 1614158

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            externalID = tokens[0]
            # the whole ID is stored as the prefix part; IDs are not
            # split into prefix:numeric
            externalPrefix = externalID
            externalNumeric = ''
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground,
               createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker

        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
                markerKey = results[0]['_Marker_key']

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                       createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain

        accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
               createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id

        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey,
               createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Strain of Origin Note

        if len(sooNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiStrainOriginTypeKey, \
                   createdByKey, createdByKey, cdate, cdate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, sooNote, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note

        if len(mutantNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiMutantOriginTypeKey, \
                   createdByKey, createdByKey, cdate, cdate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, mutantNote, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # IMPC Colony Note

        if len(impcColonyNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiIMPCColonyTypeKey, \
                   createdByKey, createdByKey, cdate, cdate))

            # the chunk carries the colony note itself (the original wrote
            # the strain-of-origin note text here)
            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, impcColonyNote, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # Annotations

        if len(annotations) > 0:
            for a in annotations.split('|'):

                annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    # end of "for line in inputFile.readlines():"

    # Update the AccessionMax value

    db.sql('select * from ACC_setMax (%d)' % (lineNum), None)

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)

    # update voc_annot_seq auto-sequence
    db.sql(''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''', None)
Exemplo n.º 25
def processFile():
    # requires:
    #   the module-level handles inputFile, errorFile, refFile, aliasFile,
    #   the loaddate value and the verify* helpers used below; none of them
    #   is defined in this function
    # effects:
    #   Reads the tab-delimited input file (probe, J#, '|'-separated
    #   aliases, creator), resolves each value to a key and writes one
    #   probe-reference row (only when no probe-reference key exists yet)
    #   plus one row per non-empty alias.
    #   Records that fail verification are reported to errorFile and skipped.
    #   A line with fewer than four columns ends the load through exit().
    # returns:
    #   nothing

    global refKey, aliasKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens (line[:-1] drops the trailing newline)
        tokens = line[:-1].split('\t')

        try:
            probeID = probeName = tokens[0]
            jnum = tokens[1]
            aliasList = tokens[2].split('|')
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # An MGI accession id is resolved by loadlib; anything else is
        # treated as a probe name and resolved by the local verifyProbe,
        # which also hands back the probe id.
        if 'MGI:' in probeID:
            probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        else:
            probeKey, probeID = verifyProbe(probeName, lineNum, errorFile)

        probeReferenceKey = verifyProbeReference(probeID, jnum, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        # A zero probeReferenceKey is not an error: it means the
        # probe/reference pair does not exist yet and is created below.

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # create a new probe-reference key if one does not already exist
        # else use the existing probe-reference key
        if probeReferenceKey == 0:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            aliasrefKey = refKey
            refKey = refKey + 1
        else:
            aliasrefKey = probeReferenceKey

        # aliases (empty entries produced by the '|' split are skipped)
        for alias in aliasList:

            if len(alias) == 0:
                continue

            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, aliasrefKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1
Exemplo n.º 26
def processFile():
    # requires:
    #   the module-level inputFile, errorFile, refFile handles, refDict,
    #   mgiTypeKey, loaddate and the verify* helpers used below; none of
    #   them is defined in this function
    # effects:
    #   Reads input file
    #   Verifies and Processes each line in the input file:
    #   each tab-delimited line (accession id, J#, association type,
    #   creator) is resolved to keys and written to refFile as one
    #   '|'-delimited row; lines that fail verification are skipped.
    #   A line with fewer than four columns ends the load through exit().
    # returns:
    #   nothing

    global refAssocKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens (line[:-1] drops the trailing newline)
        tokens = line[:-1].split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum,
                                         errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # if errors, continue to next record
        if objectKey == 0 or \
           referenceKey == 0 or \
           refAssocTypeKey == 0 or \
           createdByKey == 0:
            continue

        # if no errors, process the marker

        # could move to verifyDuplicate routine
        # NOTE(review): a duplicate is reported but, as far as the visible
        # code shows, the row is still written; confirm whether duplicates
        # should be skipped instead.
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
Exemplo n.º 27
def main():
    # Reads the -S/-D/-U/-P/-K command-line options, logs in to the database,
    # resolves the user key and then branches on the name the script was
    # invoked under.
    #
    # NOTE(review): statements are missing from this listing. The getopt call
    # is indented as if it sat inside a block that is not shown, and the
    # "... is None:" test and all three scriptName branches have no body.
    # Restore those lines from the original script before running this.
    global userKey

	    optlist, args = getopt.getopt(sys.argv[1:], 'S:D:U:P:K:')

    server = None
    database = None
    user = None
    password = None
    objectKey = None

    # -P names a file; the password is the first line of that file, stripped
    for opt in optlist:
	    if opt[0] == '-S':
		    server = opt[1]
	    elif opt[0] == '-D':
		    database = opt[1]
	    elif opt[0] == '-U':
		    user = opt[1]
	    elif opt[0] == '-P':
		    password = string.strip(open(opt[1], 'r').readline())
	    elif opt[0] == '-K':
		    objectKey = opt[1]

    # every option is mandatory
    # NOTE(review): the handling for a missing option is not visible here
    if server is None or \
       database is None or \
       user is None or \
       password is None or \
       objectKey is None:

    db.set_sqlLogin(user, password, server, database)

    userKey = loadlib.verifyUser(user, 0, None)

    # call functions based on the way the program is invoked

    scriptName = os.path.basename(sys.argv[0])

    # initialize the cre-system lookups

    # all of these invocations will only affect a certain subset of data

    # NOTE(review): the call made in each branch is not visible here
    if scriptName == 'allelecrecache.py':
    elif scriptName == 'allelecrecacheByAllele.py':
    elif scriptName == 'allelecrecacheByAssay.py':


Exemplo n.º 28
def processFile():
    # requires:
    #   the module-level file handles (inputFile, errorFile, strainFile,
    #   markerFile, accFile, noteFile, annotFile), the *Key / *TypeKey /
    #   mgiPrefix / qualifierKey / isGeneticBackground / cdate values and
    #   the verify* helpers used below; none of them is defined here
    # effects:
    #   Reads the tab-delimited strain input file (14 columns) and, for each
    #   line that verifies, writes '|'-delimited rows for: the strain, one
    #   strain/marker row per resolvable allele, the MGI and external
    #   accession ids, up to three notes, and one row per annotation term.
    #   Lines that fail verification are skipped.
    #   A line with fewer than 14 columns ends the load through exit().
    # returns:
    #   nothing

    global lineNum
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens (line[:-1] drops the trailing newline)
        tokens = line[:-1].split('\t')

        try:
            externalID = tokens[0]
            # the whole id is stored as the prefix part; ids are not split
            # into prefix:numeric
            externalPrefix = externalID
            externalNumeric = ''
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # if errors, continue to next record
        # (a strain key greater than zero is treated as an error as well)
        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground,
               createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker

        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)

                # nothing to look up for an allele that did not resolve
                if alleleKey == 0 or alleleKey is None:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
                markerKey = results[0]['_Marker_key']

                # an allele without a marker is written with an empty
                # marker column
                if markerKey is not None:
                    markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                        % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                           createdByKey, createdByKey, cdate, cdate))
                else:
                    markerFile.write('%s|%s||%s|%s|%s|%s|%s|%s\n' \
                        % (strainmarkerKey, strainKey, alleleKey, qualifierKey,
                           createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        # all private = 0 (false)

        accFile.write('%d|%s%d|%s|%s|1|%d|%d|%s|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
               isPrivate, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id

        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey,
               createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note, in this order:
        # Strain of Origin Note, Mutant Cell Line of Origin Note,
        # IMPC Colony Note; empty notes are not written

        for noteTypeKey, noteText in (
                (mgiStrainOriginTypeKey, sooNote),
                (mgiMutantOriginTypeKey, mutantNote),
                (mgiIMPCColonyTypeKey, impcColonyNote)):

            if len(noteText) > 0:
                noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (noteKey, strainKey, mgiNoteObjectKey, noteTypeKey, noteText, \
                       createdByKey, createdByKey, cdate, cdate))
                noteKey = noteKey + 1

        # Annotations
        # _AnnotType_key = 1009
        # _Qualifier_key = 1614158

        if len(annotations) > 0:

            # strain annotation type
            annotTypeKey = 1009

            # this is a null qualifier key
            annotQualifierKey = 1614158

            for a in annotations.split('|'):

                # terms that do not resolve are skipped
                annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1
Exemplo n.º 29
def processFile():
    # requires:
    #   the module-level file handles (inputFile, errorFile, probeFile,
    #   markerFile, refFile, aliasFile, accFile, accRefFile, newProbeFile,
    #   rawNoteFile, noteFile), mgiPrefix, mgiTypeKey, loaddate, DEBUG and
    #   the verify* helpers used below; none of them is defined here
    # effects:
    #   Reads the tab-delimited probe input file (22 columns) and, for each
    #   line that verifies, writes the probe row, its marker, reference,
    #   alias, accession and note rows, and a copy of the input line with
    #   the new MGI id appended.
    #   Lines that fail verification are reported to errorFile and skipped.
    #   A line with fewer than 22 columns ends the load through exit().
    #   After the last line, ACC_setMax is called unless DEBUG is set.
    # returns:
    #   nothing

    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens (line[:-1] drops the trailing newline)
        tokens = line[:-1].split('\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = tokens[15].split('|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = tokens[18].split('|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = parentID != ''
        isSource = sourceName != ''
        parentProbeKey = ''
        sourceKey = 0

        # parent from = no, source given = no:
        # the source is resolved from its individual attributes
        if not isParent and not isSource:
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(segmentTypeKey, \
                vectorKey, organismKey, strainKey, \
                tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
               genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
               segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' % (segmentType, vectorType, organism, strain, tissue, gender, cellLine, age))
                error = 1

        # parent from = no, source given = yes:
        # the source is resolved by its library name
        elif not isParent and isSource:
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum, errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

        # parent from = yes, source given = yes or no (ignored)
        else:
            parentProbeKey, sourceKey = verifyParentProbe(parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        # Each resolved key is kept together with the id it came from so
        # that the duplicate report further down can name the right id.
        # Empty ids are neither an error nor a marker.
        markers = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markers.append((markerID, markerKey))

        # sequence IDs, given as logicalDB:accession;
        # ids whose logical DB does not resolve are dropped
        seqAccDict = {}
        for seqID in sequenceIDs.split('|'):
            if len(seqID) > 0:
                logicalDB, acc = seqID.split(':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey, mgi_utils.prvalue(regionCovered), \
            mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize), createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported, not loaded
        markerKeys = [markerKey for markerID, markerKey in markers]
        for markerID, markerKey in markers:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases (empty entries produced by the '|' split are skipped)

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the marker

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field
        # NOTE(review): regionCovered and insertSite are concatenated into a
        # single column here ('+', not ','); kept as found, confirm intent.

        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (name, jnum, \
            mgi_utils.prvalue(sourceName), \
            organism, \
            mgi_utils.prvalue(strain), \
            mgi_utils.prvalue(tissue), \
            mgi_utils.prvalue(gender), \
            mgi_utils.prvalue(cellLine), \
            mgi_utils.prvalue(age), \
            mgi_utils.prvalue(vectorType), \
            mgi_utils.prvalue(segmentType), \
            mgi_utils.prvalue(regionCovered) + \
            mgi_utils.prvalue(insertSite), \
            mgi_utils.prvalue(insertSize), \
            '|'.join(markerIDs), \
            relationship, \
            mgi_utils.prvalue(sequenceIDs), \
            '|'.join(aliasList), \
            mgi_utils.prvalue(notes), \
            createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file

        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

        # Notes

        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' % (probeKey, notes, loaddate, loaddate))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids, each with an accession-reference row
        for acc in seqAccDict:
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        refKey = refKey + 1
        probeKey = probeKey + 1

    #   end of "for line in inputFile.readlines():"

    # Update the AccessionMax value

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Exemplo n.º 30
def processFile():
    # requires:
    #   the module-level inputFile, errorFile, markerFile handles, the
    #   execSQL list, the deleteSQL template, loaddate and the loadlib
    #   verify* helpers; none of them is defined in this function
    # effects:
    #   Reads the tab-delimited input file (probe id, '|'-separated marker
    #   ids, J#, relationship, creator). For each line that verifies, writes
    #   one '|'-delimited probe/marker row per marker and appends the
    #   matching delete statement to execSQL.
    #   Lines that fail verification are reported to errorFile and skipped.
    #   A line with fewer than five columns ends the load through exit().
    # returns:
    #   nothing

    global execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens (line[:-1] drops the trailing newline)
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        # Each resolved key is kept together with the id it came from so
        # that the duplicate report below can name the right id.
        # The probe is identified by probeID in the messages: this function
        # never assigns a probe name.
        markers = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (probeID, markerID))
                error = 1
            else:
                markers.append((markerID, markerKey))

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker listed more than once on the line is reported, not loaded
        markerKeys = [markerKey for markerID, markerKey in markers]
        for markerID, markerKey in markers:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s|%s|%d|%s|%s|%s|%s|%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (probeID, markerID))
Exemplo n.º 31
def processFile():
    # requires:
    #   the module-level inputFile, errorFile, refFile handles, refDict,
    #   mgiTypeKey, loaddate and the verify* helpers used below; none of
    #   them is defined in this function
    # effects:
    #   Reads input file
    #   Verifies and Processes each line in the input file:
    #   a tab-delimited line holds accession id, J#, reference association
    #   type and creator; every line that resolves to four non-zero keys
    #   becomes one '|'-delimited row in refFile, all others are skipped.
    #   A line with fewer than four columns ends the load through exit().
    # returns:
    #   nothing

    global refAssocKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens (line[:-1] drops the trailing newline)
        tokens = line[:-1].split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # if errors, continue to next record
        unresolved = (objectKey == 0 or
                      referenceKey == 0 or
                      refAssocTypeKey == 0 or
                      createdByKey == 0)
        if unresolved:
            continue

        # if no errors, process the marker

        # could move to verifyDuplicate routine
        # NOTE(review): the visible code only reports a duplicate and then
        # still writes the row; confirm whether duplicates should be
        # skipped instead.
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
Exemplo n.º 32
	# NOTE(review): fragment -- the enclosing definition and the statement
	# that builds cmd are not part of this listing.
	# Runs the query held in cmd and writes one delimited record per row.
	results = db.sql(cmd, 'auto')

	for r in results:

		# chromosome and loaddate are concatenated as-is, every other column
		# goes through str(); the user key fills two consecutive columns and
		# so does loaddate. DL separates fields, NL ends the record.
		outBCP.write(str(r['_Map_key']) + DL + \
			str(r['_Object_key']) + DL + \
			r['chromosome'] + DL + \
			str(r['startCoordinate']) + DL + \
			str(r['endCoordinate']) + DL + \
			str(r['strand']) + DL + \
			str(r['mapUnits']) + DL + \
			str(r['provider']) + DL + \
			str(r['version']) + DL + \
			str(userKey) + DL + str(userKey) + DL + \
			loaddate + DL + loaddate + NL)


# Main Routine

# the user to load as comes from the MGD_DBUSER environment variable
userKey = loadlib.verifyUser(os.environ['MGD_DBUSER'], 1, None)

# Python 2 print statements: the date is printed twice in a row.
# NOTE(review): presumably the actual work ran between the two prints and
# was lost from this listing -- confirm against the original script.
print '%s' % mgi_utils.date()
print '%s' % mgi_utils.date()

Exemplo n.º 33
def processFile():
    # Purpose: processes input file
    # Returns: nothing
    # Assumes: inputFile and errorFile are module-level handles set up
    #          outside this function
    # Effects: rebinds the globals declared below for every input line and
    #          writes an 'Errors:' line to errorFile for each library that
    #          fails verification
    # Throws: nothing explicitly
    #
    # NOTE(review): this listing is incomplete and does not parse as shown.
    # Several calls lack their closing argument line, the block around the
    # line split and the bodies of some branches are missing, organismKey
    # is tested but never assigned here, and the function stops right after
    # the new-library key is allocated. Restore the missing lines from the
    # original script before relying on this code.

    global libraryName, libraryID, libraryKey, logicalDBKey
    global segmentTypeKey, vectorTypeKey, organismKey, referenceKey, strainKey, tissueKey
    global age, ageMin, ageMax, genderKey, cellLineKey, createdByKey
    global strainNS, tissueNS, genderNS, cellLineNS, ageNS

    lineNum = 0

    # retrieve next available primary key for Library record
    results = db.sql(
        'select maxKey = max(_Source_key) + 1 from %s' % (libraryTable),
    newlibraryKey = results[0]['maxKey']

    # resolve the NS value once for each attribute; age keeps NS itself
    strainNS = sourceloadlib.verifyStrain(NS, 0, None)
    tissueNS = sourceloadlib.verifyTissue(NS, 0, None)
    genderNS = sourceloadlib.verifyGender(NS, 0, None)
    cellLineNS = sourceloadlib.verifyCellLine(NS, 0, None)
    ageNS = NS

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        # (the line must hold exactly these 15 TAB-separated columns)

            [libraryName, \
      logicalDB, \
      libraryID, \
      segmentType, \
      vectorType, \
      organism, \
      strain, \
      tissue, \
      age, \
      gender, \
      cellLine, \
      jnum, \
      note, \
      cloneCollections, \
      createdBy] = string.split(line[:-1], TAB)
            exit(1, 'Invalid Line (line: %d): %s\n' % (lineNum, line))

        # look the library up by name first, then (below) by its id
        libraryKey = sourceloadlib.verifyLibrary(libraryName, lineNum)

        if len(logicalDB) > 0:
            logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum,
            logicalDBKey = 0

        if libraryKey == 0 and len(libraryID) > 0:
            libraryKey = sourceloadlib.verifyLibraryID(libraryID, logicalDBKey,
                                                       lineNum, errorFile)

        segmentTypeKey = sourceloadlib.verifySegmentType(
            segmentType, lineNum, errorFile)
        vectorTypeKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
        strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
        tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
        genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
        cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum,
        ageMin, ageMax = sourceloadlib.verifyAge(age, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # a zero key, or an age that did not resolve, marks the record as bad
        if segmentTypeKey == 0 or \
    vectorTypeKey == 0 or \
           strainKey == 0 or \
           tissueKey == 0 or \
           genderKey == 0 or \
    cellLineKey == 0 or \
    organismKey == 0 or \
           referenceKey == 0 or \
    createdByKey == 0 or \
    ageMin is None:
            # set error flag to true
            error = 1
            #	    print str(segmentTypeKey)
            #	    print str(vectorTypeKey)
            #	    print str(strainKey)
            #	    print str(tissueKey)
            #	    print str(genderKey)
            #	    print str(cellLineKey)
            #	    print str(organismKey)
            #	    print str(referenceKey)
            #	    print str(createdByKey)
            #	    print str(ageMin)
            errorFile.write('Errors:  %s\n' % (libraryName))

        # if errors, continue to next record
        if error:

        # if no errors, continue processing

        # process new library
        if libraryKey == 0:

            # an unknown library takes the next free primary key
            libraryKey = newlibraryKey

            # increment primary keys
            newlibraryKey = newlibraryKey + 1

# else, process existing library


Exemplo n.º 34
def init():
    # requires:
    #   the module-level noteTable and noteChunkTable names, the mgiObjects
    #   dictionary and the file-level exit() routine
    # effects:
    # 1. Processes command line options
    # 2. Initializes local DBMS parameters
    # 3. Initializes global file descriptors/file names
    # 4. Writes the run header to the diagnostics and error files
    # 5. Fills mgiObjects with accID -> _Object_key for the object type
    # returns:
    #   nothing
    global inputFile, diagFile, errorFile, errorFileName, diagFileName
    global passwordFileName
    global noteFile, noteFileName, noteChunkFile, noteChunkFileName, sqlFile, sqlFileName
    global mode
    global noteTypeName
    global objectTypeKey, createdByKey
    global mgiObjects

    def openOrExit(fileName, fileMode):
        # Open fileName, or end the load with a message if that fails.
        try:
            return open(fileName, fileMode)
        except IOError:
            exit(1, 'Could not open file %s\n' % fileName)

    # NOTE(review): the handling of invalid options is not visible in this
    # listing, so getopt.GetoptError simply propagates to the caller.
    optlist, args = getopt.getopt(sys.argv[1:], 'S:D:U:P:M:I:O:T:')

    # Set server, database, user, passwords depending on options
    # specified by user.
    server = None
    database = None
    user = None
    password = None

    for flag, value in optlist:
        if flag == '-S':
            server = value
        elif flag == '-D':
            database = value
        elif flag == '-U':
            user = value
        elif flag == '-P':
            passwordFileName = value
        elif flag == '-M':
            mode = value
        elif flag == '-I':
            inputFileName = value
        elif flag == '-O':
            objectType = value
        elif flag == '-T':
            # double quotes are removed from the note type name
            noteTypeName = re.sub('"', '', value)

    # Initialize db.py DBMS parameters
    # (the password is the first line of the password file, stripped)
    with open(passwordFileName, 'r') as passwordFile:
        password = passwordFile.readline().strip()
    db.set_sqlLogin(user, password, server, database)

    # all output files are named after the input file (directory part
    # dropped), so they are created relative to the current directory
    head, tail = os.path.split(inputFileName)
    diagFileName = tail + '.diagnostics'
    errorFileName = tail + '.error'
    noteFileName = tail + '.' + noteTable + '.bcp'
    noteChunkFileName = tail + '.' + noteChunkTable + '.bcp'
    sqlFileName = tail + '.sql'

    inputFile = openOrExit(inputFileName, 'r')
    diagFile = openOrExit(diagFileName, 'w')
    errorFile = openOrExit(errorFileName, 'w')
    noteFile = openOrExit(noteFileName, 'w')
    noteChunkFile = openOrExit(noteChunkFileName, 'w')
    sqlFile = openOrExit(sqlFileName, 'w')

    # Set Log File Descriptor

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (server))
    diagFile.write('Database: %s\n' % (database))
    diagFile.write('User: %s\n' % (user))
    diagFile.write('Input File: %s\n' % (inputFileName))
    diagFile.write('Object Type: %s\n' % (objectType))
    diagFile.write('Note Type: %s\n' % (noteTypeName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    objectTypeKey = accessionlib.get_MGIType_key(objectType)
    createdByKey = loadlib.verifyUser(db.get_sqlUser(), 0, errorFile)

    # cache the preferred MGI accession id of every object of this type
    results = db.sql('''
		select accID, _Object_key from ACC_Accession
		where _MGIType_key = %s 
		and _LogicalDB_key = 1 
		and prefixPart = 'MGI:'
		and preferred = 1
		''' % (objectTypeKey), 'auto')
    for r in results:
        mgiObjects[r['accID']] = r['_Object_key']
Exemplo n.º 35
def processFile():
    """Read the input file, resolve values to keys and write the bcp files.

    Every tab-delimited input line must carry 16 fields (marker ID
    through creator login).

    Returns: 0
    Assumes: file descriptors and the global key counters have been
        initialized
    Effects: exits on the first line with fewer than 16 fields; skips
        lines whose values cannot be resolved (a key of 0, or no
        resolvable molecular mutation); writes the allele, mutation,
        reference, annotation, accession, note and new-allele report
        files; advances the global key counters; calls ACC_setMax with
        the number of lines read when DEBUG == 'false'
    """

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup, alleleMutationKey

    lineNum = 0

    for line in fpInputFile.readlines():

        lineNum += 1
        print('%s: %s' % (lineNum, line))

        # strip only the line terminator so a final line without one
        # does not lose the last character of its last field
        tokens = line.rstrip('\n').split('\t')
        try:
            markerID = tokens[0]
            markerSymbol = tokens[1]
            mutationType = tokens[2]  # IMPC allele type
            description = tokens[3]
            colonyID = tokens[4]
            strainOfOrigin = tokens[5]
            alleleSymbol = tokens[6]
            alleleName = tokens[7]
            inheritanceMode = tokens[8]
            alleleType = tokens[9]  # IMPC allele class
            alleleSubType = tokens[10]
            alleleStatus = tokens[11]
            transmission = tokens[12]
            collection = tokens[13]
            jNum = tokens[14]
            createdBy = tokens[15]
        except IndexError:
            print('exiting with invalid line')
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        print('validating data and getting keys')

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile)

        # _vocab_key = 36 (Allele Molecular Mutation); ';'-delimited list
        mutationList = mutationType.split(';')
        if len(mutationList) > 1:
            print('mutationList: %s' % mutationList)
        mutationKeyList = []
        for m in mutationList:
            mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile)
            if mutationKey != 0:
                mutationKeyList.append(mutationKey)
        if len(mutationKeyList) > 1:
            print('mutationKeyList: %s' % mutationKeyList)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(
            strainOfOrigin, lineNum, fpErrorFile)

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm(
            '', 35, inheritanceMode, lineNum, fpErrorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm(
            '', 38, alleleType, lineNum, fpErrorFile)

        # _vocab_key = 93 (Allele Subtype); optional, ';'-delimited list
        subTypeList = alleleSubType.split(';')
        if len(subTypeList) > 1:
            print('subTypeList: %s' % subTypeList)
        subTypeKeyList = []
        for s in subTypeList:
            if s != '':  # only look up a key when a subtype was given
                subTypeKey = loadlib.verifyTerm(
                    '', 93, s, lineNum, fpErrorFile)
                if subTypeKey != 0:
                    subTypeKeyList.append(subTypeKey)
        if len(subTypeKeyList) > 1:
            print('subTypeKeyList: %s' % subTypeKeyList)

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm(
            '', 37, alleleStatus, lineNum, fpErrorFile)

        # _vocab_key = 61 (Allele Transmission)
        transmissionKey = loadlib.verifyTerm(
            '', 61, transmission, lineNum, fpErrorFile)

        # _vocab_key = 92
        collectionKey = loadlib.verifyTerm(
            '', 92, collection, lineNum, fpErrorFile)

        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # reference
        refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile)

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile)
        if createdByKey == 0:
            continue

        print('checking for missing data')
        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if markerKey == 0 \
                or mutationKeyList == [] \
                or strainOfOriginKey == 0 \
                or inheritanceModeKey == 0 \
                or alleleTypeKey == 0 \
                or alleleStatusKey == 0 \
                or transmissionKey == 0 \
                or collectionKey == 0 \
                or refKey == 0 \
                or createdByKey == 0:
            print('missing data, skipping this line')
            continue

        # if no errors, process the allele
        print('writing to allele file')

        # allele (isWildType = 0)
        fpAlleleFile.write(
            '%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey,
               alleleTypeKey, alleleStatusKey, transmissionKey, collectionKey,
               alleleSymbol, alleleName, isExtinct, isMixed, refKey,
               markerStatusKey, createdByKey, createdByKey, createdByKey,
               loaddate, loaddate, loaddate))

        # molecular mutation
        for mutationKey in mutationKeyList:
            fpMutationFile.write(
                '%s|%s|%s|%s|%s\n'
                % (alleleMutationKey, alleleKey, mutationKey,
                   loaddate, loaddate))
            alleleMutationKey += 1

        # reference associations: one "original" and one "molecular"
        for refTypeKey in (origRefTypeKey, molRefTypeKey):
            fpRefFile.write(
                '%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refTypeKey,
                   createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey += 1

        # allele subtype
        for subTypeKey in subTypeKeyList:
            fpAnnotFile.write(
                '%s|%s|%s|%s|%s|%s|%s\n'
                % (annotKey, annotTypeKey, alleleKey, subTypeKey,
                   qualifierKey, loaddate, loaddate))
            annotKey += 1

        # MGI Accession ID for the allele
        alleleID = '%s%s' % (mgiPrefix, mgiKey)
        fpAccFile.write(
            '%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n'
            % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note: molecular note, then colony ID note.
        # Both rows carry nine fields; the colony ID row previously had
        # only eight placeholders for its nine values and raised TypeError.
        for noteTypeKey, noteText in (
                (molecularNoteTypeKey, description),
                (colonyIdNoteTypeKey, colonyID)):
            fpNoteFile.write(
                '%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                % (noteKey, alleleKey, mgiTypeKey, noteTypeKey, noteText,
                   createdByKey, createdByKey, loaddate, loaddate))
            noteKey += 1

        # Print out a new text file that carries the new MGI Allele ID
        # NOTE(review): the sixth column was missing from this excerpt;
        # colonyID is assumed - confirm against the report's consumers.
        fpNewAlleleRptFile.write(
            '%s\t%s\t%s\t%s\t%s\t%s\n'
            % (mgi_utils.prvalue(alleleID),
               mgi_utils.prvalue(alleleSymbol),
               mgi_utils.prvalue(alleleName),
               mgi_utils.prvalue(markerID),
               mgi_utils.prvalue(markerSymbol),
               mgi_utils.prvalue(colonyID)))

        accKey += 1
        mgiKey += 1
        alleleKey += 1

    # Update the AccessionMax value
    print('DEBUG: %s' % DEBUG)
    if DEBUG == 'false':
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)

    return 0
Exemplo n.º 36
		password = string.strip(open(opt[1], 'r').readline())
	elif opt[0] == '-K':
		objectKey = opt[1]

if server is None or \
   database is None or \
   user is None or \
   password is None or \
   objectKey is None:

db.set_sqlLogin(user, password, server, database)
userKey = loadlib.verifyUser(user, 0, None)

# call functions based on the way the program is invoked

scriptName = os.path.basename(sys.argv[0])

# all of these invocations will only affect a certain subset of data

if scriptName == 'allelecombination.py':

elif scriptName == 'allelecombinationByAllele.py':

elif scriptName == 'allelecombinationByMarker.py':
Exemplo n.º 37
def processFile():
    """Merge each "from" probe into its "to" probe, one input line at a time.

    Every tab-delimited input line must carry 5 fields: from ID, name,
    to ID, J number and creator login.

    Effects: exits on the first line with fewer than 5 fields; writes a
        message to errorFile and skips the line when an ID, the
        reference or the creator cannot be resolved (key of 0), when the
        gene check returns no rows, or when the reference is on none of
        the "from" probe's assays; otherwise writes a reference row and
        an alias row and queues the assay, reference and probe SQL in
        the global exec* lists
    """

    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum += 1

        # strip only the line terminator so a final line without one
        # does not lose the last character of its last field
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From":  %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To":  %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = '''
			select f.*
			from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
			where f._Probe_key = %s
			and t._Probe_key = %s
			and p._Probe_key = %s
			and p._ProbePrep_key = a._ProbePrep_key
			and f._Marker_key = t._Marker_key
			and f._Marker_key = a._Marker_key
			''' % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same:  %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
			 select a.*
			 from GXD_ProbePrep p, GXD_Assay a
			 where p._Probe_key = %s
			 and p._ProbePrep_key = a._ProbePrep_key
			 and a._Refs_key = %s
			 ''' % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write('J: is not on any Assays attached to the probe:  %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID
        refFile.write(
            '%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n'
            % (refKey, toKey, referenceKey, createdByKey, createdByKey,
               loaddate, loaddate))
        aliasFile.write(
            '%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
            % (aliasKey, refKey, name, createdByKey, createdByKey,
               loaddate, loaddate))
        refKey += 1
        aliasKey += 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
Exemplo n.º 38
def processFile():
    """Read the probe input file and write one set of output rows per probe.

    Every tab-delimited input line must carry 22 fields (name through
    creator login).

    Effects: exits on the first line with fewer than 22 fields; skips a
        line when any of its values resolves to a key of 0; otherwise
        writes the probe, marker, reference, alias, accession, note and
        new-probe report rows; advances the global key counters; calls
        ACC_setMax with the number of lines read unless DEBUG is set
    """

    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum += 1

        # strip only the line terminator so a final line without one
        # does not lose the last character of its last field
        tokens = line.rstrip('\n').split('\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = tokens[15].split('|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = tokens[18].split('|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = parentID != ''
        isSource = sourceName != ''
        parentProbeKey = ''
        sourceKey = 0

        if not isParent and not isSource:
            # no parent and no named source: resolve the source from its
            # individual attributes
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(
                segmentTypeKey, vectorKey, organismKey, strainKey,
                tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
                    genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
                    segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' %
                                (segmentType, vectorType, organism, strain,
                                 tissue, gender, cellLine, age))
                error = 1

        elif not isParent and isSource:
            # no parent, named source: look the source up by name
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum, errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

        else:
            # parent from = yes, source given = yes or no (ignored)
            parentProbeKey, sourceKey = verifyParentProbe(
                parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        markerList = []      # resolved marker keys, in input order
        markerIDByKey = {}   # marker key -> ID as given, for error reporting
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerList.append(markerKey)
                markerIDByKey[markerKey] = markerID

        # sequence IDs, each given as "logicalDB:accession"
        seqAccDict = {}
        for seqID in sequenceIDs.split('|'):
            if len(seqID) > 0:
                [logicalDB, acc] = seqID.split(':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write(
            '%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n'
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey,
               segmentTypeKey, mgi_utils.prvalue(regionCovered),
               mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize),
               createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once is reported and not written at all
        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write(
                    '%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n'
                    % (probeKey, markerKey, referenceKey, relationship,
                       createdByKey, createdByKey, loaddate, loaddate))
            else:
                # report the duplicated marker itself rather than whichever
                # ID happened to be last on the input line
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (name, markerIDByKey[markerKey]))

        refFile.write(
            '%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n'
            % (refKey, probeKey, referenceKey, createdByKey, createdByKey,
               loaddate, loaddate))

        # aliases
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write(
                '%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
                % (aliasKey, refKey, alias, createdByKey, createdByKey,
                   loaddate, loaddate))
            aliasKey += 1

        # MGI Accession ID for the probe
        accFile.write(
            '%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n'
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey,
               mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field
        # NOTE(review): regionCovered and insertSite are concatenated into a
        # single column here ('+', not ','); confirm that is intended.
        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n'
            % (name, jnum,
               mgi_utils.prvalue(sourceName),
               organism,
               mgi_utils.prvalue(strain),
               mgi_utils.prvalue(tissue),
               mgi_utils.prvalue(gender),
               mgi_utils.prvalue(cellLine),
               mgi_utils.prvalue(age),
               mgi_utils.prvalue(vectorType),
               mgi_utils.prvalue(segmentType),
               mgi_utils.prvalue(regionCovered) +
               mgi_utils.prvalue(insertSite),
               mgi_utils.prvalue(insertSize),
               '|'.join(markerIDs),
               relationship,
               mgi_utils.prvalue(sequenceIDs),
               '|'.join(aliasList),
               mgi_utils.prvalue(notes),
               createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file
        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

        # Notes
        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' %
                           (probeKey, notes, loaddate, loaddate))

        accKey += 1
        mgiKey += 1

        # sequence accession ids
        for acc, logicalDBKey in seqAccDict.items():
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write(
                '%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n'
                % (accKey, acc, prefixPart, numericPart, logicalDBKey,
                   probeKey, mgiTypeKey, createdByKey, createdByKey,
                   loaddate, loaddate))
            accRefFile.write(
                '%s\t%s\t%s\t%s\t%s\t%s\n'
                % (accKey, referenceKey, createdByKey, createdByKey,
                   loaddate, loaddate))
            accKey += 1

        refKey += 1
        probeKey += 1

    # end of "for line in inputFile.readlines():"

    # Update the AccessionMax value

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Exemplo n.º 39
def processFile():
    """Read the alias input file and write probe reference and alias rows.

    Every tab-delimited input line must carry 4 fields: probe (an MGI ID
    or a probe name), J number, '|'-delimited aliases and creator login.

    Effects: exits on the first line with fewer than 4 fields; writes a
        message to errorFile and skips the line when the probe, the
        reference or the creator resolves to a key of 0; otherwise
        writes one alias row per non-empty alias, plus a probe-reference
        row when none exists yet; advances refKey and aliasKey
    """

    global refKey, aliasKey

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum += 1

        # strip only the line terminator so a final line without one
        # does not lose the last character of its last field
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = probeName = tokens[0]
            jnum = tokens[1]
            aliasList = tokens[2].split('|')
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # an MGI ID is verified through loadlib; anything else is treated
        # as a probe name and resolved by this module's verifyProbe
        if 'MGI:' in probeID:
            probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        else:
            probeKey, probeID = verifyProbe(probeName, lineNum, errorFile)

        probeReferenceKey = verifyProbeReference(probeID, jnum, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # create a new probe-reference key if one does not already exist
        # else use the existing probe-reference key
        if probeReferenceKey == 0:
            refFile.write(
                '%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n'
                % (refKey, probeKey, referenceKey, createdByKey,
                   createdByKey, loaddate, loaddate))
            aliasrefKey = refKey
            refKey += 1
        else:
            aliasrefKey = probeReferenceKey

        # aliases
        for alias in aliasList:

            if len(alias) == 0:
                continue

            aliasFile.write(
                '%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
                % (aliasKey, aliasrefKey, alias, createdByKey, createdByKey,
                   loaddate, loaddate))
            aliasKey += 1
Exemplo n.º 40
def init():
    """Open the load's files and resolve the global keys.

    Effects:
    1. Derives the diagnostics, error and bcp file names from the base
       name of inputFileName (diagnostics/error names carry today's date)
    2. Opens the input, diagnostics, error and reference files; exits
       if any of them cannot be opened
    3. Writes the start-up header to the diagnostics and error files
    4. Sets mgiTypeKey and createdByKey from mgiType and createdBy
    """

    global inputFile, diagFile, errorFile, errorFileName, diagFileName
    global refFileName, refFile
    global mgiTypeKey
    global refAssocKey, createdByKey

    fdate = mgi_utils.date('%m%d%Y')  # current date
    tail = os.path.basename(inputFileName)
    diagFileName = tail + '.' + fdate + '.diagnostics'
    errorFileName = tail + '.' + fdate + '.error'
    refFileName = tail + '.MGI_Reference_Assoc.bcp'

    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        exit(1, 'Could not open file %s\n' % inputFileName)

    try:
        diagFile = open(diagFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % errorFileName)

    try:
        refFile = open(refFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % refFileName)

    # NOTE(review): the original carried "Log all SQL" and "Set Log File
    # Descriptor" comments here with no calls beneath them in this
    # excerpt; confirm whether SQL logging still needs to be configured.

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
Exemplo n.º 41
def init():
    """Open the set load's files and resolve the set and member keys.

    Effects:
    1. Opens the diagnostics, error, input, set and set-member files;
       exits if any of them cannot be opened
    2. Writes the start-up header to the diagnostics and error files
    3. Sets DEBUG in 'preview' mode; exits on any mode other than
       'preview' or 'load'
    4. Sets createdByKey and mgiTypeKey from createdBy and setType
    5. Reuses the MGI_Set named setName for that type when one exists
       (deleting its current members), otherwise writes a new set row
    6. Sets setMemberKey to the next available member key
    """

    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outSetFile, outMemberFile
    global setKey, setMemberKey, createdByKey, mgiTypeKey, useSetKey
    global DEBUG

    diagFileName = '%s/setload.diagnostics' % (outputDir)
    errorFileName = '%s/setload.error' % (outputDir)

    try:
        diagFile = open(diagFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % errorFileName)

    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        exit(1, 'Could not open file %s\n' % inputFileName)

    # Output Files

    fullPathSetFile = '%s/%s' % (outputDir, outSetFileName)
    try:
        outSetFile = open(fullPathSetFile, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % fullPathSetFile)

    fullPathMemberFile = '%s/%s' % (outputDir, outMemberFileName)
    try:
        outMemberFile = open(fullPathMemberFile, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % fullPathMemberFile)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    if mode == 'preview':
        DEBUG = 1
        # NOTE(review): bcpon is not declared global, so this assignment
        # only creates a local; confirm whether a module-level flag was
        # meant to be cleared here.
        bcpon = 0
    elif mode != 'load':
        exit(1, 'Invalid Processing Mode:  %s\n' % (mode))

    results = db.sql('select max(_Set_key) + 1 as maxKey from MGI_Set', 'auto')
    setKey = results[0]['maxKey']

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
    mgiTypeKey = loadlib.verifyMGIType(setType, 0, errorFile)

    # use existing MGI_Set, or create a new one
    results = db.sql(
        'select _Set_key from MGI_Set where _MGIType_key = %s and name = \'%s\''
        % (mgiTypeKey, setName), 'auto')

    if len(results) > 0:
        for r in results:
            setKey = r['_Set_key']
        # delete/reload
        db.sql('delete from MGI_SetMember where _Set_key = %s' % (setKey),
               None)
    else:
        outSetFile.write(TAB.join([
            str(setKey),
            str(mgiTypeKey),
            str(setName),
            '1',
            str(createdByKey), str(createdByKey),
            loaddate, loaddate]) + CRT)

    results = db.sql(
        'select max(_SetMember_key) + 1 as maxKey from MGI_SetMember', 'auto')
    setMemberKey = results[0]['maxKey']

Exemplo n.º 42
def init():
    """Open the synonym load's files and resolve the global keys.

    Effects:
    1. Derives the diagnostics and error file names from logDir and the
       base name of inputFileName
    2. Opens the input, diagnostics, error and synonym bcp files; exits
       if any of them cannot be opened
    3. Writes the start-up header to the diagnostics and error files
    4. Sets mgiTypeKey, createdByKey and referenceKey (the empty string
       when jnum is 'J:0', i.e. no reference given); exits if any of
       them resolves to 0
    5. In 'reload' mode, deletes the MGI_Synonym rows of this MGI type
       that were created by this user
    """

    global diagFileName, errorFileName, synFileName
    global inputFile, diagFile, errorFile, synFile
    global mgiTypeKey, createdByKey, referenceKey

    tail = os.path.basename(inputFileName)
    diagFileName = logDir + '/' + tail + '.diagnostics'
    errorFileName = logDir + '/' + tail + '.error'
    synFileName = 'MGI_Synonym.bcp'

    print(inputFileName)
    print(logDir)

    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        exit(1, 'Could not open file %s\n' % inputFileName)

    try:
        diagFile = open(diagFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % errorFileName)

    # report the path that was actually opened, not just the base name
    synFilePath = outputDir + '/' + synFileName
    try:
        synFile = open(synFilePath, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % synFilePath)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    # if reference is J:0, then no reference is given
    if jnum == 'J:0':
        referenceKey = ''
    else:
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)

    # exit if we can't resolve mgiType, createdBy or jnum
    if mgiTypeKey == 0 or createdByKey == 0 or referenceKey == 0:
        exit(1, 'Could not resolve MGI type, creator or reference\n')

    if mode == 'reload':
        print('mode is: %s, deleting synonyms' % mode)
        db.sql('delete from MGI_Synonym ' +
               'where _MGIType_key = %d ' % (mgiTypeKey) +
               'and _CreatedBy_key = %d ' % (createdByKey), None)
Exemplo n.º 43
def processFile():
    """Read the input file and write one translation row per valid line.

    Every tab-delimited input line must carry 4 fields: object ID,
    object description, term and user login.  The object is verified as
    a vocabulary term when vocabKey > 0, otherwise as an object of
    mgiTypeKey.

    Returns: nothing
    Effects: exits on the first line with fewer than 4 fields; skips
        lines whose object or user resolves to a key of 0; writes a
        translation-type row after the loop when newTransType is set
    """

    results = db.sql(
        'select maxKey = max(_Translation_key) + 1 from MGI_Translation',
        'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        # no translation key exists yet; start numbering at 1000
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    for line in inputFile.readlines():

        lineNum += 1

        # strip only the line terminator so a final line without one
        # does not lose the last character of its last field
        tokens = line.rstrip('\n').split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey,
                                           objectDescription, lineNum,
                                           errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey,
                                             objectDescription, lineNum,
                                             errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        # if errors, continue to next record
        if objectKey == 0 or userKey == 0:
            continue

        # add term to translation file
        bcpWrite(transFile, [
            transKey, transTypeKey, objectKey, term, seq, userKey, userKey,
            loaddate, loaddate
        ])
        transKey += 1
        seq += 1

    # end of "for line in inputFile.readlines():"

    # NOTE(review): userKey here is whatever the last input line resolved
    # to, and is undefined when the input file is empty.
    if newTransType:
        bcpWrite(transTypeFile, [
            transTypeKey, mgiTypeKey, vocabKey, transTypeName,
            transCompression, 0, userKey, userKey, loaddate, loaddate
        ])
Exemplo n.º 44
def processAssayFile():
    """Read the assay input file and write assay, note and accession rows.

    Every tab-delimited input line must carry 7 fields: assay ID, marker
    ID, J number, assay type, reporter gene, note and creator login.

    Returns: the number of input lines read
    Effects: exits on the first line with fewer than 7 fields; skips
        lines whose marker, reference, assay type or (when given)
        reporter gene resolves to a key of 0; records each written
        assay in assayAssay; advances assayKey, accKey and mgiKey
    """

    global assayAssay, assayKey, accKey, mgiKey

    lineNum = 0

    for line in inAssayFile.readlines():

        error = 0
        lineNum += 1

        # strip only the line terminator so a final line without one
        # does not lose the last character of its last field
        tokens = line.rstrip('\n').split(TAB)

        try:
            assayID = tokens[0]
            markerID = tokens[1]
            jnum = tokens[2]
            assayType = tokens[3]
            reporterGene = tokens[4]
            note = tokens[5]
            createdBy = tokens[6]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        assayTypeKey = gxdloadlib.verifyAssayType(assayType, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if markerKey == 0 or referenceKey == 0 or assayTypeKey == 0:
            # set error flag to true
            error = 1

        # the reporter gene is optional
        if len(reporterGene) > 0:
            reporterGeneKey = gxdloadlib.verifyReporterGene(reporterGene, lineNum, errorFile)
            if reporterGeneKey == 0:
                error = 1
        else:
            reporterGeneKey = ''

        # if errors, continue to next record
        if error:
            continue

        # use the probe prep recorded for this assay, if there is one
        probePrepKey = assayProbePrep.get(assayID, '')

        # if no errors, process

        outAssayFile.write(TAB.join([
            str(assayKey),
            str(assayTypeKey),
            str(referenceKey),
            str(markerKey),
            str(probePrepKey),
            '',     # column left empty
            '',     # column left empty
            str(reporterGeneKey),
            str(createdByKey),
            str(createdByKey),
            loaddate, loaddate]) + CRT)

        # write the note in chunks of at most ASSAY_NOTE_LENGTH characters
        for i in range(0, len(note), ASSAY_NOTE_LENGTH):
            outAssayNoteFile.write(TAB.join([
                str(assayKey),
                note[i:i + ASSAY_NOTE_LENGTH],
                loaddate, loaddate]) + CRT)

        # MGI Accession ID for the assay

        outAccFile.write(TAB.join([
            str(accKey),
            mgiPrefix + str(mgiKey),
            mgiPrefix,
            str(mgiKey),
            accLogicalDBKey,
            str(assayKey),
            assayMgiTypeKey,
            accPrivate,
            accPreferred,
            str(createdByKey),
            str(createdByKey),
            loaddate, loaddate]) + CRT)

        assayAssay[assayID] = assayKey
        accKey += 1
        mgiKey += 1
        assayKey += 1

    # end of "for line in inAssayFile.readlines():"

    return lineNum
Exemplo n.º 45
def init():
    # requires:
    #   the module-level settings inputFileName, logDir, outputDir,
    #   mgiType, createdBy, jnum and mode are already set
    # effects:
    # 1. Derives the diagnostics/error file names from the input file name
    # 2. Opens the input, diagnostics, error and synonym output files
    # 3. Writes a run header to the diagnostics and error files
    # 4. Looks up the global keys (mgiTypeKey, createdByKey, referenceKey)
    # 5. In 'reload' mode, deletes the MGI_Synonym rows that match
    #    mgiTypeKey and createdByKey
    # exits:
    #   calls exit(1, ...) if a file cannot be opened, and exit(1) if
    #   any of the three keys comes back as 0
    # returns:
    #   nothing

    global diagFileName, errorFileName, synFileName
    global inputFile, diagFile, errorFile, synFile
    global mgiTypeKey, createdByKey, referenceKey

    # the log files are named after the input file and are placed in logDir
    tail = os.path.basename(inputFileName)
    diagFileName = logDir + '/' + tail + '.diagnostics'
    errorFileName = logDir + '/' + tail + '.error'
    synFileName = 'MGI_Synonym.bcp'

    # single-argument print(...) behaves the same as the print statement
    print(inputFileName)
    print(logDir)

    # each open is guarded on its own so the message names the file that
    # actually failed
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        exit(1, 'Could not open file %s\n' % inputFileName)

    try:
        diagFile = open(diagFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % errorFileName)

    # the synonym file is written to outputDir; report the full path on
    # failure, not just the bare file name
    synFilePath = outputDir + '/' + synFileName
    try:
        synFile = open(synFilePath, 'w')
    except IOError:
        exit(1, 'Could not open file %s\n' % synFilePath)

    # NOTE(review): a "Log all SQL" comment stood here with no statement
    # under it; if SQL logging is wanted, the call that enables it has to
    # be restored - confirm against the db module.

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    # if reference is J:0, then no reference is given; the key stays ''
    # (which is not 0, so it passes the check below)
    if jnum == 'J:0':
        referenceKey = ''
    else:
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)

    # exit if we can't resolve mgiType, createdBy or jnum
    if mgiTypeKey == 0 or createdByKey == 0 or referenceKey == 0:
        exit(1)

    # a reload replaces the synonyms previously loaded for this MGI type
    # by this user
    if mode == 'reload':
        print('mode is: %s, deleting synonyms' % mode)
        db.sql('delete from MGI_Synonym ' +
               'where _MGIType_key = %d ' % (mgiTypeKey) +
               'and _CreatedBy_key = %d ' % (createdByKey), None)
Exemplo n.º 46
def processFile():
    # requires:
    # effects:
    #       Reads input file
    #       Verifies and Processes each line in the input file
    # returns:
    #       nothing

    global strainalleleKey

    lineNum = 0
    notDeleted = 1

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = str.split(line[:-1], '\t')

            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        if len(strainID) == 4:
            strainID = '00' + strainID
        if len(strainID) == 3:
            strainID = '000' + strainID
        if len(strainID) == 2:
            strainID = '0000' + strainID
        if len(strainID) == 1:
            strainID = '00000' + strainID

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None,
                                         lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None,
                                         lineNum, errorFile)
        markerKey = 0

        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None,
                                             lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if notDeleted:
                'delete PRB_Strain_Marker where _CreatedBy_key = %s' %
                (createdByKey), None)
            notDeleted = 0

        # if Allele found, resolve to Marker

        if alleleKey > 0:
            results = db.sql(
                'select _Marker_key from ALL_Allele where _Allele_key = %s' %
                (alleleKey), 'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']

        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        if strainKey == 0 or markerKey == 0 or qualifierKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:

        # if no errors, process

        if alleleKey == 0:
            alleleKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    #	end of "for line in inputFile.readlines():"

    # Update the AccessionMax value

    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)

    # update prb_strain_marker_seq auto-sequence
        ''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''',